[llvm] [MCA] New option to report scheduling information: -scheduling-info (PR #126703)
Julien Villette via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 13 02:52:20 PST 2025
https://github.com/jvillette38 updated https://github.com/llvm/llvm-project/pull/126703
>From 8c3e6816149227bd78291b4d5476fd5c3ef51527 Mon Sep 17 00:00:00 2001
From: Julien Villette <julien.villette at sipearl.com>
Date: Wed, 5 Feb 2025 12:07:56 +0100
Subject: [PATCH 1/3] [mca] New option -scheduling-info
Outputs micro ops, latency, bypass latency, throughput, llvm opcode
name, used resources and parsed assembly instruction with comments.
This option is used to compare scheduling info from micro architecture documents.
Reference scheduling information (from architecture and micro-architecture
documents) is in the comment section after each instruction (// or
/* */).
This information may be generated from an Architecture Description Language.
This way, it is easy to compare information from llvm and from
documentation/ADL.
The LLVM opcode name helps to find the right instruction regexp to fix in
the Target Scheduling Info specification.
Example:
Input: abs D20, D11 // ABS <V><d>, <V><n>
\\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
Output: 1 | 2 | 2 | 4.00 | V1UnitV | ABSv1i64 |
abs d20, d11 // ABS <V><d>, <V><n>
\\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
---
llvm/docs/CommandGuide/llvm-mca.rst | 14 +
llvm/include/llvm/MC/MCSchedule.h | 4 +
llvm/lib/MC/MCSchedule.cpp | 37 +
.../AArch64/Neoverse/V1-scheduling-info.s | 7588 +++++++++++++++++
llvm/tools/llvm-mca/CMakeLists.txt | 1 +
.../llvm-mca/Views/InstructionInfoView.h | 1 +
.../llvm-mca/Views/SchedulingInfoView.cpp | 212 +
.../tools/llvm-mca/Views/SchedulingInfoView.h | 97 +
llvm/tools/llvm-mca/llvm-mca.cpp | 42 +-
9 files changed, 7985 insertions(+), 11 deletions(-)
create mode 100644 llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-scheduling-info.s
create mode 100644 llvm/tools/llvm-mca/Views/SchedulingInfoView.cpp
create mode 100644 llvm/tools/llvm-mca/Views/SchedulingInfoView.h
diff --git a/llvm/docs/CommandGuide/llvm-mca.rst b/llvm/docs/CommandGuide/llvm-mca.rst
index f610ea2f21682..5c945907785ac 100644
--- a/llvm/docs/CommandGuide/llvm-mca.rst
+++ b/llvm/docs/CommandGuide/llvm-mca.rst
@@ -170,6 +170,20 @@ option specifies "``-``", then the output will also be sent to standard output.
Enable extra scheduler statistics. This view collects and analyzes instruction
issue events. This view is disabled by default.
+.. option:: -scheduling-info
+
+ Enable scheduling info view. This view reports scheduling information defined
+ in LLVM target description in the form:
+ uOps | Latency | Bypass Latency | Throughput | LLVM OpcodeName | Resources
+ units | assembly instruction and its comment (// or /* */) if defined.
+ It allows comparing scheduling info with architecture documents and fixing it
+ in the target description by fixing InstrRW for the reported LLVM opcode.
+ Scheduling information can be given in the same order in each instruction's
+ comment to easily check reported against reference scheduling information.
+ Suggested information in comment:
+ ``// <architecture instruction form> \\ <scheduling documentation title> \\
+ <uOps>, <Latency>, <Bypass Latency>, <Throughput>, <Resources units>``
+
.. option:: -retire-stats
Enable extra retire control unit statistics. This view is disabled by default.
diff --git a/llvm/include/llvm/MC/MCSchedule.h b/llvm/include/llvm/MC/MCSchedule.h
index fe731d086f70a..4e72f633596a3 100644
--- a/llvm/include/llvm/MC/MCSchedule.h
+++ b/llvm/include/llvm/MC/MCSchedule.h
@@ -402,6 +402,10 @@ struct MCSchedModel {
static unsigned getForwardingDelayCycles(ArrayRef<MCReadAdvanceEntry> Entries,
unsigned WriteResourceIdx = 0);
+ /// Returns the forwarding delay of the definition with the maximum latency.
+ static unsigned getForwardingDelayCycles(const MCSubtargetInfo &STI,
+ const MCSchedClassDesc &SCDesc);
+
/// Returns the default initialized model.
static const MCSchedModel Default;
};
diff --git a/llvm/lib/MC/MCSchedule.cpp b/llvm/lib/MC/MCSchedule.cpp
index ed243cecabb76..36147f1fa9983 100644
--- a/llvm/lib/MC/MCSchedule.cpp
+++ b/llvm/lib/MC/MCSchedule.cpp
@@ -174,3 +174,40 @@ MCSchedModel::getForwardingDelayCycles(ArrayRef<MCReadAdvanceEntry> Entries,
return std::abs(DelayCycles);
}
+
+/// Returns the ReadAdvance cycles matching the maximum-latency write, else 0.
+unsigned
+MCSchedModel::getForwardingDelayCycles(const MCSubtargetInfo &STI,
+                                       const MCSchedClassDesc &SCDesc) {
+  ArrayRef<MCReadAdvanceEntry> Entries = STI.getReadAdvanceEntries(SCDesc);
+  if (Entries.empty())
+    return 0;
+
+  // Pick the write resource of the first definition with the largest latency.
+  unsigned MaxLatency = 0;
+  unsigned WriteResourceID = 0;
+  unsigned DefEnd = SCDesc.NumWriteLatencyEntries;
+
+  for (unsigned DefIdx = 0; DefIdx != DefEnd; ++DefIdx) {
+    // Lookup the definition's write latency in SubtargetInfo.
+    const MCWriteLatencyEntry *WLEntry =
+        STI.getWriteLatencyEntry(&SCDesc, DefIdx);
+    // Early exit if we found an invalid latency: consider no bypass.
+    if (WLEntry->Cycles < 0)
+      return 0;
+    unsigned Cycles = static_cast<unsigned>(WLEntry->Cycles);
+    if (Cycles > MaxLatency) {
+      MaxLatency = Cycles;
+      WriteResourceID = WLEntry->WriteResourceID;
+    }
+  }
+
+  // Report the advance cycles of the first entry for that write resource.
+  for (const MCReadAdvanceEntry &E : Entries)
+    if (E.WriteResourceID == WriteResourceID)
+      return E.Cycles;
+
+  // ReadAdvance entries may exist without naming this write resource; that
+  // means no bypass, so return 0 rather than trapping as unreachable.
+  return 0;
+}
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-scheduling-info.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-scheduling-info.s
new file mode 100644
index 0000000000000..c421166f22ea4
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-scheduling-info.s
@@ -0,0 +1,7588 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-v1 -scheduling-info < %s | FileCheck %s
+
+ .text
+ .file "V1-scheduling-info.s"
+ .globl test
+ .p2align 4
+ .type test, at function
+test:
+ .cfi_startproc
+ abs D15, D11 /* ABS <V><d>, <V><n> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV */
+ abs V25.2S, V25.2S // ABS <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ abs Z26.B, P6/M, Z27.B // ABS <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ adc W13, W6, W4 // ADC <Wd>, <Wn>, <Wm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ adc X8, X12, X10 // ADC <Xd>, <Xn>, <Xm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ adcs W29, W7, W30 // ADCS <Wd>, <Wn>, <Wm> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ adcs X11, X3, X5 // ADCS <Xd>, <Xn>, <Xm> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ add WSP, WSP, W10 // ADD <Wd|WSP>, <Wn|WSP>, <Wm> \\ ALU, basic, unconditional, no flagset \\ 1 2 2 2.00 V1UnitI
+ add WSP, WSP, W2, UXTB // ADD <Wd|WSP>, <Wn|WSP>, <Wm>, <wextend> \\ ALU, basic, unconditional, no flagset \\ 1 2 2 2.00 V1UnitI
+ add WSP, WSP, W13, UXTH #4 // ADD <Wd|WSP>, <Wn|WSP>, <Wm>, <wextend> #<amount> \\ ALU, basic, unconditional, no flagset \\ 1 2 2 2.00 V1UnitI
+ add WSP, WSP, W13, LSL #4 // ADD <Wd|WSP>, <Wn|WSP>, <Wm>, LSL #<amount> \\ Arithmetic, LSL shift, shift <= 4 \\ 1 2 2 2.00 V1UnitI
+ add X22, X2, X27 // ADD <Xd|SP>, <Xn|SP>, X<m> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ add X25, X9, W25, UXTB // ADD <Xd|SP>, <Xn|SP>, <R><m>, <extend> \\ ALU, basic \\ 1 2 2 2.00 V1UnitI
+ add X4, X28, W3, UXTB #3 // ADD <Xd|SP>, <Xn|SP>, <R><m>, <extend> #<amount> \\ ALU, extend and shift \\ 1 2 2 2.0 V1UnitM
+ add X0, X28, X26, LSL #3 // ADD <Xd|SP>, <Xn|SP>, X<m>, LSL #<amount> \\ Arithmetic, LSL shift, shift <= 4 \\ 1 1 1 4.0 V1UnitI
+ add WSP, WSP, #3765 // ADD <Wd|WSP>, <Wn|WSP>, #<imm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ add WSP, WSP, #3547, LSL #12 // ADD <Wd|WSP>, <Wn|WSP>, #<imm>, <shift> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ add X7, X30, #803 // ADD <Xd|SP>, <Xn|SP>, #<imm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ add X7, X2, #319, LSL #12 // ADD <Xd|SP>, <Xn|SP>, #<imm>, <shift> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ add Z13.D, Z13.D, #245 // ADD <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ add Z16.D, Z16.D, #233, LSL #8 // ADD <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ add W3, W2, W21, LSL #3 // ADD <Wd>, <Wn>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
+ add W6, W21, W17, LSL #15 // ADD <Wd>, <Wn>, <Wm>, LSL #<wamounth> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
+ add W28, W30, W19, ASR #30 // ADD <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
+ add X8, X3, X28, LSL #3 // ADD <Xd>, <Xn>, <Xm>, LSL #<amountl> \\ Arithmetic, LSL shift, shift <= 4 \\ 1 1 1 4.0 V1UnitI
+ add X12, X13, X0, LSL #44 // ADD <Xd>, <Xn>, <Xm>, LSL #<amounth> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
+ add X5, X20, X28, LSR #16 // ADD <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
+ add D0, D23, D21 // ADD <V><d>, <V><n>, <V><m> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ add V19.4S, V24.4S, V15.4S // ADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ add Z29.D, P5/M, Z29.D, Z29.D // ADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ add Z10.H, Z22.H, Z13.H // ADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ addhn V26.4H, V5.4S, V9.4S // ADDHN <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ addhn2 V1.16B, V19.8H, V6.8H // ADDHN2 <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ addp D1, V14.2D // ADDP <V><d>, <Vn>.<T> \\ ASIMD arith, pair-wise \\ 1 2 2 4.0 V1UnitV
+ addp V7.2S, V1.2S, V2.2S // ADDP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, pair-wise \\ 1 2 2 4.0 V1UnitV
+ addpl X27, X6, #-6 // ADDPL <Xd|SP>, <Xn|SP>, #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ adds W17, WSP, W25 // ADDS <Wd>, <Wn|WSP>, <Wm> \\ ALU, basic, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+ adds W6, WSP, W15, UXTH // ADDS <Wd>, <Wn|WSP>, <Wm>, <wextend> \\ ALU, basic, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+ adds W22, WSP, W30, UXTB #2 // ADDS <Wd>, <Wn|WSP>, <Wm>, <wextend> #<amount> \\ ALU, basic, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ adds W12, WSP, W29, LSL #4 // ADDS <Wd>, <Wn|WSP>, <Wm>, LSL #<amount> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+ adds X14, X0, X10 // ADDS <Xd>, <Xn|SP>, X<m> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ adds X13, X23, W8, UXTB // ADDS <Xd>, <Xn|SP>, <R><m>, <extend> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ adds X4, X26, W28, UXTB #1 // ADDS <Xd>, <Xn|SP>, <R><m>, <extend> #<amount> \\ ALU, flagset, extend and shift \\ 1 1 1 3.00 V1UnitFlg, V1UnitI
+ adds X10, X3, X29, LSL #2 // ADDS <Xd>, <Xn|SP>, X<m>, LSL #<amount> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ adds W23, WSP, #502 // ADDS <Wd>, <Wn|WSP>, #<imm> \\ ALU, basic, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ adds W2, WSP, #2980, LSL #12 // ADDS <Wd>, <Wn|WSP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 1 1 3.00 V1UnitFlg, V1UnitI
+ adds X12, X4, #1345 // ADDS <Xd>, <Xn|SP>, #<imm> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ adds X25, X18, #3037, LSL #12 // ADDS <Xd>, <Xn|SP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 1 1 3.00 V1UnitFlg, V1UnitI
+ adds W12, W13, W26 // ADDS <Wd>, <Wn>, <Wm> \\ ALU, basic, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ adds W0, W23, W20, LSL #0 // ADDS <Wd>, <Wn>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ adds W13, W16, W12, LSL #28 // ADDS <Wd>, <Wn>, <Wm>, LSL #<wamounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+ adds W20, W19, W16, ASR #0 // ADDS <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+ adds X23, X12, X4 // ADDS <Xd>, <Xn>, <Xm> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ adds X0, X13, X4, LSL #2 // ADDS <Xd>, <Xn>, <Xm>, LSL #<amountl> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ adds X4, X7, X6, LSL #31 // ADDS <Xd>, <Xn>, <Xm>, LSL #<amounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+ adds X9, X8, X9, ASR #41 // ADDS <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+ addv B0, V28.8B // ADDV B<d>, <Vn>.8B \\ ASIMD arith, reduce, 8B/8H \\ 2 4 4 2.00 V1UnitV13
+ addv B1, V26.16B // ADDV B<d>, <Vn>.16B \\ ASIMD arith, reduce, 16B \\ 2 4 4 1.00 V1UnitV13[2]
+ addv H18, V13.4H // ADDV H<d>, <Vn>.4H \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+ addv H29, V17.8H // ADDV H<d>, <Vn>.8H \\ ASIMD arith, reduce, 8B/8H \\ 2 4 4 2.00 V1UnitV13
+ addv S22, V18.4S // ADDV S<d>, <Vn>.4S \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+ addvl X1, X27, #-8 // ADDVL <Xd|SP>, <Xn|SP>, #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ adr X3, test // ADR <Xd>, <label> \\ Address generation \\ 1 1 1 4.0 V1UnitI
+ adr Z26.D, [Z1.D, Z8.D] // ADR <Zd>.<T>, [<Zn>.<T>, <Zm>.<T>] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ adr Z22.S, [Z28.S, Z8.S, LSL #2] // ADR <Zd>.<T>, [<Zn>.<T>, <Zm>.<T>, <mod> #<amount>] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ adr Z11.D, [Z2.D, Z29.D, SXTW ] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, SXTW ] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ adr Z3.D, [Z9.D, Z9.D, SXTW #2] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, SXTW #<amount>] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ adr Z6.D, [Z7.D, Z13.D, UXTW ] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, UXTW ] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ adr Z4.D, [Z24.D, Z22.D, UXTW #1] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, UXTW #<amount>] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ adrp X0, test // ADRP <Xd>, <label> \\ Address generation \\ 1 1 1 4.0 V1UnitI
+ and WSP, W16, #0xe00 // AND <Wd|WSP>, <Wn>, #<imms> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ and X2, X22, #0x1e00 // AND <Xd|SP>, <Xn>, #<imm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ and Z1.B, Z1.B, #0x70 // AND <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ and Z7.H, Z7.H, #0x60 // AND <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ and Z7.S, Z7.S, #0x2 // AND <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ and Z7.D, Z7.D, #0x4 // AND <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ and P5.B, P1/Z, P6.B, P4.B // AND <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+ and W11, W14, W24 // AND <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ and W2, W21, W22, LSR #25 // AND <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ and X1, X20, X29 // AND <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ and X8, X11, X22, ASR #56 // AND <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ and V29.8B, V26.8B, V26.8B // AND <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ and Z17.D, P6/M, Z17.D, Z12.D // AND <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ and Z9.D, Z5.D, Z17.D // AND <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ ands W14, W8, #0x70 // ANDS <Wd>, <Wn>, #<imms> \\ ALU, basic, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ ands X4, X10, #0x60 // ANDS <Xd>, <Xn>, #<immd> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ ands W29, W28, W12 // ANDS <Wd>, <Wn>, <Wm> \\ ALU, basic, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+ ands W7, W13, W23, ASR #3 // ANDS <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift by immed, flagset, unconditional \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+ ands X21, X9, X6 // ANDS <Xd>, <Xn>, <Xm> \\ ALU, basic, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+ ands X10, X27, X7, ASR #20 // ANDS <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, flagset \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+ ands P5.B, P1/Z, P2.B, P7.B // ANDS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 2 2 2 0.50 V1UnitM0[2]
+ andv H7, P6, Z31.H // ANDV <V><d>, <Pg>, <Zn>.<T> \\ Reduction, logical \\ 4 12 12 0.50 V1UnitV01[4]
+ asr W30, W14, #5 // ASR <Wd>, <Wn>, #<shifts> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+ asr X12, X21, #28 // ASR <Xd>, <Xn>, #<shiftd> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+ asr Z7.B, P5/M, Z7.B, #3 // ASR <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ asr Z6.H, P6/M, Z6.H, #5 // ASR <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ asr Z28.S, P0/M, Z28.S, #11 // ASR <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ asr Z26.D, P5/M, Z26.D, #24 // ASR <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ asr Z10.B, Z14.B, #3 // ASR <Zd>.B, <Zn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ asr Z23.H, Z18.H, #6 // ASR <Zd>.H, <Zn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ asr Z29.S, Z11.S, #6 // ASR <Zd>.S, <Zn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ asr Z20.D, Z26.D, #29 // ASR <Zd>.D, <Zn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ asr W3, W0, W20 // ASR <Wd>, <Wn>, <Wm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
+ asr X7, X5, X21 // ASR <Xd>, <Xn>, <Xm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
+ asr Z3.S, P0/M, Z3.S, Z10.S // ASR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ asr Z9.S, P2/M, Z9.S, Z8.D // ASR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ asr Z26.S, Z21.S, Z21.D // ASR <Zd>.<T>, <Zn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ asrd Z6.B, P4/M, Z6.B, #2 // ASRD <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift right for divide \\ 1 4 4 1.0 V1UnitV1
+ asrd Z19.H, P3/M, Z19.H, #6 // ASRD <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift right for divide \\ 1 4 4 1.0 V1UnitV1
+ asrd Z16.S, P3/M, Z16.S, #2 // ASRD <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift right for divide \\ 1 4 4 1.0 V1UnitV1
+ asrd Z9.D, P6/M, Z9.D, #12 // ASRD <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift right for divide \\ 1 4 4 1.0 V1UnitV1
+ asrr Z0.B, P0/M, Z0.B, Z19.B // ASRR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ asrv W24, W28, W13 // ASRV <Wd>, <Wn>, <Wm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+ asrv X3, X21, X24 // ASRV <Xd>, <Xn>, <Xm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+ at s12e1r, X28 // AT <at_op>, <Xt> \\ No description \\ No scheduling info
+ b test // B <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.eq test // B.eq <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.none test // B.none <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.ne test // B.ne <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.any test // B.any <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.cs test // B.cs <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.hs test // B.hs <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.nlast test // B.nlast <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.cc test // B.cc <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.lo test // B.lo <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.last test // B.last <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.mi test // B.mi <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.first test // B.first <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.pl test // B.pl <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.nfrst test // B.nfrst <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.vs test // B.vs <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.vc test // B.vc <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.hi test // B.hi <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.pmore test // B.pmore <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.ls test // B.ls <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.plast test // B.plast <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.ge test // B.ge <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.tcont test // B.tcont <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.lt test // B.lt <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.tstop test // B.tstop <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.gt test // B.gt <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.le test // B.le <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.al test // B.al <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.nv test // B.nv <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ bfcvt H6, S20 // BFCVT <Hd>, <Sn> \\ Scalar convert, F32 to BF16 \\ 1 3 3 2.0 V1UnitV02
+ bfcvt Z16.H, P6/M, Z1.S // BFCVT <Zd>.H, <Pg>/M, <Zn>.S \\ Convert, F32 to BF16 \\ 1 4 4 1.0 V1UnitV0
+ bfcvtn V12.4H, V15.4S // BFCVTN <Vd>.<Ta>, <Vn>.4S \\ ASIMD convert, F32 to BF16 \\ 1 4 4 2.00 V1UnitV, V1UnitV02
+ bfcvtn2 V15.8H, V13.4S // BFCVTN2 <Vd>.<Ta>, <Vn>.4S \\ ASIMD convert, F32 to BF16 \\ 1 4 4 2.00 V1UnitV, V1UnitV02
+ bfcvtnt Z11.H, P7/M, Z24.S // BFCVTNT <Zd>.H, <Pg>/M, <Zn>.S \\ Convert, F32 to BF16 \\ 1 4 4 1.0 V1UnitV0
+ bfdot V0.2S, V24.4H, V14.2H[2] // BFDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.2H[<index>] \\ ASIMD dot product \\ 1 4 2 4.0 V1UnitV
+ bfdot Z24.S, Z26.H, Z2.H[0] // BFDOT <Zda>.S, <Zn>.H, <Zm>.H[<imm>] \\ Dot product \\ 1 4 2 2.0 V1UnitV01
+ bfdot V31.4S, V21.8H, V14.8H // BFDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD dot product \\ 1 4 2 4.0 V1UnitV
+ bfdot Z15.S, Z3.H, Z7.H // BFDOT <Zda>.S, <Zn>.H, <Zm>.H \\ Dot product \\ 1 4 2 2.0 V1UnitV01
+ bfi W10, W26, #31, #1 // BFI <Wd>, <Wn>, #<lsbs>, #<widths> \\ Bit field insert/clear, unconditional \\ 1 2 2 2.0 V1UnitM
+ bfi X25, X7, #8, #1 // BFI <Xd>, <Xn>, #<lsbd>, #<widthd> \\ Bit field insert/clear, unconditional \\ 1 2 2 2.0 V1UnitM
+ bfm W30, W26, #14, #12 // BFM <Wd>, <Wn>, #<immrs>, #<immss> \\ Bitfield move, insert \\ 1 2 2 2.0 V1UnitM
+ bfm X15, X20, #0, #35 // BFM <Xd>, <Xn>, #<immrd>, #<immsd> \\ Bitfield move, insert \\ 1 2 2 2.0 V1UnitM
+ bfmlalb Z13.S, Z30.H, Z0.H[0] // BFMLALB <Zda>.S, <Zn>.H, <Zm>.H[<imm>] \\ Multiply accumulate long \\ 1 5 2 2.0 V1UnitV01
+ bfmlalb Z3.S, Z14.H, Z13.H // BFMLALB <Zda>.S, <Zn>.H, <Zm>.H \\ Multiply accumulate long \\ 1 5 2 2.0 V1UnitV01
+ bfmlalb V22.4S, V11.8H, V11.H[5] // BFMLALB <Vd>.4S, <Vn>.8H, <Vm>.H[<index>] \\ ASIMD multiply accumulate long \\ 1 4 2 4.0 V1UnitV
+ bfmlalt V17.4S, V4.8H, V11.H[7] // BFMLALT <Vd>.4S, <Vn>.8H, <Vm>.H[<index>] \\ ASIMD multiply accumulate long \\ 1 4 2 4.0 V1UnitV
+ bfmlalb V13.4S, V5.8H, V17.8H // BFMLALB <Vd>.4S, <Vn>.8H, <Vm>.8H \\ ASIMD multiply accumulate long \\ 1 4 2 4.0 V1UnitV
+ bfmlalt V10.4S, V16.8H, V1.8H // BFMLALT <Vd>.4S, <Vn>.8H, <Vm>.8H \\ ASIMD multiply accumulate long \\ 1 4 2 4.0 V1UnitV
+ bfmlalt Z23.S, Z3.H, Z2.H[2] // BFMLALT <Zda>.S, <Zn>.H, <Zm>.H[<imm>] \\ Multiply accumulate long \\ 1 5 2 2.0 V1UnitV01
+ bfmlalt Z25.S, Z21.H, Z22.H // BFMLALT <Zda>.S, <Zn>.H, <Zm>.H \\ Multiply accumulate long \\ 1 5 2 2.0 V1UnitV01
+ bfmmla V15.4S, V28.8H, V23.8H // BFMMLA <Vd>.4S, <Vn>.8H, <Vm>.8H \\ ASIMD matrix multiply accumulate \\ 1 5 3 4.0 V1UnitV
+ bfmmla Z26.S, Z2.H, Z12.H // BFMMLA <Zda>.S, <Zn>.H, <Zm>.H \\ Matrix multiply accumulate \\ 1 5 3 2.0 V1UnitV01
+ bfxil W27, W23, #14, #14 // BFXIL <Wd>, <Wn>, #<lsbs>, #<widths> \\ No description \\ No scheduling info
+ bfxil X0, X5, #11, #22 // BFXIL <Xd>, <Xn>, #<lsbd>, #<widthd> \\ No description \\ No scheduling info
+ bic Z28.B, Z28.B, #0x70 // BIC <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ bic Z18.H, Z18.H, #0x60 // BIC <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ bic Z12.S, Z12.S, #0x2 // BIC <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ bic Z6.D, Z6.D, #0x4 // BIC <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ bic P4.B, P4/Z, P6.B, P0.B // BIC <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+ bic W0, W26, W22 // BIC <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ bic W23, W10, W7, LSL #11 // BIC <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ bic X21, X20, X14 // BIC <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ bic X21, X3, X17, LSR #35 // BIC <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ bic V6.4H, #217 // BIC <Vd>.<Th>, #<imm8> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ bic V23.8H, #101, LSL #0 // BIC <Vd>.<Th>, #<imm8>, LSL #<amounth> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ bic V24.2S, #70 // BIC <Vd>.<T>, #<imm8> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ bic V31.2S, #192, LSL #0 // BIC <Vd>.<T>, #<imm8>, LSL #<amount> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ bic V25.16B, V10.16B, V9.16B // BIC <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ bic Z15.D, P4/M, Z15.D, Z25.D // BIC <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ bic Z7.D, Z8.D, Z28.D // BIC <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ bics W24, W1, W25 // BICS <Wd>, <Wn>, <Wm> \\ ALU, basic, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+ bics W21, W0, W24, LSL #11 // BICS <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift by immed, flagset, unconditional \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+ bics X27, X25, X10 // BICS <Xd>, <Xn>, <Xm> \\ ALU, basic, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+ bics X22, X6, X27, LSL #62 // BICS <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, flagset \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+ bics P2.B, P4/Z, P1.B, P7.B // BICS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 2 2 2 0.50 V1UnitM0[2]
+ bif V0.8B, V25.8B, V4.8B // BIF <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD bitwise insert \\ 1 2 2 4.0 V1UnitV
+ bit V5.8B, V12.8B, V22.8B // BIT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD bitwise insert \\ 1 2 2 4.0 V1UnitV
+ bl test // BL <label> \\ Branch and link, immed \\ 2 1 1 2.0 V1UnitB,V1UnitS
+ blr X11 // BLR <Xn> \\ Branch and link, register \\ 2 1 1 2.0 V1UnitB,V1UnitS
+ br X17 // BR <Xn> \\ Branch, register \\ 1 1 1 2.0 V1UnitB
+ brk #33813 // BRK #<imm> \\ No description \\ No scheduling info
+ brka P7.B, P7/Z, P5.B // BRKA <Pd>.B, <Pg>/<ZM>, <Pn>.B \\ Loop control, based on predicate \\ 1 2 2 1.0 V1UnitM0
+ brkas P6.B, P5/Z, P0.B // BRKAS <Pd>.B, <Pg>/Z, <Pn>.B \\ Loop control, based on predicate and flag setting \\ 2 3 3 0.50 V1UnitM0[2]
+ brkb P5.B, P0/Z, P1.B // BRKB <Pd>.B, <Pg>/<ZM>, <Pn>.B \\ Loop control, based on predicate \\ 1 2 2 1.0 V1UnitM0
+ brkbs P6.B, P1/Z, P4.B // BRKBS <Pd>.B, <Pg>/Z, <Pn>.B \\ Loop control, based on predicate and flag setting \\ 2 3 3 0.50 V1UnitM0[2]
+ brkn P7.B, P0/Z, P6.B, P7.B // BRKN <Pdm>.B, <Pg>/Z, <Pn>.B, <Pdm>.B \\ Loop control, based on predicate \\ 1 2 2 1.0 V1UnitM0
+ brkns P3.B, P1/Z, P7.B, P3.B // BRKNS <Pdm>.B, <Pg>/Z, <Pn>.B, <Pdm>.B \\ Loop control, based on predicate and flag setting \\ 2 3 3 0.50 V1UnitM0[2]
+ brkpa P3.B, P5/Z, P0.B, P1.B // BRKPA <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Loop control, based on predicate \\ 1 2 2 1.0 V1UnitM0
+ brkpas P2.B, P5/Z, P1.B, P3.B // BRKPAS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Loop control, based on predicate and flag setting \\ 2 3 3 0.50 V1UnitM0[2]
+ brkpb P1.B, P0/Z, P7.B, P6.B // BRKPB <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Loop control, based on predicate \\ 1 2 2 1.0 V1UnitM0
+ brkpbs P7.B, P1/Z, P6.B, P1.B // BRKPBS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Loop control, based on predicate and flag setting \\ 2 3 3 0.50 V1UnitM0[2]
+ bsl V27.16B, V13.16B, V21.16B // BSL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD bitwise insert \\ 1 2 2 4.0 V1UnitV
+ cbnz W21, test // CBNZ <Wt>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+ cbnz X26, test // CBNZ <Xt>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+ cbz W6, test // CBZ <Wt>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+ cbz X4, test // CBZ <Xt>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+ ccmn W8, #14, #3, HS // CCMN <Wn>, #<imm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+ ccmn X23, #17, #0, GT // CCMN <Xn>, #<imm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+ ccmn W17, W18, #12, CS // CCMN <Wn>, <Wm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+ ccmn X19, X29, #12, LAST // CCMN <Xn>, <Xm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+ ccmp W24, #2, #5, NLAST // CCMP <Wn>, #<imm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+ ccmp X12, #8, #2, LO // CCMP <Xn>, #<imm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+ ccmp W2, W9, #3, TSTOP // CCMP <Wn>, <Wm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+ ccmp X11, X10, #13, LS // CCMP <Xn>, <Xm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+ cinc W23, W5, TSTOP // CINC <Wd>, <Wn>, <cond> \\ No description \\ No scheduling info
+ cinc X2, X1, NFRST // CINC <Xd>, <Xn>, <cond> \\ No description \\ No scheduling info
+ cinv W9, W12, TCONT // CINV <Wd>, <Wn>, <cond> \\ No description \\ No scheduling info
+ cinv X9, X30, FIRST // CINV <Xd>, <Xn>, <cond> \\ No description \\ No scheduling info
+ clasta B11, P4, B11, Z21.B // CLASTA <V><dn>, <Pg>, <V><dn>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 1 3 3 1.0 V1UnitV1
+ clasta W8, P0, W8, Z6.B // CLASTA <R><dn>, <Pg>, <R><dn>, <Zm>.<T> \\ Conditional extract operations, scalar form \\ 2 9 9 1.0 V1UnitM0,V1UnitV1
+ clasta Z25.S, P1, Z25.S, Z14.S // CLASTA <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 1 3 3 1.0 V1UnitV1
+ clastb D6, P7, D6, Z31.D // CLASTB <V><dn>, <Pg>, <V><dn>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 1 3 3 1.0 V1UnitV1
+ clastb W28, P6, W28, Z12.B // CLASTB <R><dn>, <Pg>, <R><dn>, <Zm>.<T> \\ Conditional extract operations, scalar form \\ 2 9 9 1.0 V1UnitM0,V1UnitV1
+ clastb Z27.H, P6, Z27.H, Z22.H // CLASTB <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 1 3 3 1.0 V1UnitV1
+ clrex // CLREX \\ No description \\ No scheduling info
+ clrex #12 // CLREX #<imm> \\ No description \\ No scheduling info
+ cls V5.8B, V22.8B // CLS <Vd>.<T>, <Vn>.<T> \\ ASIMD count \\ 1 2 2 4.0 V1UnitV
+ cls W25, W0 // CLS <Wd>, <Wn> \\ Count leading \\ 1 1 1 4.0 V1UnitI
+ cls X22, X6 // CLS <Xd>, <Xn> \\ Count leading \\ 1 1 1 4.0 V1UnitI
+ cls Z28.D, P3/M, Z2.D // CLS <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Count/reverse bits \\ 1 2 2 2.0 V1UnitV01
+ clz V24.8H, V30.8H // CLZ <Vd>.<T>, <Vn>.<T> \\ ASIMD count \\ 1 2 2 4.0 V1UnitV
+ clz W26, W27 // CLZ <Wd>, <Wn> \\ Count leading zeros \\ 1 1 1 4.0 V1UnitI
+ clz X4, X0 // CLZ <Xd>, <Xn> \\ Count leading zeros \\ 1 1 1 4.0 V1UnitI
+ clz Z3.S, P3/M, Z18.S // CLZ <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Count/reverse bits \\ 1 2 2 2.0 V1UnitV01
+ cmeq D26, D5, D25 // CMEQ <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmeq V9.8H, V16.8H, V24.8H // CMEQ <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmeq D7, D26, #0 // CMEQ <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmeq V14.4H, V18.4H, #0 // CMEQ <Vd>.<T>, <Vn>.<T>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmge D26, D21, D28 // CMGE <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmge V22.8H, V16.8H, V3.8H // CMGE <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmge D30, D12, #0 // CMGE <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmge V22.16B, V30.16B, #0 // CMGE <Vd>.<T>, <Vn>.<T>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmgt D23, D25, D12 // CMGT <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmgt V3.2D, V29.2D, V11.2D // CMGT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmgt D28, D14, #0 // CMGT <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmgt V22.2S, V10.2S, #0 // CMGT <Vd>.<T>, <Vn>.<T>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmhi D29, D16, D5 // CMHI <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmhi V28.4H, V25.4H, V21.4H // CMHI <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmhs D5, D3, D12 // CMHS <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmhs V6.8B, V31.8B, V12.8B // CMHS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmle D14, D21, #0 // CMLE <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmle V21.2S, V19.2S, #0 // CMLE <Vd>.<T>, <Vn>.<T>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmlt D21, D24, #0 // CMLT <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmlt V26.4H, V12.4H, #0 // CMLT <Vd>.<T>, <Vn>.<T>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmn WSP, W7 // CMN <Wn|WSP>, <Wm> \\ ALU, basic, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+ cmn WSP, W8, SXTB // CMN <Wn|WSP>, <Wm>, <wextend> \\ ALU, basic, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+ cmn WSP, W3, UXTB #3 // CMN <Wn|WSP>, <Wm>, <wextend> #<amount> \\ ALU, basic, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ cmn WSP, W7, LSL #3 // CMN <Wn|WSP>, <Wm>, LSL #<amount> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+ cmn X2, X28 // CMN <Xn|SP>, X<m> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ cmn X3, W0, UXTB // CMN <Xn|SP>, <R><m>, <extend> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ cmn X0, W4, UXTB #3 // CMN <Xn|SP>, <R><m>, <extend> #<amount> \\ ALU, flagset, extend and shift \\ 1 1 1 3.00 V1UnitFlg, V1UnitI
+ cmn X14, X26, LSL #2 // CMN <Xn|SP>, X<m>, LSL #<amount> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ cmn WSP, #613 // CMN <Wn|WSP>, #<imm> \\ ALU, basic, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ cmn WSP, #2991, LSL #12 // CMN <Wn|WSP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 1 1 3.00 V1UnitFlg, V1UnitI
+ cmn X23, #3803 // CMN <Xn|SP>, #<imm> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ cmn X29, #3786, LSL #12 // CMN <Xn|SP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 1 1 3.00 V1UnitFlg, V1UnitI
+ cmn W12, W0 // CMN <Wn>, <Wm> \\ ALU, basic, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ cmn W19, W27, LSL #1 // CMN <Wn>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ cmn W2, W11, LSL #29 // CMN <Wn>, <Wm>, LSL #<wamounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+ cmn W0, W0, ASR #30 // CMN <Wn>, <Wm>, <shift> #<wamount> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+ cmn X23, X28 // CMN <Xn>, <Xm> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ cmn X6, X1, LSL #2 // CMN <Xn>, <Xm>, LSL #<amountl> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ cmn X28, X30, LSL #26 // CMN <Xn>, <Xm>, LSL #<amounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+ cmn X25, X15, LSR #49 // CMN <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+ cmp WSP, W26 // CMP <Wn|WSP>, <Wm> \\ ALU, basic, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+ cmp WSP, W13, SXTH // CMP <Wn|WSP>, <Wm>, <wextend> \\ ALU, basic, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+ cmp WSP, W12, SXTH #3 // CMP <Wn|WSP>, <Wm>, <wextend> #<amount> \\ ALU, basic, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+ cmp WSP, W30, LSL #4 // CMP <Wn|WSP>, <Wm>, LSL #<amount> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+ cmp X22, X18 // CMP <Xn|SP>, X<m> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ cmp X16, W27, UXTB // CMP <Xn|SP>, <R><m>, <extend> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ cmp X16, W7, UXTB #4 // CMP <Xn|SP>, <R><m>, <extend> #<amount> \\ ALU, flagset, extend and shift \\ 1 1 1 3.00 V1UnitFlg, V1UnitI
+ cmp X21, X24, LSL #4 // CMP <Xn|SP>, X<m>, LSL #<amount> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ cmp WSP, #2342 // CMP <Wn|WSP>, #<imm> \\ ALU, basic, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ cmp WSP, #3664, LSL #12 // CMP <Wn|WSP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 1 1 3.00 V1UnitFlg, V1UnitI
+ cmp X5, #1482 // CMP <Xn|SP>, #<imm> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ cmp X4, #3684, LSL #12 // CMP <Xn|SP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 1 1 3.00 V1UnitFlg, V1UnitI
+ cmp W14, W0, LSL #4 // CMP <Wn>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ cmp W0, W23, LSL #29 // CMP <Wn>, <Wm>, LSL #<wamounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+ cmp W2, W28, LSR #20 // CMP <Wn>, <Wm>, <shift> #<wamount> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+ cmp X27, X10, LSL #1 // CMP <Xn>, <Xm>, LSL #<amountl> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ cmp X18, X12, LSL #14 // CMP <Xn>, <Xm>, LSL #<amounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+ cmp X6, X7, LSR #0 // CMP <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+ cmpeq P2.H, P0/Z, Z26.H, #-8 // CMPEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmpge P1.B, P4/Z, Z28.B, #-6 // CMPGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmpgt P1.B, P0/Z, Z13.B, #14 // CMPGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmphi P1.D, P3/Z, Z23.D, #12 // CMPHI <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmphs P7.D, P5/Z, Z23.D, #114 // CMPHS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmple P5.B, P2/Z, Z9.B, #9 // CMPLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmplo P3.S, P5/Z, Z18.S, #87 // CMPLO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmpls P6.D, P6/Z, Z31.D, #56 // CMPLS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmplt P0.H, P6/Z, Z29.H, #-13 // CMPLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmpne P5.S, P4/Z, Z18.S, #15 // CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmpeq P6.S, P5/Z, Z2.S, Z9.S // CMPEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmpge P7.S, P4/Z, Z15.S, Z15.S // CMPGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmpgt P2.H, P4/Z, Z26.H, Z11.H // CMPGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmphi P0.S, P4/Z, Z8.S, Z4.S // CMPHI <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmphs P1.D, P6/Z, Z26.D, Z15.D // CMPHS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmpne P4.B, P3/Z, Z21.B, Z16.B // CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmpeq P2.D, P3/Z, Z13.D, Z18.D // CMPEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmpge P2.B, P3/Z, Z3.B, Z16.D // CMPGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmpgt P2.H, P2/Z, Z28.H, Z30.D // CMPGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmphi P0.H, P5/Z, Z30.H, Z16.D // CMPHI <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmphs P7.H, P2/Z, Z1.H, Z26.D // CMPHS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmple P7.B, P7/Z, Z3.B, Z13.D // CMPLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmplo P6.D, P2/Z, Z16.D, Z16.D // CMPLO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmpls P3.H, P2/Z, Z12.H, Z26.D // CMPLS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmplt P0.D, P4/Z, Z29.D, Z26.D // CMPLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmpne P0.S, P4/Z, Z30.S, Z8.D // CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmple P1.D, P3/Z, Z2.D, Z26.D // CMPLE <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmplo P7.B, P0/Z, Z4.B, Z25.B // CMPLO <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmpls P4.D, P4/Z, Z2.D, Z14.D // CMPLS <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmplt P2.S, P2/Z, Z31.S, Z21.S // CMPLT <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmtst D10, D6, D5 // CMTST <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmtst V13.2D, V13.2D, V13.2D // CMTST <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cneg W3, W17, HI // CNEG <Wd>, <Wn>, <cond> \\ No description \\ No scheduling info
+ cneg X26, X8, LAST // CNEG <Xd>, <Xn>, <cond> \\ No description \\ No scheduling info
+ cnot Z7.S, P7/M, Z8.S // CNOT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ cnt V12.16B, V14.16B // CNT <Vd>.<T>, <Vn>.<T> \\ ASIMD count \\ 1 2 2 4.0 V1UnitV
+ cnt Z26.H, P0/M, Z27.H // CNT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ cntb X18 // CNTB <Xd> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ cntb X9, VL128 // CNTB <Xd>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ cntb X28, VL8, MUL #13 // CNTB <Xd>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ cntd X20 // CNTD <Xd> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ cntd X27, VL7 // CNTD <Xd>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ cntd X8, VL7, MUL #2 // CNTD <Xd>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ cnth X27 // CNTH <Xd> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ cnth X0, VL1 // CNTH <Xd>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ cnth X16, VL3, MUL #6 // CNTH <Xd>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ cntw X22 // CNTW <Xd> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ cntw X23, VL3 // CNTW <Xd>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ cntw X6, VL16, MUL #11 // CNTW <Xd>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ cntp X22, P1, P2.S // CNTP <Xd>, <Pg>, <Pn>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+ compact Z17.S, P1, Z18.S // COMPACT <Zd>.<T>, <Pg>, <Zn>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 1 3 3 1.0 V1UnitV1
+ cpy Z13.B, P0/M, B6 // CPY <Zd>.<T>, <Pg>/M, <V><n> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+ cpy Z3.B, P6/M, #-118 // CPY <Zd>.B, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+ cpy Z11.S, P5/M, #-62 // CPY <Zd>.<T>, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+ cpy Z0.H, P0/M, #-11, LSL #0 // CPY <Zd>.<T>, <Pg>/M, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+ cpy Z5.B, P1/Z, #-90 // CPY <Zd>.B, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+ cpy Z12.H, P1/Z, #-118 // CPY <Zd>.<T>, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+ cpy Z25.D, P3/Z, #-81, LSL #8 // CPY <Zd>.<T>, <Pg>/Z, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+ cpy Z24.H, P0/M, W19 // CPY <Zd>.<T>, <Pg>/M, <R><n> \\ Copy, scalar \\ 2 5 5 1.0 V1UnitM0,V1UnitV01
+ cpy Z23.S, P2/M, WSP // CPY <Zd>.<T>, <Pg>/M, <R2>SP \\ Copy, scalar \\ 2 5 5 1.0 V1UnitM0,V1UnitV01
+ crc32b W27, W12, W15 // CRC32B <Wd>, <Wn>, <Wm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+ crc32h W3, W15, W21 // CRC32H <Wd>, <Wn>, <Wm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+ crc32w W9, W18, W24 // CRC32W <Wd>, <Wn>, <Wm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+ crc32x W19, W6, X25 // CRC32X <Wd>, <Wn>, <Xm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+ crc32cb W25, W28, W30 // CRC32CB <Wd>, <Wn>, <Wm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+ crc32ch W25, W26, W16 // CRC32CH <Wd>, <Wn>, <Wm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+ crc32cw W27, W12, W23 // CRC32CW <Wd>, <Wn>, <Wm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+ crc32cx W21, W28, X5 // CRC32CX <Wd>, <Wn>, <Xm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+ csdb // CSDB \\ No description \\ No scheduling info
+ csel W25, W16, W30, LS // CSEL <Wd>, <Wn>, <Wm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+ csel X28, X1, X2, PL // CSEL <Xd>, <Xn>, <Xm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+ cset W6, NE // CSET <Wd>, <cond> \\ No description \\ No scheduling info
+ cset X11, LT // CSET <Xd>, <cond> \\ No description \\ No scheduling info
+ csetm W3, HI // CSETM <Wd>, <cond> \\ No description \\ No scheduling info
+ csetm X6, NE // CSETM <Xd>, <cond> \\ No description \\ No scheduling info
+ csinc W9, W3, W14, LT // CSINC <Wd>, <Wn>, <Wm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+ csinc X20, X11, X23, TCONT // CSINC <Xd>, <Xn>, <Xm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+ csinv W1, W4, W3, NLAST // CSINV <Wd>, <Wn>, <Wm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+ csinv X27, X21, X15, NE // CSINV <Xd>, <Xn>, <Xm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+ csneg W5, W13, W4, HI // CSNEG <Wd>, <Wn>, <Wm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+ csneg X8, X29, X29, PMORE // CSNEG <Xd>, <Xn>, <Xm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+ ctermeq X4, X11 // CTERMEQ <R><n>, <R><m> \\ Loop terminate \\ 1 1 1 1.0 V1UnitM0
+ ctermne X0, X16 // CTERMNE <R><n>, <R><m> \\ Loop terminate \\ 1 1 1 1.0 V1UnitM0
+ dc CSW, X16 // DC <dc_op>, <Xt> \\ No description \\ No scheduling info
+ dcps1 // DCPS1 \\ No description \\ No scheduling info
+ dcps1 #4391 // DCPS1 #<imm> \\ No description \\ No scheduling info
+ dcps2 // DCPS2 \\ No description \\ No scheduling info
+ dcps2 #26756 // DCPS2 #<imm> \\ No description \\ No scheduling info
+ dcps3 // DCPS3 \\ No description \\ No scheduling info
+ dcps3 #47330 // DCPS3 #<imm> \\ No description \\ No scheduling info
+ decb X22 // DECB <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ decb X5, VL256 // DECB <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ decb X21, VL256, MUL #7 // DECB <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ decd X11 // DECD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ decd X19, ALL // DECD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ decd X24, VL2, MUL #10 // DECD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ dech X16 // DECH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ dech X20, MUL4 // DECH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ dech X0, MUL3, MUL #15 // DECH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ decw X27 // DECW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ decw X18, VL32 // DECW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ decw X29, VL6, MUL #3 // DECW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ decd Z19.D // DECD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ decd Z22.D, MUL3 // DECD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ decd Z1.D, VL128, MUL #11 // DECD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ dech Z23.H // DECH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ dech Z29.H, VL5 // DECH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ dech Z28.H, VL64, MUL #16 // DECH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ decw Z8.S // DECW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ decw Z4.S, VL64 // DECW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ decw Z27.S, VL4, MUL #10 // DECW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ decp X6, P6.B // DECP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+ decp Z22.H, P1 // DECP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 3 7 7 0.50 V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV, V1UnitV01
+ dmb SY // DMB <option> \\ No description \\ No scheduling info
+ dmb #6 // DMB #<imm> \\ No description \\ No scheduling info
+ drps // DRPS \\ No description \\ No scheduling info
+ dup B15, V25.B[12] // DUP B<d>, <Vn>.B[<indexb>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+ dup H2, V31.H[5] // DUP H<d>, <Vn>.H[<indexh>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+ dup S10, V2.S[1] // DUP S<d>, <Vn>.S[<indexs>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+ dup D24, V7.D[1] // DUP D<d>, <Vn>.D[<indexd>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+ dup V25.8B, V21.B[4] // DUP <Vd>.<Tb>, <Vn>.B[<indexb>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+ dup V28.8H, V29.H[1] // DUP <Vd>.<Th>, <Vn>.H[<indexh>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+ dup V24.4S, V9.S[3] // DUP <Vd>.<Ts>, <Vn>.S[<indexs>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+ dup V20.2D, V3.D[0] // DUP <Vd>.<Td>, <Vn>.D[<indexd>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+ dup V19.4S, W27 // DUP <Vd>.<T>, <R><n> \\ ASIMD duplicate, gen reg \\ 1 3 3 1.0 V1UnitM0
+ dup Z30.B, #16 // DUP <Zd>.B, #<imm> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ dup Z15.H, #105 // DUP <Zd>.<T>, #<imm> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ dup Z22.D, #-14, LSL #0 // DUP <Zd>.<T>, #<imm>, <shift> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ dup Z2.B, Z26.B[27] // DUP <Zd>.B, <Zn>.B[<immb>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ dup Z23.H, Z22.H[2] // DUP <Zd>.H, <Zn>.H[<immh>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ dup Z29.S, Z30.S[15] // DUP <Zd>.S, <Zn>.S[<imms>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ dup Z4.D, Z7.D[0] // DUP <Zd>.D, <Zn>.D[<immd>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ dup Z25.D, X28 // DUP <Zd>.<T>, <R><n> \\ Duplicate, scalar form \\ 1 3 3 1.0 V1UnitM0
+ dup Z18.S, WSP // DUP <Zd>.<T>, <R2>SP \\ Duplicate, scalar form \\ 1 3 3 1.0 V1UnitM0
+ dupm Z18.B, #0x70 // DUPM <Zd>.B, #<constb> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+ dupm Z12.H, #0x60 // DUPM <Zd>.H, #<consth> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+ dupm Z16.S, #0x2 // DUPM <Zd>.S, #<consts> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+ dupm Z16.D, #0x4 // DUPM <Zd>.D, #<constd> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+ eon W29, W4, W19 // EON <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ eon W14, W24, W28, ASR #14 // EON <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ eon X19, X12, X2 // EON <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ eon X23, X23, X23, ASR #41 // EON <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ eon Z7.B, Z7.B, #0x70 // EON <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ eon Z3.H, Z3.H, #0x60 // EON <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ eon Z2.S, Z2.S, #0x2 // EON <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ eon Z24.D, Z24.D, #0x4 // EON <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ eor WSP, W4, #0xe00 // EOR <Wd|WSP>, <Wn>, #<imms> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ eor X27, X25, #0x1e00 // EOR <Xd|SP>, <Xn>, #<immd> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ eor Z19.B, Z19.B, #0x70 // EOR <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ eor Z18.H, Z18.H, #0x60 // EOR <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ eor Z10.S, Z10.S, #0x2 // EOR <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ eor Z29.D, Z29.D, #0x4 // EOR <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ eor P6.B, P7/Z, P3.B, P5.B // EOR <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+ eor W8, W27, W2 // EOR <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ eor W8, W7, W29, ASR #30 // EOR <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ eor X22, X16, X6 // EOR <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ eor X0, X23, X30, LSL #11 // EOR <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ eor V8.16B, V10.16B, V19.16B // EOR <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ eor Z8.H, P3/M, Z8.H, Z14.H // EOR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ eor Z30.D, Z26.D, Z20.D // EOR <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ eors P1.B, P0/Z, P3.B, P1.B // EORS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 2 2 2 0.50 V1UnitM0[2]
+ eorv H17, P1, Z15.H // EORV <V><d>, <Pg>, <Zn>.<T> \\ Reduction, logical \\ 4 12 12 0.50 V1UnitV01[4]
+ eret // ERET \\ No description \\ No scheduling info
+ esb // ESB \\ No description \\ No scheduling info
+ ext V12.8B, V22.8B, V31.8B, #6 // EXT <Vd>.8B, <Vn>.8B, <Vm>.8B, #<index8> \\ ASIMD extract \\ 1 2 2 4.0 V1UnitV
+ ext V17.16B, V18.16B, V8.16B, #10 // EXT <Vd>.16B, <Vn>.16B, <Vm>.16B, #<index16> \\ ASIMD extract \\ 1 2 2 4.0 V1UnitV
+ extr W19, W20, W20, #16 // EXTR <Wd>, <Wn>, <Wn>, #<lsbs> \\ Bitfield extract, one reg \\ 1 1 1 4.0 V1UnitI
+ extr W27, W4, W5, #23 // EXTR <Wd>, <Wn>, <Wm>, #<lsbs> \\ Bitfield extract, two regs \\ 2 3 3 2.00 V1UnitM
+ extr X25, X22, X22, #62 // EXTR <Xd>, <Xn>, <Xn>, #<lsbd> \\ Bitfield extract, one reg \\ 1 1 1 4.0 V1UnitI
+ extr X0, X12, X13, #17 // EXTR <Xd>, <Xn>, <Xm>, #<lsbd> \\ Bitfield extract, two regs \\ 2 3 3 2.00 V1UnitM
+ fabd H27, H20, H17 // FABD <Hd>, <Hn>, <Hm> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+ fabd S16, S29, S6 // FABD <V><d>, <V><n>, <V><m> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+ fabd V13.8H, V28.8H, V12.8H // FABD <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+ fabd V12.4S, V4.4S, V31.4S // FABD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+ fabd Z11.H, P6/M, Z11.H, Z5.H // FABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point absolute value/difference \\ 1 2 2 2.0 V1UnitV01
+ fabs H25, H7 // FABS <Hd>, <Hn> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+ fabs S17, S12 // FABS <Sd>, <Sn> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+ fabs D30, D8 // FABS <Dd>, <Dn> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+ fabs V16.4S, V31.4S // FABS <Vd>.<T>, <Vn>.<T> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+ fabs V17.2S, V28.2S // FABS <Vd>.<T>, <Vn>.<T> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+ fabs Z26.S, P7/M, Z24.S // FABS <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Floating point absolute value/difference \\ 1 2 2 2.0 V1UnitV01
+ facge P0.H, P5/Z, Z15.H, Z18.H // FACGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ facgt P7.S, P7/Z, Z10.S, Z4.S // FACGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ facge H24, H26, H29 // FACGE <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ facge D25, D24, D7 // FACGE <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ facge V25.4H, V16.4H, V11.4H // FACGE <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ facge V19.2S, V24.2S, V5.2S // FACGE <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ facgt H0, H4, H10 // FACGT <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ facgt S29, S3, S2 // FACGT <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ facgt V22.8H, V14.8H, V31.8H // FACGT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ facgt V22.4S, V8.4S, V2.4S // FACGT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ facle P7.H, P5/Z, Z22.H, Z27.H // FACLE <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ faclt P5.H, P5/Z, Z31.H, Z16.H // FACLT <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fadd Z4.H, P7/M, Z4.H, #1.0 // FADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+ fadd H23, H27, H22 // FADD <Hd>, <Hn>, <Hm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+ fadd S1, S23, S27 // FADD <Sd>, <Sn>, <Sm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+ fadd D16, D15, D21 // FADD <Dd>, <Dn>, <Dm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+ fadd V7.2D, V30.2D, V20.2D // FADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+ fadd V16.2D, V13.2D, V11.2D // FADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+ fadd Z26.H, P4/M, Z26.H, Z1.H // FADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+ fadd Z23.S, Z7.S, Z16.S // FADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+ fadda H8, P3, H8, Z28.H // FADDA H<dn>, <Pg>, H<dn>, <Zm>.H \\ Floating point associative add, F16 \\ 18 19 19 0.06 V1UnitV[18], V1UnitV0[18], V1UnitV01[18], V1UnitV02[18]
+ fadda S11, P6, S11, Z1.S // FADDA S<dn>, <Pg>, S<dn>, <Zm>.S \\ Floating point associative add, F32 \\ 10 11 11 0.10 V1UnitV[10], V1UnitV0[10], V1UnitV01[10], V1UnitV02[10]
+ fadda D27, P4, D27, Z27.D // FADDA D<dn>, <Pg>, D<dn>, <Zm>.D \\ Floating point associative add, F64 \\ 3 8 8 0.67 V1UnitV01[3]
+ faddp H10, V19.2H // FADDP <Vh><d>, <Vn>.<Th> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+ faddp D11, V28.2D // FADDP <V><d>, <Vn>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+ faddp V16.2D, V11.2D, V5.2D // FADDP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+ faddp V16.4S, V11.4S, V18.4S // FADDP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+ faddv H21, P2, Z3.H // FADDV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 6 13 13 0.33 V1UnitV[6], V1UnitV01[6]
+ faddv S16, P2, Z25.S // FADDV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 6 11 11 0.40 V1UnitV[6], V1UnitV01[5]
+ faddv D18, P4, Z7.D // FADDV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 5 9 9 0.50 V1UnitV01[4]
+ fcadd Z29.H, P2/M, Z29.H, Z15.H, #270 // FCADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>, <const> \\ Floating point complex add \\ 1 3 3 2.0 V1UnitV01
+ fccmp H31, H3, #11, HS // FCCMP <Hn>, <Hm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fccmp S5, S6, #0, CC // FCCMP <Sn>, <Sm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fccmp D17, D15, #0, ANY // FCCMP <Dn>, <Dm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fccmpe H6, H1, #12, ANY // FCCMPE <Hn>, <Hm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fccmpe S16, S13, #10, VS // FCCMPE <Sn>, <Sm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fccmpe D17, D14, #15, PLAST // FCCMPE <Dn>, <Dm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fcmeq P7.D, P1/Z, Z23.D, Z21.D // FCMEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fcmge P6.H, P1/Z, Z19.H, Z10.H // FCMGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fcmgt P5.S, P2/Z, Z29.S, Z5.S // FCMGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fcmne P5.D, P0/Z, Z22.D, Z15.D // FCMNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fcmuo P0.D, P2/Z, Z15.D, Z23.D // FCMUO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fcmeq P4.D, P5/Z, Z19.D, #0.0 // FCMEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fcmge P0.D, P5/Z, Z10.D, #0.0 // FCMGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fcmgt P6.D, P1/Z, Z8.D, #0.0 // FCMGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fcmle P2.D, P4/Z, Z26.D, #0.0 // FCMLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fcmlt P5.D, P5/Z, Z23.D, #0.0 // FCMLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fcmne P2.H, P3/Z, Z7.H, #0.0 // FCMNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fcmeq H30, H6, H1 // FCMEQ <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmeq S17, S0, S21 // FCMEQ <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmeq V19.2S, V31.2S, V19.2S // FCMEQ <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmeq V12.4S, V11.4S, V26.4S // FCMEQ <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmeq H19, H23, #0.0 // FCMEQ <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmeq S25, S18, #0.0 // FCMEQ <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmeq V8.2S, V16.2S, #0.0 // FCMEQ <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmeq V18.2D, V17.2D, #0.0 // FCMEQ <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmge H1, H16, H12 // FCMGE <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmge D29, D9, D3 // FCMGE <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmge V20.8H, V19.8H, V22.8H // FCMGE <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmge V17.2D, V11.2D, V13.2D // FCMGE <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmge H10, H23, #0.0 // FCMGE <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmge D5, D17, #0.0 // FCMGE <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmge V18.4H, V27.4H, #0.0 // FCMGE <Vd>.<Th>, <Vn>.<Th>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmge V17.2S, V11.2S, #0.0 // FCMGE <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmgt H4, H5, H0 // FCMGT <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmgt S13, S20, S3 // FCMGT <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmgt V24.8H, V24.8H, V28.8H // FCMGT <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmgt V19.4S, V20.4S, V13.4S // FCMGT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmgt H0, H18, #0.0 // FCMGT <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmgt D30, D23, #0.0 // FCMGT <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmgt V0.8H, V11.8H, #0.0 // FCMGT <Vd>.<Th>, <Vn>.<Th>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmgt V19.2D, V31.2D, #0.0 // FCMGT <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmla Z20.H, Z12.H, Z4.H[1], #90 // FCMLA <Zda>.H, <Zn>.H, <Zmh>.H[<immh>], <const> \\ Floating point complex multiply add \\ 1 5 2 2.0 V1UnitV01
+ fcmla Z1.S, Z27.S, Z6.S[0], #90 // FCMLA <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const> \\ Floating point complex multiply add \\ 1 5 2 2.0 V1UnitV01
+ fcmla Z25.S, P3/M, Z13.S, Z23.S, #180 // FCMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>, <const> \\ Floating point complex multiply add \\ 1 5 2 2.0 V1UnitV01
+ fcmle P5.S, P3/Z, Z28.S, Z12.S // FCMLE <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fcmle H18, H28, #0.0 // FCMLE <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmle D18, D16, #0.0 // FCMLE <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmle V16.8H, V11.8H, #0.0 // FCMLE <Vd>.<Th>, <Vn>.<Th>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmle V22.4S, V30.4S, #0.0 // FCMLE <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmlt P1.S, P1/Z, Z13.S, Z24.S // FCMLT <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fcmlt H23, H7, #0.0 // FCMLT <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmlt D22, D28, #0.0 // FCMLT <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmlt V8.4H, V2.4H, #0.0 // FCMLT <Vd>.<Th>, <Vn>.<Th>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmlt V7.2D, V16.2D, #0.0 // FCMLT <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmp H5, H21 // FCMP <Hn>, <Hm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fcmp H5, #0.0 // FCMP <Hn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fcmp S7, S0 // FCMP <Sn>, <Sm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fcmp S28, #0.0 // FCMP <Sn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fcmp D1, D27 // FCMP <Dn>, <Dm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fcmp D16, #0.0 // FCMP <Dn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fcmpe H22, H21 // FCMPE <Hn>, <Hm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fcmpe H13, #0.0 // FCMPE <Hn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fcmpe S11, S29 // FCMPE <Sn>, <Sm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fcmpe S15, #0.0 // FCMPE <Sn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fcmpe D27, D22 // FCMPE <Dn>, <Dm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fcmpe D9, #0.0 // FCMPE <Dn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fcpy Z2.H, P7/M, #0.5 // FCPY <Zd>.<T>, <Pg>/M, #<const> \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
+ fcsel H26, H2, H11, NLAST // FCSEL <Hd>, <Hn>, <Hm>, <cond> \\ FP select \\ 1 2 2 2.0 V1UnitV01
+ fcsel S5, S1, S4, VC // FCSEL <Sd>, <Sn>, <Sm>, <cond> \\ FP select \\ 1 2 2 2.0 V1UnitV01
+ fcsel D14, D0, D19, NONE // FCSEL <Dd>, <Dn>, <Dm>, <cond> \\ FP select \\ 1 2 2 2.0 V1UnitV01
+ fcvt S13, H13 // FCVT <Sd>, <Hn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
+ fcvt D10, H6 // FCVT <Dd>, <Hn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
+ fcvt H1, S1 // FCVT <Hd>, <Sn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
+ fcvt D9, S23 // FCVT <Dd>, <Sn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
+ fcvt H17, D16 // FCVT <Hd>, <Dn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
+ fcvt S31, D27 // FCVT <Sd>, <Dn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
+ fcvt Z0.S, P1/M, Z4.H // FCVT <Zd>.S, <Pg>/M, <Zn>.H \\ Floating point convert, long or narrow (F16 to F32 or F32 to F16) \\ 2 4 4 0.50 V1UnitV0[2]
+ fcvt Z6.D, P0/M, Z17.H // FCVT <Zd>.D, <Pg>/M, <Zn>.H \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 1 3 3 1.0 V1UnitV0
+ fcvt Z7.H, P7/M, Z5.S // FCVT <Zd>.H, <Pg>/M, <Zn>.S \\ Floating point convert, long or narrow (F16 to F32 or F32 to F16) \\ 2 4 4 0.50 V1UnitV0[2]
+ fcvt Z11.D, P2/M, Z18.S // FCVT <Zd>.D, <Pg>/M, <Zn>.S \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 1 3 3 1.0 V1UnitV0
+ fcvt Z26.H, P0/M, Z30.D // FCVT <Zd>.H, <Pg>/M, <Zn>.D \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 1 3 3 1.0 V1UnitV0
+ fcvt Z13.S, P2/M, Z3.D // FCVT <Zd>.S, <Pg>/M, <Zn>.D \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 1 3 3 1.0 V1UnitV0
+ fcvtas W23, H3 // FCVTAS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtas X14, H29 // FCVTAS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtas W0, S13 // FCVTAS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtas X23, S15 // FCVTAS <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtas W1, D31 // FCVTAS <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtas X2, D3 // FCVTAS <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtas H27, H24 // FCVTAS <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtas S16, S0 // FCVTAS S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtas D14, D7 // FCVTAS D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtas V5.4H, V16.4H // FCVTAS <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ fcvtas V13.8H, V30.8H // FCVTAS <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtas V12.2S, V1.2S // FCVTAS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtas V9.4S, V31.4S // FCVTAS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ fcvtas V2.2D, V22.2D // FCVTAS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtau W13, H27 // FCVTAU <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtau X8, H12 // FCVTAU <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtau W20, S10 // FCVTAU <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtau X27, S22 // FCVTAU <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtau W6, D26 // FCVTAU <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtau X16, D13 // FCVTAU <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtau H6, H29 // FCVTAU <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtau S23, S7 // FCVTAU S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtau D1, D26 // FCVTAU D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtau V12.4H, V13.4H // FCVTAU <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ fcvtau V21.8H, V0.8H // FCVTAU <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtau V31.2S, V6.2S // FCVTAU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtau V29.4S, V26.4S // FCVTAU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ fcvtau V9.2D, V7.2D // FCVTAU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtl V30.4S, V4.4H // FCVTL <Vd>.4S, <Vn>.4H \\ ASIMD FP convert, long (F16 to F32) \\ 2 4 4 1.00 V1UnitV02[2]
+ fcvtl V28.2D, V13.2S // FCVTL <Vd>.2D, <Vn>.2S \\ ASIMD FP convert, long (F32 to F64) \\ 1 3 3 2.0 V1UnitV02
+ fcvtl2 V14.4S, V29.8H // FCVTL2 <Vd>.4S, <Vn>.8H \\ ASIMD FP convert, long (F16 to F32) \\ 2 4 4 1.00 V1UnitV02[2]
+ fcvtl2 V0.2D, V9.4S // FCVTL2 <Vd>.2D, <Vn>.4S \\ ASIMD FP convert, long (F32 to F64) \\ 1 3 3 2.0 V1UnitV02
+ fcvtms W15, H1 // FCVTMS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtms X5, H2 // FCVTMS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtms W1, S16 // FCVTMS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtms X27, S22 // FCVTMS <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtms W18, D21 // FCVTMS <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtms X6, D26 // FCVTMS <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtms H19, H29 // FCVTMS <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtms S30, S14 // FCVTMS S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtms D8, D20 // FCVTMS D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtms V27.4H, V7.4H // FCVTMS <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ fcvtms V26.8H, V11.8H // FCVTMS <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtms V13.2S, V2.2S // FCVTMS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtms V18.4S, V21.4S // FCVTMS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ fcvtms V15.2D, V16.2D // FCVTMS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtmu W20, H6 // FCVTMU <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtmu X7, H18 // FCVTMU <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtmu W24, S19 // FCVTMU <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtmu X7, S15 // FCVTMU <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtmu W16, D16 // FCVTMU <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtmu X1, D18 // FCVTMU <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtmu H20, H13 // FCVTMU <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtmu S28, S25 // FCVTMU S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtmu D3, D27 // FCVTMU D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtmu V18.4H, V2.4H // FCVTMU <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ fcvtmu V10.8H, V11.8H // FCVTMU <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtmu V27.2S, V14.2S // FCVTMU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtmu V31.4S, V4.4S // FCVTMU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ fcvtmu V6.2D, V26.2D // FCVTMU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtn V4.4H, V22.4S // FCVTN <Vd>.4H, <Vn>.4S \\ ASIMD FP convert, narrow (F32 to F16) \\ 2 4 4 1.00 V1UnitV02[2]
+ fcvtn V14.2S, V2.2D // FCVTN <Vd>.2S, <Vn>.2D \\ ASIMD FP convert, narrow (F64 to F32) \\ 1 3 3 2.0 V1UnitV02
+ fcvtn2 V0.8H, V30.4S // FCVTN2 <Vd>.8H, <Vn>.4S \\ ASIMD FP convert, narrow (F32 to F16) \\ 2 4 4 1.00 V1UnitV02[2]
+ fcvtn2 V21.4S, V13.2D // FCVTN2 <Vd>.4S, <Vn>.2D \\ ASIMD FP convert, narrow (F64 to F32) \\ 1 3 3 2.0 V1UnitV02
+ fcvtns W19, H15 // FCVTNS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtns X20, H0 // FCVTNS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtns W10, S5 // FCVTNS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtns X14, S12 // FCVTNS <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtns W30, D2 // FCVTNS <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtns X0, D12 // FCVTNS <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtns H16, H25 // FCVTNS <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtns S23, S19 // FCVTNS S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtns D30, D1 // FCVTNS D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtns V28.4H, V19.4H // FCVTNS <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ fcvtns V19.8H, V19.8H // FCVTNS <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtns V20.2S, V4.2S // FCVTNS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtns V28.4S, V29.4S // FCVTNS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ fcvtns V21.2D, V31.2D // FCVTNS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtnu W12, H3 // FCVTNU <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtnu X23, H27 // FCVTNU <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtnu W4, S23 // FCVTNU <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtnu X5, S28 // FCVTNU <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtnu W4, D11 // FCVTNU <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtnu X12, D8 // FCVTNU <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtnu H24, H22 // FCVTNU <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtnu S29, S22 // FCVTNU S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtnu D18, D15 // FCVTNU D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtnu V5.4H, V12.4H // FCVTNU <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ fcvtnu V26.8H, V20.8H // FCVTNU <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtnu V15.2S, V1.2S // FCVTNU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtnu V7.4S, V16.4S // FCVTNU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ fcvtnu V13.2D, V8.2D // FCVTNU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtps W27, H14 // FCVTPS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtps X26, H20 // FCVTPS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtps W5, S27 // FCVTPS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtps X29, S6 // FCVTPS <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtps W23, D25 // FCVTPS <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtps X10, D16 // FCVTPS <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtps H31, H22 // FCVTPS <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtps S3, S3 // FCVTPS S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtps D10, D26 // FCVTPS D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtps V13.4H, V26.4H // FCVTPS <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ fcvtps V26.8H, V10.8H // FCVTPS <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtps V18.2S, V8.2S // FCVTPS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtps V12.4S, V18.4S // FCVTPS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ fcvtps V3.2D, V2.2D // FCVTPS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtpu W25, H22 // FCVTPU <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtpu X4, H24 // FCVTPU <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtpu W13, S0 // FCVTPU <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtpu X0, S17 // FCVTPU <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtpu W16, D25 // FCVTPU <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtpu X15, D12 // FCVTPU <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtpu H1, H29 // FCVTPU <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtpu S21, S30 // FCVTPU S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtpu D16, D26 // FCVTPU D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtpu V2.4H, V25.4H // FCVTPU <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ fcvtpu V24.8H, V26.8H // FCVTPU <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtpu V6.2S, V23.2S // FCVTPU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtpu V10.4S, V6.4S // FCVTPU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ fcvtpu V7.2D, V23.2D // FCVTPU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtxn S29, D4 // FCVTXN <Vb><d>, <Va><n> \\ ASIMD FP convert, narrow (F64 to F32) \\ 1 3 3 2.0 V1UnitV02
+ fcvtxn V25.2S, V15.2D // FCVTXN <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD FP convert, narrow (F64 to F32) \\ 1 3 3 2.0 V1UnitV02
+ fcvtxn2 V21.4S, V6.2D // FCVTXN2 <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD FP convert, narrow (F64 to F32) \\ 1 3 3 2.0 V1UnitV02
+ fcvtzs W28, H26, #26 // FCVTZS <Wd>, <Hn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 2.00 V1UnitV, V1UnitV02
+ fcvtzs X22, H17, #58 // FCVTZS <Xd>, <Hn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 2.00 V1UnitV, V1UnitV02
+ fcvtzs W17, S23, #22 // FCVTZS <Wd>, <Sn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 2.00 V1UnitV, V1UnitV02
+ fcvtzs X15, S30, #2 // FCVTZS <Xd>, <Sn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 2.00 V1UnitV, V1UnitV02
+ fcvtzs W13, D17, #17 // FCVTZS <Wd>, <Dn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 2.00 V1UnitV, V1UnitV02
+ fcvtzs X14, D9, #24 // FCVTZS <Xd>, <Dn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 2.00 V1UnitV, V1UnitV02
+ fcvtzs W15, H10 // FCVTZS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzs X4, H21 // FCVTZS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzs W1, S4 // FCVTZS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzs X27, S27 // FCVTZS <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzs W24, D30 // FCVTZS <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzs X18, D21 // FCVTZS <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzs H29, H23, #16 // FCVTZS H<d>, H<n>, #<hfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtzs S23, S15, #2 // FCVTZS S<d>, S<n>, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtzs D20, D26, #57 // FCVTZS D<d>, D<n>, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzs V20.4H, V24.4H, #11 // FCVTZS <Vd>.4H, <Vn>.4H, #<hfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ fcvtzs V18.8H, V10.8H, #7 // FCVTZS <Vd>.8H, <Vn>.8H, #<hfbits> \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtzs V16.2S, V2.2S, #11 // FCVTZS <Vd>.2S, <Vn>.2S, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzs V22.4S, V18.4S, #5 // FCVTZS <Vd>.4S, <Vn>.4S, #<sfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ fcvtzs V14.2D, V30.2D, #54 // FCVTZS <Vd>.2D, <Vn>.2D, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzs H16, H27 // FCVTZS <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtzs S4, S5 // FCVTZS S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtzs D4, D23 // FCVTZS D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzs V8.4H, V16.4H // FCVTZS <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ fcvtzs V2.8H, V16.8H // FCVTZS <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtzs V27.2S, V28.2S // FCVTZS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzs V29.4S, V18.4S // FCVTZS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ fcvtzs V13.2D, V31.2D // FCVTZS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzs Z1.H, P2/M, Z6.H // FCVTZS <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 4 6 6 0.25 V1UnitV0[4]
+ fcvtzs Z19.S, P4/M, Z16.H // FCVTZS <Zd>.S, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 2 4 4 0.50 V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2]
+ fcvtzs Z14.D, P0/M, Z6.H // FCVTZS <Zd>.D, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 1 3 3 1.00 V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02
+ fcvtzs Z25.S, P5/M, Z23.S // FCVTZS <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 2 4 4 0.50 V1UnitV0[2]
+ fcvtzs Z3.D, P1/M, Z31.S // FCVTZS <Zd>.D, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 1 3 3 1.00 V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02
+ fcvtzs Z28.S, P5/M, Z23.D // FCVTZS <Zd>.S, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 1 3 3 1.0 V1UnitV0
+ fcvtzs Z22.D, P6/M, Z29.D // FCVTZS <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 1 3 3 1.0 V1UnitV0
+ fcvtzu W12, H19, #20 // FCVTZU <Wd>, <Hn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 2.00 V1UnitV, V1UnitV02
+ fcvtzu X17, H23, #12 // FCVTZU <Xd>, <Hn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 2.00 V1UnitV, V1UnitV02
+ fcvtzu W16, S3, #12 // FCVTZU <Wd>, <Sn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 2.00 V1UnitV, V1UnitV02
+ fcvtzu X27, S15, #8 // FCVTZU <Xd>, <Sn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 2.00 V1UnitV, V1UnitV02
+ fcvtzu W21, D10, #23 // FCVTZU <Wd>, <Dn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 2.00 V1UnitV, V1UnitV02
+ fcvtzu X26, D30, #27 // FCVTZU <Xd>, <Dn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 2.00 V1UnitV, V1UnitV02
+ fcvtzu W26, H30 // FCVTZU <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzu X9, H11 // FCVTZU <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzu W20, S16 // FCVTZU <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzu X7, S21 // FCVTZU <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzu W25, D30 // FCVTZU <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzu X13, D8 // FCVTZU <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzu H19, H8, #12 // FCVTZU H<d>, H<n>, #<hfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtzu S25, S27, #10 // FCVTZU S<d>, S<n>, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtzu D30, D16, #42 // FCVTZU D<d>, D<n>, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzu V19.4H, V26.4H, #9 // FCVTZU <Vd>.4H, <Vn>.4H, #<hfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ fcvtzu V27.8H, V6.8H, #11 // FCVTZU <Vd>.8H, <Vn>.8H, #<hfbits> \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtzu V30.2S, V4.2S, #19 // FCVTZU <Vd>.2S, <Vn>.2S, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzu V31.4S, V6.4S, #22 // FCVTZU <Vd>.4S, <Vn>.4S, #<sfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ fcvtzu V10.2D, V12.2D, #53 // FCVTZU <Vd>.2D, <Vn>.2D, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzu H25, H30 // FCVTZU <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtzu S2, S19 // FCVTZU S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtzu D4, D7 // FCVTZU D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzu V3.4H, V2.4H // FCVTZU <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ fcvtzu V30.8H, V25.8H // FCVTZU <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ fcvtzu V25.2S, V25.2S // FCVTZU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzu V21.4S, V2.4S // FCVTZU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ fcvtzu V23.2D, V15.2D // FCVTZU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzu Z15.H, P0/M, Z8.H // FCVTZU <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 4 6 6 0.25 V1UnitV0[4]
+ fcvtzu Z8.S, P5/M, Z18.H // FCVTZU <Zd>.S, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 2 4 4 0.50 V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2]
+ fcvtzu Z11.D, P4/M, Z24.H // FCVTZU <Zd>.D, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 1 3 3 1.00 V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02
+ fcvtzu Z13.S, P7/M, Z8.S // FCVTZU <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 2 4 4 0.50 V1UnitV0[2]
+ fcvtzu Z20.D, P2/M, Z13.S // FCVTZU <Zd>.D, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 1 3 3 1.00 V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02
+ fcvtzu Z31.S, P3/M, Z20.D // FCVTZU <Zd>.S, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 1 3 3 1.0 V1UnitV0
+ fcvtzu Z4.D, P1/M, Z25.D // FCVTZU <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 1 3 3 1.0 V1UnitV0
+ fdiv H1, H26, H23 // FDIV <Hd>, <Hn>, <Hm> \\ FP divide, H-form \\ 1 7 7 0.29 V1UnitV[7], V1UnitV02[7]
+ fdiv S31, S18, S12 // FDIV <Sd>, <Sn>, <Sm> \\ FP divide, S-form \\ 1 10 10 0.67 V1UnitV02[3]
+ fdiv D6, D3, D0 // FDIV <Dd>, <Dn>, <Dm> \\ FP divide, D-form \\ 1 15 15 0.29 V1UnitV02[7]
+ fdiv V21.4H, V15.4H, V22.4H // FDIV <Vd>.4H, <Vn>.4H, <Vm>.4H \\ ASIMD FP divide, D-form, F16 \\ 1 7 7 0.29 V1UnitV02[7]
+ fdiv V31.8H, V12.8H, V15.8H // FDIV <Vd>.8H, <Vn>.8H, <Vm>.8H \\ ASIMD FP divide, Q-form, F16 \\ 1 13 13 0.14 V1UnitV02[14]
+ fdiv V15.2S, V23.2S, V2.2S // FDIV <Vd>.2S, <Vn>.2S, <Vm>.2S \\ ASIMD FP divide, D-form, F32 \\ 1 10 10 0.4 V1UnitV02[5]
+ fdiv V7.4S, V27.4S, V22.4S // FDIV <Vd>.4S, <Vn>.4S, <Vm>.4S \\ ASIMD FP divide, Q-form, F32 \\ 1 10 10 0.2 V1UnitV02[10]
+ fdiv V31.2D, V25.2D, V8.2D // FDIV <Vd>.2D, <Vn>.2D, <Vm>.2D \\ ASIMD FP divide, Q-form, F64 \\ 1 15 15 0.13 V1UnitV02[15]
+ fdiv Z21.H, P7/M, Z21.H, Z15.H // FDIV <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Floating point divide, F16 \\ 1 13 13 0.08 V1UnitV0[13]
+ fdiv Z17.S, P4/M, Z17.S, Z20.S // FDIV <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Floating point divide, F32 \\ 1 10 10 0.1 V1UnitV0[10]
+ fdiv Z13.D, P3/M, Z13.D, Z28.D // FDIV <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Floating point divide, F64 \\ 1 15 15 0.07 V1UnitV0[15]
+ fdivr Z29.H, P4/M, Z29.H, Z1.H // FDIVR <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Floating point divide, F16 \\ 1 13 13 0.08 V1UnitV0[13]
+ fdivr Z13.S, P0/M, Z13.S, Z29.S // FDIVR <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Floating point divide, F32 \\ 1 10 10 0.1 V1UnitV0[10]
+ fdivr Z14.D, P3/M, Z14.D, Z31.D // FDIVR <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Floating point divide, F64 \\ 1 15 15 0.07 V1UnitV0[15]
+ fdup Z19.S, #0.5 // FDUP <Zd>.<T>, #<const> \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
+ fexpa Z6.H, Z3.H // FEXPA <Zd>.<T>, <Zn>.<T> \\ Floating point trigonometric \\ 1 3 3 2.0 V1UnitV01
+ fmad Z9.S, P5/M, Z9.S, Z7.S // FMAD <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fmadd H27, H0, H6, H28 // FMADD <Hd>, <Hn>, <Hm>, <Ha> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmadd S13, S24, S15, S5 // FMADD <Sd>, <Sn>, <Sm>, <Sa> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmadd D19, D4, D2, D17 // FMADD <Dd>, <Dn>, <Dm>, <Da> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmax Z25.D, P2/M, Z25.D, #0.0 // FMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+ fmax H8, H7, H11 // FMAX <Hd>, <Hn>, <Hm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fmax S9, S21, S2 // FMAX <Sd>, <Sn>, <Sm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fmax D4, D26, D26 // FMAX <Dd>, <Dn>, <Dm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fmax V0.4S, V13.4S, V21.4S // FMAX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fmax V12.4S, V27.4S, V11.4S // FMAX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fmax Z16.S, P5/M, Z16.S, Z12.S // FMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+ fmaxnm Z25.D, P5/M, Z25.D, #1.0 // FMAXNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+ fmaxnm H29, H13, H14 // FMAXNM <Hd>, <Hn>, <Hm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fmaxnm S25, S20, S0 // FMAXNM <Sd>, <Sn>, <Sm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fmaxnm D29, D25, D16 // FMAXNM <Dd>, <Dn>, <Dm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fmaxnm V6.4S, V3.4S, V3.4S // FMAXNM <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fmaxnm V9.2D, V15.2D, V11.2D // FMAXNM <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fmaxnm Z6.S, P5/M, Z6.S, Z17.S // FMAXNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+ fmaxnmp H25, V19.2H // FMAXNMP <Vh><d>, <Vn>.<Th> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fmaxnmp D17, V29.2D // FMAXNMP <V><d>, <Vn>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fmaxnmp V31.4S, V4.4S, V2.4S // FMAXNMP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fmaxnmp V23.4S, V15.4S, V1.4S // FMAXNMP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fmaxnmv H0, V13.4H // FMAXNMV H<d>, <Vn>.4H \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 2 4 4 2.00 V1UnitV[2]
+ fmaxnmv H12, V11.8H // FMAXNMV H<d>, <Vn>.8H \\ ASIMD FP max/min, reduce, Q-form F16 \\ 3 6 6 1.33 V1UnitV[3]
+ fmaxnmv S28, V31.4S // FMAXNMV S<d>, <Vn>.4S \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 2 4 4 2.00 V1UnitV[2]
+ fmaxnmv H9, P3, Z2.H // FMAXNMV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 6 13 13 0.33 V1UnitV[6], V1UnitV01[6]
+ fmaxnmv S26, P6, Z0.S // FMAXNMV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 6 11 11 0.40 V1UnitV[6], V1UnitV01[5]
+ fmaxnmv D7, P1, Z29.D // FMAXNMV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 5 9 9 0.50 V1UnitV01[4]
+ fmaxp H15, V25.2H // FMAXP <Vh><d>, <Vn>.<Th> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fmaxp S6, V2.2S // FMAXP <V><d>, <Vn>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fmaxp V21.2S, V17.2S, V13.2S // FMAXP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fmaxp V10.4S, V5.4S, V25.4S // FMAXP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fmaxv H23, V4.4H // FMAXV H<d>, <Vn>.4H \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 2 4 4 2.00 V1UnitV[2]
+ fmaxv H25, V15.8H // FMAXV H<d>, <Vn>.8H \\ ASIMD FP max/min, reduce, Q-form F16 \\ 3 6 6 1.33 V1UnitV[3]
+ fmaxv S23, V2.4S // FMAXV S<d>, <Vn>.4S \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 2 4 4 2.00 V1UnitV[2]
+ fmaxv H12, P0, Z22.H // FMAXV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 6 13 13 0.33 V1UnitV[6], V1UnitV01[6]
+ fmaxv S24, P5, Z12.S // FMAXV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 6 11 11 0.40 V1UnitV[6], V1UnitV01[5]
+ fmaxv D1, P6, Z25.D // FMAXV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 5 9 9 0.50 V1UnitV01[4]
+ fmin Z24.D, P4/M, Z24.D, #0.0 // FMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+ fmin H4, H13, H17 // FMIN <Hd>, <Hn>, <Hm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fmin S1, S14, S22 // FMIN <Sd>, <Sn>, <Sm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fmin D18, D19, D22 // FMIN <Dd>, <Dn>, <Dm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fmin V6.4S, V25.4S, V27.4S // FMIN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fmin V12.2S, V30.2S, V25.2S // FMIN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fmin Z11.H, P3/M, Z11.H, Z16.H // FMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+ fminnm Z19.H, P4/M, Z19.H, #0.0 // FMINNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+ fminnm H29, H23, H17 // FMINNM <Hd>, <Hn>, <Hm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fminnm S24, S14, S30 // FMINNM <Sd>, <Sn>, <Sm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fminnm D0, D26, D8 // FMINNM <Dd>, <Dn>, <Dm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fminnm V16.2S, V23.2S, V27.2S // FMINNM <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fminnm V23.4S, V19.4S, V22.4S // FMINNM <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fminnm Z24.S, P3/M, Z24.S, Z13.S // FMINNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+ fminnmp H20, V14.2H // FMINNMP <Vh><d>, <Vn>.<Th> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fminnmp D15, V8.2D // FMINNMP <V><d>, <Vn>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fminnmp V27.2D, V27.2D, V16.2D // FMINNMP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fminnmp V2.4S, V14.4S, V14.4S // FMINNMP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fminnmv H19, V25.4H // FMINNMV H<d>, <Vn>.4H \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 2 4 4 2.00 V1UnitV[2]
+ fminnmv H23, V17.8H // FMINNMV H<d>, <Vn>.8H \\ ASIMD FP max/min, reduce, Q-form F16 \\ 3 6 6 1.33 V1UnitV[3]
+ fminnmv S29, V17.4S // FMINNMV S<d>, <Vn>.4S \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 2 4 4 2.00 V1UnitV[2]
+ fminnmv H24, P3, Z1.H // FMINNMV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 6 13 13 0.33 V1UnitV[6], V1UnitV01[6]
+ fminnmv S30, P3, Z9.S // FMINNMV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 6 11 11 0.40 V1UnitV[6], V1UnitV01[5]
+ fminnmv D18, P5, Z8.D // FMINNMV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 5 9 9 0.50 V1UnitV01[4]
+ fminp H7, V10.2H // FMINP <Vh><d>, <Vn>.<Th> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fminp S17, V7.2S // FMINP <V><d>, <Vn>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fminp V25.4S, V2.4S, V15.4S // FMINP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fminp V14.2S, V28.2S, V15.2S // FMINP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fminv H3, V30.4H // FMINV H<d>, <Vn>.4H \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 2 4 4 2.00 V1UnitV[2]
+ fminv H29, V12.8H // FMINV H<d>, <Vn>.8H \\ ASIMD FP max/min, reduce, Q-form F16 \\ 3 6 6 1.33 V1UnitV[3]
+ fminv S16, V19.4S // FMINV S<d>, <Vn>.4S \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 2 4 4 2.00 V1UnitV[2]
+ fminv H15, P2, Z25.H // FMINV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 6 13 13 0.33 V1UnitV[6], V1UnitV01[6]
+ fminv S4, P0, Z6.S // FMINV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 6 11 11 0.40 V1UnitV[6], V1UnitV01[5]
+ fminv D20, P1, Z5.D // FMINV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 5 9 9 0.50 V1UnitV01[4]
+ fmla H23, H24, V15.H[4] // FMLA <Hd>, <Hn>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmla S9, S20, V28.S[2] // FMLA S<d>, S<n>, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmla D12, D20, V7.D[1] // FMLA D<d>, D<n>, <Vm>.D[<indexd>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmla V29.8H, V15.8H, V10.H[4] // FMLA <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmla V2.2S, V16.2S, V28.S[0] // FMLA <Vd>.2S, <Vn>.2S, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmla V14.4S, V14.4S, V5.S[3] // FMLA <Vd>.4S, <Vn>.4S, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmla V10.2D, V14.2D, V21.D[1] // FMLA <Vd>.2D, <Vn>.2D, <Vm>.D[<indexd>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmla Z2.H, Z4.H, Z7.H[0] // FMLA <Zda>.H, <Zn>.H, <Zmhs>.H[<immh>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fmla Z22.S, Z15.S, Z1.S[3] // FMLA <Zda>.S, <Zn>.S, <Zmhs>.S[<imms>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fmla Z1.D, Z30.D, Z11.D[1] // FMLA <Zda>.D, <Zn>.D, <Zmd>.D[<immd>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fmla V1.4S, V24.4S, V12.4S // FMLA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmla V30.2D, V16.2D, V6.2D // FMLA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmla Z6.S, P1/M, Z24.S, Z24.S // FMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fmls H8, H14, V7.H[4] // FMLS <Hd>, <Hn>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmls S20, S17, V5.S[2] // FMLS S<d>, S<n>, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmls D11, D24, V29.D[0] // FMLS D<d>, D<n>, <Vm>.D[<indexd>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmls V30.8H, V18.8H, V4.H[6] // FMLS <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmls V10.2S, V27.2S, V0.S[0] // FMLS <Vd>.2S, <Vn>.2S, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmls V27.4S, V7.4S, V24.S[0] // FMLS <Vd>.4S, <Vn>.4S, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmls V10.2D, V22.2D, V29.D[0] // FMLS <Vd>.2D, <Vn>.2D, <Vm>.D[<indexd>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmls Z3.H, Z31.H, Z0.H[6] // FMLS <Zda>.H, <Zn>.H, <Zmhs>.H[<immh>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fmls Z30.S, Z8.S, Z0.S[2] // FMLS <Zda>.S, <Zn>.S, <Zmhs>.S[<imms>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fmls Z10.D, Z20.D, Z0.D[1] // FMLS <Zda>.D, <Zn>.D, <Zmd>.D[<immd>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fmls V6.2S, V3.2S, V12.2S // FMLS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmls V6.8H, V15.8H, V23.8H // FMLS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmls Z26.S, P5/M, Z28.S, Z26.S // FMLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fmov W15, H31 // FMOV <Wd>, <Hn> \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
+ fmov X21, H14 // FMOV <Xd>, <Hn> \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
+ fmov H6, W5 // FMOV <Hd>, <Wn> \\ FP transfer, from gen to low half of vec reg \\ 1 3 3 1.0 V1UnitM0
+ fmov S22, W0 // FMOV <Sd>, <Wn> \\ FP transfer, from gen to low half of vec reg \\ 1 3 3 1.0 V1UnitM0
+ fmov W23, S30 // FMOV <Wd>, <Sn> \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
+ fmov H16, X27 // FMOV <Hd>, <Xn> \\ FP transfer, from gen to low half of vec reg \\ 1 3 3 1.0 V1UnitM0
+ fmov D22, X12 // FMOV <Dd>, <Xn> \\ FP transfer, from gen to low half of vec reg \\ 1 3 3 1.0 V1UnitM0
+ fmov V7.D[1], X8 // FMOV <Vd>.D[1], <Xn> \\ FP transfer, from gen to high half of vec reg \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+ fmov X26, D29 // FMOV <Xd>, <Dn> \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
+ fmov X4, V26.D[1] // FMOV <Xd>, <Vn>.D[1] \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
+ fmov Z2.S, P0/M, #0.5 // FMOV <Zd>.<T>, <Pg>/M, #<const> \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
+ fmov Z14.S, #0.5 // FMOV <Zd>.<T>, #<const> \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
+ fmov H18, H28 // FMOV <Hd>, <Hn> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+ fmov S13, S23 // FMOV <Sd>, <Sn> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+ fmov D27, D17 // FMOV <Dd>, <Dn> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+ fmov H29, #0.5 // FMOV <Hd>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+ fmov S22, #0.5 // FMOV <Sd>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+ fmov D18, #0.5 // FMOV <Dd>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+ fmov V12.2S, #0.5 // FMOV <Vd>.<T>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+ fmov V10.2S, #0.5 // FMOV <Vd>.<T>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+ fmov V0.2D, #0.5 // FMOV <Vd>.2D, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+ fmov Z2.D, P2/M, #0.0 // FMOV <Zd>.<T>, <Pg>/M, #0.0 \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
+ fmov Z5.S, #0.0 // FMOV <Zd>.<T>, #0.0 \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
+ fmsb Z25.S, P5/M, Z25.S, Z29.S // FMSB <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fmsub H25, H28, H12, H24 // FMSUB <Hd>, <Hn>, <Hm>, <Ha> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmsub S31, S0, S23, S24 // FMSUB <Sd>, <Sn>, <Sm>, <Sa> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmsub D12, D10, D20, D16 // FMSUB <Dd>, <Dn>, <Dm>, <Da> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmul H18, H4, V7.H[3] // FMUL <Hd>, <Hn>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmul S17, S23, V30.S[2] // FMUL S<d>, S<n>, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmul D27, D8, V10.D[1] // FMUL D<d>, D<n>, <Vm>.D[<indexd>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmul V10.4H, V2.4H, V7.H[5] // FMUL <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmul V5.2S, V12.2S, V9.S[0] // FMUL <Vd>.2S, <Vn>.2S, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmul V15.4S, V30.4S, V2.S[3] // FMUL <Vd>.4S, <Vn>.4S, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmul V11.2D, V31.2D, V24.D[1] // FMUL <Vd>.2D, <Vn>.2D, <Vm>.D[<indexd>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmul Z17.H, P5/M, Z17.H, #2.0 // FMUL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+ fmul Z27.H, Z30.H, Z0.H[0] // FMUL <Zd>.H, <Zn>.H, <Zmhs>.H[<immh>] \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+ fmul Z6.S, Z16.S, Z1.S[0] // FMUL <Zd>.S, <Zn>.S, <Zmhs>.S[<imms>] \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+ fmul Z4.D, Z30.D, Z2.D[0] // FMUL <Zd>.D, <Zn>.D, <Zmd>.D[<immd>] \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+ fmul H28, H14, H3 // FMUL <Hd>, <Hn>, <Hm> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmul S28, S16, S24 // FMUL <Sd>, <Sn>, <Sm> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmul D19, D19, D0 // FMUL <Dd>, <Dn>, <Dm> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmul V0.2D, V14.2D, V20.2D // FMUL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmul V9.2D, V29.2D, V7.2D // FMUL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmul Z22.D, P1/M, Z22.D, Z3.D // FMUL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+ fmul Z19.S, Z14.S, Z26.S // FMUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+ fmulx H18, H17, V7.H[1] // FMULX <Hd>, <Hn>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmulx S23, S3, V3.S[2] // FMULX S<d>, S<n>, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmulx D3, D13, V30.D[0] // FMULX D<d>, D<n>, <Vm>.D[<indexd>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmulx V28.4H, V25.4H, V15.H[1] // FMULX <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmulx V3.2S, V22.2S, V23.S[3] // FMULX <Vd>.2S, <Vn>.2S, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmulx V5.4S, V28.4S, V15.S[3] // FMULX <Vd>.4S, <Vn>.4S, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmulx V22.2D, V18.2D, V25.D[1] // FMULX <Vd>.2D, <Vn>.2D, <Vm>.D[<indexd>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmulx H20, H25, H0 // FMULX <Hd>, <Hn>, <Hm> \\ ASIMD FP multiply \\ 1 2 2 4.00 V1UnitV
+ fmulx D18, D19, D22 // FMULX <V><d>, <V><n>, <V><m> \\ ASIMD FP multiply \\ 1 2 2 4.00 V1UnitV
+ fmulx V22.2D, V18.2D, V4.2D // FMULX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmulx V16.2S, V4.2S, V27.2S // FMULX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmulx Z7.H, P5/M, Z7.H, Z21.H // FMULX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+ fneg H2, H9 // FNEG <Hd>, <Hn> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
+ fneg S11, S19 // FNEG <Sd>, <Sn> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
+ fneg D5, D16 // FNEG <Dd>, <Dn> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
+ fneg V26.2D, V2.2D // FNEG <Vd>.<T>, <Vn>.<T> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
+ fneg V14.2S, V24.2S // FNEG <Vd>.<T>, <Vn>.<T> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
+ fneg Z16.S, P0/M, Z25.S // FNEG <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+ fnmad Z6.H, P2/M, Z14.H, Z21.H // FNMAD <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fnmadd H3, H18, H31, H24 // FNMADD <Hd>, <Hn>, <Hm>, <Ha> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fnmadd S8, S18, S2, S14 // FNMADD <Sd>, <Sn>, <Sm>, <Sa> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fnmadd D19, D29, D28, D30 // FNMADD <Dd>, <Dn>, <Dm>, <Da> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fnmla Z15.D, P0/M, Z8.D, Z29.D // FNMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fnmls Z13.D, P0/M, Z8.D, Z12.D // FNMLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fnmsb Z30.D, P7/M, Z8.D, Z9.D // FNMSB <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fnmsub H3, H29, H24, H17 // FNMSUB <Hd>, <Hn>, <Hm>, <Ha> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fnmsub S29, S26, S17, S4 // FNMSUB <Sd>, <Sn>, <Sm>, <Sa> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fnmsub D7, D13, D13, D4 // FNMSUB <Dd>, <Dn>, <Dm>, <Da> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fnmul H3, H15, H7 // FNMUL <Hd>, <Hn>, <Hm> \\ FP multiply \\ 1 3 3 4.0 V1UnitV
+ fnmul S16, S11, S2 // FNMUL <Sd>, <Sn>, <Sm> \\ FP multiply \\ 1 3 3 4.0 V1UnitV
+ fnmul D12, D22, D14 // FNMUL <Dd>, <Dn>, <Dm> \\ FP multiply \\ 1 3 3 4.0 V1UnitV
+ frecpe H20, H8 // FRECPE <Hd>, <Hn> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 4 4 2.00 V1UnitV02
+ frecpe S27, S7 // FRECPE S<d>, S<n> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+ frecpe D2, D1 // FRECPE D<d>, D<n> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+ frecpe V28.4H, V27.4H // FRECPE <Vd>.4H, <Vn>.4H \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 2.00 V1UnitV, V1UnitV02
+ frecpe V9.8H, V6.8H // FRECPE <Vd>.8H, <Vn>.8H \\ ASIMD reciprocal and square root estimate, Q-form F16 \\ 2 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ frecpe V25.2S, V28.2S // FRECPE <Vd>.2S, <Vn>.2S \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+ frecpe V21.4S, V18.4S // FRECPE <Vd>.4S, <Vn>.4S \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 2.00 V1UnitV, V1UnitV02
+ frecpe V10.2D, V26.2D // FRECPE <Vd>.2D, <Vn>.2D \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 2.00 V1UnitV, V1UnitV02
+ frecpe Z14.H, Z0.H // FRECPE <Zd>.H, <Zn>.H \\ Floating point reciprocal estimate, F16 \\ 4 6 6 0.25 V1UnitV[4], V1UnitV0[4], V1UnitV01[4], V1UnitV02[4]
+ frecpe Z5.S, Z16.S // FRECPE <Zd>.S, <Zn>.S \\ Floating point reciprocal estimate, F32 \\ 2 4 4 0.50 V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2]
+ frecpe Z27.D, Z11.D // FRECPE <Zd>.D, <Zn>.D \\ Floating point reciprocal estimate, F64 \\ 1 3 3 1.0 V1UnitV0
+ frecps H29, H19, H8 // FRECPS <Hd>, <Hn>, <Hm> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+ frecps D25, D17, D12 // FRECPS <V><d>, <V><n>, <V><m> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+ frecps V12.8H, V25.8H, V4.8H // FRECPS <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+ frecps V7.2D, V29.2D, V18.2D // FRECPS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+ frecps Z11.S, Z31.S, Z1.S // FRECPS <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point reciprocal step \\ 1 4 4 2.0 V1UnitV01
+ frecpx H18, H11 // FRECPX <Hd>, <Hn> \\ ASIMD reciprocal exponent \\ 1 3 3 2.0 V1UnitV02
+ frecpx S13, S30 // FRECPX <V><d>, <V><n> \\ ASIMD reciprocal exponent \\ 1 3 3 2.0 V1UnitV02
+ frecpx Z15.S, P4/M, Z12.S // FRECPX <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Floating point reciprocal exponent \\ 1 3 3 1.0 V1UnitV0
+ frintn Z30.H, P3/M, Z31.H // FRINTN <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
+ frintn Z17.S, P4/M, Z23.S // FRINTN <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
+ frintn Z28.D, P1/M, Z25.D // FRINTN <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
+ frinta Z10.H, P6/M, Z17.H // FRINTA <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
+ frinta Z7.S, P4/M, Z27.S // FRINTA <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
+ frinta Z17.D, P4/M, Z17.D // FRINTA <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
+ frintm Z26.H, P7/M, Z0.H // FRINTM <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
+ frintm Z6.S, P0/M, Z28.S // FRINTM <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
+ frintm Z29.D, P4/M, Z3.D // FRINTM <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
+ frintp Z20.H, P4/M, Z12.H // FRINTP <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
+ frintp Z3.S, P7/M, Z18.S // FRINTP <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
+ frintp Z28.D, P7/M, Z4.D // FRINTP <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
+ frintz Z27.H, P2/M, Z12.H // FRINTZ <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
+ frintz Z12.S, P6/M, Z3.S // FRINTZ <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
+ frintz Z12.D, P2/M, Z31.D // FRINTZ <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
+ frinti Z16.H, P4/M, Z9.H // FRINTI <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
+ frinti Z18.S, P6/M, Z27.S // FRINTI <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
+ frinti Z26.D, P2/M, Z12.D // FRINTI <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
+ frintx Z17.H, P0/M, Z9.H // FRINTX <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
+ frintx Z27.S, P7/M, Z16.S // FRINTX <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
+ frintx Z21.D, P4/M, Z23.D // FRINTX <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
+ frinta H22, H10 // FRINTA <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frinta S15, S7 // FRINTA <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frinta D30, D10 // FRINTA <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frinta V24.4H, V10.4H // FRINTA <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ frinta V5.8H, V3.8H // FRINTA <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ frinta V23.2S, V22.2S // FRINTA <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ frinta V28.4S, V28.4S // FRINTA <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ frinta V3.2D, V13.2D // FRINTA <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ frinti H31, H14 // FRINTI <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frinti S23, S9 // FRINTI <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frinti D8, D12 // FRINTI <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frinti V6.4H, V10.4H // FRINTI <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ frinti V22.8H, V7.8H // FRINTI <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ frinti V9.2S, V25.2S // FRINTI <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ frinti V23.4S, V7.4S // FRINTI <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ frinti V28.2D, V5.2D // FRINTI <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ frintm H0, H21 // FRINTM <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frintm S22, S10 // FRINTM <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frintm D5, D30 // FRINTM <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frintm V3.4H, V8.4H // FRINTM <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ frintm V19.8H, V26.8H // FRINTM <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ frintm V15.2S, V8.2S // FRINTM <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ frintm V20.4S, V26.4S // FRINTM <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ frintm V20.2D, V11.2D // FRINTM <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ frintn H12, H3 // FRINTN <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frintn S27, S14 // FRINTN <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frintn D30, D17 // FRINTN <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frintn V27.4H, V4.4H // FRINTN <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ frintn V17.8H, V19.8H // FRINTN <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ frintn V23.2S, V23.2S // FRINTN <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ frintn V2.4S, V4.4S // FRINTN <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ frintn V24.2D, V12.2D // FRINTN <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ frintp H17, H31 // FRINTP <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frintp S14, S10 // FRINTP <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frintp D25, D13 // FRINTP <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frintp V22.4H, V25.4H // FRINTP <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ frintp V18.8H, V11.8H // FRINTP <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ frintp V31.2S, V5.2S // FRINTP <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ frintp V0.4S, V24.4S // FRINTP <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ frintp V1.2D, V3.2D // FRINTP <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ frintx H4, H5 // FRINTX <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frintx S10, S28 // FRINTX <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frintx D17, D19 // FRINTX <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frintx V24.4H, V25.4H // FRINTX <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ frintx V1.8H, V27.8H // FRINTX <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ frintx V2.2S, V14.2S // FRINTX <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ frintx V27.4S, V31.4S // FRINTX <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ frintx V24.2D, V20.2D // FRINTX <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ frintz H10, H29 // FRINTZ <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frintz S11, S23 // FRINTZ <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frintz D6, D11 // FRINTZ <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frintz V13.4H, V5.4H // FRINTZ <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ frintz V20.8H, V21.8H // FRINTZ <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ frintz V15.2S, V19.2S // FRINTZ <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ frintz V11.4S, V18.4S // FRINTZ <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ frintz V12.2D, V22.2D // FRINTZ <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ frsqrte H23, H26 // FRSQRTE <Hd>, <Hn> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 4 4 2.00 V1UnitV02
+ frsqrte S23, S5 // FRSQRTE S<d>, S<n> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+ frsqrte D3, D11 // FRSQRTE D<d>, D<n> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+ frsqrte V16.4H, V15.4H // FRSQRTE <Vd>.4H, <Vn>.4H \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 2.00 V1UnitV, V1UnitV02
+ frsqrte V14.8H, V0.8H // FRSQRTE <Vd>.8H, <Vn>.8H \\ ASIMD reciprocal and square root estimate, Q-form F16 \\ 2 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ frsqrte V6.2S, V8.2S // FRSQRTE <Vd>.2S, <Vn>.2S \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+ frsqrte V30.4S, V21.4S // FRSQRTE <Vd>.4S, <Vn>.4S \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 2.00 V1UnitV, V1UnitV02
+ frsqrte V15.2D, V14.2D // FRSQRTE <Vd>.2D, <Vn>.2D \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 2.00 V1UnitV, V1UnitV02
+ frsqrte Z6.H, Z30.H // FRSQRTE <Zd>.H, <Zn>.H \\ Floating point reciprocal estimate, F16 \\ 4 6 6 0.25 V1UnitV[4], V1UnitV0[4], V1UnitV01[4], V1UnitV02[4]
+ frsqrte Z27.S, Z15.S // FRSQRTE <Zd>.S, <Zn>.S \\ Floating point reciprocal estimate, F32 \\ 2 4 4 0.50 V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2]
+ frsqrte Z6.D, Z17.D // FRSQRTE <Zd>.D, <Zn>.D \\ Floating point reciprocal estimate, F64 \\ 1 3 3 1.0 V1UnitV0
+ frsqrts H28, H26, H1 // FRSQRTS <Hd>, <Hn>, <Hm> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+ frsqrts S28, S1, S11 // FRSQRTS <V><d>, <V><n>, <V><m> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+ frsqrts V8.4H, V9.4H, V30.4H // FRSQRTS <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+ frsqrts V20.4S, V26.4S, V27.4S // FRSQRTS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+ frsqrts Z10.H, Z25.H, Z22.H // FRSQRTS <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point reciprocal step \\ 1 4 4 2.0 V1UnitV01
+ fscale Z2.H, P0/M, Z2.H, Z21.H // FSCALE <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+ fsqrt H13, H24 // FSQRT <Hd>, <Hn> \\ FP square root, H-form \\ 1 7 7 0.29 V1UnitV[7], V1UnitV02[7]
+ fsqrt S20, S15 // FSQRT <Sd>, <Sn> \\ FP square root, S-form \\ 1 9 9 1.0 V1UnitV02[2]
+ fsqrt D25, D21 // FSQRT <Dd>, <Dn> \\ FP square root, D-form \\ 1 16 16 0.25 V1UnitV02[8]
+ fsqrt V24.4H, V14.4H // FSQRT <Vd>.4H, <Vn>.4H \\ ASIMD FP square root, D-form, F16 \\ 1 7 7 0.29 V1UnitV02[7]
+ fsqrt V12.8H, V3.8H // FSQRT <Vd>.8H, <Vn>.8H \\ ASIMD FP square root, Q-form, F16 \\ 1 13 13 0.14 V1UnitV02[14]
+ fsqrt V30.2S, V20.2S // FSQRT <Vd>.2S, <Vn>.2S \\ ASIMD FP square root, D-form, F32 \\ 1 10 10 0.4 V1UnitV02[5]
+ fsqrt V2.4S, V24.4S // FSQRT <Vd>.4S, <Vn>.4S \\ ASIMD FP square root, Q-form, F32 \\ 1 10 10 0.2 V1UnitV02[10]
+ fsqrt V28.2D, V25.2D // FSQRT <Vd>.2D, <Vn>.2D \\ ASIMD FP square root, Q-form, F64 \\ 1 16 16 0.12 V1UnitV02[16]
+ fsqrt Z13.H, P3/M, Z11.H // FSQRT <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point square root, F16 \\ 1 13 13 0.08 V1UnitV0[13]
+ fsqrt Z2.S, P7/M, Z0.S // FSQRT <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point square root, F32 \\ 1 10 10 0.1 V1UnitV0[10]
+ fsqrt Z17.D, P6/M, Z17.D // FSQRT <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point square root F64 \\ 1 16 16 0.07 V1UnitV0[15]
+ fsub Z12.D, P6/M, Z12.D, #1.0 // FSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+ fsub H20, H11, H18 // FSUB <Hd>, <Hn>, <Hm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+ fsub S15, S4, S24 // FSUB <Sd>, <Sn>, <Sm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+ fsub D25, D26, D4 // FSUB <Dd>, <Dn>, <Dm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+ fsub V13.8H, V15.8H, V17.8H // FSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+ fsub V1.2S, V31.2S, V27.2S // FSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+ fsub Z24.S, P4/M, Z24.S, Z10.S // FSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+ fsub Z19.H, Z8.H, Z29.H // FSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+ fsubr Z22.H, P7/M, Z22.H, #0.5 // FSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+ fsubr Z13.S, P2/M, Z13.S, Z4.S // FSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+ ftmad Z19.D, Z19.D, Z6.D, #3 // FTMAD <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<imm> \\ Floating point trigonometric \\ 1 3 3 2.0 V1UnitV01
+ ftsmul Z21.S, Z0.S, Z10.S // FTSMUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point trigonometric \\ 1 3 3 2.0 V1UnitV01
+ ftssel Z5.D, Z0.D, Z15.D // FTSSEL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point trigonometric \\ 1 3 3 2.0 V1UnitV01
+ hint #9 // HINT #<imm> \\ No description \\ No scheduling info
+ hlt #31335 // HLT #<imm> \\ No description \\ No scheduling info
+ hvc #60601 // HVC #<imm> \\ No description \\ No scheduling info
+ ic IALLUIS // IC <ic_op> \\ No description \\ No scheduling info
+ ic IVAU, X6 // IC <ic_op2>, <Xt> \\ No description \\ No scheduling info
+ incb X18 // INCB <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ incb X17, VL3 // INCB <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ incb X17, MUL3, MUL #7 // INCB <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ incd X19 // INCD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ incd X17, VL3 // INCD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ incd X11, VL64, MUL #7 // INCD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ inch X24 // INCH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ inch X23, ALL // INCH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ inch X22, VL1, MUL #8 // INCH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ incw X29 // INCW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ incw X2, VL64 // INCW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ incw X2, VL8, MUL #1 // INCW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ incd Z24.D // INCD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ incd Z23.D, VL8 // INCD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ incd Z20.D, VL2, MUL #11 // INCD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ inch Z29.H // INCH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ inch Z28.H, VL16 // INCH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ inch Z29.H, VL16, MUL #13 // INCH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ incw Z17.S // INCW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ incw Z31.S, MUL3 // INCW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ incw Z12.S, VL4, MUL #5 // INCW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ incp X7, P0.H // INCP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+ incp Z2.D, P6 // INCP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 3 7 7 0.50 V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV, V1UnitV01
+ index Z8.B, #15, W14 // INDEX <Zd>.B, #<imm>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13
+ index Z14.H, #11, W10 // INDEX <Zd>.H, #<imm>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13
+ index Z17.S, #14, W21 // INDEX <Zd>.S, #<imm>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13
+ index Z5.D, #11, X15 // INDEX <Zd>.D, #<imm>, X<m> \\ Horizontal operations, D form, scalar, imm/ scalar/ imm, scalar \\ 4 8 8 0.50 V1UnitM0[2],V1UnitV0[2]
+ index Z16.B, #-2, #0 // INDEX <Zd>.B, #<imm1>, #<imm2> \\ Horizontal operations, B, H, S form, imm, imm \\ 1 4 4 1.0 V1UnitV0
+ index Z13.H, #13, #2 // INDEX <Zd>.H, #<imm1>, #<imm2> \\ Horizontal operations, B, H, S form, imm, imm \\ 1 4 4 1.0 V1UnitV0
+ index Z20.S, #6, #1 // INDEX <Zd>.S, #<imm1>, #<imm2> \\ Horizontal operations, B, H, S form, imm, imm \\ 1 4 4 1.0 V1UnitV0
+ index Z13.D, #-15, #0 // INDEX <Zd>.D, #<imm1>, #<imm2> \\ Horizontal operations, D form, imm, imm \\ 2 5 5 0.50 V1UnitV0[2]
+ index Z28.B, W27, #1 // INDEX <Zd>.B, W<n>, #<imm> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13
+ index Z13.H, W28, #-5 // INDEX <Zd>.H, W<n>, #<imm> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13
+ index Z22.S, W7, #8 // INDEX <Zd>.S, W<n>, #<imm> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13
+ index Z0.D, X25, #-8 // INDEX <Zd>.D, X<n>, #<imm> \\ Horizontal operations, D form, scalar, imm/ scalar/ imm, scalar \\ 4 8 8 0.50 V1UnitM0[2],V1UnitV0[2]
+ index Z6.B, W24, W8 // INDEX <Zd>.B, W<n>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13
+ index Z20.H, W4, W7 // INDEX <Zd>.H, W<n>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13
+ index Z10.S, W2, W19 // INDEX <Zd>.S, W<n>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13
+ index Z2.D, X23, X7 // INDEX <Zd>.D, X<n>, X<m> \\ Horizontal operations, D form, scalar, imm/ scalar/ imm, scalar \\ 4 8 8 0.50 V1UnitM0[2],V1UnitV0[2]
+ ins V15.B[7], V6.B[15] // INS <Vd>.B[<index1b>], <Vn>.B[<index2b>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+ ins V17.H[1], V3.H[2] // INS <Vd>.H[<index1h>], <Vn>.H[<index2h>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+ ins V4.S[1], V7.S[0] // INS <Vd>.S[<index1s>], <Vn>.S[<index2s>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+ ins V22.D[1], V25.D[1] // INS <Vd>.D[<index1d>], <Vn>.D[<index2d>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+ ins V14.B[3], W12 // INS <Vd>.B[<indexb>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+ ins V25.H[2], W14 // INS <Vd>.H[<indexh>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+ ins V14.S[1], W29 // INS <Vd>.S[<indexs>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+ ins V19.D[1], X27 // INS <Vd>.D[<indexd>], X<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+ insr Z4.D, D0 // INSR <Zdn>.<T>, <V><m> \\ Extract/insert operation, SIMD and FP scalar form \\ 1 3 3 1.0 V1UnitV1
+ insr Z4.D, X14 // INSR <Zdn>.<T>, <R><m> \\ Extract/insert operation, scalar \\ 2 6 6 1.0 V1UnitV1,V1UnitM0
+ isb // ISB \\ No description \\ No scheduling info
+ isb SY // ISB <option> \\ No description \\ No scheduling info
+ isb #1 // ISB #<imm> \\ No description \\ No scheduling info
+ lasta B3, P1, Z3.B // LASTA <V><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, SIMD and FP scalar form \\ 1 3 3 1.0 V1UnitV1
+ lasta W16, P0, Z10.B // LASTA <R><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, scalar \\ 2 6 6 1.0 V1UnitV1,V1UnitM0
+ lastb D3, P1, Z17.D // LASTB <V><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, SIMD and FP scalar form \\ 1 3 3 1.0 V1UnitV1
+ lastb X4, P3, Z31.D // LASTB <R><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, scalar \\ 2 6 6 1.0 V1UnitV1,V1UnitM0
+ ld1 { V23.8B }, [X11] // LD1 { <Vt>.8B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 1 6 6 3.0 V1UnitL
+ ld1 { V25.8B }, [X30], #8 // LD1 { <Vt>.8B }, [<Xn|SP>], #8 \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V14.8B }, [X1], X26 // LD1 { <Vt>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V12.16B }, [X19] // LD1 { <Vt>.16B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 1 6 6 3.0 V1UnitL
+ ld1 { V24.16B }, [X28], #16 // LD1 { <Vt>.16B }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V21.16B }, [X25], X28 // LD1 { <Vt>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V8.4H }, [X30] // LD1 { <Vt>.4H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 1 6 6 3.0 V1UnitL
+ ld1 { V4.4H }, [X10], #8 // LD1 { <Vt>.4H }, [<Xn|SP>], #8 \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V17.4H }, [X12], X16 // LD1 { <Vt>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V24.8H }, [X27] // LD1 { <Vt>.8H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 1 6 6 3.0 V1UnitL
+ ld1 { V21.8H }, [X24], #16 // LD1 { <Vt>.8H }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V9.8H }, [X9], X27 // LD1 { <Vt>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V4.2S }, [X2] // LD1 { <Vt>.2S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 1 6 6 3.0 V1UnitL
+ ld1 { V19.2S }, [X27], #8 // LD1 { <Vt>.2S }, [<Xn|SP>], #8 \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V25.2S }, [X13], X19 // LD1 { <Vt>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V3.4S }, [X4] // LD1 { <Vt>.4S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 1 6 6 3.0 V1UnitL
+ ld1 { V24.4S }, [X20], #16 // LD1 { <Vt>.4S }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V29.4S }, [X25], X23 // LD1 { <Vt>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V24.1D }, [X9] // LD1 { <Vt>.1D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 1 6 6 3.0 V1UnitL
+ ld1 { V23.1D }, [X3], #8 // LD1 { <Vt>.1D }, [<Xn|SP>], #8 \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V19.1D }, [X10], X19 // LD1 { <Vt>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V3.2D }, [X28] // LD1 { <Vt>.2D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 1 6 6 3.0 V1UnitL
+ ld1 { V8.2D }, [X16], #16 // LD1 { <Vt>.2D }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V5.2D }, [X1], X29 // LD1 { <Vt>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V24.8B, V25.8B }, [X6] // LD1 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 2 6 6 1.50 V1UnitL[2]
+ ld1 { V17.8B, V18.8B }, [X18], #16 // LD1 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+ ld1 { V18.8B, V19.8B }, [X6], X11 // LD1 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+ ld1 { V0.16B, V1.16B }, [X14] // LD1 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 2 6 6 1.50 V1UnitL[2]
+ ld1 { V20.16B, V21.16B }, [X2], #32 // LD1 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+ ld1 { V5.16B, V6.16B }, [X17], X25 // LD1 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+ ld1 { V25.4H, V26.4H }, [X3] // LD1 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 2 6 6 1.50 V1UnitL[2]
+ ld1 { V10.4H, V11.4H }, [X14], #16 // LD1 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+ ld1 { V0.4H, V1.4H }, [X24], X15 // LD1 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+ ld1 { V1.8H, V2.8H }, [X27] // LD1 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 2 6 6 1.50 V1UnitL[2]
+ ld1 { V22.8H, V23.8H }, [X13], #32 // LD1 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+ ld1 { V9.8H, V10.8H }, [X4], X13 // LD1 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+ ld1 { V6.2S, V7.2S }, [X29] // LD1 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 2 6 6 1.50 V1UnitL[2]
+ ld1 { V23.2S, V24.2S }, [X10], #16 // LD1 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+ ld1 { V26.2S, V27.2S }, [X21], X29 // LD1 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+ ld1 { V11.4S, V12.4S }, [X30] // LD1 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 2 6 6 1.50 V1UnitL[2]
+ ld1 { V23.4S, V24.4S }, [X14], #32 // LD1 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+ ld1 { V12.4S, V13.4S }, [X27], X22 // LD1 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+ ld1 { V27.1D, V28.1D }, [X7] // LD1 { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 2 6 6 1.50 V1UnitL[2]
+ ld1 { V13.1D, V14.1D }, [X29], #16 // LD1 { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+ ld1 { V1.1D, V2.1D }, [X7], X20 // LD1 { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+ ld1 { V13.2D, V14.2D }, [X13] // LD1 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 2 6 6 1.50 V1UnitL[2]
+ ld1 { V13.2D, V14.2D }, [X10], #32 // LD1 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+ ld1 { V20.2D, V21.2D }, [X29], X28 // LD1 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+ ld1 { V7.8B, V8.8B, V9.8B }, [X12] // LD1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 3 6 6 1.00 V1UnitL[3]
+ ld1 { V13.8B, V14.8B, V15.8B }, [X10], #24 // LD1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], #24 \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+ ld1 { V28.8B, V29.8B, V30.8B }, [X2], X21 // LD1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+ ld1 { V19.16B, V20.16B, V21.16B }, [X10] // LD1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 3 6 6 1.00 V1UnitL[3]
+ ld1 { V8.16B, V9.16B, V10.16B }, [X29], #48 // LD1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], #48 \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+ ld1 { V14.16B, V15.16B, V16.16B }, [X5], X17 // LD1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+ ld1 { V10.4H, V11.4H, V12.4H }, [X28] // LD1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 3 6 6 1.00 V1UnitL[3]
+ ld1 { V22.4H, V23.4H, V24.4H }, [X6], #24 // LD1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], #24 \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+ ld1 { V11.4H, V12.4H, V13.4H }, [X13], X23 // LD1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+ ld1 { V21.8H, V22.8H, V23.8H }, [X22] // LD1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 3 6 6 1.00 V1UnitL[3]
+ ld1 { V26.8H, V27.8H, V28.8H }, [X2], #48 // LD1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], #48 \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+ ld1 { V6.8H, V7.8H, V8.8H }, [X22], X6 // LD1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+ ld1 { V16.2S, V17.2S, V18.2S }, [X27] // LD1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 3 6 6 1.00 V1UnitL[3]
+ ld1 { V3.2S, V4.2S, V5.2S }, [X30], #24 // LD1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], #24 \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+ ld1 { V14.2S, V15.2S, V16.2S }, [X11], X28 // LD1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+ ld1 { V0.4S, V1.4S, V2.4S }, [X24] // LD1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 3 6 6 1.00 V1UnitL[3]
+ ld1 { V17.4S, V18.4S, V19.4S }, [X28], #48 // LD1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], #48 \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+ ld1 { V5.4S, V6.4S, V7.4S }, [X20], X13 // LD1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+ ld1 { V14.1D, V15.1D, V16.1D }, [X3] // LD1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 3 6 6 1.00 V1UnitL[3]
+ ld1 { V21.1D, V22.1D, V23.1D }, [X24], #24 // LD1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], #24 \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+ ld1 { V25.1D, V26.1D, V27.1D }, [X18], X14 // LD1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+ ld1 { V12.2D, V13.2D, V14.2D }, [X15] // LD1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 3 6 6 1.00 V1UnitL[3]
+ ld1 { V13.2D, V14.2D, V15.2D }, [X4], #48 // LD1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], #48 \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+ ld1 { V15.2D, V16.2D, V17.2D }, [X10], X6 // LD1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+ ld1 { V4.8B, V5.8B, V6.8B, V7.8B }, [X13] // LD1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 2 6 6 1.50 V1UnitL[2]
+ ld1 { V8.8B, V9.8B, V10.8B, V11.8B }, [X30], #32 // LD1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+ ld1 { V4.8B, V5.8B, V6.8B, V7.8B }, [X20], X3 // LD1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+ ld1 { V13.16B, V14.16B, V15.16B, V16.16B }, [X9] // LD1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 4 7 7 0.75 V1UnitL[4]
+ ld1 { V3.16B, V4.16B, V5.16B, V6.16B }, [X17], #64 // LD1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], #64 \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 5 7 7 0.75 V1UnitL[4],V1UnitI
+ ld1 { V10.16B, V11.16B, V12.16B, V13.16B }, [X19], X29 // LD1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 5 7 7 0.75 V1UnitL[4],V1UnitI
+ ld1 { V20.4H, V21.4H, V22.4H, V23.4H }, [X15] // LD1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 2 6 6 1.50 V1UnitL[2]
+ ld1 { V4.4H, V5.4H, V6.4H, V7.4H }, [X12], #32 // LD1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+ ld1 { V24.4H, V25.4H, V26.4H, V27.4H }, [X25], X0 // LD1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+ ld1 { V0.8H, V1.8H, V2.8H, V3.8H }, [X21] // LD1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 4 7 7 0.75 V1UnitL[4]
+ ld1 { V12.8H, V13.8H, V14.8H, V15.8H }, [X21], #64 // LD1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], #64 \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 5 7 7 0.75 V1UnitL[4],V1UnitI
+ ld1 { V14.8H, V15.8H, V16.8H, V17.8H }, [X12], X23 // LD1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 5 7 7 0.75 V1UnitL[4],V1UnitI
+ ld1 { V21.2S, V22.2S, V23.2S, V24.2S }, [X21] // LD1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 2 6 6 1.50 V1UnitL[2]
+ ld1 { V27.2S, V28.2S, V29.2S, V30.2S }, [X11], #32 // LD1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+ ld1 { V24.2S, V25.2S, V26.2S, V27.2S }, [X1], X22 // LD1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+ ld1 { V15.4S, V16.4S, V17.4S, V18.4S }, [X28] // LD1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 4 7 7 0.75 V1UnitL[4]
+ ld1 { V14.4S, V15.4S, V16.4S, V17.4S }, [X8], #64 // LD1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], #64 \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 5 7 7 0.75 V1UnitL[4],V1UnitI
+ ld1 { V11.4S, V12.4S, V13.4S, V14.4S }, [X2], X28 // LD1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 5 7 7 0.75 V1UnitL[4],V1UnitI
+ ld1 { V22.1D, V23.1D, V24.1D, V25.1D }, [X4] // LD1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 2 6 6 1.50 V1UnitL[2]
+ ld1 { V3.1D, V4.1D, V5.1D, V6.1D }, [X23], #32 // LD1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+ ld1 { V22.1D, V23.1D, V24.1D, V25.1D }, [X9], X22 // LD1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+ ld1 { V18.2D, V19.2D, V20.2D, V21.2D }, [X6] // LD1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 4 7 7 0.75 V1UnitL[4]
+ ld1 { V3.2D, V4.2D, V5.2D, V6.2D }, [X3], #64 // LD1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], #64 \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 5 7 7 0.75 V1UnitL[4],V1UnitI
+ ld1 { V6.2D, V7.2D, V8.2D, V9.2D }, [X17], X18 // LD1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 5 7 7 0.75 V1UnitL[4],V1UnitI
+ ld1 { V18.B }[3], [X23] // LD1 { <Vt>.B }[<indexb>], [<Xn|SP>] \\ ASIMD load, 1 element, one lane, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+ ld1 { V18.H }[3], [X1] // LD1 { <Vt>.H }[<indexh>], [<Xn|SP>] \\ ASIMD load, 1 element, one lane, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+ ld1 { V8.S }[0], [X24] // LD1 { <Vt>.S }[<indexs>], [<Xn|SP>] \\ ASIMD load, 1 element, one lane, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+ ld1 { V11.D }[0], [X13] // LD1 { <Vt>.D }[<indexd>], [<Xn|SP>] \\ ASIMD load, 1 element, one lane, D \\ 2 8 8 3.0 V1UnitL,V1UnitV
+ ld1 { V23.B }[1], [X13], #1 // LD1 { <Vt>.B }[<indexb>], [<Xn|SP>], #1 \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1 { V10.B }[9], [X25], X14 // LD1 { <Vt>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1 { V6.H }[2], [X26], #2 // LD1 { <Vt>.H }[<indexh>], [<Xn|SP>], #2 \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1 { V30.H }[6], [X27], X3 // LD1 { <Vt>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1 { V5.S }[1], [X10], #4 // LD1 { <Vt>.S }[<indexs>], [<Xn|SP>], #4 \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1 { V13.S }[3], [X6], X24 // LD1 { <Vt>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1 { V26.D }[1], [X28], #8 // LD1 { <Vt>.D }[<indexd>], [<Xn|SP>], #8 \\ ASIMD load, 1 element, one lane, D \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1 { V1.D }[1], [X20], X30 // LD1 { <Vt>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, one lane, D \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1b { Z20.B }, P1/Z, [X25] // LD1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1b { Z10.B }, P1/Z, [X16, #-1, MUL VL] // LD1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1b { Z31.H }, P1/Z, [X4] // LD1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1b { Z5.H }, P5/Z, [X8, #6, MUL VL] // LD1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1b { Z1.S }, P3/Z, [X12] // LD1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1b { Z24.S }, P2/Z, [X28, #1, MUL VL] // LD1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1b { Z25.D }, P5/Z, [X2] // LD1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1b { Z0.D }, P6/Z, [X22, #5, MUL VL] // LD1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1b { Z7.B }, P0/Z, [X24, X11] // LD1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+ ld1b { Z26.H }, P5/Z, [X5, X21] // LD1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+ ld1b { Z22.S }, P3/Z, [X16, X12] // LD1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+ ld1b { Z7.D }, P5/Z, [X18, X12] // LD1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+ ld1b { Z2.D }, P0/Z, [X15, Z18.D, UXTW] // LD1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1b { Z20.S }, P6/Z, [X2, Z0.S, SXTW] // LD1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 3.00 V1UnitL, V1UnitV
+ ld1b { Z15.D }, P4/Z, [X23, Z9.D] // LD1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1b { Z8.S }, P4/Z, [Z25.S, #22] // LD1B { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+ ld1b { Z13.D }, P2/Z, [Z3.D, #30] // LD1B { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1d { Z21.D }, P1/Z, [X24, Z31.D, SXTW #3] // LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #3] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1d { Z7.D }, P0/Z, [X13, Z15.D, SXTW] // LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1d { Z14.D }, P1/Z, [X26, Z27.D, LSL #3] // LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #3] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1d { Z30.D }, P7/Z, [X14, Z16.D] // LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1d { Z22.D }, P1/Z, [Z15.D] // LD1D { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1d { Z8.D }, P4/Z, [Z12.D, #200] // LD1D { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1h { Z3.H }, P2/Z, [X21] // LD1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.00 V1UnitL, V1UnitL01
+ ld1h { Z15.H }, P0/Z, [X25, #-3, MUL VL] // LD1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.00 V1UnitL, V1UnitL01
+ ld1h { Z9.S }, P1/Z, [X17] // LD1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.00 V1UnitL, V1UnitL01
+ ld1h { Z1.S }, P3/Z, [X14, #5, MUL VL] // LD1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.00 V1UnitL, V1UnitL01
+ ld1h { Z10.D }, P3/Z, [X9] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.00 V1UnitL, V1UnitL01
+ ld1h { Z2.D }, P7/Z, [X1, #4, MUL VL] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.00 V1UnitL, V1UnitL01
+ ld1h { Z26.H }, P5/Z, [X10, X19, LSL #1] // LD1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ld1h { Z29.S }, P7/Z, [X23, X11, LSL #1] // LD1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ld1h { Z2.D }, P5/Z, [X30, X9, LSL #1] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ld1h { Z14.S }, P7/Z, [X14, Z28.S, SXTW #1] // LD1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 4 11 11 1.50 V1UnitL[2], V1UnitV[2]
+ ld1h { Z28.D }, P7/Z, [X8, Z9.D, SXTW #1] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1h { Z16.D }, P5/Z, [X7, Z9.D, UXTW] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1h { Z27.S }, P4/Z, [X4, Z7.S, UXTW] // LD1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 3.00 V1UnitL, V1UnitV
+ ld1h { Z6.D }, P7/Z, [X30, Z26.D, LSL #1] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1h { Z11.D }, P2/Z, [X20, Z25.D] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1h { Z6.S }, P7/Z, [Z31.S] // LD1H { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+ ld1h { Z1.S }, P3/Z, [Z12.S, #8] // LD1H { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+ ld1h { Z7.D }, P7/Z, [Z9.D] // LD1H { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1h { Z13.D }, P3/Z, [Z5.D, #8] // LD1H { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1r { V8.8B }, [X23] // LD1R { <Vt>.8B }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+ ld1r { V4.8B }, [X25], #1 // LD1R { <Vt>.8B }, [<Xn|SP>], #1 \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1r { V14.8B }, [X24], X14 // LD1R { <Vt>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1r { V8.16B }, [X24] // LD1R { <Vt>.16B }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, Q-form \\ 2 8 8 3.0 V1UnitL,V1UnitV
+ ld1r { V21.16B }, [X30], #1 // LD1R { <Vt>.16B }, [<Xn|SP>], #1 \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1r { V1.16B }, [X3], X9 // LD1R { <Vt>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1r { V28.4H }, [X9] // LD1R { <Vt>.4H }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+ ld1r { V10.4H }, [X27], #2 // LD1R { <Vt>.4H }, [<Xn|SP>], #2 \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1r { V12.4H }, [X8], X20 // LD1R { <Vt>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1r { V3.8H }, [X16] // LD1R { <Vt>.8H }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, Q-form \\ 2 8 8 3.0 V1UnitL,V1UnitV
+ ld1r { V27.8H }, [X18], #2 // LD1R { <Vt>.8H }, [<Xn|SP>], #2 \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1r { V20.8H }, [X20], X4 // LD1R { <Vt>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1r { V10.2S }, [X20] // LD1R { <Vt>.2S }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+ ld1r { V28.2S }, [X8], #4 // LD1R { <Vt>.2S }, [<Xn|SP>], #4 \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1r { V4.2S }, [X0], X12 // LD1R { <Vt>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1r { V11.4S }, [X3] // LD1R { <Vt>.4S }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, Q-form \\ 2 8 8 3.0 V1UnitL,V1UnitV
+ ld1r { V18.4S }, [X3], #4 // LD1R { <Vt>.4S }, [<Xn|SP>], #4 \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1r { V2.4S }, [X4], X1 // LD1R { <Vt>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1r { V3.1D }, [X15] // LD1R { <Vt>.1D }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, D-form, D \\ 2 8 8 3.0 V1UnitL,V1UnitV
+ ld1r { V16.1D }, [X2], #8 // LD1R { <Vt>.1D }, [<Xn|SP>], #8 \\ ASIMD load, 1 element, all lanes, D-form, D \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1r { V24.1D }, [X21], X3 // LD1R { <Vt>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, D-form, D \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1r { V18.2D }, [X0] // LD1R { <Vt>.2D }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, Q-form \\ 2 8 8 3.0 V1UnitL,V1UnitV
+ ld1r { V8.2D }, [X18], #8 // LD1R { <Vt>.2D }, [<Xn|SP>], #8 \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1r { V8.2D }, [X16], X28 // LD1R { <Vt>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1rb { Z13.B }, P0/Z, [X9] // LD1RB { <Zt>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rb { Z30.B }, P6/Z, [X21, #28] // LD1RB { <Zt>.B }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rb { Z10.H }, P1/Z, [X9] // LD1RB { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rb { Z25.H }, P3/Z, [X26, #6] // LD1RB { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rb { Z24.S }, P2/Z, [X19] // LD1RB { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rb { Z16.S }, P1/Z, [X8, #54] // LD1RB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rb { Z17.D }, P7/Z, [X4] // LD1RB { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rb { Z4.D }, P7/Z, [X20, #18] // LD1RB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rd { Z12.D }, P7/Z, [X20] // LD1RD { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rd { Z19.D }, P5/Z, [X13, #384] // LD1RD { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rh { Z13.H }, P7/Z, [X0] // LD1RH { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rh { Z23.H }, P0/Z, [X18, #56] // LD1RH { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rh { Z24.S }, P6/Z, [X27] // LD1RH { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rh { Z6.S }, P7/Z, [X1, #84] // LD1RH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rh { Z3.D }, P4/Z, [X25] // LD1RH { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rh { Z25.D }, P5/Z, [X5, #108] // LD1RH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rqb { Z31.B }, P1/Z, [X6] // LD1RQB { <Zt>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rqb { Z21.B }, P7/Z, [X29, #112] // LD1RQB { <Zt>.B }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rqb { Z7.B }, P6/Z, [X26, X26] // LD1RQB { <Zt>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load broadcast, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+ ld1rqd { Z10.D }, P0/Z, [X28] // LD1RQD { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rqd { Z29.D }, P5/Z, [X6, #-16] // LD1RQD { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rqd { Z5.D }, P6/Z, [X7, X8, LSL #3] // LD1RQD { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous load broadcast, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+ ld1rqh { Z29.H }, P3/Z, [X3] // LD1RQH { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rqh { Z29.H }, P4/Z, [X30, #112] // LD1RQH { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rqh { Z9.H }, P0/Z, [X23, X11, LSL #1] // LD1RQH { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous load broadcast, scalar + scalar + S \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ld1rqw { Z11.S }, P0/Z, [X26] // LD1RQW { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rqw { Z7.S }, P3/Z, [X16, #-80] // LD1RQW { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rqw { Z2.S }, P0/Z, [X21, X23, LSL #2] // LD1RQW { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous load broadcast, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+ ld1rsb { Z6.H }, P6/Z, [X23] // LD1RSB { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rsb { Z28.H }, P3/Z, [X21, #43] // LD1RSB { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rsb { Z13.S }, P5/Z, [X14] // LD1RSB { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rsb { Z26.S }, P3/Z, [X15, #4] // LD1RSB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rsb { Z23.D }, P2/Z, [X21] // LD1RSB { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rsb { Z29.D }, P6/Z, [X14, #25] // LD1RSB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rsh { Z25.S }, P2/Z, [X4] // LD1RSH { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rsh { Z30.S }, P5/Z, [X6, #124] // LD1RSH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rsh { Z24.D }, P4/Z, [X6] // LD1RSH { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rsh { Z14.D }, P3/Z, [X20, #98] // LD1RSH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rsw { Z2.D }, P0/Z, [X23] // LD1RSW { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rsw { Z18.D }, P7/Z, [X11, #0] // LD1RSW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rw { Z12.S }, P7/Z, [X9] // LD1RW { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rw { Z25.S }, P7/Z, [X17, #60] // LD1RW { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rw { Z22.D }, P5/Z, [X1] // LD1RW { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rw { Z2.D }, P3/Z, [X3, #36] // LD1RW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1sb { Z28.H }, P6/Z, [X9] // LD1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1sb { Z22.H }, P2/Z, [X19, #7, MUL VL] // LD1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1sb { Z22.S }, P3/Z, [X23] // LD1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1sb { Z2.S }, P6/Z, [X22, #-2, MUL VL] // LD1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1sb { Z31.D }, P6/Z, [X10] // LD1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1sb { Z23.D }, P5/Z, [X2, #-4, MUL VL] // LD1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1sb { Z3.H }, P5/Z, [X10, X23] // LD1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+ ld1sb { Z16.S }, P7/Z, [X27, X16] // LD1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+ ld1sb { Z13.D }, P7/Z, [X28, X18] // LD1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+ ld1sb { Z30.D }, P6/Z, [X22, Z27.D, UXTW] // LD1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1sb { Z23.S }, P5/Z, [X17, Z10.S, UXTW] // LD1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 3.00 V1UnitL, V1UnitV
+ ld1sb { Z23.D }, P2/Z, [X28, Z10.D] // LD1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1sb { Z14.S }, P4/Z, [Z18.S, #24] // LD1SB { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+ ld1sb { Z5.D }, P0/Z, [Z25.D, #31] // LD1SB { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1sh { Z8.S }, P3/Z, [X21] // LD1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1sh { Z29.S }, P4/Z, [X11, #-4, MUL VL] // LD1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1sh { Z13.D }, P6/Z, [X18] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1sh { Z19.D }, P2/Z, [X29, #-3, MUL VL] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1sh { Z28.S }, P0/Z, [X6, X28, LSL #1] // LD1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ld1sh { Z26.D }, P0/Z, [X7, X12, LSL #1] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ld1sh { Z22.S }, P3/Z, [X7, Z1.S, UXTW #1] // LD1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 4 11 11 1.50 V1UnitL[2], V1UnitV[2]
+ ld1sh { Z3.D }, P6/Z, [X11, Z14.D, SXTW #1] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1sh { Z27.D }, P3/Z, [X19, Z23.D, SXTW] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1sh { Z12.S }, P5/Z, [X27, Z13.S, SXTW] // LD1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 3.00 V1UnitL, V1UnitV
+ ld1sh { Z9.D }, P0/Z, [X22, Z8.D, LSL #1] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1sh { Z22.D }, P0/Z, [X27, Z12.D] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1sh { Z1.S }, P2/Z, [Z9.S, #44] // LD1SH { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+ ld1sh { Z11.D }, P5/Z, [Z30.D, #34] // LD1SH { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1sw { Z7.D }, P1/Z, [X19] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1sw { Z28.D }, P1/Z, [X26, #4, MUL VL] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1sw { Z26.D }, P4/Z, [X20, X17, LSL #2] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+ ld1sw { Z22.D }, P1/Z, [X14, Z23.D, SXTW #2] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1sw { Z4.D }, P3/Z, [X20, Z15.D, SXTW] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1sw { Z1.D }, P4/Z, [X20, Z23.D, LSL #2] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1sw { Z2.D }, P7/Z, [X4, Z0.D] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1sw { Z12.D }, P7/Z, [Z21.D] // LD1SW { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1sw { Z27.D }, P3/Z, [Z10.D, #24] // LD1SW { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1w { Z9.S }, P0/Z, [X18, Z9.S, SXTW #2] // LD1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 4 11 11 1.50 V1UnitL[2], V1UnitV[2]
+ ld1w { Z14.D }, P5/Z, [X26, Z2.D, UXTW #2] // LD1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1w { Z31.D }, P6/Z, [X17, Z2.D, UXTW] // LD1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1w { Z14.S }, P2/Z, [X18, Z28.S, SXTW] // LD1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 3.00 V1UnitL, V1UnitV
+ ld1w { Z13.D }, P3/Z, [X5, Z11.D, LSL #2] // LD1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1w { Z24.D }, P3/Z, [X2, Z17.D] // LD1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1w { Z4.S }, P0/Z, [Z1.S] // LD1W { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+ ld1w { Z17.S }, P6/Z, [Z26.S, #60] // LD1W { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+ ld1w { Z31.D }, P7/Z, [Z22.D] // LD1W { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld1w { Z2.D }, P3/Z, [Z6.D, #116] // LD1W { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ld2 { V13.8B, V14.8B }, [X4] // LD2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 3 8 8 2.00 V1UnitL, V1UnitV[2]
+ ld2 { V20.8B, V21.8B }, [X11], #16 // LD2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+ ld2 { V13.8B, V14.8B }, [X4], X7 // LD2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+ ld2 { V26.16B, V27.16B }, [X16] // LD2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 4 8 8 1.50 V1UnitL[2],V1UnitV[2]
+ ld2 { V15.16B, V16.16B }, [X3], #32 // LD2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], #32 \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 5 8 8 1.50 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2 { V24.16B, V25.16B }, [X7], X30 // LD2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 5 8 8 1.50 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2 { V0.4H, V1.4H }, [X21] // LD2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 3 8 8 2.00 V1UnitL, V1UnitV[2]
+ ld2 { V5.4H, V6.4H }, [X30], #16 // LD2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+ ld2 { V5.4H, V6.4H }, [X22], X1 // LD2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+ ld2 { V8.8H, V9.8H }, [X28] // LD2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 4 8 8 1.50 V1UnitL[2],V1UnitV[2]
+ ld2 { V14.8H, V15.8H }, [X19], #32 // LD2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], #32 \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 5 8 8 1.50 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2 { V28.8H, V29.8H }, [X26], X7 // LD2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 5 8 8 1.50 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2 { V2.2S, V3.2S }, [X16] // LD2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 3 8 8 2.00 V1UnitL, V1UnitV[2]
+ ld2 { V23.2S, V24.2S }, [X5], #16 // LD2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+ ld2 { V22.2S, V23.2S }, [X11], X12 // LD2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+ ld2 { V22.4S, V23.4S }, [X4] // LD2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 4 8 8 1.50 V1UnitL[2],V1UnitV[2]
+ ld2 { V27.4S, V28.4S }, [X18], #32 // LD2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], #32 \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 5 8 8 1.50 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2 { V22.4S, V23.4S }, [X26], X29 // LD2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 5 8 8 1.50 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2 { V22.2D, V23.2D }, [X17] // LD2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, Q-form, D \\ 4 8 8 1.50 V1UnitL[2],V1UnitV[2]
+ ld2 { V12.2D, V13.2D }, [X19], #32 // LD2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], #32 \\ ASIMD load, 2 element, multiple, Q-form, D \\ 5 8 8 1.50 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2 { V6.2D, V7.2D }, [X11], X24 // LD2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, Q-form, D \\ 5 8 8 1.50 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2 { V29.B, V30.B }[3], [X1] // LD2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>] \\ ASIMD load, 2 element, one lane, B/H \\ 3 8 8 2.00 V1UnitL, V1UnitV[2]
+ ld2 { V23.H, V24.H }[7], [X14] // LD2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>] \\ ASIMD load, 2 element, one lane, B/H \\ 3 8 8 2.00 V1UnitL, V1UnitV[2]
+ ld2 { V26.S, V27.S }[1], [X17] // LD2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>] \\ ASIMD load, 2 element, one lane, S \\ 3 8 8 2.00 V1UnitL, V1UnitV[2]
+ ld2 { V1.D, V2.D }[0], [X10] // LD2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>] \\ ASIMD load, 2 element, one lane, D \\ 3 8 8 2.00 V1UnitL, V1UnitV[2]
+ ld2 { V20.B, V21.B }[9], [X24], #2 // LD2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>], #2 \\ ASIMD load, 2 element, one lane, B/H \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+ ld2 { V29.B, V30.B }[6], [X18], X19 // LD2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, one lane, B/H \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+ ld2 { V2.H, V3.H }[3], [X12], #4 // LD2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>], #4 \\ ASIMD load, 2 element, one lane, B/H \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+ ld2 { V11.H, V12.H }[3], [X18], X17 // LD2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, one lane, B/H \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+ ld2 { V15.S, V16.S }[1], [X7], #8 // LD2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>], #8 \\ ASIMD load, 2 element, one lane, S \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+ ld2 { V29.S, V30.S }[1], [X12], X0 // LD2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, one lane, S \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+ ld2 { V1.D, V2.D }[1], [X3], #16 // LD2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>], #16 \\ ASIMD load, 2 element, one lane, D \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+ ld2 { V10.D, V11.D }[1], [X18], X27 // LD2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, one lane, D \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+ ld2b { Z9.B, Z10.B }, P2/Z, [X22] // LD2B { <Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 4 8 8 1.00 V1UnitV01[2],V1UnitL01[2]
+ ld2b { Z28.B, Z29.B }, P3/Z, [X22, #4, MUL VL] // LD2B { <Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 4 8 8 1.00 V1UnitV01[2],V1UnitL01[2]
+ ld2b { Z26.B, Z27.B }, P1/Z, [X3, X12] // LD2B { <Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 4 9 9 1.00 V1UnitV01[2],V1UnitL01[2]
+ ld2d { Z12.D, Z13.D }, P5/Z, [X24] // LD2D { <Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 4 8 8 1.00 V1UnitV01[2],V1UnitL01[2]
+ ld2d { Z22.D, Z23.D }, P2/Z, [X21, #-2, MUL VL] // LD2D { <Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 4 8 8 1.00 V1UnitV01[2],V1UnitL01[2]
+ ld2d { Z22.D, Z23.D }, P6/Z, [X14, X4, LSL #3] // LD2D { <Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 4 9 9 1.00 V1UnitV01[2],V1UnitL01[2]
+ ld2h { Z5.H, Z6.H }, P5/Z, [X20] // LD2H { <Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 4 8 8 1.00 V1UnitV01[2],V1UnitL01[2]
+ ld2h { Z27.H, Z28.H }, P7/Z, [X11, #14, MUL VL] // LD2H { <Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 4 8 8 1.00 V1UnitV01[2],V1UnitL01[2]
+ ld2h { Z18.H, Z19.H }, P3/Z, [X9, X17, LSL #1] // LD2H { <Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 4 10 10 1.00 V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2]
+ ld2r { V10.8B, V11.8B }, [X20] // LD2R { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 2.00 V1UnitL, V1UnitV[2]
+ ld2r { V18.8B, V19.8B }, [X11], #2 // LD2R { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], #2 \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+ ld2r { V28.8B, V29.8B }, [X30], X14 // LD2R { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+ ld2r { V10.16B, V11.16B }, [X23] // LD2R { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 2.00 V1UnitL, V1UnitV[2]
+ ld2r { V24.16B, V25.16B }, [X1], #2 // LD2R { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], #2 \\ ASIMD load, 2 element, all lanes, Q-form \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+ ld2r { V20.16B, V21.16B }, [X11], X7 // LD2R { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, Q-form \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+ ld2r { V25.4H, V26.4H }, [X11] // LD2R { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 2.00 V1UnitL, V1UnitV[2]
+ ld2r { V28.4H, V29.4H }, [X18], #4 // LD2R { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], #4 \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+ ld2r { V21.4H, V22.4H }, [X2], X17 // LD2R { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+ ld2r { V23.8H, V24.8H }, [X10] // LD2R { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 2.00 V1UnitL, V1UnitV[2]
+ ld2r { V19.8H, V20.8H }, [X29], #4 // LD2R { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], #4 \\ ASIMD load, 2 element, all lanes, Q-form \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+ ld2r { V13.8H, V14.8H }, [X13], X5 // LD2R { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, Q-form \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+ ld2r { V25.2S, V26.2S }, [X19] // LD2R { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 2.00 V1UnitL, V1UnitV[2]
+ ld2r { V5.2S, V6.2S }, [X28], #8 // LD2R { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], #8 \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+ ld2r { V4.2S, V5.2S }, [X14], X19 // LD2R { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+ ld2r { V8.4S, V9.4S }, [X17] // LD2R { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 2.00 V1UnitL, V1UnitV[2]
+ ld2r { V22.4S, V23.4S }, [X5], #8 // LD2R { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], #8 \\ ASIMD load, 2 element, all lanes, Q-form \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+ ld2r { V29.4S, V30.4S }, [X4], X18 // LD2R { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, Q-form \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+ ld2r { V9.1D, V10.1D }, [X25] // LD2R { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, D-form, D \\ 3 8 8 2.00 V1UnitL, V1UnitV[2]
+ ld2r { V15.1D, V16.1D }, [X26], #16 // LD2R { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, all lanes, D-form, D \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+ ld2r { V10.1D, V11.1D }, [X28], X26 // LD2R { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, D-form, D \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+ ld2r { V26.2D, V27.2D }, [X8] // LD2R { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 2.00 V1UnitL, V1UnitV[2]
+ ld2r { V14.2D, V15.2D }, [X3], #16 // LD2R { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, all lanes, Q-form \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+ ld2r { V24.2D, V25.2D }, [X6], X14 // LD2R { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, Q-form \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+ ld2w { Z21.S, Z22.S }, P4/Z, [X12] // LD2W { <Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 4 8 8 1.00 V1UnitV01[2],V1UnitL01[2]
+ ld2w { Z29.S, Z30.S }, P2/Z, [X19, #6, MUL VL] // LD2W { <Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 4 8 8 1.00 V1UnitV01[2],V1UnitL01[2]
+ ld2w { Z18.S, Z19.S }, P6/Z, [X22, X22, LSL #2] // LD2W { <Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 4 9 9 1.00 V1UnitV01[2],V1UnitL01[2]
+ ld3 { V8.8B, V9.8B, V10.8B }, [X0] // LD3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 5 8 8 1.33 V1UnitL[2], V1UnitV[3]
+ ld3 { V6.8B, V7.8B, V8.8B }, [X26], #24 // LD3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+ ld3 { V20.8B, V21.8B, V22.8B }, [X25], X24 // LD3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+ ld3 { V15.16B, V16.16B, V17.16B }, [X5] // LD3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 6 8 8 1.00 V1UnitL[3],V1UnitV[3]
+ ld3 { V19.16B, V20.16B, V21.16B }, [X3], #48 // LD3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], #48 \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 7 8 8 1.00 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3 { V26.16B, V27.16B, V28.16B }, [X8], X29 // LD3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 7 8 8 1.00 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3 { V15.4H, V16.4H, V17.4H }, [X8] // LD3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 5 8 8 1.33 V1UnitL[2], V1UnitV[3]
+ ld3 { V4.4H, V5.4H, V6.4H }, [X5], #24 // LD3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+ ld3 { V24.4H, V25.4H, V26.4H }, [X25], X0 // LD3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+ ld3 { V7.8H, V8.8H, V9.8H }, [X21] // LD3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 6 8 8 1.00 V1UnitL[3],V1UnitV[3]
+ ld3 { V4.8H, V5.8H, V6.8H }, [X26], #48 // LD3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], #48 \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 7 8 8 1.00 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3 { V12.8H, V13.8H, V14.8H }, [X0], X25 // LD3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 7 8 8 1.00 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3 { V16.2S, V17.2S, V18.2S }, [X0] // LD3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 5 8 8 1.33 V1UnitL[2], V1UnitV[3]
+ ld3 { V9.2S, V10.2S, V11.2S }, [X1], #24 // LD3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+ ld3 { V27.2S, V28.2S, V29.2S }, [X23], X4 // LD3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+ ld3 { V12.4S, V13.4S, V14.4S }, [X25] // LD3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 6 8 8 1.00 V1UnitL[3],V1UnitV[3]
+ ld3 { V12.4S, V13.4S, V14.4S }, [X27], #48 // LD3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], #48 \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 7 8 8 1.00 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3 { V2.4S, V3.4S, V4.4S }, [X22], X21 // LD3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 7 8 8 1.00 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3 { V10.2D, V11.2D, V12.2D }, [X18] // LD3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, Q-form, D \\ 6 8 8 1.00 V1UnitL[3],V1UnitV[3]
+ ld3 { V25.2D, V26.2D, V27.2D }, [X4], #48 // LD3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], #48 \\ ASIMD load, 3 element, multiple, Q-form, D \\ 7 8 8 1.00 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3 { V6.2D, V7.2D, V8.2D }, [X10], X24 // LD3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, Q-form, D \\ 7 8 8 1.00 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3 { V17.B, V18.B, V19.B }[2], [X27] // LD3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>] \\ ASIMD load, 3 element, one lane, B/H \\ 5 8 8 1.33 V1UnitL[2], V1UnitV[3]
+ ld3 { V18.H, V19.H, V20.H }[5], [X16] // LD3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>] \\ ASIMD load, 3 element, one lane, B/H \\ 5 8 8 1.33 V1UnitL[2], V1UnitV[3]
+ ld3 { V1.S, V2.S, V3.S }[3], [X14] // LD3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>] \\ ASIMD load, 3 element, one lane, S \\ 5 8 8 1.33 V1UnitL[2], V1UnitV[3]
+ ld3 { V5.D, V6.D, V7.D }[1], [X14] // LD3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>] \\ ASIMD load, 3 element, one lane, D \\ 5 8 8 1.33 V1UnitL[2], V1UnitV[3]
+ ld3 { V16.B, V17.B, V18.B }[3], [X15], #3 // LD3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>], #3 \\ ASIMD load, 3 element, one lane, B/H \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+ ld3 { V14.B, V15.B, V16.B }[4], [X23], X6 // LD3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, one lane, B/H \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+ ld3 { V11.H, V12.H, V13.H }[1], [X28], #6 // LD3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>], #6 \\ ASIMD load, 3 element, one lane, B/H \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+ ld3 { V4.H, V5.H, V6.H }[2], [X5], X15 // LD3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, one lane, B/H \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+ ld3 { V26.S, V27.S, V28.S }[0], [X14], #12 // LD3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>], #12 \\ ASIMD load, 3 element, one lane, S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+ ld3 { V1.S, V2.S, V3.S }[0], [X26], X20 // LD3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, one lane, S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+ ld3 { V14.D, V15.D, V16.D }[1], [X30], #24 // LD3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>], #24 \\ ASIMD load, 3 element, one lane, D \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+ ld3 { V23.D, V24.D, V25.D }[0], [X24], X14 // LD3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, one lane, D \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+ ld3b { Z29.B, Z30.B, Z31.B }, P3/Z, [X17] // LD3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 6 11 11 0.67 V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+ ld3b { Z23.B, Z24.B, Z25.B }, P7/Z, [X12, #18, MUL VL] // LD3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 6 11 11 0.67 V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+ ld3b { Z23.B, Z24.B, Z25.B }, P3/Z, [X12, X12] // LD3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 7 8 8 0.67 V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitS, V1UnitV[3], V1UnitV01[3]
+ ld3d { Z20.D, Z21.D, Z22.D }, P2/Z, [X6] // LD3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 6 11 11 0.67 V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+ ld3d { Z1.D, Z2.D, Z3.D }, P2/Z, [X9, #-15, MUL VL] // LD3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 6 11 11 0.67 V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+ ld3d { Z13.D, Z14.D, Z15.D }, P6/Z, [X27, X30, LSL #3] // LD3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 7 8 8 0.67 V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitS, V1UnitV[3], V1UnitV01[3]
+ ld3h { Z26.H, Z27.H, Z28.H }, P1/Z, [X29] // LD3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 6 11 11 0.67 V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+ ld3h { Z14.H, Z15.H, Z16.H }, P3/Z, [X18, #9, MUL VL] // LD3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 6 11 11 0.67 V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+ ld3h { Z5.H, Z6.H, Z7.H }, P3/Z, [X6, X21, LSL #1] // LD3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 7 8 8 0.67 V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitS, V1UnitV[3], V1UnitV01[3]
+ ld3r { V24.8B, V25.8B, V26.8B }, [X10] // LD3R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 5 8 8 1.33 V1UnitL[2], V1UnitV[3]
+ ld3r { V14.8B, V15.8B, V16.8B }, [X11], #3 // LD3R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], #3 \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+ ld3r { V22.8B, V23.8B, V24.8B }, [X0], X11 // LD3R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+ ld3r { V17.16B, V18.16B, V19.16B }, [X3] // LD3R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 5 8 8 1.33 V1UnitL[2], V1UnitV[3]
+ ld3r { V7.16B, V8.16B, V9.16B }, [X29], #3 // LD3R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], #3 \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+ ld3r { V3.16B, V4.16B, V5.16B }, [X20], X5 // LD3R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+ ld3r { V3.4H, V4.4H, V5.4H }, [X1] // LD3R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 5 8 8 1.33 V1UnitL[2], V1UnitV[3]
+ ld3r { V8.4H, V9.4H, V10.4H }, [X3], #6 // LD3R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], #6 \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+ ld3r { V4.4H, V5.4H, V6.4H }, [X0], X28 // LD3R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+ ld3r { V6.8H, V7.8H, V8.8H }, [X28] // LD3R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 5 8 8 1.33 V1UnitL[2], V1UnitV[3]
+ ld3r { V4.8H, V5.8H, V6.8H }, [X11], #6 // LD3R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], #6 \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+ ld3r { V3.8H, V4.8H, V5.8H }, [X17], X0 // LD3R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+ ld3r { V18.2S, V19.2S, V20.2S }, [X24] // LD3R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 5 8 8 1.33 V1UnitL[2], V1UnitV[3]
+ ld3r { V8.2S, V9.2S, V10.2S }, [X22], #12 // LD3R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], #12 \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+ ld3r { V12.2S, V13.2S, V14.2S }, [X0], X14 // LD3R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+ ld3r { V28.4S, V29.4S, V30.4S }, [X2] // LD3R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 5 8 8 1.33 V1UnitL[2], V1UnitV[3]
+ ld3r { V21.4S, V22.4S, V23.4S }, [X22], #12 // LD3R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], #12 \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+ ld3r { V28.4S, V29.4S, V30.4S }, [X13], X25 // LD3R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+ ld3r { V1.1D, V2.1D, V3.1D }, [X28] // LD3R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, D-form, D \\ 5 8 8 1.33 V1UnitL[2], V1UnitV[3]
+ ld3r { V0.1D, V1.1D, V2.1D }, [X7], #24 // LD3R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, all lanes, D-form, D \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+ ld3r { V22.1D, V23.1D, V24.1D }, [X9], X15 // LD3R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, D-form, D \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+ ld3r { V8.2D, V9.2D, V10.2D }, [X3] // LD3R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, Q-form, D \\ 5 8 8 1.33 V1UnitL[2], V1UnitV[3]
+ ld3r { V3.2D, V4.2D, V5.2D }, [X25], #24 // LD3R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, all lanes, Q-form, D \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+ ld3r { V8.2D, V9.2D, V10.2D }, [X18], X13 // LD3R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, Q-form, D \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+ ld3w { Z23.S, Z24.S, Z25.S }, P1/Z, [X8] // LD3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 6 11 11 0.67 V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+ ld3w { Z6.S, Z7.S, Z8.S }, P4/Z, [X0, #18, MUL VL] // LD3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 6 11 11 0.67 V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+ ld3w { Z27.S, Z28.S, Z29.S }, P3/Z, [X3, X6, LSL #2] // LD3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 7 8 8 0.67 V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitS, V1UnitV[3], V1UnitV01[3]
+ ld4 { V6.8B, V7.8B, V8.8B, V9.8B }, [X27] // LD4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 7 8 8 1.00 V1UnitL[3], V1UnitV[4]
+ ld4 { V20.8B, V21.8B, V22.8B, V23.8B }, [X10], #32 // LD4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+ ld4 { V18.8B, V19.8B, V20.8B, V21.8B }, [X24], X11 // LD4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+ ld4 { V11.16B, V12.16B, V13.16B, V14.16B }, [X5] // LD4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 8 9 9 0.75 V1UnitL[4], V1UnitV[4]
+ ld4 { V10.16B, V11.16B, V12.16B, V13.16B }, [X12], #64 // LD4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], #64 \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 9 9 9 0.75 V1UnitI, V1UnitL[4], V1UnitV[4]
+ ld4 { V12.16B, V13.16B, V14.16B, V15.16B }, [X4], X17 // LD4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 9 9 9 0.75 V1UnitI, V1UnitL[4], V1UnitV[4]
+ ld4 { V21.4H, V22.4H, V23.4H, V24.4H }, [X14] // LD4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 7 8 8 1.00 V1UnitL[3], V1UnitV[4]
+ ld4 { V10.4H, V11.4H, V12.4H, V13.4H }, [X19], #32 // LD4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+ ld4 { V5.4H, V6.4H, V7.4H, V8.4H }, [X15], X17 // LD4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+ ld4 { V9.8H, V10.8H, V11.8H, V12.8H }, [X1] // LD4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 8 9 9 0.75 V1UnitL[4], V1UnitV[4]
+ ld4 { V2.8H, V3.8H, V4.8H, V5.8H }, [X0], #64 // LD4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], #64 \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 9 9 9 0.75 V1UnitI, V1UnitL[4], V1UnitV[4]
+ ld4 { V4.8H, V5.8H, V6.8H, V7.8H }, [X17], X17 // LD4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 9 9 9 0.75 V1UnitI, V1UnitL[4], V1UnitV[4]
+ ld4 { V23.2S, V24.2S, V25.2S, V26.2S }, [X24] // LD4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 7 8 8 1.00 V1UnitL[3], V1UnitV[4]
+ ld4 { V25.2S, V26.2S, V27.2S, V28.2S }, [X3], #32 // LD4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+ ld4 { V22.2S, V23.2S, V24.2S, V25.2S }, [X14], X15 // LD4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+ ld4 { V17.4S, V18.4S, V19.4S, V20.4S }, [X4] // LD4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 8 9 9 0.75 V1UnitL[4], V1UnitV[4]
+ ld4 { V25.4S, V26.4S, V27.4S, V28.4S }, [X19], #64 // LD4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], #64 \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 9 9 9 0.75 V1UnitI, V1UnitL[4], V1UnitV[4]
+ ld4 { V4.4S, V5.4S, V6.4S, V7.4S }, [X28], X3 // LD4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 9 9 9 0.75 V1UnitI, V1UnitL[4], V1UnitV[4]
+ ld4 { V2.2D, V3.2D, V4.2D, V5.2D }, [X24] // LD4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, Q-form, D \\ 8 9 9 0.75 V1UnitL[4], V1UnitV[4]
+ ld4 { V18.2D, V19.2D, V20.2D, V21.2D }, [X0], #64 // LD4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], #64 \\ ASIMD load, 4 element, multiple, Q-form, D \\ 9 9 9 0.75 V1UnitI, V1UnitL[4], V1UnitV[4]
+ ld4 { V27.2D, V28.2D, V29.2D, V30.2D }, [X27], X4 // LD4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, Q-form, D \\ 9 9 9 0.75 V1UnitI, V1UnitL[4], V1UnitV[4]
+ ld4 { V4.B, V5.B, V6.B, V7.B }[12], [X27] // LD4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>] \\ ASIMD load, 4 element, one lane, B/H \\ 7 8 8 1.00 V1UnitL[3], V1UnitV[4]
+ ld4 { V5.H, V6.H, V7.H, V8.H }[0], [X4] // LD4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>] \\ ASIMD load, 4 element, one lane, B/H \\ 7 8 8 1.00 V1UnitL[3], V1UnitV[4]
+ ld4 { V0.S, V1.S, V2.S, V3.S }[0], [X26] // LD4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>] \\ ASIMD load, 4 element, one lane, S \\ 7 8 8 1.00 V1UnitL[3], V1UnitV[4]
+ ld4 { V2.D, V3.D, V4.D, V5.D }[0], [X29] // LD4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>] \\ ASIMD load, 4 element, one lane, D \\ 7 8 8 1.00 V1UnitL[3], V1UnitV[4]
+ ld4 { V26.B, V27.B, V28.B, V29.B }[4], [X13], #4 // LD4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>], #4 \\ ASIMD load, 4 element, one lane, B/H \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+ ld4 { V10.B, V11.B, V12.B, V13.B }[11], [X24], X21 // LD4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, one lane, B/H \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+ ld4 { V8.H, V9.H, V10.H, V11.H }[0], [X17], #8 // LD4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>], #8 \\ ASIMD load, 4 element, one lane, B/H \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+ ld4 { V21.H, V22.H, V23.H, V24.H }[2], [X21], X24 // LD4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, one lane, B/H \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+ ld4 { V1.S, V2.S, V3.S, V4.S }[1], [X28], #16 // LD4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>], #16 \\ ASIMD load, 4 element, one lane, S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+ ld4 { V20.S, V21.S, V22.S, V23.S }[1], [X27], X16 // LD4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, one lane, S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+ ld4 { V18.D, V19.D, V20.D, V21.D }[1], [X26], #32 // LD4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>], #32 \\ ASIMD load, 4 element, one lane, D \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+ ld4 { V8.D, V9.D, V10.D, V11.D }[0], [X23], X0 // LD4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, one lane, D \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+ ld4b { Z16.B, Z17.B, Z18.B, Z19.B }, P3/Z, [X23] // LD4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 8 12 12 0.50 V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4]
+ ld4b { Z7.B, Z8.B, Z9.B, Z10.B }, P5/Z, [X3, #12, MUL VL] // LD4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 8 12 12 0.50 V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4]
+ ld4b { Z7.B, Z8.B, Z9.B, Z10.B }, P4/Z, [X20, X12] // LD4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 10 13 13 0.50 V1UnitI[2], V1UnitL[4], V1UnitL01[4], V1UnitS[2], V1UnitV[4], V1UnitV01[4]
+ ld4d { Z26.D, Z27.D, Z28.D, Z29.D }, P7/Z, [X10] // LD4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 8 12 12 0.50 V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4]
+ ld4d { Z27.D, Z28.D, Z29.D, Z30.D }, P0/Z, [X6, #24, MUL VL] // LD4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 8 12 12 0.50 V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4]
+ ld4d { Z7.D, Z8.D, Z9.D, Z10.D }, P4/Z, [X25, X8, LSL #3] // LD4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 10 13 13 0.50 V1UnitI[2], V1UnitL[4], V1UnitL01[4], V1UnitS[2], V1UnitV[4], V1UnitV01[4]
+ ld4h { Z4.H, Z5.H, Z6.H, Z7.H }, P4/Z, [X19] // LD4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 8 12 12 0.50 V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4]
+ ld4h { Z4.H, Z5.H, Z6.H, Z7.H }, P1/Z, [X16, #-8, MUL VL] // LD4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 8 12 12 0.50 V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4]
+ ld4h { Z10.H, Z11.H, Z12.H, Z13.H }, P2/Z, [X8, X28, LSL #1] // LD4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 10 13 13 0.50 V1UnitI[2], V1UnitL[4], V1UnitL01[4], V1UnitS[2], V1UnitV[4], V1UnitV01[4]
+ ld4r { V20.8B, V21.8B, V22.8B, V23.8B }, [X23] // LD4R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 7 8 8 1.00 V1UnitL[3], V1UnitV[4]
+ ld4r { V24.8B, V25.8B, V26.8B, V27.8B }, [X15], #4 // LD4R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], #4 \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+ ld4r { V4.8B, V5.8B, V6.8B, V7.8B }, [X26], X6 // LD4R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+ ld4r { V1.16B, V2.16B, V3.16B, V4.16B }, [X25] // LD4R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 7 8 8 1.00 V1UnitL[3], V1UnitV[4]
+ ld4r { V1.16B, V2.16B, V3.16B, V4.16B }, [X14], #4 // LD4R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], #4 \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+ ld4r { V0.16B, V1.16B, V2.16B, V3.16B }, [X29], X11 // LD4R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+ ld4r { V16.4H, V17.4H, V18.4H, V19.4H }, [X6] // LD4R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 7 8 8 1.00 V1UnitL[3], V1UnitV[4]
+ ld4r { V14.4H, V15.4H, V16.4H, V17.4H }, [X0], #8 // LD4R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], #8 \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+ ld4r { V21.4H, V22.4H, V23.4H, V24.4H }, [X25], X22 // LD4R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+ ld4r { V4.8H, V5.8H, V6.8H, V7.8H }, [X23] // LD4R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 7 8 8 1.00 V1UnitL[3], V1UnitV[4]
+ ld4r { V25.8H, V26.8H, V27.8H, V28.8H }, [X7], #8 // LD4R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], #8 \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+ ld4r { V13.8H, V14.8H, V15.8H, V16.8H }, [X19], X27 // LD4R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+ ld4r { V0.2S, V1.2S, V2.2S, V3.2S }, [X30] // LD4R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 7 8 8 1.00 V1UnitL[3], V1UnitV[4]
+ ld4r { V23.2S, V24.2S, V25.2S, V26.2S }, [X29], #16 // LD4R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], #16 \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+ ld4r { V19.2S, V20.2S, V21.2S, V22.2S }, [X9], X0 // LD4R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+ ld4r { V7.4S, V8.4S, V9.4S, V10.4S }, [X23] // LD4R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 7 8 8 1.00 V1UnitL[3], V1UnitV[4]
+ ld4r { V9.4S, V10.4S, V11.4S, V12.4S }, [X3], #16 // LD4R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], #16 \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+ ld4r { V3.4S, V4.4S, V5.4S, V6.4S }, [X10], X22 // LD4R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+ ld4r { V7.1D, V8.1D, V9.1D, V10.1D }, [X26] // LD4R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, D-form, D \\ 7 8 8 1.00 V1UnitL[3], V1UnitV[4]
+ ld4r { V11.1D, V12.1D, V13.1D, V14.1D }, [X5], #32 // LD4R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, all lanes, D-form, D \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+ ld4r { V12.1D, V13.1D, V14.1D, V15.1D }, [X30], X17 // LD4R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, D-form, D \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+ ld4r { V7.2D, V8.2D, V9.2D, V10.2D }, [X8] // LD4R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, Q-form, D \\ 7 8 8 1.00 V1UnitL[3], V1UnitV[4]
+ ld4r { V12.2D, V13.2D, V14.2D, V15.2D }, [X2], #32 // LD4R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, all lanes, Q-form, D \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+ ld4r { V17.2D, V18.2D, V19.2D, V20.2D }, [X21], X13 // LD4R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, Q-form, D \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+ ld4w { Z18.S, Z19.S, Z20.S, Z21.S }, P6/Z, [X4] // LD4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 8 12 12 0.50 V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4]
+ ld4w { Z21.S, Z22.S, Z23.S, Z24.S }, P5/Z, [X16, #-8, MUL VL] // LD4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 8 12 12 0.50 V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4]
+ ld4w { Z25.S, Z26.S, Z27.S, Z28.S }, P2/Z, [X23, X8, LSL #2] // LD4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 10 13 13 0.50 V1UnitI[2], V1UnitL[4], V1UnitL01[4], V1UnitS[2], V1UnitV[4], V1UnitV01[4]
+ ldapur W7, [X24] // LDAPUR <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldapur W25, [X29, #68] // LDAPUR <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+ ldapur X20, [X13] // LDAPUR <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldapur X29, [X4, #-199] // LDAPUR <Xt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+ ldapurb W13, [X17] // LDAPURB <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldapurb W20, [X19, #124] // LDAPURB <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+ ldapurh W3, [X22] // LDAPURH <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldapurh W1, [X6, #113] // LDAPURH <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+ ldapursb W7, [X8] // LDAPURSB <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldapursb W29, [X22, #-76] // LDAPURSB <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+ ldapursb X29, [X7] // LDAPURSB <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldapursb X6, [X0, #-254] // LDAPURSB <Xt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+ ldapursh W17, [X19] // LDAPURSH <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldapursh W26, [X18, #-114] // LDAPURSH <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+ ldapursh X3, [X3] // LDAPURSH <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldapursh X13, [X25, #30] // LDAPURSH <Xt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+ ldapursw X3, [X18] // LDAPURSW <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldapursw X21, [X25, #0] // LDAPURSW <Xt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+ ldar W9, [X20] // LDAR <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldar W15, [X0, #0] // LDAR <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ ldar X5, [X25] // LDAR <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldar X11, [X2, #0] // LDAR <Xt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ ldarb W16, [X21] // LDARB <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldarb W14, [X30, #0] // LDARB <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ ldarh W26, [X25] // LDARH <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldarh W21, [X2, #0] // LDARH <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ ldaxp W13, W22, [X28] // LDAXP <Wt1>, <Wt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldaxp W11, W19, [X20, #0] // LDAXP <Wt1>, <Wt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ ldaxp X25, X8, [X16] // LDAXP <Xt1>, <Xt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldaxp X28, X17, [X25, #0] // LDAXP <Xt1>, <Xt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ ldaxr W4, [X5] // LDAXR <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldaxr W10, [X7, #0] // LDAXR <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ ldaxr X22, [X21] // LDAXR <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldaxr X7, [X1, #0] // LDAXR <Xt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ ldaxrb W12, [X30] // LDAXRB <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldaxrb W27, [X2, #0] // LDAXRB <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ ldaxrh W30, [X16] // LDAXRH <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldaxrh W14, [X3, #0] // LDAXRH <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ ldff1b { Z10.B }, P3/Z, [X10] // LDFF1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1b { Z2.B }, P5/Z, [X28, X2] // LDFF1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1b { Z2.H }, P0/Z, [X14] // LDFF1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1b { Z30.H }, P3/Z, [X25, X18] // LDFF1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1b { Z17.S }, P5/Z, [X24] // LDFF1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1b { Z17.S }, P7/Z, [X11, X15] // LDFF1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1b { Z9.D }, P2/Z, [X3] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1b { Z5.D }, P2/Z, [X6, X8] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1b { Z7.D }, P3/Z, [X27, Z19.D, SXTW] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1b { Z13.S }, P3/Z, [X24, Z25.S, SXTW] // LDFF1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 3.00 V1UnitL, V1UnitV
+ ldff1b { Z27.D }, P0/Z, [X13, Z16.D] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1b { Z7.S }, P7/Z, [Z16.S] // LDFF1B { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+ ldff1b { Z11.S }, P5/Z, [Z8.S, #25] // LDFF1B { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+ ldff1b { Z2.D }, P7/Z, [Z19.D] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1b { Z3.D }, P5/Z, [Z0.D, #11] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1d { Z21.D }, P2/Z, [X20] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1d { Z9.D }, P3/Z, [X28, X30, LSL #3] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1d { Z21.D }, P4/Z, [X11, Z12.D, SXTW #3] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #3] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1d { Z6.D }, P4/Z, [X15, Z1.D, UXTW] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1d { Z12.D }, P7/Z, [X11, Z28.D, LSL #3] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #3] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1d { Z26.D }, P4/Z, [X30, Z5.D] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1d { Z10.D }, P5/Z, [Z10.D] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1d { Z21.D }, P6/Z, [Z3.D, #48] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1h { Z14.H }, P3/Z, [X22] // LDFF1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ldff1h { Z15.H }, P2/Z, [X24, X8, LSL #1] // LDFF1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ldff1h { Z23.S }, P0/Z, [X12] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ldff1h { Z18.S }, P0/Z, [X7, X25, LSL #1] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ldff1h { Z16.D }, P0/Z, [X11] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ldff1h { Z25.D }, P3/Z, [X24, X19, LSL #1] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ldff1h { Z9.S }, P2/Z, [X3, Z24.S, SXTW #1] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 4 11 11 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1h { Z7.D }, P0/Z, [X8, Z17.D, UXTW #1] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1h { Z9.D }, P5/Z, [X4, Z10.D, SXTW] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1h { Z4.S }, P4/Z, [X6, Z27.S, UXTW] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 3.00 V1UnitL, V1UnitV
+ ldff1h { Z25.D }, P1/Z, [X29, Z6.D, LSL #1] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1h { Z10.D }, P7/Z, [X1, Z26.D] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1h { Z4.S }, P1/Z, [Z27.S] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+ ldff1h { Z5.S }, P3/Z, [Z8.S, #62] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+ ldff1h { Z16.D }, P5/Z, [Z10.D] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1h { Z15.D }, P2/Z, [Z19.D, #34] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1sb { Z0.H }, P2/Z, [X2] // LDFF1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1sb { Z29.H }, P1/Z, [X16, X21] // LDFF1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1sb { Z20.S }, P7/Z, [X8] // LDFF1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1sb { Z8.S }, P2/Z, [X4, X14] // LDFF1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1sb { Z11.D }, P4/Z, [X6] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1sb { Z17.D }, P4/Z, [X16, X10] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1sb { Z13.D }, P2/Z, [X28, Z8.D, SXTW] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1sb { Z3.S }, P2/Z, [X26, Z24.S, SXTW] // LDFF1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 3.00 V1UnitL, V1UnitV
+ ldff1sb { Z10.D }, P7/Z, [X20, Z6.D] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1sb { Z18.S }, P3/Z, [Z9.S] // LDFF1SB { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+ ldff1sb { Z25.S }, P2/Z, [Z29.S, #25] // LDFF1SB { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+ ldff1sb { Z8.D }, P0/Z, [Z24.D] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1sb { Z7.D }, P0/Z, [Z4.D, #9] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1sh { Z2.S }, P2/Z, [X6] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ldff1sh { Z9.S }, P3/Z, [X30, X16, LSL #1] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ldff1sh { Z7.D }, P4/Z, [X30] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ldff1sh { Z1.D }, P0/Z, [X29, X0, LSL #1] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ldff1sh { Z25.S }, P4/Z, [X5, Z9.S, SXTW #1] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 4 11 11 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1sh { Z17.D }, P3/Z, [X0, Z25.D, SXTW #1] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1sh { Z12.D }, P7/Z, [X5, Z15.D, SXTW] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1sh { Z8.S }, P5/Z, [X3, Z21.S, UXTW] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 3.00 V1UnitL, V1UnitV
+ ldff1sh { Z14.D }, P6/Z, [X17, Z27.D, LSL #1] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1sh { Z23.D }, P4/Z, [X22, Z0.D] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1sh { Z6.S }, P4/Z, [Z6.S] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+ ldff1sh { Z3.S }, P7/Z, [Z26.S, #16] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+ ldff1sh { Z25.D }, P3/Z, [Z17.D] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1sh { Z2.D }, P3/Z, [Z31.D, #26] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1sw { Z16.D }, P2/Z, [X8] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1sw { Z27.D }, P1/Z, [X6, X11, LSL #2] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1sw { Z27.D }, P3/Z, [X5, Z20.D, UXTW #2] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1sw { Z15.D }, P1/Z, [X13, Z26.D, SXTW] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1sw { Z24.D }, P2/Z, [X7, Z23.D, LSL #2] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1sw { Z8.D }, P3/Z, [X5, Z22.D] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1sw { Z16.D }, P6/Z, [Z12.D] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1sw { Z3.D }, P1/Z, [Z13.D, #60] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1w { Z2.S }, P5/Z, [X13] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1w { Z9.S }, P3/Z, [X16, X19, LSL #2] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1w { Z31.D }, P6/Z, [X3] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1w { Z30.D }, P4/Z, [X25, X12, LSL #2] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1w { Z27.S }, P6/Z, [X10, Z17.S, UXTW #2] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 4 11 11 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1w { Z8.D }, P4/Z, [X28, Z31.D, SXTW #2] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1w { Z1.D }, P0/Z, [X23, Z14.D, UXTW] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1w { Z17.S }, P5/Z, [X8, Z6.S, UXTW] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 3.00 V1UnitL, V1UnitV
+ ldff1w { Z19.D }, P3/Z, [X7, Z18.D, LSL #2] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1w { Z23.D }, P2/Z, [X16, Z4.D] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1w { Z24.S }, P6/Z, [Z24.S] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+ ldff1w { Z20.S }, P0/Z, [Z6.S, #36] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+ ldff1w { Z21.D }, P5/Z, [Z12.D] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldff1w { Z29.D }, P2/Z, [Z11.D, #40] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+ ldnf1b { Z17.B }, P5/Z, [X20] // LDNF1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1b { Z8.B }, P5/Z, [X26, #1, MUL VL] // LDNF1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1b { Z4.H }, P3/Z, [X25] // LDNF1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1b { Z31.H }, P3/Z, [X7, #0, MUL VL] // LDNF1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1b { Z2.S }, P7/Z, [X25] // LDNF1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1b { Z17.S }, P5/Z, [X29, #2, MUL VL] // LDNF1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1b { Z6.D }, P5/Z, [X26] // LDNF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1b { Z18.D }, P4/Z, [X20, #5, MUL VL] // LDNF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1d { Z5.D }, P6/Z, [X6] // LDNF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1d { Z19.D }, P0/Z, [X15, #-1, MUL VL] // LDNF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1h { Z7.H }, P5/Z, [X22] // LDNF1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1h { Z27.H }, P1/Z, [X2, #6, MUL VL] // LDNF1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1h { Z18.S }, P2/Z, [X13] // LDNF1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1h { Z8.S }, P2/Z, [X29, #-8, MUL VL] // LDNF1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1h { Z26.D }, P5/Z, [X5] // LDNF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1h { Z20.D }, P0/Z, [X29, #-6, MUL VL] // LDNF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1sb { Z17.H }, P0/Z, [X23] // LDNF1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1sb { Z14.H }, P0/Z, [X18, #-5, MUL VL] // LDNF1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1sb { Z23.S }, P0/Z, [X3] // LDNF1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1sb { Z13.S }, P7/Z, [X15, #-8, MUL VL] // LDNF1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1sb { Z14.D }, P4/Z, [X7] // LDNF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1sb { Z13.D }, P7/Z, [X25, #6, MUL VL] // LDNF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1sh { Z28.S }, P4/Z, [X9] // LDNF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1sh { Z3.S }, P1/Z, [X14, #-2, MUL VL] // LDNF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1sh { Z1.D }, P2/Z, [X0] // LDNF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1sh { Z14.D }, P3/Z, [X8, #3, MUL VL] // LDNF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1sw { Z8.D }, P4/Z, [X9] // LDNF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1sw { Z28.D }, P4/Z, [X13, #-7, MUL VL] // LDNF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1w { Z15.S }, P5/Z, [X27] // LDNF1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1w { Z28.S }, P0/Z, [X28, #-1, MUL VL] // LDNF1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1w { Z28.D }, P5/Z, [X13] // LDNF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1w { Z4.D }, P0/Z, [X12, #2, MUL VL] // LDNF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnp S1, S13, [X4] // LDNP <St1>, <St2>, [<Xn|SP>] \\ Load vector pair, immed offset, S/D-form \\ 1 6 6 3.0 V1UnitL
+ ldnp S30, S5, [X11, #-184] // LDNP <St1>, <St2>, [<Xn|SP>, #<imm32>] \\ Load vector pair, immed offset, S/D-form \\ 1 6 6 3.0 V1UnitL
+ ldnp D3, D12, [X21] // LDNP <Dt1>, <Dt2>, [<Xn|SP>] \\ Load vector pair, immed offset, S/D-form \\ 1 6 6 3.0 V1UnitL
+ ldnp D12, D5, [X7, #-424] // LDNP <Dt1>, <Dt2>, [<Xn|SP>, #<imm64>] \\ Load vector pair, immed offset, S/D-form \\ 1 6 6 3.0 V1UnitL
+ ldnp Q0, Q14, [X24] // LDNP <Qt1>, <Qt2>, [<Xn|SP>] \\ Load vector pair, immed offset, Q-form \\ 2 6 6 1.50 V1UnitL[2]
+ ldnp Q4, Q1, [X27, #80] // LDNP <Qt1>, <Qt2>, [<Xn|SP>, #<imm128>] \\ Load vector pair, immed offset, Q-form \\ 2 6 6 1.50 V1UnitL[2]
+ ldnp W4, W20, [X25] // LDNP <Wt1>, <Wt2>, [<Xn|SP>] \\ Load pair, signed immed offset, normal, W-form \\ 1 4 4 3.0 V1UnitL
+ ldnp W30, W4, [X21, #-196] // LDNP <Wt1>, <Wt2>, [<Xn|SP>, #<imm32>] \\ Load pair, signed immed offset, normal, W-form \\ 1 4 4 3.0 V1UnitL
+ ldnp X7, X30, [X18] // LDNP <Xt1>, <Xt2>, [<Xn|SP>] \\ Load pair, signed immed offset, normal, X-form \\ 2 4 4 1.50 V1UnitL[2]
+ ldnp X5, X19, [X1, #-240] // LDNP <Xt1>, <Xt2>, [<Xn|SP>, #<imm64>] \\ Load pair, signed immed offset, normal, X-form \\ 2 4 4 1.50 V1UnitL[2]
+ ldnt1b { Z9.B }, P2/Z, [X21] // LDNT1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnt1b { Z30.B }, P5/Z, [X30, #-3, MUL VL] // LDNT1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnt1b { Z10.B }, P5/Z, [X12, X17] // LDNT1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Non temporal load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldnt1d { Z27.D }, P2/Z, [X12] // LDNT1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnt1d { Z5.D }, P7/Z, [X22, #6, MUL VL] // LDNT1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnt1d { Z28.D }, P2/Z, [X14, X0, LSL #3] // LDNT1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Non temporal load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldnt1h { Z11.H }, P0/Z, [X21] // LDNT1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnt1h { Z19.H }, P1/Z, [X24, #-5, MUL VL] // LDNT1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnt1h { Z27.H }, P0/Z, [X22, X24, LSL #1] // LDNT1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Non temporal load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ldnt1w { Z27.S }, P4/Z, [X19] // LDNT1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnt1w { Z15.S }, P0/Z, [X22, #3, MUL VL] // LDNT1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnt1w { Z25.S }, P4/Z, [X12, X21, LSL #2] // LDNT1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Non temporal load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldp S19, S15, [X24], #-64 // LDP <St1>, <St2>, [<Xn|SP>], #<imm32> \\ Load vector pair, immed post-index, S/D-form \\ 2 6 6 3.0 V1UnitI,V1UnitL
+ ldp D9, D1, [X20], #296 // LDP <Dt1>, <Dt2>, [<Xn|SP>], #<imm64> \\ Load vector pair, immed post-index, S/D-form \\ 2 6 6 3.0 V1UnitI,V1UnitL
+ ldp Q18, Q24, [X11], #144 // LDP <Qt1>, <Qt2>, [<Xn|SP>], #<imm128> \\ Load vector pair, immed post-index, Q-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+ ldp S10, S30, [X0, #-4]! // LDP <St1>, <St2>, [<Xn|SP>, #<imm32>]! \\ Load vector pair, immed pre-index, S/D-form \\ 2 6 6 3.0 V1UnitI,V1UnitL
+ ldp D26, D11, [X16, #-304]! // LDP <Dt1>, <Dt2>, [<Xn|SP>, #<imm64>]! \\ Load vector pair, immed pre-index, S/D-form \\ 2 6 6 3.0 V1UnitI,V1UnitL
+ ldp Q18, Q12, [X25, #960]! // LDP <Qt1>, <Qt2>, [<Xn|SP>, #<imm128>]! \\ Load vector pair, immed pre-index, Q-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+ ldp S12, S31, [X20, #-192] // LDP <St1>, <St2>, [<Xn|SP>, #<imm32>] \\ Load vector pair, immed offset, S/D-form \\ 1 6 6 3.0 V1UnitL
+ ldp D26, D6, [X22, #-144] // LDP <Dt1>, <Dt2>, [<Xn|SP>, #<imm64>] \\ Load vector pair, immed offset, S/D-form \\ 1 6 6 3.0 V1UnitL
+ ldp Q5, Q19, [X9, #-448] // LDP <Qt1>, <Qt2>, [<Xn|SP>, #<imm128>] \\ Load vector pair, immed offset, Q-form \\ 2 6 6 1.50 V1UnitL[2]
+ ldp W10, W18, [X16], #-96 // LDP <Wt1>, <Wt2>, [<Xn|SP>], #<imm32> \\ Load pair, immed post-index or immed pre-index, normal, W-form \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldp X13, X16, [X11], #288 // LDP <Xt1>, <Xt2>, [<Xn|SP>], #<imm64> \\ Load pair, immed post-index or immed pre-index, normal, X-form \\ 3 4 4 1.50 V1UnitI, V1UnitL[2]
+ ldp W7, W16, [X13, #-116]! // LDP <Wt1>, <Wt2>, [<Xn|SP>, #<imm32>]! \\ Load pair, immed post-index or immed pre-index, normal, W-form \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldp X26, X3, [X14, #16]! // LDP <Xt1>, <Xt2>, [<Xn|SP>, #<imm64>]! \\ Load pair, immed post-index or immed pre-index, normal, X-form \\ 3 4 4 1.50 V1UnitI, V1UnitL[2]
+ ldp W25, W23, [X22] // LDP <Wt1>, <Wt2>, [<Xn|SP>] \\ Load pair, signed immed offset, normal, W-form \\ 1 4 4 3.0 V1UnitL
+ ldp W3, W21, [X17, #40] // LDP <Wt1>, <Wt2>, [<Xn|SP>, #<imm32>] \\ Load pair, signed immed offset, normal, W-form \\ 1 4 4 3.0 V1UnitL
+ ldp X6, X25, [X17] // LDP <Xt1>, <Xt2>, [<Xn|SP>] \\ Load pair, signed immed offset, normal, X-form \\ 2 4 4 1.50 V1UnitL[2]
+ ldp X9, X21, [X3, #104] // LDP <Xt1>, <Xt2>, [<Xn|SP>, #<imm64>] \\ Load pair, signed immed offset, normal, X-form \\ 2 4 4 1.50 V1UnitL[2]
+ ldpsw X23, X26, [X30], #-160 // LDPSW <Xt1>, <Xt2>, [<Xn|SP>], #<imm> \\ Load pair, immed post-index or immed pre-index, signed words \\ 3 5 5 2.00 V1UnitI[2], V1UnitL
+ ldpsw X19, X28, [X21, #-248]! // LDPSW <Xt1>, <Xt2>, [<Xn|SP>, #<imm>]! \\ Load pair, immed post-index or immed pre-index, signed words \\ 3 5 5 2.00 V1UnitI[2], V1UnitL
+ ldpsw X13, X20, [X15] // LDPSW <Xt1>, <Xt2>, [<Xn|SP>] \\ Load pair, signed immed offset, signed words \\ 2 5 5 3.00 V1UnitI, V1UnitL
+ ldpsw X9, X27, [X8, #80] // LDPSW <Xt1>, <Xt2>, [<Xn|SP>, #<imm>] \\ Load pair, signed immed offset, signed words \\ 2 5 5 3.00 V1UnitI, V1UnitL
+ ldr W13, [X2], #-22 // LDR <Wt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldr X6, [X9], #248 // LDR <Xt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldr W20, [X10, #13]! // LDR <Wt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldr X23, [X20, #-24]! // LDR <Xt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldr W19, [X15, #11620] // LDR <Wt>, [<Xn|SP>, #<pimm32>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldr X2, [X13, #18528] // LDR <Xt>, [<Xn|SP>, #<pimm64>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldr B0, [X15], #-18 // LDR <Bt>, [<Xn|SP>], #<simm> \\ Load vector reg, immed post-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ldr H25, [X4], #-156 // LDR <Ht>, [<Xn|SP>], #<simm> \\ Load vector reg, immed post-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ldr S28, [X6], #162 // LDR <St>, [<Xn|SP>], #<simm> \\ Load vector reg, immed post-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ldr D23, [X8], #-176 // LDR <Dt>, [<Xn|SP>], #<simm> \\ Load vector reg, immed post-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ldr Q5, [X18], #70 // LDR <Qt>, [<Xn|SP>], #<simm> \\ Load vector reg, immed post-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ldr B9, [X0, #-104]! // LDR <Bt>, [<Xn|SP>, #<simm>]! \\ Load vector reg, immed pre-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ldr H24, [X10, #34]! // LDR <Ht>, [<Xn|SP>, #<simm>]! \\ Load vector reg, immed pre-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ldr S29, [X5, #168]! // LDR <St>, [<Xn|SP>, #<simm>]! \\ Load vector reg, immed pre-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ldr D22, [X9, #-1]! // LDR <Dt>, [<Xn|SP>, #<simm>]! \\ Load vector reg, immed pre-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ldr Q27, [X20, #-204]! // LDR <Qt>, [<Xn|SP>, #<simm>]! \\ Load vector reg, immed pre-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ldr B23, [X0, #349] // LDR <Bt>, [<Xn|SP>, #<pimmb>] \\ Load vector reg, unsigned immed \\ 1 6 6 3.0 V1UnitL
+ ldr H1, [X15, #3540] // LDR <Ht>, [<Xn|SP>, #<pimmh>] \\ Load vector reg, unsigned immed \\ 1 6 6 3.0 V1UnitL
+ ldr S14, [X7, #16208] // LDR <St>, [<Xn|SP>, #<pimms>] \\ Load vector reg, unsigned immed \\ 1 6 6 3.0 V1UnitL
+ ldr D4, [X17, #7368] // LDR <Dt>, [<Xn|SP>, #<pimmd>] \\ Load vector reg, unsigned immed \\ 1 6 6 3.0 V1UnitL
+ ldr Q14, [X6, #4624] // LDR <Qt>, [<Xn|SP>, #<pimmq>] \\ Load vector reg, unsigned immed \\ 1 6 6 3.0 V1UnitL
+ ldr W15, test // LDR <Wt>, <label> \\ Load register, literal \\ 1 4 4 3.0 V1UnitL
+ ldr X26, test // LDR <Xt>, <label> \\ Load register, literal \\ 1 4 4 3.0 V1UnitL
+ ldr S17, test // LDR <St>, <label> \\ Load vector reg, literal, S/D/Q forms \\ 1 6 6 3.0 V1UnitL
+ ldr D10, test // LDR <Dt>, <label> \\ Load vector reg, literal, S/D/Q forms \\ 1 6 6 3.0 V1UnitL
+ ldr Q22, test // LDR <Qt>, <label> \\ Load vector reg, literal, S/D/Q forms \\ 1 6 6 3.0 V1UnitL
+ ldr P0, [X28] // LDR <Pt>, [<Xn|SP>] \\ Load predicate \\ 2 6 6 2.0 V1UnitL,V1UnitM
+ ldr P1, [X6, #-53, MUL VL] // LDR <Pt>, [<Xn|SP>, #<imm>, MUL VL] \\ Load predicate \\ 2 6 6 2.0 V1UnitL,V1UnitM
+ ldr W30, [X10, X0] // LDR <Wt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+ ldr X13, [X4, X21] // LDR <Xt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+ ldr W25, [X18, W26, UXTW] // LDR <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldr X20, [X29, W26, UXTW] // LDR <Xt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldr W26, [X12, W0, UXTW #2] // LDR <Wt>, [<Xn|SP>, <Wm>, UXTW #2] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ ldr X13, [X2, W10, UXTW #3] // LDR <Xt>, [<Xn|SP>, <Wm>, UXTW #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ ldr W13, [X18, W19, SXTW] // LDR <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldr X5, [X26, W12, SXTW] // LDR <Xt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldr W16, [X9, W24, SXTW #2] // LDR <Wt>, [<Xn|SP>, <Wm>, SXTW #2] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ ldr X21, [X29, W4, SXTW #3] // LDR <Xt>, [<Xn|SP>, <Wm>, SXTW #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ ldr W19, [X15, X1, SXTX] // LDR <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldr X25, [X4, X20, SXTX] // LDR <Xt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldr W3, [X1, X17, SXTX #2] // LDR <Wt>, [<Xn|SP>, <Xm>, SXTX #2] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ ldr X2, [X13, X26, SXTX #3] // LDR <Xt>, [<Xn|SP>, <Xm>, SXTX #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ ldr W1, [X18, X17, LSL #2] // LDR <Wt>, [<Xn|SP>, <Xm>, LSL #2] \\ Load register, register offset, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ ldr X22, [X17, X3, LSL #3] // LDR <Xt>, [<Xn|SP>, <Xm>, LSL #3] \\ Load register, register offset, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ ldr B8, [X30, X10] // LDR <Bt>, [<Xn|SP>, <Xm>] \\ Load vector reg, register offset, basic \\ 1 6 6 3.0 V1UnitL
+ ldr B25, [X21, W8, UXTW] // LDR <Bt>, [<Xn|SP>, <Wm>, UXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+ ldr B7, [X9, W29, SXTW] // LDR <Bt>, [<Xn|SP>, <Wm>, SXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+ ldr B31, [X17, X6, SXTX] // LDR <Bt>, [<Xn|SP>, <Xm>, SXTX] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+ ldr H11, [X13, X9] // LDR <Ht>, [<Xn|SP>, <Xm>] \\ Load vector reg, register offset, basic \\ 2 7 7 3.00 V1UnitL
+ ldr H6, [X4, W4, UXTW] // LDR <Ht>, [<Xn|SP>, <Wm>, UXTW] \\ Load vector reg, register offset, extend \\ 2 7 7 3.00 V1UnitL
+ ldr H28, [X3, W28, SXTW] // LDR <Ht>, [<Xn|SP>, <Wm>, SXTW] \\ Load vector reg, register offset, extend \\ 2 7 7 3.00 V1UnitL
+ ldr H3, [X15, X19, SXTX] // LDR <Ht>, [<Xn|SP>, <Xm>, SXTX] \\ Load vector reg, register offset, extend \\ 2 7 7 3.00 V1UnitL
+ ldr H24, [X27, W5, UXTW #1] // LDR <Ht>, [<Xn|SP>, <Wm>, UXTW #1] \\ Load vector reg, register offset, extend, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+ ldr H22, [X28, W11, SXTW #1] // LDR <Ht>, [<Xn|SP>, <Wm>, SXTW #1] \\ Load vector reg, register offset, extend, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+ ldr H3, [X18, X26, SXTX #1] // LDR <Ht>, [<Xn|SP>, <Xm>, SXTX #1] \\ Load vector reg, register offset, extend, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+ ldr H8, [X23, X19, LSL #1] // LDR <Ht>, [<Xn|SP>, <Xm>, LSL #1] \\ Load vector reg, register offset, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+ ldr S21, [X1, X29] // LDR <St>, [<Xn|SP>, <Xm>] \\ Load vector reg, register offset, basic \\ 1 6 6 3.0 V1UnitL
+ ldr S12, [X30, W5, UXTW] // LDR <St>, [<Xn|SP>, <Wm>, UXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+ ldr S15, [X2, W20, SXTW] // LDR <St>, [<Xn|SP>, <Wm>, SXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+ ldr S11, [X25, X20, SXTX] // LDR <St>, [<Xn|SP>, <Xm>, SXTX] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+ ldr S9, [X24, W27, UXTW #2] // LDR <St>, [<Xn|SP>, <Wm>, UXTW #2] \\ Load vector reg, register offset, extend, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+ ldr S7, [X2, W5, SXTW #2] // LDR <St>, [<Xn|SP>, <Wm>, SXTW #2] \\ Load vector reg, register offset, extend, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+ ldr S13, [X19, X28, SXTX #2] // LDR <St>, [<Xn|SP>, <Xm>, SXTX #2] \\ Load vector reg, register offset, extend, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+ ldr S21, [X10, X4, LSL #2] // LDR <St>, [<Xn|SP>, <Xm>, LSL #2] \\ Load vector reg, register offset, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+ ldr D10, [X23, X10] // LDR <Dt>, [<Xn|SP>, <Xm>] \\ Load vector reg, register offset, basic \\ 1 6 6 3.0 V1UnitL
+ ldr D24, [X26, W7, UXTW] // LDR <Dt>, [<Xn|SP>, <Wm>, UXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+ ldr D28, [X12, W2, SXTW] // LDR <Dt>, [<Xn|SP>, <Wm>, SXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+ ldr D0, [X7, X29, SXTX] // LDR <Dt>, [<Xn|SP>, <Xm>, SXTX] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+ ldr D24, [X9, W27, UXTW #3] // LDR <Dt>, [<Xn|SP>, <Wm>, UXTW #3] \\ Load vector reg, register offset, extend, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+ ldr D5, [X17, W2, SXTW #3] // LDR <Dt>, [<Xn|SP>, <Wm>, SXTW #3] \\ Load vector reg, register offset, extend, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+ ldr D2, [X5, X16, SXTX #3] // LDR <Dt>, [<Xn|SP>, <Xm>, SXTX #3] \\ Load vector reg, register offset, extend, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+ ldr D2, [X29, X18, LSL #3] // LDR <Dt>, [<Xn|SP>, <Xm>, LSL #3] \\ Load vector reg, register offset, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+ ldr Q9, [X13, X16] // LDR <Qt>, [<Xn|SP>, <Xm>] \\ Load vector reg, register offset, basic \\ 2 7 7 3.00 V1UnitL
+ ldr Q16, [X16, W1, UXTW] // LDR <Qt>, [<Xn|SP>, <Wm>, UXTW] \\ Load vector reg, register offset, extend \\ 2 7 7 3.00 V1UnitL
+ ldr Q1, [X17, W5, SXTW] // LDR <Qt>, [<Xn|SP>, <Wm>, SXTW] \\ Load vector reg, register offset, extend \\ 2 7 7 3.00 V1UnitL
+ ldr Q1, [X8, X9, SXTX] // LDR <Qt>, [<Xn|SP>, <Xm>, SXTX] \\ Load vector reg, register offset, extend \\ 2 7 7 3.00 V1UnitL
+ ldr Q23, [X26, W23, UXTW #4] // LDR <Qt>, [<Xn|SP>, <Wm>, UXTW #4] \\ Load vector reg, register offset, extend, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+ ldr Q3, [X18, W23, SXTW #4] // LDR <Qt>, [<Xn|SP>, <Wm>, SXTW #4] \\ Load vector reg, register offset, extend, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+ ldr Q2, [X28, X30, SXTX #4] // LDR <Qt>, [<Xn|SP>, <Xm>, SXTX #4] \\ Load vector reg, register offset, extend, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+ ldr Q21, [X23, X27, LSL #4] // LDR <Qt>, [<Xn|SP>, <Xm>, LSL #4] \\ Load vector reg, register offset, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+ ldr Z26, [X4] // LDR <Zt>, [<Xn|SP>] \\ Load vector \\ 1 6 6 2.0 V1UnitL01
+ ldr Z18, [X27, #16, MUL VL] // LDR <Zt>, [<Xn|SP>, #<imm>, MUL VL] \\ Load vector \\ 1 6 6 2.0 V1UnitL01
+ ldrb W4, [X17], #0 // LDRB <Wt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldrb W27, [X23, #114]! // LDRB <Wt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldrb W26, [X19] // LDRB <Wt>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldrb W29, [X18, #3179] // LDRB <Wt>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldrb W16, [X25, X9] // LDRB <Wt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+ ldrb W9, [X15, W19, UXTW] // LDRB <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrb W25, [X7, W0, SXTW] // LDRB <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrb W0, [X18, X21, SXTX] // LDRB <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrh W9, [X1], #-2 // LDRH <Wt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldrh W12, [X29, #-41]! // LDRH <Wt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldrh W28, [X3] // LDRH <Wt>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldrh W27, [X19, #3156] // LDRH <Wt>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldrh W20, [X25, X15] // LDRH <Wt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+ ldrh W22, [X0, W24, UXTW] // LDRH <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrh W6, [X17, W18, SXTW] // LDRH <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrh W21, [X13, X30, SXTX] // LDRH <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrh W14, [X21, W21, UXTW #1] // LDRH <Wt>, [<Xn|SP>, <Wm>, UXTW #1] \\ Load register, register offset, extend, scale by 2 \\ 1 4 4 3.00 V1UnitL
+ ldrh W0, [X29, W13, SXTW #1] // LDRH <Wt>, [<Xn|SP>, <Wm>, SXTW #1] \\ Load register, register offset, extend, scale by 2 \\ 1 4 4 3.00 V1UnitL
+ ldrh W11, [X20, X0, SXTX #1] // LDRH <Wt>, [<Xn|SP>, <Xm>, SXTX #1] \\ Load register, register offset, extend, scale by 2 \\ 1 4 4 3.00 V1UnitL
+ ldrh W12, [X17, X27, LSL #1] // LDRH <Wt>, [<Xn|SP>, <Xm>, LSL #1] \\ Load register, register offset, scale by 2 \\ 1 4 4 3.00 V1UnitL
+ ldrsb W12, [X13], #-250 // LDRSB <Wt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldrsb X10, [X2], #-229 // LDRSB <Xt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldrsb W5, [X2, #-169]! // LDRSB <Wt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldrsb X28, [X12, #-46]! // LDRSB <Xt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldrsb W5, [X26] // LDRSB <Wt>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldrsb W24, [X0, #3862] // LDRSB <Wt>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldrsb X6, [X0] // LDRSB <Xt>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldrsb X20, [X0, #653] // LDRSB <Xt>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldrsb W30, [X22, X21] // LDRSB <Wt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+ ldrsb W24, [X2, W14, UXTW] // LDRSB <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrsb W7, [X1, W8, SXTW] // LDRSB <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrsb W4, [X8, X25, SXTX] // LDRSB <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrsb X12, [X28, X27] // LDRSB <Xt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+ ldrsb X10, [X5, W9, UXTW] // LDRSB <Xt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrsb X19, [X23, W24, SXTW] // LDRSB <Xt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrsb X20, [X10, X13, SXTX] // LDRSB <Xt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrsh W5, [X0], #-115 // LDRSH <Wt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldrsh X30, [X18], #-50 // LDRSH <Xt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldrsh W27, [X15, #-45]! // LDRSH <Wt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldrsh X14, [X24, #27]! // LDRSH <Xt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldrsh W18, [X13] // LDRSH <Wt>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldrsh W11, [X27, #4094] // LDRSH <Wt>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldrsh X19, [X26] // LDRSH <Xt>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldrsh X19, [X9, #6652] // LDRSH <Xt>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldrsh W18, [X30, X24] // LDRSH <Wt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+ ldrsh W13, [X25, W7, UXTW] // LDRSH <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrsh W3, [X16, W28, SXTW] // LDRSH <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrsh W0, [X13, X14, SXTX] // LDRSH <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrsh W0, [X5, W21, UXTW #1] // LDRSH <Wt>, [<Xn|SP>, <Wm>, UXTW #1] \\ Load register, register offset, extend, scale by 2 \\ 1 4 4 3.00 V1UnitL
+ ldrsh W26, [X6, W29, SXTW #1] // LDRSH <Wt>, [<Xn|SP>, <Wm>, SXTW #1] \\ Load register, register offset, extend, scale by 2 \\ 1 4 4 3.00 V1UnitL
+ ldrsh W22, [X26, X15, SXTX #1] // LDRSH <Wt>, [<Xn|SP>, <Xm>, SXTX #1] \\ Load register, register offset, extend, scale by 2 \\ 1 4 4 3.00 V1UnitL
+ ldrsh W26, [X20, X21, LSL #1] // LDRSH <Wt>, [<Xn|SP>, <Xm>, LSL #1] \\ Load register, register offset, scale by 2 \\ 1 4 4 3.00 V1UnitL
+ ldrsh X4, [X9, X24] // LDRSH <Xt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+ ldrsh X25, [X8, W13, UXTW] // LDRSH <Xt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrsh X25, [X20, W10, SXTW] // LDRSH <Xt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrsh X6, [X13, X10, SXTX] // LDRSH <Xt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrsh X15, [X0, W28, UXTW #1] // LDRSH <Xt>, [<Xn|SP>, <Wm>, UXTW #1] \\ Load register, register offset, extend, scale by 2 \\ 1 4 4 3.00 V1UnitL
+ ldrsh X19, [X9, W15, SXTW #1] // LDRSH <Xt>, [<Xn|SP>, <Wm>, SXTW #1] \\ Load register, register offset, extend, scale by 2 \\ 1 4 4 3.00 V1UnitL
+ ldrsh X1, [X17, X26, SXTX #1] // LDRSH <Xt>, [<Xn|SP>, <Xm>, SXTX #1] \\ Load register, register offset, extend, scale by 2 \\ 1 4 4 3.00 V1UnitL
+ ldrsh X7, [X29, X17, LSL #1] // LDRSH <Xt>, [<Xn|SP>, <Xm>, LSL #1] \\ Load register, register offset, scale by 2 \\ 1 4 4 3.00 V1UnitL
+ ldrsw X4, [X21], #-93 // LDRSW <Xt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldrsw X6, [X28, #96]! // LDRSW <Xt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldrsw X1, [X23] // LDRSW <Xt>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldrsw X6, [X19, #4552] // LDRSW <Xt>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldrsw X20, test // LDRSW <Xt>, <label> \\ Load register, literal \\ 1 4 4 3.0 V1UnitL
+ ldrsw X21, [X25, X7] // LDRSW <Xt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+ ldrsw X12, [X28, W12, UXTW] // LDRSW <Xt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrsw X22, [X26, W21, SXTW] // LDRSW <Xt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrsw X0, [X21, X19, SXTX] // LDRSW <Xt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrsw X23, [X17, W19, UXTW #2] // LDRSW <Xt>, [<Xn|SP>, <Wm>, UXTW #2] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ ldrsw X23, [X30, W11, SXTW #2] // LDRSW <Xt>, [<Xn|SP>, <Wm>, SXTW #2] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ ldrsw X29, [X12, X5, SXTX #2] // LDRSW <Xt>, [<Xn|SP>, <Xm>, SXTX #2] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ ldrsw X3, [X1, X17, LSL #2] // LDRSW <Xt>, [<Xn|SP>, <Xm>, LSL #2] \\ Load register, register offset, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ ldtr W12, [X9] // LDTR <Wt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtr W9, [X3, #-55] // LDTR <Wt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtr X9, [X9] // LDTR <Xt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtr X25, [X1, #103] // LDTR <Xt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtrb W27, [X7] // LDTRB <Wt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtrb W8, [X1, #-90] // LDTRB <Wt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtrh W13, [X21] // LDTRH <Wt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtrh W10, [X15, #-67] // LDTRH <Wt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtrsb W15, [X19] // LDTRSB <Wt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtrsb W28, [X19, #-202] // LDTRSB <Wt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtrsb X17, [X6] // LDTRSB <Xt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtrsb X0, [X11, #180] // LDTRSB <Xt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtrsh W19, [X26] // LDTRSH <Wt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtrsh W16, [X28, #-233] // LDTRSH <Wt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtrsh X26, [X22] // LDTRSH <Xt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtrsh X27, [X19, #-76] // LDTRSH <Xt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtrsw X23, [X28] // LDTRSW <Xt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtrsw X26, [X21, #45] // LDTRSW <Xt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldur B24, [X3] // LDUR <Bt>, [<Xn|SP>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+ ldur B9, [X25, #240] // LDUR <Bt>, [<Xn|SP>, #<simm>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+ ldur H29, [X21] // LDUR <Ht>, [<Xn|SP>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+ ldur H5, [X23, #-5] // LDUR <Ht>, [<Xn|SP>, #<simm>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+ ldur S12, [X14] // LDUR <St>, [<Xn|SP>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+ ldur S22, [X10, #108] // LDUR <St>, [<Xn|SP>, #<simm>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+ ldur D16, [X14] // LDUR <Dt>, [<Xn|SP>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+ ldur D22, [X24, #-198] // LDUR <Dt>, [<Xn|SP>, #<simm>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+ ldur Q25, [X9] // LDUR <Qt>, [<Xn|SP>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+ ldur Q5, [X24, #233] // LDUR <Qt>, [<Xn|SP>, #<simm>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+ ldur W19, [X30] // LDUR <Wt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldur W24, [X12, #202] // LDUR <Wt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldur X0, [X3] // LDUR <Xt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldur X14, [X14, #17] // LDUR <Xt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldurb W9, [X24] // LDURB <Wt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldurb W12, [X5, #92] // LDURB <Wt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldurh W27, [X14] // LDURH <Wt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldurh W13, [X30, #-173] // LDURH <Wt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldursb W5, [X8] // LDURSB <Wt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldursb W21, [X10, #172] // LDURSB <Wt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldursb X19, [X15] // LDURSB <Xt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldursb X16, [X11, #-173] // LDURSB <Xt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldursh W21, [X12] // LDURSH <Wt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldursh W16, [X18, #203] // LDURSH <Wt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldursh X4, [X28] // LDURSH <Xt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldursh X5, [X3, #-133] // LDURSH <Xt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldursw X21, [X7] // LDURSW <Xt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldursw X11, [X16, #169] // LDURSW <Xt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldxp W23, W14, [X17] // LDXP <Wt1>, <Wt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldxp W2, W8, [X21, #0] // LDXP <Wt1>, <Wt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ ldxp X5, X6, [X30] // LDXP <Xt1>, <Xt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldxp X10, X26, [X6, #0] // LDXP <Xt1>, <Xt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ ldxr W4, [X9] // LDXR <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldxr W7, [X3, #0] // LDXR <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ ldxr X6, [X27] // LDXR <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldxr X3, [X4, #0] // LDXR <Xt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ ldxrb W17, [X21] // LDXRB <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldxrb W14, [X3, #0] // LDXRB <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ ldxrh W14, [X1] // LDXRH <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldxrh W24, [X11, #0] // LDXRH <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ lsl W25, W0, #22 // LSL <Wd>, <Wn>, #<shifts> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+ lsl X27, X7, #56 // LSL <Xd>, <Xn>, #<shiftd> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+ lsl Z1.B, P1/M, Z1.B, #3 // LSL <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsl Z7.H, P3/M, Z7.H, #5 // LSL <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsl Z10.S, P3/M, Z10.S, #7 // LSL <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsl Z21.D, P7/M, Z21.D, #28 // LSL <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsl Z13.B, Z4.B, #2 // LSL <Zd>.B, <Zn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsl Z11.H, Z16.H, #1 // LSL <Zd>.H, <Zn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsl Z16.S, Z11.S, #6 // LSL <Zd>.S, <Zn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsl Z18.D, Z4.D, #26 // LSL <Zd>.D, <Zn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsl W4, W9, W12 // LSL <Wd>, <Wn>, <Wm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
+ lsl X7, X29, X22 // LSL <Xd>, <Xn>, <Xm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
+ lsl Z3.D, P2/M, Z3.D, Z15.D // LSL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsl Z3.S, P6/M, Z3.S, Z8.D // LSL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsl Z19.S, Z25.S, Z25.D // LSL <Zd>.<T>, <Zn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lslr Z3.H, P5/M, Z3.H, Z23.H // LSLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lslv W6, W8, W2 // LSLV <Wd>, <Wn>, <Wm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+ lslv X7, X26, X21 // LSLV <Xd>, <Xn>, <Xm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+ lsr W0, W0, #30 // LSR <Wd>, <Wn>, #<shifts> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+ lsr X23, X24, #23 // LSR <Xd>, <Xn>, #<shiftd> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+ lsr Z21.B, P5/M, Z21.B, #3 // LSR <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsr Z1.H, P4/M, Z1.H, #5 // LSR <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsr Z24.S, P7/M, Z24.S, #9 // LSR <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsr Z13.D, P3/M, Z13.D, #4 // LSR <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsr Z3.B, Z11.B, #3 // LSR <Zd>.B, <Zn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsr Z5.H, Z12.H, #2 // LSR <Zd>.H, <Zn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsr Z21.S, Z16.S, #15 // LSR <Zd>.S, <Zn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsr Z21.D, Z15.D, #8 // LSR <Zd>.D, <Zn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsr W17, W20, W15 // LSR <Wd>, <Wn>, <Wm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
+ lsr X24, X4, X20 // LSR <Xd>, <Xn>, <Xm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
+ lsr Z30.D, P3/M, Z30.D, Z28.D // LSR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsr Z18.H, P3/M, Z18.H, Z29.D // LSR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsr Z7.H, Z30.H, Z11.D // LSR <Zd>.<T>, <Zn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsrr Z14.B, P1/M, Z14.B, Z16.B // LSRR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsrv W0, W28, W19 // LSRV <Wd>, <Wn>, <Wm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+ lsrv X16, X22, X19 // LSRV <Xd>, <Xn>, <Xm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+ mad Z17.B, P7/M, Z4.B, Z5.B // MAD <Zdn>.B, <Pg>/M, <Zm>.B, <Za>.B \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+ mad Z29.H, P4/M, Z31.H, Z18.H // MAD <Zdn>.H, <Pg>/M, <Zm>.H, <Za>.H \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+ mad Z7.S, P4/M, Z5.S, Z29.S // MAD <Zdn>.S, <Pg>/M, <Zm>.S, <Za>.S \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+ mad Z28.D, P7/M, Z18.D, Z2.D // MAD <Zdn>.D, <Pg>/M, <Zm>.D, <Za>.D \\ Multiply accumulate, D element size \\ 1 5 2 0.5 V1UnitV0[2]
+ madd W15, W9, W9, W29 // MADD <Wd>, <Wn>, <Wm>, <Wa> \\ Multiply accumulate, W-form \\ 1 2 1 1.0 V1UnitM0
+ madd X29, X22, X21, X21 // MADD <Xd>, <Xn>, <Xm>, <Xa> \\ Multiply accumulate, X-form \\ 1 2 1 1.0 V1UnitM0
+ mla V15.8H, V22.8H, V4.H[3] // MLA <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
+ mla V28.2S, V10.2S, V2.S[0] // MLA <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
+ mla V31.4S, V18.4S, V27.4S // MLA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
+ mla Z1.B, P0/M, Z3.B, Z3.B // MLA <Zda>.B, <Pg>/M, <Zn>.B, <Zm>.B \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+ mla Z21.H, P2/M, Z31.H, Z30.H // MLA <Zda>.H, <Pg>/M, <Zn>.H, <Zm>.H \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+ mla Z24.S, P3/M, Z11.S, Z9.S // MLA <Zda>.S, <Pg>/M, <Zn>.S, <Zm>.S \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+ mla Z2.D, P0/M, Z12.D, Z5.D // MLA <Zda>.D, <Pg>/M, <Zn>.D, <Zm>.D \\ Multiply accumulate, D element size \\ 1 5 2 0.5 V1UnitV0[2]
+ mls V25.8H, V29.8H, V0.H[4] // MLS <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
+ mls V22.2S, V29.2S, V0.S[3] // MLS <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
+ mls V26.4S, V5.4S, V28.4S // MLS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
+ mls Z11.B, P1/M, Z28.B, Z6.B // MLS <Zda>.B, <Pg>/M, <Zn>.B, <Zm>.B \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+ mls Z31.H, P0/M, Z25.H, Z24.H // MLS <Zda>.H, <Pg>/M, <Zn>.H, <Zm>.H \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+ mls Z1.S, P5/M, Z7.S, Z13.S // MLS <Zda>.S, <Pg>/M, <Zn>.S, <Zm>.S \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+ mls Z2.D, P1/M, Z17.D, Z10.D // MLS <Zda>.D, <Pg>/M, <Zn>.D, <Zm>.D \\ Multiply accumulate, D element size \\ 1 5 2 0.5 V1UnitV0[2]
+ mneg W14, W30, W30 // MNEG <Wd>, <Wn>, <Wm> \\ Multiply \\ 1 2 2 2.0 V1UnitM
+ mneg X21, X3, X9 // MNEG <Xd>, <Xn>, <Xm> \\ Multiply \\ 1 2 2 2.0 V1UnitM
+ mov Z9.S, P2/M, S10 // MOV <Zd>.<T>, <Pg>/M, <V><n> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+ mov Z17.B, Z29.B[38] // MOV <Zd>.B, <Zn>.B[<immb>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ mov Z26.H, Z7.H[16] // MOV <Zd>.H, <Zn>.H[<immh>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ mov Z14.S, Z21.S[13] // MOV <Zd>.S, <Zn>.S[<imms>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ mov Z22.D, Z14.D[2] // MOV <Zd>.D, <Zn>.D[<immd>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ mov Z21.S, S25 // MOV <Zd>.<T>, <V><n> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ mov WSP, #0xe00 // MOV <Wd|WSP>, #<imms> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ mov X3, #0x1e00 // MOV <Xd|SP>, #<immd> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ mov V30.B[12], V17.B[14] // MOV <Vd>.B[<index1b>], <Vn>.B[<index2b>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+ mov V10.H[3], V17.H[5] // MOV <Vd>.H[<index1h>], <Vn>.H[<index2h>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+ mov V19.S[2], V2.S[1] // MOV <Vd>.S[<index1s>], <Vn>.S[<index2s>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+ mov V21.D[1], V16.D[0] // MOV <Vd>.D[<index1d>], <Vn>.D[<index2d>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+ mov V5.B[12], W23 // MOV <Vd>.B[<indexb>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+ mov V27.H[6], W6 // MOV <Vd>.H[<indexh>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+ mov V21.S[0], W21 // MOV <Vd>.S[<indexs>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+ mov V13.D[0], X10 // MOV <Vd>.D[<indexd>], X<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+ mov Z30.B, P7/M, #77 // MOV <Zd>.B, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+ mov Z30.D, P7/M, #-89 // MOV <Zd>.<T>, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+ mov Z10.H, P5/M, #72, LSL #0 // MOV <Zd>.<T>, <Pg>/M, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+ mov Z19.B, P6/Z, #0 // MOV <Zd>.B, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+ mov Z6.D, P1/Z, #-109 // MOV <Zd>.<T>, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+ mov Z12.D, P7/Z, #40, LSL #8 // MOV <Zd>.<T>, <Pg>/Z, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+ mov Z30.B, #-31 // MOV <Zd>.B, #<imm> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ mov Z2.H, #-56 // MOV <Zd>.<T>, #<imm> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ mov Z20.H, #82, LSL #8 // MOV <Zd>.<T>, #<imm>, <shift> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ mov W24, #0xe00 // MOV <Wd>, #<imms> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ mov X15, #0xe00 // MOV <Xd>, #<immd> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ mov P0.B, P0/M, P6.B // MOV <Pd>.B, <Pg>/M, <Pn>.B \\ Predicate select \\ 1 1 1 1.0 V1UnitM0
+ mov P3.B, P7/Z, P2.B // MOV <Pd>.B, <Pg>/Z, <Pn>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+ mov W21, W11 // MOV <Wd>, <Wm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ mov X14, X0 // MOV <Xd>, <Xm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ mov B15, V21.B[8] // MOV B<d>, <Vn>.B[<indexb>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+ mov H13, V17.H[3] // MOV H<d>, <Vn>.H[<indexh>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+ mov S7, V11.S[0] // MOV S<d>, <Vn>.S[<indexs>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+ mov D27, V24.D[1] // MOV D<d>, <Vn>.D[<indexd>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+ mov Z12.D, P5/M, X24 // MOV <Zd>.<T>, <Pg>/M, <R><n> \\ Copy, scalar \\ 2 5 5 1.0 V1UnitM0,V1UnitV01
+ mov Z31.D, P6/M, SP // MOV <Zd>.<T>, <Pg>/M, <R2>SP \\ Copy, scalar \\ 2 5 5 1.0 V1UnitM0,V1UnitV01
+ mov Z19.B, W27 // MOV <Zd>.<T>, <R><n> \\ Duplicate, scalar form \\ 1 3 3 1.0 V1UnitM0
+ mov Z17.H, WSP // MOV <Zd>.<T>, <R2>SP \\ Duplicate, scalar form \\ 1 3 3 1.0 V1UnitM0
+ mov W13, V12.S[2] // MOV <Wd>, <Vn>.S[<indexs>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+ mov X30, V18.D[0] // MOV <Xd>, <Vn>.D[<indexd>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+ mov WSP, WSP // MOV <Wd|WSP>, <Wn|WSP> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ mov X1, X11 // MOV <Xd|SP>, <Xn|SP> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ mov V12.16B, V6.16B // MOV <Vd>.<T>, <Vn>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ mov Z1.D, P3/M, Z6.D // MOV <Zd>.<T>, <Pv>/M, <Zn>.<T> \\ Select, vector form \\ 1 2 2 2.0 V1UnitV01
+ mov Z24.D, Z25.D // MOV <Zd>.D, <Zn>.D \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ mov W30, #0xe00 // MOV <Wd>, #<imms> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ mov X4, #0xe00 // MOV <Xd>, #<immd> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ mov Z14.B, #0x70 // MOV <Zd>.B, #<constb> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+ mov Z8.H, #0x60 // MOV <Zd>.H, #<consth> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+ mov Z2.S, #0x2 // MOV <Zd>.S, #<consts> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+ mov Z6.D, #0x4 // MOV <Zd>.D, #<constd> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+ mov P2.B, P5.B // MOV <Pd>.B, <Pn>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+ movi V7.16B, #177 // MOVI <Vd>.<Tb>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+ movi V14.8H, #174 // MOVI <Vd>.<Th>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+ movi V13.4H, #74, LSL #8 // MOVI <Vd>.<Th>, #<imm8>, LSL #<amounth> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+ movi V19.2S, #226 // MOVI <Vd>.<Ts>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+ movi V0.2S, #137, LSL #24 // MOVI <Vd>.<Ts>, #<imm8>, LSL #<amounts> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+ movi V1.4S, #122, MSL #8 // MOVI <Vd>.<Ts>, #<imm8>, MSL #<amountones> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+ movi D16, #0 // MOVI <Dd>, #<imm> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+ movi V13.2D, #0xff00ff00ff00ff00 // MOVI <Vd>.2D, #<imm> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+ movk W8, #57951 // MOVK <Wd>, #<imm> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ movk W6, #34540, LSL #0 // MOVK <Wd>, #<imm>, LSL #<shifts> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ movk X1, #56641 // MOVK <Xd>, #<imm> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ movk X23, #3111, LSL #48 // MOVK <Xd>, #<imm>, LSL #<shiftd> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ movn W16, #52526 // MOVN <Wd>, #<imm> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ movn W27, #47742, LSL #0 // MOVN <Wd>, #<imm>, LSL #<shifts> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ movn X10, #63431 // MOVN <Xd>, #<imm> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ movn X0, #58015, LSL #48 // MOVN <Xd>, #<imm>, LSL #<shiftd> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ movprfx Z22.B, P0/M, Z4.B // MOVPRFX <Zd>.<T>, <Pg>/<ZM>, <Zn>.<T> \\ Move prefix \\ 1 2 2 2.0 V1UnitV01
+ mla Z22.B, P0/M, Z19.B, Z25.B // Ignore
+ movprfx Z3, Z26 // MOVPRFX <Zd>, <Zn> \\ Move prefix \\ 1 2 2 2.0 V1UnitV01
+ fmla Z3.D, P0/M, Z8.D, Z19.D // Ignore
+ movs P0.B, P7/Z, P3.B // MOVS <Pd>.B, <Pg>/Z, <Pn>.B \\ Predicate logical, flag setting \\ 2 2 2 0.50 V1UnitM0[2]
+ movs P4.B, P0.B // MOVS <Pd>.B, <Pn>.B \\ Predicate logical, flag setting \\ 2 2 2 0.50 V1UnitM0[2]
+ movz W3, #9629 // MOVZ <Wd>, #<imm> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ movz W23, #10835, LSL #16 // MOVZ <Wd>, #<imm>, LSL #<shifts> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ movz X0, #22630 // MOVZ <Xd>, #<imm> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ movz X11, #20464, LSL #48 // MOVZ <Xd>, #<imm>, LSL #<shiftd> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ mrs X4, ACTLR_EL1 // MRS <Xt>, <systemreg> \\ No description \\ No scheduling info
+ mrs X14, S2_4_C0_C5_4 // MRS <Xt>, S<op0>_<op1>_<Cn>_<Cm>_<op2> \\ No description \\ No scheduling info
+ msb Z18.B, P1/M, Z27.B, Z0.B // MSB <Zdn>.B, <Pg>/M, <Zm>.B, <Za>.B \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+ msb Z27.H, P5/M, Z23.H, Z1.H // MSB <Zdn>.H, <Pg>/M, <Zm>.H, <Za>.H \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+ msb Z26.S, P2/M, Z0.S, Z2.S // MSB <Zdn>.S, <Pg>/M, <Zm>.S, <Za>.S \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+ msb Z1.D, P6/M, Z12.D, Z12.D // MSB <Zdn>.D, <Pg>/M, <Zm>.D, <Za>.D \\ Multiply accumulate, D element size \\ 1 5 2 0.5 V1UnitV0[2]
+ msr DAIFSet, #0 // MSR <pstatefield1>, #<imm1> \\ No description \\ No scheduling info
+ msr SPSel, #0 // MSR <pstatefield2>, #<imm2> \\ No description \\ No scheduling info
+ msr ACTLR_EL3, X18 // MSR <systemreg>, <Xt> \\ No description \\ No scheduling info
+ msr S3_6_C8_C12_1, X23 // MSR S<op0>_<op1>_<Cn>_<Cm>_<op2>, <Xt> \\ No description \\ No scheduling info
+ msub W6, W26, W13, W13 // MSUB <Wd>, <Wn>, <Wm>, <Wa> \\ Multiply accumulate, X-form \\ 1 2 1 1.0 V1UnitM0
+ msub X14, X28, X9, X3 // MSUB <Xd>, <Xn>, <Xm>, <Xa> \\ Multiply accumulate, X-form \\ 1 2 1 1.0 V1UnitM0
+ mul V26.4H, V20.4H, V14.H[5] // MUL <Vd>.4H, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ mul V5.8H, V21.8H, V3.H[7] // MUL <Vd>.8H, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ mul V29.2S, V10.2S, V3.S[1] // MUL <Vd>.2S, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ mul V30.4S, V11.4S, V4.S[0] // MUL <Vd>.4S, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ mul Z16.B, Z16.B, #-118 // MUL <Zdn>.B, <Zdn>.B, #<imm> \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+ mul Z9.H, Z9.H, #-56 // MUL <Zdn>.H, <Zdn>.H, #<imm> \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+ mul Z23.S, Z23.S, #74 // MUL <Zdn>.S, <Zdn>.S, #<imm> \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+ mul Z15.D, Z15.D, #20 // MUL <Zdn>.D, <Zdn>.D, #<imm> \\ Multiply, D element size \\ 2 5 5 0.50 V1UnitV0[2]
+ mul V3.8H, V9.8H, V8.8H // MUL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ mul Z17.B, P6/M, Z17.B, Z9.B // MUL <Zdn>.B, <Pg>/M, <Zdn>.B, <Zm>.B \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+ mul Z18.H, P7/M, Z18.H, Z15.H // MUL <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+ mul Z29.S, P6/M, Z29.S, Z8.S // MUL <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+ mul Z25.D, P1/M, Z25.D, Z25.D // MUL <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Multiply, D element size \\ 2 5 5 0.50 V1UnitV0[2]
+ mul W8, W13, W20 // MUL <Wd>, <Wn>, <Wm> \\ Multiply \\ 1 2 2 2.0 V1UnitM
+ mul X12, X8, X25 // MUL <Xd>, <Xn>, <Xm> \\ Multiply \\ 1 2 2 2.0 V1UnitM
+ mvn W0, W18 // MVN <Wd>, <Wm> \\ Move, basic \\ 1 1 1 4.0 V1UnitI
+ mvn W25, W27, ASR #6 // MVN <Wd>, <Wm>, <shift> #<wamount> \\ Move, basic \\ 1 1 1 4.0 V1UnitI
+ mvn X1, X21 // MVN <Xd>, <Xm> \\ Move, basic \\ 1 1 1 4.0 V1UnitI
+ mvn X9, X23, ASR #39 // MVN <Xd>, <Xm>, <shift> #<amount> \\ Move, basic \\ 1 1 1 4.0 V1UnitI
+ mvn V16.16B, V24.16B // MVN <Vd>.<T>, <Vn>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ mvni V9.4H, #237 // MVNI <Vd>.<Th>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+ mvni V8.8H, #171, LSL #8 // MVNI <Vd>.<Th>, #<imm8>, LSL #<amounth> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+ mvni V7.2S, #81 // MVNI <Vd>.<Ts>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+ mvni V22.4S, #15, LSL #8 // MVNI <Vd>.<Ts>, #<imm8>, LSL #<amounts> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+ mvni V12.4S, #141, MSL #8 // MVNI <Vd>.<Ts>, #<imm8>, MSL #<amountones> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+ nand P5.B, P4/Z, P5.B, P5.B // NAND <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+ nands P6.B, P3/Z, P4.B, P5.B // NANDS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 2 2 2 0.50 V1UnitM0[2]
+ neg W25, W20, LSL #4 // NEG <Wd>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
+ neg W0, W29, LSL #9 // NEG <Wd>, <Wm>, LSL #<wamounth> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
+ neg W7, W28, ASR #24 // NEG <Wd>, <Wm>, <shift> #<wamount> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
+ neg X29, X11, LSL #3 // NEG <Xd>, <Xm>, LSL #<amountl> \\ Arithmetic, LSL shift, shift <= 4 \\ 1 1 1 4.0 V1UnitI
+ neg X24, X10, LSL #54 // NEG <Xd>, <Xm>, LSL #<amounth> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
+ neg X0, X16, LSR #2 // NEG <Xd>, <Xm>, <shift> #<amount> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
+ neg D18, D20 // NEG <V><d>, <V><n> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ neg V16.2D, V14.2D // NEG <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ neg Z16.B, P2/M, Z15.B // NEG <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ negs W30, W22, LSL #2 // NEGS <Wd>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ negs W8, W8, LSL #15 // NEGS <Wd>, <Wm>, LSL #<wamounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+ negs W12, W21, ASR #15 // NEGS <Wd>, <Wm>, <shift> #<wamount> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+ negs X24, X23, LSL #1 // NEGS <Xd>, <Xm>, LSL #<amountl> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ negs X20, X13, LSL #20 // NEGS <Xd>, <Xm>, LSL #<amounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+ negs X1, X22, LSR #30 // NEGS <Xd>, <Xm>, <shift> #<amount> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+ ngc W11, W9 // NGC <Wd>, <Wm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ ngc X30, X4 // NGC <Xd>, <Xm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ ngcs W13, W22 // NGCS <Wd>, <Wm> \\ No description \\ No scheduling info
+ ngcs X15, X1 // NGCS <Xd>, <Xm> \\ No description \\ No scheduling info
+ nop // NOP \\ No description \\ No scheduling info
+ nor P4.B, P4/Z, P0.B, P4.B // NOR <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+ nors P1.B, P0/Z, P7.B, P6.B // NORS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 2 2 2 0.50 V1UnitM0[2]
+ not P7.B, P2/Z, P6.B // NOT <Pd>.B, <Pg>/Z, <Pn>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+ not Z29.S, P4/M, Z9.S // NOT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ not V15.8B, V29.8B // NOT <Vd>.<T>, <Vn>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ nots P7.B, P3/Z, P1.B // NOTS <Pd>.B, <Pg>/Z, <Pn>.B \\ Predicate logical, flag setting \\ 2 2 2 0.50 V1UnitM0[2]
+ orn Z5.B, Z5.B, #0x70 // ORN <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ orn Z14.H, Z14.H, #0x60 // ORN <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ orn Z14.S, Z14.S, #0x2 // ORN <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ orn Z27.D, Z27.D, #0x4 // ORN <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ orn P1.B, P2/Z, P3.B, P5.B // ORN <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+ orn W2, W27, W7 // ORN <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ orn W6, W28, W14, LSL #19 // ORN <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ orn X22, X12, X3 // ORN <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ orn X19, X17, X0, LSL #58 // ORN <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ orn V29.8B, V19.8B, V16.8B // ORN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ orns P3.B, P3/Z, P0.B, P3.B // ORNS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 2 2 2 0.50 V1UnitM0[2]
+ orr WSP, W27, #0xe00 // ORR <Wd|WSP>, <Wn>, #<imms> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ orr X27, X6, #0x1e00 // ORR <Xd|SP>, <Xn>, #<immd> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ orr Z4.B, Z4.B, #0x70 // ORR <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ orr Z26.H, Z26.H, #0x60 // ORR <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ orr Z3.S, Z3.S, #0x2 // ORR <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ orr Z30.D, Z30.D, #0x4 // ORR <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ orr P6.B, P4/Z, P4.B, P3.B // ORR <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+ orr W14, W1, W23 // ORR <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ orr W25, W22, W0, ASR #20 // ORR <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ orr X11, X6, X13 // ORR <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ orr X26, X26, X7, LSL #62 // ORR <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ orr V9.4H, #18 // ORR <Vd>.<Th>, #<imm8> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ orr V20.8H, #175, LSL #0 // ORR <Vd>.<Th>, #<imm8>, LSL #<amounth> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ orr V4.4S, #0 // ORR <Vd>.<T>, #<imm8> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ orr V17.4S, #119, LSL #24 // ORR <Vd>.<T>, #<imm8>, LSL #<amount> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ orr V12.16B, V9.16B, V1.16B // ORR <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ orr Z28.H, P3/M, Z28.H, Z7.H // ORR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ orr Z8.D, Z14.D, Z19.D // ORR <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ orrs P7.B, P7/Z, P6.B, P5.B // ORRS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 2 2 2 0.50 V1UnitM0[2]
+ orv D19, P6, Z31.D // ORV <V><d>, <Pg>, <Zn>.<T> \\ Reduction, logical \\ 4 12 12 0.50 V1UnitV01[4]
+ pfalse P6.B // PFALSE <Pd>.B \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+ pfirst P0.B, P5, P0.B // PFIRST <Pdn>.B, <Pg>, <Pdn>.B \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+ pmul V30.8B, V0.8B, V27.8B // PMUL <Vd>.8B, <Vn>.8B, <Vm>.8B \\ ASIMD multiply/multiply long (8x8) polynomial, D-form \\ 1 3 3 2.0 V1UnitV01
+ pmul V7.16B, V20.16B, V18.16B // PMUL <Vd>.16B, <Vn>.16B, <Vm>.16B \\ ASIMD multiply/multiply long (8x8) polynomial, Q-form \\ 1 3 3 2.0 V1UnitV01
+ pnext P5.S, P5, P5.S // PNEXT <Pdn>.<T>, <Pv>, <Pdn>.<T> \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+ prfb #14, P5, [X21] // PRFB #<imm4>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+ prfb #14, P3, [X28, #-24, MUL VL] // PRFB #<imm4>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+ prfb PSTL1STRM, P7, [X5] // PRFB <prfop>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+ prfb PLDL2KEEP, P1, [X12, #11, MUL VL] // PRFB <prfop>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+ prfb PLDL1KEEP, P7, [X4, X9] // PRFB <prfop>, <Pg>, [<Xn|SP>, <Xm>] \\ No description \\ No scheduling info
+ prfb PLDL3STRM, P4, [X3, Z15.S, UXTW] // PRFB <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] \\ No description \\ No scheduling info
+ prfb PLDL1STRM, P7, [X28, Z4.D, UXTW] // PRFB <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] \\ No description \\ No scheduling info
+ prfb PSTL3KEEP, P2, [X18, Z19.D] // PRFB <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] \\ No description \\ No scheduling info
+ prfb #12, P1, [Z28.S] // PRFB #<imm4>, <Pg>, [<Zn>.S] \\ No description \\ No scheduling info
+ prfb #8, P0, [Z22.S, #21] // PRFB #<imm4>, <Pg>, [<Zn>.S, #<imm>] \\ No description \\ No scheduling info
+ prfb PSTL1STRM, P2, [Z25.S] // PRFB <prfop>, <Pg>, [<Zn>.S] \\ No description \\ No scheduling info
+ prfb PSTL2STRM, P1, [Z31.S, #18] // PRFB <prfop>, <Pg>, [<Zn>.S, #<imm>] \\ No description \\ No scheduling info
+ prfb #11, P5, [Z25.D] // PRFB #<imm4>, <Pg>, [<Zn>.D] \\ No description \\ No scheduling info
+ prfb #2, P2, [Z4.D, #10] // PRFB #<imm4>, <Pg>, [<Zn>.D, #<imm>] \\ No description \\ No scheduling info
+ prfb PSTL2KEEP, P5, [Z5.D] // PRFB <prfop>, <Pg>, [<Zn>.D] \\ No description \\ No scheduling info
+ prfb PLDL1KEEP, P1, [Z31.D, #17] // PRFB <prfop>, <Pg>, [<Zn>.D, #<imm>] \\ No description \\ No scheduling info
+ prfd #13, P3, [X21] // PRFD #<imm4>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+ prfd #4, P5, [X3, #-7, MUL VL] // PRFD #<imm4>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+ prfd PSTL3KEEP, P0, [X29] // PRFD <prfop>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+ prfd PLDL1STRM, P3, [X15, #-16, MUL VL] // PRFD <prfop>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+ prfd PSTL2KEEP, P3, [X24, X24, LSL #3] // PRFD <prfop>, <Pg>, [<Xn|SP>, <Xm>, LSL #3] \\ No description \\ No scheduling info
+ prfd PSTL1STRM, P3, [X27, Z27.S, SXTW #3] // PRFD <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #3] \\ No description \\ No scheduling info
+ prfd PSTL1KEEP, P0, [X21, Z2.D, UXTW #3] // PRFD <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #3] \\ No description \\ No scheduling info
+ prfd PLDL1STRM, P7, [X22, Z22.D, LSL #3] // PRFD <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, LSL #3] \\ No description \\ No scheduling info
+ prfd #3, P1, [Z2.S] // PRFD #<imm4>, <Pg>, [<Zn>.S] \\ No description \\ No scheduling info
+ prfd #8, P7, [Z10.S, #72] // PRFD #<imm4>, <Pg>, [<Zn>.S, #<imm>] \\ No description \\ No scheduling info
+ prfd PSTL1KEEP, P3, [Z19.S] // PRFD <prfop>, <Pg>, [<Zn>.S] \\ No description \\ No scheduling info
+ prfd PSTL2STRM, P4, [Z26.S, #248] // PRFD <prfop>, <Pg>, [<Zn>.S, #<imm>] \\ No description \\ No scheduling info
+ prfd #15, P1, [Z17.D] // PRFD #<imm4>, <Pg>, [<Zn>.D] \\ No description \\ No scheduling info
+ prfd #3, P0, [Z6.D, #24] // PRFD #<imm4>, <Pg>, [<Zn>.D, #<imm>] \\ No description \\ No scheduling info
+ prfd PSTL1KEEP, P3, [Z31.D] // PRFD <prfop>, <Pg>, [<Zn>.D] \\ No description \\ No scheduling info
+ prfd PSTL1STRM, P7, [Z10.D, #40] // PRFD <prfop>, <Pg>, [<Zn>.D, #<imm>] \\ No description \\ No scheduling info
+ prfh #3, P3, [X17] // PRFH #<imm4>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+ prfh #6, P3, [X6, #19, MUL VL] // PRFH #<imm4>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+ prfh PLDL3KEEP, P6, [X2] // PRFH <prfop>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+ prfh PLDL2KEEP, P6, [X18, #-4, MUL VL] // PRFH <prfop>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+ prfh PSTL2KEEP, P1, [X28, X9, LSL #1] // PRFH <prfop>, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ No description \\ No scheduling info
+ prfh PLDL1STRM, P6, [X0, Z10.S, UXTW #1] // PRFH <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #1] \\ No description \\ No scheduling info
+ prfh PLDL3KEEP, P7, [X24, Z21.D, UXTW #1] // PRFH <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #1] \\ No description \\ No scheduling info
+ prfh PSTL1STRM, P5, [X10, Z6.D, LSL #1] // PRFH <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, LSL #1] \\ No description \\ No scheduling info
+ prfh PLDL3STRM, P6, [Z0.S] // PRFH <prfop>, <Pg>, [<Zn>.S] \\ No description \\ No scheduling info
+ prfh PSTL3STRM, P0, [Z30.S, #12] // PRFH <prfop>, <Pg>, [<Zn>.S, #<imm>] \\ No description \\ No scheduling info
+ prfh PSTL2KEEP, P2, [Z21.D] // PRFH <prfop>, <Pg>, [<Zn>.D] \\ No description \\ No scheduling info
+ prfh PSTL2KEEP, P1, [Z8.D, #14] // PRFH <prfop>, <Pg>, [<Zn>.D, #<imm>] \\ No description \\ No scheduling info
+ prfm PLDL1STRM, [X5] // PRFM <prfop>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ prfm PSTL3KEEP, [X19, #10160] // PRFM <prfop>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ prfm #25, [X28] // PRFM #<imm5>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ prfm #7, [X15, #6776] // PRFM #<imm5>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ prfm PLDL3STRM, test // PRFM <prfop>, <label> \\ Load register, literal \\ 1 4 4 3.0 V1UnitL
+ prfm #0, test // PRFM #<imm5>, <label> \\ Load register, literal \\ 1 4 4 3.0 V1UnitL
+ prfm PLDL1KEEP, [X25, X16] // PRFM <prfop>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+ prfm #24, [X1, X18] // PRFM #<imm5>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+ prfm PLDL1KEEP, [X14, W8, UXTW] // PRFM <prfop>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ prfm #12, [X8, W5, UXTW] // PRFM #<imm5>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ prfm PLDL2KEEP, [X16, W16, SXTW] // PRFM <prfop>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ prfm #11, [X25, W11, SXTW] // PRFM #<imm5>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ prfm PSTL2STRM, [X3, X24, SXTX] // PRFM <prfop>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ prfm #25, [X5, X2, SXTX] // PRFM #<imm5>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ prfm PLDL2KEEP, [X10, W29, UXTW #3] // PRFM <prfop>, [<Xn|SP>, <Wm>, UXTW #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ prfm #17, [X9, W27, UXTW #3] // PRFM #<imm5>, [<Xn|SP>, <Wm>, UXTW #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ prfm PLDL1KEEP, [X24, W0, SXTW #3] // PRFM <prfop>, [<Xn|SP>, <Wm>, SXTW #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ prfm #4, [X30, W25, SXTW #3] // PRFM #<imm5>, [<Xn|SP>, <Wm>, SXTW #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ prfm PSTL1STRM, [X18, X20, SXTX #3] // PRFM <prfop>, [<Xn|SP>, <Xm>, SXTX #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ prfm #19, [X29, X25, SXTX #3] // PRFM #<imm5>, [<Xn|SP>, <Xm>, SXTX #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ prfm PSTL3KEEP, [X2, X5, LSL #3] // PRFM <prfop>, [<Xn|SP>, <Xm>, LSL #3] \\ Load register, register offset, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ prfm #8, [X22, X3, LSL #3] // PRFM #<imm5>, [<Xn|SP>, <Xm>, LSL #3] \\ Load register, register offset, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ prfum PSTL1KEEP, [X7] // PRFUM <prfop>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ prfum PLDL2KEEP, [X7, #-37] // PRFUM <prfop>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ prfum #20, [X21] // PRFUM #<imm5>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ prfum #23, [X6, #-131] // PRFUM #<imm5>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ prfw #3, P2, [X4] // PRFW #<imm4>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+ prfw #6, P4, [X7, #6, MUL VL] // PRFW #<imm4>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+ prfw PLDL3KEEP, P3, [X2] // PRFW <prfop>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+ prfw PSTL1KEEP, P7, [X2, #-31, MUL VL] // PRFW <prfop>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+ prfw PSTL1KEEP, P4, [X18, X21, LSL #2] // PRFW <prfop>, <Pg>, [<Xn|SP>, <Xm>, LSL #2] \\ No description \\ No scheduling info
+ prfw PLDL2STRM, P0, [X15, Z6.S, UXTW #2] // PRFW <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #2] \\ No description \\ No scheduling info
+ prfw PSTL2KEEP, P0, [X27, Z18.D, SXTW #2] // PRFW <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #2] \\ No description \\ No scheduling info
+ prfw PSTL2KEEP, P3, [X19, Z8.D, LSL #2] // PRFW <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, LSL #2] \\ No description \\ No scheduling info
+ prfw #7, P7, [Z27.S] // PRFW #<imm4>, <Pg>, [<Zn>.S] \\ No description \\ No scheduling info
+ prfw #9, P5, [Z16.S, #72] // PRFW #<imm4>, <Pg>, [<Zn>.S, #<imm>] \\ No description \\ No scheduling info
+ prfw PLDL3KEEP, P4, [Z2.S] // PRFW <prfop>, <Pg>, [<Zn>.S] \\ No description \\ No scheduling info
+ prfw PSTL3KEEP, P2, [Z0.S, #40] // PRFW <prfop>, <Pg>, [<Zn>.S, #<imm>] \\ No description \\ No scheduling info
+ prfw #7, P1, [Z20.D] // PRFW #<imm4>, <Pg>, [<Zn>.D] \\ No description \\ No scheduling info
+ prfw #7, P2, [Z10.D, #108] // PRFW #<imm4>, <Pg>, [<Zn>.D, #<imm>] \\ No description \\ No scheduling info
+ prfw PSTL1KEEP, P6, [Z12.D] // PRFW <prfop>, <Pg>, [<Zn>.D] \\ No description \\ No scheduling info
+ prfw PSTL2STRM, P0, [Z18.D, #60] // PRFW <prfop>, <Pg>, [<Zn>.D, #<imm>] \\ No description \\ No scheduling info
+ psb CSYNC // PSB CSYNC \\ No description \\ No scheduling info
+ pssbb // PSSBB \\ No description \\ No scheduling info
+ ptest P0, P5.B // PTEST <Pg>, <Pn>.B \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+ ptrue P2.B // PTRUE <Pd>.<T> \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+ ptrue P3.D, POW2 // PTRUE <Pd>.<T>, <pattern> \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+ ptrue P0.H // PTRUE <Pd>.<T> \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+ ptrue P4.S, #21 // PTRUE <Pd>.<T>, #<uimm5> \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+ ptrues P3.H // PTRUES <Pd>.<T> \\ Predicate set/initialize, set flags \\ 2 3 3 0.50 V1UnitM0[2]
+ ptrues P3.D, VL32 // PTRUES <Pd>.<T>, <pattern> \\ Predicate set/initialize, set flags \\ 2 3 3 0.50 V1UnitM0[2]
+ ptrues P0.B // PTRUES <Pd>.<T> \\ Predicate set/initialize, set flags \\ 2 3 3 0.50 V1UnitM0[2]
+ ptrues P2.D, #12 // PTRUES <Pd>.<T>, #<uimm5> \\ Predicate set/initialize, set flags \\ 2 3 3 0.50 V1UnitM0[2]
+ punpkhi P4.H, P4.B // PUNPKHI <Pd>.H, <Pn>.B \\ Predicate unpack and widen \\ 1 2 2 1.0 V1UnitM0
+ punpklo P1.H, P4.B // PUNPKLO <Pd>.H, <Pn>.B \\ Predicate unpack and widen \\ 1 2 2 1.0 V1UnitM0
+ raddhn V17.2S, V22.2D, V5.2D // RADDHN <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ raddhn2 V21.4S, V11.2D, V1.2D // RADDHN2 <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ rbit V16.16B, V21.16B // RBIT <Vd>.<T>, <Vn>.<T> \\ ASIMD bit reverse \\ 1 2 2 4.0 V1UnitV
+ rbit W27, W10 // RBIT <Wd>, <Wn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+ rbit X30, X0 // RBIT <Xd>, <Xn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+ rbit Z23.S, P3/M, Z10.S // RBIT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Count/reverse bits \\ 1 2 2 2.0 V1UnitV01
+ rdffr P2.B, P1/Z // RDFFR <Pd>.B, <Pg>/Z \\ Read first fault register, predicated \\ 2 3 3 0.50 V1UnitM0[2]
+ rdffr P5.B // RDFFR <Pd>.B \\ Read first fault register, unpredicated \\ 1 2 2 1.0 V1UnitM0
+ rdffrs P7.B, P2/Z // RDFFRS <Pd>.B, <Pg>/Z \\ Read first fault register and set flags \\ 1 4 4 2.00 V1UnitI, V1UnitM
+ rdvl X20, #-20 // RDVL <Xd>, #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ ret // RET \\ Branch, register \\ 1 1 1 2.0 V1UnitB
+ ret X14 // RET {<Xn>} \\ Branch, register \\ 1 1 1 2.0 V1UnitB
+ rev P1.H, P2.H // REV <Pd>.<T>, <Pn>.<T> \\ Predicate reverse \\ 1 2 2 1.0 V1UnitM0
+ rev Z11.D, Z24.D // REV <Zd>.<T>, <Zn>.<T> \\ Reverse, vector \\ 1 2 2 2.0 V1UnitV01
+ rev W19, W20 // REV <Wd>, <Wn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+ rev X30, X15 // REV <Xd>, <Xn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+ rev16 V5.16B, V26.16B // REV16 <Vd>.<T>, <Vn>.<T> \\ ASIMD reverse \\ 1 2 2 4.0 V1UnitV
+ rev16 W1, W25 // REV16 <Wd>, <Wn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+ rev16 X27, X11 // REV16 <Xd>, <Xn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+ rev32 V22.8H, V4.8H // REV32 <Vd>.<T>, <Vn>.<T> \\ ASIMD reverse \\ 1 2 2 4.0 V1UnitV
+ rev32 X30, X6 // REV32 <Xd>, <Xn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+ rev64 X5, X2 // REV64 <Xd>, <Xn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+ rev64 V0.2S, V19.2S // REV64 <Vd>.<T>, <Vn>.<T> \\ ASIMD reverse \\ 1 2 2 4.0 V1UnitV
+ revb Z3.D, P2/M, Z21.D // REVB <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Reverse, vector \\ 1 2 2 2.0 V1UnitV01
+ revh Z1.D, P5/M, Z19.D // REVH <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Reverse, vector \\ 1 2 2 2.0 V1UnitV01
+ revw Z16.D, P1/M, Z3.D // REVW <Zd>.D, <Pg>/M, <Zn>.D \\ Reverse, vector \\ 1 2 2 2.0 V1UnitV01
+ ror W20, W13, #21 // ROR <Wd>, <Ws>, #<shifts> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+ ror X5, X8, #7 // ROR <Xd>, <Xs>, #<shiftd> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+ ror W29, W26, W0 // ROR <Wd>, <Wn>, <Wm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
+ ror X4, X13, X3 // ROR <Xd>, <Xn>, <Xm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
+ rorv W26, W0, W28 // RORV <Wd>, <Wn>, <Wm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+ rorv X21, X29, X17 // RORV <Xd>, <Xn>, <Xm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+ rshrn V24.8B, V0.8H, #4 // RSHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ rshrn V8.4H, V24.4S, #16 // RSHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ rshrn V12.2S, V12.2D, #28 // RSHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ rshrn2 V1.16B, V16.8H, #6 // RSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ rshrn2 V1.8H, V28.4S, #3 // RSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ rshrn2 V20.4S, V19.2D, #14 // RSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ rsubhn V3.8B, V9.8H, V16.8H // RSUBHN <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ rsubhn2 V31.4S, V12.2D, V15.2D // RSUBHN2 <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ saba V8.16B, V27.16B, V13.16B // SABA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD absolute diff accum \\ 1 4 1 2.0 V1UnitV13
+ sabal V2.2D, V5.2S, V31.2S // SABAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff accum long \\ 1 4 1 2.0 V1UnitV13
+ sabal2 V21.2D, V15.4S, V13.4S // SABAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff accum long \\ 1 4 1 2.0 V1UnitV13
+ sabd V12.2S, V11.2S, V27.2S // SABD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD absolute diff \\ 1 2 2 4.0 V1UnitV
+ sabd Z14.S, P1/M, Z14.S, Z23.S // SABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sabdl V28.2D, V4.2S, V19.2S // SABDL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff long \\ 1 2 2 4.0 V1UnitV
+ sabdl2 V10.8H, V30.16B, V4.16B // SABDL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff long \\ 1 2 2 4.0 V1UnitV
+ sadalp V3.4H, V5.8B // SADALP <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD pairwise add and accumulate long \\ 1 4 1 2.0 V1UnitV13
+ saddl V7.8H, V3.8B, V23.8B // SADDL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ saddl2 V21.4S, V5.8H, V10.8H // SADDL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ saddlp V13.8H, V29.16B // SADDLP <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD arith, pair-wise \\ 1 2 2 4.0 V1UnitV
+ saddlv H18, V28.8B // SADDLV H<d>, <Vn>.8B \\ ASIMD arith, reduce, 8B/8H \\ 2 4 4 2.00 V1UnitV13
+ saddlv H30, V4.16B // SADDLV H<d>, <Vn>.16B \\ ASIMD arith, reduce, 16B \\ 2 4 4 1.00 V1UnitV13[2]
+ saddlv S24, V29.4H // SADDLV S<d>, <Vn>.4H \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+ saddlv S22, V23.8H // SADDLV S<d>, <Vn>.8H \\ ASIMD arith, reduce, 8B/8H \\ 2 4 4 2.00 V1UnitV13
+ saddlv D2, V27.4S // SADDLV D<d>, <Vn>.4S \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+ saddv D19, P6, Z1.B // SADDV <Dd>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 5 14 14 0.50 V1UnitV1[2]
+ saddv D7, P2, Z14.H // SADDV <Dd>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 4 12 12 0.50 V1UnitV1[2]
+ saddv D4, P7, Z27.S // SADDV <Dd>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 4 10 10 0.50 V1UnitV1[2]
+ saddw V8.4S, V0.4S, V1.4H // SADDW <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ saddw2 V24.8H, V10.8H, V30.16B // SADDW2 <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ sbc W0, W16, W1 // SBC <Wd>, <Wn>, <Wm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ sbc X19, X3, X9 // SBC <Xd>, <Xn>, <Xm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ sbcs W26, W28, W0 // SBCS <Wd>, <Wn>, <Wm> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ sbcs X8, X26, X29 // SBCS <Xd>, <Xn>, <Xm> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ sbfiz W14, W5, #21, #8 // SBFIZ <Wd>, <Wn>, #<lsbs>, #<widths> \\ No description \\ No scheduling info
+ sbfiz X14, X1, #56, #2 // SBFIZ <Xd>, <Xn>, #<lsbd>, #<widthd> \\ No description \\ No scheduling info
+ sbfm W24, W11, #27, #19 // SBFM <Wd>, <Wn>, #<immrs>, #<immss> \\ Bitfield move, basic \\ 1 1 1 4.0 V1UnitI
+ sbfm X14, X1, #36, #55 // SBFM <Xd>, <Xn>, #<immrd>, #<immsd> \\ Bitfield move, basic \\ 1 1 1 4.0 V1UnitI
+ sbfx W16, W16, #31, #1 // SBFX <Wd>, <Wn>, #<lsbs>, #<widths> \\ Bit field extract \\ 1 1 1 4.0 V1UnitI
+ sbfx X14, X28, #53, #8 // SBFX <Xd>, <Xn>, #<lsbd>, #<widthd> \\ Bit field extract \\ 1 1 1 4.0 V1UnitI
+ scvtf H18, W17, #30 // SCVTF <Hd>, <Wn>, #<sfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ scvtf S14, W9, #19 // SCVTF <Sd>, <Wn>, #<sfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ scvtf D16, W3, #13 // SCVTF <Dd>, <Wn>, #<sfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ scvtf H28, X25, #23 // SCVTF <Hd>, <Xn>, #<dfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ scvtf S27, X19, #5 // SCVTF <Sd>, <Xn>, #<dfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ scvtf D15, X22, #32 // SCVTF <Dd>, <Xn>, #<dfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ scvtf H22, W7 // SCVTF <Hd>, <Wn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ scvtf S22, W10 // SCVTF <Sd>, <Wn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ scvtf D23, W6 // SCVTF <Dd>, <Wn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ scvtf H21, X12 // SCVTF <Hd>, <Xn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ scvtf S25, X28 // SCVTF <Sd>, <Xn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ scvtf D12, X0 // SCVTF <Dd>, <Xn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ scvtf H4, H8, #9 // SCVTF H<d>, H<n>, #<hfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ scvtf S29, S12, #1 // SCVTF S<d>, S<n>, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+ scvtf D1, D12, #26 // SCVTF D<d>, D<n>, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ scvtf V25.4H, V13.4H, #8 // SCVTF <Vd>.4H, <Vn>.4H, #<hfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ scvtf V4.8H, V8.8H, #10 // SCVTF <Vd>.8H, <Vn>.8H, #<hfbits> \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ scvtf V5.2S, V2.2S, #26 // SCVTF <Vd>.2S, <Vn>.2S, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ scvtf V2.4S, V24.4S, #10 // SCVTF <Vd>.4S, <Vn>.4S, #<sfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ scvtf V11.2D, V2.2D, #42 // SCVTF <Vd>.2D, <Vn>.2D, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ scvtf H5, H14 // SCVTF <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ scvtf S5, S16 // SCVTF S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+ scvtf D12, D11 // SCVTF D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ scvtf V22.4H, V10.4H // SCVTF <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ scvtf V16.8H, V13.8H // SCVTF <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ scvtf V9.2S, V31.2S // SCVTF <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ scvtf V2.4S, V7.4S // SCVTF <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ scvtf V18.2D, V11.2D // SCVTF <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ scvtf Z3.H, P3/M, Z29.H // SCVTF <Zd>.H, <Pg>/M, <Zn>.H \\ Convert to floating point, 16b to half \\ 4 6 6 0.25 V1UnitV0[4]
+ scvtf Z1.H, P5/M, Z27.S // SCVTF <Zd>.H, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 2 4 4 0.50 V1UnitV0[2]
+ scvtf Z30.S, P4/M, Z29.S // SCVTF <Zd>.S, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 2 4 4 0.50 V1UnitV0[2]
+ scvtf Z18.D, P3/M, Z16.S // SCVTF <Zd>.D, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 1 3 3 1.00 V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02
+ scvtf Z18.H, P1/M, Z14.D // SCVTF <Zd>.H, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
+ scvtf Z10.S, P1/M, Z11.D // SCVTF <Zd>.S, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
+ scvtf Z3.D, P2/M, Z27.D // SCVTF <Zd>.D, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
+ sdiv W6, W28, W24 // SDIV <Wd>, <Wn>, <Wm> \\ Divide, W-form \\ 1 12 12 0.08 V1UnitM0[13]
+ sdiv X19, X2, X14 // SDIV <Xd>, <Xn>, <Xm> \\ Divide, X-form \\ 1 20 20 0.05 V1UnitM0[21]
+ sdiv Z24.S, P1/M, Z24.S, Z14.S // SDIV <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 1 12 12 0.08 V1UnitV0[12]
+ sdiv Z7.D, P6/M, Z7.D, Z20.D // SDIV <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 1 20 20 0.05 V1UnitV0[21]
+ sdivr Z10.S, P2/M, Z10.S, Z7.S // SDIVR <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 1 12 12 0.08 V1UnitV0[12]
+ sdivr Z0.D, P3/M, Z0.D, Z9.D // SDIVR <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 1 20 20 0.05 V1UnitV0[21]
+ sdot Z6.S, Z29.B, Z0.B[2] // SDOT <Zda>.S, <Zn>.B, <Zmb>.B[<imms>] \\ Dot product, 8 bit \\ 1 3 1 2.0 V1UnitV01
+ sdot Z0.D, Z18.H, Z10.H[1] // SDOT <Zda>.D, <Zn>.H, <Zmh>.H[<immd>] \\ Dot product, 16 bit \\ 1 4 1 1.0 V1UnitV0
+ sdot Z28.S, Z30.B, Z14.B // SDOT <Zda>.S, <Zn>.B, <Zm>.B \\ Dot product, 8 bit \\ 1 3 1 2.0 V1UnitV01
+ sdot Z19.D, Z5.H, Z8.H // SDOT <Zda>.D, <Zn>.H, <Zm>.H \\ Dot product, 16 bit \\ 1 4 1 1.0 V1UnitV0
+ sdot V2.4S, V27.16B, V5.4B[0] // SDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<indexs>] \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
+ sdot V3.2S, V20.8B, V10.8B // SDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
+ sel P1.B, P7, P5.B, P4.B // SEL <Pd>.B, <Pg>, <Pn>.B, <Pm>.B \\ Predicate select \\ 1 1 1 1.0 V1UnitM0
+ sel Z0.H, P7, Z13.H, Z13.H // SEL <Zd>.<T>, <Pv>, <Zn>.<T>, <Zm>.<T> \\ Select, vector form \\ 1 2 2 2.0 V1UnitV01
+ setffr // SETFFR \\ Set first fault register \\ 1 2 2 1.0 V1UnitM0
+ sev // SEV \\ No description \\ No scheduling info
+ sevl // SEVL \\ No description \\ No scheduling info
+ shadd V25.16B, V1.16B, V10.16B // SHADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ shl D17, D3, #16 // SHL <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed, basic \\ 1 2 2 4.00 V1UnitV
+ shl V23.8B, V18.8B, #6 // SHL <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shl V0.8H, V23.8H, #10 // SHL <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shl V0.4S, V18.4S, #30 // SHL <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shl V20.2D, V28.2D, #40 // SHL <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shll V3.8H, V13.8B, #8 // SHLL <Vd>.8H, <Vn>.8B, #8 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shll V26.4S, V18.4H, #16 // SHLL <Vd>.4S, <Vn>.4H, #16 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shll V4.2D, V25.2S, #32 // SHLL <Vd>.2D, <Vn>.2S, #32 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shll2 V12.8H, V28.16B, #8 // SHLL2 <Vd>.8H, <Vn>.16B, #8 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shll2 V11.4S, V22.8H, #16 // SHLL2 <Vd>.4S, <Vn>.8H, #16 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shll2 V2.2D, V29.4S, #32 // SHLL2 <Vd>.2D, <Vn>.4S, #32 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shrn V27.8B, V23.8H, #3 // SHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shrn V17.4H, V1.4S, #13 // SHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shrn V13.2S, V0.2D, #12 // SHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shrn2 V4.16B, V29.8H, #8 // SHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shrn2 V9.8H, V18.4S, #10 // SHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shrn2 V5.4S, V12.2D, #16 // SHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shsub V15.8H, V5.8H, V27.8H // SHSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ sli D7, D19, #53 // SLI <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 4.00 V1UnitV
+ sli V16.16B, V26.16B, #7 // SLI <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+ sli V14.4H, V10.4H, #15 // SLI <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+ sli V29.2S, V14.2S, #13 // SLI <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+ sli V25.2D, V21.2D, #41 // SLI <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+ smaddl X17, W27, W30, X3 // SMADDL <Xd>, <Wn>, <Wm>, <Xa> \\ Multiply accumulate long \\ 1 2 1 1.0 V1UnitM0
+ smax Z3.S, Z3.S, #-39 // SMAX <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ smax Z0.B, P5/M, Z0.B, Z20.B // SMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ smax V30.16B, V3.16B, V30.16B // SMAX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+ smaxp V21.8H, V16.8H, V7.8H // SMAXP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+ smaxv B4, V30.8B // SMAXV B<d>, <Vn>.8B \\ ASIMD max/min, reduce, 8B/8H \\ 2 4 4 2.00 V1UnitV13
+ smaxv B15, V16.16B // SMAXV B<d>, <Vn>.16B \\ ASIMD max/min, reduce, 16B \\ 2 4 4 1.00 V1UnitV13[2]
+ smaxv H28, V14.4H // SMAXV H<d>, <Vn>.4H \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+ smaxv H6, V19.8H // SMAXV H<d>, <Vn>.8H \\ ASIMD max/min, reduce, 8B/8H \\ 2 4 4 2.00 V1UnitV13
+ smaxv S3, V14.4S // SMAXV S<d>, <Vn>.4S \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+ smaxv B19, P4, Z14.B // SMAXV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 5 14 14 0.50 V1UnitV1[2]
+ smaxv H0, P6, Z20.H // SMAXV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 4 12 12 0.50 V1UnitV1[2]
+ smaxv S11, P2, Z28.S // SMAXV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 4 10 10 0.50 V1UnitV1[2]
+ smaxv D24, P5, Z24.D // SMAXV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 2 8 8 2.00 V1UnitV[2], V1UnitV01
+ smc #32343 // SMC #<imm> \\ No description \\ No scheduling info
+ smin Z21.S, Z21.S, #59 // SMIN <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ smin Z22.S, P0/M, Z22.S, Z30.S // SMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ smin V29.4S, V24.4S, V24.4S // SMIN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+ sminp V7.8H, V27.8H, V7.8H // SMINP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+ sminv B6, V11.8B // SMINV B<d>, <Vn>.8B \\ ASIMD max/min, reduce, 8B/8H \\ 2 4 4 2.00 V1UnitV13
+ sminv B24, V8.16B // SMINV B<d>, <Vn>.16B \\ ASIMD max/min, reduce, 16B \\ 2 4 4 1.00 V1UnitV13[2]
+ sminv H24, V23.4H // SMINV H<d>, <Vn>.4H \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+ sminv H2, V9.8H // SMINV H<d>, <Vn>.8H \\ ASIMD max/min, reduce, 8B/8H \\ 2 4 4 2.00 V1UnitV13
+ sminv S16, V15.4S // SMINV S<d>, <Vn>.4S \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+ sminv B4, P2, Z10.B // SMINV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 5 14 14 0.50 V1UnitV1[2]
+ sminv H15, P7, Z10.H // SMINV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 4 12 12 0.50 V1UnitV1[2]
+ sminv S29, P0, Z27.S // SMINV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 4 10 10 0.50 V1UnitV1[2]
+ sminv D17, P2, Z18.D // SMINV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 2 8 8 2.00 V1UnitV[2], V1UnitV01
+ smlal V16.4S, V9.4H, V11.H[4] // SMLAL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ smlal V0.2D, V25.2S, V1.S[1] // SMLAL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ smlal2 V1.4S, V9.8H, V0.H[6] // SMLAL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ smlal2 V30.2D, V22.4S, V7.S[2] // SMLAL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ smlal V25.8H, V24.8B, V28.8B // SMLAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ smlal2 V30.4S, V31.8H, V13.8H // SMLAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ smlsl V14.4S, V23.4H, V12.H[7] // SMLSL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ smlsl V25.2D, V27.2S, V1.S[1] // SMLSL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ smlsl2 V12.4S, V11.8H, V12.H[0] // SMLSL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ smlsl2 V11.2D, V28.4S, V7.S[2] // SMLSL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ smlsl V11.4S, V14.4H, V15.4H // SMLSL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ smlsl2 V21.4S, V27.8H, V16.8H // SMLSL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ smmla V0.4S, V17.16B, V31.16B // SMMLA <Vd>.4S, <Vn>.16B, <Vm>.16B \\ ASIMD matrix multiply-accumulate \\ 1 3 1 4.0 V1UnitV
+ smnegl X3, W23, W18 // SMNEGL <Xd>, <Wn>, <Wm> \\ Multiply long \\ 1 2 2 2.0 V1UnitM
+ smov W15, V22.B[0] // SMOV <Wd>, <Vn>.B[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+ smov W6, V28.B[9] // SMOV <Wd>, <Vn>.B[<indexb>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+ smov W26, V27.H[0] // SMOV <Wd>, <Vn>.H[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+ smov W18, V29.H[6] // SMOV <Wd>, <Vn>.H[<indexh>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+ smov X21, V0.B[0] // SMOV <Xd>, <Vn>.B[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+ smov X16, V29.B[8] // SMOV <Xd>, <Vn>.B[<indexb>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+ smov X9, V27.H[0] // SMOV <Xd>, <Vn>.H[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+ smov X4, V21.H[2] // SMOV <Xd>, <Vn>.H[<indexh>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+ smov X15, V3.S[0] // SMOV <Xd>, <Vn>.S[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+ smov X5, V29.S[1] // SMOV <Xd>, <Vn>.S[<indexs>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+ smsubl X8, W24, W13, X6 // SMSUBL <Xd>, <Wn>, <Wm>, <Xa> \\ Multiply accumulate long \\ 1 2 1 1.0 V1UnitM0
+ smulh Z11.B, P5/M, Z11.B, Z17.B // SMULH <Zdn>.B, <Pg>/M, <Zdn>.B, <Zm>.B \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+ smulh Z8.H, P0/M, Z8.H, Z4.H // SMULH <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+ smulh Z27.S, P7/M, Z27.S, Z30.S // SMULH <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+ smulh Z4.D, P7/M, Z4.D, Z28.D // SMULH <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Multiply, D element size \\ 2 5 5 0.50 V1UnitV0[2]
+ smulh X8, X29, X17 // SMULH <Xd>, <Xn>, <Xm> \\ Multiply high \\ 1 3 3 2.0 V1UnitM
+ smull X19, W0, W6 // SMULL <Xd>, <Wn>, <Wm> \\ Multiply long \\ 1 2 2 2.0 V1UnitM
+ smull V3.4S, V26.4H, V1.H[5] // SMULL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ smull V31.2D, V23.2S, V6.S[2] // SMULL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ smull2 V13.4S, V18.8H, V0.H[3] // SMULL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ smull2 V11.2D, V1.4S, V7.S[0] // SMULL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ smull V28.2D, V26.2S, V20.2S // SMULL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ smull2 V7.2D, V14.4S, V15.4S // SMULL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ sqabs D15, D26 // SQABS <V><d>, <V><n> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ sqabs V25.8H, V24.8H // SQABS <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ sqadd Z1.B, Z1.B, #164 // SQADD <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sqadd Z18.H, Z18.H, #166 // SQADD <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sqadd Z3.D, Z3.D, #158, LSL #0 // SQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sqadd Z19.D, Z27.D, Z28.D // SQADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sqadd H12, H18, H10 // SQADD <V><d>, <V><n>, <V><m> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ sqadd V15.2S, V13.2S, V28.2S // SQADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ sqdecb X26, W26 // SQDECB <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecb X16, W16, VL64 // SQDECB <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecb X4, W4, VL1, MUL #16 // SQDECB <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecb X4 // SQDECB <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecb X28, VL6 // SQDECB <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecb X20, VL7, MUL #4 // SQDECB <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecd X1, W1 // SQDECD <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecd X11, W11, MUL3 // SQDECD <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecd X14, W14, VL2, MUL #16 // SQDECD <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecd X18 // SQDECD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecd X11, VL5 // SQDECD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecd X21, ALL, MUL #13 // SQDECD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecd Z27.D // SQDECD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ sqdecd Z2.D, VL128 // SQDECD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ sqdecd Z23.D, VL1, MUL #16 // SQDECD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ sqdech X7, W7 // SQDECH <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdech X10, W10, VL128 // SQDECH <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdech X16, W16, VL6, MUL #11 // SQDECH <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdech X6 // SQDECH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdech X17, VL128 // SQDECH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdech X27, VL128, MUL #4 // SQDECH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdech Z16.H // SQDECH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ sqdech Z21.H, VL6 // SQDECH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ sqdech Z7.H, MUL3, MUL #7 // SQDECH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ sqdecp X1, P4.B, W1 // SQDECP <Xdn>, <Pm>.<T>, <Wdn> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+ sqdecp X26, P6.D // SQDECP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+ sqdecp Z10.D, P3 // SQDECP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 3 7 7 0.50 V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV, V1UnitV01
+ sqdecw X13, W13 // SQDECW <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecw X2, W2, POW2 // SQDECW <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecw X26, W26, VL8, MUL #10 // SQDECW <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecw X10 // SQDECW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecw X17, VL128 // SQDECW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecw X13, MUL4, MUL #3 // SQDECW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecw Z7.S // SQDECW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ sqdecw Z10.S, POW2 // SQDECW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ sqdecw Z28.S, VL2, MUL #15 // SQDECW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ sqdmlal S23, H16, V4.H[7] // SQDMLAL S<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlal D12, S18, V3.S[0] // SQDMLAL D<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlal V20.4S, V30.4H, V12.H[3] // SQDMLAL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlal V11.2D, V24.2S, V0.S[3] // SQDMLAL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlal2 V2.4S, V17.8H, V5.H[6] // SQDMLAL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlal2 V23.2D, V30.4S, V6.S[0] // SQDMLAL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlal D16, S12, S15 // SQDMLAL <Va><d>, <Vb><n>, <Vb><m> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlal V8.4S, V24.4H, V31.4H // SQDMLAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlal2 V29.4S, V11.8H, V13.8H // SQDMLAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlsl S26, H21, V11.H[1] // SQDMLSL S<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlsl D6, S16, V3.S[1] // SQDMLSL D<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlsl V4.4S, V22.4H, V13.H[2] // SQDMLSL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlsl V26.2D, V7.2S, V3.S[0] // SQDMLSL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlsl2 V2.4S, V28.8H, V4.H[6] // SQDMLSL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlsl2 V4.2D, V3.4S, V3.S[2] // SQDMLSL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlsl D13, S21, S8 // SQDMLSL <Va><d>, <Vb><n>, <Vb><m> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlsl V11.4S, V19.4H, V5.4H // SQDMLSL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlsl2 V27.4S, V8.8H, V22.8H // SQDMLSL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmulh H14, H17, V6.H[6] // SQDMULH H<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ sqdmulh S19, S6, V6.S[3] // SQDMULH S<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ sqdmulh V8.4H, V16.4H, V5.H[4] // SQDMULH <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ sqdmulh V16.2S, V24.2S, V7.S[2] // SQDMULH <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ sqdmulh H26, H21, H17 // SQDMULH <V><d>, <V><n>, <V><m> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ sqdmulh V20.2S, V11.2S, V29.2S // SQDMULH <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ sqdmull S25, H5, V1.H[3] // SQDMULL S<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ sqdmull D29, S23, V0.S[2] // SQDMULL D<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ sqdmull V8.4S, V19.4H, V1.H[2] // SQDMULL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ sqdmull V20.2D, V10.2S, V6.S[2] // SQDMULL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ sqdmull2 V10.4S, V25.8H, V0.H[7] // SQDMULL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ sqdmull2 V4.2D, V29.4S, V2.S[3] // SQDMULL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ sqdmull D19, S2, S0 // SQDMULL <Va><d>, <Vb><n>, <Vb><m> \\ ASIMD multiply long \\ 1 2 2 4.00 V1UnitV
+ sqdmull V14.2D, V23.2S, V13.2S // SQDMULL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ sqdmull2 V12.4S, V11.8H, V1.8H // SQDMULL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ sqincb X12, W12 // SQINCB <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincb X1, W1, VL8 // SQINCB <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincb X16, W16, VL2, MUL #16 // SQINCB <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincb X5 // SQINCB <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincb X4, VL6 // SQINCB <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincb X30, ALL, MUL #7 // SQINCB <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincd X28, W28 // SQINCD <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincd X16, W16, VL8 // SQINCD <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincd X22, W22, VL6, MUL #16 // SQINCD <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincd X10 // SQINCD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincd X17, VL5 // SQINCD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincd X13, VL64, MUL #1 // SQINCD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincd Z24.D // SQINCD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ sqincd Z10.D, VL128 // SQINCD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ sqincd Z29.D, VL128, MUL #12 // SQINCD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ sqinch X28, W28 // SQINCH <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqinch X30, W30, VL1 // SQINCH <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqinch X16, W16, VL4, MUL #2 // SQINCH <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqinch X23 // SQINCH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqinch X10, VL64 // SQINCH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqinch X16, POW2, MUL #2 // SQINCH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqinch Z3.H // SQINCH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ sqinch Z23.H, VL4 // SQINCH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ sqinch Z6.H, VL128, MUL #3 // SQINCH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ sqincp X13, P2.H, W13 // SQINCP <Xdn>, <Pm>.<T>, <Wdn> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+ sqincp X0, P7.H // SQINCP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+ sqincp Z9.H, P1 // SQINCP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 3 7 7 0.50 V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV, V1UnitV01
+ sqincw X24, W24 // SQINCW <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincw X16, W16, MUL4 // SQINCW <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincw X27, W27, VL32, MUL #15 // SQINCW <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincw X29 // SQINCW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincw X25, VL7 // SQINCW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincw X21, VL8, MUL #3 // SQINCW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincw Z30.S // SQINCW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ sqincw Z8.S, MUL3 // SQINCW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ sqincw Z0.S, VL5, MUL #9 // SQINCW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ sqneg D24, D22 // SQNEG <V><d>, <V><n> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ sqneg V30.16B, V15.16B // SQNEG <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ sqrdmlah H14, H4, V6.H[7] // SQRDMLAH H<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+ sqrdmlah S24, S17, V6.S[2] // SQRDMLAH S<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+ sqrdmlah V17.4H, V18.4H, V4.H[7] // SQRDMLAH <Vd>.4H, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+ sqrdmlah V10.2S, V17.2S, V3.S[3] // SQRDMLAH <Vd>.2S, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+ sqrdmlah S3, S3, S5 // SQRDMLAH <V><d>, <V><n>, <V><m> \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+ sqrdmlah V16.8H, V30.8H, V28.8H // SQRDMLAH <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+ sqrdmlsh H13, H26, V4.H[2] // SQRDMLSH H<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+ sqrdmlsh S26, S29, V7.S[0] // SQRDMLSH S<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+ sqrdmlsh V1.8H, V21.8H, V8.H[1] // SQRDMLSH <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+ sqrdmlsh V8.4H, V11.4H, V1.H[3] // SQRDMLSH <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+ sqrdmlsh V20.2S, V29.2S, V4.S[3] // SQRDMLSH <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+ sqrdmlsh V21.4S, V9.4S, V1.S[0] // SQRDMLSH <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+ sqrdmlsh S30, S20, S13 // SQRDMLSH <V><d>, <V><n>, <V><m> \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+ sqrdmlsh V20.4H, V2.4H, V23.4H // SQRDMLSH <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+ sqrdmulh H3, H25, V2.H[1] // SQRDMULH H<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ sqrdmulh S9, S24, V4.S[3] // SQRDMULH S<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ sqrdmulh V0.8H, V15.8H, V0.H[5] // SQRDMULH <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ sqrdmulh V6.2S, V29.2S, V4.S[2] // SQRDMULH <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ sqrdmulh H5, H2, H20 // SQRDMULH <V><d>, <V><n>, <V><m> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ sqrdmulh V31.2S, V17.2S, V4.2S // SQRDMULH <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ sqrshl D6, D1, D30 // SQRSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshl V15.8B, V26.8B, V21.8B // SQRSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshrn B6, H24, #3 // SQRSHRN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+ sqrshrn H11, S22, #8 // SQRSHRN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+ sqrshrn S4, D9, #13 // SQRSHRN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+ sqrshrn V31.8B, V31.8H, #2 // SQRSHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshrn V27.4H, V11.4S, #8 // SQRSHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshrn V4.2S, V30.2D, #10 // SQRSHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshrn2 V11.16B, V30.8H, #7 // SQRSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshrn2 V14.8H, V3.4S, #12 // SQRSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshrn2 V13.4S, V28.2D, #24 // SQRSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshrun B5, H0, #3 // SQRSHRUN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+ sqrshrun H25, S11, #7 // SQRSHRUN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+ sqrshrun S15, D18, #2 // SQRSHRUN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+ sqrshrun V0.8B, V3.8H, #7 // SQRSHRUN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshrun V5.4H, V8.4S, #7 // SQRSHRUN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshrun V7.2S, V8.2D, #13 // SQRSHRUN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshrun2 V14.16B, V14.8H, #3 // SQRSHRUN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshrun2 V9.8H, V16.4S, #10 // SQRSHRUN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshrun2 V12.4S, V23.2D, #30 // SQRSHRUN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshl B15, B3, #4 // SQSHL B<d>, B<n>, #<shiftb> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshl H21, H0, #5 // SQSHL H<d>, H<n>, #<shifth> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshl S26, S9, #24 // SQSHL S<d>, S<n>, #<shifts> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshl D8, D23, #17 // SQSHL D<d>, D<n>, #<shiftd> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshl V25.16B, V26.16B, #5 // SQSHL <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshl V29.4H, V1.4H, #7 // SQSHL <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshl V0.2S, V5.2S, #1 // SQSHL <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshl V11.2D, V2.2D, #23 // SQSHL <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshl S17, S4, S23 // SQSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshl V23.16B, V23.16B, V23.16B // SQSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshlu B3, B27, #5 // SQSHLU B<d>, B<n>, #<shiftb> \\ No description \\ No scheduling info
+ sqshlu H23, H4, #6 // SQSHLU H<d>, H<n>, #<shifth> \\ No description \\ No scheduling info
+ sqshlu S29, S29, #30 // SQSHLU S<d>, S<n>, #<shifts> \\ No description \\ No scheduling info
+ sqshlu D14, D5, #22 // SQSHLU D<d>, D<n>, #<shiftd> \\ No description \\ No scheduling info
+ sqshlu V11.8B, V17.8B, #6 // SQSHLU <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ No description \\ No scheduling info
+ sqshlu V18.8H, V8.8H, #14 // SQSHLU <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ No description \\ No scheduling info
+ sqshlu V25.4S, V7.4S, #13 // SQSHLU <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ No description \\ No scheduling info
+ sqshlu V19.2D, V14.2D, #39 // SQSHLU <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ No description \\ No scheduling info
+ sqshrn B17, H30, #7 // SQSHRN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+ sqshrn H30, S15, #5 // SQSHRN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+ sqshrn S16, D0, #20 // SQSHRN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+ sqshrn V3.8B, V25.8H, #1 // SQSHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshrn V23.4H, V14.4S, #6 // SQSHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshrn V6.2S, V29.2D, #10 // SQSHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshrn2 V31.16B, V31.8H, #8 // SQSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshrn2 V13.8H, V6.4S, #13 // SQSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshrn2 V30.4S, V0.2D, #1 // SQSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshrun B3, H16, #3 // SQSHRUN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+ sqshrun H11, S10, #7 // SQSHRUN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+ sqshrun S18, D1, #13 // SQSHRUN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+ sqshrun V21.8B, V27.8H, #5 // SQSHRUN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+ sqshrun V18.4H, V19.4S, #2 // SQSHRUN <Vd>.4H, <Vn>.4S, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+ sqshrun V2.2S, V14.2D, #3 // SQSHRUN <Vd>.2S, <Vn>.2D, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+ sqshrun2 V10.16B, V28.8H, #5 // SQSHRUN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+ sqshrun2 V4.8H, V28.4S, #12 // SQSHRUN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+ sqshrun2 V7.4S, V18.2D, #16 // SQSHRUN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+ sqsub Z13.B, Z13.B, #114 // SQSUB <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sqsub Z28.H, Z28.H, #139 // SQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sqsub Z11.S, Z11.S, #14, LSL #0 // SQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sqsub Z28.S, Z9.S, Z12.S // SQSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sqsub B3, B13, B12 // SQSUB <V><d>, <V><n>, <V><m> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ sqsub V20.8H, V18.8H, V12.8H // SQSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ sqxtn B11, H22 // SQXTN <Vb><d>, <Va><n> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+ sqxtn V3.2S, V17.2D // SQXTN <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+ sqxtn2 V17.8H, V27.4S // SQXTN2 <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+ sqxtun B30, H18 // SQXTUN <Vb><d>, <Va><n> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+ sqxtun V26.8B, V21.8H // SQXTUN <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+ sqxtun2 V22.16B, V6.8H // SQXTUN2 <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+ srhadd V29.8B, V3.8B, V8.8B // SRHADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ sri D30, D17, #61 // SRI <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 4.00 V1UnitV
+ sri V23.16B, V30.16B, #2 // SRI <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+ sri V1.4H, V0.4H, #4 // SRI <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+ sri V28.2S, V6.2S, #16 // SRI <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+ sri V8.2D, V19.2D, #40 // SRI <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+ srshl D30, D8, D8 // SRSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ srshl V20.8B, V23.8B, V27.8B // SRSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ srshr D20, D18, #27 // SRSHR <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+ srshr V20.8B, V0.8B, #7 // SRSHR <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ srshr V27.8H, V19.8H, #9 // SRSHR <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ srshr V8.2S, V20.2S, #31 // SRSHR <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ srshr V31.2D, V17.2D, #33 // SRSHR <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ srsra D13, D10, #25 // SRSRA <V><d>, <V><n>, #<shift> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ srsra V31.16B, V15.16B, #5 // SRSRA <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ srsra V14.4H, V27.4H, #7 // SRSRA <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ srsra V17.2S, V8.2S, #8 // SRSRA <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ srsra V22.2D, V4.2D, #12 // SRSRA <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ ssbb // SSBB \\ No description \\ No scheduling info
+ sshl D29, D30, D9 // SSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, basic \\ 1 2 2 2.0 V1UnitV13
+ sshl V13.2D, V7.2D, V27.2D // SSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, basic \\ 1 2 2 2.0 V1UnitV13
+ sshll V9.8H, V2.8B, #0 // SSHLL <Vd>.8H, <Vn>.8B, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ sshll V12.4S, V3.4H, #4 // SSHLL <Vd>.4S, <Vn>.4H, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ sshll V17.2D, V6.2S, #22 // SSHLL <Vd>.2D, <Vn>.2S, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ sshll2 V28.8H, V12.16B, #7 // SSHLL2 <Vd>.8H, <Vn>.16B, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ sshll2 V29.4S, V22.8H, #7 // SSHLL2 <Vd>.4S, <Vn>.8H, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ sshll2 V17.2D, V13.4S, #22 // SSHLL2 <Vd>.2D, <Vn>.4S, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ sshr D3, D18, #10 // SSHR <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed, basic \\ 1 2 2 4.00 V1UnitV
+ sshr V20.8B, V28.8B, #2 // SSHR <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ sshr V20.4H, V23.4H, #10 // SSHR <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ sshr V13.2S, V23.2S, #2 // SSHR <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ sshr V3.2D, V8.2D, #61 // SSHR <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ ssra D28, D30, #51 // SSRA <V><d>, <V><n>, #<shift> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ ssra V9.8B, V18.8B, #2 // SSRA <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ ssra V21.4H, V24.4H, #3 // SSRA <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ ssra V28.2S, V17.2S, #6 // SSRA <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ ssra V0.2D, V23.2D, #35 // SSRA <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ ssubl V13.4S, V9.4H, V5.4H // SSUBL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ ssubl2 V18.4S, V29.8H, V17.8H // SSUBL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ ssubw V5.2D, V13.2D, V4.2S // SSUBW <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ ssubw2 V4.4S, V26.4S, V31.8H // SSUBW2 <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ st1 { V18.8B }, [X15] // ST1 { <Vt>.8B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ st1 { V31.16B }, [X29] // ST1 { <Vt>.16B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ st1 { V19.4H }, [X7] // ST1 { <Vt>.4H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ st1 { V27.8H }, [X17] // ST1 { <Vt>.8H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ st1 { V25.2S }, [X6] // ST1 { <Vt>.2S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ st1 { V22.4S }, [X19] // ST1 { <Vt>.4S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ st1 { V20.1D }, [X10] // ST1 { <Vt>.1D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ st1 { V8.2D }, [X15] // ST1 { <Vt>.2D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ st1 { V16.8B }, [X14], #8 // ST1 { <Vt>.8B }, [<Xn|SP>], #8 \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V10.16B }, [X8], #16 // ST1 { <Vt>.16B }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V29.4H }, [X17], #8 // ST1 { <Vt>.4H }, [<Xn|SP>], #8 \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V14.8H }, [X28], #16 // ST1 { <Vt>.8H }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V18.2S }, [X20], #8 // ST1 { <Vt>.2S }, [<Xn|SP>], #8 \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V28.4S }, [X1], #16 // ST1 { <Vt>.4S }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V17.1D }, [X27], #8 // ST1 { <Vt>.1D }, [<Xn|SP>], #8 \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V30.2D }, [X4], #16 // ST1 { <Vt>.2D }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V13.8B }, [X8], X7 // ST1 { <Vt>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V4.16B }, [X7], X26 // ST1 { <Vt>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V17.4H }, [X10], X4 // ST1 { <Vt>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V18.8H }, [X15], X1 // ST1 { <Vt>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V6.2S }, [X17], X24 // ST1 { <Vt>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V26.4S }, [X20], X29 // ST1 { <Vt>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V13.1D }, [X3], X20 // ST1 { <Vt>.1D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V15.2D }, [X21], X11 // ST1 { <Vt>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V8.8B, V9.8B }, [X18] // ST1 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ st1 { V1.16B, V2.16B }, [X4] // ST1 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 4 2 2 1.00 V1UnitL01[2],V1UnitV01[2]
+ st1 { V22.4H, V23.4H }, [X22] // ST1 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ st1 { V18.8H, V19.8H }, [X2] // ST1 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 4 2 2 1.00 V1UnitL01[2],V1UnitV01[2]
+ st1 { V13.2S, V14.2S }, [X9] // ST1 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ st1 { V15.4S, V16.4S }, [X12] // ST1 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 4 2 2 1.00 V1UnitL01[2],V1UnitV01[2]
+ st1 { V21.1D, V22.1D }, [X29] // ST1 { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ st1 { V26.2D, V27.2D }, [X28] // ST1 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 4 2 2 1.00 V1UnitL01[2],V1UnitV01[2]
+ st1 { V23.8B, V24.8B }, [X4], #16 // ST1 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V15.16B, V16.16B }, [X16], #32 // ST1 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V7.4H, V8.4H }, [X7], #16 // ST1 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V8.8H, V9.8H }, [X1], #32 // ST1 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V23.2S, V24.2S }, [X7], #16 // ST1 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V8.4S, V9.4S }, [X15], #32 // ST1 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V14.1D, V15.1D }, [X11], #16 // ST1 { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V12.2D, V13.2D }, [X2], #32 // ST1 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V3.8B, V4.8B }, [X28], X14 // ST1 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V19.16B, V20.16B }, [X13], X7 // ST1 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V28.4H, V29.4H }, [X14], X5 // ST1 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V9.8H, V10.8H }, [X28], X9 // ST1 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V10.2S, V11.2S }, [X10], X2 // ST1 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V13.4S, V14.4S }, [X8], X15 // ST1 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V5.1D, V6.1D }, [X9], X14 // ST1 { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V14.2D, V15.2D }, [X24], X1 // ST1 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V15.8B, V16.8B, V17.8B }, [X0] // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 4 2 2 1.00 V1UnitL01[2],V1UnitV01[2]
+ st1 { V27.16B, V28.16B, V29.16B }, [X18] // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 6 2 2 0.67 V1UnitL01[3],V1UnitV01[3]
+ st1 { V13.4H, V14.4H, V15.4H }, [X7] // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 4 2 2 1.00 V1UnitL01[2],V1UnitV01[2]
+ st1 { V8.8H, V9.8H, V10.8H }, [X16] // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 6 2 2 0.67 V1UnitL01[3],V1UnitV01[3]
+ st1 { V12.2S, V13.2S, V14.2S }, [X3] // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 4 2 2 1.00 V1UnitL01[2],V1UnitV01[2]
+ st1 { V19.4S, V20.4S, V21.4S }, [X7] // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 6 2 2 0.67 V1UnitL01[3],V1UnitV01[3]
+ st1 { V5.1D, V6.1D, V7.1D }, [X3] // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 4 2 2 1.00 V1UnitL01[2],V1UnitV01[2]
+ st1 { V13.2D, V14.2D, V15.2D }, [X27] // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 6 2 2 0.67 V1UnitL01[3],V1UnitV01[3]
+ st1 { V3.8B, V4.8B, V5.8B }, [X21], #24 // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], #24 \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V25.16B, V26.16B, V27.16B }, [X4], #48 // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], #48 \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 7 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+ st1 { V24.4H, V25.4H, V26.4H }, [X9], #24 // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], #24 \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V0.8H, V1.8H, V2.8H }, [X7], #48 // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], #48 \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 7 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+ st1 { V3.2S, V4.2S, V5.2S }, [X4], #24 // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], #24 \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V25.4S, V26.4S, V27.4S }, [X14], #48 // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], #48 \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 7 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+ st1 { V7.1D, V8.1D, V9.1D }, [X13], #24 // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], #24 \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V19.2D, V20.2D, V21.2D }, [X5], #48 // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], #48 \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 7 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+ st1 { V5.8B, V6.8B, V7.8B }, [X17], X25 // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V12.16B, V13.16B, V14.16B }, [X29], X23 // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 7 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+ st1 { V18.4H, V19.4H, V20.4H }, [X0], X14 // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V16.8H, V17.8H, V18.8H }, [X1], X18 // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 7 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+ st1 { V1.2S, V2.2S, V3.2S }, [X15], X29 // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V2.4S, V3.4S, V4.4S }, [X29], X6 // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 7 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+ st1 { V8.1D, V9.1D, V10.1D }, [X13], X27 // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V8.2D, V9.2D, V10.2D }, [X18], X19 // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 7 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+ st1 { V21.8B, V22.8B, V23.8B, V24.8B }, [X14] // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 4 2 2 1.00 V1UnitL01[2],V1UnitV01[2]
+ st1 { V18.16B, V19.16B, V20.16B, V21.16B }, [X29] // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 8 2 2 0.50 V1UnitL01[4],V1UnitV01[4]
+ st1 { V23.4H, V24.4H, V25.4H, V26.4H }, [X24] // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 4 2 2 1.00 V1UnitL01[2],V1UnitV01[2]
+ st1 { V7.8H, V8.8H, V9.8H, V10.8H }, [X19] // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 8 2 2 0.50 V1UnitL01[4],V1UnitV01[4]
+ st1 { V6.2S, V7.2S, V8.2S, V9.2S }, [X13] // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 4 2 2 1.00 V1UnitL01[2],V1UnitV01[2]
+ st1 { V26.4S, V27.4S, V28.4S, V29.4S }, [X12] // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 8 2 2 0.50 V1UnitL01[4],V1UnitV01[4]
+ st1 { V0.1D, V1.1D, V2.1D, V3.1D }, [X10] // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 4 2 2 1.00 V1UnitL01[2],V1UnitV01[2]
+ st1 { V25.2D, V26.2D, V27.2D, V28.2D }, [X19] // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 8 2 2 0.50 V1UnitL01[4],V1UnitV01[4]
+ st1 { V27.8B, V28.8B, V29.8B, V30.8B }, [X17], #32 // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V26.16B, V27.16B, V28.16B, V29.16B }, [X0], #64 // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], #64 \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 9 2 2 0.50 V1UnitL01[4],V1UnitV01[4],V1UnitI
+ st1 { V18.4H, V19.4H, V20.4H, V21.4H }, [X22], #32 // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V12.8H, V13.8H, V14.8H, V15.8H }, [X13], #64 // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], #64 \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 9 2 2 0.50 V1UnitL01[4],V1UnitV01[4],V1UnitI
+ st1 { V13.2S, V14.2S, V15.2S, V16.2S }, [X25], #32 // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V4.4S, V5.4S, V6.4S, V7.4S }, [X11], #64 // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], #64 \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 9 2 2 0.50 V1UnitL01[4],V1UnitV01[4],V1UnitI
+ st1 { V7.1D, V8.1D, V9.1D, V10.1D }, [X13], #32 // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V12.2D, V13.2D, V14.2D, V15.2D }, [X25], #64 // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], #64 \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 9 2 2 0.50 V1UnitL01[4],V1UnitV01[4],V1UnitI
+ st1 { V21.8B, V22.8B, V23.8B, V24.8B }, [X25], X28 // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V26.16B, V27.16B, V28.16B, V29.16B }, [X24], X5 // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 9 2 2 0.50 V1UnitL01[4],V1UnitV01[4],V1UnitI
+ st1 { V20.4H, V21.4H, V22.4H, V23.4H }, [X25], X19 // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V20.8H, V21.8H, V22.8H, V23.8H }, [X18], X0 // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 9 2 2 0.50 V1UnitL01[4],V1UnitV01[4],V1UnitI
+ st1 { V4.2S, V5.2S, V6.2S, V7.2S }, [X9], X5 // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V7.4S, V8.4S, V9.4S, V10.4S }, [X12], X30 // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 9 2 2 0.50 V1UnitL01[4],V1UnitV01[4],V1UnitI
+ st1 { V23.1D, V24.1D, V25.1D, V26.1D }, [X23], X4 // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V20.2D, V21.2D, V22.2D, V23.2D }, [X7], X14 // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 9 2 2 0.50 V1UnitL01[4],V1UnitV01[4],V1UnitI
+ st1 { V1.B }[5], [X1] // ST1 { <Vt>.B }[<indexb>], [<Xn|SP>] \\ ASIMD store, 1 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitL01,V1UnitV01
+ st1 { V0.H }[2], [X1] // ST1 { <Vt>.H }[<indexh>], [<Xn|SP>] \\ ASIMD store, 1 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitL01,V1UnitV01
+ st1 { V31.S }[1], [X16] // ST1 { <Vt>.S }[<indexs>], [<Xn|SP>] \\ ASIMD store, 1 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitL01,V1UnitV01
+ st1 { V15.D }[1], [X8] // ST1 { <Vt>.D }[<indexd>], [<Xn|SP>] \\ ASIMD store, 1 element, one lane, D \\ 2 4 4 2.0 V1UnitL01,V1UnitV01
+ st1 { V15.B }[1], [X12], #1 // ST1 { <Vt>.B }[<indexb>], [<Xn|SP>], #1 \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V16.B }[3], [X0], X2 // ST1 { <Vt>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V29.H }[2], [X27], #2 // ST1 { <Vt>.H }[<indexh>], [<Xn|SP>], #2 \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V15.H }[4], [X30], X9 // ST1 { <Vt>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V3.S }[1], [X24], #4 // ST1 { <Vt>.S }[<indexs>], [<Xn|SP>], #4 \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V26.S }[0], [X2], X30 // ST1 { <Vt>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V19.D }[1], [X9], #8 // ST1 { <Vt>.D }[<indexd>], [<Xn|SP>], #8 \\ ASIMD store, 1 element, one lane, D \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V29.D }[0], [X26], X22 // ST1 { <Vt>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, one lane, D \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1b { Z7.H }, P2, [X14] // ST1B { <Zt>.<T> }, <Pg>, [<Xn|SP>] \\ Contiguous store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ st1b { Z16.S }, P4, [X20, #3, MUL VL] // ST1B { <Zt>.<T> }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ st1b { Z17.S }, P3, [X20, X0] // ST1B { <Zt>.<T> }, <Pg>, [<Xn|SP>, <Xm>] \\ Contiguous store, scalar + scalar \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ st1b { Z0.D }, P4, [X11, Z13.D, UXTW] // ST1B { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] \\ Scatter store, 32-bit unpacked unscaled offset \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st1b { Z16.S }, P4, [X19, Z25.S, SXTW] // ST1B { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] \\ Scatter store, 32-bit unscaled offset \\ 4 10 10 1.00 V1UnitL[2], V1UnitL01[2], V1UnitV[2]
+ st1b { Z10.D }, P3, [X12, Z21.D] // ST1B { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D] \\ Scatter store, 64-bit unscaled offset \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st1b { Z17.S }, P7, [Z28.S] // ST1B { <Zt>.S }, <Pg>, [<Zn>.S] \\ Scatter store vector + imm 32-bit element size \\ 4 10 10 1.00 V1UnitL[2], V1UnitL01[2], V1UnitV[2]
+ st1b { Z16.S }, P0, [Z25.S, #7] // ST1B { <Zt>.S }, <Pg>, [<Zn>.S, #<imm>] \\ Scatter store vector + imm 32-bit element size \\ 4 10 10 1.00 V1UnitL[2], V1UnitL01[2], V1UnitV[2]
+ st1b { Z15.D }, P6, [Z27.D] // ST1B { <Zt>.D }, <Pg>, [<Zn>.D] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st1b { Z2.D }, P0, [Z21.D, #24] // ST1B { <Zt>.D }, <Pg>, [<Zn>.D, #<imm>] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st1d { Z10.D }, P2, [X26, Z5.D, SXTW #3] // ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #3] \\ Scatter store, 32-bit unpacked scaled offset \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st1d { Z18.D }, P2, [X7, Z1.D, UXTW] // ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] \\ Scatter store, 32-bit unpacked unscaled offset \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st1d { Z9.D }, P6, [X6, Z12.D, LSL #3] // ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #3] \\ Scatter store, 64-bit scaled offset \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st1d { Z3.D }, P3, [X1, Z30.D] // ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D] \\ Scatter store, 64-bit unscaled offset \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st1d { Z18.D }, P0, [Z7.D] // ST1D { <Zt>.D }, <Pg>, [<Zn>.D] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st1d { Z4.D }, P2, [Z2.D, #136] // ST1D { <Zt>.D }, <Pg>, [<Zn>.D, #<imm>] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st1h { Z28.S }, P3, [X18] // ST1H { <Zt>.<T> }, <Pg>, [<Xn|SP>] \\ Contiguous store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ st1h { Z23.H }, P1, [X14, #-8, MUL VL] // ST1H { <Zt>.<T> }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ st1h { Z25.S }, P3, [X17, X8, LSL #1] // ST1H { <Zt>.<T> }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous store, scalar + scalar \\ 3 2 2 2.0 V1UnitL01,V1UnitS,V1UnitV
+ st1h { Z12.S }, P3, [X24, Z30.S, SXTW #1] // ST1H { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #1] \\ Scatter store, 32-bit scaled offset \\ 4 10 10 1.00 V1UnitL[2], V1UnitL01[2], V1UnitV[2]
+ st1h { Z26.D }, P5, [X9, Z17.D, UXTW #1] // ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #1] \\ Scatter store, 32-bit unpacked scaled offset \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st1h { Z23.D }, P1, [X5, Z25.D, SXTW] // ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] \\ Scatter store, 32-bit unpacked unscaled offset \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st1h { Z14.S }, P4, [X22, Z17.S, SXTW] // ST1H { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] \\ Scatter store, 32-bit unscaled offset \\ 4 10 10 1.00 V1UnitL[2], V1UnitL01[2], V1UnitV[2]
+ st1h { Z23.D }, P3, [X25, Z11.D, LSL #1] // ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #1] \\ Scatter store, 64-bit scaled offset \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st1h { Z0.D }, P4, [X21, Z21.D] // ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D] \\ Scatter store, 64-bit unscaled offset \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st1h { Z29.S }, P5, [Z9.S] // ST1H { <Zt>.S }, <Pg>, [<Zn>.S] \\ Scatter store vector + imm 32-bit element size \\ 4 10 10 1.00 V1UnitL[2], V1UnitL01[2], V1UnitV[2]
+ st1h { Z4.S }, P7, [Z23.S, #40] // ST1H { <Zt>.S }, <Pg>, [<Zn>.S, #<imm>] \\ Scatter store vector + imm 32-bit element size \\ 4 10 10 1.00 V1UnitL[2], V1UnitL01[2], V1UnitV[2]
+ st1h { Z27.D }, P2, [Z3.D] // ST1H { <Zt>.D }, <Pg>, [<Zn>.D] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st1h { Z11.D }, P6, [Z7.D, #38] // ST1H { <Zt>.D }, <Pg>, [<Zn>.D, #<imm>] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st1w { Z25.S }, P1, [X9, Z28.S, SXTW #2] // ST1W { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #2] \\ Scatter store, 32-bit scaled offset \\ 4 10 10 1.00 V1UnitL[2], V1UnitL01[2], V1UnitV[2]
+ st1w { Z13.D }, P3, [X16, Z9.D, SXTW #2] // ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #2] \\ Scatter store, 32-bit unpacked scaled offset \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st1w { Z21.D }, P1, [X24, Z23.D, SXTW] // ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] \\ Scatter store, 32-bit unpacked unscaled offset \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st1w { Z17.S }, P1, [X5, Z22.S, UXTW] // ST1W { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] \\ Scatter store, 32-bit unscaled offset \\ 4 10 10 1.00 V1UnitL[2], V1UnitL01[2], V1UnitV[2]
+ st1w { Z28.D }, P1, [X5, Z8.D, LSL #2] // ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #2] \\ Scatter store, 64-bit scaled offset \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st1w { Z26.D }, P3, [X3, Z0.D] // ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D] \\ Scatter store, 64-bit unscaled offset \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st1w { Z28.S }, P6, [Z21.S] // ST1W { <Zt>.S }, <Pg>, [<Zn>.S] \\ Scatter store vector + imm 32-bit element size \\ 4 10 10 1.00 V1UnitL[2], V1UnitL01[2], V1UnitV[2]
+ st1w { Z26.S }, P3, [Z24.S, #120] // ST1W { <Zt>.S }, <Pg>, [<Zn>.S, #<imm>] \\ Scatter store vector + imm 32-bit element size \\ 4 10 10 1.00 V1UnitL[2], V1UnitL01[2], V1UnitV[2]
+ st1w { Z3.D }, P0, [Z12.D] // ST1W { <Zt>.D }, <Pg>, [<Zn>.D] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st1w { Z17.D }, P2, [Z1.D, #80] // ST1W { <Zt>.D }, <Pg>, [<Zn>.D, #<imm>] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st2 { V14.8B, V15.8B }, [X2] // ST2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+ st2 { V6.16B, V7.16B }, [X23] // ST2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 4 4 4 1.00 V1UnitV01[2],V1UnitL01[2]
+ st2 { V10.4H, V11.4H }, [X18] // ST2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+ st2 { V10.8H, V11.8H }, [X18] // ST2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 4 4 4 1.00 V1UnitV01[2],V1UnitL01[2]
+ st2 { V25.2S, V26.2S }, [X29] // ST2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+ st2 { V26.4S, V27.4S }, [X14] // ST2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 4 4 4 1.00 V1UnitV01[2],V1UnitL01[2]
+ st2 { V10.2D, V11.2D }, [X1] // ST2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, Q-form, D \\ 4 4 4 1.00 V1UnitV01[2],V1UnitL01[2]
+ st2 { V21.8B, V22.8B }, [X22], #16 // ST2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], #16 \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+ st2 { V26.16B, V27.16B }, [X2], #32 // ST2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], #32 \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st2 { V19.4H, V20.4H }, [X27], #16 // ST2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], #16 \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+ st2 { V28.8H, V29.8H }, [X22], #32 // ST2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], #32 \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st2 { V1.2S, V2.2S }, [X26], #16 // ST2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], #16 \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+ st2 { V19.4S, V20.4S }, [X7], #32 // ST2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], #32 \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st2 { V22.2D, V23.2D }, [X18], #32 // ST2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], #32 \\ ASIMD store, 2 element, multiple, Q-form, D \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st2 { V29.8B, V30.8B }, [X9], X2 // ST2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+ st2 { V17.16B, V18.16B }, [X4], X0 // ST2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st2 { V9.4H, V10.4H }, [X7], X25 // ST2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+ st2 { V8.8H, V9.8H }, [X11], X8 // ST2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st2 { V17.2S, V18.2S }, [X2], X8 // ST2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+ st2 { V9.4S, V10.4S }, [X23], X12 // ST2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st2 { V29.2D, V30.2D }, [X25], X11 // ST2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, Q-form, D \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st2 { V21.B, V22.B }[15], [X15] // ST2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>] \\ ASIMD store, 2 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+ st2 { V28.H, V29.H }[2], [X6] // ST2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>] \\ ASIMD store, 2 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+ st2 { V14.S, V15.S }[1], [X25] // ST2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>] \\ ASIMD store, 2 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+ st2 { V17.D, V18.D }[1], [X1] // ST2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>] \\ ASIMD store, 2 element, one lane, D \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+ st2 { V9.B, V10.B }[15], [X12], #2 // ST2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>], #2 \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+ st2 { V19.B, V20.B }[9], [X27], X28 // ST2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+ st2 { V18.H, V19.H }[3], [X30], #4 // ST2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>], #4 \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+ st2 { V13.H, V14.H }[5], [X23], X24 // ST2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+ st2 { V23.S, V24.S }[1], [X22], #8 // ST2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>], #8 \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+ st2 { V16.S, V17.S }[3], [X12], X16 // ST2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+ st2 { V27.D, V28.D }[0], [X16], #16 // ST2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>], #16 \\ ASIMD store, 2 element, one lane, D \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+ st2 { V6.D, V7.D }[1], [X14], X5 // ST2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, one lane, D \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+ st2b { Z19.B, Z20.B }, P1, [X18] // ST2B { <Zt1>.B, <Zt2>.B }, <Pg>, [<Xn|SP>] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st2b { Z26.B, Z27.B }, P7, [X15, #-6, MUL VL] // ST2B { <Zt1>.B, <Zt2>.B }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st2b { Z19.B, Z20.B }, P1, [X23, X27] // ST2B { <Zt1>.B, <Zt2>.B }, <Pg>, [<Xn|SP>, <Xm>] \\ Contiguous store two structures from two vectors, scalar + scalar \\ 2 4 4 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st2d { Z29.D, Z30.D }, P4, [X8] // ST2D { <Zt1>.D, <Zt2>.D }, <Pg>, [<Xn|SP>] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st2d { Z16.D, Z17.D }, P3, [X20, #14, MUL VL] // ST2D { <Zt1>.D, <Zt2>.D }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st2d { Z17.D, Z18.D }, P7, [X2, X28, LSL #3] // ST2D { <Zt1>.D, <Zt2>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous store two structures from two vectors, scalar + scalar \\ 2 4 4 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st2h { Z5.H, Z6.H }, P7, [X23] // ST2H { <Zt1>.H, <Zt2>.H }, <Pg>, [<Xn|SP>] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st2h { Z11.H, Z12.H }, P6, [X4, #10, MUL VL] // ST2H { <Zt1>.H, <Zt2>.H }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st2h { Z3.H, Z4.H }, P3, [X22, X16, LSL #1] // ST2H { <Zt1>.H, <Zt2>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous store two structures from two vectors, scalar + scalar \\ 3 4 4 2.00 V1UnitI, V1UnitL, V1UnitL01, V1UnitS, V1UnitV
+ st2w { Z14.S, Z15.S }, P4, [X17] // ST2W { <Zt1>.S, <Zt2>.S }, <Pg>, [<Xn|SP>] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st2w { Z9.S, Z10.S }, P5, [X19, #-8, MUL VL] // ST2W { <Zt1>.S, <Zt2>.S }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st2w { Z5.S, Z6.S }, P3, [X23, X13, LSL #2] // ST2W { <Zt1>.S, <Zt2>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous store two structures from two vectors, scalar + scalar \\ 2 4 4 2.00 V1UnitL, V1UnitL01, V1UnitV
+ st3 { V10.8B, V11.8B, V12.8B }, [X18] // ST3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 4 4 4 1.00 V1UnitV01[2],V1UnitL01[2]
+ st3 { V26.16B, V27.16B, V28.16B }, [X4] // ST3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 6 5 5 0.67 V1UnitV01[3],V1UnitL01[3]
+ st3 { V25.4H, V26.4H, V27.4H }, [X11] // ST3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 4 4 4 1.00 V1UnitV01[2],V1UnitL01[2]
+ st3 { V0.8H, V1.8H, V2.8H }, [X0] // ST3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 6 5 5 0.67 V1UnitV01[3],V1UnitL01[3]
+ st3 { V19.2S, V20.2S, V21.2S }, [X30] // ST3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 4 4 4 1.00 V1UnitV01[2],V1UnitL01[2]
+ st3 { V24.4S, V25.4S, V26.4S }, [X8] // ST3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 6 5 5 0.67 V1UnitV01[3],V1UnitL01[3]
+ st3 { V24.2D, V25.2D, V26.2D }, [X25] // ST3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, Q-form, D \\ 6 5 5 0.67 V1UnitV01[3],V1UnitL01[3]
+ st3 { V25.8B, V26.8B, V27.8B }, [X23], #24 // ST3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], #24 \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st3 { V9.16B, V10.16B, V11.16B }, [X26], #48 // ST3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], #48 \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 7 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+ st3 { V24.4H, V25.4H, V26.4H }, [X3], #24 // ST3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], #24 \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st3 { V23.8H, V24.8H, V25.8H }, [X22], #48 // ST3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], #48 \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 7 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+ st3 { V7.2S, V8.2S, V9.2S }, [X8], #24 // ST3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], #24 \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st3 { V11.4S, V12.4S, V13.4S }, [X15], #48 // ST3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], #48 \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 7 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+ st3 { V1.2D, V2.2D, V3.2D }, [X4], #48 // ST3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], #48 \\ ASIMD store, 3 element, multiple, Q-form, D \\ 7 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+ st3 { V16.8B, V17.8B, V18.8B }, [X26], X2 // ST3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st3 { V9.16B, V10.16B, V11.16B }, [X3], X18 // ST3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 7 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+ st3 { V2.4H, V3.4H, V4.4H }, [X4], X4 // ST3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st3 { V27.8H, V28.8H, V29.8H }, [X27], X8 // ST3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 7 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+ st3 { V26.2S, V27.2S, V28.2S }, [X2], X25 // ST3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st3 { V5.4S, V6.4S, V7.4S }, [X18], X29 // ST3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 7 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+ st3 { V26.2D, V27.2D, V28.2D }, [X14], X5 // ST3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, Q-form, D \\ 7 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+ st3 { V8.B, V9.B, V10.B }[4], [X18] // ST3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>] \\ ASIMD store, 3 element, one lane, B/H \\ 4 4 4 1.00 V1UnitV01[2],V1UnitL01[2]
+ st3 { V11.H, V12.H, V13.H }[4], [X0] // ST3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>] \\ ASIMD store, 3 element, one lane, B/H \\ 4 4 4 1.00 V1UnitV01[2],V1UnitL01[2]
+ st3 { V9.S, V10.S, V11.S }[2], [X20] // ST3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>] \\ ASIMD store, 3 element, one lane, S \\ 4 4 4 1.00 V1UnitV01[2],V1UnitL01[2]
+ st3 { V16.D, V17.D, V18.D }[0], [X13] // ST3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>] \\ ASIMD store, 3 element, one lane, D \\ 4 4 4 1.00 V1UnitV01[2],V1UnitL01[2]
+ st3 { V26.B, V27.B, V28.B }[1], [X12], #3 // ST3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>], #3 \\ ASIMD store, 3 element, one lane, B/H \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st3 { V27.B, V28.B, V29.B }[15], [X19], X23 // ST3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, one lane, B/H \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st3 { V24.H, V25.H, V26.H }[2], [X14], #6 // ST3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>], #6 \\ ASIMD store, 3 element, one lane, B/H \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st3 { V1.H, V2.H, V3.H }[2], [X0], X23 // ST3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, one lane, B/H \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st3 { V25.S, V26.S, V27.S }[2], [X10], #12 // ST3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>], #12 \\ ASIMD store, 3 element, one lane, S \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st3 { V8.S, V9.S, V10.S }[0], [X11], X20 // ST3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, one lane, S \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st3 { V19.D, V20.D, V21.D }[1], [X5], #24 // ST3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>], #24 \\ ASIMD store, 3 element, one lane, D \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st3 { V10.D, V11.D, V12.D }[0], [X12], X11 // ST3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, one lane, D \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st3b { Z0.B, Z1.B, Z2.B }, P6, [X26] // ST3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>, [<Xn|SP>] \\ Contiguous store three structures from three vectors, scalar + imm \\ 10 7 7 0.40 V1UnitL[5], V1UnitL01[5], V1UnitV[5]
+ st3b { Z22.B, Z23.B, Z24.B }, P6, [X25, #3, MUL VL] // ST3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store three structures from three vectors, scalar + imm \\ 10 7 7 0.40 V1UnitL[5], V1UnitL01[5], V1UnitV[5]
+ st3b { Z14.B, Z15.B, Z16.B }, P2, [X29, X27] // ST3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>, [<Xn|SP>, <Xm>] \\ Contiguous store three structures from three vectors, scalar + scalar \\ 15 7 7 0.40 V1UnitI[5], V1UnitL[5], V1UnitL01[5], V1UnitS[5], V1UnitV[5]
+ st3d { Z6.D, Z7.D, Z8.D }, P2, [X12] // ST3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>, [<Xn|SP>] \\ Contiguous store three structures from three vectors, scalar + imm \\ 10 7 7 0.40 V1UnitL[5], V1UnitL01[5], V1UnitV[5]
+ st3d { Z20.D, Z21.D, Z22.D }, P5, [X15, #9, MUL VL] // ST3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store three structures from three vectors, scalar + imm \\ 10 7 7 0.40 V1UnitL[5], V1UnitL01[5], V1UnitV[5]
+ st3d { Z15.D, Z16.D, Z17.D }, P7, [X0, X9, LSL #3] // ST3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous store three structures from three vectors, scalar + scalar \\ 15 7 7 0.40 V1UnitI[5], V1UnitL[5], V1UnitL01[5], V1UnitS[5], V1UnitV[5]
+ st3h { Z17.H, Z18.H, Z19.H }, P3, [X14] // ST3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>, [<Xn|SP>] \\ Contiguous store three structures from three vectors, scalar + imm \\ 10 7 7 0.40 V1UnitL[5], V1UnitL01[5], V1UnitV[5]
+ st3h { Z21.H, Z22.H, Z23.H }, P0, [X15, #6, MUL VL] // ST3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store three structures from three vectors, scalar + imm \\ 10 7 7 0.40 V1UnitL[5], V1UnitL01[5], V1UnitV[5]
+ st3h { Z2.H, Z3.H, Z4.H }, P3, [X21, X9, LSL #1] // ST3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous store three structures from three vectors, scalar + scalar \\ 15 7 7 0.40 V1UnitI[5], V1UnitL[5], V1UnitL01[5], V1UnitS[5], V1UnitV[5]
+ st3w { Z9.S, Z10.S, Z11.S }, P3, [X29] // ST3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>, [<Xn|SP>] \\ Contiguous store three structures from three vectors, scalar + imm \\ 10 7 7 0.40 V1UnitL[5], V1UnitL01[5], V1UnitV[5]
+ st3w { Z11.S, Z12.S, Z13.S }, P4, [X13, #15, MUL VL] // ST3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store three structures from three vectors, scalar + imm \\ 10 7 7 0.40 V1UnitL[5], V1UnitL01[5], V1UnitV[5]
+ st3w { Z19.S, Z20.S, Z21.S }, P2, [X22, X28, LSL #2] // ST3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous store three structures from three vectors, scalar + scalar \\ 15 7 7 0.40 V1UnitI[5], V1UnitL[5], V1UnitL01[5], V1UnitS[5], V1UnitV[5]
+ st4 { V17.8B, V18.8B, V19.8B, V20.8B }, [X8] // ST4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 6 6 6 0.67 V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+ st4 { V7.16B, V8.16B, V9.16B, V10.16B }, [X15] // ST4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 12 7 7 0.33 V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6]
+ st4 { V5.4H, V6.4H, V7.4H, V8.4H }, [X13] // ST4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 6 6 6 0.67 V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+ st4 { V11.8H, V12.8H, V13.8H, V14.8H }, [X1] // ST4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 12 7 7 0.33 V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6]
+ st4 { V15.2S, V16.2S, V17.2S, V18.2S }, [X18] // ST4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 6 6 6 0.67 V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+ st4 { V21.4S, V22.4S, V23.4S, V24.4S }, [X6] // ST4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 12 7 7 0.33 V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6]
+ st4 { V25.2D, V26.2D, V27.2D, V28.2D }, [X16] // ST4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, Q-form, D \\ 8 4 4 0.50 V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4]
+ st4 { V16.8B, V17.8B, V18.8B, V19.8B }, [X24], #32 // ST4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], #32 \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 7 6 6 0.67 V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+ st4 { V2.16B, V3.16B, V4.16B, V5.16B }, [X13], #64 // ST4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], #64 \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 13 7 7 0.33 V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6]
+ st4 { V17.4H, V18.4H, V19.4H, V20.4H }, [X3], #32 // ST4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], #32 \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 7 6 6 0.67 V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+ st4 { V18.8H, V19.8H, V20.8H, V21.8H }, [X5], #64 // ST4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], #64 \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 13 7 7 0.33 V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6]
+ st4 { V26.2S, V27.2S, V28.2S, V29.2S }, [X17], #32 // ST4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], #32 \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 7 6 6 0.67 V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+ st4 { V21.4S, V22.4S, V23.4S, V24.4S }, [X7], #64 // ST4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], #64 \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 13 7 7 0.33 V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6]
+ st4 { V27.2D, V28.2D, V29.2D, V30.2D }, [X25], #64 // ST4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], #64 \\ ASIMD store, 4 element, multiple, Q-form, D \\ 9 4 4 0.50 V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4]
+ st4 { V24.8B, V25.8B, V26.8B, V27.8B }, [X24], X8 // ST4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 7 6 6 0.67 V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+ st4 { V2.16B, V3.16B, V4.16B, V5.16B }, [X21], X21 // ST4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 13 7 7 0.33 V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6]
+ st4 { V11.4H, V12.4H, V13.4H, V14.4H }, [X29], X3 // ST4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 7 6 6 0.67 V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+ st4 { V16.8H, V17.8H, V18.8H, V19.8H }, [X13], X3 // ST4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 13 7 7 0.33 V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6]
+ st4 { V13.2S, V14.2S, V15.2S, V16.2S }, [X0], X0 // ST4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 7 6 6 0.67 V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+ st4 { V26.4S, V27.4S, V28.4S, V29.4S }, [X1], X22 // ST4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 13 7 7 0.33 V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6]
+ st4 { V18.2D, V19.2D, V20.2D, V21.2D }, [X10], X28 // ST4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, Q-form, D \\ 9 4 4 0.50 V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4]
+ st4 { V10.B, V11.B, V12.B, V13.B }[3], [X5] // ST4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>] \\ ASIMD store, 4 element, one lane, B/H \\ 6 6 6 1.00 V1UnitL[3], V1UnitV[3]
+ st4 { V5.H, V6.H, V7.H, V8.H }[4], [X13] // ST4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>] \\ ASIMD store, 4 element, one lane, B/H \\ 6 6 6 1.00 V1UnitL[3], V1UnitV[3]
+ st4 { V22.S, V23.S, V24.S, V25.S }[0], [X7] // ST4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>] \\ ASIMD store, 4 element, one lane, S \\ 6 6 6 1.00 V1UnitL[3], V1UnitV[3]
+ st4 { V23.D, V24.D, V25.D, V26.D }[1], [X5] // ST4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>] \\ ASIMD store, 4 element, one lane, D \\ 4 4 4 1.00 V1UnitV01[2],V1UnitL01[2]
+ st4 { V22.B, V23.B, V24.B, V25.B }[0], [X29], #4 // ST4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>], #4 \\ ASIMD store, 4 element, one lane, B/H \\ 7 6 6 1.00 V1UnitI, V1UnitL[3], V1UnitV[3]
+ st4 { V6.B, V7.B, V8.B, V9.B }[9], [X26], X21 // ST4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, one lane, B/H \\ 7 6 6 1.00 V1UnitI, V1UnitL[3], V1UnitV[3]
+ st4 { V19.H, V20.H, V21.H, V22.H }[2], [X18], #8 // ST4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>], #8 \\ ASIMD store, 4 element, one lane, B/H \\ 7 6 6 1.00 V1UnitI, V1UnitL[3], V1UnitV[3]
+ st4 { V6.H, V7.H, V8.H, V9.H }[4], [X9], X9 // ST4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, one lane, B/H \\ 7 6 6 1.00 V1UnitI, V1UnitL[3], V1UnitV[3]
+ st4 { V19.S, V20.S, V21.S, V22.S }[2], [X27], #16 // ST4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>], #16 \\ ASIMD store, 4 element, one lane, S \\ 7 6 6 1.00 V1UnitI, V1UnitL[3], V1UnitV[3]
+ st4 { V22.S, V23.S, V24.S, V25.S }[0], [X29], X21 // ST4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, one lane, S \\ 7 6 6 1.00 V1UnitI, V1UnitL[3], V1UnitV[3]
+ st4 { V10.D, V11.D, V12.D, V13.D }[0], [X16], #32 // ST4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>], #32 \\ ASIMD store, 4 element, one lane, D \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st4 { V10.D, V11.D, V12.D, V13.D }[0], [X12], X11 // ST4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, one lane, D \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st4b { Z22.B, Z23.B, Z24.B, Z25.B }, P0, [X0] // ST4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>, [<Xn|SP>] \\ Contiguous store four structures from four vectors, scalar + imm \\ 18 19 19 0.22 V1UnitL[9], V1UnitL01[9], V1UnitV[9]
+ st4b { Z1.B, Z2.B, Z3.B, Z4.B }, P7, [X1, #20, MUL VL] // ST4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store four structures from four vectors, scalar + imm \\ 18 19 19 0.22 V1UnitL[9], V1UnitL01[9], V1UnitV[9]
+ st4b { Z28.B, Z29.B, Z30.B, Z31.B }, P4, [X27, X20] // ST4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>, [<Xn|SP>, <Xm>] \\ Contiguous store four structures from four vectors, scalar + scalar \\ 27 11 11 0.22 V1UnitI[9], V1UnitL[9], V1UnitL01[9], V1UnitS[9], V1UnitV[9]
+ st4d { Z19.D, Z20.D, Z21.D, Z22.D }, P1, [X11] // ST4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>, [<Xn|SP>] \\ Contiguous store four structures from four vectors, scalar + imm \\ 18 19 19 0.22 V1UnitL[9], V1UnitL01[9], V1UnitV[9]
+ st4d { Z0.D, Z1.D, Z2.D, Z3.D }, P6, [X7, #-24, MUL VL] // ST4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store four structures from four vectors, scalar + imm \\ 18 19 19 0.22 V1UnitL[9], V1UnitL01[9], V1UnitV[9]
+ st4d { Z28.D, Z29.D, Z30.D, Z31.D }, P5, [X19, X20, LSL #3] // ST4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous store four structures from four vectors, scalar + scalar \\ 27 11 11 0.22 V1UnitI[9], V1UnitL[9], V1UnitL01[9], V1UnitS[9], V1UnitV[9]
+ st4h { Z14.H, Z15.H, Z16.H, Z17.H }, P1, [X24] // ST4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>, [<Xn|SP>] \\ Contiguous store four structures from four vectors, scalar + imm \\ 18 19 19 0.22 V1UnitL[9], V1UnitL01[9], V1UnitV[9]
+ st4h { Z27.H, Z28.H, Z29.H, Z30.H }, P3, [X26, #16, MUL VL] // ST4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store four structures from four vectors, scalar + imm \\ 18 19 19 0.22 V1UnitL[9], V1UnitL01[9], V1UnitV[9]
+ st4h { Z2.H, Z3.H, Z4.H, Z5.H }, P5, [X30, X17, LSL #1] // ST4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous store four structures from four vectors, scalar + scalar \\ 27 11 11 0.22 V1UnitI[9], V1UnitL[9], V1UnitL01[9], V1UnitS[9], V1UnitV[9]
+ st4w { Z3.S, Z4.S, Z5.S, Z6.S }, P0, [X0] // ST4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>, [<Xn|SP>] \\ Contiguous store four structures from four vectors, scalar + imm \\ 18 19 19 0.22 V1UnitL[9], V1UnitL01[9], V1UnitV[9]
+ st4w { Z5.S, Z6.S, Z7.S, Z8.S }, P2, [X0, #-20, MUL VL] // ST4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store four structures from four vectors, scalar + imm \\ 18 19 19 0.22 V1UnitL[9], V1UnitL01[9], V1UnitV[9]
+ st4w { Z21.S, Z22.S, Z23.S, Z24.S }, P5, [X5, X18, LSL #2] // ST4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous store four structures from four vectors, scalar + scalar \\ 27 11 11 0.22 V1UnitI[9], V1UnitL[9], V1UnitL01[9], V1UnitS[9], V1UnitV[9]
+ stlrb W19, [X26] // STLRB <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stlrb W9, [X19, #0] // STLRB <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ stlrh W4, [X7] // STLRH <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stlrh W20, [X5, #0] // STLRH <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ stlur W3, [X27] // STLUR <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stlur W0, [X15, #-14] // STLUR <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+ stlur X23, [X25] // STLUR <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stlur X18, [X6, #101] // STLUR <Xt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+ stlurb W30, [X17] // STLURB <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stlurb W25, [X21, #-8] // STLURB <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+ stlurh W9, [X29] // STLURH <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stlurh W6, [X27, #-224] // STLURH <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+ stlxp W26, W11, W12, [X7] // STLXP <Ws>, <Wt1>, <Wt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stlxp W24, W10, W16, [X8, #0] // STLXP <Ws>, <Wt1>, <Wt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ stlxp W1, X25, X26, [X10] // STLXP <Ws>, <Xt1>, <Xt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stlxp W10, X7, X20, [X22, #0] // STLXP <Ws>, <Xt1>, <Xt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ stlxr W23, W8, [X6] // STLXR <Ws>, <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stlxr W29, W28, [X26, #0] // STLXR <Ws>, <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ stlxr W23, X8, [X7] // STLXR <Ws>, <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stlxr W14, X18, [X23, #0] // STLXR <Ws>, <Xt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ stlxrb W2, W7, [X10] // STLXRB <Ws>, <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stlxrb W0, W1, [X20, #0] // STLXRB <Ws>, <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ stlxrh W16, W17, [X21] // STLXRH <Ws>, <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stlxrh W12, W26, [X23, #0] // STLXRH <Ws>, <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ stnp S29, S16, [X11] // STNP <St1>, <St2>, [<Xn|SP>] \\ Store vector pair, immed offset, S-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stnp S17, S19, [X27, #-40] // STNP <St1>, <St2>, [<Xn|SP>, #<imm32>] \\ Store vector pair, immed offset, S-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stnp D4, D3, [X30] // STNP <Dt1>, <Dt2>, [<Xn|SP>] \\ Store vector pair, immed offset, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stnp D25, D31, [X28, #328] // STNP <Dt1>, <Dt2>, [<Xn|SP>, #<imm64>] \\ Store vector pair, immed offset, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stnp Q28, Q22, [X3] // STNP <Qt1>, <Qt2>, [<Xn|SP>] \\ Store vector pair, immed offset, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stnp Q17, Q15, [X16, #656] // STNP <Qt1>, <Qt2>, [<Xn|SP>, #<imm128>] \\ Store vector pair, immed offset, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stnp W29, W25, [X5] // STNP <Wt1>, <Wt2>, [<Xn|SP>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ stnp W16, W18, [X27, #-232] // STNP <Wt1>, <Wt2>, [<Xn|SP>, #<imms>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ stnp X20, X16, [X8] // STNP <Xt1>, <Xt2>, [<Xn|SP>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ stnp X6, X20, [X15, #-120] // STNP <Xt1>, <Xt2>, [<Xn|SP>, #<immd>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ stnt1b { Z18.B }, P7, [X21] // STNT1B { <Zt>.B }, <Pg>, [<Xn|SP>] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ stnt1b { Z9.B }, P6, [X26, #-7, MUL VL] // STNT1B { <Zt>.B }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ stnt1b { Z18.B }, P1, [X1, X20] // STNT1B { <Zt>.B }, <Pg>, [<Xn|SP>, <Xm>] \\ Non temporal store, scalar + scalar \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ stnt1d { Z16.D }, P3, [X3] // STNT1D { <Zt>.D }, <Pg>, [<Xn|SP>] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ stnt1d { Z27.D }, P4, [X16, #-6, MUL VL] // STNT1D { <Zt>.D }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ stnt1d { Z11.D }, P0, [X18, X22, LSL #3] // STNT1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3] \\ Non temporal store, scalar + scalar \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ stnt1h { Z27.H }, P5, [X16] // STNT1H { <Zt>.H }, <Pg>, [<Xn|SP>] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ stnt1h { Z2.H }, P2, [X30, #-8, MUL VL] // STNT1H { <Zt>.H }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ stnt1h { Z0.H }, P1, [X7, X1, LSL #1] // STNT1H { <Zt>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ Non temporal store, scalar + scalar \\ 3 2 2 2.0 V1UnitL01,V1UnitS,V1UnitV
+ stnt1w { Z9.S }, P3, [X20] // STNT1W { <Zt>.S }, <Pg>, [<Xn|SP>] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ stnt1w { Z12.S }, P4, [X11, #-6, MUL VL] // STNT1W { <Zt>.S }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ stnt1w { Z28.S }, P6, [X6, X0, LSL #2] // STNT1W { <Zt>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2] \\ Non temporal store, scalar + scalar \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ stp S10, S19, [X13], #76 // STP <St1>, <St2>, [<Xn|SP>], #<imm32> \\ Store vector pair, immed post-index, S-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+ stp D19, D20, [X30], #-144 // STP <Dt1>, <Dt2>, [<Xn|SP>], #<imm64> \\ Store vector pair, immed post-index, D-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+ stp Q3, Q17, [X14], #-976 // STP <Qt1>, <Qt2>, [<Xn|SP>], #<imm128> \\ Store vector pair, immed post-index, Q-form \\ 4 2 2 1.00 V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV, V1UnitV01
+ stp S19, S24, [X27, #-224]! // STP <St1>, <St2>, [<Xn|SP>, #<imm32>]! \\ Store vector pair, immed pre-index, S-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+ stp D16, D21, [X28, #168]! // STP <Dt1>, <Dt2>, [<Xn|SP>, #<imm64>]! \\ Store vector pair, immed pre-index, D-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+ stp Q10, Q31, [X0, #608]! // STP <Qt1>, <Qt2>, [<Xn|SP>, #<imm128>]! \\ Store vector pair, immed pre-index, Q-form \\ 4 2 2 1.00 V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV, V1UnitV01
+ stp S27, S11, [X30] // STP <St1>, <St2>, [<Xn|SP>] \\ Store vector pair, immed offset, S-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stp D30, D19, [X25] // STP <Dt1>, <Dt2>, [<Xn|SP>] \\ Store vector pair, immed offset, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stp Q25, Q3, [X27] // STP <Qt1>, <Qt2>, [<Xn|SP>] \\ Store vector pair, immed offset, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stp S29, S13, [X0, #-44] // STP <St1>, <St2>, [<Xn|SP>, #<imm32>] \\ Store vector pair, immed offset, S-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stp D15, D12, [X20, #-72] // STP <Dt1>, <Dt2>, [<Xn|SP>, #<imm64>] \\ Store vector pair, immed offset, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stp Q13, Q16, [X3, #320] // STP <Qt1>, <Qt2>, [<Xn|SP>, #<imm128>] \\ Store vector pair, immed offset, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stp W18, W8, [X6], #196 // STP <Wt1>, <Wt2>, [<Xn|SP>], #<imms> \\ Store pair, immed post-index \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+ stp X10, X17, [X7], #-328 // STP <Xt1>, <Xt2>, [<Xn|SP>], #<immd> \\ Store pair, immed post-index \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+ stp W4, W3, [X0, #-36]! // STP <Wt1>, <Wt2>, [<Xn|SP>, #<imms>]! \\ Store pair, immed pre-index \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+ stp X14, X13, [X24, #-272]! // STP <Xt1>, <Xt2>, [<Xn|SP>, #<immd>]! \\ Store pair, immed pre-index \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+ stp W27, W30, [X20] // STP <Wt1>, <Wt2>, [<Xn|SP>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ stp X3, X6, [X16] // STP <Xt1>, <Xt2>, [<Xn|SP>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ stp W9, W14, [X10, #-24] // STP <Wt1>, <Wt2>, [<Xn|SP>, #<imms>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ stp X27, X4, [X14, #-448] // STP <Xt1>, <Xt2>, [<Xn|SP>, #<immd>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str W14, [X2], #-72 // STR <Wt>, [<Xn|SP>], #<simm> \\ Store, immed post-indexed \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+ str X28, [X14], #-130 // STR <Xt>, [<Xn|SP>], #<simm> \\ Store register, immed post-index \\ 3 1 1 2.00 V1UnitL01,V1UnitD
+ str W9, [X29, #-227]! // STR <Wt>, [<Xn|SP>, #<simm>]! \\ Store, immed pre-indexed \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+ str X13, [X5, #233]! // STR <Xt>, [<Xn|SP>, #<simm>]! \\ Store register, immed pre-index \\ 3 1 1 2.00 V1UnitL01,V1UnitD
+ str W2, [X30] // STR <Wt>, [<Xn|SP>] \\ Store, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str W28, [X2, #1796] // STR <Wt>, [<Xn|SP>, #<pimm32>] \\ Store, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str X22, [X29] // STR <Xt>, [<Xn|SP>] \\ Store register, unsigned immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str X2, [X10, #9472] // STR <Xt>, [<Xn|SP>, #<pimm64>] \\ Store register, unsigned immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str B21, [X28], #-62 // STR <Bt>, [<Xn|SP>], #<simm> \\ Store vector reg, unsigned immed, B/H/S/D-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+ str H13, [X10], #-194 // STR <Ht>, [<Xn|SP>], #<simm> \\ Store vector reg, unsigned immed, B/H/S/D-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+ str S14, [X8], #166 // STR <St>, [<Xn|SP>], #<simm> \\ Store vector reg, unsigned immed, B/H/S/D-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+ str D24, [X10], #134 // STR <Dt>, [<Xn|SP>], #<simm> \\ Store vector reg, unsigned immed, B/H/S/D-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+ str Q20, [X30], #-108 // STR <Qt>, [<Xn|SP>], #<simm> \\ Store vector reg, unsigned immed, Q-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+ str B9, [X24, #242]! // STR <Bt>, [<Xn|SP>, #<simm>]! \\ Store vector reg, immed post-index, B/H/S/D-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+ str H0, [X4, #-193]! // STR <Ht>, [<Xn|SP>, #<simm>]! \\ Store vector reg, immed post-index, B/H/S/D-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+ str S19, [X23, #115]! // STR <St>, [<Xn|SP>, #<simm>]! \\ Store vector reg, immed post-index, B/H/S/D-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+ str D20, [X2, #-30]! // STR <Dt>, [<Xn|SP>, #<simm>]! \\ Store vector reg, immed post-index, B/H/S/D-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+ str Q24, [X20, #62]! // STR <Qt>, [<Xn|SP>, #<simm>]! \\ Store vector reg, immed post-index, Q-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+ str B5, [X11] // STR <Bt>, [<Xn|SP>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str B20, [X23, #2409] // STR <Bt>, [<Xn|SP>, #<pimm8>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str H23, [X15] // STR <Ht>, [<Xn|SP>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str H24, [X6, #492] // STR <Ht>, [<Xn|SP>, #<pimm16>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str S25, [X19] // STR <St>, [<Xn|SP>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str S2, [X14, #984] // STR <St>, [<Xn|SP>, #<pimm32>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str D15, [X2] // STR <Dt>, [<Xn|SP>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str D27, [X7, #25704] // STR <Dt>, [<Xn|SP>, #<pimm64>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str Q13, [X16] // STR <Qt>, [<Xn|SP>] \\ Store vector reg, immed pre-index, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str Q4, [X7, #96] // STR <Qt>, [<Xn|SP>, #<pimm128>] \\ Store vector reg, immed pre-index, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str P4, [X5] // STR <Pt>, [<Xn|SP>] \\ Store from predicate reg \\ 1 1 1 2.0 V1UnitL01
+ str P3, [X21, #-78, MUL VL] // STR <Pt>, [<Xn|SP>, #<imm>, MUL VL] \\ Store from predicate reg \\ 1 1 1 2.0 V1UnitL01
+ str W14, [X9, X17] // STR <Wt>, [<Xn|SP>, <Xm>] \\ Store register, register offset, basic \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str X5, [X0, X22] // STR <Xt>, [<Xn|SP>, <Xm>] \\ Store register, register offset, basic \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str W24, [X21, W29, UXTW] // STR <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str X27, [X26, W24, UXTW] // STR <Xt>, [<Xn|SP>, <Wm>, UXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str W28, [X29, W29, SXTW] // STR <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str X25, [X1, W24, SXTW] // STR <Xt>, [<Xn|SP>, <Wm>, SXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str W2, [X24, X12, SXTX] // STR <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str X3, [X24, X27, SXTX] // STR <Xt>, [<Xn|SP>, <Xm>, SXTX] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str W29, [X30, W30, UXTW #2] // STR <Wt>, [<Xn|SP>, <Wm>, UXTW #2] \\ Store register, register offset, extend, scale by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str X5, [X13, W8, UXTW #3] // STR <Xt>, [<Xn|SP>, <Wm>, UXTW #3] \\ Store register, register offset, extend, scale by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str W28, [X7, W24, SXTW #2] // STR <Wt>, [<Xn|SP>, <Wm>, SXTW #2] \\ Store register, register offset, extend, scale by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str X23, [X2, W26, SXTW #3] // STR <Xt>, [<Xn|SP>, <Wm>, SXTW #3] \\ Store register, register offset, extend, scale by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str W11, [X8, X30, SXTX #2] // STR <Wt>, [<Xn|SP>, <Xm>, SXTX #2] \\ Store register, register offset, extend, scale by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str X20, [X4, X2, SXTX #3] // STR <Xt>, [<Xn|SP>, <Xm>, SXTX #3] \\ Store register, register offset, extend, scale by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str W8, [X11, X10, LSL #2] // STR <Wt>, [<Xn|SP>, <Xm>, LSL #2] \\ Store register, register offset, scaled by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str X27, [X2, X11, LSL #3] // STR <Xt>, [<Xn|SP>, <Xm>, LSL #3] \\ Store register, register offset, scaled by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str B14, [X13, X25] // STR <Bt>, [<Xn|SP>, <Xm>] \\ Store vector reg, register offset, basic, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str B30, [X16, W26, UXTW] // STR <Bt>, [<Xn|SP>, <Wm>, UXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str B20, [X19, W3, SXTW] // STR <Bt>, [<Xn|SP>, <Wm>, SXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str B13, [X29, X19, SXTX] // STR <Bt>, [<Xn|SP>, <Xm>, SXTX] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str H16, [X5, X24] // STR <Ht>, [<Xn|SP>, <Xm>] \\ Store vector reg, register offset, basic, B/H/S/D-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+ str H15, [X15, W15, UXTW] // STR <Ht>, [<Xn|SP>, <Wm>, UXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+ str H3, [X6, W15, SXTW] // STR <Ht>, [<Xn|SP>, <Wm>, SXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+ str H2, [X1, X28, SXTX] // STR <Ht>, [<Xn|SP>, <Xm>, SXTX] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+ str H30, [X29, W30, UXTW #1] // STR <Ht>, [<Xn|SP>, <Wm>, UXTW #1] \\ Store vector reg, register offset, extend, scale, H-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+ str H10, [X21, W11, SXTW #1] // STR <Ht>, [<Xn|SP>, <Wm>, SXTW #1] \\ Store vector reg, register offset, extend, scale, H-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+ str H0, [X15, X9, SXTX #1] // STR <Ht>, [<Xn|SP>, <Xm>, SXTX #1] \\ Store vector reg, register offset, extend, scale, H-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+ str H13, [X0, X26, LSL #1] // STR <Ht>, [<Xn|SP>, <Xm>, LSL #1] \\ Store vector reg, register offset, scale, H-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+ str S2, [X16, X17] // STR <St>, [<Xn|SP>, <Xm>] \\ Store vector reg, register offset, basic, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str S20, [X24, W10, UXTW] // STR <St>, [<Xn|SP>, <Wm>, UXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str S4, [X9, W14, SXTW] // STR <St>, [<Xn|SP>, <Wm>, SXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str S3, [X23, X26, SXTX] // STR <St>, [<Xn|SP>, <Xm>, SXTX] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str S27, [X17, W9, UXTW #2] // STR <St>, [<Xn|SP>, <Wm>, UXTW #2] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str S0, [X11, W20, SXTW #2] // STR <St>, [<Xn|SP>, <Wm>, SXTW #2] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str S20, [X17, X14, SXTX #2] // STR <St>, [<Xn|SP>, <Xm>, SXTX #2] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str S0, [X15, X28, LSL #2] // STR <St>, [<Xn|SP>, <Xm>, LSL #2] \\ Store vector reg, register offset, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str D5, [X26, X6] // STR <Dt>, [<Xn|SP>, <Xm>] \\ Store vector reg, register offset, basic, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str D11, [X9, W5, UXTW] // STR <Dt>, [<Xn|SP>, <Wm>, UXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str D16, [X20, W8, SXTW] // STR <Dt>, [<Xn|SP>, <Wm>, SXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str D0, [X12, X9, SXTX] // STR <Dt>, [<Xn|SP>, <Xm>, SXTX] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str D4, [X21, W25, UXTW #3] // STR <Dt>, [<Xn|SP>, <Wm>, UXTW #3] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str D28, [X20, W4, SXTW #3] // STR <Dt>, [<Xn|SP>, <Wm>, SXTW #3] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str D20, [X13, X23, SXTX #3] // STR <Dt>, [<Xn|SP>, <Xm>, SXTX #3] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str D31, [X19, X28, LSL #3] // STR <Dt>, [<Xn|SP>, <Xm>, LSL #3] \\ Store vector reg, register offset, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str Q13, [X24, X1] // STR <Qt>, [<Xn|SP>, <Xm>] \\ Store vector reg, register offset, basic, Q-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+ str Q1, [X25, W9, UXTW] // STR <Qt>, [<Xn|SP>, <Wm>, UXTW] \\ Store vector reg, register offset, extend, Q-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+ str Q25, [X20, W15, SXTW] // STR <Qt>, [<Xn|SP>, <Wm>, SXTW] \\ Store vector reg, register offset, extend, Q-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+ str Q25, [X0, X15, SXTX] // STR <Qt>, [<Xn|SP>, <Xm>, SXTX] \\ Store vector reg, register offset, extend, Q-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+ str Q6, [X13, W0, UXTW #4] // STR <Qt>, [<Xn|SP>, <Wm>, UXTW #4] \\ Store vector reg, register offset, extend, scale, Q-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+ str Q27, [X4, W15, SXTW #4] // STR <Qt>, [<Xn|SP>, <Wm>, SXTW #4] \\ Store vector reg, register offset, extend, scale, Q-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+ str Q3, [X23, X0, SXTX #4] // STR <Qt>, [<Xn|SP>, <Xm>, SXTX #4] \\ Store vector reg, register offset, extend, scale, Q-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+ str Q27, [X1, X28, LSL #4] // STR <Qt>, [<Xn|SP>, <Xm>, LSL #4] \\ Store vector reg, register offset, scale, Q-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+ str Z3, [X0] // STR <Zt>, [<Xn|SP>] \\ Store from vector reg \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ str Z8, [X6, #188, MUL VL] // STR <Zt>, [<Xn|SP>, #<imm>, MUL VL] \\ Store from vector reg \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ strb W23, [X11], #34 // STRB <Wt>, [<Xn|SP>], #<simm> \\ Store, immed post-indexed \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+ strb W5, [X19, #-175]! // STRB <Wt>, [<Xn|SP>, #<simm>]! \\ Store, immed pre-indexed \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+ strb W18, [X30] // STRB <Wt>, [<Xn|SP>] \\ Store, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ strb W12, [X9, #2315] // STRB <Wt>, [<Xn|SP>, #<pimm>] \\ Store, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ strb W5, [X26, W7, UXTW] // STRB <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ strb W18, [X2, W28, SXTW] // STRB <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ strb W21, [X21, X7, SXTX] // STRB <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ strb W9, [X6, X21] // STRB <Wt>, [<Xn|SP>, <Xm>] \\ Store register, register offset, basic \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ strh W21, [X8], #192 // STRH <Wt>, [<Xn|SP>], #<simm> \\ Store, immed post-indexed \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+ strh W8, [X26, #-204]! // STRH <Wt>, [<Xn|SP>, #<simm>]! \\ Store, immed pre-indexed \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+ strh W6, [X7] // STRH <Wt>, [<Xn|SP>] \\ Store, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ strh W0, [X19, #7514] // STRH <Wt>, [<Xn|SP>, #<pimm>] \\ Store, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ strh W12, [X0, X11] // STRH <Wt>, [<Xn|SP>, <Xm>] \\ Store register, register offset, basic \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ strh W5, [X18, W8, UXTW] // STRH <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ strh W28, [X29, W0, SXTW] // STRH <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ strh W7, [X17, X0, SXTX] // STRH <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ strh W7, [X2, W14, UXTW #1] // STRH <Wt>, [<Xn|SP>, <Wm>, UXTW #1] \\ Store register, register offset, extend, scale by 1 \\ 2 1 1 2.00 V1UnitD, V1UnitL, V1UnitL01
+ strh W7, [X16, W29, SXTW #1] // STRH <Wt>, [<Xn|SP>, <Wm>, SXTW #1] \\ Store register, register offset, extend, scale by 1 \\ 2 1 1 2.00 V1UnitD, V1UnitL, V1UnitL01
+ strh W5, [X1, X13, SXTX #1] // STRH <Wt>, [<Xn|SP>, <Xm>, SXTX #1] \\ Store register, register offset, extend, scale by 1 \\ 2 1 1 2.00 V1UnitD, V1UnitL, V1UnitL01
+ strh W14, [X28, X2, LSL #1] // STRH <Wt>, [<Xn|SP>, <Xm>, LSL #1] \\ Store register, register offset, scaled by 1 \\ 2 1 1 2.00 V1UnitD, V1UnitL, V1UnitL01
+ sttr W17, [X20] // STTR <Wt>, [<Xn|SP>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ sttr W14, [X30, #-35] // STTR <Wt>, [<Xn|SP>, #<simm>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ sttr X10, [X16] // STTR <Xt>, [<Xn|SP>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ sttr X16, [X8, #-25] // STTR <Xt>, [<Xn|SP>, #<simm>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ sttrb W13, [X2] // STTRB <Wt>, [<Xn|SP>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ sttrb W0, [X20, #-114] // STTRB <Wt>, [<Xn|SP>, #<simm>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ sttrh W26, [X11] // STTRH <Wt>, [<Xn|SP>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ sttrh W11, [X30, #-78] // STTRH <Wt>, [<Xn|SP>, #<simm>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ stur B29, [X8] // STUR <Bt>, [<Xn|SP>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stur B5, [X0, #80] // STUR <Bt>, [<Xn|SP>, #<simm>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stur H10, [X15] // STUR <Ht>, [<Xn|SP>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stur H10, [X12, #-227] // STUR <Ht>, [<Xn|SP>, #<simm>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stur S10, [X4] // STUR <St>, [<Xn|SP>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stur S9, [X14, #21] // STUR <St>, [<Xn|SP>, #<simm>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stur D1, [X28] // STUR <Dt>, [<Xn|SP>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stur D6, [X6, #188] // STUR <Dt>, [<Xn|SP>, #<simm>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stur Q6, [X16] // STUR <Qt>, [<Xn|SP>] \\ Store vector reg, unscaled immed, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stur Q5, [X13, #-253] // STUR <Qt>, [<Xn|SP>, #<simm>] \\ Store vector reg, unscaled immed, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stur W29, [X27] // STUR <Wt>, [<Xn|SP>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ stur W14, [X2, #-34] // STUR <Wt>, [<Xn|SP>, #<simm>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ stur X29, [X10] // STUR <Xt>, [<Xn|SP>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ stur X30, [X25, #127] // STUR <Xt>, [<Xn|SP>, #<simm>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ sturb W21, [X5] // STURB <Wt>, [<Xn|SP>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ sturb W25, [X26, #-117] // STURB <Wt>, [<Xn|SP>, #<simm>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ sturh W0, [X11] // STURH <Wt>, [<Xn|SP>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ sturh W7, [X10, #-209] // STURH <Wt>, [<Xn|SP>, #<simm>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ stxp W29, W24, W6, [X9] // STXP <Ws>, <Wt1>, <Wt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stxp W26, W19, W22, [X11, #0] // STXP <Ws>, <Wt1>, <Wt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ stxp W30, X6, X3, [X1] // STXP <Ws>, <Xt1>, <Xt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stxp W7, X2, X10, [X25, #0] // STXP <Ws>, <Xt1>, <Xt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ stxr W19, W21, [X9] // STXR <Ws>, <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stxr W25, W1, [X24, #0] // STXR <Ws>, <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ stxr W25, X30, [X28] // STXR <Ws>, <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stxr W30, X20, [X23, #0] // STXR <Ws>, <Xt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ stxrb W0, W26, [X10] // STXRB <Ws>, <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stxrb W10, W16, [X25, #0] // STXRB <Ws>, <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ stxrh W0, W20, [X8] // STXRH <Ws>, <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stxrh W12, W14, [X1, #0] // STXRH <Ws>, <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ sub W13, WSP, W10 // SUB <Wd>, <Wn|WSP>, <Wm> \\ ALU, basic, unconditional, no flagset \\ 1 2 2 2.00 V1UnitI
+ sub W22, WSP, W13, UXTB // SUB <Wd>, <Wn|WSP>, <Wm>, <wextend> \\ ALU, basic, unconditional, no flagset \\ 1 2 2 2.00 V1UnitI
+ sub W18, WSP, W23, SXTB #1 // SUB <Wd>, <Wn|WSP>, <Wm>, <wextend> #<amount> \\ ALU, basic, unconditional, no flagset \\ 1 2 2 2.00 V1UnitI
+ sub W13, WSP, W8, LSL #4 // SUB <Wd>, <Wn|WSP>, <Wm>, LSL #<amount> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, no flagset \\ 1 2 2 2.00 V1UnitI
+ sub X6, X8, X22 // SUB <Xd>, <Xn|SP>, X<m> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ sub X16, X2, W19, UXTB // SUB <Xd>, <Xn|SP>, <R><m>, <extend> \\ ALU, basic \\ 1 2 2 2.00 V1UnitI
+ sub X16, X3, W27, UXTB #2 // SUB <Xd>, <Xn|SP>, <R><m>, <extend> #<amount> \\ ALU, extend and shift \\ 1 2 2 2.0 V1UnitM
+ sub X4, X13, X16, LSL #3 // SUB <Xd>, <Xn|SP>, X<m>, LSL #<amount> \\ Arithmetic, LSL shift, shift <= 4 \\ 1 1 1 4.0 V1UnitI
+ sub WSP, WSP, #50 // SUB <Wd|WSP>, <Wn|WSP>, #<imm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ sub WSP, WSP, #84, LSL #12 // SUB <Wd|WSP>, <Wn|WSP>, #<imm>, <shift> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ sub X18, X22, #36 // SUB <Xd|SP>, <Xn|SP>, #<imm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ sub X17, X20, #184, LSL #0 // SUB <Xd|SP>, <Xn|SP>, #<imm>, <shift> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ sub Z18.B, Z18.B, #117 // SUB <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sub Z22.S, Z22.S, #4 // SUB <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sub Z15.H, Z15.H, #196, LSL #8 // SUB <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sub W0, W21, W2, LSL #4 // SUB <Wd>, <Wn>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
+ sub W22, W7, W13, LSL #19 // SUB <Wd>, <Wn>, <Wm>, LSL #<wamounth> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
+ sub W1, W18, W16, ASR #4 // SUB <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
+ sub X27, X29, X16, LSL #1 // SUB <Xd>, <Xn>, <Xm>, LSL #<amountl> \\ Arithmetic, LSL shift, shift <= 4 \\ 1 1 1 4.0 V1UnitI
+ sub X24, X10, X15, LSL #35 // SUB <Xd>, <Xn>, <Xm>, LSL #<amounth> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
+ sub X24, X19, X13, LSR #20 // SUB <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
+ sub D18, D25, D0 // SUB <V><d>, <V><n>, <V><m> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ sub V15.2S, V14.2S, V11.2S // SUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ sub Z18.H, P4/M, Z18.H, Z7.H // SUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sub Z29.B, Z19.B, Z8.B // SUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ subhn V7.4H, V10.4S, V13.4S // SUBHN <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ subhn2 V24.4S, V24.2D, V8.2D // SUBHN2 <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ subr Z13.B, Z13.B, #229 // SUBR <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ subr Z17.S, Z17.S, #140 // SUBR <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ subr Z15.D, Z15.D, #100, LSL #0 // SUBR <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ subr Z21.D, P7/M, Z21.D, Z24.D // SUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ subs W25, WSP, W13 // SUBS <Wd>, <Wn|WSP>, <Wm> \\ ALU, basic, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+ subs W10, WSP, W9, UXTH // SUBS <Wd>, <Wn|WSP>, <Wm>, <wextend> \\ ALU, basic, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+ subs W20, WSP, W3, SXTH #2 // SUBS <Wd>, <Wn|WSP>, <Wm>, <wextend> #<amount> \\ ALU, basic, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+ subs W12, WSP, W27, LSL #4 // SUBS <Wd>, <Wn|WSP>, <Wm>, LSL #<amount> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+ subs X16, X20, X21 // SUBS <Xd>, <Xn|SP>, X<m> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ subs X15, X2, W11, UXTB // SUBS <Xd>, <Xn|SP>, <R><m>, <extend> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ subs X13, X15, X14, SXTX #1 // SUBS <Xd>, <Xn|SP>, <R><m>, <extend> #<amount> \\ ALU, flagset, extend and shift \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+ subs X30, X1, X26, LSL #3 // SUBS <Xd>, <Xn|SP>, X<m>, LSL #<amount> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ subs W25, WSP, #239 // SUBS <Wd>, <Wn|WSP>, #<imm> \\ ALU, basic, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ subs W13, WSP, #75, LSL #12 // SUBS <Wd>, <Wn|WSP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 1 1 3.00 V1UnitFlg, V1UnitI
+ subs X9, X3, #173 // SUBS <Xd>, <Xn|SP>, #<imm> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ subs X30, X25, #82, LSL #12 // SUBS <Xd>, <Xn|SP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 1 1 3.00 V1UnitFlg, V1UnitI
+ subs W16, W27, W25 // SUBS <Wd>, <Wn>, <Wm> \\ ALU, basic, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ subs W0, W30, W27, LSL #4 // SUBS <Wd>, <Wn>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ subs W17, W27, W3, LSL #20 // SUBS <Wd>, <Wn>, <Wm>, LSL #<wamounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+ subs W27, W7, W27, ASR #5 // SUBS <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+ subs X21, X22, X17 // SUBS <Xd>, <Xn>, <Xm> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ subs X18, X1, X5, LSL #0 // SUBS <Xd>, <Xn>, <Xm>, LSL #<amountl> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ subs X28, X26, X4, LSL #49 // SUBS <Xd>, <Xn>, <Xm>, LSL #<amounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+ subs X26, X14, X30, LSR #35 // SUBS <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+ sudot V4.2S, V20.8B, V18.4B[2] // SUDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<index>] \\ ASIMD dot product using signed and unsigned integers \\ 1 3 1 4.0 V1UnitV
+ sudot Z5.S, Z30.B, Z3.B[1] // SUDOT <Zda>.S, <Zn>.B, <Zm>.B[<imm>] \\ Dot product, 8 bit, using signed and unsigned integers \\ 1 3 1 2.0 V1UnitV[2]
+ sunpkhi Z22.D, Z16.S // SUNPKHI <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 1 2 2 2.0 V1UnitV01
+ sunpklo Z10.H, Z0.B // SUNPKLO <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 1 2 2 2.0 V1UnitV01
+ suqadd B15, B21 // SUQADD <V><d>, <V><n> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ suqadd V26.16B, V27.16B // SUQADD <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ svc #35275 // SVC #<imm> \\ No description \\ No scheduling info
+ sxtb W7, W20 // SXTB <Wd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
+ sxtb X18, W14 // SXTB <Xd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
+ sxtb Z16.H, P5/M, Z15.H // SXTB <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
+ sxth Z4.S, P7/M, Z11.S // SXTH <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
+ sxtw Z12.D, P1/M, Z16.D // SXTW <Zd>.D, <Pg>/M, <Zn>.D \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
+ sxth W23, W2 // SXTH <Wd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
+ sxth X22, W17 // SXTH <Xd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
+ sxtl V4.8H, V21.8B // SXTL <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ sxtl2 V20.2D, V30.4S // SXTL2 <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ sxtw X18, W22 // SXTW <Xd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
+ sys #6, C6, C0, #3 // SYS #<op1>, <Cn>, <Cm>, #<op2> \\ No description \\ No scheduling info
+ sys #7, C12, C5, #3, X8 // SYS #<op1>, <Cn>, <Cm>, #<op2>, <Xt> \\ No description \\ No scheduling info
+ sysl X16, #5, C11, C8, #5 // SYSL <Xt>, #<op1>, <Cn>, <Cm>, #<op2> \\ No description \\ No scheduling info
+ tbl V7.8B, { V2.16B, V3.16B }, V17.8B // TBL <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B }, <Vm>.<Ta> \\ ASIMD table lookup, 1 or 2 table regs \\ 2 2 2 1.00 V1UnitV[2], V1UnitV01[2]
+ tbl V3.16B, { V10.16B, V11.16B, V12.16B }, V29.16B // TBL <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B }, <Vm>.<Ta> \\ ASIMD table lookup, 3 table regs \\ 2 4 4 1.00 V1UnitV01[2]
+ tbl V9.8B, { V22.16B, V23.16B, V24.16B, V25.16B }, V14.8B // TBL <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B, <Vn+3>.16B }, <Vm>.<Ta> \\ ASIMD table lookup, 4 table regs \\ 3 4 4 0.67 V1UnitV01[3]
+ tbl V29.16B, { V3.16B }, V17.16B // TBL <Vd>.<Ta>, { <Vn>.16B }, <Vm>.<Ta> \\ ASIMD table lookup, 1 or 2 table regs \\ 2 2 2 1.00 V1UnitV[2], V1UnitV01[2]
+ tbnz W3, #28, test // TBNZ W<t>, #<imms>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+ tbnz X30, #48, test // TBNZ X<t>, #<immd>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+ tbx V25.8B, { V13.16B, V14.16B }, V30.8B // TBX <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B }, <Vm>.<Ta> \\ ASIMD table lookup extension, 2 table reg \\ 2 4 4 1.00 V1UnitV01[2]
+ tbx V22.16B, { V3.16B, V4.16B, V5.16B }, V25.16B // TBX <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B }, <Vm>.<Ta> \\ ASIMD table lookup extension, 3 table reg \\ 3 6 6 0.67 V1UnitV01[3]
+ tbx V23.16B, { V0.16B, V1.16B, V2.16B, V3.16B }, V26.16B // TBX <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B, <Vn+3>.16B }, <Vm>.<Ta> \\ ASIMD table lookup extension, 4 table reg \\ 5 6 6 0.40 V1UnitV[5], V1UnitV01[5]
+ tbx V16.8B, { V21.16B }, V18.8B // TBX <Vd>.<Ta>, { <Vn>.16B }, <Vm>.<Ta> \\ ASIMD table lookup extension, 1 table reg \\ 2 2 2 1.00 V1UnitV[2], V1UnitV01[2]
+ tbz W17, #16, test // TBZ W<t>, #<imms>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+ tbz X22, #41, test // TBZ X<t>, #<immd>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+ tlbi VMALLE1 // TLBI <tlbi_op> \\ No description \\ No scheduling info
+ tlbi IPAS2E1IS, X7 // TLBI <tlbi_op2>, <Xt> \\ No description \\ No scheduling info
+ trn1 V30.2S, V21.2S, V25.2S // TRN1 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD transpose \\ 1 2 2 4.0 V1UnitV
+ trn1 P1.S, P4.S, P0.S // TRN1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> \\ Predicate transpose \\ 1 2 2 1.0 V1UnitM0
+ trn2 P0.H, P5.H, P7.H // TRN2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> \\ Predicate transpose \\ 1 2 2 1.0 V1UnitM0
+ trn2 V27.2D, V29.2D, V10.2D // TRN2 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD transpose \\ 1 2 2 4.0 V1UnitV
+ tst W25, #0xe00 // TST <Wn>, #<imms> \\ ALU, basic, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ tst X3, #0x1e00 // TST <Xn>, #<immd> \\ ALU, basic, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+ tst W9, W14 // TST <Wn>, <Wm> \\ ALU, basic, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+ tst W10, W3, ASR #16 // TST <Wn>, <Wm>, <shift> #<wamount> \\ Test/Compare, shift by immed \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+ tst X11, X28 // TST <Xn>, <Xm> \\ ALU, basic, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+ tst X9, X7, ASR #33 // TST <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, flagset \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+ uaba V13.16B, V14.16B, V19.16B // UABA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD absolute diff accum \\ 1 4 1 2.0 V1UnitV13
+ uabal V13.2D, V16.2S, V11.2S // UABAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff accum long \\ 1 4 1 2.0 V1UnitV13
+ uabal2 V17.4S, V0.8H, V1.8H // UABAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff accum long \\ 1 4 1 2.0 V1UnitV13
+ uabd V23.4S, V4.4S, V30.4S // UABD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD absolute diff \\ 1 2 2 4.0 V1UnitV
+ uabd Z5.B, P5/M, Z5.B, Z10.B // UABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ uabdl V13.4S, V26.4H, V7.4H // UABDL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff long \\ 1 2 2 4.0 V1UnitV
+ uabdl2 V15.2D, V9.4S, V10.4S // UABDL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff long \\ 1 2 2 4.0 V1UnitV
+ uadalp V31.1D, V14.2S // UADALP <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD pairwise add and accumulate long \\ 1 4 1 2.0 V1UnitV13
+ uaddl V29.8H, V8.8B, V31.8B // UADDL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ uaddl2 V15.4S, V22.8H, V14.8H // UADDL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ uaddlp V15.1D, V5.2S // UADDLP <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD arith, pair-wise \\ 1 2 2 4.0 V1UnitV
+ uaddlv H24, V24.8B // UADDLV H<d>, <Vn>.8B \\ ASIMD arith, reduce, 8B/8H \\ 2 4 4 2.00 V1UnitV13
+ uaddlv H19, V31.16B // UADDLV H<d>, <Vn>.16B \\ ASIMD arith, reduce, 16B \\ 2 4 4 1.00 V1UnitV13[2]
+ uaddlv S12, V24.4H // UADDLV S<d>, <Vn>.4H \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+ uaddlv S30, V0.8H // UADDLV S<d>, <Vn>.8H \\ ASIMD arith, reduce, 8B/8H \\ 2 4 4 2.00 V1UnitV13
+ uaddlv D6, V19.4S // UADDLV D<d>, <Vn>.4S \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+ uaddv D9, P5, Z1.B // UADDV <Dd>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 5 14 14 0.50 V1UnitV1[2]
+ uaddv D26, P0, Z25.H // UADDV <Dd>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 4 12 12 0.50 V1UnitV1[2]
+ uaddv D4, P1, Z1.S // UADDV <Dd>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 4 10 10 0.50 V1UnitV1[2]
+ uaddv D28, P6, Z6.D // UADDV <Dd>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 2 8 8 2.00 V1UnitV[2], V1UnitV01
+ uaddw V17.2D, V9.2D, V12.2S // UADDW <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ uaddw2 V15.4S, V13.4S, V4.8H // UADDW2 <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ ubfiz W11, W6, #30, #1 // UBFIZ <Wd>, <Wn>, #<lsbs>, #<widths> \\ No description \\ No scheduling info
+ ubfiz X27, X15, #49, #9 // UBFIZ <Xd>, <Xn>, #<lsbd>, #<widthd> \\ No description \\ No scheduling info
+ ubfm W19, W16, #25, #24 // UBFM <Wd>, <Wn>, #<immrs>, #<immss> \\ Bitfield move, basic \\ 1 1 1 4.0 V1UnitI
+ ubfm X4, X30, #59, #50 // UBFM <Xd>, <Xn>, #<immrd>, #<immsd> \\ Bitfield move, basic \\ 1 1 1 4.0 V1UnitI
+ ubfx W13, W18, #25, #3 // UBFX <Wd>, <Wn>, #<lsbs>, #<widths> \\ Bit field extract \\ 1 1 1 4.0 V1UnitI
+ ubfx X23, X26, #59, #5 // UBFX <Xd>, <Xn>, #<lsbd>, #<widthd> \\ Bit field extract \\ 1 1 1 4.0 V1UnitI
+ ucvtf H8, W24, #16 // UCVTF <Hd>, <Wn>, #<sfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ ucvtf S7, W16, #29 // UCVTF <Sd>, <Wn>, #<sfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ ucvtf D5, W17, #23 // UCVTF <Dd>, <Wn>, #<sfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ ucvtf H13, X17, #12 // UCVTF <Hd>, <Xn>, #<dfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ ucvtf S25, X2, #37 // UCVTF <Sd>, <Xn>, #<dfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ ucvtf D20, X11, #43 // UCVTF <Dd>, <Xn>, #<dfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ ucvtf H30, W4 // UCVTF <Hd>, <Wn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ ucvtf S22, W8 // UCVTF <Sd>, <Wn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ ucvtf D8, W15 // UCVTF <Dd>, <Wn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ ucvtf H17, X12 // UCVTF <Hd>, <Xn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ ucvtf S8, X0 // UCVTF <Sd>, <Xn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ ucvtf D22, X17 // UCVTF <Dd>, <Xn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ ucvtf H22, H16, #11 // UCVTF H<d>, H<n>, #<hfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ ucvtf S17, S18, #18 // UCVTF S<d>, S<n>, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+ ucvtf D19, D1, #2 // UCVTF D<d>, D<n>, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ ucvtf V18.4H, V11.4H, #7 // UCVTF <Vd>.4H, <Vn>.4H, #<hfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ ucvtf V22.8H, V20.8H, #10 // UCVTF <Vd>.8H, <Vn>.8H, #<hfbits> \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ ucvtf V16.2S, V17.2S, #11 // UCVTF <Vd>.2S, <Vn>.2S, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ ucvtf V17.4S, V23.4S, #2 // UCVTF <Vd>.4S, <Vn>.4S, #<sfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ ucvtf V18.2D, V20.2D, #60 // UCVTF <Vd>.2D, <Vn>.2D, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ ucvtf H7, H21 // UCVTF <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ ucvtf S25, S7 // UCVTF S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+ ucvtf D30, D29 // UCVTF D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ ucvtf V9.4H, V25.4H // UCVTF <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ ucvtf V24.8H, V31.8H // UCVTF <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+ ucvtf V14.2S, V2.2S // UCVTF <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ ucvtf V20.4S, V0.4S // UCVTF <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+ ucvtf V27.2D, V3.2D // UCVTF <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ ucvtf Z31.H, P5/M, Z30.H // UCVTF <Zd>.H, <Pg>/M, <Zn>.H \\ Convert to floating point, 16b to half \\ 4 6 6 0.25 V1UnitV0[4]
+ ucvtf Z23.H, P7/M, Z9.S // UCVTF <Zd>.H, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 2 4 4 0.50 V1UnitV0[2]
+ ucvtf Z1.S, P1/M, Z10.S // UCVTF <Zd>.S, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 2 4 4 0.50 V1UnitV0[2]
+ ucvtf Z24.D, P5/M, Z9.S // UCVTF <Zd>.D, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 1 3 3 1.00 V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02
+ ucvtf Z30.H, P2/M, Z24.D // UCVTF <Zd>.H, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
+ ucvtf Z9.S, P5/M, Z9.D // UCVTF <Zd>.S, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
+ ucvtf Z18.D, P6/M, Z19.D // UCVTF <Zd>.D, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
+ udiv W12, W17, W22 // UDIV <Wd>, <Wn>, <Wm> \\ Divide, W-form \\ 1 12 12 0.08 V1UnitM0[13]
+ udiv X7, X2, X23 // UDIV <Xd>, <Xn>, <Xm> \\ Divide, X-form \\ 1 20 20 0.05 V1UnitM0[21]
+ udiv Z30.S, P5/M, Z30.S, Z10.S // UDIV <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 1 12 12 0.08 V1UnitV0[12]
+ udiv Z31.D, P5/M, Z31.D, Z29.D // UDIV <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 1 20 20 0.05 V1UnitV0[21]
+ udivr Z19.S, P4/M, Z19.S, Z8.S // UDIVR <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 1 12 12 0.08 V1UnitV0[12]
+ udivr Z3.D, P5/M, Z3.D, Z8.D // UDIVR <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 1 20 20 0.05 V1UnitV0[21]
+ udot Z0.S, Z5.B, Z4.B[1] // UDOT <Zda>.S, <Zn>.B, <Zms>.B[<imms>] \\ Dot product, 8 bit \\ 1 3 1 2.0 V1UnitV01
+ udot Z19.D, Z1.H, Z13.H[1] // UDOT <Zda>.D, <Zn>.H, <Zmd>.H[<immd>] \\ Dot product, 16 bit \\ 1 4 1 1.0 V1UnitV0
+ udot Z22.S, Z29.B, Z4.B // UDOT <Zda>.S, <Zn>.B, <Zm>.B \\ Dot product, 8 bit \\ 1 3 1 2.0 V1UnitV01
+ udot Z9.D, Z1.H, Z11.H // UDOT <Zda>.D, <Zn>.H, <Zm>.H \\ Dot product, 16 bit \\ 1 4 1 1.0 V1UnitV0
+ udot V10.2S, V11.8B, V21.4B[3] // UDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<index>] \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
+ udot V7.4S, V21.16B, V6.4B[3] // UDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<index>] \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
+ udot V19.2S, V31.8B, V17.8B // UDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
+ uhadd V10.8H, V7.8H, V7.8H // UHADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ uhsub V12.4H, V16.4H, V28.4H // UHSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ umaddl X9, W28, W9, X19 // UMADDL <Xd>, <Wn>, <Wm>, <Xa> \\ Multiply accumulate long \\ 1 2 1 1.0 V1UnitM0
+ umax Z8.B, Z8.B, #12 // UMAX <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ umax Z27.B, P1/M, Z27.B, Z13.B // UMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ umax V7.16B, V11.16B, V7.16B // UMAX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+ umaxp V15.8H, V8.8H, V12.8H // UMAXP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+ umaxv B19, V7.8B // UMAXV B<d>, <Vn>.8B \\ ASIMD max/min, reduce, 8B/8H \\ 2 4 4 2.00 V1UnitV13
+ umaxv B12, V10.16B // UMAXV B<d>, <Vn>.16B \\ ASIMD max/min, reduce, 16B \\ 2 4 4 1.00 V1UnitV13[2]
+ umaxv H27, V5.4H // UMAXV H<d>, <Vn>.4H \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+ umaxv H11, V22.8H // UMAXV H<d>, <Vn>.8H \\ ASIMD max/min, reduce, 8B/8H \\ 2 4 4 2.00 V1UnitV13
+ umaxv S5, V25.4S // UMAXV S<d>, <Vn>.4S \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+ umaxv B9, P7, Z19.B // UMAXV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 5 14 14 0.50 V1UnitV1[2]
+ umaxv H8, P7, Z26.H // UMAXV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 4 12 12 0.50 V1UnitV1[2]
+ umaxv S15, P2, Z28.S // UMAXV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 4 10 10 0.50 V1UnitV1[2]
+ umaxv D11, P4, Z11.D // UMAXV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 2 8 8 2.00 V1UnitV[2], V1UnitV01
+ umin Z21.S, Z21.S, #139 // UMIN <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ umin Z31.S, P2/M, Z31.S, Z4.S // UMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ umin V0.16B, V26.16B, V2.16B // UMIN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+ uminp V28.4S, V16.4S, V15.4S // UMINP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+ uminv B23, V21.8B // UMINV B<d>, <Vn>.8B \\ ASIMD max/min, reduce, 8B/8H \\ 2 4 4 2.00 V1UnitV13
+ uminv B3, V10.16B // UMINV B<d>, <Vn>.16B \\ ASIMD max/min, reduce, 16B \\ 2 4 4 1.00 V1UnitV13[2]
+ uminv H6, V22.4H // UMINV H<d>, <Vn>.4H \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+ uminv H23, V3.8H // UMINV H<d>, <Vn>.8H \\ ASIMD max/min, reduce, 8B/8H \\ 2 4 4 2.00 V1UnitV13
+ uminv S29, V19.4S // UMINV S<d>, <Vn>.4S \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+ uminv B2, P5, Z8.B // UMINV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 5 14 14 0.50 V1UnitV1[2]
+ uminv H28, P0, Z0.H // UMINV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 4 12 12 0.50 V1UnitV1[2]
+ uminv S10, P1, Z29.S // UMINV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 4 10 10 0.50 V1UnitV1[2]
+ uminv D24, P5, Z29.D // UMINV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 2 8 8 2.00 V1UnitV[2], V1UnitV01
+ umlal V22.4S, V14.4H, V0.H[6] // UMLAL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ umlal V28.2D, V31.2S, V0.S[1] // UMLAL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ umlal2 V31.4S, V7.8H, V15.H[5] // UMLAL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ umlal2 V10.2D, V4.4S, V3.S[2] // UMLAL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ umlal V29.4S, V20.4H, V30.4H // UMLAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ umlal2 V10.2D, V28.4S, V19.4S // UMLAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ umlsl V21.4S, V12.4H, V7.H[5] // UMLSL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ umlsl V20.2D, V20.2S, V2.S[0] // UMLSL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ umlsl2 V27.4S, V28.8H, V6.H[4] // UMLSL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ umlsl2 V30.2D, V23.4S, V1.S[2] // UMLSL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ umlsl V11.2D, V23.2S, V1.2S // UMLSL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ umlsl2 V11.8H, V20.16B, V2.16B // UMLSL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ ummla V14.4S, V17.16B, V25.16B // UMMLA <Vd>.4S, <Vn>.16B, <Vm>.16B \\ ASIMD matrix multiply-accumulate \\ 1 3 1 4.0 V1UnitV
+ umnegl X23, W5, W23 // UMNEGL <Xd>, <Wn>, <Wm> \\ Multiply long \\ 1 2 2 2.0 V1UnitM
+ umov W6, V22.B[0] // UMOV <Wd>, <Vn>.B[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+ umov W29, V0.B[11] // UMOV <Wd>, <Vn>.B[<indexb>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+ umov W10, V25.H[0] // UMOV <Wd>, <Vn>.H[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+ umov W6, V7.H[3] // UMOV <Wd>, <Vn>.H[<indexh>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+ umov W8, V8.S[0] // UMOV <Wd>, <Vn>.S[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+ umov W20, V1.S[3] // UMOV <Wd>, <Vn>.S[<indexs>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+ umov X20, V11.D[0] // UMOV <Xd>, <Vn>.D[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+ umov X29, V7.D[1] // UMOV <Xd>, <Vn>.D[<indexd>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+ umsubl X21, W16, W28, X6 // UMSUBL <Xd>, <Wn>, <Wm>, <Xa> \\ Multiply accumulate long \\ 1 2 1 1.0 V1UnitM0
+ umulh Z20.B, P4/M, Z20.B, Z6.B // UMULH <Zdn>.B, <Pg>/M, <Zdn>.B, <Zm>.B \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+ umulh Z30.H, P6/M, Z30.H, Z15.H // UMULH <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+ umulh Z11.S, P7/M, Z11.S, Z8.S // UMULH <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+ umulh Z3.D, P3/M, Z3.D, Z2.D // UMULH <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Multiply, D element size \\ 2 5 5 0.50 V1UnitV0[2]
+ umulh X23, X22, X19 // UMULH <Xd>, <Xn>, <Xm> \\ Multiply high \\ 1 3 3 2.0 V1UnitM
+ umull X5, W17, W23 // UMULL <Xd>, <Wn>, <Wm> \\ Multiply long \\ 1 2 2 2.0 V1UnitM
+ umull V27.4S, V1.4H, V8.H[6] // UMULL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ umull V22.2D, V28.2S, V6.S[1] // UMULL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ umull2 V18.4S, V26.8H, V10.H[1] // UMULL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ umull2 V28.2D, V21.4S, V1.S[0] // UMULL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ umull V23.4S, V26.4H, V19.4H // UMULL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ umull2 V11.8H, V29.16B, V29.16B // UMULL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ uqadd Z18.B, Z18.B, #14 // UQADD <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ uqadd Z2.S, Z2.S, #14 // UQADD <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ uqadd Z24.S, Z24.S, #56, LSL #0 // UQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ uqadd Z6.H, Z28.H, Z5.H // UQADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ uqadd S0, S24, S30 // UQADD <V><d>, <V><n>, <V><m> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ uqadd V14.2D, V22.2D, V20.2D // UQADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ uqdecb W10 // UQDECB <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecb W8, VL3 // UQDECB <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecb W3, VL32, MUL #1 // UQDECB <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecb X8 // UQDECB <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecb X3, VL5 // UQDECB <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecb X22, MUL3, MUL #2 // UQDECB <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecd W11 // UQDECD <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecd W27, VL256 // UQDECD <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecd W6, VL32, MUL #10 // UQDECD <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecd X1 // UQDECD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecd X12, VL8 // UQDECD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecd X10, VL64, MUL #10 // UQDECD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecd Z0.D // UQDECD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ uqdecd Z8.D, VL3 // UQDECD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ uqdecd Z27.D, VL16, MUL #2 // UQDECD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ uqdech W30 // UQDECH <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdech W28, MUL3 // UQDECH <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdech W5, VL5, MUL #8 // UQDECH <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdech X2 // UQDECH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdech X15, VL7 // UQDECH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdech X17, VL256, MUL #10 // UQDECH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdech Z5.H // UQDECH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ uqdech Z16.H, VL128 // UQDECH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ uqdech Z27.H, VL128, MUL #15 // UQDECH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ uqdecp W19, P5.H // UQDECP <Wdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+ uqdecp X1, P1.B // UQDECP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+ uqdecp Z20.S, P0 // UQDECP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 3 7 7 0.50 V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV, V1UnitV01
+ uqdecw W17 // UQDECW <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecw W11, VL256 // UQDECW <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecw W13, MUL4, MUL #13 // UQDECW <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecw X7 // UQDECW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecw X28, VL32 // UQDECW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecw X0, VL256, MUL #3 // UQDECW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecw Z29.S // UQDECW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ uqdecw Z22.S, VL2 // UQDECW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ uqdecw Z20.S, VL2, MUL #10 // UQDECW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ uqincb W2 // UQINCB <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincb W21, VL128 // UQINCB <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincb W0, ALL, MUL #13 // UQINCB <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincb X24 // UQINCB <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincb X18, VL7 // UQINCB <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincb X13, VL256, MUL #13 // UQINCB <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincd W23 // UQINCD <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincd W27, VL4 // UQINCD <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincd W7, VL32, MUL #16 // UQINCD <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincd X0 // UQINCD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincd X29, MUL4 // UQINCD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincd X20, POW2, MUL #3 // UQINCD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincd Z29.D // UQINCD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ uqincd Z4.D, VL64 // UQINCD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ uqincd Z12.D, VL6, MUL #13 // UQINCD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ uqinch W4 // UQINCH <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqinch W23, MUL3 // UQINCH <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqinch W27, VL7, MUL #3 // UQINCH <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqinch X8 // UQINCH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqinch X13, MUL3 // UQINCH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqinch X5, MUL4, MUL #9 // UQINCH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqinch Z21.H // UQINCH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ uqinch Z1.H, VL8 // UQINCH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ uqinch Z7.H, VL7, MUL #12 // UQINCH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ uqincp W4, P5.D // UQINCP <Wdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+ uqincp X13, P5.D // UQINCP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+ uqincp Z1.S, P0 // UQINCP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 3 7 7 0.50 V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV, V1UnitV01
+ uqincw W13 // UQINCW <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincw W26, VL8 // UQINCW <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincw W3, VL16, MUL #13 // UQINCW <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincw X26 // UQINCW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincw X13, VL256 // UQINCW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincw X29, VL7, MUL #6 // UQINCW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincw Z26.S // UQINCW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ uqincw Z31.S, VL5 // UQINCW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ uqincw Z12.S, VL7, MUL #4 // UQINCW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+ uqrshl S17, S5, S8 // UQRSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ uqrshl V25.8B, V13.8B, V23.8B // UQRSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ uqrshrn B12, H9, #4 // UQRSHRN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+ uqrshrn H1, S28, #2 // UQRSHRN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+ uqrshrn S1, D4, #12 // UQRSHRN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+ uqrshrn V17.8B, V24.8H, #4 // UQRSHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqrshrn V29.4H, V25.4S, #10 // UQRSHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqrshrn V16.2S, V0.2D, #10 // UQRSHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqrshrn2 V5.16B, V28.8H, #6 // UQRSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqrshrn2 V28.8H, V22.4S, #15 // UQRSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqrshrn2 V20.4S, V13.2D, #4 // UQRSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshl B16, B25, #3 // UQSHL B<d>, B<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshl H22, H27, #3 // UQSHL H<d>, H<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshl S9, S5, #2 // UQSHL S<d>, S<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshl D25, D1, #30 // UQSHL D<d>, D<n>, #<shiftd> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshl V25.16B, V0.16B, #7 // UQSHL <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshl V1.4H, V12.4H, #15 // UQSHL <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshl V23.2S, V4.2S, #17 // UQSHL <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshl V28.2D, V23.2D, #48 // UQSHL <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshl B22, B26, B2 // UQSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshl V8.4H, V17.4H, V13.4H // UQSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshrn B16, H27, #6 // UQSHRN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+ uqshrn H4, S2, #15 // UQSHRN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+ uqshrn S0, D15, #22 // UQSHRN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+ uqshrn V19.8B, V26.8H, #3 // UQSHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshrn V31.4H, V17.4S, #8 // UQSHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshrn V1.2S, V11.2D, #9 // UQSHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshrn2 V23.16B, V16.8H, #1 // UQSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshrn2 V1.8H, V12.4S, #2 // UQSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshrn2 V30.4S, V29.2D, #32 // UQSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqsub Z26.B, Z26.B, #174 // UQSUB <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ uqsub Z19.S, Z19.S, #228 // UQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ uqsub Z15.H, Z15.H, #104, LSL #8 // UQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ uqsub Z25.D, Z13.D, Z19.D // UQSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ uqsub S16, S21, S6 // UQSUB <V><d>, <V><n>, <V><m> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ uqsub V19.4S, V0.4S, V5.4S // UQSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ uqxtn S3, D27 // UQXTN <Vb><d>, <Va><n> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+ uqxtn V26.2S, V5.2D // UQXTN <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+ uqxtn2 V15.16B, V22.8H // UQXTN2 <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+ urecpe V10.2S, V8.2S // URECPE <Vd>.2S, <Vn>.2S \\ ASIMD reciprocal and square root estimate, D-form U32 \\ 1 3 3 2.0 V1UnitV02
+ urecpe V1.4S, V23.4S // URECPE <Vd>.4S, <Vn>.4S \\ ASIMD reciprocal and square root estimate, Q-form U32 \\ 1 4 4 2.00 V1UnitV, V1UnitV02
+ urhadd V16.2S, V19.2S, V2.2S // URHADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ urshl D24, D22, D29 // URSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ urshl V31.8B, V5.8B, V3.8B // URSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ urshr D23, D19, #62 // URSHR <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+ urshr V23.16B, V14.16B, #2 // URSHR <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ urshr V16.4H, V13.4H, #7 // URSHR <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ urshr V10.4S, V10.4S, #21 // URSHR <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ urshr V2.2D, V16.2D, #30 // URSHR <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ ursqrte V15.2S, V20.2S // URSQRTE <Vd>.2S, <Vn>.2S \\ ASIMD reciprocal and square root estimate, D-form U32 \\ 1 3 3 2.0 V1UnitV02
+ ursqrte V31.4S, V14.4S // URSQRTE <Vd>.4S, <Vn>.4S \\ ASIMD reciprocal and square root estimate, Q-form U32 \\ 1 4 4 2.00 V1UnitV, V1UnitV02
+ ursra D24, D24, #48 // URSRA <V><d>, <V><n>, #<shift> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ ursra V14.8B, V18.8B, #1 // URSRA <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ ursra V9.4H, V9.4H, #16 // URSRA <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ ursra V25.2S, V17.2S, #9 // URSRA <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ ursra V17.2D, V16.2D, #61 // URSRA <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ usdot V0.2S, V18.8B, V10.4B[3] // USDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<index>] \\ ASIMD dot product using signed and unsigned integers \\ 1 3 1 4.0 V1UnitV
+ usdot Z5.S, Z25.B, Z2.B[1] // USDOT <Zda>.S, <Zn>.B, <Zm>.B[<imm>] \\ Dot product, 8 bit, using signed and unsigned integers \\ 1 3 1 2.0 V1UnitV[2]
+ usdot V17.2S, V0.8B, V29.8B // USDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD dot product using signed and unsigned integers \\ 1 3 1 4.0 V1UnitV
+ usdot Z8.S, Z6.B, Z18.B // USDOT <Zda>.S, <Zn>.B, <Zm>.B \\ Dot product, 8 bit, using signed and unsigned integers \\ 1 3 1 2.0 V1UnitV[2]
+ ushl D7, D17, D3 // USHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, basic \\ 1 2 2 2.0 V1UnitV13
+ ushl V6.8B, V26.8B, V6.8B // USHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, basic \\ 1 2 2 2.0 V1UnitV13
+ ushll V18.8H, V24.8B, #4 // USHLL <Vd>.8H, <Vn>.8B, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ ushll V12.4S, V10.4H, #3 // USHLL <Vd>.4S, <Vn>.4H, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ ushll V16.2D, V16.2S, #31 // USHLL <Vd>.2D, <Vn>.2S, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ ushll2 V14.8H, V3.16B, #3 // USHLL2 <Vd>.8H, <Vn>.16B, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ ushll2 V18.4S, V22.8H, #13 // USHLL2 <Vd>.4S, <Vn>.8H, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ ushll2 V31.2D, V12.4S, #11 // USHLL2 <Vd>.2D, <Vn>.4S, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ ushr D23, D22, #58 // USHR <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed, basic \\ 1 2 2 4.00 V1UnitV
+ ushr V24.8B, V0.8B, #2 // USHR <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ ushr V21.8H, V31.8H, #11 // USHR <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ ushr V27.2S, V24.2S, #14 // USHR <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ ushr V0.2D, V27.2D, #48 // USHR <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ usmmla V25.4S, V10.16B, V11.16B // USMMLA <Vd>.4S, <Vn>.16B, <Vm>.16B \\ ASIMD matrix multiply-accumulate \\ 1 3 1 4.0 V1UnitV
+ usqadd H14, H13 // USQADD <V><d>, <V><n> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ usqadd V18.2D, V23.2D // USQADD <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ usra D22, D24, #9 // USRA <V><d>, <V><n>, #<shift> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ usra V16.16B, V5.16B, #5 // USRA <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ usra V18.4H, V22.4H, #11 // USRA <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ usra V13.2S, V12.2S, #24 // USRA <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ usra V30.2D, V30.2D, #41 // USRA <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ usubl V22.4S, V18.4H, V3.4H // USUBL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ usubl2 V12.8H, V23.16B, V15.16B // USUBL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ usubw V30.8H, V12.8H, V20.8B // USUBW <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ usubw2 V2.4S, V0.4S, V30.8H // USUBW2 <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ uunpkhi Z26.D, Z26.S // UUNPKHI <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 1 2 2 2.0 V1UnitV01
+ uunpklo Z10.S, Z11.H // UUNPKLO <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 1 2 2 2.0 V1UnitV01
+ uxtb W2, W23 // UXTB <Wd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
+ uxtb Z1.D, P2/M, Z11.D // UXTB <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
+ uxth Z6.S, P3/M, Z18.S // UXTH <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
+ uxtw Z23.D, P4/M, Z3.D // UXTW <Zd>.D, <Pg>/M, <Zn>.D \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
+ uxth W7, W14 // UXTH <Wd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
+ uxtl V1.4S, V22.4H // UXTL <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ uxtl2 V14.8H, V3.16B // UXTL2 <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ uzp1 V9.2S, V29.2S, V20.2S // UZP1 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD unzip/zip \\ 1 2 2 4.0 V1UnitV
+ uzp1 P5.D, P3.D, P5.D // UZP1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> \\ Predicate zip/unzip \\ 1 2 2 1.0 V1UnitM0
+ uzp2 P6.S, P0.S, P6.S // UZP2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> \\ Predicate zip/unzip \\ 1 2 2 1.0 V1UnitM0
+ uzp2 V18.4S, V12.4S, V31.4S // UZP2 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD unzip/zip \\ 1 2 2 4.0 V1UnitV
+ wfe // WFE \\ No description \\ No scheduling info
+ wfi // WFI \\ No description \\ No scheduling info
+ whilele P6.H, X28, X30 // WHILELE <Pd>.<T>, <R><n>, <R><m> \\ Loop control, based on GPR \\ 2 3 3 0.50 V1UnitM0[2]
+ whilelo P3.B, X9, X7 // WHILELO <Pd>.<T>, <R><n>, <R><m> \\ Loop control, based on GPR \\ 2 3 3 0.50 V1UnitM0[2]
+ whilels P4.B, W4, W20 // WHILELS <Pd>.<T>, <R><n>, <R><m> \\ Loop control, based on GPR \\ 2 3 3 0.50 V1UnitM0[2]
+ whilelt P7.S, X20, X6 // WHILELT <Pd>.<T>, <R><n>, <R><m> \\ Loop control, based on GPR \\ 2 3 3 0.50 V1UnitM0[2]
+ wrffr P7.B // WRFFR <Pn>.B \\ Write to first fault register \\ 1 2 2 1.0 V1UnitM0
+ xtn V20.8B, V17.8H // XTN <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow \\ 1 2 2 4.0 V1UnitV
+ xtn2 V31.16B, V26.8H // XTN2 <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow \\ 1 2 2 4.0 V1UnitV
+ yield // YIELD \\ No description \\ No scheduling info
+ zip1 V21.2D, V4.2D, V11.2D // ZIP1 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD unzip/zip \\ 1 2 2 4.0 V1UnitV
+ zip1 P0.D, P1.D, P4.D // ZIP1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> \\ Predicate zip/unzip \\ 1 2 2 1.0 V1UnitM0
+ zip2 P3.S, P5.S, P4.S // ZIP2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> \\ Predicate zip/unzip \\ 1 2 2 1.0 V1UnitM0
+ zip2 V2.4S, V20.4S, V5.4S // ZIP2 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD unzip/zip \\ 1 2 2 4.0 V1UnitV
+
+.Lfunc_end0:
+ .size test, .Lfunc_end0-test
+ .cfi_endproc
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 376600
+# CHECK-NEXT: Total Cycles: 254716
+# CHECK-NEXT: Total uOps: 755000
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 2.96
+# CHECK-NEXT: IPC: 1.48
+# CHECK-NEXT: Block RThroughput: 606.5
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - V1UnitB:2
+# CHECK-NEXT: [1] - V1UnitD:2
+# CHECK-NEXT: [2] - V1UnitFlg:3
+# CHECK-NEXT: [3] - V1UnitI:4 V1UnitS, V1UnitS, V1UnitM0, V1UnitM1
+# CHECK-NEXT: [4] - V1UnitL:3 V1UnitL01, V1UnitL01, V1UnitL2
+# CHECK-NEXT: [5] - V1UnitL2:1
+# CHECK-NEXT: [6] - V1UnitL01:2
+# CHECK-NEXT: [7] - V1UnitM:2 V1UnitM0, V1UnitM1
+# CHECK-NEXT: [8] - V1UnitM0:1
+# CHECK-NEXT: [9] - V1UnitM1:1
+# CHECK-NEXT: [10] - V1UnitS:2
+# CHECK-NEXT: [11] - V1UnitV:4 V1UnitV0, V1UnitV1, V1UnitV2, V1UnitV3
+# CHECK-NEXT: [12] - V1UnitV0:1
+# CHECK-NEXT: [13] - V1UnitV1:1
+# CHECK-NEXT: [14] - V1UnitV2:1
+# CHECK-NEXT: [15] - V1UnitV3:1
+# CHECK-NEXT: [16] - V1UnitV01:2 V1UnitV0, V1UnitV1
+# CHECK-NEXT: [17] - V1UnitV02:2 V1UnitV0, V1UnitV2
+# CHECK-NEXT: [18] - V1UnitV13:2 V1UnitV1, V1UnitV3
+
+# CHECK: Scheduling Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: Bypass Latency
+# CHECK-NEXT: [4]: Throughput
+# CHECK-NEXT: [5]: Resources
+# CHECK-NEXT: [6]: LLVM OpcodeName
+# CHECK-NEXT: [7]: Instruction
+# CHECK-NEXT: [8]: Comment if any
+# CHECK-NEXT: [1] [2] [3] [4] [5] [6] [7] [8]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ABSv1i64 | abs d15, d11 /* ABS <V><d>, <V><n> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV */
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ABSv2i32 | abs v25.2s, v25.2s // ABS <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ABS_ZPmZ_B | abs z26.b, p6/m, z27.b // ABS <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADCWr | adc w13, w6, w4 // ADC <Wd>, <Wn>, <Wm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADCXr | adc x8, x12, x10 // ADC <Xd>, <Xn>, <Xm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADCSWr | adcs w29, w7, w30 // ADCS <Wd>, <Wn>, <Wm> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADCSXr | adcs x11, x3, x5 // ADCS <Xd>, <Xn>, <Xm> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | ADDWrx | add wsp, wsp, w10 // ADD <Wd|WSP>, <Wn|WSP>, <Wm> \\ ALU, basic, unconditional, no flagset \\ 1 2 2 2.00 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | ADDWrx | add wsp, wsp, w2, uxtb // ADD <Wd|WSP>, <Wn|WSP>, <Wm>, <wextend> \\ ALU, basic, unconditional, no flagset \\ 1 2 2 2.00 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | ADDWrx | add wsp, wsp, w13, uxth #4 // ADD <Wd|WSP>, <Wn|WSP>, <Wm>, <wextend> #<amount> \\ ALU, basic, unconditional, no flagset \\ 1 2 2 2.00 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | ADDWrx | add wsp, wsp, w13, lsl #4 // ADD <Wd|WSP>, <Wn|WSP>, <Wm>, LSL #<amount> \\ Arithmetic, LSL shift, shift <= 4 \\ 1 2 2 2.00 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADDXrs | add x22, x2, x27 // ADD <Xd|SP>, <Xn|SP>, X<m> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | ADDXrx | add x25, x9, w25, uxtb // ADD <Xd|SP>, <Xn|SP>, <R><m>, <extend> \\ ALU, basic \\ 1 2 2 2.00 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | ADDXrx | add x4, x28, w3, uxtb #3 // ADD <Xd|SP>, <Xn|SP>, <R><m>, <extend> #<amount> \\ ALU, extend and shift \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADDXrs | add x0, x28, x26, lsl #3 // ADD <Xd|SP>, <Xn|SP>, X<m>, LSL #<amount> \\ Arithmetic, LSL shift, shift <= 4 \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADDWri | add wsp, wsp, #3765 // ADD <Wd|WSP>, <Wn|WSP>, #<imm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADDWri | add wsp, wsp, #3547, lsl #12 // ADD <Wd|WSP>, <Wn|WSP>, #<imm>, <shift> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADDXri | add x7, x30, #803 // ADD <Xd|SP>, <Xn|SP>, #<imm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADDXri | add x7, x2, #319, lsl #12 // ADD <Xd|SP>, <Xn|SP>, #<imm>, <shift> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ADD_ZI_D | add z13.d, z13.d, #245 // ADD <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ADD_ZI_D | add z16.d, z16.d, #59648 // ADD <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADDWrs | add w3, w2, w21, lsl #3 // ADD <Wd>, <Wn>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | ADDWrs | add w6, w21, w17, lsl #15 // ADD <Wd>, <Wn>, <Wm>, LSL #<wamounth> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | ADDWrs | add w28, w30, w19, asr #30 // ADD <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADDXrs | add x8, x3, x28, lsl #3 // ADD <Xd>, <Xn>, <Xm>, LSL #<amountl> \\ Arithmetic, LSL shift, shift <= 4 \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | ADDXrs | add x12, x13, x0, lsl #44 // ADD <Xd>, <Xn>, <Xm>, LSL #<amounth> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | ADDXrs | add x5, x20, x28, lsr #16 // ADD <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ADDv1i64 | add d0, d23, d21 // ADD <V><d>, <V><n>, <V><m> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ADDv4i32 | add v19.4s, v24.4s, v15.4s // ADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ADD_ZPmZ_D | add z29.d, p5/m, z29.d, z29.d // ADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ADD_ZZZ_H | add z10.h, z22.h, z13.h // ADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ADDHNv4i32_v4i16 | addhn v26.4h, v5.4s, v9.4s // ADDHN <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ADDHNv8i16_v16i8 | addhn2 v1.16b, v19.8h, v6.8h // ADDHN2 <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ADDPv2i64p | addp d1, v14.2d // ADDP <V><d>, <Vn>.<T> \\ ASIMD arith, pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ADDPv2i32 | addp v7.2s, v1.2s, v2.2s // ADDP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | ADDPL_XXI | addpl x27, x6, #-6 // ADDPL <Xd|SP>, <Xn|SP>, #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ADDSWrx | adds w17, wsp, w25 // ADDS <Wd>, <Wn|WSP>, <Wm> \\ ALU, basic, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ADDSWrx | adds w6, wsp, w15, uxth // ADDS <Wd>, <Wn|WSP>, <Wm>, <wextend> \\ ALU, basic, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSWrx | adds w22, wsp, w30, uxtb #2 // ADDS <Wd>, <Wn|WSP>, <Wm>, <wextend> #<amount> \\ ALU, basic, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ADDSWrx | adds w12, wsp, w29, lsl #4 // ADDS <Wd>, <Wn|WSP>, <Wm>, LSL #<amount> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSXrs | adds x14, x0, x10 // ADDS <Xd>, <Xn|SP>, X<m> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSXrx | adds x13, x23, w8, uxtb // ADDS <Xd>, <Xn|SP>, <R><m>, <extend> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSXrx | adds x4, x26, w28, uxtb #1 // ADDS <Xd>, <Xn|SP>, <R><m>, <extend> #<amount> \\ ALU, flagset, extend and shift \\ 1 1 1 3.00 V1UnitFlg, V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSXrs | adds x10, x3, x29, lsl #2 // ADDS <Xd>, <Xn|SP>, X<m>, LSL #<amount> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSWri | adds w23, wsp, #502 // ADDS <Wd>, <Wn|WSP>, #<imm> \\ ALU, basic, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSWri | adds w2, wsp, #2980, lsl #12 // ADDS <Wd>, <Wn|WSP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 1 1 3.00 V1UnitFlg, V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSXri | adds x12, x4, #1345 // ADDS <Xd>, <Xn|SP>, #<imm> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSXri | adds x25, x18, #3037, lsl #12 // ADDS <Xd>, <Xn|SP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 1 1 3.00 V1UnitFlg, V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSWrs | adds w12, w13, w26 // ADDS <Wd>, <Wn>, <Wm> \\ ALU, basic, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSWrs | adds w0, w23, w20 // ADDS <Wd>, <Wn>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ADDSWrs | adds w13, w16, w12, lsl #28 // ADDS <Wd>, <Wn>, <Wm>, LSL #<wamounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ADDSWrs | adds w20, w19, w16, asr #0 // ADDS <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSXrs | adds x23, x12, x4 // ADDS <Xd>, <Xn>, <Xm> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSXrs | adds x0, x13, x4, lsl #2 // ADDS <Xd>, <Xn>, <Xm>, LSL #<amountl> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ADDSXrs | adds x4, x7, x6, lsl #31 // ADDS <Xd>, <Xn>, <Xm>, LSL #<amounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ADDSXrs | adds x9, x8, x9, asr #41 // ADDS <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitV[2], V1UnitV13 | ADDVv8i8v | addv b0, v28.8b // ADDV B<d>, <Vn>.8B \\ ASIMD arith, reduce, 8B/8H \\ 2 4 4 2.00 V1UnitV13
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV13[2] | ADDVv16i8v | addv b1, v26.16b // ADDV B<d>, <Vn>.16B \\ ASIMD arith, reduce, 16B \\ 2 4 4 1.00 V1UnitV13[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | ADDVv4i16v | addv h18, v13.4h // ADDV H<d>, <Vn>.4H \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitV[2], V1UnitV13 | ADDVv8i16v | addv h29, v17.8h // ADDV H<d>, <Vn>.8H \\ ASIMD arith, reduce, 8B/8H \\ 2 4 4 2.00 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | ADDVv4i32v | addv s22, v18.4s // ADDV S<d>, <Vn>.4S \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | ADDVL_XXI | addvl x1, x27, #-8 // ADDVL <Xd|SP>, <Xn|SP>, #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADR | adr x3, test // ADR <Xd>, <label> \\ Address generation \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ADR_LSL_ZZZ_D_0 | adr z26.d, [z1.d, z8.d] // ADR <Zd>.<T>, [<Zn>.<T>, <Zm>.<T>] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ADR_LSL_ZZZ_S_2 | adr z22.s, [z28.s, z8.s, lsl #2] // ADR <Zd>.<T>, [<Zn>.<T>, <Zm>.<T>, <mod> #<amount>] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ADR_SXTW_ZZZ_D_0 | adr z11.d, [z2.d, z29.d, sxtw] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, SXTW ] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ADR_SXTW_ZZZ_D_2 | adr z3.d, [z9.d, z9.d, sxtw #2] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, SXTW #<amount>] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ADR_UXTW_ZZZ_D_0 | adr z6.d, [z7.d, z13.d, uxtw] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, UXTW ] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ADR_UXTW_ZZZ_D_1 | adr z4.d, [z24.d, z22.d, uxtw #1] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, UXTW #<amount>] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADRP | adrp x0, test // ADRP <Xd>, <label> \\ Address generation \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ANDWri | and wsp, w16, #0xe00 // AND <Wd|WSP>, <Wn>, #<imms> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ANDXri | and x2, x22, #0x1e00 // AND <Xd|SP>, <Xn>, #<imm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | AND_ZI | and z1.b, z1.b, #0x70 // AND <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | AND_ZI | and z7.h, z7.h, #0x60 // AND <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | AND_ZI | and z7.s, z7.s, #0x2 // AND <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | AND_ZI | and z7.d, z7.d, #0x4 // AND <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | AND_PPzPP | and p5.b, p1/z, p6.b, p4.b // AND <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ANDWrs | and w11, w14, w24 // AND <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ANDWrs | and w2, w21, w22, lsr #25 // AND <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ANDXrs | and x1, x20, x29 // AND <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ANDXrs | and x8, x11, x22, asr #56 // AND <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ANDv8i8 | and v29.8b, v26.8b, v26.8b // AND <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | AND_ZPmZ_D | and z17.d, p6/m, z17.d, z12.d // AND <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | AND_ZZZ | and z9.d, z5.d, z17.d // AND <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ANDSWri | ands w14, w8, #0x70 // ANDS <Wd>, <Wn>, #<imms> \\ ALU, basic, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ANDSXri | ands x4, x10, #0x60 // ANDS <Xd>, <Xn>, #<immd> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ANDSWrs | ands w29, w28, w12 // ANDS <Wd>, <Wn>, <Wm> \\ ALU, basic, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ANDSWrs | ands w7, w13, w23, asr #3 // ANDS <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift by immed, flagset, unconditional \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ANDSXrs | ands x21, x9, x6 // ANDS <Xd>, <Xn>, <Xm> \\ ALU, basic, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ANDSXrs | ands x10, x27, x7, asr #20 // ANDS <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, flagset \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | ANDS_PPzPP | ands p5.b, p1/z, p2.b, p7.b // ANDS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 2 2 2 0.50 V1UnitM0[2]
+# CHECK-NEXT: 4 | 12 | 12 | 0.50 | V1UnitV[4], V1UnitV01[4] | ANDV_VPZ_H | andv h7, p6, z31.h // ANDV <V><d>, <Pg>, <Zn>.<T> \\ Reduction, logical \\ 4 12 12 0.50 V1UnitV01[4]
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMWri | asr w30, w14, #5 // ASR <Wd>, <Wn>, #<shifts> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMXri | asr x12, x21, #28 // ASR <Xd>, <Xn>, #<shiftd> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_ZPmI_B | asr z7.b, p5/m, z7.b, #3 // ASR <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_ZPmI_H | asr z6.h, p6/m, z6.h, #5 // ASR <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_ZPmI_S | asr z28.s, p0/m, z28.s, #11 // ASR <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_ZPmI_D | asr z26.d, p5/m, z26.d, #24 // ASR <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_ZZI_B | asr z10.b, z14.b, #3 // ASR <Zd>.B, <Zn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_ZZI_H | asr z23.h, z18.h, #6 // ASR <Zd>.H, <Zn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_ZZI_S | asr z29.s, z11.s, #6 // ASR <Zd>.S, <Zn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_ZZI_D | asr z20.d, z26.d, #29 // ASR <Zd>.D, <Zn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ASRVWr | asr w3, w0, w20 // ASR <Wd>, <Wn>, <Wm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ASRVXr | asr x7, x5, x21 // ASR <Xd>, <Xn>, <Xm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_ZPmZ_S | asr z3.s, p0/m, z3.s, z10.s // ASR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_WIDE_ZPmZ_S | asr z9.s, p2/m, z9.s, z8.d // ASR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_WIDE_ZZZ_S | asr z26.s, z21.s, z21.d // ASR <Zd>.<T>, <Zn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASRD_ZPmI_B | asrd z6.b, p4/m, z6.b, #2 // ASRD <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift right for divide \\ 1 4 4 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASRD_ZPmI_H | asrd z19.h, p3/m, z19.h, #6 // ASRD <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift right for divide \\ 1 4 4 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASRD_ZPmI_S | asrd z16.s, p3/m, z16.s, #2 // ASRD <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift right for divide \\ 1 4 4 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASRD_ZPmI_D | asrd z9.d, p6/m, z9.d, #12 // ASRD <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift right for divide \\ 1 4 4 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASRR_ZPmZ_B | asrr z0.b, p0/m, z0.b, z19.b // ASRR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ASRVWr | asr w24, w28, w13 // ASRV <Wd>, <Wn>, <Wm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ASRVXr | asr x3, x21, x24 // ASRV <Xd>, <Xn>, <Xm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | SYSxt | at s12e1r, x28 // AT <at_op>, <Xt> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | B | b test // B <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.eq test // B.eq <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.eq test // B.none <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.ne test // B.ne <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.ne test // B.any <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.hs test // B.cs <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.hs test // B.hs <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.hs test // B.nlast <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.lo test // B.cc <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.lo test // B.lo <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.lo test // B.last <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.mi test // B.mi <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.mi test // B.first <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.pl test // B.pl <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.pl test // B.nfrst <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.vs test // B.vs <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.vc test // B.vc <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.hi test // B.hi <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.hi test // B.pmore <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.ls test // B.ls <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.ls test // B.plast <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.ge test // B.ge <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.ge test // B.tcont <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.lt test // B.lt <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.lt test // B.tstop <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.gt test // B.gt <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.le test // B.le <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.al test // B.al <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.nv test // B.nv <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | BFCVT | bfcvt h6, s20 // BFCVT <Hd>, <Sn> \\ Scalar convert, F32 to BF16 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | BFCVT_ZPmZ | bfcvt z16.h, p6/m, z1.s // BFCVT <Zd>.H, <Pg>/M, <Zn>.S \\ Convert, F32 to BF16 \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | BFCVTN | bfcvtn v12.4h, v15.4s // BFCVTN <Vd>.<Ta>, <Vn>.4S \\ ASIMD convert, F32 to BF16 \\ 1 4 4 2.00 V1UnitV, V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | BFCVTN2 | bfcvtn2 v15.8h, v13.4s // BFCVTN2 <Vd>.<Ta>, <Vn>.4S \\ ASIMD convert, F32 to BF16 \\ 1 4 4 2.00 V1UnitV, V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | BFCVTNT_ZPmZ | bfcvtnt z11.h, p7/m, z24.s // BFCVTNT <Zd>.H, <Pg>/M, <Zn>.S \\ Convert, F32 to BF16 \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | BF16DOTlanev4bf16 | bfdot v0.2s, v24.4h, v14.2h[2] // BFDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.2H[<index>] \\ ASIMD dot product \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | BFDOT_ZZI | bfdot z24.s, z26.h, z2.h[0] // BFDOT <Zda>.S, <Zn>.H, <Zm>.H[<imm>] \\ Dot product \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | BFDOTv8bf16 | bfdot v31.4s, v21.8h, v14.8h // BFDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD dot product \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | BFDOT_ZZZ | bfdot z15.s, z3.h, z7.h // BFDOT <Zda>.S, <Zn>.H, <Zm>.H \\ Dot product \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | BFMWri | bfi w10, w26, #31, #1 // BFI <Wd>, <Wn>, #<lsbs>, #<widths> \\ Bit field insert/clear, unconditional \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | BFMXri | bfi x25, x7, #8, #1 // BFI <Xd>, <Xn>, #<lsbd>, #<widthd> \\ Bit field insert/clear, unconditional \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | BFMWri | bfi w30, w26, #18, #13 // BFM <Wd>, <Wn>, #<immrs>, #<immss> \\ Bitfield move, insert \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | BFMXri | bfxil x15, x20, #0, #36 // BFM <Xd>, <Xn>, #<immrd>, #<immsd> \\ Bitfield move, insert \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 5 | 2 | 2.00 | V1UnitV, V1UnitV01 | BFMLALB_ZZZI | bfmlalb z13.s, z30.h, z0.h[0] // BFMLALB <Zda>.S, <Zn>.H, <Zm>.H[<imm>] \\ Multiply accumulate long \\ 1 5 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 5 | 2 | 2.00 | V1UnitV, V1UnitV01 | BFMLALB_ZZZ | bfmlalb z3.s, z14.h, z13.h // BFMLALB <Zda>.S, <Zn>.H, <Zm>.H \\ Multiply accumulate long \\ 1 5 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | BFMLALBIdx | bfmlalb v22.4s, v11.8h, v11.h[5] // BFMLALB <Vd>.4S, <Vn>.8H, <Vm>.H[<index>] \\ ASIMD multiply accumulate long \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | BFMLALTIdx | bfmlalt v17.4s, v4.8h, v11.h[7] // BFMLALT <Vd>.4S, <Vn>.8H, <Vm>.H[<index>] \\ ASIMD multiply accumulate long \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | BFMLALB | bfmlalb v13.4s, v5.8h, v17.8h // BFMLALB <Vd>.4S, <Vn>.8H, <Vm>.8H \\ ASIMD multiply accumulate long \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | BFMLALT | bfmlalt v10.4s, v16.8h, v1.8h // BFMLALT <Vd>.4S, <Vn>.8H, <Vm>.8H \\ ASIMD multiply accumulate long \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 5 | 2 | 2.00 | V1UnitV, V1UnitV01 | BFMLALT_ZZZI | bfmlalt z23.s, z3.h, z2.h[2] // BFMLALT <Zda>.S, <Zn>.H, <Zm>.H[<imm>] \\ Multiply accumulate long \\ 1 5 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 5 | 2 | 2.00 | V1UnitV, V1UnitV01 | BFMLALT_ZZZ | bfmlalt z25.s, z21.h, z22.h // BFMLALT <Zda>.S, <Zn>.H, <Zm>.H \\ Multiply accumulate long \\ 1 5 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 5 | 3 | 4.00 | V1UnitV | BFMMLA | bfmmla v15.4s, v28.8h, v23.8h // BFMMLA <Vd>.4S, <Vn>.8H, <Vm>.8H \\ ASIMD matrix multiply accumulate \\ 1 5 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 5 | 3 | 2.00 | V1UnitV, V1UnitV01 | BFMMLA_ZZZ | bfmmla z26.s, z2.h, z12.h // BFMMLA <Zda>.S, <Zn>.H, <Zm>.H \\ Matrix multiply accumulate \\ 1 5 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | BFMWri | bfxil w27, w23, #14, #14 // BFXIL <Wd>, <Wn>, #<lsbs>, #<widths> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | BFMXri | bfxil x0, x5, #11, #22 // BFXIL <Xd>, <Xn>, #<lsbd>, #<widthd> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | AND_ZI | and z28.b, z28.b, #0x8f // BIC <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | AND_ZI | and z18.h, z18.h, #0xff9f // BIC <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | AND_ZI | and z12.s, z12.s, #0xfffffffd // BIC <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | AND_ZI | and z6.d, z6.d, #0xfffffffffffffffb // BIC <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | BIC_PPzPP | bic p4.b, p4/z, p6.b, p0.b // BIC <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | BICWrs | bic w0, w26, w22 // BIC <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | BICWrs | bic w23, w10, w7, lsl #11 // BIC <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | BICXrs | bic x21, x20, x14 // BIC <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | BICXrs | bic x21, x3, x17, lsr #35 // BIC <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | BICv4i16 | bic v6.4h, #217 // BIC <Vd>.<Th>, #<imm8> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | BICv8i16 | bic v23.8h, #101 // BIC <Vd>.<Th>, #<imm8>, LSL #<amounth> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | BICv2i32 | bic v24.2s, #70 // BIC <Vd>.<T>, #<imm8> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | BICv2i32 | bic v31.2s, #192 // BIC <Vd>.<T>, #<imm8>, LSL #<amount> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | BICv16i8 | bic v25.16b, v10.16b, v9.16b // BIC <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | BIC_ZPmZ_D | bic z15.d, p4/m, z15.d, z25.d // BIC <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | BIC_ZZZ | bic z7.d, z8.d, z28.d // BIC <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | BICSWrs | bics w24, w1, w25 // BICS <Wd>, <Wn>, <Wm> \\ ALU, basic, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | BICSWrs | bics w21, w0, w24, lsl #11 // BICS <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift by immed, flagset, unconditional \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | BICSXrs | bics x27, x25, x10 // BICS <Xd>, <Xn>, <Xm> \\ ALU, basic, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | BICSXrs | bics x22, x6, x27, lsl #62 // BICS <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, flagset \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | BICS_PPzPP | bics p2.b, p4/z, p1.b, p7.b // BICS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 2 2 2 0.50 V1UnitM0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | BIFv8i8 | bif v0.8b, v25.8b, v4.8b // BIF <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD bitwise insert \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | BITv8i8 | bit v5.8b, v12.8b, v22.8b // BIT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD bitwise insert \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitB, V1UnitI, V1UnitS | BL | bl test // BL <label> \\ Branch and link, immed \\ 2 1 1 2.0 V1UnitB,V1UnitS
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitB, V1UnitI, V1UnitS | BLR | blr x11 // BLR <Xn> \\ Branch and link, register \\ 2 1 1 2.0 V1UnitB,V1UnitS
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | BR | br x17 // BR <Xn> \\ Branch, register \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | BRK | brk #0x8415 // BRK #<imm> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | BRKA_PPzP | brka p7.b, p7/z, p5.b // BRKA <Pd>.B, <Pg>/<ZM>, <Pn>.B \\ Loop control, based on predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 2 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | BRKAS_PPzP | brkas p6.b, p5/z, p0.b // BRKAS <Pd>.B, <Pg>/Z, <Pn>.B \\ Loop control, based on predicate and flag setting \\ 2 3 3 0.50 V1UnitM0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | BRKB_PPzP | brkb p5.b, p0/z, p1.b // BRKB <Pd>.B, <Pg>/<ZM>, <Pn>.B \\ Loop control, based on predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 2 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | BRKBS_PPzP | brkbs p6.b, p1/z, p4.b // BRKBS <Pd>.B, <Pg>/Z, <Pn>.B \\ Loop control, based on predicate and flag setting \\ 2 3 3 0.50 V1UnitM0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | BRKN_PPzP | brkn p7.b, p0/z, p6.b, p7.b // BRKN <Pdm>.B, <Pg>/Z, <Pn>.B, <Pdm>.B \\ Loop control, based on predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 2 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | BRKNS_PPzP | brkns p3.b, p1/z, p7.b, p3.b // BRKNS <Pdm>.B, <Pg>/Z, <Pn>.B, <Pdm>.B \\ Loop control, based on predicate and flag setting \\ 2 3 3 0.50 V1UnitM0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | BRKPA_PPzPP | brkpa p3.b, p5/z, p0.b, p1.b // BRKPA <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Loop control, based on predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 2 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | BRKPAS_PPzPP | brkpas p2.b, p5/z, p1.b, p3.b // BRKPAS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Loop control, based on predicate and flag setting \\ 2 3 3 0.50 V1UnitM0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | BRKPB_PPzPP | brkpb p1.b, p0/z, p7.b, p6.b // BRKPB <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Loop control, based on predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 2 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | BRKPBS_PPzPP | brkpbs p7.b, p1/z, p6.b, p1.b // BRKPBS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Loop control, based on predicate and flag setting \\ 2 3 3 0.50 V1UnitM0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | BSLv16i8 | bsl v27.16b, v13.16b, v21.16b // BSL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD bitwise insert \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | CBNZW | cbnz w21, test // CBNZ <Wt>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | CBNZX | cbnz x26, test // CBNZ <Xt>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | CBZW | cbz w6, test // CBZ <Wt>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | CBZX | cbz x4, test // CBZ <Xt>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CCMNWi | ccmn w8, #14, #3, hs // CCMN <Wn>, #<imm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CCMNXi | ccmn x23, #17, #0, gt // CCMN <Xn>, #<imm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CCMNWr | ccmn w17, w18, #12, hs // CCMN <Wn>, <Wm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CCMNXr | ccmn x19, x29, #12, lo // CCMN <Xn>, <Xm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CCMPWi | ccmp w24, #2, #5, hs // CCMP <Wn>, #<imm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CCMPXi | ccmp x12, #8, #2, lo // CCMP <Xn>, #<imm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CCMPWr | ccmp w2, w9, #3, lt // CCMP <Wn>, <Wm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CCMPXr | ccmp x11, x10, #13, ls // CCMP <Xn>, <Xm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSINCWr | cinc w23, w5, lt // CINC <Wd>, <Wn>, <cond> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSINCXr | cinc x2, x1, pl // CINC <Xd>, <Xn>, <cond> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSINVWr | cinv w9, w12, ge // CINV <Wd>, <Wn>, <cond> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSINVXr | cinv x9, x30, mi // CINV <Xd>, <Xn>, <cond> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | CLASTA_VPZ_B | clasta b11, p4, b11, z21.b // CLASTA <V><dn>, <Pg>, <V><dn>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 1 3 3 1.0 V1UnitV1
+# CHECK-NEXT: 2 | 9 | 9 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | CLASTA_RPZ_B | clasta w8, p0, w8, z6.b // CLASTA <R><dn>, <Pg>, <R><dn>, <Zm>.<T> \\ Conditional extract operations, scalar form \\ 2 9 9 1.0 V1UnitM0,V1UnitV1
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | CLASTA_ZPZ_S | clasta z25.s, p1, z25.s, z14.s // CLASTA <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 1 3 3 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | CLASTB_VPZ_D | clastb d6, p7, d6, z31.d // CLASTB <V><dn>, <Pg>, <V><dn>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 1 3 3 1.0 V1UnitV1
+# CHECK-NEXT: 2 | 9 | 9 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | CLASTB_RPZ_B | clastb w28, p6, w28, z12.b // CLASTB <R><dn>, <Pg>, <R><dn>, <Zm>.<T> \\ Conditional extract operations, scalar form \\ 2 9 9 1.0 V1UnitM0,V1UnitV1
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | CLASTB_ZPZ_H | clastb z27.h, p6, z27.h, z22.h // CLASTB <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 1 3 3 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | CLREX | clrex // CLREX \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | CLREX | clrex #12 // CLREX #<imm> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CLSv8i8 | cls v5.8b, v22.8b // CLS <Vd>.<T>, <Vn>.<T> \\ ASIMD count \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CLSWr | cls w25, w0 // CLS <Wd>, <Wn> \\ Count leading \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CLSXr | cls x22, x6 // CLS <Xd>, <Xn> \\ Count leading \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CLS_ZPmZ_D | cls z28.d, p3/m, z2.d // CLS <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Count/reverse bits \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CLZv8i16 | clz v24.8h, v30.8h // CLZ <Vd>.<T>, <Vn>.<T> \\ ASIMD count \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CLZWr | clz w26, w27 // CLZ <Wd>, <Wn> \\ Count leading zeros \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CLZXr | clz x4, x0 // CLZ <Xd>, <Xn> \\ Count leading zeros \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CLZ_ZPmZ_S | clz z3.s, p3/m, z18.s // CLZ <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Count/reverse bits \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMEQv1i64 | cmeq d26, d5, d25 // CMEQ <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMEQv8i16 | cmeq v9.8h, v16.8h, v24.8h // CMEQ <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMEQv1i64rz | cmeq d7, d26, #0 // CMEQ <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMEQv4i16rz | cmeq v14.4h, v18.4h, #0 // CMEQ <Vd>.<T>, <Vn>.<T>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMGEv1i64 | cmge d26, d21, d28 // CMGE <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMGEv8i16 | cmge v22.8h, v16.8h, v3.8h // CMGE <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMGEv1i64rz | cmge d30, d12, #0 // CMGE <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMGEv16i8rz | cmge v22.16b, v30.16b, #0 // CMGE <Vd>.<T>, <Vn>.<T>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMGTv1i64 | cmgt d23, d25, d12 // CMGT <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMGTv2i64 | cmgt v3.2d, v29.2d, v11.2d // CMGT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMGTv1i64rz | cmgt d28, d14, #0 // CMGT <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMGTv2i32rz | cmgt v22.2s, v10.2s, #0 // CMGT <Vd>.<T>, <Vn>.<T>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMHIv1i64 | cmhi d29, d16, d5 // CMHI <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMHIv4i16 | cmhi v28.4h, v25.4h, v21.4h // CMHI <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMHSv1i64 | cmhs d5, d3, d12 // CMHS <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMHSv8i8 | cmhs v6.8b, v31.8b, v12.8b // CMHS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMLEv1i64rz | cmle d14, d21, #0 // CMLE <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMLEv2i32rz | cmle v21.2s, v19.2s, #0 // CMLE <Vd>.<T>, <Vn>.<T>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMLTv1i64rz | cmlt d21, d24, #0 // CMLT <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMLTv4i16rz | cmlt v26.4h, v12.4h, #0 // CMLT <Vd>.<T>, <Vn>.<T>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ADDSWrx | cmn wsp, w7 // CMN <Wn|WSP>, <Wm> \\ ALU, basic, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ADDSWrx | cmn wsp, w8, sxtb // CMN <Wn|WSP>, <Wm>, <wextend> \\ ALU, basic, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSWrx | cmn wsp, w3, uxtb #3 // CMN <Wn|WSP>, <Wm>, <wextend> #<amount> \\ ALU, basic, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ADDSWrx | cmn wsp, w7, lsl #3 // CMN <Wn|WSP>, <Wm>, LSL #<amount> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSXrs | cmn x2, x28 // CMN <Xn|SP>, X<m> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSXrx | cmn x3, w0, uxtb // CMN <Xn|SP>, <R><m>, <extend> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSXrx | cmn x0, w4, uxtb #3 // CMN <Xn|SP>, <R><m>, <extend> #<amount> \\ ALU, flagset, extend and shift \\ 1 1 1 3.00 V1UnitFlg, V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSXrs | cmn x14, x26, lsl #2 // CMN <Xn|SP>, X<m>, LSL #<amount> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSWri | cmn wsp, #613 // CMN <Wn|WSP>, #<imm> \\ ALU, basic, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSWri | cmn wsp, #2991, lsl #12 // CMN <Wn|WSP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 1 1 3.00 V1UnitFlg, V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSXri | cmn x23, #3803 // CMN <Xn|SP>, #<imm> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSXri | cmn x29, #3786, lsl #12 // CMN <Xn|SP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 1 1 3.00 V1UnitFlg, V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSWrs | cmn w12, w0 // CMN <Wn>, <Wm> \\ ALU, basic, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSWrs | cmn w19, w27, lsl #1 // CMN <Wn>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ADDSWrs | cmn w2, w11, lsl #29 // CMN <Wn>, <Wm>, LSL #<wamounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ADDSWrs | cmn w0, w0, asr #30 // CMN <Wn>, <Wm>, <shift> #<wamount> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSXrs | cmn x23, x28 // CMN <Xn>, <Xm> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSXrs | cmn x6, x1, lsl #2 // CMN <Xn>, <Xm>, LSL #<amountl> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ADDSXrs | cmn x28, x30, lsl #26 // CMN <Xn>, <Xm>, LSL #<amounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ADDSXrs | cmn x25, x15, lsr #49 // CMN <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSWrx | cmp wsp, w26 // CMP <Wn|WSP>, <Wm> \\ ALU, basic, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSWrx | cmp wsp, w13, sxth // CMP <Wn|WSP>, <Wm>, <wextend> \\ ALU, basic, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSWrx | cmp wsp, w12, sxth #3 // CMP <Wn|WSP>, <Wm>, <wextend> #<amount> \\ ALU, basic, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSWrx | cmp wsp, w30, lsl #4 // CMP <Wn|WSP>, <Wm>, LSL #<amount> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSXrs | cmp x22, x18 // CMP <Xn|SP>, X<m> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSXrx | cmp x16, w27, uxtb // CMP <Xn|SP>, <R><m>, <extend> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSXrx | cmp x16, w7, uxtb #4 // CMP <Xn|SP>, <R><m>, <extend> #<amount> \\ ALU, flagset, extend and shift \\ 1 1 1 3.00 V1UnitFlg, V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSXrs | cmp x21, x24, lsl #4 // CMP <Xn|SP>, X<m>, LSL #<amount> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSWri | cmp wsp, #2342 // CMP <Wn|WSP>, #<imm> \\ ALU, basic, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSWri | cmp wsp, #3664, lsl #12 // CMP <Wn|WSP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 1 1 3.00 V1UnitFlg, V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSXri | cmp x5, #1482 // CMP <Xn|SP>, #<imm> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSXri | cmp x4, #3684, lsl #12 // CMP <Xn|SP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 1 1 3.00 V1UnitFlg, V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSWrs | cmp w14, w0, lsl #4 // CMP <Wn>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSWrs | cmp w0, w23, lsl #29 // CMP <Wn>, <Wm>, LSL #<wamounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSWrs | cmp w2, w28, lsr #20 // CMP <Wn>, <Wm>, <shift> #<wamount> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSXrs | cmp x27, x10, lsl #1 // CMP <Xn>, <Xm>, LSL #<amountl> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSXrs | cmp x18, x12, lsl #14 // CMP <Xn>, <Xm>, LSL #<amounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSXrs | cmp x6, x7, lsr #0 // CMP <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPEQ_PPzZI_H | cmpeq p2.h, p0/z, z26.h, #-8 // CMPEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPGE_PPzZI_B | cmpge p1.b, p4/z, z28.b, #-6 // CMPGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPGT_PPzZI_B | cmpgt p1.b, p0/z, z13.b, #14 // CMPGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPHI_PPzZI_D | cmphi p1.d, p3/z, z23.d, #12 // CMPHI <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPHS_PPzZI_D | cmphs p7.d, p5/z, z23.d, #114 // CMPHS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPLE_PPzZI_B | cmple p5.b, p2/z, z9.b, #9 // CMPLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPLO_PPzZI_S | cmplo p3.s, p5/z, z18.s, #87 // CMPLO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPLS_PPzZI_D | cmpls p6.d, p6/z, z31.d, #56 // CMPLS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPLT_PPzZI_H | cmplt p0.h, p6/z, z29.h, #-13 // CMPLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPNE_PPzZI_S | cmpne p5.s, p4/z, z18.s, #15 // CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPEQ_PPzZZ_S | cmpeq p6.s, p5/z, z2.s, z9.s // CMPEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPGE_PPzZZ_S | cmpge p7.s, p4/z, z15.s, z15.s // CMPGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPGT_PPzZZ_H | cmpgt p2.h, p4/z, z26.h, z11.h // CMPGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPHI_PPzZZ_S | cmphi p0.s, p4/z, z8.s, z4.s // CMPHI <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPHS_PPzZZ_D | cmphs p1.d, p6/z, z26.d, z15.d // CMPHS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPNE_PPzZZ_B | cmpne p4.b, p3/z, z21.b, z16.b // CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPEQ_PPzZZ_D | cmpeq p2.d, p3/z, z13.d, z18.d // CMPEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPGE_WIDE_PPzZZ_B | cmpge p2.b, p3/z, z3.b, z16.d // CMPGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPGT_WIDE_PPzZZ_H | cmpgt p2.h, p2/z, z28.h, z30.d // CMPGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPHI_WIDE_PPzZZ_H | cmphi p0.h, p5/z, z30.h, z16.d // CMPHI <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPHS_WIDE_PPzZZ_H | cmphs p7.h, p2/z, z1.h, z26.d // CMPHS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPLE_WIDE_PPzZZ_B | cmple p7.b, p7/z, z3.b, z13.d // CMPLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPHI_PPzZZ_D | cmphi p6.d, p2/z, z16.d, z16.d // CMPLO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPLS_WIDE_PPzZZ_H | cmpls p3.h, p2/z, z12.h, z26.d // CMPLS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPGT_PPzZZ_D | cmpgt p0.d, p4/z, z26.d, z29.d // CMPLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPNE_WIDE_PPzZZ_S | cmpne p0.s, p4/z, z30.s, z8.d // CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPGE_PPzZZ_D | cmpge p1.d, p3/z, z26.d, z2.d // CMPLE <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPHI_PPzZZ_B | cmphi p7.b, p0/z, z25.b, z4.b // CMPLO <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPHS_PPzZZ_D | cmphs p4.d, p4/z, z14.d, z2.d // CMPLS <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPGT_PPzZZ_S | cmpgt p2.s, p2/z, z21.s, z31.s // CMPLT <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMTSTv1i64 | cmtst d10, d6, d5 // CMTST <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMTSTv2i64 | cmtst v13.2d, v13.2d, v13.2d // CMTST <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSNEGWr | cneg w3, w17, hi // CNEG <Wd>, <Wn>, <cond> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSNEGXr | cneg x26, x8, lo // CNEG <Xd>, <Xn>, <cond> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CNOT_ZPmZ_S | cnot z7.s, p7/m, z8.s // CNOT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CNTv16i8 | cnt v12.16b, v14.16b // CNT <Vd>.<T>, <Vn>.<T> \\ ASIMD count \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CNT_ZPmZ_H | cnt z26.h, p0/m, z27.h // CNT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTB_XPiI | cntb x18 // CNTB <Xd> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTB_XPiI | cntb x9, vl128 // CNTB <Xd>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTB_XPiI | cntb x28, vl8, mul #13 // CNTB <Xd>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTD_XPiI | cntd x20 // CNTD <Xd> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTD_XPiI | cntd x27, vl7 // CNTD <Xd>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTD_XPiI | cntd x8, vl7, mul #2 // CNTD <Xd>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTH_XPiI | cnth x27 // CNTH <Xd> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTH_XPiI | cnth x0, vl1 // CNTH <Xd>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTH_XPiI | cnth x16, vl3, mul #6 // CNTH <Xd>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTW_XPiI | cntw x22 // CNTW <Xd> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTW_XPiI | cntw x23, vl3 // CNTW <Xd>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTW_XPiI | cntw x6, vl16, mul #11 // CNTW <Xd>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTP_XPP_S | cntp x22, p1, p2.s // CNTP <Xd>, <Pg>, <Pn>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | COMPACT_ZPZ_S | compact z17.s, p1, z18.s // COMPACT <Zd>.<T>, <Pg>, <Zn>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 1 3 3 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPmV_B | mov z13.b, p0/m, b6 // CPY <Zd>.<T>, <Pg>/M, <V><n> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPmI_B | mov z3.b, p6/m, #-118 // CPY <Zd>.B, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPmI_S | mov z11.s, p5/m, #-62 // CPY <Zd>.<T>, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPmI_H | mov z0.h, p0/m, #-11 // CPY <Zd>.<T>, <Pg>/M, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPzI_B | mov z5.b, p1/z, #-90 // CPY <Zd>.B, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPzI_H | mov z12.h, p1/z, #-118 // CPY <Zd>.<T>, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPzI_D | mov z25.d, p3/z, #-20736 // CPY <Zd>.<T>, <Pg>/Z, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV01 | CPY_ZPmR_H | mov z24.h, p0/m, w19 // CPY <Zd>.<T>, <Pg>/M, <R><n> \\ Copy, scalar \\ 2 5 5 1.0 V1UnitM0,V1UnitV01
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV01 | CPY_ZPmR_S | mov z23.s, p2/m, wsp // CPY <Zd>.<T>, <Pg>/M, <R2>SP \\ Copy, scalar \\ 2 5 5 1.0 V1UnitM0,V1UnitV01
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CRC32Brr | crc32b w27, w12, w15 // CRC32B <Wd>, <Wn>, <Wm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CRC32Hrr | crc32h w3, w15, w21 // CRC32H <Wd>, <Wn>, <Wm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CRC32Wrr | crc32w w9, w18, w24 // CRC32W <Wd>, <Wn>, <Wm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CRC32Xrr | crc32x w19, w6, x25 // CRC32X <Wd>, <Wn>, <Xm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CRC32CBrr | crc32cb w25, w28, w30 // CRC32CB <Wd>, <Wn>, <Wm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CRC32CHrr | crc32ch w25, w26, w16 // CRC32CH <Wd>, <Wn>, <Wm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CRC32CWrr | crc32cw w27, w12, w23 // CRC32CW <Wd>, <Wn>, <Wm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CRC32CXrr | crc32cx w21, w28, x5 // CRC32CX <Wd>, <Wn>, <Xm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HINT | csdb // CSDB \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSELWr | csel w25, w16, w30, ls // CSEL <Wd>, <Wn>, <Wm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSELXr | csel x28, x1, x2, pl // CSEL <Xd>, <Xn>, <Xm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSINCWr | cset w6, ne // CSET <Wd>, <cond> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSINCXr | cset x11, lt // CSET <Xd>, <cond> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSINVWr | csetm w3, hi // CSETM <Wd>, <cond> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSINVXr | csetm x6, ne // CSETM <Xd>, <cond> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSINCWr | csinc w9, w3, w14, lt // CSINC <Wd>, <Wn>, <Wm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSINCXr | csinc x20, x11, x23, ge // CSINC <Xd>, <Xn>, <Xm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSINVWr | csinv w1, w4, w3, hs // CSINV <Wd>, <Wn>, <Wm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSINVXr | csinv x27, x21, x15, ne // CSINV <Xd>, <Xn>, <Xm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSNEGWr | csneg w5, w13, w4, hi // CSNEG <Wd>, <Wn>, <Wm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSNEGXr | cneg x8, x29, ls // CSNEG <Xd>, <Xn>, <Xm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CTERMEQ_XX | ctermeq x4, x11 // CTERMEQ <R><n>, <R><m> \\ Loop terminate \\ 1 1 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CTERMNE_XX | ctermne x0, x16 // CTERMNE <R><n>, <R><m> \\ Loop terminate \\ 1 1 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | SYSxt | dc csw, x16 // DC <dc_op>, <Xt> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | DCPS1 | dcps1 // DCPS1 \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | DCPS1 | dcps1 #0x1127 // DCPS1 #<imm> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | DCPS2 | dcps2 // DCPS2 \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | DCPS2 | dcps2 #0x6884 // DCPS2 #<imm> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | DCPS3 | dcps3 // DCPS3 \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | DCPS3 | dcps3 #0xb8e2 // DCPS3 #<imm> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DECB_XPiI | decb x22 // DECB <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DECB_XPiI | decb x5, vl256 // DECB <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DECB_XPiI | decb x21, vl256, mul #7 // DECB <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DECD_XPiI | decd x11 // DECD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DECD_XPiI | decd x19 // DECD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DECD_XPiI | decd x24, vl2, mul #10 // DECD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DECH_XPiI | dech x16 // DECH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DECH_XPiI | dech x20, mul4 // DECH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DECH_XPiI | dech x0, mul3, mul #15 // DECH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DECW_XPiI | decw x27 // DECW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DECW_XPiI | decw x18, vl32 // DECW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DECW_XPiI | decw x29, vl6, mul #3 // DECW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | DECD_ZPiI | decd z19.d // DECD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | DECD_ZPiI | decd z22.d, mul3 // DECD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | DECD_ZPiI | decd z1.d, vl128, mul #11 // DECD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | DECH_ZPiI | dech z23.h // DECH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | DECH_ZPiI | dech z29.h, vl5 // DECH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | DECH_ZPiI | dech z28.h, vl64, mul #16 // DECH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | DECW_ZPiI | decw z8.s // DECW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | DECW_ZPiI | decw z4.s, vl64 // DECW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | DECW_ZPiI | decw z27.s, vl4, mul #10 // DECW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DECP_XP_B | decp x6, p6.b // DECP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 3 | 7 | 7 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV, V1UnitV01 | DECP_ZP_H | decp z22.h, p1.h // DECP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 3 7 7 0.50 V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV, V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | DMB | dmb sy // DMB <option> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | DMB | dmb nshst // DMB #<imm> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | DRPS | drps // DRPS \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPi8 | mov b15, v25.b[12] // DUP B<d>, <Vn>.B[<indexb>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPi16 | mov h2, v31.h[5] // DUP H<d>, <Vn>.H[<indexh>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPi32 | mov s10, v2.s[1] // DUP S<d>, <Vn>.S[<indexs>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPi64 | mov d24, v7.d[1] // DUP D<d>, <Vn>.D[<indexd>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPv8i8lane | dup v25.8b, v21.b[4] // DUP <Vd>.<Tb>, <Vn>.B[<indexb>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPv8i16lane | dup v28.8h, v29.h[1] // DUP <Vd>.<Th>, <Vn>.H[<indexh>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPv4i32lane | dup v24.4s, v9.s[3] // DUP <Vd>.<Ts>, <Vn>.S[<indexs>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPv2i64lane | dup v20.2d, v3.d[0] // DUP <Vd>.<Td>, <Vn>.D[<indexd>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DUPv4i32gpr | dup v19.4s, w27 // DUP <Vd>.<T>, <R><n> \\ ASIMD duplicate, gen reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_B | mov z30.b, #16 // DUP <Zd>.B, #<imm> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_H | mov z15.h, #105 // DUP <Zd>.<T>, #<imm> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_D | mov z22.d, #-14 // DUP <Zd>.<T>, #<imm>, <shift> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZZI_B | mov z2.b, z26.b[27] // DUP <Zd>.B, <Zn>.B[<immb>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZZI_H | mov z23.h, z22.h[2] // DUP <Zd>.H, <Zn>.H[<immh>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZZI_S | mov z29.s, z30.s[15] // DUP <Zd>.S, <Zn>.S[<imms>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZZI_D | mov z4.d, d7 // DUP <Zd>.D, <Zn>.D[<immd>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DUP_ZR_D | mov z25.d, x28 // DUP <Zd>.<T>, <R><n> \\ Duplicate, scalar form \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DUP_ZR_S | mov z18.s, wsp // DUP <Zd>.<T>, <R2>SP \\ Duplicate, scalar form \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUPM_ZI | dupm z18.b, #0x70 // DUPM <Zd>.B, #<constb> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUPM_ZI | dupm z12.h, #0x60 // DUPM <Zd>.H, #<consth> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUPM_ZI | dupm z16.s, #0x2 // DUPM <Zd>.S, #<consts> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUPM_ZI | dupm z16.d, #0x4 // DUPM <Zd>.D, #<constd> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EONWrs | eon w29, w4, w19 // EON <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EONWrs | eon w14, w24, w28, asr #14 // EON <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EONXrs | eon x19, x12, x2 // EON <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EONXrs | eon x23, x23, x23, asr #41 // EON <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | EOR_ZI | eor z7.b, z7.b, #0x8f // EON <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | EOR_ZI | eor z3.h, z3.h, #0xff9f // EON <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | EOR_ZI | eor z2.s, z2.s, #0xfffffffd // EON <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | EOR_ZI | eor z24.d, z24.d, #0xfffffffffffffffb // EON <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EORWri | eor wsp, w4, #0xe00 // EOR <Wd|WSP>, <Wn>, #<imms> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EORXri | eor x27, x25, #0x1e00 // EOR <Xd|SP>, <Xn>, #<immd> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | EOR_ZI | eor z19.b, z19.b, #0x70 // EOR <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | EOR_ZI | eor z18.h, z18.h, #0x60 // EOR <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | EOR_ZI | eor z10.s, z10.s, #0x2 // EOR <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | EOR_ZI | eor z29.d, z29.d, #0x4 // EOR <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | EOR_PPzPP | eor p6.b, p7/z, p3.b, p5.b // EOR <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EORWrs | eor w8, w27, w2 // EOR <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EORWrs | eor w8, w7, w29, asr #30 // EOR <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EORXrs | eor x22, x16, x6 // EOR <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EORXrs | eor x0, x23, x30, lsl #11 // EOR <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | EORv16i8 | eor v8.16b, v10.16b, v19.16b // EOR <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | EOR_ZPmZ_H | eor z8.h, p3/m, z8.h, z14.h // EOR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | EOR_ZZZ | eor z30.d, z26.d, z20.d // EOR <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | EORS_PPzPP | eors p1.b, p0/z, p3.b, p1.b // EORS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 2 2 2 0.50 V1UnitM0[2]
+# CHECK-NEXT: 4 | 12 | 12 | 0.50 | V1UnitV[4], V1UnitV01[4] | EORV_VPZ_H | eorv h17, p1, z15.h // EORV <V><d>, <Pg>, <Zn>.<T> \\ Reduction, logical \\ 4 12 12 0.50 V1UnitV01[4]
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | ERET | eret // ERET \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HINT | esb // ESB \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | EXTv8i8 | ext v12.8b, v22.8b, v31.8b, #6 // EXT <Vd>.8B, <Vn>.8B, <Vm>.8B, #<index8> \\ ASIMD extract \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | EXTv16i8 | ext v17.16b, v18.16b, v8.16b, #10 // EXT <Vd>.16B, <Vn>.16B, <Vm>.16B, #<index16> \\ ASIMD extract \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EXTRWrri | ror w19, w20, #16 // EXTR <Wd>, <Wn>, <Wn>, #<lsbs> \\ Bitfield extract, one reg \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 2 | 3 | 3 | 2.00 | V1UnitI[2], V1UnitM | EXTRWrri | extr w27, w4, w5, #23 // EXTR <Wd>, <Wn>, <Wm>, #<lsbs> \\ Bitfield extract, two regs \\ 2 3 3 2.00 V1UnitM
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EXTRXrri | ror x25, x22, #62 // EXTR <Xd>, <Xn>, <Xn>, #<lsbd> \\ Bitfield extract, one reg \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 2 | 3 | 3 | 2.00 | V1UnitI[2], V1UnitM | EXTRXrri | extr x0, x12, x13, #17 // EXTR <Xd>, <Xn>, <Xm>, #<lsbd> \\ Bitfield extract, two regs \\ 2 3 3 2.00 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FABD16 | fabd h27, h20, h17 // FABD <Hd>, <Hn>, <Hm> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FABD32 | fabd s16, s29, s6 // FABD <V><d>, <V><n>, <V><m> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FABDv8f16 | fabd v13.8h, v28.8h, v12.8h // FABD <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FABDv4f32 | fabd v12.4s, v4.4s, v31.4s // FABD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FABD_ZPmZ_H | fabd z11.h, p6/m, z11.h, z5.h // FABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point absolute value/difference \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FABSHr | fabs h25, h7 // FABS <Hd>, <Hn> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FABSSr | fabs s17, s12 // FABS <Sd>, <Sn> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FABSDr | fabs d30, d8 // FABS <Dd>, <Dn> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FABSv4f32 | fabs v16.4s, v31.4s // FABS <Vd>.<T>, <Vn>.<T> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FABSv2f32 | fabs v17.2s, v28.2s // FABS <Vd>.<T>, <Vn>.<T> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FABS_ZPmZ_S | fabs z26.s, p7/m, z24.s // FABS <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Floating point absolute value/difference \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FACGE_PPzZZ_H | facge p0.h, p5/z, z15.h, z18.h // FACGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FACGT_PPzZZ_S | facgt p7.s, p7/z, z10.s, z4.s // FACGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FACGE16 | facge h24, h26, h29 // FACGE <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FACGE64 | facge d25, d24, d7 // FACGE <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FACGEv4f16 | facge v25.4h, v16.4h, v11.4h // FACGE <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FACGEv2f32 | facge v19.2s, v24.2s, v5.2s // FACGE <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FACGT16 | facgt h0, h4, h10 // FACGT <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FACGT32 | facgt s29, s3, s2 // FACGT <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FACGTv8f16 | facgt v22.8h, v14.8h, v31.8h // FACGT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FACGTv4f32 | facgt v22.4s, v8.4s, v2.4s // FACGT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FACGE_PPzZZ_H | facge p7.h, p5/z, z27.h, z22.h // FACLE <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FACGT_PPzZZ_H | facgt p5.h, p5/z, z16.h, z31.h // FACLT <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FADD_ZPmI_H | fadd z4.h, p7/m, z4.h, #1.0 // FADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FADDHrr | fadd h23, h27, h22 // FADD <Hd>, <Hn>, <Hm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FADDSrr | fadd s1, s23, s27 // FADD <Sd>, <Sn>, <Sm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FADDDrr | fadd d16, d15, d21 // FADD <Dd>, <Dn>, <Dm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FADDv2f64 | fadd v7.2d, v30.2d, v20.2d // FADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FADDv2f64 | fadd v16.2d, v13.2d, v11.2d // FADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FADD_ZPmZ_H | fadd z26.h, p4/m, z26.h, z1.h // FADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FADD_ZZZ_S | fadd z23.s, z7.s, z16.s // FADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 18 | 19 | 19 | 0.06 | V1UnitV[18], V1UnitV0[18], V1UnitV01[18], V1UnitV02[18] | FADDA_VPZ_H | fadda h8, p3, h8, z28.h // FADDA H<dn>, <Pg>, H<dn>, <Zm>.H \\ Floating point associative add, F16 \\ 18 19 19 0.06 V1UnitV[18], V1UnitV0[18], V1UnitV01[18], V1UnitV02[18]
+# CHECK-NEXT: 10 | 11 | 11 | 0.10 | V1UnitV[10], V1UnitV0[10], V1UnitV01[10], V1UnitV02[10] | FADDA_VPZ_S | fadda s11, p6, s11, z1.s // FADDA S<dn>, <Pg>, S<dn>, <Zm>.S \\ Floating point associative add, F32 \\ 10 11 11 0.10 V1UnitV[10], V1UnitV0[10], V1UnitV01[10], V1UnitV02[10]
+# CHECK-NEXT: 3 | 8 | 8 | 0.67 | V1UnitV[3], V1UnitV01[3] | FADDA_VPZ_D | fadda d27, p4, d27, z27.d // FADDA D<dn>, <Pg>, D<dn>, <Zm>.D \\ Floating point associative add, F64 \\ 3 8 8 0.67 V1UnitV01[3]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FADDPv2i16p | faddp h10, v19.2h // FADDP <Vh><d>, <Vn>.<Th> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FADDPv2i64p | faddp d11, v28.2d // FADDP <V><d>, <Vn>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FADDPv2f64 | faddp v16.2d, v11.2d, v5.2d // FADDP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FADDPv4f32 | faddp v16.4s, v11.4s, v18.4s // FADDP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 6 | 13 | 13 | 0.33 | V1UnitV[6], V1UnitV01[6] | FADDV_VPZ_H | faddv h21, p2, z3.h // FADDV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 6 13 13 0.33 V1UnitV[6], V1UnitV01[6]
+# CHECK-NEXT: 6 | 11 | 11 | 0.40 | V1UnitV[6], V1UnitV01[5] | FADDV_VPZ_S | faddv s16, p2, z25.s // FADDV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 6 11 11 0.40 V1UnitV[6], V1UnitV01[5]
+# CHECK-NEXT: 5 | 9 | 9 | 0.50 | V1UnitV[5], V1UnitV01[4] | FADDV_VPZ_D | faddv d18, p4, z7.d // FADDV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 5 9 9 0.50 V1UnitV01[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FCADD_ZPmZ_H | fcadd z29.h, p2/m, z29.h, z15.h, #270 // FCADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>, <const> \\ Floating point complex add \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCCMPHrr | fccmp h31, h3, #11, hs // FCCMP <Hn>, <Hm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCCMPSrr | fccmp s5, s6, #0, lo // FCCMP <Sn>, <Sm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCCMPDrr | fccmp d17, d15, #0, ne // FCCMP <Dn>, <Dm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCCMPEHrr | fccmpe h6, h1, #12, ne // FCCMPE <Hn>, <Hm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCCMPESrr | fccmpe s16, s13, #10, vs // FCCMPE <Sn>, <Sm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCCMPEDrr | fccmpe d17, d14, #15, ls // FCCMPE <Dn>, <Dm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMEQ_PPzZZ_D | fcmeq p7.d, p1/z, z23.d, z21.d // FCMEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMGE_PPzZZ_H | fcmge p6.h, p1/z, z19.h, z10.h // FCMGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMGT_PPzZZ_S | fcmgt p5.s, p2/z, z29.s, z5.s // FCMGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMNE_PPzZZ_D | fcmne p5.d, p0/z, z22.d, z15.d // FCMNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMUO_PPzZZ_D | fcmuo p0.d, p2/z, z15.d, z23.d // FCMUO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMEQ_PPzZ0_D | fcmeq p4.d, p5/z, z19.d, #0.0 // FCMEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMGE_PPzZ0_D | fcmge p0.d, p5/z, z10.d, #0.0 // FCMGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMGT_PPzZ0_D | fcmgt p6.d, p1/z, z8.d, #0.0 // FCMGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMLE_PPzZ0_D | fcmle p2.d, p4/z, z26.d, #0.0 // FCMLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMLT_PPzZ0_D | fcmlt p5.d, p5/z, z23.d, #0.0 // FCMLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMNE_PPzZ0_H | fcmne p2.h, p3/z, z7.h, #0.0 // FCMNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMEQ16 | fcmeq h30, h6, h1 // FCMEQ <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMEQ32 | fcmeq s17, s0, s21 // FCMEQ <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMEQv2f32 | fcmeq v19.2s, v31.2s, v19.2s // FCMEQ <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMEQv4f32 | fcmeq v12.4s, v11.4s, v26.4s // FCMEQ <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMEQv1i16rz | fcmeq h19, h23, #0.0 // FCMEQ <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMEQv1i32rz | fcmeq s25, s18, #0.0 // FCMEQ <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMEQv2i32rz | fcmeq v8.2s, v16.2s, #0.0 // FCMEQ <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMEQv2i64rz | fcmeq v18.2d, v17.2d, #0.0 // FCMEQ <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGE16 | fcmge h1, h16, h12 // FCMGE <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGE64 | fcmge d29, d9, d3 // FCMGE <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGEv8f16 | fcmge v20.8h, v19.8h, v22.8h // FCMGE <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGEv2f64 | fcmge v17.2d, v11.2d, v13.2d // FCMGE <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGEv1i16rz | fcmge h10, h23, #0.0 // FCMGE <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGEv1i64rz | fcmge d5, d17, #0.0 // FCMGE <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGEv4i16rz | fcmge v18.4h, v27.4h, #0.0 // FCMGE <Vd>.<Th>, <Vn>.<Th>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGEv2i32rz | fcmge v17.2s, v11.2s, #0.0 // FCMGE <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGT16 | fcmgt h4, h5, h0 // FCMGT <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGT32 | fcmgt s13, s20, s3 // FCMGT <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGTv8f16 | fcmgt v24.8h, v24.8h, v28.8h // FCMGT <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGTv4f32 | fcmgt v19.4s, v20.4s, v13.4s // FCMGT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGTv1i16rz | fcmgt h0, h18, #0.0 // FCMGT <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGTv1i64rz | fcmgt d30, d23, #0.0 // FCMGT <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGTv8i16rz | fcmgt v0.8h, v11.8h, #0.0 // FCMGT <Vd>.<Th>, <Vn>.<Th>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGTv2i64rz | fcmgt v19.2d, v31.2d, #0.0 // FCMGT <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 5 | 2 | 2.00 | V1UnitV, V1UnitV01 | FCMLA_ZZZI_H | fcmla z20.h, z12.h, z4.h[1], #90 // FCMLA <Zda>.H, <Zn>.H, <Zmh>.H[<immh>], <const> \\ Floating point complex multiply add \\ 1 5 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 5 | 2 | 2.00 | V1UnitV, V1UnitV01 | FCMLA_ZZZI_S | fcmla z1.s, z27.s, z6.s[0], #90 // FCMLA <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const> \\ Floating point complex multiply add \\ 1 5 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 5 | 2 | 2.00 | V1UnitV, V1UnitV01 | FCMLA_ZPmZZ_S | fcmla z25.s, p3/m, z13.s, z23.s, #180 // FCMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>, <const> \\ Floating point complex multiply add \\ 1 5 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMGE_PPzZZ_S | fcmge p5.s, p3/z, z12.s, z28.s // FCMLE <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMLEv1i16rz | fcmle h18, h28, #0.0 // FCMLE <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMLEv1i64rz | fcmle d18, d16, #0.0 // FCMLE <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMLEv8i16rz | fcmle v16.8h, v11.8h, #0.0 // FCMLE <Vd>.<Th>, <Vn>.<Th>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMLEv4i32rz | fcmle v22.4s, v30.4s, #0.0 // FCMLE <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMGT_PPzZZ_S | fcmgt p1.s, p1/z, z24.s, z13.s // FCMLT <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMLTv1i16rz | fcmlt h23, h7, #0.0 // FCMLT <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMLTv1i64rz | fcmlt d22, d28, #0.0 // FCMLT <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMLTv4i16rz | fcmlt v8.4h, v2.4h, #0.0 // FCMLT <Vd>.<Th>, <Vn>.<Th>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMLTv2i64rz | fcmlt v7.2d, v16.2d, #0.0 // FCMLT <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPHrr | fcmp h5, h21 // FCMP <Hn>, <Hm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPHri | fcmp h5, #0.0 // FCMP <Hn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPSrr | fcmp s7, s0 // FCMP <Sn>, <Sm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPSri | fcmp s28, #0.0 // FCMP <Sn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPDrr | fcmp d1, d27 // FCMP <Dn>, <Dm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPDri | fcmp d16, #0.0 // FCMP <Dn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPEHrr | fcmpe h22, h21 // FCMPE <Hn>, <Hm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPEHri | fcmpe h13, #0.0 // FCMPE <Hn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPESrr | fcmpe s11, s29 // FCMPE <Sn>, <Sm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPESri | fcmpe s15, #0.0 // FCMPE <Sn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPEDrr | fcmpe d27, d22 // FCMPE <Dn>, <Dm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPEDri | fcmpe d9, #0.0 // FCMPE <Dn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FCPY_ZPmI_H | fmov z2.h, p7/m, #0.50000000 // FCPY <Zd>.<T>, <Pg>/M, #<const> \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FCSELHrrr | fcsel h26, h2, h11, hs // FCSEL <Hd>, <Hn>, <Hm>, <cond> \\ FP select \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FCSELSrrr | fcsel s5, s1, s4, vc // FCSEL <Sd>, <Sn>, <Sm>, <cond> \\ FP select \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FCSELDrrr | fcsel d14, d0, d19, eq // FCSEL <Dd>, <Dn>, <Dm>, <cond> \\ FP select \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTSHr | fcvt s13, h13 // FCVT <Sd>, <Hn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTDHr | fcvt d10, h6 // FCVT <Dd>, <Hn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTHSr | fcvt h1, s1 // FCVT <Hd>, <Sn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTDSr | fcvt d9, s23 // FCVT <Dd>, <Sn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTHDr | fcvt h17, d16 // FCVT <Hd>, <Dn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTSDr | fcvt s31, d27 // FCVT <Sd>, <Dn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | FCVT_ZPmZ_HtoS | fcvt z0.s, p1/m, z4.h // FCVT <Zd>.S, <Pg>/M, <Zn>.H \\ Floating point convert, long or narrow (F16 to F32 or F32 to F16) \\ 2 4 4 0.50 V1UnitV0[2]
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVT_ZPmZ_HtoD | fcvt z6.d, p0/m, z17.h // FCVT <Zd>.D, <Pg>/M, <Zn>.H \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 2 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | FCVT_ZPmZ_StoH | fcvt z7.h, p7/m, z5.s // FCVT <Zd>.H, <Pg>/M, <Zn>.S \\ Floating point convert, long or narrow (F16 to F32 or F32 to F16) \\ 2 4 4 0.50 V1UnitV0[2]
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVT_ZPmZ_StoD | fcvt z11.d, p2/m, z18.s // FCVT <Zd>.D, <Pg>/M, <Zn>.S \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVT_ZPmZ_DtoH | fcvt z26.h, p0/m, z30.d // FCVT <Zd>.H, <Pg>/M, <Zn>.D \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVT_ZPmZ_DtoS | fcvt z13.s, p2/m, z3.d // FCVT <Zd>.S, <Pg>/M, <Zn>.D \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTASUWHr | fcvtas w23, h3 // FCVTAS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTASUXHr | fcvtas x14, h29 // FCVTAS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTASUWSr | fcvtas w0, s13 // FCVTAS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTASUXSr | fcvtas x23, s15 // FCVTAS <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTASUWDr | fcvtas w1, d31 // FCVTAS <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTASUXDr | fcvtas x2, d3 // FCVTAS <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTASv1f16 | fcvtas h27, h24 // FCVTAS <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTASv1i32 | fcvtas s16, s0 // FCVTAS S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTASv1i64 | fcvtas d14, d7 // FCVTAS D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTASv4f16 | fcvtas v5.4h, v16.4h // FCVTAS <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTASv8f16 | fcvtas v13.8h, v30.8h // FCVTAS <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTASv2f32 | fcvtas v12.2s, v1.2s // FCVTAS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTASv4f32 | fcvtas v9.4s, v31.4s // FCVTAS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTASv2f64 | fcvtas v2.2d, v22.2d // FCVTAS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTAUUWHr | fcvtau w13, h27 // FCVTAU <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTAUUXHr | fcvtau x8, h12 // FCVTAU <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTAUUWSr | fcvtau w20, s10 // FCVTAU <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTAUUXSr | fcvtau x27, s22 // FCVTAU <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTAUUWDr | fcvtau w6, d26 // FCVTAU <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTAUUXDr | fcvtau x16, d13 // FCVTAU <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTAUv1f16 | fcvtau h6, h29 // FCVTAU <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTAUv1i32 | fcvtau s23, s7 // FCVTAU S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTAUv1i64 | fcvtau d1, d26 // FCVTAU D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTAUv4f16 | fcvtau v12.4h, v13.4h // FCVTAU <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTAUv8f16 | fcvtau v21.8h, v0.8h // FCVTAU <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTAUv2f32 | fcvtau v31.2s, v6.2s // FCVTAU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTAUv4f32 | fcvtau v29.4s, v26.4s // FCVTAU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTAUv2f64 | fcvtau v9.2d, v7.2d // FCVTAU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTLv4i16 | fcvtl v30.4s, v4.4h // FCVTL <Vd>.4S, <Vn>.4H \\ ASIMD FP convert, long (F16 to F32) \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTLv2i32 | fcvtl v28.2d, v13.2s // FCVTL <Vd>.2D, <Vn>.2S \\ ASIMD FP convert, long (F32 to F64) \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTLv8i16 | fcvtl2 v14.4s, v29.8h // FCVTL2 <Vd>.4S, <Vn>.8H \\ ASIMD FP convert, long (F16 to F32) \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTLv4i32 | fcvtl2 v0.2d, v9.4s // FCVTL2 <Vd>.2D, <Vn>.4S \\ ASIMD FP convert, long (F32 to F64) \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMSUWHr | fcvtms w15, h1 // FCVTMS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMSUXHr | fcvtms x5, h2 // FCVTMS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMSUWSr | fcvtms w1, s16 // FCVTMS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMSUXSr | fcvtms x27, s22 // FCVTMS <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMSUWDr | fcvtms w18, d21 // FCVTMS <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMSUXDr | fcvtms x6, d26 // FCVTMS <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTMSv1f16 | fcvtms h19, h29 // FCVTMS <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTMSv1i32 | fcvtms s30, s14 // FCVTMS S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTMSv1i64 | fcvtms d8, d20 // FCVTMS D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTMSv4f16 | fcvtms v27.4h, v7.4h // FCVTMS <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTMSv8f16 | fcvtms v26.8h, v11.8h // FCVTMS <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTMSv2f32 | fcvtms v13.2s, v2.2s // FCVTMS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTMSv4f32 | fcvtms v18.4s, v21.4s // FCVTMS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTMSv2f64 | fcvtms v15.2d, v16.2d // FCVTMS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMUUWHr | fcvtmu w20, h6 // FCVTMU <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMUUXHr | fcvtmu x7, h18 // FCVTMU <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMUUWSr | fcvtmu w24, s19 // FCVTMU <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMUUXSr | fcvtmu x7, s15 // FCVTMU <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMUUWDr | fcvtmu w16, d16 // FCVTMU <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMUUXDr | fcvtmu x1, d18 // FCVTMU <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTMUv1f16 | fcvtmu h20, h13 // FCVTMU <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTMUv1i32 | fcvtmu s28, s25 // FCVTMU S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTMUv1i64 | fcvtmu d3, d27 // FCVTMU D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTMUv4f16 | fcvtmu v18.4h, v2.4h // FCVTMU <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTMUv8f16 | fcvtmu v10.8h, v11.8h // FCVTMU <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTMUv2f32 | fcvtmu v27.2s, v14.2s // FCVTMU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTMUv4f32 | fcvtmu v31.4s, v4.4s // FCVTMU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTMUv2f64 | fcvtmu v6.2d, v26.2d // FCVTMU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTNv4i16 | fcvtn v4.4h, v22.4s // FCVTN <Vd>.4H, <Vn>.4S \\ ASIMD FP convert, narrow (F32 to F16) \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNv2i32 | fcvtn v14.2s, v2.2d // FCVTN <Vd>.2S, <Vn>.2D \\ ASIMD FP convert, narrow (F64 to F32) \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTNv8i16 | fcvtn2 v0.8h, v30.4s // FCVTN2 <Vd>.8H, <Vn>.4S \\ ASIMD FP convert, narrow (F32 to F16) \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNv4i32 | fcvtn2 v21.4s, v13.2d // FCVTN2 <Vd>.4S, <Vn>.2D \\ ASIMD FP convert, narrow (F64 to F32) \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNSUWHr | fcvtns w19, h15 // FCVTNS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNSUXHr | fcvtns x20, h0 // FCVTNS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNSUWSr | fcvtns w10, s5 // FCVTNS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNSUXSr | fcvtns x14, s12 // FCVTNS <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNSUWDr | fcvtns w30, d2 // FCVTNS <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNSUXDr | fcvtns x0, d12 // FCVTNS <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTNSv1f16 | fcvtns h16, h25 // FCVTNS <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTNSv1i32 | fcvtns s23, s19 // FCVTNS S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNSv1i64 | fcvtns d30, d1 // FCVTNS D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTNSv4f16 | fcvtns v28.4h, v19.4h // FCVTNS <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTNSv8f16 | fcvtns v19.8h, v19.8h // FCVTNS <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNSv2f32 | fcvtns v20.2s, v4.2s // FCVTNS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTNSv4f32 | fcvtns v28.4s, v29.4s // FCVTNS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNSv2f64 | fcvtns v21.2d, v31.2d // FCVTNS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNUUWHr | fcvtnu w12, h3 // FCVTNU <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNUUXHr | fcvtnu x23, h27 // FCVTNU <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNUUWSr | fcvtnu w4, s23 // FCVTNU <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNUUXSr | fcvtnu x5, s28 // FCVTNU <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNUUWDr | fcvtnu w4, d11 // FCVTNU <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNUUXDr | fcvtnu x12, d8 // FCVTNU <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTNUv1f16 | fcvtnu h24, h22 // FCVTNU <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTNUv1i32 | fcvtnu s29, s22 // FCVTNU S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNUv1i64 | fcvtnu d18, d15 // FCVTNU D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTNUv4f16 | fcvtnu v5.4h, v12.4h // FCVTNU <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTNUv8f16 | fcvtnu v26.8h, v20.8h // FCVTNU <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNUv2f32 | fcvtnu v15.2s, v1.2s // FCVTNU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTNUv4f32 | fcvtnu v7.4s, v16.4s // FCVTNU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNUv2f64 | fcvtnu v13.2d, v8.2d // FCVTNU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPSUWHr | fcvtps w27, h14 // FCVTPS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPSUXHr | fcvtps x26, h20 // FCVTPS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPSUWSr | fcvtps w5, s27 // FCVTPS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPSUXSr | fcvtps x29, s6 // FCVTPS <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPSUWDr | fcvtps w23, d25 // FCVTPS <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPSUXDr | fcvtps x10, d16 // FCVTPS <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTPSv1f16 | fcvtps h31, h22 // FCVTPS <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTPSv1i32 | fcvtps s3, s3 // FCVTPS S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTPSv1i64 | fcvtps d10, d26 // FCVTPS D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTPSv4f16 | fcvtps v13.4h, v26.4h // FCVTPS <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTPSv8f16 | fcvtps v26.8h, v10.8h // FCVTPS <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTPSv2f32 | fcvtps v18.2s, v8.2s // FCVTPS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTPSv4f32 | fcvtps v12.4s, v18.4s // FCVTPS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTPSv2f64 | fcvtps v3.2d, v2.2d // FCVTPS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPUUWHr | fcvtpu w25, h22 // FCVTPU <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPUUXHr | fcvtpu x4, h24 // FCVTPU <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPUUWSr | fcvtpu w13, s0 // FCVTPU <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPUUXSr | fcvtpu x0, s17 // FCVTPU <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPUUWDr | fcvtpu w16, d25 // FCVTPU <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPUUXDr | fcvtpu x15, d12 // FCVTPU <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTPUv1f16 | fcvtpu h1, h29 // FCVTPU <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTPUv1i32 | fcvtpu s21, s30 // FCVTPU S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTPUv1i64 | fcvtpu d16, d26 // FCVTPU D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTPUv4f16 | fcvtpu v2.4h, v25.4h // FCVTPU <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTPUv8f16 | fcvtpu v24.8h, v26.8h // FCVTPU <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTPUv2f32 | fcvtpu v6.2s, v23.2s // FCVTPU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTPUv4f32 | fcvtpu v10.4s, v6.4s // FCVTPU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTPUv2f64 | fcvtpu v7.2d, v23.2d // FCVTPU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTXNv1i64 | fcvtxn s29, d4 // FCVTXN <Vb><d>, <Va><n> \\ ASIMD FP convert, narrow (F64 to F32) \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTXNv2f32 | fcvtxn v25.2s, v15.2d // FCVTXN <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD FP convert, narrow (F64 to F32) \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTXNv4f32 | fcvtxn2 v21.4s, v6.2d // FCVTXN2 <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD FP convert, narrow (F64 to F32) \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSSWHri | fcvtzs w28, h26, #26 // FCVTZS <Wd>, <Hn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 2.00 V1UnitV, V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSSXHri | fcvtzs x22, h17, #58 // FCVTZS <Xd>, <Hn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 2.00 V1UnitV, V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSSWSri | fcvtzs w17, s23, #22 // FCVTZS <Wd>, <Sn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 2.00 V1UnitV, V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSSXSri | fcvtzs x15, s30, #2 // FCVTZS <Xd>, <Sn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 2.00 V1UnitV, V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSSWDri | fcvtzs w13, d17, #17 // FCVTZS <Wd>, <Dn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 2.00 V1UnitV, V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSSXDri | fcvtzs x14, d9, #24 // FCVTZS <Xd>, <Dn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 2.00 V1UnitV, V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSUWHr | fcvtzs w15, h10 // FCVTZS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSUXHr | fcvtzs x4, h21 // FCVTZS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSUWSr | fcvtzs w1, s4 // FCVTZS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSUXSr | fcvtzs x27, s27 // FCVTZS <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSUWDr | fcvtzs w24, d30 // FCVTZS <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSUXDr | fcvtzs x18, d21 // FCVTZS <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZSh | fcvtzs h29, h23, #16 // FCVTZS H<d>, H<n>, #<hfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZSs | fcvtzs s23, s15, #2 // FCVTZS S<d>, S<n>, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSd | fcvtzs d20, d26, #57 // FCVTZS D<d>, D<n>, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZSv4i16_shift | fcvtzs v20.4h, v24.4h, #11 // FCVTZS <Vd>.4H, <Vn>.4H, #<hfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZSv8i16_shift | fcvtzs v18.8h, v10.8h, #7 // FCVTZS <Vd>.8H, <Vn>.8H, #<hfbits> \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSv2i32_shift | fcvtzs v16.2s, v2.2s, #11 // FCVTZS <Vd>.2S, <Vn>.2S, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZSv4i32_shift | fcvtzs v22.4s, v18.4s, #5 // FCVTZS <Vd>.4S, <Vn>.4S, #<sfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSv2i64_shift | fcvtzs v14.2d, v30.2d, #54 // FCVTZS <Vd>.2D, <Vn>.2D, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZSv1f16 | fcvtzs h16, h27 // FCVTZS <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZSv1i32 | fcvtzs s4, s5 // FCVTZS S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSv1i64 | fcvtzs d4, d23 // FCVTZS D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZSv4f16 | fcvtzs v8.4h, v16.4h // FCVTZS <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZSv8f16 | fcvtzs v2.8h, v16.8h // FCVTZS <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSv2f32 | fcvtzs v27.2s, v28.2s // FCVTZS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZSv4f32 | fcvtzs v29.4s, v18.4s // FCVTZS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSv2f64 | fcvtzs v13.2d, v31.2d // FCVTZS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 4 | 6 | 6 | 0.25 | V1UnitV[4], V1UnitV0[4], V1UnitV01[4], V1UnitV02[4] | FCVTZS_ZPmZ_HtoH | fcvtzs z1.h, p2/m, z6.h // FCVTZS <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 4 6 6 0.25 V1UnitV0[4]
+# CHECK-NEXT: 2 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | FCVTZS_ZPmZ_HtoS | fcvtzs z19.s, p4/m, z16.h // FCVTZS <Zd>.S, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 2 4 4 0.50 V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZS_ZPmZ_HtoD | fcvtzs z14.d, p0/m, z6.h // FCVTZS <Zd>.D, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 1 3 3 1.00 V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | FCVTZS_ZPmZ_StoS | fcvtzs z25.s, p5/m, z23.s // FCVTZS <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 2 4 4 0.50 V1UnitV0[2]
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZS_ZPmZ_StoD | fcvtzs z3.d, p1/m, z31.s // FCVTZS <Zd>.D, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 1 3 3 1.00 V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZS_ZPmZ_DtoS | fcvtzs z28.s, p5/m, z23.d // FCVTZS <Zd>.S, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZS_ZPmZ_DtoD | fcvtzs z22.d, p6/m, z29.d // FCVTZS <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUSWHri | fcvtzu w12, h19, #20 // FCVTZU <Wd>, <Hn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 2.00 V1UnitV, V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUSXHri | fcvtzu x17, h23, #12 // FCVTZU <Xd>, <Hn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 2.00 V1UnitV, V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUSWSri | fcvtzu w16, s3, #12 // FCVTZU <Wd>, <Sn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 2.00 V1UnitV, V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUSXSri | fcvtzu x27, s15, #8 // FCVTZU <Xd>, <Sn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 2.00 V1UnitV, V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUSWDri | fcvtzu w21, d10, #23 // FCVTZU <Wd>, <Dn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 2.00 V1UnitV, V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUSXDri | fcvtzu x26, d30, #27 // FCVTZU <Xd>, <Dn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 2.00 V1UnitV, V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUUWHr | fcvtzu w26, h30 // FCVTZU <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUUXHr | fcvtzu x9, h11 // FCVTZU <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUUWSr | fcvtzu w20, s16 // FCVTZU <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUUXSr | fcvtzu x7, s21 // FCVTZU <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUUWDr | fcvtzu w25, d30 // FCVTZU <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUUXDr | fcvtzu x13, d8 // FCVTZU <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZUh | fcvtzu h19, h8, #12 // FCVTZU H<d>, H<n>, #<hfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZUs | fcvtzu s25, s27, #10 // FCVTZU S<d>, S<n>, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUd | fcvtzu d30, d16, #42 // FCVTZU D<d>, D<n>, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZUv4i16_shift | fcvtzu v19.4h, v26.4h, #9 // FCVTZU <Vd>.4H, <Vn>.4H, #<hfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZUv8i16_shift | fcvtzu v27.8h, v6.8h, #11 // FCVTZU <Vd>.8H, <Vn>.8H, #<hfbits> \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUv2i32_shift | fcvtzu v30.2s, v4.2s, #19 // FCVTZU <Vd>.2S, <Vn>.2S, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZUv4i32_shift | fcvtzu v31.4s, v6.4s, #22 // FCVTZU <Vd>.4S, <Vn>.4S, #<sfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUv2i64_shift | fcvtzu v10.2d, v12.2d, #53 // FCVTZU <Vd>.2D, <Vn>.2D, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZUv1f16 | fcvtzu h25, h30 // FCVTZU <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZUv1i32 | fcvtzu s2, s19 // FCVTZU S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUv1i64 | fcvtzu d4, d7 // FCVTZU D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZUv4f16 | fcvtzu v3.4h, v2.4h // FCVTZU <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZUv8f16 | fcvtzu v30.8h, v25.8h // FCVTZU <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUv2f32 | fcvtzu v25.2s, v25.2s // FCVTZU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZUv4f32 | fcvtzu v21.4s, v2.4s // FCVTZU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUv2f64 | fcvtzu v23.2d, v15.2d // FCVTZU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 4 | 6 | 6 | 0.25 | V1UnitV[4], V1UnitV0[4], V1UnitV01[4], V1UnitV02[4] | FCVTZU_ZPmZ_HtoH | fcvtzu z15.h, p0/m, z8.h // FCVTZU <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 4 6 6 0.25 V1UnitV0[4]
+# CHECK-NEXT: 2 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | FCVTZU_ZPmZ_HtoS | fcvtzu z8.s, p5/m, z18.h // FCVTZU <Zd>.S, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 2 4 4 0.50 V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZU_ZPmZ_HtoD | fcvtzu z11.d, p4/m, z24.h // FCVTZU <Zd>.D, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 1 3 3 1.00 V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | FCVTZU_ZPmZ_StoS | fcvtzu z13.s, p7/m, z8.s // FCVTZU <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 2 4 4 0.50 V1UnitV0[2]
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZU_ZPmZ_StoD | fcvtzu z20.d, p2/m, z13.s // FCVTZU <Zd>.D, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 1 3 3 1.00 V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZU_ZPmZ_DtoS | fcvtzu z31.s, p3/m, z20.d // FCVTZU <Zd>.S, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZU_ZPmZ_DtoD | fcvtzu z4.d, p1/m, z25.d // FCVTZU <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 7 | 7 | 0.29 | V1UnitV[7], V1UnitV02[7] | FDIVHrr | fdiv h1, h26, h23 // FDIV <Hd>, <Hn>, <Hm> \\ FP divide, H-form \\ 1 7 7 0.29 V1UnitV[7], V1UnitV02[7]
+# CHECK-NEXT: 1 | 10 | 10 | 0.29 | V1UnitV[7], V1UnitV02[7] | FDIVSrr | fdiv s31, s18, s12 // FDIV <Sd>, <Sn>, <Sm> \\ FP divide, S-form \\ 1 10 10 0.67 V1UnitV02[3]
+# CHECK-NEXT: 1 | 15 | 15 | 0.29 | V1UnitV[7], V1UnitV02[7] | FDIVDrr | fdiv d6, d3, d0 // FDIV <Dd>, <Dn>, <Dm> \\ FP divide, D-form \\ 1 15 15 0.29 V1UnitV02[7]
+# CHECK-NEXT: 1 | 7 | 7 | 0.29 | V1UnitV[7], V1UnitV02[7] | FDIVv4f16 | fdiv v21.4h, v15.4h, v22.4h // FDIV <Vd>.4H, <Vn>.4H, <Vm>.4H \\ ASIMD FP divide, D-form, F16 \\ 1 7 7 0.29 V1UnitV02[7]
+# CHECK-NEXT: 1 | 13 | 13 | 0.40 | V1UnitV[5], V1UnitV02[5] | FDIVv8f16 | fdiv v31.8h, v12.8h, v15.8h // FDIV <Vd>.8H, <Vn>.8H, <Vm>.8H \\ ASIMD FP divide, Q-form, F16 \\ 1 13 13 0.14 V1UnitV02[14]
+# CHECK-NEXT: 1 | 10 | 10 | 0.29 | V1UnitV[7], V1UnitV02[7] | FDIVv2f32 | fdiv v15.2s, v23.2s, v2.2s // FDIV <Vd>.2S, <Vn>.2S, <Vm>.2S \\ ASIMD FP divide, D-form, F32 \\ 1 10 10 0.4 V1UnitV02[5]
+# CHECK-NEXT: 1 | 10 | 10 | 0.29 | V1UnitV[7], V1UnitV02[7] | FDIVv4f32 | fdiv v7.4s, v27.4s, v22.4s // FDIV <Vd>.4S, <Vn>.4S, <Vm>.4S \\ ASIMD FP divide, Q-form, F32 \\ 1 10 10 0.2 V1UnitV02[10]
+# CHECK-NEXT: 1 | 15 | 15 | 0.29 | V1UnitV[7], V1UnitV02[7] | FDIVv2f64 | fdiv v31.2d, v25.2d, v8.2d // FDIV <Vd>.2D, <Vn>.2D, <Vm>.2D \\ ASIMD FP divide, Q-form, F64 \\ 1 15 15 0.13 V1UnitV02[15]
+# CHECK-NEXT: 1 | 13 | 13 | 0.10 | V1UnitV[10], V1UnitV0[10], V1UnitV01[10], V1UnitV02[10] | FDIV_ZPmZ_H | fdiv z21.h, p7/m, z21.h, z15.h // FDIV <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Floating point divide, F16 \\ 1 13 13 0.08 V1UnitV0[13]
+# CHECK-NEXT: 1 | 10 | 10 | 0.14 | V1UnitV[7], V1UnitV0[7], V1UnitV01[7], V1UnitV02[7] | FDIV_ZPmZ_S | fdiv z17.s, p4/m, z17.s, z20.s // FDIV <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Floating point divide, F32 \\ 1 10 10 0.1 V1UnitV0[10]
+# CHECK-NEXT: 1 | 15 | 15 | 0.14 | V1UnitV[7], V1UnitV0[7], V1UnitV01[7], V1UnitV02[7] | FDIV_ZPmZ_D | fdiv z13.d, p3/m, z13.d, z28.d // FDIV <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Floating point divide, F64 \\ 1 15 15 0.07 V1UnitV0[15]
+# CHECK-NEXT: 1 | 13 | 13 | 0.10 | V1UnitV[10], V1UnitV0[10], V1UnitV01[10], V1UnitV02[10] | FDIVR_ZPmZ_H | fdivr z29.h, p4/m, z29.h, z1.h // FDIVR <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Floating point divide, F16 \\ 1 13 13 0.08 V1UnitV0[13]
+# CHECK-NEXT: 1 | 10 | 10 | 0.14 | V1UnitV[7], V1UnitV0[7], V1UnitV01[7], V1UnitV02[7] | FDIVR_ZPmZ_S | fdivr z13.s, p0/m, z13.s, z29.s // FDIVR <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Floating point divide, F32 \\ 1 10 10 0.1 V1UnitV0[10]
+# CHECK-NEXT: 1 | 15 | 15 | 0.14 | V1UnitV[7], V1UnitV0[7], V1UnitV01[7], V1UnitV02[7] | FDIVR_ZPmZ_D | fdivr z14.d, p3/m, z14.d, z31.d // FDIVR <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Floating point divide, F64 \\ 1 15 15 0.07 V1UnitV0[15]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FDUP_ZI_S | fmov z19.s, #0.50000000 // FDUP <Zd>.<T>, #<const> \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FEXPA_ZZ_H | fexpa z6.h, z3.h // FEXPA <Zd>.<T>, <Zn>.<T> \\ Floating point trigonometric \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMAD_ZPmZZ_S | fmad z9.s, p5/m, z9.s, z7.s // FMAD <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMADDHrrr | fmadd h27, h0, h6, h28 // FMADD <Hd>, <Hn>, <Hm>, <Ha> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMADDSrrr | fmadd s13, s24, s15, s5 // FMADD <Sd>, <Sn>, <Sm>, <Sa> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMADDDrrr | fmadd d19, d4, d2, d17 // FMADD <Dd>, <Dn>, <Dm>, <Da> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMAX_ZPmI_D | fmax z25.d, p2/m, z25.d, #0.0 // FMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXHrr | fmax h8, h7, h11 // FMAX <Hd>, <Hn>, <Hm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXSrr | fmax s9, s21, s2 // FMAX <Sd>, <Sn>, <Sm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXDrr | fmax d4, d26, d26 // FMAX <Dd>, <Dn>, <Dm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXv4f32 | fmax v0.4s, v13.4s, v21.4s // FMAX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXv4f32 | fmax v12.4s, v27.4s, v11.4s // FMAX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMAX_ZPmZ_S | fmax z16.s, p5/m, z16.s, z12.s // FMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMAXNM_ZPmI_D | fmaxnm z25.d, p5/m, z25.d, #1.0 // FMAXNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXNMHrr | fmaxnm h29, h13, h14 // FMAXNM <Hd>, <Hn>, <Hm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXNMSrr | fmaxnm s25, s20, s0 // FMAXNM <Sd>, <Sn>, <Sm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXNMDrr | fmaxnm d29, d25, d16 // FMAXNM <Dd>, <Dn>, <Dm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXNMv4f32 | fmaxnm v6.4s, v3.4s, v3.4s // FMAXNM <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXNMv2f64 | fmaxnm v9.2d, v15.2d, v11.2d // FMAXNM <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMAXNM_ZPmZ_S | fmaxnm z6.s, p5/m, z6.s, z17.s // FMAXNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXNMPv2i16p | fmaxnmp h25, v19.2h // FMAXNMP <Vh><d>, <Vn>.<Th> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXNMPv2i64p | fmaxnmp d17, v29.2d // FMAXNMP <V><d>, <Vn>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXNMPv4f32 | fmaxnmp v31.4s, v4.4s, v2.4s // FMAXNMP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXNMPv4f32 | fmaxnmp v23.4s, v15.4s, v1.4s // FMAXNMP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitV[2] | FMAXNMVv4i16v | fmaxnmv h0, v13.4h // FMAXNMV H<d>, <Vn>.4H \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 2 4 4 2.00 V1UnitV[2]
+# CHECK-NEXT: 3 | 6 | 6 | 1.33 | V1UnitV[3] | FMAXNMVv8i16v | fmaxnmv h12, v11.8h // FMAXNMV H<d>, <Vn>.8H \\ ASIMD FP max/min, reduce, Q-form F16 \\ 3 6 6 1.33 V1UnitV[3]
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitV[2] | FMAXNMVv4i32v | fmaxnmv s28, v31.4s // FMAXNMV S<d>, <Vn>.4S \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 2 4 4 2.00 V1UnitV[2]
+# CHECK-NEXT: 6 | 13 | 13 | 0.33 | V1UnitV[6], V1UnitV01[6] | FMAXNMV_VPZ_H | fmaxnmv h9, p3, z2.h // FMAXNMV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 6 13 13 0.33 V1UnitV[6], V1UnitV01[6]
+# CHECK-NEXT: 6 | 11 | 11 | 0.40 | V1UnitV[6], V1UnitV01[5] | FMAXNMV_VPZ_S | fmaxnmv s26, p6, z0.s // FMAXNMV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 6 11 11 0.40 V1UnitV[6], V1UnitV01[5]
+# CHECK-NEXT: 5 | 9 | 9 | 0.50 | V1UnitV[5], V1UnitV01[4] | FMAXNMV_VPZ_D | fmaxnmv d7, p1, z29.d // FMAXNMV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 5 9 9 0.50 V1UnitV01[4]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXPv2i16p | fmaxp h15, v25.2h // FMAXP <Vh><d>, <Vn>.<Th> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXPv2i32p | fmaxp s6, v2.2s // FMAXP <V><d>, <Vn>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXPv2f32 | fmaxp v21.2s, v17.2s, v13.2s // FMAXP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXPv4f32 | fmaxp v10.4s, v5.4s, v25.4s // FMAXP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitV[2] | FMAXVv4i16v | fmaxv h23, v4.4h // FMAXV H<d>, <Vn>.4H \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 2 4 4 2.00 V1UnitV[2]
+# CHECK-NEXT: 3 | 6 | 6 | 1.33 | V1UnitV[3] | FMAXVv8i16v | fmaxv h25, v15.8h // FMAXV H<d>, <Vn>.8H \\ ASIMD FP max/min, reduce, Q-form F16 \\ 3 6 6 1.33 V1UnitV[3]
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitV[2] | FMAXVv4i32v | fmaxv s23, v2.4s // FMAXV S<d>, <Vn>.4S \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 2 4 4 2.00 V1UnitV[2]
+# CHECK-NEXT: 6 | 13 | 13 | 0.33 | V1UnitV[6], V1UnitV01[6] | FMAXV_VPZ_H | fmaxv h12, p0, z22.h // FMAXV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 6 13 13 0.33 V1UnitV[6], V1UnitV01[6]
+# CHECK-NEXT: 6 | 11 | 11 | 0.40 | V1UnitV[6], V1UnitV01[5] | FMAXV_VPZ_S | fmaxv s24, p5, z12.s // FMAXV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 6 11 11 0.40 V1UnitV[6], V1UnitV01[5]
+# CHECK-NEXT: 5 | 9 | 9 | 0.50 | V1UnitV[5], V1UnitV01[4] | FMAXV_VPZ_D | fmaxv d1, p6, z25.d // FMAXV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 5 9 9 0.50 V1UnitV01[4]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMIN_ZPmI_D | fmin z24.d, p4/m, z24.d, #0.0 // FMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINHrr | fmin h4, h13, h17 // FMIN <Hd>, <Hn>, <Hm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINSrr | fmin s1, s14, s22 // FMIN <Sd>, <Sn>, <Sm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINDrr | fmin d18, d19, d22 // FMIN <Dd>, <Dn>, <Dm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINv4f32 | fmin v6.4s, v25.4s, v27.4s // FMIN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINv2f32 | fmin v12.2s, v30.2s, v25.2s // FMIN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMIN_ZPmZ_H | fmin z11.h, p3/m, z11.h, z16.h // FMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMINNM_ZPmI_H | fminnm z19.h, p4/m, z19.h, #0.0 // FMINNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINNMHrr | fminnm h29, h23, h17 // FMINNM <Hd>, <Hn>, <Hm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINNMSrr | fminnm s24, s14, s30 // FMINNM <Sd>, <Sn>, <Sm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINNMDrr | fminnm d0, d26, d8 // FMINNM <Dd>, <Dn>, <Dm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINNMv2f32 | fminnm v16.2s, v23.2s, v27.2s // FMINNM <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINNMv4f32 | fminnm v23.4s, v19.4s, v22.4s // FMINNM <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMINNM_ZPmZ_S | fminnm z24.s, p3/m, z24.s, z13.s // FMINNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINNMPv2i16p | fminnmp h20, v14.2h // FMINNMP <Vh><d>, <Vn>.<Th> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINNMPv2i64p | fminnmp d15, v8.2d // FMINNMP <V><d>, <Vn>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINNMPv2f64 | fminnmp v27.2d, v27.2d, v16.2d // FMINNMP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINNMPv4f32 | fminnmp v2.4s, v14.4s, v14.4s // FMINNMP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitV[2] | FMINNMVv4i16v | fminnmv h19, v25.4h // FMINNMV H<d>, <Vn>.4H \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 2 4 4 2.00 V1UnitV[2]
+# CHECK-NEXT: 3 | 6 | 6 | 1.33 | V1UnitV[3] | FMINNMVv8i16v | fminnmv h23, v17.8h // FMINNMV H<d>, <Vn>.8H \\ ASIMD FP max/min, reduce, Q-form F16 \\ 3 6 6 1.33 V1UnitV[3]
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitV[2] | FMINNMVv4i32v | fminnmv s29, v17.4s // FMINNMV S<d>, <Vn>.4S \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 2 4 4 2.00 V1UnitV[2]
+# CHECK-NEXT: 6 | 13 | 13 | 0.33 | V1UnitV[6], V1UnitV01[6] | FMINNMV_VPZ_H | fminnmv h24, p3, z1.h // FMINNMV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 6 13 13 0.33 V1UnitV[6], V1UnitV01[6]
+# CHECK-NEXT: 6 | 11 | 11 | 0.40 | V1UnitV[6], V1UnitV01[5] | FMINNMV_VPZ_S | fminnmv s30, p3, z9.s // FMINNMV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 6 11 11 0.40 V1UnitV[6], V1UnitV01[5]
+# CHECK-NEXT: 5 | 9 | 9 | 0.50 | V1UnitV[5], V1UnitV01[4] | FMINNMV_VPZ_D | fminnmv d18, p5, z8.d // FMINNMV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 5 9 9 0.50 V1UnitV01[4]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINPv2i16p | fminp h7, v10.2h // FMINP <Vh><d>, <Vn>.<Th> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINPv2i32p | fminp s17, v7.2s // FMINP <V><d>, <Vn>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINPv4f32 | fminp v25.4s, v2.4s, v15.4s // FMINP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINPv2f32 | fminp v14.2s, v28.2s, v15.2s // FMINP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitV[2] | FMINVv4i16v | fminv h3, v30.4h // FMINV H<d>, <Vn>.4H \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 2 4 4 2.00 V1UnitV[2]
+# CHECK-NEXT: 3 | 6 | 6 | 1.33 | V1UnitV[3] | FMINVv8i16v | fminv h29, v12.8h // FMINV H<d>, <Vn>.8H \\ ASIMD FP max/min, reduce, Q-form F16 \\ 3 6 6 1.33 V1UnitV[3]
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitV[2] | FMINVv4i32v | fminv s16, v19.4s // FMINV S<d>, <Vn>.4S \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 2 4 4 2.00 V1UnitV[2]
+# CHECK-NEXT: 6 | 13 | 13 | 0.33 | V1UnitV[6], V1UnitV01[6] | FMINV_VPZ_H | fminv h15, p2, z25.h // FMINV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 6 13 13 0.33 V1UnitV[6], V1UnitV01[6]
+# CHECK-NEXT: 6 | 11 | 11 | 0.40 | V1UnitV[6], V1UnitV01[5] | FMINV_VPZ_S | fminv s4, p0, z6.s // FMINV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 6 11 11 0.40 V1UnitV[6], V1UnitV01[5]
+# CHECK-NEXT: 5 | 9 | 9 | 0.50 | V1UnitV[5], V1UnitV01[4] | FMINV_VPZ_D | fminv d20, p1, z5.d // FMINV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 5 9 9 0.50 V1UnitV01[4]
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLAv1i16_indexed | fmla h23, h24, v15.h[4] // FMLA <Hd>, <Hn>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLAv1i32_indexed | fmla s9, s20, v28.s[2] // FMLA S<d>, S<n>, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLAv1i64_indexed | fmla d12, d20, v7.d[1] // FMLA D<d>, D<n>, <Vm>.D[<indexd>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLAv8i16_indexed | fmla v29.8h, v15.8h, v10.h[4] // FMLA <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLAv2i32_indexed | fmla v2.2s, v16.2s, v28.s[0] // FMLA <Vd>.2S, <Vn>.2S, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLAv4i32_indexed | fmla v14.4s, v14.4s, v5.s[3] // FMLA <Vd>.4S, <Vn>.4S, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLAv2i64_indexed | fmla v10.2d, v14.2d, v21.d[1] // FMLA <Vd>.2D, <Vn>.2D, <Vm>.D[<indexd>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMLA_ZZZI_H | fmla z2.h, z4.h, z7.h[0] // FMLA <Zda>.H, <Zn>.H, <Zmhs>.H[<immh>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMLA_ZZZI_S | fmla z22.s, z15.s, z1.s[3] // FMLA <Zda>.S, <Zn>.S, <Zmhs>.S[<imms>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMLA_ZZZI_D | fmla z1.d, z30.d, z11.d[1] // FMLA <Zda>.D, <Zn>.D, <Zmd>.D[<immd>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLAv4f32 | fmla v1.4s, v24.4s, v12.4s // FMLA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLAv2f64 | fmla v30.2d, v16.2d, v6.2d // FMLA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMLA_ZPmZZ_S | fmla z6.s, p1/m, z24.s, z24.s // FMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLSv1i16_indexed | fmls h8, h14, v7.h[4] // FMLS <Hd>, <Hn>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLSv1i32_indexed | fmls s20, s17, v5.s[2] // FMLS S<d>, S<n>, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLSv1i64_indexed | fmls d11, d24, v29.d[0] // FMLS D<d>, D<n>, <Vm>.D[<indexd>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLSv8i16_indexed | fmls v30.8h, v18.8h, v4.h[6] // FMLS <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLSv2i32_indexed | fmls v10.2s, v27.2s, v0.s[0] // FMLS <Vd>.2S, <Vn>.2S, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLSv4i32_indexed | fmls v27.4s, v7.4s, v24.s[0] // FMLS <Vd>.4S, <Vn>.4S, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLSv2i64_indexed | fmls v10.2d, v22.2d, v29.d[0] // FMLS <Vd>.2D, <Vn>.2D, <Vm>.D[<indexd>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMLS_ZZZI_H | fmls z3.h, z31.h, z0.h[6] // FMLS <Zda>.H, <Zn>.H, <Zmhs>.H[<immh>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMLS_ZZZI_S | fmls z30.s, z8.s, z0.s[2] // FMLS <Zda>.S, <Zn>.S, <Zmhs>.S[<imms>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMLS_ZZZI_D | fmls z10.d, z20.d, z0.d[1] // FMLS <Zda>.D, <Zn>.D, <Zmd>.D[<immd>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLSv2f32 | fmls v6.2s, v3.2s, v12.2s // FMLS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLSv8f16 | fmls v6.8h, v15.8h, v23.8h // FMLS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMLS_ZPmZZ_S | fmls z26.s, p5/m, z28.s, z26.s // FMLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | FMOVHWr | fmov w15, h31 // FMOV <Wd>, <Hn> \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | FMOVHXr | fmov x21, h14 // FMOV <Xd>, <Hn> \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | FMOVWHr | fmov h6, w5 // FMOV <Hd>, <Wn> \\ FP transfer, from gen to low half of vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | FMOVWSr | fmov s22, w0 // FMOV <Sd>, <Wn> \\ FP transfer, from gen to low half of vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | FMOVSWr | fmov w23, s30 // FMOV <Wd>, <Sn> \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | FMOVXHr | fmov h16, x27 // FMOV <Hd>, <Xn> \\ FP transfer, from gen to low half of vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | FMOVXDr | fmov d22, x12 // FMOV <Dd>, <Xn> \\ FP transfer, from gen to low half of vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV | FMOVXDHighr | fmov v7.d[1], x8 // FMOV <Vd>.D[1], <Xn> \\ FP transfer, from gen to high half of vec reg \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | FMOVDXr | fmov x26, d29 // FMOV <Xd>, <Dn> \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | FMOVDXHighr | fmov x4, v26.d[1] // FMOV <Xd>, <Vn>.D[1] \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FCPY_ZPmI_S | fmov z2.s, p0/m, #0.50000000 // FMOV <Zd>.<T>, <Pg>/M, #<const> \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FDUP_ZI_S | fmov z14.s, #0.50000000 // FMOV <Zd>.<T>, #<const> \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMOVHr | fmov h18, h28 // FMOV <Hd>, <Hn> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMOVSr | fmov s13, s23 // FMOV <Sd>, <Sn> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMOVDr | fmov d27, d17 // FMOV <Dd>, <Dn> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMOVHi | fmov h29, #0.50000000 // FMOV <Hd>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMOVSi | fmov s22, #0.50000000 // FMOV <Sd>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMOVDi | fmov d18, #0.50000000 // FMOV <Dd>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMOVv2f32_ns | fmov v12.2s, #0.50000000 // FMOV <Vd>.<T>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMOVv2f32_ns | fmov v10.2s, #0.50000000 // FMOV <Vd>.<T>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMOVv2f64_ns | fmov v0.2d, #0.50000000 // FMOV <Vd>.2D, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPmI_D | mov z2.d, p2/m, #0 // FMOV <Zd>.<T>, <Pg>/M, #0.0 \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_S | mov z5.s, #0 // FMOV <Zd>.<T>, #0.0 \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMSB_ZPmZZ_S | fmsb z25.s, p5/m, z25.s, z29.s // FMSB <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMSUBHrrr | fmsub h25, h28, h12, h24 // FMSUB <Hd>, <Hn>, <Hm>, <Ha> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMSUBSrrr | fmsub s31, s0, s23, s24 // FMSUB <Sd>, <Sn>, <Sm>, <Sa> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMSUBDrrr | fmsub d12, d10, d20, d16 // FMSUB <Dd>, <Dn>, <Dm>, <Da> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULv1i16_indexed | fmul h18, h4, v7.h[3] // FMUL <Hd>, <Hn>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULv1i32_indexed | fmul s17, s23, v30.s[2] // FMUL S<d>, S<n>, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULv1i64_indexed | fmul d27, d8, v10.d[1] // FMUL D<d>, D<n>, <Vm>.D[<indexd>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULv4i16_indexed | fmul v10.4h, v2.4h, v7.h[5] // FMUL <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULv2i32_indexed | fmul v5.2s, v12.2s, v9.s[0] // FMUL <Vd>.2S, <Vn>.2S, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULv4i32_indexed | fmul v15.4s, v30.4s, v2.s[3] // FMUL <Vd>.4S, <Vn>.4S, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULv2i64_indexed | fmul v11.2d, v31.2d, v24.d[1] // FMUL <Vd>.2D, <Vn>.2D, <Vm>.D[<indexd>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FMUL_ZPmI_H | fmul z17.h, p5/m, z17.h, #2.0 // FMUL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FMUL_ZZZI_H | fmul z27.h, z30.h, z0.h[0] // FMUL <Zd>.H, <Zn>.H, <Zmhs>.H[<immh>] \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FMUL_ZZZI_S | fmul z6.s, z16.s, z1.s[0] // FMUL <Zd>.S, <Zn>.S, <Zmhs>.S[<imms>] \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FMUL_ZZZI_D | fmul z4.d, z30.d, z2.d[0] // FMUL <Zd>.D, <Zn>.D, <Zmd>.D[<immd>] \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULHrr | fmul h28, h14, h3 // FMUL <Hd>, <Hn>, <Hm> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULSrr | fmul s28, s16, s24 // FMUL <Sd>, <Sn>, <Sm> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULDrr | fmul d19, d19, d0 // FMUL <Dd>, <Dn>, <Dm> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULv2f64 | fmul v0.2d, v14.2d, v20.2d // FMUL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULv2f64 | fmul v9.2d, v29.2d, v7.2d // FMUL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FMUL_ZPmZ_D | fmul z22.d, p1/m, z22.d, z3.d // FMUL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FMUL_ZZZ_S | fmul z19.s, z14.s, z26.s // FMUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULXv1i16_indexed | fmulx h18, h17, v7.h[1] // FMULX <Hd>, <Hn>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULXv1i32_indexed | fmulx s23, s3, v3.s[2] // FMULX S<d>, S<n>, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULXv1i64_indexed | fmulx d3, d13, v30.d[0] // FMULX D<d>, D<n>, <Vm>.D[<indexd>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULXv4i16_indexed | fmulx v28.4h, v25.4h, v15.h[1] // FMULX <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULXv2i32_indexed | fmulx v3.2s, v22.2s, v23.s[3] // FMULX <Vd>.2S, <Vn>.2S, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULXv4i32_indexed | fmulx v5.4s, v28.4s, v15.s[3] // FMULX <Vd>.4S, <Vn>.4S, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULXv2i64_indexed | fmulx v22.2d, v18.2d, v25.d[1] // FMULX <Vd>.2D, <Vn>.2D, <Vm>.D[<indexd>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMULX16 | fmulx h20, h25, h0 // FMULX <Hd>, <Hn>, <Hm> \\ ASIMD FP multiply \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMULX64 | fmulx d18, d19, d22 // FMULX <V><d>, <V><n>, <V><m> \\ ASIMD FP multiply \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULXv2f64 | fmulx v22.2d, v18.2d, v4.2d // FMULX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULXv2f32 | fmulx v16.2s, v4.2s, v27.2s // FMULX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FMULX_ZPmZ_H | fmulx z7.h, p5/m, z7.h, z21.h // FMULX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FNEGHr | fneg h2, h9 // FNEG <Hd>, <Hn> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FNEGSr | fneg s11, s19 // FNEG <Sd>, <Sn> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FNEGDr | fneg d5, d16 // FNEG <Dd>, <Dn> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FNEGv2f64 | fneg v26.2d, v2.2d // FNEG <Vd>.<T>, <Vn>.<T> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FNEGv2f32 | fneg v14.2s, v24.2s // FNEG <Vd>.<T>, <Vn>.<T> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FNEG_ZPmZ_S | fneg z16.s, p0/m, z25.s // FNEG <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FNMAD_ZPmZZ_H | fnmad z6.h, p2/m, z14.h, z21.h // FNMAD <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FNMADDHrrr | fnmadd h3, h18, h31, h24 // FNMADD <Hd>, <Hn>, <Hm>, <Ha> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FNMADDSrrr | fnmadd s8, s18, s2, s14 // FNMADD <Sd>, <Sn>, <Sm>, <Sa> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FNMADDDrrr | fnmadd d19, d29, d28, d30 // FNMADD <Dd>, <Dn>, <Dm>, <Da> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FNMLA_ZPmZZ_D | fnmla z15.d, p0/m, z8.d, z29.d // FNMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FNMLS_ZPmZZ_D | fnmls z13.d, p0/m, z8.d, z12.d // FNMLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FNMSB_ZPmZZ_D | fnmsb z30.d, p7/m, z8.d, z9.d // FNMSB <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FNMSUBHrrr | fnmsub h3, h29, h24, h17 // FNMSUB <Hd>, <Hn>, <Hm>, <Ha> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FNMSUBSrrr | fnmsub s29, s26, s17, s4 // FNMSUB <Sd>, <Sn>, <Sm>, <Sa> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FNMSUBDrrr | fnmsub d7, d13, d13, d4 // FNMSUB <Dd>, <Dn>, <Dm>, <Da> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FNMULHrr | fnmul h3, h15, h7 // FNMUL <Hd>, <Hn>, <Hm> \\ FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FNMULSrr | fnmul s16, s11, s2 // FNMUL <Sd>, <Sn>, <Sm> \\ FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FNMULDrr | fnmul d12, d22, d14 // FNMUL <Dd>, <Dn>, <Dm> \\ FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | FRECPEv1f16 | frecpe h20, h8 // FRECPE <Hd>, <Hn> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 4 4 2.00 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRECPEv1i32 | frecpe s27, s7 // FRECPE S<d>, S<n> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRECPEv1i64 | frecpe d2, d1 // FRECPE D<d>, D<n> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | FRECPEv4f16 | frecpe v28.4h, v27.4h // FRECPE <Vd>.4H, <Vn>.4H \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 2.00 V1UnitV, V1UnitV02
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRECPEv8f16 | frecpe v9.8h, v6.8h // FRECPE <Vd>.8H, <Vn>.8H \\ ASIMD reciprocal and square root estimate, Q-form F16 \\ 2 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRECPEv2f32 | frecpe v25.2s, v28.2s // FRECPE <Vd>.2S, <Vn>.2S \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | FRECPEv4f32 | frecpe v21.4s, v18.4s // FRECPE <Vd>.4S, <Vn>.4S \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 2.00 V1UnitV, V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | FRECPEv2f64 | frecpe v10.2d, v26.2d // FRECPE <Vd>.2D, <Vn>.2D \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 2.00 V1UnitV, V1UnitV02
+# CHECK-NEXT: 4 | 6 | 6 | 0.25 | V1UnitV[4], V1UnitV0[4], V1UnitV01[4], V1UnitV02[4] | FRECPE_ZZ_H | frecpe z14.h, z0.h // FRECPE <Zd>.H, <Zn>.H \\ Floating point reciprocal estimate, F16 \\ 4 6 6 0.25 V1UnitV[4], V1UnitV0[4], V1UnitV01[4], V1UnitV02[4]
+# CHECK-NEXT: 2 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | FRECPE_ZZ_S | frecpe z5.s, z16.s // FRECPE <Zd>.S, <Zn>.S \\ Floating point reciprocal estimate, F32 \\ 2 4 4 0.50 V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRECPE_ZZ_D | frecpe z27.d, z11.d // FRECPE <Zd>.D, <Zn>.D \\ Floating point reciprocal estimate, F64 \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitV | FRECPS16 | frecps h29, h19, h8 // FRECPS <Hd>, <Hn>, <Hm> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitV | FRECPS64 | frecps d25, d17, d12 // FRECPS <V><d>, <V><n>, <V><m> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitV | FRECPSv8f16 | frecps v12.8h, v25.8h, v4.8h // FRECPS <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitV | FRECPSv2f64 | frecps v7.2d, v29.2d, v18.2d // FRECPS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV01 | FRECPS_ZZZ_S | frecps z11.s, z31.s, z1.s // FRECPS <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point reciprocal step \\ 1 4 4 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRECPXv1f16 | frecpx h18, h11 // FRECPX <Hd>, <Hn> \\ ASIMD reciprocal exponent \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRECPXv1i32 | frecpx s13, s30 // FRECPX <V><d>, <V><n> \\ ASIMD reciprocal exponent \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRECPX_ZPmZ_S | frecpx z15.s, p4/m, z12.s // FRECPX <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Floating point reciprocal exponent \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTN_ZPmZ_H | frintn z30.h, p3/m, z31.h // FRINTN <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTN_ZPmZ_S | frintn z17.s, p4/m, z23.s // FRINTN <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTN_ZPmZ_D | frintn z28.d, p1/m, z25.d // FRINTN <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTA_ZPmZ_H | frinta z10.h, p6/m, z17.h // FRINTA <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTA_ZPmZ_S | frinta z7.s, p4/m, z27.s // FRINTA <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTA_ZPmZ_D | frinta z17.d, p4/m, z17.d // FRINTA <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTM_ZPmZ_H | frintm z26.h, p7/m, z0.h // FRINTM <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTM_ZPmZ_S | frintm z6.s, p0/m, z28.s // FRINTM <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTM_ZPmZ_D | frintm z29.d, p4/m, z3.d // FRINTM <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTP_ZPmZ_H | frintp z20.h, p4/m, z12.h // FRINTP <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTP_ZPmZ_S | frintp z3.s, p7/m, z18.s // FRINTP <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTP_ZPmZ_D | frintp z28.d, p7/m, z4.d // FRINTP <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTZ_ZPmZ_H | frintz z27.h, p2/m, z12.h // FRINTZ <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTZ_ZPmZ_S | frintz z12.s, p6/m, z3.s // FRINTZ <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTZ_ZPmZ_D | frintz z12.d, p2/m, z31.d // FRINTZ <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTI_ZPmZ_H | frinti z16.h, p4/m, z9.h // FRINTI <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTI_ZPmZ_S | frinti z18.s, p6/m, z27.s // FRINTI <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTI_ZPmZ_D | frinti z26.d, p2/m, z12.d // FRINTI <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTX_ZPmZ_H | frintx z17.h, p0/m, z9.h // FRINTX <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTX_ZPmZ_S | frintx z27.s, p7/m, z16.s // FRINTX <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTX_ZPmZ_D | frintx z21.d, p4/m, z23.d // FRINTX <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTAHr | frinta h22, h10 // FRINTA <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTASr | frinta s15, s7 // FRINTA <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTADr | frinta d30, d10 // FRINTA <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTAv4f16 | frinta v24.4h, v10.4h // FRINTA <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTAv8f16 | frinta v5.8h, v3.8h // FRINTA <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTAv2f32 | frinta v23.2s, v22.2s // FRINTA <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTAv4f32 | frinta v28.4s, v28.4s // FRINTA <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTAv2f64 | frinta v3.2d, v13.2d // FRINTA <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTIHr | frinti h31, h14 // FRINTI <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTISr | frinti s23, s9 // FRINTI <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTIDr | frinti d8, d12 // FRINTI <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTIv4f16 | frinti v6.4h, v10.4h // FRINTI <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTIv8f16 | frinti v22.8h, v7.8h // FRINTI <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTIv2f32 | frinti v9.2s, v25.2s // FRINTI <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTIv4f32 | frinti v23.4s, v7.4s // FRINTI <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTIv2f64 | frinti v28.2d, v5.2d // FRINTI <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTMHr | frintm h0, h21 // FRINTM <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTMSr | frintm s22, s10 // FRINTM <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTMDr | frintm d5, d30 // FRINTM <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTMv4f16 | frintm v3.4h, v8.4h // FRINTM <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTMv8f16 | frintm v19.8h, v26.8h // FRINTM <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTMv2f32 | frintm v15.2s, v8.2s // FRINTM <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTMv4f32 | frintm v20.4s, v26.4s // FRINTM <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTMv2f64 | frintm v20.2d, v11.2d // FRINTM <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTNHr | frintn h12, h3 // FRINTN <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTNSr | frintn s27, s14 // FRINTN <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTNDr | frintn d30, d17 // FRINTN <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTNv4f16 | frintn v27.4h, v4.4h // FRINTN <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTNv8f16 | frintn v17.8h, v19.8h // FRINTN <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTNv2f32 | frintn v23.2s, v23.2s // FRINTN <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTNv4f32 | frintn v2.4s, v4.4s // FRINTN <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTNv2f64 | frintn v24.2d, v12.2d // FRINTN <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTPHr | frintp h17, h31 // FRINTP <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTPSr | frintp s14, s10 // FRINTP <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTPDr | frintp d25, d13 // FRINTP <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTPv4f16 | frintp v22.4h, v25.4h // FRINTP <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTPv8f16 | frintp v18.8h, v11.8h // FRINTP <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTPv2f32 | frintp v31.2s, v5.2s // FRINTP <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTPv4f32 | frintp v0.4s, v24.4s // FRINTP <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTPv2f64 | frintp v1.2d, v3.2d // FRINTP <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTXHr | frintx h4, h5 // FRINTX <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTXSr | frintx s10, s28 // FRINTX <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTXDr | frintx d17, d19 // FRINTX <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTXv4f16 | frintx v24.4h, v25.4h // FRINTX <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTXv8f16 | frintx v1.8h, v27.8h // FRINTX <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTXv2f32 | frintx v2.2s, v14.2s // FRINTX <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTXv4f32 | frintx v27.4s, v31.4s // FRINTX <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTXv2f64 | frintx v24.2d, v20.2d // FRINTX <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTZHr | frintz h10, h29 // FRINTZ <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTZSr | frintz s11, s23 // FRINTZ <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTZDr | frintz d6, d11 // FRINTZ <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTZv4f16 | frintz v13.4h, v5.4h // FRINTZ <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTZv8f16 | frintz v20.8h, v21.8h // FRINTZ <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTZv2f32 | frintz v15.2s, v19.2s // FRINTZ <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTZv4f32 | frintz v11.4s, v18.4s // FRINTZ <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTZv2f64 | frintz v12.2d, v22.2d // FRINTZ <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | FRSQRTEv1f16 | frsqrte h23, h26 // FRSQRTE <Hd>, <Hn> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 4 4 2.00 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRSQRTEv1i32 | frsqrte s23, s5 // FRSQRTE S<d>, S<n> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRSQRTEv1i64 | frsqrte d3, d11 // FRSQRTE D<d>, D<n> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | FRSQRTEv4f16 | frsqrte v16.4h, v15.4h // FRSQRTE <Vd>.4H, <Vn>.4H \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 2.00 V1UnitV, V1UnitV02
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRSQRTEv8f16 | frsqrte v14.8h, v0.8h // FRSQRTE <Vd>.8H, <Vn>.8H \\ ASIMD reciprocal and square root estimate, Q-form F16 \\ 2 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRSQRTEv2f32 | frsqrte v6.2s, v8.2s // FRSQRTE <Vd>.2S, <Vn>.2S \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | FRSQRTEv4f32 | frsqrte v30.4s, v21.4s // FRSQRTE <Vd>.4S, <Vn>.4S \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 2.00 V1UnitV, V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | FRSQRTEv2f64 | frsqrte v15.2d, v14.2d // FRSQRTE <Vd>.2D, <Vn>.2D \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 2.00 V1UnitV, V1UnitV02
+# CHECK-NEXT: 4 | 6 | 6 | 0.25 | V1UnitV[4], V1UnitV0[4], V1UnitV01[4], V1UnitV02[4] | FRSQRTE_ZZ_H | frsqrte z6.h, z30.h // FRSQRTE <Zd>.H, <Zn>.H \\ Floating point reciprocal estimate, F16 \\ 4 6 6 0.25 V1UnitV[4], V1UnitV0[4], V1UnitV01[4], V1UnitV02[4]
+# CHECK-NEXT: 2 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | FRSQRTE_ZZ_S | frsqrte z27.s, z15.s // FRSQRTE <Zd>.S, <Zn>.S \\ Floating point reciprocal estimate, F32 \\ 2 4 4 0.50 V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRSQRTE_ZZ_D | frsqrte z6.d, z17.d // FRSQRTE <Zd>.D, <Zn>.D \\ Floating point reciprocal estimate, F64 \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitV | FRSQRTS16 | frsqrts h28, h26, h1 // FRSQRTS <Hd>, <Hn>, <Hm> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitV | FRSQRTS32 | frsqrts s28, s1, s11 // FRSQRTS <V><d>, <V><n>, <V><m> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitV | FRSQRTSv4f16 | frsqrts v8.4h, v9.4h, v30.4h // FRSQRTS <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitV | FRSQRTSv4f32 | frsqrts v20.4s, v26.4s, v27.4s // FRSQRTS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV01 | FRSQRTS_ZZZ_H | frsqrts z10.h, z25.h, z22.h // FRSQRTS <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point reciprocal step \\ 1 4 4 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FSCALE_ZPmZ_H | fscale z2.h, p0/m, z2.h, z21.h // FSCALE <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 7 | 7 | 0.29 | V1UnitV[7], V1UnitV02[7] | FSQRTHr | fsqrt h13, h24 // FSQRT <Hd>, <Hn> \\ FP square root, H-form \\ 1 7 7 0.29 V1UnitV[7], V1UnitV02[7]
+# CHECK-NEXT: 1 | 10 | 10 | 0.29 | V1UnitV[7], V1UnitV02[7] | FSQRTSr | fsqrt s20, s15 // FSQRT <Sd>, <Sn> \\ FP square root, S-form \\ 1 9 9 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 16 | 16 | 0.29 | V1UnitV[7], V1UnitV02[7] | FSQRTDr | fsqrt d25, d21 // FSQRT <Dd>, <Dn> \\ FP square root, D-form \\ 1 16 16 0.25 V1UnitV02[8]
+# CHECK-NEXT: 1 | 7 | 7 | 0.29 | V1UnitV[7], V1UnitV02[7] | FSQRTv4f16 | fsqrt v24.4h, v14.4h // FSQRT <Vd>.4H, <Vn>.4H \\ ASIMD FP square root, D-form, F16 \\ 1 7 7 0.29 V1UnitV02[7]
+# CHECK-NEXT: 1 | 13 | 13 | 0.18 | V1UnitV[11], V1UnitV02[11] | FSQRTv8f16 | fsqrt v12.8h, v3.8h // FSQRT <Vd>.8H, <Vn>.8H \\ ASIMD FP square root, Q-form, F16 \\ 1 13 13 0.14 V1UnitV02[14]
+# CHECK-NEXT: 1 | 10 | 10 | 0.29 | V1UnitV[7], V1UnitV02[7] | FSQRTv2f32 | fsqrt v30.2s, v20.2s // FSQRT <Vd>.2S, <Vn>.2S \\ ASIMD FP square root, D-form, F32 \\ 1 10 10 0.4 V1UnitV02[5]
+# CHECK-NEXT: 1 | 10 | 10 | 0.29 | V1UnitV[7], V1UnitV02[7] | FSQRTv4f32 | fsqrt v2.4s, v24.4s // FSQRT <Vd>.4S, <Vn>.4S \\ ASIMD FP square root, Q-form, F32 \\ 1 10 10 0.2 V1UnitV02[10]
+# CHECK-NEXT: 1 | 16 | 16 | 0.29 | V1UnitV[7], V1UnitV02[7] | FSQRTv2f64 | fsqrt v28.2d, v25.2d // FSQRT <Vd>.2D, <Vn>.2D \\ ASIMD FP square root, Q-form, F64 \\ 1 16 16 0.12 V1UnitV02[16]
+# CHECK-NEXT: 1 | 13 | 13 | 0.10 | V1UnitV[10], V1UnitV0[10], V1UnitV01[10], V1UnitV02[10] | FSQRT_ZPmZ_H | fsqrt z13.h, p3/m, z11.h // FSQRT <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point square root, F16 \\ 1 13 13 0.08 V1UnitV0[13]
+# CHECK-NEXT: 1 | 10 | 10 | 0.14 | V1UnitV[7], V1UnitV0[7], V1UnitV01[7], V1UnitV02[7] | FSQRT_ZPmZ_S | fsqrt z2.s, p7/m, z0.s // FSQRT <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point square root, F32 \\ 1 10 10 0.1 V1UnitV0[10]
+# CHECK-NEXT: 1 | 16 | 16 | 0.14 | V1UnitV[7], V1UnitV0[7], V1UnitV01[7], V1UnitV02[7] | FSQRT_ZPmZ_D | fsqrt z17.d, p6/m, z17.d // FSQRT <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point square root F64 \\ 1 16 16 0.07 V1UnitV0[15]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FSUB_ZPmI_D | fsub z12.d, p6/m, z12.d, #1.0 // FSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FSUBHrr | fsub h20, h11, h18 // FSUB <Hd>, <Hn>, <Hm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FSUBSrr | fsub s15, s4, s24 // FSUB <Sd>, <Sn>, <Sm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FSUBDrr | fsub d25, d26, d4 // FSUB <Dd>, <Dn>, <Dm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FSUBv8f16 | fsub v13.8h, v15.8h, v17.8h // FSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FSUBv2f32 | fsub v1.2s, v31.2s, v27.2s // FSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FSUB_ZPmZ_S | fsub z24.s, p4/m, z24.s, z10.s // FSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FSUB_ZZZ_H | fsub z19.h, z8.h, z29.h // FSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FSUBR_ZPmI_H | fsubr z22.h, p7/m, z22.h, #0.5 // FSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FSUBR_ZPmZ_S | fsubr z13.s, p2/m, z13.s, z4.s // FSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FTMAD_ZZI_D | ftmad z19.d, z19.d, z6.d, #3 // FTMAD <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<imm> \\ Floating point trigonometric \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FTSMUL_ZZZ_S | ftsmul z21.s, z0.s, z10.s // FTSMUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point trigonometric \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FTSSEL_ZZZ_D | ftssel z5.d, z0.d, z15.d // FTSSEL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point trigonometric \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HINT | hint #9 // HINT #<imm> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HLT | hlt #0x7a67 // HLT #<imm> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HVC | hvc #0xecb9 // HVC #<imm> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | SYSxt | ic ialluis // IC <ic_op> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | SYSxt | ic ivau, x6 // IC <ic_op2>, <Xt> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | INCB_XPiI | incb x18 // INCB <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | INCB_XPiI | incb x17, vl3 // INCB <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | INCB_XPiI | incb x17, mul3, mul #7 // INCB <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | INCD_XPiI | incd x19 // INCD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | INCD_XPiI | incd x17, vl3 // INCD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | INCD_XPiI | incd x11, vl64, mul #7 // INCD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | INCH_XPiI | inch x24 // INCH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | INCH_XPiI | inch x23 // INCH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | INCH_XPiI | inch x22, vl1, mul #8 // INCH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | INCW_XPiI | incw x29 // INCW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | INCW_XPiI | incw x2, vl64 // INCW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | INCW_XPiI | incw x2, vl8 // INCW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INCD_ZPiI | incd z24.d // INCD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INCD_ZPiI | incd z23.d, vl8 // INCD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INCD_ZPiI | incd z20.d, vl2, mul #11 // INCD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INCH_ZPiI | inch z29.h // INCH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INCH_ZPiI | inch z28.h, vl16 // INCH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INCH_ZPiI | inch z29.h, vl16, mul #13 // INCH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INCW_ZPiI | incw z17.s // INCW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INCW_ZPiI | incw z31.s, mul3 // INCW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INCW_ZPiI | incw z12.s, vl4, mul #5 // INCW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | INCP_XP_H | incp x7, p0.h // INCP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 3 | 7 | 7 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV, V1UnitV01 | INCP_ZP_D | incp z2.d, p6.d // INCP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 3 7 7 0.50 V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV, V1UnitV01
+# CHECK-NEXT: 2 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | INDEX_IR_B | index z8.b, #15, w14 // INDEX <Zd>.B, #<imm>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13
+# CHECK-NEXT: 2 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | INDEX_IR_H | index z14.h, #11, w10 // INDEX <Zd>.H, #<imm>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13
+# CHECK-NEXT: 2 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | INDEX_IR_S | index z17.s, #14, w21 // INDEX <Zd>.S, #<imm>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13
+# CHECK-NEXT: 4 | 8 | 8 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | INDEX_IR_D | index z5.d, #11, x15 // INDEX <Zd>.D, #<imm>, X<m> \\ Horizontal operations, D form, scalar, imm/ scalar/ imm, scalar \\ 4 8 8 0.50 V1UnitM0[2],V1UnitV0[2]
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INDEX_II_B | index z16.b, #-2, #0 // INDEX <Zd>.B, #<imm1>, #<imm2> \\ Horizontal operations, B, H, S form, imm, imm \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INDEX_II_H | index z13.h, #13, #2 // INDEX <Zd>.H, #<imm1>, #<imm2> \\ Horizontal operations, B, H, S form, imm, imm \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INDEX_II_S | index z20.s, #6, #1 // INDEX <Zd>.S, #<imm1>, #<imm2> \\ Horizontal operations, B, H, S form, imm, imm \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 2 | 5 | 5 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | INDEX_II_D | index z13.d, #-15, #0 // INDEX <Zd>.D, #<imm1>, #<imm2> \\ Horizontal operations, D form, imm, imm \\ 2 5 5 0.50 V1UnitV0[2]
+# CHECK-NEXT: 2 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | INDEX_RI_B | index z28.b, w27, #1 // INDEX <Zd>.B, W<n>, #<imm> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13
+# CHECK-NEXT: 2 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | INDEX_RI_H | index z13.h, w28, #-5 // INDEX <Zd>.H, W<n>, #<imm> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13
+# CHECK-NEXT: 2 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | INDEX_RI_S | index z22.s, w7, #8 // INDEX <Zd>.S, W<n>, #<imm> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13
+# CHECK-NEXT: 4 | 8 | 8 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | INDEX_RI_D | index z0.d, x25, #-8 // INDEX <Zd>.D, X<n>, #<imm> \\ Horizontal operations, D form, scalar, imm/ scalar/ imm, scalar \\ 4 8 8 0.50 V1UnitM0[2],V1UnitV0[2]
+# CHECK-NEXT: 2 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | INDEX_RR_B | index z6.b, w24, w8 // INDEX <Zd>.B, W<n>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13
+# CHECK-NEXT: 2 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | INDEX_RR_H | index z20.h, w4, w7 // INDEX <Zd>.H, W<n>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13
+# CHECK-NEXT: 2 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | INDEX_RR_S | index z10.s, w2, w19 // INDEX <Zd>.S, W<n>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13
+# CHECK-NEXT: 4 | 8 | 8 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | INDEX_RR_D | index z2.d, x23, x7 // INDEX <Zd>.D, X<n>, X<m> \\ Horizontal operations, D form, scalar, imm/ scalar/ imm, scalar \\ 4 8 8 0.50 V1UnitM0[2],V1UnitV0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | INSvi8lane | mov v15.b[7], v6.b[15] // INS <Vd>.B[<index1b>], <Vn>.B[<index2b>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | INSvi16lane | mov v17.h[1], v3.h[2] // INS <Vd>.H[<index1h>], <Vn>.H[<index2h>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | INSvi32lane | mov v4.s[1], v7.s[0] // INS <Vd>.S[<index1s>], <Vn>.S[<index2s>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | INSvi64lane | mov v22.d[1], v25.d[1] // INS <Vd>.D[<index1d>], <Vn>.D[<index2d>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV | INSvi8gpr | mov v14.b[3], w12 // INS <Vd>.B[<indexb>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV | INSvi16gpr | mov v25.h[2], w14 // INS <Vd>.H[<indexh>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV | INSvi32gpr | mov v14.s[1], w29 // INS <Vd>.S[<indexs>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV | INSvi64gpr | mov v19.d[1], x27 // INS <Vd>.D[<indexd>], X<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | INSR_ZV_D | insr z4.d, d0 // INSR <Zdn>.<T>, <V><m> \\ Extract/insert operation, SIMD and FP scalar form \\ 1 3 3 1.0 V1UnitV1
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | INSR_ZR_D | insr z4.d, x14 // INSR <Zdn>.<T>, <R><m> \\ Extract/insert operation, scalar \\ 2 6 6 1.0 V1UnitV1,V1UnitM0
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | ISB | isb // ISB \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | ISB | isb // ISB <option> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | ISB | isb #1 // ISB #<imm> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LASTA_VPZ_B | lasta b3, p1, z3.b // LASTA <V><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, SIMD and FP scalar form \\ 1 3 3 1.0 V1UnitV1
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LASTA_RPZ_B | lasta w16, p0, z10.b // LASTA <R><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, scalar \\ 2 6 6 1.0 V1UnitV1,V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LASTB_VPZ_D | lastb d3, p1, z17.d // LASTB <V><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, SIMD and FP scalar form \\ 1 3 3 1.0 V1UnitV1
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LASTB_RPZ_D | lastb x4, p3, z31.d // LASTB <R><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, scalar \\ 2 6 6 1.0 V1UnitV1,V1UnitM0
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LD1Onev8b | ld1 { v23.8b }, [x11] // LD1 { <Vt>.8B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev8b_POST | ld1 { v25.8b }, [x30], #8 // LD1 { <Vt>.8B }, [<Xn|SP>], #8 \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev8b_POST | ld1 { v14.8b }, [x1], x26 // LD1 { <Vt>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LD1Onev16b | ld1 { v12.16b }, [x19] // LD1 { <Vt>.16B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev16b_POST | ld1 { v24.16b }, [x28], #16 // LD1 { <Vt>.16B }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev16b_POST | ld1 { v21.16b }, [x25], x28 // LD1 { <Vt>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LD1Onev4h | ld1 { v8.4h }, [x30] // LD1 { <Vt>.4H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev4h_POST | ld1 { v4.4h }, [x10], #8 // LD1 { <Vt>.4H }, [<Xn|SP>], #8 \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev4h_POST | ld1 { v17.4h }, [x12], x16 // LD1 { <Vt>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LD1Onev8h | ld1 { v24.8h }, [x27] // LD1 { <Vt>.8H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev8h_POST | ld1 { v21.8h }, [x24], #16 // LD1 { <Vt>.8H }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev8h_POST | ld1 { v9.8h }, [x9], x27 // LD1 { <Vt>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LD1Onev2s | ld1 { v4.2s }, [x2] // LD1 { <Vt>.2S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev2s_POST | ld1 { v19.2s }, [x27], #8 // LD1 { <Vt>.2S }, [<Xn|SP>], #8 \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev2s_POST | ld1 { v25.2s }, [x13], x19 // LD1 { <Vt>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LD1Onev4s | ld1 { v3.4s }, [x4] // LD1 { <Vt>.4S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev4s_POST | ld1 { v24.4s }, [x20], #16 // LD1 { <Vt>.4S }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev4s_POST | ld1 { v29.4s }, [x25], x23 // LD1 { <Vt>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LD1Onev1d | ld1 { v24.1d }, [x9] // LD1 { <Vt>.1D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev1d_POST | ld1 { v23.1d }, [x3], #8 // LD1 { <Vt>.1D }, [<Xn|SP>], #8 \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev1d_POST | ld1 { v19.1d }, [x10], x19 // LD1 { <Vt>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LD1Onev2d | ld1 { v3.2d }, [x28] // LD1 { <Vt>.2D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev2d_POST | ld1 { v8.2d }, [x16], #16 // LD1 { <Vt>.2D }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev2d_POST | ld1 { v5.2d }, [x1], x29 // LD1 { <Vt>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitL[2] | LD1Twov8b | ld1 { v24.8b, v25.8b }, [x6] // LD1 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 2 6 6 1.50 V1UnitL[2]
+# CHECK-NEXT: 3 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov8b_POST | ld1 { v17.8b, v18.8b }, [x18], #16 // LD1 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov8b_POST | ld1 { v18.8b, v19.8b }, [x6], x11 // LD1 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitL[2] | LD1Twov16b | ld1 { v0.16b, v1.16b }, [x14] // LD1 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 2 6 6 1.50 V1UnitL[2]
+# CHECK-NEXT: 3 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov16b_POST | ld1 { v20.16b, v21.16b }, [x2], #32 // LD1 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov16b_POST | ld1 { v5.16b, v6.16b }, [x17], x25 // LD1 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitL[2] | LD1Twov4h | ld1 { v25.4h, v26.4h }, [x3] // LD1 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 2 6 6 1.50 V1UnitL[2]
+# CHECK-NEXT: 3 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov4h_POST | ld1 { v10.4h, v11.4h }, [x14], #16 // LD1 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov4h_POST | ld1 { v0.4h, v1.4h }, [x24], x15 // LD1 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitL[2] | LD1Twov8h | ld1 { v1.8h, v2.8h }, [x27] // LD1 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 2 6 6 1.50 V1UnitL[2]
+# CHECK-NEXT: 3 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov8h_POST | ld1 { v22.8h, v23.8h }, [x13], #32 // LD1 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov8h_POST | ld1 { v9.8h, v10.8h }, [x4], x13 // LD1 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitL[2] | LD1Twov2s | ld1 { v6.2s, v7.2s }, [x29] // LD1 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 2 6 6 1.50 V1UnitL[2]
+# CHECK-NEXT: 3 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov2s_POST | ld1 { v23.2s, v24.2s }, [x10], #16 // LD1 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov2s_POST | ld1 { v26.2s, v27.2s }, [x21], x29 // LD1 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitL[2] | LD1Twov4s | ld1 { v11.4s, v12.4s }, [x30] // LD1 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 2 6 6 1.50 V1UnitL[2]
+# CHECK-NEXT: 3 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov4s_POST | ld1 { v23.4s, v24.4s }, [x14], #32 // LD1 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov4s_POST | ld1 { v12.4s, v13.4s }, [x27], x22 // LD1 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitL[2] | LD1Twov1d | ld1 { v27.1d, v28.1d }, [x7] // LD1 { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 2 6 6 1.50 V1UnitL[2]
+# CHECK-NEXT: 3 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov1d_POST | ld1 { v13.1d, v14.1d }, [x29], #16 // LD1 { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov1d_POST | ld1 { v1.1d, v2.1d }, [x7], x20 // LD1 { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitL[2] | LD1Twov2d | ld1 { v13.2d, v14.2d }, [x13] // LD1 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 2 6 6 1.50 V1UnitL[2]
+# CHECK-NEXT: 3 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov2d_POST | ld1 { v13.2d, v14.2d }, [x10], #32 // LD1 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov2d_POST | ld1 { v20.2d, v21.2d }, [x29], x28 // LD1 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 1.00 | V1UnitL[3] | LD1Threev8b | ld1 { v7.8b, v8.8b, v9.8b }, [x12] // LD1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 3 6 6 1.00 V1UnitL[3]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev8b_POST | ld1 { v13.8b, v14.8b, v15.8b }, [x10], #24 // LD1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], #24 \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev8b_POST | ld1 { v28.8b, v29.8b, v30.8b }, [x2], x21 // LD1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 1.00 | V1UnitL[3] | LD1Threev16b | ld1 { v19.16b, v20.16b, v21.16b }, [x10] // LD1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 3 6 6 1.00 V1UnitL[3]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev16b_POST | ld1 { v8.16b, v9.16b, v10.16b }, [x29], #48 // LD1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], #48 \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev16b_POST | ld1 { v14.16b, v15.16b, v16.16b }, [x5], x17 // LD1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 1.00 | V1UnitL[3] | LD1Threev4h | ld1 { v10.4h, v11.4h, v12.4h }, [x28] // LD1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 3 6 6 1.00 V1UnitL[3]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev4h_POST | ld1 { v22.4h, v23.4h, v24.4h }, [x6], #24 // LD1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], #24 \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev4h_POST | ld1 { v11.4h, v12.4h, v13.4h }, [x13], x23 // LD1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 1.00 | V1UnitL[3] | LD1Threev8h | ld1 { v21.8h, v22.8h, v23.8h }, [x22] // LD1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 3 6 6 1.00 V1UnitL[3]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev8h_POST | ld1 { v26.8h, v27.8h, v28.8h }, [x2], #48 // LD1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], #48 \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev8h_POST | ld1 { v6.8h, v7.8h, v8.8h }, [x22], x6 // LD1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 1.00 | V1UnitL[3] | LD1Threev2s | ld1 { v16.2s, v17.2s, v18.2s }, [x27] // LD1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 3 6 6 1.00 V1UnitL[3]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev2s_POST | ld1 { v3.2s, v4.2s, v5.2s }, [x30], #24 // LD1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], #24 \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev2s_POST | ld1 { v14.2s, v15.2s, v16.2s }, [x11], x28 // LD1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 1.00 | V1UnitL[3] | LD1Threev4s | ld1 { v0.4s, v1.4s, v2.4s }, [x24] // LD1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 3 6 6 1.00 V1UnitL[3]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev4s_POST | ld1 { v17.4s, v18.4s, v19.4s }, [x28], #48 // LD1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], #48 \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev4s_POST | ld1 { v5.4s, v6.4s, v7.4s }, [x20], x13 // LD1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 1.00 | V1UnitL[3] | LD1Threev1d | ld1 { v14.1d, v15.1d, v16.1d }, [x3] // LD1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 3 6 6 1.00 V1UnitL[3]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev1d_POST | ld1 { v21.1d, v22.1d, v23.1d }, [x24], #24 // LD1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], #24 \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev1d_POST | ld1 { v25.1d, v26.1d, v27.1d }, [x18], x14 // LD1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 1.00 | V1UnitL[3] | LD1Threev2d | ld1 { v12.2d, v13.2d, v14.2d }, [x15] // LD1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 3 6 6 1.00 V1UnitL[3]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev2d_POST | ld1 { v13.2d, v14.2d, v15.2d }, [x4], #48 // LD1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], #48 \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev2d_POST | ld1 { v15.2d, v16.2d, v17.2d }, [x10], x6 // LD1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 4 6 6 1.00 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitL[2] | LD1Fourv8b | ld1 { v4.8b, v5.8b, v6.8b, v7.8b }, [x13] // LD1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 2 6 6 1.50 V1UnitL[2]
+# CHECK-NEXT: 3 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Fourv8b_POST | ld1 { v8.8b, v9.8b, v10.8b, v11.8b }, [x30], #32 // LD1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Fourv8b_POST | ld1 { v4.8b, v5.8b, v6.8b, v7.8b }, [x20], x3 // LD1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 4 | 7 | 7 | 0.75 | V1UnitL[4] | LD1Fourv16b | ld1 { v13.16b, v14.16b, v15.16b, v16.16b }, [x9] // LD1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 4 7 7 0.75 V1UnitL[4]
+# CHECK-NEXT: 5 | 7 | 7 | 0.75 | V1UnitI, V1UnitL[4] | LD1Fourv16b_POST | ld1 { v3.16b, v4.16b, v5.16b, v6.16b }, [x17], #64 // LD1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], #64 \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 5 7 7 0.75 V1UnitL[4],V1UnitI
+# CHECK-NEXT: 5 | 7 | 7 | 0.75 | V1UnitI, V1UnitL[4] | LD1Fourv16b_POST | ld1 { v10.16b, v11.16b, v12.16b, v13.16b }, [x19], x29 // LD1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 5 7 7 0.75 V1UnitL[4],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitL[2] | LD1Fourv4h | ld1 { v20.4h, v21.4h, v22.4h, v23.4h }, [x15] // LD1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 2 6 6 1.50 V1UnitL[2]
+# CHECK-NEXT: 3 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Fourv4h_POST | ld1 { v4.4h, v5.4h, v6.4h, v7.4h }, [x12], #32 // LD1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Fourv4h_POST | ld1 { v24.4h, v25.4h, v26.4h, v27.4h }, [x25], x0 // LD1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 4 | 7 | 7 | 0.75 | V1UnitL[4] | LD1Fourv8h | ld1 { v0.8h, v1.8h, v2.8h, v3.8h }, [x21] // LD1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 4 7 7 0.75 V1UnitL[4]
+# CHECK-NEXT: 5 | 7 | 7 | 0.75 | V1UnitI, V1UnitL[4] | LD1Fourv8h_POST | ld1 { v12.8h, v13.8h, v14.8h, v15.8h }, [x21], #64 // LD1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], #64 \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 5 7 7 0.75 V1UnitL[4],V1UnitI
+# CHECK-NEXT: 5 | 7 | 7 | 0.75 | V1UnitI, V1UnitL[4] | LD1Fourv8h_POST | ld1 { v14.8h, v15.8h, v16.8h, v17.8h }, [x12], x23 // LD1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 5 7 7 0.75 V1UnitL[4],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitL[2] | LD1Fourv2s | ld1 { v21.2s, v22.2s, v23.2s, v24.2s }, [x21] // LD1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 2 6 6 1.50 V1UnitL[2]
+# CHECK-NEXT: 3 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Fourv2s_POST | ld1 { v27.2s, v28.2s, v29.2s, v30.2s }, [x11], #32 // LD1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Fourv2s_POST | ld1 { v24.2s, v25.2s, v26.2s, v27.2s }, [x1], x22 // LD1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 4 | 7 | 7 | 0.75 | V1UnitL[4] | LD1Fourv4s | ld1 { v15.4s, v16.4s, v17.4s, v18.4s }, [x28] // LD1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 4 7 7 0.75 V1UnitL[4]
+# CHECK-NEXT: 5 | 7 | 7 | 0.75 | V1UnitI, V1UnitL[4] | LD1Fourv4s_POST | ld1 { v14.4s, v15.4s, v16.4s, v17.4s }, [x8], #64 // LD1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], #64 \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 5 7 7 0.75 V1UnitL[4],V1UnitI
+# CHECK-NEXT: 5 | 7 | 7 | 0.75 | V1UnitI, V1UnitL[4] | LD1Fourv4s_POST | ld1 { v11.4s, v12.4s, v13.4s, v14.4s }, [x2], x28 // LD1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 5 7 7 0.75 V1UnitL[4],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitL[2] | LD1Fourv1d | ld1 { v22.1d, v23.1d, v24.1d, v25.1d }, [x4] // LD1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 2 6 6 1.50 V1UnitL[2]
+# CHECK-NEXT: 3 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Fourv1d_POST | ld1 { v3.1d, v4.1d, v5.1d, v6.1d }, [x23], #32 // LD1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Fourv1d_POST | ld1 { v22.1d, v23.1d, v24.1d, v25.1d }, [x9], x22 // LD1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 4 | 7 | 7 | 0.75 | V1UnitL[4] | LD1Fourv2d | ld1 { v18.2d, v19.2d, v20.2d, v21.2d }, [x6] // LD1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 4 7 7 0.75 V1UnitL[4]
+# CHECK-NEXT: 5 | 7 | 7 | 0.75 | V1UnitI, V1UnitL[4] | LD1Fourv2d_POST | ld1 { v3.2d, v4.2d, v5.2d, v6.2d }, [x3], #64 // LD1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], #64 \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 5 7 7 0.75 V1UnitL[4],V1UnitI
+# CHECK-NEXT: 5 | 7 | 7 | 0.75 | V1UnitI, V1UnitL[4] | LD1Fourv2d_POST | ld1 { v6.2d, v7.2d, v8.2d, v9.2d }, [x17], x18 // LD1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 5 7 7 0.75 V1UnitL[4],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1i8 | ld1 { v18.b }[3], [x23] // LD1 { <Vt>.B }[<indexb>], [<Xn|SP>] \\ ASIMD load, 1 element, one lane, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1i16 | ld1 { v18.h }[3], [x1] // LD1 { <Vt>.H }[<indexh>], [<Xn|SP>] \\ ASIMD load, 1 element, one lane, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1i32 | ld1 { v8.s }[0], [x24] // LD1 { <Vt>.S }[<indexs>], [<Xn|SP>] \\ ASIMD load, 1 element, one lane, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1i64 | ld1 { v11.d }[0], [x13] // LD1 { <Vt>.D }[<indexd>], [<Xn|SP>] \\ ASIMD load, 1 element, one lane, D \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1i8_POST | ld1 { v23.b }[1], [x13], #1 // LD1 { <Vt>.B }[<indexb>], [<Xn|SP>], #1 \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1i8_POST | ld1 { v10.b }[9], [x25], x14 // LD1 { <Vt>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1i16_POST | ld1 { v6.h }[2], [x26], #2 // LD1 { <Vt>.H }[<indexh>], [<Xn|SP>], #2 \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1i16_POST | ld1 { v30.h }[6], [x27], x3 // LD1 { <Vt>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1i32_POST | ld1 { v5.s }[1], [x10], #4 // LD1 { <Vt>.S }[<indexs>], [<Xn|SP>], #4 \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1i32_POST | ld1 { v13.s }[3], [x6], x24 // LD1 { <Vt>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1i64_POST | ld1 { v26.d }[1], [x28], #8 // LD1 { <Vt>.D }[<indexd>], [<Xn|SP>], #8 \\ ASIMD load, 1 element, one lane, D \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1i64_POST | ld1 { v1.d }[1], [x20], x30 // LD1 { <Vt>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, one lane, D \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1B_IMM | ld1b { z20.b }, p1/z, [x25] // LD1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1B_IMM | ld1b { z10.b }, p1/z, [x16, #-1, mul vl] // LD1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1B_H_IMM | ld1b { z31.h }, p1/z, [x4] // LD1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1B_H_IMM | ld1b { z5.h }, p5/z, [x8, #6, mul vl] // LD1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1B_S_IMM | ld1b { z1.s }, p3/z, [x12] // LD1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1B_S_IMM | ld1b { z24.s }, p2/z, [x28, #1, mul vl] // LD1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1B_D_IMM | ld1b { z25.d }, p5/z, [x2] // LD1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1B_D_IMM | ld1b { z0.d }, p6/z, [x22, #5, mul vl] // LD1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1B | ld1b { z7.b }, p0/z, [x24, x11] // LD1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1B_H | ld1b { z26.h }, p5/z, [x5, x21] // LD1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1B_S | ld1b { z22.s }, p3/z, [x16, x12] // LD1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1B_D | ld1b { z7.d }, p5/z, [x18, x12] // LD1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1B_D_UXTW | ld1b { z2.d }, p0/z, [x15, z18.d, uxtw] // LD1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 9 | 9 | 3.00 | V1UnitL, V1UnitV | GLD1B_S_SXTW | ld1b { z20.s }, p6/z, [x2, z0.s, sxtw] // LD1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 3.00 V1UnitL, V1UnitV
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1B_D | ld1b { z15.d }, p4/z, [x23, z9.d] // LD1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 11 | 11 | 3.00 | V1UnitL, V1UnitV | GLD1B_S_IMM | ld1b { z8.s }, p4/z, [z25.s, #22] // LD1B { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1B_D_IMM | ld1b { z13.d }, p2/z, [z3.d, #30] // LD1B { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1D_SXTW_SCALED | ld1d { z21.d }, p1/z, [x24, z31.d, sxtw #3] // LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #3] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1D_SXTW | ld1d { z7.d }, p0/z, [x13, z15.d, sxtw] // LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1D_SCALED | ld1d { z14.d }, p1/z, [x26, z27.d, lsl #3] // LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #3] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1D | ld1d { z30.d }, p7/z, [x14, z16.d] // LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1D_IMM | ld1d { z22.d }, p1/z, [z15.d] // LD1D { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1D_IMM | ld1d { z8.d }, p4/z, [z12.d, #200] // LD1D { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1H_IMM | ld1h { z3.h }, p2/z, [x21] // LD1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.00 V1UnitL, V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1H_IMM | ld1h { z15.h }, p0/z, [x25, #-3, mul vl] // LD1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.00 V1UnitL, V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1H_S_IMM | ld1h { z9.s }, p1/z, [x17] // LD1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.00 V1UnitL, V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1H_S_IMM | ld1h { z1.s }, p3/z, [x14, #5, mul vl] // LD1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.00 V1UnitL, V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1H_D_IMM | ld1h { z10.d }, p3/z, [x9] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.00 V1UnitL, V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1H_D_IMM | ld1h { z2.d }, p7/z, [x1, #4, mul vl] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.00 V1UnitL, V1UnitL01
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LD1H | ld1h { z26.h }, p5/z, [x10, x19, lsl #1] // LD1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LD1H_S | ld1h { z29.s }, p7/z, [x23, x11, lsl #1] // LD1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LD1H_D | ld1h { z2.d }, p5/z, [x30, x9, lsl #1] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 4 | 11 | 11 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1H_S_SXTW_SCALED | ld1h { z14.s }, p7/z, [x14, z28.s, sxtw #1] // LD1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 4 11 11 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1H_D_SXTW_SCALED | ld1h { z28.d }, p7/z, [x8, z9.d, sxtw #1] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1H_D_UXTW | ld1h { z16.d }, p5/z, [x7, z9.d, uxtw] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 9 | 9 | 3.00 | V1UnitL, V1UnitV | GLD1H_S_UXTW | ld1h { z27.s }, p4/z, [x4, z7.s, uxtw] // LD1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 3.00 V1UnitL, V1UnitV
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1H_D_SCALED | ld1h { z6.d }, p7/z, [x30, z26.d, lsl #1] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1H_D | ld1h { z11.d }, p2/z, [x20, z25.d] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 11 | 11 | 3.00 | V1UnitL, V1UnitV | GLD1H_S_IMM | ld1h { z6.s }, p7/z, [z31.s] // LD1H { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+# CHECK-NEXT: 2 | 11 | 11 | 3.00 | V1UnitL, V1UnitV | GLD1H_S_IMM | ld1h { z1.s }, p3/z, [z12.s, #8] // LD1H { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1H_D_IMM | ld1h { z7.d }, p7/z, [z9.d] // LD1H { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1H_D_IMM | ld1h { z13.d }, p3/z, [z5.d, #8] // LD1H { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1Rv8b | ld1r { v8.8b }, [x23] // LD1R { <Vt>.8B }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv8b_POST | ld1r { v4.8b }, [x25], #1 // LD1R { <Vt>.8B }, [<Xn|SP>], #1 \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv8b_POST | ld1r { v14.8b }, [x24], x14 // LD1R { <Vt>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1Rv16b | ld1r { v8.16b }, [x24] // LD1R { <Vt>.16B }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, Q-form \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv16b_POST | ld1r { v21.16b }, [x30], #1 // LD1R { <Vt>.16B }, [<Xn|SP>], #1 \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv16b_POST | ld1r { v1.16b }, [x3], x9 // LD1R { <Vt>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1Rv4h | ld1r { v28.4h }, [x9] // LD1R { <Vt>.4H }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv4h_POST | ld1r { v10.4h }, [x27], #2 // LD1R { <Vt>.4H }, [<Xn|SP>], #2 \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv4h_POST | ld1r { v12.4h }, [x8], x20 // LD1R { <Vt>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1Rv8h | ld1r { v3.8h }, [x16] // LD1R { <Vt>.8H }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, Q-form \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv8h_POST | ld1r { v27.8h }, [x18], #2 // LD1R { <Vt>.8H }, [<Xn|SP>], #2 \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv8h_POST | ld1r { v20.8h }, [x20], x4 // LD1R { <Vt>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1Rv2s | ld1r { v10.2s }, [x20] // LD1R { <Vt>.2S }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv2s_POST | ld1r { v28.2s }, [x8], #4 // LD1R { <Vt>.2S }, [<Xn|SP>], #4 \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv2s_POST | ld1r { v4.2s }, [x0], x12 // LD1R { <Vt>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1Rv4s | ld1r { v11.4s }, [x3] // LD1R { <Vt>.4S }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, Q-form \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv4s_POST | ld1r { v18.4s }, [x3], #4 // LD1R { <Vt>.4S }, [<Xn|SP>], #4 \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv4s_POST | ld1r { v2.4s }, [x4], x1 // LD1R { <Vt>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1Rv1d | ld1r { v3.1d }, [x15] // LD1R { <Vt>.1D }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, D-form, D \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv1d_POST | ld1r { v16.1d }, [x2], #8 // LD1R { <Vt>.1D }, [<Xn|SP>], #8 \\ ASIMD load, 1 element, all lanes, D-form, D \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv1d_POST | ld1r { v24.1d }, [x21], x3 // LD1R { <Vt>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, D-form, D \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1Rv2d | ld1r { v18.2d }, [x0] // LD1R { <Vt>.2D }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, Q-form \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv2d_POST | ld1r { v8.2d }, [x18], #8 // LD1R { <Vt>.2D }, [<Xn|SP>], #8 \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv2d_POST | ld1r { v8.2d }, [x16], x28 // LD1R { <Vt>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RB_IMM | ld1rb { z13.b }, p0/z, [x9] // LD1RB { <Zt>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RB_IMM | ld1rb { z30.b }, p6/z, [x21, #28] // LD1RB { <Zt>.B }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RB_H_IMM | ld1rb { z10.h }, p1/z, [x9] // LD1RB { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RB_H_IMM | ld1rb { z25.h }, p3/z, [x26, #6] // LD1RB { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RB_S_IMM | ld1rb { z24.s }, p2/z, [x19] // LD1RB { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RB_S_IMM | ld1rb { z16.s }, p1/z, [x8, #54] // LD1RB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RB_D_IMM | ld1rb { z17.d }, p7/z, [x4] // LD1RB { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RB_D_IMM | ld1rb { z4.d }, p7/z, [x20, #18] // LD1RB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RD_IMM | ld1rd { z12.d }, p7/z, [x20] // LD1RD { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RD_IMM | ld1rd { z19.d }, p5/z, [x13, #384] // LD1RD { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RH_IMM | ld1rh { z13.h }, p7/z, [x0] // LD1RH { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RH_IMM | ld1rh { z23.h }, p0/z, [x18, #56] // LD1RH { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RH_S_IMM | ld1rh { z24.s }, p6/z, [x27] // LD1RH { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RH_S_IMM | ld1rh { z6.s }, p7/z, [x1, #84] // LD1RH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RH_D_IMM | ld1rh { z3.d }, p4/z, [x25] // LD1RH { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RH_D_IMM | ld1rh { z25.d }, p5/z, [x5, #108] // LD1RH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RQ_B_IMM | ld1rqb { z31.b }, p1/z, [x6] // LD1RQB { <Zt>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RQ_B_IMM | ld1rqb { z21.b }, p7/z, [x29, #112] // LD1RQB { <Zt>.B }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RQ_B | ld1rqb { z7.b }, p6/z, [x26, x26] // LD1RQB { <Zt>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load broadcast, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RQ_D_IMM | ld1rqd { z10.d }, p0/z, [x28] // LD1RQD { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RQ_D_IMM | ld1rqd { z29.d }, p5/z, [x6, #-16] // LD1RQD { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RQ_D | ld1rqd { z5.d }, p6/z, [x7, x8, lsl #3] // LD1RQD { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous load broadcast, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RQ_H_IMM | ld1rqh { z29.h }, p3/z, [x3] // LD1RQH { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RQ_H_IMM | ld1rqh { z29.h }, p4/z, [x30, #112] // LD1RQH { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LD1RQ_H | ld1rqh { z9.h }, p0/z, [x23, x11, lsl #1] // LD1RQH { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous load broadcast, scalar + scalar + S \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RQ_W_IMM | ld1rqw { z11.s }, p0/z, [x26] // LD1RQW { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RQ_W_IMM | ld1rqw { z7.s }, p3/z, [x16, #-80] // LD1RQW { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RQ_W | ld1rqw { z2.s }, p0/z, [x21, x23, lsl #2] // LD1RQW { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous load broadcast, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RSB_H_IMM | ld1rsb { z6.h }, p6/z, [x23] // LD1RSB { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RSB_H_IMM | ld1rsb { z28.h }, p3/z, [x21, #43] // LD1RSB { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RSB_S_IMM | ld1rsb { z13.s }, p5/z, [x14] // LD1RSB { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RSB_S_IMM | ld1rsb { z26.s }, p3/z, [x15, #4] // LD1RSB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RSB_D_IMM | ld1rsb { z23.d }, p2/z, [x21] // LD1RSB { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RSB_D_IMM | ld1rsb { z29.d }, p6/z, [x14, #25] // LD1RSB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RSH_S_IMM | ld1rsh { z25.s }, p2/z, [x4] // LD1RSH { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RSH_S_IMM | ld1rsh { z30.s }, p5/z, [x6, #124] // LD1RSH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RSH_D_IMM | ld1rsh { z24.d }, p4/z, [x6] // LD1RSH { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RSH_D_IMM | ld1rsh { z14.d }, p3/z, [x20, #98] // LD1RSH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RSW_IMM | ld1rsw { z2.d }, p0/z, [x23] // LD1RSW { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RSW_IMM | ld1rsw { z18.d }, p7/z, [x11] // LD1RSW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RW_IMM | ld1rw { z12.s }, p7/z, [x9] // LD1RW { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RW_IMM | ld1rw { z25.s }, p7/z, [x17, #60] // LD1RW { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RW_D_IMM | ld1rw { z22.d }, p5/z, [x1] // LD1RW { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RW_D_IMM | ld1rw { z2.d }, p3/z, [x3, #36] // LD1RW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SB_H_IMM | ld1sb { z28.h }, p6/z, [x9] // LD1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SB_H_IMM | ld1sb { z22.h }, p2/z, [x19, #7, mul vl] // LD1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SB_S_IMM | ld1sb { z22.s }, p3/z, [x23] // LD1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SB_S_IMM | ld1sb { z2.s }, p6/z, [x22, #-2, mul vl] // LD1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SB_D_IMM | ld1sb { z31.d }, p6/z, [x10] // LD1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SB_D_IMM | ld1sb { z23.d }, p5/z, [x2, #-4, mul vl] // LD1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SB_H | ld1sb { z3.h }, p5/z, [x10, x23] // LD1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SB_S | ld1sb { z16.s }, p7/z, [x27, x16] // LD1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SB_D | ld1sb { z13.d }, p7/z, [x28, x18] // LD1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1SB_D_UXTW | ld1sb { z30.d }, p6/z, [x22, z27.d, uxtw] // LD1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 9 | 9 | 3.00 | V1UnitL, V1UnitV | GLD1SB_S_UXTW | ld1sb { z23.s }, p5/z, [x17, z10.s, uxtw] // LD1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 3.00 V1UnitL, V1UnitV
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1SB_D | ld1sb { z23.d }, p2/z, [x28, z10.d] // LD1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 11 | 11 | 3.00 | V1UnitL, V1UnitV | GLD1SB_S_IMM | ld1sb { z14.s }, p4/z, [z18.s, #24] // LD1SB { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1SB_D_IMM | ld1sb { z5.d }, p0/z, [z25.d, #31] // LD1SB { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SH_S_IMM | ld1sh { z8.s }, p3/z, [x21] // LD1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SH_S_IMM | ld1sh { z29.s }, p4/z, [x11, #-4, mul vl] // LD1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SH_D_IMM | ld1sh { z13.d }, p6/z, [x18] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SH_D_IMM | ld1sh { z19.d }, p2/z, [x29, #-3, mul vl] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LD1SH_S | ld1sh { z28.s }, p0/z, [x6, x28, lsl #1] // LD1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LD1SH_D | ld1sh { z26.d }, p0/z, [x7, x12, lsl #1] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 4 | 11 | 11 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1SH_S_UXTW_SCALED | ld1sh { z22.s }, p3/z, [x7, z1.s, uxtw #1] // LD1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 4 11 11 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1SH_D_SXTW_SCALED | ld1sh { z3.d }, p6/z, [x11, z14.d, sxtw #1] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1SH_D_SXTW | ld1sh { z27.d }, p3/z, [x19, z23.d, sxtw] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 9 | 9 | 3.00 | V1UnitL, V1UnitV | GLD1SH_S_SXTW | ld1sh { z12.s }, p5/z, [x27, z13.s, sxtw] // LD1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 3.00 V1UnitL, V1UnitV
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1SH_D_SCALED | ld1sh { z9.d }, p0/z, [x22, z8.d, lsl #1] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1SH_D | ld1sh { z22.d }, p0/z, [x27, z12.d] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 11 | 11 | 3.00 | V1UnitL, V1UnitV | GLD1SH_S_IMM | ld1sh { z1.s }, p2/z, [z9.s, #44] // LD1SH { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1SH_D_IMM | ld1sh { z11.d }, p5/z, [z30.d, #34] // LD1SH { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SW_D_IMM | ld1sw { z7.d }, p1/z, [x19] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SW_D_IMM | ld1sw { z28.d }, p1/z, [x26, #4, mul vl] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SW_D | ld1sw { z26.d }, p4/z, [x20, x17, lsl #2] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1SW_D_SXTW_SCALED | ld1sw { z22.d }, p1/z, [x14, z23.d, sxtw #2] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1SW_D_SXTW | ld1sw { z4.d }, p3/z, [x20, z15.d, sxtw] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1SW_D_SCALED | ld1sw { z1.d }, p4/z, [x20, z23.d, lsl #2] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1SW_D | ld1sw { z2.d }, p7/z, [x4, z0.d] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1SW_D_IMM | ld1sw { z12.d }, p7/z, [z21.d] // LD1SW { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1SW_D_IMM | ld1sw { z27.d }, p3/z, [z10.d, #24] // LD1SW { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 11 | 11 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1W_SXTW_SCALED | ld1w { z9.s }, p0/z, [x18, z9.s, sxtw #2] // LD1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 4 11 11 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1W_D_UXTW_SCALED | ld1w { z14.d }, p5/z, [x26, z2.d, uxtw #2] // LD1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1W_D_UXTW | ld1w { z31.d }, p6/z, [x17, z2.d, uxtw] // LD1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 9 | 9 | 3.00 | V1UnitL, V1UnitV | GLD1W_SXTW | ld1w { z14.s }, p2/z, [x18, z28.s, sxtw] // LD1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 3.00 V1UnitL, V1UnitV
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1W_D_SCALED | ld1w { z13.d }, p3/z, [x5, z11.d, lsl #2] // LD1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1W_D | ld1w { z24.d }, p3/z, [x2, z17.d] // LD1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 11 | 11 | 3.00 | V1UnitL, V1UnitV | GLD1W_IMM | ld1w { z4.s }, p0/z, [z1.s] // LD1W { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+# CHECK-NEXT: 2 | 11 | 11 | 3.00 | V1UnitL, V1UnitV | GLD1W_IMM | ld1w { z17.s }, p6/z, [z26.s, #60] // LD1W { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1W_D_IMM | ld1w { z31.d }, p7/z, [z22.d] // LD1W { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLD1W_D_IMM | ld1w { z2.d }, p3/z, [z6.d, #116] // LD1W { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 2.00 | V1UnitL, V1UnitV[2] | LD2Twov8b | ld2 { v13.8b, v14.8b }, [x4] // LD2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 3 8 8 2.00 V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 2.00 | V1UnitI, V1UnitL, V1UnitV[2] | LD2Twov8b_POST | ld2 { v20.8b, v21.8b }, [x11], #16 // LD2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 2.00 | V1UnitI, V1UnitL, V1UnitV[2] | LD2Twov8b_POST | ld2 { v13.8b, v14.8b }, [x4], x7 // LD2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2Twov16b | ld2 { v26.16b, v27.16b }, [x16] // LD2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 4 8 8 1.50 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 5 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Twov16b_POST | ld2 { v15.16b, v16.16b }, [x3], #32 // LD2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], #32 \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 5 8 8 1.50 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 5 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Twov16b_POST | ld2 { v24.16b, v25.16b }, [x7], x30 // LD2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 5 8 8 1.50 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 2.00 | V1UnitL, V1UnitV[2] | LD2Twov4h | ld2 { v0.4h, v1.4h }, [x21] // LD2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 3 8 8 2.00 V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 2.00 | V1UnitI, V1UnitL, V1UnitV[2] | LD2Twov4h_POST | ld2 { v5.4h, v6.4h }, [x30], #16 // LD2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 2.00 | V1UnitI, V1UnitL, V1UnitV[2] | LD2Twov4h_POST | ld2 { v5.4h, v6.4h }, [x22], x1 // LD2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2Twov8h | ld2 { v8.8h, v9.8h }, [x28] // LD2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 4 8 8 1.50 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 5 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Twov8h_POST | ld2 { v14.8h, v15.8h }, [x19], #32 // LD2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], #32 \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 5 8 8 1.50 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 5 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Twov8h_POST | ld2 { v28.8h, v29.8h }, [x26], x7 // LD2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 5 8 8 1.50 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 2.00 | V1UnitL, V1UnitV[2] | LD2Twov2s | ld2 { v2.2s, v3.2s }, [x16] // LD2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 3 8 8 2.00 V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 2.00 | V1UnitI, V1UnitL, V1UnitV[2] | LD2Twov2s_POST | ld2 { v23.2s, v24.2s }, [x5], #16 // LD2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 2.00 | V1UnitI, V1UnitL, V1UnitV[2] | LD2Twov2s_POST | ld2 { v22.2s, v23.2s }, [x11], x12 // LD2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2Twov4s | ld2 { v22.4s, v23.4s }, [x4] // LD2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 4 8 8 1.50 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 5 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Twov4s_POST | ld2 { v27.4s, v28.4s }, [x18], #32 // LD2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], #32 \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 5 8 8 1.50 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 5 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Twov4s_POST | ld2 { v22.4s, v23.4s }, [x26], x29 // LD2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 5 8 8 1.50 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 4 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2Twov2d | ld2 { v22.2d, v23.2d }, [x17] // LD2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, Q-form, D \\ 4 8 8 1.50 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 5 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Twov2d_POST | ld2 { v12.2d, v13.2d }, [x19], #32 // LD2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], #32 \\ ASIMD load, 2 element, multiple, Q-form, D \\ 5 8 8 1.50 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 5 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Twov2d_POST | ld2 { v6.2d, v7.2d }, [x11], x24 // LD2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, Q-form, D \\ 5 8 8 1.50 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 2.00 | V1UnitL, V1UnitV[2] | LD2i8 | ld2 { v29.b, v30.b }[3], [x1] // LD2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>] \\ ASIMD load, 2 element, one lane, B/H \\ 3 8 8 2.00 V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 2.00 | V1UnitL, V1UnitV[2] | LD2i16 | ld2 { v23.h, v24.h }[7], [x14] // LD2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>] \\ ASIMD load, 2 element, one lane, B/H \\ 3 8 8 2.00 V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 2.00 | V1UnitL, V1UnitV[2] | LD2i32 | ld2 { v26.s, v27.s }[1], [x17] // LD2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>] \\ ASIMD load, 2 element, one lane, S \\ 3 8 8 2.00 V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 2.00 | V1UnitL, V1UnitV[2] | LD2i64 | ld2 { v1.d, v2.d }[0], [x10] // LD2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>] \\ ASIMD load, 2 element, one lane, D \\ 3 8 8 2.00 V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 2.00 | V1UnitI, V1UnitL, V1UnitV[2] | LD2i8_POST | ld2 { v20.b, v21.b }[9], [x24], #2 // LD2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>], #2 \\ ASIMD load, 2 element, one lane, B/H \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 2.00 | V1UnitI, V1UnitL, V1UnitV[2] | LD2i8_POST | ld2 { v29.b, v30.b }[6], [x18], x19 // LD2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, one lane, B/H \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 2.00 | V1UnitI, V1UnitL, V1UnitV[2] | LD2i16_POST | ld2 { v2.h, v3.h }[3], [x12], #4 // LD2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>], #4 \\ ASIMD load, 2 element, one lane, B/H \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 2.00 | V1UnitI, V1UnitL, V1UnitV[2] | LD2i16_POST | ld2 { v11.h, v12.h }[3], [x18], x17 // LD2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, one lane, B/H \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 2.00 | V1UnitI, V1UnitL, V1UnitV[2] | LD2i32_POST | ld2 { v15.s, v16.s }[1], [x7], #8 // LD2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>], #8 \\ ASIMD load, 2 element, one lane, S \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 2.00 | V1UnitI, V1UnitL, V1UnitV[2] | LD2i32_POST | ld2 { v29.s, v30.s }[1], [x12], x0 // LD2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, one lane, S \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 2.00 | V1UnitI, V1UnitL, V1UnitV[2] | LD2i64_POST | ld2 { v1.d, v2.d }[1], [x3], #16 // LD2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>], #16 \\ ASIMD load, 2 element, one lane, D \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 2.00 | V1UnitI, V1UnitL, V1UnitV[2] | LD2i64_POST | ld2 { v10.d, v11.d }[1], [x18], x27 // LD2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, one lane, D \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2B_IMM | ld2b { z9.b, z10.b }, p2/z, [x22] // LD2B { <Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 4 8 8 1.00 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 4 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2B_IMM | ld2b { z28.b, z29.b }, p3/z, [x22, #4, mul vl] // LD2B { <Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 4 8 8 1.00 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2B | ld2b { z26.b, z27.b }, p1/z, [x3, x12] // LD2B { <Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 4 9 9 1.00 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 4 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2D_IMM | ld2d { z12.d, z13.d }, p5/z, [x24] // LD2D { <Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 4 8 8 1.00 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 4 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2D_IMM | ld2d { z22.d, z23.d }, p2/z, [x21, #-2, mul vl] // LD2D { <Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 4 8 8 1.00 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2D | ld2d { z22.d, z23.d }, p6/z, [x14, x4, lsl #3] // LD2D { <Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 4 9 9 1.00 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 4 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2H_IMM | ld2h { z5.h, z6.h }, p5/z, [x20] // LD2H { <Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 4 8 8 1.00 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 4 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2H_IMM | ld2h { z27.h, z28.h }, p7/z, [x11, #14, mul vl] // LD2H { <Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 4 8 8 1.00 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 4 | 10 | 10 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2H | ld2h { z18.h, z19.h }, p3/z, [x9, x17, lsl #1] // LD2H { <Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 4 10 10 1.00 V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2]
+# CHECK-NEXT: 3 | 8 | 8 | 2.00 | V1UnitL, V1UnitV[2] | LD2Rv8b | ld2r { v10.8b, v11.8b }, [x20] // LD2R { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 2.00 V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 2.00 | V1UnitI, V1UnitL, V1UnitV[2] | LD2Rv8b_POST | ld2r { v18.8b, v19.8b }, [x11], #2 // LD2R { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], #2 \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 2.00 | V1UnitI, V1UnitL, V1UnitV[2] | LD2Rv8b_POST | ld2r { v28.8b, v29.8b }, [x30], x14 // LD2R { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 2.00 | V1UnitL, V1UnitV[2] | LD2Rv16b | ld2r { v10.16b, v11.16b }, [x23] // LD2R { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 2.00 V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 2.00 | V1UnitI, V1UnitL, V1UnitV[2] | LD2Rv16b_POST | ld2r { v24.16b, v25.16b }, [x1], #2 // LD2R { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], #2 \\ ASIMD load, 2 element, all lanes, Q-form \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 2.00 | V1UnitI, V1UnitL, V1UnitV[2] | LD2Rv16b_POST | ld2r { v20.16b, v21.16b }, [x11], x7 // LD2R { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, Q-form \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 2.00 | V1UnitL, V1UnitV[2] | LD2Rv4h | ld2r { v25.4h, v26.4h }, [x11] // LD2R { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 2.00 V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 2.00 | V1UnitI, V1UnitL, V1UnitV[2] | LD2Rv4h_POST | ld2r { v28.4h, v29.4h }, [x18], #4 // LD2R { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], #4 \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 2.00 | V1UnitI, V1UnitL, V1UnitV[2] | LD2Rv4h_POST | ld2r { v21.4h, v22.4h }, [x2], x17 // LD2R { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 2.00 | V1UnitL, V1UnitV[2] | LD2Rv8h | ld2r { v23.8h, v24.8h }, [x10] // LD2R { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 2.00 V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 2.00 | V1UnitI, V1UnitL, V1UnitV[2] | LD2Rv8h_POST | ld2r { v19.8h, v20.8h }, [x29], #4 // LD2R { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], #4 \\ ASIMD load, 2 element, all lanes, Q-form \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 2.00 | V1UnitI, V1UnitL, V1UnitV[2] | LD2Rv8h_POST | ld2r { v13.8h, v14.8h }, [x13], x5 // LD2R { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, Q-form \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 2.00 | V1UnitL, V1UnitV[2] | LD2Rv2s | ld2r { v25.2s, v26.2s }, [x19] // LD2R { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 2.00 V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 2.00 | V1UnitI, V1UnitL, V1UnitV[2] | LD2Rv2s_POST | ld2r { v5.2s, v6.2s }, [x28], #8 // LD2R { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], #8 \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 2.00 | V1UnitI, V1UnitL, V1UnitV[2] | LD2Rv2s_POST | ld2r { v4.2s, v5.2s }, [x14], x19 // LD2R { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 2.00 | V1UnitL, V1UnitV[2] | LD2Rv4s | ld2r { v8.4s, v9.4s }, [x17] // LD2R { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 2.00 V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 2.00 | V1UnitI, V1UnitL, V1UnitV[2] | LD2Rv4s_POST | ld2r { v22.4s, v23.4s }, [x5], #8 // LD2R { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], #8 \\ ASIMD load, 2 element, all lanes, Q-form \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 2.00 | V1UnitI, V1UnitL, V1UnitV[2] | LD2Rv4s_POST | ld2r { v29.4s, v30.4s }, [x4], x18 // LD2R { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, Q-form \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 2.00 | V1UnitL, V1UnitV[2] | LD2Rv1d | ld2r { v9.1d, v10.1d }, [x25] // LD2R { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, D-form, D \\ 3 8 8 2.00 V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 2.00 | V1UnitI, V1UnitL, V1UnitV[2] | LD2Rv1d_POST | ld2r { v15.1d, v16.1d }, [x26], #16 // LD2R { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, all lanes, D-form, D \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 2.00 | V1UnitI, V1UnitL, V1UnitV[2] | LD2Rv1d_POST | ld2r { v10.1d, v11.1d }, [x28], x26 // LD2R { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, D-form, D \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 2.00 | V1UnitL, V1UnitV[2] | LD2Rv2d | ld2r { v26.2d, v27.2d }, [x8] // LD2R { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 2.00 V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 2.00 | V1UnitI, V1UnitL, V1UnitV[2] | LD2Rv2d_POST | ld2r { v14.2d, v15.2d }, [x3], #16 // LD2R { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, all lanes, Q-form \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 2.00 | V1UnitI, V1UnitL, V1UnitV[2] | LD2Rv2d_POST | ld2r { v24.2d, v25.2d }, [x6], x14 // LD2R { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, Q-form \\ 4 8 8 2.00 V1UnitI, V1UnitL, V1UnitV[2]
+# CHECK-NEXT: 4 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2W_IMM | ld2w { z21.s, z22.s }, p4/z, [x12] // LD2W { <Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 4 8 8 1.00 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 4 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2W_IMM | ld2w { z29.s, z30.s }, p2/z, [x19, #6, mul vl] // LD2W { <Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 4 8 8 1.00 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2W | ld2w { z18.s, z19.s }, p6/z, [x22, x22, lsl #2] // LD2W { <Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 4 9 9 1.00 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 5 | 8 | 8 | 1.33 | V1UnitL[2], V1UnitV[3] | LD3Threev8b | ld3 { v8.8b, v9.8b, v10.8b }, [x0] // LD3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 5 8 8 1.33 V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.33 | V1UnitI, V1UnitL[2], V1UnitV[3] | LD3Threev8b_POST | ld3 { v6.8b, v7.8b, v8.8b }, [x26], #24 // LD3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.33 | V1UnitI, V1UnitL[2], V1UnitV[3] | LD3Threev8b_POST | ld3 { v20.8b, v21.8b, v22.8b }, [x25], x24 // LD3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Threev16b | ld3 { v15.16b, v16.16b, v17.16b }, [x5] // LD3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 6 8 8 1.00 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 7 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev16b_POST | ld3 { v19.16b, v20.16b, v21.16b }, [x3], #48 // LD3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], #48 \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 7 8 8 1.00 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 7 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev16b_POST | ld3 { v26.16b, v27.16b, v28.16b }, [x8], x29 // LD3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 7 8 8 1.00 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 5 | 8 | 8 | 1.33 | V1UnitL[2], V1UnitV[3] | LD3Threev4h | ld3 { v15.4h, v16.4h, v17.4h }, [x8] // LD3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 5 8 8 1.33 V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.33 | V1UnitI, V1UnitL[2], V1UnitV[3] | LD3Threev4h_POST | ld3 { v4.4h, v5.4h, v6.4h }, [x5], #24 // LD3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.33 | V1UnitI, V1UnitL[2], V1UnitV[3] | LD3Threev4h_POST | ld3 { v24.4h, v25.4h, v26.4h }, [x25], x0 // LD3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Threev8h | ld3 { v7.8h, v8.8h, v9.8h }, [x21] // LD3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 6 8 8 1.00 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 7 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev8h_POST | ld3 { v4.8h, v5.8h, v6.8h }, [x26], #48 // LD3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], #48 \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 7 8 8 1.00 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 7 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev8h_POST | ld3 { v12.8h, v13.8h, v14.8h }, [x0], x25 // LD3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 7 8 8 1.00 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 5 | 8 | 8 | 1.33 | V1UnitL[2], V1UnitV[3] | LD3Threev2s | ld3 { v16.2s, v17.2s, v18.2s }, [x0] // LD3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 5 8 8 1.33 V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.33 | V1UnitI, V1UnitL[2], V1UnitV[3] | LD3Threev2s_POST | ld3 { v9.2s, v10.2s, v11.2s }, [x1], #24 // LD3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.33 | V1UnitI, V1UnitL[2], V1UnitV[3] | LD3Threev2s_POST | ld3 { v27.2s, v28.2s, v29.2s }, [x23], x4 // LD3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Threev4s | ld3 { v12.4s, v13.4s, v14.4s }, [x25] // LD3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 6 8 8 1.00 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 7 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev4s_POST | ld3 { v12.4s, v13.4s, v14.4s }, [x27], #48 // LD3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], #48 \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 7 8 8 1.00 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 7 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev4s_POST | ld3 { v2.4s, v3.4s, v4.4s }, [x22], x21 // LD3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 7 8 8 1.00 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 6 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Threev2d | ld3 { v10.2d, v11.2d, v12.2d }, [x18] // LD3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, Q-form, D \\ 6 8 8 1.00 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 7 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev2d_POST | ld3 { v25.2d, v26.2d, v27.2d }, [x4], #48 // LD3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], #48 \\ ASIMD load, 3 element, multiple, Q-form, D \\ 7 8 8 1.00 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 7 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev2d_POST | ld3 { v6.2d, v7.2d, v8.2d }, [x10], x24 // LD3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, Q-form, D \\ 7 8 8 1.00 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 5 | 8 | 8 | 1.33 | V1UnitL[2], V1UnitV[3] | LD3i8 | ld3 { v17.b, v18.b, v19.b }[2], [x27] // LD3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>] \\ ASIMD load, 3 element, one lane, B/H \\ 5 8 8 1.33 V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 5 | 8 | 8 | 1.33 | V1UnitL[2], V1UnitV[3] | LD3i16 | ld3 { v18.h, v19.h, v20.h }[5], [x16] // LD3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>] \\ ASIMD load, 3 element, one lane, B/H \\ 5 8 8 1.33 V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 5 | 8 | 8 | 1.33 | V1UnitL[2], V1UnitV[3] | LD3i32 | ld3 { v1.s, v2.s, v3.s }[3], [x14] // LD3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>] \\ ASIMD load, 3 element, one lane, S \\ 5 8 8 1.33 V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 5 | 8 | 8 | 1.33 | V1UnitL[2], V1UnitV[3] | LD3i64 | ld3 { v5.d, v6.d, v7.d }[1], [x14] // LD3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>] \\ ASIMD load, 3 element, one lane, D \\ 5 8 8 1.33 V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.33 | V1UnitI, V1UnitL[2], V1UnitV[3] | LD3i8_POST | ld3 { v16.b, v17.b, v18.b }[3], [x15], #3 // LD3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>], #3 \\ ASIMD load, 3 element, one lane, B/H \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.33 | V1UnitI, V1UnitL[2], V1UnitV[3] | LD3i8_POST | ld3 { v14.b, v15.b, v16.b }[4], [x23], x6 // LD3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, one lane, B/H \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.33 | V1UnitI, V1UnitL[2], V1UnitV[3] | LD3i16_POST | ld3 { v11.h, v12.h, v13.h }[1], [x28], #6 // LD3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>], #6 \\ ASIMD load, 3 element, one lane, B/H \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.33 | V1UnitI, V1UnitL[2], V1UnitV[3] | LD3i16_POST | ld3 { v4.h, v5.h, v6.h }[2], [x5], x15 // LD3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, one lane, B/H \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.33 | V1UnitI, V1UnitL[2], V1UnitV[3] | LD3i32_POST | ld3 { v26.s, v27.s, v28.s }[0], [x14], #12 // LD3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>], #12 \\ ASIMD load, 3 element, one lane, S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.33 | V1UnitI, V1UnitL[2], V1UnitV[3] | LD3i32_POST | ld3 { v1.s, v2.s, v3.s }[0], [x26], x20 // LD3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, one lane, S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.33 | V1UnitI, V1UnitL[2], V1UnitV[3] | LD3i64_POST | ld3 { v14.d, v15.d, v16.d }[1], [x30], #24 // LD3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>], #24 \\ ASIMD load, 3 element, one lane, D \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.33 | V1UnitI, V1UnitL[2], V1UnitV[3] | LD3i64_POST | ld3 { v23.d, v24.d, v25.d }[0], [x24], x14 // LD3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, one lane, D \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 11 | 11 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | LD3B_IMM | ld3b { z29.b - z31.b }, p3/z, [x17] // LD3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 6 11 11 0.67 V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+# CHECK-NEXT: 6 | 11 | 11 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | LD3B_IMM | ld3b { z23.b - z25.b }, p7/z, [x12, #18, mul vl] // LD3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 6 11 11 0.67 V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+# CHECK-NEXT: 7 | 8 | 8 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitS, V1UnitV[3], V1UnitV01[3] | LD3B | ld3b { z23.b - z25.b }, p3/z, [x12, x12] // LD3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 7 8 8 0.67 V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitS, V1UnitV[3], V1UnitV01[3]
+# CHECK-NEXT: 6 | 11 | 11 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | LD3D_IMM | ld3d { z20.d - z22.d }, p2/z, [x6] // LD3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 6 11 11 0.67 V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+# CHECK-NEXT: 6 | 11 | 11 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | LD3D_IMM | ld3d { z1.d - z3.d }, p2/z, [x9, #-15, mul vl] // LD3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 6 11 11 0.67 V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+# CHECK-NEXT: 7 | 8 | 8 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitS, V1UnitV[3], V1UnitV01[3] | LD3D | ld3d { z13.d - z15.d }, p6/z, [x27, x30, lsl #3] // LD3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 7 8 8 0.67 V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitS, V1UnitV[3], V1UnitV01[3]
+# CHECK-NEXT: 6 | 11 | 11 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | LD3H_IMM | ld3h { z26.h - z28.h }, p1/z, [x29] // LD3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 6 11 11 0.67 V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+# CHECK-NEXT: 6 | 11 | 11 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | LD3H_IMM | ld3h { z14.h - z16.h }, p3/z, [x18, #9, mul vl] // LD3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 6 11 11 0.67 V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+# CHECK-NEXT: 7 | 8 | 8 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitS, V1UnitV[3], V1UnitV01[3] | LD3H | ld3h { z5.h - z7.h }, p3/z, [x6, x21, lsl #1] // LD3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 7 8 8 0.67 V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitS, V1UnitV[3], V1UnitV01[3]
+# CHECK-NEXT: 5 | 8 | 8 | 1.33 | V1UnitL[2], V1UnitV[3] | LD3Rv8b | ld3r { v24.8b, v25.8b, v26.8b }, [x10] // LD3R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 5 8 8 1.33 V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.33 | V1UnitI, V1UnitL[2], V1UnitV[3] | LD3Rv8b_POST | ld3r { v14.8b, v15.8b, v16.8b }, [x11], #3 // LD3R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], #3 \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.33 | V1UnitI, V1UnitL[2], V1UnitV[3] | LD3Rv8b_POST | ld3r { v22.8b, v23.8b, v24.8b }, [x0], x11 // LD3R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 5 | 8 | 8 | 1.33 | V1UnitL[2], V1UnitV[3] | LD3Rv16b | ld3r { v17.16b, v18.16b, v19.16b }, [x3] // LD3R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 5 8 8 1.33 V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.33 | V1UnitI, V1UnitL[2], V1UnitV[3] | LD3Rv16b_POST | ld3r { v7.16b, v8.16b, v9.16b }, [x29], #3 // LD3R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], #3 \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.33 | V1UnitI, V1UnitL[2], V1UnitV[3] | LD3Rv16b_POST | ld3r { v3.16b, v4.16b, v5.16b }, [x20], x5 // LD3R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 5 | 8 | 8 | 1.33 | V1UnitL[2], V1UnitV[3] | LD3Rv4h | ld3r { v3.4h, v4.4h, v5.4h }, [x1] // LD3R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 5 8 8 1.33 V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.33 | V1UnitI, V1UnitL[2], V1UnitV[3] | LD3Rv4h_POST | ld3r { v8.4h, v9.4h, v10.4h }, [x3], #6 // LD3R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], #6 \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.33 | V1UnitI, V1UnitL[2], V1UnitV[3] | LD3Rv4h_POST | ld3r { v4.4h, v5.4h, v6.4h }, [x0], x28 // LD3R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 5 | 8 | 8 | 1.33 | V1UnitL[2], V1UnitV[3] | LD3Rv8h | ld3r { v6.8h, v7.8h, v8.8h }, [x28] // LD3R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 5 8 8 1.33 V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.33 | V1UnitI, V1UnitL[2], V1UnitV[3] | LD3Rv8h_POST | ld3r { v4.8h, v5.8h, v6.8h }, [x11], #6 // LD3R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], #6 \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.33 | V1UnitI, V1UnitL[2], V1UnitV[3] | LD3Rv8h_POST | ld3r { v3.8h, v4.8h, v5.8h }, [x17], x0 // LD3R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 5 | 8 | 8 | 1.33 | V1UnitL[2], V1UnitV[3] | LD3Rv2s | ld3r { v18.2s, v19.2s, v20.2s }, [x24] // LD3R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 5 8 8 1.33 V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.33 | V1UnitI, V1UnitL[2], V1UnitV[3] | LD3Rv2s_POST | ld3r { v8.2s, v9.2s, v10.2s }, [x22], #12 // LD3R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], #12 \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.33 | V1UnitI, V1UnitL[2], V1UnitV[3] | LD3Rv2s_POST | ld3r { v12.2s, v13.2s, v14.2s }, [x0], x14 // LD3R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 5 | 8 | 8 | 1.33 | V1UnitL[2], V1UnitV[3] | LD3Rv4s | ld3r { v28.4s, v29.4s, v30.4s }, [x2] // LD3R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 5 8 8 1.33 V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.33 | V1UnitI, V1UnitL[2], V1UnitV[3] | LD3Rv4s_POST | ld3r { v21.4s, v22.4s, v23.4s }, [x22], #12 // LD3R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], #12 \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.33 | V1UnitI, V1UnitL[2], V1UnitV[3] | LD3Rv4s_POST | ld3r { v28.4s, v29.4s, v30.4s }, [x13], x25 // LD3R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 5 | 8 | 8 | 1.33 | V1UnitL[2], V1UnitV[3] | LD3Rv1d | ld3r { v1.1d, v2.1d, v3.1d }, [x28] // LD3R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, D-form, D \\ 5 8 8 1.33 V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.33 | V1UnitI, V1UnitL[2], V1UnitV[3] | LD3Rv1d_POST | ld3r { v0.1d, v1.1d, v2.1d }, [x7], #24 // LD3R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, all lanes, D-form, D \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.33 | V1UnitI, V1UnitL[2], V1UnitV[3] | LD3Rv1d_POST | ld3r { v22.1d, v23.1d, v24.1d }, [x9], x15 // LD3R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, D-form, D \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 5 | 8 | 8 | 1.33 | V1UnitL[2], V1UnitV[3] | LD3Rv2d | ld3r { v8.2d, v9.2d, v10.2d }, [x3] // LD3R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, Q-form, D \\ 5 8 8 1.33 V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.33 | V1UnitI, V1UnitL[2], V1UnitV[3] | LD3Rv2d_POST | ld3r { v3.2d, v4.2d, v5.2d }, [x25], #24 // LD3R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, all lanes, Q-form, D \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 8 | 8 | 1.33 | V1UnitI, V1UnitL[2], V1UnitV[3] | LD3Rv2d_POST | ld3r { v8.2d, v9.2d, v10.2d }, [x18], x13 // LD3R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, Q-form, D \\ 6 8 8 1.33 V1UnitI, V1UnitL[2], V1UnitV[3]
+# CHECK-NEXT: 6 | 11 | 11 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | LD3W_IMM | ld3w { z23.s - z25.s }, p1/z, [x8] // LD3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 6 11 11 0.67 V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+# CHECK-NEXT: 6 | 11 | 11 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | LD3W_IMM | ld3w { z6.s - z8.s }, p4/z, [x0, #18, mul vl] // LD3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 6 11 11 0.67 V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+# CHECK-NEXT: 7 | 8 | 8 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitS, V1UnitV[3], V1UnitV01[3] | LD3W | ld3w { z27.s - z29.s }, p3/z, [x3, x6, lsl #2] // LD3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 7 8 8 0.67 V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitS, V1UnitV[3], V1UnitV01[3]
+# CHECK-NEXT: 7 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[4] | LD4Fourv8b | ld4 { v6.8b, v7.8b, v8.8b, v9.8b }, [x27] // LD4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 7 8 8 1.00 V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[4] | LD4Fourv8b_POST | ld4 { v20.8b, v21.8b, v22.8b, v23.8b }, [x10], #32 // LD4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[4] | LD4Fourv8b_POST | ld4 { v18.8b, v19.8b, v20.8b, v21.8b }, [x24], x11 // LD4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 9 | 9 | 0.75 | V1UnitL[4], V1UnitV[4] | LD4Fourv16b | ld4 { v11.16b, v12.16b, v13.16b, v14.16b }, [x5] // LD4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 8 9 9 0.75 V1UnitL[4], V1UnitV[4]
+# CHECK-NEXT: 9 | 9 | 9 | 0.75 | V1UnitI, V1UnitL[4], V1UnitV[4] | LD4Fourv16b_POST | ld4 { v10.16b, v11.16b, v12.16b, v13.16b }, [x12], #64 // LD4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], #64 \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 9 9 9 0.75 V1UnitI, V1UnitL[4], V1UnitV[4]
+# CHECK-NEXT: 9 | 9 | 9 | 0.75 | V1UnitI, V1UnitL[4], V1UnitV[4] | LD4Fourv16b_POST | ld4 { v12.16b, v13.16b, v14.16b, v15.16b }, [x4], x17 // LD4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 9 9 9 0.75 V1UnitI, V1UnitL[4], V1UnitV[4]
+# CHECK-NEXT: 7 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[4] | LD4Fourv4h | ld4 { v21.4h, v22.4h, v23.4h, v24.4h }, [x14] // LD4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 7 8 8 1.00 V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[4] | LD4Fourv4h_POST | ld4 { v10.4h, v11.4h, v12.4h, v13.4h }, [x19], #32 // LD4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[4] | LD4Fourv4h_POST | ld4 { v5.4h, v6.4h, v7.4h, v8.4h }, [x15], x17 // LD4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 9 | 9 | 0.75 | V1UnitL[4], V1UnitV[4] | LD4Fourv8h | ld4 { v9.8h, v10.8h, v11.8h, v12.8h }, [x1] // LD4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 8 9 9 0.75 V1UnitL[4], V1UnitV[4]
+# CHECK-NEXT: 9 | 9 | 9 | 0.75 | V1UnitI, V1UnitL[4], V1UnitV[4] | LD4Fourv8h_POST | ld4 { v2.8h, v3.8h, v4.8h, v5.8h }, [x0], #64 // LD4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], #64 \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 9 9 9 0.75 V1UnitI, V1UnitL[4], V1UnitV[4]
+# CHECK-NEXT: 9 | 9 | 9 | 0.75 | V1UnitI, V1UnitL[4], V1UnitV[4] | LD4Fourv8h_POST | ld4 { v4.8h, v5.8h, v6.8h, v7.8h }, [x17], x17 // LD4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 9 9 9 0.75 V1UnitI, V1UnitL[4], V1UnitV[4]
+# CHECK-NEXT: 7 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[4] | LD4Fourv2s | ld4 { v23.2s, v24.2s, v25.2s, v26.2s }, [x24] // LD4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 7 8 8 1.00 V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[4] | LD4Fourv2s_POST | ld4 { v25.2s, v26.2s, v27.2s, v28.2s }, [x3], #32 // LD4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[4] | LD4Fourv2s_POST | ld4 { v22.2s, v23.2s, v24.2s, v25.2s }, [x14], x15 // LD4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 9 | 9 | 0.75 | V1UnitL[4], V1UnitV[4] | LD4Fourv4s | ld4 { v17.4s, v18.4s, v19.4s, v20.4s }, [x4] // LD4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 8 9 9 0.75 V1UnitL[4], V1UnitV[4]
+# CHECK-NEXT: 9 | 9 | 9 | 0.75 | V1UnitI, V1UnitL[4], V1UnitV[4] | LD4Fourv4s_POST | ld4 { v25.4s, v26.4s, v27.4s, v28.4s }, [x19], #64 // LD4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], #64 \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 9 9 9 0.75 V1UnitI, V1UnitL[4], V1UnitV[4]
+# CHECK-NEXT: 9 | 9 | 9 | 0.75 | V1UnitI, V1UnitL[4], V1UnitV[4] | LD4Fourv4s_POST | ld4 { v4.4s, v5.4s, v6.4s, v7.4s }, [x28], x3 // LD4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 9 9 9 0.75 V1UnitI, V1UnitL[4], V1UnitV[4]
+# CHECK-NEXT: 8 | 9 | 9 | 0.75 | V1UnitL[4], V1UnitV[4] | LD4Fourv2d | ld4 { v2.2d, v3.2d, v4.2d, v5.2d }, [x24] // LD4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, Q-form, D \\ 8 9 9 0.75 V1UnitL[4], V1UnitV[4]
+# CHECK-NEXT: 9 | 9 | 9 | 0.75 | V1UnitI, V1UnitL[4], V1UnitV[4] | LD4Fourv2d_POST | ld4 { v18.2d, v19.2d, v20.2d, v21.2d }, [x0], #64 // LD4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], #64 \\ ASIMD load, 4 element, multiple, Q-form, D \\ 9 9 9 0.75 V1UnitI, V1UnitL[4], V1UnitV[4]
+# CHECK-NEXT: 9 | 9 | 9 | 0.75 | V1UnitI, V1UnitL[4], V1UnitV[4] | LD4Fourv2d_POST | ld4 { v27.2d, v28.2d, v29.2d, v30.2d }, [x27], x4 // LD4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, Q-form, D \\ 9 9 9 0.75 V1UnitI, V1UnitL[4], V1UnitV[4]
+# CHECK-NEXT: 7 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[4] | LD4i8 | ld4 { v4.b, v5.b, v6.b, v7.b }[12], [x27] // LD4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>] \\ ASIMD load, 4 element, one lane, B/H \\ 7 8 8 1.00 V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 7 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[4] | LD4i16 | ld4 { v5.h, v6.h, v7.h, v8.h }[0], [x4] // LD4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>] \\ ASIMD load, 4 element, one lane, B/H \\ 7 8 8 1.00 V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 7 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[4] | LD4i32 | ld4 { v0.s, v1.s, v2.s, v3.s }[0], [x26] // LD4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>] \\ ASIMD load, 4 element, one lane, S \\ 7 8 8 1.00 V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 7 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[4] | LD4i64 | ld4 { v2.d, v3.d, v4.d, v5.d }[0], [x29] // LD4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>] \\ ASIMD load, 4 element, one lane, D \\ 7 8 8 1.00 V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[4] | LD4i8_POST | ld4 { v26.b, v27.b, v28.b, v29.b }[4], [x13], #4 // LD4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>], #4 \\ ASIMD load, 4 element, one lane, B/H \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[4] | LD4i8_POST | ld4 { v10.b, v11.b, v12.b, v13.b }[11], [x24], x21 // LD4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, one lane, B/H \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[4] | LD4i16_POST | ld4 { v8.h, v9.h, v10.h, v11.h }[0], [x17], #8 // LD4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>], #8 \\ ASIMD load, 4 element, one lane, B/H \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[4] | LD4i16_POST | ld4 { v21.h, v22.h, v23.h, v24.h }[2], [x21], x24 // LD4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, one lane, B/H \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[4] | LD4i32_POST | ld4 { v1.s, v2.s, v3.s, v4.s }[1], [x28], #16 // LD4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>], #16 \\ ASIMD load, 4 element, one lane, S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[4] | LD4i32_POST | ld4 { v20.s, v21.s, v22.s, v23.s }[1], [x27], x16 // LD4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, one lane, S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[4] | LD4i64_POST | ld4 { v18.d, v19.d, v20.d, v21.d }[1], [x26], #32 // LD4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>], #32 \\ ASIMD load, 4 element, one lane, D \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[4] | LD4i64_POST | ld4 { v8.d, v9.d, v10.d, v11.d }[0], [x23], x0 // LD4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, one lane, D \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 12 | 12 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | LD4B_IMM | ld4b { z16.b - z19.b }, p3/z, [x23] // LD4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 8 12 12 0.50 V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4]
+# CHECK-NEXT: 8 | 12 | 12 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | LD4B_IMM | ld4b { z7.b - z10.b }, p5/z, [x3, #12, mul vl] // LD4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 8 12 12 0.50 V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4]
+# CHECK-NEXT: 10 | 13 | 13 | 0.50 | V1UnitI[2], V1UnitL[4], V1UnitL01[4], V1UnitS[2], V1UnitV[4], V1UnitV01[4] | LD4B | ld4b { z7.b - z10.b }, p4/z, [x20, x12] // LD4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 10 13 13 0.50 V1UnitI[2], V1UnitL[4], V1UnitL01[4], V1UnitS[2], V1UnitV[4], V1UnitV01[4]
+# CHECK-NEXT: 8 | 12 | 12 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | LD4D_IMM | ld4d { z26.d - z29.d }, p7/z, [x10] // LD4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 8 12 12 0.50 V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4]
+# CHECK-NEXT: 8 | 12 | 12 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | LD4D_IMM | ld4d { z27.d - z30.d }, p0/z, [x6, #24, mul vl] // LD4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 8 12 12 0.50 V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4]
+# CHECK-NEXT: 10 | 13 | 13 | 0.50 | V1UnitI[2], V1UnitL[4], V1UnitL01[4], V1UnitS[2], V1UnitV[4], V1UnitV01[4] | LD4D | ld4d { z7.d - z10.d }, p4/z, [x25, x8, lsl #3] // LD4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 10 13 13 0.50 V1UnitI[2], V1UnitL[4], V1UnitL01[4], V1UnitS[2], V1UnitV[4], V1UnitV01[4]
+# CHECK-NEXT: 8 | 12 | 12 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | LD4H_IMM | ld4h { z4.h - z7.h }, p4/z, [x19] // LD4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 8 12 12 0.50 V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4]
+# CHECK-NEXT: 8 | 12 | 12 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | LD4H_IMM | ld4h { z4.h - z7.h }, p1/z, [x16, #-8, mul vl] // LD4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 8 12 12 0.50 V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4]
+# CHECK-NEXT: 10 | 13 | 13 | 0.50 | V1UnitI[2], V1UnitL[4], V1UnitL01[4], V1UnitS[2], V1UnitV[4], V1UnitV01[4] | LD4H | ld4h { z10.h - z13.h }, p2/z, [x8, x28, lsl #1] // LD4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 10 13 13 0.50 V1UnitI[2], V1UnitL[4], V1UnitL01[4], V1UnitS[2], V1UnitV[4], V1UnitV01[4]
+# CHECK-NEXT: 7 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[4] | LD4Rv8b | ld4r { v20.8b, v21.8b, v22.8b, v23.8b }, [x23] // LD4R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 7 8 8 1.00 V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[4] | LD4Rv8b_POST | ld4r { v24.8b, v25.8b, v26.8b, v27.8b }, [x15], #4 // LD4R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], #4 \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[4] | LD4Rv8b_POST | ld4r { v4.8b, v5.8b, v6.8b, v7.8b }, [x26], x6 // LD4R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 7 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[4] | LD4Rv16b | ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x25] // LD4R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 7 8 8 1.00 V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[4] | LD4Rv16b_POST | ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x14], #4 // LD4R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], #4 \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[4] | LD4Rv16b_POST | ld4r { v0.16b, v1.16b, v2.16b, v3.16b }, [x29], x11 // LD4R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 7 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[4] | LD4Rv4h | ld4r { v16.4h, v17.4h, v18.4h, v19.4h }, [x6] // LD4R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 7 8 8 1.00 V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[4] | LD4Rv4h_POST | ld4r { v14.4h, v15.4h, v16.4h, v17.4h }, [x0], #8 // LD4R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], #8 \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[4] | LD4Rv4h_POST | ld4r { v21.4h, v22.4h, v23.4h, v24.4h }, [x25], x22 // LD4R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 7 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[4] | LD4Rv8h | ld4r { v4.8h, v5.8h, v6.8h, v7.8h }, [x23] // LD4R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 7 8 8 1.00 V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[4] | LD4Rv8h_POST | ld4r { v25.8h, v26.8h, v27.8h, v28.8h }, [x7], #8 // LD4R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], #8 \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[4] | LD4Rv8h_POST | ld4r { v13.8h, v14.8h, v15.8h, v16.8h }, [x19], x27 // LD4R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 7 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[4] | LD4Rv2s | ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [x30] // LD4R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 7 8 8 1.00 V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[4] | LD4Rv2s_POST | ld4r { v23.2s, v24.2s, v25.2s, v26.2s }, [x29], #16 // LD4R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], #16 \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[4] | LD4Rv2s_POST | ld4r { v19.2s, v20.2s, v21.2s, v22.2s }, [x9], x0 // LD4R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 7 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[4] | LD4Rv4s | ld4r { v7.4s, v8.4s, v9.4s, v10.4s }, [x23] // LD4R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 7 8 8 1.00 V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[4] | LD4Rv4s_POST | ld4r { v9.4s, v10.4s, v11.4s, v12.4s }, [x3], #16 // LD4R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], #16 \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[4] | LD4Rv4s_POST | ld4r { v3.4s, v4.4s, v5.4s, v6.4s }, [x10], x22 // LD4R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 7 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[4] | LD4Rv1d | ld4r { v7.1d, v8.1d, v9.1d, v10.1d }, [x26] // LD4R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, D-form, D \\ 7 8 8 1.00 V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[4] | LD4Rv1d_POST | ld4r { v11.1d, v12.1d, v13.1d, v14.1d }, [x5], #32 // LD4R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, all lanes, D-form, D \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[4] | LD4Rv1d_POST | ld4r { v12.1d, v13.1d, v14.1d, v15.1d }, [x30], x17 // LD4R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, D-form, D \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 7 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[4] | LD4Rv2d | ld4r { v7.2d, v8.2d, v9.2d, v10.2d }, [x8] // LD4R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, Q-form, D \\ 7 8 8 1.00 V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[4] | LD4Rv2d_POST | ld4r { v12.2d, v13.2d, v14.2d, v15.2d }, [x2], #32 // LD4R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, all lanes, Q-form, D \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[4] | LD4Rv2d_POST | ld4r { v17.2d, v18.2d, v19.2d, v20.2d }, [x21], x13 // LD4R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, Q-form, D \\ 8 8 8 1.00 V1UnitI, V1UnitL[3], V1UnitV[4]
+# CHECK-NEXT: 8 | 12 | 12 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | LD4W_IMM | ld4w { z18.s - z21.s }, p6/z, [x4] // LD4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 8 12 12 0.50 V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4]
+# CHECK-NEXT: 8 | 12 | 12 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | LD4W_IMM | ld4w { z21.s - z24.s }, p5/z, [x16, #-8, mul vl] // LD4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 8 12 12 0.50 V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4]
+# CHECK-NEXT: 10 | 13 | 13 | 0.50 | V1UnitI[2], V1UnitL[4], V1UnitL01[4], V1UnitS[2], V1UnitV[4], V1UnitV01[4] | LD4W | ld4w { z25.s - z28.s }, p2/z, [x23, x8, lsl #2] // LD4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 10 13 13 0.50 V1UnitI[2], V1UnitL[4], V1UnitL01[4], V1UnitS[2], V1UnitV[4], V1UnitV01[4]
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURi | ldapur w7, [x24] // LDAPUR <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURi | ldapur w25, [x29, #68] // LDAPUR <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURXi | ldapur x20, [x13] // LDAPUR <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURXi | ldapur x29, [x4, #-199] // LDAPUR <Xt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURBi | ldapurb w13, [x17] // LDAPURB <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURBi | ldapurb w20, [x19, #124] // LDAPURB <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURHi | ldapurh w3, [x22] // LDAPURH <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURHi | ldapurh w1, [x6, #113] // LDAPURH <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURSBWi | ldapursb w7, [x8] // LDAPURSB <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURSBWi | ldapursb w29, [x22, #-76] // LDAPURSB <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURSBXi | ldapursb x29, [x7] // LDAPURSB <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURSBXi | ldapursb x6, [x0, #-254] // LDAPURSB <Xt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURSHWi | ldapursh w17, [x19] // LDAPURSH <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURSHWi | ldapursh w26, [x18, #-114] // LDAPURSH <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURSHXi | ldapursh x3, [x3] // LDAPURSH <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURSHXi | ldapursh x13, [x25, #30] // LDAPURSH <Xt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURSWi | ldapursw x3, [x18] // LDAPURSW <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURSWi | ldapursw x21, [x25] // LDAPURSW <Xt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDARW | ldar w9, [x20] // LDAR <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDARW | ldar w15, [x0] // LDAR <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDARX | ldar x5, [x25] // LDAR <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDARX | ldar x11, [x2] // LDAR <Xt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDARB | ldarb w16, [x21] // LDARB <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDARB | ldarb w14, [x30] // LDARB <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDARH | ldarh w26, [x25] // LDARH <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDARH | ldarh w21, [x2] // LDARH <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 4 | 4 | 1.50 | V1UnitL[2] | LDAXPW | ldaxp w13, w22, [x28] // LDAXP <Wt1>, <Wt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 4 | 4 | 1.50 | V1UnitL[2] | LDAXPW | ldaxp w11, w19, [x20] // LDAXP <Wt1>, <Wt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 4 | 4 | 1.50 | V1UnitL[2] | LDAXPX | ldaxp x25, x8, [x16] // LDAXP <Xt1>, <Xt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 4 | 4 | 1.50 | V1UnitL[2] | LDAXPX | ldaxp x28, x17, [x25] // LDAXP <Xt1>, <Xt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDAXRW | ldaxr w4, [x5] // LDAXR <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDAXRW | ldaxr w10, [x7] // LDAXR <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDAXRX | ldaxr x22, [x21] // LDAXR <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDAXRX | ldaxr x7, [x1] // LDAXR <Xt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDAXRB | ldaxrb w12, [x30] // LDAXRB <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDAXRB | ldaxrb w27, [x2] // LDAXRB <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDAXRH | ldaxrh w30, [x16] // LDAXRH <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDAXRH | ldaxrh w14, [x3] // LDAXRH <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1B | ldff1b { z10.b }, p3/z, [x10] // LDFF1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1B | ldff1b { z2.b }, p5/z, [x28, x2] // LDFF1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1B_H | ldff1b { z2.h }, p0/z, [x14] // LDFF1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1B_H | ldff1b { z30.h }, p3/z, [x25, x18] // LDFF1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1B_S | ldff1b { z17.s }, p5/z, [x24] // LDFF1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1B_S | ldff1b { z17.s }, p7/z, [x11, x15] // LDFF1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1B_D | ldff1b { z9.d }, p2/z, [x3] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1B_D | ldff1b { z5.d }, p2/z, [x6, x8] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1B_D_SXTW | ldff1b { z7.d }, p3/z, [x27, z19.d, sxtw] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 9 | 9 | 3.00 | V1UnitL, V1UnitV | GLDFF1B_S_SXTW | ldff1b { z13.s }, p3/z, [x24, z25.s, sxtw] // LDFF1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 3.00 V1UnitL, V1UnitV
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1B_D | ldff1b { z27.d }, p0/z, [x13, z16.d] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 11 | 11 | 3.00 | V1UnitL, V1UnitV | GLDFF1B_S_IMM | ldff1b { z7.s }, p7/z, [z16.s] // LDFF1B { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+# CHECK-NEXT: 2 | 11 | 11 | 3.00 | V1UnitL, V1UnitV | GLDFF1B_S_IMM | ldff1b { z11.s }, p5/z, [z8.s, #25] // LDFF1B { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1B_D_IMM | ldff1b { z2.d }, p7/z, [z19.d] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1B_D_IMM | ldff1b { z3.d }, p5/z, [z0.d, #11] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1D | ldff1d { z21.d }, p2/z, [x20] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1D | ldff1d { z9.d }, p3/z, [x28, x30, lsl #3] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1D_SXTW_SCALED | ldff1d { z21.d }, p4/z, [x11, z12.d, sxtw #3] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #3] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1D_UXTW | ldff1d { z6.d }, p4/z, [x15, z1.d, uxtw] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1D_SCALED | ldff1d { z12.d }, p7/z, [x11, z28.d, lsl #3] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #3] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1D | ldff1d { z26.d }, p4/z, [x30, z5.d] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1D_IMM | ldff1d { z10.d }, p5/z, [z10.d] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1D_IMM | ldff1d { z21.d }, p6/z, [z3.d, #48] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1H | ldff1h { z14.h }, p3/z, [x22] // LDFF1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1H | ldff1h { z15.h }, p2/z, [x24, x8, lsl #1] // LDFF1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1H_S | ldff1h { z23.s }, p0/z, [x12] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1H_S | ldff1h { z18.s }, p0/z, [x7, x25, lsl #1] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1H_D | ldff1h { z16.d }, p0/z, [x11] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1H_D | ldff1h { z25.d }, p3/z, [x24, x19, lsl #1] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 4 | 11 | 11 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1H_S_SXTW_SCALED | ldff1h { z9.s }, p2/z, [x3, z24.s, sxtw #1] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 4 11 11 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1H_D_UXTW_SCALED | ldff1h { z7.d }, p0/z, [x8, z17.d, uxtw #1] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1H_D_SXTW | ldff1h { z9.d }, p5/z, [x4, z10.d, sxtw] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 9 | 9 | 3.00 | V1UnitL, V1UnitV | GLDFF1H_S_UXTW | ldff1h { z4.s }, p4/z, [x6, z27.s, uxtw] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 3.00 V1UnitL, V1UnitV
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1H_D_SCALED | ldff1h { z25.d }, p1/z, [x29, z6.d, lsl #1] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1H_D | ldff1h { z10.d }, p7/z, [x1, z26.d] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 11 | 11 | 3.00 | V1UnitL, V1UnitV | GLDFF1H_S_IMM | ldff1h { z4.s }, p1/z, [z27.s] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+# CHECK-NEXT: 2 | 11 | 11 | 3.00 | V1UnitL, V1UnitV | GLDFF1H_S_IMM | ldff1h { z5.s }, p3/z, [z8.s, #62] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1H_D_IMM | ldff1h { z16.d }, p5/z, [z10.d] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1H_D_IMM | ldff1h { z15.d }, p2/z, [z19.d, #34] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SB_H | ldff1sb { z0.h }, p2/z, [x2] // LDFF1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SB_H | ldff1sb { z29.h }, p1/z, [x16, x21] // LDFF1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SB_S | ldff1sb { z20.s }, p7/z, [x8] // LDFF1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SB_S | ldff1sb { z8.s }, p2/z, [x4, x14] // LDFF1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SB_D | ldff1sb { z11.d }, p4/z, [x6] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SB_D | ldff1sb { z17.d }, p4/z, [x16, x10] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1SB_D_SXTW | ldff1sb { z13.d }, p2/z, [x28, z8.d, sxtw] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 9 | 9 | 3.00 | V1UnitL, V1UnitV | GLDFF1SB_S_SXTW | ldff1sb { z3.s }, p2/z, [x26, z24.s, sxtw] // LDFF1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 3.00 V1UnitL, V1UnitV
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1SB_D | ldff1sb { z10.d }, p7/z, [x20, z6.d] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 11 | 11 | 3.00 | V1UnitL, V1UnitV | GLDFF1SB_S_IMM | ldff1sb { z18.s }, p3/z, [z9.s] // LDFF1SB { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+# CHECK-NEXT: 2 | 11 | 11 | 3.00 | V1UnitL, V1UnitV | GLDFF1SB_S_IMM | ldff1sb { z25.s }, p2/z, [z29.s, #25] // LDFF1SB { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1SB_D_IMM | ldff1sb { z8.d }, p0/z, [z24.d] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1SB_D_IMM | ldff1sb { z7.d }, p0/z, [z4.d, #9] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SH_S | ldff1sh { z2.s }, p2/z, [x6] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SH_S | ldff1sh { z9.s }, p3/z, [x30, x16, lsl #1] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SH_D | ldff1sh { z7.d }, p4/z, [x30] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SH_D | ldff1sh { z1.d }, p0/z, [x29, x0, lsl #1] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 4 | 11 | 11 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1SH_S_SXTW_SCALED | ldff1sh { z25.s }, p4/z, [x5, z9.s, sxtw #1] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 4 11 11 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1SH_D_SXTW_SCALED | ldff1sh { z17.d }, p3/z, [x0, z25.d, sxtw #1] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1SH_D_SXTW | ldff1sh { z12.d }, p7/z, [x5, z15.d, sxtw] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 9 | 9 | 3.00 | V1UnitL, V1UnitV | GLDFF1SH_S_UXTW | ldff1sh { z8.s }, p5/z, [x3, z21.s, uxtw] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 3.00 V1UnitL, V1UnitV
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1SH_D_SCALED | ldff1sh { z14.d }, p6/z, [x17, z27.d, lsl #1] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1SH_D | ldff1sh { z23.d }, p4/z, [x22, z0.d] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 11 | 11 | 3.00 | V1UnitL, V1UnitV | GLDFF1SH_S_IMM | ldff1sh { z6.s }, p4/z, [z6.s] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+# CHECK-NEXT: 2 | 11 | 11 | 3.00 | V1UnitL, V1UnitV | GLDFF1SH_S_IMM | ldff1sh { z3.s }, p7/z, [z26.s, #16] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1SH_D_IMM | ldff1sh { z25.d }, p3/z, [z17.d] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1SH_D_IMM | ldff1sh { z2.d }, p3/z, [z31.d, #26] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SW_D | ldff1sw { z16.d }, p2/z, [x8] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SW_D | ldff1sw { z27.d }, p1/z, [x6, x11, lsl #2] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1SW_D_UXTW_SCALED | ldff1sw { z27.d }, p3/z, [x5, z20.d, uxtw #2] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1SW_D_SXTW | ldff1sw { z15.d }, p1/z, [x13, z26.d, sxtw] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1SW_D_SCALED | ldff1sw { z24.d }, p2/z, [x7, z23.d, lsl #2] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1SW_D | ldff1sw { z8.d }, p3/z, [x5, z22.d] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1SW_D_IMM | ldff1sw { z16.d }, p6/z, [z12.d] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1SW_D_IMM | ldff1sw { z3.d }, p1/z, [z13.d, #60] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1W | ldff1w { z2.s }, p5/z, [x13] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1W | ldff1w { z9.s }, p3/z, [x16, x19, lsl #2] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1W_D | ldff1w { z31.d }, p6/z, [x3] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1W_D | ldff1w { z30.d }, p4/z, [x25, x12, lsl #2] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 4 | 11 | 11 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1W_UXTW_SCALED | ldff1w { z27.s }, p6/z, [x10, z17.s, uxtw #2] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 4 11 11 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1W_D_SXTW_SCALED | ldff1w { z8.d }, p4/z, [x28, z31.d, sxtw #2] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1W_D_UXTW | ldff1w { z1.d }, p0/z, [x23, z14.d, uxtw] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 9 | 9 | 3.00 | V1UnitL, V1UnitV | GLDFF1W_UXTW | ldff1w { z17.s }, p5/z, [x8, z6.s, uxtw] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 3.00 V1UnitL, V1UnitV
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1W_D_SCALED | ldff1w { z19.d }, p3/z, [x7, z18.d, lsl #2] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] \\ Gather load, 32-bit scaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1W_D | ldff1w { z23.d }, p2/z, [x16, z4.d] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 11 | 11 | 3.00 | V1UnitL, V1UnitV | GLDFF1W_IMM | ldff1w { z24.s }, p6/z, [z24.s] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+# CHECK-NEXT: 2 | 11 | 11 | 3.00 | V1UnitL, V1UnitV | GLDFF1W_IMM | ldff1w { z20.s }, p0/z, [z6.s, #36] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 3.00 V1UnitL, V1UnitV
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1W_D_IMM | ldff1w { z21.d }, p5/z, [z12.d] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 9 | 9 | 1.50 | V1UnitL[2], V1UnitV[2] | GLDFF1W_D_IMM | ldff1w { z29.d }, p2/z, [z11.d, #40] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 4 9 9 1.50 V1UnitL[2], V1UnitV[2]
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1B_IMM | ldnf1b { z17.b }, p5/z, [x20] // LDNF1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1B_IMM | ldnf1b { z8.b }, p5/z, [x26, #1, mul vl] // LDNF1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1B_H_IMM | ldnf1b { z4.h }, p3/z, [x25] // LDNF1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1B_H_IMM | ldnf1b { z31.h }, p3/z, [x7] // LDNF1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1B_S_IMM | ldnf1b { z2.s }, p7/z, [x25] // LDNF1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1B_S_IMM | ldnf1b { z17.s }, p5/z, [x29, #2, mul vl] // LDNF1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1B_D_IMM | ldnf1b { z6.d }, p5/z, [x26] // LDNF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1B_D_IMM | ldnf1b { z18.d }, p4/z, [x20, #5, mul vl] // LDNF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1D_IMM | ldnf1d { z5.d }, p6/z, [x6] // LDNF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1D_IMM | ldnf1d { z19.d }, p0/z, [x15, #-1, mul vl] // LDNF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1H_IMM | ldnf1h { z7.h }, p5/z, [x22] // LDNF1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1H_IMM | ldnf1h { z27.h }, p1/z, [x2, #6, mul vl] // LDNF1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1H_S_IMM | ldnf1h { z18.s }, p2/z, [x13] // LDNF1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1H_S_IMM | ldnf1h { z8.s }, p2/z, [x29, #-8, mul vl] // LDNF1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1H_D_IMM | ldnf1h { z26.d }, p5/z, [x5] // LDNF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1H_D_IMM | ldnf1h { z20.d }, p0/z, [x29, #-6, mul vl] // LDNF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1SB_H_IMM | ldnf1sb { z17.h }, p0/z, [x23] // LDNF1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1SB_H_IMM | ldnf1sb { z14.h }, p0/z, [x18, #-5, mul vl] // LDNF1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1SB_S_IMM | ldnf1sb { z23.s }, p0/z, [x3] // LDNF1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1SB_S_IMM | ldnf1sb { z13.s }, p7/z, [x15, #-8, mul vl] // LDNF1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1SB_D_IMM | ldnf1sb { z14.d }, p4/z, [x7] // LDNF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1SB_D_IMM | ldnf1sb { z13.d }, p7/z, [x25, #6, mul vl] // LDNF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1SH_S_IMM | ldnf1sh { z28.s }, p4/z, [x9] // LDNF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1SH_S_IMM | ldnf1sh { z3.s }, p1/z, [x14, #-2, mul vl] // LDNF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1SH_D_IMM | ldnf1sh { z1.d }, p2/z, [x0] // LDNF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1SH_D_IMM | ldnf1sh { z14.d }, p3/z, [x8, #3, mul vl] // LDNF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1SW_D_IMM | ldnf1sw { z8.d }, p4/z, [x9] // LDNF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1SW_D_IMM | ldnf1sw { z28.d }, p4/z, [x13, #-7, mul vl] // LDNF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1W_IMM | ldnf1w { z15.s }, p5/z, [x27] // LDNF1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1W_IMM | ldnf1w { z28.s }, p0/z, [x28, #-1, mul vl] // LDNF1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1W_D_IMM | ldnf1w { z28.d }, p5/z, [x13] // LDNF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1W_D_IMM | ldnf1w { z4.d }, p0/z, [x12, #2, mul vl] // LDNF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDNPSi | ldnp s1, s13, [x4] // LDNP <St1>, <St2>, [<Xn|SP>] \\ Load vector pair, immed offset, S/D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDNPSi | ldnp s30, s5, [x11, #-184] // LDNP <St1>, <St2>, [<Xn|SP>, #<imm32>] \\ Load vector pair, immed offset, S/D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDNPDi | ldnp d3, d12, [x21] // LDNP <Dt1>, <Dt2>, [<Xn|SP>] \\ Load vector pair, immed offset, S/D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDNPDi | ldnp d12, d5, [x7, #-424] // LDNP <Dt1>, <Dt2>, [<Xn|SP>, #<imm64>] \\ Load vector pair, immed offset, S/D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitL[2] | LDNPQi | ldnp q0, q14, [x24] // LDNP <Qt1>, <Qt2>, [<Xn|SP>] \\ Load vector pair, immed offset, Q-form \\ 2 6 6 1.50 V1UnitL[2]
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitL[2] | LDNPQi | ldnp q4, q1, [x27, #80] // LDNP <Qt1>, <Qt2>, [<Xn|SP>, #<imm128>] \\ Load vector pair, immed offset, Q-form \\ 2 6 6 1.50 V1UnitL[2]
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDNPWi | ldnp w4, w20, [x25] // LDNP <Wt1>, <Wt2>, [<Xn|SP>] \\ Load pair, signed immed offset, normal, W-form \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDNPWi | ldnp w30, w4, [x21, #-196] // LDNP <Wt1>, <Wt2>, [<Xn|SP>, #<imm32>] \\ Load pair, signed immed offset, normal, W-form \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 2 | 4 | 4 | 1.50 | V1UnitL[2] | LDNPXi | ldnp x7, x30, [x18] // LDNP <Xt1>, <Xt2>, [<Xn|SP>] \\ Load pair, signed immed offset, normal, X-form \\ 2 4 4 1.50 V1UnitL[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.50 | V1UnitL[2] | LDNPXi | ldnp x5, x19, [x1, #-240] // LDNP <Xt1>, <Xt2>, [<Xn|SP>, #<imm64>] \\ Load pair, signed immed offset, normal, X-form \\ 2 4 4 1.50 V1UnitL[2]
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNT1B_ZRI | ldnt1b { z9.b }, p2/z, [x21] // LDNT1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNT1B_ZRI | ldnt1b { z30.b }, p5/z, [x30, #-3, mul vl] // LDNT1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDNT1B_ZRR | ldnt1b { z10.b }, p5/z, [x12, x17] // LDNT1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Non temporal load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNT1D_ZRI | ldnt1d { z27.d }, p2/z, [x12] // LDNT1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNT1D_ZRI | ldnt1d { z5.d }, p7/z, [x22, #6, mul vl] // LDNT1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDNT1D_ZRR | ldnt1d { z28.d }, p2/z, [x14, x0, lsl #3] // LDNT1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Non temporal load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNT1H_ZRI | ldnt1h { z11.h }, p0/z, [x21] // LDNT1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNT1H_ZRI | ldnt1h { z19.h }, p1/z, [x24, #-5, mul vl] // LDNT1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDNT1H_ZRR | ldnt1h { z27.h }, p0/z, [x22, x24, lsl #1] // LDNT1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Non temporal load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNT1W_ZRI | ldnt1w { z27.s }, p4/z, [x19] // LDNT1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNT1W_ZRI | ldnt1w { z15.s }, p0/z, [x22, #3, mul vl] // LDNT1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDNT1W_ZRR | ldnt1w { z25.s }, p4/z, [x12, x21, lsl #2] // LDNT1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Non temporal load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LDPSpost | ldp s19, s15, [x24], #-64 // LDP <St1>, <St2>, [<Xn|SP>], #<imm32> \\ Load vector pair, immed post-index, S/D-form \\ 2 6 6 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LDPDpost | ldp d9, d1, [x20], #296 // LDP <Dt1>, <Dt2>, [<Xn|SP>], #<imm64> \\ Load vector pair, immed post-index, S/D-form \\ 2 6 6 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 3 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LDPQpost | ldp q18, q24, [x11], #144 // LDP <Qt1>, <Qt2>, [<Xn|SP>], #<imm128> \\ Load vector pair, immed post-index, Q-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LDPSpre | ldp s10, s30, [x0, #-4]! // LDP <St1>, <St2>, [<Xn|SP>, #<imm32>]! \\ Load vector pair, immed pre-index, S/D-form \\ 2 6 6 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LDPDpre | ldp d26, d11, [x16, #-304]! // LDP <Dt1>, <Dt2>, [<Xn|SP>, #<imm64>]! \\ Load vector pair, immed pre-index, S/D-form \\ 2 6 6 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 3 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LDPQpre | ldp q18, q12, [x25, #960]! // LDP <Qt1>, <Qt2>, [<Xn|SP>, #<imm128>]! \\ Load vector pair, immed pre-index, Q-form \\ 3 6 6 1.50 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDPSi | ldp s12, s31, [x20, #-192] // LDP <St1>, <St2>, [<Xn|SP>, #<imm32>] \\ Load vector pair, immed offset, S/D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDPDi | ldp d26, d6, [x22, #-144] // LDP <Dt1>, <Dt2>, [<Xn|SP>, #<imm64>] \\ Load vector pair, immed offset, S/D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitL[2] | LDPQi | ldp q5, q19, [x9, #-448] // LDP <Qt1>, <Qt2>, [<Xn|SP>, #<imm128>] \\ Load vector pair, immed offset, Q-form \\ 2 6 6 1.50 V1UnitL[2]
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDPWpost | ldp w10, w18, [x16], #-96 // LDP <Wt1>, <Wt2>, [<Xn|SP>], #<imm32> \\ Load pair, immed post-index or immed pre-index, normal, W-form \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.50 | V1UnitI, V1UnitL[2] | LDPXpost | ldp x13, x16, [x11], #288 // LDP <Xt1>, <Xt2>, [<Xn|SP>], #<imm64> \\ Load pair, immed post-index or immed pre-index, normal, X-form \\ 3 4 4 1.50 V1UnitI, V1UnitL[2]
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDPWpre | ldp w7, w16, [x13, #-116]! // LDP <Wt1>, <Wt2>, [<Xn|SP>, #<imm32>]! \\ Load pair, immed post-index or immed pre-index, normal, W-form \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.50 | V1UnitI, V1UnitL[2] | LDPXpre | ldp x26, x3, [x14, #16]! // LDP <Xt1>, <Xt2>, [<Xn|SP>, #<imm64>]! \\ Load pair, immed post-index or immed pre-index, normal, X-form \\ 3 4 4 1.50 V1UnitI, V1UnitL[2]
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDPWi | ldp w25, w23, [x22] // LDP <Wt1>, <Wt2>, [<Xn|SP>] \\ Load pair, signed immed offset, normal, W-form \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDPWi | ldp w3, w21, [x17, #40] // LDP <Wt1>, <Wt2>, [<Xn|SP>, #<imm32>] \\ Load pair, signed immed offset, normal, W-form \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 2 | 4 | 4 | 1.50 | V1UnitL[2] | LDPXi | ldp x6, x25, [x17] // LDP <Xt1>, <Xt2>, [<Xn|SP>] \\ Load pair, signed immed offset, normal, X-form \\ 2 4 4 1.50 V1UnitL[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.50 | V1UnitL[2] | LDPXi | ldp x9, x21, [x3, #104] // LDP <Xt1>, <Xt2>, [<Xn|SP>, #<imm64>] \\ Load pair, signed immed offset, normal, X-form \\ 2 4 4 1.50 V1UnitL[2]
+# CHECK-NEXT: 3 | 5 | 5 | 2.00 | V1UnitI[2], V1UnitL | LDPSWpost | ldpsw x23, x26, [x30], #-160 // LDPSW <Xt1>, <Xt2>, [<Xn|SP>], #<imm> \\ Load pair, immed post-index or immed pre-index, signed words \\ 3 5 5 2.00 V1UnitI[2], V1UnitL
+# CHECK-NEXT: 3 | 5 | 5 | 2.00 | V1UnitI[2], V1UnitL | LDPSWpre | ldpsw x19, x28, [x21, #-248]! // LDPSW <Xt1>, <Xt2>, [<Xn|SP>, #<imm>]! \\ Load pair, immed post-index or immed pre-index, signed words \\ 3 5 5 2.00 V1UnitI[2], V1UnitL
+# CHECK-NEXT: 2 | 5 | 5 | 3.00 | V1UnitI, V1UnitL | LDPSWi | ldpsw x13, x20, [x15] // LDPSW <Xt1>, <Xt2>, [<Xn|SP>] \\ Load pair, signed immed offset, signed words \\ 2 5 5 3.00 V1UnitI, V1UnitL
+# CHECK-NEXT: 2 | 5 | 5 | 3.00 | V1UnitI, V1UnitL | LDPSWi | ldpsw x9, x27, [x8, #80] // LDPSW <Xt1>, <Xt2>, [<Xn|SP>, #<imm>] \\ Load pair, signed immed offset, signed words \\ 2 5 5 3.00 V1UnitI, V1UnitL
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRWpost | ldr w13, [x2], #-22 // LDR <Wt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRXpost | ldr x6, [x9], #248 // LDR <Xt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRWpre | ldr w20, [x10, #13]! // LDR <Wt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRXpre | ldr x23, [x20, #-24]! // LDR <Xt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRWui | ldr w19, [x15, #11620] // LDR <Wt>, [<Xn|SP>, #<pimm32>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRXui | ldr x2, [x13, #18528] // LDR <Xt>, [<Xn|SP>, #<pimm64>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LDRBpost | ldr b0, [x15], #-18 // LDR <Bt>, [<Xn|SP>], #<simm> \\ Load vector reg, immed post-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LDRHpost | ldr h25, [x4], #-156 // LDR <Ht>, [<Xn|SP>], #<simm> \\ Load vector reg, immed post-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LDRSpost | ldr s28, [x6], #162 // LDR <St>, [<Xn|SP>], #<simm> \\ Load vector reg, immed post-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LDRDpost | ldr d23, [x8], #-176 // LDR <Dt>, [<Xn|SP>], #<simm> \\ Load vector reg, immed post-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LDRQpost | ldr q5, [x18], #70 // LDR <Qt>, [<Xn|SP>], #<simm> \\ Load vector reg, immed post-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LDRBpre | ldr b9, [x0, #-104]! // LDR <Bt>, [<Xn|SP>, #<simm>]! \\ Load vector reg, immed pre-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LDRHpre | ldr h24, [x10, #34]! // LDR <Ht>, [<Xn|SP>, #<simm>]! \\ Load vector reg, immed pre-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LDRSpre | ldr s29, [x5, #168]! // LDR <St>, [<Xn|SP>, #<simm>]! \\ Load vector reg, immed pre-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LDRDpre | ldr d22, [x9, #-1]! // LDR <Dt>, [<Xn|SP>, #<simm>]! \\ Load vector reg, immed pre-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LDRQpre | ldr q27, [x20, #-204]! // LDR <Qt>, [<Xn|SP>, #<simm>]! \\ Load vector reg, immed pre-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRBui | ldr b23, [x0, #349] // LDR <Bt>, [<Xn|SP>, #<pimmb>] \\ Load vector reg, unsigned immed \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRHui | ldr h1, [x15, #3540] // LDR <Ht>, [<Xn|SP>, #<pimmh>] \\ Load vector reg, unsigned immed \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRSui | ldr s14, [x7, #16208] // LDR <St>, [<Xn|SP>, #<pimms>] \\ Load vector reg, unsigned immed \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRDui | ldr d4, [x17, #7368] // LDR <Dt>, [<Xn|SP>, #<pimmd>] \\ Load vector reg, unsigned immed \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRQui | ldr q14, [x6, #4624] // LDR <Qt>, [<Xn|SP>, #<pimmq>] \\ Load vector reg, unsigned immed \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRWl | ldr w15, test // LDR <Wt>, <label> \\ Load register, literal \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRXl | ldr x26, test // LDR <Xt>, <label> \\ Load register, literal \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRSl | ldr s17, test // LDR <St>, <label> \\ Load vector reg, literal, S/D/Q forms \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRDl | ldr d10, test // LDR <Dt>, <label> \\ Load vector reg, literal, S/D/Q forms \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRQl | ldr q22, test // LDR <Qt>, <label> \\ Load vector reg, literal, S/D/Q forms \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitM | LDR_PXI | ldr p0, [x28] // LDR <Pt>, [<Xn|SP>] \\ Load predicate \\ 2 6 6 2.0 V1UnitL,V1UnitM
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitM | LDR_PXI | ldr p1, [x6, #-53, mul vl] // LDR <Pt>, [<Xn|SP>, #<imm>, MUL VL] \\ Load predicate \\ 2 6 6 2.0 V1UnitL,V1UnitM
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRWroX | ldr w30, [x10, x0] // LDR <Wt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRXroX | ldr x13, [x4, x21] // LDR <Xt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRWroW | ldr w25, [x18, w26, uxtw] // LDR <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRXroW | ldr x20, [x29, w26, uxtw] // LDR <Xt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRWroW | ldr w26, [x12, w0, uxtw #2] // LDR <Wt>, [<Xn|SP>, <Wm>, UXTW #2] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRXroW | ldr x13, [x2, w10, uxtw #3] // LDR <Xt>, [<Xn|SP>, <Wm>, UXTW #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRWroW | ldr w13, [x18, w19, sxtw] // LDR <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRXroW | ldr x5, [x26, w12, sxtw] // LDR <Xt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRWroW | ldr w16, [x9, w24, sxtw #2] // LDR <Wt>, [<Xn|SP>, <Wm>, SXTW #2] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRXroW | ldr x21, [x29, w4, sxtw #3] // LDR <Xt>, [<Xn|SP>, <Wm>, SXTW #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRWroX | ldr w19, [x15, x1, sxtx] // LDR <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRXroX | ldr x25, [x4, x20, sxtx] // LDR <Xt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRWroX | ldr w3, [x1, x17, sxtx #2] // LDR <Wt>, [<Xn|SP>, <Xm>, SXTX #2] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRXroX | ldr x2, [x13, x26, sxtx #3] // LDR <Xt>, [<Xn|SP>, <Xm>, SXTX #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRWroX | ldr w1, [x18, x17, lsl #2] // LDR <Wt>, [<Xn|SP>, <Xm>, LSL #2] \\ Load register, register offset, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRXroX | ldr x22, [x17, x3, lsl #3] // LDR <Xt>, [<Xn|SP>, <Xm>, LSL #3] \\ Load register, register offset, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRBroX | ldr b8, [x30, x10] // LDR <Bt>, [<Xn|SP>, <Xm>] \\ Load vector reg, register offset, basic \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRBroW | ldr b25, [x21, w8, uxtw] // LDR <Bt>, [<Xn|SP>, <Wm>, UXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRBroW | ldr b7, [x9, w29, sxtw] // LDR <Bt>, [<Xn|SP>, <Wm>, SXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRBroX | ldr b31, [x17, x6, sxtx] // LDR <Bt>, [<Xn|SP>, <Xm>, SXTX] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 2 | 7 | 7 | 3.00 | V1UnitI, V1UnitL | LDRHroX | ldr h11, [x13, x9] // LDR <Ht>, [<Xn|SP>, <Xm>] \\ Load vector reg, register offset, basic \\ 2 7 7 3.00 V1UnitL
+# CHECK-NEXT: 2 | 7 | 7 | 3.00 | V1UnitI, V1UnitL | LDRHroW | ldr h6, [x4, w4, uxtw] // LDR <Ht>, [<Xn|SP>, <Wm>, UXTW] \\ Load vector reg, register offset, extend \\ 2 7 7 3.00 V1UnitL
+# CHECK-NEXT: 2 | 7 | 7 | 3.00 | V1UnitI, V1UnitL | LDRHroW | ldr h28, [x3, w28, sxtw] // LDR <Ht>, [<Xn|SP>, <Wm>, SXTW] \\ Load vector reg, register offset, extend \\ 2 7 7 3.00 V1UnitL
+# CHECK-NEXT: 2 | 7 | 7 | 3.00 | V1UnitI, V1UnitL | LDRHroX | ldr h3, [x15, x19, sxtx] // LDR <Ht>, [<Xn|SP>, <Xm>, SXTX] \\ Load vector reg, register offset, extend \\ 2 7 7 3.00 V1UnitL
+# CHECK-NEXT: 2 | 7 | 7 | 3.00 | V1UnitI, V1UnitL | LDRHroW | ldr h24, [x27, w5, uxtw #1] // LDR <Ht>, [<Xn|SP>, <Wm>, UXTW #1] \\ Load vector reg, register offset, extend, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 2 | 7 | 7 | 3.00 | V1UnitI, V1UnitL | LDRHroW | ldr h22, [x28, w11, sxtw #1] // LDR <Ht>, [<Xn|SP>, <Wm>, SXTW #1] \\ Load vector reg, register offset, extend, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 2 | 7 | 7 | 3.00 | V1UnitI, V1UnitL | LDRHroX | ldr h3, [x18, x26, sxtx #1] // LDR <Ht>, [<Xn|SP>, <Xm>, SXTX #1] \\ Load vector reg, register offset, extend, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 2 | 7 | 7 | 3.00 | V1UnitI, V1UnitL | LDRHroX | ldr h8, [x23, x19, lsl #1] // LDR <Ht>, [<Xn|SP>, <Xm>, LSL #1] \\ Load vector reg, register offset, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRSroX | ldr s21, [x1, x29] // LDR <St>, [<Xn|SP>, <Xm>] \\ Load vector reg, register offset, basic \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRSroW | ldr s12, [x30, w5, uxtw] // LDR <St>, [<Xn|SP>, <Wm>, UXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRSroW | ldr s15, [x2, w20, sxtw] // LDR <St>, [<Xn|SP>, <Wm>, SXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRSroX | ldr s11, [x25, x20, sxtx] // LDR <St>, [<Xn|SP>, <Xm>, SXTX] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRSroW | ldr s9, [x24, w27, uxtw #2] // LDR <St>, [<Xn|SP>, <Wm>, UXTW #2] \\ Load vector reg, register offset, extend, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRSroW | ldr s7, [x2, w5, sxtw #2] // LDR <St>, [<Xn|SP>, <Wm>, SXTW #2] \\ Load vector reg, register offset, extend, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRSroX | ldr s13, [x19, x28, sxtx #2] // LDR <St>, [<Xn|SP>, <Xm>, SXTX #2] \\ Load vector reg, register offset, extend, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRSroX | ldr s21, [x10, x4, lsl #2] // LDR <St>, [<Xn|SP>, <Xm>, LSL #2] \\ Load vector reg, register offset, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRDroX | ldr d10, [x23, x10] // LDR <Dt>, [<Xn|SP>, <Xm>] \\ Load vector reg, register offset, basic \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRDroW | ldr d24, [x26, w7, uxtw] // LDR <Dt>, [<Xn|SP>, <Wm>, UXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRDroW | ldr d28, [x12, w2, sxtw] // LDR <Dt>, [<Xn|SP>, <Wm>, SXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRDroX | ldr d0, [x7, x29, sxtx] // LDR <Dt>, [<Xn|SP>, <Xm>, SXTX] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRDroW | ldr d24, [x9, w27, uxtw #3] // LDR <Dt>, [<Xn|SP>, <Wm>, UXTW #3] \\ Load vector reg, register offset, extend, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRDroW | ldr d5, [x17, w2, sxtw #3] // LDR <Dt>, [<Xn|SP>, <Wm>, SXTW #3] \\ Load vector reg, register offset, extend, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRDroX | ldr d2, [x5, x16, sxtx #3] // LDR <Dt>, [<Xn|SP>, <Xm>, SXTX #3] \\ Load vector reg, register offset, extend, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRDroX | ldr d2, [x29, x18, lsl #3] // LDR <Dt>, [<Xn|SP>, <Xm>, LSL #3] \\ Load vector reg, register offset, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 2 | 7 | 7 | 3.00 | V1UnitI, V1UnitL | LDRQroX | ldr q9, [x13, x16] // LDR <Qt>, [<Xn|SP>, <Xm>] \\ Load vector reg, register offset, basic \\ 2 7 7 3.00 V1UnitL
+# CHECK-NEXT: 2 | 7 | 7 | 3.00 | V1UnitI, V1UnitL | LDRQroW | ldr q16, [x16, w1, uxtw] // LDR <Qt>, [<Xn|SP>, <Wm>, UXTW] \\ Load vector reg, register offset, extend \\ 2 7 7 3.00 V1UnitL
+# CHECK-NEXT: 2 | 7 | 7 | 3.00 | V1UnitI, V1UnitL | LDRQroW | ldr q1, [x17, w5, sxtw] // LDR <Qt>, [<Xn|SP>, <Wm>, SXTW] \\ Load vector reg, register offset, extend \\ 2 7 7 3.00 V1UnitL
+# CHECK-NEXT: 2 | 7 | 7 | 3.00 | V1UnitI, V1UnitL | LDRQroX | ldr q1, [x8, x9, sxtx] // LDR <Qt>, [<Xn|SP>, <Xm>, SXTX] \\ Load vector reg, register offset, extend \\ 2 7 7 3.00 V1UnitL
+# CHECK-NEXT: 2 | 7 | 7 | 3.00 | V1UnitI, V1UnitL | LDRQroW | ldr q23, [x26, w23, uxtw #4] // LDR <Qt>, [<Xn|SP>, <Wm>, UXTW #4] \\ Load vector reg, register offset, extend, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 2 | 7 | 7 | 3.00 | V1UnitI, V1UnitL | LDRQroW | ldr q3, [x18, w23, sxtw #4] // LDR <Qt>, [<Xn|SP>, <Wm>, SXTW #4] \\ Load vector reg, register offset, extend, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 2 | 7 | 7 | 3.00 | V1UnitI, V1UnitL | LDRQroX | ldr q2, [x28, x30, sxtx #4] // LDR <Qt>, [<Xn|SP>, <Xm>, SXTX #4] \\ Load vector reg, register offset, extend, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 2 | 7 | 7 | 3.00 | V1UnitI, V1UnitL | LDRQroX | ldr q21, [x23, x27, lsl #4] // LDR <Qt>, [<Xn|SP>, <Xm>, LSL #4] \\ Load vector reg, register offset, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDR_ZXI | ldr z26, [x4] // LDR <Zt>, [<Xn|SP>] \\ Load vector \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDR_ZXI | ldr z18, [x27, #16, mul vl] // LDR <Zt>, [<Xn|SP>, #<imm>, MUL VL] \\ Load vector \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRBBpost | ldrb w4, [x17], #0 // LDRB <Wt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRBBpre | ldrb w27, [x23, #114]! // LDRB <Wt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRBBui | ldrb w26, [x19] // LDRB <Wt>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRBBui | ldrb w29, [x18, #3179] // LDRB <Wt>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRBBroX | ldrb w16, [x25, x9] // LDRB <Wt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRBBroW | ldrb w9, [x15, w19, uxtw] // LDRB <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRBBroW | ldrb w25, [x7, w0, sxtw] // LDRB <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRBBroX | ldrb w0, [x18, x21, sxtx] // LDRB <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRHHpost | ldrh w9, [x1], #-2 // LDRH <Wt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRHHpre | ldrh w12, [x29, #-41]! // LDRH <Wt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRHHui | ldrh w28, [x3] // LDRH <Wt>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRHHui | ldrh w27, [x19, #3156] // LDRH <Wt>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRHHroX | ldrh w20, [x25, x15] // LDRH <Wt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRHHroW | ldrh w22, [x0, w24, uxtw] // LDRH <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRHHroW | ldrh w6, [x17, w18, sxtw] // LDRH <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRHHroX | ldrh w21, [x13, x30, sxtx] // LDRH <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRHHroW | ldrh w14, [x21, w21, uxtw #1] // LDRH <Wt>, [<Xn|SP>, <Wm>, UXTW #1] \\ Load register, register offset, extend, scale by 2 \\ 1 4 4 3.00 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRHHroW | ldrh w0, [x29, w13, sxtw #1] // LDRH <Wt>, [<Xn|SP>, <Wm>, SXTW #1] \\ Load register, register offset, extend, scale by 2 \\ 1 4 4 3.00 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRHHroX | ldrh w11, [x20, x0, sxtx #1] // LDRH <Wt>, [<Xn|SP>, <Xm>, SXTX #1] \\ Load register, register offset, extend, scale by 2 \\ 1 4 4 3.00 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRHHroX | ldrh w12, [x17, x27, lsl #1] // LDRH <Wt>, [<Xn|SP>, <Xm>, LSL #1] \\ Load register, register offset, scale by 2 \\ 1 4 4 3.00 V1UnitL
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRSBWpost | ldrsb w12, [x13], #-250 // LDRSB <Wt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRSBXpost | ldrsb x10, [x2], #-229 // LDRSB <Xt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRSBWpre | ldrsb w5, [x2, #-169]! // LDRSB <Wt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRSBXpre | ldrsb x28, [x12, #-46]! // LDRSB <Xt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSBWui | ldrsb w5, [x26] // LDRSB <Wt>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSBWui | ldrsb w24, [x0, #3862] // LDRSB <Wt>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSBXui | ldrsb x6, [x0] // LDRSB <Xt>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSBXui | ldrsb x20, [x0, #653] // LDRSB <Xt>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSBWroX | ldrsb w30, [x22, x21] // LDRSB <Wt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSBWroW | ldrsb w24, [x2, w14, uxtw] // LDRSB <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSBWroW | ldrsb w7, [x1, w8, sxtw] // LDRSB <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSBWroX | ldrsb w4, [x8, x25, sxtx] // LDRSB <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSBXroX | ldrsb x12, [x28, x27] // LDRSB <Xt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSBXroW | ldrsb x10, [x5, w9, uxtw] // LDRSB <Xt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSBXroW | ldrsb x19, [x23, w24, sxtw] // LDRSB <Xt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSBXroX | ldrsb x20, [x10, x13, sxtx] // LDRSB <Xt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRSHWpost | ldrsh w5, [x0], #-115 // LDRSH <Wt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRSHXpost | ldrsh x30, [x18], #-50 // LDRSH <Xt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRSHWpre | ldrsh w27, [x15, #-45]! // LDRSH <Wt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRSHXpre | ldrsh x14, [x24, #27]! // LDRSH <Xt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHWui | ldrsh w18, [x13] // LDRSH <Wt>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHWui | ldrsh w11, [x27, #4094] // LDRSH <Wt>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHXui | ldrsh x19, [x26] // LDRSH <Xt>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHXui | ldrsh x19, [x9, #6652] // LDRSH <Xt>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHWroX | ldrsh w18, [x30, x24] // LDRSH <Wt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHWroW | ldrsh w13, [x25, w7, uxtw] // LDRSH <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHWroW | ldrsh w3, [x16, w28, sxtw] // LDRSH <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHWroX | ldrsh w0, [x13, x14, sxtx] // LDRSH <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHWroW | ldrsh w0, [x5, w21, uxtw #1] // LDRSH <Wt>, [<Xn|SP>, <Wm>, UXTW #1] \\ Load register, register offset, extend, scale by 2 \\ 1 4 4 3.00 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHWroW | ldrsh w26, [x6, w29, sxtw #1] // LDRSH <Wt>, [<Xn|SP>, <Wm>, SXTW #1] \\ Load register, register offset, extend, scale by 2 \\ 1 4 4 3.00 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHWroX | ldrsh w22, [x26, x15, sxtx #1] // LDRSH <Wt>, [<Xn|SP>, <Xm>, SXTX #1] \\ Load register, register offset, extend, scale by 2 \\ 1 4 4 3.00 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHWroX | ldrsh w26, [x20, x21, lsl #1] // LDRSH <Wt>, [<Xn|SP>, <Xm>, LSL #1] \\ Load register, register offset, scale by 2 \\ 1 4 4 3.00 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHXroX | ldrsh x4, [x9, x24] // LDRSH <Xt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHXroW | ldrsh x25, [x8, w13, uxtw] // LDRSH <Xt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHXroW | ldrsh x25, [x20, w10, sxtw] // LDRSH <Xt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHXroX | ldrsh x6, [x13, x10, sxtx] // LDRSH <Xt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHXroW | ldrsh x15, [x0, w28, uxtw #1] // LDRSH <Xt>, [<Xn|SP>, <Wm>, UXTW #1] \\ Load register, register offset, extend, scale by 2 \\ 1 4 4 3.00 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHXroW | ldrsh x19, [x9, w15, sxtw #1] // LDRSH <Xt>, [<Xn|SP>, <Wm>, SXTW #1] \\ Load register, register offset, extend, scale by 2 \\ 1 4 4 3.00 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHXroX | ldrsh x1, [x17, x26, sxtx #1] // LDRSH <Xt>, [<Xn|SP>, <Xm>, SXTX #1] \\ Load register, register offset, extend, scale by 2 \\ 1 4 4 3.00 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHXroX | ldrsh x7, [x29, x17, lsl #1] // LDRSH <Xt>, [<Xn|SP>, <Xm>, LSL #1] \\ Load register, register offset, scale by 2 \\ 1 4 4 3.00 V1UnitL
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRSWpost | ldrsw x4, [x21], #-93 // LDRSW <Xt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRSWpre | ldrsw x6, [x28, #96]! // LDRSW <Xt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSWui | ldrsw x1, [x23] // LDRSW <Xt>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSWui | ldrsw x6, [x19, #4552] // LDRSW <Xt>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSWl | ldrsw x20, test // LDRSW <Xt>, <label> \\ Load register, literal \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSWroX | ldrsw x21, [x25, x7] // LDRSW <Xt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSWroW | ldrsw x12, [x28, w12, uxtw] // LDRSW <Xt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSWroW | ldrsw x22, [x26, w21, sxtw] // LDRSW <Xt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSWroX | ldrsw x0, [x21, x19, sxtx] // LDRSW <Xt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSWroW | ldrsw x23, [x17, w19, uxtw #2] // LDRSW <Xt>, [<Xn|SP>, <Wm>, UXTW #2] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSWroW | ldrsw x23, [x30, w11, sxtw #2] // LDRSW <Xt>, [<Xn|SP>, <Wm>, SXTW #2] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSWroX | ldrsw x29, [x12, x5, sxtx #2] // LDRSW <Xt>, [<Xn|SP>, <Xm>, SXTX #2] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSWroX | ldrsw x3, [x1, x17, lsl #2] // LDRSW <Xt>, [<Xn|SP>, <Xm>, LSL #2] \\ Load register, register offset, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRWi | ldtr w12, [x9] // LDTR <Wt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRWi | ldtr w9, [x3, #-55] // LDTR <Wt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRXi | ldtr x9, [x9] // LDTR <Xt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRXi | ldtr x25, [x1, #103] // LDTR <Xt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRBi | ldtrb w27, [x7] // LDTRB <Wt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRBi | ldtrb w8, [x1, #-90] // LDTRB <Wt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRHi | ldtrh w13, [x21] // LDTRH <Wt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRHi | ldtrh w10, [x15, #-67] // LDTRH <Wt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRSBWi | ldtrsb w15, [x19] // LDTRSB <Wt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRSBWi | ldtrsb w28, [x19, #-202] // LDTRSB <Wt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRSBXi | ldtrsb x17, [x6] // LDTRSB <Xt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRSBXi | ldtrsb x0, [x11, #180] // LDTRSB <Xt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRSHWi | ldtrsh w19, [x26] // LDTRSH <Wt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRSHWi | ldtrsh w16, [x28, #-233] // LDTRSH <Wt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRSHXi | ldtrsh x26, [x22] // LDTRSH <Xt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRSHXi | ldtrsh x27, [x19, #-76] // LDTRSH <Xt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRSWi | ldtrsw x23, [x28] // LDTRSW <Xt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRSWi | ldtrsw x26, [x21, #45] // LDTRSW <Xt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDURBi | ldur b24, [x3] // LDUR <Bt>, [<Xn|SP>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDURBi | ldur b9, [x25, #240] // LDUR <Bt>, [<Xn|SP>, #<simm>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDURHi | ldur h29, [x21] // LDUR <Ht>, [<Xn|SP>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDURHi | ldur h5, [x23, #-5] // LDUR <Ht>, [<Xn|SP>, #<simm>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDURSi | ldur s12, [x14] // LDUR <St>, [<Xn|SP>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDURSi | ldur s22, [x10, #108] // LDUR <St>, [<Xn|SP>, #<simm>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDURDi | ldur d16, [x14] // LDUR <Dt>, [<Xn|SP>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDURDi | ldur d22, [x24, #-198] // LDUR <Dt>, [<Xn|SP>, #<simm>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDURQi | ldur q25, [x9] // LDUR <Qt>, [<Xn|SP>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDURQi | ldur q5, [x24, #233] // LDUR <Qt>, [<Xn|SP>, #<simm>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURWi | ldur w19, [x30] // LDUR <Wt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURWi | ldur w24, [x12, #202] // LDUR <Wt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURXi | ldur x0, [x3] // LDUR <Xt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURXi | ldur x14, [x14, #17] // LDUR <Xt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURBBi | ldurb w9, [x24] // LDURB <Wt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURBBi | ldurb w12, [x5, #92] // LDURB <Wt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURHHi | ldurh w27, [x14] // LDURH <Wt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURHHi | ldurh w13, [x30, #-173] // LDURH <Wt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURSBWi | ldursb w5, [x8] // LDURSB <Wt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURSBWi | ldursb w21, [x10, #172] // LDURSB <Wt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURSBXi | ldursb x19, [x15] // LDURSB <Xt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURSBXi | ldursb x16, [x11, #-173] // LDURSB <Xt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURSHWi | ldursh w21, [x12] // LDURSH <Wt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURSHWi | ldursh w16, [x18, #203] // LDURSH <Wt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURSHXi | ldursh x4, [x28] // LDURSH <Xt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURSHXi | ldursh x5, [x3, #-133] // LDURSH <Xt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURSWi | ldursw x21, [x7] // LDURSW <Xt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURSWi | ldursw x11, [x16, #169] // LDURSW <Xt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 2 | 4 | 4 | 1.50 | V1UnitL[2] | LDXPW | ldxp w23, w14, [x17] // LDXP <Wt1>, <Wt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 4 | 4 | 1.50 | V1UnitL[2] | LDXPW | ldxp w2, w8, [x21] // LDXP <Wt1>, <Wt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 4 | 4 | 1.50 | V1UnitL[2] | LDXPX | ldxp x5, x6, [x30] // LDXP <Xt1>, <Xt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 4 | 4 | 1.50 | V1UnitL[2] | LDXPX | ldxp x10, x26, [x6] // LDXP <Xt1>, <Xt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDXRW | ldxr w4, [x9] // LDXR <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDXRW | ldxr w7, [x3] // LDXR <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDXRX | ldxr x6, [x27] // LDXR <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDXRX | ldxr x3, [x4] // LDXR <Xt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDXRB | ldxrb w17, [x21] // LDXRB <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDXRB | ldxrb w14, [x3] // LDXRB <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDXRH | ldxrh w14, [x1] // LDXRH <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDXRH | ldxrh w24, [x11] // LDXRH <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMWri | lsl w25, w0, #22 // LSL <Wd>, <Wn>, #<shifts> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMXri | lsl x27, x7, #56 // LSL <Xd>, <Xn>, #<shiftd> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_ZPmI_B | lsl z1.b, p1/m, z1.b, #3 // LSL <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_ZPmI_H | lsl z7.h, p3/m, z7.h, #5 // LSL <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_ZPmI_S | lsl z10.s, p3/m, z10.s, #7 // LSL <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_ZPmI_D | lsl z21.d, p7/m, z21.d, #28 // LSL <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_ZZI_B | lsl z13.b, z4.b, #2 // LSL <Zd>.B, <Zn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_ZZI_H | lsl z11.h, z16.h, #1 // LSL <Zd>.H, <Zn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_ZZI_S | lsl z16.s, z11.s, #6 // LSL <Zd>.S, <Zn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_ZZI_D | lsl z18.d, z4.d, #26 // LSL <Zd>.D, <Zn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | LSLVWr | lsl w4, w9, w12 // LSL <Wd>, <Wn>, <Wm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | LSLVXr | lsl x7, x29, x22 // LSL <Xd>, <Xn>, <Xm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_ZPmZ_D | lsl z3.d, p2/m, z3.d, z15.d // LSL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_WIDE_ZPmZ_S | lsl z3.s, p6/m, z3.s, z8.d // LSL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_WIDE_ZZZ_S | lsl z19.s, z25.s, z25.d // LSL <Zd>.<T>, <Zn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSLR_ZPmZ_H | lslr z3.h, p5/m, z3.h, z23.h // LSLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | LSLVWr | lsl w6, w8, w2 // LSLV <Wd>, <Wn>, <Wm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | LSLVXr | lsl x7, x26, x21 // LSLV <Xd>, <Xn>, <Xm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMWri | lsr w0, w0, #30 // LSR <Wd>, <Wn>, #<shifts> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMXri | lsr x23, x24, #23 // LSR <Xd>, <Xn>, #<shiftd> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_ZPmI_B | lsr z21.b, p5/m, z21.b, #3 // LSR <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_ZPmI_H | lsr z1.h, p4/m, z1.h, #5 // LSR <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_ZPmI_S | lsr z24.s, p7/m, z24.s, #9 // LSR <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_ZPmI_D | lsr z13.d, p3/m, z13.d, #4 // LSR <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_ZZI_B | lsr z3.b, z11.b, #3 // LSR <Zd>.B, <Zn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_ZZI_H | lsr z5.h, z12.h, #2 // LSR <Zd>.H, <Zn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_ZZI_S | lsr z21.s, z16.s, #15 // LSR <Zd>.S, <Zn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_ZZI_D | lsr z21.d, z15.d, #8 // LSR <Zd>.D, <Zn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | LSRVWr | lsr w17, w20, w15 // LSR <Wd>, <Wn>, <Wm> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | LSRVXr | lsr x24, x4, x20 // LSR <Xd>, <Xn>, <Xm> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_ZPmZ_D | lsr z30.d, p3/m, z30.d, z28.d // LSR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_WIDE_ZPmZ_H | lsr z18.h, p3/m, z18.h, z29.d // LSR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_WIDE_ZZZ_H | lsr z7.h, z30.h, z11.d // LSR <Zd>.<T>, <Zn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSRR_ZPmZ_B | lsrr z14.b, p1/m, z14.b, z16.b // LSRR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | LSRVWr | lsr w0, w28, w19 // LSRV <Wd>, <Wn>, <Wm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | LSRVXr | lsr x16, x22, x19 // LSRV <Xd>, <Xn>, <Xm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MAD_ZPmZZ_B | mad z17.b, p7/m, z4.b, z5.b // MAD <Zdn>.B, <Pg>/M, <Zm>.B, <Za>.B \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MAD_ZPmZZ_H | mad z29.h, p4/m, z31.h, z18.h // MAD <Zdn>.H, <Pg>/M, <Zm>.H, <Za>.H \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MAD_ZPmZZ_S | mad z7.s, p4/m, z5.s, z29.s // MAD <Zdn>.S, <Pg>/M, <Zm>.S, <Za>.S \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+# CHECK-NEXT: 2 | 5 | 2 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | MAD_ZPmZZ_D | mad z28.d, p7/m, z18.d, z2.d // MAD <Zdn>.D, <Pg>/M, <Zm>.D, <Za>.D \\ Multiply accumulate, D element size \\ 1 5 2 0.5 V1UnitV0[2]
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | MADDWrrr | madd w15, w9, w9, w29 // MADD <Wd>, <Wn>, <Wm>, <Wa> \\ Multiply accumulate, X-form \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | MADDXrrr | madd x29, x22, x21, x21 // MADD <Xd>, <Xn>, <Xm>, <Xa> \\ Multiply accumulate, X-form \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | MLAv8i16_indexed | mla v15.8h, v22.8h, v4.h[3] // MLA <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | MLAv2i32_indexed | mla v28.2s, v10.2s, v2.s[0] // MLA <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | MLAv4i32 | mla v31.4s, v18.4s, v27.4s // MLA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MLA_ZPmZZ_B | mla z1.b, p0/m, z3.b, z3.b // MLA <Zda>.B, <Pg>/M, <Zn>.B, <Zm>.B \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MLA_ZPmZZ_H | mla z21.h, p2/m, z31.h, z30.h // MLA <Zda>.H, <Pg>/M, <Zn>.H, <Zm>.H \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MLA_ZPmZZ_S | mla z24.s, p3/m, z11.s, z9.s // MLA <Zda>.S, <Pg>/M, <Zn>.S, <Zm>.S \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+# CHECK-NEXT: 2 | 5 | 2 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | MLA_ZPmZZ_D | mla z2.d, p0/m, z12.d, z5.d // MLA <Zda>.D, <Pg>/M, <Zn>.D, <Zm>.D \\ Multiply accumulate, D element size \\ 1 5 2 0.5 V1UnitV0[2]
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | MLSv8i16_indexed | mls v25.8h, v29.8h, v0.h[4] // MLS <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | MLSv2i32_indexed | mls v22.2s, v29.2s, v0.s[3] // MLS <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | MLSv4i32 | mls v26.4s, v5.4s, v28.4s // MLS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MLS_ZPmZZ_B | mls z11.b, p1/m, z28.b, z6.b // MLS <Zda>.B, <Pg>/M, <Zn>.B, <Zm>.B \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MLS_ZPmZZ_H | mls z31.h, p0/m, z25.h, z24.h // MLS <Zda>.H, <Pg>/M, <Zn>.H, <Zm>.H \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MLS_ZPmZZ_S | mls z1.s, p5/m, z7.s, z13.s // MLS <Zda>.S, <Pg>/M, <Zn>.S, <Zm>.S \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+# CHECK-NEXT: 2 | 5 | 2 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | MLS_ZPmZZ_D | mls z2.d, p1/m, z17.d, z10.d // MLS <Zda>.D, <Pg>/M, <Zn>.D, <Zm>.D \\ Multiply accumulate, D element size \\ 1 5 2 0.5 V1UnitV0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | MSUBWrrr | mneg w14, w30, w30 // MNEG <Wd>, <Wn>, <Wm> \\ Multiply \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | MSUBXrrr | mneg x21, x3, x9 // MNEG <Xd>, <Xn>, <Xm> \\ Multiply \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPmV_S | mov z9.s, p2/m, s10 // MOV <Zd>.<T>, <Pg>/M, <V><n> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZZI_B | mov z17.b, z29.b[38] // MOV <Zd>.B, <Zn>.B[<immb>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZZI_H | mov z26.h, z7.h[16] // MOV <Zd>.H, <Zn>.H[<immh>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZZI_S | mov z14.s, z21.s[13] // MOV <Zd>.S, <Zn>.S[<imms>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZZI_D | mov z22.d, z14.d[2] // MOV <Zd>.D, <Zn>.D[<immd>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZZI_S | mov z21.s, s25 // MOV <Zd>.<T>, <V><n> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORRWri | orr wsp, wzr, #0xe00 // MOV <Wd|WSP>, #<imms> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVZXi | mov x3, #7680 // MOV <Xd|SP>, #<immd> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | INSvi8lane | mov v30.b[12], v17.b[14] // MOV <Vd>.B[<index1b>], <Vn>.B[<index2b>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | INSvi16lane | mov v10.h[3], v17.h[5] // MOV <Vd>.H[<index1h>], <Vn>.H[<index2h>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | INSvi32lane | mov v19.s[2], v2.s[1] // MOV <Vd>.S[<index1s>], <Vn>.S[<index2s>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | INSvi64lane | mov v21.d[1], v16.d[0] // MOV <Vd>.D[<index1d>], <Vn>.D[<index2d>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV | INSvi8gpr | mov v5.b[12], w23 // MOV <Vd>.B[<indexb>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV | INSvi16gpr | mov v27.h[6], w6 // MOV <Vd>.H[<indexh>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV | INSvi32gpr | mov v21.s[0], w21 // MOV <Vd>.S[<indexs>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV | INSvi64gpr | mov v13.d[0], x10 // MOV <Vd>.D[<indexd>], X<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPmI_B | mov z30.b, p7/m, #77 // MOV <Zd>.B, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPmI_D | mov z30.d, p7/m, #-89 // MOV <Zd>.<T>, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPmI_H | mov z10.h, p5/m, #72 // MOV <Zd>.<T>, <Pg>/M, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPzI_B | mov z19.b, p6/z, #0 // MOV <Zd>.B, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPzI_D | mov z6.d, p1/z, #-109 // MOV <Zd>.<T>, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPzI_D | mov z12.d, p7/z, #10240 // MOV <Zd>.<T>, <Pg>/Z, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_B | mov z30.b, #-31 // MOV <Zd>.B, #<imm> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_H | mov z2.h, #-56 // MOV <Zd>.<T>, #<imm> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_H | mov z20.h, #20992 // MOV <Zd>.<T>, #<imm>, <shift> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVZWi | mov w24, #3584 // MOV <Wd>, #<imms> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVZXi | mov x15, #3584 // MOV <Xd>, #<immd> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SEL_PPPP | mov p0.b, p0/m, p6.b // MOV <Pd>.B, <Pg>/M, <Pn>.B \\ Predicate select \\ 1 1 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | AND_PPzPP | mov p3.b, p7/z, p2.b // MOV <Pd>.B, <Pg>/Z, <Pn>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORRWrs | mov w21, w11 // MOV <Wd>, <Wm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORRXrs | mov x14, x0 // MOV <Xd>, <Xm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPi8 | mov b15, v21.b[8] // MOV B<d>, <Vn>.B[<indexb>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPi16 | mov h13, v17.h[3] // MOV H<d>, <Vn>.H[<indexh>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPi32 | mov s7, v11.s[0] // MOV S<d>, <Vn>.S[<indexs>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPi64 | mov d27, v24.d[1] // MOV D<d>, <Vn>.D[<indexd>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV01 | CPY_ZPmR_D | mov z12.d, p5/m, x24 // MOV <Zd>.<T>, <Pg>/M, <R><n> \\ Copy, scalar \\ 2 5 5 1.0 V1UnitM0,V1UnitV01
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV01 | CPY_ZPmR_D | mov z31.d, p6/m, sp // MOV <Zd>.<T>, <Pg>/M, <R2>SP \\ Copy, scalar \\ 2 5 5 1.0 V1UnitM0,V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DUP_ZR_B | mov z19.b, w27 // MOV <Zd>.<T>, <R><n> \\ Duplicate, scalar form \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DUP_ZR_H | mov z17.h, wsp // MOV <Zd>.<T>, <R2>SP \\ Duplicate, scalar form \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UMOVvi32 | mov w13, v12.s[2] // MOV <Wd>, <Vn>.S[<indexs>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UMOVvi64_idx0 | mov x30, v18.d[0] // MOV <Xd>, <Vn>.D[<indexd>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADDWri | mov wsp, wsp // MOV <Wd|WSP>, <Wn|WSP> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORRXrs | mov x1, x11 // MOV <Xd|SP>, <Xn|SP> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ORRv16i8 | mov v12.16b, v6.16b // MOV <Vd>.<T>, <Vn>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SEL_ZPZZ_D | mov z1.d, p3/m, z6.d // MOV <Zd>.<T>, <Pv>/M, <Zn>.<T> \\ Select, vector form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZZZ | mov z24.d, z25.d // MOV <Zd>.D, <Zn>.D \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVZWi | mov w30, #3584 // MOV <Wd>, #<imms> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVZXi | mov x4, #3584 // MOV <Xd>, #<immd> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_B | mov z14.b, #112 // MOV <Zd>.B, #<constb> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_H | mov z8.h, #96 // MOV <Zd>.H, #<consth> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_S | mov z2.s, #2 // MOV <Zd>.S, #<consts> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_D | mov z6.d, #4 // MOV <Zd>.D, #<constd> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | ORR_PPzPP | mov p2.b, p5.b // MOV <Pd>.B, <Pn>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MOVIv16b_ns | movi v7.16b, #177 // MOVI <Vd>.<Tb>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MOVIv8i16 | movi v14.8h, #174 // MOVI <Vd>.<Th>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MOVIv4i16 | movi v13.4h, #74, lsl #8 // MOVI <Vd>.<Th>, #<imm8>, LSL #<amounth> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MOVIv2i32 | movi v19.2s, #226 // MOVI <Vd>.<Ts>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MOVIv2i32 | movi v0.2s, #137, lsl #24 // MOVI <Vd>.<Ts>, #<imm8>, LSL #<amounts> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MOVIv4s_msl | movi v1.4s, #122, msl #8 // MOVI <Vd>.<Ts>, #<imm8>, MSL #<amountones> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MOVID | movi d16, #0000000000000000 // MOVI <Dd>, #<imm> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MOVIv2d_ns | movi v13.2d, #0xff00ff00ff00ff00 // MOVI <Vd>.2D, #<imm> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVKWi | movk w8, #57951 // MOVK <Wd>, #<imm> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVKWi | movk w6, #34540 // MOVK <Wd>, #<imm>, LSL #<shifts> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVKXi | movk x1, #56641 // MOVK <Xd>, #<imm> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVKXi | movk x23, #3111, lsl #48 // MOVK <Xd>, #<imm>, LSL #<shiftd> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVNWi | mov w16, #-52527 // MOVN <Wd>, #<imm> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVNWi | mov w27, #-47743 // MOVN <Wd>, #<imm>, LSL #<shifts> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVNXi | mov x10, #-63432 // MOVN <Xd>, #<imm> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVNXi | mov x0, #2116973299840843775 // MOVN <Xd>, #<imm>, LSL #<shiftd> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | MOVPRFX_ZPmZ_B | movprfx z22.b, p0/m, z4.b // MOVPRFX <Zd>.<T>, <Pg>/<ZM>, <Zn>.<T> \\ Move prefix \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MLA_ZPmZZ_B | mla z22.b, p0/m, z19.b, z25.b // Ignore
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | MOVPRFX_ZZ | movprfx z3, z26 // MOVPRFX <Zd>, <Zn> \\ Move prefix \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMLA_ZPmZZ_D | fmla z3.d, p0/m, z8.d, z19.d // Ignore
+# CHECK-NEXT: 2 | 2 | 2 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | ANDS_PPzPP | movs p0.b, p7/z, p3.b // MOVS <Pd>.B, <Pg>/Z, <Pn>.B \\ Predicate logical, flag setting \\ 2 2 2 0.50 V1UnitM0[2]
+# CHECK-NEXT: 2 | 2 | 2 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | ORRS_PPzPP | movs p4.b, p0.b // MOVS <Pd>.B, <Pn>.B \\ Predicate logical, flag setting \\ 2 2 2 0.50 V1UnitM0[2]
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVZWi | mov w3, #9629 // MOVZ <Wd>, #<imm> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVZWi | mov w23, #710082560 // MOVZ <Wd>, #<imm>, LSL #<shifts> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVZXi | mov x0, #22630 // MOVZ <Xd>, #<imm> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVZXi | mov x11, #5760103923406864384 // MOVZ <Xd>, #<imm>, LSL #<shiftd> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | MRS | mrs x4, ACTLR_EL1 // MRS <Xt>, <systemreg> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | MRS | mrs x14, S2_4_C0_C5_4 // MRS <Xt>, S<op0>_<op1>_<Cn>_<Cm>_<op2> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MSB_ZPmZZ_B | msb z18.b, p1/m, z27.b, z0.b // MSB <Zdn>.B, <Pg>/M, <Zm>.B, <Za>.B \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MSB_ZPmZZ_H | msb z27.h, p5/m, z23.h, z1.h // MSB <Zdn>.H, <Pg>/M, <Zm>.H, <Za>.H \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MSB_ZPmZZ_S | msb z26.s, p2/m, z0.s, z2.s // MSB <Zdn>.S, <Pg>/M, <Zm>.S, <Za>.S \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+# CHECK-NEXT: 2 | 5 | 2 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | MSB_ZPmZZ_D | msb z1.d, p6/m, z12.d, z12.d // MSB <Zdn>.D, <Pg>/M, <Zm>.D, <Za>.D \\ Multiply accumulate, D element size \\ 1 5 2 0.5 V1UnitV0[2]
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | MSRpstateImm4 | msr DAIFSet, #0 // MSR <pstatefield1>, #<imm1> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | MSRpstateImm4 | msr SPSel, #0 // MSR <pstatefield2>, #<imm2> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | MSR | msr ACTLR_EL3, x18 // MSR <systemreg>, <Xt> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | MSR | msr S3_6_C8_C12_1, x23 // MSR S<op0>_<op1>_<Cn>_<Cm>_<op2>, <Xt> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | MSUBWrrr | msub w6, w26, w13, w13 // MSUB <Wd>, <Wn>, <Wm>, <Wa> \\ Multiply accumulate, X-form \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | MSUBXrrr | msub x14, x28, x9, x3 // MSUB <Xd>, <Xn>, <Xm>, <Xa> \\ Multiply accumulate, X-form \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | MULv4i16_indexed | mul v26.4h, v20.4h, v14.h[5] // MUL <Vd>.4H, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | MULv8i16_indexed | mul v5.8h, v21.8h, v3.h[7] // MUL <Vd>.8H, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | MULv2i32_indexed | mul v29.2s, v10.2s, v3.s[1] // MUL <Vd>.2S, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | MULv4i32_indexed | mul v30.4s, v11.4s, v4.s[0] // MUL <Vd>.4S, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MUL_ZI_B | mul z16.b, z16.b, #-118 // MUL <Zdn>.B, <Zdn>.B, #<imm> \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MUL_ZI_H | mul z9.h, z9.h, #-56 // MUL <Zdn>.H, <Zdn>.H, #<imm> \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MUL_ZI_S | mul z23.s, z23.s, #74 // MUL <Zdn>.S, <Zdn>.S, #<imm> \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 2 | 5 | 5 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | MUL_ZI_D | mul z15.d, z15.d, #20 // MUL <Zdn>.D, <Zdn>.D, #<imm> \\ Multiply, D element size \\ 2 5 5 0.50 V1UnitV0[2]
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | MULv8i16 | mul v3.8h, v9.8h, v8.8h // MUL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MUL_ZPmZ_B | mul z17.b, p6/m, z17.b, z9.b // MUL <Zdn>.B, <Pg>/M, <Zdn>.B, <Zm>.B \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MUL_ZPmZ_H | mul z18.h, p7/m, z18.h, z15.h // MUL <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MUL_ZPmZ_S | mul z29.s, p6/m, z29.s, z8.s // MUL <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 2 | 5 | 5 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | MUL_ZPmZ_D | mul z25.d, p1/m, z25.d, z25.d // MUL <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Multiply, D element size \\ 2 5 5 0.50 V1UnitV0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | MADDWrrr | mul w8, w13, w20 // MUL <Wd>, <Wn>, <Wm> \\ Multiply \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | MADDXrrr | mul x12, x8, x25 // MUL <Xd>, <Xn>, <Xm> \\ Multiply \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORNWrs | mvn w0, w18 // MVN <Wd>, <Wm> \\ Move, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORNWrs | mvn w25, w27, asr #6 // MVN <Wd>, <Wm>, <shift> #<wamount> \\ Move, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORNXrs | mvn x1, x21 // MVN <Xd>, <Xm> \\ Move, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORNXrs | mvn x9, x23, asr #39 // MVN <Xd>, <Xm>, <shift> #<amount> \\ Move, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | NOTv16i8 | mvn v16.16b, v24.16b // MVN <Vd>.<T>, <Vn>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MVNIv4i16 | mvni v9.4h, #237 // MVNI <Vd>.<Th>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MVNIv8i16 | mvni v8.8h, #171, lsl #8 // MVNI <Vd>.<Th>, #<imm8>, LSL #<amounth> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MVNIv2i32 | mvni v7.2s, #81 // MVNI <Vd>.<Ts>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MVNIv4i32 | mvni v22.4s, #15, lsl #8 // MVNI <Vd>.<Ts>, #<imm8>, LSL #<amounts> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MVNIv4s_msl | mvni v12.4s, #141, msl #8 // MVNI <Vd>.<Ts>, #<imm8>, MSL #<amountones> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | NAND_PPzPP | nand p5.b, p4/z, p5.b, p5.b // NAND <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+# CHECK-NEXT: 2 | 2 | 2 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | NANDS_PPzPP | nands p6.b, p3/z, p4.b, p5.b // NANDS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 2 2 2 0.50 V1UnitM0[2]
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBWrs | neg w25, w20, lsl #4 // NEG <Wd>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SUBWrs | neg w0, w29, lsl #9 // NEG <Wd>, <Wm>, LSL #<wamounth> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SUBWrs | neg w7, w28, asr #24 // NEG <Wd>, <Wm>, <shift> #<wamount> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBXrs | neg x29, x11, lsl #3 // NEG <Xd>, <Xm>, LSL #<amountl> \\ Arithmetic, LSL shift, shift <= 4 \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SUBXrs | neg x24, x10, lsl #54 // NEG <Xd>, <Xm>, LSL #<amounth> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SUBXrs | neg x0, x16, lsr #2 // NEG <Xd>, <Xm>, <shift> #<amount> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | NEGv1i64 | neg d18, d20 // NEG <V><d>, <V><n> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | NEGv2i64 | neg v16.2d, v14.2d // NEG <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | NEG_ZPmZ_B | neg z16.b, p2/m, z15.b // NEG <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSWrs | negs w30, w22, lsl #2 // NEGS <Wd>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSWrs | negs w8, w8, lsl #15 // NEGS <Wd>, <Wm>, LSL #<wamounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSWrs | negs w12, w21, asr #15 // NEGS <Wd>, <Wm>, <shift> #<wamount> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSXrs | negs x24, x23, lsl #1 // NEGS <Xd>, <Xm>, LSL #<amountl> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSXrs | negs x20, x13, lsl #20 // NEGS <Xd>, <Xm>, LSL #<amounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSXrs | negs x1, x22, lsr #30 // NEGS <Xd>, <Xm>, <shift> #<amount> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBCWr | ngc w11, w9 // NGC <Wd>, <Wm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBCXr | ngc x30, x4 // NGC <Xd>, <Xm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SBCSWr | ngcs w13, w22 // NGCS <Wd>, <Wm> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SBCSXr | ngcs x15, x1 // NGCS <Xd>, <Xm> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HINT | nop // NOP \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | NOR_PPzPP | nor p4.b, p4/z, p0.b, p4.b // NOR <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+# CHECK-NEXT: 2 | 2 | 2 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | NORS_PPzPP | nors p1.b, p0/z, p7.b, p6.b // NORS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 2 2 2 0.50 V1UnitM0[2]
+# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | EOR_PPzPP | not p7.b, p2/z, p6.b // NOT <Pd>.B, <Pg>/Z, <Pn>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | NOT_ZPmZ_S | not z29.s, p4/m, z9.s // NOT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | NOTv8i8 | mvn v15.8b, v29.8b // NOT <Vd>.<T>, <Vn>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | EORS_PPzPP | nots p7.b, p3/z, p1.b // NOTS <Pd>.B, <Pg>/Z, <Pn>.B \\ Predicate logical, flag setting \\ 2 2 2 0.50 V1UnitM0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZI | orr z5.b, z5.b, #0x8f // ORN <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZI | orr z14.h, z14.h, #0xff9f // ORN <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZI | orr z14.s, z14.s, #0xfffffffd // ORN <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZI | orr z27.d, z27.d, #0xfffffffffffffffb // ORN <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | ORN_PPzPP | orn p1.b, p2/z, p3.b, p5.b // ORN <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORNWrs | orn w2, w27, w7 // ORN <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORNWrs | orn w6, w28, w14, lsl #19 // ORN <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORNXrs | orn x22, x12, x3 // ORN <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORNXrs | orn x19, x17, x0, lsl #58 // ORN <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ORNv8i8 | orn v29.8b, v19.8b, v16.8b // ORN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | ORNS_PPzPP | orns p3.b, p3/z, p0.b, p3.b // ORNS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 2 2 2 0.50 V1UnitM0[2]
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORRWri | orr wsp, w27, #0xe00 // ORR <Wd|WSP>, <Wn>, #<imms> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORRXri | orr x27, x6, #0x1e00 // ORR <Xd|SP>, <Xn>, #<immd> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZI | orr z4.b, z4.b, #0x70 // ORR <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZI | orr z26.h, z26.h, #0x60 // ORR <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZI | orr z3.s, z3.s, #0x2 // ORR <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZI | orr z30.d, z30.d, #0x4 // ORR <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | ORR_PPzPP | orr p6.b, p4/z, p4.b, p3.b // ORR <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORRWrs | orr w14, w1, w23 // ORR <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORRWrs | orr w25, w22, w0, asr #20 // ORR <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORRXrs | orr x11, x6, x13 // ORR <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORRXrs | orr x26, x26, x7, lsl #62 // ORR <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ORRv4i16 | orr v9.4h, #18 // ORR <Vd>.<Th>, #<imm8> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ORRv8i16 | orr v20.8h, #175 // ORR <Vd>.<Th>, #<imm8>, LSL #<amounth> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ORRv4i32 | orr v4.4s, #0 // ORR <Vd>.<T>, #<imm8> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ORRv4i32 | orr v17.4s, #119, lsl #24 // ORR <Vd>.<T>, #<imm8>, LSL #<amount> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ORRv16i8 | orr v12.16b, v9.16b, v1.16b // ORR <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZPmZ_H | orr z28.h, p3/m, z28.h, z7.h // ORR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZZZ | orr z8.d, z14.d, z19.d // ORR <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | ORRS_PPzPP | orrs p7.b, p7/z, p6.b, p5.b // ORRS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 2 2 2 0.50 V1UnitM0[2]
+# CHECK-NEXT: 4 | 12 | 12 | 0.50 | V1UnitV[4], V1UnitV01[4] | ORV_VPZ_D | orv d19, p6, z31.d // ORV <V><d>, <Pg>, <Zn>.<T> \\ Reduction, logical \\ 4 12 12 0.50 V1UnitV01[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | PFALSE | pfalse p6.b // PFALSE <Pd>.B \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | PFIRST_B | pfirst p0.b, p5, p0.b // PFIRST <Pdn>.B, <Pg>, <Pdn>.B \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | PMULv8i8 | pmul v30.8b, v0.8b, v27.8b // PMUL <Vd>.8B, <Vn>.8B, <Vm>.8B \\ ASIMD multiply/multiply long (8x8) polynomial, D-form \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | PMULv16i8 | pmul v7.16b, v20.16b, v18.16b // PMUL <Vd>.16B, <Vn>.16B, <Vm>.16B \\ ASIMD multiply/multiply long (8x8) polynomial, Q-form \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | PNEXT_S | pnext p5.s, p5, p5.s // PNEXT <Pdn>.<T>, <Pv>, <Pdn>.<T> \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_PRI | prfb #14, p5, [x21] // PRFB #<imm4>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_PRI | prfb #14, p3, [x28, #-24, mul vl] // PRFB #<imm4>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_PRI | prfb pstl1strm, p7, [x5] // PRFB <prfop>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_PRI | prfb pldl2keep, p1, [x12, #11, mul vl] // PRFB <prfop>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_PRR | prfb pldl1keep, p7, [x4, x9] // PRFB <prfop>, <Pg>, [<Xn|SP>, <Xm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_S_UXTW_SCALED | prfb pldl3strm, p4, [x3, z15.s, uxtw] // PRFB <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_D_UXTW_SCALED | prfb pldl1strm, p7, [x28, z4.d, uxtw] // PRFB <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_D_SCALED | prfb pstl3keep, p2, [x18, z19.d] // PRFB <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_S_PZI | prfb pstl3keep, p1, [z28.s] // PRFB #<imm4>, <Pg>, [<Zn>.S] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_S_PZI | prfb pstl1keep, p0, [z22.s, #21] // PRFB #<imm4>, <Pg>, [<Zn>.S, #<imm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_S_PZI | prfb pstl1strm, p2, [z25.s] // PRFB <prfop>, <Pg>, [<Zn>.S] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_S_PZI | prfb pstl2strm, p1, [z31.s, #18] // PRFB <prfop>, <Pg>, [<Zn>.S, #<imm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_D_PZI | prfb pstl2strm, p5, [z25.d] // PRFB #<imm4>, <Pg>, [<Zn>.D] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_D_PZI | prfb pldl2keep, p2, [z4.d, #10] // PRFB #<imm4>, <Pg>, [<Zn>.D, #<imm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_D_PZI | prfb pstl2keep, p5, [z5.d] // PRFB <prfop>, <Pg>, [<Zn>.D] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_D_PZI | prfb pldl1keep, p1, [z31.d, #17] // PRFB <prfop>, <Pg>, [<Zn>.D, #<imm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_PRI | prfd pstl3strm, p3, [x21] // PRFD #<imm4>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_PRI | prfd pldl3keep, p5, [x3, #-7, mul vl] // PRFD #<imm4>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_PRI | prfd pstl3keep, p0, [x29] // PRFD <prfop>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_PRI | prfd pldl1strm, p3, [x15, #-16, mul vl] // PRFD <prfop>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_PRR | prfd pstl2keep, p3, [x24, x24, lsl #3] // PRFD <prfop>, <Pg>, [<Xn|SP>, <Xm>, LSL #3] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_S_SXTW_SCALED | prfd pstl1strm, p3, [x27, z27.s, sxtw #3] // PRFD <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #3] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_D_UXTW_SCALED | prfd pstl1keep, p0, [x21, z2.d, uxtw #3] // PRFD <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #3] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_D_SCALED | prfd pldl1strm, p7, [x22, z22.d, lsl #3] // PRFD <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, LSL #3] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_S_PZI | prfd pldl2strm, p1, [z2.s] // PRFD #<imm4>, <Pg>, [<Zn>.S] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_S_PZI | prfd pstl1keep, p7, [z10.s, #72] // PRFD #<imm4>, <Pg>, [<Zn>.S, #<imm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_S_PZI | prfd pstl1keep, p3, [z19.s] // PRFD <prfop>, <Pg>, [<Zn>.S] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_S_PZI | prfd pstl2strm, p4, [z26.s, #248] // PRFD <prfop>, <Pg>, [<Zn>.S, #<imm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_D_PZI | prfd #15, p1, [z17.d] // PRFD #<imm4>, <Pg>, [<Zn>.D] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_D_PZI | prfd pldl2strm, p0, [z6.d, #24] // PRFD #<imm4>, <Pg>, [<Zn>.D, #<imm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_D_PZI | prfd pstl1keep, p3, [z31.d] // PRFD <prfop>, <Pg>, [<Zn>.D] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_D_PZI | prfd pstl1strm, p7, [z10.d, #40] // PRFD <prfop>, <Pg>, [<Zn>.D, #<imm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFH_PRI | prfh pldl2strm, p3, [x17] // PRFH #<imm4>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFH_PRI | prfh #6, p3, [x6, #19, mul vl] // PRFH #<imm4>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFH_PRI | prfh pldl3keep, p6, [x2] // PRFH <prfop>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFH_PRI | prfh pldl2keep, p6, [x18, #-4, mul vl] // PRFH <prfop>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFH_PRR | prfh pstl2keep, p1, [x28, x9, lsl #1] // PRFH <prfop>, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFH_S_UXTW_SCALED | prfh pldl1strm, p6, [x0, z10.s, uxtw #1] // PRFH <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #1] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFH_D_UXTW_SCALED | prfh pldl3keep, p7, [x24, z21.d, uxtw #1] // PRFH <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #1] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFH_D_SCALED | prfh pstl1strm, p5, [x10, z6.d, lsl #1] // PRFH <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, LSL #1] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFH_S_PZI | prfh pldl3strm, p6, [z0.s] // PRFH <prfop>, <Pg>, [<Zn>.S] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFH_S_PZI | prfh pstl3strm, p0, [z30.s, #12] // PRFH <prfop>, <Pg>, [<Zn>.S, #<imm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFH_D_PZI | prfh pstl2keep, p2, [z21.d] // PRFH <prfop>, <Pg>, [<Zn>.D] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFH_D_PZI | prfh pstl2keep, p1, [z8.d, #14] // PRFH <prfop>, <Pg>, [<Zn>.D, #<imm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMui | prfm pldl1strm, [x5] // PRFM <prfop>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMui | prfm pstl3keep, [x19, #10160] // PRFM <prfop>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMui | prfm #25, [x28] // PRFM #<imm5>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMui | prfm #7, [x15, #6776] // PRFM #<imm5>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMl | prfm pldl3strm, test // PRFM <prfop>, <label> \\ Load register, literal \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMl | prfm pldl1keep, test // PRFM #<imm5>, <label> \\ Load register, literal \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroX | prfm pldl1keep, [x25, x16] // PRFM <prfop>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroX | rprfm #16, x18, [x1] // PRFM #<imm5>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroW | prfm pldl1keep, [x14, w8, uxtw] // PRFM <prfop>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroW | prfm plil3keep, [x8, w5, uxtw] // PRFM #<imm5>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroW | prfm pldl2keep, [x16, w16, sxtw] // PRFM <prfop>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroW | prfm plil2strm, [x25, w11, sxtw] // PRFM #<imm5>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroX | prfm pstl2strm, [x3, x24, sxtx] // PRFM <prfop>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroX | rprfm #49, x2, [x5] // PRFM #<imm5>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroW | prfm pldl2keep, [x10, w29, uxtw #3] // PRFM <prfop>, [<Xn|SP>, <Wm>, UXTW #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroW | prfm pstl1strm, [x9, w27, uxtw #3] // PRFM #<imm5>, [<Xn|SP>, <Wm>, UXTW #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroW | prfm pldl1keep, [x24, w0, sxtw #3] // PRFM <prfop>, [<Xn|SP>, <Wm>, SXTW #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroW | prfm pldl3keep, [x30, w25, sxtw #3] // PRFM #<imm5>, [<Xn|SP>, <Wm>, SXTW #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroX | prfm pstl1strm, [x18, x20, sxtx #3] // PRFM <prfop>, [<Xn|SP>, <Xm>, SXTX #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroX | prfm pstl2strm, [x29, x25, sxtx #3] // PRFM #<imm5>, [<Xn|SP>, <Xm>, SXTX #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroX | prfm pstl3keep, [x2, x5, lsl #3] // PRFM <prfop>, [<Xn|SP>, <Xm>, LSL #3] \\ Load register, register offset, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroX | prfm plil1keep, [x22, x3, lsl #3] // PRFM #<imm5>, [<Xn|SP>, <Xm>, LSL #3] \\ Load register, register offset, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFUMi | prfum pstl1keep, [x7] // PRFUM <prfop>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFUMi | prfum pldl2keep, [x7, #-37] // PRFUM <prfop>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFUMi | prfum pstl3keep, [x21] // PRFUM #<imm5>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFUMi | prfum #23, [x6, #-131] // PRFUM #<imm5>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_PRI | prfw pldl2strm, p2, [x4] // PRFW #<imm4>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_PRI | prfw #6, p4, [x7, #6, mul vl] // PRFW #<imm4>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_PRI | prfw pldl3keep, p3, [x2] // PRFW <prfop>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_PRI | prfw pstl1keep, p7, [x2, #-31, mul vl] // PRFW <prfop>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_PRR | prfw pstl1keep, p4, [x18, x21, lsl #2] // PRFW <prfop>, <Pg>, [<Xn|SP>, <Xm>, LSL #2] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_S_UXTW_SCALED | prfw pldl2strm, p0, [x15, z6.s, uxtw #2] // PRFW <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #2] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_D_SXTW_SCALED | prfw pstl2keep, p0, [x27, z18.d, sxtw #2] // PRFW <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #2] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_D_SCALED | prfw pstl2keep, p3, [x19, z8.d, lsl #2] // PRFW <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, LSL #2] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_S_PZI | prfw #7, p7, [z27.s] // PRFW #<imm4>, <Pg>, [<Zn>.S] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_S_PZI | prfw pstl1strm, p5, [z16.s, #72] // PRFW #<imm4>, <Pg>, [<Zn>.S, #<imm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_S_PZI | prfw pldl3keep, p4, [z2.s] // PRFW <prfop>, <Pg>, [<Zn>.S] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_S_PZI | prfw pstl3keep, p2, [z0.s, #40] // PRFW <prfop>, <Pg>, [<Zn>.S, #<imm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_D_PZI | prfw #7, p1, [z20.d] // PRFW #<imm4>, <Pg>, [<Zn>.D] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_D_PZI | prfw #7, p2, [z10.d, #108] // PRFW #<imm4>, <Pg>, [<Zn>.D, #<imm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_D_PZI | prfw pstl1keep, p6, [z12.d] // PRFW <prfop>, <Pg>, [<Zn>.D] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_D_PZI | prfw pstl2strm, p0, [z18.d, #60] // PRFW <prfop>, <Pg>, [<Zn>.D, #<imm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HINT | psb csync // PSB CSYNC \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | DSB | pssbb // PSSBB \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | PTEST_PP | ptest p0, p5.b // PTEST <Pg>, <Pn>.B \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | PTRUE_B | ptrue p2.b // PTRUE <Pd>.<T> \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | PTRUE_D | ptrue p3.d, pow2 // PTRUE <Pd>.<T>, <pattern> \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | PTRUE_H | ptrue p0.h // PTRUE <Pd>.<T> \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | PTRUE_S | ptrue p4.s, #21 // PTRUE <Pd>.<T>, #<uimm5> \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 2 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | PTRUES_H | ptrues p3.h // PTRUES <Pd>.<T> \\ Predicate set/initialize, set flags \\ 2 3 3 0.50 V1UnitM0[2]
+# CHECK-NEXT: 2 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | PTRUES_D | ptrues p3.d, vl32 // PTRUES <Pd>.<T>, <pattern> \\ Predicate set/initialize, set flags \\ 2 3 3 0.50 V1UnitM0[2]
+# CHECK-NEXT: 2 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | PTRUES_B | ptrues p0.b // PTRUES <Pd>.<T> \\ Predicate set/initialize, set flags \\ 2 3 3 0.50 V1UnitM0[2]
+# CHECK-NEXT: 2 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | PTRUES_D | ptrues p2.d, vl128 // PTRUES <Pd>.<T>, #<uimm5> \\ Predicate set/initialize, set flags \\ 2 3 3 0.50 V1UnitM0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | PUNPKHI_PP | punpkhi p4.h, p4.b // PUNPKHI <Pd>.H, <Pn>.B \\ Predicate unpack and widen \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | PUNPKLO_PP | punpklo p1.h, p4.b // PUNPKLO <Pd>.H, <Pn>.B \\ Predicate unpack and widen \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | RADDHNv2i64_v2i32 | raddhn v17.2s, v22.2d, v5.2d // RADDHN <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | RADDHNv2i64_v4i32 | raddhn2 v21.4s, v11.2d, v1.2d // RADDHN2 <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | RBITv16i8 | rbit v16.16b, v21.16b // RBIT <Vd>.<T>, <Vn>.<T> \\ ASIMD bit reverse \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | RBITWr | rbit w27, w10 // RBIT <Wd>, <Wn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | RBITXr | rbit x30, x0 // RBIT <Xd>, <Xn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | RBIT_ZPmZ_S | rbit z23.s, p3/m, z10.s // RBIT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Count/reverse bits \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | RDFFR_PPz | rdffr p2.b, p1/z // RDFFR <Pd>.B, <Pg>/Z \\ Read first fault register, predicated \\ 2 3 3 0.50 V1UnitM0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | RDFFR_P | rdffr p5.b // RDFFR <Pd>.B \\ Read first fault register, unpredicated \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitI, V1UnitM | RDFFRS_PPz | rdffrs p7.b, p2/z // RDFFRS <Pd>.B, <Pg>/Z \\ Read first fault register and set flags \\ 1 4 4 2.00 V1UnitI, V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | RDVLI_XI | rdvl x20, #-20 // RDVL <Xd>, #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | RET | ret // RET \\ Branch, register \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | RET | ret x14 // RET {<Xn>} \\ Branch, register \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | REV_PP_H | rev p1.h, p2.h // REV <Pd>.<T>, <Pn>.<T> \\ Predicate reverse \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | REV_ZZ_D | rev z11.d, z24.d // REV <Zd>.<T>, <Zn>.<T> \\ Reverse, vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | REVWr | rev w19, w20 // REV <Wd>, <Wn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | REVXr | rev x30, x15 // REV <Xd>, <Xn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | REV16v16i8 | rev16 v5.16b, v26.16b // REV16 <Vd>.<T>, <Vn>.<T> \\ ASIMD reverse \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | REV16Wr | rev16 w1, w25 // REV16 <Wd>, <Wn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | REV16Xr | rev16 x27, x11 // REV16 <Xd>, <Xn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | REV32v8i16 | rev32 v22.8h, v4.8h // REV32 <Vd>.<T>, <Vn>.<T> \\ ASIMD reverse \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | REV32Xr | rev32 x30, x6 // REV32 <Xd>, <Xn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | REVXr | rev x5, x2 // REV64 <Xd>, <Xn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | REV64v2i32 | rev64 v0.2s, v19.2s // REV64 <Vd>.<T>, <Vn>.<T> \\ ASIMD reverse \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | REVB_ZPmZ_D | revb z3.d, p2/m, z21.d // REVB <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Reverse, vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | REVH_ZPmZ_D | revh z1.d, p5/m, z19.d // REVH <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Reverse, vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | REVW_ZPmZ_D | revw z16.d, p1/m, z3.d // REVW <Zd>.D, <Pg>/M, <Zn>.D \\ Reverse, vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EXTRWrri | ror w20, w13, #21 // ROR <Wd>, <Ws>, #<shifts> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EXTRXrri | ror x5, x8, #7 // ROR <Xd>, <Xs>, #<shiftd> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | RORVWr | ror w29, w26, w0 // ROR <Wd>, <Wn>, <Wm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | RORVXr | ror x4, x13, x3 // ROR <Xd>, <Xn>, <Xm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | RORVWr | ror w26, w0, w28 // RORV <Wd>, <Wn>, <Wm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | RORVXr | ror x21, x29, x17 // RORV <Xd>, <Xn>, <Xm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | RSHRNv8i8_shift | rshrn v24.8b, v0.8h, #4 // RSHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | RSHRNv4i16_shift | rshrn v8.4h, v24.4s, #16 // RSHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | RSHRNv2i32_shift | rshrn v12.2s, v12.2d, #28 // RSHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | RSHRNv16i8_shift | rshrn2 v1.16b, v16.8h, #6 // RSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | RSHRNv8i16_shift | rshrn2 v1.8h, v28.4s, #3 // RSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | RSHRNv4i32_shift | rshrn2 v20.4s, v19.2d, #14 // RSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | RSUBHNv8i16_v8i8 | rsubhn v3.8b, v9.8h, v16.8h // RSUBHN <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | RSUBHNv2i64_v4i32 | rsubhn2 v31.4s, v12.2d, v15.2d // RSUBHN2 <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SABAv16i8 | saba v8.16b, v27.16b, v13.16b // SABA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD absolute diff accum \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SABALv2i32_v2i64 | sabal v2.2d, v5.2s, v31.2s // SABAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff accum long \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SABALv4i32_v2i64 | sabal2 v21.2d, v15.4s, v13.4s // SABAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff accum long \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SABDv2i32 | sabd v12.2s, v11.2s, v27.2s // SABD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD absolute diff \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SABD_ZPmZ_S | sabd z14.s, p1/m, z14.s, z23.s // SABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SABDLv2i32_v2i64 | sabdl v28.2d, v4.2s, v19.2s // SABDL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff long \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SABDLv16i8_v8i16 | sabdl2 v10.8h, v30.16b, v4.16b // SABDL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff long \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SADALPv8i8_v4i16 | sadalp v3.4h, v5.8b // SADALP <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD pairwise add and accumulate long \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SADDLv8i8_v8i16 | saddl v7.8h, v3.8b, v23.8b // SADDL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SADDLv8i16_v4i32 | saddl2 v21.4s, v5.8h, v10.8h // SADDL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SADDLPv16i8_v8i16 | saddlp v13.8h, v29.16b // SADDLP <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD arith, pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitV[2], V1UnitV13 | SADDLVv8i8v | saddlv h18, v28.8b // SADDLV H<d>, <Vn>.8B \\ ASIMD arith, reduce, 8B/8H \\ 2 4 4 2.00 V1UnitV13
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV13[2] | SADDLVv16i8v | saddlv h30, v4.16b // SADDLV H<d>, <Vn>.16B \\ ASIMD arith, reduce, 16B \\ 2 4 4 1.00 V1UnitV13[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SADDLVv4i16v | saddlv s24, v29.4h // SADDLV S<d>, <Vn>.4H \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitV[2], V1UnitV13 | SADDLVv8i16v | saddlv s22, v23.8h // SADDLV S<d>, <Vn>.8H \\ ASIMD arith, reduce, 8B/8H \\ 2 4 4 2.00 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SADDLVv4i32v | saddlv d2, v27.4s // SADDLV D<d>, <Vn>.4S \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 5 | 14 | 14 | 0.50 | V1UnitV[5], V1UnitV0, V1UnitV1[2], V1UnitV01[3], V1UnitV02, V1UnitV13[3] | SADDV_VPZ_B | saddv d19, p6, z1.b // SADDV <Dd>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 5 14 14 0.50 V1UnitV1[2]
+# CHECK-NEXT: 4 | 12 | 12 | 0.50 | V1UnitV[4], V1UnitV1[2], V1UnitV01[3], V1UnitV13[2] | SADDV_VPZ_H | saddv d7, p2, z14.h // SADDV <Dd>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 4 12 12 0.50 V1UnitV1[2]
+# CHECK-NEXT: 4 | 10 | 10 | 0.50 | V1UnitV[4], V1UnitV1[2], V1UnitV01[3], V1UnitV13[2] | SADDV_VPZ_S | saddv d4, p7, z27.s // SADDV <Dd>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 4 10 10 0.50 V1UnitV1[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SADDWv4i16_v4i32 | saddw v8.4s, v0.4s, v1.4h // SADDW <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SADDWv16i8_v8i16 | saddw2 v24.8h, v10.8h, v30.16b // SADDW2 <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBCWr | sbc w0, w16, w1 // SBC <Wd>, <Wn>, <Wm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBCXr | sbc x19, x3, x9 // SBC <Xd>, <Xn>, <Xm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SBCSWr | sbcs w26, w28, w0 // SBCS <Wd>, <Wn>, <Wm> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SBCSXr | sbcs x8, x26, x29 // SBCS <Xd>, <Xn>, <Xm> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMWri | sbfiz w14, w5, #21, #8 // SBFIZ <Wd>, <Wn>, #<lsbs>, #<widths> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMXri | sbfiz x14, x1, #56, #2 // SBFIZ <Xd>, <Xn>, #<lsbd>, #<widthd> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMWri | sbfiz w24, w11, #5, #20 // SBFM <Wd>, <Wn>, #<immrs>, #<immss> \\ Bitfield move, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMXri | sbfx x14, x1, #36, #20 // SBFM <Xd>, <Xn>, #<immrd>, #<immsd> \\ Bitfield move, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMWri | asr w16, w16, #31 // SBFX <Wd>, <Wn>, #<lsbs>, #<widths> \\ Bit field extract \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMXri | sbfx x14, x28, #53, #8 // SBFX <Xd>, <Xn>, #<lsbd>, #<widthd> \\ Bit field extract \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SCVTFSWHri | scvtf h18, w17, #30 // SCVTF <Hd>, <Wn>, #<sfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SCVTFSWSri | scvtf s14, w9, #19 // SCVTF <Sd>, <Wn>, #<sfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SCVTFSWDri | scvtf d16, w3, #13 // SCVTF <Dd>, <Wn>, #<sfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SCVTFSXHri | scvtf h28, x25, #23 // SCVTF <Hd>, <Xn>, #<dfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SCVTFSXSri | scvtf s27, x19, #5 // SCVTF <Sd>, <Xn>, #<dfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SCVTFSXDri | scvtf d15, x22, #32 // SCVTF <Dd>, <Xn>, #<dfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SCVTFUWHri | scvtf h22, w7 // SCVTF <Hd>, <Wn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SCVTFUWSri | scvtf s22, w10 // SCVTF <Sd>, <Wn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SCVTFUWDri | scvtf d23, w6 // SCVTF <Dd>, <Wn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SCVTFUXHri | scvtf h21, x12 // SCVTF <Hd>, <Xn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SCVTFUXSri | scvtf s25, x28 // SCVTF <Sd>, <Xn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SCVTFUXDri | scvtf d12, x0 // SCVTF <Dd>, <Xn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | SCVTFh | scvtf h4, h8, #9 // SCVTF H<d>, H<n>, #<hfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | SCVTFs | scvtf s29, s12, #1 // SCVTF S<d>, S<n>, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SCVTFd | scvtf d1, d12, #26 // SCVTF D<d>, D<n>, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | SCVTFv4i16_shift | scvtf v25.4h, v13.4h, #8 // SCVTF <Vd>.4H, <Vn>.4H, #<hfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | SCVTFv8i16_shift | scvtf v4.8h, v8.8h, #10 // SCVTF <Vd>.8H, <Vn>.8H, #<hfbits> \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SCVTFv2i32_shift | scvtf v5.2s, v2.2s, #26 // SCVTF <Vd>.2S, <Vn>.2S, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | SCVTFv4i32_shift | scvtf v2.4s, v24.4s, #10 // SCVTF <Vd>.4S, <Vn>.4S, #<sfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SCVTFv2i64_shift | scvtf v11.2d, v2.2d, #42 // SCVTF <Vd>.2D, <Vn>.2D, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | SCVTFv1i16 | scvtf h5, h14 // SCVTF <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | SCVTFv1i32 | scvtf s5, s16 // SCVTF S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SCVTFv1i64 | scvtf d12, d11 // SCVTF D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | SCVTFv4f16 | scvtf v22.4h, v10.4h // SCVTF <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | SCVTFv8f16 | scvtf v16.8h, v13.8h // SCVTF <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SCVTFv2f32 | scvtf v9.2s, v31.2s // SCVTF <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | SCVTFv4f32 | scvtf v2.4s, v7.4s // SCVTF <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SCVTFv2f64 | scvtf v18.2d, v11.2d // SCVTF <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 4 | 6 | 6 | 0.25 | V1UnitV[4], V1UnitV0[4], V1UnitV01[4], V1UnitV02[4] | SCVTF_ZPmZ_HtoH | scvtf z3.h, p3/m, z29.h // SCVTF <Zd>.H, <Pg>/M, <Zn>.H \\ Convert to floating point, 16b to half \\ 4 6 6 0.25 V1UnitV0[4]
+# CHECK-NEXT: 2 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | SCVTF_ZPmZ_StoH | scvtf z1.h, p5/m, z27.s // SCVTF <Zd>.H, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 2 4 4 0.50 V1UnitV0[2]
+# CHECK-NEXT: 2 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | SCVTF_ZPmZ_StoS | scvtf z30.s, p4/m, z29.s // SCVTF <Zd>.S, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 2 4 4 0.50 V1UnitV0[2]
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SCVTF_ZPmZ_StoD | scvtf z18.d, p3/m, z16.s // SCVTF <Zd>.D, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 1 3 3 1.00 V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SCVTF_ZPmZ_DtoH | scvtf z18.h, p1/m, z14.d // SCVTF <Zd>.H, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SCVTF_ZPmZ_DtoS | scvtf z10.s, p1/m, z11.d // SCVTF <Zd>.S, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SCVTF_ZPmZ_DtoD | scvtf z3.d, p2/m, z27.d // SCVTF <Zd>.D, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 12 | 12 | 0.20 | V1UnitI[5], V1UnitM[5], V1UnitM0[5] | SDIVWr | sdiv w6, w28, w24 // SDIV <Wd>, <Wn>, <Wm> \\ Divide, W-form \\ 1 12 12 0.08 V1UnitM0[13]
+# CHECK-NEXT: 1 | 20 | 20 | 0.20 | V1UnitI[5], V1UnitM[5], V1UnitM0[5] | SDIVXr | sdiv x19, x2, x14 // SDIV <Xd>, <Xn>, <Xm> \\ Divide, X-form \\ 1 20 20 0.05 V1UnitM0[21]
+# CHECK-NEXT: 1 | 12 | 12 | 0.14 | V1UnitV[7], V1UnitV0[7], V1UnitV01[7], V1UnitV02[7] | SDIV_ZPmZ_S | sdiv z24.s, p1/m, z24.s, z14.s // SDIV <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 1 12 12 0.08 V1UnitV0[12]
+# CHECK-NEXT: 1 | 20 | 20 | 0.14 | V1UnitV[7], V1UnitV0[7], V1UnitV01[7], V1UnitV02[7] | SDIV_ZPmZ_D | sdiv z7.d, p6/m, z7.d, z20.d // SDIV <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 1 20 20 0.05 V1UnitV0[21]
+# CHECK-NEXT: 1 | 12 | 12 | 0.14 | V1UnitV[7], V1UnitV0[7], V1UnitV01[7], V1UnitV02[7] | SDIVR_ZPmZ_S | sdivr z10.s, p2/m, z10.s, z7.s // SDIVR <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 1 12 12 0.08 V1UnitV0[12]
+# CHECK-NEXT: 1 | 20 | 20 | 0.14 | V1UnitV[7], V1UnitV0[7], V1UnitV01[7], V1UnitV02[7] | SDIVR_ZPmZ_D | sdivr z0.d, p3/m, z0.d, z9.d // SDIVR <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 1 20 20 0.05 V1UnitV0[21]
+# CHECK-NEXT: 1 | 3 | 1 | 2.00 | V1UnitV, V1UnitV01 | SDOT_ZZZI_S | sdot z6.s, z29.b, z0.b[2] // SDOT <Zda>.S, <Zn>.B, <Zmb>.B[<imms>] \\ Dot product, 8 bit \\ 1 3 1 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 1 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SDOT_ZZZI_D | sdot z0.d, z18.h, z10.h[1] // SDOT <Zda>.D, <Zn>.H, <Zmh>.H[<immd>] \\ Dot product, 16 bit \\ 1 4 1 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 1 | 2.00 | V1UnitV, V1UnitV01 | SDOT_ZZZ_S | sdot z28.s, z30.b, z14.b // SDOT <Zda>.S, <Zn>.B, <Zm>.B \\ Dot product, 8 bit \\ 1 3 1 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 1 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SDOT_ZZZ_D | sdot z19.d, z5.h, z8.h // SDOT <Zda>.D, <Zn>.H, <Zm>.H \\ Dot product, 16 bit \\ 1 4 1 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | SDOTlanev16i8 | sdot v2.4s, v27.16b, v5.4b[0] // SDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<indexs>] \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | SDOTv8i8 | sdot v3.2s, v20.8b, v10.8b // SDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SEL_PPPP | sel p1.b, p7, p5.b, p4.b // SEL <Pd>.B, <Pg>, <Pn>.B, <Pm>.B \\ Predicate select \\ 1 1 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SEL_ZPZZ_H | sel z0.h, p7, z13.h, z13.h // SEL <Zd>.<T>, <Pv>, <Zn>.<T>, <Zm>.<T> \\ Select, vector form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SETFFR | setffr // SETFFR \\ Set first fault register \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HINT | sev // SEV \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HINT | sevl // SEVL \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SHADDv16i8 | shadd v25.16b, v1.16b, v10.16b // SHADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SHLd | shl d17, d3, #16 // SHL <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed, basic \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLv8i8_shift | shl v23.8b, v18.8b, #6 // SHL <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLv8i16_shift | shl v0.8h, v23.8h, #10 // SHL <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLv4i32_shift | shl v0.4s, v18.4s, #30 // SHL <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLv2i64_shift | shl v20.2d, v28.2d, #40 // SHL <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLLv8i8 | shll v3.8h, v13.8b, #8 // SHLL <Vd>.8H, <Vn>.8B, #8 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLLv4i16 | shll v26.4s, v18.4h, #16 // SHLL <Vd>.4S, <Vn>.4H, #16 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLLv2i32 | shll v4.2d, v25.2s, #32 // SHLL <Vd>.2D, <Vn>.2S, #32 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLLv16i8 | shll2 v12.8h, v28.16b, #8 // SHLL2 <Vd>.8H, <Vn>.16B, #8 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLLv8i16 | shll2 v11.4s, v22.8h, #16 // SHLL2 <Vd>.4S, <Vn>.8H, #16 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLLv4i32 | shll2 v2.2d, v29.4s, #32 // SHLL2 <Vd>.2D, <Vn>.4S, #32 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHRNv8i8_shift | shrn v27.8b, v23.8h, #3 // SHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHRNv4i16_shift | shrn v17.4h, v1.4s, #13 // SHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHRNv2i32_shift | shrn v13.2s, v0.2d, #12 // SHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHRNv16i8_shift | shrn2 v4.16b, v29.8h, #8 // SHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHRNv8i16_shift | shrn2 v9.8h, v18.4s, #10 // SHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHRNv4i32_shift | shrn2 v5.4s, v12.2d, #16 // SHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SHSUBv8i16 | shsub v15.8h, v5.8h, v27.8h // SHSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SLId | sli d7, d19, #53 // SLI <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SLIv16i8_shift | sli v16.16b, v26.16b, #7 // SLI <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SLIv4i16_shift | sli v14.4h, v10.4h, #15 // SLI <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SLIv2i32_shift | sli v29.2s, v14.2s, #13 // SLI <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SLIv2i64_shift | sli v25.2d, v21.2d, #41 // SLI <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SMADDLrrr | smaddl x17, w27, w30, x3 // SMADDL <Xd>, <Wn>, <Wm>, <Xa> \\ Multiply accumulate long \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SMAX_ZI_S | smax z3.s, z3.s, #-39 // SMAX <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SMAX_ZPmZ_B | smax z0.b, p5/m, z0.b, z20.b // SMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SMAXv16i8 | smax v30.16b, v3.16b, v30.16b // SMAX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SMAXPv8i16 | smaxp v21.8h, v16.8h, v7.8h // SMAXP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitV[2], V1UnitV13 | SMAXVv8i8v | smaxv b4, v30.8b // SMAXV B<d>, <Vn>.8B \\ ASIMD max/min, reduce, 8B/8H \\ 2 4 4 2.00 V1UnitV13
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV13[2] | SMAXVv16i8v | smaxv b15, v16.16b // SMAXV B<d>, <Vn>.16B \\ ASIMD max/min, reduce, 16B \\ 2 4 4 1.00 V1UnitV13[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SMAXVv4i16v | smaxv h28, v14.4h // SMAXV H<d>, <Vn>.4H \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitV[2], V1UnitV13 | SMAXVv8i16v | smaxv h6, v19.8h // SMAXV H<d>, <Vn>.8H \\ ASIMD max/min, reduce, 8B/8H \\ 2 4 4 2.00 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SMAXVv4i32v | smaxv s3, v14.4s // SMAXV S<d>, <Vn>.4S \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 5 | 14 | 14 | 0.50 | V1UnitV[5], V1UnitV0, V1UnitV1[2], V1UnitV01[3], V1UnitV02, V1UnitV13[3] | SMAXV_VPZ_B | smaxv b19, p4, z14.b // SMAXV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 5 14 14 0.50 V1UnitV1[2]
+# CHECK-NEXT: 4 | 12 | 12 | 0.50 | V1UnitV[4], V1UnitV1[2], V1UnitV01[3], V1UnitV13[2] | SMAXV_VPZ_H | smaxv h0, p6, z20.h // SMAXV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 4 12 12 0.50 V1UnitV1[2]
+# CHECK-NEXT: 4 | 10 | 10 | 0.50 | V1UnitV[4], V1UnitV1[2], V1UnitV01[3], V1UnitV13[2] | SMAXV_VPZ_S | smaxv s11, p2, z28.s // SMAXV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 4 10 10 0.50 V1UnitV1[2]
+# CHECK-NEXT: 2 | 8 | 8 | 2.00 | V1UnitV[2], V1UnitV01 | SMAXV_VPZ_D | smaxv d24, p5, z24.d // SMAXV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 2 8 8 2.00 V1UnitV[2], V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | SMC | smc #0x7e57 // SMC #<imm> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SMIN_ZI_S | smin z21.s, z21.s, #59 // SMIN <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SMIN_ZPmZ_S | smin z22.s, p0/m, z22.s, z30.s // SMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SMINv4i32 | smin v29.4s, v24.4s, v24.4s // SMIN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SMINPv8i16 | sminp v7.8h, v27.8h, v7.8h // SMINP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitV[2], V1UnitV13 | SMINVv8i8v | sminv b6, v11.8b // SMINV B<d>, <Vn>.8B \\ ASIMD max/min, reduce, 8B/8H \\ 2 4 4 2.00 V1UnitV13
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV13[2] | SMINVv16i8v | sminv b24, v8.16b // SMINV B<d>, <Vn>.16B \\ ASIMD max/min, reduce, 16B \\ 2 4 4 1.00 V1UnitV13[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SMINVv4i16v | sminv h24, v23.4h // SMINV H<d>, <Vn>.4H \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitV[2], V1UnitV13 | SMINVv8i16v | sminv h2, v9.8h // SMINV H<d>, <Vn>.8H \\ ASIMD max/min, reduce, 8B/8H \\ 2 4 4 2.00 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SMINVv4i32v | sminv s16, v15.4s // SMINV S<d>, <Vn>.4S \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 5 | 14 | 14 | 0.50 | V1UnitV[5], V1UnitV0, V1UnitV1[2], V1UnitV01[3], V1UnitV02, V1UnitV13[3] | SMINV_VPZ_B | sminv b4, p2, z10.b // SMINV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 5 14 14 0.50 V1UnitV1[2]
+# CHECK-NEXT: 4 | 12 | 12 | 0.50 | V1UnitV[4], V1UnitV1[2], V1UnitV01[3], V1UnitV13[2] | SMINV_VPZ_H | sminv h15, p7, z10.h // SMINV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 4 12 12 0.50 V1UnitV1[2]
+# CHECK-NEXT: 4 | 10 | 10 | 0.50 | V1UnitV[4], V1UnitV1[2], V1UnitV01[3], V1UnitV13[2] | SMINV_VPZ_S | sminv s29, p0, z27.s // SMINV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 4 10 10 0.50 V1UnitV1[2]
+# CHECK-NEXT: 2 | 8 | 8 | 2.00 | V1UnitV[2], V1UnitV01 | SMINV_VPZ_D | sminv d17, p2, z18.d // SMINV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 2 8 8 2.00 V1UnitV[2], V1UnitV01
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLALv4i16_indexed | smlal v16.4s, v9.4h, v11.h[4] // SMLAL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLALv2i32_indexed | smlal v0.2d, v25.2s, v1.s[1] // SMLAL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLALv8i16_indexed | smlal2 v1.4s, v9.8h, v0.h[6] // SMLAL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLALv4i32_indexed | smlal2 v30.2d, v22.4s, v7.s[2] // SMLAL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLALv8i8_v8i16 | smlal v25.8h, v24.8b, v28.8b // SMLAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLALv8i16_v4i32 | smlal2 v30.4s, v31.8h, v13.8h // SMLAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLSLv4i16_indexed | smlsl v14.4s, v23.4h, v12.h[7] // SMLSL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLSLv2i32_indexed | smlsl v25.2d, v27.2s, v1.s[1] // SMLSL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLSLv8i16_indexed | smlsl2 v12.4s, v11.8h, v12.h[0] // SMLSL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLSLv4i32_indexed | smlsl2 v11.2d, v28.4s, v7.s[2] // SMLSL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLSLv4i16_v4i32 | smlsl v11.4s, v14.4h, v15.4h // SMLSL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLSLv8i16_v4i32 | smlsl2 v21.4s, v27.8h, v16.8h // SMLSL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | SMMLA | smmla v0.4s, v17.16b, v31.16b // SMMLA <Vd>.4S, <Vn>.16B, <Vm>.16B \\ ASIMD matrix multiply-accumulate \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SMSUBLrrr | smnegl x3, w23, w18 // SMNEGL <Xd>, <Wn>, <Wm> \\ Multiply long \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SMOVvi8to32_idx0 | smov w15, v22.b[0] // SMOV <Wd>, <Vn>.B[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SMOVvi8to32 | smov w6, v28.b[9] // SMOV <Wd>, <Vn>.B[<indexb>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SMOVvi16to32_idx0 | smov w26, v27.h[0] // SMOV <Wd>, <Vn>.H[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SMOVvi16to32 | smov w18, v29.h[6] // SMOV <Wd>, <Vn>.H[<indexh>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SMOVvi8to64_idx0 | smov x21, v0.b[0] // SMOV <Xd>, <Vn>.B[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SMOVvi8to64 | smov x16, v29.b[8] // SMOV <Xd>, <Vn>.B[<indexb>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SMOVvi16to64_idx0 | smov x9, v27.h[0] // SMOV <Xd>, <Vn>.H[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SMOVvi16to64 | smov x4, v21.h[2] // SMOV <Xd>, <Vn>.H[<indexh>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SMOVvi32to64_idx0 | smov x15, v3.s[0] // SMOV <Xd>, <Vn>.S[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SMOVvi32to64 | smov x5, v29.s[1] // SMOV <Xd>, <Vn>.S[<indexs>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SMSUBLrrr | smsubl x8, w24, w13, x6 // SMSUBL <Xd>, <Wn>, <Wm>, <Xa> \\ Multiply accumulate long \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SMULH_ZPmZ_B | smulh z11.b, p5/m, z11.b, z17.b // SMULH <Zdn>.B, <Pg>/M, <Zdn>.B, <Zm>.B \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SMULH_ZPmZ_H | smulh z8.h, p0/m, z8.h, z4.h // SMULH <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SMULH_ZPmZ_S | smulh z27.s, p7/m, z27.s, z30.s // SMULH <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 2 | 5 | 5 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | SMULH_ZPmZ_D | smulh z4.d, p7/m, z4.d, z28.d // SMULH <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Multiply, D element size \\ 2 5 5 0.50 V1UnitV0[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitI, V1UnitM | SMULHrr | smulh x8, x29, x17 // SMULH <Xd>, <Xn>, <Xm> \\ Multiply high \\ 1 3 3 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SMADDLrrr | smull x19, w0, w6 // SMULL <Xd>, <Wn>, <Wm> \\ Multiply long \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SMULLv4i16_indexed | smull v3.4s, v26.4h, v1.h[5] // SMULL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SMULLv2i32_indexed | smull v31.2d, v23.2s, v6.s[2] // SMULL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SMULLv8i16_indexed | smull2 v13.4s, v18.8h, v0.h[3] // SMULL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SMULLv4i32_indexed | smull2 v11.2d, v1.4s, v7.s[0] // SMULL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SMULLv2i32_v2i64 | smull v28.2d, v26.2s, v20.2s // SMULL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SMULLv4i32_v2i64 | smull2 v7.2d, v14.4s, v15.4s // SMULL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQABSv1i64 | sqabs d15, d26 // SQABS <V><d>, <V><n> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQABSv8i16 | sqabs v25.8h, v24.8h // SQABS <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQADD_ZI_B | sqadd z1.b, z1.b, #164 // SQADD <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQADD_ZI_H | sqadd z18.h, z18.h, #166 // SQADD <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQADD_ZI_D | sqadd z3.d, z3.d, #158 // SQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQADD_ZZZ_D | sqadd z19.d, z27.d, z28.d // SQADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQADDv1i16 | sqadd h12, h18, h10 // SQADD <V><d>, <V><n>, <V><m> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQADDv2i32 | sqadd v15.2s, v13.2s, v28.2s // SQADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECB_XPiWdI | sqdecb x26, w26 // SQDECB <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECB_XPiWdI | sqdecb x16, w16, vl64 // SQDECB <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECB_XPiWdI | sqdecb x4, w4, vl1, mul #16 // SQDECB <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECB_XPiI | sqdecb x4 // SQDECB <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECB_XPiI | sqdecb x28, vl6 // SQDECB <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECB_XPiI | sqdecb x20, vl7, mul #4 // SQDECB <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECD_XPiWdI | sqdecd x1, w1 // SQDECD <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECD_XPiWdI | sqdecd x11, w11, mul3 // SQDECD <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECD_XPiWdI | sqdecd x14, w14, vl2, mul #16 // SQDECD <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECD_XPiI | sqdecd x18 // SQDECD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECD_XPiI | sqdecd x11, vl5 // SQDECD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECD_XPiI | sqdecd x21, all, mul #13 // SQDECD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SQDECD_ZPiI | sqdecd z27.d // SQDECD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SQDECD_ZPiI | sqdecd z2.d, vl128 // SQDECD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SQDECD_ZPiI | sqdecd z23.d, vl1, mul #16 // SQDECD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECH_XPiWdI | sqdech x7, w7 // SQDECH <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECH_XPiWdI | sqdech x10, w10, vl128 // SQDECH <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECH_XPiWdI | sqdech x16, w16, vl6, mul #11 // SQDECH <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECH_XPiI | sqdech x6 // SQDECH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECH_XPiI | sqdech x17, vl128 // SQDECH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECH_XPiI | sqdech x27, vl128, mul #4 // SQDECH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SQDECH_ZPiI | sqdech z16.h // SQDECH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SQDECH_ZPiI | sqdech z21.h, vl6 // SQDECH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SQDECH_ZPiI | sqdech z7.h, mul3, mul #7 // SQDECH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECP_XPWd_B | sqdecp x1, p4.b, w1 // SQDECP <Xdn>, <Pm>.<T>, <Wdn> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECP_XP_D | sqdecp x26, p6.d // SQDECP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 3 | 7 | 7 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV, V1UnitV01 | SQDECP_ZP_D | sqdecp z10.d, p3.d // SQDECP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 3 7 7 0.50 V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV, V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECW_XPiWdI | sqdecw x13, w13 // SQDECW <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECW_XPiWdI | sqdecw x2, w2, pow2 // SQDECW <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECW_XPiWdI | sqdecw x26, w26, vl8, mul #10 // SQDECW <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECW_XPiI | sqdecw x10 // SQDECW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECW_XPiI | sqdecw x17, vl128 // SQDECW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECW_XPiI | sqdecw x13, mul4, mul #3 // SQDECW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SQDECW_ZPiI | sqdecw z7.s // SQDECW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SQDECW_ZPiI | sqdecw z10.s, pow2 // SQDECW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SQDECW_ZPiI | sqdecw z28.s, vl2, mul #15 // SQDECW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLALv1i32_indexed | sqdmlal s23, h16, v4.h[7] // SQDMLAL S<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLALv1i64_indexed | sqdmlal d12, s18, v3.s[0] // SQDMLAL D<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLALv4i16_indexed | sqdmlal v20.4s, v30.4h, v12.h[3] // SQDMLAL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLALv2i32_indexed | sqdmlal v11.2d, v24.2s, v0.s[3] // SQDMLAL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLALv8i16_indexed | sqdmlal2 v2.4s, v17.8h, v5.h[6] // SQDMLAL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLALv4i32_indexed | sqdmlal2 v23.2d, v30.4s, v6.s[0] // SQDMLAL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLALi32 | sqdmlal d16, s12, s15 // SQDMLAL <Va><d>, <Vb><n>, <Vb><m> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLALv4i16_v4i32 | sqdmlal v8.4s, v24.4h, v31.4h // SQDMLAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLALv8i16_v4i32 | sqdmlal2 v29.4s, v11.8h, v13.8h // SQDMLAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLSLv1i32_indexed | sqdmlsl s26, h21, v11.h[1] // SQDMLSL S<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLSLv1i64_indexed | sqdmlsl d6, s16, v3.s[1] // SQDMLSL D<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLSLv4i16_indexed | sqdmlsl v4.4s, v22.4h, v13.h[2] // SQDMLSL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLSLv2i32_indexed | sqdmlsl v26.2d, v7.2s, v3.s[0] // SQDMLSL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLSLv8i16_indexed | sqdmlsl2 v2.4s, v28.8h, v4.h[6] // SQDMLSL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLSLv4i32_indexed | sqdmlsl2 v4.2d, v3.4s, v3.s[2] // SQDMLSL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLSLi32 | sqdmlsl d13, s21, s8 // SQDMLSL <Va><d>, <Vb><n>, <Vb><m> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLSLv4i16_v4i32 | sqdmlsl v11.4s, v19.4h, v5.4h // SQDMLSL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLSLv8i16_v4i32 | sqdmlsl2 v27.4s, v8.8h, v22.8h // SQDMLSL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMULHv1i16_indexed | sqdmulh h14, h17, v6.h[6] // SQDMULH H<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMULHv1i32_indexed | sqdmulh s19, s6, v6.s[3] // SQDMULH S<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMULHv4i16_indexed | sqdmulh v8.4h, v16.4h, v5.h[4] // SQDMULH <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMULHv2i32_indexed | sqdmulh v16.2s, v24.2s, v7.s[2] // SQDMULH <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMULHv1i16 | sqdmulh h26, h21, h17 // SQDMULH <V><d>, <V><n>, <V><m> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMULHv2i32 | sqdmulh v20.2s, v11.2s, v29.2s // SQDMULH <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SQDMULLv1i32_indexed | sqdmull s25, h5, v1.h[3] // SQDMULL S<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SQDMULLv1i64_indexed | sqdmull d29, s23, v0.s[2] // SQDMULL D<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SQDMULLv4i16_indexed | sqdmull v8.4s, v19.4h, v1.h[2] // SQDMULL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SQDMULLv2i32_indexed | sqdmull v20.2d, v10.2s, v6.s[2] // SQDMULL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SQDMULLv8i16_indexed | sqdmull2 v10.4s, v25.8h, v0.h[7] // SQDMULL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SQDMULLv4i32_indexed | sqdmull2 v4.2d, v29.4s, v2.s[3] // SQDMULL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQDMULLi32 | sqdmull d19, s2, s0 // SQDMULL <Va><d>, <Vb><n>, <Vb><m> \\ ASIMD multiply long \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SQDMULLv2i32_v2i64 | sqdmull v14.2d, v23.2s, v13.2s // SQDMULL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SQDMULLv8i16_v4i32 | sqdmull2 v12.4s, v11.8h, v1.8h // SQDMULL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCB_XPiWdI | sqincb x12, w12 // SQINCB <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCB_XPiWdI | sqincb x1, w1, vl8 // SQINCB <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCB_XPiWdI | sqincb x16, w16, vl2, mul #16 // SQINCB <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCB_XPiI | sqincb x5 // SQINCB <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCB_XPiI | sqincb x4, vl6 // SQINCB <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCB_XPiI | sqincb x30, all, mul #7 // SQINCB <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCD_XPiWdI | sqincd x28, w28 // SQINCD <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCD_XPiWdI | sqincd x16, w16, vl8 // SQINCD <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCD_XPiWdI | sqincd x22, w22, vl6, mul #16 // SQINCD <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCD_XPiI | sqincd x10 // SQINCD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCD_XPiI | sqincd x17, vl5 // SQINCD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCD_XPiI | sqincd x13, vl64 // SQINCD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SQINCD_ZPiI | sqincd z24.d // SQINCD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SQINCD_ZPiI | sqincd z10.d, vl128 // SQINCD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SQINCD_ZPiI | sqincd z29.d, vl128, mul #12 // SQINCD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCH_XPiWdI | sqinch x28, w28 // SQINCH <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCH_XPiWdI | sqinch x30, w30, vl1 // SQINCH <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCH_XPiWdI | sqinch x16, w16, vl4, mul #2 // SQINCH <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCH_XPiI | sqinch x23 // SQINCH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCH_XPiI | sqinch x10, vl64 // SQINCH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCH_XPiI | sqinch x16, pow2, mul #2 // SQINCH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SQINCH_ZPiI | sqinch z3.h // SQINCH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SQINCH_ZPiI | sqinch z23.h, vl4 // SQINCH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SQINCH_ZPiI | sqinch z6.h, vl128, mul #3 // SQINCH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCP_XPWd_H | sqincp x13, p2.h, w13 // SQINCP <Xdn>, <Pm>.<T>, <Wdn> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCP_XP_H | sqincp x0, p7.h // SQINCP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 3 | 7 | 7 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV, V1UnitV01 | SQINCP_ZP_H | sqincp z9.h, p1.h // SQINCP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 3 7 7 0.50 V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV, V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCW_XPiWdI | sqincw x24, w24 // SQINCW <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCW_XPiWdI | sqincw x16, w16, mul4 // SQINCW <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCW_XPiWdI | sqincw x27, w27, vl32, mul #15 // SQINCW <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCW_XPiI | sqincw x29 // SQINCW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCW_XPiI | sqincw x25, vl7 // SQINCW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCW_XPiI | sqincw x21, vl8, mul #3 // SQINCW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SQINCW_ZPiI | sqincw z30.s // SQINCW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SQINCW_ZPiI | sqincw z8.s, mul3 // SQINCW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SQINCW_ZPiI | sqincw z0.s, vl5, mul #9 // SQINCW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQNEGv1i64 | sqneg d24, d22 // SQNEG <V><d>, <V><n> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQNEGv16i8 | sqneg v30.16b, v15.16b // SQNEG <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLAHv1i16_indexed | sqrdmlah h14, h4, v6.h[7] // SQRDMLAH H<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLAHv1i32_indexed | sqrdmlah s24, s17, v6.s[2] // SQRDMLAH S<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLAHv4i16_indexed | sqrdmlah v17.4h, v18.4h, v4.h[7] // SQRDMLAH <Vd>.4H, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLAHv2i32_indexed | sqrdmlah v10.2s, v17.2s, v3.s[3] // SQRDMLAH <Vd>.2S, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLAHv1i32 | sqrdmlah s3, s3, s5 // SQRDMLAH <V><d>, <V><n>, <V><m> \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLAHv8i16 | sqrdmlah v16.8h, v30.8h, v28.8h // SQRDMLAH <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLSHv1i16_indexed | sqrdmlsh h13, h26, v4.h[2] // SQRDMLSH H<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLSHv1i32_indexed | sqrdmlsh s26, s29, v7.s[0] // SQRDMLSH S<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLSHv8i16_indexed | sqrdmlsh v1.8h, v21.8h, v8.h[1] // SQRDMLSH <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLSHv4i16_indexed | sqrdmlsh v8.4h, v11.4h, v1.h[3] // SQRDMLSH <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLSHv2i32_indexed | sqrdmlsh v20.2s, v29.2s, v4.s[3] // SQRDMLSH <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLSHv4i32_indexed | sqrdmlsh v21.4s, v9.4s, v1.s[0] // SQRDMLSH <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLSHv1i32 | sqrdmlsh s30, s20, s13 // SQRDMLSH <V><d>, <V><n>, <V><m> \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLSHv4i16 | sqrdmlsh v20.4h, v2.4h, v23.4h // SQRDMLSH <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMULHv1i16_indexed | sqrdmulh h3, h25, v2.h[1] // SQRDMULH H<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMULHv1i32_indexed | sqrdmulh s9, s24, v4.s[3] // SQRDMULH S<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMULHv8i16_indexed | sqrdmulh v0.8h, v15.8h, v0.h[5] // SQRDMULH <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMULHv2i32_indexed | sqrdmulh v6.2s, v29.2s, v4.s[2] // SQRDMULH <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMULHv1i16 | sqrdmulh h5, h2, h20 // SQRDMULH <V><d>, <V><n>, <V><m> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMULHv2i32 | sqrdmulh v31.2s, v17.2s, v4.2s // SQRDMULH <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHLv1i64 | sqrshl d6, d1, d30 // SQRSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHLv8i8 | sqrshl v15.8b, v26.8b, v21.8b // SQRSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQRSHRNb | sqrshrn b6, h24, #3 // SQRSHRN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQRSHRNh | sqrshrn h11, s22, #8 // SQRSHRN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQRSHRNs | sqrshrn s4, d9, #13 // SQRSHRN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRNv8i8_shift | sqrshrn v31.8b, v31.8h, #2 // SQRSHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRNv4i16_shift | sqrshrn v27.4h, v11.4s, #8 // SQRSHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRNv2i32_shift | sqrshrn v4.2s, v30.2d, #10 // SQRSHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRNv16i8_shift | sqrshrn2 v11.16b, v30.8h, #7 // SQRSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRNv8i16_shift | sqrshrn2 v14.8h, v3.4s, #12 // SQRSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRNv4i32_shift | sqrshrn2 v13.4s, v28.2d, #24 // SQRSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQRSHRUNb | sqrshrun b5, h0, #3 // SQRSHRUN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQRSHRUNh | sqrshrun h25, s11, #7 // SQRSHRUN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQRSHRUNs | sqrshrun s15, d18, #2 // SQRSHRUN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRUNv8i8_shift | sqrshrun v0.8b, v3.8h, #7 // SQRSHRUN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRUNv4i16_shift | sqrshrun v5.4h, v8.4s, #7 // SQRSHRUN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRUNv2i32_shift | sqrshrun v7.2s, v8.2d, #13 // SQRSHRUN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRUNv16i8_shift | sqrshrun2 v14.16b, v14.8h, #3 // SQRSHRUN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRUNv8i16_shift | sqrshrun2 v9.8h, v16.4s, #10 // SQRSHRUN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRUNv4i32_shift | sqrshrun2 v12.4s, v23.2d, #30 // SQRSHRUN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLb | sqshl b15, b3, #4 // SQSHL B<d>, B<n>, #<shiftb> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLh | sqshl h21, h0, #5 // SQSHL H<d>, H<n>, #<shifth> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLs | sqshl s26, s9, #24 // SQSHL S<d>, S<n>, #<shifts> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLd | sqshl d8, d23, #17 // SQSHL D<d>, D<n>, #<shiftd> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLv16i8_shift | sqshl v25.16b, v26.16b, #5 // SQSHL <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLv4i16_shift | sqshl v29.4h, v1.4h, #7 // SQSHL <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLv2i32_shift | sqshl v0.2s, v5.2s, #1 // SQSHL <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLv2i64_shift | sqshl v11.2d, v2.2d, #23 // SQSHL <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLv1i32 | sqshl s17, s4, s23 // SQSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLv16i8 | sqshl v23.16b, v23.16b, v23.16b // SQSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLUb | sqshlu b3, b27, #5 // SQSHLU B<d>, B<n>, #<shiftb> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLUh | sqshlu h23, h4, #6 // SQSHLU H<d>, H<n>, #<shifth> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLUs | sqshlu s29, s29, #30 // SQSHLU S<d>, S<n>, #<shifts> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLUd | sqshlu d14, d5, #22 // SQSHLU D<d>, D<n>, #<shiftd> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLUv8i8_shift | sqshlu v11.8b, v17.8b, #6 // SQSHLU <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLUv8i16_shift | sqshlu v18.8h, v8.8h, #14 // SQSHLU <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLUv4i32_shift | sqshlu v25.4s, v7.4s, #13 // SQSHLU <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLUv2i64_shift | sqshlu v19.2d, v14.2d, #39 // SQSHLU <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQSHRNb | sqshrn b17, h30, #7 // SQSHRN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQSHRNh | sqshrn h30, s15, #5 // SQSHRN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQSHRNs | sqshrn s16, d0, #20 // SQSHRN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRNv8i8_shift | sqshrn v3.8b, v25.8h, #1 // SQSHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRNv4i16_shift | sqshrn v23.4h, v14.4s, #6 // SQSHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRNv2i32_shift | sqshrn v6.2s, v29.2d, #10 // SQSHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRNv16i8_shift | sqshrn2 v31.16b, v31.8h, #8 // SQSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRNv8i16_shift | sqshrn2 v13.8h, v6.4s, #13 // SQSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRNv4i32_shift | sqshrn2 v30.4s, v0.2d, #1 // SQSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQSHRUNb | sqshrun b3, h16, #3 // SQSHRUN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQSHRUNh | sqshrun h11, s10, #7 // SQSHRUN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQSHRUNs | sqshrun s18, d1, #13 // SQSHRUN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQSHRUNv8i8_shift | sqshrun v21.8b, v27.8h, #5 // SQSHRUN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQSHRUNv4i16_shift | sqshrun v18.4h, v19.4s, #2 // SQSHRUN <Vd>.4H, <Vn>.4S, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQSHRUNv2i32_shift | sqshrun v2.2s, v14.2d, #3 // SQSHRUN <Vd>.2S, <Vn>.2D, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQSHRUNv16i8_shift | sqshrun2 v10.16b, v28.8h, #5 // SQSHRUN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQSHRUNv8i16_shift | sqshrun2 v4.8h, v28.4s, #12 // SQSHRUN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQSHRUNv4i32_shift | sqshrun2 v7.4s, v18.2d, #16 // SQSHRUN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQSUB_ZI_B | sqsub z13.b, z13.b, #114 // SQSUB <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQSUB_ZI_H | sqsub z28.h, z28.h, #139 // SQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQSUB_ZI_S | sqsub z11.s, z11.s, #14 // SQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQSUB_ZZZ_S | sqsub z28.s, z9.s, z12.s // SQSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQSUBv1i8 | sqsub b3, b13, b12 // SQSUB <V><d>, <V><n>, <V><m> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQSUBv8i16 | sqsub v20.8h, v18.8h, v12.8h // SQSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQXTNv1i8 | sqxtn b11, h22 // SQXTN <Vb><d>, <Va><n> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQXTNv2i32 | sqxtn v3.2s, v17.2d // SQXTN <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQXTNv8i16 | sqxtn2 v17.8h, v27.4s // SQXTN2 <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQXTUNv1i8 | sqxtun b30, h18 // SQXTUN <Vb><d>, <Va><n> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQXTUNv8i8 | sqxtun v26.8b, v21.8h // SQXTUN <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQXTUNv16i8 | sqxtun2 v22.16b, v6.8h // SQXTUN2 <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SRHADDv8i8 | srhadd v29.8b, v3.8b, v8.8b // SRHADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SRId | sri d30, d17, #61 // SRI <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SRIv16i8_shift | sri v23.16b, v30.16b, #2 // SRI <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SRIv4i16_shift | sri v1.4h, v0.4h, #4 // SRI <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SRIv2i32_shift | sri v28.2s, v6.2s, #16 // SRI <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SRIv2i64_shift | sri v8.2d, v19.2d, #40 // SRI <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SRSHLv1i64 | srshl d30, d8, d8 // SRSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SRSHLv8i8 | srshl v20.8b, v23.8b, v27.8b // SRSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SRSHRd | srshr d20, d18, #27 // SRSHR <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SRSHRv8i8_shift | srshr v20.8b, v0.8b, #7 // SRSHR <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SRSHRv8i16_shift | srshr v27.8h, v19.8h, #9 // SRSHR <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SRSHRv2i32_shift | srshr v8.2s, v20.2s, #31 // SRSHR <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SRSHRv2i64_shift | srshr v31.2d, v17.2d, #33 // SRSHR <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SRSRAd | srsra d13, d10, #25 // SRSRA <V><d>, <V><n>, #<shift> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SRSRAv16i8_shift | srsra v31.16b, v15.16b, #5 // SRSRA <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SRSRAv4i16_shift | srsra v14.4h, v27.4h, #7 // SRSRA <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SRSRAv2i32_shift | srsra v17.2s, v8.2s, #8 // SRSRA <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SRSRAv2i64_shift | srsra v22.2d, v4.2d, #12 // SRSRA <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | DSB | ssbb // SSBB \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHLv1i64 | sshl d29, d30, d9 // SSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHLv2i64 | sshl v13.2d, v7.2d, v27.2d // SSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHLLv8i8_shift | sshll v9.8h, v2.8b, #0 // SSHLL <Vd>.8H, <Vn>.8B, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHLLv4i16_shift | sshll v12.4s, v3.4h, #4 // SSHLL <Vd>.4S, <Vn>.4H, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHLLv2i32_shift | sshll v17.2d, v6.2s, #22 // SSHLL <Vd>.2D, <Vn>.2S, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHLLv16i8_shift | sshll2 v28.8h, v12.16b, #7 // SSHLL2 <Vd>.8H, <Vn>.16B, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHLLv8i16_shift | sshll2 v29.4s, v22.8h, #7 // SSHLL2 <Vd>.4S, <Vn>.8H, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHLLv4i32_shift | sshll2 v17.2d, v13.4s, #22 // SSHLL2 <Vd>.2D, <Vn>.4S, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SSHRd | sshr d3, d18, #10 // SSHR <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed, basic \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHRv8i8_shift | sshr v20.8b, v28.8b, #2 // SSHR <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHRv4i16_shift | sshr v20.4h, v23.4h, #10 // SSHR <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHRv2i32_shift | sshr v13.2s, v23.2s, #2 // SSHR <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHRv2i64_shift | sshr v3.2d, v8.2d, #61 // SSHR <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SSRAd | ssra d28, d30, #51 // SSRA <V><d>, <V><n>, #<shift> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SSRAv8i8_shift | ssra v9.8b, v18.8b, #2 // SSRA <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SSRAv4i16_shift | ssra v21.4h, v24.4h, #3 // SSRA <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SSRAv2i32_shift | ssra v28.2s, v17.2s, #6 // SSRA <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SSRAv2i64_shift | ssra v0.2d, v23.2d, #35 // SSRA <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SSUBLv4i16_v4i32 | ssubl v13.4s, v9.4h, v5.4h // SSUBL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SSUBLv8i16_v4i32 | ssubl2 v18.4s, v29.8h, v17.8h // SSUBL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SSUBWv2i32_v2i64 | ssubw v5.2d, v13.2d, v4.2s // SSUBW <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SSUBWv8i16_v4i32 | ssubw2 v4.4s, v26.4s, v31.8h // SSUBW2 <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev8b | st1 { v18.8b }, [x15] // ST1 { <Vt>.8B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev16b | st1 { v31.16b }, [x29] // ST1 { <Vt>.16B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev4h | st1 { v19.4h }, [x7] // ST1 { <Vt>.4H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev8h | st1 { v27.8h }, [x17] // ST1 { <Vt>.8H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev2s | st1 { v25.2s }, [x6] // ST1 { <Vt>.2S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev4s | st1 { v22.4s }, [x19] // ST1 { <Vt>.4S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev1d | st1 { v20.1d }, [x10] // ST1 { <Vt>.1D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev2d | st1 { v8.2d }, [x15] // ST1 { <Vt>.2D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev8b_POST | st1 { v16.8b }, [x14], #8 // ST1 { <Vt>.8B }, [<Xn|SP>], #8 \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev16b_POST | st1 { v10.16b }, [x8], #16 // ST1 { <Vt>.16B }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev4h_POST | st1 { v29.4h }, [x17], #8 // ST1 { <Vt>.4H }, [<Xn|SP>], #8 \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev8h_POST | st1 { v14.8h }, [x28], #16 // ST1 { <Vt>.8H }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev2s_POST | st1 { v18.2s }, [x20], #8 // ST1 { <Vt>.2S }, [<Xn|SP>], #8 \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev4s_POST | st1 { v28.4s }, [x1], #16 // ST1 { <Vt>.4S }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev1d_POST | st1 { v17.1d }, [x27], #8 // ST1 { <Vt>.1D }, [<Xn|SP>], #8 \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev2d_POST | st1 { v30.2d }, [x4], #16 // ST1 { <Vt>.2D }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev8b_POST | st1 { v13.8b }, [x8], x7 // ST1 { <Vt>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev16b_POST | st1 { v4.16b }, [x7], x26 // ST1 { <Vt>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev4h_POST | st1 { v17.4h }, [x10], x4 // ST1 { <Vt>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev8h_POST | st1 { v18.8h }, [x15], x1 // ST1 { <Vt>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev2s_POST | st1 { v6.2s }, [x17], x24 // ST1 { <Vt>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev4s_POST | st1 { v26.4s }, [x20], x29 // ST1 { <Vt>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev1d_POST | st1 { v13.1d }, [x3], x20 // ST1 { <Vt>.1D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev2d_POST | st1 { v15.2d }, [x21], x11 // ST1 { <Vt>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov8b | st1 { v8.8b, v9.8b }, [x18] // ST1 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 4 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov16b | st1 { v1.16b, v2.16b }, [x4] // ST1 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 4 2 2 1.00 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov4h | st1 { v22.4h, v23.4h }, [x22] // ST1 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 4 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov8h | st1 { v18.8h, v19.8h }, [x2] // ST1 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 4 2 2 1.00 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov2s | st1 { v13.2s, v14.2s }, [x9] // ST1 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 4 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov4s | st1 { v15.4s, v16.4s }, [x12] // ST1 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 4 2 2 1.00 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov1d | st1 { v21.1d, v22.1d }, [x29] // ST1 { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 4 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov2d | st1 { v26.2d, v27.2d }, [x28] // ST1 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 4 2 2 1.00 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov8b_POST | st1 { v23.8b, v24.8b }, [x4], #16 // ST1 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 5 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov16b_POST | st1 { v15.16b, v16.16b }, [x16], #32 // ST1 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov4h_POST | st1 { v7.4h, v8.4h }, [x7], #16 // ST1 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 5 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov8h_POST | st1 { v8.8h, v9.8h }, [x1], #32 // ST1 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov2s_POST | st1 { v23.2s, v24.2s }, [x7], #16 // ST1 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 5 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov4s_POST | st1 { v8.4s, v9.4s }, [x15], #32 // ST1 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov1d_POST | st1 { v14.1d, v15.1d }, [x11], #16 // ST1 { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 5 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov2d_POST | st1 { v12.2d, v13.2d }, [x2], #32 // ST1 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov8b_POST | st1 { v3.8b, v4.8b }, [x28], x14 // ST1 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 5 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov16b_POST | st1 { v19.16b, v20.16b }, [x13], x7 // ST1 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov4h_POST | st1 { v28.4h, v29.4h }, [x14], x5 // ST1 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 5 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov8h_POST | st1 { v9.8h, v10.8h }, [x28], x9 // ST1 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov2s_POST | st1 { v10.2s, v11.2s }, [x10], x2 // ST1 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 5 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov4s_POST | st1 { v13.4s, v14.4s }, [x8], x15 // ST1 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov1d_POST | st1 { v5.1d, v6.1d }, [x9], x14 // ST1 { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 5 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov2d_POST | st1 { v14.2d, v15.2d }, [x24], x1 // ST1 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 4 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev8b | st1 { v15.8b, v16.8b, v17.8b }, [x0] // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 4 2 2 1.00 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 6 | 2 | 2 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev16b | st1 { v27.16b, v28.16b, v29.16b }, [x18] // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 6 2 2 0.67 V1UnitL01[3],V1UnitV01[3]
+# CHECK-NEXT: 4 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev4h | st1 { v13.4h, v14.4h, v15.4h }, [x7] // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 4 2 2 1.00 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 6 | 2 | 2 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev8h | st1 { v8.8h, v9.8h, v10.8h }, [x16] // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 6 2 2 0.67 V1UnitL01[3],V1UnitV01[3]
+# CHECK-NEXT: 4 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev2s | st1 { v12.2s, v13.2s, v14.2s }, [x3] // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 4 2 2 1.00 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 6 | 2 | 2 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev4s | st1 { v19.4s, v20.4s, v21.4s }, [x7] // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 6 2 2 0.67 V1UnitL01[3],V1UnitV01[3]
+# CHECK-NEXT: 4 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev1d | st1 { v5.1d, v6.1d, v7.1d }, [x3] // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 4 2 2 1.00 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 6 | 2 | 2 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev2d | st1 { v13.2d, v14.2d, v15.2d }, [x27] // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 6 2 2 0.67 V1UnitL01[3],V1UnitV01[3]
+# CHECK-NEXT: 5 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev8b_POST | st1 { v3.8b, v4.8b, v5.8b }, [x21], #24 // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], #24 \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 7 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev16b_POST | st1 { v25.16b, v26.16b, v27.16b }, [x4], #48 // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], #48 \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 7 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+# CHECK-NEXT: 5 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev4h_POST | st1 { v24.4h, v25.4h, v26.4h }, [x9], #24 // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], #24 \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 7 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev8h_POST | st1 { v0.8h, v1.8h, v2.8h }, [x7], #48 // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], #48 \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 7 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+# CHECK-NEXT: 5 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev2s_POST | st1 { v3.2s, v4.2s, v5.2s }, [x4], #24 // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], #24 \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 7 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev4s_POST | st1 { v25.4s, v26.4s, v27.4s }, [x14], #48 // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], #48 \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 7 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+# CHECK-NEXT: 5 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev1d_POST | st1 { v7.1d, v8.1d, v9.1d }, [x13], #24 // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], #24 \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 7 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev2d_POST | st1 { v19.2d, v20.2d, v21.2d }, [x5], #48 // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], #48 \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 7 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+# CHECK-NEXT: 5 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev8b_POST | st1 { v5.8b, v6.8b, v7.8b }, [x17], x25 // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 7 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev16b_POST | st1 { v12.16b, v13.16b, v14.16b }, [x29], x23 // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 7 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+# CHECK-NEXT: 5 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev4h_POST | st1 { v18.4h, v19.4h, v20.4h }, [x0], x14 // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 7 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev8h_POST | st1 { v16.8h, v17.8h, v18.8h }, [x1], x18 // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 7 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+# CHECK-NEXT: 5 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev2s_POST | st1 { v1.2s, v2.2s, v3.2s }, [x15], x29 // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 7 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev4s_POST | st1 { v2.4s, v3.4s, v4.4s }, [x29], x6 // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 7 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+# CHECK-NEXT: 5 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev1d_POST | st1 { v8.1d, v9.1d, v10.1d }, [x13], x27 // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 7 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev2d_POST | st1 { v8.2d, v9.2d, v10.2d }, [x18], x19 // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 7 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+# CHECK-NEXT: 4 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv8b | st1 { v21.8b, v22.8b, v23.8b, v24.8b }, [x14] // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 4 2 2 1.00 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 8 | 2 | 2 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv16b | st1 { v18.16b, v19.16b, v20.16b, v21.16b }, [x29] // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 8 2 2 0.50 V1UnitL01[4],V1UnitV01[4]
+# CHECK-NEXT: 4 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv4h | st1 { v23.4h, v24.4h, v25.4h, v26.4h }, [x24] // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 4 2 2 1.00 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 8 | 2 | 2 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv8h | st1 { v7.8h, v8.8h, v9.8h, v10.8h }, [x19] // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 8 2 2 0.50 V1UnitL01[4],V1UnitV01[4]
+# CHECK-NEXT: 4 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv2s | st1 { v6.2s, v7.2s, v8.2s, v9.2s }, [x13] // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 4 2 2 1.00 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 8 | 2 | 2 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv4s | st1 { v26.4s, v27.4s, v28.4s, v29.4s }, [x12] // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 8 2 2 0.50 V1UnitL01[4],V1UnitV01[4]
+# CHECK-NEXT: 4 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv1d | st1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x10] // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 4 2 2 1.00 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 8 | 2 | 2 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv2d | st1 { v25.2d, v26.2d, v27.2d, v28.2d }, [x19] // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 8 2 2 0.50 V1UnitL01[4],V1UnitV01[4]
+# CHECK-NEXT: 5 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv8b_POST | st1 { v27.8b, v28.8b, v29.8b, v30.8b }, [x17], #32 // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 9 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv16b_POST | st1 { v26.16b, v27.16b, v28.16b, v29.16b }, [x0], #64 // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], #64 \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 9 2 2 0.50 V1UnitL01[4],V1UnitV01[4],V1UnitI
+# CHECK-NEXT: 5 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv4h_POST | st1 { v18.4h, v19.4h, v20.4h, v21.4h }, [x22], #32 // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 9 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv8h_POST | st1 { v12.8h, v13.8h, v14.8h, v15.8h }, [x13], #64 // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], #64 \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 9 2 2 0.50 V1UnitL01[4],V1UnitV01[4],V1UnitI
+# CHECK-NEXT: 5 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv2s_POST | st1 { v13.2s, v14.2s, v15.2s, v16.2s }, [x25], #32 // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 9 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv4s_POST | st1 { v4.4s, v5.4s, v6.4s, v7.4s }, [x11], #64 // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], #64 \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 9 2 2 0.50 V1UnitL01[4],V1UnitV01[4],V1UnitI
+# CHECK-NEXT: 5 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv1d_POST | st1 { v7.1d, v8.1d, v9.1d, v10.1d }, [x13], #32 // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 9 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv2d_POST | st1 { v12.2d, v13.2d, v14.2d, v15.2d }, [x25], #64 // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], #64 \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 9 2 2 0.50 V1UnitL01[4],V1UnitV01[4],V1UnitI
+# CHECK-NEXT: 5 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv8b_POST | st1 { v21.8b, v22.8b, v23.8b, v24.8b }, [x25], x28 // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 9 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv16b_POST | st1 { v26.16b, v27.16b, v28.16b, v29.16b }, [x24], x5 // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 9 2 2 0.50 V1UnitL01[4],V1UnitV01[4],V1UnitI
+# CHECK-NEXT: 5 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv4h_POST | st1 { v20.4h, v21.4h, v22.4h, v23.4h }, [x25], x19 // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 9 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv8h_POST | st1 { v20.8h, v21.8h, v22.8h, v23.8h }, [x18], x0 // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 9 2 2 0.50 V1UnitL01[4],V1UnitV01[4],V1UnitI
+# CHECK-NEXT: 5 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv2s_POST | st1 { v4.2s, v5.2s, v6.2s, v7.2s }, [x9], x5 // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 9 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv4s_POST | st1 { v7.4s, v8.4s, v9.4s, v10.4s }, [x12], x30 // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 9 2 2 0.50 V1UnitL01[4],V1UnitV01[4],V1UnitI
+# CHECK-NEXT: 5 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv1d_POST | st1 { v23.1d, v24.1d, v25.1d, v26.1d }, [x23], x4 // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 5 2 2 1.00 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 9 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv2d_POST | st1 { v20.2d, v21.2d, v22.2d, v23.2d }, [x7], x14 // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 9 2 2 0.50 V1UnitL01[4],V1UnitV01[4],V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i8 | st1 { v1.b }[5], [x1] // ST1 { <Vt>.B }[<indexb>], [<Xn|SP>] \\ ASIMD store, 1 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i16 | st1 { v0.h }[2], [x1] // ST1 { <Vt>.H }[<indexh>], [<Xn|SP>] \\ ASIMD store, 1 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i32 | st1 { v31.s }[1], [x16] // ST1 { <Vt>.S }[<indexs>], [<Xn|SP>] \\ ASIMD store, 1 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i64 | st1 { v15.d }[1], [x8] // ST1 { <Vt>.D }[<indexd>], [<Xn|SP>] \\ ASIMD store, 1 element, one lane, D \\ 2 4 4 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i8_POST | st1 { v15.b }[1], [x12], #1 // ST1 { <Vt>.B }[<indexb>], [<Xn|SP>], #1 \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i8_POST | st1 { v16.b }[3], [x0], x2 // ST1 { <Vt>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i16_POST | st1 { v29.h }[2], [x27], #2 // ST1 { <Vt>.H }[<indexh>], [<Xn|SP>], #2 \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i16_POST | st1 { v15.h }[4], [x30], x9 // ST1 { <Vt>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i32_POST | st1 { v3.s }[1], [x24], #4 // ST1 { <Vt>.S }[<indexs>], [<Xn|SP>], #4 \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i32_POST | st1 { v26.s }[0], [x2], x30 // ST1 { <Vt>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i64_POST | st1 { v19.d }[1], [x9], #8 // ST1 { <Vt>.D }[<indexd>], [<Xn|SP>], #8 \\ ASIMD store, 1 element, one lane, D \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i64_POST | st1 { v29.d }[0], [x26], x22 // ST1 { <Vt>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, one lane, D \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | ST1B_H_IMM | st1b { z7.h }, p2, [x14] // ST1B { <Zt>.<T> }, <Pg>, [<Xn|SP>] \\ Contiguous store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | ST1B_S_IMM | st1b { z16.s }, p4, [x20, #3, mul vl] // ST1B { <Zt>.<T> }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | ST1B_S | st1b { z17.s }, p3, [x20, x0] // ST1B { <Zt>.<T> }, <Pg>, [<Xn|SP>, <Xm>] \\ Contiguous store, scalar + scalar \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | SST1B_D_UXTW | st1b { z0.d }, p4, [x11, z13.d, uxtw] // ST1B { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] \\ Scatter store, 32-bit unpacked unscaled offset \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 4 | 10 | 10 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | SST1B_S_SXTW | st1b { z16.s }, p4, [x19, z25.s, sxtw] // ST1B { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] \\ Scatter store, 32-bit unscaled offset \\ 4 10 10 1.00 V1UnitL[2], V1UnitL01[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | SST1B_D | st1b { z10.d }, p3, [x12, z21.d] // ST1B { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D] \\ Scatter store, 64-bit unscaled offset \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 4 | 10 | 10 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | SST1B_S_IMM | st1b { z17.s }, p7, [z28.s] // ST1B { <Zt>.S }, <Pg>, [<Zn>.S] \\ Scatter store vector + imm 32-bit element size \\ 4 10 10 1.00 V1UnitL[2], V1UnitL01[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 10 | 10 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | SST1B_S_IMM | st1b { z16.s }, p0, [z25.s, #7] // ST1B { <Zt>.S }, <Pg>, [<Zn>.S, #<imm>] \\ Scatter store vector + imm 32-bit element size \\ 4 10 10 1.00 V1UnitL[2], V1UnitL01[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | SST1B_D_IMM | st1b { z15.d }, p6, [z27.d] // ST1B { <Zt>.D }, <Pg>, [<Zn>.D] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | SST1B_D_IMM | st1b { z2.d }, p0, [z21.d, #24] // ST1B { <Zt>.D }, <Pg>, [<Zn>.D, #<imm>] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | SST1D_SXTW_SCALED | st1d { z10.d }, p2, [x26, z5.d, sxtw #3] // ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #3] \\ Scatter store, 32-bit unpacked scaled offset \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | SST1D_UXTW | st1d { z18.d }, p2, [x7, z1.d, uxtw] // ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] \\ Scatter store, 32-bit unpacked unscaled offset \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | SST1D_SCALED | st1d { z9.d }, p6, [x6, z12.d, lsl #3] // ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #3] \\ Scatter store, 64-bit scaled offset \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | SST1D | st1d { z3.d }, p3, [x1, z30.d] // ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D] \\ Scatter store, 64-bit unscaled offset \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | SST1D_IMM | st1d { z18.d }, p0, [z7.d] // ST1D { <Zt>.D }, <Pg>, [<Zn>.D] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | SST1D_IMM | st1d { z4.d }, p2, [z2.d, #136] // ST1D { <Zt>.D }, <Pg>, [<Zn>.D, #<imm>] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | ST1H_S_IMM | st1h { z28.s }, p3, [x18] // ST1H { <Zt>.<T> }, <Pg>, [<Xn|SP>] \\ Contiguous store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | ST1H_IMM | st1h { z23.h }, p1, [x14, #-8, mul vl] // ST1H { <Zt>.<T> }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS, V1UnitV | ST1H_S | st1h { z25.s }, p3, [x17, x8, lsl #1] // ST1H { <Zt>.<T> }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous store, scalar + scalar \\ 3 2 2 2.0 V1UnitL01,V1UnitS,V1UnitV
+# CHECK-NEXT: 4 | 10 | 10 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | SST1H_S_SXTW_SCALED | st1h { z12.s }, p3, [x24, z30.s, sxtw #1] // ST1H { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #1] \\ Scatter store, 32-bit scaled offset \\ 4 10 10 1.00 V1UnitL[2], V1UnitL01[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | SST1H_D_UXTW_SCALED | st1h { z26.d }, p5, [x9, z17.d, uxtw #1] // ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #1] \\ Scatter store, 32-bit unpacked scaled offset \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | SST1H_D_SXTW | st1h { z23.d }, p1, [x5, z25.d, sxtw] // ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] \\ Scatter store, 32-bit unpacked unscaled offset \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 4 | 10 | 10 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | SST1H_S_SXTW | st1h { z14.s }, p4, [x22, z17.s, sxtw] // ST1H { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] \\ Scatter store, 32-bit unscaled offset \\ 4 10 10 1.00 V1UnitL[2], V1UnitL01[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | SST1H_D_SCALED | st1h { z23.d }, p3, [x25, z11.d, lsl #1] // ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #1] \\ Scatter store, 64-bit scaled offset \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | SST1H_D | st1h { z0.d }, p4, [x21, z21.d] // ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D] \\ Scatter store, 64-bit unscaled offset \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 4 | 10 | 10 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | SST1H_S_IMM | st1h { z29.s }, p5, [z9.s] // ST1H { <Zt>.S }, <Pg>, [<Zn>.S] \\ Scatter store vector + imm 32-bit element size \\ 4 10 10 1.00 V1UnitL[2], V1UnitL01[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 10 | 10 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | SST1H_S_IMM | st1h { z4.s }, p7, [z23.s, #40] // ST1H { <Zt>.S }, <Pg>, [<Zn>.S, #<imm>] \\ Scatter store vector + imm 32-bit element size \\ 4 10 10 1.00 V1UnitL[2], V1UnitL01[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | SST1H_D_IMM | st1h { z27.d }, p2, [z3.d] // ST1H { <Zt>.D }, <Pg>, [<Zn>.D] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | SST1H_D_IMM | st1h { z11.d }, p6, [z7.d, #38] // ST1H { <Zt>.D }, <Pg>, [<Zn>.D, #<imm>] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 4 | 10 | 10 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | SST1W_SXTW_SCALED | st1w { z25.s }, p1, [x9, z28.s, sxtw #2] // ST1W { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #2] \\ Scatter store, 32-bit scaled offset \\ 4 10 10 1.00 V1UnitL[2], V1UnitL01[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | SST1W_D_SXTW_SCALED | st1w { z13.d }, p3, [x16, z9.d, sxtw #2] // ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #2] \\ Scatter store, 32-bit unpacked scaled offset \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | SST1W_D_SXTW | st1w { z21.d }, p1, [x24, z23.d, sxtw] // ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] \\ Scatter store, 32-bit unpacked unscaled offset \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 4 | 10 | 10 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | SST1W_UXTW | st1w { z17.s }, p1, [x5, z22.s, uxtw] // ST1W { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] \\ Scatter store, 32-bit unscaled offset \\ 4 10 10 1.00 V1UnitL[2], V1UnitL01[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | SST1W_D_SCALED | st1w { z28.d }, p1, [x5, z8.d, lsl #2] // ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #2] \\ Scatter store, 64-bit scaled offset \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | SST1W_D | st1w { z26.d }, p3, [x3, z0.d] // ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D] \\ Scatter store, 64-bit unscaled offset \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 4 | 10 | 10 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | SST1W_IMM | st1w { z28.s }, p6, [z21.s] // ST1W { <Zt>.S }, <Pg>, [<Zn>.S] \\ Scatter store vector + imm 32-bit element size \\ 4 10 10 1.00 V1UnitL[2], V1UnitL01[2], V1UnitV[2]
+# CHECK-NEXT: 4 | 10 | 10 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | SST1W_IMM | st1w { z26.s }, p3, [z24.s, #120] // ST1W { <Zt>.S }, <Pg>, [<Zn>.S, #<imm>] \\ Scatter store vector + imm 32-bit element size \\ 4 10 10 1.00 V1UnitL[2], V1UnitL01[2], V1UnitV[2]
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | SST1W_D_IMM | st1w { z3.d }, p0, [z12.d] // ST1W { <Zt>.D }, <Pg>, [<Zn>.D] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | SST1W_D_IMM | st1w { z17.d }, p2, [z1.d, #80] // ST1W { <Zt>.D }, <Pg>, [<Zn>.D, #<imm>] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2Twov8b | st2 { v14.8b, v15.8b }, [x2] // ST2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+# CHECK-NEXT: 4 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov16b | st2 { v6.16b, v7.16b }, [x23] // ST2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 4 4 4 1.00 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2Twov4h | st2 { v10.4h, v11.4h }, [x18] // ST2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+# CHECK-NEXT: 4 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov8h | st2 { v10.8h, v11.8h }, [x18] // ST2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 4 4 4 1.00 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2Twov2s | st2 { v25.2s, v26.2s }, [x29] // ST2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+# CHECK-NEXT: 4 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov4s | st2 { v26.4s, v27.4s }, [x14] // ST2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 4 4 4 1.00 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 4 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov2d | st2 { v10.2d, v11.2d }, [x1] // ST2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, Q-form, D \\ 4 4 4 1.00 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2Twov8b_POST | st2 { v21.8b, v22.8b }, [x22], #16 // ST2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], #16 \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 5 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov16b_POST | st2 { v26.16b, v27.16b }, [x2], #32 // ST2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], #32 \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2Twov4h_POST | st2 { v19.4h, v20.4h }, [x27], #16 // ST2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], #16 \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 5 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov8h_POST | st2 { v28.8h, v29.8h }, [x22], #32 // ST2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], #32 \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2Twov2s_POST | st2 { v1.2s, v2.2s }, [x26], #16 // ST2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], #16 \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 5 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov4s_POST | st2 { v19.4s, v20.4s }, [x7], #32 // ST2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], #32 \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 5 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov2d_POST | st2 { v22.2d, v23.2d }, [x18], #32 // ST2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], #32 \\ ASIMD store, 2 element, multiple, Q-form, D \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2Twov8b_POST | st2 { v29.8b, v30.8b }, [x9], x2 // ST2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 5 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov16b_POST | st2 { v17.16b, v18.16b }, [x4], x0 // ST2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2Twov4h_POST | st2 { v9.4h, v10.4h }, [x7], x25 // ST2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 5 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov8h_POST | st2 { v8.8h, v9.8h }, [x11], x8 // ST2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2Twov2s_POST | st2 { v17.2s, v18.2s }, [x2], x8 // ST2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 5 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov4s_POST | st2 { v9.4s, v10.4s }, [x23], x12 // ST2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 5 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov2d_POST | st2 { v29.2d, v30.2d }, [x25], x11 // ST2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, Q-form, D \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i8 | st2 { v21.b, v22.b }[15], [x15] // ST2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>] \\ ASIMD store, 2 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i16 | st2 { v28.h, v29.h }[2], [x6] // ST2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>] \\ ASIMD store, 2 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i32 | st2 { v14.s, v15.s }[1], [x25] // ST2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>] \\ ASIMD store, 2 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i64 | st2 { v17.d, v18.d }[1], [x1] // ST2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>] \\ ASIMD store, 2 element, one lane, D \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i8_POST | st2 { v9.b, v10.b }[15], [x12], #2 // ST2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>], #2 \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i8_POST | st2 { v19.b, v20.b }[9], [x27], x28 // ST2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i16_POST | st2 { v18.h, v19.h }[3], [x30], #4 // ST2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>], #4 \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i16_POST | st2 { v13.h, v14.h }[5], [x23], x24 // ST2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i32_POST | st2 { v23.s, v24.s }[1], [x22], #8 // ST2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>], #8 \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i32_POST | st2 { v16.s, v17.s }[3], [x12], x16 // ST2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i64_POST | st2 { v27.d, v28.d }[0], [x16], #16 // ST2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>], #16 \\ ASIMD store, 2 element, one lane, D \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i64_POST | st2 { v6.d, v7.d }[1], [x14], x5 // ST2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, one lane, D \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | ST2B_IMM | st2b { z19.b, z20.b }, p1, [x18] // ST2B { <Zt1>.B, <Zt2>.B }, <Pg>, [<Xn|SP>] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | ST2B_IMM | st2b { z26.b, z27.b }, p7, [x15, #-6, mul vl] // ST2B { <Zt1>.B, <Zt2>.B }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | ST2B | st2b { z19.b, z20.b }, p1, [x23, x27] // ST2B { <Zt1>.B, <Zt2>.B }, <Pg>, [<Xn|SP>, <Xm>] \\ Contiguous store two structures from two vectors, scalar + scalar \\ 2 4 4 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | ST2D_IMM | st2d { z29.d, z30.d }, p4, [x8] // ST2D { <Zt1>.D, <Zt2>.D }, <Pg>, [<Xn|SP>] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | ST2D_IMM | st2d { z16.d, z17.d }, p3, [x20, #14, mul vl] // ST2D { <Zt1>.D, <Zt2>.D }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | ST2D | st2d { z17.d, z18.d }, p7, [x2, x28, lsl #3] // ST2D { <Zt1>.D, <Zt2>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous store two structures from two vectors, scalar + scalar \\ 2 4 4 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | ST2H_IMM | st2h { z5.h, z6.h }, p7, [x23] // ST2H { <Zt1>.H, <Zt2>.H }, <Pg>, [<Xn|SP>] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | ST2H_IMM | st2h { z11.h, z12.h }, p6, [x4, #10, mul vl] // ST2H { <Zt1>.H, <Zt2>.H }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS, V1UnitV | ST2H | st2h { z3.h, z4.h }, p3, [x22, x16, lsl #1] // ST2H { <Zt1>.H, <Zt2>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous store two structures from two vectors, scalar + scalar \\ 3 4 4 2.00 V1UnitI, V1UnitL, V1UnitL01, V1UnitS, V1UnitV
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | ST2W_IMM | st2w { z14.s, z15.s }, p4, [x17] // ST2W { <Zt1>.S, <Zt2>.S }, <Pg>, [<Xn|SP>] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | ST2W_IMM | st2w { z9.s, z10.s }, p5, [x19, #-8, mul vl] // ST2W { <Zt1>.S, <Zt2>.S }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | ST2W | st2w { z5.s, z6.s }, p3, [x23, x13, lsl #2] // ST2W { <Zt1>.S, <Zt2>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous store two structures from two vectors, scalar + scalar \\ 2 4 4 2.00 V1UnitL, V1UnitL01, V1UnitV
+# CHECK-NEXT: 4 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev8b | st3 { v10.8b, v11.8b, v12.8b }, [x18] // ST3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 4 4 4 1.00 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 6 | 5 | 5 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev16b | st3 { v26.16b, v27.16b, v28.16b }, [x4] // ST3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 6 5 5 0.67 V1UnitV01[3],V1UnitL01[3]
+# CHECK-NEXT: 4 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev4h | st3 { v25.4h, v26.4h, v27.4h }, [x11] // ST3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 4 4 4 1.00 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 6 | 5 | 5 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev8h | st3 { v0.8h, v1.8h, v2.8h }, [x0] // ST3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 6 5 5 0.67 V1UnitV01[3],V1UnitL01[3]
+# CHECK-NEXT: 4 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev2s | st3 { v19.2s, v20.2s, v21.2s }, [x30] // ST3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 4 4 4 1.00 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 6 | 5 | 5 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev4s | st3 { v24.4s, v25.4s, v26.4s }, [x8] // ST3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 6 5 5 0.67 V1UnitV01[3],V1UnitL01[3]
+# CHECK-NEXT: 6 | 5 | 5 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev2d | st3 { v24.2d, v25.2d, v26.2d }, [x25] // ST3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, Q-form, D \\ 6 5 5 0.67 V1UnitV01[3],V1UnitL01[3]
+# CHECK-NEXT: 5 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev8b_POST | st3 { v25.8b, v26.8b, v27.8b }, [x23], #24 // ST3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], #24 \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 7 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev16b_POST | st3 { v9.16b, v10.16b, v11.16b }, [x26], #48 // ST3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], #48 \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 7 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+# CHECK-NEXT: 5 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev4h_POST | st3 { v24.4h, v25.4h, v26.4h }, [x3], #24 // ST3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], #24 \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 7 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev8h_POST | st3 { v23.8h, v24.8h, v25.8h }, [x22], #48 // ST3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], #48 \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 7 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+# CHECK-NEXT: 5 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev2s_POST | st3 { v7.2s, v8.2s, v9.2s }, [x8], #24 // ST3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], #24 \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 7 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev4s_POST | st3 { v11.4s, v12.4s, v13.4s }, [x15], #48 // ST3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], #48 \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 7 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+# CHECK-NEXT: 7 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev2d_POST | st3 { v1.2d, v2.2d, v3.2d }, [x4], #48 // ST3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], #48 \\ ASIMD store, 3 element, multiple, Q-form, D \\ 7 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+# CHECK-NEXT: 5 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev8b_POST | st3 { v16.8b, v17.8b, v18.8b }, [x26], x2 // ST3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 7 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev16b_POST | st3 { v9.16b, v10.16b, v11.16b }, [x3], x18 // ST3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 7 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+# CHECK-NEXT: 5 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev4h_POST | st3 { v2.4h, v3.4h, v4.4h }, [x4], x4 // ST3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 7 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev8h_POST | st3 { v27.8h, v28.8h, v29.8h }, [x27], x8 // ST3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 7 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+# CHECK-NEXT: 5 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev2s_POST | st3 { v26.2s, v27.2s, v28.2s }, [x2], x25 // ST3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 7 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev4s_POST | st3 { v5.4s, v6.4s, v7.4s }, [x18], x29 // ST3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 7 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+# CHECK-NEXT: 7 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev2d_POST | st3 { v26.2d, v27.2d, v28.2d }, [x14], x5 // ST3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, Q-form, D \\ 7 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+# CHECK-NEXT: 4 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i8 | st3 { v8.b, v9.b, v10.b }[4], [x18] // ST3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>] \\ ASIMD store, 3 element, one lane, B/H \\ 4 4 4 1.00 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 4 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i16 | st3 { v11.h, v12.h, v13.h }[4], [x0] // ST3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>] \\ ASIMD store, 3 element, one lane, B/H \\ 4 4 4 1.00 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 4 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i32 | st3 { v9.s, v10.s, v11.s }[2], [x20] // ST3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>] \\ ASIMD store, 3 element, one lane, S \\ 4 4 4 1.00 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 4 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i64 | st3 { v16.d, v17.d, v18.d }[0], [x13] // ST3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>] \\ ASIMD store, 3 element, one lane, D \\ 4 4 4 1.00 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 5 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i8_POST | st3 { v26.b, v27.b, v28.b }[1], [x12], #3 // ST3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>], #3 \\ ASIMD store, 3 element, one lane, B/H \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 5 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i8_POST | st3 { v27.b, v28.b, v29.b }[15], [x19], x23 // ST3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, one lane, B/H \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 5 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i16_POST | st3 { v24.h, v25.h, v26.h }[2], [x14], #6 // ST3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>], #6 \\ ASIMD store, 3 element, one lane, B/H \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 5 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i16_POST | st3 { v1.h, v2.h, v3.h }[2], [x0], x23 // ST3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, one lane, B/H \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 5 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i32_POST | st3 { v25.s, v26.s, v27.s }[2], [x10], #12 // ST3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>], #12 \\ ASIMD store, 3 element, one lane, S \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 5 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i32_POST | st3 { v8.s, v9.s, v10.s }[0], [x11], x20 // ST3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, one lane, S \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 5 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i64_POST | st3 { v19.d, v20.d, v21.d }[1], [x5], #24 // ST3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>], #24 \\ ASIMD store, 3 element, one lane, D \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 5 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i64_POST | st3 { v10.d, v11.d, v12.d }[0], [x12], x11 // ST3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, one lane, D \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 10 | 7 | 7 | 0.40 | V1UnitL[5], V1UnitL01[5], V1UnitV[5] | ST3B_IMM | st3b { z0.b - z2.b }, p6, [x26] // ST3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>, [<Xn|SP>] \\ Contiguous store three structures from three vectors, scalar + imm \\ 10 7 7 0.40 V1UnitL[5], V1UnitL01[5], V1UnitV[5]
+# CHECK-NEXT: 10 | 7 | 7 | 0.40 | V1UnitL[5], V1UnitL01[5], V1UnitV[5] | ST3B_IMM | st3b { z22.b - z24.b }, p6, [x25, #3, mul vl] // ST3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store three structures from three vectors, scalar + imm \\ 10 7 7 0.40 V1UnitL[5], V1UnitL01[5], V1UnitV[5]
+# CHECK-NEXT: 15 | 7 | 7 | 0.40 | V1UnitI[5], V1UnitL[5], V1UnitL01[5], V1UnitS[5], V1UnitV[5] | ST3B | st3b { z14.b - z16.b }, p2, [x29, x27] // ST3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>, [<Xn|SP>, <Xm>] \\ Contiguous store three structures from three vectors, scalar + scalar \\ 15 7 7 0.40 V1UnitI[5], V1UnitL[5], V1UnitL01[5], V1UnitS[5], V1UnitV[5]
+# CHECK-NEXT: 10 | 7 | 7 | 0.40 | V1UnitL[5], V1UnitL01[5], V1UnitV[5] | ST3D_IMM | st3d { z6.d - z8.d }, p2, [x12] // ST3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>, [<Xn|SP>] \\ Contiguous store three structures from three vectors, scalar + imm \\ 10 7 7 0.40 V1UnitL[5], V1UnitL01[5], V1UnitV[5]
+# CHECK-NEXT: 10 | 7 | 7 | 0.40 | V1UnitL[5], V1UnitL01[5], V1UnitV[5] | ST3D_IMM | st3d { z20.d - z22.d }, p5, [x15, #9, mul vl] // ST3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store three structures from three vectors, scalar + imm \\ 10 7 7 0.40 V1UnitL[5], V1UnitL01[5], V1UnitV[5]
+# CHECK-NEXT: 15 | 7 | 7 | 0.40 | V1UnitI[5], V1UnitL[5], V1UnitL01[5], V1UnitS[5], V1UnitV[5] | ST3D | st3d { z15.d - z17.d }, p7, [x0, x9, lsl #3] // ST3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous store three structures from three vectors, scalar + scalar \\ 15 7 7 0.40 V1UnitI[5], V1UnitL[5], V1UnitL01[5], V1UnitS[5], V1UnitV[5]
+# CHECK-NEXT: 10 | 7 | 7 | 0.40 | V1UnitL[5], V1UnitL01[5], V1UnitV[5] | ST3H_IMM | st3h { z17.h - z19.h }, p3, [x14] // ST3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>, [<Xn|SP>] \\ Contiguous store three structures from three vectors, scalar + imm \\ 10 7 7 0.40 V1UnitL[5], V1UnitL01[5], V1UnitV[5]
+# CHECK-NEXT: 10 | 7 | 7 | 0.40 | V1UnitL[5], V1UnitL01[5], V1UnitV[5] | ST3H_IMM | st3h { z21.h - z23.h }, p0, [x15, #6, mul vl] // ST3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store three structures from three vectors, scalar + imm \\ 10 7 7 0.40 V1UnitL[5], V1UnitL01[5], V1UnitV[5]
+# CHECK-NEXT: 15 | 7 | 7 | 0.40 | V1UnitI[5], V1UnitL[5], V1UnitL01[5], V1UnitS[5], V1UnitV[5] | ST3H | st3h { z2.h - z4.h }, p3, [x21, x9, lsl #1] // ST3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous store three structures from three vectors, scalar + scalar \\ 15 7 7 0.40 V1UnitI[5], V1UnitL[5], V1UnitL01[5], V1UnitS[5], V1UnitV[5]
+# CHECK-NEXT: 10 | 7 | 7 | 0.40 | V1UnitL[5], V1UnitL01[5], V1UnitV[5] | ST3W_IMM | st3w { z9.s - z11.s }, p3, [x29] // ST3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>, [<Xn|SP>] \\ Contiguous store three structures from three vectors, scalar + imm \\ 10 7 7 0.40 V1UnitL[5], V1UnitL01[5], V1UnitV[5]
+# CHECK-NEXT: 10 | 7 | 7 | 0.40 | V1UnitL[5], V1UnitL01[5], V1UnitV[5] | ST3W_IMM | st3w { z11.s - z13.s }, p4, [x13, #15, mul vl] // ST3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store three structures from three vectors, scalar + imm \\ 10 7 7 0.40 V1UnitL[5], V1UnitL01[5], V1UnitV[5]
+# CHECK-NEXT: 15 | 7 | 7 | 0.40 | V1UnitI[5], V1UnitL[5], V1UnitL01[5], V1UnitS[5], V1UnitV[5] | ST3W | st3w { z19.s - z21.s }, p2, [x22, x28, lsl #2] // ST3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous store three structures from three vectors, scalar + scalar \\ 15 7 7 0.40 V1UnitI[5], V1UnitL[5], V1UnitL01[5], V1UnitS[5], V1UnitV[5]
+# CHECK-NEXT: 6 | 6 | 6 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST4Fourv8b | st4 { v17.8b, v18.8b, v19.8b, v20.8b }, [x8] // ST4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 6 6 6 0.67 V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+# CHECK-NEXT: 12 | 7 | 7 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv16b | st4 { v7.16b, v8.16b, v9.16b, v10.16b }, [x15] // ST4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 12 7 7 0.33 V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6]
+# CHECK-NEXT: 6 | 6 | 6 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST4Fourv4h | st4 { v5.4h, v6.4h, v7.4h, v8.4h }, [x13] // ST4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 6 6 6 0.67 V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+# CHECK-NEXT: 12 | 7 | 7 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv8h | st4 { v11.8h, v12.8h, v13.8h, v14.8h }, [x1] // ST4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 12 7 7 0.33 V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6]
+# CHECK-NEXT: 6 | 6 | 6 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST4Fourv2s | st4 { v15.2s, v16.2s, v17.2s, v18.2s }, [x18] // ST4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 6 6 6 0.67 V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+# CHECK-NEXT: 12 | 7 | 7 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv4s | st4 { v21.4s, v22.4s, v23.4s, v24.4s }, [x6] // ST4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 12 7 7 0.33 V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6]
+# CHECK-NEXT: 8 | 4 | 4 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST4Fourv2d | st4 { v25.2d, v26.2d, v27.2d, v28.2d }, [x16] // ST4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, Q-form, D \\ 8 4 4 0.50 V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4]
+# CHECK-NEXT: 7 | 6 | 6 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST4Fourv8b_POST | st4 { v16.8b, v17.8b, v18.8b, v19.8b }, [x24], #32 // ST4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], #32 \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 7 6 6 0.67 V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+# CHECK-NEXT: 13 | 7 | 7 | 0.33 | V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv16b_POST | st4 { v2.16b, v3.16b, v4.16b, v5.16b }, [x13], #64 // ST4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], #64 \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 13 7 7 0.33 V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6]
+# CHECK-NEXT: 7 | 6 | 6 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST4Fourv4h_POST | st4 { v17.4h, v18.4h, v19.4h, v20.4h }, [x3], #32 // ST4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], #32 \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 7 6 6 0.67 V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+# CHECK-NEXT: 13 | 7 | 7 | 0.33 | V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv8h_POST | st4 { v18.8h, v19.8h, v20.8h, v21.8h }, [x5], #64 // ST4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], #64 \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 13 7 7 0.33 V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6]
+# CHECK-NEXT: 7 | 6 | 6 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST4Fourv2s_POST | st4 { v26.2s, v27.2s, v28.2s, v29.2s }, [x17], #32 // ST4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], #32 \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 7 6 6 0.67 V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+# CHECK-NEXT: 13 | 7 | 7 | 0.33 | V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv4s_POST | st4 { v21.4s, v22.4s, v23.4s, v24.4s }, [x7], #64 // ST4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], #64 \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 13 7 7 0.33 V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6]
+# CHECK-NEXT: 9 | 4 | 4 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST4Fourv2d_POST | st4 { v27.2d, v28.2d, v29.2d, v30.2d }, [x25], #64 // ST4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], #64 \\ ASIMD store, 4 element, multiple, Q-form, D \\ 9 4 4 0.50 V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4]
+# CHECK-NEXT: 7 | 6 | 6 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST4Fourv8b_POST | st4 { v24.8b, v25.8b, v26.8b, v27.8b }, [x24], x8 // ST4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 7 6 6 0.67 V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+# CHECK-NEXT: 13 | 7 | 7 | 0.33 | V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv16b_POST | st4 { v2.16b, v3.16b, v4.16b, v5.16b }, [x21], x21 // ST4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 13 7 7 0.33 V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6]
+# CHECK-NEXT: 7 | 6 | 6 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST4Fourv4h_POST | st4 { v11.4h, v12.4h, v13.4h, v14.4h }, [x29], x3 // ST4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 7 6 6 0.67 V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+# CHECK-NEXT: 13 | 7 | 7 | 0.33 | V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv8h_POST | st4 { v16.8h, v17.8h, v18.8h, v19.8h }, [x13], x3 // ST4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 13 7 7 0.33 V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6]
+# CHECK-NEXT: 7 | 6 | 6 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST4Fourv2s_POST | st4 { v13.2s, v14.2s, v15.2s, v16.2s }, [x0], x0 // ST4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 7 6 6 0.67 V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3]
+# CHECK-NEXT: 13 | 7 | 7 | 0.33 | V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv4s_POST | st4 { v26.4s, v27.4s, v28.4s, v29.4s }, [x1], x22 // ST4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 13 7 7 0.33 V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6]
+# CHECK-NEXT: 9 | 4 | 4 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST4Fourv2d_POST | st4 { v18.2d, v19.2d, v20.2d, v21.2d }, [x10], x28 // ST4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, Q-form, D \\ 9 4 4 0.50 V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4]
+# CHECK-NEXT: 6 | 6 | 6 | 1.00 | V1UnitL[3], V1UnitV[3] | ST4i8 | st4 { v10.b, v11.b, v12.b, v13.b }[3], [x5] // ST4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>] \\ ASIMD store, 4 element, one lane, B/H \\ 6 6 6 1.00 V1UnitL[3], V1UnitV[3]
+# CHECK-NEXT: 6 | 6 | 6 | 1.00 | V1UnitL[3], V1UnitV[3] | ST4i16 | st4 { v5.h, v6.h, v7.h, v8.h }[4], [x13] // ST4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>] \\ ASIMD store, 4 element, one lane, B/H \\ 6 6 6 1.00 V1UnitL[3], V1UnitV[3]
+# CHECK-NEXT: 6 | 6 | 6 | 1.00 | V1UnitL[3], V1UnitV[3] | ST4i32 | st4 { v22.s, v23.s, v24.s, v25.s }[0], [x7] // ST4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>] \\ ASIMD store, 4 element, one lane, S \\ 6 6 6 1.00 V1UnitL[3], V1UnitV[3]
+# CHECK-NEXT: 4 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST4i64 | st4 { v23.d, v24.d, v25.d, v26.d }[1], [x5] // ST4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>] \\ ASIMD store, 4 element, one lane, D \\ 4 4 4 1.00 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 7 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | ST4i8_POST | st4 { v22.b, v23.b, v24.b, v25.b }[0], [x29], #4 // ST4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>], #4 \\ ASIMD store, 4 element, one lane, B/H \\ 7 6 6 1.00 V1UnitI, V1UnitL[3], V1UnitV[3]
+# CHECK-NEXT: 7 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | ST4i8_POST | st4 { v6.b, v7.b, v8.b, v9.b }[9], [x26], x21 // ST4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, one lane, B/H \\ 7 6 6 1.00 V1UnitI, V1UnitL[3], V1UnitV[3]
+# CHECK-NEXT: 7 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | ST4i16_POST | st4 { v19.h, v20.h, v21.h, v22.h }[2], [x18], #8 // ST4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>], #8 \\ ASIMD store, 4 element, one lane, B/H \\ 7 6 6 1.00 V1UnitI, V1UnitL[3], V1UnitV[3]
+# CHECK-NEXT: 7 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | ST4i16_POST | st4 { v6.h, v7.h, v8.h, v9.h }[4], [x9], x9 // ST4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, one lane, B/H \\ 7 6 6 1.00 V1UnitI, V1UnitL[3], V1UnitV[3]
+# CHECK-NEXT: 7 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | ST4i32_POST | st4 { v19.s, v20.s, v21.s, v22.s }[2], [x27], #16 // ST4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>], #16 \\ ASIMD store, 4 element, one lane, S \\ 7 6 6 1.00 V1UnitI, V1UnitL[3], V1UnitV[3]
+# CHECK-NEXT: 7 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | ST4i32_POST | st4 { v22.s, v23.s, v24.s, v25.s }[0], [x29], x21 // ST4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, one lane, S \\ 7 6 6 1.00 V1UnitI, V1UnitL[3], V1UnitV[3]
+# CHECK-NEXT: 5 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST4i64_POST | st4 { v10.d, v11.d, v12.d, v13.d }[0], [x16], #32 // ST4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>], #32 \\ ASIMD store, 4 element, one lane, D \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 5 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST4i64_POST | st4 { v10.d, v11.d, v12.d, v13.d }[0], [x12], x11 // ST4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, one lane, D \\ 5 4 4 1.00 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 18 | 19 | 19 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitV[9] | ST4B_IMM | st4b { z22.b - z25.b }, p0, [x0] // ST4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>, [<Xn|SP>] \\ Contiguous store four structures from four vectors, scalar + imm \\ 18 19 19 0.22 V1UnitL[9], V1UnitL01[9], V1UnitV[9]
+# CHECK-NEXT: 18 | 19 | 19 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitV[9] | ST4B_IMM | st4b { z1.b - z4.b }, p7, [x1, #20, mul vl] // ST4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store four structures from four vectors, scalar + imm \\ 18 19 19 0.22 V1UnitL[9], V1UnitL01[9], V1UnitV[9]
+# CHECK-NEXT: 27 | 11 | 11 | 0.22 | V1UnitI[9], V1UnitL[9], V1UnitL01[9], V1UnitS[9], V1UnitV[9] | ST4B | st4b { z28.b - z31.b }, p4, [x27, x20] // ST4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>, [<Xn|SP>, <Xm>] \\ Contiguous store four structures from four vectors, scalar + scalar \\ 27 11 11 0.22 V1UnitI[9], V1UnitL[9], V1UnitL01[9], V1UnitS[9], V1UnitV[9]
+# CHECK-NEXT: 18 | 19 | 19 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitV[9] | ST4D_IMM | st4d { z19.d - z22.d }, p1, [x11] // ST4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>, [<Xn|SP>] \\ Contiguous store four structures from four vectors, scalar + imm \\ 18 19 19 0.22 V1UnitL[9], V1UnitL01[9], V1UnitV[9]
+# CHECK-NEXT: 18 | 19 | 19 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitV[9] | ST4D_IMM | st4d { z0.d - z3.d }, p6, [x7, #-24, mul vl] // ST4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store four structures from four vectors, scalar + imm \\ 18 19 19 0.22 V1UnitL[9], V1UnitL01[9], V1UnitV[9]
+# CHECK-NEXT: 27 | 11 | 11 | 0.22 | V1UnitI[9], V1UnitL[9], V1UnitL01[9], V1UnitS[9], V1UnitV[9] | ST4D | st4d { z28.d - z31.d }, p5, [x19, x20, lsl #3] // ST4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous store four structures from four vectors, scalar + scalar \\ 27 11 11 0.22 V1UnitI[9], V1UnitL[9], V1UnitL01[9], V1UnitS[9], V1UnitV[9]
+# CHECK-NEXT: 18 | 19 | 19 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitV[9] | ST4H_IMM | st4h { z14.h - z17.h }, p1, [x24] // ST4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>, [<Xn|SP>] \\ Contiguous store four structures from four vectors, scalar + imm \\ 18 19 19 0.22 V1UnitL[9], V1UnitL01[9], V1UnitV[9]
+# CHECK-NEXT: 18 | 19 | 19 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitV[9] | ST4H_IMM | st4h { z27.h - z30.h }, p3, [x26, #16, mul vl] // ST4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store four structures from four vectors, scalar + imm \\ 18 19 19 0.22 V1UnitL[9], V1UnitL01[9], V1UnitV[9]
+# CHECK-NEXT: 27 | 11 | 11 | 0.22 | V1UnitI[9], V1UnitL[9], V1UnitL01[9], V1UnitS[9], V1UnitV[9] | ST4H | st4h { z2.h - z5.h }, p5, [x30, x17, lsl #1] // ST4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous store four structures from four vectors, scalar + scalar \\ 27 11 11 0.22 V1UnitI[9], V1UnitL[9], V1UnitL01[9], V1UnitS[9], V1UnitV[9]
+# CHECK-NEXT: 18 | 19 | 19 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitV[9] | ST4W_IMM | st4w { z3.s - z6.s }, p0, [x0] // ST4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>, [<Xn|SP>] \\ Contiguous store four structures from four vectors, scalar + imm \\ 18 19 19 0.22 V1UnitL[9], V1UnitL01[9], V1UnitV[9]
+# CHECK-NEXT: 18 | 19 | 19 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitV[9] | ST4W_IMM | st4w { z5.s - z8.s }, p2, [x0, #-20, mul vl] // ST4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store four structures from four vectors, scalar + imm \\ 18 19 19 0.22 V1UnitL[9], V1UnitL01[9], V1UnitV[9]
+# CHECK-NEXT: 27 | 11 | 11 | 0.22 | V1UnitI[9], V1UnitL[9], V1UnitL01[9], V1UnitS[9], V1UnitV[9] | ST4W | st4w { z21.s - z24.s }, p5, [x5, x18, lsl #2] // ST4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous store four structures from four vectors, scalar + scalar \\ 27 11 11 0.22 V1UnitI[9], V1UnitL[9], V1UnitL01[9], V1UnitS[9], V1UnitV[9]
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STLRB | stlrb w19, [x26] // STLRB <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STLRB | stlrb w9, [x19] // STLRB <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STLRH | stlrh w4, [x7] // STLRH <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STLRH | stlrh w20, [x5] // STLRH <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STLURWi | stlur w3, [x27] // STLUR <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STLURWi | stlur w0, [x15, #-14] // STLUR <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STLURXi | stlur x23, [x25] // STLUR <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STLURXi | stlur x18, [x6, #101] // STLUR <Xt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STLURBi | stlurb w30, [x17] // STLURB <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STLURBi | stlurb w25, [x21, #-8] // STLURB <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STLURHi | stlurh w9, [x29] // STLURH <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STLURHi | stlurh w6, [x27, #-224] // STLURH <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STLXPW | stlxp w26, w11, w12, [x7] // STLXP <Ws>, <Wt1>, <Wt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STLXPW | stlxp w24, w10, w16, [x8] // STLXP <Ws>, <Wt1>, <Wt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STLXPX | stlxp w1, x25, x26, [x10] // STLXP <Ws>, <Xt1>, <Xt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STLXPX | stlxp w10, x7, x20, [x22] // STLXP <Ws>, <Xt1>, <Xt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STLXRW | stlxr w23, w8, [x6] // STLXR <Ws>, <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STLXRW | stlxr w29, w28, [x26] // STLXR <Ws>, <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STLXRX | stlxr w23, x8, [x7] // STLXR <Ws>, <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STLXRX | stlxr w14, x18, [x23] // STLXR <Ws>, <Xt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STLXRB | stlxrb w2, w7, [x10] // STLXRB <Ws>, <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STLXRB | stlxrb w0, w1, [x20] // STLXRB <Ws>, <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STLXRH | stlxrh w16, w17, [x21] // STLXRH <Ws>, <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STLXRH | stlxrh w12, w26, [x23] // STLXRH <Ws>, <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STNPSi | stnp s29, s16, [x11] // STNP <St1>, <St2>, [<Xn|SP>] \\ Store vector pair, immed offset, S-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STNPSi | stnp s17, s19, [x27, #-40] // STNP <St1>, <St2>, [<Xn|SP>, #<imm32>] \\ Store vector pair, immed offset, S-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STNPDi | stnp d4, d3, [x30] // STNP <Dt1>, <Dt2>, [<Xn|SP>] \\ Store vector pair, immed offset, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STNPDi | stnp d25, d31, [x28, #328] // STNP <Dt1>, <Dt2>, [<Xn|SP>, #<imm64>] \\ Store vector pair, immed offset, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STNPQi | stnp q28, q22, [x3] // STNP <Qt1>, <Qt2>, [<Xn|SP>] \\ Store vector pair, immed offset, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STNPQi | stnp q17, q15, [x16, #656] // STNP <Qt1>, <Qt2>, [<Xn|SP>, #<imm128>] \\ Store vector pair, immed offset, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STNPWi | stnp w29, w25, [x5] // STNP <Wt1>, <Wt2>, [<Xn|SP>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STNPWi | stnp w16, w18, [x27, #-232] // STNP <Wt1>, <Wt2>, [<Xn|SP>, #<imms>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STNPXi | stnp x20, x16, [x8] // STNP <Xt1>, <Xt2>, [<Xn|SP>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STNPXi | stnp x6, x20, [x15, #-120] // STNP <Xt1>, <Xt2>, [<Xn|SP>, #<immd>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1B_ZRI | stnt1b { z18.b }, p7, [x21] // STNT1B { <Zt>.B }, <Pg>, [<Xn|SP>] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1B_ZRI | stnt1b { z9.b }, p6, [x26, #-7, mul vl] // STNT1B { <Zt>.B }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1B_ZRR | stnt1b { z18.b }, p1, [x1, x20] // STNT1B { <Zt>.B }, <Pg>, [<Xn|SP>, <Xm>] \\ Non temporal store, scalar + scalar \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1D_ZRI | stnt1d { z16.d }, p3, [x3] // STNT1D { <Zt>.D }, <Pg>, [<Xn|SP>] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1D_ZRI | stnt1d { z27.d }, p4, [x16, #-6, mul vl] // STNT1D { <Zt>.D }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1D_ZRR | stnt1d { z11.d }, p0, [x18, x22, lsl #3] // STNT1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3] \\ Non temporal store, scalar + scalar \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1H_ZRI | stnt1h { z27.h }, p5, [x16] // STNT1H { <Zt>.H }, <Pg>, [<Xn|SP>] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1H_ZRI | stnt1h { z2.h }, p2, [x30, #-8, mul vl] // STNT1H { <Zt>.H }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS, V1UnitV | STNT1H_ZRR | stnt1h { z0.h }, p1, [x7, x1, lsl #1] // STNT1H { <Zt>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ Non temporal store, scalar + scalar \\ 3 2 2 2.0 V1UnitL01,V1UnitS,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1W_ZRI | stnt1w { z9.s }, p3, [x20] // STNT1W { <Zt>.S }, <Pg>, [<Xn|SP>] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1W_ZRI | stnt1w { z12.s }, p4, [x11, #-6, mul vl] // STNT1W { <Zt>.S }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1W_ZRR | stnt1w { z28.s }, p6, [x6, x0, lsl #2] // STNT1W { <Zt>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2] \\ Non temporal store, scalar + scalar \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STPSpost | stp s10, s19, [x13], #76 // STP <St1>, <St2>, [<Xn|SP>], #<imm32> \\ Store vector pair, immed post-index, S-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STPDpost | stp d19, d20, [x30], #-144 // STP <Dt1>, <Dt2>, [<Xn|SP>], #<imm64> \\ Store vector pair, immed post-index, D-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 4 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV, V1UnitV01 | STPQpost | stp q3, q17, [x14], #-976 // STP <Qt1>, <Qt2>, [<Xn|SP>], #<imm128> \\ Store vector pair, immed post-index, Q-form \\ 4 2 2 1.00 V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV, V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STPSpre | stp s19, s24, [x27, #-224]! // STP <St1>, <St2>, [<Xn|SP>, #<imm32>]! \\ Store vector pair, immed pre-index, S-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STPDpre | stp d16, d21, [x28, #168]! // STP <Dt1>, <Dt2>, [<Xn|SP>, #<imm64>]! \\ Store vector pair, immed pre-index, D-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 4 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV, V1UnitV01 | STPQpre | stp q10, q31, [x0, #608]! // STP <Qt1>, <Qt2>, [<Xn|SP>, #<imm128>]! \\ Store vector pair, immed pre-index, Q-form \\ 4 2 2 1.00 V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV, V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STPSi | stp s27, s11, [x30] // STP <St1>, <St2>, [<Xn|SP>] \\ Store vector pair, immed offset, S-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STPDi | stp d30, d19, [x25] // STP <Dt1>, <Dt2>, [<Xn|SP>] \\ Store vector pair, immed offset, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STPQi | stp q25, q3, [x27] // STP <Qt1>, <Qt2>, [<Xn|SP>] \\ Store vector pair, immed offset, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STPSi | stp s29, s13, [x0, #-44] // STP <St1>, <St2>, [<Xn|SP>, #<imm32>] \\ Store vector pair, immed offset, S-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STPDi | stp d15, d12, [x20, #-72] // STP <Dt1>, <Dt2>, [<Xn|SP>, #<imm64>] \\ Store vector pair, immed offset, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STPQi | stp q13, q16, [x3, #320] // STP <Qt1>, <Qt2>, [<Xn|SP>, #<imm128>] \\ Store vector pair, immed offset, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 1 | 1 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STPWpost | stp w18, w8, [x6], #196 // STP <Wt1>, <Wt2>, [<Xn|SP>], #<imms> \\ Store pair, immed post-index \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+# CHECK-NEXT: 3 | 1 | 1 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STPXpost | stp x10, x17, [x7], #-328 // STP <Xt1>, <Xt2>, [<Xn|SP>], #<immd> \\ Store pair, immed post-index \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+# CHECK-NEXT: 3 | 1 | 1 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STPWpre | stp w4, w3, [x0, #-36]! // STP <Wt1>, <Wt2>, [<Xn|SP>, #<imms>]! \\ Store pair, immed pre-index \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+# CHECK-NEXT: 3 | 1 | 1 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STPXpre | stp x14, x13, [x24, #-272]! // STP <Xt1>, <Xt2>, [<Xn|SP>, #<immd>]! \\ Store pair, immed pre-index \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STPWi | stp w27, w30, [x20] // STP <Wt1>, <Wt2>, [<Xn|SP>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STPXi | stp x3, x6, [x16] // STP <Xt1>, <Xt2>, [<Xn|SP>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STPWi | stp w9, w14, [x10, #-24] // STP <Wt1>, <Wt2>, [<Xn|SP>, #<imms>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STPXi | stp x27, x4, [x14, #-448] // STP <Xt1>, <Xt2>, [<Xn|SP>, #<immd>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 3 | 1 | 1 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STRWpost | str w14, [x2], #-72 // STR <Wt>, [<Xn|SP>], #<simm> \\ Store, immed post-indexed \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+# CHECK-NEXT: 3 | 1 | 1 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STRXpost | str x28, [x14], #-130 // STR <Xt>, [<Xn|SP>], #<simm> \\ Store register, immed post-index \\ 3 1 1 2.00 V1UnitL01,V1UnitD
+# CHECK-NEXT: 3 | 1 | 1 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STRWpre | str w9, [x29, #-227]! // STR <Wt>, [<Xn|SP>, #<simm>]! \\ Store, immed pre-indexed \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+# CHECK-NEXT: 3 | 1 | 1 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STRXpre | str x13, [x5, #233]! // STR <Xt>, [<Xn|SP>, #<simm>]! \\ Store register, immed pre-index \\ 3 1 1 2.00 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRWui | str w2, [x30] // STR <Wt>, [<Xn|SP>] \\ Store, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRWui | str w28, [x2, #1796] // STR <Wt>, [<Xn|SP>, #<pimm32>] \\ Store, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRXui | str x22, [x29] // STR <Xt>, [<Xn|SP>] \\ Store register, unsigned immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRXui | str x2, [x10, #9472] // STR <Xt>, [<Xn|SP>, #<pimm64>] \\ Store register, unsigned immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRBpost | str b21, [x28], #-62 // STR <Bt>, [<Xn|SP>], #<simm> \\ Store vector reg, unsigned immed, B/H/S/D-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHpost | str h13, [x10], #-194 // STR <Ht>, [<Xn|SP>], #<simm> \\ Store vector reg, unsigned immed, B/H/S/D-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSpost | str s14, [x8], #166 // STR <St>, [<Xn|SP>], #<simm> \\ Store vector reg, unsigned immed, B/H/S/D-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDpost | str d24, [x10], #134 // STR <Dt>, [<Xn|SP>], #<simm> \\ Store vector reg, unsigned immed, B/H/S/D-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQpost | str q20, [x30], #-108 // STR <Qt>, [<Xn|SP>], #<simm> \\ Store vector reg, unsigned immed, Q-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRBpre | str b9, [x24, #242]! // STR <Bt>, [<Xn|SP>, #<simm>]! \\ Store vector reg, immed post-index, B/H/S/D-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHpre | str h0, [x4, #-193]! // STR <Ht>, [<Xn|SP>, #<simm>]! \\ Store vector reg, immed post-index, B/H/S/D-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSpre | str s19, [x23, #115]! // STR <St>, [<Xn|SP>, #<simm>]! \\ Store vector reg, immed post-index, B/H/S/D-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDpre | str d20, [x2, #-30]! // STR <Dt>, [<Xn|SP>, #<simm>]! \\ Store vector reg, immed post-index, B/H/S/D-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQpre | str q24, [x20, #62]! // STR <Qt>, [<Xn|SP>, #<simm>]! \\ Store vector reg, immed post-index, Q-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRBui | str b5, [x11] // STR <Bt>, [<Xn|SP>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRBui | str b20, [x23, #2409] // STR <Bt>, [<Xn|SP>, #<pimm8>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHui | str h23, [x15] // STR <Ht>, [<Xn|SP>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHui | str h24, [x6, #492] // STR <Ht>, [<Xn|SP>, #<pimm16>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSui | str s25, [x19] // STR <St>, [<Xn|SP>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSui | str s2, [x14, #984] // STR <St>, [<Xn|SP>, #<pimm32>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDui | str d15, [x2] // STR <Dt>, [<Xn|SP>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDui | str d27, [x7, #25704] // STR <Dt>, [<Xn|SP>, #<pimm64>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQui | str q13, [x16] // STR <Qt>, [<Xn|SP>] \\ Store vector reg, immed pre-index, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQui | str q4, [x7, #96] // STR <Qt>, [<Xn|SP>, #<pimm128>] \\ Store vector reg, immed pre-index, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitL, V1UnitL01 | STR_PXI | str p4, [x5] // STR <Pt>, [<Xn|SP>] \\ Store from predicate reg \\ 1 1 1 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitL, V1UnitL01 | STR_PXI | str p3, [x21, #-78, mul vl] // STR <Pt>, [<Xn|SP>, #<imm>, MUL VL] \\ Store from predicate reg \\ 1 1 1 2.0 V1UnitL01
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRWroX | str w14, [x9, x17] // STR <Wt>, [<Xn|SP>, <Xm>] \\ Store register, register offset, basic \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRXroX | str x5, [x0, x22] // STR <Xt>, [<Xn|SP>, <Xm>] \\ Store register, register offset, basic \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRWroW | str w24, [x21, w29, uxtw] // STR <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRXroW | str x27, [x26, w24, uxtw] // STR <Xt>, [<Xn|SP>, <Wm>, UXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRWroW | str w28, [x29, w29, sxtw] // STR <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRXroW | str x25, [x1, w24, sxtw] // STR <Xt>, [<Xn|SP>, <Wm>, SXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRWroX | str w2, [x24, x12, sxtx] // STR <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRXroX | str x3, [x24, x27, sxtx] // STR <Xt>, [<Xn|SP>, <Xm>, SXTX] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRWroW | str w29, [x30, w30, uxtw #2] // STR <Wt>, [<Xn|SP>, <Wm>, UXTW #2] \\ Store register, register offset, extend, scale by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRXroW | str x5, [x13, w8, uxtw #3] // STR <Xt>, [<Xn|SP>, <Wm>, UXTW #3] \\ Store register, register offset, extend, scale by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRWroW | str w28, [x7, w24, sxtw #2] // STR <Wt>, [<Xn|SP>, <Wm>, SXTW #2] \\ Store register, register offset, extend, scale by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRXroW | str x23, [x2, w26, sxtw #3] // STR <Xt>, [<Xn|SP>, <Wm>, SXTW #3] \\ Store register, register offset, extend, scale by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRWroX | str w11, [x8, x30, sxtx #2] // STR <Wt>, [<Xn|SP>, <Xm>, SXTX #2] \\ Store register, register offset, extend, scale by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRXroX | str x20, [x4, x2, sxtx #3] // STR <Xt>, [<Xn|SP>, <Xm>, SXTX #3] \\ Store register, register offset, extend, scale by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRWroX | str w8, [x11, x10, lsl #2] // STR <Wt>, [<Xn|SP>, <Xm>, LSL #2] \\ Store register, register offset, scaled by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRXroX | str x27, [x2, x11, lsl #3] // STR <Xt>, [<Xn|SP>, <Xm>, LSL #3] \\ Store register, register offset, scaled by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRBroX | str b14, [x13, x25] // STR <Bt>, [<Xn|SP>, <Xm>] \\ Store vector reg, register offset, basic, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRBroW | str b30, [x16, w26, uxtw] // STR <Bt>, [<Xn|SP>, <Wm>, UXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRBroW | str b20, [x19, w3, sxtw] // STR <Bt>, [<Xn|SP>, <Wm>, SXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRBroX | str b13, [x29, x19, sxtx] // STR <Bt>, [<Xn|SP>, <Xm>, SXTX] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHroX | str h16, [x5, x24] // STR <Ht>, [<Xn|SP>, <Xm>] \\ Store vector reg, register offset, basic, B/H/S/D-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHroW | str h15, [x15, w15, uxtw] // STR <Ht>, [<Xn|SP>, <Wm>, UXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHroW | str h3, [x6, w15, sxtw] // STR <Ht>, [<Xn|SP>, <Wm>, SXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHroX | str h2, [x1, x28, sxtx] // STR <Ht>, [<Xn|SP>, <Xm>, SXTX] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHroW | str h30, [x29, w30, uxtw #1] // STR <Ht>, [<Xn|SP>, <Wm>, UXTW #1] \\ Store vector reg, register offset, extend, scale, H-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHroW | str h10, [x21, w11, sxtw #1] // STR <Ht>, [<Xn|SP>, <Wm>, SXTW #1] \\ Store vector reg, register offset, extend, scale, H-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHroX | str h0, [x15, x9, sxtx #1] // STR <Ht>, [<Xn|SP>, <Xm>, SXTX #1] \\ Store vector reg, register offset, extend, scale, H-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHroX | str h13, [x0, x26, lsl #1] // STR <Ht>, [<Xn|SP>, <Xm>, LSL #1] \\ Store vector reg, register offset, scale, H-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSroX | str s2, [x16, x17] // STR <St>, [<Xn|SP>, <Xm>] \\ Store vector reg, register offset, basic, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSroW | str s20, [x24, w10, uxtw] // STR <St>, [<Xn|SP>, <Wm>, UXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSroW | str s4, [x9, w14, sxtw] // STR <St>, [<Xn|SP>, <Wm>, SXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSroX | str s3, [x23, x26, sxtx] // STR <St>, [<Xn|SP>, <Xm>, SXTX] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSroW | str s27, [x17, w9, uxtw #2] // STR <St>, [<Xn|SP>, <Wm>, UXTW #2] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSroW | str s0, [x11, w20, sxtw #2] // STR <St>, [<Xn|SP>, <Wm>, SXTW #2] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSroX | str s20, [x17, x14, sxtx #2] // STR <St>, [<Xn|SP>, <Xm>, SXTX #2] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSroX | str s0, [x15, x28, lsl #2] // STR <St>, [<Xn|SP>, <Xm>, LSL #2] \\ Store vector reg, register offset, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDroX | str d5, [x26, x6] // STR <Dt>, [<Xn|SP>, <Xm>] \\ Store vector reg, register offset, basic, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDroW | str d11, [x9, w5, uxtw] // STR <Dt>, [<Xn|SP>, <Wm>, UXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDroW | str d16, [x20, w8, sxtw] // STR <Dt>, [<Xn|SP>, <Wm>, SXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDroX | str d0, [x12, x9, sxtx] // STR <Dt>, [<Xn|SP>, <Xm>, SXTX] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDroW | str d4, [x21, w25, uxtw #3] // STR <Dt>, [<Xn|SP>, <Wm>, UXTW #3] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDroW | str d28, [x20, w4, sxtw #3] // STR <Dt>, [<Xn|SP>, <Wm>, SXTW #3] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDroX | str d20, [x13, x23, sxtx #3] // STR <Dt>, [<Xn|SP>, <Xm>, SXTX #3] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDroX | str d31, [x19, x28, lsl #3] // STR <Dt>, [<Xn|SP>, <Xm>, LSL #3] \\ Store vector reg, register offset, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQroX | str q13, [x24, x1] // STR <Qt>, [<Xn|SP>, <Xm>] \\ Store vector reg, register offset, basic, Q-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQroW | str q1, [x25, w9, uxtw] // STR <Qt>, [<Xn|SP>, <Wm>, UXTW] \\ Store vector reg, register offset, extend, Q-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQroW | str q25, [x20, w15, sxtw] // STR <Qt>, [<Xn|SP>, <Wm>, SXTW] \\ Store vector reg, register offset, extend, Q-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQroX | str q25, [x0, x15, sxtx] // STR <Qt>, [<Xn|SP>, <Xm>, SXTX] \\ Store vector reg, register offset, extend, Q-form \\ 3 2 2 2.00 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQroW | str q6, [x13, w0, uxtw #4] // STR <Qt>, [<Xn|SP>, <Wm>, UXTW #4] \\ Store vector reg, register offset, extend, scale, Q-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQroW | str q27, [x4, w15, sxtw #4] // STR <Qt>, [<Xn|SP>, <Wm>, SXTW #4] \\ Store vector reg, register offset, extend, scale, Q-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQroX | str q3, [x23, x0, sxtx #4] // STR <Qt>, [<Xn|SP>, <Xm>, SXTX #4] \\ Store vector reg, register offset, extend, scale, Q-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQroX | str q27, [x1, x28, lsl #4] // STR <Qt>, [<Xn|SP>, <Xm>, LSL #4] \\ Store vector reg, register offset, scale, Q-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STR_ZXI | str z3, [x0] // STR <Zt>, [<Xn|SP>] \\ Store from vector reg \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STR_ZXI | str z8, [x6, #188, mul vl] // STR <Zt>, [<Xn|SP>, #<imm>, MUL VL] \\ Store from vector reg \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 3 | 1 | 1 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STRBBpost | strb w23, [x11], #34 // STRB <Wt>, [<Xn|SP>], #<simm> \\ Store, immed post-indexed \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+# CHECK-NEXT: 3 | 1 | 1 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STRBBpre | strb w5, [x19, #-175]! // STRB <Wt>, [<Xn|SP>, #<simm>]! \\ Store, immed pre-indexed \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRBBui | strb w18, [x30] // STRB <Wt>, [<Xn|SP>] \\ Store, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRBBui | strb w12, [x9, #2315] // STRB <Wt>, [<Xn|SP>, #<pimm>] \\ Store, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRBBroW | strb w5, [x26, w7, uxtw] // STRB <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRBBroW | strb w18, [x2, w28, sxtw] // STRB <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRBBroX | strb w21, [x21, x7, sxtx] // STRB <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRBBroX | strb w9, [x6, x21] // STRB <Wt>, [<Xn|SP>, <Xm>] \\ Store register, register offset, basic \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 3 | 1 | 1 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STRHHpost | strh w21, [x8], #192 // STRH <Wt>, [<Xn|SP>], #<simm> \\ Store, immed post-indexed \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+# CHECK-NEXT: 3 | 1 | 1 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STRHHpre | strh w8, [x26, #-204]! // STRH <Wt>, [<Xn|SP>, #<simm>]! \\ Store, immed pre-indexed \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRHHui | strh w6, [x7] // STRH <Wt>, [<Xn|SP>] \\ Store, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRHHui | strh w0, [x19, #7514] // STRH <Wt>, [<Xn|SP>, #<pimm>] \\ Store, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRHHroX | strh w12, [x0, x11] // STRH <Wt>, [<Xn|SP>, <Xm>] \\ Store register, register offset, basic \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRHHroW | strh w5, [x18, w8, uxtw] // STRH <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRHHroW | strh w28, [x29, w0, sxtw] // STRH <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRHHroX | strh w7, [x17, x0, sxtx] // STRH <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRHHroW | strh w7, [x2, w14, uxtw #1] // STRH <Wt>, [<Xn|SP>, <Wm>, UXTW #1] \\ Store register, register offset, extend, scale by 1 \\ 2 1 1 2.00 V1UnitD, V1UnitL, V1UnitL01
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRHHroW | strh w7, [x16, w29, sxtw #1] // STRH <Wt>, [<Xn|SP>, <Wm>, SXTW #1] \\ Store register, register offset, extend, scale by 1 \\ 2 1 1 2.00 V1UnitD, V1UnitL, V1UnitL01
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRHHroX | strh w5, [x1, x13, sxtx #1] // STRH <Wt>, [<Xn|SP>, <Xm>, SXTX #1] \\ Store register, register offset, extend, scale by 1 \\ 2 1 1 2.00 V1UnitD, V1UnitL, V1UnitL01
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRHHroX | strh w14, [x28, x2, lsl #1] // STRH <Wt>, [<Xn|SP>, <Xm>, LSL #1] \\ Store register, register offset, scaled by 1 \\ 2 1 1 2.00 V1UnitD, V1UnitL, V1UnitL01
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STTRWi | sttr w17, [x20] // STTR <Wt>, [<Xn|SP>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STTRWi | sttr w14, [x30, #-35] // STTR <Wt>, [<Xn|SP>, #<simm>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STTRXi | sttr x10, [x16] // STTR <Xt>, [<Xn|SP>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STTRXi | sttr x16, [x8, #-25] // STTR <Xt>, [<Xn|SP>, #<simm>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STTRBi | sttrb w13, [x2] // STTRB <Wt>, [<Xn|SP>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STTRBi | sttrb w0, [x20, #-114] // STTRB <Wt>, [<Xn|SP>, #<simm>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STTRHi | sttrh w26, [x11] // STTRH <Wt>, [<Xn|SP>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STTRHi | sttrh w11, [x30, #-78] // STTRH <Wt>, [<Xn|SP>, #<simm>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STURBi | stur b29, [x8] // STUR <Bt>, [<Xn|SP>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STURBi | stur b5, [x0, #80] // STUR <Bt>, [<Xn|SP>, #<simm>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STURHi | stur h10, [x15] // STUR <Ht>, [<Xn|SP>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STURHi | stur h10, [x12, #-227] // STUR <Ht>, [<Xn|SP>, #<simm>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STURSi | stur s10, [x4] // STUR <St>, [<Xn|SP>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STURSi | stur s9, [x14, #21] // STUR <St>, [<Xn|SP>, #<simm>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STURDi | stur d1, [x28] // STUR <Dt>, [<Xn|SP>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STURDi | stur d6, [x6, #188] // STUR <Dt>, [<Xn|SP>, #<simm>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STURQi | stur q6, [x16] // STUR <Qt>, [<Xn|SP>] \\ Store vector reg, unscaled immed, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STURQi | stur q5, [x13, #-253] // STUR <Qt>, [<Xn|SP>, #<simm>] \\ Store vector reg, unscaled immed, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STURWi | stur w29, [x27] // STUR <Wt>, [<Xn|SP>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STURWi | stur w14, [x2, #-34] // STUR <Wt>, [<Xn|SP>, #<simm>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STURXi | stur x29, [x10] // STUR <Xt>, [<Xn|SP>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STURXi | stur x30, [x25, #127] // STUR <Xt>, [<Xn|SP>, #<simm>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STURBBi | sturb w21, [x5] // STURB <Wt>, [<Xn|SP>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STURBBi | sturb w25, [x26, #-117] // STURB <Wt>, [<Xn|SP>, #<simm>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STURHHi | sturh w0, [x11] // STURH <Wt>, [<Xn|SP>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STURHHi | sturh w7, [x10, #-209] // STURH <Wt>, [<Xn|SP>, #<simm>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STXPW | stxp w29, w24, w6, [x9] // STXP <Ws>, <Wt1>, <Wt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STXPW | stxp w26, w19, w22, [x11] // STXP <Ws>, <Wt1>, <Wt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STXPX | stxp w30, x6, x3, [x1] // STXP <Ws>, <Xt1>, <Xt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STXPX | stxp w7, x2, x10, [x25] // STXP <Ws>, <Xt1>, <Xt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STXRW | stxr w19, w21, [x9] // STXR <Ws>, <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STXRW | stxr w25, w1, [x24] // STXR <Ws>, <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STXRX | stxr w25, x30, [x28] // STXR <Ws>, <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STXRX | stxr w30, x20, [x23] // STXR <Ws>, <Xt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STXRB | stxrb w0, w26, [x10] // STXRB <Ws>, <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STXRB | stxrb w10, w16, [x25] // STXRB <Ws>, <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STXRH | stxrh w0, w20, [x8] // STXRH <Ws>, <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STXRH | stxrh w12, w14, [x1] // STXRH <Ws>, <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SUBWrx | sub w13, wsp, w10 // SUB <Wd>, <Wn|WSP>, <Wm> \\ ALU, basic, unconditional, no flagset \\ 1 2 2 2.00 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SUBWrx | sub w22, wsp, w13, uxtb // SUB <Wd>, <Wn|WSP>, <Wm>, <wextend> \\ ALU, basic, unconditional, no flagset \\ 1 2 2 2.00 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SUBWrx | sub w18, wsp, w23, sxtb #1 // SUB <Wd>, <Wn|WSP>, <Wm>, <wextend> #<amount> \\ ALU, basic, unconditional, no flagset \\ 1 2 2 2.00 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SUBWrx | sub w13, wsp, w8, lsl #4 // SUB <Wd>, <Wn|WSP>, <Wm>, LSL #<amount> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, no flagset \\ 1 2 2 2.00 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBXrs | sub x6, x8, x22 // SUB <Xd>, <Xn|SP>, X<m> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SUBXrx | sub x16, x2, w19, uxtb // SUB <Xd>, <Xn|SP>, <R><m>, <extend> \\ ALU, basic \\ 1 2 2 2.00 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SUBXrx | sub x16, x3, w27, uxtb #2 // SUB <Xd>, <Xn|SP>, <R><m>, <extend> #<amount> \\ ALU, extend and shift \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBXrs | sub x4, x13, x16, lsl #3 // SUB <Xd>, <Xn|SP>, X<m>, LSL #<amount> \\ Arithmetic, LSL shift, shift <= 4 \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBWri | sub wsp, wsp, #50 // SUB <Wd|WSP>, <Wn|WSP>, #<imm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBWri | sub wsp, wsp, #84, lsl #12 // SUB <Wd|WSP>, <Wn|WSP>, #<imm>, <shift> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBXri | sub x18, x22, #36 // SUB <Xd|SP>, <Xn|SP>, #<imm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBXri | sub x17, x20, #184 // SUB <Xd|SP>, <Xn|SP>, #<imm>, <shift> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUB_ZI_B | sub z18.b, z18.b, #117 // SUB <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUB_ZI_S | sub z22.s, z22.s, #4 // SUB <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUB_ZI_H | sub z15.h, z15.h, #50176 // SUB <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBWrs | sub w0, w21, w2, lsl #4 // SUB <Wd>, <Wn>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SUBWrs | sub w22, w7, w13, lsl #19 // SUB <Wd>, <Wn>, <Wm>, LSL #<wamounth> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SUBWrs | sub w1, w18, w16, asr #4 // SUB <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBXrs | sub x27, x29, x16, lsl #1 // SUB <Xd>, <Xn>, <Xm>, LSL #<amountl> \\ Arithmetic, LSL shift, shift <= 4 \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SUBXrs | sub x24, x10, x15, lsl #35 // SUB <Xd>, <Xn>, <Xm>, LSL #<amounth> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SUBXrs | sub x24, x19, x13, lsr #20 // SUB <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SUBv1i64 | sub d18, d25, d0 // SUB <V><d>, <V><n>, <V><m> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SUBv2i32 | sub v15.2s, v14.2s, v11.2s // SUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUB_ZPmZ_H | sub z18.h, p4/m, z18.h, z7.h // SUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUB_ZZZ_B | sub z29.b, z19.b, z8.b // SUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SUBHNv4i32_v4i16 | subhn v7.4h, v10.4s, v13.4s // SUBHN <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SUBHNv2i64_v4i32 | subhn2 v24.4s, v24.2d, v8.2d // SUBHN2 <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUBR_ZI_B | subr z13.b, z13.b, #229 // SUBR <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUBR_ZI_S | subr z17.s, z17.s, #140 // SUBR <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUBR_ZI_D | subr z15.d, z15.d, #100 // SUBR <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUBR_ZPmZ_D | subr z21.d, p7/m, z21.d, z24.d // SUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSWrx | subs w25, wsp, w13 // SUBS <Wd>, <Wn|WSP>, <Wm> \\ ALU, basic, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSWrx | subs w10, wsp, w9, uxth // SUBS <Wd>, <Wn|WSP>, <Wm>, <wextend> \\ ALU, basic, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSWrx | subs w20, wsp, w3, sxth #2 // SUBS <Wd>, <Wn|WSP>, <Wm>, <wextend> #<amount> \\ ALU, basic, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSWrx | subs w12, wsp, w27, lsl #4 // SUBS <Wd>, <Wn|WSP>, <Wm>, LSL #<amount> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSXrs | subs x16, x20, x21 // SUBS <Xd>, <Xn|SP>, X<m> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSXrx | subs x15, x2, w11, uxtb // SUBS <Xd>, <Xn|SP>, <R><m>, <extend> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSXrx64 | subs x13, x15, x14, sxtx #1 // SUBS <Xd>, <Xn|SP>, <R><m>, <extend> #<amount> \\ ALU, flagset, extend and shift \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSXrs | subs x30, x1, x26, lsl #3 // SUBS <Xd>, <Xn|SP>, X<m>, LSL #<amount> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSWri | subs w25, wsp, #239 // SUBS <Wd>, <Wn|WSP>, #<imm> \\ ALU, basic, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSWri | subs w13, wsp, #75, lsl #12 // SUBS <Wd>, <Wn|WSP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 1 1 3.00 V1UnitFlg, V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSXri | subs x9, x3, #173 // SUBS <Xd>, <Xn|SP>, #<imm> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSXri | subs x30, x25, #82, lsl #12 // SUBS <Xd>, <Xn|SP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 1 1 3.00 V1UnitFlg, V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSWrs | subs w16, w27, w25 // SUBS <Wd>, <Wn>, <Wm> \\ ALU, basic, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSWrs | subs w0, w30, w27, lsl #4 // SUBS <Wd>, <Wn>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSWrs | subs w17, w27, w3, lsl #20 // SUBS <Wd>, <Wn>, <Wm>, LSL #<wamounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSWrs | subs w27, w7, w27, asr #5 // SUBS <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSXrs | subs x21, x22, x17 // SUBS <Xd>, <Xn>, <Xm> \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSXrs | subs x18, x1, x5 // SUBS <Xd>, <Xn>, <Xm>, LSL #<amountl> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSXrs | subs x28, x26, x4, lsl #49 // SUBS <Xd>, <Xn>, <Xm>, LSL #<amounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSXrs | subs x26, x14, x30, lsr #35 // SUBS <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | SUDOTlanev8i8 | sudot v4.2s, v20.8b, v18.4b[2] // SUDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<index>] \\ ASIMD dot product using signed and unsigned integers \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | SUDOT_ZZZI | sudot z5.s, z30.b, z3.b[1] // SUDOT <Zda>.S, <Zn>.B, <Zm>.B[<imm>] \\ Dot product, 8 bit, using signed and unsigned integers \\ 1 3 1 2.0 V1UnitV[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUNPKHI_ZZ_D | sunpkhi z22.d, z16.s // SUNPKHI <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUNPKLO_ZZ_H | sunpklo z10.h, z0.b // SUNPKLO <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SUQADDv1i8 | suqadd b15, b21 // SUQADD <V><d>, <V><n> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SUQADDv16i8 | suqadd v26.16b, v27.16b // SUQADD <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | SVC | svc #0x89cb // SVC #<imm> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMWri | sxtb w7, w20 // SXTB <Wd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMXri | sxtb x18, w14 // SXTB <Xd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | SXTB_ZPmZ_H | sxtb z16.h, p5/m, z15.h // SXTB <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | SXTH_ZPmZ_S | sxth z4.s, p7/m, z11.s // SXTH <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | SXTW_ZPmZ_D | sxtw z12.d, p1/m, z16.d // SXTW <Zd>.D, <Pg>/M, <Zn>.D \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMWri | sxth w23, w2 // SXTH <Wd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMXri | sxth x22, w17 // SXTH <Xd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHLLv8i8_shift | sshll v4.8h, v21.8b, #0 // SXTL <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHLLv4i32_shift | sshll2 v20.2d, v30.4s, #0 // SXTL2 <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMXri | sxtw x18, w22 // SXTW <Xd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | SYSxt | sys #6, c6, c0, #3 // SYS #<op1>, <Cn>, <Cm>, #<op2> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | SYSxt | sys #7, c12, c5, #3, x8 // SYS #<op1>, <Cn>, <Cm>, #<op2>, <Xt> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | SYSLxt | sysl x16, #5, c11, c8, #5 // SYSL <Xt>, #<op1>, <Cn>, <Cm>, #<op2> \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitV[2], V1UnitV01[2] | TBLv8i8Two | tbl v7.8b, { v2.16b, v3.16b }, v17.8b // TBL <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B }, <Vm>.<Ta> \\ ASIMD table lookup, 1 or 2 table regs \\ 2 2 2 1.00 V1UnitV[2], V1UnitV01[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV01[2] | TBLv16i8Three | tbl v3.16b, { v10.16b, v11.16b, v12.16b }, v29.16b // TBL <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B }, <Vm>.<Ta> \\ ASIMD table lookup, 3 table regs \\ 2 4 4 1.00 V1UnitV01[2]
+# CHECK-NEXT: 3 | 4 | 4 | 0.67 | V1UnitV[3], V1UnitV01[3] | TBLv8i8Four | tbl v9.8b, { v22.16b, v23.16b, v24.16b, v25.16b }, v14.8b // TBL <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B, <Vn+3>.16B }, <Vm>.<Ta> \\ ASIMD table lookup, 4 table regs \\ 3 4 4 0.67 V1UnitV01[3]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitV[2], V1UnitV01[2] | TBLv16i8One | tbl v29.16b, { v3.16b }, v17.16b // TBL <Vd>.<Ta>, { <Vn>.16B }, <Vm>.<Ta> \\ ASIMD table lookup, 1 or 2 table regs \\ 2 2 2 1.00 V1UnitV[2], V1UnitV01[2]
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | TBNZW | tbnz w3, #28, test // TBNZ W<t>, #<imms>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | TBNZX | tbnz x30, #48, test // TBNZ X<t>, #<immd>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV01[2] | TBXv8i8Two | tbx v25.8b, { v13.16b, v14.16b }, v30.8b // TBX <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B }, <Vm>.<Ta> \\ ASIMD table lookup extension, 2 table reg \\ 2 4 4 1.00 V1UnitV01[2]
+# CHECK-NEXT: 3 | 6 | 6 | 0.67 | V1UnitV[3], V1UnitV01[3] | TBXv16i8Three | tbx v22.16b, { v3.16b, v4.16b, v5.16b }, v25.16b // TBX <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B }, <Vm>.<Ta> \\ ASIMD table lookup extension, 3 table reg \\ 3 6 6 0.67 V1UnitV01[3]
+# CHECK-NEXT: 5 | 6 | 6 | 0.40 | V1UnitV[5], V1UnitV01[5] | TBXv16i8Four | tbx v23.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v26.16b // TBX <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B, <Vn+3>.16B }, <Vm>.<Ta> \\ ASIMD table lookup extension, 4 table reg \\ 5 6 6 0.40 V1UnitV[5], V1UnitV01[5]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitV[2], V1UnitV01[2] | TBXv8i8One | tbx v16.8b, { v21.16b }, v18.8b // TBX <Vd>.<Ta>, { <Vn>.16B }, <Vm>.<Ta> \\ ASIMD table lookup extension, 1 table reg \\ 2 2 2 1.00 V1UnitV[2], V1UnitV01[2]
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | TBZW | tbz w17, #16, test // TBZ W<t>, #<imms>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | TBZX | tbz x22, #41, test // TBZ X<t>, #<immd>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | SYSxt | tlbi vmalle1 // TLBI <tlbi_op> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | SYSxt | tlbi ipas2e1is, x7 // TLBI <tlbi_op2>, <Xt> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | TRN1v2i32 | trn1 v30.2s, v21.2s, v25.2s // TRN1 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD transpose \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | TRN1_PPP_S | trn1 p1.s, p4.s, p0.s // TRN1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> \\ Predicate transpose \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | TRN2_PPP_H | trn2 p0.h, p5.h, p7.h // TRN2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> \\ Predicate transpose \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | TRN2v2i64 | trn2 v27.2d, v29.2d, v10.2d // TRN2 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD transpose \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ANDSWri | tst w25, #0xe00 // TST <Wn>, #<imms> \\ ALU, basic, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ANDSXri | tst x3, #0x1e00 // TST <Xn>, #<immd> \\ ALU, basic, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ANDSWrs | tst w9, w14 // TST <Wn>, <Wm> \\ ALU, basic, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ANDSWrs | tst w10, w3, asr #16 // TST <Wn>, <Wm>, <shift> #<wamount> \\ Test/Compare, shift by immed \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ANDSXrs | tst x11, x28 // TST <Xn>, <Xm> \\ ALU, basic, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ANDSXrs | tst x9, x7, asr #33 // TST <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, flagset \\ 1 2 2 2.00 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | UABAv16i8 | uaba v13.16b, v14.16b, v19.16b // UABA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD absolute diff accum \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | UABALv2i32_v2i64 | uabal v13.2d, v16.2s, v11.2s // UABAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff accum long \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | UABALv8i16_v4i32 | uabal2 v17.4s, v0.8h, v1.8h // UABAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff accum long \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UABDv4i32 | uabd v23.4s, v4.4s, v30.4s // UABD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD absolute diff \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UABD_ZPmZ_B | uabd z5.b, p5/m, z5.b, z10.b // UABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UABDLv4i16_v4i32 | uabdl v13.4s, v26.4h, v7.4h // UABDL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff long \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UABDLv4i32_v2i64 | uabdl2 v15.2d, v9.4s, v10.4s // UABDL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff long \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | UADALPv2i32_v1i64 | uadalp v31.1d, v14.2s // UADALP <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD pairwise add and accumulate long \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UADDLv8i8_v8i16 | uaddl v29.8h, v8.8b, v31.8b // UADDL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UADDLv8i16_v4i32 | uaddl2 v15.4s, v22.8h, v14.8h // UADDL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UADDLPv2i32_v1i64 | uaddlp v15.1d, v5.2s // UADDLP <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD arith, pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitV[2], V1UnitV13 | UADDLVv8i8v | uaddlv h24, v24.8b // UADDLV H<d>, <Vn>.8B \\ ASIMD arith, reduce, 8B/8H \\ 2 4 4 2.00 V1UnitV13
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV13[2] | UADDLVv16i8v | uaddlv h19, v31.16b // UADDLV H<d>, <Vn>.16B \\ ASIMD arith, reduce, 16B \\ 2 4 4 1.00 V1UnitV13[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | UADDLVv4i16v | uaddlv s12, v24.4h // UADDLV S<d>, <Vn>.4H \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitV[2], V1UnitV13 | UADDLVv8i16v | uaddlv s30, v0.8h // UADDLV S<d>, <Vn>.8H \\ ASIMD arith, reduce, 8B/8H \\ 2 4 4 2.00 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | UADDLVv4i32v | uaddlv d6, v19.4s // UADDLV D<d>, <Vn>.4S \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 5 | 14 | 14 | 0.50 | V1UnitV[5], V1UnitV0, V1UnitV1[2], V1UnitV01[3], V1UnitV02, V1UnitV13[3] | UADDV_VPZ_B | uaddv d9, p5, z1.b // UADDV <Dd>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 5 14 14 0.50 V1UnitV1[2]
+# CHECK-NEXT: 4 | 12 | 12 | 0.50 | V1UnitV[4], V1UnitV1[2], V1UnitV01[3], V1UnitV13[2] | UADDV_VPZ_H | uaddv d26, p0, z25.h // UADDV <Dd>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 4 12 12 0.50 V1UnitV1[2]
+# CHECK-NEXT: 4 | 10 | 10 | 0.50 | V1UnitV[4], V1UnitV1[2], V1UnitV01[3], V1UnitV13[2] | UADDV_VPZ_S | uaddv d4, p1, z1.s // UADDV <Dd>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 4 10 10 0.50 V1UnitV1[2]
+# CHECK-NEXT: 2 | 8 | 8 | 2.00 | V1UnitV[2], V1UnitV01 | UADDV_VPZ_D | uaddv d28, p6, z6.d // UADDV <Dd>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 2 8 8 2.00 V1UnitV[2], V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UADDWv2i32_v2i64 | uaddw v17.2d, v9.2d, v12.2s // UADDW <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UADDWv8i16_v4i32 | uaddw2 v15.4s, v13.4s, v4.8h // UADDW2 <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMWri | ubfiz w11, w6, #30, #1 // UBFIZ <Wd>, <Wn>, #<lsbs>, #<widths> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMXri | ubfiz x27, x15, #49, #9 // UBFIZ <Xd>, <Xn>, #<lsbd>, #<widthd> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMWri | lsl w19, w16, #7 // UBFM <Wd>, <Wn>, #<immrs>, #<immss> \\ Bitfield move, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMXri | ubfiz x4, x30, #5, #51 // UBFM <Xd>, <Xn>, #<immrd>, #<immsd> \\ Bitfield move, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMWri | ubfx w13, w18, #25, #3 // UBFX <Wd>, <Wn>, #<lsbs>, #<widths> \\ Bit field extract \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMXri | lsr x23, x26, #59 // UBFX <Xd>, <Xn>, #<lsbd>, #<widthd> \\ Bit field extract \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UCVTFSWHri | ucvtf h8, w24, #16 // UCVTF <Hd>, <Wn>, #<sfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UCVTFSWSri | ucvtf s7, w16, #29 // UCVTF <Sd>, <Wn>, #<sfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UCVTFSWDri | ucvtf d5, w17, #23 // UCVTF <Dd>, <Wn>, #<sfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UCVTFSXHri | ucvtf h13, x17, #12 // UCVTF <Hd>, <Xn>, #<dfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UCVTFSXSri | ucvtf s25, x2, #37 // UCVTF <Sd>, <Xn>, #<dfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UCVTFSXDri | ucvtf d20, x11, #43 // UCVTF <Dd>, <Xn>, #<dfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UCVTFUWHri | ucvtf h30, w4 // UCVTF <Hd>, <Wn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UCVTFUWSri | ucvtf s22, w8 // UCVTF <Sd>, <Wn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UCVTFUWDri | ucvtf d8, w15 // UCVTF <Dd>, <Wn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UCVTFUXHri | ucvtf h17, x12 // UCVTF <Hd>, <Xn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UCVTFUXSri | ucvtf s8, x0 // UCVTF <Sd>, <Xn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UCVTFUXDri | ucvtf d22, x17 // UCVTF <Dd>, <Xn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | UCVTFh | ucvtf h22, h16, #11 // UCVTF H<d>, H<n>, #<hfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | UCVTFs | ucvtf s17, s18, #18 // UCVTF S<d>, S<n>, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UCVTFd | ucvtf d19, d1, #2 // UCVTF D<d>, D<n>, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | UCVTFv4i16_shift | ucvtf v18.4h, v11.4h, #7 // UCVTF <Vd>.4H, <Vn>.4H, #<hfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | UCVTFv8i16_shift | ucvtf v22.8h, v20.8h, #10 // UCVTF <Vd>.8H, <Vn>.8H, #<hfbits> \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UCVTFv2i32_shift | ucvtf v16.2s, v17.2s, #11 // UCVTF <Vd>.2S, <Vn>.2S, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | UCVTFv4i32_shift | ucvtf v17.4s, v23.4s, #2 // UCVTF <Vd>.4S, <Vn>.4S, #<sfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UCVTFv2i64_shift | ucvtf v18.2d, v20.2d, #60 // UCVTF <Vd>.2D, <Vn>.2D, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | UCVTFv1i16 | ucvtf h7, h21 // UCVTF <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | UCVTFv1i32 | ucvtf s25, s7 // UCVTF S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 2 4 4 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UCVTFv1i64 | ucvtf d30, d29 // UCVTF D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | UCVTFv4f16 | ucvtf v9.4h, v25.4h // UCVTF <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 4 | 6 | 6 | 1.00 | V1UnitV[2], V1UnitV02[2] | UCVTFv8f16 | ucvtf v24.8h, v31.8h // UCVTF <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 4 6 6 1.00 V1UnitV[2], V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UCVTFv2f32 | ucvtf v14.2s, v2.2s // UCVTF <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | UCVTFv4f32 | ucvtf v20.4s, v0.4s // UCVTF <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 2 4 4 1.00 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UCVTFv2f64 | ucvtf v27.2d, v3.2d // UCVTF <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 4 | 6 | 6 | 0.25 | V1UnitV[4], V1UnitV0[4], V1UnitV01[4], V1UnitV02[4] | UCVTF_ZPmZ_HtoH | ucvtf z31.h, p5/m, z30.h // UCVTF <Zd>.H, <Pg>/M, <Zn>.H \\ Convert to floating point, 16b to half \\ 4 6 6 0.25 V1UnitV0[4]
+# CHECK-NEXT: 2 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | UCVTF_ZPmZ_StoH | ucvtf z23.h, p7/m, z9.s // UCVTF <Zd>.H, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 2 4 4 0.50 V1UnitV0[2]
+# CHECK-NEXT: 2 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | UCVTF_ZPmZ_StoS | ucvtf z1.s, p1/m, z10.s // UCVTF <Zd>.S, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 2 4 4 0.50 V1UnitV0[2]
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UCVTF_ZPmZ_StoD | ucvtf z24.d, p5/m, z9.s // UCVTF <Zd>.D, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 1 3 3 1.00 V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UCVTF_ZPmZ_DtoH | ucvtf z30.h, p2/m, z24.d // UCVTF <Zd>.H, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UCVTF_ZPmZ_DtoS | ucvtf z9.s, p5/m, z9.d // UCVTF <Zd>.S, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UCVTF_ZPmZ_DtoD | ucvtf z18.d, p6/m, z19.d // UCVTF <Zd>.D, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 12 | 12 | 0.20 | V1UnitI[5], V1UnitM[5], V1UnitM0[5] | UDIVWr | udiv w12, w17, w22 // UDIV <Wd>, <Wn>, <Wm> \\ Divide, W-form \\ 1 12 12 0.08 V1UnitM0[13]
+# CHECK-NEXT: 1 | 20 | 20 | 0.20 | V1UnitI[5], V1UnitM[5], V1UnitM0[5] | UDIVXr | udiv x7, x2, x23 // UDIV <Xd>, <Xn>, <Xm> \\ Divide, X-form \\ 1 20 20 0.05 V1UnitM0[21]
+# CHECK-NEXT: 1 | 12 | 12 | 0.14 | V1UnitV[7], V1UnitV0[7], V1UnitV01[7], V1UnitV02[7] | UDIV_ZPmZ_S | udiv z30.s, p5/m, z30.s, z10.s // UDIV <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 1 12 12 0.08 V1UnitV0[12]
+# CHECK-NEXT: 1 | 20 | 20 | 0.14 | V1UnitV[7], V1UnitV0[7], V1UnitV01[7], V1UnitV02[7] | UDIV_ZPmZ_D | udiv z31.d, p5/m, z31.d, z29.d // UDIV <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 1 20 20 0.05 V1UnitV0[21]
+# CHECK-NEXT: 1 | 12 | 12 | 0.14 | V1UnitV[7], V1UnitV0[7], V1UnitV01[7], V1UnitV02[7] | UDIVR_ZPmZ_S | udivr z19.s, p4/m, z19.s, z8.s // UDIVR <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 1 12 12 0.08 V1UnitV0[12]
+# CHECK-NEXT: 1 | 20 | 20 | 0.14 | V1UnitV[7], V1UnitV0[7], V1UnitV01[7], V1UnitV02[7] | UDIVR_ZPmZ_D | udivr z3.d, p5/m, z3.d, z8.d // UDIVR <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 1 20 20 0.05 V1UnitV0[21]
+# CHECK-NEXT: 1 | 3 | 1 | 2.00 | V1UnitV, V1UnitV01 | UDOT_ZZZI_S | udot z0.s, z5.b, z4.b[1] // UDOT <Zda>.S, <Zn>.B, <Zms>.B[<imms>] \\ Dot product, 8 bit \\ 1 3 1 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 1 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UDOT_ZZZI_D | udot z19.d, z1.h, z13.h[1] // UDOT <Zda>.D, <Zn>.H, <Zmd>.H[<immd>] \\ Dot product, 16 bit \\ 1 4 1 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 1 | 2.00 | V1UnitV, V1UnitV01 | UDOT_ZZZ_S | udot z22.s, z29.b, z4.b // UDOT <Zda>.S, <Zn>.B, <Zm>.B \\ Dot product, 8 bit \\ 1 3 1 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 1 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UDOT_ZZZ_D | udot z9.d, z1.h, z11.h // UDOT <Zda>.D, <Zn>.H, <Zm>.H \\ Dot product, 16 bit \\ 1 4 1 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | UDOTlanev8i8 | udot v10.2s, v11.8b, v21.4b[3] // UDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<index>] \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | UDOTlanev16i8 | udot v7.4s, v21.16b, v6.4b[3] // UDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<index>] \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | UDOTv8i8 | udot v19.2s, v31.8b, v17.8b // UDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UHADDv8i16 | uhadd v10.8h, v7.8h, v7.8h // UHADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UHSUBv4i16 | uhsub v12.4h, v16.4h, v28.4h // UHSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UMADDLrrr | umaddl x9, w28, w9, x19 // UMADDL <Xd>, <Wn>, <Wm>, <Xa> \\ Multiply accumulate long \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UMAX_ZI_B | umax z8.b, z8.b, #12 // UMAX <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UMAX_ZPmZ_B | umax z27.b, p1/m, z27.b, z13.b // UMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UMAXv16i8 | umax v7.16b, v11.16b, v7.16b // UMAX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UMAXPv8i16 | umaxp v15.8h, v8.8h, v12.8h // UMAXP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitV[2], V1UnitV13 | UMAXVv8i8v | umaxv b19, v7.8b // UMAXV B<d>, <Vn>.8B \\ ASIMD max/min, reduce, 8B/8H \\ 2 4 4 2.00 V1UnitV13
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV13[2] | UMAXVv16i8v | umaxv b12, v10.16b // UMAXV B<d>, <Vn>.16B \\ ASIMD max/min, reduce, 16B \\ 2 4 4 1.00 V1UnitV13[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | UMAXVv4i16v | umaxv h27, v5.4h // UMAXV H<d>, <Vn>.4H \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitV[2], V1UnitV13 | UMAXVv8i16v | umaxv h11, v22.8h // UMAXV H<d>, <Vn>.8H \\ ASIMD max/min, reduce, 8B/8H \\ 2 4 4 2.00 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | UMAXVv4i32v | umaxv s5, v25.4s // UMAXV S<d>, <Vn>.4S \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 5 | 14 | 14 | 0.50 | V1UnitV[5], V1UnitV0, V1UnitV1[2], V1UnitV01[3], V1UnitV02, V1UnitV13[3] | UMAXV_VPZ_B | umaxv b9, p7, z19.b // UMAXV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 5 14 14 0.50 V1UnitV1[2]
+# CHECK-NEXT: 4 | 12 | 12 | 0.50 | V1UnitV[4], V1UnitV1[2], V1UnitV01[3], V1UnitV13[2] | UMAXV_VPZ_H | umaxv h8, p7, z26.h // UMAXV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 4 12 12 0.50 V1UnitV1[2]
+# CHECK-NEXT: 4 | 10 | 10 | 0.50 | V1UnitV[4], V1UnitV1[2], V1UnitV01[3], V1UnitV13[2] | UMAXV_VPZ_S | umaxv s15, p2, z28.s // UMAXV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 4 10 10 0.50 V1UnitV1[2]
+# CHECK-NEXT: 2 | 8 | 8 | 2.00 | V1UnitV[2], V1UnitV01 | UMAXV_VPZ_D | umaxv d11, p4, z11.d // UMAXV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 2 8 8 2.00 V1UnitV[2], V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UMIN_ZI_S | umin z21.s, z21.s, #139 // UMIN <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UMIN_ZPmZ_S | umin z31.s, p2/m, z31.s, z4.s // UMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UMINv16i8 | umin v0.16b, v26.16b, v2.16b // UMIN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UMINPv4i32 | uminp v28.4s, v16.4s, v15.4s // UMINP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitV[2], V1UnitV13 | UMINVv8i8v | uminv b23, v21.8b // UMINV B<d>, <Vn>.8B \\ ASIMD max/min, reduce, 8B/8H \\ 2 4 4 2.00 V1UnitV13
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV13[2] | UMINVv16i8v | uminv b3, v10.16b // UMINV B<d>, <Vn>.16B \\ ASIMD max/min, reduce, 16B \\ 2 4 4 1.00 V1UnitV13[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | UMINVv4i16v | uminv h6, v22.4h // UMINV H<d>, <Vn>.4H \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitV[2], V1UnitV13 | UMINVv8i16v | uminv h23, v3.8h // UMINV H<d>, <Vn>.8H \\ ASIMD max/min, reduce, 8B/8H \\ 2 4 4 2.00 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | UMINVv4i32v | uminv s29, v19.4s // UMINV S<d>, <Vn>.4S \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 5 | 14 | 14 | 0.50 | V1UnitV[5], V1UnitV0, V1UnitV1[2], V1UnitV01[3], V1UnitV02, V1UnitV13[3] | UMINV_VPZ_B | uminv b2, p5, z8.b // UMINV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 5 14 14 0.50 V1UnitV1[2]
+# CHECK-NEXT: 4 | 12 | 12 | 0.50 | V1UnitV[4], V1UnitV1[2], V1UnitV01[3], V1UnitV13[2] | UMINV_VPZ_H | uminv h28, p0, z0.h // UMINV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 4 12 12 0.50 V1UnitV1[2]
+# CHECK-NEXT: 4 | 10 | 10 | 0.50 | V1UnitV[4], V1UnitV1[2], V1UnitV01[3], V1UnitV13[2] | UMINV_VPZ_S | uminv s10, p1, z29.s // UMINV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 4 10 10 0.50 V1UnitV1[2]
+# CHECK-NEXT: 2 | 8 | 8 | 2.00 | V1UnitV[2], V1UnitV01 | UMINV_VPZ_D | uminv d24, p5, z29.d // UMINV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 2 8 8 2.00 V1UnitV[2], V1UnitV01
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLALv4i16_indexed | umlal v22.4s, v14.4h, v0.h[6] // UMLAL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLALv2i32_indexed | umlal v28.2d, v31.2s, v0.s[1] // UMLAL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLALv8i16_indexed | umlal2 v31.4s, v7.8h, v15.h[5] // UMLAL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLALv4i32_indexed | umlal2 v10.2d, v4.4s, v3.s[2] // UMLAL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLALv4i16_v4i32 | umlal v29.4s, v20.4h, v30.4h // UMLAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLALv4i32_v2i64 | umlal2 v10.2d, v28.4s, v19.4s // UMLAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLSLv4i16_indexed | umlsl v21.4s, v12.4h, v7.h[5] // UMLSL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLSLv2i32_indexed | umlsl v20.2d, v20.2s, v2.s[0] // UMLSL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLSLv8i16_indexed | umlsl2 v27.4s, v28.8h, v6.h[4] // UMLSL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLSLv4i32_indexed | umlsl2 v30.2d, v23.4s, v1.s[2] // UMLSL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLSLv2i32_v2i64 | umlsl v11.2d, v23.2s, v1.2s // UMLSL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLSLv16i8_v8i16 | umlsl2 v11.8h, v20.16b, v2.16b // UMLSL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | UMMLA | ummla v14.4s, v17.16b, v25.16b // UMMLA <Vd>.4S, <Vn>.16B, <Vm>.16B \\ ASIMD matrix multiply-accumulate \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | UMSUBLrrr | umnegl x23, w5, w23 // UMNEGL <Xd>, <Wn>, <Wm> \\ Multiply long \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UMOVvi8_idx0 | umov w6, v22.b[0] // UMOV <Wd>, <Vn>.B[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UMOVvi8 | umov w29, v0.b[11] // UMOV <Wd>, <Vn>.B[<indexb>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UMOVvi16_idx0 | umov w10, v25.h[0] // UMOV <Wd>, <Vn>.H[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UMOVvi16 | umov w6, v7.h[3] // UMOV <Wd>, <Vn>.H[<indexh>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UMOVvi32_idx0 | mov w8, v8.s[0] // UMOV <Wd>, <Vn>.S[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UMOVvi32 | mov w20, v1.s[3] // UMOV <Wd>, <Vn>.S[<indexs>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UMOVvi64_idx0 | mov x20, v11.d[0] // UMOV <Xd>, <Vn>.D[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UMOVvi64 | mov x29, v7.d[1] // UMOV <Xd>, <Vn>.D[<indexd>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UMSUBLrrr | umsubl x21, w16, w28, x6 // UMSUBL <Xd>, <Wn>, <Wm>, <Xa> \\ Multiply accumulate long \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UMULH_ZPmZ_B | umulh z20.b, p4/m, z20.b, z6.b // UMULH <Zdn>.B, <Pg>/M, <Zdn>.B, <Zm>.B \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UMULH_ZPmZ_H | umulh z30.h, p6/m, z30.h, z15.h // UMULH <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UMULH_ZPmZ_S | umulh z11.s, p7/m, z11.s, z8.s // UMULH <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 2 | 5 | 5 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | UMULH_ZPmZ_D | umulh z3.d, p3/m, z3.d, z2.d // UMULH <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Multiply, D element size \\ 2 5 5 0.50 V1UnitV0[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitI, V1UnitM | UMULHrr | umulh x23, x22, x19 // UMULH <Xd>, <Xn>, <Xm> \\ Multiply high \\ 1 3 3 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | UMADDLrrr | umull x5, w17, w23 // UMULL <Xd>, <Wn>, <Wm> \\ Multiply long \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UMULLv4i16_indexed | umull v27.4s, v1.4h, v8.h[6] // UMULL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UMULLv2i32_indexed | umull v22.2d, v28.2s, v6.s[1] // UMULL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UMULLv8i16_indexed | umull2 v18.4s, v26.8h, v10.h[1] // UMULL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UMULLv4i32_indexed | umull2 v28.2d, v21.4s, v1.s[0] // UMULL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UMULLv4i16_v4i32 | umull v23.4s, v26.4h, v19.4h // UMULL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UMULLv16i8_v8i16 | umull2 v11.8h, v29.16b, v29.16b // UMULL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQADD_ZI_B | uqadd z18.b, z18.b, #14 // UQADD <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQADD_ZI_S | uqadd z2.s, z2.s, #14 // UQADD <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQADD_ZI_S | uqadd z24.s, z24.s, #56 // UQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQADD_ZZZ_H | uqadd z6.h, z28.h, z5.h // UQADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UQADDv1i32 | uqadd s0, s24, s30 // UQADD <V><d>, <V><n>, <V><m> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UQADDv2i64 | uqadd v14.2d, v22.2d, v20.2d // UQADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECB_WPiI | uqdecb w10 // UQDECB <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECB_WPiI | uqdecb w8, vl3 // UQDECB <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECB_WPiI | uqdecb w3, vl32 // UQDECB <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECB_XPiI | uqdecb x8 // UQDECB <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECB_XPiI | uqdecb x3, vl5 // UQDECB <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECB_XPiI | uqdecb x22, mul3, mul #2 // UQDECB <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECD_WPiI | uqdecd w11 // UQDECD <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECD_WPiI | uqdecd w27, vl256 // UQDECD <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECD_WPiI | uqdecd w6, vl32, mul #10 // UQDECD <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECD_XPiI | uqdecd x1 // UQDECD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECD_XPiI | uqdecd x12, vl8 // UQDECD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECD_XPiI | uqdecd x10, vl64, mul #10 // UQDECD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UQDECD_ZPiI | uqdecd z0.d // UQDECD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UQDECD_ZPiI | uqdecd z8.d, vl3 // UQDECD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UQDECD_ZPiI | uqdecd z27.d, vl16, mul #2 // UQDECD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECH_WPiI | uqdech w30 // UQDECH <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECH_WPiI | uqdech w28, mul3 // UQDECH <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECH_WPiI | uqdech w5, vl5, mul #8 // UQDECH <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECH_XPiI | uqdech x2 // UQDECH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECH_XPiI | uqdech x15, vl7 // UQDECH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECH_XPiI | uqdech x17, vl256, mul #10 // UQDECH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UQDECH_ZPiI | uqdech z5.h // UQDECH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UQDECH_ZPiI | uqdech z16.h, vl128 // UQDECH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UQDECH_ZPiI | uqdech z27.h, vl128, mul #15 // UQDECH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECP_WP_H | uqdecp w19, p5.h // UQDECP <Wdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECP_XP_B | uqdecp x1, p1.b // UQDECP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 3 | 7 | 7 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV, V1UnitV01 | UQDECP_ZP_S | uqdecp z20.s, p0.s // UQDECP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 3 7 7 0.50 V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV, V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECW_WPiI | uqdecw w17 // UQDECW <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECW_WPiI | uqdecw w11, vl256 // UQDECW <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECW_WPiI | uqdecw w13, mul4, mul #13 // UQDECW <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECW_XPiI | uqdecw x7 // UQDECW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECW_XPiI | uqdecw x28, vl32 // UQDECW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECW_XPiI | uqdecw x0, vl256, mul #3 // UQDECW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UQDECW_ZPiI | uqdecw z29.s // UQDECW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UQDECW_ZPiI | uqdecw z22.s, vl2 // UQDECW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UQDECW_ZPiI | uqdecw z20.s, vl2, mul #10 // UQDECW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCB_WPiI | uqincb w2 // UQINCB <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCB_WPiI | uqincb w21, vl128 // UQINCB <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCB_WPiI | uqincb w0, all, mul #13 // UQINCB <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCB_XPiI | uqincb x24 // UQINCB <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCB_XPiI | uqincb x18, vl7 // UQINCB <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCB_XPiI | uqincb x13, vl256, mul #13 // UQINCB <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCD_WPiI | uqincd w23 // UQINCD <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCD_WPiI | uqincd w27, vl4 // UQINCD <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCD_WPiI | uqincd w7, vl32, mul #16 // UQINCD <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCD_XPiI | uqincd x0 // UQINCD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCD_XPiI | uqincd x29, mul4 // UQINCD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCD_XPiI | uqincd x20, pow2, mul #3 // UQINCD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UQINCD_ZPiI | uqincd z29.d // UQINCD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UQINCD_ZPiI | uqincd z4.d, vl64 // UQINCD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UQINCD_ZPiI | uqincd z12.d, vl6, mul #13 // UQINCD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCH_WPiI | uqinch w4 // UQINCH <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCH_WPiI | uqinch w23, mul3 // UQINCH <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCH_WPiI | uqinch w27, vl7, mul #3 // UQINCH <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCH_XPiI | uqinch x8 // UQINCH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCH_XPiI | uqinch x13, mul3 // UQINCH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCH_XPiI | uqinch x5, mul4, mul #9 // UQINCH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UQINCH_ZPiI | uqinch z21.h // UQINCH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UQINCH_ZPiI | uqinch z1.h, vl8 // UQINCH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UQINCH_ZPiI | uqinch z7.h, vl7, mul #12 // UQINCH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCP_WP_D | uqincp w4, p5.d // UQINCP <Wdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCP_XP_D | uqincp x13, p5.d // UQINCP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 3 | 7 | 7 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV, V1UnitV01 | UQINCP_ZP_S | uqincp z1.s, p0.s // UQINCP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 3 7 7 0.50 V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV, V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCW_WPiI | uqincw w13 // UQINCW <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCW_WPiI | uqincw w26, vl8 // UQINCW <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCW_WPiI | uqincw w3, vl16, mul #13 // UQINCW <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCW_XPiI | uqincw x26 // UQINCW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCW_XPiI | uqincw x13, vl256 // UQINCW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCW_XPiI | uqincw x29, vl7, mul #6 // UQINCW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UQINCW_ZPiI | uqincw z26.s // UQINCW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UQINCW_ZPiI | uqincw z31.s, vl5 // UQINCW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UQINCW_ZPiI | uqincw z12.s, vl7, mul #4 // UQINCW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 1.00 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQRSHLv1i32 | uqrshl s17, s5, s8 // UQRSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQRSHLv8i8 | uqrshl v25.8b, v13.8b, v23.8b // UQRSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UQRSHRNb | uqrshrn b12, h9, #4 // UQRSHRN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UQRSHRNh | uqrshrn h1, s28, #2 // UQRSHRN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UQRSHRNs | uqrshrn s1, d4, #12 // UQRSHRN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQRSHRNv8i8_shift | uqrshrn v17.8b, v24.8h, #4 // UQRSHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQRSHRNv4i16_shift | uqrshrn v29.4h, v25.4s, #10 // UQRSHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQRSHRNv2i32_shift | uqrshrn v16.2s, v0.2d, #10 // UQRSHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQRSHRNv16i8_shift | uqrshrn2 v5.16b, v28.8h, #6 // UQRSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQRSHRNv8i16_shift | uqrshrn2 v28.8h, v22.4s, #15 // UQRSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQRSHRNv4i32_shift | uqrshrn2 v20.4s, v13.2d, #4 // UQRSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHLb | uqshl b16, b25, #3 // UQSHL B<d>, B<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHLh | uqshl h22, h27, #3 // UQSHL H<d>, H<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHLs | uqshl s9, s5, #2 // UQSHL S<d>, S<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHLd | uqshl d25, d1, #30 // UQSHL D<d>, D<n>, #<shiftd> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHLv16i8_shift | uqshl v25.16b, v0.16b, #7 // UQSHL <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHLv4i16_shift | uqshl v1.4h, v12.4h, #15 // UQSHL <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHLv2i32_shift | uqshl v23.2s, v4.2s, #17 // UQSHL <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHLv2i64_shift | uqshl v28.2d, v23.2d, #48 // UQSHL <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHLv1i8 | uqshl b22, b26, b2 // UQSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHLv4i16 | uqshl v8.4h, v17.4h, v13.4h // UQSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UQSHRNb | uqshrn b16, h27, #6 // UQSHRN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UQSHRNh | uqshrn h4, s2, #15 // UQSHRN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UQSHRNs | uqshrn s0, d15, #22 // UQSHRN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHRNv8i8_shift | uqshrn v19.8b, v26.8h, #3 // UQSHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHRNv4i16_shift | uqshrn v31.4h, v17.4s, #8 // UQSHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHRNv2i32_shift | uqshrn v1.2s, v11.2d, #9 // UQSHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHRNv16i8_shift | uqshrn2 v23.16b, v16.8h, #1 // UQSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHRNv8i16_shift | uqshrn2 v1.8h, v12.4s, #2 // UQSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHRNv4i32_shift | uqshrn2 v30.4s, v29.2d, #32 // UQSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQSUB_ZI_B | uqsub z26.b, z26.b, #174 // UQSUB <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQSUB_ZI_S | uqsub z19.s, z19.s, #228 // UQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQSUB_ZI_H | uqsub z15.h, z15.h, #26624 // UQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQSUB_ZZZ_D | uqsub z25.d, z13.d, z19.d // UQSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UQSUBv1i32 | uqsub s16, s21, s6 // UQSUB <V><d>, <V><n>, <V><m> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UQSUBv4i32 | uqsub v19.4s, v0.4s, v5.4s // UQSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQXTNv1i32 | uqxtn s3, d27 // UQXTN <Vb><d>, <Va><n> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQXTNv2i32 | uqxtn v26.2s, v5.2d // UQXTN <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQXTNv16i8 | uqxtn2 v15.16b, v22.8h // UQXTN2 <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | URECPEv2i32 | urecpe v10.2s, v8.2s // URECPE <Vd>.2S, <Vn>.2S \\ ASIMD reciprocal and square root estimate, D-form U32 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | URECPEv4i32 | urecpe v1.4s, v23.4s // URECPE <Vd>.4S, <Vn>.4S \\ ASIMD reciprocal and square root estimate, Q-form U32 \\ 1 4 4 2.00 V1UnitV, V1UnitV02
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | URHADDv2i32 | urhadd v16.2s, v19.2s, v2.2s // URHADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | URSHLv1i64 | urshl d24, d22, d29 // URSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | URSHLv8i8 | urshl v31.8b, v5.8b, v3.8b // URSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | URSHRd | urshr d23, d19, #62 // URSHR <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed, complex \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | URSHRv16i8_shift | urshr v23.16b, v14.16b, #2 // URSHR <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | URSHRv4i16_shift | urshr v16.4h, v13.4h, #7 // URSHR <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | URSHRv4i32_shift | urshr v10.4s, v10.4s, #21 // URSHR <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | URSHRv2i64_shift | urshr v2.2d, v16.2d, #30 // URSHR <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | URSQRTEv2i32 | ursqrte v15.2s, v20.2s // URSQRTE <Vd>.2S, <Vn>.2S \\ ASIMD reciprocal and square root estimate, D-form U32 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | URSQRTEv4i32 | ursqrte v31.4s, v14.4s // URSQRTE <Vd>.4S, <Vn>.4S \\ ASIMD reciprocal and square root estimate, Q-form U32 \\ 1 4 4 2.00 V1UnitV, V1UnitV02
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | URSRAd | ursra d24, d24, #48 // URSRA <V><d>, <V><n>, #<shift> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | URSRAv8i8_shift | ursra v14.8b, v18.8b, #1 // URSRA <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | URSRAv4i16_shift | ursra v9.4h, v9.4h, #16 // URSRA <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | URSRAv2i32_shift | ursra v25.2s, v17.2s, #9 // URSRA <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | URSRAv2i64_shift | ursra v17.2d, v16.2d, #61 // URSRA <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | USDOTlanev8i8 | usdot v0.2s, v18.8b, v10.4b[3] // USDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<index>] \\ ASIMD dot product using signed and unsigned integers \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | USDOT_ZZZI | usdot z5.s, z25.b, z2.b[1] // USDOT <Zda>.S, <Zn>.B, <Zm>.B[<imm>] \\ Dot product, 8 bit, using signed and unsigned integers \\ 1 3 1 2.0 V1UnitV[2]
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | USDOTv8i8 | usdot v17.2s, v0.8b, v29.8b // USDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD dot product using signed and unsigned integers \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | USDOT_ZZZ | usdot z8.s, z6.b, z18.b // USDOT <Zda>.S, <Zn>.B, <Zm>.B \\ Dot product, 8 bit, using signed and unsigned integers \\ 1 3 1 2.0 V1UnitV[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHLv1i64 | ushl d7, d17, d3 // USHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHLv8i8 | ushl v6.8b, v26.8b, v6.8b // USHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHLLv8i8_shift | ushll v18.8h, v24.8b, #4 // USHLL <Vd>.8H, <Vn>.8B, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHLLv4i16_shift | ushll v12.4s, v10.4h, #3 // USHLL <Vd>.4S, <Vn>.4H, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHLLv2i32_shift | ushll v16.2d, v16.2s, #31 // USHLL <Vd>.2D, <Vn>.2S, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHLLv16i8_shift | ushll2 v14.8h, v3.16b, #3 // USHLL2 <Vd>.8H, <Vn>.16B, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHLLv8i16_shift | ushll2 v18.4s, v22.8h, #13 // USHLL2 <Vd>.4S, <Vn>.8H, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHLLv4i32_shift | ushll2 v31.2d, v12.4s, #11 // USHLL2 <Vd>.2D, <Vn>.4S, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | USHRd | ushr d23, d22, #58 // USHR <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed, basic \\ 1 2 2 4.00 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHRv8i8_shift | ushr v24.8b, v0.8b, #2 // USHR <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHRv8i16_shift | ushr v21.8h, v31.8h, #11 // USHR <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHRv2i32_shift | ushr v27.2s, v24.2s, #14 // USHR <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHRv2i64_shift | ushr v0.2d, v27.2d, #48 // USHR <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | USMMLA | usmmla v25.4s, v10.16b, v11.16b // USMMLA <Vd>.4S, <Vn>.16B, <Vm>.16B \\ ASIMD matrix multiply-accumulate \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | USQADDv1i16 | usqadd h14, h13 // USQADD <V><d>, <V><n> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | USQADDv2i64 | usqadd v18.2d, v23.2d // USQADD <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | USRAd | usra d22, d24, #9 // USRA <V><d>, <V><n>, #<shift> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | USRAv16i8_shift | usra v16.16b, v5.16b, #5 // USRA <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | USRAv4i16_shift | usra v18.4h, v22.4h, #11 // USRA <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | USRAv2i32_shift | usra v13.2s, v12.2s, #24 // USRA <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | USRAv2i64_shift | usra v30.2d, v30.2d, #41 // USRA <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | USUBLv4i16_v4i32 | usubl v22.4s, v18.4h, v3.4h // USUBL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | USUBLv16i8_v8i16 | usubl2 v12.8h, v23.16b, v15.16b // USUBL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | USUBWv8i8_v8i16 | usubw v30.8h, v12.8h, v20.8b // USUBW <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | USUBWv8i16_v4i32 | usubw2 v2.4s, v0.4s, v30.8h // USUBW2 <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UUNPKHI_ZZ_D | uunpkhi z26.d, z26.s // UUNPKHI <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UUNPKLO_ZZ_S | uunpklo z10.s, z11.h // UUNPKLO <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMWri | uxtb w2, w23 // UXTB <Wd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | UXTB_ZPmZ_D | uxtb z1.d, p2/m, z11.d // UXTB <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | UXTH_ZPmZ_S | uxth z6.s, p3/m, z18.s // UXTH <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | UXTW_ZPmZ_D | uxtw z23.d, p4/m, z3.d // UXTW <Zd>.D, <Pg>/M, <Zn>.D \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMWri | uxth w7, w14 // UXTH <Wd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHLLv4i16_shift | ushll v1.4s, v22.4h, #0 // UXTL <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHLLv16i8_shift | ushll2 v14.8h, v3.16b, #0 // UXTL2 <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UZP1v2i32 | uzp1 v9.2s, v29.2s, v20.2s // UZP1 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD unzip/zip \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UZP1_PPP_D | uzp1 p5.d, p3.d, p5.d // UZP1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> \\ Predicate zip/unzip \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UZP2_PPP_S | uzp2 p6.s, p0.s, p6.s // UZP2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> \\ Predicate zip/unzip \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UZP2v4i32 | uzp2 v18.4s, v12.4s, v31.4s // UZP2 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD unzip/zip \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HINT | wfe // WFE \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HINT | wfi // WFI \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | WHILELE_PXX_H | whilele p6.h, x28, x30 // WHILELE <Pd>.<T>, <R><n>, <R><m> \\ Loop control, based on GPR \\ 2 3 3 0.50 V1UnitM0[2]
+# CHECK-NEXT: 2 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | WHILELO_PXX_B | whilelo p3.b, x9, x7 // WHILELO <Pd>.<T>, <R><n>, <R><m> \\ Loop control, based on GPR \\ 2 3 3 0.50 V1UnitM0[2]
+# CHECK-NEXT: 2 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | WHILELS_PWW_B | whilels p4.b, w4, w20 // WHILELS <Pd>.<T>, <R><n>, <R><m> \\ Loop control, based on GPR \\ 2 3 3 0.50 V1UnitM0[2]
+# CHECK-NEXT: 2 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | WHILELT_PXX_S | whilelt p7.s, x20, x6 // WHILELT <Pd>.<T>, <R><n>, <R><m> \\ Loop control, based on GPR \\ 2 3 3 0.50 V1UnitM0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | WRFFR | wrffr p7.b // WRFFR <Pn>.B \\ Write to first fault register \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | XTNv8i8 | xtn v20.8b, v17.8h // XTN <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | XTNv16i8 | xtn2 v31.16b, v26.8h // XTN2 <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HINT | yield // YIELD \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ZIP1v2i64 | zip1 v21.2d, v4.2d, v11.2d // ZIP1 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD unzip/zip \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | ZIP1_PPP_D | zip1 p0.d, p1.d, p4.d // ZIP1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> \\ Predicate zip/unzip \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | ZIP2_PPP_S | zip2 p3.s, p5.s, p4.s // ZIP2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> \\ Predicate zip/unzip \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ZIP2v4i32 | zip2 v2.4s, v20.4s, v5.4s // ZIP2 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD unzip/zip \\ 1 2 2 4.0 V1UnitV
diff --git a/llvm/tools/llvm-mca/CMakeLists.txt b/llvm/tools/llvm-mca/CMakeLists.txt
index 4ef8b9afa12a7..bcccf17d4d64b 100644
--- a/llvm/tools/llvm-mca/CMakeLists.txt
+++ b/llvm/tools/llvm-mca/CMakeLists.txt
@@ -19,6 +19,7 @@ add_llvm_tool(llvm-mca
Views/BottleneckAnalysis.cpp
Views/DispatchStatistics.cpp
Views/InstructionInfoView.cpp
+ Views/SchedulingInfoView.cpp
Views/InstructionView.cpp
Views/RegisterFileStatistics.cpp
Views/ResourcePressureView.cpp
diff --git a/llvm/tools/llvm-mca/Views/InstructionInfoView.h b/llvm/tools/llvm-mca/Views/InstructionInfoView.h
index 3befafda90a38..2827f718cc318 100644
--- a/llvm/tools/llvm-mca/Views/InstructionInfoView.h
+++ b/llvm/tools/llvm-mca/Views/InstructionInfoView.h
@@ -66,6 +66,7 @@ class InstructionInfoView : public InstructionView {
struct InstructionInfoViewData {
unsigned NumMicroOpcodes = 0;
unsigned Latency = 0;
+ unsigned Advance = 0; // ReadAvance Bypasses cycles
std::optional<double> RThroughput = 0.0;
bool mayLoad = false;
bool mayStore = false;
diff --git a/llvm/tools/llvm-mca/Views/SchedulingInfoView.cpp b/llvm/tools/llvm-mca/Views/SchedulingInfoView.cpp
new file mode 100644
index 0000000000000..39c9caf685ba8
--- /dev/null
+++ b/llvm/tools/llvm-mca/Views/SchedulingInfoView.cpp
@@ -0,0 +1,212 @@
+//===--------------------- SchedulingInfoView.cpp --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the SchedulingInfoView API.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/SchedulingInfoView.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/JSON.h"
+
+namespace llvm {
+namespace mca {
+
+/// Extract the comment that follows \p MCI on its source line.
+///
+/// \p CommentString is always reset; it receives either a complete C style
+/// comment ("/* ... */", closing marker included) or a C++ style comment
+/// ("//" up to the end of the line), whichever starts first on the line. It
+/// is left empty when the line carries no comment.
+void SchedulingInfoView::getComment(const MCInst &MCI,
+                                    std::string &CommentString) const {
+  // Recognized comments are after assembly instructions on the same line.
+  // It is useful to add in comment scheduling information from architecture
+  // specification.
+  // '#' comment mark is not supported by llvm-mca
+  CommentString.clear();
+
+  // NOTE(review): this relies on the instruction location pointing into the
+  // NUL-terminated assembly buffer, as the previous implementation did.
+  const char *Start = MCI.getLoc().getPointer();
+  if (!Start)
+    return;
+
+  // Only the rest of the current line matters. split() returns the whole
+  // remaining text when there is no '\n', so the last line of a file without
+  // a trailing newline is handled too.
+  StringRef Line = StringRef(Start).split('\n').first;
+
+  const size_t CppPos = Line.find("//");
+  const size_t CPos = Line.find("/*");
+
+  // C style comment opened before any "//": keep it up to and including the
+  // closing "*/". An unterminated "/*" falls through to the "//" case.
+  if (CPos < CppPos) {
+    const size_t End = Line.find("*/", CPos + 2);
+    if (End != StringRef::npos) {
+      CommentString = Line.slice(CPos, End + 2).str();
+      return;
+    }
+  }
+
+  // C++ style comment: runs to the end of the line.
+  if (CppPos != StringRef::npos)
+    CommentString = Line.substr(CppPos).str();
+}
+
+/// Print the list of processor resources followed by one row of scheduling
+/// information per instruction of the input sequence.
+///
+/// Nothing is printed when the source sequence is empty. The report is
+/// formatted into a local string buffer and written to \p OS in one go.
+void SchedulingInfoView::printView(raw_ostream &OS) const {
+ std::string Buffer;
+ std::string CommentString;
+ raw_string_ostream TempStream(Buffer);
+ formatted_raw_ostream FOS(TempStream);
+
+ ArrayRef<MCInst> Source = getSource();
+ if (!Source.size())
+ return;
+
+ // Gather the per-instruction data before anything is printed.
+ IIVDVec IIVD(Source.size());
+ collectData(IIVD);
+
+ // First section: one line per processor resource, "[index] - Name:NumUnits",
+ // followed by the names of its sub-units when it has any.
+ FOS << "\n\nResources:\n";
+ const MCSchedModel &SM = getSubTargetInfo().getSchedModel();
+ // The scan starts at resource kind 1; ResourceIndex only counts the
+ // resources that are actually printed.
+ for (unsigned I = 1, ResourceIndex = 0, E = SM.getNumProcResourceKinds();
+ I < E; ++I) {
+ const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
+ unsigned NumUnits = ProcResource.NumUnits;
+ // Skip invalid resources with zero units.
+ if (!NumUnits)
+ continue;
+
+ FOS << '[' << ResourceIndex << ']';
+ FOS.PadToColumn(6);
+ FOS << "- " << ProcResource.Name << ':' << NumUnits;
+ if (ProcResource.SubUnitsIdxBegin) {
+ FOS.PadToColumn(20);
+ for (unsigned U = 0; U < NumUnits; ++U) {
+ FOS << SM.getProcResource(ProcResource.SubUnitsIdxBegin[U])->Name;
+ if ((U + 1) < NumUnits) {
+ FOS << ", ";
+ }
+ }
+ }
+ FOS << '\n';
+ ResourceIndex++;
+ }
+
+ // Second section: legend of the columns, then the table itself.
+ FOS << "\n\nScheduling Info:\n";
+ FOS << "[1]: #uOps\n[2]: Latency\n[3]: Bypass Latency\n"
+ << "[4]: Throughput\n[5]: Resources\n"
+ << "[6]: LLVM OpcodeName\n[7]: Instruction\n"
+ << "[8]: Comment if any\n";
+
+ // Column at which each field after the first one starts: paddings[0] is the
+ // start of field [2], paddings[6] the start of field [8].
+ std::vector<unsigned> paddings = {7, 12, 18, 27, 94, 113, 150};
+ // Header row: " [1]" .. " [7]", each padded to the start of the next field.
+ for (unsigned i = 0; i < paddings.size(); i++) {
+ FOS << " [" << i + 1 << "]";
+ FOS.PadToColumn(paddings[i]);
+ }
+ FOS << "[" << paddings.size() + 1 << "]\n";
+
+ // One row per instruction; fields are separated by "| ", except the
+ // trailing source comment which is only preceded by a space.
+ for (const auto &[Index, IIVDEntry, Inst] : enumerate(IIVD, Source)) {
+ getComment(Inst, CommentString);
+
+ FOS << " " << IIVDEntry.NumMicroOpcodes;
+ FOS.PadToColumn(paddings[0]);
+ FOS << "| " << IIVDEntry.Latency;
+ FOS.PadToColumn(paddings[1]);
+ FOS << "| " << IIVDEntry.Bypass;
+ FOS.PadToColumn(paddings[2]);
+ // A missing throughput value is printed as "-".
+ if (IIVDEntry.Throughput) {
+ double RT = *IIVDEntry.Throughput;
+ FOS << "| " << format("%.2f", RT);
+ } else {
+ FOS << "| -";
+ }
+ FOS.PadToColumn(paddings[3]);
+ FOS << "| " << IIVDEntry.Resources;
+ FOS.PadToColumn(paddings[4]);
+ FOS << "| " << IIVDEntry.OpcodeName;
+ FOS.PadToColumn(paddings[5]);
+ FOS << "| " << printInstructionString(Inst);
+ FOS.PadToColumn(paddings[6]);
+ FOS << ' ' << CommentString << '\n';
+ }
+
+ // Flush the formatted stream into Buffer before handing it to the caller.
+ FOS.flush();
+ OS << Buffer;
+}
+
+/// Fill \p IIVD with the scheduling data of every instruction of the source
+/// sequence, pairing the i-th instruction with the i-th entry.
+///
+/// For each instruction the scheduling class is looked up (through the
+/// instrument manager when instruments are attached to the instruction),
+/// variant classes are resolved, and the entry receives the opcode name, the
+/// number of micro-ops, the latency, the latency minus the forwarding delay
+/// ("Bypass"), the throughput (inverse of the reciprocal throughput) and a
+/// comma separated list of the consumed processor resources.
+///
+/// NOTE(review): Bypass is an unsigned subtraction and Throughput divides by
+/// the reciprocal throughput; presumably the forwarding delay never exceeds
+/// the latency and the reciprocal throughput is never zero - confirm.
+void SchedulingInfoView::collectData(
+ MutableArrayRef<SchedulingInfoViewData> IIVD) const {
+ const MCSubtargetInfo &STI = getSubTargetInfo();
+ const MCSchedModel &SM = STI.getSchedModel();
+ for (const auto &[Inst, IIVDEntry] : zip(getSource(), IIVD)) {
+ const MCInstrDesc &MCDesc = MCII.get(Inst.getOpcode());
+
+ // Obtain the scheduling class information from the instruction
+ // and instruments.
+ auto IVecIt = InstToInstruments.find(&Inst);
+ unsigned SchedClassID =
+ IVecIt == InstToInstruments.end()
+ ? MCDesc.getSchedClass()
+ : IM.getSchedClassID(MCII, Inst, IVecIt->second);
+ unsigned CPUID = SM.getProcessorID();
+
+ // Try to solve variant scheduling classes.
+ while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
+ SchedClassID =
+ STI.resolveVariantSchedClass(SchedClassID, &Inst, &MCII, CPUID);
+
+ const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
+ IIVDEntry.OpcodeName = (std::string)MCII.getName(Inst.getOpcode());
+ IIVDEntry.NumMicroOpcodes = SCDesc.NumMicroOps;
+ IIVDEntry.Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
+ IIVDEntry.Bypass =
+ IIVDEntry.Latency - MCSchedModel::getForwardingDelayCycles(STI, SCDesc);
+ IIVDEntry.Throughput =
+ 1.0 / MCSchedModel::getReciprocalThroughput(STI, SCDesc);
+ raw_string_ostream TempStream(IIVDEntry.Resources);
+
+ // List every write resource of the class that has a non-zero
+ // ReleaseAtCycle, separated by ", ".
+ const MCWriteProcResEntry *Index = STI.getWriteProcResBegin(&SCDesc);
+ const MCWriteProcResEntry *Last = STI.getWriteProcResEnd(&SCDesc);
+ auto sep = "";
+ for (; Index != Last; ++Index) {
+ if (!Index->ReleaseAtCycle)
+ continue;
+ const MCProcResourceDesc *MCProc =
+ SM.getProcResource(Index->ProcResourceIdx);
+ if (Index->ReleaseAtCycle != 1) {
+ // Output ReleaseAtCycle between [] if not 1 (default)
+ TempStream << sep
+ << format("%s[%d]", MCProc->Name, Index->ReleaseAtCycle);
+ } else {
+ TempStream << sep << format("%s", MCProc->Name);
+ }
+ sep = ", ";
+ }
+ TempStream.flush();
+ }
+}
+
+// Build the JSON description of one instruction from its
+// SchedulingInfoViewData: micro-op count, latency, latency with bypass and
+// throughput (0.0 when no throughput value is available).
+json::Object
+SchedulingInfoView::toJSON(const SchedulingInfoViewData &IIVD) const {
+  json::Object Entry;
+  Entry.try_emplace("NumMicroOpcodes", IIVD.NumMicroOpcodes);
+  Entry.try_emplace("Latency", IIVD.Latency);
+  Entry.try_emplace("LatencyWithBypass", IIVD.Bypass);
+  Entry.try_emplace("Throughput", IIVD.Throughput.value_or(0.0));
+  return Entry;
+}
+
+// Serialize the whole view: a JSON object whose "InstructionList" array holds
+// one entry per source instruction, each tagged with its index in the
+// sequence. An empty source sequence yields a JSON null.
+json::Value SchedulingInfoView::toJSON() const {
+  ArrayRef<MCInst> Source = getSource();
+  if (!Source.size())
+    return json::Value(nullptr);
+
+  IIVDVec Data(Source.size());
+  collectData(Data);
+
+  json::Array InstInfo;
+  for (const auto &[Position, Entry] : enumerate(Data)) {
+    json::Object InstJSON = toJSON(Entry);
+    InstJSON.try_emplace("Instruction", static_cast<unsigned>(Position));
+    InstInfo.push_back(std::move(InstJSON));
+  }
+  return json::Object({{"InstructionList", json::Value(std::move(InstInfo))}});
+}
+} // namespace mca.
+} // namespace llvm
diff --git a/llvm/tools/llvm-mca/Views/SchedulingInfoView.h b/llvm/tools/llvm-mca/Views/SchedulingInfoView.h
new file mode 100644
index 0000000000000..fbebfd07cfb0b
--- /dev/null
+++ b/llvm/tools/llvm-mca/Views/SchedulingInfoView.h
@@ -0,0 +1,97 @@
+//===--------------------- SchedulingInfoView.h ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the instruction scheduling info view.
+///
+/// The goal of the instruction scheduling info view is to print the number of
+/// micro-ops, the latency, the latency with bypass, the throughput and the
+/// pipeline resources of every instruction in the input sequence, followed by
+/// the LLVM opcode name, the instruction and its trailing comment, if any.
+///
+/// Example:
+///
+/// Scheduling Info:
+/// [1]: #uOps
+/// [2]: Latency
+/// [3]: Bypass Latency
+/// [4]: Throughput
+/// [5]: Resources
+/// [6]: LLVM OpcodeName
+/// [7]: Instruction
+/// [8]: Comment if any
+///
+///  [1] [2] [3] [4]    [5]       [6]       [7]                  [8]
+///  1 | 2 | 2 | 4.00 | V1UnitV | XTNv8i8 | xtn v20.8b, v17.8h   // XTN ...
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_SCHEDULINGINFOVIEW_H
+#define LLVM_TOOLS_LLVM_MCA_SCHEDULINGINFOVIEW_H
+
+#include "Views/InstructionView.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/CodeEmitter.h"
+#include "llvm/MCA/CustomBehaviour.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "llvm-mca"
+
+namespace llvm {
+namespace mca {
+
+/// A view that prints, for every instruction of the input sequence, its
+/// scheduling information: micro-ops, latency, latency with bypass,
+/// throughput, consumed resources, LLVM opcode name and source comment.
+class SchedulingInfoView : public InstructionView {
+ const llvm::MCInstrInfo &MCII;
+ // NOTE(review): CE and LoweredInsts are stored by the constructor but do not
+ // appear to be read by the member functions of this view - confirm whether
+ // they are still needed.
+ CodeEmitter &CE;
+ using UniqueInst = std::unique_ptr<Instruction>;
+ ArrayRef<UniqueInst> LoweredInsts;
+ const InstrumentManager &IM;
+ // Instruments attached to each instruction; used by collectData to select
+ // the scheduling class.
+ using InstToInstrumentsT =
+ DenseMap<const MCInst *, SmallVector<mca::Instrument *>>;
+ const InstToInstrumentsT &InstToInstruments;
+
+ // Scheduling data gathered for one instruction.
+ struct SchedulingInfoViewData {
+ unsigned NumMicroOpcodes = 0;
+ unsigned Latency = 0;
+ unsigned Bypass = 0; // Latency once ReadAdvance bypass cycles are removed
+ std::optional<double> Throughput = 0.0;
+ std::string OpcodeName = "";
+ std::string Resources = "";
+ };
+ using IIVDVec = SmallVector<SchedulingInfoViewData, 16>;
+
+ /// Place the data into the array of SchedulingInfoViewData IIVD.
+ void collectData(MutableArrayRef<SchedulingInfoViewData> IIVD) const;
+
+public:
+ SchedulingInfoView(const llvm::MCSubtargetInfo &ST,
+ const llvm::MCInstrInfo &II, CodeEmitter &C,
+ llvm::ArrayRef<llvm::MCInst> S, llvm::MCInstPrinter &IP,
+ ArrayRef<UniqueInst> LoweredInsts,
+ const InstrumentManager &IM,
+ const InstToInstrumentsT &InstToInstruments)
+ : InstructionView(ST, IP, S), MCII(II), CE(C), LoweredInsts(LoweredInsts),
+ IM(IM), InstToInstruments(InstToInstruments) {}
+
+ /// Extract comment (//, /* */) from the source assembly placed just after
+ /// instruction.
+ void getComment(const llvm::MCInst &Inst, std::string &CommentString) const;
+ /// Print the resource list and the per-instruction scheduling table.
+ void printView(llvm::raw_ostream &OS) const override;
+ StringRef getNameAsString() const override { return "SchedulingInfoView"; }
+ /// Serialize the data of all instructions.
+ json::Value toJSON() const override;
+ /// Serialize the data of a single instruction.
+ json::Object toJSON(const SchedulingInfoViewData &IIVD) const;
+};
+} // namespace mca
+} // namespace llvm
+
+#endif
diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp
index cc5d4f5fa05de..0fa76cb501006 100644
--- a/llvm/tools/llvm-mca/llvm-mca.cpp
+++ b/llvm/tools/llvm-mca/llvm-mca.cpp
@@ -30,6 +30,7 @@
#include "Views/ResourcePressureView.h"
#include "Views/RetireControlUnitStatistics.h"
#include "Views/SchedulerStatistics.h"
+#include "Views/SchedulingInfoView.h"
#include "Views/SummaryView.h"
#include "Views/TimelineView.h"
#include "llvm/MC/MCAsmBackend.h"
@@ -235,6 +236,10 @@ static cl::opt<bool> PrintInstructionInfoView(
cl::desc("Print the instruction info view (enabled by default)"),
cl::cat(ViewOptions), cl::init(true));
+static cl::opt<bool> PrintSchedulingInfoView(
+ "scheduling-info", cl::desc("Print the instruction scheduling info view"),
+ cl::cat(ViewOptions), cl::init(false));
+
static cl::opt<bool> EnableAllStats("all-stats",
cl::desc("Print all hardware statistics"),
cl::cat(ViewOptions), cl::init(false));
@@ -676,14 +681,22 @@ int main(int argc, char **argv) {
std::make_unique<mca::InstructionView>(*STI, *IP, Insts));
}
- // Create the views for this pipeline, execute, and emit a report.
- if (PrintInstructionInfoView) {
- Printer.addView(std::make_unique<mca::InstructionInfoView>(
- *STI, *MCII, CE, ShowEncoding, Insts, *IP, LoweredSequence,
- ShowBarriers, *IM, InstToInstruments));
+ // Create the views for instruction's scheduling info dump.
+ if (PrintSchedulingInfoView) {
+ Printer.addView(std::make_unique<mca::SchedulingInfoView>(
+ *STI, *MCII, CE, Insts, *IP, LoweredSequence, *IM,
+ InstToInstruments));
+ } else {
+ // Create the views for this pipeline, execute, and emit a report.
+ if (PrintInstructionInfoView) {
+ Printer.addView(std::make_unique<mca::InstructionInfoView>(
+ *STI, *MCII, CE, ShowEncoding, Insts, *IP, LoweredSequence,
+ ShowBarriers, *IM, InstToInstruments));
+ }
+
+ Printer.addView(
+ std::make_unique<mca::ResourcePressureView>(*STI, *IP, Insts));
}
- Printer.addView(
- std::make_unique<mca::ResourcePressureView>(*STI, *IP, Insts));
if (!runPipeline(*P))
return 1;
@@ -754,10 +767,17 @@ int main(int argc, char **argv) {
*STI, *IP, Insts, S.getNumIterations()));
}
- if (PrintInstructionInfoView)
- Printer.addView(std::make_unique<mca::InstructionInfoView>(
- *STI, *MCII, CE, ShowEncoding, Insts, *IP, LoweredSequence,
- ShowBarriers, *IM, InstToInstruments));
+ if (PrintSchedulingInfoView) {
+ Printer.addView(std::make_unique<mca::SchedulingInfoView>(
+ *STI, *MCII, CE, Insts, *IP, LoweredSequence, *IM,
+ InstToInstruments));
+ PrintResourcePressureView = false;
+ } else {
+ if (PrintInstructionInfoView)
+ Printer.addView(std::make_unique<mca::InstructionInfoView>(
+ *STI, *MCII, CE, ShowEncoding, Insts, *IP, LoweredSequence,
+ ShowBarriers, *IM, InstToInstruments));
+ }
// Fetch custom Views that are to be placed after the InstructionInfoView.
// Refer to the comment paired with the CB->getStartViews(*IP, Insts); line
>From 62983875488ca2ae7c47e5cf776d5cf2f48db386 Mon Sep 17 00:00:00 2001
From: Julien Villette <julien.villette at sipearl.com>
Date: Mon, 27 Jan 2025 12:10:44 +0100
Subject: [PATCH 2/3] [mca] update_mca_test_checks.py helpers to manage
llvm-mca -scheduling-info new option
When using llvm-mca -scheduling-info and if assembly
test contains comments with reference values of scheduling
info: <MicroOps> <Latency> <Forward Latency> <Throughput> <Units>
To check coherency between llvm-mca -scheduling-info output
and scheduling references in comment, use --check-sched-info.
Exit with an error and report the differences if any are found.
This is useful to check new scheduling info patches, as
we can specify source documentation references for each
instruction and so understand differences more easily.
Example of comment in AArch64/Neoverse/V1-scheduling-info.s:
abs D15, D23 // ABS <V><d>, <V><n> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
llvm-mca -scheduling-info output:
1 2 2 4.00 - ABSv1i64 V1UnitSVE01, V1UnitV,
abs d15, d23 // ABS <V><d>, <V><n> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
update_mca_test_checks.py searches for 4 values at the beginning of the line
and after the comment marker // and compares them. The order of the values
must be the same as in the llvm-mca output. It also checks that all resources
in the comment (reference) are included in the llvm-mca output.
It is possible to update the scheduling information references of the test
source using the --update-sched-info option. If you want to update both the
test source references and the llvm-mca output references, you have to run
update_mca_test_checks.py --update-sched-info twice: the first time to update
the scheduling information references and the second time to update the new
llvm-mca output reference.
---
llvm/utils/update_mca_test_checks.py | 183 +++++++++++++++++++++++++++
1 file changed, 183 insertions(+)
diff --git a/llvm/utils/update_mca_test_checks.py b/llvm/utils/update_mca_test_checks.py
index 486cb66b827f3..67ae49c3b6291 100755
--- a/llvm/utils/update_mca_test_checks.py
+++ b/llvm/utils/update_mca_test_checks.py
@@ -12,6 +12,7 @@
import os
import sys
import warnings
+import re
from UpdateTestChecks import common
@@ -64,6 +65,18 @@ def _get_parser():
default="llvm-mca",
help="the binary to use to generate the test case " "(default: llvm-mca)",
)
+ parser.add_argument(
+ "--check-sched-info",
+ action="store_true",
+ help="check scheduling info if references are given "
+ "in comment after each instruction",
+ )
+ parser.add_argument(
+ "--update-sched-info",
+ action="store_true",
+ help="updating scheduling info references given "
+ "in comment after each instruction",
+ )
parser.add_argument("tests", metavar="<test-path>", nargs="+")
return parser
@@ -245,6 +258,156 @@ def _align_matching_blocks(all_blocks, farthest_indexes):
return False
+def _check_sched_values(line, scheds, units, err, updates):
+ """Check for scheduling values differences
+ between values reported by llvm-mca with -scheduling-info option
+ and values in comment at the end of assembly instruction line (//).
+ Reference units must be included in the list reported by llvm-mca.
+ """
+
+ _err = []
+ # Got zip of llvm output and values from comment
+ infos = ["MicroOps", "Latency", "Forward latency", "Throughput"]
+ sched_info = zip(infos, scheds[0].split(), scheds[1].split())
+ for si in sched_info:
+ if float(si[1]) != float(si[2]):
+ updates.add("sched")
+ _err.append(
+ "\t=> {} LLVM value {} != reference value in comment {}\n".format(
+ si[0], si[1], si[2]
+ )
+ )
+
+ for u in units[1]:
+ if not u in units[0]:
+ updates.add("units")
+ _err.append(
+ "\t=> LLVM units {} != reference units in comment {}\n".format(
+ units[0], units[1]
+ )
+ )
+ break
+
+ if len(_err) > 0:
+ err.append("{}\n{}".format(line, "".join(_err)))
+ return True
+
+ return False
+
+
+def _replace_values(oldvalue, newvalue):
+ """Replacing values with the same format (spaces)
+ than oldvalue
+ """
+
+ fmt = re.sub("[0-9.]+", "{}", oldvalue)
+ return fmt.format(*newvalue.split())
+
+
+def _has_comment(line, cmt_format):
+ """Returns True if line contains C++ or C style
+ comment. Set cmt_format first and optional second
+ comment caracters.
+ """
+
+ cpp_comment = re.search("\/\/", line)
+ c_comment = re.search("\/\*", line)
+ if "//" in line:
+ cmt_format.append("//")
+ return True
+
+ if "/*" in line:
+ cmt_format.append("/*")
+ cmt_format.append("*/")
+ return True
+
+ return False
+
+
+def _sched_info(raw_tool_output, test_path, check_sched_info, update_sched_info):
+ """Check the scheduling info given in the assembly comment that follows
+ each instruction against the llvm-mca -scheduling-info output.
+
+ Recognized form is:
+ 1 | 2 | 2 | 4.00 | V1UnitV | ABSv1i64 | abs d15, d23
+ // ABS <V><d>, <V><n> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+
+ Format:
+ [1] // [2] \\ [3] \\ [4]
+ [1]: <llvm-mca output> <asm instruction>
+ [2]: <Architecture description>
+ [3]: <Scheduling info reference>
+ [4]: <micro ops> <latency> <forward latency> <throughput> <units>
+
+ <llvm-mca output> with -scheduling-info option:
+ <MicroOps> | <latency> | <fwd latency> | <throughput> |
+ <units> | <llvm opcode> |
+
+ The goal is to check [4] against the llvm-mca -scheduling-info output [1].
+ This makes it easy to check scheduling info when reviewing scheduling
+ info merge requests.
+ If a difference is found, the comment should be fixed and checked
+ against documentation.
+
+ When update_sched_info is set, the comments of the test file test_path
+ are rewritten with the llvm-mca values. When check_sched_info is set,
+ an Error listing the differences is raised if any were found.
+ """
+
+ # Both patterns anchor either at the start of the line (llvm-mca columns)
+ # or right after a backslash (reference values inside the comment).
+ # NOTE(review): these are non-raw string literals containing "\s";
+ # presumably raw strings were intended - confirm.
+ scheduling_info = re.compile(
+ "(^\s+|\\\\\s+)([0-9]+[\s|]+[0-9]+" "[\s|]+[0-9]+[\s|]+\s+[0-9.]+)"
+ )
+ units_info = re.compile(
+ "(^\s+|\\\\\s+)[0-9]+[\s|]+[0-9]+" "[\s|]+[0-9]+[\s|]+\s+[0-9.]+[\s|]+([^|*/]+)"
+ )
+
+ # fixes maps an instruction index to its corrected comment.
+ fixes = {}
+ err = []
+ instr_idx = 0
+ for b in raw_tool_output.split("\n\n"):
+ for line in b.splitlines():
+ cmt_format = []
+ if _has_comment(line, cmt_format):
+ scheds = scheduling_info.findall(line)
+ scheds = [s[1].replace("|", "") for s in scheds]
+ # Two matches are needed: the llvm-mca values and the reference values.
+ if len(scheds) == 2:
+ cmt = cmt_format[0] + line.split(cmt_format[0])[1]
+ units = units_info.findall(line)
+ c_units = [re.sub("\s", "", u[1]).split(",") for u in units]
+ updates = set()
+ if _check_sched_values(line, scheds, c_units, err, updates):
+ if update_sched_info:
+ if "sched" in updates:
+ cmt = cmt.replace(
+ scheds[1], _replace_values(scheds[1], scheds[0])
+ )
+ if "units" in updates:
+ cmt = cmt.replace(units[1][1], units[0][1])
+
+ fixes[instr_idx] = cmt
+ instr_idx = instr_idx + 1
+
+ if update_sched_info:
+ with open(test_path) as f:
+ # Overwrite test with new fixed comments if any.
+ # Test file will be read again just before writing final checking
+ output_lines = []
+ instr_idx = 0
+
+ for line in f:
+ out = line.rstrip()
+ cmt_format = []
+ # Lines starting with '#' are left as they are and are not counted.
+ if _has_comment(line, cmt_format) and not re.match("^#", line):
+ if fixes.get(instr_idx) is not None:
+ out = line.split(cmt_format[0])[0] + fixes[instr_idx]
+
+ instr_idx = instr_idx + 1
+
+ output_lines.append(out)
+
+ with open(test_path, "wb") as f:
+ f.writelines(["{}\n".format(l).encode("utf-8") for l in output_lines])
+
+ if check_sched_info:
+ if len(err) > 0:
+ # NOTE(review): Error is not defined in this hunk; presumably declared
+ # elsewhere in the script.
+ raise Error("{}".format("".join(err)))
+
def _get_block_infos(run_infos, test_path, args, common_prefix): # noqa
"""For each run line, run the tool with the specified args and collect the
output. We use the concept of 'blocks' for uniquing, where a block is
@@ -289,6 +452,20 @@ def _block_key(tool_args, prefixes):
line if line.strip() else "" for line in raw_tool_output.splitlines()
)
+ # Check if -scheduling-info passed to llvm-mca to check comments if any
+ if "-scheduling-info" in tool_args:
+ _sched_info(
+ raw_tool_output,
+ test_path,
+ args.check_sched_info,
+ args.update_sched_info,
+ )
+ else:
+ if args.check_sched_info:
+ _warn("--check-sched-info: ignored: need llvm-mca -scheduling-info")
+ if args.update_sched_info:
+ _warn("--update-sched-info: ignored: need llvm-mca -scheduling-info")
+
# Split blocks, stripping all trailing whitespace, but keeping preceding
# whitespace except for newlines so that columns will line up visually.
all_blocks[key] = [
@@ -550,6 +727,12 @@ def update_test_file(args, test_path, autogenerated_note):
run_infos = _get_run_infos(run_lines, args)
common_prefix, prefix_pad = _get_useful_prefix_info(run_infos)
block_infos = _get_block_infos(run_infos, test_path, args, common_prefix)
+
+ if args.update_sched_info:
+ # Read again input lines in case of changes (scheduling info updates in comments)
+ with open(test_path) as f:
+ input_lines = [l.rstrip() for l in f]
+
_write_output(
test_path,
input_lines,
>From 84e754240c004f01ff3756d07241e2181cda5693 Mon Sep 17 00:00:00 2001
From: Julien Villette <julien.villette at sipearl.com>
Date: Thu, 13 Feb 2025 10:40:39 +0100
Subject: [PATCH 3/3] [MCA] SchedulingInfoView: consider negative
ReadAdvanceCycle and AcquireAtCycle
MR #126703:
- ReadAdvance cycles can be negative, so add ForwardingDelayCycles to the latency returned by computeInstrLatency.
---
llvm/include/llvm/MC/MCSchedule.h | 6 ++---
llvm/lib/MC/MCSchedule.cpp | 22 +++++++++----------
.../llvm-mca/Views/SchedulingInfoView.cpp | 14 ++++++++----
3 files changed, 23 insertions(+), 19 deletions(-)
diff --git a/llvm/include/llvm/MC/MCSchedule.h b/llvm/include/llvm/MC/MCSchedule.h
index 4e72f633596a3..57c8ebeee02a7 100644
--- a/llvm/include/llvm/MC/MCSchedule.h
+++ b/llvm/include/llvm/MC/MCSchedule.h
@@ -402,9 +402,9 @@ struct MCSchedModel {
static unsigned getForwardingDelayCycles(ArrayRef<MCReadAdvanceEntry> Entries,
unsigned WriteResourceIdx = 0);
- /// Returns the maximum forwarding delay for maximum write latency.
- static unsigned getForwardingDelayCycles(const MCSubtargetInfo &STI,
- const MCSchedClassDesc &SCDesc);
+ /// Returns the bypass delay cycle for the maximum latency write cycle
+ static unsigned getBypassDelayCycles(const MCSubtargetInfo &STI,
+ const MCSchedClassDesc &SCDesc);
/// Returns the default initialized model.
static const MCSchedModel Default;
diff --git a/llvm/lib/MC/MCSchedule.cpp b/llvm/lib/MC/MCSchedule.cpp
index 36147f1fa9983..08e735cc4cfb6 100644
--- a/llvm/lib/MC/MCSchedule.cpp
+++ b/llvm/lib/MC/MCSchedule.cpp
@@ -175,16 +175,15 @@ MCSchedModel::getForwardingDelayCycles(ArrayRef<MCReadAdvanceEntry> Entries,
return std::abs(DelayCycles);
}
-unsigned
-MCSchedModel::getForwardingDelayCycles(const MCSubtargetInfo &STI,
- const MCSchedClassDesc &SCDesc) {
+unsigned MCSchedModel::getBypassDelayCycles(const MCSubtargetInfo &STI,
+ const MCSchedClassDesc &SCDesc) {
ArrayRef<MCReadAdvanceEntry> Entries = STI.getReadAdvanceEntries(SCDesc);
if (Entries.empty())
return 0;
unsigned Latency = 0;
- unsigned maxLatency = 0;
+ unsigned MaxLatency = 0;
unsigned WriteResourceID = 0;
unsigned DefEnd = SCDesc.NumWriteLatencyEntries;
@@ -192,21 +191,20 @@ MCSchedModel::getForwardingDelayCycles(const MCSubtargetInfo &STI,
// Lookup the definition's write latency in SubtargetInfo.
const MCWriteLatencyEntry *WLEntry =
STI.getWriteLatencyEntry(&SCDesc, DefIdx);
- // Early exit if we found an invalid latency.
- // Consider no bypass
+ unsigned Cycles = (unsigned)WLEntry->Cycles;
+ // Invalid latency. Consider 0 cycle latency
if (WLEntry->Cycles < 0)
- return 0;
- maxLatency = std::max(Latency, static_cast<unsigned>(WLEntry->Cycles));
- if (maxLatency > Latency) {
+ Cycles = 0;
+ if (Cycles > Latency) {
+ MaxLatency = Cycles;
WriteResourceID = WLEntry->WriteResourceID;
}
- Latency = maxLatency;
+ Latency = MaxLatency;
}
for (const MCReadAdvanceEntry &E : Entries) {
- if (E.WriteResourceID == WriteResourceID) {
+ if (E.WriteResourceID == WriteResourceID)
return E.Cycles;
- }
}
llvm_unreachable("WriteResourceID not found in MCReadAdvanceEntry entries");
diff --git a/llvm/tools/llvm-mca/Views/SchedulingInfoView.cpp b/llvm/tools/llvm-mca/Views/SchedulingInfoView.cpp
index 39c9caf685ba8..f05f4e311400e 100644
--- a/llvm/tools/llvm-mca/Views/SchedulingInfoView.cpp
+++ b/llvm/tools/llvm-mca/Views/SchedulingInfoView.cpp
@@ -154,8 +154,12 @@ void SchedulingInfoView::collectData(
IIVDEntry.OpcodeName = (std::string)MCII.getName(Inst.getOpcode());
IIVDEntry.NumMicroOpcodes = SCDesc.NumMicroOps;
IIVDEntry.Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
+ // Add extra latency due to delays in the forwarding data paths.
+ IIVDEntry.Latency += MCSchedModel::getForwardingDelayCycles(
+ STI.getReadAdvanceEntries(SCDesc));
+ // Get latency with bypass
IIVDEntry.Bypass =
- IIVDEntry.Latency - MCSchedModel::getForwardingDelayCycles(STI, SCDesc);
+ IIVDEntry.Latency - MCSchedModel::getBypassDelayCycles(STI, SCDesc);
IIVDEntry.Throughput =
1.0 / MCSchedModel::getReciprocalThroughput(STI, SCDesc);
raw_string_ostream TempStream(IIVDEntry.Resources);
@@ -168,10 +172,12 @@ void SchedulingInfoView::collectData(
continue;
const MCProcResourceDesc *MCProc =
SM.getProcResource(Index->ProcResourceIdx);
- if (Index->ReleaseAtCycle != 1) {
+ if (Index->ReleaseAtCycle > 1) {
// Output ReleaseAtCycle between [] if not 1 (default)
- TempStream << sep
- << format("%s[%d]", MCProc->Name, Index->ReleaseAtCycle);
+ // This is to be able to evaluate throughput.
+ // See getReciprocalThroughput in MCSchedule.cpp
+ // TODO: report AcquireAtCycle to check this scheduling info.
+ TempStream << sep << format("%s[%d]", MCProc->Name, Index->ReleaseAtCycle);
} else {
TempStream << sep << format("%s", MCProc->Name);
}
More information about the llvm-commits
mailing list