[llvm] 9394088 - [SVE][InstrFormats] Explicitly set hasSideEffects for all SVE instructions.
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 25 04:33:00 PST 2023
Author: Paul Walker
Date: 2023-01-25T12:30:46Z
New Revision: 9394088ca02693b3b7d98c2996f42ce653255e7d
URL: https://github.com/llvm/llvm-project/commit/9394088ca02693b3b7d98c2996f42ce653255e7d
DIFF: https://github.com/llvm/llvm-project/commit/9394088ca02693b3b7d98c2996f42ce653255e7d.diff
LOG: [SVE][InstrFormats] Explicitly set hasSideEffects for all SVE instructions.
The instruction property hasSideEffects relies on the presence of
tablegen isel patterns when constructing its value, unless
specifically overridden. Since adding SVE scheduling information
we've noticed this property flip-flop as isel patterns have been
updated. To make things consistent (and correct) this patch
explicitly sets the property for all SVE instructions.
This has resulted in the following notable changes:
* Normal load and store instructions no longer report having side
effects.
* All prefetch instructions correctly report having side effects.
* FFR related instructions continue to report having side effects.
This is likely overkill but I've chosen to remain cautious here.
* Almost all integer instructions no longer report having side effects.
* Almost all floating point instructions no longer report having side
effects, but do now report their potential for raising FP
exceptions. I do not know how to test the latter so I've again
taken the cautious route of tagging all floating point instructions
except for DUPs.
* The conflict detection intrinsics now report they don't touch
memory.
NOTE: SVE isel makes significant use of pseudo instructions but
this patch makes no effort to update them.
NOTE: We'll need a similar patch for SME but without a scheduling
model it'll be harder to verify the results.
Differential Revision: https://reviews.llvm.org/D142122
Added:
Modified:
llvm/include/llvm/IR/IntrinsicsAArch64.td
llvm/lib/Target/AArch64/SVEInstrFormats.td
llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
llvm/test/CodeGen/AArch64/sve-insert-vector.ll
llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s
llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index daafd1f7c3b77..a88d96f7f6a27 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1355,7 +1355,8 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
class SVE2_CONFLICT_DETECT_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMAnyPointerType<llvm_any_ty>,
- LLVMMatchType<1>]>;
+ LLVMMatchType<1>],
+ [IntrNoMem]>;
class SVE2_3VectorArg_Indexed_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index cef8d41218e80..1eaf799453eda 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -328,6 +328,7 @@ class sve_int_ptrue<bits<2> sz8_64, bits<3> opc, string asm, PPRRegOp pprty,
let Defs = !if(!eq (opc{0}, 1), [NZCV], []);
let ElementSize = pprty.ElementSize;
+ let hasSideEffects = 0;
let isReMaterializable = 1;
}
@@ -657,6 +658,7 @@ class sve_int_pfalse<bits<6> opc, string asm>
let Inst{8-4} = 0b00000;
let Inst{3-0} = Pd;
+ let hasSideEffects = 0;
let isReMaterializable = 1;
}
@@ -690,6 +692,7 @@ class sve_int_ptest<bits<6> opc, string asm, SDPatternOperator op>
let Inst{4-0} = 0b00000;
let Defs = [NZCV];
+ let hasSideEffects = 0;
let isCompare = 1;
}
@@ -724,8 +727,9 @@ class sve_int_pfirst_next<bits<2> sz8_64, bits<5> opc, string asm,
let Constraints = "$Pdn = $_Pdn";
let Defs = [NZCV];
- let isPTestLike = 1;
let ElementSize = pprty.ElementSize;
+ let hasSideEffects = 0;
+ let isPTestLike = 1;
}
multiclass sve_int_pfirst<bits<5> opc, string asm, SDPatternOperator op> {
@@ -772,6 +776,7 @@ class sve_int_count_r<bits<2> sz8_64, bits<5> opc, string asm,
!strconcat(asm, "\t$Rdn, $Pg, $_Rdn"),
!strconcat(asm, "\t$Rdn, $Pg"));
let Constraints = "$Rdn = $_Rdn";
+ let hasSideEffects = 0;
}
multiclass sve_int_count_r_s32<bits<5> opc, string asm,
@@ -877,6 +882,7 @@ class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve_int_count_v<bits<5> opc, string asm,
@@ -915,6 +921,8 @@ class sve_int_pcount_pred<bits<2> sz8_64, bits<4> opc, string asm,
let Inst{9} = opc{0};
let Inst{8-5} = Pn;
let Inst{4-0} = Rd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_pcount_pred<bits<4> opc, string asm,
@@ -951,6 +959,7 @@ class sve_int_count<bits<3> opc, string asm>
let Inst{9-5} = pattern;
let Inst{4-0} = Rd;
+ let hasSideEffects = 0;
let isReMaterializable = 1;
}
@@ -993,6 +1002,7 @@ class sve_int_countvlv<bits<5> opc, string asm, ZPRRegOp zprty>
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve_int_countvlv<bits<5> opc, string asm, ZPRRegOp zprty,
@@ -1027,6 +1037,7 @@ class sve_int_pred_pattern_a<bits<3> opc, string asm>
let Inst{4-0} = Rdn;
let Constraints = "$Rdn = $_Rdn";
+ let hasSideEffects = 0;
}
multiclass sve_int_pred_pattern_a<bits<3> opc, string asm,
@@ -1093,6 +1104,7 @@ class sve_int_pred_pattern_b<bits<5> opc, string asm, RegisterOperand dt,
!strconcat(asm, "\t$Rdn, $pattern, mul $imm4"));
let Constraints = "$Rdn = $_Rdn";
+ let hasSideEffects = 0;
}
multiclass sve_int_pred_pattern_b_s32<bits<5> opc, string asm,
@@ -1158,6 +1170,8 @@ class sve_int_perm_dup_r<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{21-10} = 0b100000001110;
let Inst{9-5} = Rn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_dup_r<string asm, SDPatternOperator op> {
@@ -1192,6 +1206,8 @@ class sve_int_perm_dup_i<bits<5> tsz, Operand immtype, string asm,
let Inst{15-10} = 0b001000;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_dup_i<string asm> {
@@ -1296,6 +1312,8 @@ class sve_int_perm_tbl<bits<2> sz8_64, bits<2> opc, string asm, ZPRRegOp zprty,
let Inst{10} = 0b0;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_tbl<string asm, SDPatternOperator op> {
@@ -1391,6 +1409,7 @@ class sve2_int_perm_tbx<bits<2> sz8_64, bits<2> opc, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let hasSideEffects = 0;
}
multiclass sve2_int_perm_tbx<string asm, bits<2> opc, SDPatternOperator op> {
@@ -1423,6 +1442,8 @@ class sve_int_perm_reverse_z<bits<2> sz8_64, string asm, ZPRRegOp zprty>
let Inst{21-10} = 0b111000001110;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_reverse_z<string asm, SDPatternOperator op> {
@@ -1461,6 +1482,8 @@ class sve_int_perm_reverse_p<bits<2> sz8_64, string asm, PPRRegOp pprty>
let Inst{8-5} = Pn;
let Inst{4} = 0b0;
let Inst{3-0} = Pd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_reverse_p<string asm, SDPatternOperator op> {
@@ -1489,6 +1512,8 @@ class sve_int_perm_unpk<bits<2> sz16_64, bits<2> opc, string asm,
let Inst{15-10} = 0b001110;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_unpk<bits<2> opc, string asm, SDPatternOperator op> {
@@ -1517,6 +1542,7 @@ class sve_int_perm_insrs<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_insrs<string asm, SDPatternOperator op> {
@@ -1547,6 +1573,7 @@ class sve_int_perm_insrv<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_insrv<string asm, SDPatternOperator op> {
@@ -1598,6 +1625,7 @@ class sve_int_perm_extract_i<string asm>
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_extract_i<string asm, SDPatternOperator op> {
@@ -1620,6 +1648,8 @@ class sve2_int_perm_extract_i_cons<string asm>
let Inst{12-10} = imm8{2-0};
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
//===----------------------------------------------------------------------===//
@@ -1643,6 +1673,8 @@ class sve_int_sel_vvv<bits<2> sz8_64, string asm, ZPRRegOp zprty>
let Inst{13-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_sel_vvv<string asm, SDPatternOperator op> {
@@ -1706,7 +1738,7 @@ class sve_int_pred_log<bits<4> opc, string asm>
!strconcat(asm, "\t$Pd, $Pg/z, $Pn, $Pm"));
let Defs = !if(!eq (opc{2}, 1), [NZCV], []);
-
+ let hasSideEffects = 0;
}
multiclass sve_int_pred_log<bits<4> opc, string asm, SDPatternOperator op,
@@ -1769,6 +1801,7 @@ class sve_int_log_imm<bits<2> opc, string asm>
let DecoderMethod = "DecodeSVELogicalImmInstruction";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve_int_log_imm<bits<2> opc, string asm, string alias, SDPatternOperator op> {
@@ -1814,8 +1847,9 @@ class sve_int_dup_mask_imm<string asm>
let Inst{17-5} = imms;
let Inst{4-0} = Zd;
- let isReMaterializable = 1;
let DecoderMethod = "DecodeSVELogicalImmInstruction";
+ let hasSideEffects = 0;
+ let isReMaterializable = 1;
}
multiclass sve_int_dup_mask_imm<string asm> {
@@ -1865,6 +1899,8 @@ class sve_int_bin_cons_arit_0<bits<2> sz8_64, bits<3> opc, string asm,
let Inst{12-10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_bin_cons_arit_0<bits<3> opc, string asm, SDPatternOperator op> {
@@ -1906,6 +1942,8 @@ class sve_fp_2op_i_p_zds<bits<2> sz, bits<3> opc, string asm,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_2op_i_p_zds<bits<3> opc, string asm, string Ps, Operand imm_ty, FPImmLeaf A, FPImmLeaf B, SDPatternOperator op> {
@@ -1944,6 +1982,8 @@ class sve_fp_2op_p_zds<bits<2> sz, bits<4> opc, string asm,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_2op_p_zds<bits<4> opc, string asm, string Ps,
@@ -2003,6 +2043,8 @@ class sve_fp_ftmad<bits<2> sz, string asm, ZPRRegOp zprty>
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_ftmad<string asm, SDPatternOperator op> {
@@ -2072,6 +2114,9 @@ class sve_fp_3op_u_zd<bits<2> sz, bits<3> opc, string asm, ZPRRegOp zprty>
let Inst{12-10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_3op_u_zd<bits<3> opc, string asm, SDPatternOperator op,
@@ -2125,6 +2170,8 @@ class sve_fp_3op_p_zds_a<bits<2> sz, bits<2> opc, string asm, ZPRRegOp zprty>
let Constraints = "$Zda = $_Zda";
let ElementSize = zprty.ElementSize;
let DestructiveInstType = DestructiveTernaryCommWithRev;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_3op_p_zds_a<bits<2> opc, string asm, string Ps,
@@ -2165,6 +2212,8 @@ class sve_fp_3op_p_zds_b<bits<2> sz, bits<2> opc, string asm,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_3op_p_zds_b<bits<2> opc, string asm, SDPatternOperator op,
@@ -2209,6 +2258,8 @@ class sve_fp_fma_by_indexed_elem<bits<2> sz, bits<2> opc, string asm,
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve2p1_fp_bfma_by_indexed_elem<string asm, bits<2> opc> {
@@ -2271,6 +2322,9 @@ class sve_fp_fmul_by_indexed_elem<bits<2> sz, bit o2, string asm, ZPRRegOp zprty
let Inst{10} = 0b0;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve2p1_fp_bfmul_by_indexed_elem<string asm> {
@@ -2339,6 +2393,8 @@ class sve_fp_fcmla<bits<2> sz, string asm, ZPRRegOp zprty>
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_fcmla<string asm, SDPatternOperator op> {
@@ -2379,6 +2435,8 @@ class sve_fp_fcmla_by_indexed_elem<bits<2> sz, string asm,
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_fcmla_by_indexed_elem<string asm, SDPatternOperator op> {
@@ -2427,6 +2485,8 @@ class sve_fp_fcadd<bits<2> sz, string asm, ZPRRegOp zprty>
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_fcadd<string asm, SDPatternOperator op> {
@@ -2465,6 +2525,8 @@ class sve2_fp_convert_precision<bits<4> opc, string asm,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve2_fp_convert_down_narrow<string asm, string op> {
@@ -2514,6 +2576,8 @@ class sve2_fp_pairwise_pred<bits<2> sz, bits<3> opc, string asm,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve2_fp_pairwise_pred<bits<3> opc, string asm,
@@ -2557,6 +2621,8 @@ class sve2_fp_mla_long_by_indexed_elem<bits<3> opc, string asm>
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve2_fp_mla_long_by_indexed_elem<bits<3> opc, string asm,
@@ -2592,6 +2658,8 @@ class sve2_fp_mla_long<bits<3> opc, string asm>
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve2_fp_mla_long<bits<3> opc, string asm, ValueType OutVT,
@@ -2620,6 +2688,8 @@ class sve_int_arith_vl<bit opc, string asm, bit streaming_sve = 0b0>
let Inst{11} = streaming_sve;
let Inst{10-5} = imm6;
let Inst{4-0} = Rd;
+
+ let hasSideEffects = 0;
}
class sve_int_read_vl_a<bit op, bits<5> opc2, string asm, bit streaming_sve = 0b0>
@@ -2638,6 +2708,7 @@ class sve_int_read_vl_a<bit op, bits<5> opc2, string asm, bit streaming_sve = 0b
let Inst{10-5} = imm6;
let Inst{4-0} = Rd;
+ let hasSideEffects = 0;
let isReMaterializable = 1;
}
@@ -2662,6 +2733,8 @@ class sve_int_perm_bin_perm_zz<bits<3> opc, bits<2> sz8_64, string asm,
let Inst{12-10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_bin_perm_zz<bits<3> opc, string asm,
@@ -2711,6 +2784,8 @@ class sve_fp_2op_p_zd<bits<7> opc, string asm, RegisterOperand i_zprtype,
let Constraints = "$Zd = $_Zd";
let DestructiveInstType = DestructiveUnaryPassthru;
let ElementSize = Sz;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_2op_p_zd<bits<7> opc, string asm,
@@ -2825,6 +2900,9 @@ class sve_fp_2op_u_zd<bits<2> sz, bits<3> opc, string asm,
let Inst{15-10} = 0b001100;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_2op_u_zd<bits<3> opc, string asm, SDPatternOperator op> {
@@ -2861,6 +2939,7 @@ class sve_int_bin_pred_arit_log<bits<2> sz8_64, bits<2> fmt, bits<3> opc,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve_int_bin_pred_log<bits<3> opc, string asm, string Ps,
@@ -2987,6 +3066,7 @@ class sve_int_mladdsub_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve_int_mladdsub_vvv_pred<bits<1> opc, string asm, SDPatternOperator op> {
@@ -3024,6 +3104,7 @@ class sve_int_mlas_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm,
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm, SDPatternOperator op> {
@@ -3061,6 +3142,7 @@ class sve2_int_mla<bits<2> sz, bits<5> opc, string asm,
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve2_int_mla<bit S, string asm, SDPatternOperator op> {
@@ -3106,6 +3188,7 @@ class sve2_int_mla_by_indexed_elem<bits<2> sz, bits<6> opc, string asm,
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve2_int_mla_by_indexed_elem<bits<2> opc, bit S, string asm,
@@ -3184,6 +3267,7 @@ class sve_intx_dot<bit sz, bit U, string asm, ZPRRegOp zprty1,
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
+ let hasSideEffects = 0;
}
multiclass sve_intx_dot<bit opc, string asm, SDPatternOperator op> {
@@ -3216,6 +3300,7 @@ class sve_intx_dot_by_indexed_elem<bit sz, bit U, string asm,
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
+ let hasSideEffects = 0;
}
multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm,
@@ -3262,6 +3347,7 @@ class sve2_complex_int_arith<bits<2> sz, bits<4> opc, string asm,
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve2_cintx_dot<string asm, SDPatternOperator op> {
@@ -3316,6 +3402,7 @@ class sve2_complex_int_arith_indexed<bits<2> sz, bits<4> opc, string asm,
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve2_cintx_dot_by_indexed_elem<string asm, SDPatternOperator op> {
@@ -3386,6 +3473,8 @@ class sve2_int_mul<bits<2> sz, bits<3> opc, string asm, ZPRRegOp zprty>
let Inst{12-10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve2_int_mul<bits<3> opc, string asm, SDPatternOperator op,
@@ -3430,6 +3519,8 @@ class sve2_int_mul_by_indexed_elem<bits<2> sz, bits<4> opc, string asm,
let Inst{13-10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve2_int_mul_by_indexed_elem<bits<4> opc, string asm,
@@ -3506,6 +3597,7 @@ class sve2_int_arith_pred<bits<2> sz, bits<6> opc, string asm,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve2_int_arith_pred<bits<6> opc, string asm, SDPatternOperator op,
@@ -3548,6 +3640,7 @@ class sve2_int_sadd_long_accum_pairwise<bits<2> sz, bit U, string asm,
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty1.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve2_int_sadd_long_accum_pairwise<bit U, string asm, SDPatternOperator op> {
@@ -3583,6 +3676,7 @@ class sve2_int_un_pred_arit<bits<2> sz, bit Q, bits<2> opc,
let Constraints = "$Zd = $_Zd";
let DestructiveInstType = DestructiveUnaryPassthru;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve2_int_un_pred_arit_s<bits<3> opc, string asm,
@@ -3642,6 +3736,8 @@ class sve2_wide_int_arith<bits<2> sz, bits<5> opc, string asm,
let Inst{14-10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve2_wide_int_arith_long<bits<5> opc, string asm,
@@ -3706,6 +3802,8 @@ class sve2_misc<bits<2> sz, bits<4> opc, string asm,
let Inst{13-10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve2_misc_bitwise<bits<4> opc, string asm, SDPatternOperator op> {
@@ -3750,6 +3848,7 @@ class sve2_bitwise_xor_interleaved<bits<2> sz, bits<1> opc, string asm,
let Constraints = "$Zd = $_Zd";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve2_bitwise_xor_interleaved<bit opc, string asm,
@@ -3783,6 +3882,8 @@ class sve2_bitwise_shift_left_long<bits<3> tsz8_64, bits<2> opc, string asm,
let Inst{11-10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve2_bitwise_shift_left_long<bits<2> opc, string asm,
@@ -3825,6 +3926,7 @@ class sve2_int_bin_shift_imm<bits<4> tsz8_64, bit opc, string asm,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let hasSideEffects = 0;
}
multiclass sve2_int_bin_shift_imm_left<bit opc, string asm,
@@ -3888,6 +3990,7 @@ class sve2_int_bin_accum_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve2_int_bin_accum_shift_imm_right<bits<2> opc, string asm,
@@ -3934,6 +4037,7 @@ class sve2_int_cadd<bits<2> sz, bit opc, string asm, ZPRRegOp zprty>
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve2_int_cadd<bit opc, string asm, SDPatternOperator op> {
@@ -3967,6 +4071,7 @@ class sve2_int_absdiff_accum<bits<2> sz, bits<4> opc, string asm,
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve2_int_absdiff_accum<bit opc, string asm, SDPatternOperator op> {
@@ -4026,6 +4131,8 @@ class sve2_int_bin_shift_imm_narrow_bottom<bits<3> tsz8_64, bits<3> opc,
let Inst{10} = 0b0;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve2_int_bin_shift_imm_right_narrow_bottom<bits<3> opc, string asm,
@@ -4066,6 +4173,7 @@ class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let hasSideEffects = 0;
}
multiclass sve2_int_bin_shift_imm_right_narrow_top<bits<3> opc, string asm,
@@ -4101,6 +4209,8 @@ class sve2_int_addsub_narrow_high_bottom<bits<2> sz, bits<2> opc, string asm,
let Inst{10} = 0b0; // Top
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve2_int_addsub_narrow_high_bottom<bits<2> opc, string asm,
@@ -4132,6 +4242,7 @@ class sve2_int_addsub_narrow_high_top<bits<2> sz, bits<2> opc, string asm,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let hasSideEffects = 0;
}
multiclass sve2_int_addsub_narrow_high_top<bits<2> opc, string asm,
@@ -4160,6 +4271,8 @@ class sve2_int_sat_extract_narrow_bottom<bits<3> tsz8_64, bits<2> opc, string as
let Inst{10} = 0b0;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve2_int_sat_extract_narrow_bottom<bits<2> opc, string asm,
@@ -4190,6 +4303,7 @@ class sve2_int_sat_extract_narrow_top<bits<3> tsz8_64, bits<2> opc, string asm,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let hasSideEffects = 0;
}
multiclass sve2_int_sat_extract_narrow_top<bits<2> opc, string asm,
@@ -4229,6 +4343,7 @@ class sve_int_un_pred_arit<bits<2> sz8_64, bits<4> opc,
let Constraints = "$Zd = $_Zd";
let DestructiveInstType = DestructiveUnaryPassthru;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve_int_un_pred_arit_0<bits<3> opc, string asm,
@@ -4381,6 +4496,7 @@ class sve_int_dup_imm<bits<2> sz8_64, string asm,
let Inst{12-5} = imm{7-0}; // imm8
let Inst{4-0} = Zd;
+ let hasSideEffects = 0;
let isReMaterializable = 1;
}
@@ -4422,6 +4538,7 @@ class sve_int_dup_fpimm<bits<2> sz8_64, Operand fpimmtype,
let Inst{12-5} = imm8;
let Inst{4-0} = Zd;
+ let hasSideEffects = 0;
let isReMaterializable = 1;
}
@@ -4458,6 +4575,7 @@ class sve_int_arith_imm0<bits<2> sz8_64, bits<3> opc, string asm,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve_int_arith_imm0<bits<3> opc, string asm, SDPatternOperator op> {
@@ -4490,6 +4608,7 @@ class sve_int_arith_imm<bits<2> sz8_64, bits<6> opc, string asm,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve_int_arith_imm1<bits<2> opc, string asm, SDPatternOperator op> {
@@ -4547,6 +4666,8 @@ class sve_int_bin_cons_log<bits<2> opc, string asm>
let Inst{15-10} = 0b001100;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_bin_cons_log<bits<2> opc, string asm, SDPatternOperator op> {
@@ -4585,6 +4706,7 @@ class sve2_int_bitwise_ternary_op_d<bits<3> opc, string asm>
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve2_int_bitwise_ternary_op<bits<3> opc, string asm, SDPatternOperator op,
@@ -4631,6 +4753,7 @@ class sve2_int_rotate_right_imm<bits<4> tsz8_64, string asm,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve2_int_rotate_right_imm<string asm, SDPatternOperator op> {
@@ -4676,6 +4799,7 @@ class sve_int_dup_fpimm_pred<bits<2> sz, Operand fpimmtype,
let Constraints = "$Zd = $_Zd";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve_int_dup_fpimm_pred<string asm> {
@@ -4711,6 +4835,7 @@ class sve_int_dup_imm_pred<bits<2> sz8_64, bit m, string asm,
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve_int_dup_imm_pred_merge_inst<
@@ -4816,6 +4941,7 @@ class sve_int_cmp<bit cmp_1, bits<2> sz8_64, bits<3> opc, string asm,
let Defs = [NZCV];
let ElementSize = pprty.ElementSize;
+ let hasSideEffects = 0;
let isPTestLike = 1;
}
@@ -4905,6 +5031,7 @@ class sve_int_scmp_vi<bits<2> sz8_64, bits<3> opc, string asm, PPRRegOp pprty,
let Defs = [NZCV];
let ElementSize = pprty.ElementSize;
+ let hasSideEffects = 0;
let isPTestLike = 1;
}
@@ -4978,6 +5105,7 @@ class sve_int_ucmp_vi<bits<2> sz8_64, bits<2> opc, string asm, PPRRegOp pprty,
let Defs = [NZCV];
let ElementSize = pprty.ElementSize;
+ let hasSideEffects = 0;
let isPTestLike = 1;
}
@@ -5020,6 +5148,7 @@ class sve_int_cterm<bit sz, bit opc, string asm, RegisterClass rt>
let Inst{3-0} = 0b0000;
let Defs = [NZCV];
+ let hasSideEffects = 0;
}
class sve_int_while_rr<bits<2> sz8_64, bits<4> opc, string asm,
@@ -5042,6 +5171,7 @@ class sve_int_while_rr<bits<2> sz8_64, bits<4> opc, string asm,
let Defs = [NZCV];
let ElementSize = pprty.ElementSize;
+ let hasSideEffects = 0;
let isWhile = 1;
}
@@ -5088,6 +5218,7 @@ class sve2_int_while_rr<bits<2> sz8_64, bits<1> rw, string asm,
let Defs = [NZCV];
let ElementSize = pprty.ElementSize;
+ let hasSideEffects = 0;
let isWhile = 1;
}
@@ -5124,6 +5255,9 @@ class sve_fp_fast_red<bits<2> sz, bits<3> opc, string asm,
let Inst{12-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4-0} = Vd;
+
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_fast_red<bits<3> opc, string asm, SDPatternOperator op> {
@@ -5163,6 +5297,8 @@ class sve_fp_2op_p_vd<bits<2> sz, bits<3> opc, string asm,
let Inst{4-0} = Vdn;
let Constraints = "$Vdn = $_Vdn";
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_2op_p_vd<bits<3> opc, string asm, SDPatternOperator op> {
@@ -5203,6 +5339,9 @@ class sve_fp_3op_p_pd<bits<2> sz, bits<3> opc, string asm, PPRRegOp pprty,
let Inst{9-5} = Zn;
let Inst{4} = opc{0};
let Inst{3-0} = Pd;
+
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_3op_p_pd<bits<3> opc, string asm, SDPatternOperator op> {
@@ -5259,6 +5398,9 @@ class sve_fp_2op_p_pd<bits<2> sz, bits<3> opc, string asm, PPRRegOp pprty,
let Inst{9-5} = Zn;
let Inst{4} = opc{0};
let Inst{3-0} = Pd;
+
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_2op_p_pd<bits<3> opc, string asm,
@@ -5312,6 +5454,7 @@ class sve_int_index_ii<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{9-5} = imm5;
let Inst{4-0} = Zd;
+ let hasSideEffects = 0;
let isReMaterializable = 1;
}
@@ -5356,6 +5499,8 @@ class sve_int_index_ir<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{15-10} = 0b010010;
let Inst{9-5} = imm5;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_index_ir<string asm, SDPatternOperator mulop, SDPatternOperator muloneuseop> {
@@ -5423,6 +5568,8 @@ class sve_int_index_ri<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{15-10} = 0b010001;
let Inst{9-5} = Rn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_index_ri<string asm> {
@@ -5457,6 +5604,8 @@ class sve_int_index_rr<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{15-10} = 0b010011;
let Inst{9-5} = Rn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_index_rr<string asm, SDPatternOperator mulop> {
@@ -5514,6 +5663,7 @@ class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<4> opc, string asm,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveBinaryImm;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm, string Ps,
@@ -5630,6 +5780,7 @@ class sve_int_bin_pred_shift<bits<2> sz8_64, bit wide, bits<3> opc,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve_int_bin_pred_shift<bits<3> opc, string asm, string Ps,
@@ -5694,6 +5845,8 @@ class sve_int_bin_cons_shift_wide<bits<2> sz8_64, bits<2> opc, string asm,
let Inst{11-10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_bin_cons_shift_wide<bits<2> opc, string asm, SDPatternOperator op> {
@@ -5724,6 +5877,8 @@ class sve_int_bin_cons_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
let Inst{11-10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_bin_cons_shift_imm_left<bits<2> opc, string asm,
@@ -5790,6 +5945,7 @@ class sve_mem_cst_si<bits<2> msz, bits<2> esz, string asm,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -5826,6 +5982,7 @@ class sve_mem_est_si<bits<2> sz, bits<2> nregs, RegisterOperand VecList,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -5857,6 +6014,7 @@ class sve_mem_128b_est_si<bits<2> nregs, RegisterOperand VecList,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -5888,6 +6046,7 @@ class sve_mem_est_ss<bits<2> sz, bits<2> nregs, RegisterOperand VecList,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -5911,6 +6070,7 @@ class sve_mem_128b_est_ss<bits<2> nregs, RegisterOperand VecList,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -5933,6 +6093,7 @@ class sve_mem_cst_ss_base<bits<4> dtype, string asm,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -5963,6 +6124,7 @@ class sve_mem_cstnt_si<bits<2> msz, string asm, RegisterOperand VecList>
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -5997,6 +6159,7 @@ class sve_mem_cstnt_ss_base<bits<2> msz, string asm, RegisterOperand listty,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -6027,6 +6190,7 @@ class sve2_mem_sstnt_vs_base<bits<3> opc, string asm,
let Inst{9-5} = Zn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -6083,6 +6247,7 @@ class sve_mem_sst_sv<bits<3> opc, bit xs, bit scaled, string asm,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -6186,6 +6351,7 @@ class sve_mem_sst_sv2<bits<2> msz, bit scaled, string asm,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -6234,6 +6400,7 @@ class sve_mem_sst_vi<bits<3> opc, string asm, ZPRRegOp zprty,
let Inst{9-5} = Zn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -6286,6 +6453,7 @@ class sve_mem_z_spill<string asm>
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -6312,6 +6480,7 @@ class sve_mem_p_spill<string asm>
let Inst{4} = 0b0;
let Inst{3-0} = Pt;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -6344,6 +6513,8 @@ class sve_int_perm_bin_perm_pp<bits<3> opc, bits<2> sz8_64, string asm,
let Inst{8-5} = Pn;
let Inst{4} = 0b0;
let Inst{3-0} = Pd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_bin_perm_pp<bits<3> opc, string asm,
@@ -6372,6 +6543,8 @@ class sve_int_perm_punpk<bit opc, string asm>
let Inst{8-5} = Pn;
let Inst{4} = 0b0;
let Inst{3-0} = Pd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_punpk<bit opc, string asm, SDPatternOperator op> {
@@ -6398,6 +6571,7 @@ class sve_int_rdffr_pred<bit s, string asm>
let Defs = !if(s, [NZCV], []);
let Uses = [FFR];
+ let hasSideEffects = 1;
}
multiclass sve_int_rdffr_pred<bit s, string asm, SDPatternOperator op> {
@@ -6421,6 +6595,7 @@ class sve_int_rdffr_unpred<string asm> : I<
let Inst{3-0} = Pd;
let Uses = [FFR];
+ let hasSideEffects = 1;
}
multiclass sve_int_rdffr_unpred<string asm, SDPatternOperator op> {
@@ -6444,8 +6619,8 @@ class sve_int_wrffr<string asm, SDPatternOperator op>
let Inst{8-5} = Pn;
let Inst{4-0} = 0b00000;
- let hasSideEffects = 1;
let Defs = [FFR];
+ let hasSideEffects = 1;
}
class sve_int_setffr<string asm, SDPatternOperator op>
@@ -6455,8 +6630,8 @@ class sve_int_setffr<string asm, SDPatternOperator op>
[(op)]>, Sched<[]> {
let Inst{31-0} = 0b00100101001011001001000000000000;
- let hasSideEffects = 1;
let Defs = [FFR];
+ let hasSideEffects = 1;
}
//===----------------------------------------------------------------------===//
@@ -6482,6 +6657,7 @@ class sve_int_perm_clast_rz<bits<2> sz8_64, bit ab, string asm,
let Inst{4-0} = Rdn;
let Constraints = "$Rdn = $_Rdn";
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_clast_rz<bit ab, string asm, SDPatternOperator op> {
@@ -6515,6 +6691,7 @@ class sve_int_perm_clast_vz<bits<2> sz8_64, bit ab, string asm,
let Inst{4-0} = Vdn;
let Constraints = "$Vdn = $_Vdn";
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_clast_vz<bit ab, string asm, SDPatternOperator op> {
@@ -6551,6 +6728,7 @@ class sve_int_perm_clast_zz<bits<2> sz8_64, bit ab, string asm,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_clast_zz<bit ab, string asm, SDPatternOperator op> {
@@ -6588,6 +6766,8 @@ class sve_int_perm_last_r<bits<2> sz8_64, bit ab, string asm,
let Inst{12-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4-0} = Rd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_last_r<bit ab, string asm, SDPatternOperator op> {
@@ -6619,6 +6799,8 @@ class sve_int_perm_last_v<bits<2> sz8_64, bit ab, string asm,
let Inst{12-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4-0} = Vd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_last_v<bit ab, string asm, SDPatternOperator op> {
@@ -6653,6 +6835,7 @@ class sve_int_perm_splice<bits<2> sz8_64, string asm, ZPRRegOp zprty>
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_splice<string asm, SDPatternOperator op> {
@@ -6688,6 +6871,8 @@ class sve2_int_perm_splice_cons<bits<2> sz8_64, string asm,
let Inst{12-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve2_int_perm_splice_cons<string asm> {
@@ -6718,6 +6903,7 @@ class sve_int_perm_rev<bits<2> sz8_64, bits<2> opc, string asm,
let Constraints = "$Zd = $_Zd";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_rev_rbit<string asm, SDPatternOperator op> {
@@ -6775,6 +6961,7 @@ class sve_int_perm_cpy_r<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Constraints = "$Zd = $_Zd";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_cpy_r<string asm, SDPatternOperator op> {
@@ -6821,6 +7008,7 @@ class sve_int_perm_cpy_v<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Constraints = "$Zd = $_Zd";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_cpy_v<string asm, SDPatternOperator op> {
@@ -6865,6 +7053,8 @@ class sve_int_perm_compact<bit sz, string asm, ZPRRegOp zprty>
let Inst{12-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_compact<string asm, SDPatternOperator op> {
@@ -6900,9 +7090,10 @@ class sve_mem_cld_si_base<bits<4> dtype, bit nf, string asm,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
- let mayLoad = 1;
- let Uses = !if(nf, [FFR], []);
let Defs = !if(nf, [FFR], []);
+ let Uses = !if(nf, [FFR], []);
+ let hasSideEffects = nf;
+ let mayLoad = 1;
}
multiclass sve_mem_cld_si_base<bits<4> dtype, bit nf, string asm,
@@ -6946,6 +7137,7 @@ class sve_mem_cldnt_si_base<bits<2> msz, string asm, RegisterOperand VecList>
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -6980,6 +7172,7 @@ class sve_mem_cldnt_ss_base<bits<2> msz, string asm, RegisterOperand VecList,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -7007,6 +7200,7 @@ class sve_mem_ldqr_si<bits<2> sz, string asm, RegisterOperand VecList>
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -7038,6 +7232,7 @@ class sve_mem_ldqr_ss<bits<2> sz, string asm, RegisterOperand VecList,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -7069,6 +7264,7 @@ class sve_mem_ld_dup<bits<2> dtypeh, bits<2> dtypel, string asm,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -7103,9 +7299,10 @@ class sve_mem_cld_ss_base<bits<4> dtype, bit ff, dag iops, string asm,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
- let mayLoad = 1;
- let Uses = !if(ff, [FFR], []);
let Defs = !if(ff, [FFR], []);
+ let Uses = !if(ff, [FFR], []);
+ let hasSideEffects = ff;
+ let mayLoad = 1;
}
multiclass sve_mem_cld_ss<bits<4> dtype, string asm, RegisterOperand listty,
@@ -7163,6 +7360,7 @@ class sve_mem_eld_si<bits<2> sz, bits<3> nregs, RegisterOperand VecList,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -7196,6 +7394,7 @@ class sve_mem_eld_ss<bits<2> sz, bits<3> nregs, RegisterOperand VecList,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -7226,9 +7425,11 @@ class sve_mem_32b_gld_sv<bits<4> opc, bit xs, bit scaled, string asm,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
- let mayLoad = 1;
+
let Defs = !if(!eq(opc{0}, 1), [FFR], []);
let Uses = !if(!eq(opc{0}, 1), [FFR], []);
+ let hasSideEffects = opc{0};
+ let mayLoad = 1;
}
multiclass sve_mem_32b_gld_sv_32_scaled<bits<4> opc, string asm,
@@ -7309,9 +7510,11 @@ class sve_mem_32b_gld_vi<bits<4> opc, string asm, Operand imm_ty>
let Inst{9-5} = Zn;
let Inst{4-0} = Zt;
- let mayLoad = 1;
+
let Defs = !if(!eq(opc{0}, 1), [FFR], []);
let Uses = !if(!eq(opc{0}, 1), [FFR], []);
+ let hasSideEffects = opc{0};
+ let mayLoad = 1;
}
multiclass sve_mem_32b_gld_vi_32_ptrs<bits<4> opc, string asm, Operand imm_ty,
@@ -7445,6 +7648,8 @@ class sve_mem_32b_prfm_vi<bits<2> msz, string asm, Operand imm_ty>
let Inst{9-5} = Zn;
let Inst{4} = 0b0;
let Inst{3-0} = prfop;
+
+ let hasSideEffects = 1;
}
multiclass sve_mem_32b_prfm_vi<bits<2> msz, string asm, Operand imm_ty, SDPatternOperator op> {
@@ -7472,6 +7677,7 @@ class sve_mem_z_fill<string asm>
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -7498,6 +7704,7 @@ class sve_mem_p_fill<string asm>
let Inst{4} = 0b0;
let Inst{3-0} = Pt;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -7530,6 +7737,7 @@ class sve2_mem_gldnt_vs_base<bits<5> opc, dag iops, string asm,
let Inst{9-5} = Zn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -7595,9 +7803,11 @@ class sve_mem_64b_gld_sv<bits<4> opc, bit xs, bit scaled, bit lsl, string asm,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
- let mayLoad = 1;
+
let Defs = !if(!eq(opc{0}, 1), [FFR], []);
let Uses = !if(!eq(opc{0}, 1), [FFR], []);
+ let hasSideEffects = opc{0};
+ let mayLoad = 1;
}
multiclass sve_mem_64b_gld_sv_32_scaled<bits<4> opc, string asm,
@@ -7714,9 +7924,10 @@ class sve_mem_64b_gld_vi<bits<4> opc, string asm, Operand imm_ty>
let Inst{9-5} = Zn;
let Inst{4-0} = Zt;
- let mayLoad = 1;
let Defs = !if(!eq(opc{0}, 1), [FFR], []);
let Uses = !if(!eq(opc{0}, 1), [FFR], []);
+ let hasSideEffects = opc{0};
+ let mayLoad = 1;
}
multiclass sve_mem_64b_gld_vi_64_ptrs<bits<4> opc, string asm, Operand imm_ty,
@@ -7844,6 +8055,8 @@ class sve_int_bin_cons_misc_0_a<bits<2> opc, bits<2> msz, string asm,
let Inst{11-10} = msz;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_bin_cons_misc_0_a_uxtw<bits<2> opc, string asm> {
@@ -7893,6 +8106,8 @@ class sve_int_bin_cons_misc_0_b<bits<2> sz, string asm, ZPRRegOp zprty>
let Inst{15-10} = 0b101100;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_bin_cons_misc_0_b<string asm, SDPatternOperator op> {
@@ -7956,6 +8171,8 @@ class sve_int_reduce<bits<2> sz8_32, bits<2> fmt, bits<3> opc, string asm,
let Inst{12-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4-0} = Vd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_reduce_0_saddv<bits<3> opc, string asm,
@@ -8027,6 +8244,7 @@ class sve_int_movprfx_pred<bits<2> sz8_32, bits<3> opc, string asm,
let Inst{4-0} = Zd;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve_int_movprfx_pred_merge<bits<3> opc, string asm> {
@@ -8079,6 +8297,7 @@ class sve_int_brkp<bits<2> opc, string asm>
let Inst{3-0} = Pd;
let Defs = !if(!eq (opc{1}, 1), [NZCV], []);
+ let hasSideEffects = 0;
}
multiclass sve_int_brkp<bits<2> opc, string asm, SDPatternOperator op> {
@@ -8112,6 +8331,7 @@ class sve_int_brkn<bit S, string asm>
let Constraints = "$Pdm = $_Pdm";
let Defs = !if(S, [NZCV], []);
let ElementSize = ElementSizeB;
+ let hasSideEffects = 0;
}
multiclass sve_int_brkn<bits<1> opc, string asm, SDPatternOperator op> {
@@ -8139,7 +8359,7 @@ class sve_int_break<bits<3> opc, string asm, string suffix, dag iops>
let Constraints = !if(!eq (opc{0}, 1), "$Pd = $_Pd", "");
let Defs = !if(!eq (opc{1}, 1), [NZCV], []);
-
+ let hasSideEffects = 0;
}
multiclass sve_int_break_m<bits<3> opc, string asm, SDPatternOperator op> {
@@ -8180,6 +8400,7 @@ class sve2_char_match<bit sz, bit opc, string asm,
let Defs = [NZCV];
let ElementSize = pprty.ElementSize;
+ let hasSideEffects = 0;
let isPTestLike = 1;
}
@@ -8208,6 +8429,8 @@ class sve2_hist_gen_segment<string asm, SDPatternOperator op>
let Inst{15-10} = 0b101000;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
//===----------------------------------------------------------------------===//
@@ -8231,6 +8454,8 @@ class sve2_hist_gen_vector<bit sz, string asm, ZPRRegOp zprty>
let Inst{12-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve2_hist_gen_vector<string asm, SDPatternOperator op> {
@@ -8259,6 +8484,8 @@ class sve2_crypto_cons_bin_op<bit opc, string asm, ZPRRegOp zprty>
let Inst{10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve2_crypto_cons_bin_op<bit opc, string asm, ZPRRegOp zprty,
@@ -8282,6 +8509,7 @@ class sve2_crypto_des_bin_op<bits<2> opc, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let hasSideEffects = 0;
}
multiclass sve2_crypto_des_bin_op<bits<2> opc, string asm, ZPRRegOp zprty,
@@ -8302,6 +8530,7 @@ class sve2_crypto_unary_op<bit opc, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let hasSideEffects = 0;
}
multiclass sve2_crypto_unary_op<bit opc, string asm, SDPatternOperator op> {
@@ -8329,6 +8558,8 @@ class sve_float_dot<bit bf, string asm>
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_float_dot<bit bf, string asm, ValueType InVT, SDPatternOperator op> {
@@ -8354,6 +8585,8 @@ class sve_float_dot_indexed<bit bf, string asm>
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_float_dot_indexed<bit bf, string asm, ValueType InVT, SDPatternOperator op> {
@@ -8376,6 +8609,8 @@ class sve_bfloat_matmul<string asm>
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeH;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_bfloat_matmul<string asm, SDPatternOperator op> {
@@ -8398,8 +8633,9 @@ class sve_bfloat_convert<bit N, string asm>
let Constraints = "$Zd = $_Zd";
let DestructiveInstType = DestructiveOther;
- let hasSideEffects = 1;
let ElementSize = ElementSizeS;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_bfloat_convert<bit N, string asm, SDPatternOperator op> {
@@ -8428,6 +8664,7 @@ class sve_int_matmul<bits<2> uns, string asm>
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = ZPR32.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve_int_matmul<bits<2> uns, string asm, SDPatternOperator op> {
@@ -8455,6 +8692,7 @@ class sve_int_dot_mixed<string asm>
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = ZPR32.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve_int_dot_mixed<string asm, SDPatternOperator op> {
@@ -8485,6 +8723,7 @@ class sve_int_dot_mixed_indexed<bit U, string asm>
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = ZPR32.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve_int_dot_mixed_indexed<bit U, string asm, SDPatternOperator op> {
@@ -8514,6 +8753,8 @@ class sve_fp_matrix_mla<bit sz, string asm, ZPRRegOp zprty>
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_matrix_mla<bit sz, string asm, ZPRRegOp zprty, SDPatternOperator op, ValueType vt> {
@@ -8542,6 +8783,7 @@ class sve_mem_ldor_si<bits<2> sz, string asm, RegisterOperand VecList>
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -8582,6 +8824,7 @@ class sve_mem_ldor_ss<bits<2> sz, string asm, RegisterOperand VecList,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -8616,6 +8859,8 @@ class sve_int_perm_bin_perm_128_zz<bits<2> opc, bit P, string asm>
let Inst{10} = P;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_bin_perm_128_zz<bits<2> opc, bit P, string asm, SDPatternOperator op> {
@@ -8727,6 +8972,7 @@ class sve2p1_fclamp<string asm, bits<2> sz, ZPRRegOp zpr_ty>
let Constraints = "$Zd = $_Zd";
let DestructiveInstType = DestructiveOther;
let ElementSize = zpr_ty.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve2p1_fclamp<string asm, SDPatternOperator op> {
@@ -8756,6 +9002,7 @@ class sve2p1_two_way_dot_vv<string mnemonic, bit u>
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
+ let hasSideEffects = 0;
}
@@ -8778,6 +9025,7 @@ class sve2p1_two_way_dot_vvi<string mnemonic, bit u>
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
+ let hasSideEffects = 0;
}
@@ -8789,6 +9037,8 @@ class sve2p1_ptrue_pn<string mnemonic, bits<2> sz, PNRP8to15RegOp pnrty>
let Inst{23-22} = sz;
let Inst{21-3} = 0b1000000111100000010;
let Inst{2-0} = PNd;
+
+ let hasSideEffects = 0;
}
@@ -8816,6 +9066,8 @@ class sve2p1_pred_as_ctr_to_mask_base<string mnemonic, bits<2> sz, bits<3> opc,
let Inst{7-5} = PNn;
let Inst{4} = 0b1;
let Inst{3-0} = Pd;
+
+ let hasSideEffects = 0;
}
class sve2p1_pred_as_ctr_to_mask<string mnemonic, bits<2> sz, PPRRegOp pprty>
@@ -8863,6 +9115,8 @@ class sve2p1_multi_vec_extract_narrow<string mnemonic, bits<2> opc, bits<3> tsz>
let Inst{9-6} = Zn;
let Inst{5} = 0b0;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve2p1_multi_vec_extract_narrow<string mnemonic, bits<2> opc, SDPatternOperator intrinsic> {
@@ -8889,6 +9143,8 @@ class sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, bits<2> tsz>
let Inst{9-6} = Zn;
let Inst{5} = 0b0;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc> {
@@ -8916,6 +9172,7 @@ class sve2p1_mem_cld_ss_2z<string mnemonic, bits<2> msz, bit n,
let Inst{4-1} = Zt;
let Inst{0} = n;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -8939,6 +9196,7 @@ class sve2p1_mem_cld_si_2z<string mnemonic, bits<2> msz, bit n,
let Inst{4-1} = Zt;
let Inst{0} = n;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -8971,6 +9229,7 @@ class sve2p1_mem_cld_ss_4z<string mnemonic, bits<2> msz, bit n,
let Inst{1} = 0b0;
let Inst{0} = n;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -8995,6 +9254,7 @@ class sve2p1_mem_cld_si_4z<string mnemonic, bits<2> msz, bit n,
let Inst{1} = 0b0;
let Inst{0} = n;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -9027,6 +9287,7 @@ class sve2p1_mem_cst_ss_2z<string mnemonic, bits<2> msz, bit n,
let Inst{4-1} = Zt;
let Inst{0} = n;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -9051,6 +9312,7 @@ class sve2p1_mem_cst_si_2z<string mnemonic, bits<2> msz, bit n,
let Inst{4-1} = Zt;
let Inst{0} = n;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -9110,6 +9372,7 @@ class sve2p1_mem_cst_si_4z<string mnemonic, bits<2> msz, bit n,
let Inst{1} = 0b0;
let Inst{0} = n;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -9140,6 +9403,8 @@ class sve2p1_pcount_pn<string mnemonic, bits<3> opc, bits<2> sz, PNRRegOp pnrty>
let Inst{9} = 0b1;
let Inst{8-5} = PNn;
let Inst{4-0} = Rd;
+
+ let hasSideEffects = 0;
}
multiclass sve2p1_pcount_pn<string mnemonic, bits<3> opc> {
@@ -9174,6 +9439,7 @@ class sve2p1_int_while_rr_pn<string mnemonic, bits<2> sz, bits<3> opc,
let Inst{2-0} = PNd;
let Defs = [NZCV];
+ let hasSideEffects = 0;
}
@@ -9206,6 +9472,7 @@ class sve2p1_int_while_rr_pair<string mnemonic, bits<2> sz, bits<3> opc,
let Inst{0} = opc{0};
let Defs = [NZCV];
+ let hasSideEffects = 0;
}
@@ -9232,6 +9499,7 @@ class sve_mem_128b_gld_64_unscaled<string mnemonic>
let Inst{9-5} = Zn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -9258,6 +9526,7 @@ class sve_mem_sst_128b_64_unscaled<string mnemonic>
let Inst{9-5} = Zn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -9288,6 +9557,7 @@ class sve_mem_128b_cld_si<bits<2> dtype, string mnemonic>
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -9321,6 +9591,7 @@ class sve_mem_128b_cld_ss<bits<2> dtype, string mnemonic, RegisterOperand gprsh_
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -9349,6 +9620,9 @@ class sve2p1_fp_reduction_q<bits<2> sz, bits<3> opc, string mnemonic,
let Inst{12-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4-0} = Vd;
+
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve2p1_fp_reduction_q<bits<3> opc, string mnemonic> {
@@ -9370,6 +9644,8 @@ class sve2p1_dupq<bits<5> ind_tsz, string mnemonic, ZPRRegOp zprty, Operand ityp
let Inst{15-10} = 0b001001;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve2p1_dupq<string mnemonic> {
@@ -9409,6 +9685,7 @@ class sve2p1_extq<string mnemonic>
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = ZPR8.ElementSize;
+ let hasSideEffects = 0;
}
@@ -9428,6 +9705,8 @@ class sve2p1_vector_to_pred<bits<4> opc, string mnemonic,
let Inst{9-5} = Zn;
let Inst{4} = 0b0;
let Inst{3-0} = Pd;
+
+ let hasSideEffects = 0;
}
multiclass sve2p1_vector_to_pred<string mnemonic> {
@@ -9468,6 +9747,7 @@ class sve2p1_pred_to_vector<bits<4> opc, string mnemonic,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let hasSideEffects = 0;
}
multiclass sve2p1_pred_to_vector<string mnemonic> {
@@ -9510,6 +9790,8 @@ class sve2p1_int_reduce_q<bits<2> sz, bits<4> opc, string mnemonic,
let Inst{12-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4-0} = Vd;
+
+ let hasSideEffects = 0;
}
multiclass sve2p1_int_reduce_q<bits<4> opc, string mnemonic> {
@@ -9537,6 +9819,8 @@ class sve2p1_permute_vec_elems_q<bits<2> sz, bits<3> opc, string mnemonic,
let Inst{12-10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve2p1_permute_vec_elems_q<bits<3> opc, string mnemonic> {
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
index 8e7c6c6703a62..31ff9287046cd 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
@@ -21,8 +21,8 @@ define void @fcvt_v4f64_v4f128(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: bl __extenddftf2
; CHECK-NEXT: add x8, sp, #48
-; CHECK-NEXT: ldr z1, [x8] // 16-byte Folded Reload
; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: ldr z1, [x8] // 16-byte Folded Reload
; CHECK-NEXT: mov d1, v1.d[1]
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: bl __extenddftf2
@@ -32,8 +32,8 @@ define void @fcvt_v4f64_v4f128(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: bl __extenddftf2
; CHECK-NEXT: add x8, sp, #48
-; CHECK-NEXT: ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: mov d1, v1.d[1]
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: bl __extenddftf2
diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
index bf38dd8c087ad..27da8659f4fb2 100644
--- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
@@ -530,8 +530,8 @@ define <vscale x 4 x bfloat> @insert_nxv4bf16_v4bf16(<vscale x 4 x bfloat> %sv0,
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT: addpl x8, sp, #4
+; CHECK-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT: str d1, [x8]
; CHECK-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #1
diff --git a/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s
index 36d477738ff86..f915e1eaf07f0 100644
--- a/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s
@@ -2558,10 +2558,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 4 0.50 add z31.s, p7/m, z31.s, z31.s
# CHECK-NEXT: 1 4 0.50 add z31.s, z31.s, #65280
# CHECK-NEXT: 1 4 0.50 add z31.s, z31.s, z31.s
-# CHECK-NEXT: 1 1 0.50 U addpl sp, sp, #31
-# CHECK-NEXT: 1 1 0.50 U addpl x0, x0, #-32
-# CHECK-NEXT: 1 1 0.50 U addpl x21, x21, #0
-# CHECK-NEXT: 1 1 0.50 U addpl x23, x8, #-1
+# CHECK-NEXT: 1 1 0.50 addpl sp, sp, #31
+# CHECK-NEXT: 1 1 0.50 addpl x0, x0, #-32
+# CHECK-NEXT: 1 1 0.50 addpl x21, x21, #0
+# CHECK-NEXT: 1 1 0.50 addpl x23, x8, #-1
# CHECK-NEXT: 1 1 0.50 addvl sp, sp, #31
# CHECK-NEXT: 1 1 0.50 addvl x0, x0, #-32
# CHECK-NEXT: 1 1 0.50 addvl x21, x21, #0
@@ -2597,7 +2597,7 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 4 0.50 and z31.s, p7/m, z31.s, z31.s
# CHECK-NEXT: 1 4 1.00 and z5.b, z5.b, #0x6
# CHECK-NEXT: 1 4 1.00 and z5.b, z5.b, #0xf9
-# CHECK-NEXT: 1 3 1.00 U ands p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: 1 3 1.00 ands p0.b, p0/z, p0.b, p1.b
# CHECK-NEXT: 10 46 5.00 andv b0, p7, z31.b
# CHECK-NEXT: 7 34 3.50 andv d0, p7, z31.d
# CHECK-NEXT: 9 42 4.50 andv h0, p7, z31.h
@@ -2636,10 +2636,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 2 8 1.00 asrd z31.d, p0/m, z31.d, #64
# CHECK-NEXT: 2 8 1.00 asrd z31.h, p0/m, z31.h, #16
# CHECK-NEXT: 2 8 1.00 asrd z31.s, p0/m, z31.s, #32
-# CHECK-NEXT: 1 4 0.50 U asrr z0.b, p0/m, z0.b, z0.b
-# CHECK-NEXT: 1 4 0.50 U asrr z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT: 1 4 0.50 U asrr z0.h, p0/m, z0.h, z0.h
-# CHECK-NEXT: 1 4 0.50 U asrr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 1 4 0.50 asrr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 1 4 0.50 asrr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 1 4 0.50 asrr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 1 4 0.50 asrr z0.s, p0/m, z0.s, z0.s
# CHECK-NEXT: 1 3 1.00 bic p0.b, p0/z, p0.b, p0.b
# CHECK-NEXT: 1 3 1.00 bic p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: 1 4 0.50 bic z0.d, z0.d, z0.d
@@ -2648,27 +2648,27 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 4 0.50 bic z31.d, p7/m, z31.d, z31.d
# CHECK-NEXT: 1 4 0.50 bic z31.h, p7/m, z31.h, z31.h
# CHECK-NEXT: 1 4 0.50 bic z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: 1 3 1.00 U bics p0.b, p0/z, p0.b, p0.b
-# CHECK-NEXT: 1 3 1.00 U bics p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 3 1.00 bics p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: 1 3 1.00 bics p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: 1 3 1.00 brka p0.b, p15/m, p15.b
# CHECK-NEXT: 1 3 1.00 brka p0.b, p15/z, p15.b
-# CHECK-NEXT: 1 3 1.00 U brkas p0.b, p15/z, p15.b
+# CHECK-NEXT: 1 3 1.00 brkas p0.b, p15/z, p15.b
# CHECK-NEXT: 1 3 1.00 brkb p0.b, p15/m, p15.b
# CHECK-NEXT: 1 3 1.00 brkb p0.b, p15/z, p15.b
-# CHECK-NEXT: 1 3 1.00 U brkbs p0.b, p15/z, p15.b
+# CHECK-NEXT: 1 3 1.00 brkbs p0.b, p15/z, p15.b
# CHECK-NEXT: 1 3 1.00 brkn p0.b, p15/z, p1.b, p0.b
# CHECK-NEXT: 1 3 1.00 brkn p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT: 1 3 1.00 U brkns p0.b, p15/z, p1.b, p0.b
-# CHECK-NEXT: 1 3 1.00 U brkns p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 3 1.00 brkns p0.b, p15/z, p1.b, p0.b
+# CHECK-NEXT: 1 3 1.00 brkns p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: 1 3 1.00 brkpa p0.b, p15/z, p1.b, p2.b
# CHECK-NEXT: 1 3 1.00 brkpa p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT: 1 3 1.00 U brkpas p0.b, p15/z, p1.b, p2.b
-# CHECK-NEXT: 1 3 1.00 U brkpas p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 3 1.00 brkpas p0.b, p15/z, p1.b, p2.b
+# CHECK-NEXT: 1 3 1.00 brkpas p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: 1 3 1.00 brkpb p0.b, p15/z, p1.b, p2.b
# CHECK-NEXT: 1 3 1.00 brkpb p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT: 1 3 1.00 U brkpbs p0.b, p15/z, p1.b, p2.b
-# CHECK-NEXT: 1 3 1.00 U brkpbs p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT: 1 6 1.00 U clasta b0, p7, b0, z31.b
+# CHECK-NEXT: 1 3 1.00 brkpbs p0.b, p15/z, p1.b, p2.b
+# CHECK-NEXT: 1 3 1.00 brkpbs p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 6 1.00 clasta b0, p7, b0, z31.b
# CHECK-NEXT: 1 6 1.00 clasta d0, p7, d0, z31.d
# CHECK-NEXT: 1 6 1.00 clasta h0, p7, h0, z31.h
# CHECK-NEXT: 1 6 1.00 clasta s0, p7, s0, z31.s
@@ -2680,7 +2680,7 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 6 1.00 clasta z0.d, p7, z0.d, z31.d
# CHECK-NEXT: 1 6 1.00 clasta z0.h, p7, z0.h, z31.h
# CHECK-NEXT: 1 6 1.00 clasta z0.s, p7, z0.s, z31.s
-# CHECK-NEXT: 1 6 1.00 U clastb b0, p7, b0, z31.b
+# CHECK-NEXT: 1 6 1.00 clastb b0, p7, b0, z31.b
# CHECK-NEXT: 1 6 1.00 clastb d0, p7, d0, z31.d
# CHECK-NEXT: 1 6 1.00 clastb h0, p7, h0, z31.h
# CHECK-NEXT: 1 6 1.00 clastb s0, p7, s0, z31.s
@@ -2880,14 +2880,14 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 1 0.50 cntw x0, pow2
# CHECK-NEXT: 1 6 1.00 compact z31.d, p7, z31.d
# CHECK-NEXT: 1 6 1.00 compact z31.s, p7, z31.s
-# CHECK-NEXT: 1 2 1.00 U ctermeq w30, wzr
-# CHECK-NEXT: 1 2 1.00 U ctermeq wzr, w30
-# CHECK-NEXT: 1 2 1.00 U ctermeq x30, xzr
-# CHECK-NEXT: 1 2 1.00 U ctermeq xzr, x30
-# CHECK-NEXT: 1 2 1.00 U ctermne w30, wzr
-# CHECK-NEXT: 1 2 1.00 U ctermne wzr, w30
-# CHECK-NEXT: 1 2 1.00 U ctermne x30, xzr
-# CHECK-NEXT: 1 2 1.00 U ctermne xzr, x30
+# CHECK-NEXT: 1 2 1.00 ctermeq w30, wzr
+# CHECK-NEXT: 1 2 1.00 ctermeq wzr, w30
+# CHECK-NEXT: 1 2 1.00 ctermeq x30, xzr
+# CHECK-NEXT: 1 2 1.00 ctermeq xzr, x30
+# CHECK-NEXT: 1 2 1.00 ctermne w30, wzr
+# CHECK-NEXT: 1 2 1.00 ctermne wzr, w30
+# CHECK-NEXT: 1 2 1.00 ctermne x30, xzr
+# CHECK-NEXT: 1 2 1.00 ctermne xzr, x30
# CHECK-NEXT: 1 1 0.50 decb x0
# CHECK-NEXT: 1 1 0.50 decb x0, #14
# CHECK-NEXT: 1 1 0.50 decb x0, all, mul #16
@@ -2911,9 +2911,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 2 7 1.00 decp xzr, p15.d
# CHECK-NEXT: 2 7 1.00 decp xzr, p15.h
# CHECK-NEXT: 2 7 1.00 decp xzr, p15.s
-# CHECK-NEXT: 1 12 1.00 U decp z31.d, p15.d
-# CHECK-NEXT: 1 12 1.00 U decp z31.h, p15.h
-# CHECK-NEXT: 1 12 1.00 U decp z31.s, p15.s
+# CHECK-NEXT: 1 12 1.00 decp z31.d, p15.d
+# CHECK-NEXT: 1 12 1.00 decp z31.h, p15.h
+# CHECK-NEXT: 1 12 1.00 decp z31.s, p15.s
# CHECK-NEXT: 1 1 0.50 decw x0
# CHECK-NEXT: 1 1 0.50 decw x0, #14
# CHECK-NEXT: 1 1 0.50 decw x0, all, mul #16
@@ -2938,7 +2938,7 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 4 0.50 eor z31.s, p7/m, z31.s, z31.s
# CHECK-NEXT: 1 4 1.00 eor z5.b, z5.b, #0x6
# CHECK-NEXT: 1 4 1.00 eor z5.b, z5.b, #0xf9
-# CHECK-NEXT: 1 3 1.00 U eors p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: 1 3 1.00 eors p0.b, p0/z, p0.b, p1.b
# CHECK-NEXT: 10 46 5.00 eorv b0, p7, z31.b
# CHECK-NEXT: 7 34 3.50 eorv d0, p7, z31.d
# CHECK-NEXT: 9 42 4.50 eorv h0, p7, z31.h
@@ -3136,12 +3136,12 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 2 15 1.00 fmls z0.s, z1.s, z7.s[3]
# CHECK-NEXT: 1 6 1.00 fmov z0.d, #-10.00000000
# CHECK-NEXT: 1 6 1.00 fmov z0.d, #0.12500000
-# CHECK-NEXT: 1 4 1.00 U fmov z0.d, p0/m, #-10.00000000
-# CHECK-NEXT: 1 4 1.00 U fmov z0.d, p0/m, #0.12500000
+# CHECK-NEXT: 1 4 1.00 fmov z0.d, p0/m, #-10.00000000
+# CHECK-NEXT: 1 4 1.00 fmov z0.d, p0/m, #0.12500000
# CHECK-NEXT: 1 6 1.00 fmov z0.h, #-0.12500000
-# CHECK-NEXT: 1 4 1.00 U fmov z0.h, p0/m, #-0.12500000
+# CHECK-NEXT: 1 4 1.00 fmov z0.h, p0/m, #-0.12500000
# CHECK-NEXT: 1 6 1.00 fmov z0.s, #-0.12500000
-# CHECK-NEXT: 1 4 1.00 U fmov z0.s, p0/m, #-0.12500000
+# CHECK-NEXT: 1 4 1.00 fmov z0.s, p0/m, #-0.12500000
# CHECK-NEXT: 1 9 0.50 fmsb z0.d, p7/m, z1.d, z31.d
# CHECK-NEXT: 1 9 0.50 fmsb z0.h, p7/m, z1.h, z31.h
# CHECK-NEXT: 1 9 0.50 fmsb z0.s, p7/m, z1.s, z31.s
@@ -3280,9 +3280,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 2 7 1.00 incp xzr, p15.d
# CHECK-NEXT: 2 7 1.00 incp xzr, p15.h
# CHECK-NEXT: 2 7 1.00 incp xzr, p15.s
-# CHECK-NEXT: 1 12 1.00 U incp z31.d, p15.d
-# CHECK-NEXT: 1 12 1.00 U incp z31.h, p15.h
-# CHECK-NEXT: 1 12 1.00 U incp z31.s, p15.s
+# CHECK-NEXT: 1 12 1.00 incp z31.d, p15.d
+# CHECK-NEXT: 1 12 1.00 incp z31.h, p15.h
+# CHECK-NEXT: 1 12 1.00 incp z31.s, p15.s
# CHECK-NEXT: 1 1 0.50 incw x0
# CHECK-NEXT: 1 1 0.50 incw x0, #14
# CHECK-NEXT: 1 1 0.50 incw x0, all, mul #16
@@ -3334,7 +3334,7 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 10 1.00 insr z31.h, wzr
# CHECK-NEXT: 1 6 1.00 insr z31.s, s31
# CHECK-NEXT: 1 10 1.00 insr z31.s, wzr
-# CHECK-NEXT: 1 6 1.00 U lasta b0, p7, z31.b
+# CHECK-NEXT: 1 6 1.00 lasta b0, p7, z31.b
# CHECK-NEXT: 1 6 1.00 lasta d0, p7, z31.d
# CHECK-NEXT: 1 6 1.00 lasta h0, p7, z31.h
# CHECK-NEXT: 1 6 1.00 lasta s0, p7, z31.s
@@ -3342,7 +3342,7 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 29 1.00 lasta w0, p7, z31.h
# CHECK-NEXT: 1 29 1.00 lasta w0, p7, z31.s
# CHECK-NEXT: 1 29 1.00 lasta x0, p7, z31.d
-# CHECK-NEXT: 1 6 1.00 U lastb b0, p7, z31.b
+# CHECK-NEXT: 1 6 1.00 lastb b0, p7, z31.b
# CHECK-NEXT: 1 6 1.00 lastb d0, p7, z31.d
# CHECK-NEXT: 1 6 1.00 lastb h0, p7, z31.h
# CHECK-NEXT: 1 6 1.00 lastb s0, p7, z31.s
@@ -3352,68 +3352,68 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 29 1.00 lastb x0, p7, z31.d
# CHECK-NEXT: 1 11 0.50 * ld1b { z0.b }, p0/z, [sp, x0]
# CHECK-NEXT: 1 11 0.50 * ld1b { z0.b }, p0/z, [x0, x0]
-# CHECK-NEXT: 1 11 0.50 * U ld1b { z0.b }, p0/z, [x0]
-# CHECK-NEXT: 1 11 0.50 * U ld1b { z0.d }, p0/z, [x0]
-# CHECK-NEXT: 1 16 2.00 * U ld1b { z0.d }, p0/z, [z0.d]
-# CHECK-NEXT: 1 11 0.50 * U ld1b { z0.h }, p0/z, [x0]
-# CHECK-NEXT: 1 23 4.00 * U ld1b { z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT: 1 23 4.00 * U ld1b { z0.s }, p0/z, [x0, z0.s, uxtw]
-# CHECK-NEXT: 1 11 0.50 * U ld1b { z0.s }, p0/z, [x0]
-# CHECK-NEXT: 1 19 4.00 * U ld1b { z0.s }, p0/z, [z0.s]
-# CHECK-NEXT: 1 11 0.50 * U ld1b { z21.b }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: 1 11 0.50 * U ld1b { z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: 1 20 2.00 * U ld1b { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: 1 20 2.00 * U ld1b { z21.d }, p5/z, [x10, z21.d, uxtw]
-# CHECK-NEXT: 1 11 0.50 * U ld1b { z21.h }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: 1 11 0.50 * U ld1b { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 11 0.50 * ld1b { z0.b }, p0/z, [x0]
+# CHECK-NEXT: 1 11 0.50 * ld1b { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 16 2.00 * ld1b { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 1 11 0.50 * ld1b { z0.h }, p0/z, [x0]
+# CHECK-NEXT: 1 23 4.00 * ld1b { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 1 23 4.00 * ld1b { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 1 11 0.50 * ld1b { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 1 19 4.00 * ld1b { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 1 11 0.50 * ld1b { z21.b }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 11 0.50 * ld1b { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 20 2.00 * ld1b { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 1 20 2.00 * ld1b { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 1 11 0.50 * ld1b { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 11 0.50 * ld1b { z21.s }, p5/z, [x10, #5, mul vl]
# CHECK-NEXT: 1 11 0.50 * ld1b { z21.s }, p5/z, [x10, x21]
# CHECK-NEXT: 1 11 0.50 * ld1b { z23.d }, p3/z, [x13, x8]
-# CHECK-NEXT: 1 11 0.50 * U ld1b { z31.b }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 1 11 0.50 * U ld1b { z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 1 20 2.00 * U ld1b { z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT: 1 16 2.00 * U ld1b { z31.d }, p7/z, [z31.d, #31]
-# CHECK-NEXT: 1 11 0.50 * U ld1b { z31.h }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 1 11 0.50 * U ld1b { z31.s }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 1 19 4.00 * U ld1b { z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT: 1 11 0.50 * ld1b { z31.b }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 11 0.50 * ld1b { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 20 2.00 * ld1b { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 1 16 2.00 * ld1b { z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT: 1 11 0.50 * ld1b { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 11 0.50 * ld1b { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 19 4.00 * ld1b { z31.s }, p7/z, [z31.s, #31]
# CHECK-NEXT: 1 11 0.50 * ld1b { z5.h }, p3/z, [x17, x16]
-# CHECK-NEXT: 1 20 2.00 * U ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
-# CHECK-NEXT: 1 20 2.00 * U ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
-# CHECK-NEXT: 1 11 0.50 * U ld1d { z0.d }, p0/z, [x0]
-# CHECK-NEXT: 1 16 2.00 * U ld1d { z0.d }, p0/z, [z0.d]
-# CHECK-NEXT: 1 11 0.50 * U ld1d { z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: 1 20 2.00 * U ld1d { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: 1 20 2.00 * U ld1d { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 1 20 2.00 * ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
+# CHECK-NEXT: 1 20 2.00 * ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
+# CHECK-NEXT: 1 11 0.50 * ld1d { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 16 2.00 * ld1d { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 1 11 0.50 * ld1d { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 20 2.00 * ld1d { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 1 20 2.00 * ld1d { z21.d }, p5/z, [x10, z21.d, uxtw]
# CHECK-NEXT: 1 11 0.50 * ld1d { z23.d }, p3/z, [sp, x8, lsl #3]
# CHECK-NEXT: 1 11 0.50 * ld1d { z23.d }, p3/z, [x13, x8, lsl #3]
-# CHECK-NEXT: 1 20 2.00 * U ld1d { z23.d }, p3/z, [x13, z8.d, lsl #3]
-# CHECK-NEXT: 1 11 0.50 * U ld1d { z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 1 20 2.00 * U ld1d { z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT: 1 16 2.00 * U ld1d { z31.d }, p7/z, [z31.d, #248]
-# CHECK-NEXT: 1 20 2.00 * U ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
-# CHECK-NEXT: 1 20 2.00 * U ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
-# CHECK-NEXT: 1 11 0.50 * U ld1h { z0.d }, p0/z, [x0]
-# CHECK-NEXT: 1 16 2.00 * U ld1h { z0.d }, p0/z, [z0.d]
-# CHECK-NEXT: 1 11 0.50 * U ld1h { z0.h }, p0/z, [x0]
-# CHECK-NEXT: 1 23 4.00 * U ld1h { z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT: 1 23 4.00 * U ld1h { z0.s }, p0/z, [x0, z0.s, uxtw]
-# CHECK-NEXT: 1 11 0.50 * U ld1h { z0.s }, p0/z, [x0]
-# CHECK-NEXT: 1 19 4.00 * U ld1h { z0.s }, p0/z, [z0.s]
-# CHECK-NEXT: 1 11 0.50 * U ld1h { z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: 1 20 2.00 * U ld1h { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: 1 20 2.00 * U ld1h { z21.d }, p5/z, [x10, z21.d, uxtw]
-# CHECK-NEXT: 1 11 0.50 * U ld1h { z21.h }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: 1 11 0.50 * U ld1h { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 20 2.00 * ld1d { z23.d }, p3/z, [x13, z8.d, lsl #3]
+# CHECK-NEXT: 1 11 0.50 * ld1d { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 20 2.00 * ld1d { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 1 16 2.00 * ld1d { z31.d }, p7/z, [z31.d, #248]
+# CHECK-NEXT: 1 20 2.00 * ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: 1 20 2.00 * ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: 1 11 0.50 * ld1h { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 16 2.00 * ld1h { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 1 11 0.50 * ld1h { z0.h }, p0/z, [x0]
+# CHECK-NEXT: 1 23 4.00 * ld1h { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 1 23 4.00 * ld1h { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 1 11 0.50 * ld1h { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 1 19 4.00 * ld1h { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 1 11 0.50 * ld1h { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 20 2.00 * ld1h { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 1 20 2.00 * ld1h { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 1 11 0.50 * ld1h { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 11 0.50 * ld1h { z21.s }, p5/z, [x10, #5, mul vl]
# CHECK-NEXT: 1 11 0.50 * ld1h { z21.s }, p5/z, [x10, x21, lsl #1]
# CHECK-NEXT: 1 11 0.50 * ld1h { z23.d }, p3/z, [x13, x8, lsl #1]
-# CHECK-NEXT: 1 20 2.00 * U ld1h { z23.d }, p3/z, [x13, z8.d, lsl #1]
-# CHECK-NEXT: 1 11 0.50 * U ld1h { z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 1 20 2.00 * U ld1h { z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT: 1 16 2.00 * U ld1h { z31.d }, p7/z, [z31.d, #62]
-# CHECK-NEXT: 1 11 0.50 * U ld1h { z31.h }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 1 11 0.50 * U ld1h { z31.s }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 1 23 4.00 * U ld1h { z31.s }, p7/z, [sp, z31.s, sxtw #1]
-# CHECK-NEXT: 1 23 4.00 * U ld1h { z31.s }, p7/z, [sp, z31.s, uxtw #1]
-# CHECK-NEXT: 1 19 4.00 * U ld1h { z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT: 1 20 2.00 * ld1h { z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT: 1 11 0.50 * ld1h { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 20 2.00 * ld1h { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 1 16 2.00 * ld1h { z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT: 1 11 0.50 * ld1h { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 11 0.50 * ld1h { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 23 4.00 * ld1h { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT: 1 23 4.00 * ld1h { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT: 1 19 4.00 * ld1h { z31.s }, p7/z, [z31.s, #62]
# CHECK-NEXT: 1 11 0.50 * ld1h { z5.h }, p3/z, [sp, x16, lsl #1]
# CHECK-NEXT: 1 11 0.50 * ld1h { z5.h }, p3/z, [x17, x16, lsl #1]
# CHECK-NEXT: 1 11 0.50 * ld1rb { z0.b }, p0/z, [x0]
@@ -3468,146 +3468,146 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 11 0.50 * ld1rw { z0.s }, p0/z, [x0]
# CHECK-NEXT: 1 11 0.50 * ld1rw { z31.d }, p7/z, [sp, #252]
# CHECK-NEXT: 1 11 0.50 * ld1rw { z31.s }, p7/z, [sp, #252]
-# CHECK-NEXT: 1 11 0.50 * U ld1sb { z0.d }, p0/z, [x0]
-# CHECK-NEXT: 1 16 2.00 * U ld1sb { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 1 11 0.50 * ld1sb { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 16 2.00 * ld1sb { z0.d }, p0/z, [z0.d]
# CHECK-NEXT: 1 11 0.50 * ld1sb { z0.h }, p0/z, [sp, x0]
# CHECK-NEXT: 1 11 0.50 * ld1sb { z0.h }, p0/z, [x0, x0]
-# CHECK-NEXT: 1 11 0.50 * U ld1sb { z0.h }, p0/z, [x0]
-# CHECK-NEXT: 1 23 4.00 * U ld1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT: 1 11 0.50 * U ld1sb { z0.s }, p0/z, [x0]
-# CHECK-NEXT: 1 19 4.00 * U ld1sb { z0.s }, p0/z, [z0.s]
-# CHECK-NEXT: 1 11 0.50 * U ld1sb { z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: 1 20 2.00 * U ld1sb { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: 1 20 2.00 * U ld1sb { z21.d }, p5/z, [x10, z21.d, uxtw]
-# CHECK-NEXT: 1 11 0.50 * U ld1sb { z21.h }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: 1 11 0.50 * U ld1sb { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 11 0.50 * ld1sb { z0.h }, p0/z, [x0]
+# CHECK-NEXT: 1 23 4.00 * ld1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 1 11 0.50 * ld1sb { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 1 19 4.00 * ld1sb { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 1 11 0.50 * ld1sb { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 20 2.00 * ld1sb { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 1 20 2.00 * ld1sb { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 1 11 0.50 * ld1sb { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 11 0.50 * ld1sb { z21.s }, p5/z, [x10, #5, mul vl]
# CHECK-NEXT: 1 11 0.50 * ld1sb { z21.s }, p5/z, [x10, x21]
# CHECK-NEXT: 1 11 0.50 * ld1sb { z23.d }, p3/z, [x13, x8]
-# CHECK-NEXT: 1 11 0.50 * U ld1sb { z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 1 20 2.00 * U ld1sb { z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT: 1 16 2.00 * U ld1sb { z31.d }, p7/z, [z31.d, #31]
-# CHECK-NEXT: 1 11 0.50 * U ld1sb { z31.h }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 1 11 0.50 * U ld1sb { z31.s }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 1 19 4.00 * U ld1sb { z31.s }, p7/z, [z31.s, #31]
-# CHECK-NEXT: 1 20 2.00 * U ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
-# CHECK-NEXT: 1 20 2.00 * U ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
-# CHECK-NEXT: 1 11 0.50 * U ld1sh { z0.d }, p0/z, [x0]
-# CHECK-NEXT: 1 16 2.00 * U ld1sh { z0.d }, p0/z, [z0.d]
-# CHECK-NEXT: 1 23 4.00 * U ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT: 1 23 4.00 * U ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
-# CHECK-NEXT: 1 11 0.50 * U ld1sh { z0.s }, p0/z, [x0]
-# CHECK-NEXT: 1 19 4.00 * U ld1sh { z0.s }, p0/z, [z0.s]
-# CHECK-NEXT: 1 11 0.50 * U ld1sh { z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: 1 20 2.00 * U ld1sh { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: 1 20 2.00 * U ld1sh { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 1 11 0.50 * ld1sb { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 20 2.00 * ld1sb { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 1 16 2.00 * ld1sb { z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT: 1 11 0.50 * ld1sb { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 11 0.50 * ld1sb { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 19 4.00 * ld1sb { z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT: 1 20 2.00 * ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: 1 20 2.00 * ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: 1 11 0.50 * ld1sh { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 16 2.00 * ld1sh { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 1 23 4.00 * ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 1 23 4.00 * ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 1 11 0.50 * ld1sh { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 1 19 4.00 * ld1sh { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 1 11 0.50 * ld1sh { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 20 2.00 * ld1sh { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 1 20 2.00 * ld1sh { z21.d }, p5/z, [x10, z21.d, uxtw]
# CHECK-NEXT: 1 11 0.50 * ld1sh { z21.s }, p5/z, [sp, x21, lsl #1]
-# CHECK-NEXT: 1 11 0.50 * U ld1sh { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 11 0.50 * ld1sh { z21.s }, p5/z, [x10, #5, mul vl]
# CHECK-NEXT: 1 11 0.50 * ld1sh { z21.s }, p5/z, [x10, x21, lsl #1]
# CHECK-NEXT: 1 11 0.50 * ld1sh { z23.d }, p3/z, [x13, x8, lsl #1]
-# CHECK-NEXT: 1 20 2.00 * U ld1sh { z23.d }, p3/z, [x13, z8.d, lsl #1]
-# CHECK-NEXT: 1 11 0.50 * U ld1sh { z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 1 20 2.00 * U ld1sh { z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT: 1 16 2.00 * U ld1sh { z31.d }, p7/z, [z31.d, #62]
-# CHECK-NEXT: 1 11 0.50 * U ld1sh { z31.s }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 1 23 4.00 * U ld1sh { z31.s }, p7/z, [sp, z31.s, sxtw #1]
-# CHECK-NEXT: 1 23 4.00 * U ld1sh { z31.s }, p7/z, [sp, z31.s, uxtw #1]
-# CHECK-NEXT: 1 19 4.00 * U ld1sh { z31.s }, p7/z, [z31.s, #62]
-# CHECK-NEXT: 1 20 2.00 * U ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
-# CHECK-NEXT: 1 20 2.00 * U ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
-# CHECK-NEXT: 1 11 0.50 * U ld1sw { z0.d }, p0/z, [x0]
-# CHECK-NEXT: 1 16 2.00 * U ld1sw { z0.d }, p0/z, [z0.d]
-# CHECK-NEXT: 1 11 0.50 * U ld1sw { z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: 1 20 2.00 * U ld1sw { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: 1 20 2.00 * U ld1sw { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 1 20 2.00 * ld1sh { z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT: 1 11 0.50 * ld1sh { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 20 2.00 * ld1sh { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 1 16 2.00 * ld1sh { z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT: 1 11 0.50 * ld1sh { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 23 4.00 * ld1sh { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT: 1 23 4.00 * ld1sh { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT: 1 19 4.00 * ld1sh { z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT: 1 20 2.00 * ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: 1 20 2.00 * ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: 1 11 0.50 * ld1sw { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 16 2.00 * ld1sw { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 1 11 0.50 * ld1sw { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 20 2.00 * ld1sw { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 1 20 2.00 * ld1sw { z21.d }, p5/z, [x10, z21.d, uxtw]
# CHECK-NEXT: 1 11 0.50 * ld1sw { z23.d }, p3/z, [sp, x8, lsl #2]
# CHECK-NEXT: 1 11 0.50 * ld1sw { z23.d }, p3/z, [x13, x8, lsl #2]
-# CHECK-NEXT: 1 20 2.00 * U ld1sw { z23.d }, p3/z, [x13, z8.d, lsl #2]
-# CHECK-NEXT: 1 11 0.50 * U ld1sw { z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 1 20 2.00 * U ld1sw { z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT: 1 16 2.00 * U ld1sw { z31.d }, p7/z, [z31.d, #124]
-# CHECK-NEXT: 1 20 2.00 * U ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
-# CHECK-NEXT: 1 20 2.00 * U ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
-# CHECK-NEXT: 1 11 0.50 * U ld1w { z0.d }, p0/z, [x0]
-# CHECK-NEXT: 1 16 2.00 * U ld1w { z0.d }, p0/z, [z0.d]
-# CHECK-NEXT: 1 23 4.00 * U ld1w { z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT: 1 23 4.00 * U ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
-# CHECK-NEXT: 1 11 0.50 * U ld1w { z0.s }, p0/z, [x0]
-# CHECK-NEXT: 1 19 4.00 * U ld1w { z0.s }, p0/z, [z0.s]
-# CHECK-NEXT: 1 11 0.50 * U ld1w { z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: 1 20 2.00 * U ld1w { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: 1 20 2.00 * U ld1w { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 1 20 2.00 * ld1sw { z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT: 1 11 0.50 * ld1sw { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 20 2.00 * ld1sw { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 1 16 2.00 * ld1sw { z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT: 1 20 2.00 * ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: 1 20 2.00 * ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: 1 11 0.50 * ld1w { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 16 2.00 * ld1w { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 1 23 4.00 * ld1w { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 1 23 4.00 * ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 1 11 0.50 * ld1w { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 1 19 4.00 * ld1w { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 1 11 0.50 * ld1w { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 20 2.00 * ld1w { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 1 20 2.00 * ld1w { z21.d }, p5/z, [x10, z21.d, uxtw]
# CHECK-NEXT: 1 11 0.50 * ld1w { z21.s }, p5/z, [sp, x21, lsl #2]
-# CHECK-NEXT: 1 11 0.50 * U ld1w { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 11 0.50 * ld1w { z21.s }, p5/z, [x10, #5, mul vl]
# CHECK-NEXT: 1 11 0.50 * ld1w { z21.s }, p5/z, [x10, x21, lsl #2]
# CHECK-NEXT: 1 11 0.50 * ld1w { z23.d }, p3/z, [x13, x8, lsl #2]
-# CHECK-NEXT: 1 20 2.00 * U ld1w { z23.d }, p3/z, [x13, z8.d, lsl #2]
-# CHECK-NEXT: 1 11 0.50 * U ld1w { z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 1 20 2.00 * U ld1w { z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT: 1 16 2.00 * U ld1w { z31.d }, p7/z, [z31.d, #124]
-# CHECK-NEXT: 1 11 0.50 * U ld1w { z31.s }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 1 23 4.00 * U ld1w { z31.s }, p7/z, [sp, z31.s, sxtw #2]
-# CHECK-NEXT: 1 23 4.00 * U ld1w { z31.s }, p7/z, [sp, z31.s, uxtw #2]
-# CHECK-NEXT: 1 19 4.00 * U ld1w { z31.s }, p7/z, [z31.s, #124]
-# CHECK-NEXT: 3 15 4.50 * U ld2b { z0.b, z1.b }, p0/z, [x0, x0]
-# CHECK-NEXT: 3 15 4.50 * U ld2b { z0.b, z1.b }, p0/z, [x0]
-# CHECK-NEXT: 3 15 4.50 * U ld2b { z21.b, z22.b }, p5/z, [x10, #10, mul vl]
-# CHECK-NEXT: 3 15 4.50 * U ld2b { z23.b, z24.b }, p3/z, [x13, #-16, mul vl]
-# CHECK-NEXT: 3 15 4.50 * U ld2b { z5.b, z6.b }, p3/z, [x17, x16]
-# CHECK-NEXT: 3 12 1.50 * U ld2d { z0.d, z1.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT: 2 11 1.00 * U ld2d { z0.d, z1.d }, p0/z, [x0]
-# CHECK-NEXT: 2 11 1.00 * U ld2d { z21.d, z22.d }, p5/z, [x10, #10, mul vl]
-# CHECK-NEXT: 2 11 1.00 * U ld2d { z23.d, z24.d }, p3/z, [x13, #-16, mul vl]
-# CHECK-NEXT: 3 12 1.50 * U ld2d { z5.d, z6.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT: 3 15 4.50 * U ld2h { z0.h, z1.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: 3 15 4.50 * U ld2h { z0.h, z1.h }, p0/z, [x0]
-# CHECK-NEXT: 3 15 4.50 * U ld2h { z21.h, z22.h }, p5/z, [x10, #10, mul vl]
-# CHECK-NEXT: 3 15 4.50 * U ld2h { z23.h, z24.h }, p3/z, [x13, #-16, mul vl]
-# CHECK-NEXT: 3 15 4.50 * U ld2h { z5.h, z6.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT: 3 12 1.50 * U ld2w { z0.s, z1.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: 2 11 1.00 * U ld2w { z0.s, z1.s }, p0/z, [x0]
-# CHECK-NEXT: 2 11 1.00 * U ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl]
-# CHECK-NEXT: 2 11 1.00 * U ld2w { z23.s, z24.s }, p3/z, [x13, #-16, mul vl]
-# CHECK-NEXT: 3 12 1.50 * U ld2w { z5.s, z6.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT: 4 15 6.50 * U ld3b { z0.b - z2.b }, p0/z, [x0, x0]
-# CHECK-NEXT: 4 15 6.50 * U ld3b { z0.b - z2.b }, p0/z, [x0]
-# CHECK-NEXT: 4 15 6.50 * U ld3b { z21.b - z23.b }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: 4 15 6.50 * U ld3b { z23.b - z25.b }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: 4 15 6.50 * U ld3b { z5.b - z7.b }, p3/z, [x17, x16]
-# CHECK-NEXT: 4 12 2.00 * U ld3d { z0.d - z2.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT: 3 11 1.50 * U ld3d { z0.d - z2.d }, p0/z, [x0]
-# CHECK-NEXT: 3 11 1.50 * U ld3d { z21.d - z23.d }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: 3 11 1.50 * U ld3d { z23.d - z25.d }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: 4 12 2.00 * U ld3d { z5.d - z7.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT: 4 15 6.50 * U ld3h { z0.h - z2.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: 4 15 6.50 * U ld3h { z0.h - z2.h }, p0/z, [x0]
-# CHECK-NEXT: 4 15 6.50 * U ld3h { z21.h - z23.h }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: 4 15 6.50 * U ld3h { z23.h - z25.h }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: 4 15 6.50 * U ld3h { z5.h - z7.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT: 4 12 2.00 * U ld3w { z0.s - z2.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: 3 11 1.50 * U ld3w { z0.s - z2.s }, p0/z, [x0]
-# CHECK-NEXT: 3 11 1.50 * U ld3w { z21.s - z23.s }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: 3 11 1.50 * U ld3w { z23.s - z25.s }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: 4 12 2.00 * U ld3w { z5.s - z7.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT: 5 15 8.50 * U ld4b { z0.b - z3.b }, p0/z, [x0, x0]
-# CHECK-NEXT: 5 15 8.50 * U ld4b { z0.b - z3.b }, p0/z, [x0]
-# CHECK-NEXT: 5 15 8.50 * U ld4b { z21.b - z24.b }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: 5 15 8.50 * U ld4b { z23.b - z26.b }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: 5 15 8.50 * U ld4b { z5.b - z8.b }, p3/z, [x17, x16]
-# CHECK-NEXT: 5 12 2.50 * U ld4d { z0.d - z3.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT: 4 11 2.00 * U ld4d { z0.d - z3.d }, p0/z, [x0]
-# CHECK-NEXT: 4 11 2.00 * U ld4d { z21.d - z24.d }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: 4 11 2.00 * U ld4d { z23.d - z26.d }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: 5 12 2.50 * U ld4d { z5.d - z8.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT: 5 15 8.50 * U ld4h { z0.h - z3.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: 5 15 8.50 * U ld4h { z0.h - z3.h }, p0/z, [x0]
-# CHECK-NEXT: 5 15 8.50 * U ld4h { z21.h - z24.h }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: 5 15 8.50 * U ld4h { z23.h - z26.h }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: 5 15 8.50 * U ld4h { z5.h - z8.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT: 5 12 2.50 * U ld4w { z0.s - z3.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: 4 11 2.00 * U ld4w { z0.s - z3.s }, p0/z, [x0]
-# CHECK-NEXT: 4 11 2.00 * U ld4w { z21.s - z24.s }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: 4 11 2.00 * U ld4w { z23.s - z26.s }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: 5 12 2.50 * U ld4w { z5.s - z8.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: 1 20 2.00 * ld1w { z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT: 1 11 0.50 * ld1w { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 20 2.00 * ld1w { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 1 16 2.00 * ld1w { z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT: 1 11 0.50 * ld1w { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 23 4.00 * ld1w { z31.s }, p7/z, [sp, z31.s, sxtw #2]
+# CHECK-NEXT: 1 23 4.00 * ld1w { z31.s }, p7/z, [sp, z31.s, uxtw #2]
+# CHECK-NEXT: 1 19 4.00 * ld1w { z31.s }, p7/z, [z31.s, #124]
+# CHECK-NEXT: 3 15 4.50 * ld2b { z0.b, z1.b }, p0/z, [x0, x0]
+# CHECK-NEXT: 3 15 4.50 * ld2b { z0.b, z1.b }, p0/z, [x0]
+# CHECK-NEXT: 3 15 4.50 * ld2b { z21.b, z22.b }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: 3 15 4.50 * ld2b { z23.b, z24.b }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: 3 15 4.50 * ld2b { z5.b, z6.b }, p3/z, [x17, x16]
+# CHECK-NEXT: 3 12 1.50 * ld2d { z0.d, z1.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: 2 11 1.00 * ld2d { z0.d, z1.d }, p0/z, [x0]
+# CHECK-NEXT: 2 11 1.00 * ld2d { z21.d, z22.d }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: 2 11 1.00 * ld2d { z23.d, z24.d }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: 3 12 1.50 * ld2d { z5.d, z6.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: 3 15 4.50 * ld2h { z0.h, z1.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 3 15 4.50 * ld2h { z0.h, z1.h }, p0/z, [x0]
+# CHECK-NEXT: 3 15 4.50 * ld2h { z21.h, z22.h }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: 3 15 4.50 * ld2h { z23.h, z24.h }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: 3 15 4.50 * ld2h { z5.h, z6.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: 3 12 1.50 * ld2w { z0.s, z1.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 2 11 1.00 * ld2w { z0.s, z1.s }, p0/z, [x0]
+# CHECK-NEXT: 2 11 1.00 * ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: 2 11 1.00 * ld2w { z23.s, z24.s }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: 3 12 1.50 * ld2w { z5.s, z6.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: 4 15 6.50 * ld3b { z0.b - z2.b }, p0/z, [x0, x0]
+# CHECK-NEXT: 4 15 6.50 * ld3b { z0.b - z2.b }, p0/z, [x0]
+# CHECK-NEXT: 4 15 6.50 * ld3b { z21.b - z23.b }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 4 15 6.50 * ld3b { z23.b - z25.b }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 4 15 6.50 * ld3b { z5.b - z7.b }, p3/z, [x17, x16]
+# CHECK-NEXT: 4 12 2.00 * ld3d { z0.d - z2.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: 3 11 1.50 * ld3d { z0.d - z2.d }, p0/z, [x0]
+# CHECK-NEXT: 3 11 1.50 * ld3d { z21.d - z23.d }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 3 11 1.50 * ld3d { z23.d - z25.d }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 4 12 2.00 * ld3d { z5.d - z7.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: 4 15 6.50 * ld3h { z0.h - z2.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 4 15 6.50 * ld3h { z0.h - z2.h }, p0/z, [x0]
+# CHECK-NEXT: 4 15 6.50 * ld3h { z21.h - z23.h }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 4 15 6.50 * ld3h { z23.h - z25.h }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 4 15 6.50 * ld3h { z5.h - z7.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: 4 12 2.00 * ld3w { z0.s - z2.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 3 11 1.50 * ld3w { z0.s - z2.s }, p0/z, [x0]
+# CHECK-NEXT: 3 11 1.50 * ld3w { z21.s - z23.s }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 3 11 1.50 * ld3w { z23.s - z25.s }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 4 12 2.00 * ld3w { z5.s - z7.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: 5 15 8.50 * ld4b { z0.b - z3.b }, p0/z, [x0, x0]
+# CHECK-NEXT: 5 15 8.50 * ld4b { z0.b - z3.b }, p0/z, [x0]
+# CHECK-NEXT: 5 15 8.50 * ld4b { z21.b - z24.b }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 5 15 8.50 * ld4b { z23.b - z26.b }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 5 15 8.50 * ld4b { z5.b - z8.b }, p3/z, [x17, x16]
+# CHECK-NEXT: 5 12 2.50 * ld4d { z0.d - z3.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: 4 11 2.00 * ld4d { z0.d - z3.d }, p0/z, [x0]
+# CHECK-NEXT: 4 11 2.00 * ld4d { z21.d - z24.d }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 4 11 2.00 * ld4d { z23.d - z26.d }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 5 12 2.50 * ld4d { z5.d - z8.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: 5 15 8.50 * ld4h { z0.h - z3.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 5 15 8.50 * ld4h { z0.h - z3.h }, p0/z, [x0]
+# CHECK-NEXT: 5 15 8.50 * ld4h { z21.h - z24.h }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 5 15 8.50 * ld4h { z23.h - z26.h }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 5 15 8.50 * ld4h { z5.h - z8.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: 5 12 2.50 * ld4w { z0.s - z3.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 4 11 2.00 * ld4w { z0.s - z3.s }, p0/z, [x0]
+# CHECK-NEXT: 4 11 2.00 * ld4w { z21.s - z24.s }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 4 11 2.00 * ld4w { z23.s - z26.s }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 5 12 2.50 * ld4w { z5.s - z8.s }, p3/z, [x17, x16, lsl #2]
# CHECK-NEXT: 1 11 0.50 * U ldff1b { z0.d }, p0/z, [x0, x0]
# CHECK-NEXT: 1 16 2.00 * U ldff1b { z0.d }, p0/z, [z0.d]
# CHECK-NEXT: 1 11 0.50 * U ldff1b { z0.h }, p0/z, [x0, x0]
@@ -3782,9 +3782,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 11 1.00 * ldr p0, [x0]
# CHECK-NEXT: 1 11 1.00 * ldr p5, [x10, #255, mul vl]
# CHECK-NEXT: 1 11 1.00 * ldr p7, [x13, #-256, mul vl]
-# CHECK-NEXT: 1 11 1.00 * U ldr z0, [x0]
-# CHECK-NEXT: 1 11 1.00 * U ldr z23, [x13, #255, mul vl]
-# CHECK-NEXT: 1 11 1.00 * U ldr z31, [sp, #-256, mul vl]
+# CHECK-NEXT: 1 11 1.00 * ldr z0, [x0]
+# CHECK-NEXT: 1 11 1.00 * ldr z23, [x13, #255, mul vl]
+# CHECK-NEXT: 1 11 1.00 * ldr z31, [sp, #-256, mul vl]
# CHECK-NEXT: 1 4 0.50 lsl z0.b, p0/m, z0.b, #0
# CHECK-NEXT: 1 4 0.50 lsl z0.b, p0/m, z0.b, z0.b
# CHECK-NEXT: 1 4 0.50 lsl z0.b, p0/m, z0.b, z1.d
@@ -3811,10 +3811,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 4 0.50 lsl z31.h, z31.h, #15
# CHECK-NEXT: 1 4 0.50 lsl z31.s, p0/m, z31.s, #31
# CHECK-NEXT: 1 4 0.50 lsl z31.s, z31.s, #31
-# CHECK-NEXT: 1 4 0.50 U lslr z0.b, p0/m, z0.b, z0.b
-# CHECK-NEXT: 1 4 0.50 U lslr z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT: 1 4 0.50 U lslr z0.h, p0/m, z0.h, z0.h
-# CHECK-NEXT: 1 4 0.50 U lslr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 1 4 0.50 lslr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 1 4 0.50 lslr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 1 4 0.50 lslr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 1 4 0.50 lslr z0.s, p0/m, z0.s, z0.s
# CHECK-NEXT: 1 4 0.50 lsr z0.b, p0/m, z0.b, #1
# CHECK-NEXT: 1 4 0.50 lsr z0.b, p0/m, z0.b, z0.b
# CHECK-NEXT: 1 4 0.50 lsr z0.b, p0/m, z0.b, z1.d
@@ -3841,10 +3841,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 4 0.50 lsr z31.h, z31.h, #16
# CHECK-NEXT: 1 4 0.50 lsr z31.s, p0/m, z31.s, #32
# CHECK-NEXT: 1 4 0.50 lsr z31.s, z31.s, #32
-# CHECK-NEXT: 1 4 0.50 U lsrr z0.b, p0/m, z0.b, z0.b
-# CHECK-NEXT: 1 4 0.50 U lsrr z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT: 1 4 0.50 U lsrr z0.h, p0/m, z0.h, z0.h
-# CHECK-NEXT: 1 4 0.50 U lsrr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 1 4 0.50 lsrr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 1 4 0.50 lsrr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 1 4 0.50 lsrr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 1 4 0.50 lsrr z0.s, p0/m, z0.s, z0.s
# CHECK-NEXT: 1 9 0.50 mad z0.b, p7/m, z1.b, z31.b
# CHECK-NEXT: 1 9 0.50 mad z0.d, p7/m, z1.d, z31.d
# CHECK-NEXT: 1 9 0.50 mad z0.h, p7/m, z1.h, z31.h
@@ -3865,7 +3865,7 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 3 1.00 mov p15.b, p15/z, p15.b
# CHECK-NEXT: 1 4 1.00 mov z0.b, #127
# CHECK-NEXT: 1 4 1.00 mov z0.b, b0
-# CHECK-NEXT: 1 6 1.00 U mov z0.b, p0/m, b0
+# CHECK-NEXT: 1 6 1.00 mov z0.b, p0/m, b0
# CHECK-NEXT: 1 8 1.00 mov z0.b, p0/m, w0
# CHECK-NEXT: 1 4 1.00 mov z0.b, p0/z, #127
# CHECK-NEXT: 1 6 1.00 mov z0.b, w0
@@ -3927,14 +3927,14 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 4 1.00 mov z21.s, p15/m, #-128
# CHECK-NEXT: 1 4 1.00 mov z21.s, p15/m, #-32768
# CHECK-NEXT: 1 4 0.50 mov z31.b, p15/m, z31.b
-# CHECK-NEXT: 1 6 1.00 U mov z31.b, p7/m, b31
+# CHECK-NEXT: 1 6 1.00 mov z31.b, p7/m, b31
# CHECK-NEXT: 1 1 0.17 movprfx z31, z6
# CHECK-NEXT: 1 8 1.00 mov z31.b, p7/m, wsp
# CHECK-NEXT: 1 6 1.00 mov z31.b, wsp
# CHECK-NEXT: 1 4 1.00 mov z31.b, z31.b[63]
# CHECK-NEXT: 1 4 0.50 mov z31.d, p15/m, z31.d
# CHECK-NEXT: 1 6 1.00 mov z31.d, p7/m, d31
-# CHECK-NEXT: 1 1 0.17 U movprfx z31.d, p7/z, z6.d
+# CHECK-NEXT: 1 1 0.17 movprfx z31.d, p7/z, z6.d
# CHECK-NEXT: 1 8 1.00 mov z31.d, p7/m, sp
# CHECK-NEXT: 1 6 1.00 mov z31.d, sp
# CHECK-NEXT: 1 4 0.50 mov z31.d, z0.d
@@ -3960,10 +3960,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 4 1.00 mov z5.h, #-6
# CHECK-NEXT: 1 4 1.00 mov z5.q, z17.q[3]
# CHECK-NEXT: 1 4 1.00 mov z5.s, #-6
-# CHECK-NEXT: 1 3 1.00 U movs p0.b, p0.b
-# CHECK-NEXT: 1 3 1.00 U movs p0.b, p0/z, p0.b
-# CHECK-NEXT: 1 3 1.00 U movs p15.b, p15.b
-# CHECK-NEXT: 1 3 1.00 U movs p15.b, p15/z, p15.b
+# CHECK-NEXT: 1 3 1.00 movs p0.b, p0.b
+# CHECK-NEXT: 1 3 1.00 movs p0.b, p0/z, p0.b
+# CHECK-NEXT: 1 3 1.00 movs p15.b, p15.b
+# CHECK-NEXT: 1 3 1.00 movs p15.b, p15/z, p15.b
# CHECK-NEXT: 1 1 0.17 U mrs x3, ID_AA64ZFR0_EL1
# CHECK-NEXT: 1 1 0.17 U mrs x3, ZCR_EL1
# CHECK-NEXT: 1 1 0.17 U mrs x3, ZCR_EL12
@@ -3991,8 +3991,8 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 9 1.00 mul z31.s, z31.s, #127
# CHECK-NEXT: 1 3 1.00 nand p0.b, p0/z, p0.b, p0.b
# CHECK-NEXT: 1 3 1.00 nand p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT: 1 3 1.00 U nands p0.b, p0/z, p0.b, p0.b
-# CHECK-NEXT: 1 3 1.00 U nands p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 3 1.00 nands p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: 1 3 1.00 nands p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: 1 4 1.00 neg z0.b, p0/m, z0.b
# CHECK-NEXT: 1 4 1.00 neg z0.d, p0/m, z0.d
# CHECK-NEXT: 1 4 1.00 neg z0.h, p0/m, z0.h
@@ -4003,20 +4003,20 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 4 1.00 neg z31.s, p7/m, z31.s
# CHECK-NEXT: 1 3 1.00 nor p0.b, p0/z, p0.b, p0.b
# CHECK-NEXT: 1 3 1.00 nor p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT: 1 3 1.00 U nors p0.b, p0/z, p0.b, p0.b
-# CHECK-NEXT: 1 3 1.00 U nors p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 3 1.00 nors p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: 1 3 1.00 nors p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: 1 3 1.00 not p0.b, p0/z, p0.b
# CHECK-NEXT: 1 3 1.00 not p15.b, p15/z, p15.b
# CHECK-NEXT: 1 4 0.50 not z31.b, p7/m, z31.b
# CHECK-NEXT: 1 4 0.50 not z31.d, p7/m, z31.d
# CHECK-NEXT: 1 4 0.50 not z31.h, p7/m, z31.h
# CHECK-NEXT: 1 4 0.50 not z31.s, p7/m, z31.s
-# CHECK-NEXT: 1 3 1.00 U nots p0.b, p0/z, p0.b
-# CHECK-NEXT: 1 3 1.00 U nots p15.b, p15/z, p15.b
+# CHECK-NEXT: 1 3 1.00 nots p0.b, p0/z, p0.b
+# CHECK-NEXT: 1 3 1.00 nots p15.b, p15/z, p15.b
# CHECK-NEXT: 1 3 1.00 orn p0.b, p0/z, p0.b, p0.b
# CHECK-NEXT: 1 3 1.00 orn p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT: 1 3 1.00 U orns p0.b, p0/z, p0.b, p0.b
-# CHECK-NEXT: 1 3 1.00 U orns p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 3 1.00 orns p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: 1 3 1.00 orns p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: 1 3 1.00 orr p0.b, p0/z, p0.b, p1.b
# CHECK-NEXT: 1 4 1.00 orr z0.d, z0.d, #0x6
# CHECK-NEXT: 1 4 1.00 orr z0.d, z0.d, #0xfffffffffffffff9
@@ -4031,7 +4031,7 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 4 0.50 orr z31.s, p7/m, z31.s, z31.s
# CHECK-NEXT: 1 4 1.00 orr z5.b, z5.b, #0x6
# CHECK-NEXT: 1 4 1.00 orr z5.b, z5.b, #0xf9
-# CHECK-NEXT: 1 3 1.00 U orrs p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: 1 3 1.00 orrs p0.b, p0/z, p0.b, p1.b
# CHECK-NEXT: 10 46 5.00 orv b0, p7, z31.b
# CHECK-NEXT: 7 34 3.50 orv d0, p7, z31.d
# CHECK-NEXT: 9 42 4.50 orv h0, p7, z31.h
@@ -4048,8 +4048,8 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 1 0.50 * * U prfb #15, p0, [x0]
# CHECK-NEXT: 1 1 0.50 * * U prfb #6, p0, [x0]
# CHECK-NEXT: 1 1 0.50 * * U prfb #7, p0, [x0]
-# CHECK-NEXT: 1 1 2.00 * * prfb #7, p3, [z13.s, #31]
-# CHECK-NEXT: 1 1 2.00 * * prfb #7, p3, [z13.s]
+# CHECK-NEXT: 1 1 2.00 * * U prfb #7, p3, [z13.s, #31]
+# CHECK-NEXT: 1 1 2.00 * * U prfb #7, p3, [z13.s]
# CHECK-NEXT: 1 1 1.00 * * U prfb pldl1keep, p0, [x0, z0.d, uxtw]
# CHECK-NEXT: 1 1 1.00 * * U prfb pldl1keep, p0, [x0, z0.d]
# CHECK-NEXT: 1 1 2.00 * * U prfb pldl1keep, p0, [x0, z0.s, uxtw]
@@ -4075,8 +4075,8 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 1 0.50 * * U prfd #15, p0, [x0]
# CHECK-NEXT: 1 1 1.00 * * U prfd #15, p7, [z31.d, #248]
# CHECK-NEXT: 1 1 1.00 * * U prfd #15, p7, [z31.d]
-# CHECK-NEXT: 1 1 2.00 * * prfd #15, p7, [z31.s, #248]
-# CHECK-NEXT: 1 1 2.00 * * prfd #15, p7, [z31.s]
+# CHECK-NEXT: 1 1 2.00 * * U prfd #15, p7, [z31.s, #248]
+# CHECK-NEXT: 1 1 2.00 * * U prfd #15, p7, [z31.s]
# CHECK-NEXT: 1 1 0.50 * * U prfd #6, p0, [x0]
# CHECK-NEXT: 1 1 0.50 * * U prfd #7, p0, [x0]
# CHECK-NEXT: 1 1 1.00 * * U prfd pldl1keep, p0, [x0, z0.d, lsl #3]
@@ -4102,8 +4102,8 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 1 0.50 * * U prfh #15, p0, [x0]
# CHECK-NEXT: 1 1 1.00 * * U prfh #15, p7, [z31.d, #62]
# CHECK-NEXT: 1 1 1.00 * * U prfh #15, p7, [z31.d]
-# CHECK-NEXT: 1 1 2.00 * * prfh #15, p7, [z31.s, #62]
-# CHECK-NEXT: 1 1 2.00 * * prfh #15, p7, [z31.s]
+# CHECK-NEXT: 1 1 2.00 * * U prfh #15, p7, [z31.s, #62]
+# CHECK-NEXT: 1 1 2.00 * * U prfh #15, p7, [z31.s]
# CHECK-NEXT: 1 1 0.50 * * U prfh #6, p0, [x0]
# CHECK-NEXT: 1 1 0.50 * * U prfh #7, p0, [x0]
# CHECK-NEXT: 1 1 1.00 * * U prfh pldl1keep, p0, [x0, z0.d, lsl #1]
@@ -4129,8 +4129,8 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 1 0.50 * * U prfw #15, p0, [x0]
# CHECK-NEXT: 1 1 1.00 * * U prfw #15, p7, [z31.d, #124]
# CHECK-NEXT: 1 1 1.00 * * U prfw #15, p7, [z31.d]
-# CHECK-NEXT: 1 1 2.00 * * prfw #15, p7, [z31.s, #124]
-# CHECK-NEXT: 1 1 2.00 * * prfw #15, p7, [z31.s]
+# CHECK-NEXT: 1 1 2.00 * * U prfw #15, p7, [z31.s, #124]
+# CHECK-NEXT: 1 1 2.00 * * U prfw #15, p7, [z31.s]
# CHECK-NEXT: 1 1 0.50 * * U prfw #6, p0, [x0]
# CHECK-NEXT: 1 1 0.50 * * U prfw #7, p0, [x0]
# CHECK-NEXT: 1 1 1.00 * * U prfw #7, p3, [x13, z8.d, uxtw #2]
@@ -4193,45 +4193,45 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 3 1.00 ptrue p7.s, vl64
# CHECK-NEXT: 1 3 1.00 ptrue p7.s, vl7
# CHECK-NEXT: 1 3 1.00 ptrue p7.s, vl8
-# CHECK-NEXT: 1 3 1.00 U ptrues p0.b, pow2
-# CHECK-NEXT: 1 3 1.00 U ptrues p0.d, pow2
-# CHECK-NEXT: 1 3 1.00 U ptrues p0.h, pow2
-# CHECK-NEXT: 1 3 1.00 U ptrues p0.s, pow2
-# CHECK-NEXT: 1 3 1.00 U ptrues p15.b
-# CHECK-NEXT: 1 3 1.00 U ptrues p15.d
-# CHECK-NEXT: 1 3 1.00 U ptrues p15.h
-# CHECK-NEXT: 1 3 1.00 U ptrues p15.s
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s, #14
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s, #15
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s, #16
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s, #17
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s, #18
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s, #19
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s, #20
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s, #21
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s, #22
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s, #23
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s, #24
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s, #25
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s, #26
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s, #27
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s, #28
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s, mul3
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s, mul4
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s, vl1
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s, vl128
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s, vl16
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s, vl2
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s, vl256
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s, vl3
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s, vl32
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s, vl4
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s, vl5
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s, vl6
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s, vl64
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s, vl7
-# CHECK-NEXT: 1 3 1.00 U ptrues p7.s, vl8
+# CHECK-NEXT: 1 3 1.00 ptrues p0.b, pow2
+# CHECK-NEXT: 1 3 1.00 ptrues p0.d, pow2
+# CHECK-NEXT: 1 3 1.00 ptrues p0.h, pow2
+# CHECK-NEXT: 1 3 1.00 ptrues p0.s, pow2
+# CHECK-NEXT: 1 3 1.00 ptrues p15.b
+# CHECK-NEXT: 1 3 1.00 ptrues p15.d
+# CHECK-NEXT: 1 3 1.00 ptrues p15.h
+# CHECK-NEXT: 1 3 1.00 ptrues p15.s
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s, #14
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s, #15
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s, #16
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s, #17
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s, #18
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s, #19
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s, #20
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s, #21
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s, #22
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s, #23
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s, #24
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s, #25
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s, #26
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s, #27
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s, #28
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s, mul3
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s, mul4
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s, vl1
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s, vl128
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s, vl16
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s, vl2
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s, vl256
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s, vl3
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s, vl32
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s, vl4
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s, vl5
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s, vl6
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s, vl64
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s, vl7
+# CHECK-NEXT: 1 3 1.00 ptrues p7.s, vl8
# CHECK-NEXT: 1 3 1.00 punpkhi p0.h, p0.b
# CHECK-NEXT: 1 3 1.00 punpkhi p15.h, p15.b
# CHECK-NEXT: 1 3 1.00 punpklo p0.h, p0.b
@@ -4347,19 +4347,19 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 sqdecb x0, all, mul #16
# CHECK-NEXT: 1 2 1.00 sqdecb x0, pow2
# CHECK-NEXT: 1 2 1.00 sqdecb x0, vl1
-# CHECK-NEXT: 1 2 1.00 U sqdecb x0, w0
-# CHECK-NEXT: 1 2 1.00 U sqdecb x0, w0, all, mul #16
-# CHECK-NEXT: 1 2 1.00 U sqdecb x0, w0, pow2
-# CHECK-NEXT: 1 2 1.00 U sqdecb x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 1.00 sqdecb x0, w0
+# CHECK-NEXT: 1 2 1.00 sqdecb x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 1.00 sqdecb x0, w0, pow2
+# CHECK-NEXT: 1 2 1.00 sqdecb x0, w0, pow2, mul #16
# CHECK-NEXT: 1 2 1.00 sqdecd x0
# CHECK-NEXT: 1 2 1.00 sqdecd x0, #14
# CHECK-NEXT: 1 2 1.00 sqdecd x0, all, mul #16
# CHECK-NEXT: 1 2 1.00 sqdecd x0, pow2
# CHECK-NEXT: 1 2 1.00 sqdecd x0, vl1
-# CHECK-NEXT: 1 2 1.00 U sqdecd x0, w0
-# CHECK-NEXT: 1 2 1.00 U sqdecd x0, w0, all, mul #16
-# CHECK-NEXT: 1 2 1.00 U sqdecd x0, w0, pow2
-# CHECK-NEXT: 1 2 1.00 U sqdecd x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 1.00 sqdecd x0, w0
+# CHECK-NEXT: 1 2 1.00 sqdecd x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 1.00 sqdecd x0, w0, pow2
+# CHECK-NEXT: 1 2 1.00 sqdecd x0, w0, pow2, mul #16
# CHECK-NEXT: 1 4 0.50 sqdecd z0.d
# CHECK-NEXT: 1 4 0.50 sqdecd z0.d, all, mul #16
# CHECK-NEXT: 1 4 0.50 sqdecd z0.d, pow2
@@ -4369,10 +4369,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 sqdech x0, all, mul #16
# CHECK-NEXT: 1 2 1.00 sqdech x0, pow2
# CHECK-NEXT: 1 2 1.00 sqdech x0, vl1
-# CHECK-NEXT: 1 2 1.00 U sqdech x0, w0
-# CHECK-NEXT: 1 2 1.00 U sqdech x0, w0, all, mul #16
-# CHECK-NEXT: 1 2 1.00 U sqdech x0, w0, pow2
-# CHECK-NEXT: 1 2 1.00 U sqdech x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 1.00 sqdech x0, w0
+# CHECK-NEXT: 1 2 1.00 sqdech x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 1.00 sqdech x0, w0, pow2
+# CHECK-NEXT: 1 2 1.00 sqdech x0, w0, pow2, mul #16
# CHECK-NEXT: 1 4 0.50 sqdech z0.h
# CHECK-NEXT: 1 4 0.50 sqdech z0.h, all, mul #16
# CHECK-NEXT: 1 4 0.50 sqdech z0.h, pow2
@@ -4381,10 +4381,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 2 6 1.50 sqdecp x0, p0.d
# CHECK-NEXT: 2 6 1.50 sqdecp x0, p0.h
# CHECK-NEXT: 2 6 1.50 sqdecp x0, p0.s
-# CHECK-NEXT: 2 6 1.50 U sqdecp xzr, p15.b, wzr
-# CHECK-NEXT: 2 6 1.50 U sqdecp xzr, p15.d, wzr
-# CHECK-NEXT: 2 6 1.50 U sqdecp xzr, p15.h, wzr
-# CHECK-NEXT: 2 6 1.50 U sqdecp xzr, p15.s, wzr
+# CHECK-NEXT: 2 6 1.50 sqdecp xzr, p15.b, wzr
+# CHECK-NEXT: 2 6 1.50 sqdecp xzr, p15.d, wzr
+# CHECK-NEXT: 2 6 1.50 sqdecp xzr, p15.h, wzr
+# CHECK-NEXT: 2 6 1.50 sqdecp xzr, p15.s, wzr
# CHECK-NEXT: 1 12 1.00 sqdecp z0.d, p0.d
# CHECK-NEXT: 1 12 1.00 sqdecp z0.h, p0.h
# CHECK-NEXT: 1 12 1.00 sqdecp z0.s, p0.s
@@ -4393,10 +4393,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 sqdecw x0, all, mul #16
# CHECK-NEXT: 1 2 1.00 sqdecw x0, pow2
# CHECK-NEXT: 1 2 1.00 sqdecw x0, vl1
-# CHECK-NEXT: 1 2 1.00 U sqdecw x0, w0
-# CHECK-NEXT: 1 2 1.00 U sqdecw x0, w0, all, mul #16
-# CHECK-NEXT: 1 2 1.00 U sqdecw x0, w0, pow2
-# CHECK-NEXT: 1 2 1.00 U sqdecw x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 1.00 sqdecw x0, w0
+# CHECK-NEXT: 1 2 1.00 sqdecw x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 1.00 sqdecw x0, w0, pow2
+# CHECK-NEXT: 1 2 1.00 sqdecw x0, w0, pow2, mul #16
# CHECK-NEXT: 1 4 0.50 sqdecw z0.s
# CHECK-NEXT: 1 4 0.50 sqdecw z0.s, all, mul #16
# CHECK-NEXT: 1 4 0.50 sqdecw z0.s, pow2
@@ -4406,19 +4406,19 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 sqincb x0, all, mul #16
# CHECK-NEXT: 1 2 1.00 sqincb x0, pow2
# CHECK-NEXT: 1 2 1.00 sqincb x0, vl1
-# CHECK-NEXT: 1 2 1.00 U sqincb x0, w0
-# CHECK-NEXT: 1 2 1.00 U sqincb x0, w0, all, mul #16
-# CHECK-NEXT: 1 2 1.00 U sqincb x0, w0, pow2
-# CHECK-NEXT: 1 2 1.00 U sqincb x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 1.00 sqincb x0, w0
+# CHECK-NEXT: 1 2 1.00 sqincb x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 1.00 sqincb x0, w0, pow2
+# CHECK-NEXT: 1 2 1.00 sqincb x0, w0, pow2, mul #16
# CHECK-NEXT: 1 2 1.00 sqincd x0
# CHECK-NEXT: 1 2 1.00 sqincd x0, #14
# CHECK-NEXT: 1 2 1.00 sqincd x0, all, mul #16
# CHECK-NEXT: 1 2 1.00 sqincd x0, pow2
# CHECK-NEXT: 1 2 1.00 sqincd x0, vl1
-# CHECK-NEXT: 1 2 1.00 U sqincd x0, w0
-# CHECK-NEXT: 1 2 1.00 U sqincd x0, w0, all, mul #16
-# CHECK-NEXT: 1 2 1.00 U sqincd x0, w0, pow2
-# CHECK-NEXT: 1 2 1.00 U sqincd x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 1.00 sqincd x0, w0
+# CHECK-NEXT: 1 2 1.00 sqincd x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 1.00 sqincd x0, w0, pow2
+# CHECK-NEXT: 1 2 1.00 sqincd x0, w0, pow2, mul #16
# CHECK-NEXT: 1 4 0.50 sqincd z0.d
# CHECK-NEXT: 1 4 0.50 sqincd z0.d, all, mul #16
# CHECK-NEXT: 1 4 0.50 sqincd z0.d, pow2
@@ -4428,10 +4428,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 sqinch x0, all, mul #16
# CHECK-NEXT: 1 2 1.00 sqinch x0, pow2
# CHECK-NEXT: 1 2 1.00 sqinch x0, vl1
-# CHECK-NEXT: 1 2 1.00 U sqinch x0, w0
-# CHECK-NEXT: 1 2 1.00 U sqinch x0, w0, all, mul #16
-# CHECK-NEXT: 1 2 1.00 U sqinch x0, w0, pow2
-# CHECK-NEXT: 1 2 1.00 U sqinch x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 1.00 sqinch x0, w0
+# CHECK-NEXT: 1 2 1.00 sqinch x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 1.00 sqinch x0, w0, pow2
+# CHECK-NEXT: 1 2 1.00 sqinch x0, w0, pow2, mul #16
# CHECK-NEXT: 1 4 0.50 sqinch z0.h
# CHECK-NEXT: 1 4 0.50 sqinch z0.h, all, mul #16
# CHECK-NEXT: 1 4 0.50 sqinch z0.h, pow2
@@ -4440,10 +4440,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 2 6 1.50 sqincp x0, p0.d
# CHECK-NEXT: 2 6 1.50 sqincp x0, p0.h
# CHECK-NEXT: 2 6 1.50 sqincp x0, p0.s
-# CHECK-NEXT: 2 6 1.50 U sqincp xzr, p15.b, wzr
-# CHECK-NEXT: 2 6 1.50 U sqincp xzr, p15.d, wzr
-# CHECK-NEXT: 2 6 1.50 U sqincp xzr, p15.h, wzr
-# CHECK-NEXT: 2 6 1.50 U sqincp xzr, p15.s, wzr
+# CHECK-NEXT: 2 6 1.50 sqincp xzr, p15.b, wzr
+# CHECK-NEXT: 2 6 1.50 sqincp xzr, p15.d, wzr
+# CHECK-NEXT: 2 6 1.50 sqincp xzr, p15.h, wzr
+# CHECK-NEXT: 2 6 1.50 sqincp xzr, p15.s, wzr
# CHECK-NEXT: 1 12 1.00 sqincp z0.d, p0.d
# CHECK-NEXT: 1 12 1.00 sqincp z0.h, p0.h
# CHECK-NEXT: 1 12 1.00 sqincp z0.s, p0.s
@@ -4452,10 +4452,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 sqincw x0, all, mul #16
# CHECK-NEXT: 1 2 1.00 sqincw x0, pow2
# CHECK-NEXT: 1 2 1.00 sqincw x0, vl1
-# CHECK-NEXT: 1 2 1.00 U sqincw x0, w0
-# CHECK-NEXT: 1 2 1.00 U sqincw x0, w0, all, mul #16
-# CHECK-NEXT: 1 2 1.00 U sqincw x0, w0, pow2
-# CHECK-NEXT: 1 2 1.00 U sqincw x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 1.00 sqincw x0, w0
+# CHECK-NEXT: 1 2 1.00 sqincw x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 1.00 sqincw x0, w0, pow2
+# CHECK-NEXT: 1 2 1.00 sqincw x0, w0, pow2, mul #16
# CHECK-NEXT: 1 4 0.50 sqincw z0.s
# CHECK-NEXT: 1 4 0.50 sqincw z0.s, all, mul #16
# CHECK-NEXT: 1 4 0.50 sqincw z0.s, pow2
@@ -4560,66 +4560,66 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 4 16 4.00 * st1w { z31.d }, p7, [z31.d, #124]
# CHECK-NEXT: 1 11 1.00 * st1w { z31.s }, p7, [sp, #-1, mul vl]
# CHECK-NEXT: 8 16 12.00 * st1w { z31.s }, p7, [z31.s, #124]
-# CHECK-NEXT: 3 12 8.00 * U st2b { z0.b, z1.b }, p0, [x0, x0]
-# CHECK-NEXT: 3 12 8.00 * U st2b { z0.b, z1.b }, p0, [x0]
-# CHECK-NEXT: 3 12 8.00 * U st2b { z21.b, z22.b }, p5, [x10, #10, mul vl]
-# CHECK-NEXT: 3 12 8.00 * U st2b { z23.b, z24.b }, p3, [x13, #-16, mul vl]
-# CHECK-NEXT: 3 12 8.00 * U st2b { z5.b, z6.b }, p3, [x17, x16]
-# CHECK-NEXT: 2 11 2.00 * U st2d { z0.d, z1.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT: 3 12 2.00 * U st2d { z0.d, z1.d }, p0, [x0]
-# CHECK-NEXT: 3 12 2.00 * U st2d { z21.d, z22.d }, p5, [x10, #10, mul vl]
-# CHECK-NEXT: 3 12 2.00 * U st2d { z23.d, z24.d }, p3, [x13, #-16, mul vl]
-# CHECK-NEXT: 2 11 2.00 * U st2d { z5.d, z6.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT: 3 12 8.00 * U st2h { z0.h, z1.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT: 3 12 8.00 * U st2h { z0.h, z1.h }, p0, [x0]
-# CHECK-NEXT: 3 12 8.00 * U st2h { z21.h, z22.h }, p5, [x10, #10, mul vl]
-# CHECK-NEXT: 3 12 8.00 * U st2h { z23.h, z24.h }, p3, [x13, #-16, mul vl]
-# CHECK-NEXT: 3 12 8.00 * U st2h { z5.h, z6.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT: 2 11 2.00 * U st2w { z0.s, z1.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT: 3 12 2.00 * U st2w { z0.s, z1.s }, p0, [x0]
-# CHECK-NEXT: 3 12 2.00 * U st2w { z21.s, z22.s }, p5, [x10, #10, mul vl]
-# CHECK-NEXT: 3 12 2.00 * U st2w { z23.s, z24.s }, p3, [x13, #-16, mul vl]
-# CHECK-NEXT: 2 11 2.00 * U st2w { z5.s, z6.s }, p3, [x17, x16, lsl #2]
-# CHECK-NEXT: 4 15 12.00 * U st3b { z0.b - z2.b }, p0, [x0, x0]
-# CHECK-NEXT: 4 15 12.00 * U st3b { z0.b - z2.b }, p0, [x0]
-# CHECK-NEXT: 4 15 12.00 * U st3b { z21.b - z23.b }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: 4 15 12.00 * U st3b { z23.b - z25.b }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: 4 15 12.00 * U st3b { z5.b - z7.b }, p3, [x17, x16]
-# CHECK-NEXT: 3 11 3.00 * U st3d { z0.d - z2.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT: 4 12 3.00 * U st3d { z0.d - z2.d }, p0, [x0]
-# CHECK-NEXT: 4 12 3.00 * U st3d { z21.d - z23.d }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: 4 12 3.00 * U st3d { z23.d - z25.d }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: 3 11 3.00 * U st3d { z5.d - z7.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT: 4 15 12.00 * U st3h { z0.h - z2.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT: 4 15 12.00 * U st3h { z0.h - z2.h }, p0, [x0]
-# CHECK-NEXT: 4 15 12.00 * U st3h { z21.h - z23.h }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: 4 15 12.00 * U st3h { z23.h - z25.h }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: 4 15 12.00 * U st3h { z5.h - z7.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT: 3 11 3.00 * U st3w { z0.s - z2.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT: 4 12 3.00 * U st3w { z0.s - z2.s }, p0, [x0]
-# CHECK-NEXT: 4 12 3.00 * U st3w { z21.s - z23.s }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: 4 12 3.00 * U st3w { z23.s - z25.s }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: 3 11 3.00 * U st3w { z5.s - z7.s }, p3, [x17, x16, lsl #2]
-# CHECK-NEXT: 5 15 16.00 * U st4b { z0.b - z3.b }, p0, [x0, x0]
-# CHECK-NEXT: 5 15 16.00 * U st4b { z0.b - z3.b }, p0, [x0]
-# CHECK-NEXT: 5 15 16.00 * U st4b { z21.b - z24.b }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: 5 15 16.00 * U st4b { z23.b - z26.b }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: 5 15 16.00 * U st4b { z5.b - z8.b }, p3, [x17, x16]
-# CHECK-NEXT: 4 11 4.00 * U st4d { z0.d - z3.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT: 5 12 4.00 * U st4d { z0.d - z3.d }, p0, [x0]
-# CHECK-NEXT: 5 12 4.00 * U st4d { z21.d - z24.d }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: 5 12 4.00 * U st4d { z23.d - z26.d }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: 4 11 4.00 * U st4d { z5.d - z8.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT: 5 15 16.00 * U st4h { z0.h - z3.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT: 5 15 16.00 * U st4h { z0.h - z3.h }, p0, [x0]
-# CHECK-NEXT: 5 15 16.00 * U st4h { z21.h - z24.h }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: 5 15 16.00 * U st4h { z23.h - z26.h }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: 5 15 16.00 * U st4h { z5.h - z8.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT: 4 11 4.00 * U st4w { z0.s - z3.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT: 5 12 4.00 * U st4w { z0.s - z3.s }, p0, [x0]
-# CHECK-NEXT: 5 12 4.00 * U st4w { z21.s - z24.s }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: 5 12 4.00 * U st4w { z23.s - z26.s }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: 4 11 4.00 * U st4w { z5.s - z8.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: 3 12 8.00 * st2b { z0.b, z1.b }, p0, [x0, x0]
+# CHECK-NEXT: 3 12 8.00 * st2b { z0.b, z1.b }, p0, [x0]
+# CHECK-NEXT: 3 12 8.00 * st2b { z21.b, z22.b }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: 3 12 8.00 * st2b { z23.b, z24.b }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: 3 12 8.00 * st2b { z5.b, z6.b }, p3, [x17, x16]
+# CHECK-NEXT: 2 11 2.00 * st2d { z0.d, z1.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: 3 12 2.00 * st2d { z0.d, z1.d }, p0, [x0]
+# CHECK-NEXT: 3 12 2.00 * st2d { z21.d, z22.d }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: 3 12 2.00 * st2d { z23.d, z24.d }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: 2 11 2.00 * st2d { z5.d, z6.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: 3 12 8.00 * st2h { z0.h, z1.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: 3 12 8.00 * st2h { z0.h, z1.h }, p0, [x0]
+# CHECK-NEXT: 3 12 8.00 * st2h { z21.h, z22.h }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: 3 12 8.00 * st2h { z23.h, z24.h }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: 3 12 8.00 * st2h { z5.h, z6.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: 2 11 2.00 * st2w { z0.s, z1.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: 3 12 2.00 * st2w { z0.s, z1.s }, p0, [x0]
+# CHECK-NEXT: 3 12 2.00 * st2w { z21.s, z22.s }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: 3 12 2.00 * st2w { z23.s, z24.s }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: 2 11 2.00 * st2w { z5.s, z6.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: 4 15 12.00 * st3b { z0.b - z2.b }, p0, [x0, x0]
+# CHECK-NEXT: 4 15 12.00 * st3b { z0.b - z2.b }, p0, [x0]
+# CHECK-NEXT: 4 15 12.00 * st3b { z21.b - z23.b }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: 4 15 12.00 * st3b { z23.b - z25.b }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: 4 15 12.00 * st3b { z5.b - z7.b }, p3, [x17, x16]
+# CHECK-NEXT: 3 11 3.00 * st3d { z0.d - z2.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: 4 12 3.00 * st3d { z0.d - z2.d }, p0, [x0]
+# CHECK-NEXT: 4 12 3.00 * st3d { z21.d - z23.d }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: 4 12 3.00 * st3d { z23.d - z25.d }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: 3 11 3.00 * st3d { z5.d - z7.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: 4 15 12.00 * st3h { z0.h - z2.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: 4 15 12.00 * st3h { z0.h - z2.h }, p0, [x0]
+# CHECK-NEXT: 4 15 12.00 * st3h { z21.h - z23.h }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: 4 15 12.00 * st3h { z23.h - z25.h }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: 4 15 12.00 * st3h { z5.h - z7.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: 3 11 3.00 * st3w { z0.s - z2.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: 4 12 3.00 * st3w { z0.s - z2.s }, p0, [x0]
+# CHECK-NEXT: 4 12 3.00 * st3w { z21.s - z23.s }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: 4 12 3.00 * st3w { z23.s - z25.s }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: 3 11 3.00 * st3w { z5.s - z7.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: 5 15 16.00 * st4b { z0.b - z3.b }, p0, [x0, x0]
+# CHECK-NEXT: 5 15 16.00 * st4b { z0.b - z3.b }, p0, [x0]
+# CHECK-NEXT: 5 15 16.00 * st4b { z21.b - z24.b }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: 5 15 16.00 * st4b { z23.b - z26.b }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: 5 15 16.00 * st4b { z5.b - z8.b }, p3, [x17, x16]
+# CHECK-NEXT: 4 11 4.00 * st4d { z0.d - z3.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: 5 12 4.00 * st4d { z0.d - z3.d }, p0, [x0]
+# CHECK-NEXT: 5 12 4.00 * st4d { z21.d - z24.d }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: 5 12 4.00 * st4d { z23.d - z26.d }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: 4 11 4.00 * st4d { z5.d - z8.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: 5 15 16.00 * st4h { z0.h - z3.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: 5 15 16.00 * st4h { z0.h - z3.h }, p0, [x0]
+# CHECK-NEXT: 5 15 16.00 * st4h { z21.h - z24.h }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: 5 15 16.00 * st4h { z23.h - z26.h }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: 5 15 16.00 * st4h { z5.h - z8.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: 4 11 4.00 * st4w { z0.s - z3.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: 5 12 4.00 * st4w { z0.s - z3.s }, p0, [x0]
+# CHECK-NEXT: 5 12 4.00 * st4w { z21.s - z24.s }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: 5 12 4.00 * st4w { z23.s - z26.s }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: 4 11 4.00 * st4w { z5.s - z8.s }, p3, [x17, x16, lsl #2]
# CHECK-NEXT: 1 11 1.00 * stnt1b { z0.b }, p0, [x0, x0]
# CHECK-NEXT: 1 11 1.00 * stnt1b { z0.b }, p0, [x0]
# CHECK-NEXT: 1 11 1.00 * stnt1b { z21.b }, p5, [x10, #7, mul vl]
@@ -4639,9 +4639,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 11 1.00 * str p0, [x0]
# CHECK-NEXT: 1 11 1.00 * str p15, [sp, #-256, mul vl]
# CHECK-NEXT: 1 11 1.00 * str p5, [x10, #255, mul vl]
-# CHECK-NEXT: 1 11 1.00 * U str z0, [x0]
-# CHECK-NEXT: 1 11 1.00 * U str z21, [x10, #-256, mul vl]
-# CHECK-NEXT: 1 11 1.00 * U str z31, [sp, #255, mul vl]
+# CHECK-NEXT: 1 11 1.00 * str z0, [x0]
+# CHECK-NEXT: 1 11 1.00 * str z21, [x10, #-256, mul vl]
+# CHECK-NEXT: 1 11 1.00 * str z31, [sp, #255, mul vl]
# CHECK-NEXT: 1 4 0.50 sub z0.b, p0/m, z0.b, z0.b
# CHECK-NEXT: 1 4 1.00 sub z0.b, z0.b, #0
# CHECK-NEXT: 1 4 0.50 sub z0.b, z0.b, z0.b
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s
index 5891350b78022..0f4003671a36c 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s
@@ -3486,10 +3486,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 addp z0.h, p0/m, z0.h, z1.h
# CHECK-NEXT: 1 2 0.50 addp z29.s, p7/m, z29.s, z30.s
# CHECK-NEXT: 1 2 0.50 addp z31.d, p7/m, z31.d, z30.d
-# CHECK-NEXT: 1 2 0.50 U addpl sp, sp, #31
-# CHECK-NEXT: 1 2 0.50 U addpl x0, x0, #-32
-# CHECK-NEXT: 1 2 0.50 U addpl x21, x21, #0
-# CHECK-NEXT: 1 2 0.50 U addpl x23, x8, #-1
+# CHECK-NEXT: 1 2 0.50 addpl sp, sp, #31
+# CHECK-NEXT: 1 2 0.50 addpl x0, x0, #-32
+# CHECK-NEXT: 1 2 0.50 addpl x21, x21, #0
+# CHECK-NEXT: 1 2 0.50 addpl x23, x8, #-1
# CHECK-NEXT: 1 2 0.50 addvl sp, sp, #31
# CHECK-NEXT: 1 2 0.50 addvl x0, x0, #-32
# CHECK-NEXT: 1 2 0.50 addvl x21, x21, #0
@@ -3531,7 +3531,7 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 and z31.s, p7/m, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 and z5.b, z5.b, #0x6
# CHECK-NEXT: 1 2 0.50 and z5.b, z5.b, #0xf9
-# CHECK-NEXT: 2 2 1.00 U ands p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: 2 2 1.00 ands p0.b, p0/z, p0.b, p1.b
# CHECK-NEXT: 2 6 1.00 andv b0, p7, z31.b
# CHECK-NEXT: 2 6 1.00 andv d0, p7, z31.d
# CHECK-NEXT: 2 6 1.00 andv h0, p7, z31.h
@@ -3570,10 +3570,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 4 1.00 asrd z31.d, p0/m, z31.d, #64
# CHECK-NEXT: 1 4 1.00 asrd z31.h, p0/m, z31.h, #16
# CHECK-NEXT: 1 4 1.00 asrd z31.s, p0/m, z31.s, #32
-# CHECK-NEXT: 1 2 1.00 U asrr z0.b, p0/m, z0.b, z0.b
-# CHECK-NEXT: 1 2 1.00 U asrr z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT: 1 2 1.00 U asrr z0.h, p0/m, z0.h, z0.h
-# CHECK-NEXT: 1 2 1.00 U asrr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 1 2 1.00 asrr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 1 2 1.00 asrr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 1 2 1.00 asrr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 1 2 1.00 asrr z0.s, p0/m, z0.s, z0.s
# CHECK-NEXT: 1 2 1.00 bcax z29.d, z29.d, z30.d, z31.d
# CHECK-NEXT: 2 6 2.00 bdep z0.b, z1.b, z31.b
# CHECK-NEXT: 2 6 2.00 bdep z0.d, z1.d, z31.d
@@ -3583,8 +3583,8 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 2 6 2.00 bext z0.d, z1.d, z31.d
# CHECK-NEXT: 2 6 2.00 bext z0.h, z1.h, z31.h
# CHECK-NEXT: 2 6 2.00 bext z0.s, z1.s, z31.s
-# CHECK-NEXT: 1 3 1.00 U bfcvt z0.h, p0/m, z1.s
-# CHECK-NEXT: 1 3 1.00 U bfcvtnt z0.h, p0/m, z1.s
+# CHECK-NEXT: 1 3 1.00 bfcvt z0.h, p0/m, z1.s
+# CHECK-NEXT: 1 3 1.00 bfcvtnt z0.h, p0/m, z1.s
# CHECK-NEXT: 1 4 0.50 bfdot z0.s, z1.h, z2.h
# CHECK-NEXT: 1 4 0.50 bfdot z0.s, z1.h, z2.h[0]
# CHECK-NEXT: 1 4 0.50 bfdot z0.s, z1.h, z2.h[3]
@@ -3611,26 +3611,26 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 bic z31.d, p7/m, z31.d, z31.d
# CHECK-NEXT: 1 2 0.50 bic z31.h, p7/m, z31.h, z31.h
# CHECK-NEXT: 1 2 0.50 bic z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: 2 2 1.00 U bics p0.b, p0/z, p0.b, p0.b
-# CHECK-NEXT: 2 2 1.00 U bics p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 2 2 1.00 bics p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: 2 2 1.00 bics p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: 1 2 0.50 brka p0.b, p15/m, p15.b
# CHECK-NEXT: 1 2 0.50 brka p0.b, p15/z, p15.b
-# CHECK-NEXT: 1 3 0.50 U brkas p0.b, p15/z, p15.b
+# CHECK-NEXT: 1 3 0.50 brkas p0.b, p15/z, p15.b
# CHECK-NEXT: 1 2 0.50 brkb p0.b, p15/m, p15.b
# CHECK-NEXT: 1 2 0.50 brkb p0.b, p15/z, p15.b
-# CHECK-NEXT: 1 3 0.50 U brkbs p0.b, p15/z, p15.b
+# CHECK-NEXT: 1 3 0.50 brkbs p0.b, p15/z, p15.b
# CHECK-NEXT: 1 2 2.00 brkn p0.b, p15/z, p1.b, p0.b
# CHECK-NEXT: 1 2 2.00 brkn p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT: 2 3 1.00 U brkns p0.b, p15/z, p1.b, p0.b
-# CHECK-NEXT: 2 3 1.00 U brkns p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 2 3 1.00 brkns p0.b, p15/z, p1.b, p0.b
+# CHECK-NEXT: 2 3 1.00 brkns p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: 1 2 2.00 brkpa p0.b, p15/z, p1.b, p2.b
# CHECK-NEXT: 1 2 2.00 brkpa p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT: 2 3 1.00 U brkpas p0.b, p15/z, p1.b, p2.b
-# CHECK-NEXT: 2 3 1.00 U brkpas p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 2 3 1.00 brkpas p0.b, p15/z, p1.b, p2.b
+# CHECK-NEXT: 2 3 1.00 brkpas p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: 1 2 2.00 brkpb p0.b, p15/z, p1.b, p2.b
# CHECK-NEXT: 1 2 2.00 brkpb p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT: 2 3 1.00 U brkpbs p0.b, p15/z, p1.b, p2.b
-# CHECK-NEXT: 2 3 1.00 U brkpbs p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 2 3 1.00 brkpbs p0.b, p15/z, p1.b, p2.b
+# CHECK-NEXT: 2 3 1.00 brkpbs p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: 1 2 0.50 bsl z0.d, z0.d, z1.d, z2.d
# CHECK-NEXT: 1 2 0.50 bsl1n z0.d, z0.d, z1.d, z2.d
# CHECK-NEXT: 1 2 0.50 bsl2n z0.d, z0.d, z1.d, z2.d
@@ -3652,7 +3652,7 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 4 1.00 cdot z29.d, z30.h, z0.h[0], #180
# CHECK-NEXT: 1 4 1.00 cdot z31.d, z30.h, z7.h[1], #270
# CHECK-NEXT: 1 4 1.00 cdot z5.d, z6.h, z3.h[0], #90
-# CHECK-NEXT: 1 3 1.00 U clasta b0, p7, b0, z31.b
+# CHECK-NEXT: 1 3 1.00 clasta b0, p7, b0, z31.b
# CHECK-NEXT: 1 3 1.00 clasta d0, p7, d0, z31.d
# CHECK-NEXT: 1 3 1.00 clasta h0, p7, h0, z31.h
# CHECK-NEXT: 1 3 1.00 clasta s0, p7, s0, z31.s
@@ -3664,7 +3664,7 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 3 1.00 clasta z0.d, p7, z0.d, z31.d
# CHECK-NEXT: 1 3 1.00 clasta z0.h, p7, z0.h, z31.h
# CHECK-NEXT: 1 3 1.00 clasta z0.s, p7, z0.s, z31.s
-# CHECK-NEXT: 1 3 1.00 U clastb b0, p7, b0, z31.b
+# CHECK-NEXT: 1 3 1.00 clastb b0, p7, b0, z31.b
# CHECK-NEXT: 1 3 1.00 clastb d0, p7, d0, z31.d
# CHECK-NEXT: 1 3 1.00 clastb h0, p7, h0, z31.h
# CHECK-NEXT: 1 3 1.00 clastb s0, p7, s0, z31.s
@@ -3884,14 +3884,14 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 cntw x0, pow2
# CHECK-NEXT: 1 3 1.00 compact z31.d, p7, z31.d
# CHECK-NEXT: 1 3 1.00 compact z31.s, p7, z31.s
-# CHECK-NEXT: 1 1 0.50 U ctermeq w30, wzr
-# CHECK-NEXT: 1 1 0.50 U ctermeq wzr, w30
-# CHECK-NEXT: 1 1 0.50 U ctermeq x30, xzr
-# CHECK-NEXT: 1 1 0.50 U ctermeq xzr, x30
-# CHECK-NEXT: 1 1 0.50 U ctermne w30, wzr
-# CHECK-NEXT: 1 1 0.50 U ctermne wzr, w30
-# CHECK-NEXT: 1 1 0.50 U ctermne x30, xzr
-# CHECK-NEXT: 1 1 0.50 U ctermne xzr, x30
+# CHECK-NEXT: 1 1 0.50 ctermeq w30, wzr
+# CHECK-NEXT: 1 1 0.50 ctermeq wzr, w30
+# CHECK-NEXT: 1 1 0.50 ctermeq x30, xzr
+# CHECK-NEXT: 1 1 0.50 ctermeq xzr, x30
+# CHECK-NEXT: 1 1 0.50 ctermne w30, wzr
+# CHECK-NEXT: 1 1 0.50 ctermne wzr, w30
+# CHECK-NEXT: 1 1 0.50 ctermne x30, xzr
+# CHECK-NEXT: 1 1 0.50 ctermne xzr, x30
# CHECK-NEXT: 1 2 0.50 decb x0
# CHECK-NEXT: 1 2 0.50 decb x0, #14
# CHECK-NEXT: 1 2 0.50 decb x0, all, mul #16
@@ -3915,9 +3915,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 decp xzr, p15.d
# CHECK-NEXT: 1 2 0.50 decp xzr, p15.h
# CHECK-NEXT: 1 2 0.50 decp xzr, p15.s
-# CHECK-NEXT: 3 7 1.00 U decp z31.d, p15.d
-# CHECK-NEXT: 3 7 1.00 U decp z31.h, p15.h
-# CHECK-NEXT: 3 7 1.00 U decp z31.s, p15.s
+# CHECK-NEXT: 3 7 1.00 decp z31.d, p15.d
+# CHECK-NEXT: 3 7 1.00 decp z31.h, p15.h
+# CHECK-NEXT: 3 7 1.00 decp z31.s, p15.s
# CHECK-NEXT: 1 2 0.50 decw x0
# CHECK-NEXT: 1 2 0.50 decw x0, #14
# CHECK-NEXT: 1 2 0.50 decw x0, all, mul #16
@@ -3947,7 +3947,7 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 eorbt z0.d, z1.d, z31.d
# CHECK-NEXT: 1 2 0.50 eorbt z0.h, z1.h, z31.h
# CHECK-NEXT: 1 2 0.50 eorbt z0.s, z1.s, z31.s
-# CHECK-NEXT: 2 2 1.00 U eors p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: 2 2 1.00 eors p0.b, p0/z, p0.b, p1.b
# CHECK-NEXT: 1 2 0.50 eortb z0.b, z1.b, z31.b
# CHECK-NEXT: 1 2 0.50 eortb z0.d, z1.d, z31.d
# CHECK-NEXT: 1 2 0.50 eortb z0.h, z1.h, z31.h
@@ -3956,10 +3956,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 2 6 1.00 eorv d0, p7, z31.d
# CHECK-NEXT: 2 6 1.00 eorv h0, p7, z31.h
# CHECK-NEXT: 2 6 1.00 eorv s0, p7, z31.s
-# CHECK-NEXT: 1 2 0.50 U ext z0.b, { z1.b, z2.b }, #0
+# CHECK-NEXT: 1 2 0.50 ext z0.b, { z1.b, z2.b }, #0
# CHECK-NEXT: 1 2 0.50 ext z31.b, z31.b, z0.b, #0
# CHECK-NEXT: 1 2 0.50 ext z31.b, z31.b, z0.b, #255
-# CHECK-NEXT: 1 2 0.50 U ext z31.b, { z30.b, z31.b }, #255
+# CHECK-NEXT: 1 2 0.50 ext z31.b, { z30.b, z31.b }, #255
# CHECK-NEXT: 1 2 0.50 fabd z0.d, p7/m, z0.d, z31.d
# CHECK-NEXT: 1 2 0.50 fabd z0.h, p7/m, z0.h, z31.h
# CHECK-NEXT: 1 2 0.50 fabd z0.s, p7/m, z0.s, z31.s
@@ -4189,12 +4189,12 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 4 0.50 fmlslt z30.s, z31.h, z7.h[7]
# CHECK-NEXT: 1 2 0.50 fmov z0.d, #-10.00000000
# CHECK-NEXT: 1 2 0.50 fmov z0.d, #0.12500000
-# CHECK-NEXT: 1 2 0.50 U fmov z0.d, p0/m, #-10.00000000
-# CHECK-NEXT: 1 2 0.50 U fmov z0.d, p0/m, #0.12500000
+# CHECK-NEXT: 1 2 0.50 fmov z0.d, p0/m, #-10.00000000
+# CHECK-NEXT: 1 2 0.50 fmov z0.d, p0/m, #0.12500000
# CHECK-NEXT: 1 2 0.50 fmov z0.h, #-0.12500000
-# CHECK-NEXT: 1 2 0.50 U fmov z0.h, p0/m, #-0.12500000
+# CHECK-NEXT: 1 2 0.50 fmov z0.h, p0/m, #-0.12500000
# CHECK-NEXT: 1 2 0.50 fmov z0.s, #-0.12500000
-# CHECK-NEXT: 1 2 0.50 U fmov z0.s, p0/m, #-0.12500000
+# CHECK-NEXT: 1 2 0.50 fmov z0.s, p0/m, #-0.12500000
# CHECK-NEXT: 1 4 0.50 fmsb z0.d, p7/m, z1.d, z31.d
# CHECK-NEXT: 1 4 0.50 fmsb z0.h, p7/m, z1.h, z31.h
# CHECK-NEXT: 1 4 0.50 fmsb z0.s, p7/m, z1.s, z31.s
@@ -4336,9 +4336,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 incp xzr, p15.d
# CHECK-NEXT: 1 2 0.50 incp xzr, p15.h
# CHECK-NEXT: 1 2 0.50 incp xzr, p15.s
-# CHECK-NEXT: 3 7 1.00 U incp z31.d, p15.d
-# CHECK-NEXT: 3 7 1.00 U incp z31.h, p15.h
-# CHECK-NEXT: 3 7 1.00 U incp z31.s, p15.s
+# CHECK-NEXT: 3 7 1.00 incp z31.d, p15.d
+# CHECK-NEXT: 3 7 1.00 incp z31.h, p15.h
+# CHECK-NEXT: 3 7 1.00 incp z31.s, p15.s
# CHECK-NEXT: 1 2 0.50 incw x0
# CHECK-NEXT: 1 2 0.50 incw x0, #14
# CHECK-NEXT: 1 2 0.50 incw x0, all, mul #16
@@ -4390,7 +4390,7 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 2 5 1.00 insr z31.h, wzr
# CHECK-NEXT: 1 3 1.00 insr z31.s, s31
# CHECK-NEXT: 2 5 1.00 insr z31.s, wzr
-# CHECK-NEXT: 1 3 1.00 U lasta b0, p7, z31.b
+# CHECK-NEXT: 1 3 1.00 lasta b0, p7, z31.b
# CHECK-NEXT: 1 3 1.00 lasta d0, p7, z31.d
# CHECK-NEXT: 1 3 1.00 lasta h0, p7, z31.h
# CHECK-NEXT: 1 3 1.00 lasta s0, p7, z31.s
@@ -4398,7 +4398,7 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 2 5 1.00 lasta w0, p7, z31.h
# CHECK-NEXT: 2 5 1.00 lasta w0, p7, z31.s
# CHECK-NEXT: 2 5 1.00 lasta x0, p7, z31.d
-# CHECK-NEXT: 1 3 1.00 U lastb b0, p7, z31.b
+# CHECK-NEXT: 1 3 1.00 lastb b0, p7, z31.b
# CHECK-NEXT: 1 3 1.00 lastb d0, p7, z31.d
# CHECK-NEXT: 1 3 1.00 lastb h0, p7, z31.h
# CHECK-NEXT: 1 3 1.00 lastb s0, p7, z31.s
@@ -4408,68 +4408,68 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 2 5 1.00 lastb x0, p7, z31.d
# CHECK-NEXT: 1 6 0.50 * ld1b { z0.b }, p0/z, [sp, x0]
# CHECK-NEXT: 1 6 0.50 * ld1b { z0.b }, p0/z, [x0, x0]
-# CHECK-NEXT: 1 6 0.33 * U ld1b { z0.b }, p0/z, [x0]
-# CHECK-NEXT: 1 6 0.33 * U ld1b { z0.d }, p0/z, [x0]
-# CHECK-NEXT: 4 9 1.00 * U ld1b { z0.d }, p0/z, [z0.d]
-# CHECK-NEXT: 1 6 0.33 * U ld1b { z0.h }, p0/z, [x0]
-# CHECK-NEXT: 2 9 0.50 * U ld1b { z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT: 2 9 0.50 * U ld1b { z0.s }, p0/z, [x0, z0.s, uxtw]
-# CHECK-NEXT: 1 6 0.33 * U ld1b { z0.s }, p0/z, [x0]
-# CHECK-NEXT: 2 9 0.50 * U ld1b { z0.s }, p0/z, [z0.s]
-# CHECK-NEXT: 1 6 0.33 * U ld1b { z21.b }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: 1 6 0.33 * U ld1b { z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: 4 9 1.00 * U ld1b { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: 4 9 1.00 * U ld1b { z21.d }, p5/z, [x10, z21.d, uxtw]
-# CHECK-NEXT: 1 6 0.33 * U ld1b { z21.h }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: 1 6 0.33 * U ld1b { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z0.b }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 4 9 1.00 * ld1b { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z0.h }, p0/z, [x0]
+# CHECK-NEXT: 2 9 0.50 * ld1b { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 2 9 0.50 * ld1b { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 2 9 0.50 * ld1b { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z21.b }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 4 9 1.00 * ld1b { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 4 9 1.00 * ld1b { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z21.s }, p5/z, [x10, #5, mul vl]
# CHECK-NEXT: 1 6 0.50 * ld1b { z21.s }, p5/z, [x10, x21]
# CHECK-NEXT: 1 6 0.50 * ld1b { z23.d }, p3/z, [x13, x8]
-# CHECK-NEXT: 1 6 0.33 * U ld1b { z31.b }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 1 6 0.33 * U ld1b { z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 4 9 1.00 * U ld1b { z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT: 4 9 1.00 * U ld1b { z31.d }, p7/z, [z31.d, #31]
-# CHECK-NEXT: 1 6 0.33 * U ld1b { z31.h }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 1 6 0.33 * U ld1b { z31.s }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 2 9 0.50 * U ld1b { z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z31.b }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 4 9 1.00 * ld1b { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 4 9 1.00 * ld1b { z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 2 9 0.50 * ld1b { z31.s }, p7/z, [z31.s, #31]
# CHECK-NEXT: 1 6 0.50 * ld1b { z5.h }, p3/z, [x17, x16]
-# CHECK-NEXT: 4 9 1.00 * U ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
-# CHECK-NEXT: 4 9 1.00 * U ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
-# CHECK-NEXT: 1 6 0.33 * U ld1d { z0.d }, p0/z, [x0]
-# CHECK-NEXT: 4 9 1.00 * U ld1d { z0.d }, p0/z, [z0.d]
-# CHECK-NEXT: 1 6 0.33 * U ld1d { z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: 4 9 1.00 * U ld1d { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: 4 9 1.00 * U ld1d { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 4 9 1.00 * ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
+# CHECK-NEXT: 4 9 1.00 * ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
+# CHECK-NEXT: 1 6 0.33 * ld1d { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 4 9 1.00 * ld1d { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 1 6 0.33 * ld1d { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 4 9 1.00 * ld1d { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 4 9 1.00 * ld1d { z21.d }, p5/z, [x10, z21.d, uxtw]
# CHECK-NEXT: 1 6 0.50 * ld1d { z23.d }, p3/z, [sp, x8, lsl #3]
# CHECK-NEXT: 1 6 0.50 * ld1d { z23.d }, p3/z, [x13, x8, lsl #3]
-# CHECK-NEXT: 4 9 1.00 * U ld1d { z23.d }, p3/z, [x13, z8.d, lsl #3]
-# CHECK-NEXT: 1 6 0.33 * U ld1d { z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 4 9 1.00 * U ld1d { z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT: 4 9 1.00 * U ld1d { z31.d }, p7/z, [z31.d, #248]
-# CHECK-NEXT: 4 9 1.00 * U ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
-# CHECK-NEXT: 4 9 1.00 * U ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
-# CHECK-NEXT: 1 6 0.33 * U ld1h { z0.d }, p0/z, [x0]
-# CHECK-NEXT: 4 9 1.00 * U ld1h { z0.d }, p0/z, [z0.d]
-# CHECK-NEXT: 1 6 0.33 * U ld1h { z0.h }, p0/z, [x0]
-# CHECK-NEXT: 2 9 0.50 * U ld1h { z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT: 2 9 0.50 * U ld1h { z0.s }, p0/z, [x0, z0.s, uxtw]
-# CHECK-NEXT: 1 6 0.33 * U ld1h { z0.s }, p0/z, [x0]
-# CHECK-NEXT: 2 9 0.50 * U ld1h { z0.s }, p0/z, [z0.s]
-# CHECK-NEXT: 1 6 0.33 * U ld1h { z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: 4 9 1.00 * U ld1h { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: 4 9 1.00 * U ld1h { z21.d }, p5/z, [x10, z21.d, uxtw]
-# CHECK-NEXT: 1 6 0.33 * U ld1h { z21.h }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: 1 6 0.33 * U ld1h { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 4 9 1.00 * ld1d { z23.d }, p3/z, [x13, z8.d, lsl #3]
+# CHECK-NEXT: 1 6 0.33 * ld1d { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 4 9 1.00 * ld1d { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 4 9 1.00 * ld1d { z31.d }, p7/z, [z31.d, #248]
+# CHECK-NEXT: 4 9 1.00 * ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: 4 9 1.00 * ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 4 9 1.00 * ld1h { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z0.h }, p0/z, [x0]
+# CHECK-NEXT: 2 9 0.50 * ld1h { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 2 9 0.50 * ld1h { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 2 9 0.50 * ld1h { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 4 9 1.00 * ld1h { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 4 9 1.00 * ld1h { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z21.s }, p5/z, [x10, #5, mul vl]
# CHECK-NEXT: 1 6 0.50 * ld1h { z21.s }, p5/z, [x10, x21, lsl #1]
# CHECK-NEXT: 1 6 0.50 * ld1h { z23.d }, p3/z, [x13, x8, lsl #1]
-# CHECK-NEXT: 4 9 1.00 * U ld1h { z23.d }, p3/z, [x13, z8.d, lsl #1]
-# CHECK-NEXT: 1 6 0.33 * U ld1h { z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 4 9 1.00 * U ld1h { z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT: 4 9 1.00 * U ld1h { z31.d }, p7/z, [z31.d, #62]
-# CHECK-NEXT: 1 6 0.33 * U ld1h { z31.h }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 1 6 0.33 * U ld1h { z31.s }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 4 10 1.00 * U ld1h { z31.s }, p7/z, [sp, z31.s, sxtw #1]
-# CHECK-NEXT: 4 10 1.00 * U ld1h { z31.s }, p7/z, [sp, z31.s, uxtw #1]
-# CHECK-NEXT: 2 9 0.50 * U ld1h { z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT: 4 9 1.00 * ld1h { z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 4 9 1.00 * ld1h { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 4 9 1.00 * ld1h { z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 4 10 1.00 * ld1h { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT: 4 10 1.00 * ld1h { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT: 2 9 0.50 * ld1h { z31.s }, p7/z, [z31.s, #62]
# CHECK-NEXT: 1 6 0.50 * ld1h { z5.h }, p3/z, [sp, x16, lsl #1]
# CHECK-NEXT: 1 6 0.50 * ld1h { z5.h }, p3/z, [x17, x16, lsl #1]
# CHECK-NEXT: 1 6 0.33 * ld1rb { z0.b }, p0/z, [x0]
@@ -4524,146 +4524,146 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 6 0.33 * ld1rw { z0.s }, p0/z, [x0]
# CHECK-NEXT: 1 6 0.33 * ld1rw { z31.d }, p7/z, [sp, #252]
# CHECK-NEXT: 1 6 0.33 * ld1rw { z31.s }, p7/z, [sp, #252]
-# CHECK-NEXT: 1 6 0.33 * U ld1sb { z0.d }, p0/z, [x0]
-# CHECK-NEXT: 4 9 1.00 * U ld1sb { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 4 9 1.00 * ld1sb { z0.d }, p0/z, [z0.d]
# CHECK-NEXT: 1 6 0.50 * ld1sb { z0.h }, p0/z, [sp, x0]
# CHECK-NEXT: 1 6 0.50 * ld1sb { z0.h }, p0/z, [x0, x0]
-# CHECK-NEXT: 1 6 0.33 * U ld1sb { z0.h }, p0/z, [x0]
-# CHECK-NEXT: 2 9 0.50 * U ld1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT: 1 6 0.33 * U ld1sb { z0.s }, p0/z, [x0]
-# CHECK-NEXT: 2 9 0.50 * U ld1sb { z0.s }, p0/z, [z0.s]
-# CHECK-NEXT: 1 6 0.33 * U ld1sb { z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: 4 9 1.00 * U ld1sb { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: 4 9 1.00 * U ld1sb { z21.d }, p5/z, [x10, z21.d, uxtw]
-# CHECK-NEXT: 1 6 0.33 * U ld1sb { z21.h }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: 1 6 0.33 * U ld1sb { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z0.h }, p0/z, [x0]
+# CHECK-NEXT: 2 9 0.50 * ld1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 2 9 0.50 * ld1sb { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 4 9 1.00 * ld1sb { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 4 9 1.00 * ld1sb { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z21.s }, p5/z, [x10, #5, mul vl]
# CHECK-NEXT: 1 6 0.50 * ld1sb { z21.s }, p5/z, [x10, x21]
# CHECK-NEXT: 1 6 0.50 * ld1sb { z23.d }, p3/z, [x13, x8]
-# CHECK-NEXT: 1 6 0.33 * U ld1sb { z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 4 9 1.00 * U ld1sb { z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT: 4 9 1.00 * U ld1sb { z31.d }, p7/z, [z31.d, #31]
-# CHECK-NEXT: 1 6 0.33 * U ld1sb { z31.h }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 1 6 0.33 * U ld1sb { z31.s }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 2 9 0.50 * U ld1sb { z31.s }, p7/z, [z31.s, #31]
-# CHECK-NEXT: 4 9 1.00 * U ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
-# CHECK-NEXT: 4 9 1.00 * U ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
-# CHECK-NEXT: 1 6 0.33 * U ld1sh { z0.d }, p0/z, [x0]
-# CHECK-NEXT: 4 9 1.00 * U ld1sh { z0.d }, p0/z, [z0.d]
-# CHECK-NEXT: 2 9 0.50 * U ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT: 2 9 0.50 * U ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
-# CHECK-NEXT: 1 6 0.33 * U ld1sh { z0.s }, p0/z, [x0]
-# CHECK-NEXT: 2 9 0.50 * U ld1sh { z0.s }, p0/z, [z0.s]
-# CHECK-NEXT: 1 6 0.33 * U ld1sh { z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: 4 9 1.00 * U ld1sh { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: 4 9 1.00 * U ld1sh { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 4 9 1.00 * ld1sb { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 4 9 1.00 * ld1sb { z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 2 9 0.50 * ld1sb { z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT: 4 9 1.00 * ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: 4 9 1.00 * ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: 1 6 0.33 * ld1sh { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 4 9 1.00 * ld1sh { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 2 9 0.50 * ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 2 9 0.50 * ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1sh { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 2 9 0.50 * ld1sh { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 1 6 0.33 * ld1sh { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 4 9 1.00 * ld1sh { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 4 9 1.00 * ld1sh { z21.d }, p5/z, [x10, z21.d, uxtw]
# CHECK-NEXT: 1 6 0.50 * ld1sh { z21.s }, p5/z, [sp, x21, lsl #1]
-# CHECK-NEXT: 1 6 0.33 * U ld1sh { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1sh { z21.s }, p5/z, [x10, #5, mul vl]
# CHECK-NEXT: 1 6 0.50 * ld1sh { z21.s }, p5/z, [x10, x21, lsl #1]
# CHECK-NEXT: 1 6 0.50 * ld1sh { z23.d }, p3/z, [x13, x8, lsl #1]
-# CHECK-NEXT: 4 9 1.00 * U ld1sh { z23.d }, p3/z, [x13, z8.d, lsl #1]
-# CHECK-NEXT: 1 6 0.33 * U ld1sh { z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 4 9 1.00 * U ld1sh { z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT: 4 9 1.00 * U ld1sh { z31.d }, p7/z, [z31.d, #62]
-# CHECK-NEXT: 1 6 0.33 * U ld1sh { z31.s }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 4 10 1.00 * U ld1sh { z31.s }, p7/z, [sp, z31.s, sxtw #1]
-# CHECK-NEXT: 4 10 1.00 * U ld1sh { z31.s }, p7/z, [sp, z31.s, uxtw #1]
-# CHECK-NEXT: 2 9 0.50 * U ld1sh { z31.s }, p7/z, [z31.s, #62]
-# CHECK-NEXT: 4 9 1.00 * U ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
-# CHECK-NEXT: 4 9 1.00 * U ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
-# CHECK-NEXT: 1 6 0.33 * U ld1sw { z0.d }, p0/z, [x0]
-# CHECK-NEXT: 4 9 1.00 * U ld1sw { z0.d }, p0/z, [z0.d]
-# CHECK-NEXT: 1 6 0.33 * U ld1sw { z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: 4 9 1.00 * U ld1sw { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: 4 9 1.00 * U ld1sw { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 4 9 1.00 * ld1sh { z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT: 1 6 0.33 * ld1sh { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 4 9 1.00 * ld1sh { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 4 9 1.00 * ld1sh { z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT: 1 6 0.33 * ld1sh { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 4 10 1.00 * ld1sh { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT: 4 10 1.00 * ld1sh { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT: 2 9 0.50 * ld1sh { z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT: 4 9 1.00 * ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: 4 9 1.00 * ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: 1 6 0.33 * ld1sw { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 4 9 1.00 * ld1sw { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 1 6 0.33 * ld1sw { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 4 9 1.00 * ld1sw { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 4 9 1.00 * ld1sw { z21.d }, p5/z, [x10, z21.d, uxtw]
# CHECK-NEXT: 1 6 0.50 * ld1sw { z23.d }, p3/z, [sp, x8, lsl #2]
# CHECK-NEXT: 1 6 0.50 * ld1sw { z23.d }, p3/z, [x13, x8, lsl #2]
-# CHECK-NEXT: 4 9 1.00 * U ld1sw { z23.d }, p3/z, [x13, z8.d, lsl #2]
-# CHECK-NEXT: 1 6 0.33 * U ld1sw { z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 4 9 1.00 * U ld1sw { z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT: 4 9 1.00 * U ld1sw { z31.d }, p7/z, [z31.d, #124]
-# CHECK-NEXT: 4 9 1.00 * U ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
-# CHECK-NEXT: 4 9 1.00 * U ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
-# CHECK-NEXT: 1 6 0.33 * U ld1w { z0.d }, p0/z, [x0]
-# CHECK-NEXT: 4 9 1.00 * U ld1w { z0.d }, p0/z, [z0.d]
-# CHECK-NEXT: 2 9 0.50 * U ld1w { z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT: 2 9 0.50 * U ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
-# CHECK-NEXT: 1 6 0.33 * U ld1w { z0.s }, p0/z, [x0]
-# CHECK-NEXT: 2 9 0.50 * U ld1w { z0.s }, p0/z, [z0.s]
-# CHECK-NEXT: 1 6 0.33 * U ld1w { z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: 4 9 1.00 * U ld1w { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: 4 9 1.00 * U ld1w { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 4 9 1.00 * ld1sw { z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT: 1 6 0.33 * ld1sw { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 4 9 1.00 * ld1sw { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 4 9 1.00 * ld1sw { z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT: 4 9 1.00 * ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: 4 9 1.00 * ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: 1 6 0.33 * ld1w { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 4 9 1.00 * ld1w { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 2 9 0.50 * ld1w { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 2 9 0.50 * ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1w { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 2 9 0.50 * ld1w { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 1 6 0.33 * ld1w { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 4 9 1.00 * ld1w { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 4 9 1.00 * ld1w { z21.d }, p5/z, [x10, z21.d, uxtw]
# CHECK-NEXT: 1 6 0.50 * ld1w { z21.s }, p5/z, [sp, x21, lsl #2]
-# CHECK-NEXT: 1 6 0.33 * U ld1w { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1w { z21.s }, p5/z, [x10, #5, mul vl]
# CHECK-NEXT: 1 6 0.50 * ld1w { z21.s }, p5/z, [x10, x21, lsl #2]
# CHECK-NEXT: 1 6 0.50 * ld1w { z23.d }, p3/z, [x13, x8, lsl #2]
-# CHECK-NEXT: 4 9 1.00 * U ld1w { z23.d }, p3/z, [x13, z8.d, lsl #2]
-# CHECK-NEXT: 1 6 0.33 * U ld1w { z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 4 9 1.00 * U ld1w { z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT: 4 9 1.00 * U ld1w { z31.d }, p7/z, [z31.d, #124]
-# CHECK-NEXT: 1 6 0.33 * U ld1w { z31.s }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 4 10 1.00 * U ld1w { z31.s }, p7/z, [sp, z31.s, sxtw #2]
-# CHECK-NEXT: 4 10 1.00 * U ld1w { z31.s }, p7/z, [sp, z31.s, uxtw #2]
-# CHECK-NEXT: 2 9 0.50 * U ld1w { z31.s }, p7/z, [z31.s, #124]
-# CHECK-NEXT: 2 9 0.50 * U ld2b { z0.b, z1.b }, p0/z, [x0, x0]
-# CHECK-NEXT: 2 8 0.50 * U ld2b { z0.b, z1.b }, p0/z, [x0]
-# CHECK-NEXT: 2 8 0.50 * U ld2b { z21.b, z22.b }, p5/z, [x10, #10, mul vl]
-# CHECK-NEXT: 2 8 0.50 * U ld2b { z23.b, z24.b }, p3/z, [x13, #-16, mul vl]
-# CHECK-NEXT: 2 9 0.50 * U ld2b { z5.b, z6.b }, p3/z, [x17, x16]
-# CHECK-NEXT: 2 9 0.50 * U ld2d { z0.d, z1.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT: 2 8 0.50 * U ld2d { z0.d, z1.d }, p0/z, [x0]
-# CHECK-NEXT: 2 8 0.50 * U ld2d { z21.d, z22.d }, p5/z, [x10, #10, mul vl]
-# CHECK-NEXT: 2 8 0.50 * U ld2d { z23.d, z24.d }, p3/z, [x13, #-16, mul vl]
-# CHECK-NEXT: 2 9 0.50 * U ld2d { z5.d, z6.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT: 2 9 0.50 * U ld2h { z0.h, z1.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: 2 8 0.50 * U ld2h { z0.h, z1.h }, p0/z, [x0]
-# CHECK-NEXT: 2 8 0.50 * U ld2h { z21.h, z22.h }, p5/z, [x10, #10, mul vl]
-# CHECK-NEXT: 2 8 0.50 * U ld2h { z23.h, z24.h }, p3/z, [x13, #-16, mul vl]
-# CHECK-NEXT: 2 9 0.50 * U ld2h { z5.h, z6.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT: 2 9 0.50 * U ld2w { z0.s, z1.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: 2 8 0.50 * U ld2w { z0.s, z1.s }, p0/z, [x0]
-# CHECK-NEXT: 2 8 0.50 * U ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl]
-# CHECK-NEXT: 2 8 0.50 * U ld2w { z23.s, z24.s }, p3/z, [x13, #-16, mul vl]
-# CHECK-NEXT: 2 9 0.50 * U ld2w { z5.s, z6.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT: 3 10 0.67 * U ld3b { z0.b - z2.b }, p0/z, [x0, x0]
-# CHECK-NEXT: 2 9 0.50 * U ld3b { z0.b - z2.b }, p0/z, [x0]
-# CHECK-NEXT: 2 9 0.50 * U ld3b { z21.b - z23.b }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: 2 9 0.50 * U ld3b { z23.b - z25.b }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: 3 10 0.67 * U ld3b { z5.b - z7.b }, p3/z, [x17, x16]
-# CHECK-NEXT: 3 10 0.67 * U ld3d { z0.d - z2.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT: 2 9 0.50 * U ld3d { z0.d - z2.d }, p0/z, [x0]
-# CHECK-NEXT: 2 9 0.50 * U ld3d { z21.d - z23.d }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: 2 9 0.50 * U ld3d { z23.d - z25.d }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: 3 10 0.67 * U ld3d { z5.d - z7.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT: 3 10 0.67 * U ld3h { z0.h - z2.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: 2 9 0.50 * U ld3h { z0.h - z2.h }, p0/z, [x0]
-# CHECK-NEXT: 2 9 0.50 * U ld3h { z21.h - z23.h }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: 2 9 0.50 * U ld3h { z23.h - z25.h }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: 3 10 0.67 * U ld3h { z5.h - z7.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT: 3 10 0.67 * U ld3w { z0.s - z2.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: 2 9 0.50 * U ld3w { z0.s - z2.s }, p0/z, [x0]
-# CHECK-NEXT: 2 9 0.50 * U ld3w { z21.s - z23.s }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: 2 9 0.50 * U ld3w { z23.s - z25.s }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: 3 10 0.67 * U ld3w { z5.s - z7.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT: 6 10 1.00 * U ld4b { z0.b - z3.b }, p0/z, [x0, x0]
-# CHECK-NEXT: 4 9 1.00 * U ld4b { z0.b - z3.b }, p0/z, [x0]
-# CHECK-NEXT: 4 9 1.00 * U ld4b { z21.b - z24.b }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: 4 9 1.00 * U ld4b { z23.b - z26.b }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: 6 10 1.00 * U ld4b { z5.b - z8.b }, p3/z, [x17, x16]
-# CHECK-NEXT: 6 10 1.00 * U ld4d { z0.d - z3.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT: 4 9 1.00 * U ld4d { z0.d - z3.d }, p0/z, [x0]
-# CHECK-NEXT: 4 9 1.00 * U ld4d { z21.d - z24.d }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: 4 9 1.00 * U ld4d { z23.d - z26.d }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: 6 10 1.00 * U ld4d { z5.d - z8.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT: 6 10 1.00 * U ld4h { z0.h - z3.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: 4 9 1.00 * U ld4h { z0.h - z3.h }, p0/z, [x0]
-# CHECK-NEXT: 4 9 1.00 * U ld4h { z21.h - z24.h }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: 4 9 1.00 * U ld4h { z23.h - z26.h }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: 6 10 1.00 * U ld4h { z5.h - z8.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT: 6 10 1.00 * U ld4w { z0.s - z3.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: 4 9 1.00 * U ld4w { z0.s - z3.s }, p0/z, [x0]
-# CHECK-NEXT: 4 9 1.00 * U ld4w { z21.s - z24.s }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: 4 9 1.00 * U ld4w { z23.s - z26.s }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: 6 10 1.00 * U ld4w { z5.s - z8.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: 4 9 1.00 * ld1w { z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT: 1 6 0.33 * ld1w { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 4 9 1.00 * ld1w { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 4 9 1.00 * ld1w { z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT: 1 6 0.33 * ld1w { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 4 10 1.00 * ld1w { z31.s }, p7/z, [sp, z31.s, sxtw #2]
+# CHECK-NEXT: 4 10 1.00 * ld1w { z31.s }, p7/z, [sp, z31.s, uxtw #2]
+# CHECK-NEXT: 2 9 0.50 * ld1w { z31.s }, p7/z, [z31.s, #124]
+# CHECK-NEXT: 2 9 0.50 * ld2b { z0.b, z1.b }, p0/z, [x0, x0]
+# CHECK-NEXT: 2 8 0.50 * ld2b { z0.b, z1.b }, p0/z, [x0]
+# CHECK-NEXT: 2 8 0.50 * ld2b { z21.b, z22.b }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: 2 8 0.50 * ld2b { z23.b, z24.b }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: 2 9 0.50 * ld2b { z5.b, z6.b }, p3/z, [x17, x16]
+# CHECK-NEXT: 2 9 0.50 * ld2d { z0.d, z1.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: 2 8 0.50 * ld2d { z0.d, z1.d }, p0/z, [x0]
+# CHECK-NEXT: 2 8 0.50 * ld2d { z21.d, z22.d }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: 2 8 0.50 * ld2d { z23.d, z24.d }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: 2 9 0.50 * ld2d { z5.d, z6.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: 2 9 0.50 * ld2h { z0.h, z1.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 2 8 0.50 * ld2h { z0.h, z1.h }, p0/z, [x0]
+# CHECK-NEXT: 2 8 0.50 * ld2h { z21.h, z22.h }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: 2 8 0.50 * ld2h { z23.h, z24.h }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: 2 9 0.50 * ld2h { z5.h, z6.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: 2 9 0.50 * ld2w { z0.s, z1.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 2 8 0.50 * ld2w { z0.s, z1.s }, p0/z, [x0]
+# CHECK-NEXT: 2 8 0.50 * ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: 2 8 0.50 * ld2w { z23.s, z24.s }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: 2 9 0.50 * ld2w { z5.s, z6.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: 3 10 0.67 * ld3b { z0.b - z2.b }, p0/z, [x0, x0]
+# CHECK-NEXT: 2 9 0.50 * ld3b { z0.b - z2.b }, p0/z, [x0]
+# CHECK-NEXT: 2 9 0.50 * ld3b { z21.b - z23.b }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 2 9 0.50 * ld3b { z23.b - z25.b }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 3 10 0.67 * ld3b { z5.b - z7.b }, p3/z, [x17, x16]
+# CHECK-NEXT: 3 10 0.67 * ld3d { z0.d - z2.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: 2 9 0.50 * ld3d { z0.d - z2.d }, p0/z, [x0]
+# CHECK-NEXT: 2 9 0.50 * ld3d { z21.d - z23.d }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 2 9 0.50 * ld3d { z23.d - z25.d }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 3 10 0.67 * ld3d { z5.d - z7.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: 3 10 0.67 * ld3h { z0.h - z2.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 2 9 0.50 * ld3h { z0.h - z2.h }, p0/z, [x0]
+# CHECK-NEXT: 2 9 0.50 * ld3h { z21.h - z23.h }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 2 9 0.50 * ld3h { z23.h - z25.h }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 3 10 0.67 * ld3h { z5.h - z7.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: 3 10 0.67 * ld3w { z0.s - z2.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 2 9 0.50 * ld3w { z0.s - z2.s }, p0/z, [x0]
+# CHECK-NEXT: 2 9 0.50 * ld3w { z21.s - z23.s }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 2 9 0.50 * ld3w { z23.s - z25.s }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 3 10 0.67 * ld3w { z5.s - z7.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: 6 10 1.00 * ld4b { z0.b - z3.b }, p0/z, [x0, x0]
+# CHECK-NEXT: 4 9 1.00 * ld4b { z0.b - z3.b }, p0/z, [x0]
+# CHECK-NEXT: 4 9 1.00 * ld4b { z21.b - z24.b }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 4 9 1.00 * ld4b { z23.b - z26.b }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 6 10 1.00 * ld4b { z5.b - z8.b }, p3/z, [x17, x16]
+# CHECK-NEXT: 6 10 1.00 * ld4d { z0.d - z3.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: 4 9 1.00 * ld4d { z0.d - z3.d }, p0/z, [x0]
+# CHECK-NEXT: 4 9 1.00 * ld4d { z21.d - z24.d }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 4 9 1.00 * ld4d { z23.d - z26.d }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 6 10 1.00 * ld4d { z5.d - z8.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: 6 10 1.00 * ld4h { z0.h - z3.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 4 9 1.00 * ld4h { z0.h - z3.h }, p0/z, [x0]
+# CHECK-NEXT: 4 9 1.00 * ld4h { z21.h - z24.h }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 4 9 1.00 * ld4h { z23.h - z26.h }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 6 10 1.00 * ld4h { z5.h - z8.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: 6 10 1.00 * ld4w { z0.s - z3.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 4 9 1.00 * ld4w { z0.s - z3.s }, p0/z, [x0]
+# CHECK-NEXT: 4 9 1.00 * ld4w { z21.s - z24.s }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 4 9 1.00 * ld4w { z23.s - z26.s }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 6 10 1.00 * ld4w { z5.s - z8.s }, p3/z, [x17, x16, lsl #2]
# CHECK-NEXT: 2 6 0.50 * U ldff1b { z0.d }, p0/z, [x0, x0]
# CHECK-NEXT: 4 9 1.00 * U ldff1b { z0.d }, p0/z, [z0.d]
# CHECK-NEXT: 2 6 0.50 * U ldff1b { z0.h }, p0/z, [x0, x0]
@@ -4874,9 +4874,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 2 6 0.50 * ldr p0, [x0]
# CHECK-NEXT: 2 6 0.50 * ldr p5, [x10, #255, mul vl]
# CHECK-NEXT: 2 6 0.50 * ldr p7, [x13, #-256, mul vl]
-# CHECK-NEXT: 1 6 0.33 * U ldr z0, [x0]
-# CHECK-NEXT: 1 6 0.33 * U ldr z23, [x13, #255, mul vl]
-# CHECK-NEXT: 1 6 0.33 * U ldr z31, [sp, #-256, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ldr z0, [x0]
+# CHECK-NEXT: 1 6 0.33 * ldr z23, [x13, #255, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ldr z31, [sp, #-256, mul vl]
# CHECK-NEXT: 1 2 1.00 lsl z0.b, p0/m, z0.b, #0
# CHECK-NEXT: 1 2 1.00 lsl z0.b, p0/m, z0.b, z0.b
# CHECK-NEXT: 1 2 1.00 lsl z0.b, p0/m, z0.b, z1.d
@@ -4903,10 +4903,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 lsl z31.h, z31.h, #15
# CHECK-NEXT: 1 2 1.00 lsl z31.s, p0/m, z31.s, #31
# CHECK-NEXT: 1 2 1.00 lsl z31.s, z31.s, #31
-# CHECK-NEXT: 1 2 1.00 U lslr z0.b, p0/m, z0.b, z0.b
-# CHECK-NEXT: 1 2 1.00 U lslr z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT: 1 2 1.00 U lslr z0.h, p0/m, z0.h, z0.h
-# CHECK-NEXT: 1 2 1.00 U lslr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 1 2 1.00 lslr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 1 2 1.00 lslr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 1 2 1.00 lslr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 1 2 1.00 lslr z0.s, p0/m, z0.s, z0.s
# CHECK-NEXT: 1 2 1.00 lsr z0.b, p0/m, z0.b, #1
# CHECK-NEXT: 1 2 1.00 lsr z0.b, p0/m, z0.b, z0.b
# CHECK-NEXT: 1 2 1.00 lsr z0.b, p0/m, z0.b, z1.d
@@ -4933,10 +4933,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 lsr z31.h, z31.h, #16
# CHECK-NEXT: 1 2 1.00 lsr z31.s, p0/m, z31.s, #32
# CHECK-NEXT: 1 2 1.00 lsr z31.s, z31.s, #32
-# CHECK-NEXT: 1 2 1.00 U lsrr z0.b, p0/m, z0.b, z0.b
-# CHECK-NEXT: 1 2 1.00 U lsrr z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT: 1 2 1.00 U lsrr z0.h, p0/m, z0.h, z0.h
-# CHECK-NEXT: 1 2 1.00 U lsrr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 1 2 1.00 lsrr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 1 2 1.00 lsrr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 1 2 1.00 lsrr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 1 2 1.00 lsrr z0.s, p0/m, z0.s, z0.s
# CHECK-NEXT: 1 4 1.00 mad z0.b, p7/m, z1.b, z31.b
# CHECK-NEXT: 2 5 2.00 mad z0.d, p7/m, z1.d, z31.d
# CHECK-NEXT: 1 4 1.00 mad z0.h, p7/m, z1.h, z31.h
@@ -4967,7 +4967,7 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 1 1.00 mov p15.b, p15/z, p15.b
# CHECK-NEXT: 1 2 0.50 mov z0.b, #127
# CHECK-NEXT: 1 2 0.50 mov z0.b, b0
-# CHECK-NEXT: 1 2 0.50 U mov z0.b, p0/m, b0
+# CHECK-NEXT: 1 2 0.50 mov z0.b, p0/m, b0
# CHECK-NEXT: 2 5 1.00 mov z0.b, p0/m, w0
# CHECK-NEXT: 1 2 0.50 mov z0.b, p0/z, #127
# CHECK-NEXT: 1 3 3.00 mov z0.b, w0
@@ -5029,14 +5029,14 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 mov z21.s, p15/m, #-128
# CHECK-NEXT: 1 2 0.50 mov z21.s, p15/m, #-32768
# CHECK-NEXT: 1 2 0.50 mov z31.b, p15/m, z31.b
-# CHECK-NEXT: 1 2 0.50 U mov z31.b, p7/m, b31
+# CHECK-NEXT: 1 2 0.50 mov z31.b, p7/m, b31
# CHECK-NEXT: 1 2 0.50 movprfx z31, z6
# CHECK-NEXT: 2 5 1.00 mov z31.b, p7/m, wsp
# CHECK-NEXT: 1 3 3.00 mov z31.b, wsp
# CHECK-NEXT: 1 2 0.50 mov z31.b, z31.b[63]
# CHECK-NEXT: 1 2 0.50 mov z31.d, p15/m, z31.d
# CHECK-NEXT: 1 2 0.50 mov z31.d, p7/m, d31
-# CHECK-NEXT: 1 2 0.50 U movprfx z31.d, p7/z, z6.d
+# CHECK-NEXT: 1 2 0.50 movprfx z31.d, p7/z, z6.d
# CHECK-NEXT: 2 5 1.00 mov z31.d, p7/m, sp
# CHECK-NEXT: 1 3 3.00 mov z31.d, sp
# CHECK-NEXT: 1 2 0.50 mov z31.d, z0.d
@@ -5062,10 +5062,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 mov z5.h, #-6
# CHECK-NEXT: 1 2 0.50 mov z5.q, z17.q[3]
# CHECK-NEXT: 1 2 0.50 mov z5.s, #-6
-# CHECK-NEXT: 2 2 1.00 U movs p0.b, p0.b
-# CHECK-NEXT: 2 2 1.00 U movs p0.b, p0/z, p0.b
-# CHECK-NEXT: 2 2 1.00 U movs p15.b, p15.b
-# CHECK-NEXT: 2 2 1.00 U movs p15.b, p15/z, p15.b
+# CHECK-NEXT: 2 2 1.00 movs p0.b, p0.b
+# CHECK-NEXT: 2 2 1.00 movs p0.b, p0/z, p0.b
+# CHECK-NEXT: 2 2 1.00 movs p15.b, p15.b
+# CHECK-NEXT: 2 2 1.00 movs p15.b, p15/z, p15.b
# CHECK-NEXT: 1 1 0.10 U mrs x3, ID_AA64ZFR0_EL1
# CHECK-NEXT: 1 1 0.10 U mrs x3, ZCR_EL1
# CHECK-NEXT: 1 1 0.10 U mrs x3, ZCR_EL12
@@ -5100,8 +5100,8 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 4 1.00 mul z31.s, z31.s, #127
# CHECK-NEXT: 1 1 1.00 nand p0.b, p0/z, p0.b, p0.b
# CHECK-NEXT: 1 1 1.00 nand p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT: 2 2 1.00 U nands p0.b, p0/z, p0.b, p0.b
-# CHECK-NEXT: 2 2 1.00 U nands p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 2 2 1.00 nands p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: 2 2 1.00 nands p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: 1 2 0.50 nbsl z0.d, z0.d, z1.d, z2.d
# CHECK-NEXT: 1 2 0.50 neg z0.b, p0/m, z0.b
# CHECK-NEXT: 1 2 0.50 neg z0.d, p0/m, z0.d
@@ -5117,20 +5117,20 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 2 2 1.00 nmatch p15.h, p7/z, z30.h, z31.h
# CHECK-NEXT: 1 1 1.00 nor p0.b, p0/z, p0.b, p0.b
# CHECK-NEXT: 1 1 1.00 nor p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT: 2 2 1.00 U nors p0.b, p0/z, p0.b, p0.b
-# CHECK-NEXT: 2 2 1.00 U nors p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 2 2 1.00 nors p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: 2 2 1.00 nors p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: 1 1 1.00 not p0.b, p0/z, p0.b
# CHECK-NEXT: 1 1 1.00 not p15.b, p15/z, p15.b
# CHECK-NEXT: 1 2 0.50 not z31.b, p7/m, z31.b
# CHECK-NEXT: 1 2 0.50 not z31.d, p7/m, z31.d
# CHECK-NEXT: 1 2 0.50 not z31.h, p7/m, z31.h
# CHECK-NEXT: 1 2 0.50 not z31.s, p7/m, z31.s
-# CHECK-NEXT: 2 2 1.00 U nots p0.b, p0/z, p0.b
-# CHECK-NEXT: 2 2 1.00 U nots p15.b, p15/z, p15.b
+# CHECK-NEXT: 2 2 1.00 nots p0.b, p0/z, p0.b
+# CHECK-NEXT: 2 2 1.00 nots p15.b, p15/z, p15.b
# CHECK-NEXT: 1 1 1.00 orn p0.b, p0/z, p0.b, p0.b
# CHECK-NEXT: 1 1 1.00 orn p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT: 2 2 1.00 U orns p0.b, p0/z, p0.b, p0.b
-# CHECK-NEXT: 2 2 1.00 U orns p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 2 2 1.00 orns p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: 2 2 1.00 orns p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: 1 1 1.00 orr p0.b, p0/z, p0.b, p1.b
# CHECK-NEXT: 1 2 0.50 orr z0.d, z0.d, #0x6
# CHECK-NEXT: 1 2 0.50 orr z0.d, z0.d, #0xfffffffffffffff9
@@ -5145,7 +5145,7 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 orr z31.s, p7/m, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 orr z5.b, z5.b, #0x6
# CHECK-NEXT: 1 2 0.50 orr z5.b, z5.b, #0xf9
-# CHECK-NEXT: 2 2 1.00 U orrs p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: 2 2 1.00 orrs p0.b, p0/z, p0.b, p1.b
# CHECK-NEXT: 2 6 1.00 orv b0, p7, z31.b
# CHECK-NEXT: 2 6 1.00 orv d0, p7, z31.d
# CHECK-NEXT: 2 6 1.00 orv h0, p7, z31.h
@@ -5170,8 +5170,8 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 4 0.33 * * U prfb #15, p0, [x0]
# CHECK-NEXT: 1 4 0.33 * * U prfb #6, p0, [x0]
# CHECK-NEXT: 1 4 0.33 * * U prfb #7, p0, [x0]
-# CHECK-NEXT: 1 4 0.33 * * prfb #7, p3, [z13.s, #31]
-# CHECK-NEXT: 1 4 0.33 * * prfb #7, p3, [z13.s]
+# CHECK-NEXT: 1 4 0.33 * * U prfb #7, p3, [z13.s, #31]
+# CHECK-NEXT: 1 4 0.33 * * U prfb #7, p3, [z13.s]
# CHECK-NEXT: 1 4 0.33 * * U prfb pldl1keep, p0, [x0, z0.d, uxtw]
# CHECK-NEXT: 1 4 0.33 * * U prfb pldl1keep, p0, [x0, z0.d]
# CHECK-NEXT: 1 4 0.33 * * U prfb pldl1keep, p0, [x0, z0.s, uxtw]
@@ -5197,8 +5197,8 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 4 0.33 * * U prfd #15, p0, [x0]
# CHECK-NEXT: 1 4 0.33 * * U prfd #15, p7, [z31.d, #248]
# CHECK-NEXT: 1 4 0.33 * * U prfd #15, p7, [z31.d]
-# CHECK-NEXT: 1 4 0.33 * * prfd #15, p7, [z31.s, #248]
-# CHECK-NEXT: 1 4 0.33 * * prfd #15, p7, [z31.s]
+# CHECK-NEXT: 1 4 0.33 * * U prfd #15, p7, [z31.s, #248]
+# CHECK-NEXT: 1 4 0.33 * * U prfd #15, p7, [z31.s]
# CHECK-NEXT: 1 4 0.33 * * U prfd #6, p0, [x0]
# CHECK-NEXT: 1 4 0.33 * * U prfd #7, p0, [x0]
# CHECK-NEXT: 1 4 0.33 * * U prfd pldl1keep, p0, [x0, z0.d, lsl #3]
@@ -5224,8 +5224,8 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 4 0.33 * * U prfh #15, p0, [x0]
# CHECK-NEXT: 1 4 0.33 * * U prfh #15, p7, [z31.d, #62]
# CHECK-NEXT: 1 4 0.33 * * U prfh #15, p7, [z31.d]
-# CHECK-NEXT: 1 4 0.33 * * prfh #15, p7, [z31.s, #62]
-# CHECK-NEXT: 1 4 0.33 * * prfh #15, p7, [z31.s]
+# CHECK-NEXT: 1 4 0.33 * * U prfh #15, p7, [z31.s, #62]
+# CHECK-NEXT: 1 4 0.33 * * U prfh #15, p7, [z31.s]
# CHECK-NEXT: 1 4 0.33 * * U prfh #6, p0, [x0]
# CHECK-NEXT: 1 4 0.33 * * U prfh #7, p0, [x0]
# CHECK-NEXT: 1 4 0.33 * * U prfh pldl1keep, p0, [x0, z0.d, lsl #1]
@@ -5251,8 +5251,8 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 4 0.33 * * U prfw #15, p0, [x0]
# CHECK-NEXT: 1 4 0.33 * * U prfw #15, p7, [z31.d, #124]
# CHECK-NEXT: 1 4 0.33 * * U prfw #15, p7, [z31.d]
-# CHECK-NEXT: 1 4 0.33 * * prfw #15, p7, [z31.s, #124]
-# CHECK-NEXT: 1 4 0.33 * * prfw #15, p7, [z31.s]
+# CHECK-NEXT: 1 4 0.33 * * U prfw #15, p7, [z31.s, #124]
+# CHECK-NEXT: 1 4 0.33 * * U prfw #15, p7, [z31.s]
# CHECK-NEXT: 1 4 0.33 * * U prfw #6, p0, [x0]
# CHECK-NEXT: 1 4 0.33 * * U prfw #7, p0, [x0]
# CHECK-NEXT: 1 4 0.33 * * U prfw #7, p3, [x13, z8.d, uxtw #2]
@@ -5315,45 +5315,45 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 ptrue p7.s, vl64
# CHECK-NEXT: 1 2 0.50 ptrue p7.s, vl7
# CHECK-NEXT: 1 2 0.50 ptrue p7.s, vl8
-# CHECK-NEXT: 1 3 0.50 U ptrues p0.b, pow2
-# CHECK-NEXT: 1 3 0.50 U ptrues p0.d, pow2
-# CHECK-NEXT: 1 3 0.50 U ptrues p0.h, pow2
-# CHECK-NEXT: 1 3 0.50 U ptrues p0.s, pow2
-# CHECK-NEXT: 1 3 0.50 U ptrues p15.b
-# CHECK-NEXT: 1 3 0.50 U ptrues p15.d
-# CHECK-NEXT: 1 3 0.50 U ptrues p15.h
-# CHECK-NEXT: 1 3 0.50 U ptrues p15.s
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s, #14
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s, #15
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s, #16
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s, #17
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s, #18
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s, #19
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s, #20
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s, #21
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s, #22
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s, #23
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s, #24
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s, #25
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s, #26
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s, #27
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s, #28
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s, mul3
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s, mul4
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s, vl1
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s, vl128
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s, vl16
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s, vl2
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s, vl256
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s, vl3
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s, vl32
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s, vl4
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s, vl5
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s, vl6
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s, vl64
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s, vl7
-# CHECK-NEXT: 1 3 0.50 U ptrues p7.s, vl8
+# CHECK-NEXT: 1 3 0.50 ptrues p0.b, pow2
+# CHECK-NEXT: 1 3 0.50 ptrues p0.d, pow2
+# CHECK-NEXT: 1 3 0.50 ptrues p0.h, pow2
+# CHECK-NEXT: 1 3 0.50 ptrues p0.s, pow2
+# CHECK-NEXT: 1 3 0.50 ptrues p15.b
+# CHECK-NEXT: 1 3 0.50 ptrues p15.d
+# CHECK-NEXT: 1 3 0.50 ptrues p15.h
+# CHECK-NEXT: 1 3 0.50 ptrues p15.s
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s, #14
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s, #15
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s, #16
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s, #17
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s, #18
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s, #19
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s, #20
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s, #21
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s, #22
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s, #23
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s, #24
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s, #25
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s, #26
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s, #27
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s, #28
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s, mul3
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s, mul4
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s, vl1
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s, vl128
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s, vl16
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s, vl2
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s, vl256
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s, vl3
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s, vl32
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s, vl4
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s, vl5
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s, vl6
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s, vl64
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s, vl7
+# CHECK-NEXT: 1 3 0.50 ptrues p7.s, vl8
# CHECK-NEXT: 1 2 0.50 punpkhi p0.h, p0.b
# CHECK-NEXT: 1 2 0.50 punpkhi p15.h, p15.b
# CHECK-NEXT: 1 2 0.50 punpklo p0.h, p0.b
@@ -5585,10 +5585,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 4 1.00 smullt z0.s, z1.h, z7.h[7]
# CHECK-NEXT: 1 4 1.00 smullt z29.s, z30.h, z31.h
# CHECK-NEXT: 1 4 1.00 smullt z31.d, z31.s, z31.s
-# CHECK-NEXT: 1 3 1.00 U splice z29.b, p7, { z30.b, z31.b }
-# CHECK-NEXT: 1 3 1.00 U splice z29.d, p7, { z30.d, z31.d }
-# CHECK-NEXT: 1 3 1.00 U splice z29.h, p7, { z30.h, z31.h }
-# CHECK-NEXT: 1 3 1.00 U splice z29.s, p7, { z30.s, z31.s }
+# CHECK-NEXT: 1 3 1.00 splice z29.b, p7, { z30.b, z31.b }
+# CHECK-NEXT: 1 3 1.00 splice z29.d, p7, { z30.d, z31.d }
+# CHECK-NEXT: 1 3 1.00 splice z29.h, p7, { z30.h, z31.h }
+# CHECK-NEXT: 1 3 1.00 splice z29.s, p7, { z30.s, z31.s }
# CHECK-NEXT: 1 3 1.00 splice z31.b, p7, z31.b, z31.b
# CHECK-NEXT: 1 3 1.00 splice z31.d, p7, z31.d, z31.d
# CHECK-NEXT: 1 3 1.00 splice z31.h, p7, z31.h, z31.h
@@ -5629,19 +5629,19 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 sqdecb x0, all, mul #16
# CHECK-NEXT: 1 2 0.50 sqdecb x0, pow2
# CHECK-NEXT: 1 2 0.50 sqdecb x0, vl1
-# CHECK-NEXT: 1 2 0.50 U sqdecb x0, w0
-# CHECK-NEXT: 1 2 0.50 U sqdecb x0, w0, all, mul #16
-# CHECK-NEXT: 1 2 0.50 U sqdecb x0, w0, pow2
-# CHECK-NEXT: 1 2 0.50 U sqdecb x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdecb x0, w0
+# CHECK-NEXT: 1 2 0.50 sqdecb x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdecb x0, w0, pow2
+# CHECK-NEXT: 1 2 0.50 sqdecb x0, w0, pow2, mul #16
# CHECK-NEXT: 1 2 0.50 sqdecd x0
# CHECK-NEXT: 1 2 0.50 sqdecd x0, #14
# CHECK-NEXT: 1 2 0.50 sqdecd x0, all, mul #16
# CHECK-NEXT: 1 2 0.50 sqdecd x0, pow2
# CHECK-NEXT: 1 2 0.50 sqdecd x0, vl1
-# CHECK-NEXT: 1 2 0.50 U sqdecd x0, w0
-# CHECK-NEXT: 1 2 0.50 U sqdecd x0, w0, all, mul #16
-# CHECK-NEXT: 1 2 0.50 U sqdecd x0, w0, pow2
-# CHECK-NEXT: 1 2 0.50 U sqdecd x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdecd x0, w0
+# CHECK-NEXT: 1 2 0.50 sqdecd x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdecd x0, w0, pow2
+# CHECK-NEXT: 1 2 0.50 sqdecd x0, w0, pow2, mul #16
# CHECK-NEXT: 1 2 1.00 sqdecd z0.d
# CHECK-NEXT: 1 2 1.00 sqdecd z0.d, all, mul #16
# CHECK-NEXT: 1 2 1.00 sqdecd z0.d, pow2
@@ -5651,10 +5651,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 sqdech x0, all, mul #16
# CHECK-NEXT: 1 2 0.50 sqdech x0, pow2
# CHECK-NEXT: 1 2 0.50 sqdech x0, vl1
-# CHECK-NEXT: 1 2 0.50 U sqdech x0, w0
-# CHECK-NEXT: 1 2 0.50 U sqdech x0, w0, all, mul #16
-# CHECK-NEXT: 1 2 0.50 U sqdech x0, w0, pow2
-# CHECK-NEXT: 1 2 0.50 U sqdech x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdech x0, w0
+# CHECK-NEXT: 1 2 0.50 sqdech x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdech x0, w0, pow2
+# CHECK-NEXT: 1 2 0.50 sqdech x0, w0, pow2, mul #16
# CHECK-NEXT: 1 2 1.00 sqdech z0.h
# CHECK-NEXT: 1 2 1.00 sqdech z0.h, all, mul #16
# CHECK-NEXT: 1 2 1.00 sqdech z0.h, pow2
@@ -5663,10 +5663,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 sqdecp x0, p0.d
# CHECK-NEXT: 1 2 0.50 sqdecp x0, p0.h
# CHECK-NEXT: 1 2 0.50 sqdecp x0, p0.s
-# CHECK-NEXT: 1 2 0.50 U sqdecp xzr, p15.b, wzr
-# CHECK-NEXT: 1 2 0.50 U sqdecp xzr, p15.d, wzr
-# CHECK-NEXT: 1 2 0.50 U sqdecp xzr, p15.h, wzr
-# CHECK-NEXT: 1 2 0.50 U sqdecp xzr, p15.s, wzr
+# CHECK-NEXT: 1 2 0.50 sqdecp xzr, p15.b, wzr
+# CHECK-NEXT: 1 2 0.50 sqdecp xzr, p15.d, wzr
+# CHECK-NEXT: 1 2 0.50 sqdecp xzr, p15.h, wzr
+# CHECK-NEXT: 1 2 0.50 sqdecp xzr, p15.s, wzr
# CHECK-NEXT: 3 7 1.00 sqdecp z0.d, p0.d
# CHECK-NEXT: 3 7 1.00 sqdecp z0.h, p0.h
# CHECK-NEXT: 3 7 1.00 sqdecp z0.s, p0.s
@@ -5675,10 +5675,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 sqdecw x0, all, mul #16
# CHECK-NEXT: 1 2 0.50 sqdecw x0, pow2
# CHECK-NEXT: 1 2 0.50 sqdecw x0, vl1
-# CHECK-NEXT: 1 2 0.50 U sqdecw x0, w0
-# CHECK-NEXT: 1 2 0.50 U sqdecw x0, w0, all, mul #16
-# CHECK-NEXT: 1 2 0.50 U sqdecw x0, w0, pow2
-# CHECK-NEXT: 1 2 0.50 U sqdecw x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdecw x0, w0
+# CHECK-NEXT: 1 2 0.50 sqdecw x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdecw x0, w0, pow2
+# CHECK-NEXT: 1 2 0.50 sqdecw x0, w0, pow2, mul #16
# CHECK-NEXT: 1 2 1.00 sqdecw z0.s
# CHECK-NEXT: 1 2 1.00 sqdecw z0.s, all, mul #16
# CHECK-NEXT: 1 2 1.00 sqdecw z0.s, pow2
@@ -5731,19 +5731,19 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 sqincb x0, all, mul #16
# CHECK-NEXT: 1 2 0.50 sqincb x0, pow2
# CHECK-NEXT: 1 2 0.50 sqincb x0, vl1
-# CHECK-NEXT: 1 2 0.50 U sqincb x0, w0
-# CHECK-NEXT: 1 2 0.50 U sqincb x0, w0, all, mul #16
-# CHECK-NEXT: 1 2 0.50 U sqincb x0, w0, pow2
-# CHECK-NEXT: 1 2 0.50 U sqincb x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqincb x0, w0
+# CHECK-NEXT: 1 2 0.50 sqincb x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqincb x0, w0, pow2
+# CHECK-NEXT: 1 2 0.50 sqincb x0, w0, pow2, mul #16
# CHECK-NEXT: 1 2 0.50 sqincd x0
# CHECK-NEXT: 1 2 0.50 sqincd x0, #14
# CHECK-NEXT: 1 2 0.50 sqincd x0, all, mul #16
# CHECK-NEXT: 1 2 0.50 sqincd x0, pow2
# CHECK-NEXT: 1 2 0.50 sqincd x0, vl1
-# CHECK-NEXT: 1 2 0.50 U sqincd x0, w0
-# CHECK-NEXT: 1 2 0.50 U sqincd x0, w0, all, mul #16
-# CHECK-NEXT: 1 2 0.50 U sqincd x0, w0, pow2
-# CHECK-NEXT: 1 2 0.50 U sqincd x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqincd x0, w0
+# CHECK-NEXT: 1 2 0.50 sqincd x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqincd x0, w0, pow2
+# CHECK-NEXT: 1 2 0.50 sqincd x0, w0, pow2, mul #16
# CHECK-NEXT: 1 2 1.00 sqincd z0.d
# CHECK-NEXT: 1 2 1.00 sqincd z0.d, all, mul #16
# CHECK-NEXT: 1 2 1.00 sqincd z0.d, pow2
@@ -5753,10 +5753,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 sqinch x0, all, mul #16
# CHECK-NEXT: 1 2 0.50 sqinch x0, pow2
# CHECK-NEXT: 1 2 0.50 sqinch x0, vl1
-# CHECK-NEXT: 1 2 0.50 U sqinch x0, w0
-# CHECK-NEXT: 1 2 0.50 U sqinch x0, w0, all, mul #16
-# CHECK-NEXT: 1 2 0.50 U sqinch x0, w0, pow2
-# CHECK-NEXT: 1 2 0.50 U sqinch x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqinch x0, w0
+# CHECK-NEXT: 1 2 0.50 sqinch x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqinch x0, w0, pow2
+# CHECK-NEXT: 1 2 0.50 sqinch x0, w0, pow2, mul #16
# CHECK-NEXT: 1 2 1.00 sqinch z0.h
# CHECK-NEXT: 1 2 1.00 sqinch z0.h, all, mul #16
# CHECK-NEXT: 1 2 1.00 sqinch z0.h, pow2
@@ -5765,10 +5765,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 sqincp x0, p0.d
# CHECK-NEXT: 1 2 0.50 sqincp x0, p0.h
# CHECK-NEXT: 1 2 0.50 sqincp x0, p0.s
-# CHECK-NEXT: 1 2 0.50 U sqincp xzr, p15.b, wzr
-# CHECK-NEXT: 1 2 0.50 U sqincp xzr, p15.d, wzr
-# CHECK-NEXT: 1 2 0.50 U sqincp xzr, p15.h, wzr
-# CHECK-NEXT: 1 2 0.50 U sqincp xzr, p15.s, wzr
+# CHECK-NEXT: 1 2 0.50 sqincp xzr, p15.b, wzr
+# CHECK-NEXT: 1 2 0.50 sqincp xzr, p15.d, wzr
+# CHECK-NEXT: 1 2 0.50 sqincp xzr, p15.h, wzr
+# CHECK-NEXT: 1 2 0.50 sqincp xzr, p15.s, wzr
# CHECK-NEXT: 3 7 1.00 sqincp z0.d, p0.d
# CHECK-NEXT: 3 7 1.00 sqincp z0.h, p0.h
# CHECK-NEXT: 3 7 1.00 sqincp z0.s, p0.s
@@ -5777,10 +5777,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 sqincw x0, all, mul #16
# CHECK-NEXT: 1 2 0.50 sqincw x0, pow2
# CHECK-NEXT: 1 2 0.50 sqincw x0, vl1
-# CHECK-NEXT: 1 2 0.50 U sqincw x0, w0
-# CHECK-NEXT: 1 2 0.50 U sqincw x0, w0, all, mul #16
-# CHECK-NEXT: 1 2 0.50 U sqincw x0, w0, pow2
-# CHECK-NEXT: 1 2 0.50 U sqincw x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqincw x0, w0
+# CHECK-NEXT: 1 2 0.50 sqincw x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqincw x0, w0, pow2
+# CHECK-NEXT: 1 2 0.50 sqincw x0, w0, pow2, mul #16
# CHECK-NEXT: 1 2 1.00 sqincw z0.s
# CHECK-NEXT: 1 2 1.00 sqincw z0.s, all, mul #16
# CHECK-NEXT: 1 2 1.00 sqincw z0.s, pow2
@@ -5834,10 +5834,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 4 1.00 sqrshl z0.h, p0/m, z0.h, z1.h
# CHECK-NEXT: 1 4 1.00 sqrshl z29.s, p7/m, z29.s, z30.s
# CHECK-NEXT: 1 4 1.00 sqrshl z31.d, p7/m, z31.d, z30.d
-# CHECK-NEXT: 1 4 1.00 U sqrshlr z0.b, p0/m, z0.b, z1.b
-# CHECK-NEXT: 1 4 1.00 U sqrshlr z0.h, p0/m, z0.h, z1.h
-# CHECK-NEXT: 1 4 1.00 U sqrshlr z29.s, p7/m, z29.s, z30.s
-# CHECK-NEXT: 1 4 1.00 U sqrshlr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 1.00 sqrshlr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 4 1.00 sqrshlr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 4 1.00 sqrshlr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 4 1.00 sqrshlr z31.d, p7/m, z31.d, z30.d
# CHECK-NEXT: 1 4 1.00 sqrshrnb z0.b, z0.h, #1
# CHECK-NEXT: 1 4 1.00 sqrshrnb z0.h, z0.s, #1
# CHECK-NEXT: 1 4 1.00 sqrshrnb z0.s, z0.d, #1
@@ -5874,10 +5874,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 4 1.00 sqshl z31.d, p7/m, z31.d, z30.d
# CHECK-NEXT: 1 4 1.00 sqshl z31.h, p0/m, z31.h, #15
# CHECK-NEXT: 1 4 1.00 sqshl z31.s, p0/m, z31.s, #31
-# CHECK-NEXT: 1 4 1.00 U sqshlr z0.b, p0/m, z0.b, z1.b
-# CHECK-NEXT: 1 4 1.00 U sqshlr z0.h, p0/m, z0.h, z1.h
-# CHECK-NEXT: 1 4 1.00 U sqshlr z29.s, p7/m, z29.s, z30.s
-# CHECK-NEXT: 1 4 1.00 U sqshlr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 1.00 sqshlr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 4 1.00 sqshlr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 4 1.00 sqshlr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 4 1.00 sqshlr z31.d, p7/m, z31.d, z30.d
# CHECK-NEXT: 1 4 1.00 sqshlu z0.b, p0/m, z0.b, #0
# CHECK-NEXT: 1 4 1.00 sqshlu z0.d, p0/m, z0.d, #0
# CHECK-NEXT: 1 4 1.00 sqshlu z0.h, p0/m, z0.h, #0
@@ -5961,10 +5961,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 4 1.00 srshl z0.h, p0/m, z0.h, z1.h
# CHECK-NEXT: 1 4 1.00 srshl z29.s, p7/m, z29.s, z30.s
# CHECK-NEXT: 1 4 1.00 srshl z31.d, p7/m, z31.d, z30.d
-# CHECK-NEXT: 1 4 1.00 U srshlr z0.b, p0/m, z0.b, z1.b
-# CHECK-NEXT: 1 4 1.00 U srshlr z0.h, p0/m, z0.h, z1.h
-# CHECK-NEXT: 1 4 1.00 U srshlr z29.s, p7/m, z29.s, z30.s
-# CHECK-NEXT: 1 4 1.00 U srshlr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 1.00 srshlr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 4 1.00 srshlr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 4 1.00 srshlr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 4 1.00 srshlr z31.d, p7/m, z31.d, z30.d
# CHECK-NEXT: 1 4 1.00 srshr z0.b, p0/m, z0.b, #1
# CHECK-NEXT: 1 4 1.00 srshr z0.d, p0/m, z0.d, #1
# CHECK-NEXT: 1 4 1.00 srshr z0.h, p0/m, z0.h, #1
@@ -6104,66 +6104,66 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 2 2 0.50 * st1w { z31.d }, p7, [z31.d, #124]
# CHECK-NEXT: 2 2 0.50 * st1w { z31.s }, p7, [sp, #-1, mul vl]
# CHECK-NEXT: 4 4 1.00 * st1w { z31.s }, p7, [z31.s, #124]
-# CHECK-NEXT: 2 4 0.50 * U st2b { z0.b, z1.b }, p0, [x0, x0]
-# CHECK-NEXT: 2 4 0.50 * U st2b { z0.b, z1.b }, p0, [x0]
-# CHECK-NEXT: 2 4 0.50 * U st2b { z21.b, z22.b }, p5, [x10, #10, mul vl]
-# CHECK-NEXT: 2 4 0.50 * U st2b { z23.b, z24.b }, p3, [x13, #-16, mul vl]
-# CHECK-NEXT: 2 4 0.50 * U st2b { z5.b, z6.b }, p3, [x17, x16]
-# CHECK-NEXT: 2 4 0.50 * U st2d { z0.d, z1.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT: 2 4 0.50 * U st2d { z0.d, z1.d }, p0, [x0]
-# CHECK-NEXT: 2 4 0.50 * U st2d { z21.d, z22.d }, p5, [x10, #10, mul vl]
-# CHECK-NEXT: 2 4 0.50 * U st2d { z23.d, z24.d }, p3, [x13, #-16, mul vl]
-# CHECK-NEXT: 2 4 0.50 * U st2d { z5.d, z6.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT: 3 4 0.50 * U st2h { z0.h, z1.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT: 2 4 0.50 * U st2h { z0.h, z1.h }, p0, [x0]
-# CHECK-NEXT: 2 4 0.50 * U st2h { z21.h, z22.h }, p5, [x10, #10, mul vl]
-# CHECK-NEXT: 2 4 0.50 * U st2h { z23.h, z24.h }, p3, [x13, #-16, mul vl]
-# CHECK-NEXT: 3 4 0.50 * U st2h { z5.h, z6.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT: 2 4 0.50 * U st2w { z0.s, z1.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT: 2 4 0.50 * U st2w { z0.s, z1.s }, p0, [x0]
-# CHECK-NEXT: 2 4 0.50 * U st2w { z21.s, z22.s }, p5, [x10, #10, mul vl]
-# CHECK-NEXT: 2 4 0.50 * U st2w { z23.s, z24.s }, p3, [x13, #-16, mul vl]
-# CHECK-NEXT: 2 4 0.50 * U st2w { z5.s, z6.s }, p3, [x17, x16, lsl #2]
-# CHECK-NEXT: 15 7 2.50 * U st3b { z0.b - z2.b }, p0, [x0, x0]
-# CHECK-NEXT: 10 7 2.50 * U st3b { z0.b - z2.b }, p0, [x0]
-# CHECK-NEXT: 10 7 2.50 * U st3b { z21.b - z23.b }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: 10 7 2.50 * U st3b { z23.b - z25.b }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: 15 7 2.50 * U st3b { z5.b - z7.b }, p3, [x17, x16]
-# CHECK-NEXT: 15 7 2.50 * U st3d { z0.d - z2.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT: 10 7 2.50 * U st3d { z0.d - z2.d }, p0, [x0]
-# CHECK-NEXT: 10 7 2.50 * U st3d { z21.d - z23.d }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: 10 7 2.50 * U st3d { z23.d - z25.d }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: 15 7 2.50 * U st3d { z5.d - z7.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT: 15 7 2.50 * U st3h { z0.h - z2.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT: 10 7 2.50 * U st3h { z0.h - z2.h }, p0, [x0]
-# CHECK-NEXT: 10 7 2.50 * U st3h { z21.h - z23.h }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: 10 7 2.50 * U st3h { z23.h - z25.h }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: 15 7 2.50 * U st3h { z5.h - z7.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT: 15 7 2.50 * U st3w { z0.s - z2.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT: 10 7 2.50 * U st3w { z0.s - z2.s }, p0, [x0]
-# CHECK-NEXT: 10 7 2.50 * U st3w { z21.s - z23.s }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: 10 7 2.50 * U st3w { z23.s - z25.s }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: 15 7 2.50 * U st3w { z5.s - z7.s }, p3, [x17, x16, lsl #2]
-# CHECK-NEXT: 27 11 4.50 * U st4b { z0.b - z3.b }, p0, [x0, x0]
-# CHECK-NEXT: 18 11 4.50 * U st4b { z0.b - z3.b }, p0, [x0]
-# CHECK-NEXT: 18 11 4.50 * U st4b { z21.b - z24.b }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: 18 11 4.50 * U st4b { z23.b - z26.b }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: 27 11 4.50 * U st4b { z5.b - z8.b }, p3, [x17, x16]
-# CHECK-NEXT: 27 11 4.50 * U st4d { z0.d - z3.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT: 18 11 4.50 * U st4d { z0.d - z3.d }, p0, [x0]
-# CHECK-NEXT: 18 11 4.50 * U st4d { z21.d - z24.d }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: 18 11 4.50 * U st4d { z23.d - z26.d }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: 27 11 4.50 * U st4d { z5.d - z8.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT: 27 11 4.50 * U st4h { z0.h - z3.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT: 18 11 4.50 * U st4h { z0.h - z3.h }, p0, [x0]
-# CHECK-NEXT: 18 11 4.50 * U st4h { z21.h - z24.h }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: 18 11 4.50 * U st4h { z23.h - z26.h }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: 27 11 4.50 * U st4h { z5.h - z8.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT: 27 11 4.50 * U st4w { z0.s - z3.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT: 18 11 4.50 * U st4w { z0.s - z3.s }, p0, [x0]
-# CHECK-NEXT: 18 11 4.50 * U st4w { z21.s - z24.s }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: 18 11 4.50 * U st4w { z23.s - z26.s }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: 27 11 4.50 * U st4w { z5.s - z8.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: 2 4 0.50 * st2b { z0.b, z1.b }, p0, [x0, x0]
+# CHECK-NEXT: 2 4 0.50 * st2b { z0.b, z1.b }, p0, [x0]
+# CHECK-NEXT: 2 4 0.50 * st2b { z21.b, z22.b }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: 2 4 0.50 * st2b { z23.b, z24.b }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: 2 4 0.50 * st2b { z5.b, z6.b }, p3, [x17, x16]
+# CHECK-NEXT: 2 4 0.50 * st2d { z0.d, z1.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: 2 4 0.50 * st2d { z0.d, z1.d }, p0, [x0]
+# CHECK-NEXT: 2 4 0.50 * st2d { z21.d, z22.d }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: 2 4 0.50 * st2d { z23.d, z24.d }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: 2 4 0.50 * st2d { z5.d, z6.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: 3 4 0.50 * st2h { z0.h, z1.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: 2 4 0.50 * st2h { z0.h, z1.h }, p0, [x0]
+# CHECK-NEXT: 2 4 0.50 * st2h { z21.h, z22.h }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: 2 4 0.50 * st2h { z23.h, z24.h }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: 3 4 0.50 * st2h { z5.h, z6.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: 2 4 0.50 * st2w { z0.s, z1.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: 2 4 0.50 * st2w { z0.s, z1.s }, p0, [x0]
+# CHECK-NEXT: 2 4 0.50 * st2w { z21.s, z22.s }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: 2 4 0.50 * st2w { z23.s, z24.s }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: 2 4 0.50 * st2w { z5.s, z6.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: 15 7 2.50 * st3b { z0.b - z2.b }, p0, [x0, x0]
+# CHECK-NEXT: 10 7 2.50 * st3b { z0.b - z2.b }, p0, [x0]
+# CHECK-NEXT: 10 7 2.50 * st3b { z21.b - z23.b }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: 10 7 2.50 * st3b { z23.b - z25.b }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: 15 7 2.50 * st3b { z5.b - z7.b }, p3, [x17, x16]
+# CHECK-NEXT: 15 7 2.50 * st3d { z0.d - z2.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: 10 7 2.50 * st3d { z0.d - z2.d }, p0, [x0]
+# CHECK-NEXT: 10 7 2.50 * st3d { z21.d - z23.d }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: 10 7 2.50 * st3d { z23.d - z25.d }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: 15 7 2.50 * st3d { z5.d - z7.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: 15 7 2.50 * st3h { z0.h - z2.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: 10 7 2.50 * st3h { z0.h - z2.h }, p0, [x0]
+# CHECK-NEXT: 10 7 2.50 * st3h { z21.h - z23.h }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: 10 7 2.50 * st3h { z23.h - z25.h }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: 15 7 2.50 * st3h { z5.h - z7.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: 15 7 2.50 * st3w { z0.s - z2.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: 10 7 2.50 * st3w { z0.s - z2.s }, p0, [x0]
+# CHECK-NEXT: 10 7 2.50 * st3w { z21.s - z23.s }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: 10 7 2.50 * st3w { z23.s - z25.s }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: 15 7 2.50 * st3w { z5.s - z7.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: 27 11 4.50 * st4b { z0.b - z3.b }, p0, [x0, x0]
+# CHECK-NEXT: 18 11 4.50 * st4b { z0.b - z3.b }, p0, [x0]
+# CHECK-NEXT: 18 11 4.50 * st4b { z21.b - z24.b }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: 18 11 4.50 * st4b { z23.b - z26.b }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: 27 11 4.50 * st4b { z5.b - z8.b }, p3, [x17, x16]
+# CHECK-NEXT: 27 11 4.50 * st4d { z0.d - z3.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: 18 11 4.50 * st4d { z0.d - z3.d }, p0, [x0]
+# CHECK-NEXT: 18 11 4.50 * st4d { z21.d - z24.d }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: 18 11 4.50 * st4d { z23.d - z26.d }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: 27 11 4.50 * st4d { z5.d - z8.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: 27 11 4.50 * st4h { z0.h - z3.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: 18 11 4.50 * st4h { z0.h - z3.h }, p0, [x0]
+# CHECK-NEXT: 18 11 4.50 * st4h { z21.h - z24.h }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: 18 11 4.50 * st4h { z23.h - z26.h }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: 27 11 4.50 * st4h { z5.h - z8.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: 27 11 4.50 * st4w { z0.s - z3.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: 18 11 4.50 * st4w { z0.s - z3.s }, p0, [x0]
+# CHECK-NEXT: 18 11 4.50 * st4w { z21.s - z24.s }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: 18 11 4.50 * st4w { z23.s - z26.s }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: 27 11 4.50 * st4w { z5.s - z8.s }, p3, [x17, x16, lsl #2]
# CHECK-NEXT: 2 2 0.50 * stnt1b { z0.b }, p0, [x0, x0]
# CHECK-NEXT: 2 2 0.50 * stnt1b { z0.b }, p0, [x0]
# CHECK-NEXT: 2 2 0.50 * stnt1b { z0.d }, p0, [z1.d]
@@ -6204,9 +6204,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 1 0.50 * str p0, [x0]
# CHECK-NEXT: 1 1 0.50 * str p15, [sp, #-256, mul vl]
# CHECK-NEXT: 1 1 0.50 * str p5, [x10, #255, mul vl]
-# CHECK-NEXT: 2 2 0.50 * U str z0, [x0]
-# CHECK-NEXT: 2 2 0.50 * U str z21, [x10, #-256, mul vl]
-# CHECK-NEXT: 2 2 0.50 * U str z31, [sp, #255, mul vl]
+# CHECK-NEXT: 2 2 0.50 * str z0, [x0]
+# CHECK-NEXT: 2 2 0.50 * str z21, [x10, #-256, mul vl]
+# CHECK-NEXT: 2 2 0.50 * str z31, [sp, #255, mul vl]
# CHECK-NEXT: 1 2 0.50 sub z0.b, p0/m, z0.b, z0.b
# CHECK-NEXT: 1 2 0.50 sub z0.b, z0.b, #0
# CHECK-NEXT: 1 2 0.50 sub z0.b, z0.b, z0.b
@@ -6293,10 +6293,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 sxth z31.s, p7/m, z31.s
# CHECK-NEXT: 1 2 1.00 sxtw z0.d, p0/m, z0.d
# CHECK-NEXT: 1 2 1.00 sxtw z31.d, p7/m, z31.d
-# CHECK-NEXT: 1 2 0.50 U tbl z28.b, { z29.b, z30.b }, z31.b
-# CHECK-NEXT: 1 2 0.50 U tbl z28.d, { z29.d, z30.d }, z31.d
-# CHECK-NEXT: 1 2 0.50 U tbl z28.h, { z29.h, z30.h }, z31.h
-# CHECK-NEXT: 1 2 0.50 U tbl z28.s, { z29.s, z30.s }, z31.s
+# CHECK-NEXT: 1 2 0.50 tbl z28.b, { z29.b, z30.b }, z31.b
+# CHECK-NEXT: 1 2 0.50 tbl z28.d, { z29.d, z30.d }, z31.d
+# CHECK-NEXT: 1 2 0.50 tbl z28.h, { z29.h, z30.h }, z31.h
+# CHECK-NEXT: 1 2 0.50 tbl z28.s, { z29.s, z30.s }, z31.s
# CHECK-NEXT: 1 2 0.50 tbl z31.b, { z31.b }, z31.b
# CHECK-NEXT: 1 2 0.50 tbl z31.d, { z31.d }, z31.d
# CHECK-NEXT: 1 2 0.50 tbl z31.h, { z31.h }, z31.h
@@ -6595,10 +6595,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 4 1.00 uqrshl z0.h, p0/m, z0.h, z1.h
# CHECK-NEXT: 1 4 1.00 uqrshl z29.s, p7/m, z29.s, z30.s
# CHECK-NEXT: 1 4 1.00 uqrshl z31.d, p7/m, z31.d, z30.d
-# CHECK-NEXT: 1 4 1.00 U uqrshlr z0.b, p0/m, z0.b, z1.b
-# CHECK-NEXT: 1 4 1.00 U uqrshlr z0.h, p0/m, z0.h, z1.h
-# CHECK-NEXT: 1 4 1.00 U uqrshlr z29.s, p7/m, z29.s, z30.s
-# CHECK-NEXT: 1 4 1.00 U uqrshlr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 1.00 uqrshlr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 4 1.00 uqrshlr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 4 1.00 uqrshlr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 4 1.00 uqrshlr z31.d, p7/m, z31.d, z30.d
# CHECK-NEXT: 1 4 1.00 uqrshrnb z0.b, z0.h, #1
# CHECK-NEXT: 1 4 1.00 uqrshrnb z0.h, z0.s, #1
# CHECK-NEXT: 1 4 1.00 uqrshrnb z0.s, z0.d, #1
@@ -6623,10 +6623,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 4 1.00 uqshl z31.d, p7/m, z31.d, z30.d
# CHECK-NEXT: 1 4 1.00 uqshl z31.h, p0/m, z31.h, #15
# CHECK-NEXT: 1 4 1.00 uqshl z31.s, p0/m, z31.s, #31
-# CHECK-NEXT: 1 4 1.00 U uqshlr z0.b, p0/m, z0.b, z1.b
-# CHECK-NEXT: 1 4 1.00 U uqshlr z0.h, p0/m, z0.h, z1.h
-# CHECK-NEXT: 1 4 1.00 U uqshlr z29.s, p7/m, z29.s, z30.s
-# CHECK-NEXT: 1 4 1.00 U uqshlr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 1.00 uqshlr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 4 1.00 uqshlr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 4 1.00 uqshlr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 4 1.00 uqshlr z31.d, p7/m, z31.d, z30.d
# CHECK-NEXT: 1 4 1.00 uqshrnb z0.b, z0.h, #1
# CHECK-NEXT: 1 4 1.00 uqshrnb z0.h, z0.s, #1
# CHECK-NEXT: 1 4 1.00 uqshrnb z0.s, z0.d, #1
@@ -6677,10 +6677,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 4 1.00 urshl z0.h, p0/m, z0.h, z1.h
# CHECK-NEXT: 1 4 1.00 urshl z29.s, p7/m, z29.s, z30.s
# CHECK-NEXT: 1 4 1.00 urshl z31.d, p7/m, z31.d, z30.d
-# CHECK-NEXT: 1 4 1.00 U urshlr z0.b, p0/m, z0.b, z1.b
-# CHECK-NEXT: 1 4 1.00 U urshlr z0.h, p0/m, z0.h, z1.h
-# CHECK-NEXT: 1 4 1.00 U urshlr z29.s, p7/m, z29.s, z30.s
-# CHECK-NEXT: 1 4 1.00 U urshlr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 1.00 urshlr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 4 1.00 urshlr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 4 1.00 urshlr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 4 1.00 urshlr z31.d, p7/m, z31.d, z30.d
# CHECK-NEXT: 1 4 1.00 urshr z0.b, p0/m, z0.b, #1
# CHECK-NEXT: 1 4 1.00 urshr z0.d, p0/m, z0.d, #1
# CHECK-NEXT: 1 4 1.00 urshr z0.h, p0/m, z0.h, #1
@@ -6779,14 +6779,14 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 3 0.50 whilege p15.h, x0, xzr
# CHECK-NEXT: 1 3 0.50 whilege p15.s, w0, wzr
# CHECK-NEXT: 1 3 0.50 whilege p15.s, x0, xzr
-# CHECK-NEXT: 1 3 0.50 * * U whilerw p15.b, x30, x30
-# CHECK-NEXT: 1 3 0.50 * * U whilerw p15.d, x30, x30
-# CHECK-NEXT: 1 3 0.50 * * U whilerw p15.h, x30, x30
-# CHECK-NEXT: 1 3 0.50 * * U whilerw p15.s, x30, x30
-# CHECK-NEXT: 1 3 0.50 * * U whilewr p15.b, x30, x30
-# CHECK-NEXT: 1 3 0.50 * * U whilewr p15.d, x30, x30
-# CHECK-NEXT: 1 3 0.50 * * U whilewr p15.h, x30, x30
-# CHECK-NEXT: 1 3 0.50 * * U whilewr p15.s, x30, x30
+# CHECK-NEXT: 1 3 0.50 whilerw p15.b, x30, x30
+# CHECK-NEXT: 1 3 0.50 whilerw p15.d, x30, x30
+# CHECK-NEXT: 1 3 0.50 whilerw p15.h, x30, x30
+# CHECK-NEXT: 1 3 0.50 whilerw p15.s, x30, x30
+# CHECK-NEXT: 1 3 0.50 whilewr p15.b, x30, x30
+# CHECK-NEXT: 1 3 0.50 whilewr p15.d, x30, x30
+# CHECK-NEXT: 1 3 0.50 whilewr p15.h, x30, x30
+# CHECK-NEXT: 1 3 0.50 whilewr p15.s, x30, x30
# CHECK-NEXT: 1 2 2.00 * U wrffr p0.b
# CHECK-NEXT: 1 2 2.00 * U wrffr p15.b
# CHECK-NEXT: 1 2 1.00 xar z0.b, z0.b, z1.b, #1
More information about the llvm-commits
mailing list