[llvm] 9394088 - [SVE][InstrFormats] Explicitly set hasSideEffects for all SVE instructions.

Paul Walker via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 25 04:33:00 PST 2023


Author: Paul Walker
Date: 2023-01-25T12:30:46Z
New Revision: 9394088ca02693b3b7d98c2996f42ce653255e7d

URL: https://github.com/llvm/llvm-project/commit/9394088ca02693b3b7d98c2996f42ce653255e7d
DIFF: https://github.com/llvm/llvm-project/commit/9394088ca02693b3b7d98c2996f42ce653255e7d.diff

LOG: [SVE][InstrFormats] Explicitly set hasSideEffects for all SVE instructions.

The instruction property hasSideEffects relies on the presence of
tablegen isel patterns when constructing its value, unless
specifically overridden. Since adding SVE scheduling information
we've noticed this property flip-flop as isel patterns have been
updated. To make things consistent (and correct) this patch
explicitly sets the property for all SVE instructions.

This has resulted in the following notable changes:
* Normal load and store instructions no longer report having side
  effects.
* All prefetch instructions correctly report having side effects.
* FFR related instructions continue to report having side effects.
  This is likely overkill but I've chosen to remain cautious here.
* Most all integer instructions no longer report having side effects.
* Most all floating point instructions no longer report having side
  effects, but do now report their potential for raising FP
  exceptions. I do not know how to test the latter so I've again
  taken a cautious route of tagging all floating point instructions
  except for DUPs.
* The conflict detection intrinsics now report they don't touch
  memory.

NOTE: SVE isel makes significant use of pseudo instructions but
this patch makes no effort to update them.

NOTE: We'll need a similar patch for SME but without a scheduling
model it'll be harder to verify the results.

Differential Revision: https://reviews.llvm.org/D142122

Added: 
    

Modified: 
    llvm/include/llvm/IR/IntrinsicsAArch64.td
    llvm/lib/Target/AArch64/SVEInstrFormats.td
    llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
    llvm/test/CodeGen/AArch64/sve-insert-vector.ll
    llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s
    llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index daafd1f7c3b77..a88d96f7f6a27 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1355,7 +1355,8 @@ let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
   class SVE2_CONFLICT_DETECT_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                 [LLVMAnyPointerType<llvm_any_ty>,
-                 LLVMMatchType<1>]>;
+                 LLVMMatchType<1>],
+                [IntrNoMem]>;
 
   class SVE2_3VectorArg_Indexed_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty],

diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index cef8d41218e80..1eaf799453eda 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -328,6 +328,7 @@ class sve_int_ptrue<bits<2> sz8_64, bits<3> opc, string asm, PPRRegOp pprty,
 
   let Defs = !if(!eq (opc{0}, 1), [NZCV], []);
   let ElementSize = pprty.ElementSize;
+  let hasSideEffects = 0;
   let isReMaterializable = 1;
 }
 
@@ -657,6 +658,7 @@ class sve_int_pfalse<bits<6> opc, string asm>
   let Inst{8-4}   = 0b00000;
   let Inst{3-0}   = Pd;
 
+  let hasSideEffects = 0;
   let isReMaterializable = 1;
 }
 
@@ -690,6 +692,7 @@ class sve_int_ptest<bits<6> opc, string asm, SDPatternOperator op>
   let Inst{4-0}   = 0b00000;
 
   let Defs = [NZCV];
+  let hasSideEffects = 0;
   let isCompare = 1;
 }
 
@@ -724,8 +727,9 @@ class sve_int_pfirst_next<bits<2> sz8_64, bits<5> opc, string asm,
 
   let Constraints = "$Pdn = $_Pdn";
   let Defs = [NZCV];
-  let isPTestLike = 1;
   let ElementSize = pprty.ElementSize;
+  let hasSideEffects = 0;
+  let isPTestLike = 1;
 }
 
 multiclass sve_int_pfirst<bits<5> opc, string asm, SDPatternOperator op> {
@@ -772,6 +776,7 @@ class sve_int_count_r<bits<2> sz8_64, bits<5> opc, string asm,
                       !strconcat(asm, "\t$Rdn, $Pg, $_Rdn"),
                       !strconcat(asm, "\t$Rdn, $Pg"));
   let Constraints = "$Rdn = $_Rdn";
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_count_r_s32<bits<5> opc, string asm,
@@ -877,6 +882,7 @@ class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm,
   let Constraints = "$Zdn = $_Zdn";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = ElementSizeNone;
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_count_v<bits<5> opc, string asm,
@@ -915,6 +921,8 @@ class sve_int_pcount_pred<bits<2> sz8_64, bits<4> opc, string asm,
   let Inst{9}     = opc{0};
   let Inst{8-5}   = Pn;
   let Inst{4-0}   = Rd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_pcount_pred<bits<4> opc, string asm,
@@ -951,6 +959,7 @@ class sve_int_count<bits<3> opc, string asm>
   let Inst{9-5}   = pattern;
   let Inst{4-0}   = Rd;
 
+  let hasSideEffects = 0;
   let isReMaterializable = 1;
 }
 
@@ -993,6 +1002,7 @@ class sve_int_countvlv<bits<5> opc, string asm, ZPRRegOp zprty>
   let Constraints = "$Zdn = $_Zdn";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = ElementSizeNone;
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_countvlv<bits<5> opc, string asm, ZPRRegOp zprty,
@@ -1027,6 +1037,7 @@ class sve_int_pred_pattern_a<bits<3> opc, string asm>
   let Inst{4-0}   = Rdn;
 
   let Constraints = "$Rdn = $_Rdn";
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_pred_pattern_a<bits<3> opc, string asm,
@@ -1093,6 +1104,7 @@ class sve_int_pred_pattern_b<bits<5> opc, string asm, RegisterOperand dt,
                       !strconcat(asm, "\t$Rdn, $pattern, mul $imm4"));
 
   let Constraints = "$Rdn = $_Rdn";
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_pred_pattern_b_s32<bits<5> opc, string asm,
@@ -1158,6 +1170,8 @@ class sve_int_perm_dup_r<bits<2> sz8_64, string asm, ZPRRegOp zprty,
   let Inst{21-10} = 0b100000001110;
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_perm_dup_r<string asm, SDPatternOperator op> {
@@ -1192,6 +1206,8 @@ class sve_int_perm_dup_i<bits<5> tsz, Operand immtype, string asm,
   let Inst{15-10} = 0b001000;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_perm_dup_i<string asm> {
@@ -1296,6 +1312,8 @@ class sve_int_perm_tbl<bits<2> sz8_64, bits<2> opc, string asm, ZPRRegOp zprty,
   let Inst{10}    = 0b0;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_perm_tbl<string asm, SDPatternOperator op> {
@@ -1391,6 +1409,7 @@ class sve2_int_perm_tbx<bits<2> sz8_64, bits<2> opc, string asm, ZPRRegOp zprty>
   let Inst{4-0}   = Zd;
 
   let Constraints = "$Zd = $_Zd";
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_int_perm_tbx<string asm, bits<2> opc, SDPatternOperator op> {
@@ -1423,6 +1442,8 @@ class sve_int_perm_reverse_z<bits<2> sz8_64, string asm, ZPRRegOp zprty>
   let Inst{21-10} = 0b111000001110;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_perm_reverse_z<string asm, SDPatternOperator op> {
@@ -1461,6 +1482,8 @@ class sve_int_perm_reverse_p<bits<2> sz8_64, string asm, PPRRegOp pprty>
   let Inst{8-5}   = Pn;
   let Inst{4}     = 0b0;
   let Inst{3-0}   = Pd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_perm_reverse_p<string asm, SDPatternOperator op> {
@@ -1489,6 +1512,8 @@ class sve_int_perm_unpk<bits<2> sz16_64, bits<2> opc, string asm,
   let Inst{15-10} = 0b001110;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_perm_unpk<bits<2> opc, string asm, SDPatternOperator op> {
@@ -1517,6 +1542,7 @@ class sve_int_perm_insrs<bits<2> sz8_64, string asm, ZPRRegOp zprty,
 
   let Constraints = "$Zdn = $_Zdn";
   let DestructiveInstType = DestructiveOther;
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_perm_insrs<string asm, SDPatternOperator op> {
@@ -1547,6 +1573,7 @@ class sve_int_perm_insrv<bits<2> sz8_64, string asm, ZPRRegOp zprty,
 
   let Constraints = "$Zdn = $_Zdn";
   let DestructiveInstType = DestructiveOther;
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_perm_insrv<string asm, SDPatternOperator op> {
@@ -1598,6 +1625,7 @@ class sve_int_perm_extract_i<string asm>
   let Constraints = "$Zdn = $_Zdn";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = ElementSizeNone;
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_perm_extract_i<string asm, SDPatternOperator op> {
@@ -1620,6 +1648,8 @@ class sve2_int_perm_extract_i_cons<string asm>
   let Inst{12-10} = imm8{2-0};
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 //===----------------------------------------------------------------------===//
@@ -1643,6 +1673,8 @@ class sve_int_sel_vvv<bits<2> sz8_64, string asm, ZPRRegOp zprty>
   let Inst{13-10} = Pg;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_sel_vvv<string asm, SDPatternOperator op> {
@@ -1706,7 +1738,7 @@ class sve_int_pred_log<bits<4> opc, string asm>
                       !strconcat(asm, "\t$Pd, $Pg/z, $Pn, $Pm"));
 
   let Defs = !if(!eq (opc{2}, 1), [NZCV], []);
-
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_pred_log<bits<4> opc, string asm, SDPatternOperator op,
@@ -1769,6 +1801,7 @@ class sve_int_log_imm<bits<2> opc, string asm>
   let DecoderMethod = "DecodeSVELogicalImmInstruction";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = ElementSizeNone;
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_log_imm<bits<2> opc, string asm, string alias, SDPatternOperator op> {
@@ -1814,8 +1847,9 @@ class sve_int_dup_mask_imm<string asm>
   let Inst{17-5} = imms;
   let Inst{4-0} = Zd;
 
-  let isReMaterializable = 1;
   let DecoderMethod = "DecodeSVELogicalImmInstruction";
+  let hasSideEffects = 0;
+  let isReMaterializable = 1;
 }
 
 multiclass sve_int_dup_mask_imm<string asm> {
@@ -1865,6 +1899,8 @@ class sve_int_bin_cons_arit_0<bits<2> sz8_64, bits<3> opc, string asm,
   let Inst{12-10} = opc;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_bin_cons_arit_0<bits<3> opc, string asm, SDPatternOperator op> {
@@ -1906,6 +1942,8 @@ class sve_fp_2op_i_p_zds<bits<2> sz, bits<3> opc, string asm,
   let Constraints = "$Zdn = $_Zdn";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = zprty.ElementSize;
+  let hasSideEffects = 0;
+  let mayRaiseFPException = 1;
 }
 
 multiclass sve_fp_2op_i_p_zds<bits<3> opc, string asm, string Ps, Operand imm_ty, FPImmLeaf A, FPImmLeaf B, SDPatternOperator op> {
@@ -1944,6 +1982,8 @@ class sve_fp_2op_p_zds<bits<2> sz, bits<4> opc, string asm,
   let Constraints = "$Zdn = $_Zdn";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = zprty.ElementSize;
+  let hasSideEffects = 0;
+  let mayRaiseFPException = 1;
 }
 
 multiclass sve_fp_2op_p_zds<bits<4> opc, string asm, string Ps,
@@ -2003,6 +2043,8 @@ class sve_fp_ftmad<bits<2> sz, string asm, ZPRRegOp zprty>
   let Constraints = "$Zdn = $_Zdn";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = ElementSizeNone;
+  let hasSideEffects = 0;
+  let mayRaiseFPException = 1;
 }
 
 multiclass sve_fp_ftmad<string asm, SDPatternOperator op> {
@@ -2072,6 +2114,9 @@ class sve_fp_3op_u_zd<bits<2> sz, bits<3> opc, string asm, ZPRRegOp zprty>
   let Inst{12-10} = opc;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
+  let mayRaiseFPException = 1;
 }
 
 multiclass sve_fp_3op_u_zd<bits<3> opc, string asm, SDPatternOperator op,
@@ -2125,6 +2170,8 @@ class sve_fp_3op_p_zds_a<bits<2> sz, bits<2> opc, string asm, ZPRRegOp zprty>
   let Constraints = "$Zda = $_Zda";
   let ElementSize = zprty.ElementSize;
   let DestructiveInstType = DestructiveTernaryCommWithRev;
+  let hasSideEffects = 0;
+  let mayRaiseFPException = 1;
 }
 
 multiclass sve_fp_3op_p_zds_a<bits<2> opc, string asm, string Ps,
@@ -2165,6 +2212,8 @@ class sve_fp_3op_p_zds_b<bits<2> sz, bits<2> opc, string asm,
   let Constraints = "$Zdn = $_Zdn";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = zprty.ElementSize;
+  let hasSideEffects = 0;
+  let mayRaiseFPException = 1;
 }
 
 multiclass sve_fp_3op_p_zds_b<bits<2> opc, string asm, SDPatternOperator op,
@@ -2209,6 +2258,8 @@ class sve_fp_fma_by_indexed_elem<bits<2> sz, bits<2> opc, string asm,
   let Constraints = "$Zda = $_Zda";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = ElementSizeNone;
+  let hasSideEffects = 0;
+  let mayRaiseFPException = 1;
 }
 
 multiclass sve2p1_fp_bfma_by_indexed_elem<string asm, bits<2> opc> {
@@ -2271,6 +2322,9 @@ class sve_fp_fmul_by_indexed_elem<bits<2> sz, bit o2, string asm, ZPRRegOp zprty
   let Inst{10}    = 0b0;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
+  let mayRaiseFPException = 1;
 }
 
 multiclass sve2p1_fp_bfmul_by_indexed_elem<string asm> {
@@ -2339,6 +2393,8 @@ class sve_fp_fcmla<bits<2> sz, string asm, ZPRRegOp zprty>
   let Constraints = "$Zda = $_Zda";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = zprty.ElementSize;
+  let hasSideEffects = 0;
+  let mayRaiseFPException = 1;
 }
 
 multiclass sve_fp_fcmla<string asm, SDPatternOperator op> {
@@ -2379,6 +2435,8 @@ class sve_fp_fcmla_by_indexed_elem<bits<2> sz, string asm,
   let Constraints = "$Zda = $_Zda";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = ElementSizeNone;
+  let hasSideEffects = 0;
+  let mayRaiseFPException = 1;
 }
 
 multiclass sve_fp_fcmla_by_indexed_elem<string asm, SDPatternOperator op> {
@@ -2427,6 +2485,8 @@ class sve_fp_fcadd<bits<2> sz, string asm, ZPRRegOp zprty>
   let Constraints = "$Zdn = $_Zdn";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = zprty.ElementSize;
+  let hasSideEffects = 0;
+  let mayRaiseFPException = 1;
 }
 
 multiclass sve_fp_fcadd<string asm, SDPatternOperator op> {
@@ -2465,6 +2525,8 @@ class sve2_fp_convert_precision<bits<4> opc, string asm,
   let Inst{4-0}   = Zd;
 
   let Constraints = "$Zd = $_Zd";
+  let hasSideEffects = 0;
+  let mayRaiseFPException = 1;
 }
 
 multiclass sve2_fp_convert_down_narrow<string asm, string op> {
@@ -2514,6 +2576,8 @@ class sve2_fp_pairwise_pred<bits<2> sz, bits<3> opc, string asm,
   let Constraints = "$Zdn = $_Zdn";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = zprty.ElementSize;
+  let hasSideEffects = 0;
+  let mayRaiseFPException = 1;
 }
 
 multiclass sve2_fp_pairwise_pred<bits<3> opc, string asm,
@@ -2557,6 +2621,8 @@ class sve2_fp_mla_long_by_indexed_elem<bits<3> opc, string asm>
   let Constraints = "$Zda = $_Zda";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = ElementSizeNone;
+  let hasSideEffects = 0;
+  let mayRaiseFPException = 1;
 }
 
 multiclass sve2_fp_mla_long_by_indexed_elem<bits<3> opc, string asm,
@@ -2592,6 +2658,8 @@ class sve2_fp_mla_long<bits<3> opc, string asm>
   let Constraints = "$Zda = $_Zda";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = ElementSizeNone;
+  let hasSideEffects = 0;
+  let mayRaiseFPException = 1;
 }
 
 multiclass sve2_fp_mla_long<bits<3> opc, string asm, ValueType OutVT,
@@ -2620,6 +2688,8 @@ class sve_int_arith_vl<bit opc, string asm, bit streaming_sve = 0b0>
   let Inst{11}    = streaming_sve;
   let Inst{10-5}  = imm6;
   let Inst{4-0}   = Rd;
+
+  let hasSideEffects = 0;
 }
 
 class sve_int_read_vl_a<bit op, bits<5> opc2, string asm, bit streaming_sve = 0b0>
@@ -2638,6 +2708,7 @@ class sve_int_read_vl_a<bit op, bits<5> opc2, string asm, bit streaming_sve = 0b
   let Inst{10-5}  = imm6;
   let Inst{4-0}   = Rd;
 
+  let hasSideEffects = 0;
   let isReMaterializable = 1;
 }
 
@@ -2662,6 +2733,8 @@ class sve_int_perm_bin_perm_zz<bits<3> opc, bits<2> sz8_64, string asm,
   let Inst{12-10} = opc;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_perm_bin_perm_zz<bits<3> opc, string asm,
@@ -2711,6 +2784,8 @@ class sve_fp_2op_p_zd<bits<7> opc, string asm, RegisterOperand i_zprtype,
   let Constraints = "$Zd = $_Zd";
   let DestructiveInstType = DestructiveUnaryPassthru;
   let ElementSize = Sz;
+  let hasSideEffects = 0;
+  let mayRaiseFPException = 1;
 }
 
 multiclass sve_fp_2op_p_zd<bits<7> opc, string asm,
@@ -2825,6 +2900,9 @@ class sve_fp_2op_u_zd<bits<2> sz, bits<3> opc, string asm,
   let Inst{15-10} = 0b001100;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
+  let mayRaiseFPException = 1;
 }
 
 multiclass sve_fp_2op_u_zd<bits<3> opc, string asm, SDPatternOperator op> {
@@ -2861,6 +2939,7 @@ class sve_int_bin_pred_arit_log<bits<2> sz8_64, bits<2> fmt, bits<3> opc,
   let Constraints = "$Zdn = $_Zdn";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = zprty.ElementSize;
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_bin_pred_log<bits<3> opc, string asm, string Ps,
@@ -2987,6 +3066,7 @@ class sve_int_mladdsub_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm,
   let Constraints = "$Zdn = $_Zdn";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = zprty.ElementSize;
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_mladdsub_vvv_pred<bits<1> opc, string asm, SDPatternOperator op> {
@@ -3024,6 +3104,7 @@ class sve_int_mlas_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm,
   let Constraints = "$Zda = $_Zda";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = zprty.ElementSize;
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm, SDPatternOperator op> {
@@ -3061,6 +3142,7 @@ class sve2_int_mla<bits<2> sz, bits<5> opc, string asm,
   let Constraints = "$Zda = $_Zda";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = ElementSizeNone;
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_int_mla<bit S, string asm, SDPatternOperator op> {
@@ -3106,6 +3188,7 @@ class sve2_int_mla_by_indexed_elem<bits<2> sz, bits<6> opc, string asm,
   let Constraints = "$Zda = $_Zda";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = ElementSizeNone;
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_int_mla_by_indexed_elem<bits<2> opc, bit S, string asm,
@@ -3184,6 +3267,7 @@ class sve_intx_dot<bit sz, bit U, string asm, ZPRRegOp zprty1,
 
   let Constraints = "$Zda = $_Zda";
   let DestructiveInstType = DestructiveOther;
+  let hasSideEffects = 0;
 }
 
 multiclass sve_intx_dot<bit opc, string asm, SDPatternOperator op> {
@@ -3216,6 +3300,7 @@ class sve_intx_dot_by_indexed_elem<bit sz, bit U, string asm,
 
   let Constraints = "$Zda = $_Zda";
   let DestructiveInstType = DestructiveOther;
+  let hasSideEffects = 0;
 }
 
 multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm,
@@ -3262,6 +3347,7 @@ class sve2_complex_int_arith<bits<2> sz, bits<4> opc, string asm,
   let Constraints = "$Zda = $_Zda";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = ElementSizeNone;
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_cintx_dot<string asm, SDPatternOperator op> {
@@ -3316,6 +3402,7 @@ class sve2_complex_int_arith_indexed<bits<2> sz, bits<4> opc, string asm,
   let Constraints = "$Zda = $_Zda";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = ElementSizeNone;
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_cintx_dot_by_indexed_elem<string asm, SDPatternOperator op> {
@@ -3386,6 +3473,8 @@ class sve2_int_mul<bits<2> sz, bits<3> opc, string asm, ZPRRegOp zprty>
   let Inst{12-10} = opc;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_int_mul<bits<3> opc, string asm, SDPatternOperator op,
@@ -3430,6 +3519,8 @@ class sve2_int_mul_by_indexed_elem<bits<2> sz, bits<4> opc, string asm,
   let Inst{13-10} = opc;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_int_mul_by_indexed_elem<bits<4> opc, string asm,
@@ -3506,6 +3597,7 @@ class sve2_int_arith_pred<bits<2> sz, bits<6> opc, string asm,
   let Constraints = "$Zdn = $_Zdn";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = zprty.ElementSize;
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_int_arith_pred<bits<6> opc, string asm, SDPatternOperator op,
@@ -3548,6 +3640,7 @@ class sve2_int_sadd_long_accum_pairwise<bits<2> sz, bit U, string asm,
   let Constraints = "$Zda = $_Zda";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = zprty1.ElementSize;
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_int_sadd_long_accum_pairwise<bit U, string asm, SDPatternOperator op> {
@@ -3583,6 +3676,7 @@ class sve2_int_un_pred_arit<bits<2> sz, bit Q, bits<2> opc,
   let Constraints = "$Zd = $_Zd";
   let DestructiveInstType = DestructiveUnaryPassthru;
   let ElementSize = zprty.ElementSize;
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_int_un_pred_arit_s<bits<3> opc, string asm,
@@ -3642,6 +3736,8 @@ class sve2_wide_int_arith<bits<2> sz, bits<5> opc, string asm,
   let Inst{14-10} = opc;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_wide_int_arith_long<bits<5> opc, string asm,
@@ -3706,6 +3802,8 @@ class sve2_misc<bits<2> sz, bits<4> opc, string asm,
   let Inst{13-10} = opc;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_misc_bitwise<bits<4> opc, string asm, SDPatternOperator op> {
@@ -3750,6 +3848,7 @@ class sve2_bitwise_xor_interleaved<bits<2> sz, bits<1> opc, string asm,
   let Constraints = "$Zd = $_Zd";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = ElementSizeNone;
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_bitwise_xor_interleaved<bit opc, string asm,
@@ -3783,6 +3882,8 @@ class sve2_bitwise_shift_left_long<bits<3> tsz8_64, bits<2> opc, string asm,
   let Inst{11-10} = opc;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_bitwise_shift_left_long<bits<2> opc, string asm,
@@ -3825,6 +3926,7 @@ class sve2_int_bin_shift_imm<bits<4> tsz8_64, bit opc, string asm,
   let Inst{4-0}   = Zd;
 
   let Constraints = "$Zd = $_Zd";
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_int_bin_shift_imm_left<bit opc, string asm,
@@ -3888,6 +3990,7 @@ class sve2_int_bin_accum_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
   let Constraints = "$Zda = $_Zda";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = ElementSizeNone;
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_int_bin_accum_shift_imm_right<bits<2> opc, string asm,
@@ -3934,6 +4037,7 @@ class sve2_int_cadd<bits<2> sz, bit opc, string asm, ZPRRegOp zprty>
   let Constraints = "$Zdn = $_Zdn";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = ElementSizeNone;
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_int_cadd<bit opc, string asm, SDPatternOperator op> {
@@ -3967,6 +4071,7 @@ class sve2_int_absdiff_accum<bits<2> sz, bits<4> opc, string asm,
   let Constraints = "$Zda = $_Zda";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = ElementSizeNone;
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_int_absdiff_accum<bit opc, string asm, SDPatternOperator op> {
@@ -4026,6 +4131,8 @@ class sve2_int_bin_shift_imm_narrow_bottom<bits<3> tsz8_64, bits<3> opc,
   let Inst{10}    = 0b0;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_int_bin_shift_imm_right_narrow_bottom<bits<3> opc, string asm,
@@ -4066,6 +4173,7 @@ class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc,
   let Inst{4-0}   = Zd;
 
   let Constraints = "$Zd = $_Zd";
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_int_bin_shift_imm_right_narrow_top<bits<3> opc, string asm,
@@ -4101,6 +4209,8 @@ class sve2_int_addsub_narrow_high_bottom<bits<2> sz, bits<2> opc, string asm,
   let Inst{10}    = 0b0; // Top
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_int_addsub_narrow_high_bottom<bits<2> opc, string asm,
@@ -4132,6 +4242,7 @@ class sve2_int_addsub_narrow_high_top<bits<2> sz, bits<2> opc, string asm,
   let Inst{4-0}   = Zd;
 
   let Constraints = "$Zd = $_Zd";
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_int_addsub_narrow_high_top<bits<2> opc, string asm,
@@ -4160,6 +4271,8 @@ class sve2_int_sat_extract_narrow_bottom<bits<3> tsz8_64, bits<2> opc, string as
   let Inst{10}    = 0b0;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_int_sat_extract_narrow_bottom<bits<2> opc, string asm,
@@ -4190,6 +4303,7 @@ class sve2_int_sat_extract_narrow_top<bits<3> tsz8_64, bits<2> opc, string asm,
   let Inst{4-0}   = Zd;
 
   let Constraints = "$Zd = $_Zd";
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_int_sat_extract_narrow_top<bits<2> opc, string asm,
@@ -4229,6 +4343,7 @@ class sve_int_un_pred_arit<bits<2> sz8_64, bits<4> opc,
   let Constraints = "$Zd = $_Zd";
   let DestructiveInstType = DestructiveUnaryPassthru;
   let ElementSize = zprty.ElementSize;
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_un_pred_arit_0<bits<3> opc, string asm,
@@ -4381,6 +4496,7 @@ class sve_int_dup_imm<bits<2> sz8_64, string asm,
   let Inst{12-5}  = imm{7-0}; // imm8
   let Inst{4-0}   = Zd;
 
+  let hasSideEffects = 0;
   let isReMaterializable = 1;
 }
 
@@ -4422,6 +4538,7 @@ class sve_int_dup_fpimm<bits<2> sz8_64, Operand fpimmtype,
   let Inst{12-5}  = imm8;
   let Inst{4-0}   = Zd;
 
+  let hasSideEffects = 0;
   let isReMaterializable = 1;
 }
 
@@ -4458,6 +4575,7 @@ class sve_int_arith_imm0<bits<2> sz8_64, bits<3> opc, string asm,
   let Constraints = "$Zdn = $_Zdn";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = ElementSizeNone;
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_arith_imm0<bits<3> opc, string asm, SDPatternOperator op> {
@@ -4490,6 +4608,7 @@ class sve_int_arith_imm<bits<2> sz8_64, bits<6> opc, string asm,
   let Constraints = "$Zdn = $_Zdn";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = ElementSizeNone;
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_arith_imm1<bits<2> opc, string asm, SDPatternOperator op> {
@@ -4547,6 +4666,8 @@ class sve_int_bin_cons_log<bits<2> opc, string asm>
   let Inst{15-10} = 0b001100;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_bin_cons_log<bits<2> opc, string asm, SDPatternOperator op> {
@@ -4585,6 +4706,7 @@ class sve2_int_bitwise_ternary_op_d<bits<3> opc, string asm>
   let Constraints = "$Zdn = $_Zdn";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = ElementSizeNone;
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_int_bitwise_ternary_op<bits<3> opc, string asm, SDPatternOperator op,
@@ -4631,6 +4753,7 @@ class sve2_int_rotate_right_imm<bits<4> tsz8_64, string asm,
   let Constraints = "$Zdn = $_Zdn";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = ElementSizeNone;
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_int_rotate_right_imm<string asm, SDPatternOperator op> {
@@ -4676,6 +4799,7 @@ class sve_int_dup_fpimm_pred<bits<2> sz, Operand fpimmtype,
   let Constraints = "$Zd = $_Zd";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = zprty.ElementSize;
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_dup_fpimm_pred<string asm> {
@@ -4711,6 +4835,7 @@ class sve_int_dup_imm_pred<bits<2> sz8_64, bit m, string asm,
 
   let DestructiveInstType = DestructiveOther;
   let ElementSize = zprty.ElementSize;
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_dup_imm_pred_merge_inst<
@@ -4816,6 +4941,7 @@ class sve_int_cmp<bit cmp_1, bits<2> sz8_64, bits<3> opc, string asm,
 
   let Defs = [NZCV];
   let ElementSize = pprty.ElementSize;
+  let hasSideEffects = 0;
   let isPTestLike = 1;
 }
 
@@ -4905,6 +5031,7 @@ class sve_int_scmp_vi<bits<2> sz8_64, bits<3> opc, string asm, PPRRegOp pprty,
 
   let Defs = [NZCV];
   let ElementSize = pprty.ElementSize;
+  let hasSideEffects = 0;
   let isPTestLike = 1;
 }
 
@@ -4978,6 +5105,7 @@ class sve_int_ucmp_vi<bits<2> sz8_64, bits<2> opc, string asm, PPRRegOp pprty,
 
   let Defs = [NZCV];
   let ElementSize = pprty.ElementSize;
+  let hasSideEffects = 0;
   let isPTestLike = 1;
 }
 
@@ -5020,6 +5148,7 @@ class sve_int_cterm<bit sz, bit opc, string asm, RegisterClass rt>
   let Inst{3-0}   = 0b0000;
 
   let Defs = [NZCV];
+  let hasSideEffects = 0;
 }
 
 class sve_int_while_rr<bits<2> sz8_64, bits<4> opc, string asm,
@@ -5042,6 +5171,7 @@ class sve_int_while_rr<bits<2> sz8_64, bits<4> opc, string asm,
 
   let Defs = [NZCV];
   let ElementSize = pprty.ElementSize;
+  let hasSideEffects = 0;
   let isWhile = 1;
 }
 
@@ -5088,6 +5218,7 @@ class sve2_int_while_rr<bits<2> sz8_64, bits<1> rw, string asm,
 
   let Defs = [NZCV];
   let ElementSize = pprty.ElementSize;
+  let hasSideEffects = 0;
   let isWhile = 1;
 }
 
@@ -5124,6 +5255,9 @@ class sve_fp_fast_red<bits<2> sz, bits<3> opc, string asm,
   let Inst{12-10} = Pg;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Vd;
+
+  let hasSideEffects = 0;
+  let mayRaiseFPException = 1;
 }
 
 multiclass sve_fp_fast_red<bits<3> opc, string asm, SDPatternOperator op> {
@@ -5163,6 +5297,8 @@ class sve_fp_2op_p_vd<bits<2> sz, bits<3> opc, string asm,
   let Inst{4-0}   = Vdn;
 
   let Constraints = "$Vdn = $_Vdn";
+  let hasSideEffects = 0;
+  let mayRaiseFPException = 1;
 }
 
 multiclass sve_fp_2op_p_vd<bits<3> opc, string asm, SDPatternOperator op> {
@@ -5203,6 +5339,9 @@ class sve_fp_3op_p_pd<bits<2> sz, bits<3> opc, string asm, PPRRegOp pprty,
   let Inst{9-5}   = Zn;
   let Inst{4}     = opc{0};
   let Inst{3-0}   = Pd;
+
+  let hasSideEffects = 0;
+  let mayRaiseFPException = 1;
 }
 
 multiclass sve_fp_3op_p_pd<bits<3> opc, string asm, SDPatternOperator op> {
@@ -5259,6 +5398,9 @@ class sve_fp_2op_p_pd<bits<2> sz, bits<3> opc, string asm, PPRRegOp pprty,
   let Inst{9-5}   = Zn;
   let Inst{4}     = opc{0};
   let Inst{3-0}   = Pd;
+
+  let hasSideEffects = 0;
+  let mayRaiseFPException = 1;
 }
 
 multiclass sve_fp_2op_p_pd<bits<3> opc, string asm,
@@ -5312,6 +5454,7 @@ class sve_int_index_ii<bits<2> sz8_64, string asm, ZPRRegOp zprty,
   let Inst{9-5}   = imm5;
   let Inst{4-0}   = Zd;
 
+  let hasSideEffects = 0;
   let isReMaterializable = 1;
 }
 
@@ -5356,6 +5499,8 @@ class sve_int_index_ir<bits<2> sz8_64, string asm, ZPRRegOp zprty,
   let Inst{15-10} = 0b010010;
   let Inst{9-5}   = imm5;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_index_ir<string asm, SDPatternOperator mulop, SDPatternOperator muloneuseop> {
@@ -5423,6 +5568,8 @@ class sve_int_index_ri<bits<2> sz8_64, string asm, ZPRRegOp zprty,
   let Inst{15-10} = 0b010001;
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_index_ri<string asm> {
@@ -5457,6 +5604,8 @@ class sve_int_index_rr<bits<2> sz8_64, string asm, ZPRRegOp zprty,
   let Inst{15-10} = 0b010011;
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_index_rr<string asm, SDPatternOperator mulop> {
@@ -5514,6 +5663,7 @@ class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<4> opc, string asm,
   let Constraints = "$Zdn = $_Zdn";
   let DestructiveInstType = DestructiveBinaryImm;
   let ElementSize = zprty.ElementSize;
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm, string Ps,
@@ -5630,6 +5780,7 @@ class sve_int_bin_pred_shift<bits<2> sz8_64, bit wide, bits<3> opc,
   let Constraints = "$Zdn = $_Zdn";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = zprty.ElementSize;
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_bin_pred_shift<bits<3> opc, string asm, string Ps,
@@ -5694,6 +5845,8 @@ class sve_int_bin_cons_shift_wide<bits<2> sz8_64, bits<2> opc, string asm,
   let Inst{11-10} = opc;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_bin_cons_shift_wide<bits<2> opc, string asm, SDPatternOperator op> {
@@ -5724,6 +5877,8 @@ class sve_int_bin_cons_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
   let Inst{11-10} = opc;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_bin_cons_shift_imm_left<bits<2> opc, string asm,
@@ -5790,6 +5945,7 @@ class sve_mem_cst_si<bits<2> msz, bits<2> esz, string asm,
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Zt;
 
+  let hasSideEffects = 0;
   let mayStore = 1;
 }
 
@@ -5826,6 +5982,7 @@ class sve_mem_est_si<bits<2> sz, bits<2> nregs, RegisterOperand VecList,
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Zt;
 
+  let hasSideEffects = 0;
   let mayStore = 1;
 }
 
@@ -5857,6 +6014,7 @@ class sve_mem_128b_est_si<bits<2> nregs, RegisterOperand VecList,
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Zt;
 
+  let hasSideEffects = 0;
   let mayStore = 1;
 }
 
@@ -5888,6 +6046,7 @@ class sve_mem_est_ss<bits<2> sz, bits<2> nregs, RegisterOperand VecList,
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Zt;
 
+  let hasSideEffects = 0;
   let mayStore = 1;
 }
 
@@ -5911,6 +6070,7 @@ class sve_mem_128b_est_ss<bits<2> nregs, RegisterOperand VecList,
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Zt;
 
+  let hasSideEffects = 0;
   let mayStore = 1;
 }
 
@@ -5933,6 +6093,7 @@ class sve_mem_cst_ss_base<bits<4> dtype, string asm,
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Zt;
 
+  let hasSideEffects = 0;
   let mayStore = 1;
 }
 
@@ -5963,6 +6124,7 @@ class sve_mem_cstnt_si<bits<2> msz, string asm, RegisterOperand VecList>
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Zt;
 
+  let hasSideEffects = 0;
   let mayStore = 1;
 }
 
@@ -5997,6 +6159,7 @@ class sve_mem_cstnt_ss_base<bits<2> msz, string asm, RegisterOperand listty,
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Zt;
 
+  let hasSideEffects = 0;
   let mayStore = 1;
 }
 
@@ -6027,6 +6190,7 @@ class sve2_mem_sstnt_vs_base<bits<3> opc, string asm,
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zt;
 
+  let hasSideEffects = 0;
   let mayStore = 1;
 }
 
@@ -6083,6 +6247,7 @@ class sve_mem_sst_sv<bits<3> opc, bit xs, bit scaled, string asm,
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Zt;
 
+  let hasSideEffects = 0;
   let mayStore = 1;
 }
 
@@ -6186,6 +6351,7 @@ class sve_mem_sst_sv2<bits<2> msz, bit scaled, string asm,
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Zt;
 
+  let hasSideEffects = 0;
   let mayStore = 1;
 }
 
@@ -6234,6 +6400,7 @@ class sve_mem_sst_vi<bits<3> opc, string asm, ZPRRegOp zprty,
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zt;
 
+  let hasSideEffects = 0;
   let mayStore = 1;
 }
 
@@ -6286,6 +6453,7 @@ class sve_mem_z_spill<string asm>
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Zt;
 
+  let hasSideEffects = 0;
   let mayStore = 1;
 }
 
@@ -6312,6 +6480,7 @@ class sve_mem_p_spill<string asm>
   let Inst{4}     = 0b0;
   let Inst{3-0}   = Pt;
 
+  let hasSideEffects = 0;
   let mayStore = 1;
 }
 
@@ -6344,6 +6513,8 @@ class sve_int_perm_bin_perm_pp<bits<3> opc, bits<2> sz8_64, string asm,
   let Inst{8-5}   = Pn;
   let Inst{4}     = 0b0;
   let Inst{3-0}   = Pd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_perm_bin_perm_pp<bits<3> opc, string asm,
@@ -6372,6 +6543,8 @@ class sve_int_perm_punpk<bit opc, string asm>
   let Inst{8-5}   = Pn;
   let Inst{4}     = 0b0;
   let Inst{3-0}   = Pd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_perm_punpk<bit opc, string asm, SDPatternOperator op> {
@@ -6398,6 +6571,7 @@ class sve_int_rdffr_pred<bit s, string asm>
 
   let Defs = !if(s, [NZCV], []);
   let Uses = [FFR];
+  let hasSideEffects = 1;
 }
 
 multiclass sve_int_rdffr_pred<bit s, string asm, SDPatternOperator op> {
@@ -6421,6 +6595,7 @@ class sve_int_rdffr_unpred<string asm> : I<
   let Inst{3-0}   = Pd;
 
   let Uses = [FFR];
+  let hasSideEffects = 1;
 }
 
 multiclass sve_int_rdffr_unpred<string asm, SDPatternOperator op> {
@@ -6444,8 +6619,8 @@ class sve_int_wrffr<string asm, SDPatternOperator op>
   let Inst{8-5}  = Pn;
   let Inst{4-0}  = 0b00000;
 
-  let hasSideEffects = 1;
   let Defs = [FFR];
+  let hasSideEffects = 1;
 }
 
 class sve_int_setffr<string asm, SDPatternOperator op>
@@ -6455,8 +6630,8 @@ class sve_int_setffr<string asm, SDPatternOperator op>
   [(op)]>, Sched<[]> {
   let Inst{31-0} = 0b00100101001011001001000000000000;
 
-  let hasSideEffects = 1;
   let Defs = [FFR];
+  let hasSideEffects = 1;
 }
 
 //===----------------------------------------------------------------------===//
@@ -6482,6 +6657,7 @@ class sve_int_perm_clast_rz<bits<2> sz8_64, bit ab, string asm,
   let Inst{4-0}   = Rdn;
 
   let Constraints = "$Rdn = $_Rdn";
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_perm_clast_rz<bit ab, string asm, SDPatternOperator op> {
@@ -6515,6 +6691,7 @@ class sve_int_perm_clast_vz<bits<2> sz8_64, bit ab, string asm,
   let Inst{4-0}   = Vdn;
 
   let Constraints = "$Vdn = $_Vdn";
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_perm_clast_vz<bit ab, string asm, SDPatternOperator op> {
@@ -6551,6 +6728,7 @@ class sve_int_perm_clast_zz<bits<2> sz8_64, bit ab, string asm,
   let Constraints = "$Zdn = $_Zdn";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = ElementSizeNone;
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_perm_clast_zz<bit ab, string asm, SDPatternOperator op> {
@@ -6588,6 +6766,8 @@ class sve_int_perm_last_r<bits<2> sz8_64, bit ab, string asm,
   let Inst{12-10} = Pg;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Rd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_perm_last_r<bit ab, string asm, SDPatternOperator op> {
@@ -6619,6 +6799,8 @@ class sve_int_perm_last_v<bits<2> sz8_64, bit ab, string asm,
   let Inst{12-10} = Pg;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Vd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_perm_last_v<bit ab, string asm, SDPatternOperator op> {
@@ -6653,6 +6835,7 @@ class sve_int_perm_splice<bits<2> sz8_64, string asm, ZPRRegOp zprty>
   let Constraints = "$Zdn = $_Zdn";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = ElementSizeNone;
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_perm_splice<string asm, SDPatternOperator op> {
@@ -6688,6 +6871,8 @@ class sve2_int_perm_splice_cons<bits<2> sz8_64, string asm,
   let Inst{12-10} = Pg;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_int_perm_splice_cons<string asm> {
@@ -6718,6 +6903,7 @@ class sve_int_perm_rev<bits<2> sz8_64, bits<2> opc, string asm,
   let Constraints = "$Zd = $_Zd";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = zprty.ElementSize;
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_perm_rev_rbit<string asm, SDPatternOperator op> {
@@ -6775,6 +6961,7 @@ class sve_int_perm_cpy_r<bits<2> sz8_64, string asm, ZPRRegOp zprty,
   let Constraints = "$Zd = $_Zd";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = zprty.ElementSize;
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_perm_cpy_r<string asm, SDPatternOperator op> {
@@ -6821,6 +7008,7 @@ class sve_int_perm_cpy_v<bits<2> sz8_64, string asm, ZPRRegOp zprty,
   let Constraints = "$Zd = $_Zd";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = zprty.ElementSize;
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_perm_cpy_v<string asm, SDPatternOperator op> {
@@ -6865,6 +7053,8 @@ class sve_int_perm_compact<bit sz, string asm, ZPRRegOp zprty>
   let Inst{12-10} = Pg;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_perm_compact<string asm, SDPatternOperator op> {
@@ -6900,9 +7090,10 @@ class sve_mem_cld_si_base<bits<4> dtype, bit nf, string asm,
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Zt;
 
-  let mayLoad = 1;
-  let Uses = !if(nf, [FFR], []);
   let Defs = !if(nf, [FFR], []);
+  let Uses = !if(nf, [FFR], []);
+  let hasSideEffects = nf;
+  let mayLoad = 1;
 }
 
 multiclass sve_mem_cld_si_base<bits<4> dtype, bit nf, string asm,
@@ -6946,6 +7137,7 @@ class sve_mem_cldnt_si_base<bits<2> msz, string asm, RegisterOperand VecList>
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Zt;
 
+  let hasSideEffects = 0;
   let mayLoad = 1;
 }
 
@@ -6980,6 +7172,7 @@ class sve_mem_cldnt_ss_base<bits<2> msz, string asm, RegisterOperand VecList,
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Zt;
 
+  let hasSideEffects = 0;
   let mayLoad = 1;
 }
 
@@ -7007,6 +7200,7 @@ class sve_mem_ldqr_si<bits<2> sz, string asm, RegisterOperand VecList>
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Zt;
 
+  let hasSideEffects = 0;
   let mayLoad = 1;
 }
 
@@ -7038,6 +7232,7 @@ class sve_mem_ldqr_ss<bits<2> sz, string asm, RegisterOperand VecList,
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Zt;
 
+  let hasSideEffects = 0;
   let mayLoad = 1;
 }
 
@@ -7069,6 +7264,7 @@ class sve_mem_ld_dup<bits<2> dtypeh, bits<2> dtypel, string asm,
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Zt;
 
+  let hasSideEffects = 0;
   let mayLoad = 1;
 }
 
@@ -7103,9 +7299,10 @@ class sve_mem_cld_ss_base<bits<4> dtype, bit ff, dag iops, string asm,
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Zt;
 
-  let mayLoad = 1;
-  let Uses = !if(ff, [FFR], []);
   let Defs = !if(ff, [FFR], []);
+  let Uses = !if(ff, [FFR], []);
+  let hasSideEffects = ff;
+  let mayLoad = 1;
 }
 
 multiclass sve_mem_cld_ss<bits<4> dtype, string asm, RegisterOperand listty,
@@ -7163,6 +7360,7 @@ class sve_mem_eld_si<bits<2> sz, bits<3> nregs, RegisterOperand VecList,
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Zt;
 
+  let hasSideEffects = 0;
   let mayLoad = 1;
 }
 
@@ -7196,6 +7394,7 @@ class sve_mem_eld_ss<bits<2> sz, bits<3> nregs, RegisterOperand VecList,
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Zt;
 
+  let hasSideEffects = 0;
   let mayLoad = 1;
 }
 
@@ -7226,9 +7425,11 @@ class sve_mem_32b_gld_sv<bits<4> opc, bit xs, bit scaled, string asm,
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Zt;
 
-  let mayLoad = 1;
+
   let Defs = !if(!eq(opc{0}, 1), [FFR], []);
   let Uses = !if(!eq(opc{0}, 1), [FFR], []);
+  let hasSideEffects = opc{0};
+  let mayLoad = 1;
 }
 
 multiclass sve_mem_32b_gld_sv_32_scaled<bits<4> opc, string asm,
@@ -7309,9 +7510,11 @@ class sve_mem_32b_gld_vi<bits<4> opc, string asm, Operand imm_ty>
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zt;
 
-  let mayLoad = 1;
+
   let Defs = !if(!eq(opc{0}, 1), [FFR], []);
   let Uses = !if(!eq(opc{0}, 1), [FFR], []);
+  let hasSideEffects = opc{0};
+  let mayLoad = 1;
 }
 
 multiclass sve_mem_32b_gld_vi_32_ptrs<bits<4> opc, string asm, Operand imm_ty,
@@ -7445,6 +7648,8 @@ class sve_mem_32b_prfm_vi<bits<2> msz, string asm, Operand imm_ty>
   let Inst{9-5}   = Zn;
   let Inst{4}     = 0b0;
   let Inst{3-0}   = prfop;
+
+  let hasSideEffects = 1;
 }
 
 multiclass sve_mem_32b_prfm_vi<bits<2> msz, string asm, Operand imm_ty, SDPatternOperator op> {
@@ -7472,6 +7677,7 @@ class sve_mem_z_fill<string asm>
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Zt;
 
+  let hasSideEffects = 0;
   let mayLoad = 1;
 }
 
@@ -7498,6 +7704,7 @@ class sve_mem_p_fill<string asm>
   let Inst{4}     = 0b0;
   let Inst{3-0}   = Pt;
 
+  let hasSideEffects = 0;
   let mayLoad = 1;
 }
 
@@ -7530,6 +7737,7 @@ class sve2_mem_gldnt_vs_base<bits<5> opc, dag iops, string asm,
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zt;
 
+  let hasSideEffects = 0;
   let mayLoad = 1;
 }
 
@@ -7595,9 +7803,11 @@ class sve_mem_64b_gld_sv<bits<4> opc, bit xs, bit scaled, bit lsl, string asm,
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Zt;
 
-  let mayLoad = 1;
+
   let Defs = !if(!eq(opc{0}, 1), [FFR], []);
   let Uses = !if(!eq(opc{0}, 1), [FFR], []);
+  let hasSideEffects = opc{0};
+  let mayLoad = 1;
 }
 
 multiclass sve_mem_64b_gld_sv_32_scaled<bits<4> opc, string asm,
@@ -7714,9 +7924,10 @@ class sve_mem_64b_gld_vi<bits<4> opc, string asm, Operand imm_ty>
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zt;
 
-  let mayLoad = 1;
   let Defs = !if(!eq(opc{0}, 1), [FFR], []);
   let Uses = !if(!eq(opc{0}, 1), [FFR], []);
+  let hasSideEffects = opc{0};
+  let mayLoad = 1;
 }
 
 multiclass sve_mem_64b_gld_vi_64_ptrs<bits<4> opc, string asm, Operand imm_ty,
@@ -7844,6 +8055,8 @@ class sve_int_bin_cons_misc_0_a<bits<2> opc, bits<2> msz, string asm,
   let Inst{11-10} = msz;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_bin_cons_misc_0_a_uxtw<bits<2> opc, string asm> {
@@ -7893,6 +8106,8 @@ class sve_int_bin_cons_misc_0_b<bits<2> sz, string asm, ZPRRegOp zprty>
   let Inst{15-10} = 0b101100;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_bin_cons_misc_0_b<string asm, SDPatternOperator op> {
@@ -7956,6 +8171,8 @@ class sve_int_reduce<bits<2> sz8_32, bits<2> fmt, bits<3> opc, string asm,
   let Inst{12-10} = Pg;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Vd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_reduce_0_saddv<bits<3> opc, string asm,
@@ -8027,6 +8244,7 @@ class sve_int_movprfx_pred<bits<2> sz8_32, bits<3> opc, string asm,
   let Inst{4-0}   = Zd;
 
   let ElementSize = zprty.ElementSize;
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_movprfx_pred_merge<bits<3> opc, string asm> {
@@ -8079,6 +8297,7 @@ class sve_int_brkp<bits<2> opc, string asm>
   let Inst{3-0}   = Pd;
 
   let Defs = !if(!eq (opc{1}, 1), [NZCV], []);
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_brkp<bits<2> opc, string asm, SDPatternOperator op> {
@@ -8112,6 +8331,7 @@ class sve_int_brkn<bit S, string asm>
   let Constraints = "$Pdm = $_Pdm";
   let Defs = !if(S, [NZCV], []);
   let ElementSize = ElementSizeB;
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_brkn<bits<1> opc, string asm, SDPatternOperator op> {
@@ -8139,7 +8359,7 @@ class sve_int_break<bits<3> opc, string asm, string suffix, dag iops>
 
   let Constraints = !if(!eq (opc{0}, 1), "$Pd = $_Pd", "");
   let Defs = !if(!eq (opc{1}, 1), [NZCV], []);
-
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_break_m<bits<3> opc, string asm, SDPatternOperator op> {
@@ -8180,6 +8400,7 @@ class sve2_char_match<bit sz, bit opc, string asm,
 
   let Defs = [NZCV];
   let ElementSize = pprty.ElementSize;
+  let hasSideEffects = 0;
   let isPTestLike = 1;
 }
 
@@ -8208,6 +8429,8 @@ class sve2_hist_gen_segment<string asm, SDPatternOperator op>
   let Inst{15-10} = 0b101000;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 //===----------------------------------------------------------------------===//
@@ -8231,6 +8454,8 @@ class sve2_hist_gen_vector<bit sz, string asm, ZPRRegOp zprty>
   let Inst{12-10} = Pg;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_hist_gen_vector<string asm, SDPatternOperator op> {
@@ -8259,6 +8484,8 @@ class sve2_crypto_cons_bin_op<bit opc, string asm, ZPRRegOp zprty>
   let Inst{10}    = opc;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_crypto_cons_bin_op<bit opc, string asm, ZPRRegOp zprty,
@@ -8282,6 +8509,7 @@ class sve2_crypto_des_bin_op<bits<2> opc, string asm, ZPRRegOp zprty>
   let Inst{4-0}   = Zdn;
 
   let Constraints = "$Zdn = $_Zdn";
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_crypto_des_bin_op<bits<2> opc, string asm, ZPRRegOp zprty,
@@ -8302,6 +8530,7 @@ class sve2_crypto_unary_op<bit opc, string asm, ZPRRegOp zprty>
   let Inst{4-0}   = Zdn;
 
   let Constraints = "$Zdn = $_Zdn";
+  let hasSideEffects = 0;
 }
 
 multiclass sve2_crypto_unary_op<bit opc, string asm, SDPatternOperator op> {
@@ -8329,6 +8558,8 @@ class sve_float_dot<bit bf, string asm>
 
   let Constraints = "$Zda = $_Zda";
   let DestructiveInstType = DestructiveOther;
+  let hasSideEffects = 0;
+  let mayRaiseFPException = 1;
 }
 
 multiclass sve_float_dot<bit bf, string asm, ValueType InVT, SDPatternOperator op> {
@@ -8354,6 +8585,8 @@ class sve_float_dot_indexed<bit bf, string asm>
 
   let Constraints = "$Zda = $_Zda";
   let DestructiveInstType = DestructiveOther;
+  let hasSideEffects = 0;
+  let mayRaiseFPException = 1;
 }
 
 multiclass sve_float_dot_indexed<bit bf, string asm, ValueType InVT, SDPatternOperator op> {
@@ -8376,6 +8609,8 @@ class sve_bfloat_matmul<string asm>
   let Constraints = "$Zda = $_Zda";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = ElementSizeH;
+  let hasSideEffects = 0;
+  let mayRaiseFPException = 1;
 }
 
 multiclass sve_bfloat_matmul<string asm, SDPatternOperator op> {
@@ -8398,8 +8633,9 @@ class sve_bfloat_convert<bit N, string asm>
 
   let Constraints = "$Zd = $_Zd";
   let DestructiveInstType = DestructiveOther;
-  let hasSideEffects = 1;
   let ElementSize = ElementSizeS;
+  let hasSideEffects = 0;
+  let mayRaiseFPException = 1;
 }
 
 multiclass sve_bfloat_convert<bit N, string asm, SDPatternOperator op> {
@@ -8428,6 +8664,7 @@ class sve_int_matmul<bits<2> uns, string asm>
   let Constraints = "$Zda = $_Zda";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = ZPR32.ElementSize;
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_matmul<bits<2> uns, string asm, SDPatternOperator op> {
@@ -8455,6 +8692,7 @@ class sve_int_dot_mixed<string asm>
   let Constraints = "$Zda = $_Zda";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = ZPR32.ElementSize;
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_dot_mixed<string asm, SDPatternOperator op> {
@@ -8485,6 +8723,7 @@ class sve_int_dot_mixed_indexed<bit U, string asm>
   let Constraints = "$Zda = $_Zda";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = ZPR32.ElementSize;
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_dot_mixed_indexed<bit U, string asm, SDPatternOperator op> {
@@ -8514,6 +8753,8 @@ class sve_fp_matrix_mla<bit sz, string asm, ZPRRegOp zprty>
   let Constraints = "$Zda = $_Zda";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = zprty.ElementSize;
+  let hasSideEffects = 0;
+  let mayRaiseFPException = 1;
 }
 
 multiclass sve_fp_matrix_mla<bit sz, string asm, ZPRRegOp zprty, SDPatternOperator op, ValueType vt> {
@@ -8542,6 +8783,7 @@ class sve_mem_ldor_si<bits<2> sz, string asm, RegisterOperand VecList>
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Zt;
 
+  let hasSideEffects = 0;
   let mayLoad = 1;
 }
 
@@ -8582,6 +8824,7 @@ class sve_mem_ldor_ss<bits<2> sz, string asm, RegisterOperand VecList,
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Zt;
 
+  let hasSideEffects = 0;
   let mayLoad = 1;
 }
 
@@ -8616,6 +8859,8 @@ class sve_int_perm_bin_perm_128_zz<bits<2> opc, bit P, string asm>
   let Inst{10}    = P;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_perm_bin_perm_128_zz<bits<2> opc, bit P, string asm, SDPatternOperator op> {
@@ -8727,6 +8972,7 @@ class sve2p1_fclamp<string asm, bits<2> sz, ZPRRegOp zpr_ty>
   let Constraints = "$Zd = $_Zd";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = zpr_ty.ElementSize;
+  let hasSideEffects = 0;
 }
 
 multiclass sve2p1_fclamp<string asm, SDPatternOperator op> {
@@ -8756,6 +9002,7 @@ class sve2p1_two_way_dot_vv<string mnemonic, bit u>
 
   let Constraints = "$Zda = $_Zda";
   let DestructiveInstType = DestructiveOther;
+  let hasSideEffects = 0;
 }
 
 
@@ -8778,6 +9025,7 @@ class sve2p1_two_way_dot_vvi<string mnemonic, bit u>
 
   let Constraints = "$Zda = $_Zda";
   let DestructiveInstType = DestructiveOther;
+  let hasSideEffects = 0;
 }
 
 
@@ -8789,6 +9037,8 @@ class sve2p1_ptrue_pn<string mnemonic, bits<2> sz, PNRP8to15RegOp pnrty>
   let Inst{23-22} = sz;
   let Inst{21-3}  = 0b1000000111100000010;
   let Inst{2-0}   = PNd;
+
+  let hasSideEffects = 0;
 }
 
 
@@ -8816,6 +9066,8 @@ class sve2p1_pred_as_ctr_to_mask_base<string mnemonic, bits<2> sz, bits<3> opc,
   let Inst{7-5}   = PNn;
   let Inst{4}     = 0b1;
   let Inst{3-0}   = Pd;
+
+  let hasSideEffects = 0;
 }
 
 class sve2p1_pred_as_ctr_to_mask<string mnemonic, bits<2> sz, PPRRegOp pprty>
@@ -8863,6 +9115,8 @@ class sve2p1_multi_vec_extract_narrow<string mnemonic, bits<2> opc, bits<3> tsz>
   let Inst{9-6}   = Zn;
   let Inst{5}     = 0b0;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve2p1_multi_vec_extract_narrow<string mnemonic, bits<2> opc, SDPatternOperator intrinsic> {
@@ -8889,6 +9143,8 @@ class sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, bits<2> tsz>
   let Inst{9-6}   = Zn;
   let Inst{5}     = 0b0;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc> {
@@ -8916,6 +9172,7 @@ class sve2p1_mem_cld_ss_2z<string mnemonic, bits<2> msz, bit n,
   let Inst{4-1} = Zt;
   let Inst{0}   = n;
 
+  let hasSideEffects = 0;
   let mayLoad = 1;
 }
 
@@ -8939,6 +9196,7 @@ class sve2p1_mem_cld_si_2z<string mnemonic, bits<2> msz, bit n,
   let Inst{4-1}   = Zt;
   let Inst{0}     = n;
 
+  let hasSideEffects = 0;
   let mayLoad = 1;
 }
 
@@ -8971,6 +9229,7 @@ class sve2p1_mem_cld_ss_4z<string mnemonic, bits<2> msz, bit n,
   let Inst{1}   = 0b0;
   let Inst{0}   = n;
 
+  let hasSideEffects = 0;
   let mayLoad = 1;
 }
 
@@ -8995,6 +9254,7 @@ class sve2p1_mem_cld_si_4z<string mnemonic, bits<2> msz, bit n,
   let Inst{1}     = 0b0;
   let Inst{0}     = n;
 
+  let hasSideEffects = 0;
   let mayLoad = 1;
 }
 
@@ -9027,6 +9287,7 @@ class sve2p1_mem_cst_ss_2z<string mnemonic, bits<2> msz, bit n,
   let Inst{4-1} = Zt;
   let Inst{0}   = n;
 
+  let hasSideEffects = 0;
   let mayStore = 1;
 }
 
@@ -9051,6 +9312,7 @@ class sve2p1_mem_cst_si_2z<string mnemonic, bits<2> msz, bit n,
   let Inst{4-1}   = Zt;
   let Inst{0}     = n;
 
+  let hasSideEffects = 0;
   let mayStore = 1;
 }
 
@@ -9110,6 +9372,7 @@ class sve2p1_mem_cst_si_4z<string mnemonic, bits<2> msz, bit n,
   let Inst{1}     = 0b0;
   let Inst{0}     = n;
 
+  let hasSideEffects = 0;
   let mayStore = 1;
 }
 
@@ -9140,6 +9403,8 @@ class sve2p1_pcount_pn<string mnemonic, bits<3> opc, bits<2> sz, PNRRegOp pnrty>
   let Inst{9}     = 0b1;
   let Inst{8-5}   = PNn;
   let Inst{4-0}   = Rd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve2p1_pcount_pn<string mnemonic, bits<3> opc> {
@@ -9174,6 +9439,7 @@ class sve2p1_int_while_rr_pn<string mnemonic, bits<2> sz, bits<3> opc,
   let Inst{2-0}   = PNd;
 
   let Defs = [NZCV];
+  let hasSideEffects = 0;
 }
 
 
@@ -9206,6 +9472,7 @@ class sve2p1_int_while_rr_pair<string mnemonic, bits<2> sz, bits<3> opc,
   let Inst{0}     = opc{0};
 
   let Defs = [NZCV];
+  let hasSideEffects = 0;
 }
 
 
@@ -9232,6 +9499,7 @@ class sve_mem_128b_gld_64_unscaled<string mnemonic>
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zt;
 
+  let hasSideEffects = 0;
   let mayLoad = 1;
 }
 
@@ -9258,6 +9526,7 @@ class sve_mem_sst_128b_64_unscaled<string mnemonic>
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zt;
 
+  let hasSideEffects = 0;
   let mayStore = 1;
 }
 
@@ -9288,6 +9557,7 @@ class sve_mem_128b_cld_si<bits<2> dtype, string mnemonic>
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Zt;
 
+  let hasSideEffects = 0;
   let mayLoad = 1;
 }
 
@@ -9321,6 +9591,7 @@ class sve_mem_128b_cld_ss<bits<2> dtype, string mnemonic, RegisterOperand gprsh_
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Zt;
 
+  let hasSideEffects = 0;
   let mayLoad = 1;
 }
 
@@ -9349,6 +9620,9 @@ class sve2p1_fp_reduction_q<bits<2> sz, bits<3> opc, string mnemonic,
   let Inst{12-10} = Pg;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Vd;
+
+  let hasSideEffects = 0;
+  let mayRaiseFPException = 1;
 }
 
 multiclass sve2p1_fp_reduction_q<bits<3> opc, string mnemonic> {
@@ -9370,6 +9644,8 @@ class sve2p1_dupq<bits<5> ind_tsz, string mnemonic, ZPRRegOp zprty, Operand ityp
   let Inst{15-10} = 0b001001;
   let Inst{9-5} = Zn;
   let Inst{4-0} = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve2p1_dupq<string mnemonic> {
@@ -9409,6 +9685,7 @@ class sve2p1_extq<string mnemonic>
   let Constraints = "$Zdn = $_Zdn";
   let DestructiveInstType = DestructiveOther;
   let ElementSize = ZPR8.ElementSize;
+  let hasSideEffects = 0;
 }
 
 
@@ -9428,6 +9705,8 @@ class sve2p1_vector_to_pred<bits<4> opc, string mnemonic,
   let Inst{9-5}   = Zn;
   let Inst{4}     = 0b0;
   let Inst{3-0}   = Pd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve2p1_vector_to_pred<string mnemonic> {
@@ -9468,6 +9747,7 @@ class sve2p1_pred_to_vector<bits<4> opc, string mnemonic,
   let Inst{4-0}   = Zd;
 
   let Constraints = "$Zd = $_Zd";
+  let hasSideEffects = 0;
 }
 
 multiclass sve2p1_pred_to_vector<string mnemonic> {
@@ -9510,6 +9790,8 @@ class sve2p1_int_reduce_q<bits<2> sz, bits<4> opc, string mnemonic,
   let Inst{12-10} = Pg;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Vd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve2p1_int_reduce_q<bits<4> opc, string mnemonic> {
@@ -9537,6 +9819,8 @@ class sve2p1_permute_vec_elems_q<bits<2> sz, bits<3> opc, string mnemonic,
   let Inst{12-10} = opc;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve2p1_permute_vec_elems_q<bits<3> opc, string mnemonic> {

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
index 8e7c6c6703a62..31ff9287046cd 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
@@ -21,8 +21,8 @@ define void @fcvt_v4f64_v4f128(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    bl __extenddftf2
 ; CHECK-NEXT:    add x8, sp, #48
-; CHECK-NEXT:    ldr z1, [x8] // 16-byte Folded Reload
 ; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr z1, [x8] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov d1, v1.d[1]
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    bl __extenddftf2
@@ -32,8 +32,8 @@ define void @fcvt_v4f64_v4f128(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    bl __extenddftf2
 ; CHECK-NEXT:    add x8, sp, #48
-; CHECK-NEXT:    ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov d1, v1.d[1]
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    bl __extenddftf2

diff  --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
index bf38dd8c087ad..27da8659f4fb2 100644
--- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
@@ -530,8 +530,8 @@ define <vscale x 4 x bfloat> @insert_nxv4bf16_v4bf16(<vscale x 4 x bfloat> %sv0,
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    st1h { z0.s }, p0, [sp, #1, mul vl]
 ; CHECK-NEXT:    addpl x8, sp, #4
+; CHECK-NEXT:    st1h { z0.s }, p0, [sp, #1, mul vl]
 ; CHECK-NEXT:    str d1, [x8]
 ; CHECK-NEXT:    ld1h { z0.s }, p0/z, [sp, #1, mul vl]
 ; CHECK-NEXT:    addvl sp, sp, #1

diff  --git a/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s
index 36d477738ff86..f915e1eaf07f0 100644
--- a/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s
@@ -2558,10 +2558,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      4     0.50                        add	z31.s, p7/m, z31.s, z31.s
 # CHECK-NEXT:  1      4     0.50                        add	z31.s, z31.s, #65280
 # CHECK-NEXT:  1      4     0.50                        add	z31.s, z31.s, z31.s
-# CHECK-NEXT:  1      1     0.50                  U     addpl	sp, sp, #31
-# CHECK-NEXT:  1      1     0.50                  U     addpl	x0, x0, #-32
-# CHECK-NEXT:  1      1     0.50                  U     addpl	x21, x21, #0
-# CHECK-NEXT:  1      1     0.50                  U     addpl	x23, x8, #-1
+# CHECK-NEXT:  1      1     0.50                        addpl	sp, sp, #31
+# CHECK-NEXT:  1      1     0.50                        addpl	x0, x0, #-32
+# CHECK-NEXT:  1      1     0.50                        addpl	x21, x21, #0
+# CHECK-NEXT:  1      1     0.50                        addpl	x23, x8, #-1
 # CHECK-NEXT:  1      1     0.50                        addvl	sp, sp, #31
 # CHECK-NEXT:  1      1     0.50                        addvl	x0, x0, #-32
 # CHECK-NEXT:  1      1     0.50                        addvl	x21, x21, #0
@@ -2597,7 +2597,7 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      4     0.50                        and	z31.s, p7/m, z31.s, z31.s
 # CHECK-NEXT:  1      4     1.00                        and	z5.b, z5.b, #0x6
 # CHECK-NEXT:  1      4     1.00                        and	z5.b, z5.b, #0xf9
-# CHECK-NEXT:  1      3     1.00                  U     ands	p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT:  1      3     1.00                        ands	p0.b, p0/z, p0.b, p1.b
 # CHECK-NEXT:  10     46    5.00                        andv	b0, p7, z31.b
 # CHECK-NEXT:  7      34    3.50                        andv	d0, p7, z31.d
 # CHECK-NEXT:  9      42    4.50                        andv	h0, p7, z31.h
@@ -2636,10 +2636,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  2      8     1.00                        asrd	z31.d, p0/m, z31.d, #64
 # CHECK-NEXT:  2      8     1.00                        asrd	z31.h, p0/m, z31.h, #16
 # CHECK-NEXT:  2      8     1.00                        asrd	z31.s, p0/m, z31.s, #32
-# CHECK-NEXT:  1      4     0.50                  U     asrr	z0.b, p0/m, z0.b, z0.b
-# CHECK-NEXT:  1      4     0.50                  U     asrr	z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT:  1      4     0.50                  U     asrr	z0.h, p0/m, z0.h, z0.h
-# CHECK-NEXT:  1      4     0.50                  U     asrr	z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT:  1      4     0.50                        asrr	z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT:  1      4     0.50                        asrr	z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT:  1      4     0.50                        asrr	z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT:  1      4     0.50                        asrr	z0.s, p0/m, z0.s, z0.s
 # CHECK-NEXT:  1      3     1.00                        bic	p0.b, p0/z, p0.b, p0.b
 # CHECK-NEXT:  1      3     1.00                        bic	p15.b, p15/z, p15.b, p15.b
 # CHECK-NEXT:  1      4     0.50                        bic	z0.d, z0.d, z0.d
@@ -2648,27 +2648,27 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      4     0.50                        bic	z31.d, p7/m, z31.d, z31.d
 # CHECK-NEXT:  1      4     0.50                        bic	z31.h, p7/m, z31.h, z31.h
 # CHECK-NEXT:  1      4     0.50                        bic	z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT:  1      3     1.00                  U     bics	p0.b, p0/z, p0.b, p0.b
-# CHECK-NEXT:  1      3     1.00                  U     bics	p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT:  1      3     1.00                        bics	p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT:  1      3     1.00                        bics	p15.b, p15/z, p15.b, p15.b
 # CHECK-NEXT:  1      3     1.00                        brka	p0.b, p15/m, p15.b
 # CHECK-NEXT:  1      3     1.00                        brka	p0.b, p15/z, p15.b
-# CHECK-NEXT:  1      3     1.00                  U     brkas	p0.b, p15/z, p15.b
+# CHECK-NEXT:  1      3     1.00                        brkas	p0.b, p15/z, p15.b
 # CHECK-NEXT:  1      3     1.00                        brkb	p0.b, p15/m, p15.b
 # CHECK-NEXT:  1      3     1.00                        brkb	p0.b, p15/z, p15.b
-# CHECK-NEXT:  1      3     1.00                  U     brkbs	p0.b, p15/z, p15.b
+# CHECK-NEXT:  1      3     1.00                        brkbs	p0.b, p15/z, p15.b
 # CHECK-NEXT:  1      3     1.00                        brkn	p0.b, p15/z, p1.b, p0.b
 # CHECK-NEXT:  1      3     1.00                        brkn	p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT:  1      3     1.00                  U     brkns	p0.b, p15/z, p1.b, p0.b
-# CHECK-NEXT:  1      3     1.00                  U     brkns	p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT:  1      3     1.00                        brkns	p0.b, p15/z, p1.b, p0.b
+# CHECK-NEXT:  1      3     1.00                        brkns	p15.b, p15/z, p15.b, p15.b
 # CHECK-NEXT:  1      3     1.00                        brkpa	p0.b, p15/z, p1.b, p2.b
 # CHECK-NEXT:  1      3     1.00                        brkpa	p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT:  1      3     1.00                  U     brkpas	p0.b, p15/z, p1.b, p2.b
-# CHECK-NEXT:  1      3     1.00                  U     brkpas	p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT:  1      3     1.00                        brkpas	p0.b, p15/z, p1.b, p2.b
+# CHECK-NEXT:  1      3     1.00                        brkpas	p15.b, p15/z, p15.b, p15.b
 # CHECK-NEXT:  1      3     1.00                        brkpb	p0.b, p15/z, p1.b, p2.b
 # CHECK-NEXT:  1      3     1.00                        brkpb	p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT:  1      3     1.00                  U     brkpbs	p0.b, p15/z, p1.b, p2.b
-# CHECK-NEXT:  1      3     1.00                  U     brkpbs	p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT:  1      6     1.00                  U     clasta	b0, p7, b0, z31.b
+# CHECK-NEXT:  1      3     1.00                        brkpbs	p0.b, p15/z, p1.b, p2.b
+# CHECK-NEXT:  1      3     1.00                        brkpbs	p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT:  1      6     1.00                        clasta	b0, p7, b0, z31.b
 # CHECK-NEXT:  1      6     1.00                        clasta	d0, p7, d0, z31.d
 # CHECK-NEXT:  1      6     1.00                        clasta	h0, p7, h0, z31.h
 # CHECK-NEXT:  1      6     1.00                        clasta	s0, p7, s0, z31.s
@@ -2680,7 +2680,7 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      6     1.00                        clasta	z0.d, p7, z0.d, z31.d
 # CHECK-NEXT:  1      6     1.00                        clasta	z0.h, p7, z0.h, z31.h
 # CHECK-NEXT:  1      6     1.00                        clasta	z0.s, p7, z0.s, z31.s
-# CHECK-NEXT:  1      6     1.00                  U     clastb	b0, p7, b0, z31.b
+# CHECK-NEXT:  1      6     1.00                        clastb	b0, p7, b0, z31.b
 # CHECK-NEXT:  1      6     1.00                        clastb	d0, p7, d0, z31.d
 # CHECK-NEXT:  1      6     1.00                        clastb	h0, p7, h0, z31.h
 # CHECK-NEXT:  1      6     1.00                        clastb	s0, p7, s0, z31.s
@@ -2880,14 +2880,14 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      1     0.50                        cntw	x0, pow2
 # CHECK-NEXT:  1      6     1.00                        compact	z31.d, p7, z31.d
 # CHECK-NEXT:  1      6     1.00                        compact	z31.s, p7, z31.s
-# CHECK-NEXT:  1      2     1.00                  U     ctermeq	w30, wzr
-# CHECK-NEXT:  1      2     1.00                  U     ctermeq	wzr, w30
-# CHECK-NEXT:  1      2     1.00                  U     ctermeq	x30, xzr
-# CHECK-NEXT:  1      2     1.00                  U     ctermeq	xzr, x30
-# CHECK-NEXT:  1      2     1.00                  U     ctermne	w30, wzr
-# CHECK-NEXT:  1      2     1.00                  U     ctermne	wzr, w30
-# CHECK-NEXT:  1      2     1.00                  U     ctermne	x30, xzr
-# CHECK-NEXT:  1      2     1.00                  U     ctermne	xzr, x30
+# CHECK-NEXT:  1      2     1.00                        ctermeq	w30, wzr
+# CHECK-NEXT:  1      2     1.00                        ctermeq	wzr, w30
+# CHECK-NEXT:  1      2     1.00                        ctermeq	x30, xzr
+# CHECK-NEXT:  1      2     1.00                        ctermeq	xzr, x30
+# CHECK-NEXT:  1      2     1.00                        ctermne	w30, wzr
+# CHECK-NEXT:  1      2     1.00                        ctermne	wzr, w30
+# CHECK-NEXT:  1      2     1.00                        ctermne	x30, xzr
+# CHECK-NEXT:  1      2     1.00                        ctermne	xzr, x30
 # CHECK-NEXT:  1      1     0.50                        decb	x0
 # CHECK-NEXT:  1      1     0.50                        decb	x0, #14
 # CHECK-NEXT:  1      1     0.50                        decb	x0, all, mul #16
@@ -2911,9 +2911,9 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  2      7     1.00                        decp	xzr, p15.d
 # CHECK-NEXT:  2      7     1.00                        decp	xzr, p15.h
 # CHECK-NEXT:  2      7     1.00                        decp	xzr, p15.s
-# CHECK-NEXT:  1      12    1.00                  U     decp	z31.d, p15.d
-# CHECK-NEXT:  1      12    1.00                  U     decp	z31.h, p15.h
-# CHECK-NEXT:  1      12    1.00                  U     decp	z31.s, p15.s
+# CHECK-NEXT:  1      12    1.00                        decp	z31.d, p15.d
+# CHECK-NEXT:  1      12    1.00                        decp	z31.h, p15.h
+# CHECK-NEXT:  1      12    1.00                        decp	z31.s, p15.s
 # CHECK-NEXT:  1      1     0.50                        decw	x0
 # CHECK-NEXT:  1      1     0.50                        decw	x0, #14
 # CHECK-NEXT:  1      1     0.50                        decw	x0, all, mul #16
@@ -2938,7 +2938,7 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      4     0.50                        eor	z31.s, p7/m, z31.s, z31.s
 # CHECK-NEXT:  1      4     1.00                        eor	z5.b, z5.b, #0x6
 # CHECK-NEXT:  1      4     1.00                        eor	z5.b, z5.b, #0xf9
-# CHECK-NEXT:  1      3     1.00                  U     eors	p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT:  1      3     1.00                        eors	p0.b, p0/z, p0.b, p1.b
 # CHECK-NEXT:  10     46    5.00                        eorv	b0, p7, z31.b
 # CHECK-NEXT:  7      34    3.50                        eorv	d0, p7, z31.d
 # CHECK-NEXT:  9      42    4.50                        eorv	h0, p7, z31.h
@@ -3136,12 +3136,12 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  2      15    1.00                        fmls	z0.s, z1.s, z7.s[3]
 # CHECK-NEXT:  1      6     1.00                        fmov	z0.d, #-10.00000000
 # CHECK-NEXT:  1      6     1.00                        fmov	z0.d, #0.12500000
-# CHECK-NEXT:  1      4     1.00                  U     fmov	z0.d, p0/m, #-10.00000000
-# CHECK-NEXT:  1      4     1.00                  U     fmov	z0.d, p0/m, #0.12500000
+# CHECK-NEXT:  1      4     1.00                        fmov	z0.d, p0/m, #-10.00000000
+# CHECK-NEXT:  1      4     1.00                        fmov	z0.d, p0/m, #0.12500000
 # CHECK-NEXT:  1      6     1.00                        fmov	z0.h, #-0.12500000
-# CHECK-NEXT:  1      4     1.00                  U     fmov	z0.h, p0/m, #-0.12500000
+# CHECK-NEXT:  1      4     1.00                        fmov	z0.h, p0/m, #-0.12500000
 # CHECK-NEXT:  1      6     1.00                        fmov	z0.s, #-0.12500000
-# CHECK-NEXT:  1      4     1.00                  U     fmov	z0.s, p0/m, #-0.12500000
+# CHECK-NEXT:  1      4     1.00                        fmov	z0.s, p0/m, #-0.12500000
 # CHECK-NEXT:  1      9     0.50                        fmsb	z0.d, p7/m, z1.d, z31.d
 # CHECK-NEXT:  1      9     0.50                        fmsb	z0.h, p7/m, z1.h, z31.h
 # CHECK-NEXT:  1      9     0.50                        fmsb	z0.s, p7/m, z1.s, z31.s
@@ -3280,9 +3280,9 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  2      7     1.00                        incp	xzr, p15.d
 # CHECK-NEXT:  2      7     1.00                        incp	xzr, p15.h
 # CHECK-NEXT:  2      7     1.00                        incp	xzr, p15.s
-# CHECK-NEXT:  1      12    1.00                  U     incp	z31.d, p15.d
-# CHECK-NEXT:  1      12    1.00                  U     incp	z31.h, p15.h
-# CHECK-NEXT:  1      12    1.00                  U     incp	z31.s, p15.s
+# CHECK-NEXT:  1      12    1.00                        incp	z31.d, p15.d
+# CHECK-NEXT:  1      12    1.00                        incp	z31.h, p15.h
+# CHECK-NEXT:  1      12    1.00                        incp	z31.s, p15.s
 # CHECK-NEXT:  1      1     0.50                        incw	x0
 # CHECK-NEXT:  1      1     0.50                        incw	x0, #14
 # CHECK-NEXT:  1      1     0.50                        incw	x0, all, mul #16
@@ -3334,7 +3334,7 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      10    1.00                        insr	z31.h, wzr
 # CHECK-NEXT:  1      6     1.00                        insr	z31.s, s31
 # CHECK-NEXT:  1      10    1.00                        insr	z31.s, wzr
-# CHECK-NEXT:  1      6     1.00                  U     lasta	b0, p7, z31.b
+# CHECK-NEXT:  1      6     1.00                        lasta	b0, p7, z31.b
 # CHECK-NEXT:  1      6     1.00                        lasta	d0, p7, z31.d
 # CHECK-NEXT:  1      6     1.00                        lasta	h0, p7, z31.h
 # CHECK-NEXT:  1      6     1.00                        lasta	s0, p7, z31.s
@@ -3342,7 +3342,7 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      29    1.00                        lasta	w0, p7, z31.h
 # CHECK-NEXT:  1      29    1.00                        lasta	w0, p7, z31.s
 # CHECK-NEXT:  1      29    1.00                        lasta	x0, p7, z31.d
-# CHECK-NEXT:  1      6     1.00                  U     lastb	b0, p7, z31.b
+# CHECK-NEXT:  1      6     1.00                        lastb	b0, p7, z31.b
 # CHECK-NEXT:  1      6     1.00                        lastb	d0, p7, z31.d
 # CHECK-NEXT:  1      6     1.00                        lastb	h0, p7, z31.h
 # CHECK-NEXT:  1      6     1.00                        lastb	s0, p7, z31.s
@@ -3352,68 +3352,68 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      29    1.00                        lastb	x0, p7, z31.d
 # CHECK-NEXT:  1      11    0.50    *                   ld1b	{ z0.b }, p0/z, [sp, x0]
 # CHECK-NEXT:  1      11    0.50    *                   ld1b	{ z0.b }, p0/z, [x0, x0]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1b	{ z0.b }, p0/z, [x0]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1b	{ z0.d }, p0/z, [x0]
-# CHECK-NEXT:  1      16    2.00    *             U     ld1b	{ z0.d }, p0/z, [z0.d]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1b	{ z0.h }, p0/z, [x0]
-# CHECK-NEXT:  1      23    4.00    *             U     ld1b	{ z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT:  1      23    4.00    *             U     ld1b	{ z0.s }, p0/z, [x0, z0.s, uxtw]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1b	{ z0.s }, p0/z, [x0]
-# CHECK-NEXT:  1      19    4.00    *             U     ld1b	{ z0.s }, p0/z, [z0.s]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1b	{ z21.b }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1b	{ z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1b	{ z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1b	{ z21.d }, p5/z, [x10, z21.d, uxtw]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1b	{ z21.h }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1b	{ z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  1      11    0.50    *                   ld1b	{ z0.b }, p0/z, [x0]
+# CHECK-NEXT:  1      11    0.50    *                   ld1b	{ z0.d }, p0/z, [x0]
+# CHECK-NEXT:  1      16    2.00    *                   ld1b	{ z0.d }, p0/z, [z0.d]
+# CHECK-NEXT:  1      11    0.50    *                   ld1b	{ z0.h }, p0/z, [x0]
+# CHECK-NEXT:  1      23    4.00    *                   ld1b	{ z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT:  1      23    4.00    *                   ld1b	{ z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT:  1      11    0.50    *                   ld1b	{ z0.s }, p0/z, [x0]
+# CHECK-NEXT:  1      19    4.00    *                   ld1b	{ z0.s }, p0/z, [z0.s]
+# CHECK-NEXT:  1      11    0.50    *                   ld1b	{ z21.b }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  1      11    0.50    *                   ld1b	{ z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  1      20    2.00    *                   ld1b	{ z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT:  1      20    2.00    *                   ld1b	{ z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT:  1      11    0.50    *                   ld1b	{ z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  1      11    0.50    *                   ld1b	{ z21.s }, p5/z, [x10, #5, mul vl]
 # CHECK-NEXT:  1      11    0.50    *                   ld1b	{ z21.s }, p5/z, [x10, x21]
 # CHECK-NEXT:  1      11    0.50    *                   ld1b	{ z23.d }, p3/z, [x13, x8]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1b	{ z31.b }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1b	{ z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1b	{ z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT:  1      16    2.00    *             U     ld1b	{ z31.d }, p7/z, [z31.d, #31]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1b	{ z31.h }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1b	{ z31.s }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  1      19    4.00    *             U     ld1b	{ z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT:  1      11    0.50    *                   ld1b	{ z31.b }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  1      11    0.50    *                   ld1b	{ z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  1      20    2.00    *                   ld1b	{ z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT:  1      16    2.00    *                   ld1b	{ z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT:  1      11    0.50    *                   ld1b	{ z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  1      11    0.50    *                   ld1b	{ z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  1      19    4.00    *                   ld1b	{ z31.s }, p7/z, [z31.s, #31]
 # CHECK-NEXT:  1      11    0.50    *                   ld1b	{ z5.h }, p3/z, [x17, x16]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1d	{ z0.d }, p0/z, [x0, z0.d, sxtw #3]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1d	{ z0.d }, p0/z, [x0, z0.d, uxtw #3]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1d	{ z0.d }, p0/z, [x0]
-# CHECK-NEXT:  1      16    2.00    *             U     ld1d	{ z0.d }, p0/z, [z0.d]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1d	{ z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1d	{ z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1d	{ z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT:  1      20    2.00    *                   ld1d	{ z0.d }, p0/z, [x0, z0.d, sxtw #3]
+# CHECK-NEXT:  1      20    2.00    *                   ld1d	{ z0.d }, p0/z, [x0, z0.d, uxtw #3]
+# CHECK-NEXT:  1      11    0.50    *                   ld1d	{ z0.d }, p0/z, [x0]
+# CHECK-NEXT:  1      16    2.00    *                   ld1d	{ z0.d }, p0/z, [z0.d]
+# CHECK-NEXT:  1      11    0.50    *                   ld1d	{ z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  1      20    2.00    *                   ld1d	{ z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT:  1      20    2.00    *                   ld1d	{ z21.d }, p5/z, [x10, z21.d, uxtw]
 # CHECK-NEXT:  1      11    0.50    *                   ld1d	{ z23.d }, p3/z, [sp, x8, lsl #3]
 # CHECK-NEXT:  1      11    0.50    *                   ld1d	{ z23.d }, p3/z, [x13, x8, lsl #3]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1d	{ z23.d }, p3/z, [x13, z8.d, lsl #3]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1d	{ z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1d	{ z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT:  1      16    2.00    *             U     ld1d	{ z31.d }, p7/z, [z31.d, #248]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1h	{ z0.d }, p0/z, [x0, z0.d, sxtw #1]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1h	{ z0.d }, p0/z, [x0, z0.d, uxtw #1]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1h	{ z0.d }, p0/z, [x0]
-# CHECK-NEXT:  1      16    2.00    *             U     ld1h	{ z0.d }, p0/z, [z0.d]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1h	{ z0.h }, p0/z, [x0]
-# CHECK-NEXT:  1      23    4.00    *             U     ld1h	{ z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT:  1      23    4.00    *             U     ld1h	{ z0.s }, p0/z, [x0, z0.s, uxtw]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1h	{ z0.s }, p0/z, [x0]
-# CHECK-NEXT:  1      19    4.00    *             U     ld1h	{ z0.s }, p0/z, [z0.s]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1h	{ z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1h	{ z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1h	{ z21.d }, p5/z, [x10, z21.d, uxtw]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1h	{ z21.h }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1h	{ z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  1      20    2.00    *                   ld1d	{ z23.d }, p3/z, [x13, z8.d, lsl #3]
+# CHECK-NEXT:  1      11    0.50    *                   ld1d	{ z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  1      20    2.00    *                   ld1d	{ z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT:  1      16    2.00    *                   ld1d	{ z31.d }, p7/z, [z31.d, #248]
+# CHECK-NEXT:  1      20    2.00    *                   ld1h	{ z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT:  1      20    2.00    *                   ld1h	{ z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT:  1      11    0.50    *                   ld1h	{ z0.d }, p0/z, [x0]
+# CHECK-NEXT:  1      16    2.00    *                   ld1h	{ z0.d }, p0/z, [z0.d]
+# CHECK-NEXT:  1      11    0.50    *                   ld1h	{ z0.h }, p0/z, [x0]
+# CHECK-NEXT:  1      23    4.00    *                   ld1h	{ z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT:  1      23    4.00    *                   ld1h	{ z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT:  1      11    0.50    *                   ld1h	{ z0.s }, p0/z, [x0]
+# CHECK-NEXT:  1      19    4.00    *                   ld1h	{ z0.s }, p0/z, [z0.s]
+# CHECK-NEXT:  1      11    0.50    *                   ld1h	{ z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  1      20    2.00    *                   ld1h	{ z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT:  1      20    2.00    *                   ld1h	{ z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT:  1      11    0.50    *                   ld1h	{ z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  1      11    0.50    *                   ld1h	{ z21.s }, p5/z, [x10, #5, mul vl]
 # CHECK-NEXT:  1      11    0.50    *                   ld1h	{ z21.s }, p5/z, [x10, x21, lsl #1]
 # CHECK-NEXT:  1      11    0.50    *                   ld1h	{ z23.d }, p3/z, [x13, x8, lsl #1]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1h	{ z23.d }, p3/z, [x13, z8.d, lsl #1]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1h	{ z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1h	{ z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT:  1      16    2.00    *             U     ld1h	{ z31.d }, p7/z, [z31.d, #62]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1h	{ z31.h }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1h	{ z31.s }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  1      23    4.00    *             U     ld1h	{ z31.s }, p7/z, [sp, z31.s, sxtw #1]
-# CHECK-NEXT:  1      23    4.00    *             U     ld1h	{ z31.s }, p7/z, [sp, z31.s, uxtw #1]
-# CHECK-NEXT:  1      19    4.00    *             U     ld1h	{ z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT:  1      20    2.00    *                   ld1h	{ z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT:  1      11    0.50    *                   ld1h	{ z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  1      20    2.00    *                   ld1h	{ z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT:  1      16    2.00    *                   ld1h	{ z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT:  1      11    0.50    *                   ld1h	{ z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  1      11    0.50    *                   ld1h	{ z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  1      23    4.00    *                   ld1h	{ z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT:  1      23    4.00    *                   ld1h	{ z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT:  1      19    4.00    *                   ld1h	{ z31.s }, p7/z, [z31.s, #62]
 # CHECK-NEXT:  1      11    0.50    *                   ld1h	{ z5.h }, p3/z, [sp, x16, lsl #1]
 # CHECK-NEXT:  1      11    0.50    *                   ld1h	{ z5.h }, p3/z, [x17, x16, lsl #1]
 # CHECK-NEXT:  1      11    0.50    *                   ld1rb	{ z0.b }, p0/z, [x0]
@@ -3468,146 +3468,146 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      11    0.50    *                   ld1rw	{ z0.s }, p0/z, [x0]
 # CHECK-NEXT:  1      11    0.50    *                   ld1rw	{ z31.d }, p7/z, [sp, #252]
 # CHECK-NEXT:  1      11    0.50    *                   ld1rw	{ z31.s }, p7/z, [sp, #252]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1sb	{ z0.d }, p0/z, [x0]
-# CHECK-NEXT:  1      16    2.00    *             U     ld1sb	{ z0.d }, p0/z, [z0.d]
+# CHECK-NEXT:  1      11    0.50    *                   ld1sb	{ z0.d }, p0/z, [x0]
+# CHECK-NEXT:  1      16    2.00    *                   ld1sb	{ z0.d }, p0/z, [z0.d]
 # CHECK-NEXT:  1      11    0.50    *                   ld1sb	{ z0.h }, p0/z, [sp, x0]
 # CHECK-NEXT:  1      11    0.50    *                   ld1sb	{ z0.h }, p0/z, [x0, x0]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1sb	{ z0.h }, p0/z, [x0]
-# CHECK-NEXT:  1      23    4.00    *             U     ld1sb	{ z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1sb	{ z0.s }, p0/z, [x0]
-# CHECK-NEXT:  1      19    4.00    *             U     ld1sb	{ z0.s }, p0/z, [z0.s]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1sb	{ z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1sb	{ z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1sb	{ z21.d }, p5/z, [x10, z21.d, uxtw]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1sb	{ z21.h }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1sb	{ z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  1      11    0.50    *                   ld1sb	{ z0.h }, p0/z, [x0]
+# CHECK-NEXT:  1      23    4.00    *                   ld1sb	{ z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT:  1      11    0.50    *                   ld1sb	{ z0.s }, p0/z, [x0]
+# CHECK-NEXT:  1      19    4.00    *                   ld1sb	{ z0.s }, p0/z, [z0.s]
+# CHECK-NEXT:  1      11    0.50    *                   ld1sb	{ z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  1      20    2.00    *                   ld1sb	{ z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT:  1      20    2.00    *                   ld1sb	{ z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT:  1      11    0.50    *                   ld1sb	{ z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  1      11    0.50    *                   ld1sb	{ z21.s }, p5/z, [x10, #5, mul vl]
 # CHECK-NEXT:  1      11    0.50    *                   ld1sb	{ z21.s }, p5/z, [x10, x21]
 # CHECK-NEXT:  1      11    0.50    *                   ld1sb	{ z23.d }, p3/z, [x13, x8]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1sb	{ z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1sb	{ z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT:  1      16    2.00    *             U     ld1sb	{ z31.d }, p7/z, [z31.d, #31]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1sb	{ z31.h }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1sb	{ z31.s }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  1      19    4.00    *             U     ld1sb	{ z31.s }, p7/z, [z31.s, #31]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1sh	{ z0.d }, p0/z, [x0, z0.d, sxtw #1]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1sh	{ z0.d }, p0/z, [x0, z0.d, uxtw #1]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1sh	{ z0.d }, p0/z, [x0]
-# CHECK-NEXT:  1      16    2.00    *             U     ld1sh	{ z0.d }, p0/z, [z0.d]
-# CHECK-NEXT:  1      23    4.00    *             U     ld1sh	{ z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT:  1      23    4.00    *             U     ld1sh	{ z0.s }, p0/z, [x0, z0.s, uxtw]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1sh	{ z0.s }, p0/z, [x0]
-# CHECK-NEXT:  1      19    4.00    *             U     ld1sh	{ z0.s }, p0/z, [z0.s]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1sh	{ z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1sh	{ z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1sh	{ z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT:  1      11    0.50    *                   ld1sb	{ z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  1      20    2.00    *                   ld1sb	{ z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT:  1      16    2.00    *                   ld1sb	{ z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT:  1      11    0.50    *                   ld1sb	{ z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  1      11    0.50    *                   ld1sb	{ z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  1      19    4.00    *                   ld1sb	{ z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT:  1      20    2.00    *                   ld1sh	{ z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT:  1      20    2.00    *                   ld1sh	{ z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT:  1      11    0.50    *                   ld1sh	{ z0.d }, p0/z, [x0]
+# CHECK-NEXT:  1      16    2.00    *                   ld1sh	{ z0.d }, p0/z, [z0.d]
+# CHECK-NEXT:  1      23    4.00    *                   ld1sh	{ z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT:  1      23    4.00    *                   ld1sh	{ z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT:  1      11    0.50    *                   ld1sh	{ z0.s }, p0/z, [x0]
+# CHECK-NEXT:  1      19    4.00    *                   ld1sh	{ z0.s }, p0/z, [z0.s]
+# CHECK-NEXT:  1      11    0.50    *                   ld1sh	{ z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  1      20    2.00    *                   ld1sh	{ z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT:  1      20    2.00    *                   ld1sh	{ z21.d }, p5/z, [x10, z21.d, uxtw]
 # CHECK-NEXT:  1      11    0.50    *                   ld1sh	{ z21.s }, p5/z, [sp, x21, lsl #1]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1sh	{ z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  1      11    0.50    *                   ld1sh	{ z21.s }, p5/z, [x10, #5, mul vl]
 # CHECK-NEXT:  1      11    0.50    *                   ld1sh	{ z21.s }, p5/z, [x10, x21, lsl #1]
 # CHECK-NEXT:  1      11    0.50    *                   ld1sh	{ z23.d }, p3/z, [x13, x8, lsl #1]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1sh	{ z23.d }, p3/z, [x13, z8.d, lsl #1]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1sh	{ z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1sh	{ z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT:  1      16    2.00    *             U     ld1sh	{ z31.d }, p7/z, [z31.d, #62]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1sh	{ z31.s }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  1      23    4.00    *             U     ld1sh	{ z31.s }, p7/z, [sp, z31.s, sxtw #1]
-# CHECK-NEXT:  1      23    4.00    *             U     ld1sh	{ z31.s }, p7/z, [sp, z31.s, uxtw #1]
-# CHECK-NEXT:  1      19    4.00    *             U     ld1sh	{ z31.s }, p7/z, [z31.s, #62]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1sw	{ z0.d }, p0/z, [x0, z0.d, sxtw #2]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1sw	{ z0.d }, p0/z, [x0, z0.d, uxtw #2]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1sw	{ z0.d }, p0/z, [x0]
-# CHECK-NEXT:  1      16    2.00    *             U     ld1sw	{ z0.d }, p0/z, [z0.d]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1sw	{ z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1sw	{ z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1sw	{ z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT:  1      20    2.00    *                   ld1sh	{ z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT:  1      11    0.50    *                   ld1sh	{ z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  1      20    2.00    *                   ld1sh	{ z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT:  1      16    2.00    *                   ld1sh	{ z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT:  1      11    0.50    *                   ld1sh	{ z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  1      23    4.00    *                   ld1sh	{ z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT:  1      23    4.00    *                   ld1sh	{ z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT:  1      19    4.00    *                   ld1sh	{ z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT:  1      20    2.00    *                   ld1sw	{ z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT:  1      20    2.00    *                   ld1sw	{ z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT:  1      11    0.50    *                   ld1sw	{ z0.d }, p0/z, [x0]
+# CHECK-NEXT:  1      16    2.00    *                   ld1sw	{ z0.d }, p0/z, [z0.d]
+# CHECK-NEXT:  1      11    0.50    *                   ld1sw	{ z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  1      20    2.00    *                   ld1sw	{ z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT:  1      20    2.00    *                   ld1sw	{ z21.d }, p5/z, [x10, z21.d, uxtw]
 # CHECK-NEXT:  1      11    0.50    *                   ld1sw	{ z23.d }, p3/z, [sp, x8, lsl #2]
 # CHECK-NEXT:  1      11    0.50    *                   ld1sw	{ z23.d }, p3/z, [x13, x8, lsl #2]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1sw	{ z23.d }, p3/z, [x13, z8.d, lsl #2]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1sw	{ z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1sw	{ z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT:  1      16    2.00    *             U     ld1sw	{ z31.d }, p7/z, [z31.d, #124]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1w	{ z0.d }, p0/z, [x0, z0.d, sxtw #2]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1w	{ z0.d }, p0/z, [x0, z0.d, uxtw #2]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1w	{ z0.d }, p0/z, [x0]
-# CHECK-NEXT:  1      16    2.00    *             U     ld1w	{ z0.d }, p0/z, [z0.d]
-# CHECK-NEXT:  1      23    4.00    *             U     ld1w	{ z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT:  1      23    4.00    *             U     ld1w	{ z0.s }, p0/z, [x0, z0.s, uxtw]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1w	{ z0.s }, p0/z, [x0]
-# CHECK-NEXT:  1      19    4.00    *             U     ld1w	{ z0.s }, p0/z, [z0.s]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1w	{ z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1w	{ z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1w	{ z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT:  1      20    2.00    *                   ld1sw	{ z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT:  1      11    0.50    *                   ld1sw	{ z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  1      20    2.00    *                   ld1sw	{ z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT:  1      16    2.00    *                   ld1sw	{ z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT:  1      20    2.00    *                   ld1w	{ z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT:  1      20    2.00    *                   ld1w	{ z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT:  1      11    0.50    *                   ld1w	{ z0.d }, p0/z, [x0]
+# CHECK-NEXT:  1      16    2.00    *                   ld1w	{ z0.d }, p0/z, [z0.d]
+# CHECK-NEXT:  1      23    4.00    *                   ld1w	{ z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT:  1      23    4.00    *                   ld1w	{ z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT:  1      11    0.50    *                   ld1w	{ z0.s }, p0/z, [x0]
+# CHECK-NEXT:  1      19    4.00    *                   ld1w	{ z0.s }, p0/z, [z0.s]
+# CHECK-NEXT:  1      11    0.50    *                   ld1w	{ z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  1      20    2.00    *                   ld1w	{ z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT:  1      20    2.00    *                   ld1w	{ z21.d }, p5/z, [x10, z21.d, uxtw]
 # CHECK-NEXT:  1      11    0.50    *                   ld1w	{ z21.s }, p5/z, [sp, x21, lsl #2]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1w	{ z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  1      11    0.50    *                   ld1w	{ z21.s }, p5/z, [x10, #5, mul vl]
 # CHECK-NEXT:  1      11    0.50    *                   ld1w	{ z21.s }, p5/z, [x10, x21, lsl #2]
 # CHECK-NEXT:  1      11    0.50    *                   ld1w	{ z23.d }, p3/z, [x13, x8, lsl #2]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1w	{ z23.d }, p3/z, [x13, z8.d, lsl #2]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1w	{ z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  1      20    2.00    *             U     ld1w	{ z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT:  1      16    2.00    *             U     ld1w	{ z31.d }, p7/z, [z31.d, #124]
-# CHECK-NEXT:  1      11    0.50    *             U     ld1w	{ z31.s }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  1      23    4.00    *             U     ld1w	{ z31.s }, p7/z, [sp, z31.s, sxtw #2]
-# CHECK-NEXT:  1      23    4.00    *             U     ld1w	{ z31.s }, p7/z, [sp, z31.s, uxtw #2]
-# CHECK-NEXT:  1      19    4.00    *             U     ld1w	{ z31.s }, p7/z, [z31.s, #124]
-# CHECK-NEXT:  3      15    4.50    *             U     ld2b	{ z0.b, z1.b }, p0/z, [x0, x0]
-# CHECK-NEXT:  3      15    4.50    *             U     ld2b	{ z0.b, z1.b }, p0/z, [x0]
-# CHECK-NEXT:  3      15    4.50    *             U     ld2b	{ z21.b, z22.b }, p5/z, [x10, #10, mul vl]
-# CHECK-NEXT:  3      15    4.50    *             U     ld2b	{ z23.b, z24.b }, p3/z, [x13, #-16, mul vl]
-# CHECK-NEXT:  3      15    4.50    *             U     ld2b	{ z5.b, z6.b }, p3/z, [x17, x16]
-# CHECK-NEXT:  3      12    1.50    *             U     ld2d	{ z0.d, z1.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT:  2      11    1.00    *             U     ld2d	{ z0.d, z1.d }, p0/z, [x0]
-# CHECK-NEXT:  2      11    1.00    *             U     ld2d	{ z21.d, z22.d }, p5/z, [x10, #10, mul vl]
-# CHECK-NEXT:  2      11    1.00    *             U     ld2d	{ z23.d, z24.d }, p3/z, [x13, #-16, mul vl]
-# CHECK-NEXT:  3      12    1.50    *             U     ld2d	{ z5.d, z6.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT:  3      15    4.50    *             U     ld2h	{ z0.h, z1.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT:  3      15    4.50    *             U     ld2h	{ z0.h, z1.h }, p0/z, [x0]
-# CHECK-NEXT:  3      15    4.50    *             U     ld2h	{ z21.h, z22.h }, p5/z, [x10, #10, mul vl]
-# CHECK-NEXT:  3      15    4.50    *             U     ld2h	{ z23.h, z24.h }, p3/z, [x13, #-16, mul vl]
-# CHECK-NEXT:  3      15    4.50    *             U     ld2h	{ z5.h, z6.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT:  3      12    1.50    *             U     ld2w	{ z0.s, z1.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT:  2      11    1.00    *             U     ld2w	{ z0.s, z1.s }, p0/z, [x0]
-# CHECK-NEXT:  2      11    1.00    *             U     ld2w	{ z21.s, z22.s }, p5/z, [x10, #10, mul vl]
-# CHECK-NEXT:  2      11    1.00    *             U     ld2w	{ z23.s, z24.s }, p3/z, [x13, #-16, mul vl]
-# CHECK-NEXT:  3      12    1.50    *             U     ld2w	{ z5.s, z6.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT:  4      15    6.50    *             U     ld3b	{ z0.b - z2.b }, p0/z, [x0, x0]
-# CHECK-NEXT:  4      15    6.50    *             U     ld3b	{ z0.b - z2.b }, p0/z, [x0]
-# CHECK-NEXT:  4      15    6.50    *             U     ld3b	{ z21.b - z23.b }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT:  4      15    6.50    *             U     ld3b	{ z23.b - z25.b }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT:  4      15    6.50    *             U     ld3b	{ z5.b - z7.b }, p3/z, [x17, x16]
-# CHECK-NEXT:  4      12    2.00    *             U     ld3d	{ z0.d - z2.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT:  3      11    1.50    *             U     ld3d	{ z0.d - z2.d }, p0/z, [x0]
-# CHECK-NEXT:  3      11    1.50    *             U     ld3d	{ z21.d - z23.d }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT:  3      11    1.50    *             U     ld3d	{ z23.d - z25.d }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT:  4      12    2.00    *             U     ld3d	{ z5.d - z7.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT:  4      15    6.50    *             U     ld3h	{ z0.h - z2.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT:  4      15    6.50    *             U     ld3h	{ z0.h - z2.h }, p0/z, [x0]
-# CHECK-NEXT:  4      15    6.50    *             U     ld3h	{ z21.h - z23.h }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT:  4      15    6.50    *             U     ld3h	{ z23.h - z25.h }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT:  4      15    6.50    *             U     ld3h	{ z5.h - z7.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT:  4      12    2.00    *             U     ld3w	{ z0.s - z2.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT:  3      11    1.50    *             U     ld3w	{ z0.s - z2.s }, p0/z, [x0]
-# CHECK-NEXT:  3      11    1.50    *             U     ld3w	{ z21.s - z23.s }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT:  3      11    1.50    *             U     ld3w	{ z23.s - z25.s }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT:  4      12    2.00    *             U     ld3w	{ z5.s - z7.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT:  5      15    8.50    *             U     ld4b	{ z0.b - z3.b }, p0/z, [x0, x0]
-# CHECK-NEXT:  5      15    8.50    *             U     ld4b	{ z0.b - z3.b }, p0/z, [x0]
-# CHECK-NEXT:  5      15    8.50    *             U     ld4b	{ z21.b - z24.b }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT:  5      15    8.50    *             U     ld4b	{ z23.b - z26.b }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT:  5      15    8.50    *             U     ld4b	{ z5.b - z8.b }, p3/z, [x17, x16]
-# CHECK-NEXT:  5      12    2.50    *             U     ld4d	{ z0.d - z3.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT:  4      11    2.00    *             U     ld4d	{ z0.d - z3.d }, p0/z, [x0]
-# CHECK-NEXT:  4      11    2.00    *             U     ld4d	{ z21.d - z24.d }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT:  4      11    2.00    *             U     ld4d	{ z23.d - z26.d }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT:  5      12    2.50    *             U     ld4d	{ z5.d - z8.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT:  5      15    8.50    *             U     ld4h	{ z0.h - z3.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT:  5      15    8.50    *             U     ld4h	{ z0.h - z3.h }, p0/z, [x0]
-# CHECK-NEXT:  5      15    8.50    *             U     ld4h	{ z21.h - z24.h }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT:  5      15    8.50    *             U     ld4h	{ z23.h - z26.h }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT:  5      15    8.50    *             U     ld4h	{ z5.h - z8.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT:  5      12    2.50    *             U     ld4w	{ z0.s - z3.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT:  4      11    2.00    *             U     ld4w	{ z0.s - z3.s }, p0/z, [x0]
-# CHECK-NEXT:  4      11    2.00    *             U     ld4w	{ z21.s - z24.s }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT:  4      11    2.00    *             U     ld4w	{ z23.s - z26.s }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT:  5      12    2.50    *             U     ld4w	{ z5.s - z8.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT:  1      20    2.00    *                   ld1w	{ z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT:  1      11    0.50    *                   ld1w	{ z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  1      20    2.00    *                   ld1w	{ z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT:  1      16    2.00    *                   ld1w	{ z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT:  1      11    0.50    *                   ld1w	{ z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  1      23    4.00    *                   ld1w	{ z31.s }, p7/z, [sp, z31.s, sxtw #2]
+# CHECK-NEXT:  1      23    4.00    *                   ld1w	{ z31.s }, p7/z, [sp, z31.s, uxtw #2]
+# CHECK-NEXT:  1      19    4.00    *                   ld1w	{ z31.s }, p7/z, [z31.s, #124]
+# CHECK-NEXT:  3      15    4.50    *                   ld2b	{ z0.b, z1.b }, p0/z, [x0, x0]
+# CHECK-NEXT:  3      15    4.50    *                   ld2b	{ z0.b, z1.b }, p0/z, [x0]
+# CHECK-NEXT:  3      15    4.50    *                   ld2b	{ z21.b, z22.b }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT:  3      15    4.50    *                   ld2b	{ z23.b, z24.b }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT:  3      15    4.50    *                   ld2b	{ z5.b, z6.b }, p3/z, [x17, x16]
+# CHECK-NEXT:  3      12    1.50    *                   ld2d	{ z0.d, z1.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT:  2      11    1.00    *                   ld2d	{ z0.d, z1.d }, p0/z, [x0]
+# CHECK-NEXT:  2      11    1.00    *                   ld2d	{ z21.d, z22.d }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT:  2      11    1.00    *                   ld2d	{ z23.d, z24.d }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT:  3      12    1.50    *                   ld2d	{ z5.d, z6.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT:  3      15    4.50    *                   ld2h	{ z0.h, z1.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT:  3      15    4.50    *                   ld2h	{ z0.h, z1.h }, p0/z, [x0]
+# CHECK-NEXT:  3      15    4.50    *                   ld2h	{ z21.h, z22.h }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT:  3      15    4.50    *                   ld2h	{ z23.h, z24.h }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT:  3      15    4.50    *                   ld2h	{ z5.h, z6.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT:  3      12    1.50    *                   ld2w	{ z0.s, z1.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT:  2      11    1.00    *                   ld2w	{ z0.s, z1.s }, p0/z, [x0]
+# CHECK-NEXT:  2      11    1.00    *                   ld2w	{ z21.s, z22.s }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT:  2      11    1.00    *                   ld2w	{ z23.s, z24.s }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT:  3      12    1.50    *                   ld2w	{ z5.s, z6.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT:  4      15    6.50    *                   ld3b	{ z0.b - z2.b }, p0/z, [x0, x0]
+# CHECK-NEXT:  4      15    6.50    *                   ld3b	{ z0.b - z2.b }, p0/z, [x0]
+# CHECK-NEXT:  4      15    6.50    *                   ld3b	{ z21.b - z23.b }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT:  4      15    6.50    *                   ld3b	{ z23.b - z25.b }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT:  4      15    6.50    *                   ld3b	{ z5.b - z7.b }, p3/z, [x17, x16]
+# CHECK-NEXT:  4      12    2.00    *                   ld3d	{ z0.d - z2.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT:  3      11    1.50    *                   ld3d	{ z0.d - z2.d }, p0/z, [x0]
+# CHECK-NEXT:  3      11    1.50    *                   ld3d	{ z21.d - z23.d }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT:  3      11    1.50    *                   ld3d	{ z23.d - z25.d }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT:  4      12    2.00    *                   ld3d	{ z5.d - z7.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT:  4      15    6.50    *                   ld3h	{ z0.h - z2.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT:  4      15    6.50    *                   ld3h	{ z0.h - z2.h }, p0/z, [x0]
+# CHECK-NEXT:  4      15    6.50    *                   ld3h	{ z21.h - z23.h }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT:  4      15    6.50    *                   ld3h	{ z23.h - z25.h }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT:  4      15    6.50    *                   ld3h	{ z5.h - z7.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT:  4      12    2.00    *                   ld3w	{ z0.s - z2.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT:  3      11    1.50    *                   ld3w	{ z0.s - z2.s }, p0/z, [x0]
+# CHECK-NEXT:  3      11    1.50    *                   ld3w	{ z21.s - z23.s }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT:  3      11    1.50    *                   ld3w	{ z23.s - z25.s }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT:  4      12    2.00    *                   ld3w	{ z5.s - z7.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT:  5      15    8.50    *                   ld4b	{ z0.b - z3.b }, p0/z, [x0, x0]
+# CHECK-NEXT:  5      15    8.50    *                   ld4b	{ z0.b - z3.b }, p0/z, [x0]
+# CHECK-NEXT:  5      15    8.50    *                   ld4b	{ z21.b - z24.b }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT:  5      15    8.50    *                   ld4b	{ z23.b - z26.b }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT:  5      15    8.50    *                   ld4b	{ z5.b - z8.b }, p3/z, [x17, x16]
+# CHECK-NEXT:  5      12    2.50    *                   ld4d	{ z0.d - z3.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT:  4      11    2.00    *                   ld4d	{ z0.d - z3.d }, p0/z, [x0]
+# CHECK-NEXT:  4      11    2.00    *                   ld4d	{ z21.d - z24.d }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT:  4      11    2.00    *                   ld4d	{ z23.d - z26.d }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT:  5      12    2.50    *                   ld4d	{ z5.d - z8.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT:  5      15    8.50    *                   ld4h	{ z0.h - z3.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT:  5      15    8.50    *                   ld4h	{ z0.h - z3.h }, p0/z, [x0]
+# CHECK-NEXT:  5      15    8.50    *                   ld4h	{ z21.h - z24.h }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT:  5      15    8.50    *                   ld4h	{ z23.h - z26.h }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT:  5      15    8.50    *                   ld4h	{ z5.h - z8.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT:  5      12    2.50    *                   ld4w	{ z0.s - z3.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT:  4      11    2.00    *                   ld4w	{ z0.s - z3.s }, p0/z, [x0]
+# CHECK-NEXT:  4      11    2.00    *                   ld4w	{ z21.s - z24.s }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT:  4      11    2.00    *                   ld4w	{ z23.s - z26.s }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT:  5      12    2.50    *                   ld4w	{ z5.s - z8.s }, p3/z, [x17, x16, lsl #2]
 # CHECK-NEXT:  1      11    0.50    *             U     ldff1b	{ z0.d }, p0/z, [x0, x0]
 # CHECK-NEXT:  1      16    2.00    *             U     ldff1b	{ z0.d }, p0/z, [z0.d]
 # CHECK-NEXT:  1      11    0.50    *             U     ldff1b	{ z0.h }, p0/z, [x0, x0]
@@ -3782,9 +3782,9 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      11    1.00    *                   ldr	p0, [x0]
 # CHECK-NEXT:  1      11    1.00    *                   ldr	p5, [x10, #255, mul vl]
 # CHECK-NEXT:  1      11    1.00    *                   ldr	p7, [x13, #-256, mul vl]
-# CHECK-NEXT:  1      11    1.00    *             U     ldr	z0, [x0]
-# CHECK-NEXT:  1      11    1.00    *             U     ldr	z23, [x13, #255, mul vl]
-# CHECK-NEXT:  1      11    1.00    *             U     ldr	z31, [sp, #-256, mul vl]
+# CHECK-NEXT:  1      11    1.00    *                   ldr	z0, [x0]
+# CHECK-NEXT:  1      11    1.00    *                   ldr	z23, [x13, #255, mul vl]
+# CHECK-NEXT:  1      11    1.00    *                   ldr	z31, [sp, #-256, mul vl]
 # CHECK-NEXT:  1      4     0.50                        lsl	z0.b, p0/m, z0.b, #0
 # CHECK-NEXT:  1      4     0.50                        lsl	z0.b, p0/m, z0.b, z0.b
 # CHECK-NEXT:  1      4     0.50                        lsl	z0.b, p0/m, z0.b, z1.d
@@ -3811,10 +3811,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      4     0.50                        lsl	z31.h, z31.h, #15
 # CHECK-NEXT:  1      4     0.50                        lsl	z31.s, p0/m, z31.s, #31
 # CHECK-NEXT:  1      4     0.50                        lsl	z31.s, z31.s, #31
-# CHECK-NEXT:  1      4     0.50                  U     lslr	z0.b, p0/m, z0.b, z0.b
-# CHECK-NEXT:  1      4     0.50                  U     lslr	z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT:  1      4     0.50                  U     lslr	z0.h, p0/m, z0.h, z0.h
-# CHECK-NEXT:  1      4     0.50                  U     lslr	z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT:  1      4     0.50                        lslr	z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT:  1      4     0.50                        lslr	z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT:  1      4     0.50                        lslr	z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT:  1      4     0.50                        lslr	z0.s, p0/m, z0.s, z0.s
 # CHECK-NEXT:  1      4     0.50                        lsr	z0.b, p0/m, z0.b, #1
 # CHECK-NEXT:  1      4     0.50                        lsr	z0.b, p0/m, z0.b, z0.b
 # CHECK-NEXT:  1      4     0.50                        lsr	z0.b, p0/m, z0.b, z1.d
@@ -3841,10 +3841,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      4     0.50                        lsr	z31.h, z31.h, #16
 # CHECK-NEXT:  1      4     0.50                        lsr	z31.s, p0/m, z31.s, #32
 # CHECK-NEXT:  1      4     0.50                        lsr	z31.s, z31.s, #32
-# CHECK-NEXT:  1      4     0.50                  U     lsrr	z0.b, p0/m, z0.b, z0.b
-# CHECK-NEXT:  1      4     0.50                  U     lsrr	z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT:  1      4     0.50                  U     lsrr	z0.h, p0/m, z0.h, z0.h
-# CHECK-NEXT:  1      4     0.50                  U     lsrr	z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT:  1      4     0.50                        lsrr	z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT:  1      4     0.50                        lsrr	z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT:  1      4     0.50                        lsrr	z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT:  1      4     0.50                        lsrr	z0.s, p0/m, z0.s, z0.s
 # CHECK-NEXT:  1      9     0.50                        mad	z0.b, p7/m, z1.b, z31.b
 # CHECK-NEXT:  1      9     0.50                        mad	z0.d, p7/m, z1.d, z31.d
 # CHECK-NEXT:  1      9     0.50                        mad	z0.h, p7/m, z1.h, z31.h
@@ -3865,7 +3865,7 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      3     1.00                        mov	p15.b, p15/z, p15.b
 # CHECK-NEXT:  1      4     1.00                        mov	z0.b, #127
 # CHECK-NEXT:  1      4     1.00                        mov	z0.b, b0
-# CHECK-NEXT:  1      6     1.00                  U     mov	z0.b, p0/m, b0
+# CHECK-NEXT:  1      6     1.00                        mov	z0.b, p0/m, b0
 # CHECK-NEXT:  1      8     1.00                        mov	z0.b, p0/m, w0
 # CHECK-NEXT:  1      4     1.00                        mov	z0.b, p0/z, #127
 # CHECK-NEXT:  1      6     1.00                        mov	z0.b, w0
@@ -3927,14 +3927,14 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      4     1.00                        mov	z21.s, p15/m, #-128
 # CHECK-NEXT:  1      4     1.00                        mov	z21.s, p15/m, #-32768
 # CHECK-NEXT:  1      4     0.50                        mov	z31.b, p15/m, z31.b
-# CHECK-NEXT:  1      6     1.00                  U     mov	z31.b, p7/m, b31
+# CHECK-NEXT:  1      6     1.00                        mov	z31.b, p7/m, b31
 # CHECK-NEXT:  1      1     0.17                        movprfx	z31, z6
 # CHECK-NEXT:  1      8     1.00                        mov	z31.b, p7/m, wsp
 # CHECK-NEXT:  1      6     1.00                        mov	z31.b, wsp
 # CHECK-NEXT:  1      4     1.00                        mov	z31.b, z31.b[63]
 # CHECK-NEXT:  1      4     0.50                        mov	z31.d, p15/m, z31.d
 # CHECK-NEXT:  1      6     1.00                        mov	z31.d, p7/m, d31
-# CHECK-NEXT:  1      1     0.17                  U     movprfx	z31.d, p7/z, z6.d
+# CHECK-NEXT:  1      1     0.17                        movprfx	z31.d, p7/z, z6.d
 # CHECK-NEXT:  1      8     1.00                        mov	z31.d, p7/m, sp
 # CHECK-NEXT:  1      6     1.00                        mov	z31.d, sp
 # CHECK-NEXT:  1      4     0.50                        mov	z31.d, z0.d
@@ -3960,10 +3960,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      4     1.00                        mov	z5.h, #-6
 # CHECK-NEXT:  1      4     1.00                        mov	z5.q, z17.q[3]
 # CHECK-NEXT:  1      4     1.00                        mov	z5.s, #-6
-# CHECK-NEXT:  1      3     1.00                  U     movs	p0.b, p0.b
-# CHECK-NEXT:  1      3     1.00                  U     movs	p0.b, p0/z, p0.b
-# CHECK-NEXT:  1      3     1.00                  U     movs	p15.b, p15.b
-# CHECK-NEXT:  1      3     1.00                  U     movs	p15.b, p15/z, p15.b
+# CHECK-NEXT:  1      3     1.00                        movs	p0.b, p0.b
+# CHECK-NEXT:  1      3     1.00                        movs	p0.b, p0/z, p0.b
+# CHECK-NEXT:  1      3     1.00                        movs	p15.b, p15.b
+# CHECK-NEXT:  1      3     1.00                        movs	p15.b, p15/z, p15.b
 # CHECK-NEXT:  1      1     0.17                  U     mrs	x3, ID_AA64ZFR0_EL1
 # CHECK-NEXT:  1      1     0.17                  U     mrs	x3, ZCR_EL1
 # CHECK-NEXT:  1      1     0.17                  U     mrs	x3, ZCR_EL12
@@ -3991,8 +3991,8 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      9     1.00                        mul	z31.s, z31.s, #127
 # CHECK-NEXT:  1      3     1.00                        nand	p0.b, p0/z, p0.b, p0.b
 # CHECK-NEXT:  1      3     1.00                        nand	p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT:  1      3     1.00                  U     nands	p0.b, p0/z, p0.b, p0.b
-# CHECK-NEXT:  1      3     1.00                  U     nands	p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT:  1      3     1.00                        nands	p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT:  1      3     1.00                        nands	p15.b, p15/z, p15.b, p15.b
 # CHECK-NEXT:  1      4     1.00                        neg	z0.b, p0/m, z0.b
 # CHECK-NEXT:  1      4     1.00                        neg	z0.d, p0/m, z0.d
 # CHECK-NEXT:  1      4     1.00                        neg	z0.h, p0/m, z0.h
@@ -4003,20 +4003,20 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      4     1.00                        neg	z31.s, p7/m, z31.s
 # CHECK-NEXT:  1      3     1.00                        nor	p0.b, p0/z, p0.b, p0.b
 # CHECK-NEXT:  1      3     1.00                        nor	p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT:  1      3     1.00                  U     nors	p0.b, p0/z, p0.b, p0.b
-# CHECK-NEXT:  1      3     1.00                  U     nors	p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT:  1      3     1.00                        nors	p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT:  1      3     1.00                        nors	p15.b, p15/z, p15.b, p15.b
 # CHECK-NEXT:  1      3     1.00                        not	p0.b, p0/z, p0.b
 # CHECK-NEXT:  1      3     1.00                        not	p15.b, p15/z, p15.b
 # CHECK-NEXT:  1      4     0.50                        not	z31.b, p7/m, z31.b
 # CHECK-NEXT:  1      4     0.50                        not	z31.d, p7/m, z31.d
 # CHECK-NEXT:  1      4     0.50                        not	z31.h, p7/m, z31.h
 # CHECK-NEXT:  1      4     0.50                        not	z31.s, p7/m, z31.s
-# CHECK-NEXT:  1      3     1.00                  U     nots	p0.b, p0/z, p0.b
-# CHECK-NEXT:  1      3     1.00                  U     nots	p15.b, p15/z, p15.b
+# CHECK-NEXT:  1      3     1.00                        nots	p0.b, p0/z, p0.b
+# CHECK-NEXT:  1      3     1.00                        nots	p15.b, p15/z, p15.b
 # CHECK-NEXT:  1      3     1.00                        orn	p0.b, p0/z, p0.b, p0.b
 # CHECK-NEXT:  1      3     1.00                        orn	p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT:  1      3     1.00                  U     orns	p0.b, p0/z, p0.b, p0.b
-# CHECK-NEXT:  1      3     1.00                  U     orns	p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT:  1      3     1.00                        orns	p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT:  1      3     1.00                        orns	p15.b, p15/z, p15.b, p15.b
 # CHECK-NEXT:  1      3     1.00                        orr	p0.b, p0/z, p0.b, p1.b
 # CHECK-NEXT:  1      4     1.00                        orr	z0.d, z0.d, #0x6
 # CHECK-NEXT:  1      4     1.00                        orr	z0.d, z0.d, #0xfffffffffffffff9
@@ -4031,7 +4031,7 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      4     0.50                        orr	z31.s, p7/m, z31.s, z31.s
 # CHECK-NEXT:  1      4     1.00                        orr	z5.b, z5.b, #0x6
 # CHECK-NEXT:  1      4     1.00                        orr	z5.b, z5.b, #0xf9
-# CHECK-NEXT:  1      3     1.00                  U     orrs	p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT:  1      3     1.00                        orrs	p0.b, p0/z, p0.b, p1.b
 # CHECK-NEXT:  10     46    5.00                        orv	b0, p7, z31.b
 # CHECK-NEXT:  7      34    3.50                        orv	d0, p7, z31.d
 # CHECK-NEXT:  9      42    4.50                        orv	h0, p7, z31.h
@@ -4048,8 +4048,8 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      1     0.50    *      *      U     prfb	#15, p0, [x0]
 # CHECK-NEXT:  1      1     0.50    *      *      U     prfb	#6, p0, [x0]
 # CHECK-NEXT:  1      1     0.50    *      *      U     prfb	#7, p0, [x0]
-# CHECK-NEXT:  1      1     2.00    *      *            prfb	#7, p3, [z13.s, #31]
-# CHECK-NEXT:  1      1     2.00    *      *            prfb	#7, p3, [z13.s]
+# CHECK-NEXT:  1      1     2.00    *      *      U     prfb	#7, p3, [z13.s, #31]
+# CHECK-NEXT:  1      1     2.00    *      *      U     prfb	#7, p3, [z13.s]
 # CHECK-NEXT:  1      1     1.00    *      *      U     prfb	pldl1keep, p0, [x0, z0.d, uxtw]
 # CHECK-NEXT:  1      1     1.00    *      *      U     prfb	pldl1keep, p0, [x0, z0.d]
 # CHECK-NEXT:  1      1     2.00    *      *      U     prfb	pldl1keep, p0, [x0, z0.s, uxtw]
@@ -4075,8 +4075,8 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      1     0.50    *      *      U     prfd	#15, p0, [x0]
 # CHECK-NEXT:  1      1     1.00    *      *      U     prfd	#15, p7, [z31.d, #248]
 # CHECK-NEXT:  1      1     1.00    *      *      U     prfd	#15, p7, [z31.d]
-# CHECK-NEXT:  1      1     2.00    *      *            prfd	#15, p7, [z31.s, #248]
-# CHECK-NEXT:  1      1     2.00    *      *            prfd	#15, p7, [z31.s]
+# CHECK-NEXT:  1      1     2.00    *      *      U     prfd	#15, p7, [z31.s, #248]
+# CHECK-NEXT:  1      1     2.00    *      *      U     prfd	#15, p7, [z31.s]
 # CHECK-NEXT:  1      1     0.50    *      *      U     prfd	#6, p0, [x0]
 # CHECK-NEXT:  1      1     0.50    *      *      U     prfd	#7, p0, [x0]
 # CHECK-NEXT:  1      1     1.00    *      *      U     prfd	pldl1keep, p0, [x0, z0.d, lsl #3]
@@ -4102,8 +4102,8 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      1     0.50    *      *      U     prfh	#15, p0, [x0]
 # CHECK-NEXT:  1      1     1.00    *      *      U     prfh	#15, p7, [z31.d, #62]
 # CHECK-NEXT:  1      1     1.00    *      *      U     prfh	#15, p7, [z31.d]
-# CHECK-NEXT:  1      1     2.00    *      *            prfh	#15, p7, [z31.s, #62]
-# CHECK-NEXT:  1      1     2.00    *      *            prfh	#15, p7, [z31.s]
+# CHECK-NEXT:  1      1     2.00    *      *      U     prfh	#15, p7, [z31.s, #62]
+# CHECK-NEXT:  1      1     2.00    *      *      U     prfh	#15, p7, [z31.s]
 # CHECK-NEXT:  1      1     0.50    *      *      U     prfh	#6, p0, [x0]
 # CHECK-NEXT:  1      1     0.50    *      *      U     prfh	#7, p0, [x0]
 # CHECK-NEXT:  1      1     1.00    *      *      U     prfh	pldl1keep, p0, [x0, z0.d, lsl #1]
@@ -4129,8 +4129,8 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      1     0.50    *      *      U     prfw	#15, p0, [x0]
 # CHECK-NEXT:  1      1     1.00    *      *      U     prfw	#15, p7, [z31.d, #124]
 # CHECK-NEXT:  1      1     1.00    *      *      U     prfw	#15, p7, [z31.d]
-# CHECK-NEXT:  1      1     2.00    *      *            prfw	#15, p7, [z31.s, #124]
-# CHECK-NEXT:  1      1     2.00    *      *            prfw	#15, p7, [z31.s]
+# CHECK-NEXT:  1      1     2.00    *      *      U     prfw	#15, p7, [z31.s, #124]
+# CHECK-NEXT:  1      1     2.00    *      *      U     prfw	#15, p7, [z31.s]
 # CHECK-NEXT:  1      1     0.50    *      *      U     prfw	#6, p0, [x0]
 # CHECK-NEXT:  1      1     0.50    *      *      U     prfw	#7, p0, [x0]
 # CHECK-NEXT:  1      1     1.00    *      *      U     prfw	#7, p3, [x13, z8.d, uxtw #2]
@@ -4193,45 +4193,45 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      3     1.00                        ptrue	p7.s, vl64
 # CHECK-NEXT:  1      3     1.00                        ptrue	p7.s, vl7
 # CHECK-NEXT:  1      3     1.00                        ptrue	p7.s, vl8
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p0.b, pow2
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p0.d, pow2
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p0.h, pow2
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p0.s, pow2
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p15.b
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p15.d
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p15.h
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p15.s
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s, #14
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s, #15
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s, #16
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s, #17
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s, #18
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s, #19
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s, #20
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s, #21
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s, #22
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s, #23
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s, #24
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s, #25
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s, #26
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s, #27
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s, #28
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s, mul3
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s, mul4
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s, vl1
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s, vl128
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s, vl16
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s, vl2
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s, vl256
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s, vl3
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s, vl32
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s, vl4
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s, vl5
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s, vl6
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s, vl64
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s, vl7
-# CHECK-NEXT:  1      3     1.00                  U     ptrues	p7.s, vl8
+# CHECK-NEXT:  1      3     1.00                        ptrues	p0.b, pow2
+# CHECK-NEXT:  1      3     1.00                        ptrues	p0.d, pow2
+# CHECK-NEXT:  1      3     1.00                        ptrues	p0.h, pow2
+# CHECK-NEXT:  1      3     1.00                        ptrues	p0.s, pow2
+# CHECK-NEXT:  1      3     1.00                        ptrues	p15.b
+# CHECK-NEXT:  1      3     1.00                        ptrues	p15.d
+# CHECK-NEXT:  1      3     1.00                        ptrues	p15.h
+# CHECK-NEXT:  1      3     1.00                        ptrues	p15.s
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s, #14
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s, #15
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s, #16
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s, #17
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s, #18
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s, #19
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s, #20
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s, #21
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s, #22
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s, #23
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s, #24
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s, #25
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s, #26
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s, #27
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s, #28
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s, mul3
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s, mul4
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s, vl1
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s, vl128
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s, vl16
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s, vl2
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s, vl256
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s, vl3
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s, vl32
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s, vl4
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s, vl5
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s, vl6
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s, vl64
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s, vl7
+# CHECK-NEXT:  1      3     1.00                        ptrues	p7.s, vl8
 # CHECK-NEXT:  1      3     1.00                        punpkhi	p0.h, p0.b
 # CHECK-NEXT:  1      3     1.00                        punpkhi	p15.h, p15.b
 # CHECK-NEXT:  1      3     1.00                        punpklo	p0.h, p0.b
@@ -4347,19 +4347,19 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     1.00                        sqdecb	x0, all, mul #16
 # CHECK-NEXT:  1      2     1.00                        sqdecb	x0, pow2
 # CHECK-NEXT:  1      2     1.00                        sqdecb	x0, vl1
-# CHECK-NEXT:  1      2     1.00                  U     sqdecb	x0, w0
-# CHECK-NEXT:  1      2     1.00                  U     sqdecb	x0, w0, all, mul #16
-# CHECK-NEXT:  1      2     1.00                  U     sqdecb	x0, w0, pow2
-# CHECK-NEXT:  1      2     1.00                  U     sqdecb	x0, w0, pow2, mul #16
+# CHECK-NEXT:  1      2     1.00                        sqdecb	x0, w0
+# CHECK-NEXT:  1      2     1.00                        sqdecb	x0, w0, all, mul #16
+# CHECK-NEXT:  1      2     1.00                        sqdecb	x0, w0, pow2
+# CHECK-NEXT:  1      2     1.00                        sqdecb	x0, w0, pow2, mul #16
 # CHECK-NEXT:  1      2     1.00                        sqdecd	x0
 # CHECK-NEXT:  1      2     1.00                        sqdecd	x0, #14
 # CHECK-NEXT:  1      2     1.00                        sqdecd	x0, all, mul #16
 # CHECK-NEXT:  1      2     1.00                        sqdecd	x0, pow2
 # CHECK-NEXT:  1      2     1.00                        sqdecd	x0, vl1
-# CHECK-NEXT:  1      2     1.00                  U     sqdecd	x0, w0
-# CHECK-NEXT:  1      2     1.00                  U     sqdecd	x0, w0, all, mul #16
-# CHECK-NEXT:  1      2     1.00                  U     sqdecd	x0, w0, pow2
-# CHECK-NEXT:  1      2     1.00                  U     sqdecd	x0, w0, pow2, mul #16
+# CHECK-NEXT:  1      2     1.00                        sqdecd	x0, w0
+# CHECK-NEXT:  1      2     1.00                        sqdecd	x0, w0, all, mul #16
+# CHECK-NEXT:  1      2     1.00                        sqdecd	x0, w0, pow2
+# CHECK-NEXT:  1      2     1.00                        sqdecd	x0, w0, pow2, mul #16
 # CHECK-NEXT:  1      4     0.50                        sqdecd	z0.d
 # CHECK-NEXT:  1      4     0.50                        sqdecd	z0.d, all, mul #16
 # CHECK-NEXT:  1      4     0.50                        sqdecd	z0.d, pow2
@@ -4369,10 +4369,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     1.00                        sqdech	x0, all, mul #16
 # CHECK-NEXT:  1      2     1.00                        sqdech	x0, pow2
 # CHECK-NEXT:  1      2     1.00                        sqdech	x0, vl1
-# CHECK-NEXT:  1      2     1.00                  U     sqdech	x0, w0
-# CHECK-NEXT:  1      2     1.00                  U     sqdech	x0, w0, all, mul #16
-# CHECK-NEXT:  1      2     1.00                  U     sqdech	x0, w0, pow2
-# CHECK-NEXT:  1      2     1.00                  U     sqdech	x0, w0, pow2, mul #16
+# CHECK-NEXT:  1      2     1.00                        sqdech	x0, w0
+# CHECK-NEXT:  1      2     1.00                        sqdech	x0, w0, all, mul #16
+# CHECK-NEXT:  1      2     1.00                        sqdech	x0, w0, pow2
+# CHECK-NEXT:  1      2     1.00                        sqdech	x0, w0, pow2, mul #16
 # CHECK-NEXT:  1      4     0.50                        sqdech	z0.h
 # CHECK-NEXT:  1      4     0.50                        sqdech	z0.h, all, mul #16
 # CHECK-NEXT:  1      4     0.50                        sqdech	z0.h, pow2
@@ -4381,10 +4381,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  2      6     1.50                        sqdecp	x0, p0.d
 # CHECK-NEXT:  2      6     1.50                        sqdecp	x0, p0.h
 # CHECK-NEXT:  2      6     1.50                        sqdecp	x0, p0.s
-# CHECK-NEXT:  2      6     1.50                  U     sqdecp	xzr, p15.b, wzr
-# CHECK-NEXT:  2      6     1.50                  U     sqdecp	xzr, p15.d, wzr
-# CHECK-NEXT:  2      6     1.50                  U     sqdecp	xzr, p15.h, wzr
-# CHECK-NEXT:  2      6     1.50                  U     sqdecp	xzr, p15.s, wzr
+# CHECK-NEXT:  2      6     1.50                        sqdecp	xzr, p15.b, wzr
+# CHECK-NEXT:  2      6     1.50                        sqdecp	xzr, p15.d, wzr
+# CHECK-NEXT:  2      6     1.50                        sqdecp	xzr, p15.h, wzr
+# CHECK-NEXT:  2      6     1.50                        sqdecp	xzr, p15.s, wzr
 # CHECK-NEXT:  1      12    1.00                        sqdecp	z0.d, p0.d
 # CHECK-NEXT:  1      12    1.00                        sqdecp	z0.h, p0.h
 # CHECK-NEXT:  1      12    1.00                        sqdecp	z0.s, p0.s
@@ -4393,10 +4393,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     1.00                        sqdecw	x0, all, mul #16
 # CHECK-NEXT:  1      2     1.00                        sqdecw	x0, pow2
 # CHECK-NEXT:  1      2     1.00                        sqdecw	x0, vl1
-# CHECK-NEXT:  1      2     1.00                  U     sqdecw	x0, w0
-# CHECK-NEXT:  1      2     1.00                  U     sqdecw	x0, w0, all, mul #16
-# CHECK-NEXT:  1      2     1.00                  U     sqdecw	x0, w0, pow2
-# CHECK-NEXT:  1      2     1.00                  U     sqdecw	x0, w0, pow2, mul #16
+# CHECK-NEXT:  1      2     1.00                        sqdecw	x0, w0
+# CHECK-NEXT:  1      2     1.00                        sqdecw	x0, w0, all, mul #16
+# CHECK-NEXT:  1      2     1.00                        sqdecw	x0, w0, pow2
+# CHECK-NEXT:  1      2     1.00                        sqdecw	x0, w0, pow2, mul #16
 # CHECK-NEXT:  1      4     0.50                        sqdecw	z0.s
 # CHECK-NEXT:  1      4     0.50                        sqdecw	z0.s, all, mul #16
 # CHECK-NEXT:  1      4     0.50                        sqdecw	z0.s, pow2
@@ -4406,19 +4406,19 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     1.00                        sqincb	x0, all, mul #16
 # CHECK-NEXT:  1      2     1.00                        sqincb	x0, pow2
 # CHECK-NEXT:  1      2     1.00                        sqincb	x0, vl1
-# CHECK-NEXT:  1      2     1.00                  U     sqincb	x0, w0
-# CHECK-NEXT:  1      2     1.00                  U     sqincb	x0, w0, all, mul #16
-# CHECK-NEXT:  1      2     1.00                  U     sqincb	x0, w0, pow2
-# CHECK-NEXT:  1      2     1.00                  U     sqincb	x0, w0, pow2, mul #16
+# CHECK-NEXT:  1      2     1.00                        sqincb	x0, w0
+# CHECK-NEXT:  1      2     1.00                        sqincb	x0, w0, all, mul #16
+# CHECK-NEXT:  1      2     1.00                        sqincb	x0, w0, pow2
+# CHECK-NEXT:  1      2     1.00                        sqincb	x0, w0, pow2, mul #16
 # CHECK-NEXT:  1      2     1.00                        sqincd	x0
 # CHECK-NEXT:  1      2     1.00                        sqincd	x0, #14
 # CHECK-NEXT:  1      2     1.00                        sqincd	x0, all, mul #16
 # CHECK-NEXT:  1      2     1.00                        sqincd	x0, pow2
 # CHECK-NEXT:  1      2     1.00                        sqincd	x0, vl1
-# CHECK-NEXT:  1      2     1.00                  U     sqincd	x0, w0
-# CHECK-NEXT:  1      2     1.00                  U     sqincd	x0, w0, all, mul #16
-# CHECK-NEXT:  1      2     1.00                  U     sqincd	x0, w0, pow2
-# CHECK-NEXT:  1      2     1.00                  U     sqincd	x0, w0, pow2, mul #16
+# CHECK-NEXT:  1      2     1.00                        sqincd	x0, w0
+# CHECK-NEXT:  1      2     1.00                        sqincd	x0, w0, all, mul #16
+# CHECK-NEXT:  1      2     1.00                        sqincd	x0, w0, pow2
+# CHECK-NEXT:  1      2     1.00                        sqincd	x0, w0, pow2, mul #16
 # CHECK-NEXT:  1      4     0.50                        sqincd	z0.d
 # CHECK-NEXT:  1      4     0.50                        sqincd	z0.d, all, mul #16
 # CHECK-NEXT:  1      4     0.50                        sqincd	z0.d, pow2
@@ -4428,10 +4428,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     1.00                        sqinch	x0, all, mul #16
 # CHECK-NEXT:  1      2     1.00                        sqinch	x0, pow2
 # CHECK-NEXT:  1      2     1.00                        sqinch	x0, vl1
-# CHECK-NEXT:  1      2     1.00                  U     sqinch	x0, w0
-# CHECK-NEXT:  1      2     1.00                  U     sqinch	x0, w0, all, mul #16
-# CHECK-NEXT:  1      2     1.00                  U     sqinch	x0, w0, pow2
-# CHECK-NEXT:  1      2     1.00                  U     sqinch	x0, w0, pow2, mul #16
+# CHECK-NEXT:  1      2     1.00                        sqinch	x0, w0
+# CHECK-NEXT:  1      2     1.00                        sqinch	x0, w0, all, mul #16
+# CHECK-NEXT:  1      2     1.00                        sqinch	x0, w0, pow2
+# CHECK-NEXT:  1      2     1.00                        sqinch	x0, w0, pow2, mul #16
 # CHECK-NEXT:  1      4     0.50                        sqinch	z0.h
 # CHECK-NEXT:  1      4     0.50                        sqinch	z0.h, all, mul #16
 # CHECK-NEXT:  1      4     0.50                        sqinch	z0.h, pow2
@@ -4440,10 +4440,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  2      6     1.50                        sqincp	x0, p0.d
 # CHECK-NEXT:  2      6     1.50                        sqincp	x0, p0.h
 # CHECK-NEXT:  2      6     1.50                        sqincp	x0, p0.s
-# CHECK-NEXT:  2      6     1.50                  U     sqincp	xzr, p15.b, wzr
-# CHECK-NEXT:  2      6     1.50                  U     sqincp	xzr, p15.d, wzr
-# CHECK-NEXT:  2      6     1.50                  U     sqincp	xzr, p15.h, wzr
-# CHECK-NEXT:  2      6     1.50                  U     sqincp	xzr, p15.s, wzr
+# CHECK-NEXT:  2      6     1.50                        sqincp	xzr, p15.b, wzr
+# CHECK-NEXT:  2      6     1.50                        sqincp	xzr, p15.d, wzr
+# CHECK-NEXT:  2      6     1.50                        sqincp	xzr, p15.h, wzr
+# CHECK-NEXT:  2      6     1.50                        sqincp	xzr, p15.s, wzr
 # CHECK-NEXT:  1      12    1.00                        sqincp	z0.d, p0.d
 # CHECK-NEXT:  1      12    1.00                        sqincp	z0.h, p0.h
 # CHECK-NEXT:  1      12    1.00                        sqincp	z0.s, p0.s
@@ -4452,10 +4452,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     1.00                        sqincw	x0, all, mul #16
 # CHECK-NEXT:  1      2     1.00                        sqincw	x0, pow2
 # CHECK-NEXT:  1      2     1.00                        sqincw	x0, vl1
-# CHECK-NEXT:  1      2     1.00                  U     sqincw	x0, w0
-# CHECK-NEXT:  1      2     1.00                  U     sqincw	x0, w0, all, mul #16
-# CHECK-NEXT:  1      2     1.00                  U     sqincw	x0, w0, pow2
-# CHECK-NEXT:  1      2     1.00                  U     sqincw	x0, w0, pow2, mul #16
+# CHECK-NEXT:  1      2     1.00                        sqincw	x0, w0
+# CHECK-NEXT:  1      2     1.00                        sqincw	x0, w0, all, mul #16
+# CHECK-NEXT:  1      2     1.00                        sqincw	x0, w0, pow2
+# CHECK-NEXT:  1      2     1.00                        sqincw	x0, w0, pow2, mul #16
 # CHECK-NEXT:  1      4     0.50                        sqincw	z0.s
 # CHECK-NEXT:  1      4     0.50                        sqincw	z0.s, all, mul #16
 # CHECK-NEXT:  1      4     0.50                        sqincw	z0.s, pow2
@@ -4560,66 +4560,66 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  4      16    4.00           *            st1w	{ z31.d }, p7, [z31.d, #124]
 # CHECK-NEXT:  1      11    1.00           *            st1w	{ z31.s }, p7, [sp, #-1, mul vl]
 # CHECK-NEXT:  8      16    12.00          *            st1w	{ z31.s }, p7, [z31.s, #124]
-# CHECK-NEXT:  3      12    8.00           *      U     st2b	{ z0.b, z1.b }, p0, [x0, x0]
-# CHECK-NEXT:  3      12    8.00           *      U     st2b	{ z0.b, z1.b }, p0, [x0]
-# CHECK-NEXT:  3      12    8.00           *      U     st2b	{ z21.b, z22.b }, p5, [x10, #10, mul vl]
-# CHECK-NEXT:  3      12    8.00           *      U     st2b	{ z23.b, z24.b }, p3, [x13, #-16, mul vl]
-# CHECK-NEXT:  3      12    8.00           *      U     st2b	{ z5.b, z6.b }, p3, [x17, x16]
-# CHECK-NEXT:  2      11    2.00           *      U     st2d	{ z0.d, z1.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT:  3      12    2.00           *      U     st2d	{ z0.d, z1.d }, p0, [x0]
-# CHECK-NEXT:  3      12    2.00           *      U     st2d	{ z21.d, z22.d }, p5, [x10, #10, mul vl]
-# CHECK-NEXT:  3      12    2.00           *      U     st2d	{ z23.d, z24.d }, p3, [x13, #-16, mul vl]
-# CHECK-NEXT:  2      11    2.00           *      U     st2d	{ z5.d, z6.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT:  3      12    8.00           *      U     st2h	{ z0.h, z1.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT:  3      12    8.00           *      U     st2h	{ z0.h, z1.h }, p0, [x0]
-# CHECK-NEXT:  3      12    8.00           *      U     st2h	{ z21.h, z22.h }, p5, [x10, #10, mul vl]
-# CHECK-NEXT:  3      12    8.00           *      U     st2h	{ z23.h, z24.h }, p3, [x13, #-16, mul vl]
-# CHECK-NEXT:  3      12    8.00           *      U     st2h	{ z5.h, z6.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT:  2      11    2.00           *      U     st2w	{ z0.s, z1.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT:  3      12    2.00           *      U     st2w	{ z0.s, z1.s }, p0, [x0]
-# CHECK-NEXT:  3      12    2.00           *      U     st2w	{ z21.s, z22.s }, p5, [x10, #10, mul vl]
-# CHECK-NEXT:  3      12    2.00           *      U     st2w	{ z23.s, z24.s }, p3, [x13, #-16, mul vl]
-# CHECK-NEXT:  2      11    2.00           *      U     st2w	{ z5.s, z6.s }, p3, [x17, x16, lsl #2]
-# CHECK-NEXT:  4      15    12.00          *      U     st3b	{ z0.b - z2.b }, p0, [x0, x0]
-# CHECK-NEXT:  4      15    12.00          *      U     st3b	{ z0.b - z2.b }, p0, [x0]
-# CHECK-NEXT:  4      15    12.00          *      U     st3b	{ z21.b - z23.b }, p5, [x10, #15, mul vl]
-# CHECK-NEXT:  4      15    12.00          *      U     st3b	{ z23.b - z25.b }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT:  4      15    12.00          *      U     st3b	{ z5.b - z7.b }, p3, [x17, x16]
-# CHECK-NEXT:  3      11    3.00           *      U     st3d	{ z0.d - z2.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT:  4      12    3.00           *      U     st3d	{ z0.d - z2.d }, p0, [x0]
-# CHECK-NEXT:  4      12    3.00           *      U     st3d	{ z21.d - z23.d }, p5, [x10, #15, mul vl]
-# CHECK-NEXT:  4      12    3.00           *      U     st3d	{ z23.d - z25.d }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT:  3      11    3.00           *      U     st3d	{ z5.d - z7.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT:  4      15    12.00          *      U     st3h	{ z0.h - z2.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT:  4      15    12.00          *      U     st3h	{ z0.h - z2.h }, p0, [x0]
-# CHECK-NEXT:  4      15    12.00          *      U     st3h	{ z21.h - z23.h }, p5, [x10, #15, mul vl]
-# CHECK-NEXT:  4      15    12.00          *      U     st3h	{ z23.h - z25.h }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT:  4      15    12.00          *      U     st3h	{ z5.h - z7.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT:  3      11    3.00           *      U     st3w	{ z0.s - z2.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT:  4      12    3.00           *      U     st3w	{ z0.s - z2.s }, p0, [x0]
-# CHECK-NEXT:  4      12    3.00           *      U     st3w	{ z21.s - z23.s }, p5, [x10, #15, mul vl]
-# CHECK-NEXT:  4      12    3.00           *      U     st3w	{ z23.s - z25.s }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT:  3      11    3.00           *      U     st3w	{ z5.s - z7.s }, p3, [x17, x16, lsl #2]
-# CHECK-NEXT:  5      15    16.00          *      U     st4b	{ z0.b - z3.b }, p0, [x0, x0]
-# CHECK-NEXT:  5      15    16.00          *      U     st4b	{ z0.b - z3.b }, p0, [x0]
-# CHECK-NEXT:  5      15    16.00          *      U     st4b	{ z21.b - z24.b }, p5, [x10, #20, mul vl]
-# CHECK-NEXT:  5      15    16.00          *      U     st4b	{ z23.b - z26.b }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT:  5      15    16.00          *      U     st4b	{ z5.b - z8.b }, p3, [x17, x16]
-# CHECK-NEXT:  4      11    4.00           *      U     st4d	{ z0.d - z3.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT:  5      12    4.00           *      U     st4d	{ z0.d - z3.d }, p0, [x0]
-# CHECK-NEXT:  5      12    4.00           *      U     st4d	{ z21.d - z24.d }, p5, [x10, #20, mul vl]
-# CHECK-NEXT:  5      12    4.00           *      U     st4d	{ z23.d - z26.d }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT:  4      11    4.00           *      U     st4d	{ z5.d - z8.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT:  5      15    16.00          *      U     st4h	{ z0.h - z3.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT:  5      15    16.00          *      U     st4h	{ z0.h - z3.h }, p0, [x0]
-# CHECK-NEXT:  5      15    16.00          *      U     st4h	{ z21.h - z24.h }, p5, [x10, #20, mul vl]
-# CHECK-NEXT:  5      15    16.00          *      U     st4h	{ z23.h - z26.h }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT:  5      15    16.00          *      U     st4h	{ z5.h - z8.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT:  4      11    4.00           *      U     st4w	{ z0.s - z3.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT:  5      12    4.00           *      U     st4w	{ z0.s - z3.s }, p0, [x0]
-# CHECK-NEXT:  5      12    4.00           *      U     st4w	{ z21.s - z24.s }, p5, [x10, #20, mul vl]
-# CHECK-NEXT:  5      12    4.00           *      U     st4w	{ z23.s - z26.s }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT:  4      11    4.00           *      U     st4w	{ z5.s - z8.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT:  3      12    8.00           *            st2b	{ z0.b, z1.b }, p0, [x0, x0]
+# CHECK-NEXT:  3      12    8.00           *            st2b	{ z0.b, z1.b }, p0, [x0]
+# CHECK-NEXT:  3      12    8.00           *            st2b	{ z21.b, z22.b }, p5, [x10, #10, mul vl]
+# CHECK-NEXT:  3      12    8.00           *            st2b	{ z23.b, z24.b }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT:  3      12    8.00           *            st2b	{ z5.b, z6.b }, p3, [x17, x16]
+# CHECK-NEXT:  2      11    2.00           *            st2d	{ z0.d, z1.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT:  3      12    2.00           *            st2d	{ z0.d, z1.d }, p0, [x0]
+# CHECK-NEXT:  3      12    2.00           *            st2d	{ z21.d, z22.d }, p5, [x10, #10, mul vl]
+# CHECK-NEXT:  3      12    2.00           *            st2d	{ z23.d, z24.d }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT:  2      11    2.00           *            st2d	{ z5.d, z6.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT:  3      12    8.00           *            st2h	{ z0.h, z1.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT:  3      12    8.00           *            st2h	{ z0.h, z1.h }, p0, [x0]
+# CHECK-NEXT:  3      12    8.00           *            st2h	{ z21.h, z22.h }, p5, [x10, #10, mul vl]
+# CHECK-NEXT:  3      12    8.00           *            st2h	{ z23.h, z24.h }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT:  3      12    8.00           *            st2h	{ z5.h, z6.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT:  2      11    2.00           *            st2w	{ z0.s, z1.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT:  3      12    2.00           *            st2w	{ z0.s, z1.s }, p0, [x0]
+# CHECK-NEXT:  3      12    2.00           *            st2w	{ z21.s, z22.s }, p5, [x10, #10, mul vl]
+# CHECK-NEXT:  3      12    2.00           *            st2w	{ z23.s, z24.s }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT:  2      11    2.00           *            st2w	{ z5.s, z6.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT:  4      15    12.00          *            st3b	{ z0.b - z2.b }, p0, [x0, x0]
+# CHECK-NEXT:  4      15    12.00          *            st3b	{ z0.b - z2.b }, p0, [x0]
+# CHECK-NEXT:  4      15    12.00          *            st3b	{ z21.b - z23.b }, p5, [x10, #15, mul vl]
+# CHECK-NEXT:  4      15    12.00          *            st3b	{ z23.b - z25.b }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT:  4      15    12.00          *            st3b	{ z5.b - z7.b }, p3, [x17, x16]
+# CHECK-NEXT:  3      11    3.00           *            st3d	{ z0.d - z2.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT:  4      12    3.00           *            st3d	{ z0.d - z2.d }, p0, [x0]
+# CHECK-NEXT:  4      12    3.00           *            st3d	{ z21.d - z23.d }, p5, [x10, #15, mul vl]
+# CHECK-NEXT:  4      12    3.00           *            st3d	{ z23.d - z25.d }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT:  3      11    3.00           *            st3d	{ z5.d - z7.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT:  4      15    12.00          *            st3h	{ z0.h - z2.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT:  4      15    12.00          *            st3h	{ z0.h - z2.h }, p0, [x0]
+# CHECK-NEXT:  4      15    12.00          *            st3h	{ z21.h - z23.h }, p5, [x10, #15, mul vl]
+# CHECK-NEXT:  4      15    12.00          *            st3h	{ z23.h - z25.h }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT:  4      15    12.00          *            st3h	{ z5.h - z7.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT:  3      11    3.00           *            st3w	{ z0.s - z2.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT:  4      12    3.00           *            st3w	{ z0.s - z2.s }, p0, [x0]
+# CHECK-NEXT:  4      12    3.00           *            st3w	{ z21.s - z23.s }, p5, [x10, #15, mul vl]
+# CHECK-NEXT:  4      12    3.00           *            st3w	{ z23.s - z25.s }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT:  3      11    3.00           *            st3w	{ z5.s - z7.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT:  5      15    16.00          *            st4b	{ z0.b - z3.b }, p0, [x0, x0]
+# CHECK-NEXT:  5      15    16.00          *            st4b	{ z0.b - z3.b }, p0, [x0]
+# CHECK-NEXT:  5      15    16.00          *            st4b	{ z21.b - z24.b }, p5, [x10, #20, mul vl]
+# CHECK-NEXT:  5      15    16.00          *            st4b	{ z23.b - z26.b }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT:  5      15    16.00          *            st4b	{ z5.b - z8.b }, p3, [x17, x16]
+# CHECK-NEXT:  4      11    4.00           *            st4d	{ z0.d - z3.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT:  5      12    4.00           *            st4d	{ z0.d - z3.d }, p0, [x0]
+# CHECK-NEXT:  5      12    4.00           *            st4d	{ z21.d - z24.d }, p5, [x10, #20, mul vl]
+# CHECK-NEXT:  5      12    4.00           *            st4d	{ z23.d - z26.d }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT:  4      11    4.00           *            st4d	{ z5.d - z8.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT:  5      15    16.00          *            st4h	{ z0.h - z3.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT:  5      15    16.00          *            st4h	{ z0.h - z3.h }, p0, [x0]
+# CHECK-NEXT:  5      15    16.00          *            st4h	{ z21.h - z24.h }, p5, [x10, #20, mul vl]
+# CHECK-NEXT:  5      15    16.00          *            st4h	{ z23.h - z26.h }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT:  5      15    16.00          *            st4h	{ z5.h - z8.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT:  4      11    4.00           *            st4w	{ z0.s - z3.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT:  5      12    4.00           *            st4w	{ z0.s - z3.s }, p0, [x0]
+# CHECK-NEXT:  5      12    4.00           *            st4w	{ z21.s - z24.s }, p5, [x10, #20, mul vl]
+# CHECK-NEXT:  5      12    4.00           *            st4w	{ z23.s - z26.s }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT:  4      11    4.00           *            st4w	{ z5.s - z8.s }, p3, [x17, x16, lsl #2]
 # CHECK-NEXT:  1      11    1.00           *            stnt1b	{ z0.b }, p0, [x0, x0]
 # CHECK-NEXT:  1      11    1.00           *            stnt1b	{ z0.b }, p0, [x0]
 # CHECK-NEXT:  1      11    1.00           *            stnt1b	{ z21.b }, p5, [x10, #7, mul vl]
@@ -4639,9 +4639,9 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      11    1.00           *            str	p0, [x0]
 # CHECK-NEXT:  1      11    1.00           *            str	p15, [sp, #-256, mul vl]
 # CHECK-NEXT:  1      11    1.00           *            str	p5, [x10, #255, mul vl]
-# CHECK-NEXT:  1      11    1.00           *      U     str	z0, [x0]
-# CHECK-NEXT:  1      11    1.00           *      U     str	z21, [x10, #-256, mul vl]
-# CHECK-NEXT:  1      11    1.00           *      U     str	z31, [sp, #255, mul vl]
+# CHECK-NEXT:  1      11    1.00           *            str	z0, [x0]
+# CHECK-NEXT:  1      11    1.00           *            str	z21, [x10, #-256, mul vl]
+# CHECK-NEXT:  1      11    1.00           *            str	z31, [sp, #255, mul vl]
 # CHECK-NEXT:  1      4     0.50                        sub	z0.b, p0/m, z0.b, z0.b
 # CHECK-NEXT:  1      4     1.00                        sub	z0.b, z0.b, #0
 # CHECK-NEXT:  1      4     0.50                        sub	z0.b, z0.b, z0.b

diff  --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s
index 5891350b78022..0f4003671a36c 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s
@@ -3486,10 +3486,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     0.50                        addp	z0.h, p0/m, z0.h, z1.h
 # CHECK-NEXT:  1      2     0.50                        addp	z29.s, p7/m, z29.s, z30.s
 # CHECK-NEXT:  1      2     0.50                        addp	z31.d, p7/m, z31.d, z30.d
-# CHECK-NEXT:  1      2     0.50                  U     addpl	sp, sp, #31
-# CHECK-NEXT:  1      2     0.50                  U     addpl	x0, x0, #-32
-# CHECK-NEXT:  1      2     0.50                  U     addpl	x21, x21, #0
-# CHECK-NEXT:  1      2     0.50                  U     addpl	x23, x8, #-1
+# CHECK-NEXT:  1      2     0.50                        addpl	sp, sp, #31
+# CHECK-NEXT:  1      2     0.50                        addpl	x0, x0, #-32
+# CHECK-NEXT:  1      2     0.50                        addpl	x21, x21, #0
+# CHECK-NEXT:  1      2     0.50                        addpl	x23, x8, #-1
 # CHECK-NEXT:  1      2     0.50                        addvl	sp, sp, #31
 # CHECK-NEXT:  1      2     0.50                        addvl	x0, x0, #-32
 # CHECK-NEXT:  1      2     0.50                        addvl	x21, x21, #0
@@ -3531,7 +3531,7 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     0.50                        and	z31.s, p7/m, z31.s, z31.s
 # CHECK-NEXT:  1      2     0.50                        and	z5.b, z5.b, #0x6
 # CHECK-NEXT:  1      2     0.50                        and	z5.b, z5.b, #0xf9
-# CHECK-NEXT:  2      2     1.00                  U     ands	p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT:  2      2     1.00                        ands	p0.b, p0/z, p0.b, p1.b
 # CHECK-NEXT:  2      6     1.00                        andv	b0, p7, z31.b
 # CHECK-NEXT:  2      6     1.00                        andv	d0, p7, z31.d
 # CHECK-NEXT:  2      6     1.00                        andv	h0, p7, z31.h
@@ -3570,10 +3570,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      4     1.00                        asrd	z31.d, p0/m, z31.d, #64
 # CHECK-NEXT:  1      4     1.00                        asrd	z31.h, p0/m, z31.h, #16
 # CHECK-NEXT:  1      4     1.00                        asrd	z31.s, p0/m, z31.s, #32
-# CHECK-NEXT:  1      2     1.00                  U     asrr	z0.b, p0/m, z0.b, z0.b
-# CHECK-NEXT:  1      2     1.00                  U     asrr	z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT:  1      2     1.00                  U     asrr	z0.h, p0/m, z0.h, z0.h
-# CHECK-NEXT:  1      2     1.00                  U     asrr	z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT:  1      2     1.00                        asrr	z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT:  1      2     1.00                        asrr	z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT:  1      2     1.00                        asrr	z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT:  1      2     1.00                        asrr	z0.s, p0/m, z0.s, z0.s
 # CHECK-NEXT:  1      2     1.00                        bcax	z29.d, z29.d, z30.d, z31.d
 # CHECK-NEXT:  2      6     2.00                        bdep	z0.b, z1.b, z31.b
 # CHECK-NEXT:  2      6     2.00                        bdep	z0.d, z1.d, z31.d
@@ -3583,8 +3583,8 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  2      6     2.00                        bext	z0.d, z1.d, z31.d
 # CHECK-NEXT:  2      6     2.00                        bext	z0.h, z1.h, z31.h
 # CHECK-NEXT:  2      6     2.00                        bext	z0.s, z1.s, z31.s
-# CHECK-NEXT:  1      3     1.00                  U     bfcvt	z0.h, p0/m, z1.s
-# CHECK-NEXT:  1      3     1.00                  U     bfcvtnt	z0.h, p0/m, z1.s
+# CHECK-NEXT:  1      3     1.00                        bfcvt	z0.h, p0/m, z1.s
+# CHECK-NEXT:  1      3     1.00                        bfcvtnt	z0.h, p0/m, z1.s
 # CHECK-NEXT:  1      4     0.50                        bfdot	z0.s, z1.h, z2.h
 # CHECK-NEXT:  1      4     0.50                        bfdot	z0.s, z1.h, z2.h[0]
 # CHECK-NEXT:  1      4     0.50                        bfdot	z0.s, z1.h, z2.h[3]
@@ -3611,26 +3611,26 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     0.50                        bic	z31.d, p7/m, z31.d, z31.d
 # CHECK-NEXT:  1      2     0.50                        bic	z31.h, p7/m, z31.h, z31.h
 # CHECK-NEXT:  1      2     0.50                        bic	z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT:  2      2     1.00                  U     bics	p0.b, p0/z, p0.b, p0.b
-# CHECK-NEXT:  2      2     1.00                  U     bics	p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT:  2      2     1.00                        bics	p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT:  2      2     1.00                        bics	p15.b, p15/z, p15.b, p15.b
 # CHECK-NEXT:  1      2     0.50                        brka	p0.b, p15/m, p15.b
 # CHECK-NEXT:  1      2     0.50                        brka	p0.b, p15/z, p15.b
-# CHECK-NEXT:  1      3     0.50                  U     brkas	p0.b, p15/z, p15.b
+# CHECK-NEXT:  1      3     0.50                        brkas	p0.b, p15/z, p15.b
 # CHECK-NEXT:  1      2     0.50                        brkb	p0.b, p15/m, p15.b
 # CHECK-NEXT:  1      2     0.50                        brkb	p0.b, p15/z, p15.b
-# CHECK-NEXT:  1      3     0.50                  U     brkbs	p0.b, p15/z, p15.b
+# CHECK-NEXT:  1      3     0.50                        brkbs	p0.b, p15/z, p15.b
 # CHECK-NEXT:  1      2     2.00                        brkn	p0.b, p15/z, p1.b, p0.b
 # CHECK-NEXT:  1      2     2.00                        brkn	p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT:  2      3     1.00                  U     brkns	p0.b, p15/z, p1.b, p0.b
-# CHECK-NEXT:  2      3     1.00                  U     brkns	p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT:  2      3     1.00                        brkns	p0.b, p15/z, p1.b, p0.b
+# CHECK-NEXT:  2      3     1.00                        brkns	p15.b, p15/z, p15.b, p15.b
 # CHECK-NEXT:  1      2     2.00                        brkpa	p0.b, p15/z, p1.b, p2.b
 # CHECK-NEXT:  1      2     2.00                        brkpa	p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT:  2      3     1.00                  U     brkpas	p0.b, p15/z, p1.b, p2.b
-# CHECK-NEXT:  2      3     1.00                  U     brkpas	p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT:  2      3     1.00                        brkpas	p0.b, p15/z, p1.b, p2.b
+# CHECK-NEXT:  2      3     1.00                        brkpas	p15.b, p15/z, p15.b, p15.b
 # CHECK-NEXT:  1      2     2.00                        brkpb	p0.b, p15/z, p1.b, p2.b
 # CHECK-NEXT:  1      2     2.00                        brkpb	p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT:  2      3     1.00                  U     brkpbs	p0.b, p15/z, p1.b, p2.b
-# CHECK-NEXT:  2      3     1.00                  U     brkpbs	p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT:  2      3     1.00                        brkpbs	p0.b, p15/z, p1.b, p2.b
+# CHECK-NEXT:  2      3     1.00                        brkpbs	p15.b, p15/z, p15.b, p15.b
 # CHECK-NEXT:  1      2     0.50                        bsl	z0.d, z0.d, z1.d, z2.d
 # CHECK-NEXT:  1      2     0.50                        bsl1n	z0.d, z0.d, z1.d, z2.d
 # CHECK-NEXT:  1      2     0.50                        bsl2n	z0.d, z0.d, z1.d, z2.d
@@ -3652,7 +3652,7 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      4     1.00                        cdot	z29.d, z30.h, z0.h[0], #180
 # CHECK-NEXT:  1      4     1.00                        cdot	z31.d, z30.h, z7.h[1], #270
 # CHECK-NEXT:  1      4     1.00                        cdot	z5.d, z6.h, z3.h[0], #90
-# CHECK-NEXT:  1      3     1.00                  U     clasta	b0, p7, b0, z31.b
+# CHECK-NEXT:  1      3     1.00                        clasta	b0, p7, b0, z31.b
 # CHECK-NEXT:  1      3     1.00                        clasta	d0, p7, d0, z31.d
 # CHECK-NEXT:  1      3     1.00                        clasta	h0, p7, h0, z31.h
 # CHECK-NEXT:  1      3     1.00                        clasta	s0, p7, s0, z31.s
@@ -3664,7 +3664,7 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      3     1.00                        clasta	z0.d, p7, z0.d, z31.d
 # CHECK-NEXT:  1      3     1.00                        clasta	z0.h, p7, z0.h, z31.h
 # CHECK-NEXT:  1      3     1.00                        clasta	z0.s, p7, z0.s, z31.s
-# CHECK-NEXT:  1      3     1.00                  U     clastb	b0, p7, b0, z31.b
+# CHECK-NEXT:  1      3     1.00                        clastb	b0, p7, b0, z31.b
 # CHECK-NEXT:  1      3     1.00                        clastb	d0, p7, d0, z31.d
 # CHECK-NEXT:  1      3     1.00                        clastb	h0, p7, h0, z31.h
 # CHECK-NEXT:  1      3     1.00                        clastb	s0, p7, s0, z31.s
@@ -3884,14 +3884,14 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     0.50                        cntw	x0, pow2
 # CHECK-NEXT:  1      3     1.00                        compact	z31.d, p7, z31.d
 # CHECK-NEXT:  1      3     1.00                        compact	z31.s, p7, z31.s
-# CHECK-NEXT:  1      1     0.50                  U     ctermeq	w30, wzr
-# CHECK-NEXT:  1      1     0.50                  U     ctermeq	wzr, w30
-# CHECK-NEXT:  1      1     0.50                  U     ctermeq	x30, xzr
-# CHECK-NEXT:  1      1     0.50                  U     ctermeq	xzr, x30
-# CHECK-NEXT:  1      1     0.50                  U     ctermne	w30, wzr
-# CHECK-NEXT:  1      1     0.50                  U     ctermne	wzr, w30
-# CHECK-NEXT:  1      1     0.50                  U     ctermne	x30, xzr
-# CHECK-NEXT:  1      1     0.50                  U     ctermne	xzr, x30
+# CHECK-NEXT:  1      1     0.50                        ctermeq	w30, wzr
+# CHECK-NEXT:  1      1     0.50                        ctermeq	wzr, w30
+# CHECK-NEXT:  1      1     0.50                        ctermeq	x30, xzr
+# CHECK-NEXT:  1      1     0.50                        ctermeq	xzr, x30
+# CHECK-NEXT:  1      1     0.50                        ctermne	w30, wzr
+# CHECK-NEXT:  1      1     0.50                        ctermne	wzr, w30
+# CHECK-NEXT:  1      1     0.50                        ctermne	x30, xzr
+# CHECK-NEXT:  1      1     0.50                        ctermne	xzr, x30
 # CHECK-NEXT:  1      2     0.50                        decb	x0
 # CHECK-NEXT:  1      2     0.50                        decb	x0, #14
 # CHECK-NEXT:  1      2     0.50                        decb	x0, all, mul #16
@@ -3915,9 +3915,9 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     0.50                        decp	xzr, p15.d
 # CHECK-NEXT:  1      2     0.50                        decp	xzr, p15.h
 # CHECK-NEXT:  1      2     0.50                        decp	xzr, p15.s
-# CHECK-NEXT:  3      7     1.00                  U     decp	z31.d, p15.d
-# CHECK-NEXT:  3      7     1.00                  U     decp	z31.h, p15.h
-# CHECK-NEXT:  3      7     1.00                  U     decp	z31.s, p15.s
+# CHECK-NEXT:  3      7     1.00                        decp	z31.d, p15.d
+# CHECK-NEXT:  3      7     1.00                        decp	z31.h, p15.h
+# CHECK-NEXT:  3      7     1.00                        decp	z31.s, p15.s
 # CHECK-NEXT:  1      2     0.50                        decw	x0
 # CHECK-NEXT:  1      2     0.50                        decw	x0, #14
 # CHECK-NEXT:  1      2     0.50                        decw	x0, all, mul #16
@@ -3947,7 +3947,7 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     0.50                        eorbt	z0.d, z1.d, z31.d
 # CHECK-NEXT:  1      2     0.50                        eorbt	z0.h, z1.h, z31.h
 # CHECK-NEXT:  1      2     0.50                        eorbt	z0.s, z1.s, z31.s
-# CHECK-NEXT:  2      2     1.00                  U     eors	p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT:  2      2     1.00                        eors	p0.b, p0/z, p0.b, p1.b
 # CHECK-NEXT:  1      2     0.50                        eortb	z0.b, z1.b, z31.b
 # CHECK-NEXT:  1      2     0.50                        eortb	z0.d, z1.d, z31.d
 # CHECK-NEXT:  1      2     0.50                        eortb	z0.h, z1.h, z31.h
@@ -3956,10 +3956,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  2      6     1.00                        eorv	d0, p7, z31.d
 # CHECK-NEXT:  2      6     1.00                        eorv	h0, p7, z31.h
 # CHECK-NEXT:  2      6     1.00                        eorv	s0, p7, z31.s
-# CHECK-NEXT:  1      2     0.50                  U     ext	z0.b, { z1.b, z2.b }, #0
+# CHECK-NEXT:  1      2     0.50                        ext	z0.b, { z1.b, z2.b }, #0
 # CHECK-NEXT:  1      2     0.50                        ext	z31.b, z31.b, z0.b, #0
 # CHECK-NEXT:  1      2     0.50                        ext	z31.b, z31.b, z0.b, #255
-# CHECK-NEXT:  1      2     0.50                  U     ext	z31.b, { z30.b, z31.b }, #255
+# CHECK-NEXT:  1      2     0.50                        ext	z31.b, { z30.b, z31.b }, #255
 # CHECK-NEXT:  1      2     0.50                        fabd	z0.d, p7/m, z0.d, z31.d
 # CHECK-NEXT:  1      2     0.50                        fabd	z0.h, p7/m, z0.h, z31.h
 # CHECK-NEXT:  1      2     0.50                        fabd	z0.s, p7/m, z0.s, z31.s
@@ -4189,12 +4189,12 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      4     0.50                        fmlslt	z30.s, z31.h, z7.h[7]
 # CHECK-NEXT:  1      2     0.50                        fmov	z0.d, #-10.00000000
 # CHECK-NEXT:  1      2     0.50                        fmov	z0.d, #0.12500000
-# CHECK-NEXT:  1      2     0.50                  U     fmov	z0.d, p0/m, #-10.00000000
-# CHECK-NEXT:  1      2     0.50                  U     fmov	z0.d, p0/m, #0.12500000
+# CHECK-NEXT:  1      2     0.50                        fmov	z0.d, p0/m, #-10.00000000
+# CHECK-NEXT:  1      2     0.50                        fmov	z0.d, p0/m, #0.12500000
 # CHECK-NEXT:  1      2     0.50                        fmov	z0.h, #-0.12500000
-# CHECK-NEXT:  1      2     0.50                  U     fmov	z0.h, p0/m, #-0.12500000
+# CHECK-NEXT:  1      2     0.50                        fmov	z0.h, p0/m, #-0.12500000
 # CHECK-NEXT:  1      2     0.50                        fmov	z0.s, #-0.12500000
-# CHECK-NEXT:  1      2     0.50                  U     fmov	z0.s, p0/m, #-0.12500000
+# CHECK-NEXT:  1      2     0.50                        fmov	z0.s, p0/m, #-0.12500000
 # CHECK-NEXT:  1      4     0.50                        fmsb	z0.d, p7/m, z1.d, z31.d
 # CHECK-NEXT:  1      4     0.50                        fmsb	z0.h, p7/m, z1.h, z31.h
 # CHECK-NEXT:  1      4     0.50                        fmsb	z0.s, p7/m, z1.s, z31.s
@@ -4336,9 +4336,9 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     0.50                        incp	xzr, p15.d
 # CHECK-NEXT:  1      2     0.50                        incp	xzr, p15.h
 # CHECK-NEXT:  1      2     0.50                        incp	xzr, p15.s
-# CHECK-NEXT:  3      7     1.00                  U     incp	z31.d, p15.d
-# CHECK-NEXT:  3      7     1.00                  U     incp	z31.h, p15.h
-# CHECK-NEXT:  3      7     1.00                  U     incp	z31.s, p15.s
+# CHECK-NEXT:  3      7     1.00                        incp	z31.d, p15.d
+# CHECK-NEXT:  3      7     1.00                        incp	z31.h, p15.h
+# CHECK-NEXT:  3      7     1.00                        incp	z31.s, p15.s
 # CHECK-NEXT:  1      2     0.50                        incw	x0
 # CHECK-NEXT:  1      2     0.50                        incw	x0, #14
 # CHECK-NEXT:  1      2     0.50                        incw	x0, all, mul #16
@@ -4390,7 +4390,7 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  2      5     1.00                        insr	z31.h, wzr
 # CHECK-NEXT:  1      3     1.00                        insr	z31.s, s31
 # CHECK-NEXT:  2      5     1.00                        insr	z31.s, wzr
-# CHECK-NEXT:  1      3     1.00                  U     lasta	b0, p7, z31.b
+# CHECK-NEXT:  1      3     1.00                        lasta	b0, p7, z31.b
 # CHECK-NEXT:  1      3     1.00                        lasta	d0, p7, z31.d
 # CHECK-NEXT:  1      3     1.00                        lasta	h0, p7, z31.h
 # CHECK-NEXT:  1      3     1.00                        lasta	s0, p7, z31.s
@@ -4398,7 +4398,7 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  2      5     1.00                        lasta	w0, p7, z31.h
 # CHECK-NEXT:  2      5     1.00                        lasta	w0, p7, z31.s
 # CHECK-NEXT:  2      5     1.00                        lasta	x0, p7, z31.d
-# CHECK-NEXT:  1      3     1.00                  U     lastb	b0, p7, z31.b
+# CHECK-NEXT:  1      3     1.00                        lastb	b0, p7, z31.b
 # CHECK-NEXT:  1      3     1.00                        lastb	d0, p7, z31.d
 # CHECK-NEXT:  1      3     1.00                        lastb	h0, p7, z31.h
 # CHECK-NEXT:  1      3     1.00                        lastb	s0, p7, z31.s
@@ -4408,68 +4408,68 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  2      5     1.00                        lastb	x0, p7, z31.d
 # CHECK-NEXT:  1      6     0.50    *                   ld1b	{ z0.b }, p0/z, [sp, x0]
 # CHECK-NEXT:  1      6     0.50    *                   ld1b	{ z0.b }, p0/z, [x0, x0]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1b	{ z0.b }, p0/z, [x0]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1b	{ z0.d }, p0/z, [x0]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1b	{ z0.d }, p0/z, [z0.d]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1b	{ z0.h }, p0/z, [x0]
-# CHECK-NEXT:  2      9     0.50    *             U     ld1b	{ z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT:  2      9     0.50    *             U     ld1b	{ z0.s }, p0/z, [x0, z0.s, uxtw]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1b	{ z0.s }, p0/z, [x0]
-# CHECK-NEXT:  2      9     0.50    *             U     ld1b	{ z0.s }, p0/z, [z0.s]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1b	{ z21.b }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1b	{ z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1b	{ z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1b	{ z21.d }, p5/z, [x10, z21.d, uxtw]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1b	{ z21.h }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1b	{ z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  1      6     0.33    *                   ld1b	{ z0.b }, p0/z, [x0]
+# CHECK-NEXT:  1      6     0.33    *                   ld1b	{ z0.d }, p0/z, [x0]
+# CHECK-NEXT:  4      9     1.00    *                   ld1b	{ z0.d }, p0/z, [z0.d]
+# CHECK-NEXT:  1      6     0.33    *                   ld1b	{ z0.h }, p0/z, [x0]
+# CHECK-NEXT:  2      9     0.50    *                   ld1b	{ z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT:  2      9     0.50    *                   ld1b	{ z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT:  1      6     0.33    *                   ld1b	{ z0.s }, p0/z, [x0]
+# CHECK-NEXT:  2      9     0.50    *                   ld1b	{ z0.s }, p0/z, [z0.s]
+# CHECK-NEXT:  1      6     0.33    *                   ld1b	{ z21.b }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  1      6     0.33    *                   ld1b	{ z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  4      9     1.00    *                   ld1b	{ z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT:  4      9     1.00    *                   ld1b	{ z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT:  1      6     0.33    *                   ld1b	{ z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  1      6     0.33    *                   ld1b	{ z21.s }, p5/z, [x10, #5, mul vl]
 # CHECK-NEXT:  1      6     0.50    *                   ld1b	{ z21.s }, p5/z, [x10, x21]
 # CHECK-NEXT:  1      6     0.50    *                   ld1b	{ z23.d }, p3/z, [x13, x8]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1b	{ z31.b }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1b	{ z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1b	{ z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1b	{ z31.d }, p7/z, [z31.d, #31]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1b	{ z31.h }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1b	{ z31.s }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  2      9     0.50    *             U     ld1b	{ z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT:  1      6     0.33    *                   ld1b	{ z31.b }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  1      6     0.33    *                   ld1b	{ z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  4      9     1.00    *                   ld1b	{ z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT:  4      9     1.00    *                   ld1b	{ z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT:  1      6     0.33    *                   ld1b	{ z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  1      6     0.33    *                   ld1b	{ z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  2      9     0.50    *                   ld1b	{ z31.s }, p7/z, [z31.s, #31]
 # CHECK-NEXT:  1      6     0.50    *                   ld1b	{ z5.h }, p3/z, [x17, x16]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1d	{ z0.d }, p0/z, [x0, z0.d, sxtw #3]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1d	{ z0.d }, p0/z, [x0, z0.d, uxtw #3]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1d	{ z0.d }, p0/z, [x0]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1d	{ z0.d }, p0/z, [z0.d]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1d	{ z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1d	{ z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1d	{ z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT:  4      9     1.00    *                   ld1d	{ z0.d }, p0/z, [x0, z0.d, sxtw #3]
+# CHECK-NEXT:  4      9     1.00    *                   ld1d	{ z0.d }, p0/z, [x0, z0.d, uxtw #3]
+# CHECK-NEXT:  1      6     0.33    *                   ld1d	{ z0.d }, p0/z, [x0]
+# CHECK-NEXT:  4      9     1.00    *                   ld1d	{ z0.d }, p0/z, [z0.d]
+# CHECK-NEXT:  1      6     0.33    *                   ld1d	{ z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  4      9     1.00    *                   ld1d	{ z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT:  4      9     1.00    *                   ld1d	{ z21.d }, p5/z, [x10, z21.d, uxtw]
 # CHECK-NEXT:  1      6     0.50    *                   ld1d	{ z23.d }, p3/z, [sp, x8, lsl #3]
 # CHECK-NEXT:  1      6     0.50    *                   ld1d	{ z23.d }, p3/z, [x13, x8, lsl #3]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1d	{ z23.d }, p3/z, [x13, z8.d, lsl #3]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1d	{ z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1d	{ z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1d	{ z31.d }, p7/z, [z31.d, #248]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1h	{ z0.d }, p0/z, [x0, z0.d, sxtw #1]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1h	{ z0.d }, p0/z, [x0, z0.d, uxtw #1]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1h	{ z0.d }, p0/z, [x0]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1h	{ z0.d }, p0/z, [z0.d]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1h	{ z0.h }, p0/z, [x0]
-# CHECK-NEXT:  2      9     0.50    *             U     ld1h	{ z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT:  2      9     0.50    *             U     ld1h	{ z0.s }, p0/z, [x0, z0.s, uxtw]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1h	{ z0.s }, p0/z, [x0]
-# CHECK-NEXT:  2      9     0.50    *             U     ld1h	{ z0.s }, p0/z, [z0.s]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1h	{ z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1h	{ z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1h	{ z21.d }, p5/z, [x10, z21.d, uxtw]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1h	{ z21.h }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1h	{ z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  4      9     1.00    *                   ld1d	{ z23.d }, p3/z, [x13, z8.d, lsl #3]
+# CHECK-NEXT:  1      6     0.33    *                   ld1d	{ z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  4      9     1.00    *                   ld1d	{ z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT:  4      9     1.00    *                   ld1d	{ z31.d }, p7/z, [z31.d, #248]
+# CHECK-NEXT:  4      9     1.00    *                   ld1h	{ z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT:  4      9     1.00    *                   ld1h	{ z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT:  1      6     0.33    *                   ld1h	{ z0.d }, p0/z, [x0]
+# CHECK-NEXT:  4      9     1.00    *                   ld1h	{ z0.d }, p0/z, [z0.d]
+# CHECK-NEXT:  1      6     0.33    *                   ld1h	{ z0.h }, p0/z, [x0]
+# CHECK-NEXT:  2      9     0.50    *                   ld1h	{ z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT:  2      9     0.50    *                   ld1h	{ z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT:  1      6     0.33    *                   ld1h	{ z0.s }, p0/z, [x0]
+# CHECK-NEXT:  2      9     0.50    *                   ld1h	{ z0.s }, p0/z, [z0.s]
+# CHECK-NEXT:  1      6     0.33    *                   ld1h	{ z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  4      9     1.00    *                   ld1h	{ z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT:  4      9     1.00    *                   ld1h	{ z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT:  1      6     0.33    *                   ld1h	{ z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  1      6     0.33    *                   ld1h	{ z21.s }, p5/z, [x10, #5, mul vl]
 # CHECK-NEXT:  1      6     0.50    *                   ld1h	{ z21.s }, p5/z, [x10, x21, lsl #1]
 # CHECK-NEXT:  1      6     0.50    *                   ld1h	{ z23.d }, p3/z, [x13, x8, lsl #1]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1h	{ z23.d }, p3/z, [x13, z8.d, lsl #1]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1h	{ z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1h	{ z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1h	{ z31.d }, p7/z, [z31.d, #62]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1h	{ z31.h }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1h	{ z31.s }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  4      10    1.00    *             U     ld1h	{ z31.s }, p7/z, [sp, z31.s, sxtw #1]
-# CHECK-NEXT:  4      10    1.00    *             U     ld1h	{ z31.s }, p7/z, [sp, z31.s, uxtw #1]
-# CHECK-NEXT:  2      9     0.50    *             U     ld1h	{ z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT:  4      9     1.00    *                   ld1h	{ z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT:  1      6     0.33    *                   ld1h	{ z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  4      9     1.00    *                   ld1h	{ z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT:  4      9     1.00    *                   ld1h	{ z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT:  1      6     0.33    *                   ld1h	{ z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  1      6     0.33    *                   ld1h	{ z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  4      10    1.00    *                   ld1h	{ z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT:  4      10    1.00    *                   ld1h	{ z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT:  2      9     0.50    *                   ld1h	{ z31.s }, p7/z, [z31.s, #62]
 # CHECK-NEXT:  1      6     0.50    *                   ld1h	{ z5.h }, p3/z, [sp, x16, lsl #1]
 # CHECK-NEXT:  1      6     0.50    *                   ld1h	{ z5.h }, p3/z, [x17, x16, lsl #1]
 # CHECK-NEXT:  1      6     0.33    *                   ld1rb	{ z0.b }, p0/z, [x0]
@@ -4524,146 +4524,146 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      6     0.33    *                   ld1rw	{ z0.s }, p0/z, [x0]
 # CHECK-NEXT:  1      6     0.33    *                   ld1rw	{ z31.d }, p7/z, [sp, #252]
 # CHECK-NEXT:  1      6     0.33    *                   ld1rw	{ z31.s }, p7/z, [sp, #252]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1sb	{ z0.d }, p0/z, [x0]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1sb	{ z0.d }, p0/z, [z0.d]
+# CHECK-NEXT:  1      6     0.33    *                   ld1sb	{ z0.d }, p0/z, [x0]
+# CHECK-NEXT:  4      9     1.00    *                   ld1sb	{ z0.d }, p0/z, [z0.d]
 # CHECK-NEXT:  1      6     0.50    *                   ld1sb	{ z0.h }, p0/z, [sp, x0]
 # CHECK-NEXT:  1      6     0.50    *                   ld1sb	{ z0.h }, p0/z, [x0, x0]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1sb	{ z0.h }, p0/z, [x0]
-# CHECK-NEXT:  2      9     0.50    *             U     ld1sb	{ z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1sb	{ z0.s }, p0/z, [x0]
-# CHECK-NEXT:  2      9     0.50    *             U     ld1sb	{ z0.s }, p0/z, [z0.s]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1sb	{ z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1sb	{ z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1sb	{ z21.d }, p5/z, [x10, z21.d, uxtw]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1sb	{ z21.h }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1sb	{ z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  1      6     0.33    *                   ld1sb	{ z0.h }, p0/z, [x0]
+# CHECK-NEXT:  2      9     0.50    *                   ld1sb	{ z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT:  1      6     0.33    *                   ld1sb	{ z0.s }, p0/z, [x0]
+# CHECK-NEXT:  2      9     0.50    *                   ld1sb	{ z0.s }, p0/z, [z0.s]
+# CHECK-NEXT:  1      6     0.33    *                   ld1sb	{ z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  4      9     1.00    *                   ld1sb	{ z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT:  4      9     1.00    *                   ld1sb	{ z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT:  1      6     0.33    *                   ld1sb	{ z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  1      6     0.33    *                   ld1sb	{ z21.s }, p5/z, [x10, #5, mul vl]
 # CHECK-NEXT:  1      6     0.50    *                   ld1sb	{ z21.s }, p5/z, [x10, x21]
 # CHECK-NEXT:  1      6     0.50    *                   ld1sb	{ z23.d }, p3/z, [x13, x8]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1sb	{ z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1sb	{ z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1sb	{ z31.d }, p7/z, [z31.d, #31]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1sb	{ z31.h }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1sb	{ z31.s }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  2      9     0.50    *             U     ld1sb	{ z31.s }, p7/z, [z31.s, #31]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1sh	{ z0.d }, p0/z, [x0, z0.d, sxtw #1]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1sh	{ z0.d }, p0/z, [x0, z0.d, uxtw #1]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1sh	{ z0.d }, p0/z, [x0]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1sh	{ z0.d }, p0/z, [z0.d]
-# CHECK-NEXT:  2      9     0.50    *             U     ld1sh	{ z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT:  2      9     0.50    *             U     ld1sh	{ z0.s }, p0/z, [x0, z0.s, uxtw]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1sh	{ z0.s }, p0/z, [x0]
-# CHECK-NEXT:  2      9     0.50    *             U     ld1sh	{ z0.s }, p0/z, [z0.s]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1sh	{ z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1sh	{ z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1sh	{ z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT:  1      6     0.33    *                   ld1sb	{ z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  4      9     1.00    *                   ld1sb	{ z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT:  4      9     1.00    *                   ld1sb	{ z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT:  1      6     0.33    *                   ld1sb	{ z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  1      6     0.33    *                   ld1sb	{ z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  2      9     0.50    *                   ld1sb	{ z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT:  4      9     1.00    *                   ld1sh	{ z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT:  4      9     1.00    *                   ld1sh	{ z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT:  1      6     0.33    *                   ld1sh	{ z0.d }, p0/z, [x0]
+# CHECK-NEXT:  4      9     1.00    *                   ld1sh	{ z0.d }, p0/z, [z0.d]
+# CHECK-NEXT:  2      9     0.50    *                   ld1sh	{ z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT:  2      9     0.50    *                   ld1sh	{ z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT:  1      6     0.33    *                   ld1sh	{ z0.s }, p0/z, [x0]
+# CHECK-NEXT:  2      9     0.50    *                   ld1sh	{ z0.s }, p0/z, [z0.s]
+# CHECK-NEXT:  1      6     0.33    *                   ld1sh	{ z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  4      9     1.00    *                   ld1sh	{ z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT:  4      9     1.00    *                   ld1sh	{ z21.d }, p5/z, [x10, z21.d, uxtw]
 # CHECK-NEXT:  1      6     0.50    *                   ld1sh	{ z21.s }, p5/z, [sp, x21, lsl #1]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1sh	{ z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  1      6     0.33    *                   ld1sh	{ z21.s }, p5/z, [x10, #5, mul vl]
 # CHECK-NEXT:  1      6     0.50    *                   ld1sh	{ z21.s }, p5/z, [x10, x21, lsl #1]
 # CHECK-NEXT:  1      6     0.50    *                   ld1sh	{ z23.d }, p3/z, [x13, x8, lsl #1]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1sh	{ z23.d }, p3/z, [x13, z8.d, lsl #1]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1sh	{ z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1sh	{ z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1sh	{ z31.d }, p7/z, [z31.d, #62]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1sh	{ z31.s }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  4      10    1.00    *             U     ld1sh	{ z31.s }, p7/z, [sp, z31.s, sxtw #1]
-# CHECK-NEXT:  4      10    1.00    *             U     ld1sh	{ z31.s }, p7/z, [sp, z31.s, uxtw #1]
-# CHECK-NEXT:  2      9     0.50    *             U     ld1sh	{ z31.s }, p7/z, [z31.s, #62]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1sw	{ z0.d }, p0/z, [x0, z0.d, sxtw #2]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1sw	{ z0.d }, p0/z, [x0, z0.d, uxtw #2]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1sw	{ z0.d }, p0/z, [x0]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1sw	{ z0.d }, p0/z, [z0.d]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1sw	{ z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1sw	{ z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1sw	{ z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT:  4      9     1.00    *                   ld1sh	{ z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT:  1      6     0.33    *                   ld1sh	{ z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  4      9     1.00    *                   ld1sh	{ z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT:  4      9     1.00    *                   ld1sh	{ z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT:  1      6     0.33    *                   ld1sh	{ z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  4      10    1.00    *                   ld1sh	{ z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT:  4      10    1.00    *                   ld1sh	{ z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT:  2      9     0.50    *                   ld1sh	{ z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT:  4      9     1.00    *                   ld1sw	{ z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT:  4      9     1.00    *                   ld1sw	{ z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT:  1      6     0.33    *                   ld1sw	{ z0.d }, p0/z, [x0]
+# CHECK-NEXT:  4      9     1.00    *                   ld1sw	{ z0.d }, p0/z, [z0.d]
+# CHECK-NEXT:  1      6     0.33    *                   ld1sw	{ z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  4      9     1.00    *                   ld1sw	{ z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT:  4      9     1.00    *                   ld1sw	{ z21.d }, p5/z, [x10, z21.d, uxtw]
 # CHECK-NEXT:  1      6     0.50    *                   ld1sw	{ z23.d }, p3/z, [sp, x8, lsl #2]
 # CHECK-NEXT:  1      6     0.50    *                   ld1sw	{ z23.d }, p3/z, [x13, x8, lsl #2]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1sw	{ z23.d }, p3/z, [x13, z8.d, lsl #2]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1sw	{ z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1sw	{ z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1sw	{ z31.d }, p7/z, [z31.d, #124]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1w	{ z0.d }, p0/z, [x0, z0.d, sxtw #2]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1w	{ z0.d }, p0/z, [x0, z0.d, uxtw #2]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1w	{ z0.d }, p0/z, [x0]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1w	{ z0.d }, p0/z, [z0.d]
-# CHECK-NEXT:  2      9     0.50    *             U     ld1w	{ z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT:  2      9     0.50    *             U     ld1w	{ z0.s }, p0/z, [x0, z0.s, uxtw]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1w	{ z0.s }, p0/z, [x0]
-# CHECK-NEXT:  2      9     0.50    *             U     ld1w	{ z0.s }, p0/z, [z0.s]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1w	{ z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1w	{ z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1w	{ z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT:  4      9     1.00    *                   ld1sw	{ z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT:  1      6     0.33    *                   ld1sw	{ z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  4      9     1.00    *                   ld1sw	{ z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT:  4      9     1.00    *                   ld1sw	{ z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT:  4      9     1.00    *                   ld1w	{ z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT:  4      9     1.00    *                   ld1w	{ z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT:  1      6     0.33    *                   ld1w	{ z0.d }, p0/z, [x0]
+# CHECK-NEXT:  4      9     1.00    *                   ld1w	{ z0.d }, p0/z, [z0.d]
+# CHECK-NEXT:  2      9     0.50    *                   ld1w	{ z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT:  2      9     0.50    *                   ld1w	{ z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT:  1      6     0.33    *                   ld1w	{ z0.s }, p0/z, [x0]
+# CHECK-NEXT:  2      9     0.50    *                   ld1w	{ z0.s }, p0/z, [z0.s]
+# CHECK-NEXT:  1      6     0.33    *                   ld1w	{ z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  4      9     1.00    *                   ld1w	{ z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT:  4      9     1.00    *                   ld1w	{ z21.d }, p5/z, [x10, z21.d, uxtw]
 # CHECK-NEXT:  1      6     0.50    *                   ld1w	{ z21.s }, p5/z, [sp, x21, lsl #2]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1w	{ z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT:  1      6     0.33    *                   ld1w	{ z21.s }, p5/z, [x10, #5, mul vl]
 # CHECK-NEXT:  1      6     0.50    *                   ld1w	{ z21.s }, p5/z, [x10, x21, lsl #2]
 # CHECK-NEXT:  1      6     0.50    *                   ld1w	{ z23.d }, p3/z, [x13, x8, lsl #2]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1w	{ z23.d }, p3/z, [x13, z8.d, lsl #2]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1w	{ z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1w	{ z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT:  4      9     1.00    *             U     ld1w	{ z31.d }, p7/z, [z31.d, #124]
-# CHECK-NEXT:  1      6     0.33    *             U     ld1w	{ z31.s }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT:  4      10    1.00    *             U     ld1w	{ z31.s }, p7/z, [sp, z31.s, sxtw #2]
-# CHECK-NEXT:  4      10    1.00    *             U     ld1w	{ z31.s }, p7/z, [sp, z31.s, uxtw #2]
-# CHECK-NEXT:  2      9     0.50    *             U     ld1w	{ z31.s }, p7/z, [z31.s, #124]
-# CHECK-NEXT:  2      9     0.50    *             U     ld2b	{ z0.b, z1.b }, p0/z, [x0, x0]
-# CHECK-NEXT:  2      8     0.50    *             U     ld2b	{ z0.b, z1.b }, p0/z, [x0]
-# CHECK-NEXT:  2      8     0.50    *             U     ld2b	{ z21.b, z22.b }, p5/z, [x10, #10, mul vl]
-# CHECK-NEXT:  2      8     0.50    *             U     ld2b	{ z23.b, z24.b }, p3/z, [x13, #-16, mul vl]
-# CHECK-NEXT:  2      9     0.50    *             U     ld2b	{ z5.b, z6.b }, p3/z, [x17, x16]
-# CHECK-NEXT:  2      9     0.50    *             U     ld2d	{ z0.d, z1.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT:  2      8     0.50    *             U     ld2d	{ z0.d, z1.d }, p0/z, [x0]
-# CHECK-NEXT:  2      8     0.50    *             U     ld2d	{ z21.d, z22.d }, p5/z, [x10, #10, mul vl]
-# CHECK-NEXT:  2      8     0.50    *             U     ld2d	{ z23.d, z24.d }, p3/z, [x13, #-16, mul vl]
-# CHECK-NEXT:  2      9     0.50    *             U     ld2d	{ z5.d, z6.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT:  2      9     0.50    *             U     ld2h	{ z0.h, z1.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT:  2      8     0.50    *             U     ld2h	{ z0.h, z1.h }, p0/z, [x0]
-# CHECK-NEXT:  2      8     0.50    *             U     ld2h	{ z21.h, z22.h }, p5/z, [x10, #10, mul vl]
-# CHECK-NEXT:  2      8     0.50    *             U     ld2h	{ z23.h, z24.h }, p3/z, [x13, #-16, mul vl]
-# CHECK-NEXT:  2      9     0.50    *             U     ld2h	{ z5.h, z6.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT:  2      9     0.50    *             U     ld2w	{ z0.s, z1.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT:  2      8     0.50    *             U     ld2w	{ z0.s, z1.s }, p0/z, [x0]
-# CHECK-NEXT:  2      8     0.50    *             U     ld2w	{ z21.s, z22.s }, p5/z, [x10, #10, mul vl]
-# CHECK-NEXT:  2      8     0.50    *             U     ld2w	{ z23.s, z24.s }, p3/z, [x13, #-16, mul vl]
-# CHECK-NEXT:  2      9     0.50    *             U     ld2w	{ z5.s, z6.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT:  3      10    0.67    *             U     ld3b	{ z0.b - z2.b }, p0/z, [x0, x0]
-# CHECK-NEXT:  2      9     0.50    *             U     ld3b	{ z0.b - z2.b }, p0/z, [x0]
-# CHECK-NEXT:  2      9     0.50    *             U     ld3b	{ z21.b - z23.b }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT:  2      9     0.50    *             U     ld3b	{ z23.b - z25.b }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT:  3      10    0.67    *             U     ld3b	{ z5.b - z7.b }, p3/z, [x17, x16]
-# CHECK-NEXT:  3      10    0.67    *             U     ld3d	{ z0.d - z2.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT:  2      9     0.50    *             U     ld3d	{ z0.d - z2.d }, p0/z, [x0]
-# CHECK-NEXT:  2      9     0.50    *             U     ld3d	{ z21.d - z23.d }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT:  2      9     0.50    *             U     ld3d	{ z23.d - z25.d }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT:  3      10    0.67    *             U     ld3d	{ z5.d - z7.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT:  3      10    0.67    *             U     ld3h	{ z0.h - z2.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT:  2      9     0.50    *             U     ld3h	{ z0.h - z2.h }, p0/z, [x0]
-# CHECK-NEXT:  2      9     0.50    *             U     ld3h	{ z21.h - z23.h }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT:  2      9     0.50    *             U     ld3h	{ z23.h - z25.h }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT:  3      10    0.67    *             U     ld3h	{ z5.h - z7.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT:  3      10    0.67    *             U     ld3w	{ z0.s - z2.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT:  2      9     0.50    *             U     ld3w	{ z0.s - z2.s }, p0/z, [x0]
-# CHECK-NEXT:  2      9     0.50    *             U     ld3w	{ z21.s - z23.s }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT:  2      9     0.50    *             U     ld3w	{ z23.s - z25.s }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT:  3      10    0.67    *             U     ld3w	{ z5.s - z7.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT:  6      10    1.00    *             U     ld4b	{ z0.b - z3.b }, p0/z, [x0, x0]
-# CHECK-NEXT:  4      9     1.00    *             U     ld4b	{ z0.b - z3.b }, p0/z, [x0]
-# CHECK-NEXT:  4      9     1.00    *             U     ld4b	{ z21.b - z24.b }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT:  4      9     1.00    *             U     ld4b	{ z23.b - z26.b }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT:  6      10    1.00    *             U     ld4b	{ z5.b - z8.b }, p3/z, [x17, x16]
-# CHECK-NEXT:  6      10    1.00    *             U     ld4d	{ z0.d - z3.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT:  4      9     1.00    *             U     ld4d	{ z0.d - z3.d }, p0/z, [x0]
-# CHECK-NEXT:  4      9     1.00    *             U     ld4d	{ z21.d - z24.d }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT:  4      9     1.00    *             U     ld4d	{ z23.d - z26.d }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT:  6      10    1.00    *             U     ld4d	{ z5.d - z8.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT:  6      10    1.00    *             U     ld4h	{ z0.h - z3.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT:  4      9     1.00    *             U     ld4h	{ z0.h - z3.h }, p0/z, [x0]
-# CHECK-NEXT:  4      9     1.00    *             U     ld4h	{ z21.h - z24.h }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT:  4      9     1.00    *             U     ld4h	{ z23.h - z26.h }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT:  6      10    1.00    *             U     ld4h	{ z5.h - z8.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT:  6      10    1.00    *             U     ld4w	{ z0.s - z3.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT:  4      9     1.00    *             U     ld4w	{ z0.s - z3.s }, p0/z, [x0]
-# CHECK-NEXT:  4      9     1.00    *             U     ld4w	{ z21.s - z24.s }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT:  4      9     1.00    *             U     ld4w	{ z23.s - z26.s }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT:  6      10    1.00    *             U     ld4w	{ z5.s - z8.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT:  4      9     1.00    *                   ld1w	{ z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT:  1      6     0.33    *                   ld1w	{ z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  4      9     1.00    *                   ld1w	{ z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT:  4      9     1.00    *                   ld1w	{ z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT:  1      6     0.33    *                   ld1w	{ z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT:  4      10    1.00    *                   ld1w	{ z31.s }, p7/z, [sp, z31.s, sxtw #2]
+# CHECK-NEXT:  4      10    1.00    *                   ld1w	{ z31.s }, p7/z, [sp, z31.s, uxtw #2]
+# CHECK-NEXT:  2      9     0.50    *                   ld1w	{ z31.s }, p7/z, [z31.s, #124]
+# CHECK-NEXT:  2      9     0.50    *                   ld2b	{ z0.b, z1.b }, p0/z, [x0, x0]
+# CHECK-NEXT:  2      8     0.50    *                   ld2b	{ z0.b, z1.b }, p0/z, [x0]
+# CHECK-NEXT:  2      8     0.50    *                   ld2b	{ z21.b, z22.b }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT:  2      8     0.50    *                   ld2b	{ z23.b, z24.b }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT:  2      9     0.50    *                   ld2b	{ z5.b, z6.b }, p3/z, [x17, x16]
+# CHECK-NEXT:  2      9     0.50    *                   ld2d	{ z0.d, z1.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT:  2      8     0.50    *                   ld2d	{ z0.d, z1.d }, p0/z, [x0]
+# CHECK-NEXT:  2      8     0.50    *                   ld2d	{ z21.d, z22.d }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT:  2      8     0.50    *                   ld2d	{ z23.d, z24.d }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT:  2      9     0.50    *                   ld2d	{ z5.d, z6.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT:  2      9     0.50    *                   ld2h	{ z0.h, z1.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT:  2      8     0.50    *                   ld2h	{ z0.h, z1.h }, p0/z, [x0]
+# CHECK-NEXT:  2      8     0.50    *                   ld2h	{ z21.h, z22.h }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT:  2      8     0.50    *                   ld2h	{ z23.h, z24.h }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT:  2      9     0.50    *                   ld2h	{ z5.h, z6.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT:  2      9     0.50    *                   ld2w	{ z0.s, z1.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT:  2      8     0.50    *                   ld2w	{ z0.s, z1.s }, p0/z, [x0]
+# CHECK-NEXT:  2      8     0.50    *                   ld2w	{ z21.s, z22.s }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT:  2      8     0.50    *                   ld2w	{ z23.s, z24.s }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT:  2      9     0.50    *                   ld2w	{ z5.s, z6.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT:  3      10    0.67    *                   ld3b	{ z0.b - z2.b }, p0/z, [x0, x0]
+# CHECK-NEXT:  2      9     0.50    *                   ld3b	{ z0.b - z2.b }, p0/z, [x0]
+# CHECK-NEXT:  2      9     0.50    *                   ld3b	{ z21.b - z23.b }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT:  2      9     0.50    *                   ld3b	{ z23.b - z25.b }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT:  3      10    0.67    *                   ld3b	{ z5.b - z7.b }, p3/z, [x17, x16]
+# CHECK-NEXT:  3      10    0.67    *                   ld3d	{ z0.d - z2.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT:  2      9     0.50    *                   ld3d	{ z0.d - z2.d }, p0/z, [x0]
+# CHECK-NEXT:  2      9     0.50    *                   ld3d	{ z21.d - z23.d }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT:  2      9     0.50    *                   ld3d	{ z23.d - z25.d }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT:  3      10    0.67    *                   ld3d	{ z5.d - z7.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT:  3      10    0.67    *                   ld3h	{ z0.h - z2.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT:  2      9     0.50    *                   ld3h	{ z0.h - z2.h }, p0/z, [x0]
+# CHECK-NEXT:  2      9     0.50    *                   ld3h	{ z21.h - z23.h }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT:  2      9     0.50    *                   ld3h	{ z23.h - z25.h }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT:  3      10    0.67    *                   ld3h	{ z5.h - z7.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT:  3      10    0.67    *                   ld3w	{ z0.s - z2.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT:  2      9     0.50    *                   ld3w	{ z0.s - z2.s }, p0/z, [x0]
+# CHECK-NEXT:  2      9     0.50    *                   ld3w	{ z21.s - z23.s }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT:  2      9     0.50    *                   ld3w	{ z23.s - z25.s }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT:  3      10    0.67    *                   ld3w	{ z5.s - z7.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT:  6      10    1.00    *                   ld4b	{ z0.b - z3.b }, p0/z, [x0, x0]
+# CHECK-NEXT:  4      9     1.00    *                   ld4b	{ z0.b - z3.b }, p0/z, [x0]
+# CHECK-NEXT:  4      9     1.00    *                   ld4b	{ z21.b - z24.b }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT:  4      9     1.00    *                   ld4b	{ z23.b - z26.b }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT:  6      10    1.00    *                   ld4b	{ z5.b - z8.b }, p3/z, [x17, x16]
+# CHECK-NEXT:  6      10    1.00    *                   ld4d	{ z0.d - z3.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT:  4      9     1.00    *                   ld4d	{ z0.d - z3.d }, p0/z, [x0]
+# CHECK-NEXT:  4      9     1.00    *                   ld4d	{ z21.d - z24.d }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT:  4      9     1.00    *                   ld4d	{ z23.d - z26.d }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT:  6      10    1.00    *                   ld4d	{ z5.d - z8.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT:  6      10    1.00    *                   ld4h	{ z0.h - z3.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT:  4      9     1.00    *                   ld4h	{ z0.h - z3.h }, p0/z, [x0]
+# CHECK-NEXT:  4      9     1.00    *                   ld4h	{ z21.h - z24.h }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT:  4      9     1.00    *                   ld4h	{ z23.h - z26.h }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT:  6      10    1.00    *                   ld4h	{ z5.h - z8.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT:  6      10    1.00    *                   ld4w	{ z0.s - z3.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT:  4      9     1.00    *                   ld4w	{ z0.s - z3.s }, p0/z, [x0]
+# CHECK-NEXT:  4      9     1.00    *                   ld4w	{ z21.s - z24.s }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT:  4      9     1.00    *                   ld4w	{ z23.s - z26.s }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT:  6      10    1.00    *                   ld4w	{ z5.s - z8.s }, p3/z, [x17, x16, lsl #2]
 # CHECK-NEXT:  2      6     0.50    *             U     ldff1b	{ z0.d }, p0/z, [x0, x0]
 # CHECK-NEXT:  4      9     1.00    *             U     ldff1b	{ z0.d }, p0/z, [z0.d]
 # CHECK-NEXT:  2      6     0.50    *             U     ldff1b	{ z0.h }, p0/z, [x0, x0]
@@ -4874,9 +4874,9 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  2      6     0.50    *                   ldr	p0, [x0]
 # CHECK-NEXT:  2      6     0.50    *                   ldr	p5, [x10, #255, mul vl]
 # CHECK-NEXT:  2      6     0.50    *                   ldr	p7, [x13, #-256, mul vl]
-# CHECK-NEXT:  1      6     0.33    *             U     ldr	z0, [x0]
-# CHECK-NEXT:  1      6     0.33    *             U     ldr	z23, [x13, #255, mul vl]
-# CHECK-NEXT:  1      6     0.33    *             U     ldr	z31, [sp, #-256, mul vl]
+# CHECK-NEXT:  1      6     0.33    *                   ldr	z0, [x0]
+# CHECK-NEXT:  1      6     0.33    *                   ldr	z23, [x13, #255, mul vl]
+# CHECK-NEXT:  1      6     0.33    *                   ldr	z31, [sp, #-256, mul vl]
 # CHECK-NEXT:  1      2     1.00                        lsl	z0.b, p0/m, z0.b, #0
 # CHECK-NEXT:  1      2     1.00                        lsl	z0.b, p0/m, z0.b, z0.b
 # CHECK-NEXT:  1      2     1.00                        lsl	z0.b, p0/m, z0.b, z1.d
@@ -4903,10 +4903,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     1.00                        lsl	z31.h, z31.h, #15
 # CHECK-NEXT:  1      2     1.00                        lsl	z31.s, p0/m, z31.s, #31
 # CHECK-NEXT:  1      2     1.00                        lsl	z31.s, z31.s, #31
-# CHECK-NEXT:  1      2     1.00                  U     lslr	z0.b, p0/m, z0.b, z0.b
-# CHECK-NEXT:  1      2     1.00                  U     lslr	z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT:  1      2     1.00                  U     lslr	z0.h, p0/m, z0.h, z0.h
-# CHECK-NEXT:  1      2     1.00                  U     lslr	z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT:  1      2     1.00                        lslr	z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT:  1      2     1.00                        lslr	z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT:  1      2     1.00                        lslr	z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT:  1      2     1.00                        lslr	z0.s, p0/m, z0.s, z0.s
 # CHECK-NEXT:  1      2     1.00                        lsr	z0.b, p0/m, z0.b, #1
 # CHECK-NEXT:  1      2     1.00                        lsr	z0.b, p0/m, z0.b, z0.b
 # CHECK-NEXT:  1      2     1.00                        lsr	z0.b, p0/m, z0.b, z1.d
@@ -4933,10 +4933,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     1.00                        lsr	z31.h, z31.h, #16
 # CHECK-NEXT:  1      2     1.00                        lsr	z31.s, p0/m, z31.s, #32
 # CHECK-NEXT:  1      2     1.00                        lsr	z31.s, z31.s, #32
-# CHECK-NEXT:  1      2     1.00                  U     lsrr	z0.b, p0/m, z0.b, z0.b
-# CHECK-NEXT:  1      2     1.00                  U     lsrr	z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT:  1      2     1.00                  U     lsrr	z0.h, p0/m, z0.h, z0.h
-# CHECK-NEXT:  1      2     1.00                  U     lsrr	z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT:  1      2     1.00                        lsrr	z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT:  1      2     1.00                        lsrr	z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT:  1      2     1.00                        lsrr	z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT:  1      2     1.00                        lsrr	z0.s, p0/m, z0.s, z0.s
 # CHECK-NEXT:  1      4     1.00                        mad	z0.b, p7/m, z1.b, z31.b
 # CHECK-NEXT:  2      5     2.00                        mad	z0.d, p7/m, z1.d, z31.d
 # CHECK-NEXT:  1      4     1.00                        mad	z0.h, p7/m, z1.h, z31.h
@@ -4967,7 +4967,7 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      1     1.00                        mov	p15.b, p15/z, p15.b
 # CHECK-NEXT:  1      2     0.50                        mov	z0.b, #127
 # CHECK-NEXT:  1      2     0.50                        mov	z0.b, b0
-# CHECK-NEXT:  1      2     0.50                  U     mov	z0.b, p0/m, b0
+# CHECK-NEXT:  1      2     0.50                        mov	z0.b, p0/m, b0
 # CHECK-NEXT:  2      5     1.00                        mov	z0.b, p0/m, w0
 # CHECK-NEXT:  1      2     0.50                        mov	z0.b, p0/z, #127
 # CHECK-NEXT:  1      3     3.00                        mov	z0.b, w0
@@ -5029,14 +5029,14 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     0.50                        mov	z21.s, p15/m, #-128
 # CHECK-NEXT:  1      2     0.50                        mov	z21.s, p15/m, #-32768
 # CHECK-NEXT:  1      2     0.50                        mov	z31.b, p15/m, z31.b
-# CHECK-NEXT:  1      2     0.50                  U     mov	z31.b, p7/m, b31
+# CHECK-NEXT:  1      2     0.50                        mov	z31.b, p7/m, b31
 # CHECK-NEXT:  1      2     0.50                        movprfx	z31, z6
 # CHECK-NEXT:  2      5     1.00                        mov	z31.b, p7/m, wsp
 # CHECK-NEXT:  1      3     3.00                        mov	z31.b, wsp
 # CHECK-NEXT:  1      2     0.50                        mov	z31.b, z31.b[63]
 # CHECK-NEXT:  1      2     0.50                        mov	z31.d, p15/m, z31.d
 # CHECK-NEXT:  1      2     0.50                        mov	z31.d, p7/m, d31
-# CHECK-NEXT:  1      2     0.50                  U     movprfx	z31.d, p7/z, z6.d
+# CHECK-NEXT:  1      2     0.50                        movprfx	z31.d, p7/z, z6.d
 # CHECK-NEXT:  2      5     1.00                        mov	z31.d, p7/m, sp
 # CHECK-NEXT:  1      3     3.00                        mov	z31.d, sp
 # CHECK-NEXT:  1      2     0.50                        mov	z31.d, z0.d
@@ -5062,10 +5062,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     0.50                        mov	z5.h, #-6
 # CHECK-NEXT:  1      2     0.50                        mov	z5.q, z17.q[3]
 # CHECK-NEXT:  1      2     0.50                        mov	z5.s, #-6
-# CHECK-NEXT:  2      2     1.00                  U     movs	p0.b, p0.b
-# CHECK-NEXT:  2      2     1.00                  U     movs	p0.b, p0/z, p0.b
-# CHECK-NEXT:  2      2     1.00                  U     movs	p15.b, p15.b
-# CHECK-NEXT:  2      2     1.00                  U     movs	p15.b, p15/z, p15.b
+# CHECK-NEXT:  2      2     1.00                        movs	p0.b, p0.b
+# CHECK-NEXT:  2      2     1.00                        movs	p0.b, p0/z, p0.b
+# CHECK-NEXT:  2      2     1.00                        movs	p15.b, p15.b
+# CHECK-NEXT:  2      2     1.00                        movs	p15.b, p15/z, p15.b
 # CHECK-NEXT:  1      1     0.10                  U     mrs	x3, ID_AA64ZFR0_EL1
 # CHECK-NEXT:  1      1     0.10                  U     mrs	x3, ZCR_EL1
 # CHECK-NEXT:  1      1     0.10                  U     mrs	x3, ZCR_EL12
@@ -5100,8 +5100,8 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      4     1.00                        mul	z31.s, z31.s, #127
 # CHECK-NEXT:  1      1     1.00                        nand	p0.b, p0/z, p0.b, p0.b
 # CHECK-NEXT:  1      1     1.00                        nand	p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT:  2      2     1.00                  U     nands	p0.b, p0/z, p0.b, p0.b
-# CHECK-NEXT:  2      2     1.00                  U     nands	p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT:  2      2     1.00                        nands	p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT:  2      2     1.00                        nands	p15.b, p15/z, p15.b, p15.b
 # CHECK-NEXT:  1      2     0.50                        nbsl	z0.d, z0.d, z1.d, z2.d
 # CHECK-NEXT:  1      2     0.50                        neg	z0.b, p0/m, z0.b
 # CHECK-NEXT:  1      2     0.50                        neg	z0.d, p0/m, z0.d
@@ -5117,20 +5117,20 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  2      2     1.00                        nmatch	p15.h, p7/z, z30.h, z31.h
 # CHECK-NEXT:  1      1     1.00                        nor	p0.b, p0/z, p0.b, p0.b
 # CHECK-NEXT:  1      1     1.00                        nor	p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT:  2      2     1.00                  U     nors	p0.b, p0/z, p0.b, p0.b
-# CHECK-NEXT:  2      2     1.00                  U     nors	p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT:  2      2     1.00                        nors	p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT:  2      2     1.00                        nors	p15.b, p15/z, p15.b, p15.b
 # CHECK-NEXT:  1      1     1.00                        not	p0.b, p0/z, p0.b
 # CHECK-NEXT:  1      1     1.00                        not	p15.b, p15/z, p15.b
 # CHECK-NEXT:  1      2     0.50                        not	z31.b, p7/m, z31.b
 # CHECK-NEXT:  1      2     0.50                        not	z31.d, p7/m, z31.d
 # CHECK-NEXT:  1      2     0.50                        not	z31.h, p7/m, z31.h
 # CHECK-NEXT:  1      2     0.50                        not	z31.s, p7/m, z31.s
-# CHECK-NEXT:  2      2     1.00                  U     nots	p0.b, p0/z, p0.b
-# CHECK-NEXT:  2      2     1.00                  U     nots	p15.b, p15/z, p15.b
+# CHECK-NEXT:  2      2     1.00                        nots	p0.b, p0/z, p0.b
+# CHECK-NEXT:  2      2     1.00                        nots	p15.b, p15/z, p15.b
 # CHECK-NEXT:  1      1     1.00                        orn	p0.b, p0/z, p0.b, p0.b
 # CHECK-NEXT:  1      1     1.00                        orn	p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT:  2      2     1.00                  U     orns	p0.b, p0/z, p0.b, p0.b
-# CHECK-NEXT:  2      2     1.00                  U     orns	p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT:  2      2     1.00                        orns	p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT:  2      2     1.00                        orns	p15.b, p15/z, p15.b, p15.b
 # CHECK-NEXT:  1      1     1.00                        orr	p0.b, p0/z, p0.b, p1.b
 # CHECK-NEXT:  1      2     0.50                        orr	z0.d, z0.d, #0x6
 # CHECK-NEXT:  1      2     0.50                        orr	z0.d, z0.d, #0xfffffffffffffff9
@@ -5145,7 +5145,7 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     0.50                        orr	z31.s, p7/m, z31.s, z31.s
 # CHECK-NEXT:  1      2     0.50                        orr	z5.b, z5.b, #0x6
 # CHECK-NEXT:  1      2     0.50                        orr	z5.b, z5.b, #0xf9
-# CHECK-NEXT:  2      2     1.00                  U     orrs	p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT:  2      2     1.00                        orrs	p0.b, p0/z, p0.b, p1.b
 # CHECK-NEXT:  2      6     1.00                        orv	b0, p7, z31.b
 # CHECK-NEXT:  2      6     1.00                        orv	d0, p7, z31.d
 # CHECK-NEXT:  2      6     1.00                        orv	h0, p7, z31.h
@@ -5170,8 +5170,8 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      4     0.33    *      *      U     prfb	#15, p0, [x0]
 # CHECK-NEXT:  1      4     0.33    *      *      U     prfb	#6, p0, [x0]
 # CHECK-NEXT:  1      4     0.33    *      *      U     prfb	#7, p0, [x0]
-# CHECK-NEXT:  1      4     0.33    *      *            prfb	#7, p3, [z13.s, #31]
-# CHECK-NEXT:  1      4     0.33    *      *            prfb	#7, p3, [z13.s]
+# CHECK-NEXT:  1      4     0.33    *      *      U     prfb	#7, p3, [z13.s, #31]
+# CHECK-NEXT:  1      4     0.33    *      *      U     prfb	#7, p3, [z13.s]
 # CHECK-NEXT:  1      4     0.33    *      *      U     prfb	pldl1keep, p0, [x0, z0.d, uxtw]
 # CHECK-NEXT:  1      4     0.33    *      *      U     prfb	pldl1keep, p0, [x0, z0.d]
 # CHECK-NEXT:  1      4     0.33    *      *      U     prfb	pldl1keep, p0, [x0, z0.s, uxtw]
@@ -5197,8 +5197,8 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      4     0.33    *      *      U     prfd	#15, p0, [x0]
 # CHECK-NEXT:  1      4     0.33    *      *      U     prfd	#15, p7, [z31.d, #248]
 # CHECK-NEXT:  1      4     0.33    *      *      U     prfd	#15, p7, [z31.d]
-# CHECK-NEXT:  1      4     0.33    *      *            prfd	#15, p7, [z31.s, #248]
-# CHECK-NEXT:  1      4     0.33    *      *            prfd	#15, p7, [z31.s]
+# CHECK-NEXT:  1      4     0.33    *      *      U     prfd	#15, p7, [z31.s, #248]
+# CHECK-NEXT:  1      4     0.33    *      *      U     prfd	#15, p7, [z31.s]
 # CHECK-NEXT:  1      4     0.33    *      *      U     prfd	#6, p0, [x0]
 # CHECK-NEXT:  1      4     0.33    *      *      U     prfd	#7, p0, [x0]
 # CHECK-NEXT:  1      4     0.33    *      *      U     prfd	pldl1keep, p0, [x0, z0.d, lsl #3]
@@ -5224,8 +5224,8 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      4     0.33    *      *      U     prfh	#15, p0, [x0]
 # CHECK-NEXT:  1      4     0.33    *      *      U     prfh	#15, p7, [z31.d, #62]
 # CHECK-NEXT:  1      4     0.33    *      *      U     prfh	#15, p7, [z31.d]
-# CHECK-NEXT:  1      4     0.33    *      *            prfh	#15, p7, [z31.s, #62]
-# CHECK-NEXT:  1      4     0.33    *      *            prfh	#15, p7, [z31.s]
+# CHECK-NEXT:  1      4     0.33    *      *      U     prfh	#15, p7, [z31.s, #62]
+# CHECK-NEXT:  1      4     0.33    *      *      U     prfh	#15, p7, [z31.s]
 # CHECK-NEXT:  1      4     0.33    *      *      U     prfh	#6, p0, [x0]
 # CHECK-NEXT:  1      4     0.33    *      *      U     prfh	#7, p0, [x0]
 # CHECK-NEXT:  1      4     0.33    *      *      U     prfh	pldl1keep, p0, [x0, z0.d, lsl #1]
@@ -5251,8 +5251,8 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      4     0.33    *      *      U     prfw	#15, p0, [x0]
 # CHECK-NEXT:  1      4     0.33    *      *      U     prfw	#15, p7, [z31.d, #124]
 # CHECK-NEXT:  1      4     0.33    *      *      U     prfw	#15, p7, [z31.d]
-# CHECK-NEXT:  1      4     0.33    *      *            prfw	#15, p7, [z31.s, #124]
-# CHECK-NEXT:  1      4     0.33    *      *            prfw	#15, p7, [z31.s]
+# CHECK-NEXT:  1      4     0.33    *      *      U     prfw	#15, p7, [z31.s, #124]
+# CHECK-NEXT:  1      4     0.33    *      *      U     prfw	#15, p7, [z31.s]
 # CHECK-NEXT:  1      4     0.33    *      *      U     prfw	#6, p0, [x0]
 # CHECK-NEXT:  1      4     0.33    *      *      U     prfw	#7, p0, [x0]
 # CHECK-NEXT:  1      4     0.33    *      *      U     prfw	#7, p3, [x13, z8.d, uxtw #2]
@@ -5315,45 +5315,45 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     0.50                        ptrue	p7.s, vl64
 # CHECK-NEXT:  1      2     0.50                        ptrue	p7.s, vl7
 # CHECK-NEXT:  1      2     0.50                        ptrue	p7.s, vl8
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p0.b, pow2
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p0.d, pow2
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p0.h, pow2
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p0.s, pow2
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p15.b
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p15.d
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p15.h
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p15.s
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s, #14
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s, #15
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s, #16
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s, #17
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s, #18
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s, #19
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s, #20
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s, #21
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s, #22
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s, #23
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s, #24
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s, #25
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s, #26
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s, #27
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s, #28
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s, mul3
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s, mul4
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s, vl1
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s, vl128
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s, vl16
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s, vl2
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s, vl256
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s, vl3
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s, vl32
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s, vl4
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s, vl5
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s, vl6
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s, vl64
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s, vl7
-# CHECK-NEXT:  1      3     0.50                  U     ptrues	p7.s, vl8
+# CHECK-NEXT:  1      3     0.50                        ptrues	p0.b, pow2
+# CHECK-NEXT:  1      3     0.50                        ptrues	p0.d, pow2
+# CHECK-NEXT:  1      3     0.50                        ptrues	p0.h, pow2
+# CHECK-NEXT:  1      3     0.50                        ptrues	p0.s, pow2
+# CHECK-NEXT:  1      3     0.50                        ptrues	p15.b
+# CHECK-NEXT:  1      3     0.50                        ptrues	p15.d
+# CHECK-NEXT:  1      3     0.50                        ptrues	p15.h
+# CHECK-NEXT:  1      3     0.50                        ptrues	p15.s
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s, #14
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s, #15
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s, #16
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s, #17
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s, #18
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s, #19
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s, #20
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s, #21
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s, #22
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s, #23
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s, #24
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s, #25
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s, #26
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s, #27
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s, #28
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s, mul3
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s, mul4
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s, vl1
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s, vl128
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s, vl16
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s, vl2
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s, vl256
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s, vl3
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s, vl32
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s, vl4
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s, vl5
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s, vl6
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s, vl64
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s, vl7
+# CHECK-NEXT:  1      3     0.50                        ptrues	p7.s, vl8
 # CHECK-NEXT:  1      2     0.50                        punpkhi	p0.h, p0.b
 # CHECK-NEXT:  1      2     0.50                        punpkhi	p15.h, p15.b
 # CHECK-NEXT:  1      2     0.50                        punpklo	p0.h, p0.b
@@ -5585,10 +5585,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      4     1.00                        smullt	z0.s, z1.h, z7.h[7]
 # CHECK-NEXT:  1      4     1.00                        smullt	z29.s, z30.h, z31.h
 # CHECK-NEXT:  1      4     1.00                        smullt	z31.d, z31.s, z31.s
-# CHECK-NEXT:  1      3     1.00                  U     splice	z29.b, p7, { z30.b, z31.b }
-# CHECK-NEXT:  1      3     1.00                  U     splice	z29.d, p7, { z30.d, z31.d }
-# CHECK-NEXT:  1      3     1.00                  U     splice	z29.h, p7, { z30.h, z31.h }
-# CHECK-NEXT:  1      3     1.00                  U     splice	z29.s, p7, { z30.s, z31.s }
+# CHECK-NEXT:  1      3     1.00                        splice	z29.b, p7, { z30.b, z31.b }
+# CHECK-NEXT:  1      3     1.00                        splice	z29.d, p7, { z30.d, z31.d }
+# CHECK-NEXT:  1      3     1.00                        splice	z29.h, p7, { z30.h, z31.h }
+# CHECK-NEXT:  1      3     1.00                        splice	z29.s, p7, { z30.s, z31.s }
 # CHECK-NEXT:  1      3     1.00                        splice	z31.b, p7, z31.b, z31.b
 # CHECK-NEXT:  1      3     1.00                        splice	z31.d, p7, z31.d, z31.d
 # CHECK-NEXT:  1      3     1.00                        splice	z31.h, p7, z31.h, z31.h
@@ -5629,19 +5629,19 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     0.50                        sqdecb	x0, all, mul #16
 # CHECK-NEXT:  1      2     0.50                        sqdecb	x0, pow2
 # CHECK-NEXT:  1      2     0.50                        sqdecb	x0, vl1
-# CHECK-NEXT:  1      2     0.50                  U     sqdecb	x0, w0
-# CHECK-NEXT:  1      2     0.50                  U     sqdecb	x0, w0, all, mul #16
-# CHECK-NEXT:  1      2     0.50                  U     sqdecb	x0, w0, pow2
-# CHECK-NEXT:  1      2     0.50                  U     sqdecb	x0, w0, pow2, mul #16
+# CHECK-NEXT:  1      2     0.50                        sqdecb	x0, w0
+# CHECK-NEXT:  1      2     0.50                        sqdecb	x0, w0, all, mul #16
+# CHECK-NEXT:  1      2     0.50                        sqdecb	x0, w0, pow2
+# CHECK-NEXT:  1      2     0.50                        sqdecb	x0, w0, pow2, mul #16
 # CHECK-NEXT:  1      2     0.50                        sqdecd	x0
 # CHECK-NEXT:  1      2     0.50                        sqdecd	x0, #14
 # CHECK-NEXT:  1      2     0.50                        sqdecd	x0, all, mul #16
 # CHECK-NEXT:  1      2     0.50                        sqdecd	x0, pow2
 # CHECK-NEXT:  1      2     0.50                        sqdecd	x0, vl1
-# CHECK-NEXT:  1      2     0.50                  U     sqdecd	x0, w0
-# CHECK-NEXT:  1      2     0.50                  U     sqdecd	x0, w0, all, mul #16
-# CHECK-NEXT:  1      2     0.50                  U     sqdecd	x0, w0, pow2
-# CHECK-NEXT:  1      2     0.50                  U     sqdecd	x0, w0, pow2, mul #16
+# CHECK-NEXT:  1      2     0.50                        sqdecd	x0, w0
+# CHECK-NEXT:  1      2     0.50                        sqdecd	x0, w0, all, mul #16
+# CHECK-NEXT:  1      2     0.50                        sqdecd	x0, w0, pow2
+# CHECK-NEXT:  1      2     0.50                        sqdecd	x0, w0, pow2, mul #16
 # CHECK-NEXT:  1      2     1.00                        sqdecd	z0.d
 # CHECK-NEXT:  1      2     1.00                        sqdecd	z0.d, all, mul #16
 # CHECK-NEXT:  1      2     1.00                        sqdecd	z0.d, pow2
@@ -5651,10 +5651,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     0.50                        sqdech	x0, all, mul #16
 # CHECK-NEXT:  1      2     0.50                        sqdech	x0, pow2
 # CHECK-NEXT:  1      2     0.50                        sqdech	x0, vl1
-# CHECK-NEXT:  1      2     0.50                  U     sqdech	x0, w0
-# CHECK-NEXT:  1      2     0.50                  U     sqdech	x0, w0, all, mul #16
-# CHECK-NEXT:  1      2     0.50                  U     sqdech	x0, w0, pow2
-# CHECK-NEXT:  1      2     0.50                  U     sqdech	x0, w0, pow2, mul #16
+# CHECK-NEXT:  1      2     0.50                        sqdech	x0, w0
+# CHECK-NEXT:  1      2     0.50                        sqdech	x0, w0, all, mul #16
+# CHECK-NEXT:  1      2     0.50                        sqdech	x0, w0, pow2
+# CHECK-NEXT:  1      2     0.50                        sqdech	x0, w0, pow2, mul #16
 # CHECK-NEXT:  1      2     1.00                        sqdech	z0.h
 # CHECK-NEXT:  1      2     1.00                        sqdech	z0.h, all, mul #16
 # CHECK-NEXT:  1      2     1.00                        sqdech	z0.h, pow2
@@ -5663,10 +5663,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     0.50                        sqdecp	x0, p0.d
 # CHECK-NEXT:  1      2     0.50                        sqdecp	x0, p0.h
 # CHECK-NEXT:  1      2     0.50                        sqdecp	x0, p0.s
-# CHECK-NEXT:  1      2     0.50                  U     sqdecp	xzr, p15.b, wzr
-# CHECK-NEXT:  1      2     0.50                  U     sqdecp	xzr, p15.d, wzr
-# CHECK-NEXT:  1      2     0.50                  U     sqdecp	xzr, p15.h, wzr
-# CHECK-NEXT:  1      2     0.50                  U     sqdecp	xzr, p15.s, wzr
+# CHECK-NEXT:  1      2     0.50                        sqdecp	xzr, p15.b, wzr
+# CHECK-NEXT:  1      2     0.50                        sqdecp	xzr, p15.d, wzr
+# CHECK-NEXT:  1      2     0.50                        sqdecp	xzr, p15.h, wzr
+# CHECK-NEXT:  1      2     0.50                        sqdecp	xzr, p15.s, wzr
 # CHECK-NEXT:  3      7     1.00                        sqdecp	z0.d, p0.d
 # CHECK-NEXT:  3      7     1.00                        sqdecp	z0.h, p0.h
 # CHECK-NEXT:  3      7     1.00                        sqdecp	z0.s, p0.s
@@ -5675,10 +5675,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     0.50                        sqdecw	x0, all, mul #16
 # CHECK-NEXT:  1      2     0.50                        sqdecw	x0, pow2
 # CHECK-NEXT:  1      2     0.50                        sqdecw	x0, vl1
-# CHECK-NEXT:  1      2     0.50                  U     sqdecw	x0, w0
-# CHECK-NEXT:  1      2     0.50                  U     sqdecw	x0, w0, all, mul #16
-# CHECK-NEXT:  1      2     0.50                  U     sqdecw	x0, w0, pow2
-# CHECK-NEXT:  1      2     0.50                  U     sqdecw	x0, w0, pow2, mul #16
+# CHECK-NEXT:  1      2     0.50                        sqdecw	x0, w0
+# CHECK-NEXT:  1      2     0.50                        sqdecw	x0, w0, all, mul #16
+# CHECK-NEXT:  1      2     0.50                        sqdecw	x0, w0, pow2
+# CHECK-NEXT:  1      2     0.50                        sqdecw	x0, w0, pow2, mul #16
 # CHECK-NEXT:  1      2     1.00                        sqdecw	z0.s
 # CHECK-NEXT:  1      2     1.00                        sqdecw	z0.s, all, mul #16
 # CHECK-NEXT:  1      2     1.00                        sqdecw	z0.s, pow2
@@ -5731,19 +5731,19 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     0.50                        sqincb	x0, all, mul #16
 # CHECK-NEXT:  1      2     0.50                        sqincb	x0, pow2
 # CHECK-NEXT:  1      2     0.50                        sqincb	x0, vl1
-# CHECK-NEXT:  1      2     0.50                  U     sqincb	x0, w0
-# CHECK-NEXT:  1      2     0.50                  U     sqincb	x0, w0, all, mul #16
-# CHECK-NEXT:  1      2     0.50                  U     sqincb	x0, w0, pow2
-# CHECK-NEXT:  1      2     0.50                  U     sqincb	x0, w0, pow2, mul #16
+# CHECK-NEXT:  1      2     0.50                        sqincb	x0, w0
+# CHECK-NEXT:  1      2     0.50                        sqincb	x0, w0, all, mul #16
+# CHECK-NEXT:  1      2     0.50                        sqincb	x0, w0, pow2
+# CHECK-NEXT:  1      2     0.50                        sqincb	x0, w0, pow2, mul #16
 # CHECK-NEXT:  1      2     0.50                        sqincd	x0
 # CHECK-NEXT:  1      2     0.50                        sqincd	x0, #14
 # CHECK-NEXT:  1      2     0.50                        sqincd	x0, all, mul #16
 # CHECK-NEXT:  1      2     0.50                        sqincd	x0, pow2
 # CHECK-NEXT:  1      2     0.50                        sqincd	x0, vl1
-# CHECK-NEXT:  1      2     0.50                  U     sqincd	x0, w0
-# CHECK-NEXT:  1      2     0.50                  U     sqincd	x0, w0, all, mul #16
-# CHECK-NEXT:  1      2     0.50                  U     sqincd	x0, w0, pow2
-# CHECK-NEXT:  1      2     0.50                  U     sqincd	x0, w0, pow2, mul #16
+# CHECK-NEXT:  1      2     0.50                        sqincd	x0, w0
+# CHECK-NEXT:  1      2     0.50                        sqincd	x0, w0, all, mul #16
+# CHECK-NEXT:  1      2     0.50                        sqincd	x0, w0, pow2
+# CHECK-NEXT:  1      2     0.50                        sqincd	x0, w0, pow2, mul #16
 # CHECK-NEXT:  1      2     1.00                        sqincd	z0.d
 # CHECK-NEXT:  1      2     1.00                        sqincd	z0.d, all, mul #16
 # CHECK-NEXT:  1      2     1.00                        sqincd	z0.d, pow2
@@ -5753,10 +5753,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     0.50                        sqinch	x0, all, mul #16
 # CHECK-NEXT:  1      2     0.50                        sqinch	x0, pow2
 # CHECK-NEXT:  1      2     0.50                        sqinch	x0, vl1
-# CHECK-NEXT:  1      2     0.50                  U     sqinch	x0, w0
-# CHECK-NEXT:  1      2     0.50                  U     sqinch	x0, w0, all, mul #16
-# CHECK-NEXT:  1      2     0.50                  U     sqinch	x0, w0, pow2
-# CHECK-NEXT:  1      2     0.50                  U     sqinch	x0, w0, pow2, mul #16
+# CHECK-NEXT:  1      2     0.50                        sqinch	x0, w0
+# CHECK-NEXT:  1      2     0.50                        sqinch	x0, w0, all, mul #16
+# CHECK-NEXT:  1      2     0.50                        sqinch	x0, w0, pow2
+# CHECK-NEXT:  1      2     0.50                        sqinch	x0, w0, pow2, mul #16
 # CHECK-NEXT:  1      2     1.00                        sqinch	z0.h
 # CHECK-NEXT:  1      2     1.00                        sqinch	z0.h, all, mul #16
 # CHECK-NEXT:  1      2     1.00                        sqinch	z0.h, pow2
@@ -5765,10 +5765,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     0.50                        sqincp	x0, p0.d
 # CHECK-NEXT:  1      2     0.50                        sqincp	x0, p0.h
 # CHECK-NEXT:  1      2     0.50                        sqincp	x0, p0.s
-# CHECK-NEXT:  1      2     0.50                  U     sqincp	xzr, p15.b, wzr
-# CHECK-NEXT:  1      2     0.50                  U     sqincp	xzr, p15.d, wzr
-# CHECK-NEXT:  1      2     0.50                  U     sqincp	xzr, p15.h, wzr
-# CHECK-NEXT:  1      2     0.50                  U     sqincp	xzr, p15.s, wzr
+# CHECK-NEXT:  1      2     0.50                        sqincp	xzr, p15.b, wzr
+# CHECK-NEXT:  1      2     0.50                        sqincp	xzr, p15.d, wzr
+# CHECK-NEXT:  1      2     0.50                        sqincp	xzr, p15.h, wzr
+# CHECK-NEXT:  1      2     0.50                        sqincp	xzr, p15.s, wzr
 # CHECK-NEXT:  3      7     1.00                        sqincp	z0.d, p0.d
 # CHECK-NEXT:  3      7     1.00                        sqincp	z0.h, p0.h
 # CHECK-NEXT:  3      7     1.00                        sqincp	z0.s, p0.s
@@ -5777,10 +5777,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     0.50                        sqincw	x0, all, mul #16
 # CHECK-NEXT:  1      2     0.50                        sqincw	x0, pow2
 # CHECK-NEXT:  1      2     0.50                        sqincw	x0, vl1
-# CHECK-NEXT:  1      2     0.50                  U     sqincw	x0, w0
-# CHECK-NEXT:  1      2     0.50                  U     sqincw	x0, w0, all, mul #16
-# CHECK-NEXT:  1      2     0.50                  U     sqincw	x0, w0, pow2
-# CHECK-NEXT:  1      2     0.50                  U     sqincw	x0, w0, pow2, mul #16
+# CHECK-NEXT:  1      2     0.50                        sqincw	x0, w0
+# CHECK-NEXT:  1      2     0.50                        sqincw	x0, w0, all, mul #16
+# CHECK-NEXT:  1      2     0.50                        sqincw	x0, w0, pow2
+# CHECK-NEXT:  1      2     0.50                        sqincw	x0, w0, pow2, mul #16
 # CHECK-NEXT:  1      2     1.00                        sqincw	z0.s
 # CHECK-NEXT:  1      2     1.00                        sqincw	z0.s, all, mul #16
 # CHECK-NEXT:  1      2     1.00                        sqincw	z0.s, pow2
@@ -5834,10 +5834,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      4     1.00                        sqrshl	z0.h, p0/m, z0.h, z1.h
 # CHECK-NEXT:  1      4     1.00                        sqrshl	z29.s, p7/m, z29.s, z30.s
 # CHECK-NEXT:  1      4     1.00                        sqrshl	z31.d, p7/m, z31.d, z30.d
-# CHECK-NEXT:  1      4     1.00                  U     sqrshlr	z0.b, p0/m, z0.b, z1.b
-# CHECK-NEXT:  1      4     1.00                  U     sqrshlr	z0.h, p0/m, z0.h, z1.h
-# CHECK-NEXT:  1      4     1.00                  U     sqrshlr	z29.s, p7/m, z29.s, z30.s
-# CHECK-NEXT:  1      4     1.00                  U     sqrshlr	z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT:  1      4     1.00                        sqrshlr	z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT:  1      4     1.00                        sqrshlr	z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT:  1      4     1.00                        sqrshlr	z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT:  1      4     1.00                        sqrshlr	z31.d, p7/m, z31.d, z30.d
 # CHECK-NEXT:  1      4     1.00                        sqrshrnb	z0.b, z0.h, #1
 # CHECK-NEXT:  1      4     1.00                        sqrshrnb	z0.h, z0.s, #1
 # CHECK-NEXT:  1      4     1.00                        sqrshrnb	z0.s, z0.d, #1
@@ -5874,10 +5874,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      4     1.00                        sqshl	z31.d, p7/m, z31.d, z30.d
 # CHECK-NEXT:  1      4     1.00                        sqshl	z31.h, p0/m, z31.h, #15
 # CHECK-NEXT:  1      4     1.00                        sqshl	z31.s, p0/m, z31.s, #31
-# CHECK-NEXT:  1      4     1.00                  U     sqshlr	z0.b, p0/m, z0.b, z1.b
-# CHECK-NEXT:  1      4     1.00                  U     sqshlr	z0.h, p0/m, z0.h, z1.h
-# CHECK-NEXT:  1      4     1.00                  U     sqshlr	z29.s, p7/m, z29.s, z30.s
-# CHECK-NEXT:  1      4     1.00                  U     sqshlr	z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT:  1      4     1.00                        sqshlr	z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT:  1      4     1.00                        sqshlr	z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT:  1      4     1.00                        sqshlr	z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT:  1      4     1.00                        sqshlr	z31.d, p7/m, z31.d, z30.d
 # CHECK-NEXT:  1      4     1.00                        sqshlu	z0.b, p0/m, z0.b, #0
 # CHECK-NEXT:  1      4     1.00                        sqshlu	z0.d, p0/m, z0.d, #0
 # CHECK-NEXT:  1      4     1.00                        sqshlu	z0.h, p0/m, z0.h, #0
@@ -5961,10 +5961,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      4     1.00                        srshl	z0.h, p0/m, z0.h, z1.h
 # CHECK-NEXT:  1      4     1.00                        srshl	z29.s, p7/m, z29.s, z30.s
 # CHECK-NEXT:  1      4     1.00                        srshl	z31.d, p7/m, z31.d, z30.d
-# CHECK-NEXT:  1      4     1.00                  U     srshlr	z0.b, p0/m, z0.b, z1.b
-# CHECK-NEXT:  1      4     1.00                  U     srshlr	z0.h, p0/m, z0.h, z1.h
-# CHECK-NEXT:  1      4     1.00                  U     srshlr	z29.s, p7/m, z29.s, z30.s
-# CHECK-NEXT:  1      4     1.00                  U     srshlr	z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT:  1      4     1.00                        srshlr	z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT:  1      4     1.00                        srshlr	z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT:  1      4     1.00                        srshlr	z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT:  1      4     1.00                        srshlr	z31.d, p7/m, z31.d, z30.d
 # CHECK-NEXT:  1      4     1.00                        srshr	z0.b, p0/m, z0.b, #1
 # CHECK-NEXT:  1      4     1.00                        srshr	z0.d, p0/m, z0.d, #1
 # CHECK-NEXT:  1      4     1.00                        srshr	z0.h, p0/m, z0.h, #1
@@ -6104,66 +6104,66 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  2      2     0.50           *            st1w	{ z31.d }, p7, [z31.d, #124]
 # CHECK-NEXT:  2      2     0.50           *            st1w	{ z31.s }, p7, [sp, #-1, mul vl]
 # CHECK-NEXT:  4      4     1.00           *            st1w	{ z31.s }, p7, [z31.s, #124]
-# CHECK-NEXT:  2      4     0.50           *      U     st2b	{ z0.b, z1.b }, p0, [x0, x0]
-# CHECK-NEXT:  2      4     0.50           *      U     st2b	{ z0.b, z1.b }, p0, [x0]
-# CHECK-NEXT:  2      4     0.50           *      U     st2b	{ z21.b, z22.b }, p5, [x10, #10, mul vl]
-# CHECK-NEXT:  2      4     0.50           *      U     st2b	{ z23.b, z24.b }, p3, [x13, #-16, mul vl]
-# CHECK-NEXT:  2      4     0.50           *      U     st2b	{ z5.b, z6.b }, p3, [x17, x16]
-# CHECK-NEXT:  2      4     0.50           *      U     st2d	{ z0.d, z1.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT:  2      4     0.50           *      U     st2d	{ z0.d, z1.d }, p0, [x0]
-# CHECK-NEXT:  2      4     0.50           *      U     st2d	{ z21.d, z22.d }, p5, [x10, #10, mul vl]
-# CHECK-NEXT:  2      4     0.50           *      U     st2d	{ z23.d, z24.d }, p3, [x13, #-16, mul vl]
-# CHECK-NEXT:  2      4     0.50           *      U     st2d	{ z5.d, z6.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT:  3      4     0.50           *      U     st2h	{ z0.h, z1.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT:  2      4     0.50           *      U     st2h	{ z0.h, z1.h }, p0, [x0]
-# CHECK-NEXT:  2      4     0.50           *      U     st2h	{ z21.h, z22.h }, p5, [x10, #10, mul vl]
-# CHECK-NEXT:  2      4     0.50           *      U     st2h	{ z23.h, z24.h }, p3, [x13, #-16, mul vl]
-# CHECK-NEXT:  3      4     0.50           *      U     st2h	{ z5.h, z6.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT:  2      4     0.50           *      U     st2w	{ z0.s, z1.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT:  2      4     0.50           *      U     st2w	{ z0.s, z1.s }, p0, [x0]
-# CHECK-NEXT:  2      4     0.50           *      U     st2w	{ z21.s, z22.s }, p5, [x10, #10, mul vl]
-# CHECK-NEXT:  2      4     0.50           *      U     st2w	{ z23.s, z24.s }, p3, [x13, #-16, mul vl]
-# CHECK-NEXT:  2      4     0.50           *      U     st2w	{ z5.s, z6.s }, p3, [x17, x16, lsl #2]
-# CHECK-NEXT:  15     7     2.50           *      U     st3b	{ z0.b - z2.b }, p0, [x0, x0]
-# CHECK-NEXT:  10     7     2.50           *      U     st3b	{ z0.b - z2.b }, p0, [x0]
-# CHECK-NEXT:  10     7     2.50           *      U     st3b	{ z21.b - z23.b }, p5, [x10, #15, mul vl]
-# CHECK-NEXT:  10     7     2.50           *      U     st3b	{ z23.b - z25.b }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT:  15     7     2.50           *      U     st3b	{ z5.b - z7.b }, p3, [x17, x16]
-# CHECK-NEXT:  15     7     2.50           *      U     st3d	{ z0.d - z2.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT:  10     7     2.50           *      U     st3d	{ z0.d - z2.d }, p0, [x0]
-# CHECK-NEXT:  10     7     2.50           *      U     st3d	{ z21.d - z23.d }, p5, [x10, #15, mul vl]
-# CHECK-NEXT:  10     7     2.50           *      U     st3d	{ z23.d - z25.d }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT:  15     7     2.50           *      U     st3d	{ z5.d - z7.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT:  15     7     2.50           *      U     st3h	{ z0.h - z2.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT:  10     7     2.50           *      U     st3h	{ z0.h - z2.h }, p0, [x0]
-# CHECK-NEXT:  10     7     2.50           *      U     st3h	{ z21.h - z23.h }, p5, [x10, #15, mul vl]
-# CHECK-NEXT:  10     7     2.50           *      U     st3h	{ z23.h - z25.h }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT:  15     7     2.50           *      U     st3h	{ z5.h - z7.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT:  15     7     2.50           *      U     st3w	{ z0.s - z2.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT:  10     7     2.50           *      U     st3w	{ z0.s - z2.s }, p0, [x0]
-# CHECK-NEXT:  10     7     2.50           *      U     st3w	{ z21.s - z23.s }, p5, [x10, #15, mul vl]
-# CHECK-NEXT:  10     7     2.50           *      U     st3w	{ z23.s - z25.s }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT:  15     7     2.50           *      U     st3w	{ z5.s - z7.s }, p3, [x17, x16, lsl #2]
-# CHECK-NEXT:  27     11    4.50           *      U     st4b	{ z0.b - z3.b }, p0, [x0, x0]
-# CHECK-NEXT:  18     11    4.50           *      U     st4b	{ z0.b - z3.b }, p0, [x0]
-# CHECK-NEXT:  18     11    4.50           *      U     st4b	{ z21.b - z24.b }, p5, [x10, #20, mul vl]
-# CHECK-NEXT:  18     11    4.50           *      U     st4b	{ z23.b - z26.b }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT:  27     11    4.50           *      U     st4b	{ z5.b - z8.b }, p3, [x17, x16]
-# CHECK-NEXT:  27     11    4.50           *      U     st4d	{ z0.d - z3.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT:  18     11    4.50           *      U     st4d	{ z0.d - z3.d }, p0, [x0]
-# CHECK-NEXT:  18     11    4.50           *      U     st4d	{ z21.d - z24.d }, p5, [x10, #20, mul vl]
-# CHECK-NEXT:  18     11    4.50           *      U     st4d	{ z23.d - z26.d }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT:  27     11    4.50           *      U     st4d	{ z5.d - z8.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT:  27     11    4.50           *      U     st4h	{ z0.h - z3.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT:  18     11    4.50           *      U     st4h	{ z0.h - z3.h }, p0, [x0]
-# CHECK-NEXT:  18     11    4.50           *      U     st4h	{ z21.h - z24.h }, p5, [x10, #20, mul vl]
-# CHECK-NEXT:  18     11    4.50           *      U     st4h	{ z23.h - z26.h }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT:  27     11    4.50           *      U     st4h	{ z5.h - z8.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT:  27     11    4.50           *      U     st4w	{ z0.s - z3.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT:  18     11    4.50           *      U     st4w	{ z0.s - z3.s }, p0, [x0]
-# CHECK-NEXT:  18     11    4.50           *      U     st4w	{ z21.s - z24.s }, p5, [x10, #20, mul vl]
-# CHECK-NEXT:  18     11    4.50           *      U     st4w	{ z23.s - z26.s }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT:  27     11    4.50           *      U     st4w	{ z5.s - z8.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT:  2      4     0.50           *            st2b	{ z0.b, z1.b }, p0, [x0, x0]
+# CHECK-NEXT:  2      4     0.50           *            st2b	{ z0.b, z1.b }, p0, [x0]
+# CHECK-NEXT:  2      4     0.50           *            st2b	{ z21.b, z22.b }, p5, [x10, #10, mul vl]
+# CHECK-NEXT:  2      4     0.50           *            st2b	{ z23.b, z24.b }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT:  2      4     0.50           *            st2b	{ z5.b, z6.b }, p3, [x17, x16]
+# CHECK-NEXT:  2      4     0.50           *            st2d	{ z0.d, z1.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT:  2      4     0.50           *            st2d	{ z0.d, z1.d }, p0, [x0]
+# CHECK-NEXT:  2      4     0.50           *            st2d	{ z21.d, z22.d }, p5, [x10, #10, mul vl]
+# CHECK-NEXT:  2      4     0.50           *            st2d	{ z23.d, z24.d }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT:  2      4     0.50           *            st2d	{ z5.d, z6.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT:  3      4     0.50           *            st2h	{ z0.h, z1.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT:  2      4     0.50           *            st2h	{ z0.h, z1.h }, p0, [x0]
+# CHECK-NEXT:  2      4     0.50           *            st2h	{ z21.h, z22.h }, p5, [x10, #10, mul vl]
+# CHECK-NEXT:  2      4     0.50           *            st2h	{ z23.h, z24.h }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT:  3      4     0.50           *            st2h	{ z5.h, z6.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT:  2      4     0.50           *            st2w	{ z0.s, z1.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT:  2      4     0.50           *            st2w	{ z0.s, z1.s }, p0, [x0]
+# CHECK-NEXT:  2      4     0.50           *            st2w	{ z21.s, z22.s }, p5, [x10, #10, mul vl]
+# CHECK-NEXT:  2      4     0.50           *            st2w	{ z23.s, z24.s }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT:  2      4     0.50           *            st2w	{ z5.s, z6.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT:  15     7     2.50           *            st3b	{ z0.b - z2.b }, p0, [x0, x0]
+# CHECK-NEXT:  10     7     2.50           *            st3b	{ z0.b - z2.b }, p0, [x0]
+# CHECK-NEXT:  10     7     2.50           *            st3b	{ z21.b - z23.b }, p5, [x10, #15, mul vl]
+# CHECK-NEXT:  10     7     2.50           *            st3b	{ z23.b - z25.b }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT:  15     7     2.50           *            st3b	{ z5.b - z7.b }, p3, [x17, x16]
+# CHECK-NEXT:  15     7     2.50           *            st3d	{ z0.d - z2.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT:  10     7     2.50           *            st3d	{ z0.d - z2.d }, p0, [x0]
+# CHECK-NEXT:  10     7     2.50           *            st3d	{ z21.d - z23.d }, p5, [x10, #15, mul vl]
+# CHECK-NEXT:  10     7     2.50           *            st3d	{ z23.d - z25.d }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT:  15     7     2.50           *            st3d	{ z5.d - z7.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT:  15     7     2.50           *            st3h	{ z0.h - z2.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT:  10     7     2.50           *            st3h	{ z0.h - z2.h }, p0, [x0]
+# CHECK-NEXT:  10     7     2.50           *            st3h	{ z21.h - z23.h }, p5, [x10, #15, mul vl]
+# CHECK-NEXT:  10     7     2.50           *            st3h	{ z23.h - z25.h }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT:  15     7     2.50           *            st3h	{ z5.h - z7.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT:  15     7     2.50           *            st3w	{ z0.s - z2.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT:  10     7     2.50           *            st3w	{ z0.s - z2.s }, p0, [x0]
+# CHECK-NEXT:  10     7     2.50           *            st3w	{ z21.s - z23.s }, p5, [x10, #15, mul vl]
+# CHECK-NEXT:  10     7     2.50           *            st3w	{ z23.s - z25.s }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT:  15     7     2.50           *            st3w	{ z5.s - z7.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT:  27     11    4.50           *            st4b	{ z0.b - z3.b }, p0, [x0, x0]
+# CHECK-NEXT:  18     11    4.50           *            st4b	{ z0.b - z3.b }, p0, [x0]
+# CHECK-NEXT:  18     11    4.50           *            st4b	{ z21.b - z24.b }, p5, [x10, #20, mul vl]
+# CHECK-NEXT:  18     11    4.50           *            st4b	{ z23.b - z26.b }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT:  27     11    4.50           *            st4b	{ z5.b - z8.b }, p3, [x17, x16]
+# CHECK-NEXT:  27     11    4.50           *            st4d	{ z0.d - z3.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT:  18     11    4.50           *            st4d	{ z0.d - z3.d }, p0, [x0]
+# CHECK-NEXT:  18     11    4.50           *            st4d	{ z21.d - z24.d }, p5, [x10, #20, mul vl]
+# CHECK-NEXT:  18     11    4.50           *            st4d	{ z23.d - z26.d }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT:  27     11    4.50           *            st4d	{ z5.d - z8.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT:  27     11    4.50           *            st4h	{ z0.h - z3.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT:  18     11    4.50           *            st4h	{ z0.h - z3.h }, p0, [x0]
+# CHECK-NEXT:  18     11    4.50           *            st4h	{ z21.h - z24.h }, p5, [x10, #20, mul vl]
+# CHECK-NEXT:  18     11    4.50           *            st4h	{ z23.h - z26.h }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT:  27     11    4.50           *            st4h	{ z5.h - z8.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT:  27     11    4.50           *            st4w	{ z0.s - z3.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT:  18     11    4.50           *            st4w	{ z0.s - z3.s }, p0, [x0]
+# CHECK-NEXT:  18     11    4.50           *            st4w	{ z21.s - z24.s }, p5, [x10, #20, mul vl]
+# CHECK-NEXT:  18     11    4.50           *            st4w	{ z23.s - z26.s }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT:  27     11    4.50           *            st4w	{ z5.s - z8.s }, p3, [x17, x16, lsl #2]
 # CHECK-NEXT:  2      2     0.50           *            stnt1b	{ z0.b }, p0, [x0, x0]
 # CHECK-NEXT:  2      2     0.50           *            stnt1b	{ z0.b }, p0, [x0]
 # CHECK-NEXT:  2      2     0.50           *            stnt1b	{ z0.d }, p0, [z1.d]
@@ -6204,9 +6204,9 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      1     0.50           *            str	p0, [x0]
 # CHECK-NEXT:  1      1     0.50           *            str	p15, [sp, #-256, mul vl]
 # CHECK-NEXT:  1      1     0.50           *            str	p5, [x10, #255, mul vl]
-# CHECK-NEXT:  2      2     0.50           *      U     str	z0, [x0]
-# CHECK-NEXT:  2      2     0.50           *      U     str	z21, [x10, #-256, mul vl]
-# CHECK-NEXT:  2      2     0.50           *      U     str	z31, [sp, #255, mul vl]
+# CHECK-NEXT:  2      2     0.50           *            str	z0, [x0]
+# CHECK-NEXT:  2      2     0.50           *            str	z21, [x10, #-256, mul vl]
+# CHECK-NEXT:  2      2     0.50           *            str	z31, [sp, #255, mul vl]
 # CHECK-NEXT:  1      2     0.50                        sub	z0.b, p0/m, z0.b, z0.b
 # CHECK-NEXT:  1      2     0.50                        sub	z0.b, z0.b, #0
 # CHECK-NEXT:  1      2     0.50                        sub	z0.b, z0.b, z0.b
@@ -6293,10 +6293,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     1.00                        sxth	z31.s, p7/m, z31.s
 # CHECK-NEXT:  1      2     1.00                        sxtw	z0.d, p0/m, z0.d
 # CHECK-NEXT:  1      2     1.00                        sxtw	z31.d, p7/m, z31.d
-# CHECK-NEXT:  1      2     0.50                  U     tbl	z28.b, { z29.b, z30.b }, z31.b
-# CHECK-NEXT:  1      2     0.50                  U     tbl	z28.d, { z29.d, z30.d }, z31.d
-# CHECK-NEXT:  1      2     0.50                  U     tbl	z28.h, { z29.h, z30.h }, z31.h
-# CHECK-NEXT:  1      2     0.50                  U     tbl	z28.s, { z29.s, z30.s }, z31.s
+# CHECK-NEXT:  1      2     0.50                        tbl	z28.b, { z29.b, z30.b }, z31.b
+# CHECK-NEXT:  1      2     0.50                        tbl	z28.d, { z29.d, z30.d }, z31.d
+# CHECK-NEXT:  1      2     0.50                        tbl	z28.h, { z29.h, z30.h }, z31.h
+# CHECK-NEXT:  1      2     0.50                        tbl	z28.s, { z29.s, z30.s }, z31.s
 # CHECK-NEXT:  1      2     0.50                        tbl	z31.b, { z31.b }, z31.b
 # CHECK-NEXT:  1      2     0.50                        tbl	z31.d, { z31.d }, z31.d
 # CHECK-NEXT:  1      2     0.50                        tbl	z31.h, { z31.h }, z31.h
@@ -6595,10 +6595,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      4     1.00                        uqrshl	z0.h, p0/m, z0.h, z1.h
 # CHECK-NEXT:  1      4     1.00                        uqrshl	z29.s, p7/m, z29.s, z30.s
 # CHECK-NEXT:  1      4     1.00                        uqrshl	z31.d, p7/m, z31.d, z30.d
-# CHECK-NEXT:  1      4     1.00                  U     uqrshlr	z0.b, p0/m, z0.b, z1.b
-# CHECK-NEXT:  1      4     1.00                  U     uqrshlr	z0.h, p0/m, z0.h, z1.h
-# CHECK-NEXT:  1      4     1.00                  U     uqrshlr	z29.s, p7/m, z29.s, z30.s
-# CHECK-NEXT:  1      4     1.00                  U     uqrshlr	z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT:  1      4     1.00                        uqrshlr	z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT:  1      4     1.00                        uqrshlr	z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT:  1      4     1.00                        uqrshlr	z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT:  1      4     1.00                        uqrshlr	z31.d, p7/m, z31.d, z30.d
 # CHECK-NEXT:  1      4     1.00                        uqrshrnb	z0.b, z0.h, #1
 # CHECK-NEXT:  1      4     1.00                        uqrshrnb	z0.h, z0.s, #1
 # CHECK-NEXT:  1      4     1.00                        uqrshrnb	z0.s, z0.d, #1
@@ -6623,10 +6623,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      4     1.00                        uqshl	z31.d, p7/m, z31.d, z30.d
 # CHECK-NEXT:  1      4     1.00                        uqshl	z31.h, p0/m, z31.h, #15
 # CHECK-NEXT:  1      4     1.00                        uqshl	z31.s, p0/m, z31.s, #31
-# CHECK-NEXT:  1      4     1.00                  U     uqshlr	z0.b, p0/m, z0.b, z1.b
-# CHECK-NEXT:  1      4     1.00                  U     uqshlr	z0.h, p0/m, z0.h, z1.h
-# CHECK-NEXT:  1      4     1.00                  U     uqshlr	z29.s, p7/m, z29.s, z30.s
-# CHECK-NEXT:  1      4     1.00                  U     uqshlr	z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT:  1      4     1.00                        uqshlr	z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT:  1      4     1.00                        uqshlr	z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT:  1      4     1.00                        uqshlr	z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT:  1      4     1.00                        uqshlr	z31.d, p7/m, z31.d, z30.d
 # CHECK-NEXT:  1      4     1.00                        uqshrnb	z0.b, z0.h, #1
 # CHECK-NEXT:  1      4     1.00                        uqshrnb	z0.h, z0.s, #1
 # CHECK-NEXT:  1      4     1.00                        uqshrnb	z0.s, z0.d, #1
@@ -6677,10 +6677,10 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      4     1.00                        urshl	z0.h, p0/m, z0.h, z1.h
 # CHECK-NEXT:  1      4     1.00                        urshl	z29.s, p7/m, z29.s, z30.s
 # CHECK-NEXT:  1      4     1.00                        urshl	z31.d, p7/m, z31.d, z30.d
-# CHECK-NEXT:  1      4     1.00                  U     urshlr	z0.b, p0/m, z0.b, z1.b
-# CHECK-NEXT:  1      4     1.00                  U     urshlr	z0.h, p0/m, z0.h, z1.h
-# CHECK-NEXT:  1      4     1.00                  U     urshlr	z29.s, p7/m, z29.s, z30.s
-# CHECK-NEXT:  1      4     1.00                  U     urshlr	z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT:  1      4     1.00                        urshlr	z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT:  1      4     1.00                        urshlr	z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT:  1      4     1.00                        urshlr	z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT:  1      4     1.00                        urshlr	z31.d, p7/m, z31.d, z30.d
 # CHECK-NEXT:  1      4     1.00                        urshr	z0.b, p0/m, z0.b, #1
 # CHECK-NEXT:  1      4     1.00                        urshr	z0.d, p0/m, z0.d, #1
 # CHECK-NEXT:  1      4     1.00                        urshr	z0.h, p0/m, z0.h, #1
@@ -6779,14 +6779,14 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      3     0.50                        whilege	p15.h, x0, xzr
 # CHECK-NEXT:  1      3     0.50                        whilege	p15.s, w0, wzr
 # CHECK-NEXT:  1      3     0.50                        whilege	p15.s, x0, xzr
-# CHECK-NEXT:  1      3     0.50    *      *      U     whilerw	p15.b, x30, x30
-# CHECK-NEXT:  1      3     0.50    *      *      U     whilerw	p15.d, x30, x30
-# CHECK-NEXT:  1      3     0.50    *      *      U     whilerw	p15.h, x30, x30
-# CHECK-NEXT:  1      3     0.50    *      *      U     whilerw	p15.s, x30, x30
-# CHECK-NEXT:  1      3     0.50    *      *      U     whilewr	p15.b, x30, x30
-# CHECK-NEXT:  1      3     0.50    *      *      U     whilewr	p15.d, x30, x30
-# CHECK-NEXT:  1      3     0.50    *      *      U     whilewr	p15.h, x30, x30
-# CHECK-NEXT:  1      3     0.50    *      *      U     whilewr	p15.s, x30, x30
+# CHECK-NEXT:  1      3     0.50                        whilerw	p15.b, x30, x30
+# CHECK-NEXT:  1      3     0.50                        whilerw	p15.d, x30, x30
+# CHECK-NEXT:  1      3     0.50                        whilerw	p15.h, x30, x30
+# CHECK-NEXT:  1      3     0.50                        whilerw	p15.s, x30, x30
+# CHECK-NEXT:  1      3     0.50                        whilewr	p15.b, x30, x30
+# CHECK-NEXT:  1      3     0.50                        whilewr	p15.d, x30, x30
+# CHECK-NEXT:  1      3     0.50                        whilewr	p15.h, x30, x30
+# CHECK-NEXT:  1      3     0.50                        whilewr	p15.s, x30, x30
 # CHECK-NEXT:  1      2     2.00           *      U     wrffr	p0.b
 # CHECK-NEXT:  1      2     2.00           *      U     wrffr	p15.b
 # CHECK-NEXT:  1      2     1.00                        xar	z0.b, z0.b, z1.b, #1


        


More information about the llvm-commits mailing list