[llvm] 60e2aad - [AArch64]Change printVectorList to print SVE vector range

Caroline Concatto via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 14 11:00:41 PDT 2022


Author: Caroline Concatto
Date: 2022-10-14T18:59:56+01:00
New Revision: 60e2aad109fc793de831de4a00116a3616e0e543

URL: https://github.com/llvm/llvm-project/commit/60e2aad109fc793de831de4a00116a3616e0e543
DIFF: https://github.com/llvm/llvm-project/commit/60e2aad109fc793de831de4a00116a3616e0e543.diff

LOG: [AArch64]Change printVectorList to print SVE vector range

This patch changes the preferred disassembly for SVE vector lists.
For instance, instead of printing this assembly:
  ld4d { z1.d, z2.d, z3.d, z4.d }, p0/z, [x0]
it will print this:
  ld4d { z1.d - z4.d }, p0/z, [x0]

Differential Revision: https://reviews.llvm.org/D135952

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
    llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+imm-addr-mode.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+reg-addr-mode.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
    llvm/test/CodeGen/AArch64/sve-ldN.mir
    llvm/test/CodeGen/AArch64/sve-stN.mir
    llvm/test/MC/AArch64/SVE/ld3b.s
    llvm/test/MC/AArch64/SVE/ld3d.s
    llvm/test/MC/AArch64/SVE/ld3h.s
    llvm/test/MC/AArch64/SVE/ld3w.s
    llvm/test/MC/AArch64/SVE/ld4b.s
    llvm/test/MC/AArch64/SVE/ld4d.s
    llvm/test/MC/AArch64/SVE/ld4h.s
    llvm/test/MC/AArch64/SVE/ld4w.s
    llvm/test/MC/AArch64/SVE/st3b.s
    llvm/test/MC/AArch64/SVE/st3d.s
    llvm/test/MC/AArch64/SVE/st3h.s
    llvm/test/MC/AArch64/SVE/st3w.s
    llvm/test/MC/AArch64/SVE/st4b.s
    llvm/test/MC/AArch64/SVE/st4d.s
    llvm/test/MC/AArch64/SVE/st4h.s
    llvm/test/MC/AArch64/SVE/st4w.s
    llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s
    llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index ee142a266e32d..c418ca0c02243 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -1466,17 +1466,31 @@ void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum,
     Reg = MRI.getMatchingSuperReg(Reg, AArch64::dsub, &FPR128RC);
   }
 
-  for (unsigned i = 0; i < NumRegs; ++i, Reg = getNextVectorRegister(Reg)) {
-    if (MRI.getRegClass(AArch64::ZPRRegClassID).contains(Reg))
-      printRegName(O, Reg);
-    else
-      printRegName(O, Reg, AArch64::vreg);
+  if (MRI.getRegClass(AArch64::ZPRRegClassID).contains(Reg) && NumRegs > 1 &&
+      // Do not print the range when the last register is lower than the first.
+      // Because it is a wrap-around register.
+      Reg < getNextVectorRegister(Reg, NumRegs - 1)) {
+    printRegName(O, Reg);
     O << LayoutSuffix;
-
-    if (i + 1 != NumRegs)
-      O << ", ";
+    if (NumRegs > 1) {
+      // Set of two sve registers should be separated by ','
+      StringRef split_char = NumRegs == 2 ? ", " : " - ";
+      O << split_char;
+      printRegName(O, (getNextVectorRegister(Reg, NumRegs - 1)));
+      O << LayoutSuffix;
+    }
+  } else {
+    for (unsigned i = 0; i < NumRegs; ++i, Reg = getNextVectorRegister(Reg)) {
+      // wrap-around sve register
+      if (MRI.getRegClass(AArch64::ZPRRegClassID).contains(Reg))
+        printRegName(O, Reg);
+      else
+        printRegName(O, Reg, AArch64::vreg);
+      O << LayoutSuffix;
+      if (i + 1 != NumRegs)
+        O << ", ";
+    }
   }
-
   O << " }";
 }
 

diff  --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
index be3b844efb0e7..554f9b986b23d 100644
--- a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
+++ b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
@@ -13,8 +13,8 @@ define float @foo1(double* %x0, double* %x1, double* %x2) nounwind {
 ; CHECK-NEXT:    addvl sp, sp, #-4
 ; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    fmov s0, #1.00000000
-; CHECK-NEXT:    ld4d { z1.d, z2.d, z3.d, z4.d }, p0/z, [x0]
-; CHECK-NEXT:    ld4d { z16.d, z17.d, z18.d, z19.d }, p0/z, [x1]
+; CHECK-NEXT:    ld4d { z1.d - z4.d }, p0/z, [x0]
+; CHECK-NEXT:    ld4d { z16.d - z19.d }, p0/z, [x1]
 ; CHECK-NEXT:    ld1d { z5.d }, p0/z, [x2]
 ; CHECK-NEXT:    mov x0, sp
 ; CHECK-NEXT:    ptrue p0.d
@@ -60,8 +60,8 @@ define float @foo2(double* %x0, double* %x1) nounwind {
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    add x9, sp, #16
-; CHECK-NEXT:    ld4d { z1.d, z2.d, z3.d, z4.d }, p0/z, [x0]
-; CHECK-NEXT:    ld4d { z16.d, z17.d, z18.d, z19.d }, p0/z, [x1]
+; CHECK-NEXT:    ld4d { z1.d - z4.d }, p0/z, [x0]
+; CHECK-NEXT:    ld4d { z16.d - z19.d }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    fmov s0, #1.00000000
@@ -118,8 +118,8 @@ define float @foo3(double* %x0, double* %x1, double* %x2) nounwind {
 ; CHECK-NEXT:    addvl sp, sp, #-3
 ; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    fmov s0, #1.00000000
-; CHECK-NEXT:    ld4d { z2.d, z3.d, z4.d, z5.d }, p0/z, [x0]
-; CHECK-NEXT:    ld3d { z16.d, z17.d, z18.d }, p0/z, [x1]
+; CHECK-NEXT:    ld4d { z2.d - z5.d }, p0/z, [x0]
+; CHECK-NEXT:    ld3d { z16.d - z18.d }, p0/z, [x1]
 ; CHECK-NEXT:    ld1d { z6.d }, p0/z, [x2]
 ; CHECK-NEXT:    fmov s1, #2.00000000
 ; CHECK-NEXT:    mov x0, sp
@@ -234,7 +234,7 @@ entry:
   ret double %x0
 }
 
-; Use AAVPCS, SVE register in z0-z7 used
+; Use AAVPCS, SVE register in z0 - z7 used
 
 define void @aavpcs1(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, i32 %s6, <vscale x 4 x i32> %s7, <vscale x 4 x i32> %s8, <vscale x 4 x i32> %s9, <vscale x 4 x i32> %s10, <vscale x 4 x i32> %s11, <vscale x 4 x i32> %s12, <vscale x 4 x i32> %s13, <vscale x 4 x i32> %s14, <vscale x 4 x i32> %s15, <vscale x 4 x i32> %s16, i32 * %ptr) nounwind {
 ; CHECK-LABEL: aavpcs1:
@@ -267,7 +267,7 @@ entry:
   ret void
 }
 
-; Use AAVPCS, SVE register in z0-z7 used
+; Use AAVPCS, SVE register in z0 - z7 used
 
 define void @aavpcs2(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, <vscale x 4 x float> %s7, <vscale x 4 x float> %s8, <vscale x 4 x float> %s9, <vscale x 4 x float> %s10, <vscale x 4 x float> %s11, <vscale x 4 x float> %s12,<vscale x 4 x float> %s13,<vscale x 4 x float> %s14,<vscale x 4 x float> %s15,<vscale x 4 x float> %s16,float * %ptr) nounwind {
 ; CHECK-LABEL: aavpcs2:
@@ -306,7 +306,7 @@ entry:
   ret void
 }
 
-; Use AAVPCS, no SVE register in z0-z7 used (floats occupy z0-z7) but predicate arg is used
+; Use AAVPCS, no SVE register in z0 - z7 used (floats occupy z0 - z7) but predicate arg is used
 
 define void @aavpcs3(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, float %s7, <vscale x 4 x float> %s8, <vscale x 4 x float> %s9, <vscale x 4 x float> %s10, <vscale x 4 x float> %s11, <vscale x 4 x float> %s12, <vscale x 4 x float> %s13, <vscale x 4 x float> %s14, <vscale x 4 x float> %s15, <vscale x 4 x float> %s16, <vscale x 4 x float> %s17, <vscale x 16 x i1> %p0, float * %ptr) nounwind {
 ; CHECK-LABEL: aavpcs3:
@@ -347,7 +347,7 @@ entry:
   ret void
 }
 
-; use AAVPCS, SVE register in z0-z7 used (i32s dont occupy z0-z7)
+; use AAVPCS, SVE register in z0 - z7 used (i32s dont occupy z0 - z7)
 
 define void @aavpcs4(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, i32 %s6, i32 %s7, <vscale x 4 x i32> %s8, <vscale x 4 x i32> %s9, <vscale x 4 x i32> %s10, <vscale x 4 x i32> %s11, <vscale x 4 x i32> %s12, <vscale x 4 x i32> %s13, <vscale x 4 x i32> %s14, <vscale x 4 x i32> %s15, <vscale x 4 x i32> %s16, <vscale x 4 x i32> %s17, i32 * %ptr) nounwind {
 ; CHECK-LABEL: aavpcs4:

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+imm-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+imm-addr-mode.ll
index 8974022e0436f..9aac5d35d8b25 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+imm-addr-mode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+imm-addr-mode.ll
@@ -173,7 +173,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld3.nxv48
 ; CHECK-LABEL: ld3.nxv48i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #3
-; CHECK-NEXT:    ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ld3b { z0.b - z2.b }, p0/z, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 3
   %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -185,7 +185,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld3.nxv48
 ; CHECK-LABEL: ld3.nxv48i8_lower_bound:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #-24
-; CHECK-NEXT:    ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ld3b { z0.b - z2.b }, p0/z, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -24
   %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -197,7 +197,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld3.nxv48
 ; CHECK-LABEL: ld3.nxv48i8_upper_bound:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #21
-; CHECK-NEXT:    ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ld3b { z0.b - z2.b }, p0/z, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 21
   %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -209,7 +209,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld3.nxv48
 ; CHECK-LABEL: ld3.nxv48i8_not_multiple_of_3_01:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #4
-; CHECK-NEXT:    ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ld3b { z0.b - z2.b }, p0/z, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 4
   %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -221,7 +221,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld3.nxv48
 ; CHECK-LABEL: ld3.nxv48i8_not_multiple_of_3_02:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #5
-; CHECK-NEXT:    ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ld3b { z0.b - z2.b }, p0/z, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 5
   %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -233,7 +233,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld3.nxv48
 ; CHECK-LABEL: ld3.nxv48i8_outside_lower_bound:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #-27
-; CHECK-NEXT:    ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ld3b { z0.b - z2.b }, p0/z, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -27
   %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -245,7 +245,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld3.nxv48
 ; CHECK-LABEL: ld3.nxv48i8_outside_upper_bound:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #24
-; CHECK-NEXT:    ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ld3b { z0.b - z2.b }, p0/z, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 24
   %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -258,7 +258,7 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @ld3.nxv24
 ; CHECK-LABEL: ld3.nxv24i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    addvl x8, x0, #21
-; CHECK-NEXT:    ld3h { z0.h, z1.h, z2.h }, p0/z, [x8]
+; CHECK-NEXT:    ld3h { z0.h - z2.h }, p0/z, [x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 21
   %base_ptr = bitcast <vscale x 8 x i16>* %base to i16 *
@@ -270,7 +270,7 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @ld3.nx
 ; CHECK-LABEL: ld3.nxv24f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    addvl x8, x0, #21
-; CHECK-NEXT:    ld3h { z0.h, z1.h, z2.h }, p0/z, [x8]
+; CHECK-NEXT:    ld3h { z0.h - z2.h }, p0/z, [x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 21
   %base_ptr = bitcast <vscale x 8 x half>* %base to half *
@@ -282,7 +282,7 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @
 ; CHECK-LABEL: ld3.nxv24bf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    addvl x8, x0, #-24
-; CHECK-NEXT:    ld3h { z0.h, z1.h, z2.h }, p0/z, [x8]
+; CHECK-NEXT:    ld3h { z0.h - z2.h }, p0/z, [x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* %addr, i64 -24
   %base_ptr = bitcast <vscale x 8 x bfloat>* %base to bfloat *
@@ -295,7 +295,7 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @ld3.nxv12
 ; CHECK-LABEL: ld3.nxv12i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    addvl x8, x0, #21
-; CHECK-NEXT:    ld3w { z0.s, z1.s, z2.s }, p0/z, [x8]
+; CHECK-NEXT:    ld3w { z0.s - z2.s }, p0/z, [x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 21
   %base_ptr = bitcast <vscale x 4 x i32>* %base to i32 *
@@ -307,7 +307,7 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @ld3
 ; CHECK-LABEL: ld3.nxv12f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    addvl x8, x0, #-24
-; CHECK-NEXT:    ld3w { z0.s, z1.s, z2.s }, p0/z, [x8]
+; CHECK-NEXT:    ld3w { z0.s - z2.s }, p0/z, [x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 -24
   %base_ptr = bitcast <vscale x 4 x float>* %base to float *
@@ -320,7 +320,7 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @ld3.nxv6i
 ; CHECK-LABEL: ld3.nxv6i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    addvl x8, x0, #21
-; CHECK-NEXT:    ld3d { z0.d, z1.d, z2.d }, p0/z, [x8]
+; CHECK-NEXT:    ld3d { z0.d - z2.d }, p0/z, [x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 21
   %base_ptr = bitcast <vscale x 2 x i64>* %base to i64 *
@@ -332,7 +332,7 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @
 ; CHECK-LABEL: ld3.nxv6f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    addvl x8, x0, #-24
-; CHECK-NEXT:    ld3d { z0.d, z1.d, z2.d }, p0/z, [x8]
+; CHECK-NEXT:    ld3d { z0.d - z2.d }, p0/z, [x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 -24
   %base_ptr = bitcast <vscale x 2 x double>* %base to double *
@@ -345,7 +345,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 ; CHECK-LABEL: ld4.nxv64i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #4
-; CHECK-NEXT:    ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ld4b { z0.b - z3.b }, p0/z, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 4
   %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -357,7 +357,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 ; CHECK-LABEL: ld4.nxv64i8_lower_bound:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #-32
-; CHECK-NEXT:    ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ld4b { z0.b - z3.b }, p0/z, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -32
   %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -369,7 +369,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 ; CHECK-LABEL: ld4.nxv64i8_upper_bound:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #28
-; CHECK-NEXT:    ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ld4b { z0.b - z3.b }, p0/z, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 28
   %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -381,7 +381,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 ; CHECK-LABEL: ld4.nxv64i8_not_multiple_of_4_01:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #5
-; CHECK-NEXT:    ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ld4b { z0.b - z3.b }, p0/z, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 5
   %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -393,7 +393,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 ; CHECK-LABEL: ld4.nxv64i8_not_multiple_of_4_02:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #6
-; CHECK-NEXT:    ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ld4b { z0.b - z3.b }, p0/z, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 6
   %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -405,7 +405,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 ; CHECK-LABEL: ld4.nxv64i8_not_multiple_of_4_03:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #7
-; CHECK-NEXT:    ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ld4b { z0.b - z3.b }, p0/z, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 7
   %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -420,7 +420,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 ; CHECK-NEXT:    mov x9, #-576
 ; CHECK-NEXT:    lsr x8, x8, #4
 ; CHECK-NEXT:    mul x8, x8, x9
-; CHECK-NEXT:    ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ld4b { z0.b - z3.b }, p0/z, [x0, x8]
 ; CHECK-NEXT:    ret
 ; FIXME: optimize OFFSET computation so that xOFFSET = (mul (RDVL #4) #9)
 ; xM = -9 * 2^6
@@ -439,7 +439,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 ; CHECK-NEXT:    mov w9, #512
 ; CHECK-NEXT:    lsr x8, x8, #4
 ; CHECK-NEXT:    mul x8, x8, x9
-; CHECK-NEXT:    ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ld4b { z0.b - z3.b }, p0/z, [x0, x8]
 ; CHECK-NEXT:    ret
 ; FIXME: optimize OFFSET computation so that xOFFSET = (mul (RDVL #16) #2)
 ; xM = 2^9
@@ -456,7 +456,7 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 ; CHECK-LABEL: ld4.nxv32i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    addvl x8, x0, #8
-; CHECK-NEXT:    ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x8]
+; CHECK-NEXT:    ld4h { z0.h - z3.h }, p0/z, [x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 8
   %base_ptr = bitcast <vscale x 8 x i16>* %base to i16 *
@@ -468,7 +468,7 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 ; CHECK-LABEL: ld4.nxv32f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    addvl x8, x0, #28
-; CHECK-NEXT:    ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x8]
+; CHECK-NEXT:    ld4h { z0.h - z3.h }, p0/z, [x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 28
   %base_ptr = bitcast <vscale x 8 x half>* %base to half *
@@ -480,7 +480,7 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <v
 ; CHECK-LABEL: ld4.nxv32bf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    addvl x8, x0, #-32
-; CHECK-NEXT:    ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x8]
+; CHECK-NEXT:    ld4h { z0.h - z3.h }, p0/z, [x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* %addr, i64 -32
   %base_ptr = bitcast <vscale x 8 x bfloat>* %base to bfloat *
@@ -493,7 +493,7 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 ; CHECK-LABEL: ld4.nxv16i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    addvl x8, x0, #28
-; CHECK-NEXT:    ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x8]
+; CHECK-NEXT:    ld4w { z0.s - z3.s }, p0/z, [x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 28
   %base_ptr = bitcast <vscale x 4 x i32>* %base to i32 *
@@ -505,7 +505,7 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 ; CHECK-LABEL: ld4.nxv16f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    addvl x8, x0, #-32
-; CHECK-NEXT:    ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x8]
+; CHECK-NEXT:    ld4w { z0.s - z3.s }, p0/z, [x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 -32
   %base_ptr = bitcast <vscale x 4 x float>* %base to float *
@@ -518,7 +518,7 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 ; CHECK-LABEL: ld4.nxv8i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    addvl x8, x0, #28
-; CHECK-NEXT:    ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x8]
+; CHECK-NEXT:    ld4d { z0.d - z3.d }, p0/z, [x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 28
   %base_ptr = bitcast <vscale x 2 x i64>* %base to i64 *
@@ -530,7 +530,7 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <v
 ; CHECK-LABEL: ld4.nxv8f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    addvl x8, x0, #-32
-; CHECK-NEXT:    ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x8]
+; CHECK-NEXT:    ld4d { z0.d - z3.d }, p0/z, [x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 -32
   %base_ptr = bitcast <vscale x 2 x double>* %base to double *

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+reg-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+reg-addr-mode.ll
index ce11d4729f6a6..1c54865bb50bb 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+reg-addr-mode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+reg-addr-mode.ll
@@ -90,7 +90,7 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @ld2.nxv4f64(<vscale x 2
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld3.nxv48i8(<vscale x 16 x i1> %Pg, i8 *%addr, i64 %a) {
 ; CHECK-LABEL: ld3.nxv48i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x1]
+; CHECK-NEXT:    ld3b { z0.b - z2.b }, p0/z, [x0, x1]
 ; CHECK-NEXT:    ret
   %addr2 = getelementptr i8, i8 *  %addr, i64 %a
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3.sret.nxv16i8(<vscale x 16 x i1> %Pg, i8 *%addr2)
@@ -101,7 +101,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld3.nxv48
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @ld3.nxv24i16(<vscale x 8 x i1> %Pg, i16 *%addr, i64 %a) {
 ; CHECK-LABEL: ld3.nxv24i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ld3h { z0.h - z2.h }, p0/z, [x0, x1, lsl #1]
 ; CHECK-NEXT:    ret
   %addr2 = getelementptr i16, i16 *  %addr, i64 %a
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3.sret.nxv8i16(<vscale x 8 x i1> %Pg, i16 *%addr2)
@@ -111,7 +111,7 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @ld3.nxv24
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @ld3.nxv24f16(<vscale x 8 x i1> %Pg, half *%addr, i64 %a) {
 ; CHECK-LABEL: ld3.nxv24f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ld3h { z0.h - z2.h }, p0/z, [x0, x1, lsl #1]
 ; CHECK-NEXT:    ret
   %addr2 = getelementptr half, half *  %addr, i64 %a
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3.sret.nxv8f16(<vscale x 8 x i1> %Pg, half *%addr2)
@@ -121,7 +121,7 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @ld3.nx
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @ld3.nxv24bf16(<vscale x 8 x i1> %Pg, bfloat *%addr, i64 %a) #0 {
 ; CHECK-LABEL: ld3.nxv24bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ld3h { z0.h - z2.h }, p0/z, [x0, x1, lsl #1]
 ; CHECK-NEXT:    ret
   %addr2 = getelementptr bfloat, bfloat *  %addr, i64 %a
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3.sret.nxv8bf16(<vscale x 8 x i1> %Pg, bfloat *%addr2)
@@ -132,7 +132,7 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @ld3.nxv12i32(<vscale x 4 x i1> %Pg, i32 *%addr, i64 %a) {
 ; CHECK-LABEL: ld3.nxv12i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, x1, lsl #2]
+; CHECK-NEXT:    ld3w { z0.s - z2.s }, p0/z, [x0, x1, lsl #2]
 ; CHECK-NEXT:    ret
   %addr2 = getelementptr i32, i32 *  %addr, i64 %a
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32(<vscale x 4 x i1> %Pg, i32 *%addr2)
@@ -142,7 +142,7 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @ld3.nxv12
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @ld3.nxv12f32(<vscale x 4 x i1> %Pg, float *%addr, i64 %a) {
 ; CHECK-LABEL: ld3.nxv12f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, x1, lsl #2]
+; CHECK-NEXT:    ld3w { z0.s - z2.s }, p0/z, [x0, x1, lsl #2]
 ; CHECK-NEXT:    ret
   %addr2 = getelementptr float, float *  %addr, i64 %a
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3.sret.nxv4f32(<vscale x 4 x i1> %Pg, float *%addr2)
@@ -153,7 +153,7 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @ld3
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @ld3.nxv6i64(<vscale x 2 x i1> %Pg, i64 *%addr, i64 %a) {
 ; CHECK-LABEL: ld3.nxv6i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, x1, lsl #3]
+; CHECK-NEXT:    ld3d { z0.d - z2.d }, p0/z, [x0, x1, lsl #3]
 ; CHECK-NEXT:    ret
   %addr2 = getelementptr i64, i64 *  %addr, i64 %a
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64(<vscale x 2 x i1> %Pg, i64 *%addr2)
@@ -163,7 +163,7 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @ld3.nxv6i
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @ld3.nxv6f64(<vscale x 2 x i1> %Pg, double *%addr, i64 %a) {
 ; CHECK-LABEL: ld3.nxv6f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, x1, lsl #3]
+; CHECK-NEXT:    ld3d { z0.d - z2.d }, p0/z, [x0, x1, lsl #3]
 ; CHECK-NEXT:    ret
   %addr2 = getelementptr double, double *  %addr, i64 %a
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3.sret.nxv2f64(<vscale x 2 x i1> %Pg, double *%addr2)
@@ -174,7 +174,7 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld4.nxv64i8(<vscale x 16 x i1> %Pg, i8 *%addr, i64 %a) {
 ; CHECK-LABEL: ld4.nxv64i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x1]
+; CHECK-NEXT:    ld4b { z0.b - z3.b }, p0/z, [x0, x1]
 ; CHECK-NEXT:    ret
   %addr2 = getelementptr i8, i8 *  %addr, i64 %a
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4.sret.nxv16i8(<vscale x 16 x i1> %Pg, i8 *%addr2)
@@ -185,7 +185,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @ld4.nxv32i16(<vscale x 8 x i1> %Pg, i16 *%addr, i64 %a) {
 ; CHECK-LABEL: ld4.nxv32i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ld4h { z0.h - z3.h }, p0/z, [x0, x1, lsl #1]
 ; CHECK-NEXT:    ret
   %addr2 = getelementptr i16, i16 *  %addr, i64 %a
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4.sret.nxv8i16(<vscale x 8 x i1> %Pg, i16 *%addr2)
@@ -195,7 +195,7 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @ld4.nxv32f16(<vscale x 8 x i1> %Pg, half *%addr, i64 %a) {
 ; CHECK-LABEL: ld4.nxv32f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ld4h { z0.h - z3.h }, p0/z, [x0, x1, lsl #1]
 ; CHECK-NEXT:    ret
   %addr2 = getelementptr half, half *  %addr, i64 %a
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld4.sret.nxv8f16(<vscale x 8 x i1> %Pg, half *%addr2)
@@ -205,7 +205,7 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @ld4.nxv32bf16(<vscale x 8 x i1> %Pg, bfloat *%addr, i64 %a) #0 {
 ; CHECK-LABEL: ld4.nxv32bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ld4h { z0.h - z3.h }, p0/z, [x0, x1, lsl #1]
 ; CHECK-NEXT:    ret
   %addr2 = getelementptr bfloat, bfloat *  %addr, i64 %a
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4.sret.nxv8bf16(<vscale x 8 x i1> %Pg, bfloat *%addr2)
@@ -216,7 +216,7 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <v
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @ld4.nxv16i32(<vscale x 4 x i1> %Pg, i32 *%addr, i64 %a) {
 ; CHECK-LABEL: ld4.nxv16i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x1, lsl #2]
+; CHECK-NEXT:    ld4w { z0.s - z3.s }, p0/z, [x0, x1, lsl #2]
 ; CHECK-NEXT:    ret
   %addr2 = getelementptr i32, i32 *  %addr, i64 %a
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> %Pg, i32 *%addr2)
@@ -226,7 +226,7 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @ld4.nxv16f32(<vscale x 4 x i1> %Pg, float *%addr, i64 %a) {
 ; CHECK-LABEL: ld4.nxv16f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x1, lsl #2]
+; CHECK-NEXT:    ld4w { z0.s - z3.s }, p0/z, [x0, x1, lsl #2]
 ; CHECK-NEXT:    ret
   %addr2 = getelementptr float, float *  %addr, i64 %a
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld4.sret.nxv4f32(<vscale x 4 x i1> %Pg, float *%addr2)
@@ -237,7 +237,7 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @ld4.nxv8i64(<vscale x 2 x i1> %Pg, i64 *%addr, i64 %a) {
 ; CHECK-LABEL: ld4.nxv8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x1, lsl #3]
+; CHECK-NEXT:    ld4d { z0.d - z3.d }, p0/z, [x0, x1, lsl #3]
 ; CHECK-NEXT:    ret
   %addr2 = getelementptr i64, i64 *  %addr, i64 %a
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64(<vscale x 2 x i1> %Pg, i64 *%addr2)
@@ -247,7 +247,7 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @ld4.nxv8f64(<vscale x 2 x i1> %Pg, double *%addr, i64 %a) {
 ; CHECK-LABEL: ld4.nxv8f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x1, lsl #3]
+; CHECK-NEXT:    ld4d { z0.d - z3.d }, p0/z, [x0, x1, lsl #3]
 ; CHECK-NEXT:    ret
   %addr2 = getelementptr double, double *  %addr, i64 %a
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1> %Pg, double *%addr2)

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
index e6ca643182b58..3adafc98ef4a7 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
@@ -217,7 +217,7 @@ define void @st3b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    st3b { z0.b, z1.b, z2.b }, p0, [x0, #3, mul vl]
+; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, #3, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 3, i64 0
   call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
@@ -235,7 +235,7 @@ define void @st3b_i8_invalid_imm_not_multiple_of_3_01(<vscale x 16 x i8> %v0, <v
 ; CHECK-NEXT:    rdvl x8, #4
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    st3b { z0.b, z1.b, z2.b }, p0, [x0, x8]
+; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 4, i64 0
   call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
@@ -253,7 +253,7 @@ define void @st3b_i8_invalid_imm_not_multiple_of_3_02(<vscale x 16 x i8> %v0, <v
 ; CHECK-NEXT:    rdvl x8, #5
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    st3b { z0.b, z1.b, z2.b }, p0, [x0, x8]
+; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 5, i64 0
   call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
@@ -271,7 +271,7 @@ define void @st3b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vsc
 ; CHECK-NEXT:    rdvl x8, #-27
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    st3b { z0.b, z1.b, z2.b }, p0, [x0, x8]
+; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -27, i64 0
   call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
@@ -289,7 +289,7 @@ define void @st3b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vsc
 ; CHECK-NEXT:    rdvl x8, #24
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    st3b { z0.b, z1.b, z2.b }, p0, [x0, x8]
+; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 24, i64 0
   call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
@@ -306,7 +306,7 @@ define void @st3b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    st3b { z0.b, z1.b, z2.b }, p0, [x0, #-24, mul vl]
+; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, #-24, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -24, i64 0
   call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
@@ -323,7 +323,7 @@ define void @st3b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    st3b { z0.b, z1.b, z2.b }, p0, [x0, #21, mul vl]
+; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, #21, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 21, i64 0
   call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
@@ -344,7 +344,7 @@ define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    st3h { z0.h, z1.h, z2.h }, p0, [x0, #6, mul vl]
+; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0, #6, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 6, i64 0
   call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> %v0,
@@ -361,7 +361,7 @@ define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    st3h { z0.h, z1.h, z2.h }, p0, [x0, #9, mul vl]
+; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0, #9, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 9, i64 0
   call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> %v0,
@@ -382,7 +382,7 @@ define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    st3w { z0.s, z1.s, z2.s }, p0, [x0, #12, mul vl]
+; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [x0, #12, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 12, i64 0
   call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> %v0,
@@ -399,7 +399,7 @@ define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    st3w { z0.s, z1.s, z2.s }, p0, [x0, #15, mul vl]
+; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [x0, #15, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 15, i64 0
   call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> %v0,
@@ -420,7 +420,7 @@ define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    st3d { z0.d, z1.d, z2.d }, p0, [x0, #18, mul vl]
+; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0, #18, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 18, i64 0
   call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> %v0,
@@ -437,7 +437,7 @@ define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    st3d { z0.d, z1.d, z2.d }, p0, [x0, #-3, mul vl]
+; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0, #-3, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 -3, i64 0
   call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> %v0,
@@ -459,7 +459,7 @@ define void @st4b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #4, mul vl]
+; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, #4, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 4, i64 0
   call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
@@ -479,7 +479,7 @@ define void @st4b_i8_invalid_imm_not_multiple_of_4_01(<vscale x 16 x i8> %v0, <v
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x8]
+; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 5, i64 0
   call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
@@ -499,7 +499,7 @@ define void @st4b_i8_invalid_imm_not_multiple_of_4_02(<vscale x 16 x i8> %v0, <v
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x8]
+; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 6, i64 0
   call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
@@ -519,7 +519,7 @@ define void @st4b_i8_invalid_imm_not_multiple_of_4_03(<vscale x 16 x i8> %v0, <v
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x8]
+; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 7, i64 0
   call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
@@ -542,7 +542,7 @@ define void @st4b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vsc
 ; CHECK-NEXT:    mul x8, x8, x9
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x8]
+; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
 ; FIXME: optimize OFFSET computation so that xOFFSET = (mul (RDVL #4) #9)
 ; xM = -9 * 2^6
@@ -569,7 +569,7 @@ define void @st4b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vsc
 ; CHECK-NEXT:    mul x8, x8, x9
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x8]
+; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
 ; FIXME: optimize OFFSET computation so that xOFFSET = (shl (RDVL #16) #1)
 ; xM = 2^9
@@ -592,7 +592,7 @@ define void @st4b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #-32, mul vl]
+; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, #-32, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -32, i64 0
   call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
@@ -611,7 +611,7 @@ define void @st4b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #28, mul vl]
+; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 28, i64 0
   call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
@@ -634,7 +634,7 @@ define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, #8, mul vl]
+; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0, #8, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 8, i64 0
   call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %v0,
@@ -653,7 +653,7 @@ define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, #12, mul vl]
+; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0, #12, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 12, i64 0
   call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> %v0,
@@ -676,7 +676,7 @@ define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, #16, mul vl]
+; CHECK-NEXT:    st4w { z0.s - z3.s }, p0, [x0, #16, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 16, i64 0
   call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %v0,
@@ -695,7 +695,7 @@ define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, #20, mul vl]
+; CHECK-NEXT:    st4w { z0.s - z3.s }, p0, [x0, #20, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 20, i64 0
   call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> %v0,
@@ -718,7 +718,7 @@ define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, #24, mul vl]
+; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0, #24, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 24, i64 0
   call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %v0,
@@ -737,7 +737,7 @@ define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, #28, mul vl]
+; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 28, i64 0
   call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> %v0,

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
index 464cf97c57036..1d5b0011c20e7 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
@@ -133,7 +133,7 @@ define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    st3b { z0.b, z1.b, z2.b }, p0, [x0, x1]
+; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, x1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i8, i8* %addr, i64 %offset
   call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
@@ -154,7 +154,7 @@ define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    st3h { z0.h, z1.h, z2.h }, p0, [x0, x1, lsl #1]
+; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0, x1, lsl #1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i16, i16* %addr, i64 %offset
   call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> %v0,
@@ -171,7 +171,7 @@ define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    st3h { z0.h, z1.h, z2.h }, p0, [x0, x1, lsl #1]
+; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0, x1, lsl #1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr half, half* %addr, i64 %offset
   call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> %v0,
@@ -192,7 +192,7 @@ define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    st3w { z0.s, z1.s, z2.s }, p0, [x0, x1, lsl #2]
+; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [x0, x1, lsl #2]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i32, i32* %addr, i64 %offset
   call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> %v0,
@@ -209,7 +209,7 @@ define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    st3w { z0.s, z1.s, z2.s }, p0, [x0, x1, lsl #2]
+; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [x0, x1, lsl #2]
 ; CHECK-NEXT:    ret
   %1 = getelementptr float, float* %addr, i64 %offset
   call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> %v0,
@@ -230,7 +230,7 @@ define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    st3d { z0.d, z1.d, z2.d }, p0, [x0, x1, lsl #3]
+; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0, x1, lsl #3]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i64, i64* %addr, i64 %offset
   call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> %v0,
@@ -247,7 +247,7 @@ define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    st3d { z0.d, z1.d, z2.d }, p0, [x0, x1, lsl #3]
+; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0, x1, lsl #3]
 ; CHECK-NEXT:    ret
   %1 = getelementptr double, double* %addr, i64 %offset
   call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> %v0,
@@ -269,7 +269,7 @@ define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x1]
+; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, x1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i8, i8* %addr, i64 %offset
   call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
@@ -292,7 +292,7 @@ define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x1, lsl #1]
+; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0, x1, lsl #1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i16, i16* %addr, i64 %offset
   call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %v0,
@@ -311,7 +311,7 @@ define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x1, lsl #1]
+; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0, x1, lsl #1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr half, half* %addr, i64 %offset
   call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> %v0,
@@ -334,7 +334,7 @@ define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x1, lsl #2]
+; CHECK-NEXT:    st4w { z0.s - z3.s }, p0, [x0, x1, lsl #2]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i32, i32* %addr, i64 %offset
   call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %v0,
@@ -353,7 +353,7 @@ define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x1, lsl #2]
+; CHECK-NEXT:    st4w { z0.s - z3.s }, p0, [x0, x1, lsl #2]
 ; CHECK-NEXT:    ret
   %1 = getelementptr float, float* %addr, i64 %offset
   call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> %v0,
@@ -376,7 +376,7 @@ define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x1, lsl #3]
+; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0, x1, lsl #3]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i64, i64* %addr, i64 %offset
   call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %v0,
@@ -395,7 +395,7 @@ define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x1, lsl #3]
+; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0, x1, lsl #3]
 ; CHECK-NEXT:    ret
   %1 = getelementptr double, double* %addr, i64 %offset
   call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> %v0,

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
index 556ecfe866f5e..3992ce3ff9262 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
@@ -154,7 +154,7 @@ define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    st3b { z0.b, z1.b, z2.b }, p0, [x0]
+; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                           <vscale x 16 x i8> %v1,
@@ -174,7 +174,7 @@ define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    st3h { z0.h, z1.h, z2.h }, p0, [x0]
+; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> %v0,
                                           <vscale x 8 x i16> %v1,
@@ -190,7 +190,7 @@ define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    st3h { z0.h, z1.h, z2.h }, p0, [x0]
+; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> %v0,
                                           <vscale x 8 x half> %v1,
@@ -206,7 +206,7 @@ define void @st3h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vs
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    st3h { z0.h, z1.h, z2.h }, p0, [x0]
+; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv8bf16(<vscale x 8 x bfloat> %v0,
                                           <vscale x 8 x bfloat> %v1,
@@ -226,7 +226,7 @@ define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    st3w { z0.s, z1.s, z2.s }, p0, [x0]
+; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> %v0,
                                           <vscale x 4 x i32> %v1,
@@ -242,7 +242,7 @@ define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    st3w { z0.s, z1.s, z2.s }, p0, [x0]
+; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> %v0,
                                           <vscale x 4 x float> %v1,
@@ -262,7 +262,7 @@ define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    st3d { z0.d, z1.d, z2.d }, p0, [x0]
+; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> %v0,
                                           <vscale x 2 x i64> %v1,
@@ -278,7 +278,7 @@ define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    st3d { z0.d, z1.d, z2.d }, p0, [x0]
+; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> %v0,
                                           <vscale x 2 x double> %v1,
@@ -294,7 +294,7 @@ define void @st3d_ptr(<vscale x 2 x i8*> %v0, <vscale x 2 x i8*> %v1, <vscale x
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    st3d { z0.d, z1.d, z2.d }, p0, [x0]
+; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv2p0i8(<vscale x 2 x i8*> %v0,
                                            <vscale x 2 x i8*> %v1,
@@ -315,7 +315,7 @@ define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0]
+; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                           <vscale x 16 x i8> %v1,
@@ -337,7 +337,7 @@ define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0]
+; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %v0,
                                           <vscale x 8 x i16> %v1,
@@ -355,7 +355,7 @@ define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0]
+; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> %v0,
                                           <vscale x 8 x half> %v1,
@@ -373,7 +373,7 @@ define void @st4h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vs
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0]
+; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat> %v0,
                                           <vscale x 8 x bfloat> %v1,
@@ -395,7 +395,7 @@ define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0]
+; CHECK-NEXT:    st4w { z0.s - z3.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %v0,
                                           <vscale x 4 x i32> %v1,
@@ -413,7 +413,7 @@ define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0]
+; CHECK-NEXT:    st4w { z0.s - z3.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> %v0,
                                           <vscale x 4 x float> %v1,
@@ -435,7 +435,7 @@ define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
+; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %v0,
                                           <vscale x 2 x i64> %v1,
@@ -453,7 +453,7 @@ define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
+; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> %v0,
                                           <vscale x 2 x double> %v1,
@@ -471,7 +471,7 @@ define void @st4d_ptr(<vscale x 2 x i8*> %v0, <vscale x 2 x i8*> %v1, <vscale x
 ; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
+; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv2p0i8(<vscale x 2 x i8*> %v0,
                                            <vscale x 2 x i8*> %v1,

diff  --git a/llvm/test/CodeGen/AArch64/sve-ldN.mir b/llvm/test/CodeGen/AArch64/sve-ldN.mir
index c59c53da806ba..b6b89abc61bfc 100644
--- a/llvm/test/CodeGen/AArch64/sve-ldN.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ldN.mir
@@ -59,22 +59,22 @@ body:             |
     ; CHECK-OFFSET-NEXT: ld2w { z0.s, z1.s }, p0/z, [sp, #14, mul vl]
     ; CHECK-OFFSET-NEXT: ld2d { z0.d, z1.d }, p0/z, [sp, #-16, mul vl]
     ; CHECK-OFFSET-NEXT: ld2d { z0.d, z1.d }, p0/z, [sp, #14, mul vl]
-    ; CHECK-OFFSET-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [sp, #-24, mul vl]
-    ; CHECK-OFFSET-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [sp, #21, mul vl]
-    ; CHECK-OFFSET-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [sp, #-24, mul vl]
-    ; CHECK-OFFSET-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [sp, #21, mul vl]
-    ; CHECK-OFFSET-NEXT: ld3w { z0.s, z1.s, z2.s }, p0/z, [sp, #-24, mul vl]
-    ; CHECK-OFFSET-NEXT: ld3w { z0.s, z1.s, z2.s }, p0/z, [sp, #21, mul vl]
-    ; CHECK-OFFSET-NEXT: ld3d { z0.d, z1.d, z2.d }, p0/z, [sp, #-24, mul vl]
-    ; CHECK-OFFSET-NEXT: ld3d { z0.d, z1.d, z2.d }, p0/z, [sp, #21, mul vl]
-    ; CHECK-OFFSET-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [sp, #-32, mul vl]
-    ; CHECK-OFFSET-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [sp, #28, mul vl]
-    ; CHECK-OFFSET-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [sp, #-32, mul vl]
-    ; CHECK-OFFSET-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [sp, #28, mul vl]
-    ; CHECK-OFFSET-NEXT: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [sp, #-32, mul vl]
-    ; CHECK-OFFSET-NEXT: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [sp, #28, mul vl]
-    ; CHECK-OFFSET-NEXT: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [sp, #-32, mul vl]
-    ; CHECK-OFFSET-NEXT: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [sp, #28, mul vl]
+    ; CHECK-OFFSET-NEXT: ld3b { z0.b - z2.b }, p0/z, [sp, #-24, mul vl]
+    ; CHECK-OFFSET-NEXT: ld3b { z0.b - z2.b }, p0/z, [sp, #21, mul vl]
+    ; CHECK-OFFSET-NEXT: ld3h { z0.h - z2.h }, p0/z, [sp, #-24, mul vl]
+    ; CHECK-OFFSET-NEXT: ld3h { z0.h - z2.h }, p0/z, [sp, #21, mul vl]
+    ; CHECK-OFFSET-NEXT: ld3w { z0.s - z2.s }, p0/z, [sp, #-24, mul vl]
+    ; CHECK-OFFSET-NEXT: ld3w { z0.s - z2.s }, p0/z, [sp, #21, mul vl]
+    ; CHECK-OFFSET-NEXT: ld3d { z0.d - z2.d }, p0/z, [sp, #-24, mul vl]
+    ; CHECK-OFFSET-NEXT: ld3d { z0.d - z2.d }, p0/z, [sp, #21, mul vl]
+    ; CHECK-OFFSET-NEXT: ld4b { z0.b - z3.b }, p0/z, [sp, #-32, mul vl]
+    ; CHECK-OFFSET-NEXT: ld4b { z0.b - z3.b }, p0/z, [sp, #28, mul vl]
+    ; CHECK-OFFSET-NEXT: ld4h { z0.h - z3.h }, p0/z, [sp, #-32, mul vl]
+    ; CHECK-OFFSET-NEXT: ld4h { z0.h - z3.h }, p0/z, [sp, #28, mul vl]
+    ; CHECK-OFFSET-NEXT: ld4w { z0.s - z3.s }, p0/z, [sp, #-32, mul vl]
+    ; CHECK-OFFSET-NEXT: ld4w { z0.s - z3.s }, p0/z, [sp, #28, mul vl]
+    ; CHECK-OFFSET-NEXT: ld4d { z0.d - z3.d }, p0/z, [sp, #-32, mul vl]
+    ; CHECK-OFFSET-NEXT: ld4d { z0.d - z3.d }, p0/z, [sp, #28, mul vl]
     ; CHECK-OFFSET-NEXT: addvl sp, sp, #31
     ; CHECK-OFFSET-NEXT: addvl sp, sp, #1
     ; CHECK-OFFSET-NEXT: ldr x29, [sp], #16
@@ -195,37 +195,37 @@ body:             |
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #2
     ; CHECK-OFFSET-NEXT: ld2d { z0.d, z1.d }, p0/z, [x8, #14, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #-3
-    ; CHECK-OFFSET-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x8, #-24, mul vl]
+    ; CHECK-OFFSET-NEXT: ld3b { z0.b - z2.b }, p0/z, [x8, #-24, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #3
-    ; CHECK-OFFSET-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x8, #21, mul vl]
+    ; CHECK-OFFSET-NEXT: ld3b { z0.b - z2.b }, p0/z, [x8, #21, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #-3
-    ; CHECK-OFFSET-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [x8, #-24, mul vl]
+    ; CHECK-OFFSET-NEXT: ld3h { z0.h - z2.h }, p0/z, [x8, #-24, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #3
-    ; CHECK-OFFSET-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [x8, #21, mul vl]
+    ; CHECK-OFFSET-NEXT: ld3h { z0.h - z2.h }, p0/z, [x8, #21, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #-3
-    ; CHECK-OFFSET-NEXT: ld3w { z0.s, z1.s, z2.s }, p0/z, [x8, #-24, mul vl]
+    ; CHECK-OFFSET-NEXT: ld3w { z0.s - z2.s }, p0/z, [x8, #-24, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #3
-    ; CHECK-OFFSET-NEXT: ld3w { z0.s, z1.s, z2.s }, p0/z, [x8, #21, mul vl]
+    ; CHECK-OFFSET-NEXT: ld3w { z0.s - z2.s }, p0/z, [x8, #21, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #-3
-    ; CHECK-OFFSET-NEXT: ld3d { z0.d, z1.d, z2.d }, p0/z, [x8, #-24, mul vl]
+    ; CHECK-OFFSET-NEXT: ld3d { z0.d - z2.d }, p0/z, [x8, #-24, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #3
-    ; CHECK-OFFSET-NEXT: ld3d { z0.d, z1.d, z2.d }, p0/z, [x8, #21, mul vl]
+    ; CHECK-OFFSET-NEXT: ld3d { z0.d - z2.d }, p0/z, [x8, #21, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #-4
-    ; CHECK-OFFSET-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x8, #-32, mul vl]
+    ; CHECK-OFFSET-NEXT: ld4b { z0.b - z3.b }, p0/z, [x8, #-32, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #4
-    ; CHECK-OFFSET-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x8, #28, mul vl]
+    ; CHECK-OFFSET-NEXT: ld4b { z0.b - z3.b }, p0/z, [x8, #28, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #-4
-    ; CHECK-OFFSET-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x8, #-32, mul vl]
+    ; CHECK-OFFSET-NEXT: ld4h { z0.h - z3.h }, p0/z, [x8, #-32, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #4
-    ; CHECK-OFFSET-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x8, #28, mul vl]
+    ; CHECK-OFFSET-NEXT: ld4h { z0.h - z3.h }, p0/z, [x8, #28, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #-4
-    ; CHECK-OFFSET-NEXT: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x8, #-32, mul vl]
+    ; CHECK-OFFSET-NEXT: ld4w { z0.s - z3.s }, p0/z, [x8, #-32, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #4
-    ; CHECK-OFFSET-NEXT: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x8, #28, mul vl]
+    ; CHECK-OFFSET-NEXT: ld4w { z0.s - z3.s }, p0/z, [x8, #28, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #-4
-    ; CHECK-OFFSET-NEXT: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x8, #-32, mul vl]
+    ; CHECK-OFFSET-NEXT: ld4d { z0.d - z3.d }, p0/z, [x8, #-32, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #4
-    ; CHECK-OFFSET-NEXT: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x8, #28, mul vl]
+    ; CHECK-OFFSET-NEXT: ld4d { z0.d - z3.d }, p0/z, [x8, #28, mul vl]
     ; CHECK-OFFSET-NEXT: addvl sp, sp, #31
     ; CHECK-OFFSET-NEXT: addvl sp, sp, #1
     ; CHECK-OFFSET-NEXT: ldr x29, [sp], #16

diff  --git a/llvm/test/CodeGen/AArch64/sve-stN.mir b/llvm/test/CodeGen/AArch64/sve-stN.mir
index ac5c036a10bd0..7371f30a4a512 100644
--- a/llvm/test/CodeGen/AArch64/sve-stN.mir
+++ b/llvm/test/CodeGen/AArch64/sve-stN.mir
@@ -59,22 +59,22 @@ body:             |
     ; CHECK-OFFSET-NEXT: st2w { z0.s, z1.s }, p0, [sp, #14, mul vl]
     ; CHECK-OFFSET-NEXT: st2d { z0.d, z1.d }, p0, [sp, #-16, mul vl]
     ; CHECK-OFFSET-NEXT: st2d { z0.d, z1.d }, p0, [sp, #14, mul vl]
-    ; CHECK-OFFSET-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [sp, #-24, mul vl]
-    ; CHECK-OFFSET-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [sp, #21, mul vl]
-    ; CHECK-OFFSET-NEXT: st3h { z0.h, z1.h, z2.h }, p0, [sp, #-24, mul vl]
-    ; CHECK-OFFSET-NEXT: st3h { z0.h, z1.h, z2.h }, p0, [sp, #21, mul vl]
-    ; CHECK-OFFSET-NEXT: st3w { z0.s, z1.s, z2.s }, p0, [sp, #-24, mul vl]
-    ; CHECK-OFFSET-NEXT: st3w { z0.s, z1.s, z2.s }, p0, [sp, #21, mul vl]
-    ; CHECK-OFFSET-NEXT: st3d { z0.d, z1.d, z2.d }, p0, [sp, #-24, mul vl]
-    ; CHECK-OFFSET-NEXT: st3d { z0.d, z1.d, z2.d }, p0, [sp, #21, mul vl]
-    ; CHECK-OFFSET-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [sp, #-32, mul vl]
-    ; CHECK-OFFSET-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [sp, #28, mul vl]
-    ; CHECK-OFFSET-NEXT: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [sp, #-32, mul vl]
-    ; CHECK-OFFSET-NEXT: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [sp, #28, mul vl]
-    ; CHECK-OFFSET-NEXT: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [sp, #-32, mul vl]
-    ; CHECK-OFFSET-NEXT: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [sp, #28, mul vl]
-    ; CHECK-OFFSET-NEXT: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [sp, #-32, mul vl]
-    ; CHECK-OFFSET-NEXT: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [sp, #28, mul vl]
+    ; CHECK-OFFSET-NEXT: st3b { z0.b - z2.b }, p0, [sp, #-24, mul vl]
+    ; CHECK-OFFSET-NEXT: st3b { z0.b - z2.b }, p0, [sp, #21, mul vl]
+    ; CHECK-OFFSET-NEXT: st3h { z0.h - z2.h }, p0, [sp, #-24, mul vl]
+    ; CHECK-OFFSET-NEXT: st3h { z0.h - z2.h }, p0, [sp, #21, mul vl]
+    ; CHECK-OFFSET-NEXT: st3w { z0.s - z2.s }, p0, [sp, #-24, mul vl]
+    ; CHECK-OFFSET-NEXT: st3w { z0.s - z2.s }, p0, [sp, #21, mul vl]
+    ; CHECK-OFFSET-NEXT: st3d { z0.d - z2.d }, p0, [sp, #-24, mul vl]
+    ; CHECK-OFFSET-NEXT: st3d { z0.d - z2.d }, p0, [sp, #21, mul vl]
+    ; CHECK-OFFSET-NEXT: st4b { z0.b - z3.b }, p0, [sp, #-32, mul vl]
+    ; CHECK-OFFSET-NEXT: st4b { z0.b - z3.b }, p0, [sp, #28, mul vl]
+    ; CHECK-OFFSET-NEXT: st4h { z0.h - z3.h }, p0, [sp, #-32, mul vl]
+    ; CHECK-OFFSET-NEXT: st4h { z0.h - z3.h }, p0, [sp, #28, mul vl]
+    ; CHECK-OFFSET-NEXT: st4w { z0.s - z3.s }, p0, [sp, #-32, mul vl]
+    ; CHECK-OFFSET-NEXT: st4w { z0.s - z3.s }, p0, [sp, #28, mul vl]
+    ; CHECK-OFFSET-NEXT: st4d { z0.d - z3.d }, p0, [sp, #-32, mul vl]
+    ; CHECK-OFFSET-NEXT: st4d { z0.d - z3.d }, p0, [sp, #28, mul vl]
     ; CHECK-OFFSET-NEXT: addvl sp, sp, #31
     ; CHECK-OFFSET-NEXT: addvl sp, sp, #1
     ; CHECK-OFFSET-NEXT: ldr x29, [sp], #16
@@ -195,37 +195,37 @@ body:             |
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #2
     ; CHECK-OFFSET-NEXT: st2d { z0.d, z1.d }, p0, [x8, #14, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #-3
-    ; CHECK-OFFSET-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x8, #-24, mul vl]
+    ; CHECK-OFFSET-NEXT: st3b { z0.b - z2.b }, p0, [x8, #-24, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #3
-    ; CHECK-OFFSET-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x8, #21, mul vl]
+    ; CHECK-OFFSET-NEXT: st3b { z0.b - z2.b }, p0, [x8, #21, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #-3
-    ; CHECK-OFFSET-NEXT: st3h { z0.h, z1.h, z2.h }, p0, [x8, #-24, mul vl]
+    ; CHECK-OFFSET-NEXT: st3h { z0.h - z2.h }, p0, [x8, #-24, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #3
-    ; CHECK-OFFSET-NEXT: st3h { z0.h, z1.h, z2.h }, p0, [x8, #21, mul vl]
+    ; CHECK-OFFSET-NEXT: st3h { z0.h - z2.h }, p0, [x8, #21, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #-3
-    ; CHECK-OFFSET-NEXT: st3w { z0.s, z1.s, z2.s }, p0, [x8, #-24, mul vl]
+    ; CHECK-OFFSET-NEXT: st3w { z0.s - z2.s }, p0, [x8, #-24, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #3
-    ; CHECK-OFFSET-NEXT: st3w { z0.s, z1.s, z2.s }, p0, [x8, #21, mul vl]
+    ; CHECK-OFFSET-NEXT: st3w { z0.s - z2.s }, p0, [x8, #21, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #-3
-    ; CHECK-OFFSET-NEXT: st3d { z0.d, z1.d, z2.d }, p0, [x8, #-24, mul vl]
+    ; CHECK-OFFSET-NEXT: st3d { z0.d - z2.d }, p0, [x8, #-24, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #3
-    ; CHECK-OFFSET-NEXT: st3d { z0.d, z1.d, z2.d }, p0, [x8, #21, mul vl]
+    ; CHECK-OFFSET-NEXT: st3d { z0.d - z2.d }, p0, [x8, #21, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #-4
-    ; CHECK-OFFSET-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x8, #-32, mul vl]
+    ; CHECK-OFFSET-NEXT: st4b { z0.b - z3.b }, p0, [x8, #-32, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #4
-    ; CHECK-OFFSET-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x8, #28, mul vl]
+    ; CHECK-OFFSET-NEXT: st4b { z0.b - z3.b }, p0, [x8, #28, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #-4
-    ; CHECK-OFFSET-NEXT: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x8, #-32, mul vl]
+    ; CHECK-OFFSET-NEXT: st4h { z0.h - z3.h }, p0, [x8, #-32, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #4
-    ; CHECK-OFFSET-NEXT: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x8, #28, mul vl]
+    ; CHECK-OFFSET-NEXT: st4h { z0.h - z3.h }, p0, [x8, #28, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #-4
-    ; CHECK-OFFSET-NEXT: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x8, #-32, mul vl]
+    ; CHECK-OFFSET-NEXT: st4w { z0.s - z3.s }, p0, [x8, #-32, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #4
-    ; CHECK-OFFSET-NEXT: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x8, #28, mul vl]
+    ; CHECK-OFFSET-NEXT: st4w { z0.s - z3.s }, p0, [x8, #28, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #-4
-    ; CHECK-OFFSET-NEXT: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x8, #-32, mul vl]
+    ; CHECK-OFFSET-NEXT: st4d { z0.d - z3.d }, p0, [x8, #-32, mul vl]
     ; CHECK-OFFSET-NEXT: addvl x8, sp, #4
-    ; CHECK-OFFSET-NEXT: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x8, #28, mul vl]
+    ; CHECK-OFFSET-NEXT: st4d { z0.d - z3.d }, p0, [x8, #28, mul vl]
     ; CHECK-OFFSET-NEXT: addvl sp, sp, #31
     ; CHECK-OFFSET-NEXT: addvl sp, sp, #1
     ; CHECK-OFFSET-NEXT: ldr x29, [sp], #16

diff  --git a/llvm/test/MC/AArch64/SVE/ld3b.s b/llvm/test/MC/AArch64/SVE/ld3b.s
index f5d67870d145f..c986fff658b58 100644
--- a/llvm/test/MC/AArch64/SVE/ld3b.s
+++ b/llvm/test/MC/AArch64/SVE/ld3b.s
@@ -10,31 +10,31 @@
 // RUN:   | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 ld3b    { z0.b, z1.b, z2.b }, p0/z, [x0, x0]
-// CHECK-INST: ld3b    { z0.b, z1.b, z2.b }, p0/z, [x0, x0]
+// CHECK-INST: ld3b    { z0.b - z2.b }, p0/z, [x0, x0]
 // CHECK-ENCODING: [0x00,0xc0,0x40,0xa4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a440c000 <unknown>
 
 ld3b    { z5.b, z6.b, z7.b }, p3/z, [x17, x16]
-// CHECK-INST: ld3b    { z5.b, z6.b, z7.b }, p3/z, [x17, x16]
+// CHECK-INST: ld3b    { z5.b - z7.b }, p3/z, [x17, x16]
 // CHECK-ENCODING: [0x25,0xce,0x50,0xa4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a450ce25 <unknown>
 
 ld3b    { z0.b, z1.b, z2.b }, p0/z, [x0]
-// CHECK-INST: ld3b    { z0.b, z1.b, z2.b }, p0/z, [x0]
+// CHECK-INST: ld3b    { z0.b - z2.b }, p0/z, [x0]
 // CHECK-ENCODING: [0x00,0xe0,0x40,0xa4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a440e000 <unknown>
 
 ld3b    { z23.b, z24.b, z25.b }, p3/z, [x13, #-24, mul vl]
-// CHECK-INST: ld3b    { z23.b, z24.b, z25.b }, p3/z, [x13, #-24, mul vl]
+// CHECK-INST: ld3b    { z23.b - z25.b }, p3/z, [x13, #-24, mul vl]
 // CHECK-ENCODING: [0xb7,0xed,0x48,0xa4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a448edb7 <unknown>
 
 ld3b    { z21.b, z22.b, z23.b }, p5/z, [x10, #15, mul vl]
-// CHECK-INST: ld3b    { z21.b, z22.b, z23.b }, p5/z, [x10, #15, mul vl]
+// CHECK-INST: ld3b    { z21.b - z23.b }, p5/z, [x10, #15, mul vl]
 // CHECK-ENCODING: [0x55,0xf5,0x45,0xa4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a445f555 <unknown>

diff  --git a/llvm/test/MC/AArch64/SVE/ld3d.s b/llvm/test/MC/AArch64/SVE/ld3d.s
index 4a82a8a6feb2f..3daad4603ff61 100644
--- a/llvm/test/MC/AArch64/SVE/ld3d.s
+++ b/llvm/test/MC/AArch64/SVE/ld3d.s
@@ -10,31 +10,31 @@
 // RUN:   | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 ld3d    { z0.d, z1.d, z2.d }, p0/z, [x0, x0, lsl #3]
-// CHECK-INST: ld3d    { z0.d, z1.d, z2.d }, p0/z, [x0, x0, lsl #3]
+// CHECK-INST: ld3d    { z0.d - z2.d }, p0/z, [x0, x0, lsl #3]
 // CHECK-ENCODING: [0x00,0xc0,0xc0,0xa5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a5c0c000 <unknown>
 
 ld3d    { z5.d, z6.d, z7.d }, p3/z, [x17, x16, lsl #3]
-// CHECK-INST: ld3d    { z5.d, z6.d, z7.d }, p3/z, [x17, x16, lsl #3]
+// CHECK-INST: ld3d    { z5.d - z7.d }, p3/z, [x17, x16, lsl #3]
 // CHECK-ENCODING: [0x25,0xce,0xd0,0xa5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a5d0ce25 <unknown>
 
 ld3d    { z0.d, z1.d, z2.d }, p0/z, [x0]
-// CHECK-INST: ld3d    { z0.d, z1.d, z2.d }, p0/z, [x0]
+// CHECK-INST: ld3d    { z0.d - z2.d }, p0/z, [x0]
 // CHECK-ENCODING: [0x00,0xe0,0xc0,0xa5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a5c0e000 <unknown>
 
 ld3d    { z23.d, z24.d, z25.d }, p3/z, [x13, #-24, mul vl]
-// CHECK-INST: ld3d    { z23.d, z24.d, z25.d }, p3/z, [x13, #-24, mul vl]
+// CHECK-INST: ld3d    { z23.d - z25.d }, p3/z, [x13, #-24, mul vl]
 // CHECK-ENCODING: [0xb7,0xed,0xc8,0xa5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a5c8edb7 <unknown>
 
 ld3d    { z21.d, z22.d, z23.d }, p5/z, [x10, #15, mul vl]
-// CHECK-INST: ld3d    { z21.d, z22.d, z23.d }, p5/z, [x10, #15, mul vl]
+// CHECK-INST: ld3d    { z21.d - z23.d }, p5/z, [x10, #15, mul vl]
 // CHECK-ENCODING: [0x55,0xf5,0xc5,0xa5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a5c5f555 <unknown>

diff  --git a/llvm/test/MC/AArch64/SVE/ld3h.s b/llvm/test/MC/AArch64/SVE/ld3h.s
index e323623f1a4c1..9470a94b579cd 100644
--- a/llvm/test/MC/AArch64/SVE/ld3h.s
+++ b/llvm/test/MC/AArch64/SVE/ld3h.s
@@ -10,31 +10,37 @@
 // RUN:   | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 ld3h    { z0.h, z1.h, z2.h }, p0/z, [x0, x0, lsl #1]
-// CHECK-INST: ld3h    { z0.h, z1.h, z2.h }, p0/z, [x0, x0, lsl #1]
+// CHECK-INST: ld3h    { z0.h - z2.h }, p0/z, [x0, x0, lsl #1]
 // CHECK-ENCODING: [0x00,0xc0,0xc0,0xa4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a4c0c000 <unknown>
 
 ld3h    { z5.h, z6.h, z7.h }, p3/z, [x17, x16, lsl #1]
-// CHECK-INST: ld3h    { z5.h, z6.h, z7.h }, p3/z, [x17, x16, lsl #1]
+// CHECK-INST: ld3h    { z5.h - z7.h }, p3/z, [x17, x16, lsl #1]
 // CHECK-ENCODING: [0x25,0xce,0xd0,0xa4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a4d0ce25 <unknown>
 
 ld3h    { z0.h, z1.h, z2.h }, p0/z, [x0]
-// CHECK-INST: ld3h    { z0.h, z1.h, z2.h }, p0/z, [x0]
+// CHECK-INST: ld3h    { z0.h - z2.h }, p0/z, [x0]
 // CHECK-ENCODING: [0x00,0xe0,0xc0,0xa4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a4c0e000 <unknown>
 
 ld3h    { z23.h, z24.h, z25.h }, p3/z, [x13, #-24, mul vl]
-// CHECK-INST: ld3h    { z23.h, z24.h, z25.h }, p3/z, [x13, #-24, mul vl]
+// CHECK-INST: ld3h    { z23.h - z25.h }, p3/z, [x13, #-24, mul vl]
 // CHECK-ENCODING: [0xb7,0xed,0xc8,0xa4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a4c8edb7 <unknown>
 
 ld3h    { z21.h, z22.h, z23.h }, p5/z, [x10, #15, mul vl]
-// CHECK-INST: ld3h    { z21.h, z22.h, z23.h }, p5/z, [x10, #15, mul vl]
+// CHECK-INST: ld3h    { z21.h - z23.h }, p5/z, [x10, #15, mul vl]
 // CHECK-ENCODING: [0x55,0xf5,0xc5,0xa4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a4c5f555 <unknown>
+
+ld3h    { z30.h, z31.h, z0.h }, p5/z, [x10, #15, mul vl]
+// CHECK-INST: ld3h    { z30.h, z31.h, z0.h }, p5/z, [x10, #15, mul vl]
+// CHECK-ENCODING: [0x5e,0xf5,0xc5,0xa4]
+// CHECK-ERROR: instruction requires: sve or sme
+// CHECK-UNKNOWN: a4c5f55e <unknown>

diff  --git a/llvm/test/MC/AArch64/SVE/ld3w.s b/llvm/test/MC/AArch64/SVE/ld3w.s
index a7c8debc059ab..e8f3f55a5f6ff 100644
--- a/llvm/test/MC/AArch64/SVE/ld3w.s
+++ b/llvm/test/MC/AArch64/SVE/ld3w.s
@@ -10,31 +10,31 @@
 // RUN:   | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 ld3w    { z0.s, z1.s, z2.s }, p0/z, [x0, x0, lsl #2]
-// CHECK-INST: ld3w    { z0.s, z1.s, z2.s }, p0/z, [x0, x0, lsl #2]
+// CHECK-INST: ld3w    { z0.s - z2.s }, p0/z, [x0, x0, lsl #2]
 // CHECK-ENCODING: [0x00,0xc0,0x40,0xa5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a540c000 <unknown>
 
 ld3w    { z5.s, z6.s, z7.s }, p3/z, [x17, x16, lsl #2]
-// CHECK-INST: ld3w    { z5.s, z6.s, z7.s }, p3/z, [x17, x16, lsl #2]
+// CHECK-INST: ld3w    { z5.s - z7.s }, p3/z, [x17, x16, lsl #2]
 // CHECK-ENCODING: [0x25,0xce,0x50,0xa5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a550ce25 <unknown>
 
 ld3w    { z0.s, z1.s, z2.s }, p0/z, [x0]
-// CHECK-INST: ld3w    { z0.s, z1.s, z2.s }, p0/z, [x0]
+// CHECK-INST: ld3w    { z0.s - z2.s }, p0/z, [x0]
 // CHECK-ENCODING: [0x00,0xe0,0x40,0xa5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a540e000 <unknown>
 
 ld3w    { z23.s, z24.s, z25.s }, p3/z, [x13, #-24, mul vl]
-// CHECK-INST: ld3w    { z23.s, z24.s, z25.s }, p3/z, [x13, #-24, mul vl]
+// CHECK-INST: ld3w    { z23.s - z25.s }, p3/z, [x13, #-24, mul vl]
 // CHECK-ENCODING: [0xb7,0xed,0x48,0xa5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a548edb7 <unknown>
 
 ld3w    { z21.s, z22.s, z23.s }, p5/z, [x10, #15, mul vl]
-// CHECK-INST: ld3w    { z21.s, z22.s, z23.s }, p5/z, [x10, #15, mul vl]
+// CHECK-INST: ld3w    { z21.s - z23.s }, p5/z, [x10, #15, mul vl]
 // CHECK-ENCODING: [0x55,0xf5,0x45,0xa5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a545f555 <unknown>

diff  --git a/llvm/test/MC/AArch64/SVE/ld4b.s b/llvm/test/MC/AArch64/SVE/ld4b.s
index de4a58bbc1320..bfb576df84043 100644
--- a/llvm/test/MC/AArch64/SVE/ld4b.s
+++ b/llvm/test/MC/AArch64/SVE/ld4b.s
@@ -10,31 +10,31 @@
 // RUN:   | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 ld4b    { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x0]
-// CHECK-INST: ld4b    { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x0]
+// CHECK-INST: ld4b    { z0.b - z3.b }, p0/z, [x0, x0]
 // CHECK-ENCODING: [0x00,0xc0,0x60,0xa4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a460c000 <unknown>
 
 ld4b    { z5.b, z6.b, z7.b, z8.b }, p3/z, [x17, x16]
-// CHECK-INST: ld4b    { z5.b, z6.b, z7.b, z8.b }, p3/z, [x17, x16]
+// CHECK-INST: ld4b    { z5.b - z8.b }, p3/z, [x17, x16]
 // CHECK-ENCODING: [0x25,0xce,0x70,0xa4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a470ce25 <unknown>
 
 ld4b    { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0]
-// CHECK-INST: ld4b    { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0]
+// CHECK-INST: ld4b    { z0.b - z3.b }, p0/z, [x0]
 // CHECK-ENCODING: [0x00,0xe0,0x60,0xa4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a460e000 <unknown>
 
 ld4b    { z23.b, z24.b, z25.b, z26.b }, p3/z, [x13, #-32, mul vl]
-// CHECK-INST: ld4b    { z23.b, z24.b, z25.b, z26.b }, p3/z, [x13, #-32, mul vl]
+// CHECK-INST: ld4b    { z23.b - z26.b }, p3/z, [x13, #-32, mul vl]
 // CHECK-ENCODING: [0xb7,0xed,0x68,0xa4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a468edb7 <unknown>
 
 ld4b    { z21.b, z22.b, z23.b, z24.b }, p5/z, [x10, #20, mul vl]
-// CHECK-INST: ld4b    { z21.b, z22.b, z23.b, z24.b }, p5/z, [x10, #20, mul vl]
+// CHECK-INST: ld4b    { z21.b - z24.b }, p5/z, [x10, #20, mul vl]
 // CHECK-ENCODING: [0x55,0xf5,0x65,0xa4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a465f555 <unknown>

diff  --git a/llvm/test/MC/AArch64/SVE/ld4d.s b/llvm/test/MC/AArch64/SVE/ld4d.s
index 034a446de5254..59b72f98a6218 100644
--- a/llvm/test/MC/AArch64/SVE/ld4d.s
+++ b/llvm/test/MC/AArch64/SVE/ld4d.s
@@ -10,31 +10,31 @@
 // RUN:   | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 ld4d    { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x0, lsl #3]
-// CHECK-INST: ld4d    { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x0, lsl #3]
+// CHECK-INST: ld4d    { z0.d - z3.d }, p0/z, [x0, x0, lsl #3]
 // CHECK-ENCODING: [0x00,0xc0,0xe0,0xa5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a5e0c000 <unknown>
 
 ld4d    { z5.d, z6.d, z7.d, z8.d }, p3/z, [x17, x16, lsl #3]
-// CHECK-INST: ld4d    { z5.d, z6.d, z7.d, z8.d }, p3/z, [x17, x16, lsl #3]
+// CHECK-INST: ld4d    { z5.d - z8.d }, p3/z, [x17, x16, lsl #3]
 // CHECK-ENCODING: [0x25,0xce,0xf0,0xa5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a5f0ce25 <unknown>
 
 ld4d    { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0]
-// CHECK-INST: ld4d    { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0]
+// CHECK-INST: ld4d    { z0.d - z3.d }, p0/z, [x0]
 // CHECK-ENCODING: [0x00,0xe0,0xe0,0xa5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a5e0e000 <unknown>
 
 ld4d    { z23.d, z24.d, z25.d, z26.d }, p3/z, [x13, #-32, mul vl]
-// CHECK-INST: ld4d    { z23.d, z24.d, z25.d, z26.d }, p3/z, [x13, #-32, mul vl]
+// CHECK-INST: ld4d    { z23.d - z26.d }, p3/z, [x13, #-32, mul vl]
 // CHECK-ENCODING: [0xb7,0xed,0xe8,0xa5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a5e8edb7 <unknown>
 
 ld4d    { z21.d, z22.d, z23.d, z24.d }, p5/z, [x10, #20, mul vl]
-// CHECK-INST: ld4d    { z21.d, z22.d, z23.d, z24.d }, p5/z, [x10, #20, mul vl]
+// CHECK-INST: ld4d    { z21.d - z24.d }, p5/z, [x10, #20, mul vl]
 // CHECK-ENCODING: [0x55,0xf5,0xe5,0xa5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a5e5f555 <unknown>

diff  --git a/llvm/test/MC/AArch64/SVE/ld4h.s b/llvm/test/MC/AArch64/SVE/ld4h.s
index 6b6371b8fe2a6..3df9bd0ccb26e 100644
--- a/llvm/test/MC/AArch64/SVE/ld4h.s
+++ b/llvm/test/MC/AArch64/SVE/ld4h.s
@@ -10,31 +10,37 @@
 // RUN:   | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 ld4h    { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x0, lsl #1]
-// CHECK-INST: ld4h    { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x0, lsl #1]
+// CHECK-INST: ld4h    { z0.h - z3.h }, p0/z, [x0, x0, lsl #1]
 // CHECK-ENCODING: [0x00,0xc0,0xe0,0xa4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a4e0c000 <unknown>
 
 ld4h    { z5.h, z6.h, z7.h, z8.h }, p3/z, [x17, x16, lsl #1]
-// CHECK-INST: ld4h    { z5.h, z6.h, z7.h, z8.h }, p3/z, [x17, x16, lsl #1]
+// CHECK-INST: ld4h    { z5.h - z8.h }, p3/z, [x17, x16, lsl #1]
 // CHECK-ENCODING: [0x25,0xce,0xf0,0xa4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a4f0ce25 <unknown>
 
 ld4h    { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0]
-// CHECK-INST: ld4h    { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0]
+// CHECK-INST: ld4h    { z0.h - z3.h }, p0/z, [x0]
 // CHECK-ENCODING: [0x00,0xe0,0xe0,0xa4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a4e0e000 <unknown>
 
 ld4h    { z23.h, z24.h, z25.h, z26.h }, p3/z, [x13, #-32, mul vl]
-// CHECK-INST: ld4h    { z23.h, z24.h, z25.h, z26.h }, p3/z, [x13, #-32, mul vl]
+// CHECK-INST: ld4h    { z23.h - z26.h }, p3/z, [x13, #-32, mul vl]
 // CHECK-ENCODING: [0xb7,0xed,0xe8,0xa4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a4e8edb7 <unknown>
 
 ld4h    { z21.h, z22.h, z23.h, z24.h }, p5/z, [x10, #20, mul vl]
-// CHECK-INST: ld4h    { z21.h, z22.h, z23.h, z24.h }, p5/z, [x10, #20, mul vl]
+// CHECK-INST: ld4h    { z21.h - z24.h }, p5/z, [x10, #20, mul vl]
 // CHECK-ENCODING: [0x55,0xf5,0xe5,0xa4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a4e5f555 <unknown>
+
+ld4h    { z31.h, z0.h, z1.h, z2.h }, p5/z, [x10, #20, mul vl]
+// CHECK-INST: ld4h    { z31.h, z0.h, z1.h, z2.h }, p5/z, [x10, #20, mul vl]
+// CHECK-ENCODING: [0x5f,0xf5,0xe5,0xa4]
+// CHECK-ERROR: instruction requires: sve or sme
+// CHECK-UNKNOWN: a4e5f55f <unknown>

diff  --git a/llvm/test/MC/AArch64/SVE/ld4w.s b/llvm/test/MC/AArch64/SVE/ld4w.s
index c5cf15b62e469..2b03447945925 100644
--- a/llvm/test/MC/AArch64/SVE/ld4w.s
+++ b/llvm/test/MC/AArch64/SVE/ld4w.s
@@ -10,31 +10,31 @@
 // RUN:   | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 ld4w    { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x0, lsl #2]
-// CHECK-INST: ld4w    { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x0, lsl #2]
+// CHECK-INST: ld4w    { z0.s - z3.s }, p0/z, [x0, x0, lsl #2]
 // CHECK-ENCODING: [0x00,0xc0,0x60,0xa5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a560c000 <unknown>
 
 ld4w    { z5.s, z6.s, z7.s, z8.s }, p3/z, [x17, x16, lsl #2]
-// CHECK-INST: ld4w    { z5.s, z6.s, z7.s, z8.s }, p3/z, [x17, x16, lsl #2]
+// CHECK-INST: ld4w    { z5.s - z8.s }, p3/z, [x17, x16, lsl #2]
 // CHECK-ENCODING: [0x25,0xce,0x70,0xa5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a570ce25 <unknown>
 
 ld4w    { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0]
-// CHECK-INST: ld4w    { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0]
+// CHECK-INST: ld4w    { z0.s - z3.s }, p0/z, [x0]
 // CHECK-ENCODING: [0x00,0xe0,0x60,0xa5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a560e000 <unknown>
 
 ld4w    { z23.s, z24.s, z25.s, z26.s }, p3/z, [x13, #-32, mul vl]
-// CHECK-INST: ld4w    { z23.s, z24.s, z25.s, z26.s }, p3/z, [x13, #-32, mul vl]
+// CHECK-INST: ld4w    { z23.s - z26.s }, p3/z, [x13, #-32, mul vl]
 // CHECK-ENCODING: [0xb7,0xed,0x68,0xa5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a568edb7 <unknown>
 
 ld4w    { z21.s, z22.s, z23.s, z24.s }, p5/z, [x10, #20, mul vl]
-// CHECK-INST: ld4w    { z21.s, z22.s, z23.s, z24.s }, p5/z, [x10, #20, mul vl]
+// CHECK-INST: ld4w    { z21.s - z24.s }, p5/z, [x10, #20, mul vl]
 // CHECK-ENCODING: [0x55,0xf5,0x65,0xa5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: a565f555 <unknown>

diff  --git a/llvm/test/MC/AArch64/SVE/st3b.s b/llvm/test/MC/AArch64/SVE/st3b.s
index 7049a2da9b98b..7ed86472103f6 100644
--- a/llvm/test/MC/AArch64/SVE/st3b.s
+++ b/llvm/test/MC/AArch64/SVE/st3b.s
@@ -10,31 +10,31 @@
 // RUN:   | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 st3b    { z0.b, z1.b, z2.b }, p0, [x0, x0]
-// CHECK-INST: st3b    { z0.b, z1.b, z2.b }, p0, [x0, x0]
+// CHECK-INST: st3b    { z0.b - z2.b }, p0, [x0, x0]
 // CHECK-ENCODING: [0x00,0x60,0x40,0xe4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e4406000 <unknown>
 
 st3b    { z5.b, z6.b, z7.b }, p3, [x17, x16]
-// CHECK-INST: st3b    { z5.b, z6.b, z7.b }, p3, [x17, x16]
+// CHECK-INST: st3b    { z5.b - z7.b }, p3, [x17, x16]
 // CHECK-ENCODING: [0x25,0x6e,0x50,0xe4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e4506e25 <unknown>
 
 st3b    { z0.b, z1.b, z2.b }, p0, [x0]
-// CHECK-INST: st3b    { z0.b, z1.b, z2.b }, p0, [x0]
+// CHECK-INST: st3b    { z0.b - z2.b }, p0, [x0]
 // CHECK-ENCODING: [0x00,0xe0,0x50,0xe4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e450e000 <unknown>
 
 st3b    { z23.b, z24.b, z25.b }, p3, [x13, #-24, mul vl]
-// CHECK-INST: st3b    { z23.b, z24.b, z25.b }, p3, [x13, #-24, mul vl]
+// CHECK-INST: st3b    { z23.b - z25.b }, p3, [x13, #-24, mul vl]
 // CHECK-ENCODING: [0xb7,0xed,0x58,0xe4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e458edb7 <unknown>
 
 st3b    { z21.b, z22.b, z23.b }, p5, [x10, #15, mul vl]
-// CHECK-INST: st3b    { z21.b, z22.b, z23.b }, p5, [x10, #15, mul vl]
+// CHECK-INST: st3b    { z21.b - z23.b }, p5, [x10, #15, mul vl]
 // CHECK-ENCODING: [0x55,0xf5,0x55,0xe4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e455f555 <unknown>

diff  --git a/llvm/test/MC/AArch64/SVE/st3d.s b/llvm/test/MC/AArch64/SVE/st3d.s
index 0a2285e7f0d47..e020906ce076c 100644
--- a/llvm/test/MC/AArch64/SVE/st3d.s
+++ b/llvm/test/MC/AArch64/SVE/st3d.s
@@ -10,31 +10,31 @@
 // RUN:   | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 st3d    { z0.d, z1.d, z2.d }, p0, [x0, x0, lsl #3]
-// CHECK-INST: st3d    { z0.d, z1.d, z2.d }, p0, [x0, x0, lsl #3]
+// CHECK-INST: st3d    { z0.d - z2.d }, p0, [x0, x0, lsl #3]
 // CHECK-ENCODING: [0x00,0x60,0xc0,0xe5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e5c06000 <unknown>
 
 st3d    { z5.d, z6.d, z7.d }, p3, [x17, x16, lsl #3]
-// CHECK-INST: st3d    { z5.d, z6.d, z7.d }, p3, [x17, x16, lsl #3]
+// CHECK-INST: st3d    { z5.d - z7.d }, p3, [x17, x16, lsl #3]
 // CHECK-ENCODING: [0x25,0x6e,0xd0,0xe5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e5d06e25 <unknown>
 
 st3d    { z0.d, z1.d, z2.d }, p0, [x0]
-// CHECK-INST: st3d    { z0.d, z1.d, z2.d }, p0, [x0]
+// CHECK-INST: st3d    { z0.d - z2.d }, p0, [x0]
 // CHECK-ENCODING: [0x00,0xe0,0xd0,0xe5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e5d0e000 <unknown>
 
 st3d    { z23.d, z24.d, z25.d }, p3, [x13, #-24, mul vl]
-// CHECK-INST: st3d    { z23.d, z24.d, z25.d }, p3, [x13, #-24, mul vl]
+// CHECK-INST: st3d    { z23.d - z25.d }, p3, [x13, #-24, mul vl]
 // CHECK-ENCODING: [0xb7,0xed,0xd8,0xe5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e5d8edb7 <unknown>
 
 st3d    { z21.d, z22.d, z23.d }, p5, [x10, #15, mul vl]
-// CHECK-INST: st3d    { z21.d, z22.d, z23.d }, p5, [x10, #15, mul vl]
+// CHECK-INST: st3d    { z21.d - z23.d }, p5, [x10, #15, mul vl]
 // CHECK-ENCODING: [0x55,0xf5,0xd5,0xe5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e5d5f555 <unknown>

diff  --git a/llvm/test/MC/AArch64/SVE/st3h.s b/llvm/test/MC/AArch64/SVE/st3h.s
index c0a00d0bb87dc..0b79a23f70ab7 100644
--- a/llvm/test/MC/AArch64/SVE/st3h.s
+++ b/llvm/test/MC/AArch64/SVE/st3h.s
@@ -10,31 +10,37 @@
 // RUN:   | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 st3h    { z0.h, z1.h, z2.h }, p0, [x0, x0, lsl #1]
-// CHECK-INST: st3h    { z0.h, z1.h, z2.h }, p0, [x0, x0, lsl #1]
+// CHECK-INST: st3h    { z0.h - z2.h }, p0, [x0, x0, lsl #1]
 // CHECK-ENCODING: [0x00,0x60,0xc0,0xe4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e4c06000 <unknown>
 
 st3h    { z5.h, z6.h, z7.h }, p3, [x17, x16, lsl #1]
-// CHECK-INST: st3h    { z5.h, z6.h, z7.h }, p3, [x17, x16, lsl #1]
+// CHECK-INST: st3h    { z5.h - z7.h }, p3, [x17, x16, lsl #1]
 // CHECK-ENCODING: [0x25,0x6e,0xd0,0xe4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e4d06e25 <unknown>
 
 st3h    { z0.h, z1.h, z2.h }, p0, [x0]
-// CHECK-INST: st3h    { z0.h, z1.h, z2.h }, p0, [x0]
+// CHECK-INST: st3h    { z0.h - z2.h }, p0, [x0]
 // CHECK-ENCODING: [0x00,0xe0,0xd0,0xe4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e4d0e000 <unknown>
 
 st3h    { z23.h, z24.h, z25.h }, p3, [x13, #-24, mul vl]
-// CHECK-INST: st3h    { z23.h, z24.h, z25.h }, p3, [x13, #-24, mul vl]
+// CHECK-INST: st3h    { z23.h - z25.h }, p3, [x13, #-24, mul vl]
 // CHECK-ENCODING: [0xb7,0xed,0xd8,0xe4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e4d8edb7 <unknown>
 
 st3h    { z21.h, z22.h, z23.h }, p5, [x10, #15, mul vl]
-// CHECK-INST: st3h    { z21.h, z22.h, z23.h }, p5, [x10, #15, mul vl]
+// CHECK-INST: st3h    { z21.h - z23.h }, p5, [x10, #15, mul vl]
 // CHECK-ENCODING: [0x55,0xf5,0xd5,0xe4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e4d5f555 <unknown>
+
+st3h    { z31.h, z0.h, z1.h }, p5, [x10, #15, mul vl]
+// CHECK-INST: st3h    { z31.h, z0.h, z1.h }, p5, [x10, #15, mul vl]
+// CHECK-ENCODING: [0x5f,0xf5,0xd5,0xe4]
+// CHECK-ERROR: instruction requires: sve or sme
+// CHECK-UNKNOWN: e4d5f55f <unknown>

diff  --git a/llvm/test/MC/AArch64/SVE/st3w.s b/llvm/test/MC/AArch64/SVE/st3w.s
index 7738c0e8cd0a1..8ff579e7c9650 100644
--- a/llvm/test/MC/AArch64/SVE/st3w.s
+++ b/llvm/test/MC/AArch64/SVE/st3w.s
@@ -10,31 +10,31 @@
 // RUN:   | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 st3w    { z0.s, z1.s, z2.s }, p0, [x0, x0, lsl #2]
-// CHECK-INST: st3w    { z0.s, z1.s, z2.s }, p0, [x0, x0, lsl #2]
+// CHECK-INST: st3w    { z0.s - z2.s }, p0, [x0, x0, lsl #2]
 // CHECK-ENCODING: [0x00,0x60,0x40,0xe5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e5406000 <unknown>
 
 st3w    { z5.s, z6.s, z7.s }, p3, [x17, x16, lsl #2]
-// CHECK-INST: st3w    { z5.s, z6.s, z7.s }, p3, [x17, x16, lsl #2]
+// CHECK-INST: st3w    { z5.s - z7.s }, p3, [x17, x16, lsl #2]
 // CHECK-ENCODING: [0x25,0x6e,0x50,0xe5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e5506e25 <unknown>
 
 st3w    { z0.s, z1.s, z2.s }, p0, [x0]
-// CHECK-INST: st3w    { z0.s, z1.s, z2.s }, p0, [x0]
+// CHECK-INST: st3w    { z0.s - z2.s }, p0, [x0]
 // CHECK-ENCODING: [0x00,0xe0,0x50,0xe5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e550e000 <unknown>
 
 st3w    { z23.s, z24.s, z25.s }, p3, [x13, #-24, mul vl]
-// CHECK-INST: st3w    { z23.s, z24.s, z25.s }, p3, [x13, #-24, mul vl]
+// CHECK-INST: st3w    { z23.s - z25.s }, p3, [x13, #-24, mul vl]
 // CHECK-ENCODING: [0xb7,0xed,0x58,0xe5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e558edb7 <unknown>
 
 st3w    { z21.s, z22.s, z23.s }, p5, [x10, #15, mul vl]
-// CHECK-INST: st3w    { z21.s, z22.s, z23.s }, p5, [x10, #15, mul vl]
+// CHECK-INST: st3w    { z21.s - z23.s }, p5, [x10, #15, mul vl]
 // CHECK-ENCODING: [0x55,0xf5,0x55,0xe5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e555f555 <unknown>

diff  --git a/llvm/test/MC/AArch64/SVE/st4b.s b/llvm/test/MC/AArch64/SVE/st4b.s
index 115e8e0fc605c..86c80d7f468c9 100644
--- a/llvm/test/MC/AArch64/SVE/st4b.s
+++ b/llvm/test/MC/AArch64/SVE/st4b.s
@@ -10,31 +10,31 @@
 // RUN:   | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 st4b    { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x0]
-// CHECK-INST: st4b    { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x0]
+// CHECK-INST: st4b    { z0.b - z3.b }, p0, [x0, x0]
 // CHECK-ENCODING: [0x00,0x60,0x60,0xe4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e4606000 <unknown>
 
 st4b    { z5.b, z6.b, z7.b, z8.b }, p3, [x17, x16]
-// CHECK-INST: st4b    { z5.b, z6.b, z7.b, z8.b }, p3, [x17, x16]
+// CHECK-INST: st4b    { z5.b - z8.b }, p3, [x17, x16]
 // CHECK-ENCODING: [0x25,0x6e,0x70,0xe4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e4706e25 <unknown>
 
 st4b    { z0.b, z1.b, z2.b, z3.b }, p0, [x0]
-// CHECK-INST: st4b    { z0.b, z1.b, z2.b, z3.b }, p0, [x0]
+// CHECK-INST: st4b    { z0.b - z3.b }, p0, [x0]
 // CHECK-ENCODING: [0x00,0xe0,0x70,0xe4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e470e000 <unknown>
 
 st4b    { z23.b, z24.b, z25.b, z26.b }, p3, [x13, #-32, mul vl]
-// CHECK-INST: st4b    { z23.b, z24.b, z25.b, z26.b }, p3, [x13, #-32, mul vl]
+// CHECK-INST: st4b    { z23.b - z26.b }, p3, [x13, #-32, mul vl]
 // CHECK-ENCODING: [0xb7,0xed,0x78,0xe4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e478edb7 <unknown>
 
 st4b    { z21.b, z22.b, z23.b, z24.b }, p5, [x10, #20, mul vl]
-// CHECK-INST: st4b    { z21.b, z22.b, z23.b, z24.b }, p5, [x10, #20, mul vl]
+// CHECK-INST: st4b    { z21.b - z24.b }, p5, [x10, #20, mul vl]
 // CHECK-ENCODING: [0x55,0xf5,0x75,0xe4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e475f555 <unknown>

diff  --git a/llvm/test/MC/AArch64/SVE/st4d.s b/llvm/test/MC/AArch64/SVE/st4d.s
index d1890839f571e..17cacea3e4f81 100644
--- a/llvm/test/MC/AArch64/SVE/st4d.s
+++ b/llvm/test/MC/AArch64/SVE/st4d.s
@@ -10,31 +10,31 @@
 // RUN:   | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 st4d    { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x0, lsl #3]
-// CHECK-INST: st4d    { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x0, lsl #3]
+// CHECK-INST: st4d    { z0.d - z3.d }, p0, [x0, x0, lsl #3]
 // CHECK-ENCODING: [0x00,0x60,0xe0,0xe5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e5e06000 <unknown>
 
 st4d    { z5.d, z6.d, z7.d, z8.d }, p3, [x17, x16, lsl #3]
-// CHECK-INST: st4d    { z5.d, z6.d, z7.d, z8.d }, p3, [x17, x16, lsl #3]
+// CHECK-INST: st4d    { z5.d - z8.d }, p3, [x17, x16, lsl #3]
 // CHECK-ENCODING: [0x25,0x6e,0xf0,0xe5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e5f06e25 <unknown>
 
 st4d    { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
-// CHECK-INST: st4d    { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
+// CHECK-INST: st4d    { z0.d - z3.d }, p0, [x0]
 // CHECK-ENCODING: [0x00,0xe0,0xf0,0xe5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e5f0e000 <unknown>
 
 st4d    { z23.d, z24.d, z25.d, z26.d }, p3, [x13, #-32, mul vl]
-// CHECK-INST: st4d    { z23.d, z24.d, z25.d, z26.d }, p3, [x13, #-32, mul vl]
+// CHECK-INST: st4d    { z23.d - z26.d }, p3, [x13, #-32, mul vl]
 // CHECK-ENCODING: [0xb7,0xed,0xf8,0xe5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e5f8edb7 <unknown>
 
 st4d    { z21.d, z22.d, z23.d, z24.d }, p5, [x10, #20, mul vl]
-// CHECK-INST: st4d    { z21.d, z22.d, z23.d, z24.d }, p5, [x10, #20, mul vl]
+// CHECK-INST: st4d    { z21.d - z24.d }, p5, [x10, #20, mul vl]
 // CHECK-ENCODING: [0x55,0xf5,0xf5,0xe5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e5f5f555 <unknown>

diff  --git a/llvm/test/MC/AArch64/SVE/st4h.s b/llvm/test/MC/AArch64/SVE/st4h.s
index 1feb6f3ddf9ac..7587724ce4edd 100644
--- a/llvm/test/MC/AArch64/SVE/st4h.s
+++ b/llvm/test/MC/AArch64/SVE/st4h.s
@@ -10,31 +10,37 @@
 // RUN:   | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 st4h    { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x0, lsl #1]
-// CHECK-INST: st4h    { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x0, lsl #1]
+// CHECK-INST: st4h    { z0.h - z3.h }, p0, [x0, x0, lsl #1]
 // CHECK-ENCODING: [0x00,0x60,0xe0,0xe4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e4e06000 <unknown>
 
 st4h    { z5.h, z6.h, z7.h, z8.h }, p3, [x17, x16, lsl #1]
-// CHECK-INST: st4h    { z5.h, z6.h, z7.h, z8.h }, p3, [x17, x16, lsl #1]
+// CHECK-INST: st4h    { z5.h - z8.h }, p3, [x17, x16, lsl #1]
 // CHECK-ENCODING: [0x25,0x6e,0xf0,0xe4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e4f06e25 <unknown>
 
 st4h    { z0.h, z1.h, z2.h, z3.h }, p0, [x0]
-// CHECK-INST: st4h    { z0.h, z1.h, z2.h, z3.h }, p0, [x0]
+// CHECK-INST: st4h    { z0.h - z3.h }, p0, [x0]
 // CHECK-ENCODING: [0x00,0xe0,0xf0,0xe4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e4f0e000 <unknown>
 
 st4h    { z23.h, z24.h, z25.h, z26.h }, p3, [x13, #-32, mul vl]
-// CHECK-INST: st4h    { z23.h, z24.h, z25.h, z26.h }, p3, [x13, #-32, mul vl]
+// CHECK-INST: st4h    { z23.h - z26.h }, p3, [x13, #-32, mul vl]
 // CHECK-ENCODING: [0xb7,0xed,0xf8,0xe4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e4f8edb7 <unknown>
 
 st4h    { z21.h, z22.h, z23.h, z24.h }, p5, [x10, #20, mul vl]
-// CHECK-INST: st4h    { z21.h, z22.h, z23.h, z24.h }, p5, [x10, #20, mul vl]
+// CHECK-INST: st4h    { z21.h - z24.h }, p5, [x10, #20, mul vl]
 // CHECK-ENCODING: [0x55,0xf5,0xf5,0xe4]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e4f5f555 <unknown>
+
+st4h    { z29.h, z30.h, z31.h, z0.h }, p5, [x10, #20, mul vl]
+// CHECK-INST: st4h    { z29.h, z30.h, z31.h, z0.h }, p5, [x10, #20, mul vl]
+// CHECK-ENCODING: [0x5d,0xf5,0xf5,0xe4]
+// CHECK-ERROR: instruction requires: sve or sme
+// CHECK-UNKNOWN: e4f5f55d <unknown>

diff  --git a/llvm/test/MC/AArch64/SVE/st4w.s b/llvm/test/MC/AArch64/SVE/st4w.s
index 278b9652fbe06..5d6b31573639f 100644
--- a/llvm/test/MC/AArch64/SVE/st4w.s
+++ b/llvm/test/MC/AArch64/SVE/st4w.s
@@ -10,31 +10,31 @@
 // RUN:   | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 st4w    { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x0, lsl #2]
-// CHECK-INST: st4w    { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x0, lsl #2]
+// CHECK-INST: st4w    { z0.s - z3.s }, p0, [x0, x0, lsl #2]
 // CHECK-ENCODING: [0x00,0x60,0x60,0xe5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e5606000 <unknown>
 
 st4w    { z5.s, z6.s, z7.s, z8.s }, p3, [x17, x16, lsl #2]
-// CHECK-INST: st4w    { z5.s, z6.s, z7.s, z8.s }, p3, [x17, x16, lsl #2]
+// CHECK-INST: st4w    { z5.s - z8.s }, p3, [x17, x16, lsl #2]
 // CHECK-ENCODING: [0x25,0x6e,0x70,0xe5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e5706e25 <unknown>
 
 st4w    { z0.s, z1.s, z2.s, z3.s }, p0, [x0]
-// CHECK-INST: st4w    { z0.s, z1.s, z2.s, z3.s }, p0, [x0]
+// CHECK-INST: st4w    { z0.s - z3.s }, p0, [x0]
 // CHECK-ENCODING: [0x00,0xe0,0x70,0xe5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e570e000 <unknown>
 
 st4w    { z23.s, z24.s, z25.s, z26.s }, p3, [x13, #-32, mul vl]
-// CHECK-INST: st4w    { z23.s, z24.s, z25.s, z26.s }, p3, [x13, #-32, mul vl]
+// CHECK-INST: st4w    { z23.s - z26.s }, p3, [x13, #-32, mul vl]
 // CHECK-ENCODING: [0xb7,0xed,0x78,0xe5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e578edb7 <unknown>
 
 st4w    { z21.s, z22.s, z23.s, z24.s }, p5, [x10, #20, mul vl]
-// CHECK-INST: st4w    { z21.s, z22.s, z23.s, z24.s }, p5, [x10, #20, mul vl]
+// CHECK-INST: st4w    { z21.s - z24.s }, p5, [x10, #20, mul vl]
 // CHECK-ENCODING: [0x55,0xf5,0x75,0xe5]
 // CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e575f555 <unknown>

diff  --git a/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s
index 0f1d46f43bd4b..ee1eb521a1e2b 100644
--- a/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s
@@ -3568,46 +3568,46 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  2      11    1.00    *             U     ld2w	{ z21.s, z22.s }, p5/z, [x10, #10, mul vl]
 # CHECK-NEXT:  2      11    1.00    *             U     ld2w	{ z23.s, z24.s }, p3/z, [x13, #-16, mul vl]
 # CHECK-NEXT:  3      12    1.50    *             U     ld2w	{ z5.s, z6.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT:  4      15    6.50    *             U     ld3b	{ z0.b, z1.b, z2.b }, p0/z, [x0, x0]
-# CHECK-NEXT:  4      15    6.50    *             U     ld3b	{ z0.b, z1.b, z2.b }, p0/z, [x0]
-# CHECK-NEXT:  4      15    6.50    *             U     ld3b	{ z21.b, z22.b, z23.b }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT:  4      15    6.50    *             U     ld3b	{ z23.b, z24.b, z25.b }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT:  4      15    6.50    *             U     ld3b	{ z5.b, z6.b, z7.b }, p3/z, [x17, x16]
-# CHECK-NEXT:  4      12    2.00    *             U     ld3d	{ z0.d, z1.d, z2.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT:  3      11    1.50    *             U     ld3d	{ z0.d, z1.d, z2.d }, p0/z, [x0]
-# CHECK-NEXT:  3      11    1.50    *             U     ld3d	{ z21.d, z22.d, z23.d }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT:  3      11    1.50    *             U     ld3d	{ z23.d, z24.d, z25.d }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT:  4      12    2.00    *             U     ld3d	{ z5.d, z6.d, z7.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT:  4      15    6.50    *             U     ld3h	{ z0.h, z1.h, z2.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT:  4      15    6.50    *             U     ld3h	{ z0.h, z1.h, z2.h }, p0/z, [x0]
-# CHECK-NEXT:  4      15    6.50    *             U     ld3h	{ z21.h, z22.h, z23.h }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT:  4      15    6.50    *             U     ld3h	{ z23.h, z24.h, z25.h }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT:  4      15    6.50    *             U     ld3h	{ z5.h, z6.h, z7.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT:  4      12    2.00    *             U     ld3w	{ z0.s, z1.s, z2.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT:  3      11    1.50    *             U     ld3w	{ z0.s, z1.s, z2.s }, p0/z, [x0]
-# CHECK-NEXT:  3      11    1.50    *             U     ld3w	{ z21.s, z22.s, z23.s }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT:  3      11    1.50    *             U     ld3w	{ z23.s, z24.s, z25.s }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT:  4      12    2.00    *             U     ld3w	{ z5.s, z6.s, z7.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT:  5      15    8.50    *             U     ld4b	{ z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x0]
-# CHECK-NEXT:  5      15    8.50    *             U     ld4b	{ z0.b, z1.b, z2.b, z3.b }, p0/z, [x0]
-# CHECK-NEXT:  5      15    8.50    *             U     ld4b	{ z21.b, z22.b, z23.b, z24.b }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT:  5      15    8.50    *             U     ld4b	{ z23.b, z24.b, z25.b, z26.b }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT:  5      15    8.50    *             U     ld4b	{ z5.b, z6.b, z7.b, z8.b }, p3/z, [x17, x16]
-# CHECK-NEXT:  5      12    2.50    *             U     ld4d	{ z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT:  4      11    2.00    *             U     ld4d	{ z0.d, z1.d, z2.d, z3.d }, p0/z, [x0]
-# CHECK-NEXT:  4      11    2.00    *             U     ld4d	{ z21.d, z22.d, z23.d, z24.d }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT:  4      11    2.00    *             U     ld4d	{ z23.d, z24.d, z25.d, z26.d }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT:  5      12    2.50    *             U     ld4d	{ z5.d, z6.d, z7.d, z8.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT:  5      15    8.50    *             U     ld4h	{ z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT:  5      15    8.50    *             U     ld4h	{ z0.h, z1.h, z2.h, z3.h }, p0/z, [x0]
-# CHECK-NEXT:  5      15    8.50    *             U     ld4h	{ z21.h, z22.h, z23.h, z24.h }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT:  5      15    8.50    *             U     ld4h	{ z23.h, z24.h, z25.h, z26.h }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT:  5      15    8.50    *             U     ld4h	{ z5.h, z6.h, z7.h, z8.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT:  5      12    2.50    *             U     ld4w	{ z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT:  4      11    2.00    *             U     ld4w	{ z0.s, z1.s, z2.s, z3.s }, p0/z, [x0]
-# CHECK-NEXT:  4      11    2.00    *             U     ld4w	{ z21.s, z22.s, z23.s, z24.s }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT:  4      11    2.00    *             U     ld4w	{ z23.s, z24.s, z25.s, z26.s }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT:  5      12    2.50    *             U     ld4w	{ z5.s, z6.s, z7.s, z8.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT:  4      15    6.50    *             U     ld3b	{ z0.b - z2.b }, p0/z, [x0, x0]
+# CHECK-NEXT:  4      15    6.50    *             U     ld3b	{ z0.b - z2.b }, p0/z, [x0]
+# CHECK-NEXT:  4      15    6.50    *             U     ld3b	{ z21.b - z23.b }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT:  4      15    6.50    *             U     ld3b	{ z23.b - z25.b }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT:  4      15    6.50    *             U     ld3b	{ z5.b - z7.b }, p3/z, [x17, x16]
+# CHECK-NEXT:  4      12    2.00    *             U     ld3d	{ z0.d - z2.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT:  3      11    1.50    *             U     ld3d	{ z0.d - z2.d }, p0/z, [x0]
+# CHECK-NEXT:  3      11    1.50    *             U     ld3d	{ z21.d - z23.d }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT:  3      11    1.50    *             U     ld3d	{ z23.d - z25.d }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT:  4      12    2.00    *             U     ld3d	{ z5.d - z7.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT:  4      15    6.50    *             U     ld3h	{ z0.h - z2.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT:  4      15    6.50    *             U     ld3h	{ z0.h - z2.h }, p0/z, [x0]
+# CHECK-NEXT:  4      15    6.50    *             U     ld3h	{ z21.h - z23.h }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT:  4      15    6.50    *             U     ld3h	{ z23.h - z25.h }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT:  4      15    6.50    *             U     ld3h	{ z5.h - z7.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT:  4      12    2.00    *             U     ld3w	{ z0.s - z2.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT:  3      11    1.50    *             U     ld3w	{ z0.s - z2.s }, p0/z, [x0]
+# CHECK-NEXT:  3      11    1.50    *             U     ld3w	{ z21.s - z23.s }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT:  3      11    1.50    *             U     ld3w	{ z23.s - z25.s }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT:  4      12    2.00    *             U     ld3w	{ z5.s - z7.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT:  5      15    8.50    *             U     ld4b	{ z0.b - z3.b }, p0/z, [x0, x0]
+# CHECK-NEXT:  5      15    8.50    *             U     ld4b	{ z0.b - z3.b }, p0/z, [x0]
+# CHECK-NEXT:  5      15    8.50    *             U     ld4b	{ z21.b - z24.b }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT:  5      15    8.50    *             U     ld4b	{ z23.b - z26.b }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT:  5      15    8.50    *             U     ld4b	{ z5.b - z8.b }, p3/z, [x17, x16]
+# CHECK-NEXT:  5      12    2.50    *             U     ld4d	{ z0.d - z3.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT:  4      11    2.00    *             U     ld4d	{ z0.d - z3.d }, p0/z, [x0]
+# CHECK-NEXT:  4      11    2.00    *             U     ld4d	{ z21.d - z24.d }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT:  4      11    2.00    *             U     ld4d	{ z23.d - z26.d }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT:  5      12    2.50    *             U     ld4d	{ z5.d - z8.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT:  5      15    8.50    *             U     ld4h	{ z0.h - z3.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT:  5      15    8.50    *             U     ld4h	{ z0.h - z3.h }, p0/z, [x0]
+# CHECK-NEXT:  5      15    8.50    *             U     ld4h	{ z21.h - z24.h }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT:  5      15    8.50    *             U     ld4h	{ z23.h - z26.h }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT:  5      15    8.50    *             U     ld4h	{ z5.h - z8.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT:  5      12    2.50    *             U     ld4w	{ z0.s - z3.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT:  4      11    2.00    *             U     ld4w	{ z0.s - z3.s }, p0/z, [x0]
+# CHECK-NEXT:  4      11    2.00    *             U     ld4w	{ z21.s - z24.s }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT:  4      11    2.00    *             U     ld4w	{ z23.s - z26.s }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT:  5      12    2.50    *             U     ld4w	{ z5.s - z8.s }, p3/z, [x17, x16, lsl #2]
 # CHECK-NEXT:  1      11    0.50    *             U     ldff1b	{ z0.d }, p0/z, [x0, x0]
 # CHECK-NEXT:  1      16    2.00    *             U     ldff1b	{ z0.d }, p0/z, [z0.d]
 # CHECK-NEXT:  1      11    0.50    *             U     ldff1b	{ z0.h }, p0/z, [x0, x0]
@@ -4580,46 +4580,46 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  3      12    2.00           *      U     st2w	{ z21.s, z22.s }, p5, [x10, #10, mul vl]
 # CHECK-NEXT:  3      12    2.00           *      U     st2w	{ z23.s, z24.s }, p3, [x13, #-16, mul vl]
 # CHECK-NEXT:  2      11    2.00           *      U     st2w	{ z5.s, z6.s }, p3, [x17, x16, lsl #2]
-# CHECK-NEXT:  4      15    12.00          *      U     st3b	{ z0.b, z1.b, z2.b }, p0, [x0, x0]
-# CHECK-NEXT:  4      15    12.00          *      U     st3b	{ z0.b, z1.b, z2.b }, p0, [x0]
-# CHECK-NEXT:  4      15    12.00          *      U     st3b	{ z21.b, z22.b, z23.b }, p5, [x10, #15, mul vl]
-# CHECK-NEXT:  4      15    12.00          *      U     st3b	{ z23.b, z24.b, z25.b }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT:  4      15    12.00          *      U     st3b	{ z5.b, z6.b, z7.b }, p3, [x17, x16]
-# CHECK-NEXT:  3      11    3.00           *      U     st3d	{ z0.d, z1.d, z2.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT:  4      12    3.00           *      U     st3d	{ z0.d, z1.d, z2.d }, p0, [x0]
-# CHECK-NEXT:  4      12    3.00           *      U     st3d	{ z21.d, z22.d, z23.d }, p5, [x10, #15, mul vl]
-# CHECK-NEXT:  4      12    3.00           *      U     st3d	{ z23.d, z24.d, z25.d }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT:  3      11    3.00           *      U     st3d	{ z5.d, z6.d, z7.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT:  4      15    12.00          *      U     st3h	{ z0.h, z1.h, z2.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT:  4      15    12.00          *      U     st3h	{ z0.h, z1.h, z2.h }, p0, [x0]
-# CHECK-NEXT:  4      15    12.00          *      U     st3h	{ z21.h, z22.h, z23.h }, p5, [x10, #15, mul vl]
-# CHECK-NEXT:  4      15    12.00          *      U     st3h	{ z23.h, z24.h, z25.h }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT:  4      15    12.00          *      U     st3h	{ z5.h, z6.h, z7.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT:  3      11    3.00           *      U     st3w	{ z0.s, z1.s, z2.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT:  4      12    3.00           *      U     st3w	{ z0.s, z1.s, z2.s }, p0, [x0]
-# CHECK-NEXT:  4      12    3.00           *      U     st3w	{ z21.s, z22.s, z23.s }, p5, [x10, #15, mul vl]
-# CHECK-NEXT:  4      12    3.00           *      U     st3w	{ z23.s, z24.s, z25.s }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT:  3      11    3.00           *      U     st3w	{ z5.s, z6.s, z7.s }, p3, [x17, x16, lsl #2]
-# CHECK-NEXT:  5      15    16.00          *      U     st4b	{ z0.b, z1.b, z2.b, z3.b }, p0, [x0, x0]
-# CHECK-NEXT:  5      15    16.00          *      U     st4b	{ z0.b, z1.b, z2.b, z3.b }, p0, [x0]
-# CHECK-NEXT:  5      15    16.00          *      U     st4b	{ z21.b, z22.b, z23.b, z24.b }, p5, [x10, #20, mul vl]
-# CHECK-NEXT:  5      15    16.00          *      U     st4b	{ z23.b, z24.b, z25.b, z26.b }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT:  5      15    16.00          *      U     st4b	{ z5.b, z6.b, z7.b, z8.b }, p3, [x17, x16]
-# CHECK-NEXT:  4      11    4.00           *      U     st4d	{ z0.d, z1.d, z2.d, z3.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT:  5      12    4.00           *      U     st4d	{ z0.d, z1.d, z2.d, z3.d }, p0, [x0]
-# CHECK-NEXT:  5      12    4.00           *      U     st4d	{ z21.d, z22.d, z23.d, z24.d }, p5, [x10, #20, mul vl]
-# CHECK-NEXT:  5      12    4.00           *      U     st4d	{ z23.d, z24.d, z25.d, z26.d }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT:  4      11    4.00           *      U     st4d	{ z5.d, z6.d, z7.d, z8.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT:  5      15    16.00          *      U     st4h	{ z0.h, z1.h, z2.h, z3.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT:  5      15    16.00          *      U     st4h	{ z0.h, z1.h, z2.h, z3.h }, p0, [x0]
-# CHECK-NEXT:  5      15    16.00          *      U     st4h	{ z21.h, z22.h, z23.h, z24.h }, p5, [x10, #20, mul vl]
-# CHECK-NEXT:  5      15    16.00          *      U     st4h	{ z23.h, z24.h, z25.h, z26.h }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT:  5      15    16.00          *      U     st4h	{ z5.h, z6.h, z7.h, z8.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT:  4      11    4.00           *      U     st4w	{ z0.s, z1.s, z2.s, z3.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT:  5      12    4.00           *      U     st4w	{ z0.s, z1.s, z2.s, z3.s }, p0, [x0]
-# CHECK-NEXT:  5      12    4.00           *      U     st4w	{ z21.s, z22.s, z23.s, z24.s }, p5, [x10, #20, mul vl]
-# CHECK-NEXT:  5      12    4.00           *      U     st4w	{ z23.s, z24.s, z25.s, z26.s }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT:  4      11    4.00           *      U     st4w	{ z5.s, z6.s, z7.s, z8.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT:  4      15    12.00          *      U     st3b	{ z0.b - z2.b }, p0, [x0, x0]
+# CHECK-NEXT:  4      15    12.00          *      U     st3b	{ z0.b - z2.b }, p0, [x0]
+# CHECK-NEXT:  4      15    12.00          *      U     st3b	{ z21.b - z23.b }, p5, [x10, #15, mul vl]
+# CHECK-NEXT:  4      15    12.00          *      U     st3b	{ z23.b - z25.b }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT:  4      15    12.00          *      U     st3b	{ z5.b - z7.b }, p3, [x17, x16]
+# CHECK-NEXT:  3      11    3.00           *      U     st3d	{ z0.d - z2.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT:  4      12    3.00           *      U     st3d	{ z0.d - z2.d }, p0, [x0]
+# CHECK-NEXT:  4      12    3.00           *      U     st3d	{ z21.d - z23.d }, p5, [x10, #15, mul vl]
+# CHECK-NEXT:  4      12    3.00           *      U     st3d	{ z23.d - z25.d }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT:  3      11    3.00           *      U     st3d	{ z5.d - z7.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT:  4      15    12.00          *      U     st3h	{ z0.h - z2.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT:  4      15    12.00          *      U     st3h	{ z0.h - z2.h }, p0, [x0]
+# CHECK-NEXT:  4      15    12.00          *      U     st3h	{ z21.h - z23.h }, p5, [x10, #15, mul vl]
+# CHECK-NEXT:  4      15    12.00          *      U     st3h	{ z23.h - z25.h }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT:  4      15    12.00          *      U     st3h	{ z5.h - z7.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT:  3      11    3.00           *      U     st3w	{ z0.s - z2.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT:  4      12    3.00           *      U     st3w	{ z0.s - z2.s }, p0, [x0]
+# CHECK-NEXT:  4      12    3.00           *      U     st3w	{ z21.s - z23.s }, p5, [x10, #15, mul vl]
+# CHECK-NEXT:  4      12    3.00           *      U     st3w	{ z23.s - z25.s }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT:  3      11    3.00           *      U     st3w	{ z5.s - z7.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT:  5      15    16.00          *      U     st4b	{ z0.b - z3.b }, p0, [x0, x0]
+# CHECK-NEXT:  5      15    16.00          *      U     st4b	{ z0.b - z3.b }, p0, [x0]
+# CHECK-NEXT:  5      15    16.00          *      U     st4b	{ z21.b - z24.b }, p5, [x10, #20, mul vl]
+# CHECK-NEXT:  5      15    16.00          *      U     st4b	{ z23.b - z26.b }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT:  5      15    16.00          *      U     st4b	{ z5.b - z8.b }, p3, [x17, x16]
+# CHECK-NEXT:  4      11    4.00           *      U     st4d	{ z0.d - z3.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT:  5      12    4.00           *      U     st4d	{ z0.d - z3.d }, p0, [x0]
+# CHECK-NEXT:  5      12    4.00           *      U     st4d	{ z21.d - z24.d }, p5, [x10, #20, mul vl]
+# CHECK-NEXT:  5      12    4.00           *      U     st4d	{ z23.d - z26.d }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT:  4      11    4.00           *      U     st4d	{ z5.d - z8.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT:  5      15    16.00          *      U     st4h	{ z0.h - z3.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT:  5      15    16.00          *      U     st4h	{ z0.h - z3.h }, p0, [x0]
+# CHECK-NEXT:  5      15    16.00          *      U     st4h	{ z21.h - z24.h }, p5, [x10, #20, mul vl]
+# CHECK-NEXT:  5      15    16.00          *      U     st4h	{ z23.h - z26.h }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT:  5      15    16.00          *      U     st4h	{ z5.h - z8.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT:  4      11    4.00           *      U     st4w	{ z0.s - z3.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT:  5      12    4.00           *      U     st4w	{ z0.s - z3.s }, p0, [x0]
+# CHECK-NEXT:  5      12    4.00           *      U     st4w	{ z21.s - z24.s }, p5, [x10, #20, mul vl]
+# CHECK-NEXT:  5      12    4.00           *      U     st4w	{ z23.s - z26.s }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT:  4      11    4.00           *      U     st4w	{ z5.s - z8.s }, p3, [x17, x16, lsl #2]
 # CHECK-NEXT:  1      11    1.00           *            stnt1b	{ z0.b }, p0, [x0, x0]
 # CHECK-NEXT:  1      11    1.00           *            stnt1b	{ z0.b }, p0, [x0]
 # CHECK-NEXT:  1      11    1.00           *            stnt1b	{ z21.b }, p5, [x10, #7, mul vl]
@@ -6080,46 +6080,46 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  -     1.00   1.00    -      -      -      -      -     ld2w	{ z21.s, z22.s }, p5/z, [x10, #10, mul vl]
 # CHECK-NEXT:  -     1.00   1.00    -      -      -      -      -     ld2w	{ z23.s, z24.s }, p3/z, [x13, #-16, mul vl]
 # CHECK-NEXT:  -     1.50   1.50    -      -      -      -      -     ld2w	{ z5.s, z6.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT:  -     6.50   6.50    -      -      -      -      -     ld3b	{ z0.b, z1.b, z2.b }, p0/z, [x0, x0]
-# CHECK-NEXT:  -     6.50   6.50    -      -      -      -      -     ld3b	{ z0.b, z1.b, z2.b }, p0/z, [x0]
-# CHECK-NEXT:  -     6.50   6.50    -      -      -      -      -     ld3b	{ z21.b, z22.b, z23.b }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT:  -     6.50   6.50    -      -      -      -      -     ld3b	{ z23.b, z24.b, z25.b }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT:  -     6.50   6.50    -      -      -      -      -     ld3b	{ z5.b, z6.b, z7.b }, p3/z, [x17, x16]
-# CHECK-NEXT:  -     2.00   2.00    -      -      -      -      -     ld3d	{ z0.d, z1.d, z2.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT:  -     1.50   1.50    -      -      -      -      -     ld3d	{ z0.d, z1.d, z2.d }, p0/z, [x0]
-# CHECK-NEXT:  -     1.50   1.50    -      -      -      -      -     ld3d	{ z21.d, z22.d, z23.d }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT:  -     1.50   1.50    -      -      -      -      -     ld3d	{ z23.d, z24.d, z25.d }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT:  -     2.00   2.00    -      -      -      -      -     ld3d	{ z5.d, z6.d, z7.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT:  -     6.50   6.50    -      -      -      -      -     ld3h	{ z0.h, z1.h, z2.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT:  -     6.50   6.50    -      -      -      -      -     ld3h	{ z0.h, z1.h, z2.h }, p0/z, [x0]
-# CHECK-NEXT:  -     6.50   6.50    -      -      -      -      -     ld3h	{ z21.h, z22.h, z23.h }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT:  -     6.50   6.50    -      -      -      -      -     ld3h	{ z23.h, z24.h, z25.h }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT:  -     6.50   6.50    -      -      -      -      -     ld3h	{ z5.h, z6.h, z7.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT:  -     2.00   2.00    -      -      -      -      -     ld3w	{ z0.s, z1.s, z2.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT:  -     1.50   1.50    -      -      -      -      -     ld3w	{ z0.s, z1.s, z2.s }, p0/z, [x0]
-# CHECK-NEXT:  -     1.50   1.50    -      -      -      -      -     ld3w	{ z21.s, z22.s, z23.s }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT:  -     1.50   1.50    -      -      -      -      -     ld3w	{ z23.s, z24.s, z25.s }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT:  -     2.00   2.00    -      -      -      -      -     ld3w	{ z5.s, z6.s, z7.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT:  -     8.50   8.50    -      -      -      -      -     ld4b	{ z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x0]
-# CHECK-NEXT:  -     8.50   8.50    -      -      -      -      -     ld4b	{ z0.b, z1.b, z2.b, z3.b }, p0/z, [x0]
-# CHECK-NEXT:  -     8.50   8.50    -      -      -      -      -     ld4b	{ z21.b, z22.b, z23.b, z24.b }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT:  -     8.50   8.50    -      -      -      -      -     ld4b	{ z23.b, z24.b, z25.b, z26.b }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT:  -     8.50   8.50    -      -      -      -      -     ld4b	{ z5.b, z6.b, z7.b, z8.b }, p3/z, [x17, x16]
-# CHECK-NEXT:  -     2.50   2.50    -      -      -      -      -     ld4d	{ z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT:  -     2.00   2.00    -      -      -      -      -     ld4d	{ z0.d, z1.d, z2.d, z3.d }, p0/z, [x0]
-# CHECK-NEXT:  -     2.00   2.00    -      -      -      -      -     ld4d	{ z21.d, z22.d, z23.d, z24.d }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT:  -     2.00   2.00    -      -      -      -      -     ld4d	{ z23.d, z24.d, z25.d, z26.d }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT:  -     2.50   2.50    -      -      -      -      -     ld4d	{ z5.d, z6.d, z7.d, z8.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT:  -     8.50   8.50    -      -      -      -      -     ld4h	{ z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT:  -     8.50   8.50    -      -      -      -      -     ld4h	{ z0.h, z1.h, z2.h, z3.h }, p0/z, [x0]
-# CHECK-NEXT:  -     8.50   8.50    -      -      -      -      -     ld4h	{ z21.h, z22.h, z23.h, z24.h }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT:  -     8.50   8.50    -      -      -      -      -     ld4h	{ z23.h, z24.h, z25.h, z26.h }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT:  -     8.50   8.50    -      -      -      -      -     ld4h	{ z5.h, z6.h, z7.h, z8.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT:  -     2.50   2.50    -      -      -      -      -     ld4w	{ z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT:  -     2.00   2.00    -      -      -      -      -     ld4w	{ z0.s, z1.s, z2.s, z3.s }, p0/z, [x0]
-# CHECK-NEXT:  -     2.00   2.00    -      -      -      -      -     ld4w	{ z21.s, z22.s, z23.s, z24.s }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT:  -     2.00   2.00    -      -      -      -      -     ld4w	{ z23.s, z24.s, z25.s, z26.s }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT:  -     2.50   2.50    -      -      -      -      -     ld4w	{ z5.s, z6.s, z7.s, z8.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT:  -     6.50   6.50    -      -      -      -      -     ld3b	{ z0.b - z2.b }, p0/z, [x0, x0]
+# CHECK-NEXT:  -     6.50   6.50    -      -      -      -      -     ld3b	{ z0.b - z2.b }, p0/z, [x0]
+# CHECK-NEXT:  -     6.50   6.50    -      -      -      -      -     ld3b	{ z21.b - z23.b }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT:  -     6.50   6.50    -      -      -      -      -     ld3b	{ z23.b - z25.b }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT:  -     6.50   6.50    -      -      -      -      -     ld3b	{ z5.b - z7.b }, p3/z, [x17, x16]
+# CHECK-NEXT:  -     2.00   2.00    -      -      -      -      -     ld3d	{ z0.d - z2.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT:  -     1.50   1.50    -      -      -      -      -     ld3d	{ z0.d - z2.d }, p0/z, [x0]
+# CHECK-NEXT:  -     1.50   1.50    -      -      -      -      -     ld3d	{ z21.d - z23.d }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT:  -     1.50   1.50    -      -      -      -      -     ld3d	{ z23.d - z25.d }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT:  -     2.00   2.00    -      -      -      -      -     ld3d	{ z5.d - z7.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT:  -     6.50   6.50    -      -      -      -      -     ld3h	{ z0.h - z2.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT:  -     6.50   6.50    -      -      -      -      -     ld3h	{ z0.h - z2.h }, p0/z, [x0]
+# CHECK-NEXT:  -     6.50   6.50    -      -      -      -      -     ld3h	{ z21.h - z23.h }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT:  -     6.50   6.50    -      -      -      -      -     ld3h	{ z23.h - z25.h }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT:  -     6.50   6.50    -      -      -      -      -     ld3h	{ z5.h - z7.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT:  -     2.00   2.00    -      -      -      -      -     ld3w	{ z0.s - z2.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT:  -     1.50   1.50    -      -      -      -      -     ld3w	{ z0.s - z2.s }, p0/z, [x0]
+# CHECK-NEXT:  -     1.50   1.50    -      -      -      -      -     ld3w	{ z21.s - z23.s }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT:  -     1.50   1.50    -      -      -      -      -     ld3w	{ z23.s - z25.s }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT:  -     2.00   2.00    -      -      -      -      -     ld3w	{ z5.s - z7.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT:  -     8.50   8.50    -      -      -      -      -     ld4b	{ z0.b - z3.b }, p0/z, [x0, x0]
+# CHECK-NEXT:  -     8.50   8.50    -      -      -      -      -     ld4b	{ z0.b - z3.b }, p0/z, [x0]
+# CHECK-NEXT:  -     8.50   8.50    -      -      -      -      -     ld4b	{ z21.b - z24.b }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT:  -     8.50   8.50    -      -      -      -      -     ld4b	{ z23.b - z26.b }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT:  -     8.50   8.50    -      -      -      -      -     ld4b	{ z5.b - z8.b }, p3/z, [x17, x16]
+# CHECK-NEXT:  -     2.50   2.50    -      -      -      -      -     ld4d	{ z0.d - z3.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT:  -     2.00   2.00    -      -      -      -      -     ld4d	{ z0.d - z3.d }, p0/z, [x0]
+# CHECK-NEXT:  -     2.00   2.00    -      -      -      -      -     ld4d	{ z21.d - z24.d }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT:  -     2.00   2.00    -      -      -      -      -     ld4d	{ z23.d - z26.d }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT:  -     2.50   2.50    -      -      -      -      -     ld4d	{ z5.d - z8.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT:  -     8.50   8.50    -      -      -      -      -     ld4h	{ z0.h - z3.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT:  -     8.50   8.50    -      -      -      -      -     ld4h	{ z0.h - z3.h }, p0/z, [x0]
+# CHECK-NEXT:  -     8.50   8.50    -      -      -      -      -     ld4h	{ z21.h - z24.h }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT:  -     8.50   8.50    -      -      -      -      -     ld4h	{ z23.h - z26.h }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT:  -     8.50   8.50    -      -      -      -      -     ld4h	{ z5.h - z8.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT:  -     2.50   2.50    -      -      -      -      -     ld4w	{ z0.s - z3.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT:  -     2.00   2.00    -      -      -      -      -     ld4w	{ z0.s - z3.s }, p0/z, [x0]
+# CHECK-NEXT:  -     2.00   2.00    -      -      -      -      -     ld4w	{ z21.s - z24.s }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT:  -     2.00   2.00    -      -      -      -      -     ld4w	{ z23.s - z26.s }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT:  -     2.50   2.50    -      -      -      -      -     ld4w	{ z5.s - z8.s }, p3/z, [x17, x16, lsl #2]
 # CHECK-NEXT:  -     0.50   0.50    -      -      -      -      -     ldff1b	{ z0.d }, p0/z, [x0, x0]
 # CHECK-NEXT:  -     2.00   2.00    -      -     1.00    -      -     ldff1b	{ z0.d }, p0/z, [z0.d]
 # CHECK-NEXT:  -     0.50   0.50    -      -      -      -      -     ldff1b	{ z0.h }, p0/z, [x0, x0]
@@ -7092,46 +7092,46 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  -     1.50   1.50    -      -     2.00    -      -     st2w	{ z21.s, z22.s }, p5, [x10, #10, mul vl]
 # CHECK-NEXT:  -     1.50   1.50    -      -     2.00    -      -     st2w	{ z23.s, z24.s }, p3, [x13, #-16, mul vl]
 # CHECK-NEXT:  -     1.00   1.00    -      -     2.00    -      -     st2w	{ z5.s, z6.s }, p3, [x17, x16, lsl #2]
-# CHECK-NEXT:  -     6.50   6.50    -      -     12.00   -      -     st3b	{ z0.b, z1.b, z2.b }, p0, [x0, x0]
-# CHECK-NEXT:  -     6.50   6.50    -      -     12.00   -      -     st3b	{ z0.b, z1.b, z2.b }, p0, [x0]
-# CHECK-NEXT:  -     6.50   6.50    -      -     12.00   -      -     st3b	{ z21.b, z22.b, z23.b }, p5, [x10, #15, mul vl]
-# CHECK-NEXT:  -     6.50   6.50    -      -     12.00   -      -     st3b	{ z23.b, z24.b, z25.b }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT:  -     6.50   6.50    -      -     12.00   -      -     st3b	{ z5.b, z6.b, z7.b }, p3, [x17, x16]
-# CHECK-NEXT:  -     1.50   1.50    -      -     3.00    -      -     st3d	{ z0.d, z1.d, z2.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT:  -     2.00   2.00    -      -     3.00    -      -     st3d	{ z0.d, z1.d, z2.d }, p0, [x0]
-# CHECK-NEXT:  -     2.00   2.00    -      -     3.00    -      -     st3d	{ z21.d, z22.d, z23.d }, p5, [x10, #15, mul vl]
-# CHECK-NEXT:  -     2.00   2.00    -      -     3.00    -      -     st3d	{ z23.d, z24.d, z25.d }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT:  -     1.50   1.50    -      -     3.00    -      -     st3d	{ z5.d, z6.d, z7.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT:  -     6.50   6.50    -      -     12.00   -      -     st3h	{ z0.h, z1.h, z2.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT:  -     6.50   6.50    -      -     12.00   -      -     st3h	{ z0.h, z1.h, z2.h }, p0, [x0]
-# CHECK-NEXT:  -     6.50   6.50    -      -     12.00   -      -     st3h	{ z21.h, z22.h, z23.h }, p5, [x10, #15, mul vl]
-# CHECK-NEXT:  -     6.50   6.50    -      -     12.00   -      -     st3h	{ z23.h, z24.h, z25.h }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT:  -     6.50   6.50    -      -     12.00   -      -     st3h	{ z5.h, z6.h, z7.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT:  -     1.50   1.50    -      -     3.00    -      -     st3w	{ z0.s, z1.s, z2.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT:  -     2.00   2.00    -      -     3.00    -      -     st3w	{ z0.s, z1.s, z2.s }, p0, [x0]
-# CHECK-NEXT:  -     2.00   2.00    -      -     3.00    -      -     st3w	{ z21.s, z22.s, z23.s }, p5, [x10, #15, mul vl]
-# CHECK-NEXT:  -     2.00   2.00    -      -     3.00    -      -     st3w	{ z23.s, z24.s, z25.s }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT:  -     1.50   1.50    -      -     3.00    -      -     st3w	{ z5.s, z6.s, z7.s }, p3, [x17, x16, lsl #2]
-# CHECK-NEXT:  -     8.50   8.50    -      -     16.00   -      -     st4b	{ z0.b, z1.b, z2.b, z3.b }, p0, [x0, x0]
-# CHECK-NEXT:  -     8.50   8.50    -      -     16.00   -      -     st4b	{ z0.b, z1.b, z2.b, z3.b }, p0, [x0]
-# CHECK-NEXT:  -     8.50   8.50    -      -     16.00   -      -     st4b	{ z21.b, z22.b, z23.b, z24.b }, p5, [x10, #20, mul vl]
-# CHECK-NEXT:  -     8.50   8.50    -      -     16.00   -      -     st4b	{ z23.b, z24.b, z25.b, z26.b }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT:  -     8.50   8.50    -      -     16.00   -      -     st4b	{ z5.b, z6.b, z7.b, z8.b }, p3, [x17, x16]
-# CHECK-NEXT:  -     2.00   2.00    -      -     4.00    -      -     st4d	{ z0.d, z1.d, z2.d, z3.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT:  -     2.50   2.50    -      -     4.00    -      -     st4d	{ z0.d, z1.d, z2.d, z3.d }, p0, [x0]
-# CHECK-NEXT:  -     2.50   2.50    -      -     4.00    -      -     st4d	{ z21.d, z22.d, z23.d, z24.d }, p5, [x10, #20, mul vl]
-# CHECK-NEXT:  -     2.50   2.50    -      -     4.00    -      -     st4d	{ z23.d, z24.d, z25.d, z26.d }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT:  -     2.00   2.00    -      -     4.00    -      -     st4d	{ z5.d, z6.d, z7.d, z8.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT:  -     8.50   8.50    -      -     16.00   -      -     st4h	{ z0.h, z1.h, z2.h, z3.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT:  -     8.50   8.50    -      -     16.00   -      -     st4h	{ z0.h, z1.h, z2.h, z3.h }, p0, [x0]
-# CHECK-NEXT:  -     8.50   8.50    -      -     16.00   -      -     st4h	{ z21.h, z22.h, z23.h, z24.h }, p5, [x10, #20, mul vl]
-# CHECK-NEXT:  -     8.50   8.50    -      -     16.00   -      -     st4h	{ z23.h, z24.h, z25.h, z26.h }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT:  -     8.50   8.50    -      -     16.00   -      -     st4h	{ z5.h, z6.h, z7.h, z8.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT:  -     2.00   2.00    -      -     4.00    -      -     st4w	{ z0.s, z1.s, z2.s, z3.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT:  -     2.50   2.50    -      -     4.00    -      -     st4w	{ z0.s, z1.s, z2.s, z3.s }, p0, [x0]
-# CHECK-NEXT:  -     2.50   2.50    -      -     4.00    -      -     st4w	{ z21.s, z22.s, z23.s, z24.s }, p5, [x10, #20, mul vl]
-# CHECK-NEXT:  -     2.50   2.50    -      -     4.00    -      -     st4w	{ z23.s, z24.s, z25.s, z26.s }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT:  -     2.00   2.00    -      -     4.00    -      -     st4w	{ z5.s, z6.s, z7.s, z8.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT:  -     6.50   6.50    -      -     12.00   -      -     st3b	{ z0.b - z2.b }, p0, [x0, x0]
+# CHECK-NEXT:  -     6.50   6.50    -      -     12.00   -      -     st3b	{ z0.b - z2.b }, p0, [x0]
+# CHECK-NEXT:  -     6.50   6.50    -      -     12.00   -      -     st3b	{ z21.b - z23.b }, p5, [x10, #15, mul vl]
+# CHECK-NEXT:  -     6.50   6.50    -      -     12.00   -      -     st3b	{ z23.b - z25.b }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT:  -     6.50   6.50    -      -     12.00   -      -     st3b	{ z5.b - z7.b }, p3, [x17, x16]
+# CHECK-NEXT:  -     1.50   1.50    -      -     3.00    -      -     st3d	{ z0.d - z2.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT:  -     2.00   2.00    -      -     3.00    -      -     st3d	{ z0.d - z2.d }, p0, [x0]
+# CHECK-NEXT:  -     2.00   2.00    -      -     3.00    -      -     st3d	{ z21.d - z23.d }, p5, [x10, #15, mul vl]
+# CHECK-NEXT:  -     2.00   2.00    -      -     3.00    -      -     st3d	{ z23.d - z25.d }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT:  -     1.50   1.50    -      -     3.00    -      -     st3d	{ z5.d - z7.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT:  -     6.50   6.50    -      -     12.00   -      -     st3h	{ z0.h - z2.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT:  -     6.50   6.50    -      -     12.00   -      -     st3h	{ z0.h - z2.h }, p0, [x0]
+# CHECK-NEXT:  -     6.50   6.50    -      -     12.00   -      -     st3h	{ z21.h - z23.h }, p5, [x10, #15, mul vl]
+# CHECK-NEXT:  -     6.50   6.50    -      -     12.00   -      -     st3h	{ z23.h - z25.h }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT:  -     6.50   6.50    -      -     12.00   -      -     st3h	{ z5.h - z7.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT:  -     1.50   1.50    -      -     3.00    -      -     st3w	{ z0.s - z2.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT:  -     2.00   2.00    -      -     3.00    -      -     st3w	{ z0.s - z2.s }, p0, [x0]
+# CHECK-NEXT:  -     2.00   2.00    -      -     3.00    -      -     st3w	{ z21.s - z23.s }, p5, [x10, #15, mul vl]
+# CHECK-NEXT:  -     2.00   2.00    -      -     3.00    -      -     st3w	{ z23.s - z25.s }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT:  -     1.50   1.50    -      -     3.00    -      -     st3w	{ z5.s - z7.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT:  -     8.50   8.50    -      -     16.00   -      -     st4b	{ z0.b - z3.b }, p0, [x0, x0]
+# CHECK-NEXT:  -     8.50   8.50    -      -     16.00   -      -     st4b	{ z0.b - z3.b }, p0, [x0]
+# CHECK-NEXT:  -     8.50   8.50    -      -     16.00   -      -     st4b	{ z21.b - z24.b }, p5, [x10, #20, mul vl]
+# CHECK-NEXT:  -     8.50   8.50    -      -     16.00   -      -     st4b	{ z23.b - z26.b }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT:  -     8.50   8.50    -      -     16.00   -      -     st4b	{ z5.b - z8.b }, p3, [x17, x16]
+# CHECK-NEXT:  -     2.00   2.00    -      -     4.00    -      -     st4d	{ z0.d - z3.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT:  -     2.50   2.50    -      -     4.00    -      -     st4d	{ z0.d - z3.d }, p0, [x0]
+# CHECK-NEXT:  -     2.50   2.50    -      -     4.00    -      -     st4d	{ z21.d - z24.d }, p5, [x10, #20, mul vl]
+# CHECK-NEXT:  -     2.50   2.50    -      -     4.00    -      -     st4d	{ z23.d - z26.d }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT:  -     2.00   2.00    -      -     4.00    -      -     st4d	{ z5.d - z8.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT:  -     8.50   8.50    -      -     16.00   -      -     st4h	{ z0.h - z3.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT:  -     8.50   8.50    -      -     16.00   -      -     st4h	{ z0.h - z3.h }, p0, [x0]
+# CHECK-NEXT:  -     8.50   8.50    -      -     16.00   -      -     st4h	{ z21.h - z24.h }, p5, [x10, #20, mul vl]
+# CHECK-NEXT:  -     8.50   8.50    -      -     16.00   -      -     st4h	{ z23.h - z26.h }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT:  -     8.50   8.50    -      -     16.00   -      -     st4h	{ z5.h - z8.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT:  -     2.00   2.00    -      -     4.00    -      -     st4w	{ z0.s - z3.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT:  -     2.50   2.50    -      -     4.00    -      -     st4w	{ z0.s - z3.s }, p0, [x0]
+# CHECK-NEXT:  -     2.50   2.50    -      -     4.00    -      -     st4w	{ z21.s - z24.s }, p5, [x10, #20, mul vl]
+# CHECK-NEXT:  -     2.50   2.50    -      -     4.00    -      -     st4w	{ z23.s - z26.s }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT:  -     2.00   2.00    -      -     4.00    -      -     st4w	{ z5.s - z8.s }, p3, [x17, x16, lsl #2]
 # CHECK-NEXT:  -     0.50   0.50    -      -     1.00    -      -     stnt1b	{ z0.b }, p0, [x0, x0]
 # CHECK-NEXT:  -     0.50   0.50    -      -     1.00    -      -     stnt1b	{ z0.b }, p0, [x0]
 # CHECK-NEXT:  -     0.50   0.50    -      -     1.00    -      -     stnt1b	{ z21.b }, p5, [x10, #7, mul vl]

diff  --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s
index 062ac80bd718b..5ba286f21ba6e 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s
@@ -4624,46 +4624,46 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  2      8     0.50    *             U     ld2w	{ z21.s, z22.s }, p5/z, [x10, #10, mul vl]
 # CHECK-NEXT:  2      8     0.50    *             U     ld2w	{ z23.s, z24.s }, p3/z, [x13, #-16, mul vl]
 # CHECK-NEXT:  2      9     0.50    *             U     ld2w	{ z5.s, z6.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT:  3      10    0.67    *             U     ld3b	{ z0.b, z1.b, z2.b }, p0/z, [x0, x0]
-# CHECK-NEXT:  2      9     0.50    *             U     ld3b	{ z0.b, z1.b, z2.b }, p0/z, [x0]
-# CHECK-NEXT:  2      9     0.50    *             U     ld3b	{ z21.b, z22.b, z23.b }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT:  2      9     0.50    *             U     ld3b	{ z23.b, z24.b, z25.b }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT:  3      10    0.67    *             U     ld3b	{ z5.b, z6.b, z7.b }, p3/z, [x17, x16]
-# CHECK-NEXT:  3      10    0.67    *             U     ld3d	{ z0.d, z1.d, z2.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT:  2      9     0.50    *             U     ld3d	{ z0.d, z1.d, z2.d }, p0/z, [x0]
-# CHECK-NEXT:  2      9     0.50    *             U     ld3d	{ z21.d, z22.d, z23.d }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT:  2      9     0.50    *             U     ld3d	{ z23.d, z24.d, z25.d }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT:  3      10    0.67    *             U     ld3d	{ z5.d, z6.d, z7.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT:  3      10    0.67    *             U     ld3h	{ z0.h, z1.h, z2.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT:  2      9     0.50    *             U     ld3h	{ z0.h, z1.h, z2.h }, p0/z, [x0]
-# CHECK-NEXT:  2      9     0.50    *             U     ld3h	{ z21.h, z22.h, z23.h }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT:  2      9     0.50    *             U     ld3h	{ z23.h, z24.h, z25.h }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT:  3      10    0.67    *             U     ld3h	{ z5.h, z6.h, z7.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT:  3      10    0.67    *             U     ld3w	{ z0.s, z1.s, z2.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT:  2      9     0.50    *             U     ld3w	{ z0.s, z1.s, z2.s }, p0/z, [x0]
-# CHECK-NEXT:  2      9     0.50    *             U     ld3w	{ z21.s, z22.s, z23.s }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT:  2      9     0.50    *             U     ld3w	{ z23.s, z24.s, z25.s }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT:  3      10    0.67    *             U     ld3w	{ z5.s, z6.s, z7.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT:  6      10    1.00    *             U     ld4b	{ z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x0]
-# CHECK-NEXT:  4      9     1.00    *             U     ld4b	{ z0.b, z1.b, z2.b, z3.b }, p0/z, [x0]
-# CHECK-NEXT:  4      9     1.00    *             U     ld4b	{ z21.b, z22.b, z23.b, z24.b }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT:  4      9     1.00    *             U     ld4b	{ z23.b, z24.b, z25.b, z26.b }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT:  6      10    1.00    *             U     ld4b	{ z5.b, z6.b, z7.b, z8.b }, p3/z, [x17, x16]
-# CHECK-NEXT:  6      10    1.00    *             U     ld4d	{ z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT:  4      9     1.00    *             U     ld4d	{ z0.d, z1.d, z2.d, z3.d }, p0/z, [x0]
-# CHECK-NEXT:  4      9     1.00    *             U     ld4d	{ z21.d, z22.d, z23.d, z24.d }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT:  4      9     1.00    *             U     ld4d	{ z23.d, z24.d, z25.d, z26.d }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT:  6      10    1.00    *             U     ld4d	{ z5.d, z6.d, z7.d, z8.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT:  6      10    1.00    *             U     ld4h	{ z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT:  4      9     1.00    *             U     ld4h	{ z0.h, z1.h, z2.h, z3.h }, p0/z, [x0]
-# CHECK-NEXT:  4      9     1.00    *             U     ld4h	{ z21.h, z22.h, z23.h, z24.h }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT:  4      9     1.00    *             U     ld4h	{ z23.h, z24.h, z25.h, z26.h }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT:  6      10    1.00    *             U     ld4h	{ z5.h, z6.h, z7.h, z8.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT:  6      10    1.00    *             U     ld4w	{ z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT:  4      9     1.00    *             U     ld4w	{ z0.s, z1.s, z2.s, z3.s }, p0/z, [x0]
-# CHECK-NEXT:  4      9     1.00    *             U     ld4w	{ z21.s, z22.s, z23.s, z24.s }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT:  4      9     1.00    *             U     ld4w	{ z23.s, z24.s, z25.s, z26.s }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT:  6      10    1.00    *             U     ld4w	{ z5.s, z6.s, z7.s, z8.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT:  3      10    0.67    *             U     ld3b	{ z0.b - z2.b }, p0/z, [x0, x0]
+# CHECK-NEXT:  2      9     0.50    *             U     ld3b	{ z0.b - z2.b }, p0/z, [x0]
+# CHECK-NEXT:  2      9     0.50    *             U     ld3b	{ z21.b - z23.b }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT:  2      9     0.50    *             U     ld3b	{ z23.b - z25.b }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT:  3      10    0.67    *             U     ld3b	{ z5.b - z7.b }, p3/z, [x17, x16]
+# CHECK-NEXT:  3      10    0.67    *             U     ld3d	{ z0.d - z2.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT:  2      9     0.50    *             U     ld3d	{ z0.d - z2.d }, p0/z, [x0]
+# CHECK-NEXT:  2      9     0.50    *             U     ld3d	{ z21.d - z23.d }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT:  2      9     0.50    *             U     ld3d	{ z23.d - z25.d }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT:  3      10    0.67    *             U     ld3d	{ z5.d - z7.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT:  3      10    0.67    *             U     ld3h	{ z0.h - z2.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT:  2      9     0.50    *             U     ld3h	{ z0.h - z2.h }, p0/z, [x0]
+# CHECK-NEXT:  2      9     0.50    *             U     ld3h	{ z21.h - z23.h }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT:  2      9     0.50    *             U     ld3h	{ z23.h - z25.h }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT:  3      10    0.67    *             U     ld3h	{ z5.h - z7.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT:  3      10    0.67    *             U     ld3w	{ z0.s - z2.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT:  2      9     0.50    *             U     ld3w	{ z0.s - z2.s }, p0/z, [x0]
+# CHECK-NEXT:  2      9     0.50    *             U     ld3w	{ z21.s - z23.s }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT:  2      9     0.50    *             U     ld3w	{ z23.s - z25.s }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT:  3      10    0.67    *             U     ld3w	{ z5.s - z7.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT:  6      10    1.00    *             U     ld4b	{ z0.b - z3.b }, p0/z, [x0, x0]
+# CHECK-NEXT:  4      9     1.00    *             U     ld4b	{ z0.b - z3.b }, p0/z, [x0]
+# CHECK-NEXT:  4      9     1.00    *             U     ld4b	{ z21.b - z24.b }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT:  4      9     1.00    *             U     ld4b	{ z23.b - z26.b }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT:  6      10    1.00    *             U     ld4b	{ z5.b - z8.b }, p3/z, [x17, x16]
+# CHECK-NEXT:  6      10    1.00    *             U     ld4d	{ z0.d - z3.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT:  4      9     1.00    *             U     ld4d	{ z0.d - z3.d }, p0/z, [x0]
+# CHECK-NEXT:  4      9     1.00    *             U     ld4d	{ z21.d - z24.d }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT:  4      9     1.00    *             U     ld4d	{ z23.d - z26.d }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT:  6      10    1.00    *             U     ld4d	{ z5.d - z8.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT:  6      10    1.00    *             U     ld4h	{ z0.h - z3.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT:  4      9     1.00    *             U     ld4h	{ z0.h - z3.h }, p0/z, [x0]
+# CHECK-NEXT:  4      9     1.00    *             U     ld4h	{ z21.h - z24.h }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT:  4      9     1.00    *             U     ld4h	{ z23.h - z26.h }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT:  6      10    1.00    *             U     ld4h	{ z5.h - z8.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT:  6      10    1.00    *             U     ld4w	{ z0.s - z3.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT:  4      9     1.00    *             U     ld4w	{ z0.s - z3.s }, p0/z, [x0]
+# CHECK-NEXT:  4      9     1.00    *             U     ld4w	{ z21.s - z24.s }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT:  4      9     1.00    *             U     ld4w	{ z23.s - z26.s }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT:  6      10    1.00    *             U     ld4w	{ z5.s - z8.s }, p3/z, [x17, x16, lsl #2]
 # CHECK-NEXT:  2      6     0.50    *             U     ldff1b	{ z0.d }, p0/z, [x0, x0]
 # CHECK-NEXT:  4      9     1.00    *             U     ldff1b	{ z0.d }, p0/z, [z0.d]
 # CHECK-NEXT:  2      6     0.50    *             U     ldff1b	{ z0.h }, p0/z, [x0, x0]
@@ -6124,46 +6124,46 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  2      4     0.50           *      U     st2w	{ z21.s, z22.s }, p5, [x10, #10, mul vl]
 # CHECK-NEXT:  2      4     0.50           *      U     st2w	{ z23.s, z24.s }, p3, [x13, #-16, mul vl]
 # CHECK-NEXT:  2      4     0.50           *      U     st2w	{ z5.s, z6.s }, p3, [x17, x16, lsl #2]
-# CHECK-NEXT:  15     7     2.50           *      U     st3b	{ z0.b, z1.b, z2.b }, p0, [x0, x0]
-# CHECK-NEXT:  10     7     2.50           *      U     st3b	{ z0.b, z1.b, z2.b }, p0, [x0]
-# CHECK-NEXT:  10     7     2.50           *      U     st3b	{ z21.b, z22.b, z23.b }, p5, [x10, #15, mul vl]
-# CHECK-NEXT:  10     7     2.50           *      U     st3b	{ z23.b, z24.b, z25.b }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT:  15     7     2.50           *      U     st3b	{ z5.b, z6.b, z7.b }, p3, [x17, x16]
-# CHECK-NEXT:  15     7     2.50           *      U     st3d	{ z0.d, z1.d, z2.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT:  10     7     2.50           *      U     st3d	{ z0.d, z1.d, z2.d }, p0, [x0]
-# CHECK-NEXT:  10     7     2.50           *      U     st3d	{ z21.d, z22.d, z23.d }, p5, [x10, #15, mul vl]
-# CHECK-NEXT:  10     7     2.50           *      U     st3d	{ z23.d, z24.d, z25.d }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT:  15     7     2.50           *      U     st3d	{ z5.d, z6.d, z7.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT:  15     7     2.50           *      U     st3h	{ z0.h, z1.h, z2.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT:  10     7     2.50           *      U     st3h	{ z0.h, z1.h, z2.h }, p0, [x0]
-# CHECK-NEXT:  10     7     2.50           *      U     st3h	{ z21.h, z22.h, z23.h }, p5, [x10, #15, mul vl]
-# CHECK-NEXT:  10     7     2.50           *      U     st3h	{ z23.h, z24.h, z25.h }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT:  15     7     2.50           *      U     st3h	{ z5.h, z6.h, z7.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT:  15     7     2.50           *      U     st3w	{ z0.s, z1.s, z2.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT:  10     7     2.50           *      U     st3w	{ z0.s, z1.s, z2.s }, p0, [x0]
-# CHECK-NEXT:  10     7     2.50           *      U     st3w	{ z21.s, z22.s, z23.s }, p5, [x10, #15, mul vl]
-# CHECK-NEXT:  10     7     2.50           *      U     st3w	{ z23.s, z24.s, z25.s }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT:  15     7     2.50           *      U     st3w	{ z5.s, z6.s, z7.s }, p3, [x17, x16, lsl #2]
-# CHECK-NEXT:  27     11    4.50           *      U     st4b	{ z0.b, z1.b, z2.b, z3.b }, p0, [x0, x0]
-# CHECK-NEXT:  18     11    4.50           *      U     st4b	{ z0.b, z1.b, z2.b, z3.b }, p0, [x0]
-# CHECK-NEXT:  18     11    4.50           *      U     st4b	{ z21.b, z22.b, z23.b, z24.b }, p5, [x10, #20, mul vl]
-# CHECK-NEXT:  18     11    4.50           *      U     st4b	{ z23.b, z24.b, z25.b, z26.b }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT:  27     11    4.50           *      U     st4b	{ z5.b, z6.b, z7.b, z8.b }, p3, [x17, x16]
-# CHECK-NEXT:  27     11    4.50           *      U     st4d	{ z0.d, z1.d, z2.d, z3.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT:  18     11    4.50           *      U     st4d	{ z0.d, z1.d, z2.d, z3.d }, p0, [x0]
-# CHECK-NEXT:  18     11    4.50           *      U     st4d	{ z21.d, z22.d, z23.d, z24.d }, p5, [x10, #20, mul vl]
-# CHECK-NEXT:  18     11    4.50           *      U     st4d	{ z23.d, z24.d, z25.d, z26.d }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT:  27     11    4.50           *      U     st4d	{ z5.d, z6.d, z7.d, z8.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT:  27     11    4.50           *      U     st4h	{ z0.h, z1.h, z2.h, z3.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT:  18     11    4.50           *      U     st4h	{ z0.h, z1.h, z2.h, z3.h }, p0, [x0]
-# CHECK-NEXT:  18     11    4.50           *      U     st4h	{ z21.h, z22.h, z23.h, z24.h }, p5, [x10, #20, mul vl]
-# CHECK-NEXT:  18     11    4.50           *      U     st4h	{ z23.h, z24.h, z25.h, z26.h }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT:  27     11    4.50           *      U     st4h	{ z5.h, z6.h, z7.h, z8.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT:  27     11    4.50           *      U     st4w	{ z0.s, z1.s, z2.s, z3.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT:  18     11    4.50           *      U     st4w	{ z0.s, z1.s, z2.s, z3.s }, p0, [x0]
-# CHECK-NEXT:  18     11    4.50           *      U     st4w	{ z21.s, z22.s, z23.s, z24.s }, p5, [x10, #20, mul vl]
-# CHECK-NEXT:  18     11    4.50           *      U     st4w	{ z23.s, z24.s, z25.s, z26.s }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT:  27     11    4.50           *      U     st4w	{ z5.s, z6.s, z7.s, z8.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT:  15     7     2.50           *      U     st3b	{ z0.b - z2.b }, p0, [x0, x0]
+# CHECK-NEXT:  10     7     2.50           *      U     st3b	{ z0.b - z2.b }, p0, [x0]
+# CHECK-NEXT:  10     7     2.50           *      U     st3b	{ z21.b - z23.b }, p5, [x10, #15, mul vl]
+# CHECK-NEXT:  10     7     2.50           *      U     st3b	{ z23.b - z25.b }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT:  15     7     2.50           *      U     st3b	{ z5.b - z7.b }, p3, [x17, x16]
+# CHECK-NEXT:  15     7     2.50           *      U     st3d	{ z0.d - z2.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT:  10     7     2.50           *      U     st3d	{ z0.d - z2.d }, p0, [x0]
+# CHECK-NEXT:  10     7     2.50           *      U     st3d	{ z21.d - z23.d }, p5, [x10, #15, mul vl]
+# CHECK-NEXT:  10     7     2.50           *      U     st3d	{ z23.d - z25.d }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT:  15     7     2.50           *      U     st3d	{ z5.d - z7.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT:  15     7     2.50           *      U     st3h	{ z0.h - z2.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT:  10     7     2.50           *      U     st3h	{ z0.h - z2.h }, p0, [x0]
+# CHECK-NEXT:  10     7     2.50           *      U     st3h	{ z21.h - z23.h }, p5, [x10, #15, mul vl]
+# CHECK-NEXT:  10     7     2.50           *      U     st3h	{ z23.h - z25.h }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT:  15     7     2.50           *      U     st3h	{ z5.h - z7.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT:  15     7     2.50           *      U     st3w	{ z0.s - z2.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT:  10     7     2.50           *      U     st3w	{ z0.s - z2.s }, p0, [x0]
+# CHECK-NEXT:  10     7     2.50           *      U     st3w	{ z21.s - z23.s }, p5, [x10, #15, mul vl]
+# CHECK-NEXT:  10     7     2.50           *      U     st3w	{ z23.s - z25.s }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT:  15     7     2.50           *      U     st3w	{ z5.s - z7.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT:  27     11    4.50           *      U     st4b	{ z0.b - z3.b }, p0, [x0, x0]
+# CHECK-NEXT:  18     11    4.50           *      U     st4b	{ z0.b - z3.b }, p0, [x0]
+# CHECK-NEXT:  18     11    4.50           *      U     st4b	{ z21.b - z24.b }, p5, [x10, #20, mul vl]
+# CHECK-NEXT:  18     11    4.50           *      U     st4b	{ z23.b - z26.b }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT:  27     11    4.50           *      U     st4b	{ z5.b - z8.b }, p3, [x17, x16]
+# CHECK-NEXT:  27     11    4.50           *      U     st4d	{ z0.d - z3.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT:  18     11    4.50           *      U     st4d	{ z0.d - z3.d }, p0, [x0]
+# CHECK-NEXT:  18     11    4.50           *      U     st4d	{ z21.d - z24.d }, p5, [x10, #20, mul vl]
+# CHECK-NEXT:  18     11    4.50           *      U     st4d	{ z23.d - z26.d }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT:  27     11    4.50           *      U     st4d	{ z5.d - z8.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT:  27     11    4.50           *      U     st4h	{ z0.h - z3.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT:  18     11    4.50           *      U     st4h	{ z0.h - z3.h }, p0, [x0]
+# CHECK-NEXT:  18     11    4.50           *      U     st4h	{ z21.h - z24.h }, p5, [x10, #20, mul vl]
+# CHECK-NEXT:  18     11    4.50           *      U     st4h	{ z23.h - z26.h }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT:  27     11    4.50           *      U     st4h	{ z5.h - z8.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT:  27     11    4.50           *      U     st4w	{ z0.s - z3.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT:  18     11    4.50           *      U     st4w	{ z0.s - z3.s }, p0, [x0]
+# CHECK-NEXT:  18     11    4.50           *      U     st4w	{ z21.s - z24.s }, p5, [x10, #20, mul vl]
+# CHECK-NEXT:  18     11    4.50           *      U     st4w	{ z23.s - z26.s }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT:  27     11    4.50           *      U     st4w	{ z5.s - z8.s }, p3, [x17, x16, lsl #2]
 # CHECK-NEXT:  2      2     0.50           *            stnt1b	{ z0.b }, p0, [x0, x0]
 # CHECK-NEXT:  2      2     0.50           *            stnt1b	{ z0.b }, p0, [x0]
 # CHECK-NEXT:  2      2     0.50           *            stnt1b	{ z0.d }, p0, [z1.d]
@@ -8055,46 +8055,46 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -      -      -     0.50   0.50   ld2w	{ z21.s, z22.s }, p5/z, [x10, #10, mul vl]
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -      -      -     0.50   0.50   ld2w	{ z23.s, z24.s }, p3/z, [x13, #-16, mul vl]
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -      -      -     0.50   0.50   ld2w	{ z5.s, z6.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     0.50   0.50   ld3b	{ z0.b, z1.b, z2.b }, p0/z, [x0, x0]
-# CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -      -      -     0.50   0.50   ld3b	{ z0.b, z1.b, z2.b }, p0/z, [x0]
-# CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -      -      -     0.50   0.50   ld3b	{ z21.b, z22.b, z23.b }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -      -      -     0.50   0.50   ld3b	{ z23.b, z24.b, z25.b }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     0.50   0.50   ld3b	{ z5.b, z6.b, z7.b }, p3/z, [x17, x16]
-# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     0.50   0.50   ld3d	{ z0.d, z1.d, z2.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -      -      -     0.50   0.50   ld3d	{ z0.d, z1.d, z2.d }, p0/z, [x0]
-# CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -      -      -     0.50   0.50   ld3d	{ z21.d, z22.d, z23.d }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -      -      -     0.50   0.50   ld3d	{ z23.d, z24.d, z25.d }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     0.50   0.50   ld3d	{ z5.d, z6.d, z7.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     0.50   0.50   ld3h	{ z0.h, z1.h, z2.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -      -      -     0.50   0.50   ld3h	{ z0.h, z1.h, z2.h }, p0/z, [x0]
-# CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -      -      -     0.50   0.50   ld3h	{ z21.h, z22.h, z23.h }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -      -      -     0.50   0.50   ld3h	{ z23.h, z24.h, z25.h }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     0.50   0.50   ld3h	{ z5.h, z6.h, z7.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     0.50   0.50   ld3w	{ z0.s, z1.s, z2.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -      -      -     0.50   0.50   ld3w	{ z0.s, z1.s, z2.s }, p0/z, [x0]
-# CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -      -      -     0.50   0.50   ld3w	{ z21.s, z22.s, z23.s }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -      -      -     0.50   0.50   ld3w	{ z23.s, z24.s, z25.s }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     0.50   0.50   ld3w	{ z5.s, z6.s, z7.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -     1.00   1.00   1.00   1.00   ld4b	{ z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x0]
-# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     1.00   1.00   ld4b	{ z0.b, z1.b, z2.b, z3.b }, p0/z, [x0]
-# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     1.00   1.00   ld4b	{ z21.b, z22.b, z23.b, z24.b }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     1.00   1.00   ld4b	{ z23.b, z24.b, z25.b, z26.b }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -     1.00   1.00   1.00   1.00   ld4b	{ z5.b, z6.b, z7.b, z8.b }, p3/z, [x17, x16]
-# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -     1.00   1.00   1.00   1.00   ld4d	{ z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     1.00   1.00   ld4d	{ z0.d, z1.d, z2.d, z3.d }, p0/z, [x0]
-# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     1.00   1.00   ld4d	{ z21.d, z22.d, z23.d, z24.d }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     1.00   1.00   ld4d	{ z23.d, z24.d, z25.d, z26.d }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -     1.00   1.00   1.00   1.00   ld4d	{ z5.d, z6.d, z7.d, z8.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -     1.00   1.00   1.00   1.00   ld4h	{ z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     1.00   1.00   ld4h	{ z0.h, z1.h, z2.h, z3.h }, p0/z, [x0]
-# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     1.00   1.00   ld4h	{ z21.h, z22.h, z23.h, z24.h }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     1.00   1.00   ld4h	{ z23.h, z24.h, z25.h, z26.h }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -     1.00   1.00   1.00   1.00   ld4h	{ z5.h, z6.h, z7.h, z8.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -     1.00   1.00   1.00   1.00   ld4w	{ z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     1.00   1.00   ld4w	{ z0.s, z1.s, z2.s, z3.s }, p0/z, [x0]
-# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     1.00   1.00   ld4w	{ z21.s, z22.s, z23.s, z24.s }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     1.00   1.00   ld4w	{ z23.s, z24.s, z25.s, z26.s }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -     1.00   1.00   1.00   1.00   ld4w	{ z5.s, z6.s, z7.s, z8.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     0.50   0.50   ld3b	{ z0.b - z2.b }, p0/z, [x0, x0]
+# CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -      -      -     0.50   0.50   ld3b	{ z0.b - z2.b }, p0/z, [x0]
+# CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -      -      -     0.50   0.50   ld3b	{ z21.b - z23.b }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -      -      -     0.50   0.50   ld3b	{ z23.b - z25.b }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     0.50   0.50   ld3b	{ z5.b - z7.b }, p3/z, [x17, x16]
+# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     0.50   0.50   ld3d	{ z0.d - z2.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -      -      -     0.50   0.50   ld3d	{ z0.d - z2.d }, p0/z, [x0]
+# CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -      -      -     0.50   0.50   ld3d	{ z21.d - z23.d }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -      -      -     0.50   0.50   ld3d	{ z23.d - z25.d }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     0.50   0.50   ld3d	{ z5.d - z7.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     0.50   0.50   ld3h	{ z0.h - z2.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -      -      -     0.50   0.50   ld3h	{ z0.h - z2.h }, p0/z, [x0]
+# CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -      -      -     0.50   0.50   ld3h	{ z21.h - z23.h }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -      -      -     0.50   0.50   ld3h	{ z23.h - z25.h }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     0.50   0.50   ld3h	{ z5.h - z7.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     0.50   0.50   ld3w	{ z0.s - z2.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -      -      -     0.50   0.50   ld3w	{ z0.s - z2.s }, p0/z, [x0]
+# CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -      -      -     0.50   0.50   ld3w	{ z21.s - z23.s }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -      -      -     0.50   0.50   ld3w	{ z23.s - z25.s }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     0.50   0.50   ld3w	{ z5.s - z7.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -     1.00   1.00   1.00   1.00   ld4b	{ z0.b - z3.b }, p0/z, [x0, x0]
+# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     1.00   1.00   ld4b	{ z0.b - z3.b }, p0/z, [x0]
+# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     1.00   1.00   ld4b	{ z21.b - z24.b }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     1.00   1.00   ld4b	{ z23.b - z26.b }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -     1.00   1.00   1.00   1.00   ld4b	{ z5.b - z8.b }, p3/z, [x17, x16]
+# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -     1.00   1.00   1.00   1.00   ld4d	{ z0.d - z3.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     1.00   1.00   ld4d	{ z0.d - z3.d }, p0/z, [x0]
+# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     1.00   1.00   ld4d	{ z21.d - z24.d }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     1.00   1.00   ld4d	{ z23.d - z26.d }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -     1.00   1.00   1.00   1.00   ld4d	{ z5.d - z8.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -     1.00   1.00   1.00   1.00   ld4h	{ z0.h - z3.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     1.00   1.00   ld4h	{ z0.h - z3.h }, p0/z, [x0]
+# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     1.00   1.00   ld4h	{ z21.h - z24.h }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     1.00   1.00   ld4h	{ z23.h - z26.h }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -     1.00   1.00   1.00   1.00   ld4h	{ z5.h - z8.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -     1.00   1.00   1.00   1.00   ld4w	{ z0.s - z3.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     1.00   1.00   ld4w	{ z0.s - z3.s }, p0/z, [x0]
+# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     1.00   1.00   ld4w	{ z21.s - z24.s }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     1.00   1.00   ld4w	{ z23.s - z26.s }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -     1.00   1.00   1.00   1.00   ld4w	{ z5.s - z8.s }, p3/z, [x17, x16, lsl #2]
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -     0.50   0.50    -      -     ldff1b	{ z0.d }, p0/z, [x0, x0]
 # CHECK-NEXT:  -      -      -      -     0.67   0.67   0.67    -      -      -      -     1.00   1.00   ldff1b	{ z0.d }, p0/z, [z0.d]
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -     0.50   0.50    -      -     ldff1b	{ z0.h }, p0/z, [x0, x0]
@@ -9555,46 +9555,46 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -     0.50   0.50   st2w	{ z21.s, z22.s }, p5, [x10, #10, mul vl]
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -     0.50   0.50   st2w	{ z23.s, z24.s }, p3, [x13, #-16, mul vl]
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -     0.50   0.50   st2w	{ z5.s, z6.s }, p3, [x17, x16, lsl #2]
-# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -     2.50   2.50   2.50   2.50   st3b	{ z0.b, z1.b, z2.b }, p0, [x0, x0]
-# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -      -      -     2.50   2.50   st3b	{ z0.b, z1.b, z2.b }, p0, [x0]
-# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -      -      -     2.50   2.50   st3b	{ z21.b, z22.b, z23.b }, p5, [x10, #15, mul vl]
-# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -      -      -     2.50   2.50   st3b	{ z23.b, z24.b, z25.b }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -     2.50   2.50   2.50   2.50   st3b	{ z5.b, z6.b, z7.b }, p3, [x17, x16]
-# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -     2.50   2.50   2.50   2.50   st3d	{ z0.d, z1.d, z2.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -      -      -     2.50   2.50   st3d	{ z0.d, z1.d, z2.d }, p0, [x0]
-# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -      -      -     2.50   2.50   st3d	{ z21.d, z22.d, z23.d }, p5, [x10, #15, mul vl]
-# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -      -      -     2.50   2.50   st3d	{ z23.d, z24.d, z25.d }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -     2.50   2.50   2.50   2.50   st3d	{ z5.d, z6.d, z7.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -     2.50   2.50   2.50   2.50   st3h	{ z0.h, z1.h, z2.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -      -      -     2.50   2.50   st3h	{ z0.h, z1.h, z2.h }, p0, [x0]
-# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -      -      -     2.50   2.50   st3h	{ z21.h, z22.h, z23.h }, p5, [x10, #15, mul vl]
-# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -      -      -     2.50   2.50   st3h	{ z23.h, z24.h, z25.h }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -     2.50   2.50   2.50   2.50   st3h	{ z5.h, z6.h, z7.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -     2.50   2.50   2.50   2.50   st3w	{ z0.s, z1.s, z2.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -      -      -     2.50   2.50   st3w	{ z0.s, z1.s, z2.s }, p0, [x0]
-# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -      -      -     2.50   2.50   st3w	{ z21.s, z22.s, z23.s }, p5, [x10, #15, mul vl]
-# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -      -      -     2.50   2.50   st3w	{ z23.s, z24.s, z25.s }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -     2.50   2.50   2.50   2.50   st3w	{ z5.s, z6.s, z7.s }, p3, [x17, x16, lsl #2]
-# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -     4.50   4.50   4.50   4.50   st4b	{ z0.b, z1.b, z2.b, z3.b }, p0, [x0, x0]
-# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -      -      -     4.50   4.50   st4b	{ z0.b, z1.b, z2.b, z3.b }, p0, [x0]
-# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -      -      -     4.50   4.50   st4b	{ z21.b, z22.b, z23.b, z24.b }, p5, [x10, #20, mul vl]
-# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -      -      -     4.50   4.50   st4b	{ z23.b, z24.b, z25.b, z26.b }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -     4.50   4.50   4.50   4.50   st4b	{ z5.b, z6.b, z7.b, z8.b }, p3, [x17, x16]
-# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -     4.50   4.50   4.50   4.50   st4d	{ z0.d, z1.d, z2.d, z3.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -      -      -     4.50   4.50   st4d	{ z0.d, z1.d, z2.d, z3.d }, p0, [x0]
-# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -      -      -     4.50   4.50   st4d	{ z21.d, z22.d, z23.d, z24.d }, p5, [x10, #20, mul vl]
-# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -      -      -     4.50   4.50   st4d	{ z23.d, z24.d, z25.d, z26.d }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -     4.50   4.50   4.50   4.50   st4d	{ z5.d, z6.d, z7.d, z8.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -     4.50   4.50   4.50   4.50   st4h	{ z0.h, z1.h, z2.h, z3.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -      -      -     4.50   4.50   st4h	{ z0.h, z1.h, z2.h, z3.h }, p0, [x0]
-# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -      -      -     4.50   4.50   st4h	{ z21.h, z22.h, z23.h, z24.h }, p5, [x10, #20, mul vl]
-# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -      -      -     4.50   4.50   st4h	{ z23.h, z24.h, z25.h, z26.h }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -     4.50   4.50   4.50   4.50   st4h	{ z5.h, z6.h, z7.h, z8.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -     4.50   4.50   4.50   4.50   st4w	{ z0.s, z1.s, z2.s, z3.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -      -      -     4.50   4.50   st4w	{ z0.s, z1.s, z2.s, z3.s }, p0, [x0]
-# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -      -      -     4.50   4.50   st4w	{ z21.s, z22.s, z23.s, z24.s }, p5, [x10, #20, mul vl]
-# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -      -      -     4.50   4.50   st4w	{ z23.s, z24.s, z25.s, z26.s }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -     4.50   4.50   4.50   4.50   st4w	{ z5.s, z6.s, z7.s, z8.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -     2.50   2.50   2.50   2.50   st3b	{ z0.b - z2.b }, p0, [x0, x0]
+# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -      -      -     2.50   2.50   st3b	{ z0.b - z2.b }, p0, [x0]
+# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -      -      -     2.50   2.50   st3b	{ z21.b - z23.b }, p5, [x10, #15, mul vl]
+# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -      -      -     2.50   2.50   st3b	{ z23.b - z25.b }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -     2.50   2.50   2.50   2.50   st3b	{ z5.b - z7.b }, p3, [x17, x16]
+# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -     2.50   2.50   2.50   2.50   st3d	{ z0.d - z2.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -      -      -     2.50   2.50   st3d	{ z0.d - z2.d }, p0, [x0]
+# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -      -      -     2.50   2.50   st3d	{ z21.d - z23.d }, p5, [x10, #15, mul vl]
+# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -      -      -     2.50   2.50   st3d	{ z23.d - z25.d }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -     2.50   2.50   2.50   2.50   st3d	{ z5.d - z7.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -     2.50   2.50   2.50   2.50   st3h	{ z0.h - z2.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -      -      -     2.50   2.50   st3h	{ z0.h - z2.h }, p0, [x0]
+# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -      -      -     2.50   2.50   st3h	{ z21.h - z23.h }, p5, [x10, #15, mul vl]
+# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -      -      -     2.50   2.50   st3h	{ z23.h - z25.h }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -     2.50   2.50   2.50   2.50   st3h	{ z5.h - z7.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -     2.50   2.50   2.50   2.50   st3w	{ z0.s - z2.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -      -      -     2.50   2.50   st3w	{ z0.s - z2.s }, p0, [x0]
+# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -      -      -     2.50   2.50   st3w	{ z21.s - z23.s }, p5, [x10, #15, mul vl]
+# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -      -      -     2.50   2.50   st3w	{ z23.s - z25.s }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT:  -      -      -      -      -     2.50   2.50    -      -     2.50   2.50   2.50   2.50   st3w	{ z5.s - z7.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -     4.50   4.50   4.50   4.50   st4b	{ z0.b - z3.b }, p0, [x0, x0]
+# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -      -      -     4.50   4.50   st4b	{ z0.b - z3.b }, p0, [x0]
+# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -      -      -     4.50   4.50   st4b	{ z21.b - z24.b }, p5, [x10, #20, mul vl]
+# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -      -      -     4.50   4.50   st4b	{ z23.b - z26.b }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -     4.50   4.50   4.50   4.50   st4b	{ z5.b - z8.b }, p3, [x17, x16]
+# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -     4.50   4.50   4.50   4.50   st4d	{ z0.d - z3.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -      -      -     4.50   4.50   st4d	{ z0.d - z3.d }, p0, [x0]
+# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -      -      -     4.50   4.50   st4d	{ z21.d - z24.d }, p5, [x10, #20, mul vl]
+# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -      -      -     4.50   4.50   st4d	{ z23.d - z26.d }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -     4.50   4.50   4.50   4.50   st4d	{ z5.d - z8.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -     4.50   4.50   4.50   4.50   st4h	{ z0.h - z3.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -      -      -     4.50   4.50   st4h	{ z0.h - z3.h }, p0, [x0]
+# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -      -      -     4.50   4.50   st4h	{ z21.h - z24.h }, p5, [x10, #20, mul vl]
+# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -      -      -     4.50   4.50   st4h	{ z23.h - z26.h }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -     4.50   4.50   4.50   4.50   st4h	{ z5.h - z8.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -     4.50   4.50   4.50   4.50   st4w	{ z0.s - z3.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -      -      -     4.50   4.50   st4w	{ z0.s - z3.s }, p0, [x0]
+# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -      -      -     4.50   4.50   st4w	{ z21.s - z24.s }, p5, [x10, #20, mul vl]
+# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -      -      -     4.50   4.50   st4w	{ z23.s - z26.s }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT:  -      -      -      -      -     4.50   4.50    -      -     4.50   4.50   4.50   4.50   st4w	{ z5.s - z8.s }, p3, [x17, x16, lsl #2]
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -     0.50   0.50   stnt1b	{ z0.b }, p0, [x0, x0]
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -     0.50   0.50   stnt1b	{ z0.b }, p0, [x0]
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -     0.50   0.50   stnt1b	{ z0.d }, p0, [z1.d]


        


More information about the llvm-commits mailing list