[llvm] 60e2aad - [AArch64]Change printVectorList to print SVE vector range
Caroline Concatto via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 14 11:00:41 PDT 2022
Author: Caroline Concatto
Date: 2022-10-14T18:59:56+01:00
New Revision: 60e2aad109fc793de831de4a00116a3616e0e543
URL: https://github.com/llvm/llvm-project/commit/60e2aad109fc793de831de4a00116a3616e0e543
DIFF: https://github.com/llvm/llvm-project/commit/60e2aad109fc793de831de4a00116a3616e0e543.diff
LOG: [AArch64]Change printVectorList to print SVE vector range
This patch changes the preferred disassembly for SVE vector lists.
For instance, instead of printing this assembly:
ld4d { z1.d, z2.d, z3.d, z4.d }, p0/z, [x0]
it will print this:
ld4d { z1.d - z4.d }, p0/z, [x0]
Differential Revision: https://reviews.llvm.org/D135952
Added:
Modified:
llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+imm-addr-mode.ll
llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+reg-addr-mode.ll
llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
llvm/test/CodeGen/AArch64/sve-ldN.mir
llvm/test/CodeGen/AArch64/sve-stN.mir
llvm/test/MC/AArch64/SVE/ld3b.s
llvm/test/MC/AArch64/SVE/ld3d.s
llvm/test/MC/AArch64/SVE/ld3h.s
llvm/test/MC/AArch64/SVE/ld3w.s
llvm/test/MC/AArch64/SVE/ld4b.s
llvm/test/MC/AArch64/SVE/ld4d.s
llvm/test/MC/AArch64/SVE/ld4h.s
llvm/test/MC/AArch64/SVE/ld4w.s
llvm/test/MC/AArch64/SVE/st3b.s
llvm/test/MC/AArch64/SVE/st3d.s
llvm/test/MC/AArch64/SVE/st3h.s
llvm/test/MC/AArch64/SVE/st3w.s
llvm/test/MC/AArch64/SVE/st4b.s
llvm/test/MC/AArch64/SVE/st4d.s
llvm/test/MC/AArch64/SVE/st4h.s
llvm/test/MC/AArch64/SVE/st4w.s
llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s
llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index ee142a266e32d..c418ca0c02243 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -1466,17 +1466,31 @@ void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum,
Reg = MRI.getMatchingSuperReg(Reg, AArch64::dsub, &FPR128RC);
}
- for (unsigned i = 0; i < NumRegs; ++i, Reg = getNextVectorRegister(Reg)) {
- if (MRI.getRegClass(AArch64::ZPRRegClassID).contains(Reg))
- printRegName(O, Reg);
- else
- printRegName(O, Reg, AArch64::vreg);
+ if (MRI.getRegClass(AArch64::ZPRRegClassID).contains(Reg) && NumRegs > 1 &&
+ // Do not print the range when the last register is lower than the first.
+ // Because it is a wrap-around register.
+ Reg < getNextVectorRegister(Reg, NumRegs - 1)) {
+ printRegName(O, Reg);
O << LayoutSuffix;
-
- if (i + 1 != NumRegs)
- O << ", ";
+ if (NumRegs > 1) {
+ // Set of two sve registers should be separated by ','
+ StringRef split_char = NumRegs == 2 ? ", " : " - ";
+ O << split_char;
+ printRegName(O, (getNextVectorRegister(Reg, NumRegs - 1)));
+ O << LayoutSuffix;
+ }
+ } else {
+ for (unsigned i = 0; i < NumRegs; ++i, Reg = getNextVectorRegister(Reg)) {
+ // wrap-around sve register
+ if (MRI.getRegClass(AArch64::ZPRRegClassID).contains(Reg))
+ printRegName(O, Reg);
+ else
+ printRegName(O, Reg, AArch64::vreg);
+ O << LayoutSuffix;
+ if (i + 1 != NumRegs)
+ O << ", ";
+ }
}
-
O << " }";
}
diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
index be3b844efb0e7..554f9b986b23d 100644
--- a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
+++ b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
@@ -13,8 +13,8 @@ define float @foo1(double* %x0, double* %x1, double* %x2) nounwind {
; CHECK-NEXT: addvl sp, sp, #-4
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: fmov s0, #1.00000000
-; CHECK-NEXT: ld4d { z1.d, z2.d, z3.d, z4.d }, p0/z, [x0]
-; CHECK-NEXT: ld4d { z16.d, z17.d, z18.d, z19.d }, p0/z, [x1]
+; CHECK-NEXT: ld4d { z1.d - z4.d }, p0/z, [x0]
+; CHECK-NEXT: ld4d { z16.d - z19.d }, p0/z, [x1]
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x2]
; CHECK-NEXT: mov x0, sp
; CHECK-NEXT: ptrue p0.d
@@ -60,8 +60,8 @@ define float @foo2(double* %x0, double* %x1) nounwind {
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: add x9, sp, #16
-; CHECK-NEXT: ld4d { z1.d, z2.d, z3.d, z4.d }, p0/z, [x0]
-; CHECK-NEXT: ld4d { z16.d, z17.d, z18.d, z19.d }, p0/z, [x1]
+; CHECK-NEXT: ld4d { z1.d - z4.d }, p0/z, [x0]
+; CHECK-NEXT: ld4d { z16.d - z19.d }, p0/z, [x1]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: fmov s0, #1.00000000
@@ -118,8 +118,8 @@ define float @foo3(double* %x0, double* %x1, double* %x2) nounwind {
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: fmov s0, #1.00000000
-; CHECK-NEXT: ld4d { z2.d, z3.d, z4.d, z5.d }, p0/z, [x0]
-; CHECK-NEXT: ld3d { z16.d, z17.d, z18.d }, p0/z, [x1]
+; CHECK-NEXT: ld4d { z2.d - z5.d }, p0/z, [x0]
+; CHECK-NEXT: ld3d { z16.d - z18.d }, p0/z, [x1]
; CHECK-NEXT: ld1d { z6.d }, p0/z, [x2]
; CHECK-NEXT: fmov s1, #2.00000000
; CHECK-NEXT: mov x0, sp
@@ -234,7 +234,7 @@ entry:
ret double %x0
}
-; Use AAVPCS, SVE register in z0-z7 used
+; Use AAVPCS, SVE register in z0 - z7 used
define void @aavpcs1(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, i32 %s6, <vscale x 4 x i32> %s7, <vscale x 4 x i32> %s8, <vscale x 4 x i32> %s9, <vscale x 4 x i32> %s10, <vscale x 4 x i32> %s11, <vscale x 4 x i32> %s12, <vscale x 4 x i32> %s13, <vscale x 4 x i32> %s14, <vscale x 4 x i32> %s15, <vscale x 4 x i32> %s16, i32 * %ptr) nounwind {
; CHECK-LABEL: aavpcs1:
@@ -267,7 +267,7 @@ entry:
ret void
}
-; Use AAVPCS, SVE register in z0-z7 used
+; Use AAVPCS, SVE register in z0 - z7 used
define void @aavpcs2(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, <vscale x 4 x float> %s7, <vscale x 4 x float> %s8, <vscale x 4 x float> %s9, <vscale x 4 x float> %s10, <vscale x 4 x float> %s11, <vscale x 4 x float> %s12,<vscale x 4 x float> %s13,<vscale x 4 x float> %s14,<vscale x 4 x float> %s15,<vscale x 4 x float> %s16,float * %ptr) nounwind {
; CHECK-LABEL: aavpcs2:
@@ -306,7 +306,7 @@ entry:
ret void
}
-; Use AAVPCS, no SVE register in z0-z7 used (floats occupy z0-z7) but predicate arg is used
+; Use AAVPCS, no SVE register in z0 - z7 used (floats occupy z0 - z7) but predicate arg is used
define void @aavpcs3(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, float %s7, <vscale x 4 x float> %s8, <vscale x 4 x float> %s9, <vscale x 4 x float> %s10, <vscale x 4 x float> %s11, <vscale x 4 x float> %s12, <vscale x 4 x float> %s13, <vscale x 4 x float> %s14, <vscale x 4 x float> %s15, <vscale x 4 x float> %s16, <vscale x 4 x float> %s17, <vscale x 16 x i1> %p0, float * %ptr) nounwind {
; CHECK-LABEL: aavpcs3:
@@ -347,7 +347,7 @@ entry:
ret void
}
-; use AAVPCS, SVE register in z0-z7 used (i32s dont occupy z0-z7)
+; use AAVPCS, SVE register in z0 - z7 used (i32s dont occupy z0 - z7)
define void @aavpcs4(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, i32 %s6, i32 %s7, <vscale x 4 x i32> %s8, <vscale x 4 x i32> %s9, <vscale x 4 x i32> %s10, <vscale x 4 x i32> %s11, <vscale x 4 x i32> %s12, <vscale x 4 x i32> %s13, <vscale x 4 x i32> %s14, <vscale x 4 x i32> %s15, <vscale x 4 x i32> %s16, <vscale x 4 x i32> %s17, i32 * %ptr) nounwind {
; CHECK-LABEL: aavpcs4:
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+imm-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+imm-addr-mode.ll
index 8974022e0436f..9aac5d35d8b25 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+imm-addr-mode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+imm-addr-mode.ll
@@ -173,7 +173,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld3.nxv48
; CHECK-LABEL: ld3.nxv48i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #3
-; CHECK-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x8]
+; CHECK-NEXT: ld3b { z0.b - z2.b }, p0/z, [x0, x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 3
%base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -185,7 +185,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld3.nxv48
; CHECK-LABEL: ld3.nxv48i8_lower_bound:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #-24
-; CHECK-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x8]
+; CHECK-NEXT: ld3b { z0.b - z2.b }, p0/z, [x0, x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -24
%base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -197,7 +197,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld3.nxv48
; CHECK-LABEL: ld3.nxv48i8_upper_bound:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #21
-; CHECK-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x8]
+; CHECK-NEXT: ld3b { z0.b - z2.b }, p0/z, [x0, x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 21
%base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -209,7 +209,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld3.nxv48
; CHECK-LABEL: ld3.nxv48i8_not_multiple_of_3_01:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #4
-; CHECK-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x8]
+; CHECK-NEXT: ld3b { z0.b - z2.b }, p0/z, [x0, x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 4
%base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -221,7 +221,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld3.nxv48
; CHECK-LABEL: ld3.nxv48i8_not_multiple_of_3_02:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #5
-; CHECK-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x8]
+; CHECK-NEXT: ld3b { z0.b - z2.b }, p0/z, [x0, x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 5
%base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -233,7 +233,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld3.nxv48
; CHECK-LABEL: ld3.nxv48i8_outside_lower_bound:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #-27
-; CHECK-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x8]
+; CHECK-NEXT: ld3b { z0.b - z2.b }, p0/z, [x0, x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -27
%base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -245,7 +245,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld3.nxv48
; CHECK-LABEL: ld3.nxv48i8_outside_upper_bound:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #24
-; CHECK-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x8]
+; CHECK-NEXT: ld3b { z0.b - z2.b }, p0/z, [x0, x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 24
%base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -258,7 +258,7 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @ld3.nxv24
; CHECK-LABEL: ld3.nxv24i16:
; CHECK: // %bb.0:
; CHECK-NEXT: addvl x8, x0, #21
-; CHECK-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [x8]
+; CHECK-NEXT: ld3h { z0.h - z2.h }, p0/z, [x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 21
%base_ptr = bitcast <vscale x 8 x i16>* %base to i16 *
@@ -270,7 +270,7 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @ld3.nx
; CHECK-LABEL: ld3.nxv24f16:
; CHECK: // %bb.0:
; CHECK-NEXT: addvl x8, x0, #21
-; CHECK-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [x8]
+; CHECK-NEXT: ld3h { z0.h - z2.h }, p0/z, [x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 21
%base_ptr = bitcast <vscale x 8 x half>* %base to half *
@@ -282,7 +282,7 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @
; CHECK-LABEL: ld3.nxv24bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: addvl x8, x0, #-24
-; CHECK-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [x8]
+; CHECK-NEXT: ld3h { z0.h - z2.h }, p0/z, [x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* %addr, i64 -24
%base_ptr = bitcast <vscale x 8 x bfloat>* %base to bfloat *
@@ -295,7 +295,7 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @ld3.nxv12
; CHECK-LABEL: ld3.nxv12i32:
; CHECK: // %bb.0:
; CHECK-NEXT: addvl x8, x0, #21
-; CHECK-NEXT: ld3w { z0.s, z1.s, z2.s }, p0/z, [x8]
+; CHECK-NEXT: ld3w { z0.s - z2.s }, p0/z, [x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 21
%base_ptr = bitcast <vscale x 4 x i32>* %base to i32 *
@@ -307,7 +307,7 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @ld3
; CHECK-LABEL: ld3.nxv12f32:
; CHECK: // %bb.0:
; CHECK-NEXT: addvl x8, x0, #-24
-; CHECK-NEXT: ld3w { z0.s, z1.s, z2.s }, p0/z, [x8]
+; CHECK-NEXT: ld3w { z0.s - z2.s }, p0/z, [x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 -24
%base_ptr = bitcast <vscale x 4 x float>* %base to float *
@@ -320,7 +320,7 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @ld3.nxv6i
; CHECK-LABEL: ld3.nxv6i64:
; CHECK: // %bb.0:
; CHECK-NEXT: addvl x8, x0, #21
-; CHECK-NEXT: ld3d { z0.d, z1.d, z2.d }, p0/z, [x8]
+; CHECK-NEXT: ld3d { z0.d - z2.d }, p0/z, [x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 21
%base_ptr = bitcast <vscale x 2 x i64>* %base to i64 *
@@ -332,7 +332,7 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @
; CHECK-LABEL: ld3.nxv6f64:
; CHECK: // %bb.0:
; CHECK-NEXT: addvl x8, x0, #-24
-; CHECK-NEXT: ld3d { z0.d, z1.d, z2.d }, p0/z, [x8]
+; CHECK-NEXT: ld3d { z0.d - z2.d }, p0/z, [x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 -24
%base_ptr = bitcast <vscale x 2 x double>* %base to double *
@@ -345,7 +345,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
; CHECK-LABEL: ld4.nxv64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #4
-; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8]
+; CHECK-NEXT: ld4b { z0.b - z3.b }, p0/z, [x0, x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 4
%base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -357,7 +357,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
; CHECK-LABEL: ld4.nxv64i8_lower_bound:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #-32
-; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8]
+; CHECK-NEXT: ld4b { z0.b - z3.b }, p0/z, [x0, x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -32
%base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -369,7 +369,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
; CHECK-LABEL: ld4.nxv64i8_upper_bound:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #28
-; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8]
+; CHECK-NEXT: ld4b { z0.b - z3.b }, p0/z, [x0, x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 28
%base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -381,7 +381,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
; CHECK-LABEL: ld4.nxv64i8_not_multiple_of_4_01:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #5
-; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8]
+; CHECK-NEXT: ld4b { z0.b - z3.b }, p0/z, [x0, x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 5
%base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -393,7 +393,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
; CHECK-LABEL: ld4.nxv64i8_not_multiple_of_4_02:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #6
-; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8]
+; CHECK-NEXT: ld4b { z0.b - z3.b }, p0/z, [x0, x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 6
%base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -405,7 +405,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
; CHECK-LABEL: ld4.nxv64i8_not_multiple_of_4_03:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #7
-; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8]
+; CHECK-NEXT: ld4b { z0.b - z3.b }, p0/z, [x0, x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 7
%base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
@@ -420,7 +420,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
; CHECK-NEXT: mov x9, #-576
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
-; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8]
+; CHECK-NEXT: ld4b { z0.b - z3.b }, p0/z, [x0, x8]
; CHECK-NEXT: ret
; FIXME: optimize OFFSET computation so that xOFFSET = (mul (RDVL #4) #9)
; xM = -9 * 2^6
@@ -439,7 +439,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
; CHECK-NEXT: mov w9, #512
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
-; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8]
+; CHECK-NEXT: ld4b { z0.b - z3.b }, p0/z, [x0, x8]
; CHECK-NEXT: ret
; FIXME: optimize OFFSET computation so that xOFFSET = (mul (RDVL #16) #2)
; xM = 2^9
@@ -456,7 +456,7 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
; CHECK-LABEL: ld4.nxv32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: addvl x8, x0, #8
-; CHECK-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x8]
+; CHECK-NEXT: ld4h { z0.h - z3.h }, p0/z, [x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 8
%base_ptr = bitcast <vscale x 8 x i16>* %base to i16 *
@@ -468,7 +468,7 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
; CHECK-LABEL: ld4.nxv32f16:
; CHECK: // %bb.0:
; CHECK-NEXT: addvl x8, x0, #28
-; CHECK-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x8]
+; CHECK-NEXT: ld4h { z0.h - z3.h }, p0/z, [x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 28
%base_ptr = bitcast <vscale x 8 x half>* %base to half *
@@ -480,7 +480,7 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <v
; CHECK-LABEL: ld4.nxv32bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: addvl x8, x0, #-32
-; CHECK-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x8]
+; CHECK-NEXT: ld4h { z0.h - z3.h }, p0/z, [x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* %addr, i64 -32
%base_ptr = bitcast <vscale x 8 x bfloat>* %base to bfloat *
@@ -493,7 +493,7 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
; CHECK-LABEL: ld4.nxv16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: addvl x8, x0, #28
-; CHECK-NEXT: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x8]
+; CHECK-NEXT: ld4w { z0.s - z3.s }, p0/z, [x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 28
%base_ptr = bitcast <vscale x 4 x i32>* %base to i32 *
@@ -505,7 +505,7 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
; CHECK-LABEL: ld4.nxv16f32:
; CHECK: // %bb.0:
; CHECK-NEXT: addvl x8, x0, #-32
-; CHECK-NEXT: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x8]
+; CHECK-NEXT: ld4w { z0.s - z3.s }, p0/z, [x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 -32
%base_ptr = bitcast <vscale x 4 x float>* %base to float *
@@ -518,7 +518,7 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
; CHECK-LABEL: ld4.nxv8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: addvl x8, x0, #28
-; CHECK-NEXT: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x8]
+; CHECK-NEXT: ld4d { z0.d - z3.d }, p0/z, [x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 28
%base_ptr = bitcast <vscale x 2 x i64>* %base to i64 *
@@ -530,7 +530,7 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <v
; CHECK-LABEL: ld4.nxv8f64:
; CHECK: // %bb.0:
; CHECK-NEXT: addvl x8, x0, #-32
-; CHECK-NEXT: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x8]
+; CHECK-NEXT: ld4d { z0.d - z3.d }, p0/z, [x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 -32
%base_ptr = bitcast <vscale x 2 x double>* %base to double *
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+reg-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+reg-addr-mode.ll
index ce11d4729f6a6..1c54865bb50bb 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+reg-addr-mode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+reg-addr-mode.ll
@@ -90,7 +90,7 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @ld2.nxv4f64(<vscale x 2
define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld3.nxv48i8(<vscale x 16 x i1> %Pg, i8 *%addr, i64 %a) {
; CHECK-LABEL: ld3.nxv48i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x1]
+; CHECK-NEXT: ld3b { z0.b - z2.b }, p0/z, [x0, x1]
; CHECK-NEXT: ret
%addr2 = getelementptr i8, i8 * %addr, i64 %a
%res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3.sret.nxv16i8(<vscale x 16 x i1> %Pg, i8 *%addr2)
@@ -101,7 +101,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld3.nxv48
define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @ld3.nxv24i16(<vscale x 8 x i1> %Pg, i16 *%addr, i64 %a) {
; CHECK-LABEL: ld3.nxv24i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT: ld3h { z0.h - z2.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT: ret
%addr2 = getelementptr i16, i16 * %addr, i64 %a
%res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3.sret.nxv8i16(<vscale x 8 x i1> %Pg, i16 *%addr2)
@@ -111,7 +111,7 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @ld3.nxv24
define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @ld3.nxv24f16(<vscale x 8 x i1> %Pg, half *%addr, i64 %a) {
; CHECK-LABEL: ld3.nxv24f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT: ld3h { z0.h - z2.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT: ret
%addr2 = getelementptr half, half * %addr, i64 %a
%res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3.sret.nxv8f16(<vscale x 8 x i1> %Pg, half *%addr2)
@@ -121,7 +121,7 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @ld3.nx
define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @ld3.nxv24bf16(<vscale x 8 x i1> %Pg, bfloat *%addr, i64 %a) #0 {
; CHECK-LABEL: ld3.nxv24bf16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT: ld3h { z0.h - z2.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT: ret
%addr2 = getelementptr bfloat, bfloat * %addr, i64 %a
%res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3.sret.nxv8bf16(<vscale x 8 x i1> %Pg, bfloat *%addr2)
@@ -132,7 +132,7 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @
define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @ld3.nxv12i32(<vscale x 4 x i1> %Pg, i32 *%addr, i64 %a) {
; CHECK-LABEL: ld3.nxv12i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, x1, lsl #2]
+; CHECK-NEXT: ld3w { z0.s - z2.s }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT: ret
%addr2 = getelementptr i32, i32 * %addr, i64 %a
%res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32(<vscale x 4 x i1> %Pg, i32 *%addr2)
@@ -142,7 +142,7 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @ld3.nxv12
define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @ld3.nxv12f32(<vscale x 4 x i1> %Pg, float *%addr, i64 %a) {
; CHECK-LABEL: ld3.nxv12f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, x1, lsl #2]
+; CHECK-NEXT: ld3w { z0.s - z2.s }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT: ret
%addr2 = getelementptr float, float * %addr, i64 %a
%res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3.sret.nxv4f32(<vscale x 4 x i1> %Pg, float *%addr2)
@@ -153,7 +153,7 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @ld3
define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @ld3.nxv6i64(<vscale x 2 x i1> %Pg, i64 *%addr, i64 %a) {
; CHECK-LABEL: ld3.nxv6i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, x1, lsl #3]
+; CHECK-NEXT: ld3d { z0.d - z2.d }, p0/z, [x0, x1, lsl #3]
; CHECK-NEXT: ret
%addr2 = getelementptr i64, i64 * %addr, i64 %a
%res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64(<vscale x 2 x i1> %Pg, i64 *%addr2)
@@ -163,7 +163,7 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @ld3.nxv6i
define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @ld3.nxv6f64(<vscale x 2 x i1> %Pg, double *%addr, i64 %a) {
; CHECK-LABEL: ld3.nxv6f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, x1, lsl #3]
+; CHECK-NEXT: ld3d { z0.d - z2.d }, p0/z, [x0, x1, lsl #3]
; CHECK-NEXT: ret
%addr2 = getelementptr double, double * %addr, i64 %a
%res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3.sret.nxv2f64(<vscale x 2 x i1> %Pg, double *%addr2)
@@ -174,7 +174,7 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @
define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld4.nxv64i8(<vscale x 16 x i1> %Pg, i8 *%addr, i64 %a) {
; CHECK-LABEL: ld4.nxv64i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x1]
+; CHECK-NEXT: ld4b { z0.b - z3.b }, p0/z, [x0, x1]
; CHECK-NEXT: ret
%addr2 = getelementptr i8, i8 * %addr, i64 %a
%res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4.sret.nxv16i8(<vscale x 16 x i1> %Pg, i8 *%addr2)
@@ -185,7 +185,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @ld4.nxv32i16(<vscale x 8 x i1> %Pg, i16 *%addr, i64 %a) {
; CHECK-LABEL: ld4.nxv32i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT: ld4h { z0.h - z3.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT: ret
%addr2 = getelementptr i16, i16 * %addr, i64 %a
%res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4.sret.nxv8i16(<vscale x 8 x i1> %Pg, i16 *%addr2)
@@ -195,7 +195,7 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @ld4.nxv32f16(<vscale x 8 x i1> %Pg, half *%addr, i64 %a) {
; CHECK-LABEL: ld4.nxv32f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT: ld4h { z0.h - z3.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT: ret
%addr2 = getelementptr half, half * %addr, i64 %a
%res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld4.sret.nxv8f16(<vscale x 8 x i1> %Pg, half *%addr2)
@@ -205,7 +205,7 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @ld4.nxv32bf16(<vscale x 8 x i1> %Pg, bfloat *%addr, i64 %a) #0 {
; CHECK-LABEL: ld4.nxv32bf16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT: ld4h { z0.h - z3.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT: ret
%addr2 = getelementptr bfloat, bfloat * %addr, i64 %a
%res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4.sret.nxv8bf16(<vscale x 8 x i1> %Pg, bfloat *%addr2)
@@ -216,7 +216,7 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <v
define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @ld4.nxv16i32(<vscale x 4 x i1> %Pg, i32 *%addr, i64 %a) {
; CHECK-LABEL: ld4.nxv16i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x1, lsl #2]
+; CHECK-NEXT: ld4w { z0.s - z3.s }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT: ret
%addr2 = getelementptr i32, i32 * %addr, i64 %a
%res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> %Pg, i32 *%addr2)
@@ -226,7 +226,7 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @ld4.nxv16f32(<vscale x 4 x i1> %Pg, float *%addr, i64 %a) {
; CHECK-LABEL: ld4.nxv16f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x1, lsl #2]
+; CHECK-NEXT: ld4w { z0.s - z3.s }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT: ret
%addr2 = getelementptr float, float * %addr, i64 %a
%res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld4.sret.nxv4f32(<vscale x 4 x i1> %Pg, float *%addr2)
@@ -237,7 +237,7 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @ld4.nxv8i64(<vscale x 2 x i1> %Pg, i64 *%addr, i64 %a) {
; CHECK-LABEL: ld4.nxv8i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x1, lsl #3]
+; CHECK-NEXT: ld4d { z0.d - z3.d }, p0/z, [x0, x1, lsl #3]
; CHECK-NEXT: ret
%addr2 = getelementptr i64, i64 * %addr, i64 %a
%res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64(<vscale x 2 x i1> %Pg, i64 *%addr2)
@@ -247,7 +247,7 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @ld4.nxv8f64(<vscale x 2 x i1> %Pg, double *%addr, i64 %a) {
; CHECK-LABEL: ld4.nxv8f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x1, lsl #3]
+; CHECK-NEXT: ld4d { z0.d - z3.d }, p0/z, [x0, x1, lsl #3]
; CHECK-NEXT: ret
%addr2 = getelementptr double, double * %addr, i64 %a
%res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1> %Pg, double *%addr2)
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
index e6ca643182b58..3adafc98ef4a7 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
@@ -217,7 +217,7 @@ define void @st3b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, #3, mul vl]
+; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, #3, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 3, i64 0
call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
@@ -235,7 +235,7 @@ define void @st3b_i8_invalid_imm_not_multiple_of_3_01(<vscale x 16 x i8> %v0, <v
; CHECK-NEXT: rdvl x8, #4
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x8]
+; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 4, i64 0
call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
@@ -253,7 +253,7 @@ define void @st3b_i8_invalid_imm_not_multiple_of_3_02(<vscale x 16 x i8> %v0, <v
; CHECK-NEXT: rdvl x8, #5
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x8]
+; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 5, i64 0
call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
@@ -271,7 +271,7 @@ define void @st3b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vsc
; CHECK-NEXT: rdvl x8, #-27
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x8]
+; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -27, i64 0
call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
@@ -289,7 +289,7 @@ define void @st3b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vsc
; CHECK-NEXT: rdvl x8, #24
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x8]
+; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 24, i64 0
call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
@@ -306,7 +306,7 @@ define void @st3b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, #-24, mul vl]
+; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, #-24, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -24, i64 0
call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
@@ -323,7 +323,7 @@ define void @st3b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, #21, mul vl]
+; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, #21, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 21, i64 0
call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
@@ -344,7 +344,7 @@ define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT: st3h { z0.h, z1.h, z2.h }, p0, [x0, #6, mul vl]
+; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0, #6, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 6, i64 0
call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> %v0,
@@ -361,7 +361,7 @@ define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT: st3h { z0.h, z1.h, z2.h }, p0, [x0, #9, mul vl]
+; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0, #9, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 9, i64 0
call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> %v0,
@@ -382,7 +382,7 @@ define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT: st3w { z0.s, z1.s, z2.s }, p0, [x0, #12, mul vl]
+; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0, #12, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 12, i64 0
call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> %v0,
@@ -399,7 +399,7 @@ define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT: st3w { z0.s, z1.s, z2.s }, p0, [x0, #15, mul vl]
+; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0, #15, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 15, i64 0
call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> %v0,
@@ -420,7 +420,7 @@ define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT: st3d { z0.d, z1.d, z2.d }, p0, [x0, #18, mul vl]
+; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0, #18, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 18, i64 0
call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> %v0,
@@ -437,7 +437,7 @@ define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT: st3d { z0.d, z1.d, z2.d }, p0, [x0, #-3, mul vl]
+; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0, #-3, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 -3, i64 0
call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> %v0,
@@ -459,7 +459,7 @@ define void @st4b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #4, mul vl]
+; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, #4, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 4, i64 0
call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
@@ -479,7 +479,7 @@ define void @st4b_i8_invalid_imm_not_multiple_of_4_01(<vscale x 16 x i8> %v0, <v
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x8]
+; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 5, i64 0
call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
@@ -499,7 +499,7 @@ define void @st4b_i8_invalid_imm_not_multiple_of_4_02(<vscale x 16 x i8> %v0, <v
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x8]
+; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 6, i64 0
call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
@@ -519,7 +519,7 @@ define void @st4b_i8_invalid_imm_not_multiple_of_4_03(<vscale x 16 x i8> %v0, <v
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x8]
+; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 7, i64 0
call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
@@ -542,7 +542,7 @@ define void @st4b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vsc
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x8]
+; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x8]
; CHECK-NEXT: ret
; FIXME: optimize OFFSET computation so that xOFFSET = (mul (RDVL #4) #9)
; xM = -9 * 2^6
@@ -569,7 +569,7 @@ define void @st4b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vsc
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x8]
+; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x8]
; CHECK-NEXT: ret
; FIXME: optimize OFFSET computation so that xOFFSET = (shl (RDVL #16) #1)
; xM = 2^9
@@ -592,7 +592,7 @@ define void @st4b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #-32, mul vl]
+; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, #-32, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -32, i64 0
call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
@@ -611,7 +611,7 @@ define void @st4b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #28, mul vl]
+; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 28, i64 0
call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
@@ -634,7 +634,7 @@ define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, #8, mul vl]
+; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0, #8, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 8, i64 0
call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %v0,
@@ -653,7 +653,7 @@ define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, #12, mul vl]
+; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0, #12, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 12, i64 0
call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> %v0,
@@ -676,7 +676,7 @@ define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, #16, mul vl]
+; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0, #16, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 16, i64 0
call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %v0,
@@ -695,7 +695,7 @@ define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, #20, mul vl]
+; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0, #20, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 20, i64 0
call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> %v0,
@@ -718,7 +718,7 @@ define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, #24, mul vl]
+; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0, #24, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 24, i64 0
call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %v0,
@@ -737,7 +737,7 @@ define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, #28, mul vl]
+; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 28, i64 0
call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> %v0,
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
index 464cf97c57036..1d5b0011c20e7 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
@@ -133,7 +133,7 @@ define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x1]
+; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, x1]
; CHECK-NEXT: ret
%1 = getelementptr i8, i8* %addr, i64 %offset
call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
@@ -154,7 +154,7 @@ define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT: st3h { z0.h, z1.h, z2.h }, p0, [x0, x1, lsl #1]
+; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
%1 = getelementptr i16, i16* %addr, i64 %offset
call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> %v0,
@@ -171,7 +171,7 @@ define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT: st3h { z0.h, z1.h, z2.h }, p0, [x0, x1, lsl #1]
+; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
%1 = getelementptr half, half* %addr, i64 %offset
call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> %v0,
@@ -192,7 +192,7 @@ define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT: st3w { z0.s, z1.s, z2.s }, p0, [x0, x1, lsl #2]
+; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
%1 = getelementptr i32, i32* %addr, i64 %offset
call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> %v0,
@@ -209,7 +209,7 @@ define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT: st3w { z0.s, z1.s, z2.s }, p0, [x0, x1, lsl #2]
+; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
%1 = getelementptr float, float* %addr, i64 %offset
call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> %v0,
@@ -230,7 +230,7 @@ define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT: st3d { z0.d, z1.d, z2.d }, p0, [x0, x1, lsl #3]
+; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
%1 = getelementptr i64, i64* %addr, i64 %offset
call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> %v0,
@@ -247,7 +247,7 @@ define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT: st3d { z0.d, z1.d, z2.d }, p0, [x0, x1, lsl #3]
+; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
%1 = getelementptr double, double* %addr, i64 %offset
call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> %v0,
@@ -269,7 +269,7 @@ define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x1]
+; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x1]
; CHECK-NEXT: ret
%1 = getelementptr i8, i8* %addr, i64 %offset
call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
@@ -292,7 +292,7 @@ define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x1, lsl #1]
+; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
%1 = getelementptr i16, i16* %addr, i64 %offset
call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %v0,
@@ -311,7 +311,7 @@ define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x1, lsl #1]
+; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
%1 = getelementptr half, half* %addr, i64 %offset
call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> %v0,
@@ -334,7 +334,7 @@ define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x1, lsl #2]
+; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
%1 = getelementptr i32, i32* %addr, i64 %offset
call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %v0,
@@ -353,7 +353,7 @@ define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x1, lsl #2]
+; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
%1 = getelementptr float, float* %addr, i64 %offset
call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> %v0,
@@ -376,7 +376,7 @@ define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x1, lsl #3]
+; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
%1 = getelementptr i64, i64* %addr, i64 %offset
call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %v0,
@@ -395,7 +395,7 @@ define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x1, lsl #3]
+; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
%1 = getelementptr double, double* %addr, i64 %offset
call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> %v0,
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
index 556ecfe866f5e..3992ce3ff9262 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
@@ -154,7 +154,7 @@ define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0]
+; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
<vscale x 16 x i8> %v1,
@@ -174,7 +174,7 @@ define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT: st3h { z0.h, z1.h, z2.h }, p0, [x0]
+; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> %v0,
<vscale x 8 x i16> %v1,
@@ -190,7 +190,7 @@ define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT: st3h { z0.h, z1.h, z2.h }, p0, [x0]
+; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> %v0,
<vscale x 8 x half> %v1,
@@ -206,7 +206,7 @@ define void @st3h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vs
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT: st3h { z0.h, z1.h, z2.h }, p0, [x0]
+; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st3.nxv8bf16(<vscale x 8 x bfloat> %v0,
<vscale x 8 x bfloat> %v1,
@@ -226,7 +226,7 @@ define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT: st3w { z0.s, z1.s, z2.s }, p0, [x0]
+; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> %v0,
<vscale x 4 x i32> %v1,
@@ -242,7 +242,7 @@ define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT: st3w { z0.s, z1.s, z2.s }, p0, [x0]
+; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> %v0,
<vscale x 4 x float> %v1,
@@ -262,7 +262,7 @@ define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT: st3d { z0.d, z1.d, z2.d }, p0, [x0]
+; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> %v0,
<vscale x 2 x i64> %v1,
@@ -278,7 +278,7 @@ define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT: st3d { z0.d, z1.d, z2.d }, p0, [x0]
+; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> %v0,
<vscale x 2 x double> %v1,
@@ -294,7 +294,7 @@ define void @st3d_ptr(<vscale x 2 x i8*> %v0, <vscale x 2 x i8*> %v1, <vscale x
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT: st3d { z0.d, z1.d, z2.d }, p0, [x0]
+; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st3.nxv2p0i8(<vscale x 2 x i8*> %v0,
<vscale x 2 x i8*> %v1,
@@ -315,7 +315,7 @@ define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0]
+; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
<vscale x 16 x i8> %v1,
@@ -337,7 +337,7 @@ define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0]
+; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %v0,
<vscale x 8 x i16> %v1,
@@ -355,7 +355,7 @@ define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0]
+; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> %v0,
<vscale x 8 x half> %v1,
@@ -373,7 +373,7 @@ define void @st4h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vs
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0]
+; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat> %v0,
<vscale x 8 x bfloat> %v1,
@@ -395,7 +395,7 @@ define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0]
+; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %v0,
<vscale x 4 x i32> %v1,
@@ -413,7 +413,7 @@ define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0]
+; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> %v0,
<vscale x 4 x float> %v1,
@@ -435,7 +435,7 @@ define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
+; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %v0,
<vscale x 2 x i64> %v1,
@@ -453,7 +453,7 @@ define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
+; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> %v0,
<vscale x 2 x double> %v1,
@@ -471,7 +471,7 @@ define void @st4d_ptr(<vscale x 2 x i8*> %v0, <vscale x 2 x i8*> %v1, <vscale x
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
+; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st4.nxv2p0i8(<vscale x 2 x i8*> %v0,
<vscale x 2 x i8*> %v1,
diff --git a/llvm/test/CodeGen/AArch64/sve-ldN.mir b/llvm/test/CodeGen/AArch64/sve-ldN.mir
index c59c53da806ba..b6b89abc61bfc 100644
--- a/llvm/test/CodeGen/AArch64/sve-ldN.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ldN.mir
@@ -59,22 +59,22 @@ body: |
; CHECK-OFFSET-NEXT: ld2w { z0.s, z1.s }, p0/z, [sp, #14, mul vl]
; CHECK-OFFSET-NEXT: ld2d { z0.d, z1.d }, p0/z, [sp, #-16, mul vl]
; CHECK-OFFSET-NEXT: ld2d { z0.d, z1.d }, p0/z, [sp, #14, mul vl]
- ; CHECK-OFFSET-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [sp, #-24, mul vl]
- ; CHECK-OFFSET-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [sp, #21, mul vl]
- ; CHECK-OFFSET-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [sp, #-24, mul vl]
- ; CHECK-OFFSET-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [sp, #21, mul vl]
- ; CHECK-OFFSET-NEXT: ld3w { z0.s, z1.s, z2.s }, p0/z, [sp, #-24, mul vl]
- ; CHECK-OFFSET-NEXT: ld3w { z0.s, z1.s, z2.s }, p0/z, [sp, #21, mul vl]
- ; CHECK-OFFSET-NEXT: ld3d { z0.d, z1.d, z2.d }, p0/z, [sp, #-24, mul vl]
- ; CHECK-OFFSET-NEXT: ld3d { z0.d, z1.d, z2.d }, p0/z, [sp, #21, mul vl]
- ; CHECK-OFFSET-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [sp, #-32, mul vl]
- ; CHECK-OFFSET-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [sp, #28, mul vl]
- ; CHECK-OFFSET-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [sp, #-32, mul vl]
- ; CHECK-OFFSET-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [sp, #28, mul vl]
- ; CHECK-OFFSET-NEXT: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [sp, #-32, mul vl]
- ; CHECK-OFFSET-NEXT: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [sp, #28, mul vl]
- ; CHECK-OFFSET-NEXT: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [sp, #-32, mul vl]
- ; CHECK-OFFSET-NEXT: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [sp, #28, mul vl]
+ ; CHECK-OFFSET-NEXT: ld3b { z0.b - z2.b }, p0/z, [sp, #-24, mul vl]
+ ; CHECK-OFFSET-NEXT: ld3b { z0.b - z2.b }, p0/z, [sp, #21, mul vl]
+ ; CHECK-OFFSET-NEXT: ld3h { z0.h - z2.h }, p0/z, [sp, #-24, mul vl]
+ ; CHECK-OFFSET-NEXT: ld3h { z0.h - z2.h }, p0/z, [sp, #21, mul vl]
+ ; CHECK-OFFSET-NEXT: ld3w { z0.s - z2.s }, p0/z, [sp, #-24, mul vl]
+ ; CHECK-OFFSET-NEXT: ld3w { z0.s - z2.s }, p0/z, [sp, #21, mul vl]
+ ; CHECK-OFFSET-NEXT: ld3d { z0.d - z2.d }, p0/z, [sp, #-24, mul vl]
+ ; CHECK-OFFSET-NEXT: ld3d { z0.d - z2.d }, p0/z, [sp, #21, mul vl]
+ ; CHECK-OFFSET-NEXT: ld4b { z0.b - z3.b }, p0/z, [sp, #-32, mul vl]
+ ; CHECK-OFFSET-NEXT: ld4b { z0.b - z3.b }, p0/z, [sp, #28, mul vl]
+ ; CHECK-OFFSET-NEXT: ld4h { z0.h - z3.h }, p0/z, [sp, #-32, mul vl]
+ ; CHECK-OFFSET-NEXT: ld4h { z0.h - z3.h }, p0/z, [sp, #28, mul vl]
+ ; CHECK-OFFSET-NEXT: ld4w { z0.s - z3.s }, p0/z, [sp, #-32, mul vl]
+ ; CHECK-OFFSET-NEXT: ld4w { z0.s - z3.s }, p0/z, [sp, #28, mul vl]
+ ; CHECK-OFFSET-NEXT: ld4d { z0.d - z3.d }, p0/z, [sp, #-32, mul vl]
+ ; CHECK-OFFSET-NEXT: ld4d { z0.d - z3.d }, p0/z, [sp, #28, mul vl]
; CHECK-OFFSET-NEXT: addvl sp, sp, #31
; CHECK-OFFSET-NEXT: addvl sp, sp, #1
; CHECK-OFFSET-NEXT: ldr x29, [sp], #16
@@ -195,37 +195,37 @@ body: |
; CHECK-OFFSET-NEXT: addvl x8, sp, #2
; CHECK-OFFSET-NEXT: ld2d { z0.d, z1.d }, p0/z, [x8, #14, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #-3
- ; CHECK-OFFSET-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x8, #-24, mul vl]
+ ; CHECK-OFFSET-NEXT: ld3b { z0.b - z2.b }, p0/z, [x8, #-24, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #3
- ; CHECK-OFFSET-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x8, #21, mul vl]
+ ; CHECK-OFFSET-NEXT: ld3b { z0.b - z2.b }, p0/z, [x8, #21, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #-3
- ; CHECK-OFFSET-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [x8, #-24, mul vl]
+ ; CHECK-OFFSET-NEXT: ld3h { z0.h - z2.h }, p0/z, [x8, #-24, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #3
- ; CHECK-OFFSET-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [x8, #21, mul vl]
+ ; CHECK-OFFSET-NEXT: ld3h { z0.h - z2.h }, p0/z, [x8, #21, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #-3
- ; CHECK-OFFSET-NEXT: ld3w { z0.s, z1.s, z2.s }, p0/z, [x8, #-24, mul vl]
+ ; CHECK-OFFSET-NEXT: ld3w { z0.s - z2.s }, p0/z, [x8, #-24, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #3
- ; CHECK-OFFSET-NEXT: ld3w { z0.s, z1.s, z2.s }, p0/z, [x8, #21, mul vl]
+ ; CHECK-OFFSET-NEXT: ld3w { z0.s - z2.s }, p0/z, [x8, #21, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #-3
- ; CHECK-OFFSET-NEXT: ld3d { z0.d, z1.d, z2.d }, p0/z, [x8, #-24, mul vl]
+ ; CHECK-OFFSET-NEXT: ld3d { z0.d - z2.d }, p0/z, [x8, #-24, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #3
- ; CHECK-OFFSET-NEXT: ld3d { z0.d, z1.d, z2.d }, p0/z, [x8, #21, mul vl]
+ ; CHECK-OFFSET-NEXT: ld3d { z0.d - z2.d }, p0/z, [x8, #21, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #-4
- ; CHECK-OFFSET-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x8, #-32, mul vl]
+ ; CHECK-OFFSET-NEXT: ld4b { z0.b - z3.b }, p0/z, [x8, #-32, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #4
- ; CHECK-OFFSET-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x8, #28, mul vl]
+ ; CHECK-OFFSET-NEXT: ld4b { z0.b - z3.b }, p0/z, [x8, #28, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #-4
- ; CHECK-OFFSET-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x8, #-32, mul vl]
+ ; CHECK-OFFSET-NEXT: ld4h { z0.h - z3.h }, p0/z, [x8, #-32, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #4
- ; CHECK-OFFSET-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x8, #28, mul vl]
+ ; CHECK-OFFSET-NEXT: ld4h { z0.h - z3.h }, p0/z, [x8, #28, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #-4
- ; CHECK-OFFSET-NEXT: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x8, #-32, mul vl]
+ ; CHECK-OFFSET-NEXT: ld4w { z0.s - z3.s }, p0/z, [x8, #-32, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #4
- ; CHECK-OFFSET-NEXT: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x8, #28, mul vl]
+ ; CHECK-OFFSET-NEXT: ld4w { z0.s - z3.s }, p0/z, [x8, #28, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #-4
- ; CHECK-OFFSET-NEXT: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x8, #-32, mul vl]
+ ; CHECK-OFFSET-NEXT: ld4d { z0.d - z3.d }, p0/z, [x8, #-32, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #4
- ; CHECK-OFFSET-NEXT: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x8, #28, mul vl]
+ ; CHECK-OFFSET-NEXT: ld4d { z0.d - z3.d }, p0/z, [x8, #28, mul vl]
; CHECK-OFFSET-NEXT: addvl sp, sp, #31
; CHECK-OFFSET-NEXT: addvl sp, sp, #1
; CHECK-OFFSET-NEXT: ldr x29, [sp], #16
diff --git a/llvm/test/CodeGen/AArch64/sve-stN.mir b/llvm/test/CodeGen/AArch64/sve-stN.mir
index ac5c036a10bd0..7371f30a4a512 100644
--- a/llvm/test/CodeGen/AArch64/sve-stN.mir
+++ b/llvm/test/CodeGen/AArch64/sve-stN.mir
@@ -59,22 +59,22 @@ body: |
; CHECK-OFFSET-NEXT: st2w { z0.s, z1.s }, p0, [sp, #14, mul vl]
; CHECK-OFFSET-NEXT: st2d { z0.d, z1.d }, p0, [sp, #-16, mul vl]
; CHECK-OFFSET-NEXT: st2d { z0.d, z1.d }, p0, [sp, #14, mul vl]
- ; CHECK-OFFSET-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [sp, #-24, mul vl]
- ; CHECK-OFFSET-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [sp, #21, mul vl]
- ; CHECK-OFFSET-NEXT: st3h { z0.h, z1.h, z2.h }, p0, [sp, #-24, mul vl]
- ; CHECK-OFFSET-NEXT: st3h { z0.h, z1.h, z2.h }, p0, [sp, #21, mul vl]
- ; CHECK-OFFSET-NEXT: st3w { z0.s, z1.s, z2.s }, p0, [sp, #-24, mul vl]
- ; CHECK-OFFSET-NEXT: st3w { z0.s, z1.s, z2.s }, p0, [sp, #21, mul vl]
- ; CHECK-OFFSET-NEXT: st3d { z0.d, z1.d, z2.d }, p0, [sp, #-24, mul vl]
- ; CHECK-OFFSET-NEXT: st3d { z0.d, z1.d, z2.d }, p0, [sp, #21, mul vl]
- ; CHECK-OFFSET-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [sp, #-32, mul vl]
- ; CHECK-OFFSET-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [sp, #28, mul vl]
- ; CHECK-OFFSET-NEXT: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [sp, #-32, mul vl]
- ; CHECK-OFFSET-NEXT: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [sp, #28, mul vl]
- ; CHECK-OFFSET-NEXT: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [sp, #-32, mul vl]
- ; CHECK-OFFSET-NEXT: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [sp, #28, mul vl]
- ; CHECK-OFFSET-NEXT: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [sp, #-32, mul vl]
- ; CHECK-OFFSET-NEXT: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [sp, #28, mul vl]
+ ; CHECK-OFFSET-NEXT: st3b { z0.b - z2.b }, p0, [sp, #-24, mul vl]
+ ; CHECK-OFFSET-NEXT: st3b { z0.b - z2.b }, p0, [sp, #21, mul vl]
+ ; CHECK-OFFSET-NEXT: st3h { z0.h - z2.h }, p0, [sp, #-24, mul vl]
+ ; CHECK-OFFSET-NEXT: st3h { z0.h - z2.h }, p0, [sp, #21, mul vl]
+ ; CHECK-OFFSET-NEXT: st3w { z0.s - z2.s }, p0, [sp, #-24, mul vl]
+ ; CHECK-OFFSET-NEXT: st3w { z0.s - z2.s }, p0, [sp, #21, mul vl]
+ ; CHECK-OFFSET-NEXT: st3d { z0.d - z2.d }, p0, [sp, #-24, mul vl]
+ ; CHECK-OFFSET-NEXT: st3d { z0.d - z2.d }, p0, [sp, #21, mul vl]
+ ; CHECK-OFFSET-NEXT: st4b { z0.b - z3.b }, p0, [sp, #-32, mul vl]
+ ; CHECK-OFFSET-NEXT: st4b { z0.b - z3.b }, p0, [sp, #28, mul vl]
+ ; CHECK-OFFSET-NEXT: st4h { z0.h - z3.h }, p0, [sp, #-32, mul vl]
+ ; CHECK-OFFSET-NEXT: st4h { z0.h - z3.h }, p0, [sp, #28, mul vl]
+ ; CHECK-OFFSET-NEXT: st4w { z0.s - z3.s }, p0, [sp, #-32, mul vl]
+ ; CHECK-OFFSET-NEXT: st4w { z0.s - z3.s }, p0, [sp, #28, mul vl]
+ ; CHECK-OFFSET-NEXT: st4d { z0.d - z3.d }, p0, [sp, #-32, mul vl]
+ ; CHECK-OFFSET-NEXT: st4d { z0.d - z3.d }, p0, [sp, #28, mul vl]
; CHECK-OFFSET-NEXT: addvl sp, sp, #31
; CHECK-OFFSET-NEXT: addvl sp, sp, #1
; CHECK-OFFSET-NEXT: ldr x29, [sp], #16
@@ -195,37 +195,37 @@ body: |
; CHECK-OFFSET-NEXT: addvl x8, sp, #2
; CHECK-OFFSET-NEXT: st2d { z0.d, z1.d }, p0, [x8, #14, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #-3
- ; CHECK-OFFSET-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x8, #-24, mul vl]
+ ; CHECK-OFFSET-NEXT: st3b { z0.b - z2.b }, p0, [x8, #-24, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #3
- ; CHECK-OFFSET-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x8, #21, mul vl]
+ ; CHECK-OFFSET-NEXT: st3b { z0.b - z2.b }, p0, [x8, #21, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #-3
- ; CHECK-OFFSET-NEXT: st3h { z0.h, z1.h, z2.h }, p0, [x8, #-24, mul vl]
+ ; CHECK-OFFSET-NEXT: st3h { z0.h - z2.h }, p0, [x8, #-24, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #3
- ; CHECK-OFFSET-NEXT: st3h { z0.h, z1.h, z2.h }, p0, [x8, #21, mul vl]
+ ; CHECK-OFFSET-NEXT: st3h { z0.h - z2.h }, p0, [x8, #21, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #-3
- ; CHECK-OFFSET-NEXT: st3w { z0.s, z1.s, z2.s }, p0, [x8, #-24, mul vl]
+ ; CHECK-OFFSET-NEXT: st3w { z0.s - z2.s }, p0, [x8, #-24, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #3
- ; CHECK-OFFSET-NEXT: st3w { z0.s, z1.s, z2.s }, p0, [x8, #21, mul vl]
+ ; CHECK-OFFSET-NEXT: st3w { z0.s - z2.s }, p0, [x8, #21, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #-3
- ; CHECK-OFFSET-NEXT: st3d { z0.d, z1.d, z2.d }, p0, [x8, #-24, mul vl]
+ ; CHECK-OFFSET-NEXT: st3d { z0.d - z2.d }, p0, [x8, #-24, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #3
- ; CHECK-OFFSET-NEXT: st3d { z0.d, z1.d, z2.d }, p0, [x8, #21, mul vl]
+ ; CHECK-OFFSET-NEXT: st3d { z0.d - z2.d }, p0, [x8, #21, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #-4
- ; CHECK-OFFSET-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x8, #-32, mul vl]
+ ; CHECK-OFFSET-NEXT: st4b { z0.b - z3.b }, p0, [x8, #-32, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #4
- ; CHECK-OFFSET-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x8, #28, mul vl]
+ ; CHECK-OFFSET-NEXT: st4b { z0.b - z3.b }, p0, [x8, #28, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #-4
- ; CHECK-OFFSET-NEXT: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x8, #-32, mul vl]
+ ; CHECK-OFFSET-NEXT: st4h { z0.h - z3.h }, p0, [x8, #-32, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #4
- ; CHECK-OFFSET-NEXT: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x8, #28, mul vl]
+ ; CHECK-OFFSET-NEXT: st4h { z0.h - z3.h }, p0, [x8, #28, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #-4
- ; CHECK-OFFSET-NEXT: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x8, #-32, mul vl]
+ ; CHECK-OFFSET-NEXT: st4w { z0.s - z3.s }, p0, [x8, #-32, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #4
- ; CHECK-OFFSET-NEXT: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x8, #28, mul vl]
+ ; CHECK-OFFSET-NEXT: st4w { z0.s - z3.s }, p0, [x8, #28, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #-4
- ; CHECK-OFFSET-NEXT: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x8, #-32, mul vl]
+ ; CHECK-OFFSET-NEXT: st4d { z0.d - z3.d }, p0, [x8, #-32, mul vl]
; CHECK-OFFSET-NEXT: addvl x8, sp, #4
- ; CHECK-OFFSET-NEXT: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x8, #28, mul vl]
+ ; CHECK-OFFSET-NEXT: st4d { z0.d - z3.d }, p0, [x8, #28, mul vl]
; CHECK-OFFSET-NEXT: addvl sp, sp, #31
; CHECK-OFFSET-NEXT: addvl sp, sp, #1
; CHECK-OFFSET-NEXT: ldr x29, [sp], #16
diff --git a/llvm/test/MC/AArch64/SVE/ld3b.s b/llvm/test/MC/AArch64/SVE/ld3b.s
index f5d67870d145f..c986fff658b58 100644
--- a/llvm/test/MC/AArch64/SVE/ld3b.s
+++ b/llvm/test/MC/AArch64/SVE/ld3b.s
@@ -10,31 +10,31 @@
// RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x0]
-// CHECK-INST: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x0]
+// CHECK-INST: ld3b { z0.b - z2.b }, p0/z, [x0, x0]
// CHECK-ENCODING: [0x00,0xc0,0x40,0xa4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a440c000 <unknown>
ld3b { z5.b, z6.b, z7.b }, p3/z, [x17, x16]
-// CHECK-INST: ld3b { z5.b, z6.b, z7.b }, p3/z, [x17, x16]
+// CHECK-INST: ld3b { z5.b - z7.b }, p3/z, [x17, x16]
// CHECK-ENCODING: [0x25,0xce,0x50,0xa4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a450ce25 <unknown>
ld3b { z0.b, z1.b, z2.b }, p0/z, [x0]
-// CHECK-INST: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0]
+// CHECK-INST: ld3b { z0.b - z2.b }, p0/z, [x0]
// CHECK-ENCODING: [0x00,0xe0,0x40,0xa4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a440e000 <unknown>
ld3b { z23.b, z24.b, z25.b }, p3/z, [x13, #-24, mul vl]
-// CHECK-INST: ld3b { z23.b, z24.b, z25.b }, p3/z, [x13, #-24, mul vl]
+// CHECK-INST: ld3b { z23.b - z25.b }, p3/z, [x13, #-24, mul vl]
// CHECK-ENCODING: [0xb7,0xed,0x48,0xa4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a448edb7 <unknown>
ld3b { z21.b, z22.b, z23.b }, p5/z, [x10, #15, mul vl]
-// CHECK-INST: ld3b { z21.b, z22.b, z23.b }, p5/z, [x10, #15, mul vl]
+// CHECK-INST: ld3b { z21.b - z23.b }, p5/z, [x10, #15, mul vl]
// CHECK-ENCODING: [0x55,0xf5,0x45,0xa4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a445f555 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE/ld3d.s b/llvm/test/MC/AArch64/SVE/ld3d.s
index 4a82a8a6feb2f..3daad4603ff61 100644
--- a/llvm/test/MC/AArch64/SVE/ld3d.s
+++ b/llvm/test/MC/AArch64/SVE/ld3d.s
@@ -10,31 +10,31 @@
// RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, x0, lsl #3]
-// CHECK-INST: ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, x0, lsl #3]
+// CHECK-INST: ld3d { z0.d - z2.d }, p0/z, [x0, x0, lsl #3]
// CHECK-ENCODING: [0x00,0xc0,0xc0,0xa5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a5c0c000 <unknown>
ld3d { z5.d, z6.d, z7.d }, p3/z, [x17, x16, lsl #3]
-// CHECK-INST: ld3d { z5.d, z6.d, z7.d }, p3/z, [x17, x16, lsl #3]
+// CHECK-INST: ld3d { z5.d - z7.d }, p3/z, [x17, x16, lsl #3]
// CHECK-ENCODING: [0x25,0xce,0xd0,0xa5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a5d0ce25 <unknown>
ld3d { z0.d, z1.d, z2.d }, p0/z, [x0]
-// CHECK-INST: ld3d { z0.d, z1.d, z2.d }, p0/z, [x0]
+// CHECK-INST: ld3d { z0.d - z2.d }, p0/z, [x0]
// CHECK-ENCODING: [0x00,0xe0,0xc0,0xa5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a5c0e000 <unknown>
ld3d { z23.d, z24.d, z25.d }, p3/z, [x13, #-24, mul vl]
-// CHECK-INST: ld3d { z23.d, z24.d, z25.d }, p3/z, [x13, #-24, mul vl]
+// CHECK-INST: ld3d { z23.d - z25.d }, p3/z, [x13, #-24, mul vl]
// CHECK-ENCODING: [0xb7,0xed,0xc8,0xa5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a5c8edb7 <unknown>
ld3d { z21.d, z22.d, z23.d }, p5/z, [x10, #15, mul vl]
-// CHECK-INST: ld3d { z21.d, z22.d, z23.d }, p5/z, [x10, #15, mul vl]
+// CHECK-INST: ld3d { z21.d - z23.d }, p5/z, [x10, #15, mul vl]
// CHECK-ENCODING: [0x55,0xf5,0xc5,0xa5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a5c5f555 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE/ld3h.s b/llvm/test/MC/AArch64/SVE/ld3h.s
index e323623f1a4c1..9470a94b579cd 100644
--- a/llvm/test/MC/AArch64/SVE/ld3h.s
+++ b/llvm/test/MC/AArch64/SVE/ld3h.s
@@ -10,31 +10,37 @@
// RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x0, lsl #1]
-// CHECK-INST: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x0, lsl #1]
+// CHECK-INST: ld3h { z0.h - z2.h }, p0/z, [x0, x0, lsl #1]
// CHECK-ENCODING: [0x00,0xc0,0xc0,0xa4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a4c0c000 <unknown>
ld3h { z5.h, z6.h, z7.h }, p3/z, [x17, x16, lsl #1]
-// CHECK-INST: ld3h { z5.h, z6.h, z7.h }, p3/z, [x17, x16, lsl #1]
+// CHECK-INST: ld3h { z5.h - z7.h }, p3/z, [x17, x16, lsl #1]
// CHECK-ENCODING: [0x25,0xce,0xd0,0xa4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a4d0ce25 <unknown>
ld3h { z0.h, z1.h, z2.h }, p0/z, [x0]
-// CHECK-INST: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0]
+// CHECK-INST: ld3h { z0.h - z2.h }, p0/z, [x0]
// CHECK-ENCODING: [0x00,0xe0,0xc0,0xa4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a4c0e000 <unknown>
ld3h { z23.h, z24.h, z25.h }, p3/z, [x13, #-24, mul vl]
-// CHECK-INST: ld3h { z23.h, z24.h, z25.h }, p3/z, [x13, #-24, mul vl]
+// CHECK-INST: ld3h { z23.h - z25.h }, p3/z, [x13, #-24, mul vl]
// CHECK-ENCODING: [0xb7,0xed,0xc8,0xa4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a4c8edb7 <unknown>
ld3h { z21.h, z22.h, z23.h }, p5/z, [x10, #15, mul vl]
-// CHECK-INST: ld3h { z21.h, z22.h, z23.h }, p5/z, [x10, #15, mul vl]
+// CHECK-INST: ld3h { z21.h - z23.h }, p5/z, [x10, #15, mul vl]
// CHECK-ENCODING: [0x55,0xf5,0xc5,0xa4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a4c5f555 <unknown>
+
+ld3h { z30.h, z31.h, z0.h }, p5/z, [x10, #15, mul vl]
+// CHECK-INST: ld3h { z30.h, z31.h, z0.h }, p5/z, [x10, #15, mul vl]
+// CHECK-ENCODING: [0x5e,0xf5,0xc5,0xa4]
+// CHECK-ERROR: instruction requires: sve or sme
+// CHECK-UNKNOWN: a4c5f55e <unknown>
diff --git a/llvm/test/MC/AArch64/SVE/ld3w.s b/llvm/test/MC/AArch64/SVE/ld3w.s
index a7c8debc059ab..e8f3f55a5f6ff 100644
--- a/llvm/test/MC/AArch64/SVE/ld3w.s
+++ b/llvm/test/MC/AArch64/SVE/ld3w.s
@@ -10,31 +10,31 @@
// RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, x0, lsl #2]
-// CHECK-INST: ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, x0, lsl #2]
+// CHECK-INST: ld3w { z0.s - z2.s }, p0/z, [x0, x0, lsl #2]
// CHECK-ENCODING: [0x00,0xc0,0x40,0xa5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a540c000 <unknown>
ld3w { z5.s, z6.s, z7.s }, p3/z, [x17, x16, lsl #2]
-// CHECK-INST: ld3w { z5.s, z6.s, z7.s }, p3/z, [x17, x16, lsl #2]
+// CHECK-INST: ld3w { z5.s - z7.s }, p3/z, [x17, x16, lsl #2]
// CHECK-ENCODING: [0x25,0xce,0x50,0xa5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a550ce25 <unknown>
ld3w { z0.s, z1.s, z2.s }, p0/z, [x0]
-// CHECK-INST: ld3w { z0.s, z1.s, z2.s }, p0/z, [x0]
+// CHECK-INST: ld3w { z0.s - z2.s }, p0/z, [x0]
// CHECK-ENCODING: [0x00,0xe0,0x40,0xa5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a540e000 <unknown>
ld3w { z23.s, z24.s, z25.s }, p3/z, [x13, #-24, mul vl]
-// CHECK-INST: ld3w { z23.s, z24.s, z25.s }, p3/z, [x13, #-24, mul vl]
+// CHECK-INST: ld3w { z23.s - z25.s }, p3/z, [x13, #-24, mul vl]
// CHECK-ENCODING: [0xb7,0xed,0x48,0xa5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a548edb7 <unknown>
ld3w { z21.s, z22.s, z23.s }, p5/z, [x10, #15, mul vl]
-// CHECK-INST: ld3w { z21.s, z22.s, z23.s }, p5/z, [x10, #15, mul vl]
+// CHECK-INST: ld3w { z21.s - z23.s }, p5/z, [x10, #15, mul vl]
// CHECK-ENCODING: [0x55,0xf5,0x45,0xa5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a545f555 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE/ld4b.s b/llvm/test/MC/AArch64/SVE/ld4b.s
index de4a58bbc1320..bfb576df84043 100644
--- a/llvm/test/MC/AArch64/SVE/ld4b.s
+++ b/llvm/test/MC/AArch64/SVE/ld4b.s
@@ -10,31 +10,31 @@
// RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x0]
-// CHECK-INST: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x0]
+// CHECK-INST: ld4b { z0.b - z3.b }, p0/z, [x0, x0]
// CHECK-ENCODING: [0x00,0xc0,0x60,0xa4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a460c000 <unknown>
ld4b { z5.b, z6.b, z7.b, z8.b }, p3/z, [x17, x16]
-// CHECK-INST: ld4b { z5.b, z6.b, z7.b, z8.b }, p3/z, [x17, x16]
+// CHECK-INST: ld4b { z5.b - z8.b }, p3/z, [x17, x16]
// CHECK-ENCODING: [0x25,0xce,0x70,0xa4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a470ce25 <unknown>
ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0]
-// CHECK-INST: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0]
+// CHECK-INST: ld4b { z0.b - z3.b }, p0/z, [x0]
// CHECK-ENCODING: [0x00,0xe0,0x60,0xa4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a460e000 <unknown>
ld4b { z23.b, z24.b, z25.b, z26.b }, p3/z, [x13, #-32, mul vl]
-// CHECK-INST: ld4b { z23.b, z24.b, z25.b, z26.b }, p3/z, [x13, #-32, mul vl]
+// CHECK-INST: ld4b { z23.b - z26.b }, p3/z, [x13, #-32, mul vl]
// CHECK-ENCODING: [0xb7,0xed,0x68,0xa4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a468edb7 <unknown>
ld4b { z21.b, z22.b, z23.b, z24.b }, p5/z, [x10, #20, mul vl]
-// CHECK-INST: ld4b { z21.b, z22.b, z23.b, z24.b }, p5/z, [x10, #20, mul vl]
+// CHECK-INST: ld4b { z21.b - z24.b }, p5/z, [x10, #20, mul vl]
// CHECK-ENCODING: [0x55,0xf5,0x65,0xa4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a465f555 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE/ld4d.s b/llvm/test/MC/AArch64/SVE/ld4d.s
index 034a446de5254..59b72f98a6218 100644
--- a/llvm/test/MC/AArch64/SVE/ld4d.s
+++ b/llvm/test/MC/AArch64/SVE/ld4d.s
@@ -10,31 +10,31 @@
// RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x0, lsl #3]
-// CHECK-INST: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x0, lsl #3]
+// CHECK-INST: ld4d { z0.d - z3.d }, p0/z, [x0, x0, lsl #3]
// CHECK-ENCODING: [0x00,0xc0,0xe0,0xa5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a5e0c000 <unknown>
ld4d { z5.d, z6.d, z7.d, z8.d }, p3/z, [x17, x16, lsl #3]
-// CHECK-INST: ld4d { z5.d, z6.d, z7.d, z8.d }, p3/z, [x17, x16, lsl #3]
+// CHECK-INST: ld4d { z5.d - z8.d }, p3/z, [x17, x16, lsl #3]
// CHECK-ENCODING: [0x25,0xce,0xf0,0xa5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a5f0ce25 <unknown>
ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0]
-// CHECK-INST: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0]
+// CHECK-INST: ld4d { z0.d - z3.d }, p0/z, [x0]
// CHECK-ENCODING: [0x00,0xe0,0xe0,0xa5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a5e0e000 <unknown>
ld4d { z23.d, z24.d, z25.d, z26.d }, p3/z, [x13, #-32, mul vl]
-// CHECK-INST: ld4d { z23.d, z24.d, z25.d, z26.d }, p3/z, [x13, #-32, mul vl]
+// CHECK-INST: ld4d { z23.d - z26.d }, p3/z, [x13, #-32, mul vl]
// CHECK-ENCODING: [0xb7,0xed,0xe8,0xa5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a5e8edb7 <unknown>
ld4d { z21.d, z22.d, z23.d, z24.d }, p5/z, [x10, #20, mul vl]
-// CHECK-INST: ld4d { z21.d, z22.d, z23.d, z24.d }, p5/z, [x10, #20, mul vl]
+// CHECK-INST: ld4d { z21.d - z24.d }, p5/z, [x10, #20, mul vl]
// CHECK-ENCODING: [0x55,0xf5,0xe5,0xa5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a5e5f555 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE/ld4h.s b/llvm/test/MC/AArch64/SVE/ld4h.s
index 6b6371b8fe2a6..3df9bd0ccb26e 100644
--- a/llvm/test/MC/AArch64/SVE/ld4h.s
+++ b/llvm/test/MC/AArch64/SVE/ld4h.s
@@ -10,31 +10,37 @@
// RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x0, lsl #1]
-// CHECK-INST: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x0, lsl #1]
+// CHECK-INST: ld4h { z0.h - z3.h }, p0/z, [x0, x0, lsl #1]
// CHECK-ENCODING: [0x00,0xc0,0xe0,0xa4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a4e0c000 <unknown>
ld4h { z5.h, z6.h, z7.h, z8.h }, p3/z, [x17, x16, lsl #1]
-// CHECK-INST: ld4h { z5.h, z6.h, z7.h, z8.h }, p3/z, [x17, x16, lsl #1]
+// CHECK-INST: ld4h { z5.h - z8.h }, p3/z, [x17, x16, lsl #1]
// CHECK-ENCODING: [0x25,0xce,0xf0,0xa4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a4f0ce25 <unknown>
ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0]
-// CHECK-INST: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0]
+// CHECK-INST: ld4h { z0.h - z3.h }, p0/z, [x0]
// CHECK-ENCODING: [0x00,0xe0,0xe0,0xa4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a4e0e000 <unknown>
ld4h { z23.h, z24.h, z25.h, z26.h }, p3/z, [x13, #-32, mul vl]
-// CHECK-INST: ld4h { z23.h, z24.h, z25.h, z26.h }, p3/z, [x13, #-32, mul vl]
+// CHECK-INST: ld4h { z23.h - z26.h }, p3/z, [x13, #-32, mul vl]
// CHECK-ENCODING: [0xb7,0xed,0xe8,0xa4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a4e8edb7 <unknown>
ld4h { z21.h, z22.h, z23.h, z24.h }, p5/z, [x10, #20, mul vl]
-// CHECK-INST: ld4h { z21.h, z22.h, z23.h, z24.h }, p5/z, [x10, #20, mul vl]
+// CHECK-INST: ld4h { z21.h - z24.h }, p5/z, [x10, #20, mul vl]
// CHECK-ENCODING: [0x55,0xf5,0xe5,0xa4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a4e5f555 <unknown>
+
+ld4h { z31.h, z0.h, z1.h, z2.h }, p5/z, [x10, #20, mul vl]
+// CHECK-INST: ld4h { z31.h, z0.h, z1.h, z2.h }, p5/z, [x10, #20, mul vl]
+// CHECK-ENCODING: [0x5f,0xf5,0xe5,0xa4]
+// CHECK-ERROR: instruction requires: sve or sme
+// CHECK-UNKNOWN: a4e5f55f <unknown>
diff --git a/llvm/test/MC/AArch64/SVE/ld4w.s b/llvm/test/MC/AArch64/SVE/ld4w.s
index c5cf15b62e469..2b03447945925 100644
--- a/llvm/test/MC/AArch64/SVE/ld4w.s
+++ b/llvm/test/MC/AArch64/SVE/ld4w.s
@@ -10,31 +10,31 @@
// RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x0, lsl #2]
-// CHECK-INST: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x0, lsl #2]
+// CHECK-INST: ld4w { z0.s - z3.s }, p0/z, [x0, x0, lsl #2]
// CHECK-ENCODING: [0x00,0xc0,0x60,0xa5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a560c000 <unknown>
ld4w { z5.s, z6.s, z7.s, z8.s }, p3/z, [x17, x16, lsl #2]
-// CHECK-INST: ld4w { z5.s, z6.s, z7.s, z8.s }, p3/z, [x17, x16, lsl #2]
+// CHECK-INST: ld4w { z5.s - z8.s }, p3/z, [x17, x16, lsl #2]
// CHECK-ENCODING: [0x25,0xce,0x70,0xa5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a570ce25 <unknown>
ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0]
-// CHECK-INST: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0]
+// CHECK-INST: ld4w { z0.s - z3.s }, p0/z, [x0]
// CHECK-ENCODING: [0x00,0xe0,0x60,0xa5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a560e000 <unknown>
ld4w { z23.s, z24.s, z25.s, z26.s }, p3/z, [x13, #-32, mul vl]
-// CHECK-INST: ld4w { z23.s, z24.s, z25.s, z26.s }, p3/z, [x13, #-32, mul vl]
+// CHECK-INST: ld4w { z23.s - z26.s }, p3/z, [x13, #-32, mul vl]
// CHECK-ENCODING: [0xb7,0xed,0x68,0xa5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a568edb7 <unknown>
ld4w { z21.s, z22.s, z23.s, z24.s }, p5/z, [x10, #20, mul vl]
-// CHECK-INST: ld4w { z21.s, z22.s, z23.s, z24.s }, p5/z, [x10, #20, mul vl]
+// CHECK-INST: ld4w { z21.s - z24.s }, p5/z, [x10, #20, mul vl]
// CHECK-ENCODING: [0x55,0xf5,0x65,0xa5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: a565f555 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE/st3b.s b/llvm/test/MC/AArch64/SVE/st3b.s
index 7049a2da9b98b..7ed86472103f6 100644
--- a/llvm/test/MC/AArch64/SVE/st3b.s
+++ b/llvm/test/MC/AArch64/SVE/st3b.s
@@ -10,31 +10,31 @@
// RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
st3b { z0.b, z1.b, z2.b }, p0, [x0, x0]
-// CHECK-INST: st3b { z0.b, z1.b, z2.b }, p0, [x0, x0]
+// CHECK-INST: st3b { z0.b - z2.b }, p0, [x0, x0]
// CHECK-ENCODING: [0x00,0x60,0x40,0xe4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e4406000 <unknown>
st3b { z5.b, z6.b, z7.b }, p3, [x17, x16]
-// CHECK-INST: st3b { z5.b, z6.b, z7.b }, p3, [x17, x16]
+// CHECK-INST: st3b { z5.b - z7.b }, p3, [x17, x16]
// CHECK-ENCODING: [0x25,0x6e,0x50,0xe4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e4506e25 <unknown>
st3b { z0.b, z1.b, z2.b }, p0, [x0]
-// CHECK-INST: st3b { z0.b, z1.b, z2.b }, p0, [x0]
+// CHECK-INST: st3b { z0.b - z2.b }, p0, [x0]
// CHECK-ENCODING: [0x00,0xe0,0x50,0xe4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e450e000 <unknown>
st3b { z23.b, z24.b, z25.b }, p3, [x13, #-24, mul vl]
-// CHECK-INST: st3b { z23.b, z24.b, z25.b }, p3, [x13, #-24, mul vl]
+// CHECK-INST: st3b { z23.b - z25.b }, p3, [x13, #-24, mul vl]
// CHECK-ENCODING: [0xb7,0xed,0x58,0xe4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e458edb7 <unknown>
st3b { z21.b, z22.b, z23.b }, p5, [x10, #15, mul vl]
-// CHECK-INST: st3b { z21.b, z22.b, z23.b }, p5, [x10, #15, mul vl]
+// CHECK-INST: st3b { z21.b - z23.b }, p5, [x10, #15, mul vl]
// CHECK-ENCODING: [0x55,0xf5,0x55,0xe4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e455f555 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE/st3d.s b/llvm/test/MC/AArch64/SVE/st3d.s
index 0a2285e7f0d47..e020906ce076c 100644
--- a/llvm/test/MC/AArch64/SVE/st3d.s
+++ b/llvm/test/MC/AArch64/SVE/st3d.s
@@ -10,31 +10,31 @@
// RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
st3d { z0.d, z1.d, z2.d }, p0, [x0, x0, lsl #3]
-// CHECK-INST: st3d { z0.d, z1.d, z2.d }, p0, [x0, x0, lsl #3]
+// CHECK-INST: st3d { z0.d - z2.d }, p0, [x0, x0, lsl #3]
// CHECK-ENCODING: [0x00,0x60,0xc0,0xe5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e5c06000 <unknown>
st3d { z5.d, z6.d, z7.d }, p3, [x17, x16, lsl #3]
-// CHECK-INST: st3d { z5.d, z6.d, z7.d }, p3, [x17, x16, lsl #3]
+// CHECK-INST: st3d { z5.d - z7.d }, p3, [x17, x16, lsl #3]
// CHECK-ENCODING: [0x25,0x6e,0xd0,0xe5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e5d06e25 <unknown>
st3d { z0.d, z1.d, z2.d }, p0, [x0]
-// CHECK-INST: st3d { z0.d, z1.d, z2.d }, p0, [x0]
+// CHECK-INST: st3d { z0.d - z2.d }, p0, [x0]
// CHECK-ENCODING: [0x00,0xe0,0xd0,0xe5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e5d0e000 <unknown>
st3d { z23.d, z24.d, z25.d }, p3, [x13, #-24, mul vl]
-// CHECK-INST: st3d { z23.d, z24.d, z25.d }, p3, [x13, #-24, mul vl]
+// CHECK-INST: st3d { z23.d - z25.d }, p3, [x13, #-24, mul vl]
// CHECK-ENCODING: [0xb7,0xed,0xd8,0xe5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e5d8edb7 <unknown>
st3d { z21.d, z22.d, z23.d }, p5, [x10, #15, mul vl]
-// CHECK-INST: st3d { z21.d, z22.d, z23.d }, p5, [x10, #15, mul vl]
+// CHECK-INST: st3d { z21.d - z23.d }, p5, [x10, #15, mul vl]
// CHECK-ENCODING: [0x55,0xf5,0xd5,0xe5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e5d5f555 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE/st3h.s b/llvm/test/MC/AArch64/SVE/st3h.s
index c0a00d0bb87dc..0b79a23f70ab7 100644
--- a/llvm/test/MC/AArch64/SVE/st3h.s
+++ b/llvm/test/MC/AArch64/SVE/st3h.s
@@ -10,31 +10,37 @@
// RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
st3h { z0.h, z1.h, z2.h }, p0, [x0, x0, lsl #1]
-// CHECK-INST: st3h { z0.h, z1.h, z2.h }, p0, [x0, x0, lsl #1]
+// CHECK-INST: st3h { z0.h - z2.h }, p0, [x0, x0, lsl #1]
// CHECK-ENCODING: [0x00,0x60,0xc0,0xe4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e4c06000 <unknown>
st3h { z5.h, z6.h, z7.h }, p3, [x17, x16, lsl #1]
-// CHECK-INST: st3h { z5.h, z6.h, z7.h }, p3, [x17, x16, lsl #1]
+// CHECK-INST: st3h { z5.h - z7.h }, p3, [x17, x16, lsl #1]
// CHECK-ENCODING: [0x25,0x6e,0xd0,0xe4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e4d06e25 <unknown>
st3h { z0.h, z1.h, z2.h }, p0, [x0]
-// CHECK-INST: st3h { z0.h, z1.h, z2.h }, p0, [x0]
+// CHECK-INST: st3h { z0.h - z2.h }, p0, [x0]
// CHECK-ENCODING: [0x00,0xe0,0xd0,0xe4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e4d0e000 <unknown>
st3h { z23.h, z24.h, z25.h }, p3, [x13, #-24, mul vl]
-// CHECK-INST: st3h { z23.h, z24.h, z25.h }, p3, [x13, #-24, mul vl]
+// CHECK-INST: st3h { z23.h - z25.h }, p3, [x13, #-24, mul vl]
// CHECK-ENCODING: [0xb7,0xed,0xd8,0xe4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e4d8edb7 <unknown>
st3h { z21.h, z22.h, z23.h }, p5, [x10, #15, mul vl]
-// CHECK-INST: st3h { z21.h, z22.h, z23.h }, p5, [x10, #15, mul vl]
+// CHECK-INST: st3h { z21.h - z23.h }, p5, [x10, #15, mul vl]
// CHECK-ENCODING: [0x55,0xf5,0xd5,0xe4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e4d5f555 <unknown>
+
+st3h { z31.h, z0.h, z1.h }, p5, [x10, #15, mul vl]
+// CHECK-INST: st3h { z31.h, z0.h, z1.h }, p5, [x10, #15, mul vl]
+// CHECK-ENCODING: [0x5f,0xf5,0xd5,0xe4]
+// CHECK-ERROR: instruction requires: sve or sme
+// CHECK-UNKNOWN: e4d5f55f <unknown>
diff --git a/llvm/test/MC/AArch64/SVE/st3w.s b/llvm/test/MC/AArch64/SVE/st3w.s
index 7738c0e8cd0a1..8ff579e7c9650 100644
--- a/llvm/test/MC/AArch64/SVE/st3w.s
+++ b/llvm/test/MC/AArch64/SVE/st3w.s
@@ -10,31 +10,31 @@
// RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
st3w { z0.s, z1.s, z2.s }, p0, [x0, x0, lsl #2]
-// CHECK-INST: st3w { z0.s, z1.s, z2.s }, p0, [x0, x0, lsl #2]
+// CHECK-INST: st3w { z0.s - z2.s }, p0, [x0, x0, lsl #2]
// CHECK-ENCODING: [0x00,0x60,0x40,0xe5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e5406000 <unknown>
st3w { z5.s, z6.s, z7.s }, p3, [x17, x16, lsl #2]
-// CHECK-INST: st3w { z5.s, z6.s, z7.s }, p3, [x17, x16, lsl #2]
+// CHECK-INST: st3w { z5.s - z7.s }, p3, [x17, x16, lsl #2]
// CHECK-ENCODING: [0x25,0x6e,0x50,0xe5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e5506e25 <unknown>
st3w { z0.s, z1.s, z2.s }, p0, [x0]
-// CHECK-INST: st3w { z0.s, z1.s, z2.s }, p0, [x0]
+// CHECK-INST: st3w { z0.s - z2.s }, p0, [x0]
// CHECK-ENCODING: [0x00,0xe0,0x50,0xe5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e550e000 <unknown>
st3w { z23.s, z24.s, z25.s }, p3, [x13, #-24, mul vl]
-// CHECK-INST: st3w { z23.s, z24.s, z25.s }, p3, [x13, #-24, mul vl]
+// CHECK-INST: st3w { z23.s - z25.s }, p3, [x13, #-24, mul vl]
// CHECK-ENCODING: [0xb7,0xed,0x58,0xe5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e558edb7 <unknown>
st3w { z21.s, z22.s, z23.s }, p5, [x10, #15, mul vl]
-// CHECK-INST: st3w { z21.s, z22.s, z23.s }, p5, [x10, #15, mul vl]
+// CHECK-INST: st3w { z21.s - z23.s }, p5, [x10, #15, mul vl]
// CHECK-ENCODING: [0x55,0xf5,0x55,0xe5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e555f555 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE/st4b.s b/llvm/test/MC/AArch64/SVE/st4b.s
index 115e8e0fc605c..86c80d7f468c9 100644
--- a/llvm/test/MC/AArch64/SVE/st4b.s
+++ b/llvm/test/MC/AArch64/SVE/st4b.s
@@ -10,31 +10,31 @@
// RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x0]
-// CHECK-INST: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x0]
+// CHECK-INST: st4b { z0.b - z3.b }, p0, [x0, x0]
// CHECK-ENCODING: [0x00,0x60,0x60,0xe4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e4606000 <unknown>
st4b { z5.b, z6.b, z7.b, z8.b }, p3, [x17, x16]
-// CHECK-INST: st4b { z5.b, z6.b, z7.b, z8.b }, p3, [x17, x16]
+// CHECK-INST: st4b { z5.b - z8.b }, p3, [x17, x16]
// CHECK-ENCODING: [0x25,0x6e,0x70,0xe4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e4706e25 <unknown>
st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0]
-// CHECK-INST: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0]
+// CHECK-INST: st4b { z0.b - z3.b }, p0, [x0]
// CHECK-ENCODING: [0x00,0xe0,0x70,0xe4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e470e000 <unknown>
st4b { z23.b, z24.b, z25.b, z26.b }, p3, [x13, #-32, mul vl]
-// CHECK-INST: st4b { z23.b, z24.b, z25.b, z26.b }, p3, [x13, #-32, mul vl]
+// CHECK-INST: st4b { z23.b - z26.b }, p3, [x13, #-32, mul vl]
// CHECK-ENCODING: [0xb7,0xed,0x78,0xe4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e478edb7 <unknown>
st4b { z21.b, z22.b, z23.b, z24.b }, p5, [x10, #20, mul vl]
-// CHECK-INST: st4b { z21.b, z22.b, z23.b, z24.b }, p5, [x10, #20, mul vl]
+// CHECK-INST: st4b { z21.b - z24.b }, p5, [x10, #20, mul vl]
// CHECK-ENCODING: [0x55,0xf5,0x75,0xe4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e475f555 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE/st4d.s b/llvm/test/MC/AArch64/SVE/st4d.s
index d1890839f571e..17cacea3e4f81 100644
--- a/llvm/test/MC/AArch64/SVE/st4d.s
+++ b/llvm/test/MC/AArch64/SVE/st4d.s
@@ -10,31 +10,31 @@
// RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x0, lsl #3]
-// CHECK-INST: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x0, lsl #3]
+// CHECK-INST: st4d { z0.d - z3.d }, p0, [x0, x0, lsl #3]
// CHECK-ENCODING: [0x00,0x60,0xe0,0xe5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e5e06000 <unknown>
st4d { z5.d, z6.d, z7.d, z8.d }, p3, [x17, x16, lsl #3]
-// CHECK-INST: st4d { z5.d, z6.d, z7.d, z8.d }, p3, [x17, x16, lsl #3]
+// CHECK-INST: st4d { z5.d - z8.d }, p3, [x17, x16, lsl #3]
// CHECK-ENCODING: [0x25,0x6e,0xf0,0xe5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e5f06e25 <unknown>
st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
-// CHECK-INST: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
+// CHECK-INST: st4d { z0.d - z3.d }, p0, [x0]
// CHECK-ENCODING: [0x00,0xe0,0xf0,0xe5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e5f0e000 <unknown>
st4d { z23.d, z24.d, z25.d, z26.d }, p3, [x13, #-32, mul vl]
-// CHECK-INST: st4d { z23.d, z24.d, z25.d, z26.d }, p3, [x13, #-32, mul vl]
+// CHECK-INST: st4d { z23.d - z26.d }, p3, [x13, #-32, mul vl]
// CHECK-ENCODING: [0xb7,0xed,0xf8,0xe5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e5f8edb7 <unknown>
st4d { z21.d, z22.d, z23.d, z24.d }, p5, [x10, #20, mul vl]
-// CHECK-INST: st4d { z21.d, z22.d, z23.d, z24.d }, p5, [x10, #20, mul vl]
+// CHECK-INST: st4d { z21.d - z24.d }, p5, [x10, #20, mul vl]
// CHECK-ENCODING: [0x55,0xf5,0xf5,0xe5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e5f5f555 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE/st4h.s b/llvm/test/MC/AArch64/SVE/st4h.s
index 1feb6f3ddf9ac..7587724ce4edd 100644
--- a/llvm/test/MC/AArch64/SVE/st4h.s
+++ b/llvm/test/MC/AArch64/SVE/st4h.s
@@ -10,31 +10,37 @@
// RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x0, lsl #1]
-// CHECK-INST: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x0, lsl #1]
+// CHECK-INST: st4h { z0.h - z3.h }, p0, [x0, x0, lsl #1]
// CHECK-ENCODING: [0x00,0x60,0xe0,0xe4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e4e06000 <unknown>
st4h { z5.h, z6.h, z7.h, z8.h }, p3, [x17, x16, lsl #1]
-// CHECK-INST: st4h { z5.h, z6.h, z7.h, z8.h }, p3, [x17, x16, lsl #1]
+// CHECK-INST: st4h { z5.h - z8.h }, p3, [x17, x16, lsl #1]
// CHECK-ENCODING: [0x25,0x6e,0xf0,0xe4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e4f06e25 <unknown>
st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0]
-// CHECK-INST: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0]
+// CHECK-INST: st4h { z0.h - z3.h }, p0, [x0]
// CHECK-ENCODING: [0x00,0xe0,0xf0,0xe4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e4f0e000 <unknown>
st4h { z23.h, z24.h, z25.h, z26.h }, p3, [x13, #-32, mul vl]
-// CHECK-INST: st4h { z23.h, z24.h, z25.h, z26.h }, p3, [x13, #-32, mul vl]
+// CHECK-INST: st4h { z23.h - z26.h }, p3, [x13, #-32, mul vl]
// CHECK-ENCODING: [0xb7,0xed,0xf8,0xe4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e4f8edb7 <unknown>
st4h { z21.h, z22.h, z23.h, z24.h }, p5, [x10, #20, mul vl]
-// CHECK-INST: st4h { z21.h, z22.h, z23.h, z24.h }, p5, [x10, #20, mul vl]
+// CHECK-INST: st4h { z21.h - z24.h }, p5, [x10, #20, mul vl]
// CHECK-ENCODING: [0x55,0xf5,0xf5,0xe4]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e4f5f555 <unknown>
+
+st4h { z29.h, z30.h, z31.h, z0.h }, p5, [x10, #20, mul vl]
+// CHECK-INST: st4h { z29.h, z30.h, z31.h, z0.h }, p5, [x10, #20, mul vl]
+// CHECK-ENCODING: [0x5d,0xf5,0xf5,0xe4]
+// CHECK-ERROR: instruction requires: sve or sme
+// CHECK-UNKNOWN: e4f5f55d <unknown>
diff --git a/llvm/test/MC/AArch64/SVE/st4w.s b/llvm/test/MC/AArch64/SVE/st4w.s
index 278b9652fbe06..5d6b31573639f 100644
--- a/llvm/test/MC/AArch64/SVE/st4w.s
+++ b/llvm/test/MC/AArch64/SVE/st4w.s
@@ -10,31 +10,31 @@
// RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x0, lsl #2]
-// CHECK-INST: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x0, lsl #2]
+// CHECK-INST: st4w { z0.s - z3.s }, p0, [x0, x0, lsl #2]
// CHECK-ENCODING: [0x00,0x60,0x60,0xe5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e5606000 <unknown>
st4w { z5.s, z6.s, z7.s, z8.s }, p3, [x17, x16, lsl #2]
-// CHECK-INST: st4w { z5.s, z6.s, z7.s, z8.s }, p3, [x17, x16, lsl #2]
+// CHECK-INST: st4w { z5.s - z8.s }, p3, [x17, x16, lsl #2]
// CHECK-ENCODING: [0x25,0x6e,0x70,0xe5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e5706e25 <unknown>
st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0]
-// CHECK-INST: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0]
+// CHECK-INST: st4w { z0.s - z3.s }, p0, [x0]
// CHECK-ENCODING: [0x00,0xe0,0x70,0xe5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e570e000 <unknown>
st4w { z23.s, z24.s, z25.s, z26.s }, p3, [x13, #-32, mul vl]
-// CHECK-INST: st4w { z23.s, z24.s, z25.s, z26.s }, p3, [x13, #-32, mul vl]
+// CHECK-INST: st4w { z23.s - z26.s }, p3, [x13, #-32, mul vl]
// CHECK-ENCODING: [0xb7,0xed,0x78,0xe5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e578edb7 <unknown>
st4w { z21.s, z22.s, z23.s, z24.s }, p5, [x10, #20, mul vl]
-// CHECK-INST: st4w { z21.s, z22.s, z23.s, z24.s }, p5, [x10, #20, mul vl]
+// CHECK-INST: st4w { z21.s - z24.s }, p5, [x10, #20, mul vl]
// CHECK-ENCODING: [0x55,0xf5,0x75,0xe5]
// CHECK-ERROR: instruction requires: sve or sme
// CHECK-UNKNOWN: e575f555 <unknown>
diff --git a/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s
index 0f1d46f43bd4b..ee1eb521a1e2b 100644
--- a/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s
@@ -3568,46 +3568,46 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 2 11 1.00 * U ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl]
# CHECK-NEXT: 2 11 1.00 * U ld2w { z23.s, z24.s }, p3/z, [x13, #-16, mul vl]
# CHECK-NEXT: 3 12 1.50 * U ld2w { z5.s, z6.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT: 4 15 6.50 * U ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x0]
-# CHECK-NEXT: 4 15 6.50 * U ld3b { z0.b, z1.b, z2.b }, p0/z, [x0]
-# CHECK-NEXT: 4 15 6.50 * U ld3b { z21.b, z22.b, z23.b }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: 4 15 6.50 * U ld3b { z23.b, z24.b, z25.b }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: 4 15 6.50 * U ld3b { z5.b, z6.b, z7.b }, p3/z, [x17, x16]
-# CHECK-NEXT: 4 12 2.00 * U ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT: 3 11 1.50 * U ld3d { z0.d, z1.d, z2.d }, p0/z, [x0]
-# CHECK-NEXT: 3 11 1.50 * U ld3d { z21.d, z22.d, z23.d }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: 3 11 1.50 * U ld3d { z23.d, z24.d, z25.d }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: 4 12 2.00 * U ld3d { z5.d, z6.d, z7.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT: 4 15 6.50 * U ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: 4 15 6.50 * U ld3h { z0.h, z1.h, z2.h }, p0/z, [x0]
-# CHECK-NEXT: 4 15 6.50 * U ld3h { z21.h, z22.h, z23.h }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: 4 15 6.50 * U ld3h { z23.h, z24.h, z25.h }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: 4 15 6.50 * U ld3h { z5.h, z6.h, z7.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT: 4 12 2.00 * U ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: 3 11 1.50 * U ld3w { z0.s, z1.s, z2.s }, p0/z, [x0]
-# CHECK-NEXT: 3 11 1.50 * U ld3w { z21.s, z22.s, z23.s }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: 3 11 1.50 * U ld3w { z23.s, z24.s, z25.s }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: 4 12 2.00 * U ld3w { z5.s, z6.s, z7.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT: 5 15 8.50 * U ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x0]
-# CHECK-NEXT: 5 15 8.50 * U ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0]
-# CHECK-NEXT: 5 15 8.50 * U ld4b { z21.b, z22.b, z23.b, z24.b }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: 5 15 8.50 * U ld4b { z23.b, z24.b, z25.b, z26.b }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: 5 15 8.50 * U ld4b { z5.b, z6.b, z7.b, z8.b }, p3/z, [x17, x16]
-# CHECK-NEXT: 5 12 2.50 * U ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT: 4 11 2.00 * U ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0]
-# CHECK-NEXT: 4 11 2.00 * U ld4d { z21.d, z22.d, z23.d, z24.d }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: 4 11 2.00 * U ld4d { z23.d, z24.d, z25.d, z26.d }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: 5 12 2.50 * U ld4d { z5.d, z6.d, z7.d, z8.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT: 5 15 8.50 * U ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: 5 15 8.50 * U ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0]
-# CHECK-NEXT: 5 15 8.50 * U ld4h { z21.h, z22.h, z23.h, z24.h }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: 5 15 8.50 * U ld4h { z23.h, z24.h, z25.h, z26.h }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: 5 15 8.50 * U ld4h { z5.h, z6.h, z7.h, z8.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT: 5 12 2.50 * U ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: 4 11 2.00 * U ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0]
-# CHECK-NEXT: 4 11 2.00 * U ld4w { z21.s, z22.s, z23.s, z24.s }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: 4 11 2.00 * U ld4w { z23.s, z24.s, z25.s, z26.s }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: 5 12 2.50 * U ld4w { z5.s, z6.s, z7.s, z8.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: 4 15 6.50 * U ld3b { z0.b - z2.b }, p0/z, [x0, x0]
+# CHECK-NEXT: 4 15 6.50 * U ld3b { z0.b - z2.b }, p0/z, [x0]
+# CHECK-NEXT: 4 15 6.50 * U ld3b { z21.b - z23.b }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 4 15 6.50 * U ld3b { z23.b - z25.b }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 4 15 6.50 * U ld3b { z5.b - z7.b }, p3/z, [x17, x16]
+# CHECK-NEXT: 4 12 2.00 * U ld3d { z0.d - z2.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: 3 11 1.50 * U ld3d { z0.d - z2.d }, p0/z, [x0]
+# CHECK-NEXT: 3 11 1.50 * U ld3d { z21.d - z23.d }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 3 11 1.50 * U ld3d { z23.d - z25.d }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 4 12 2.00 * U ld3d { z5.d - z7.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: 4 15 6.50 * U ld3h { z0.h - z2.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 4 15 6.50 * U ld3h { z0.h - z2.h }, p0/z, [x0]
+# CHECK-NEXT: 4 15 6.50 * U ld3h { z21.h - z23.h }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 4 15 6.50 * U ld3h { z23.h - z25.h }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 4 15 6.50 * U ld3h { z5.h - z7.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: 4 12 2.00 * U ld3w { z0.s - z2.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 3 11 1.50 * U ld3w { z0.s - z2.s }, p0/z, [x0]
+# CHECK-NEXT: 3 11 1.50 * U ld3w { z21.s - z23.s }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 3 11 1.50 * U ld3w { z23.s - z25.s }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 4 12 2.00 * U ld3w { z5.s - z7.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: 5 15 8.50 * U ld4b { z0.b - z3.b }, p0/z, [x0, x0]
+# CHECK-NEXT: 5 15 8.50 * U ld4b { z0.b - z3.b }, p0/z, [x0]
+# CHECK-NEXT: 5 15 8.50 * U ld4b { z21.b - z24.b }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 5 15 8.50 * U ld4b { z23.b - z26.b }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 5 15 8.50 * U ld4b { z5.b - z8.b }, p3/z, [x17, x16]
+# CHECK-NEXT: 5 12 2.50 * U ld4d { z0.d - z3.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: 4 11 2.00 * U ld4d { z0.d - z3.d }, p0/z, [x0]
+# CHECK-NEXT: 4 11 2.00 * U ld4d { z21.d - z24.d }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 4 11 2.00 * U ld4d { z23.d - z26.d }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 5 12 2.50 * U ld4d { z5.d - z8.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: 5 15 8.50 * U ld4h { z0.h - z3.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 5 15 8.50 * U ld4h { z0.h - z3.h }, p0/z, [x0]
+# CHECK-NEXT: 5 15 8.50 * U ld4h { z21.h - z24.h }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 5 15 8.50 * U ld4h { z23.h - z26.h }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 5 15 8.50 * U ld4h { z5.h - z8.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: 5 12 2.50 * U ld4w { z0.s - z3.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 4 11 2.00 * U ld4w { z0.s - z3.s }, p0/z, [x0]
+# CHECK-NEXT: 4 11 2.00 * U ld4w { z21.s - z24.s }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 4 11 2.00 * U ld4w { z23.s - z26.s }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 5 12 2.50 * U ld4w { z5.s - z8.s }, p3/z, [x17, x16, lsl #2]
# CHECK-NEXT: 1 11 0.50 * U ldff1b { z0.d }, p0/z, [x0, x0]
# CHECK-NEXT: 1 16 2.00 * U ldff1b { z0.d }, p0/z, [z0.d]
# CHECK-NEXT: 1 11 0.50 * U ldff1b { z0.h }, p0/z, [x0, x0]
@@ -4580,46 +4580,46 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 3 12 2.00 * U st2w { z21.s, z22.s }, p5, [x10, #10, mul vl]
# CHECK-NEXT: 3 12 2.00 * U st2w { z23.s, z24.s }, p3, [x13, #-16, mul vl]
# CHECK-NEXT: 2 11 2.00 * U st2w { z5.s, z6.s }, p3, [x17, x16, lsl #2]
-# CHECK-NEXT: 4 15 12.00 * U st3b { z0.b, z1.b, z2.b }, p0, [x0, x0]
-# CHECK-NEXT: 4 15 12.00 * U st3b { z0.b, z1.b, z2.b }, p0, [x0]
-# CHECK-NEXT: 4 15 12.00 * U st3b { z21.b, z22.b, z23.b }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: 4 15 12.00 * U st3b { z23.b, z24.b, z25.b }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: 4 15 12.00 * U st3b { z5.b, z6.b, z7.b }, p3, [x17, x16]
-# CHECK-NEXT: 3 11 3.00 * U st3d { z0.d, z1.d, z2.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT: 4 12 3.00 * U st3d { z0.d, z1.d, z2.d }, p0, [x0]
-# CHECK-NEXT: 4 12 3.00 * U st3d { z21.d, z22.d, z23.d }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: 4 12 3.00 * U st3d { z23.d, z24.d, z25.d }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: 3 11 3.00 * U st3d { z5.d, z6.d, z7.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT: 4 15 12.00 * U st3h { z0.h, z1.h, z2.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT: 4 15 12.00 * U st3h { z0.h, z1.h, z2.h }, p0, [x0]
-# CHECK-NEXT: 4 15 12.00 * U st3h { z21.h, z22.h, z23.h }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: 4 15 12.00 * U st3h { z23.h, z24.h, z25.h }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: 4 15 12.00 * U st3h { z5.h, z6.h, z7.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT: 3 11 3.00 * U st3w { z0.s, z1.s, z2.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT: 4 12 3.00 * U st3w { z0.s, z1.s, z2.s }, p0, [x0]
-# CHECK-NEXT: 4 12 3.00 * U st3w { z21.s, z22.s, z23.s }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: 4 12 3.00 * U st3w { z23.s, z24.s, z25.s }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: 3 11 3.00 * U st3w { z5.s, z6.s, z7.s }, p3, [x17, x16, lsl #2]
-# CHECK-NEXT: 5 15 16.00 * U st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x0]
-# CHECK-NEXT: 5 15 16.00 * U st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0]
-# CHECK-NEXT: 5 15 16.00 * U st4b { z21.b, z22.b, z23.b, z24.b }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: 5 15 16.00 * U st4b { z23.b, z24.b, z25.b, z26.b }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: 5 15 16.00 * U st4b { z5.b, z6.b, z7.b, z8.b }, p3, [x17, x16]
-# CHECK-NEXT: 4 11 4.00 * U st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT: 5 12 4.00 * U st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
-# CHECK-NEXT: 5 12 4.00 * U st4d { z21.d, z22.d, z23.d, z24.d }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: 5 12 4.00 * U st4d { z23.d, z24.d, z25.d, z26.d }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: 4 11 4.00 * U st4d { z5.d, z6.d, z7.d, z8.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT: 5 15 16.00 * U st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT: 5 15 16.00 * U st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0]
-# CHECK-NEXT: 5 15 16.00 * U st4h { z21.h, z22.h, z23.h, z24.h }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: 5 15 16.00 * U st4h { z23.h, z24.h, z25.h, z26.h }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: 5 15 16.00 * U st4h { z5.h, z6.h, z7.h, z8.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT: 4 11 4.00 * U st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT: 5 12 4.00 * U st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0]
-# CHECK-NEXT: 5 12 4.00 * U st4w { z21.s, z22.s, z23.s, z24.s }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: 5 12 4.00 * U st4w { z23.s, z24.s, z25.s, z26.s }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: 4 11 4.00 * U st4w { z5.s, z6.s, z7.s, z8.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: 4 15 12.00 * U st3b { z0.b - z2.b }, p0, [x0, x0]
+# CHECK-NEXT: 4 15 12.00 * U st3b { z0.b - z2.b }, p0, [x0]
+# CHECK-NEXT: 4 15 12.00 * U st3b { z21.b - z23.b }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: 4 15 12.00 * U st3b { z23.b - z25.b }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: 4 15 12.00 * U st3b { z5.b - z7.b }, p3, [x17, x16]
+# CHECK-NEXT: 3 11 3.00 * U st3d { z0.d - z2.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: 4 12 3.00 * U st3d { z0.d - z2.d }, p0, [x0]
+# CHECK-NEXT: 4 12 3.00 * U st3d { z21.d - z23.d }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: 4 12 3.00 * U st3d { z23.d - z25.d }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: 3 11 3.00 * U st3d { z5.d - z7.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: 4 15 12.00 * U st3h { z0.h - z2.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: 4 15 12.00 * U st3h { z0.h - z2.h }, p0, [x0]
+# CHECK-NEXT: 4 15 12.00 * U st3h { z21.h - z23.h }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: 4 15 12.00 * U st3h { z23.h - z25.h }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: 4 15 12.00 * U st3h { z5.h - z7.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: 3 11 3.00 * U st3w { z0.s - z2.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: 4 12 3.00 * U st3w { z0.s - z2.s }, p0, [x0]
+# CHECK-NEXT: 4 12 3.00 * U st3w { z21.s - z23.s }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: 4 12 3.00 * U st3w { z23.s - z25.s }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: 3 11 3.00 * U st3w { z5.s - z7.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: 5 15 16.00 * U st4b { z0.b - z3.b }, p0, [x0, x0]
+# CHECK-NEXT: 5 15 16.00 * U st4b { z0.b - z3.b }, p0, [x0]
+# CHECK-NEXT: 5 15 16.00 * U st4b { z21.b - z24.b }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: 5 15 16.00 * U st4b { z23.b - z26.b }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: 5 15 16.00 * U st4b { z5.b - z8.b }, p3, [x17, x16]
+# CHECK-NEXT: 4 11 4.00 * U st4d { z0.d - z3.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: 5 12 4.00 * U st4d { z0.d - z3.d }, p0, [x0]
+# CHECK-NEXT: 5 12 4.00 * U st4d { z21.d - z24.d }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: 5 12 4.00 * U st4d { z23.d - z26.d }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: 4 11 4.00 * U st4d { z5.d - z8.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: 5 15 16.00 * U st4h { z0.h - z3.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: 5 15 16.00 * U st4h { z0.h - z3.h }, p0, [x0]
+# CHECK-NEXT: 5 15 16.00 * U st4h { z21.h - z24.h }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: 5 15 16.00 * U st4h { z23.h - z26.h }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: 5 15 16.00 * U st4h { z5.h - z8.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: 4 11 4.00 * U st4w { z0.s - z3.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: 5 12 4.00 * U st4w { z0.s - z3.s }, p0, [x0]
+# CHECK-NEXT: 5 12 4.00 * U st4w { z21.s - z24.s }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: 5 12 4.00 * U st4w { z23.s - z26.s }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: 4 11 4.00 * U st4w { z5.s - z8.s }, p3, [x17, x16, lsl #2]
# CHECK-NEXT: 1 11 1.00 * stnt1b { z0.b }, p0, [x0, x0]
# CHECK-NEXT: 1 11 1.00 * stnt1b { z0.b }, p0, [x0]
# CHECK-NEXT: 1 11 1.00 * stnt1b { z21.b }, p5, [x10, #7, mul vl]
@@ -6080,46 +6080,46 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - 1.00 1.00 - - - - - ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl]
# CHECK-NEXT: - 1.00 1.00 - - - - - ld2w { z23.s, z24.s }, p3/z, [x13, #-16, mul vl]
# CHECK-NEXT: - 1.50 1.50 - - - - - ld2w { z5.s, z6.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT: - 6.50 6.50 - - - - - ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x0]
-# CHECK-NEXT: - 6.50 6.50 - - - - - ld3b { z0.b, z1.b, z2.b }, p0/z, [x0]
-# CHECK-NEXT: - 6.50 6.50 - - - - - ld3b { z21.b, z22.b, z23.b }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: - 6.50 6.50 - - - - - ld3b { z23.b, z24.b, z25.b }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: - 6.50 6.50 - - - - - ld3b { z5.b, z6.b, z7.b }, p3/z, [x17, x16]
-# CHECK-NEXT: - 2.00 2.00 - - - - - ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT: - 1.50 1.50 - - - - - ld3d { z0.d, z1.d, z2.d }, p0/z, [x0]
-# CHECK-NEXT: - 1.50 1.50 - - - - - ld3d { z21.d, z22.d, z23.d }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: - 1.50 1.50 - - - - - ld3d { z23.d, z24.d, z25.d }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: - 2.00 2.00 - - - - - ld3d { z5.d, z6.d, z7.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT: - 6.50 6.50 - - - - - ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: - 6.50 6.50 - - - - - ld3h { z0.h, z1.h, z2.h }, p0/z, [x0]
-# CHECK-NEXT: - 6.50 6.50 - - - - - ld3h { z21.h, z22.h, z23.h }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: - 6.50 6.50 - - - - - ld3h { z23.h, z24.h, z25.h }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: - 6.50 6.50 - - - - - ld3h { z5.h, z6.h, z7.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT: - 2.00 2.00 - - - - - ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: - 1.50 1.50 - - - - - ld3w { z0.s, z1.s, z2.s }, p0/z, [x0]
-# CHECK-NEXT: - 1.50 1.50 - - - - - ld3w { z21.s, z22.s, z23.s }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: - 1.50 1.50 - - - - - ld3w { z23.s, z24.s, z25.s }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: - 2.00 2.00 - - - - - ld3w { z5.s, z6.s, z7.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT: - 8.50 8.50 - - - - - ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x0]
-# CHECK-NEXT: - 8.50 8.50 - - - - - ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0]
-# CHECK-NEXT: - 8.50 8.50 - - - - - ld4b { z21.b, z22.b, z23.b, z24.b }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: - 8.50 8.50 - - - - - ld4b { z23.b, z24.b, z25.b, z26.b }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: - 8.50 8.50 - - - - - ld4b { z5.b, z6.b, z7.b, z8.b }, p3/z, [x17, x16]
-# CHECK-NEXT: - 2.50 2.50 - - - - - ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT: - 2.00 2.00 - - - - - ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0]
-# CHECK-NEXT: - 2.00 2.00 - - - - - ld4d { z21.d, z22.d, z23.d, z24.d }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: - 2.00 2.00 - - - - - ld4d { z23.d, z24.d, z25.d, z26.d }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: - 2.50 2.50 - - - - - ld4d { z5.d, z6.d, z7.d, z8.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT: - 8.50 8.50 - - - - - ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: - 8.50 8.50 - - - - - ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0]
-# CHECK-NEXT: - 8.50 8.50 - - - - - ld4h { z21.h, z22.h, z23.h, z24.h }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: - 8.50 8.50 - - - - - ld4h { z23.h, z24.h, z25.h, z26.h }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: - 8.50 8.50 - - - - - ld4h { z5.h, z6.h, z7.h, z8.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT: - 2.50 2.50 - - - - - ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: - 2.00 2.00 - - - - - ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0]
-# CHECK-NEXT: - 2.00 2.00 - - - - - ld4w { z21.s, z22.s, z23.s, z24.s }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: - 2.00 2.00 - - - - - ld4w { z23.s, z24.s, z25.s, z26.s }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: - 2.50 2.50 - - - - - ld4w { z5.s, z6.s, z7.s, z8.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: - 6.50 6.50 - - - - - ld3b { z0.b - z2.b }, p0/z, [x0, x0]
+# CHECK-NEXT: - 6.50 6.50 - - - - - ld3b { z0.b - z2.b }, p0/z, [x0]
+# CHECK-NEXT: - 6.50 6.50 - - - - - ld3b { z21.b - z23.b }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: - 6.50 6.50 - - - - - ld3b { z23.b - z25.b }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: - 6.50 6.50 - - - - - ld3b { z5.b - z7.b }, p3/z, [x17, x16]
+# CHECK-NEXT: - 2.00 2.00 - - - - - ld3d { z0.d - z2.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: - 1.50 1.50 - - - - - ld3d { z0.d - z2.d }, p0/z, [x0]
+# CHECK-NEXT: - 1.50 1.50 - - - - - ld3d { z21.d - z23.d }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: - 1.50 1.50 - - - - - ld3d { z23.d - z25.d }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: - 2.00 2.00 - - - - - ld3d { z5.d - z7.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: - 6.50 6.50 - - - - - ld3h { z0.h - z2.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: - 6.50 6.50 - - - - - ld3h { z0.h - z2.h }, p0/z, [x0]
+# CHECK-NEXT: - 6.50 6.50 - - - - - ld3h { z21.h - z23.h }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: - 6.50 6.50 - - - - - ld3h { z23.h - z25.h }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: - 6.50 6.50 - - - - - ld3h { z5.h - z7.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: - 2.00 2.00 - - - - - ld3w { z0.s - z2.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: - 1.50 1.50 - - - - - ld3w { z0.s - z2.s }, p0/z, [x0]
+# CHECK-NEXT: - 1.50 1.50 - - - - - ld3w { z21.s - z23.s }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: - 1.50 1.50 - - - - - ld3w { z23.s - z25.s }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: - 2.00 2.00 - - - - - ld3w { z5.s - z7.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: - 8.50 8.50 - - - - - ld4b { z0.b - z3.b }, p0/z, [x0, x0]
+# CHECK-NEXT: - 8.50 8.50 - - - - - ld4b { z0.b - z3.b }, p0/z, [x0]
+# CHECK-NEXT: - 8.50 8.50 - - - - - ld4b { z21.b - z24.b }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: - 8.50 8.50 - - - - - ld4b { z23.b - z26.b }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: - 8.50 8.50 - - - - - ld4b { z5.b - z8.b }, p3/z, [x17, x16]
+# CHECK-NEXT: - 2.50 2.50 - - - - - ld4d { z0.d - z3.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: - 2.00 2.00 - - - - - ld4d { z0.d - z3.d }, p0/z, [x0]
+# CHECK-NEXT: - 2.00 2.00 - - - - - ld4d { z21.d - z24.d }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: - 2.00 2.00 - - - - - ld4d { z23.d - z26.d }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: - 2.50 2.50 - - - - - ld4d { z5.d - z8.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: - 8.50 8.50 - - - - - ld4h { z0.h - z3.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: - 8.50 8.50 - - - - - ld4h { z0.h - z3.h }, p0/z, [x0]
+# CHECK-NEXT: - 8.50 8.50 - - - - - ld4h { z21.h - z24.h }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: - 8.50 8.50 - - - - - ld4h { z23.h - z26.h }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: - 8.50 8.50 - - - - - ld4h { z5.h - z8.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: - 2.50 2.50 - - - - - ld4w { z0.s - z3.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: - 2.00 2.00 - - - - - ld4w { z0.s - z3.s }, p0/z, [x0]
+# CHECK-NEXT: - 2.00 2.00 - - - - - ld4w { z21.s - z24.s }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: - 2.00 2.00 - - - - - ld4w { z23.s - z26.s }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: - 2.50 2.50 - - - - - ld4w { z5.s - z8.s }, p3/z, [x17, x16, lsl #2]
# CHECK-NEXT: - 0.50 0.50 - - - - - ldff1b { z0.d }, p0/z, [x0, x0]
# CHECK-NEXT: - 2.00 2.00 - - 1.00 - - ldff1b { z0.d }, p0/z, [z0.d]
# CHECK-NEXT: - 0.50 0.50 - - - - - ldff1b { z0.h }, p0/z, [x0, x0]
@@ -7092,46 +7092,46 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - 1.50 1.50 - - 2.00 - - st2w { z21.s, z22.s }, p5, [x10, #10, mul vl]
# CHECK-NEXT: - 1.50 1.50 - - 2.00 - - st2w { z23.s, z24.s }, p3, [x13, #-16, mul vl]
# CHECK-NEXT: - 1.00 1.00 - - 2.00 - - st2w { z5.s, z6.s }, p3, [x17, x16, lsl #2]
-# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3b { z0.b, z1.b, z2.b }, p0, [x0, x0]
-# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3b { z0.b, z1.b, z2.b }, p0, [x0]
-# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3b { z21.b, z22.b, z23.b }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3b { z23.b, z24.b, z25.b }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3b { z5.b, z6.b, z7.b }, p3, [x17, x16]
-# CHECK-NEXT: - 1.50 1.50 - - 3.00 - - st3d { z0.d, z1.d, z2.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT: - 2.00 2.00 - - 3.00 - - st3d { z0.d, z1.d, z2.d }, p0, [x0]
-# CHECK-NEXT: - 2.00 2.00 - - 3.00 - - st3d { z21.d, z22.d, z23.d }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: - 2.00 2.00 - - 3.00 - - st3d { z23.d, z24.d, z25.d }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: - 1.50 1.50 - - 3.00 - - st3d { z5.d, z6.d, z7.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3h { z0.h, z1.h, z2.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3h { z0.h, z1.h, z2.h }, p0, [x0]
-# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3h { z21.h, z22.h, z23.h }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3h { z23.h, z24.h, z25.h }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3h { z5.h, z6.h, z7.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT: - 1.50 1.50 - - 3.00 - - st3w { z0.s, z1.s, z2.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT: - 2.00 2.00 - - 3.00 - - st3w { z0.s, z1.s, z2.s }, p0, [x0]
-# CHECK-NEXT: - 2.00 2.00 - - 3.00 - - st3w { z21.s, z22.s, z23.s }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: - 2.00 2.00 - - 3.00 - - st3w { z23.s, z24.s, z25.s }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: - 1.50 1.50 - - 3.00 - - st3w { z5.s, z6.s, z7.s }, p3, [x17, x16, lsl #2]
-# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x0]
-# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0]
-# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4b { z21.b, z22.b, z23.b, z24.b }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4b { z23.b, z24.b, z25.b, z26.b }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4b { z5.b, z6.b, z7.b, z8.b }, p3, [x17, x16]
-# CHECK-NEXT: - 2.00 2.00 - - 4.00 - - st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT: - 2.50 2.50 - - 4.00 - - st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
-# CHECK-NEXT: - 2.50 2.50 - - 4.00 - - st4d { z21.d, z22.d, z23.d, z24.d }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: - 2.50 2.50 - - 4.00 - - st4d { z23.d, z24.d, z25.d, z26.d }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: - 2.00 2.00 - - 4.00 - - st4d { z5.d, z6.d, z7.d, z8.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0]
-# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4h { z21.h, z22.h, z23.h, z24.h }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4h { z23.h, z24.h, z25.h, z26.h }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4h { z5.h, z6.h, z7.h, z8.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT: - 2.00 2.00 - - 4.00 - - st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT: - 2.50 2.50 - - 4.00 - - st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0]
-# CHECK-NEXT: - 2.50 2.50 - - 4.00 - - st4w { z21.s, z22.s, z23.s, z24.s }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: - 2.50 2.50 - - 4.00 - - st4w { z23.s, z24.s, z25.s, z26.s }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: - 2.00 2.00 - - 4.00 - - st4w { z5.s, z6.s, z7.s, z8.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3b { z0.b - z2.b }, p0, [x0, x0]
+# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3b { z0.b - z2.b }, p0, [x0]
+# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3b { z21.b - z23.b }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3b { z23.b - z25.b }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3b { z5.b - z7.b }, p3, [x17, x16]
+# CHECK-NEXT: - 1.50 1.50 - - 3.00 - - st3d { z0.d - z2.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: - 2.00 2.00 - - 3.00 - - st3d { z0.d - z2.d }, p0, [x0]
+# CHECK-NEXT: - 2.00 2.00 - - 3.00 - - st3d { z21.d - z23.d }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: - 2.00 2.00 - - 3.00 - - st3d { z23.d - z25.d }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: - 1.50 1.50 - - 3.00 - - st3d { z5.d - z7.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3h { z0.h - z2.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3h { z0.h - z2.h }, p0, [x0]
+# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3h { z21.h - z23.h }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3h { z23.h - z25.h }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3h { z5.h - z7.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: - 1.50 1.50 - - 3.00 - - st3w { z0.s - z2.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: - 2.00 2.00 - - 3.00 - - st3w { z0.s - z2.s }, p0, [x0]
+# CHECK-NEXT: - 2.00 2.00 - - 3.00 - - st3w { z21.s - z23.s }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: - 2.00 2.00 - - 3.00 - - st3w { z23.s - z25.s }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: - 1.50 1.50 - - 3.00 - - st3w { z5.s - z7.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4b { z0.b - z3.b }, p0, [x0, x0]
+# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4b { z0.b - z3.b }, p0, [x0]
+# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4b { z21.b - z24.b }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4b { z23.b - z26.b }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4b { z5.b - z8.b }, p3, [x17, x16]
+# CHECK-NEXT: - 2.00 2.00 - - 4.00 - - st4d { z0.d - z3.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: - 2.50 2.50 - - 4.00 - - st4d { z0.d - z3.d }, p0, [x0]
+# CHECK-NEXT: - 2.50 2.50 - - 4.00 - - st4d { z21.d - z24.d }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: - 2.50 2.50 - - 4.00 - - st4d { z23.d - z26.d }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: - 2.00 2.00 - - 4.00 - - st4d { z5.d - z8.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4h { z0.h - z3.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4h { z0.h - z3.h }, p0, [x0]
+# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4h { z21.h - z24.h }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4h { z23.h - z26.h }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4h { z5.h - z8.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: - 2.00 2.00 - - 4.00 - - st4w { z0.s - z3.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: - 2.50 2.50 - - 4.00 - - st4w { z0.s - z3.s }, p0, [x0]
+# CHECK-NEXT: - 2.50 2.50 - - 4.00 - - st4w { z21.s - z24.s }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: - 2.50 2.50 - - 4.00 - - st4w { z23.s - z26.s }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: - 2.00 2.00 - - 4.00 - - st4w { z5.s - z8.s }, p3, [x17, x16, lsl #2]
# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - stnt1b { z0.b }, p0, [x0, x0]
# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - stnt1b { z0.b }, p0, [x0]
# CHECK-NEXT: - 0.50 0.50 - - 1.00 - - stnt1b { z21.b }, p5, [x10, #7, mul vl]
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s
index 062ac80bd718b..5ba286f21ba6e 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s
@@ -4624,46 +4624,46 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 2 8 0.50 * U ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl]
# CHECK-NEXT: 2 8 0.50 * U ld2w { z23.s, z24.s }, p3/z, [x13, #-16, mul vl]
# CHECK-NEXT: 2 9 0.50 * U ld2w { z5.s, z6.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT: 3 10 0.67 * U ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x0]
-# CHECK-NEXT: 2 9 0.50 * U ld3b { z0.b, z1.b, z2.b }, p0/z, [x0]
-# CHECK-NEXT: 2 9 0.50 * U ld3b { z21.b, z22.b, z23.b }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: 2 9 0.50 * U ld3b { z23.b, z24.b, z25.b }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: 3 10 0.67 * U ld3b { z5.b, z6.b, z7.b }, p3/z, [x17, x16]
-# CHECK-NEXT: 3 10 0.67 * U ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT: 2 9 0.50 * U ld3d { z0.d, z1.d, z2.d }, p0/z, [x0]
-# CHECK-NEXT: 2 9 0.50 * U ld3d { z21.d, z22.d, z23.d }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: 2 9 0.50 * U ld3d { z23.d, z24.d, z25.d }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: 3 10 0.67 * U ld3d { z5.d, z6.d, z7.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT: 3 10 0.67 * U ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: 2 9 0.50 * U ld3h { z0.h, z1.h, z2.h }, p0/z, [x0]
-# CHECK-NEXT: 2 9 0.50 * U ld3h { z21.h, z22.h, z23.h }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: 2 9 0.50 * U ld3h { z23.h, z24.h, z25.h }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: 3 10 0.67 * U ld3h { z5.h, z6.h, z7.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT: 3 10 0.67 * U ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: 2 9 0.50 * U ld3w { z0.s, z1.s, z2.s }, p0/z, [x0]
-# CHECK-NEXT: 2 9 0.50 * U ld3w { z21.s, z22.s, z23.s }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: 2 9 0.50 * U ld3w { z23.s, z24.s, z25.s }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: 3 10 0.67 * U ld3w { z5.s, z6.s, z7.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT: 6 10 1.00 * U ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x0]
-# CHECK-NEXT: 4 9 1.00 * U ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0]
-# CHECK-NEXT: 4 9 1.00 * U ld4b { z21.b, z22.b, z23.b, z24.b }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: 4 9 1.00 * U ld4b { z23.b, z24.b, z25.b, z26.b }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: 6 10 1.00 * U ld4b { z5.b, z6.b, z7.b, z8.b }, p3/z, [x17, x16]
-# CHECK-NEXT: 6 10 1.00 * U ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT: 4 9 1.00 * U ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0]
-# CHECK-NEXT: 4 9 1.00 * U ld4d { z21.d, z22.d, z23.d, z24.d }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: 4 9 1.00 * U ld4d { z23.d, z24.d, z25.d, z26.d }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: 6 10 1.00 * U ld4d { z5.d, z6.d, z7.d, z8.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT: 6 10 1.00 * U ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: 4 9 1.00 * U ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0]
-# CHECK-NEXT: 4 9 1.00 * U ld4h { z21.h, z22.h, z23.h, z24.h }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: 4 9 1.00 * U ld4h { z23.h, z24.h, z25.h, z26.h }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: 6 10 1.00 * U ld4h { z5.h, z6.h, z7.h, z8.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT: 6 10 1.00 * U ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: 4 9 1.00 * U ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0]
-# CHECK-NEXT: 4 9 1.00 * U ld4w { z21.s, z22.s, z23.s, z24.s }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: 4 9 1.00 * U ld4w { z23.s, z24.s, z25.s, z26.s }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: 6 10 1.00 * U ld4w { z5.s, z6.s, z7.s, z8.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: 3 10 0.67 * U ld3b { z0.b - z2.b }, p0/z, [x0, x0]
+# CHECK-NEXT: 2 9 0.50 * U ld3b { z0.b - z2.b }, p0/z, [x0]
+# CHECK-NEXT: 2 9 0.50 * U ld3b { z21.b - z23.b }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 2 9 0.50 * U ld3b { z23.b - z25.b }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 3 10 0.67 * U ld3b { z5.b - z7.b }, p3/z, [x17, x16]
+# CHECK-NEXT: 3 10 0.67 * U ld3d { z0.d - z2.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: 2 9 0.50 * U ld3d { z0.d - z2.d }, p0/z, [x0]
+# CHECK-NEXT: 2 9 0.50 * U ld3d { z21.d - z23.d }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 2 9 0.50 * U ld3d { z23.d - z25.d }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 3 10 0.67 * U ld3d { z5.d - z7.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: 3 10 0.67 * U ld3h { z0.h - z2.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 2 9 0.50 * U ld3h { z0.h - z2.h }, p0/z, [x0]
+# CHECK-NEXT: 2 9 0.50 * U ld3h { z21.h - z23.h }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 2 9 0.50 * U ld3h { z23.h - z25.h }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 3 10 0.67 * U ld3h { z5.h - z7.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: 3 10 0.67 * U ld3w { z0.s - z2.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 2 9 0.50 * U ld3w { z0.s - z2.s }, p0/z, [x0]
+# CHECK-NEXT: 2 9 0.50 * U ld3w { z21.s - z23.s }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 2 9 0.50 * U ld3w { z23.s - z25.s }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 3 10 0.67 * U ld3w { z5.s - z7.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: 6 10 1.00 * U ld4b { z0.b - z3.b }, p0/z, [x0, x0]
+# CHECK-NEXT: 4 9 1.00 * U ld4b { z0.b - z3.b }, p0/z, [x0]
+# CHECK-NEXT: 4 9 1.00 * U ld4b { z21.b - z24.b }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 4 9 1.00 * U ld4b { z23.b - z26.b }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 6 10 1.00 * U ld4b { z5.b - z8.b }, p3/z, [x17, x16]
+# CHECK-NEXT: 6 10 1.00 * U ld4d { z0.d - z3.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: 4 9 1.00 * U ld4d { z0.d - z3.d }, p0/z, [x0]
+# CHECK-NEXT: 4 9 1.00 * U ld4d { z21.d - z24.d }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 4 9 1.00 * U ld4d { z23.d - z26.d }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 6 10 1.00 * U ld4d { z5.d - z8.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: 6 10 1.00 * U ld4h { z0.h - z3.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 4 9 1.00 * U ld4h { z0.h - z3.h }, p0/z, [x0]
+# CHECK-NEXT: 4 9 1.00 * U ld4h { z21.h - z24.h }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 4 9 1.00 * U ld4h { z23.h - z26.h }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 6 10 1.00 * U ld4h { z5.h - z8.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: 6 10 1.00 * U ld4w { z0.s - z3.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 4 9 1.00 * U ld4w { z0.s - z3.s }, p0/z, [x0]
+# CHECK-NEXT: 4 9 1.00 * U ld4w { z21.s - z24.s }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 4 9 1.00 * U ld4w { z23.s - z26.s }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 6 10 1.00 * U ld4w { z5.s - z8.s }, p3/z, [x17, x16, lsl #2]
# CHECK-NEXT: 2 6 0.50 * U ldff1b { z0.d }, p0/z, [x0, x0]
# CHECK-NEXT: 4 9 1.00 * U ldff1b { z0.d }, p0/z, [z0.d]
# CHECK-NEXT: 2 6 0.50 * U ldff1b { z0.h }, p0/z, [x0, x0]
@@ -6124,46 +6124,46 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 2 4 0.50 * U st2w { z21.s, z22.s }, p5, [x10, #10, mul vl]
# CHECK-NEXT: 2 4 0.50 * U st2w { z23.s, z24.s }, p3, [x13, #-16, mul vl]
# CHECK-NEXT: 2 4 0.50 * U st2w { z5.s, z6.s }, p3, [x17, x16, lsl #2]
-# CHECK-NEXT: 15 7 2.50 * U st3b { z0.b, z1.b, z2.b }, p0, [x0, x0]
-# CHECK-NEXT: 10 7 2.50 * U st3b { z0.b, z1.b, z2.b }, p0, [x0]
-# CHECK-NEXT: 10 7 2.50 * U st3b { z21.b, z22.b, z23.b }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: 10 7 2.50 * U st3b { z23.b, z24.b, z25.b }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: 15 7 2.50 * U st3b { z5.b, z6.b, z7.b }, p3, [x17, x16]
-# CHECK-NEXT: 15 7 2.50 * U st3d { z0.d, z1.d, z2.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT: 10 7 2.50 * U st3d { z0.d, z1.d, z2.d }, p0, [x0]
-# CHECK-NEXT: 10 7 2.50 * U st3d { z21.d, z22.d, z23.d }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: 10 7 2.50 * U st3d { z23.d, z24.d, z25.d }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: 15 7 2.50 * U st3d { z5.d, z6.d, z7.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT: 15 7 2.50 * U st3h { z0.h, z1.h, z2.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT: 10 7 2.50 * U st3h { z0.h, z1.h, z2.h }, p0, [x0]
-# CHECK-NEXT: 10 7 2.50 * U st3h { z21.h, z22.h, z23.h }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: 10 7 2.50 * U st3h { z23.h, z24.h, z25.h }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: 15 7 2.50 * U st3h { z5.h, z6.h, z7.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT: 15 7 2.50 * U st3w { z0.s, z1.s, z2.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT: 10 7 2.50 * U st3w { z0.s, z1.s, z2.s }, p0, [x0]
-# CHECK-NEXT: 10 7 2.50 * U st3w { z21.s, z22.s, z23.s }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: 10 7 2.50 * U st3w { z23.s, z24.s, z25.s }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: 15 7 2.50 * U st3w { z5.s, z6.s, z7.s }, p3, [x17, x16, lsl #2]
-# CHECK-NEXT: 27 11 4.50 * U st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x0]
-# CHECK-NEXT: 18 11 4.50 * U st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0]
-# CHECK-NEXT: 18 11 4.50 * U st4b { z21.b, z22.b, z23.b, z24.b }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: 18 11 4.50 * U st4b { z23.b, z24.b, z25.b, z26.b }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: 27 11 4.50 * U st4b { z5.b, z6.b, z7.b, z8.b }, p3, [x17, x16]
-# CHECK-NEXT: 27 11 4.50 * U st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT: 18 11 4.50 * U st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
-# CHECK-NEXT: 18 11 4.50 * U st4d { z21.d, z22.d, z23.d, z24.d }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: 18 11 4.50 * U st4d { z23.d, z24.d, z25.d, z26.d }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: 27 11 4.50 * U st4d { z5.d, z6.d, z7.d, z8.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT: 27 11 4.50 * U st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT: 18 11 4.50 * U st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0]
-# CHECK-NEXT: 18 11 4.50 * U st4h { z21.h, z22.h, z23.h, z24.h }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: 18 11 4.50 * U st4h { z23.h, z24.h, z25.h, z26.h }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: 27 11 4.50 * U st4h { z5.h, z6.h, z7.h, z8.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT: 27 11 4.50 * U st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT: 18 11 4.50 * U st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0]
-# CHECK-NEXT: 18 11 4.50 * U st4w { z21.s, z22.s, z23.s, z24.s }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: 18 11 4.50 * U st4w { z23.s, z24.s, z25.s, z26.s }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: 27 11 4.50 * U st4w { z5.s, z6.s, z7.s, z8.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: 15 7 2.50 * U st3b { z0.b - z2.b }, p0, [x0, x0]
+# CHECK-NEXT: 10 7 2.50 * U st3b { z0.b - z2.b }, p0, [x0]
+# CHECK-NEXT: 10 7 2.50 * U st3b { z21.b - z23.b }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: 10 7 2.50 * U st3b { z23.b - z25.b }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: 15 7 2.50 * U st3b { z5.b - z7.b }, p3, [x17, x16]
+# CHECK-NEXT: 15 7 2.50 * U st3d { z0.d - z2.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: 10 7 2.50 * U st3d { z0.d - z2.d }, p0, [x0]
+# CHECK-NEXT: 10 7 2.50 * U st3d { z21.d - z23.d }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: 10 7 2.50 * U st3d { z23.d - z25.d }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: 15 7 2.50 * U st3d { z5.d - z7.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: 15 7 2.50 * U st3h { z0.h - z2.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: 10 7 2.50 * U st3h { z0.h - z2.h }, p0, [x0]
+# CHECK-NEXT: 10 7 2.50 * U st3h { z21.h - z23.h }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: 10 7 2.50 * U st3h { z23.h - z25.h }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: 15 7 2.50 * U st3h { z5.h - z7.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: 15 7 2.50 * U st3w { z0.s - z2.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: 10 7 2.50 * U st3w { z0.s - z2.s }, p0, [x0]
+# CHECK-NEXT: 10 7 2.50 * U st3w { z21.s - z23.s }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: 10 7 2.50 * U st3w { z23.s - z25.s }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: 15 7 2.50 * U st3w { z5.s - z7.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: 27 11 4.50 * U st4b { z0.b - z3.b }, p0, [x0, x0]
+# CHECK-NEXT: 18 11 4.50 * U st4b { z0.b - z3.b }, p0, [x0]
+# CHECK-NEXT: 18 11 4.50 * U st4b { z21.b - z24.b }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: 18 11 4.50 * U st4b { z23.b - z26.b }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: 27 11 4.50 * U st4b { z5.b - z8.b }, p3, [x17, x16]
+# CHECK-NEXT: 27 11 4.50 * U st4d { z0.d - z3.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: 18 11 4.50 * U st4d { z0.d - z3.d }, p0, [x0]
+# CHECK-NEXT: 18 11 4.50 * U st4d { z21.d - z24.d }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: 18 11 4.50 * U st4d { z23.d - z26.d }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: 27 11 4.50 * U st4d { z5.d - z8.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: 27 11 4.50 * U st4h { z0.h - z3.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: 18 11 4.50 * U st4h { z0.h - z3.h }, p0, [x0]
+# CHECK-NEXT: 18 11 4.50 * U st4h { z21.h - z24.h }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: 18 11 4.50 * U st4h { z23.h - z26.h }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: 27 11 4.50 * U st4h { z5.h - z8.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: 27 11 4.50 * U st4w { z0.s - z3.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: 18 11 4.50 * U st4w { z0.s - z3.s }, p0, [x0]
+# CHECK-NEXT: 18 11 4.50 * U st4w { z21.s - z24.s }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: 18 11 4.50 * U st4w { z23.s - z26.s }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: 27 11 4.50 * U st4w { z5.s - z8.s }, p3, [x17, x16, lsl #2]
# CHECK-NEXT: 2 2 0.50 * stnt1b { z0.b }, p0, [x0, x0]
# CHECK-NEXT: 2 2 0.50 * stnt1b { z0.b }, p0, [x0]
# CHECK-NEXT: 2 2 0.50 * stnt1b { z0.d }, p0, [z1.d]
@@ -8055,46 +8055,46 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl]
# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld2w { z23.s, z24.s }, p3/z, [x13, #-16, mul vl]
# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld2w { z5.s, z6.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x0]
-# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3b { z0.b, z1.b, z2.b }, p0/z, [x0]
-# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3b { z21.b, z22.b, z23.b }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3b { z23.b, z24.b, z25.b }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3b { z5.b, z6.b, z7.b }, p3/z, [x17, x16]
-# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3d { z0.d, z1.d, z2.d }, p0/z, [x0]
-# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3d { z21.d, z22.d, z23.d }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3d { z23.d, z24.d, z25.d }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3d { z5.d, z6.d, z7.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3h { z0.h, z1.h, z2.h }, p0/z, [x0]
-# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3h { z21.h, z22.h, z23.h }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3h { z23.h, z24.h, z25.h }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3h { z5.h, z6.h, z7.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3w { z0.s, z1.s, z2.s }, p0/z, [x0]
-# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3w { z21.s, z22.s, z23.s }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3w { z23.s, z24.s, z25.s }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3w { z5.s, z6.s, z7.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x0]
-# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0]
-# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4b { z21.b, z22.b, z23.b, z24.b }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4b { z23.b, z24.b, z25.b, z26.b }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4b { z5.b, z6.b, z7.b, z8.b }, p3/z, [x17, x16]
-# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0]
-# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4d { z21.d, z22.d, z23.d, z24.d }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4d { z23.d, z24.d, z25.d, z26.d }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4d { z5.d, z6.d, z7.d, z8.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0]
-# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4h { z21.h, z22.h, z23.h, z24.h }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4h { z23.h, z24.h, z25.h, z26.h }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4h { z5.h, z6.h, z7.h, z8.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0]
-# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4w { z21.s, z22.s, z23.s, z24.s }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4w { z23.s, z24.s, z25.s, z26.s }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4w { z5.s, z6.s, z7.s, z8.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3b { z0.b - z2.b }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3b { z0.b - z2.b }, p0/z, [x0]
+# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3b { z21.b - z23.b }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3b { z23.b - z25.b }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3b { z5.b - z7.b }, p3/z, [x17, x16]
+# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3d { z0.d - z2.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3d { z0.d - z2.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3d { z21.d - z23.d }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3d { z23.d - z25.d }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3d { z5.d - z7.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3h { z0.h - z2.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3h { z0.h - z2.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3h { z21.h - z23.h }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3h { z23.h - z25.h }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3h { z5.h - z7.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3w { z0.s - z2.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3w { z0.s - z2.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3w { z21.s - z23.s }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3w { z23.s - z25.s }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3w { z5.s - z7.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4b { z0.b - z3.b }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4b { z0.b - z3.b }, p0/z, [x0]
+# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4b { z21.b - z24.b }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4b { z23.b - z26.b }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4b { z5.b - z8.b }, p3/z, [x17, x16]
+# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4d { z0.d - z3.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4d { z0.d - z3.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4d { z21.d - z24.d }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4d { z23.d - z26.d }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4d { z5.d - z8.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4h { z0.h - z3.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4h { z0.h - z3.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4h { z21.h - z24.h }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4h { z23.h - z26.h }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4h { z5.h - z8.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4w { z0.s - z3.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4w { z0.s - z3.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4w { z21.s - z24.s }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4w { z23.s - z26.s }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4w { z5.s - z8.s }, p3/z, [x17, x16, lsl #2]
# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - 0.50 0.50 - - ldff1b { z0.d }, p0/z, [x0, x0]
# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ldff1b { z0.d }, p0/z, [z0.d]
# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - 0.50 0.50 - - ldff1b { z0.h }, p0/z, [x0, x0]
@@ -9555,46 +9555,46 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 st2w { z21.s, z22.s }, p5, [x10, #10, mul vl]
# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 st2w { z23.s, z24.s }, p3, [x13, #-16, mul vl]
# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 st2w { z5.s, z6.s }, p3, [x17, x16, lsl #2]
-# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3b { z0.b, z1.b, z2.b }, p0, [x0, x0]
-# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3b { z0.b, z1.b, z2.b }, p0, [x0]
-# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3b { z21.b, z22.b, z23.b }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3b { z23.b, z24.b, z25.b }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3b { z5.b, z6.b, z7.b }, p3, [x17, x16]
-# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3d { z0.d, z1.d, z2.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3d { z0.d, z1.d, z2.d }, p0, [x0]
-# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3d { z21.d, z22.d, z23.d }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3d { z23.d, z24.d, z25.d }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3d { z5.d, z6.d, z7.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3h { z0.h, z1.h, z2.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3h { z0.h, z1.h, z2.h }, p0, [x0]
-# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3h { z21.h, z22.h, z23.h }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3h { z23.h, z24.h, z25.h }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3h { z5.h, z6.h, z7.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3w { z0.s, z1.s, z2.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3w { z0.s, z1.s, z2.s }, p0, [x0]
-# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3w { z21.s, z22.s, z23.s }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3w { z23.s, z24.s, z25.s }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3w { z5.s, z6.s, z7.s }, p3, [x17, x16, lsl #2]
-# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x0]
-# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0]
-# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4b { z21.b, z22.b, z23.b, z24.b }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4b { z23.b, z24.b, z25.b, z26.b }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4b { z5.b, z6.b, z7.b, z8.b }, p3, [x17, x16]
-# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
-# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4d { z21.d, z22.d, z23.d, z24.d }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4d { z23.d, z24.d, z25.d, z26.d }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4d { z5.d, z6.d, z7.d, z8.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0]
-# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4h { z21.h, z22.h, z23.h, z24.h }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4h { z23.h, z24.h, z25.h, z26.h }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4h { z5.h, z6.h, z7.h, z8.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0]
-# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4w { z21.s, z22.s, z23.s, z24.s }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4w { z23.s, z24.s, z25.s, z26.s }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4w { z5.s, z6.s, z7.s, z8.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3b { z0.b - z2.b }, p0, [x0, x0]
+# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3b { z0.b - z2.b }, p0, [x0]
+# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3b { z21.b - z23.b }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3b { z23.b - z25.b }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3b { z5.b - z7.b }, p3, [x17, x16]
+# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3d { z0.d - z2.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3d { z0.d - z2.d }, p0, [x0]
+# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3d { z21.d - z23.d }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3d { z23.d - z25.d }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3d { z5.d - z7.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3h { z0.h - z2.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3h { z0.h - z2.h }, p0, [x0]
+# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3h { z21.h - z23.h }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3h { z23.h - z25.h }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3h { z5.h - z7.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3w { z0.s - z2.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3w { z0.s - z2.s }, p0, [x0]
+# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3w { z21.s - z23.s }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3w { z23.s - z25.s }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3w { z5.s - z7.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4b { z0.b - z3.b }, p0, [x0, x0]
+# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4b { z0.b - z3.b }, p0, [x0]
+# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4b { z21.b - z24.b }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4b { z23.b - z26.b }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4b { z5.b - z8.b }, p3, [x17, x16]
+# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4d { z0.d - z3.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4d { z0.d - z3.d }, p0, [x0]
+# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4d { z21.d - z24.d }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4d { z23.d - z26.d }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4d { z5.d - z8.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4h { z0.h - z3.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4h { z0.h - z3.h }, p0, [x0]
+# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4h { z21.h - z24.h }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4h { z23.h - z26.h }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4h { z5.h - z8.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4w { z0.s - z3.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4w { z0.s - z3.s }, p0, [x0]
+# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4w { z21.s - z24.s }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4w { z23.s - z26.s }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4w { z5.s - z8.s }, p3, [x17, x16, lsl #2]
# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 stnt1b { z0.b }, p0, [x0, x0]
# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 stnt1b { z0.b }, p0, [x0]
# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 stnt1b { z0.d }, p0, [z1.d]
More information about the llvm-commits
mailing list