[llvm] 56ebe64 - [AArch64] Enable aggressivelyPreferBuildVectorSources (#142729)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 6 09:03:15 PDT 2025
Author: David Green
Date: 2025-06-06T17:03:10+01:00
New Revision: 56ebe64ce69adde8b10793de7aa571df00c75e08
URL: https://github.com/llvm/llvm-project/commit/56ebe64ce69adde8b10793de7aa571df00c75e08
DIFF: https://github.com/llvm/llvm-project/commit/56ebe64ce69adde8b10793de7aa571df00c75e08.diff
LOG: [AArch64] Enable aggressivelyPreferBuildVectorSources (#142729)
This helps to remove some inefficient build_vector lowering by folding
extract_vector_elt(build_vector) to the corresponding original source operand,
even when the build_vector has other uses.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir
llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
llvm/test/CodeGen/AArch64/fptrunc.ll
llvm/test/CodeGen/AArch64/itofp.ll
llvm/test/CodeGen/AArch64/sext.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
llvm/test/CodeGen/AArch64/zext-to-tbl.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 7b7f020f7c771..cc59e43e0622e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -449,6 +449,10 @@ class AArch64TargetLowering : public TargetLowering {
/// Enable aggressive FMA fusion on targets that want it.
bool enableAggressiveFMAFusion(EVT VT) const override;
+ bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override {
+ return true;
+ }
+
/// Returns the size of the platform's va_list object.
unsigned getVaListSizeInBits(const DataLayout &DL) const override;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
index 93f6051c3bd3b..5189582d0b6ac 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
@@ -55,13 +55,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %arg1:_(s64) = COPY $x0
; CHECK-NEXT: %arg2:_(s64) = COPY $x1
- ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: %one:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
- ; CHECK-NEXT: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s64)
- ; CHECK-NEXT: %extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s64)
- ; CHECK-NEXT: $x0 = COPY %extract(s64)
- ; CHECK-NEXT: $x1 = COPY %extract2(s64)
+ ; CHECK-NEXT: $x0 = COPY %arg1(s64)
+ ; CHECK-NEXT: $x1 = COPY %arg2(s64)
; CHECK-NEXT: $q0 = COPY %bv(<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%arg1:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
index e2933690c7c55..e81447a1de4b6 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
@@ -175,10 +175,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %arg1:_(s64) = COPY $x0
; CHECK-NEXT: %arg2:_(s64) = COPY $x1
- ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
- ; CHECK-NEXT: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s64)
- ; CHECK-NEXT: $x0 = COPY %extract(s64)
+ ; CHECK-NEXT: $x0 = COPY %arg1(s64)
; CHECK-NEXT: $q0 = COPY %bv(<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%arg1:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir
index c000a8e635bc6..86c0575961a17 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir
@@ -278,8 +278,8 @@ body: |
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 127
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s8>) = G_BUILD_VECTOR [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8)
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s8>), [[COPY]](p0) :: (store (<32 x s8>))
; CHECK-NEXT: RET_ReallyLR
%3:_(s8) = G_CONSTANT i8 127
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index a33b1ef569fc3..04dfdedb42752 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -726,7 +726,7 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
; CHECK-SD-LABEL: test_signed_v3f128_v3i32:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sub sp, sp, #128
-; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-SD-NEXT: stp x30, x23, [sp, #80] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x22, x21, [sp, #96] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 128
@@ -734,13 +734,13 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w21, -24
; CHECK-SD-NEXT: .cfi_offset w22, -32
+; CHECK-SD-NEXT: .cfi_offset w23, -40
; CHECK-SD-NEXT: .cfi_offset w30, -48
-; CHECK-SD-NEXT: stp q0, q2, [sp, #48] // 32-byte Folded Spill
-; CHECK-SD-NEXT: mov v2.16b, v1.16b
+; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
; CHECK-SD-NEXT: adrp x8, .LCPI16_0
-; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI16_0]
; CHECK-SD-NEXT: mov v0.16b, v2.16b
+; CHECK-SD-NEXT: stp q2, q1, [sp, #32] // 32-byte Folded Spill
+; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI16_0]
; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT: bl __getf2
; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
@@ -755,15 +755,15 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT: bl __gttf2
; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov w21, #2147483647 // =0x7fffffff
+; CHECK-SD-NEXT: mov w22, #2147483647 // =0x7fffffff
; CHECK-SD-NEXT: cmp w0, #0
-; CHECK-SD-NEXT: csel w19, w21, w19, gt
+; CHECK-SD-NEXT: csel w19, w22, w19, gt
; CHECK-SD-NEXT: mov v1.16b, v0.16b
; CHECK-SD-NEXT: bl __unordtf2
; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT: cmp w0, #0
-; CHECK-SD-NEXT: csel w22, wzr, w19, ne
+; CHECK-SD-NEXT: csel w21, wzr, w19, ne
; CHECK-SD-NEXT: bl __getf2
; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov w19, w0
@@ -775,16 +775,13 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
; CHECK-SD-NEXT: bl __gttf2
; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT: cmp w0, #0
-; CHECK-SD-NEXT: csel w19, w21, w19, gt
+; CHECK-SD-NEXT: csel w19, w22, w19, gt
; CHECK-SD-NEXT: mov v1.16b, v0.16b
; CHECK-SD-NEXT: bl __unordtf2
-; CHECK-SD-NEXT: cmp w0, #0
-; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT: csel w8, wzr, w19, ne
-; CHECK-SD-NEXT: fmov s0, w8
-; CHECK-SD-NEXT: mov v0.s[1], w22
-; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: csel w23, wzr, w19, ne
; CHECK-SD-NEXT: bl __getf2
; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov w19, w0
@@ -796,16 +793,17 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
; CHECK-SD-NEXT: bl __gttf2
; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
; CHECK-SD-NEXT: cmp w0, #0
-; CHECK-SD-NEXT: csel w19, w21, w19, gt
+; CHECK-SD-NEXT: csel w19, w22, w19, gt
; CHECK-SD-NEXT: mov v1.16b, v0.16b
; CHECK-SD-NEXT: bl __unordtf2
; CHECK-SD-NEXT: cmp w0, #0
-; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-SD-NEXT: csel w8, wzr, w19, ne
; CHECK-SD-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload
+; CHECK-SD-NEXT: fmov s0, w8
+; CHECK-SD-NEXT: mov v0.s[1], w23
+; CHECK-SD-NEXT: ldp x30, x23, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT: mov v0.s[2], w21
; CHECK-SD-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.s[2], w8
; CHECK-SD-NEXT: add sp, sp, #128
; CHECK-SD-NEXT: ret
;
@@ -4890,63 +4888,61 @@ define <16 x i16> @test_signed_v16f16_v16i16(<16 x half> %f) {
define <8 x i8> @test_signed_v8f64_v8i8(<8 x double> %f) {
; CHECK-SD-LABEL: test_signed_v8f64_v8i8:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mov d4, v3.d[1]
-; CHECK-SD-NEXT: fcvtzs w11, d3
-; CHECK-SD-NEXT: mov w9, #127 // =0x7f
-; CHECK-SD-NEXT: mov d3, v1.d[1]
-; CHECK-SD-NEXT: fcvtzs w13, d2
-; CHECK-SD-NEXT: fcvtzs w15, d1
-; CHECK-SD-NEXT: fcvtzs w17, d0
-; CHECK-SD-NEXT: fcvtzs w8, d4
-; CHECK-SD-NEXT: mov d4, v2.d[1]
-; CHECK-SD-NEXT: mov d2, v0.d[1]
-; CHECK-SD-NEXT: fcvtzs w14, d3
-; CHECK-SD-NEXT: cmp w8, #127
-; CHECK-SD-NEXT: fcvtzs w12, d4
-; CHECK-SD-NEXT: fcvtzs w16, d2
-; CHECK-SD-NEXT: csel w10, w8, w9, lt
-; CHECK-SD-NEXT: mov w8, #-128 // =0xffffff80
+; CHECK-SD-NEXT: mov d4, v0.d[1]
+; CHECK-SD-NEXT: fcvtzs w10, d0
+; CHECK-SD-NEXT: mov w8, #127 // =0x7f
+; CHECK-SD-NEXT: fcvtzs w12, d1
+; CHECK-SD-NEXT: mov d1, v1.d[1]
+; CHECK-SD-NEXT: fcvtzs w9, d4
+; CHECK-SD-NEXT: cmp w9, #127
+; CHECK-SD-NEXT: csel w11, w9, w8, lt
+; CHECK-SD-NEXT: mov w9, #-128 // =0xffffff80
+; CHECK-SD-NEXT: cmn w11, #128
+; CHECK-SD-NEXT: csel w11, w11, w9, gt
+; CHECK-SD-NEXT: cmp w10, #127
+; CHECK-SD-NEXT: csel w10, w10, w8, lt
+; CHECK-SD-NEXT: cmn w10, #128
+; CHECK-SD-NEXT: csel w10, w10, w9, gt
+; CHECK-SD-NEXT: cmp w12, #127
+; CHECK-SD-NEXT: fmov s0, w10
+; CHECK-SD-NEXT: fcvtzs w10, d1
+; CHECK-SD-NEXT: mov d1, v2.d[1]
+; CHECK-SD-NEXT: mov v0.b[1], w11
+; CHECK-SD-NEXT: csel w11, w12, w8, lt
+; CHECK-SD-NEXT: cmn w11, #128
+; CHECK-SD-NEXT: csel w11, w11, w9, gt
+; CHECK-SD-NEXT: cmp w10, #127
+; CHECK-SD-NEXT: csel w10, w10, w8, lt
+; CHECK-SD-NEXT: mov v0.b[2], w11
+; CHECK-SD-NEXT: fcvtzs w11, d2
; CHECK-SD-NEXT: cmn w10, #128
-; CHECK-SD-NEXT: csel w10, w10, w8, gt
+; CHECK-SD-NEXT: csel w10, w10, w9, gt
; CHECK-SD-NEXT: cmp w11, #127
-; CHECK-SD-NEXT: csel w11, w11, w9, lt
+; CHECK-SD-NEXT: mov v0.b[3], w10
+; CHECK-SD-NEXT: fcvtzs w10, d1
+; CHECK-SD-NEXT: csel w11, w11, w8, lt
+; CHECK-SD-NEXT: mov d1, v3.d[1]
; CHECK-SD-NEXT: cmn w11, #128
-; CHECK-SD-NEXT: csel w11, w11, w8, gt
-; CHECK-SD-NEXT: cmp w12, #127
-; CHECK-SD-NEXT: csel w12, w12, w9, lt
-; CHECK-SD-NEXT: fmov s3, w11
-; CHECK-SD-NEXT: cmn w12, #128
-; CHECK-SD-NEXT: csel w12, w12, w8, gt
-; CHECK-SD-NEXT: cmp w13, #127
-; CHECK-SD-NEXT: csel w13, w13, w9, lt
-; CHECK-SD-NEXT: mov v3.s[1], w10
-; CHECK-SD-NEXT: cmn w13, #128
-; CHECK-SD-NEXT: csel w13, w13, w8, gt
-; CHECK-SD-NEXT: cmp w14, #127
-; CHECK-SD-NEXT: csel w14, w14, w9, lt
-; CHECK-SD-NEXT: fmov s2, w13
-; CHECK-SD-NEXT: cmn w14, #128
-; CHECK-SD-NEXT: csel w14, w14, w8, gt
-; CHECK-SD-NEXT: cmp w15, #127
-; CHECK-SD-NEXT: csel w15, w15, w9, lt
-; CHECK-SD-NEXT: mov v2.s[1], w12
-; CHECK-SD-NEXT: cmn w15, #128
-; CHECK-SD-NEXT: csel w15, w15, w8, gt
-; CHECK-SD-NEXT: cmp w16, #127
-; CHECK-SD-NEXT: csel w11, w16, w9, lt
-; CHECK-SD-NEXT: fmov s1, w15
+; CHECK-SD-NEXT: csel w11, w11, w9, gt
+; CHECK-SD-NEXT: mov v0.b[4], w11
+; CHECK-SD-NEXT: cmp w10, #127
+; CHECK-SD-NEXT: fcvtzs w11, d3
+; CHECK-SD-NEXT: csel w10, w10, w8, lt
+; CHECK-SD-NEXT: cmn w10, #128
+; CHECK-SD-NEXT: csel w10, w10, w9, gt
+; CHECK-SD-NEXT: mov v0.b[5], w10
+; CHECK-SD-NEXT: cmp w11, #127
+; CHECK-SD-NEXT: fcvtzs w10, d1
+; CHECK-SD-NEXT: csel w11, w11, w8, lt
; CHECK-SD-NEXT: cmn w11, #128
-; CHECK-SD-NEXT: csel w10, w11, w8, gt
-; CHECK-SD-NEXT: cmp w17, #127
-; CHECK-SD-NEXT: csel w9, w17, w9, lt
-; CHECK-SD-NEXT: mov v1.s[1], w14
-; CHECK-SD-NEXT: cmn w9, #128
-; CHECK-SD-NEXT: csel w8, w9, w8, gt
-; CHECK-SD-NEXT: fmov s0, w8
-; CHECK-SD-NEXT: adrp x8, .LCPI82_0
-; CHECK-SD-NEXT: ldr d4, [x8, :lo12:.LCPI82_0]
-; CHECK-SD-NEXT: mov v0.s[1], w10
-; CHECK-SD-NEXT: tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.8b
+; CHECK-SD-NEXT: csel w11, w11, w9, gt
+; CHECK-SD-NEXT: mov v0.b[6], w11
+; CHECK-SD-NEXT: cmp w10, #127
+; CHECK-SD-NEXT: csel w8, w10, w8, lt
+; CHECK-SD-NEXT: cmn w8, #128
+; CHECK-SD-NEXT: csel w8, w8, w9, gt
+; CHECK-SD-NEXT: mov v0.b[7], w8
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_signed_v8f64_v8i8:
@@ -4990,11 +4986,9 @@ define <16 x i8> @test_signed_v16f64_v16i8(<16 x double> %f) {
; CHECK-SD-NEXT: mov d16, v0.d[1]
; CHECK-SD-NEXT: fcvtzs w10, d0
; CHECK-SD-NEXT: mov w8, #127 // =0x7f
-; CHECK-SD-NEXT: mov d0, v1.d[1]
-; CHECK-SD-NEXT: fcvtzs w13, d1
-; CHECK-SD-NEXT: mov d1, v2.d[1]
+; CHECK-SD-NEXT: fcvtzs w12, d1
+; CHECK-SD-NEXT: mov d1, v1.d[1]
; CHECK-SD-NEXT: fcvtzs w9, d16
-; CHECK-SD-NEXT: fcvtzs w12, d0
; CHECK-SD-NEXT: cmp w9, #127
; CHECK-SD-NEXT: csel w11, w9, w8, lt
; CHECK-SD-NEXT: mov w9, #-128 // =0xffffff80
@@ -5006,115 +5000,94 @@ define <16 x i8> @test_signed_v16f64_v16i8(<16 x double> %f) {
; CHECK-SD-NEXT: csel w10, w10, w9, gt
; CHECK-SD-NEXT: cmp w12, #127
; CHECK-SD-NEXT: fmov s0, w10
-; CHECK-SD-NEXT: csel w10, w12, w8, lt
+; CHECK-SD-NEXT: fcvtzs w10, d1
+; CHECK-SD-NEXT: mov d1, v2.d[1]
+; CHECK-SD-NEXT: mov v0.b[1], w11
+; CHECK-SD-NEXT: csel w11, w12, w8, lt
+; CHECK-SD-NEXT: cmn w11, #128
+; CHECK-SD-NEXT: csel w11, w11, w9, gt
+; CHECK-SD-NEXT: cmp w10, #127
+; CHECK-SD-NEXT: csel w10, w10, w8, lt
+; CHECK-SD-NEXT: mov v0.b[2], w11
+; CHECK-SD-NEXT: fcvtzs w11, d2
; CHECK-SD-NEXT: cmn w10, #128
; CHECK-SD-NEXT: csel w10, w10, w9, gt
-; CHECK-SD-NEXT: cmp w13, #127
-; CHECK-SD-NEXT: csel w12, w13, w8, lt
-; CHECK-SD-NEXT: mov v0.s[1], w11
-; CHECK-SD-NEXT: fcvtzs w11, d1
-; CHECK-SD-NEXT: cmn w12, #128
-; CHECK-SD-NEXT: csel w12, w12, w9, gt
-; CHECK-SD-NEXT: fmov s1, w12
-; CHECK-SD-NEXT: fcvtzs w12, d2
-; CHECK-SD-NEXT: mov d2, v3.d[1]
; CHECK-SD-NEXT: cmp w11, #127
-; CHECK-SD-NEXT: mov w13, v0.s[1]
-; CHECK-SD-NEXT: mov v1.s[1], w10
-; CHECK-SD-NEXT: csel w10, w11, w8, lt
+; CHECK-SD-NEXT: mov v0.b[3], w10
+; CHECK-SD-NEXT: fcvtzs w10, d1
+; CHECK-SD-NEXT: csel w11, w11, w8, lt
+; CHECK-SD-NEXT: mov d1, v3.d[1]
+; CHECK-SD-NEXT: cmn w11, #128
+; CHECK-SD-NEXT: csel w11, w11, w9, gt
+; CHECK-SD-NEXT: mov v0.b[4], w11
+; CHECK-SD-NEXT: fcvtzs w11, d3
+; CHECK-SD-NEXT: cmp w10, #127
+; CHECK-SD-NEXT: csel w10, w10, w8, lt
; CHECK-SD-NEXT: cmn w10, #128
-; CHECK-SD-NEXT: fcvtzs w11, d2
; CHECK-SD-NEXT: csel w10, w10, w9, gt
-; CHECK-SD-NEXT: cmp w12, #127
-; CHECK-SD-NEXT: mov v0.b[1], w13
-; CHECK-SD-NEXT: csel w12, w12, w8, lt
-; CHECK-SD-NEXT: cmn w12, #128
-; CHECK-SD-NEXT: mov w13, v1.s[1]
-; CHECK-SD-NEXT: csel w12, w12, w9, gt
+; CHECK-SD-NEXT: mov v0.b[5], w10
+; CHECK-SD-NEXT: fcvtzs w10, d1
; CHECK-SD-NEXT: cmp w11, #127
-; CHECK-SD-NEXT: fmov s2, w12
-; CHECK-SD-NEXT: fcvtzs w12, d3
-; CHECK-SD-NEXT: mov d3, v4.d[1]
-; CHECK-SD-NEXT: mov v0.b[2], v1.b[0]
-; CHECK-SD-NEXT: mov v2.s[1], w10
-; CHECK-SD-NEXT: csel w10, w11, w8, lt
+; CHECK-SD-NEXT: csel w11, w11, w8, lt
+; CHECK-SD-NEXT: mov d1, v4.d[1]
+; CHECK-SD-NEXT: cmn w11, #128
+; CHECK-SD-NEXT: csel w11, w11, w9, gt
+; CHECK-SD-NEXT: mov v0.b[6], w11
+; CHECK-SD-NEXT: fcvtzs w11, d4
+; CHECK-SD-NEXT: cmp w10, #127
+; CHECK-SD-NEXT: csel w10, w10, w8, lt
; CHECK-SD-NEXT: cmn w10, #128
-; CHECK-SD-NEXT: fcvtzs w11, d3
; CHECK-SD-NEXT: csel w10, w10, w9, gt
-; CHECK-SD-NEXT: cmp w12, #127
-; CHECK-SD-NEXT: mov v0.b[3], w13
-; CHECK-SD-NEXT: csel w12, w12, w8, lt
-; CHECK-SD-NEXT: cmn w12, #128
-; CHECK-SD-NEXT: mov w13, v2.s[1]
-; CHECK-SD-NEXT: csel w12, w12, w9, gt
; CHECK-SD-NEXT: cmp w11, #127
-; CHECK-SD-NEXT: fmov s3, w12
-; CHECK-SD-NEXT: fcvtzs w12, d4
-; CHECK-SD-NEXT: mov v0.b[4], v2.b[0]
-; CHECK-SD-NEXT: mov d4, v5.d[1]
-; CHECK-SD-NEXT: mov v3.s[1], w10
-; CHECK-SD-NEXT: csel w10, w11, w8, lt
+; CHECK-SD-NEXT: mov v0.b[7], w10
+; CHECK-SD-NEXT: fcvtzs w10, d1
+; CHECK-SD-NEXT: csel w11, w11, w8, lt
+; CHECK-SD-NEXT: mov d1, v5.d[1]
+; CHECK-SD-NEXT: cmn w11, #128
+; CHECK-SD-NEXT: csel w11, w11, w9, gt
+; CHECK-SD-NEXT: mov v0.b[8], w11
+; CHECK-SD-NEXT: fcvtzs w11, d5
+; CHECK-SD-NEXT: cmp w10, #127
+; CHECK-SD-NEXT: csel w10, w10, w8, lt
; CHECK-SD-NEXT: cmn w10, #128
-; CHECK-SD-NEXT: mov v0.b[5], w13
; CHECK-SD-NEXT: csel w10, w10, w9, gt
-; CHECK-SD-NEXT: cmp w12, #127
-; CHECK-SD-NEXT: fcvtzs w11, d4
-; CHECK-SD-NEXT: csel w12, w12, w8, lt
-; CHECK-SD-NEXT: cmn w12, #128
-; CHECK-SD-NEXT: mov w13, v3.s[1]
-; CHECK-SD-NEXT: csel w12, w12, w9, gt
-; CHECK-SD-NEXT: mov v0.b[6], v3.b[0]
-; CHECK-SD-NEXT: fmov s4, w12
-; CHECK-SD-NEXT: fcvtzs w12, d5
+; CHECK-SD-NEXT: mov v0.b[9], w10
+; CHECK-SD-NEXT: fcvtzs w10, d1
; CHECK-SD-NEXT: cmp w11, #127
-; CHECK-SD-NEXT: mov d5, v6.d[1]
-; CHECK-SD-NEXT: mov v4.s[1], w10
-; CHECK-SD-NEXT: csel w10, w11, w8, lt
-; CHECK-SD-NEXT: mov v0.b[7], w13
-; CHECK-SD-NEXT: cmn w10, #128
-; CHECK-SD-NEXT: csel w10, w10, w9, gt
-; CHECK-SD-NEXT: cmp w12, #127
-; CHECK-SD-NEXT: fcvtzs w13, d5
-; CHECK-SD-NEXT: csel w11, w12, w8, lt
+; CHECK-SD-NEXT: csel w11, w11, w8, lt
+; CHECK-SD-NEXT: mov d1, v6.d[1]
; CHECK-SD-NEXT: cmn w11, #128
-; CHECK-SD-NEXT: mov w12, v4.s[1]
-; CHECK-SD-NEXT: mov v0.b[8], v4.b[0]
; CHECK-SD-NEXT: csel w11, w11, w9, gt
-; CHECK-SD-NEXT: fmov s5, w11
+; CHECK-SD-NEXT: mov v0.b[10], w11
; CHECK-SD-NEXT: fcvtzs w11, d6
-; CHECK-SD-NEXT: cmp w13, #127
-; CHECK-SD-NEXT: mov d6, v7.d[1]
-; CHECK-SD-NEXT: mov v0.b[9], w12
-; CHECK-SD-NEXT: mov v5.s[1], w10
-; CHECK-SD-NEXT: csel w10, w13, w8, lt
+; CHECK-SD-NEXT: cmp w10, #127
+; CHECK-SD-NEXT: csel w10, w10, w8, lt
; CHECK-SD-NEXT: cmn w10, #128
; CHECK-SD-NEXT: csel w10, w10, w9, gt
; CHECK-SD-NEXT: cmp w11, #127
-; CHECK-SD-NEXT: fcvtzs w13, d6
+; CHECK-SD-NEXT: mov v0.b[11], w10
+; CHECK-SD-NEXT: fcvtzs w10, d1
; CHECK-SD-NEXT: csel w11, w11, w8, lt
+; CHECK-SD-NEXT: mov d1, v7.d[1]
; CHECK-SD-NEXT: cmn w11, #128
-; CHECK-SD-NEXT: mov v0.b[10], v5.b[0]
-; CHECK-SD-NEXT: mov w12, v5.s[1]
; CHECK-SD-NEXT: csel w11, w11, w9, gt
-; CHECK-SD-NEXT: fmov s6, w11
+; CHECK-SD-NEXT: mov v0.b[12], w11
+; CHECK-SD-NEXT: cmp w10, #127
; CHECK-SD-NEXT: fcvtzs w11, d7
-; CHECK-SD-NEXT: cmp w13, #127
-; CHECK-SD-NEXT: mov v0.b[11], w12
-; CHECK-SD-NEXT: mov v6.s[1], w10
-; CHECK-SD-NEXT: csel w10, w13, w8, lt
+; CHECK-SD-NEXT: csel w10, w10, w8, lt
; CHECK-SD-NEXT: cmn w10, #128
; CHECK-SD-NEXT: csel w10, w10, w9, gt
+; CHECK-SD-NEXT: mov v0.b[13], w10
; CHECK-SD-NEXT: cmp w11, #127
-; CHECK-SD-NEXT: csel w8, w11, w8, lt
+; CHECK-SD-NEXT: fcvtzs w10, d1
+; CHECK-SD-NEXT: csel w11, w11, w8, lt
+; CHECK-SD-NEXT: cmn w11, #128
+; CHECK-SD-NEXT: csel w11, w11, w9, gt
+; CHECK-SD-NEXT: mov v0.b[14], w11
+; CHECK-SD-NEXT: cmp w10, #127
+; CHECK-SD-NEXT: csel w8, w10, w8, lt
; CHECK-SD-NEXT: cmn w8, #128
-; CHECK-SD-NEXT: mov v0.b[12], v6.b[0]
-; CHECK-SD-NEXT: mov w11, v6.s[1]
; CHECK-SD-NEXT: csel w8, w8, w9, gt
-; CHECK-SD-NEXT: fmov s7, w8
-; CHECK-SD-NEXT: mov v0.b[13], w11
-; CHECK-SD-NEXT: mov v7.s[1], w10
-; CHECK-SD-NEXT: mov v0.b[14], v7.b[0]
-; CHECK-SD-NEXT: mov w8, v7.s[1]
; CHECK-SD-NEXT: mov v0.b[15], w8
; CHECK-SD-NEXT: ret
;
@@ -5179,63 +5152,60 @@ define <16 x i8> @test_signed_v16f64_v16i8(<16 x double> %f) {
define <8 x i16> @test_signed_v8f64_v8i16(<8 x double> %f) {
; CHECK-SD-LABEL: test_signed_v8f64_v8i16:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mov d4, v3.d[1]
+; CHECK-SD-NEXT: mov d4, v0.d[1]
; CHECK-SD-NEXT: mov w8, #32767 // =0x7fff
-; CHECK-SD-NEXT: fcvtzs w11, d3
-; CHECK-SD-NEXT: mov d3, v1.d[1]
-; CHECK-SD-NEXT: fcvtzs w13, d2
-; CHECK-SD-NEXT: fcvtzs w15, d1
-; CHECK-SD-NEXT: fcvtzs w17, d0
+; CHECK-SD-NEXT: fcvtzs w10, d0
+; CHECK-SD-NEXT: fcvtzs w12, d1
+; CHECK-SD-NEXT: mov d1, v1.d[1]
; CHECK-SD-NEXT: fcvtzs w9, d4
-; CHECK-SD-NEXT: mov d4, v2.d[1]
-; CHECK-SD-NEXT: mov d2, v0.d[1]
-; CHECK-SD-NEXT: fcvtzs w14, d3
; CHECK-SD-NEXT: cmp w9, w8
-; CHECK-SD-NEXT: fcvtzs w12, d4
-; CHECK-SD-NEXT: fcvtzs w16, d2
-; CHECK-SD-NEXT: csel w10, w9, w8, lt
+; CHECK-SD-NEXT: csel w11, w9, w8, lt
; CHECK-SD-NEXT: mov w9, #-32768 // =0xffff8000
+; CHECK-SD-NEXT: cmn w11, #8, lsl #12 // =32768
+; CHECK-SD-NEXT: csel w11, w11, w9, gt
+; CHECK-SD-NEXT: cmp w10, w8
+; CHECK-SD-NEXT: csel w10, w10, w8, lt
+; CHECK-SD-NEXT: cmn w10, #8, lsl #12 // =32768
+; CHECK-SD-NEXT: csel w10, w10, w9, gt
+; CHECK-SD-NEXT: cmp w12, w8
+; CHECK-SD-NEXT: fmov s0, w10
+; CHECK-SD-NEXT: fcvtzs w10, d1
+; CHECK-SD-NEXT: mov d1, v2.d[1]
+; CHECK-SD-NEXT: mov v0.h[1], w11
+; CHECK-SD-NEXT: csel w11, w12, w8, lt
+; CHECK-SD-NEXT: cmn w11, #8, lsl #12 // =32768
+; CHECK-SD-NEXT: csel w11, w11, w9, gt
+; CHECK-SD-NEXT: cmp w10, w8
+; CHECK-SD-NEXT: csel w10, w10, w8, lt
+; CHECK-SD-NEXT: mov v0.h[2], w11
+; CHECK-SD-NEXT: fcvtzs w11, d2
; CHECK-SD-NEXT: cmn w10, #8, lsl #12 // =32768
; CHECK-SD-NEXT: csel w10, w10, w9, gt
; CHECK-SD-NEXT: cmp w11, w8
+; CHECK-SD-NEXT: mov v0.h[3], w10
+; CHECK-SD-NEXT: fcvtzs w10, d1
; CHECK-SD-NEXT: csel w11, w11, w8, lt
+; CHECK-SD-NEXT: mov d1, v3.d[1]
; CHECK-SD-NEXT: cmn w11, #8, lsl #12 // =32768
; CHECK-SD-NEXT: csel w11, w11, w9, gt
-; CHECK-SD-NEXT: cmp w12, w8
-; CHECK-SD-NEXT: csel w12, w12, w8, lt
-; CHECK-SD-NEXT: fmov s3, w11
-; CHECK-SD-NEXT: cmn w12, #8, lsl #12 // =32768
-; CHECK-SD-NEXT: csel w12, w12, w9, gt
-; CHECK-SD-NEXT: cmp w13, w8
-; CHECK-SD-NEXT: csel w13, w13, w8, lt
-; CHECK-SD-NEXT: mov v3.s[1], w10
-; CHECK-SD-NEXT: cmn w13, #8, lsl #12 // =32768
-; CHECK-SD-NEXT: csel w13, w13, w9, gt
-; CHECK-SD-NEXT: cmp w14, w8
-; CHECK-SD-NEXT: csel w14, w14, w8, lt
-; CHECK-SD-NEXT: fmov s2, w13
-; CHECK-SD-NEXT: cmn w14, #8, lsl #12 // =32768
-; CHECK-SD-NEXT: csel w14, w14, w9, gt
-; CHECK-SD-NEXT: cmp w15, w8
-; CHECK-SD-NEXT: csel w15, w15, w8, lt
-; CHECK-SD-NEXT: mov v2.s[1], w12
-; CHECK-SD-NEXT: cmn w15, #8, lsl #12 // =32768
-; CHECK-SD-NEXT: csel w15, w15, w9, gt
-; CHECK-SD-NEXT: cmp w16, w8
-; CHECK-SD-NEXT: csel w11, w16, w8, lt
-; CHECK-SD-NEXT: fmov s1, w15
+; CHECK-SD-NEXT: mov v0.h[4], w11
+; CHECK-SD-NEXT: cmp w10, w8
+; CHECK-SD-NEXT: fcvtzs w11, d3
+; CHECK-SD-NEXT: csel w10, w10, w8, lt
+; CHECK-SD-NEXT: cmn w10, #8, lsl #12 // =32768
+; CHECK-SD-NEXT: csel w10, w10, w9, gt
+; CHECK-SD-NEXT: mov v0.h[5], w10
+; CHECK-SD-NEXT: cmp w11, w8
+; CHECK-SD-NEXT: fcvtzs w10, d1
+; CHECK-SD-NEXT: csel w11, w11, w8, lt
; CHECK-SD-NEXT: cmn w11, #8, lsl #12 // =32768
-; CHECK-SD-NEXT: csel w10, w11, w9, gt
-; CHECK-SD-NEXT: cmp w17, w8
-; CHECK-SD-NEXT: csel w8, w17, w8, lt
-; CHECK-SD-NEXT: mov v1.s[1], w14
+; CHECK-SD-NEXT: csel w11, w11, w9, gt
+; CHECK-SD-NEXT: mov v0.h[6], w11
+; CHECK-SD-NEXT: cmp w10, w8
+; CHECK-SD-NEXT: csel w8, w10, w8, lt
; CHECK-SD-NEXT: cmn w8, #8, lsl #12 // =32768
; CHECK-SD-NEXT: csel w8, w8, w9, gt
-; CHECK-SD-NEXT: fmov s0, w8
-; CHECK-SD-NEXT: adrp x8, .LCPI84_0
-; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI84_0]
-; CHECK-SD-NEXT: mov v0.s[1], w10
-; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
+; CHECK-SD-NEXT: mov v0.h[7], w8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_signed_v8f64_v8i16:
@@ -5275,116 +5245,112 @@ define <8 x i16> @test_signed_v8f64_v8i16(<8 x double> %f) {
define <16 x i16> @test_signed_v16f64_v16i16(<16 x double> %f) {
; CHECK-SD-LABEL: test_signed_v16f64_v16i16:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mov d16, v3.d[1]
-; CHECK-SD-NEXT: mov w9, #32767 // =0x7fff
-; CHECK-SD-NEXT: fcvtzs w11, d3
-; CHECK-SD-NEXT: mov d3, v1.d[1]
-; CHECK-SD-NEXT: fcvtzs w14, d2
-; CHECK-SD-NEXT: fcvtzs w15, d1
-; CHECK-SD-NEXT: mov d1, v7.d[1]
-; CHECK-SD-NEXT: fcvtzs w18, d0
-; CHECK-SD-NEXT: fcvtzs w1, d7
-; CHECK-SD-NEXT: fcvtzs w2, d6
-; CHECK-SD-NEXT: fcvtzs w4, d5
-; CHECK-SD-NEXT: fcvtzs w6, d4
-; CHECK-SD-NEXT: fcvtzs w8, d16
-; CHECK-SD-NEXT: mov d16, v2.d[1]
-; CHECK-SD-NEXT: mov d2, v0.d[1]
-; CHECK-SD-NEXT: mov d0, v6.d[1]
+; CHECK-SD-NEXT: mov d16, v0.d[1]
+; CHECK-SD-NEXT: mov w8, #32767 // =0x7fff
+; CHECK-SD-NEXT: fcvtzs w11, d0
+; CHECK-SD-NEXT: mov d0, v1.d[1]
+; CHECK-SD-NEXT: fcvtzs w12, d1
+; CHECK-SD-NEXT: fcvtzs w15, d3
+; CHECK-SD-NEXT: mov d1, v4.d[1]
+; CHECK-SD-NEXT: fcvtzs w1, d5
+; CHECK-SD-NEXT: fcvtzs w9, d16
+; CHECK-SD-NEXT: fcvtzs w14, d0
+; CHECK-SD-NEXT: mov d0, v2.d[1]
; CHECK-SD-NEXT: fcvtzs w0, d1
-; CHECK-SD-NEXT: cmp w8, w9
-; CHECK-SD-NEXT: fcvtzs w13, d16
-; CHECK-SD-NEXT: fcvtzs w17, d2
-; CHECK-SD-NEXT: csel w10, w8, w9, lt
-; CHECK-SD-NEXT: mov w8, #-32768 // =0xffff8000
+; CHECK-SD-NEXT: cmp w9, w8
+; CHECK-SD-NEXT: csel w10, w9, w8, lt
+; CHECK-SD-NEXT: mov w9, #-32768 // =0xffff8000
; CHECK-SD-NEXT: cmn w10, #8, lsl #12 // =32768
-; CHECK-SD-NEXT: csel w10, w10, w8, gt
-; CHECK-SD-NEXT: cmp w11, w9
-; CHECK-SD-NEXT: csel w11, w11, w9, lt
+; CHECK-SD-NEXT: csel w10, w10, w9, gt
+; CHECK-SD-NEXT: cmp w11, w8
+; CHECK-SD-NEXT: csel w11, w11, w8, lt
+; CHECK-SD-NEXT: cmn w11, #8, lsl #12 // =32768
+; CHECK-SD-NEXT: csel w13, w11, w9, gt
+; CHECK-SD-NEXT: cmp w12, w8
+; CHECK-SD-NEXT: csel w11, w12, w8, lt
+; CHECK-SD-NEXT: fcvtzs w12, d2
+; CHECK-SD-NEXT: mov d2, v5.d[1]
; CHECK-SD-NEXT: cmn w11, #8, lsl #12 // =32768
-; CHECK-SD-NEXT: csel w12, w11, w8, gt
-; CHECK-SD-NEXT: cmp w13, w9
-; CHECK-SD-NEXT: csel w11, w13, w9, lt
-; CHECK-SD-NEXT: fcvtzs w13, d3
+; CHECK-SD-NEXT: csel w17, w11, w9, gt
+; CHECK-SD-NEXT: cmp w14, w8
+; CHECK-SD-NEXT: csel w11, w14, w8, lt
+; CHECK-SD-NEXT: fcvtzs w14, d0
+; CHECK-SD-NEXT: mov d0, v3.d[1]
; CHECK-SD-NEXT: cmn w11, #8, lsl #12 // =32768
-; CHECK-SD-NEXT: csel w11, w11, w8, gt
-; CHECK-SD-NEXT: cmp w14, w9
-; CHECK-SD-NEXT: csel w14, w14, w9, lt
-; CHECK-SD-NEXT: cmn w14, #8, lsl #12 // =32768
-; CHECK-SD-NEXT: csel w14, w14, w8, gt
-; CHECK-SD-NEXT: cmp w13, w9
-; CHECK-SD-NEXT: csel w13, w13, w9, lt
-; CHECK-SD-NEXT: cmn w13, #8, lsl #12 // =32768
-; CHECK-SD-NEXT: csel w13, w13, w8, gt
-; CHECK-SD-NEXT: cmp w15, w9
-; CHECK-SD-NEXT: csel w15, w15, w9, lt
-; CHECK-SD-NEXT: cmn w15, #8, lsl #12 // =32768
-; CHECK-SD-NEXT: csel w16, w15, w8, gt
-; CHECK-SD-NEXT: cmp w17, w9
-; CHECK-SD-NEXT: csel w15, w17, w9, lt
+; CHECK-SD-NEXT: csel w11, w11, w9, gt
+; CHECK-SD-NEXT: cmp w12, w8
+; CHECK-SD-NEXT: csel w12, w12, w8, lt
+; CHECK-SD-NEXT: cmn w12, #8, lsl #12 // =32768
+; CHECK-SD-NEXT: fcvtzs w18, d0
+; CHECK-SD-NEXT: fmov s0, w13
+; CHECK-SD-NEXT: csel w16, w12, w9, gt
+; CHECK-SD-NEXT: cmp w14, w8
+; CHECK-SD-NEXT: csel w12, w14, w8, lt
+; CHECK-SD-NEXT: cmn w12, #8, lsl #12 // =32768
+; CHECK-SD-NEXT: mov v0.h[1], w10
+; CHECK-SD-NEXT: fcvtzs w10, d2
+; CHECK-SD-NEXT: csel w14, w12, w9, gt
+; CHECK-SD-NEXT: cmp w15, w8
+; CHECK-SD-NEXT: mov d2, v6.d[1]
+; CHECK-SD-NEXT: csel w12, w15, w8, lt
+; CHECK-SD-NEXT: cmn w12, #8, lsl #12 // =32768
+; CHECK-SD-NEXT: csel w12, w12, w9, gt
+; CHECK-SD-NEXT: cmp w18, w8
+; CHECK-SD-NEXT: mov v0.h[2], w17
+; CHECK-SD-NEXT: csel w15, w18, w8, lt
+; CHECK-SD-NEXT: fcvtzs w18, d4
; CHECK-SD-NEXT: cmn w15, #8, lsl #12 // =32768
-; CHECK-SD-NEXT: csel w15, w15, w8, gt
-; CHECK-SD-NEXT: cmp w18, w9
-; CHECK-SD-NEXT: csel w17, w18, w9, lt
-; CHECK-SD-NEXT: cmn w17, #8, lsl #12 // =32768
-; CHECK-SD-NEXT: csel w17, w17, w8, gt
-; CHECK-SD-NEXT: cmp w0, w9
-; CHECK-SD-NEXT: csel w18, w0, w9, lt
-; CHECK-SD-NEXT: fcvtzs w0, d0
-; CHECK-SD-NEXT: mov d0, v5.d[1]
-; CHECK-SD-NEXT: cmn w18, #8, lsl #12 // =32768
-; CHECK-SD-NEXT: csel w18, w18, w8, gt
-; CHECK-SD-NEXT: cmp w1, w9
-; CHECK-SD-NEXT: csel w1, w1, w9, lt
-; CHECK-SD-NEXT: cmn w1, #8, lsl #12 // =32768
-; CHECK-SD-NEXT: fcvtzs w3, d0
-; CHECK-SD-NEXT: mov d0, v4.d[1]
-; CHECK-SD-NEXT: csel w1, w1, w8, gt
-; CHECK-SD-NEXT: cmp w0, w9
-; CHECK-SD-NEXT: csel w0, w0, w9, lt
-; CHECK-SD-NEXT: fmov s7, w1
+; CHECK-SD-NEXT: csel w15, w15, w9, gt
+; CHECK-SD-NEXT: cmp w0, w8
+; CHECK-SD-NEXT: csel w0, w0, w8, lt
+; CHECK-SD-NEXT: mov v0.h[3], w11
; CHECK-SD-NEXT: cmn w0, #8, lsl #12 // =32768
-; CHECK-SD-NEXT: csel w0, w0, w8, gt
-; CHECK-SD-NEXT: cmp w2, w9
-; CHECK-SD-NEXT: fcvtzs w5, d0
-; CHECK-SD-NEXT: csel w2, w2, w9, lt
-; CHECK-SD-NEXT: fmov s3, w12
-; CHECK-SD-NEXT: mov v7.s[1], w18
-; CHECK-SD-NEXT: cmn w2, #8, lsl #12 // =32768
-; CHECK-SD-NEXT: csel w2, w2, w8, gt
-; CHECK-SD-NEXT: cmp w3, w9
-; CHECK-SD-NEXT: csel w3, w3, w9, lt
-; CHECK-SD-NEXT: mov v3.s[1], w10
-; CHECK-SD-NEXT: fmov s6, w2
-; CHECK-SD-NEXT: cmn w3, #8, lsl #12 // =32768
-; CHECK-SD-NEXT: fmov s2, w14
-; CHECK-SD-NEXT: csel w3, w3, w8, gt
-; CHECK-SD-NEXT: cmp w4, w9
-; CHECK-SD-NEXT: csel w4, w4, w9, lt
-; CHECK-SD-NEXT: mov v6.s[1], w0
-; CHECK-SD-NEXT: cmn w4, #8, lsl #12 // =32768
-; CHECK-SD-NEXT: mov v2.s[1], w11
-; CHECK-SD-NEXT: csel w12, w4, w8, gt
-; CHECK-SD-NEXT: cmp w5, w9
-; CHECK-SD-NEXT: fmov s1, w16
-; CHECK-SD-NEXT: csel w10, w5, w9, lt
-; CHECK-SD-NEXT: fmov s5, w12
+; CHECK-SD-NEXT: csel w0, w0, w9, gt
+; CHECK-SD-NEXT: cmp w18, w8
+; CHECK-SD-NEXT: csel w18, w18, w8, lt
+; CHECK-SD-NEXT: cmn w18, #8, lsl #12 // =32768
+; CHECK-SD-NEXT: mov v0.h[4], w16
+; CHECK-SD-NEXT: csel w13, w18, w9, gt
+; CHECK-SD-NEXT: cmp w1, w8
+; CHECK-SD-NEXT: fmov s1, w13
+; CHECK-SD-NEXT: csel w13, w1, w8, lt
+; CHECK-SD-NEXT: cmn w13, #8, lsl #12 // =32768
+; CHECK-SD-NEXT: csel w13, w13, w9, gt
+; CHECK-SD-NEXT: cmp w10, w8
+; CHECK-SD-NEXT: mov v0.h[5], w14
+; CHECK-SD-NEXT: mov v1.h[1], w0
+; CHECK-SD-NEXT: csel w10, w10, w8, lt
; CHECK-SD-NEXT: cmn w10, #8, lsl #12 // =32768
-; CHECK-SD-NEXT: csel w10, w10, w8, gt
-; CHECK-SD-NEXT: cmp w6, w9
-; CHECK-SD-NEXT: mov v1.s[1], w13
-; CHECK-SD-NEXT: csel w9, w6, w9, lt
-; CHECK-SD-NEXT: mov v5.s[1], w3
-; CHECK-SD-NEXT: fmov s0, w17
-; CHECK-SD-NEXT: cmn w9, #8, lsl #12 // =32768
-; CHECK-SD-NEXT: csel w8, w9, w8, gt
-; CHECK-SD-NEXT: fmov s4, w8
-; CHECK-SD-NEXT: mov v0.s[1], w15
-; CHECK-SD-NEXT: adrp x8, .LCPI85_0
-; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI85_0]
-; CHECK-SD-NEXT: mov v4.s[1], w10
-; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b
-; CHECK-SD-NEXT: tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b
+; CHECK-SD-NEXT: csel w10, w10, w9, gt
+; CHECK-SD-NEXT: mov v0.h[6], w12
+; CHECK-SD-NEXT: mov v1.h[2], w13
+; CHECK-SD-NEXT: fcvtzs w13, d6
+; CHECK-SD-NEXT: mov v0.h[7], w15
+; CHECK-SD-NEXT: cmp w13, w8
+; CHECK-SD-NEXT: mov v1.h[3], w10
+; CHECK-SD-NEXT: fcvtzs w10, d2
+; CHECK-SD-NEXT: csel w11, w13, w8, lt
+; CHECK-SD-NEXT: mov d2, v7.d[1]
+; CHECK-SD-NEXT: cmn w11, #8, lsl #12 // =32768
+; CHECK-SD-NEXT: csel w11, w11, w9, gt
+; CHECK-SD-NEXT: mov v1.h[4], w11
+; CHECK-SD-NEXT: cmp w10, w8
+; CHECK-SD-NEXT: fcvtzs w11, d7
+; CHECK-SD-NEXT: csel w10, w10, w8, lt
+; CHECK-SD-NEXT: cmn w10, #8, lsl #12 // =32768
+; CHECK-SD-NEXT: csel w10, w10, w9, gt
+; CHECK-SD-NEXT: mov v1.h[5], w10
+; CHECK-SD-NEXT: cmp w11, w8
+; CHECK-SD-NEXT: fcvtzs w10, d2
+; CHECK-SD-NEXT: csel w11, w11, w8, lt
+; CHECK-SD-NEXT: cmn w11, #8, lsl #12 // =32768
+; CHECK-SD-NEXT: csel w11, w11, w9, gt
+; CHECK-SD-NEXT: mov v1.h[6], w11
+; CHECK-SD-NEXT: cmp w10, w8
+; CHECK-SD-NEXT: csel w8, w10, w8, lt
+; CHECK-SD-NEXT: cmn w8, #8, lsl #12 // =32768
+; CHECK-SD-NEXT: csel w8, w8, w9, gt
+; CHECK-SD-NEXT: mov v1.h[7], w8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_signed_v16f64_v16i16:
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
index b1b5154a57b4d..099f43edfca6e 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -645,18 +645,18 @@ define <3 x i32> @test_unsigned_v3f128_v3i32(<3 x fp128> %f) {
; CHECK-SD-LABEL: test_unsigned_v3f128_v3i32:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sub sp, sp, #112
-; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-SD-NEXT: stp x30, x21, [sp, #80] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 112
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w20, -16
+; CHECK-SD-NEXT: .cfi_offset w21, -24
; CHECK-SD-NEXT: .cfi_offset w30, -32
-; CHECK-SD-NEXT: stp q0, q2, [sp, #48] // 32-byte Folded Spill
-; CHECK-SD-NEXT: mov v2.16b, v1.16b
+; CHECK-SD-NEXT: stp q1, q0, [sp, #48] // 32-byte Folded Spill
; CHECK-SD-NEXT: adrp x8, .LCPI16_0
-; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI16_0]
; CHECK-SD-NEXT: mov v0.16b, v2.16b
+; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI16_0]
+; CHECK-SD-NEXT: str q2, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT: bl __getf2
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
@@ -681,13 +681,10 @@ define <3 x i32> @test_unsigned_v3f128_v3i32(<3 x fp128> %f) {
; CHECK-SD-NEXT: cmp w19, #0
; CHECK-SD-NEXT: csel w19, wzr, w0, lt
; CHECK-SD-NEXT: bl __gttf2
-; CHECK-SD-NEXT: cmp w0, #0
-; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT: csinv w8, w19, wzr, le
-; CHECK-SD-NEXT: fmov s0, w8
-; CHECK-SD-NEXT: mov v0.s[1], w20
-; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: csinv w21, w19, wzr, le
; CHECK-SD-NEXT: bl __getf2
; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov w19, w0
@@ -698,11 +695,12 @@ define <3 x i32> @test_unsigned_v3f128_v3i32(<3 x fp128> %f) {
; CHECK-SD-NEXT: csel w19, wzr, w0, lt
; CHECK-SD-NEXT: bl __gttf2
; CHECK-SD-NEXT: cmp w0, #0
-; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-SD-NEXT: csinv w8, w19, wzr, le
+; CHECK-SD-NEXT: fmov s0, w8
+; CHECK-SD-NEXT: mov v0.s[1], w21
+; CHECK-SD-NEXT: ldp x30, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT: mov v0.s[2], w20
; CHECK-SD-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.s[2], w8
; CHECK-SD-NEXT: add sp, sp, #112
; CHECK-SD-NEXT: ret
;
@@ -4048,46 +4046,44 @@ define <16 x i16> @test_unsigned_v16f16_v16i16(<16 x half> %f) {
define <8 x i8> @test_unsigned_v8f64_v8i8(<8 x double> %f) {
; CHECK-SD-LABEL: test_unsigned_v8f64_v8i8:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mov d4, v3.d[1]
-; CHECK-SD-NEXT: mov d5, v2.d[1]
-; CHECK-SD-NEXT: mov w11, #255 // =0xff
-; CHECK-SD-NEXT: fcvtzu w9, d3
-; CHECK-SD-NEXT: mov d3, v1.d[1]
-; CHECK-SD-NEXT: fcvtzu w12, d2
-; CHECK-SD-NEXT: fcvtzu w14, d1
-; CHECK-SD-NEXT: fcvtzu w8, d4
; CHECK-SD-NEXT: mov d4, v0.d[1]
-; CHECK-SD-NEXT: fcvtzu w10, d5
-; CHECK-SD-NEXT: fcvtzu w13, d3
-; CHECK-SD-NEXT: cmp w8, #255
-; CHECK-SD-NEXT: fcvtzu w15, d4
-; CHECK-SD-NEXT: csel w8, w8, w11, lo
+; CHECK-SD-NEXT: fcvtzu w10, d0
+; CHECK-SD-NEXT: mov w8, #255 // =0xff
+; CHECK-SD-NEXT: fcvtzu w11, d1
+; CHECK-SD-NEXT: mov d1, v1.d[1]
+; CHECK-SD-NEXT: fcvtzu w9, d4
; CHECK-SD-NEXT: cmp w9, #255
-; CHECK-SD-NEXT: csel w9, w9, w11, lo
+; CHECK-SD-NEXT: csel w9, w9, w8, lo
; CHECK-SD-NEXT: cmp w10, #255
-; CHECK-SD-NEXT: fmov s4, w9
-; CHECK-SD-NEXT: csel w9, w10, w11, lo
-; CHECK-SD-NEXT: cmp w12, #255
-; CHECK-SD-NEXT: fcvtzu w10, d0
-; CHECK-SD-NEXT: mov v4.s[1], w8
-; CHECK-SD-NEXT: csel w8, w12, w11, lo
-; CHECK-SD-NEXT: cmp w13, #255
-; CHECK-SD-NEXT: fmov s3, w8
-; CHECK-SD-NEXT: csel w8, w13, w11, lo
-; CHECK-SD-NEXT: cmp w14, #255
-; CHECK-SD-NEXT: mov v3.s[1], w9
-; CHECK-SD-NEXT: csel w9, w14, w11, lo
-; CHECK-SD-NEXT: cmp w15, #255
-; CHECK-SD-NEXT: fmov s2, w9
-; CHECK-SD-NEXT: csel w9, w15, w11, lo
+; CHECK-SD-NEXT: csel w10, w10, w8, lo
+; CHECK-SD-NEXT: cmp w11, #255
+; CHECK-SD-NEXT: fmov s0, w10
+; CHECK-SD-NEXT: csel w10, w11, w8, lo
+; CHECK-SD-NEXT: mov v0.b[1], w9
+; CHECK-SD-NEXT: fcvtzu w9, d1
+; CHECK-SD-NEXT: mov d1, v2.d[1]
+; CHECK-SD-NEXT: mov v0.b[2], w10
+; CHECK-SD-NEXT: cmp w9, #255
+; CHECK-SD-NEXT: fcvtzu w10, d2
+; CHECK-SD-NEXT: csel w9, w9, w8, lo
+; CHECK-SD-NEXT: mov v0.b[3], w9
; CHECK-SD-NEXT: cmp w10, #255
-; CHECK-SD-NEXT: mov v2.s[1], w8
-; CHECK-SD-NEXT: csel w8, w10, w11, lo
-; CHECK-SD-NEXT: fmov s1, w8
-; CHECK-SD-NEXT: adrp x8, .LCPI82_0
-; CHECK-SD-NEXT: ldr d0, [x8, :lo12:.LCPI82_0]
-; CHECK-SD-NEXT: mov v1.s[1], w9
-; CHECK-SD-NEXT: tbl v0.8b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.8b
+; CHECK-SD-NEXT: fcvtzu w9, d1
+; CHECK-SD-NEXT: csel w10, w10, w8, lo
+; CHECK-SD-NEXT: mov d1, v3.d[1]
+; CHECK-SD-NEXT: mov v0.b[4], w10
+; CHECK-SD-NEXT: cmp w9, #255
+; CHECK-SD-NEXT: fcvtzu w10, d3
+; CHECK-SD-NEXT: csel w9, w9, w8, lo
+; CHECK-SD-NEXT: mov v0.b[5], w9
+; CHECK-SD-NEXT: cmp w10, #255
+; CHECK-SD-NEXT: fcvtzu w9, d1
+; CHECK-SD-NEXT: csel w10, w10, w8, lo
+; CHECK-SD-NEXT: mov v0.b[6], w10
+; CHECK-SD-NEXT: cmp w9, #255
+; CHECK-SD-NEXT: csel w8, w9, w8, lo
+; CHECK-SD-NEXT: mov v0.b[7], w8
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_unsigned_v8f64_v8i8:
@@ -4120,98 +4116,75 @@ define <16 x i8> @test_unsigned_v16f64_v16i8(<16 x double> %f) {
; CHECK-SD-NEXT: mov d16, v0.d[1]
; CHECK-SD-NEXT: fcvtzu w10, d0
; CHECK-SD-NEXT: mov w8, #255 // =0xff
+; CHECK-SD-NEXT: fcvtzu w11, d1
+; CHECK-SD-NEXT: mov d1, v1.d[1]
; CHECK-SD-NEXT: fcvtzu w9, d16
-; CHECK-SD-NEXT: mov d16, v1.d[1]
; CHECK-SD-NEXT: cmp w9, #255
; CHECK-SD-NEXT: csel w9, w9, w8, lo
; CHECK-SD-NEXT: cmp w10, #255
; CHECK-SD-NEXT: csel w10, w10, w8, lo
+; CHECK-SD-NEXT: cmp w11, #255
; CHECK-SD-NEXT: fmov s0, w10
-; CHECK-SD-NEXT: fcvtzu w10, d16
-; CHECK-SD-NEXT: mov d16, v2.d[1]
-; CHECK-SD-NEXT: mov v0.s[1], w9
+; CHECK-SD-NEXT: csel w10, w11, w8, lo
+; CHECK-SD-NEXT: mov v0.b[1], w9
; CHECK-SD-NEXT: fcvtzu w9, d1
-; CHECK-SD-NEXT: cmp w10, #255
-; CHECK-SD-NEXT: csel w10, w10, w8, lo
+; CHECK-SD-NEXT: mov d1, v2.d[1]
+; CHECK-SD-NEXT: mov v0.b[2], w10
; CHECK-SD-NEXT: cmp w9, #255
-; CHECK-SD-NEXT: mov w11, v0.s[1]
-; CHECK-SD-NEXT: csel w9, w9, w8, lo
-; CHECK-SD-NEXT: fmov s1, w9
-; CHECK-SD-NEXT: fcvtzu w9, d16
-; CHECK-SD-NEXT: mov d16, v3.d[1]
-; CHECK-SD-NEXT: mov v0.b[1], w11
-; CHECK-SD-NEXT: mov v1.s[1], w10
; CHECK-SD-NEXT: fcvtzu w10, d2
-; CHECK-SD-NEXT: cmp w9, #255
; CHECK-SD-NEXT: csel w9, w9, w8, lo
+; CHECK-SD-NEXT: mov v0.b[3], w9
; CHECK-SD-NEXT: cmp w10, #255
-; CHECK-SD-NEXT: mov w11, v1.s[1]
-; CHECK-SD-NEXT: mov v0.b[2], v1.b[0]
+; CHECK-SD-NEXT: fcvtzu w9, d1
; CHECK-SD-NEXT: csel w10, w10, w8, lo
-; CHECK-SD-NEXT: fmov s2, w10
-; CHECK-SD-NEXT: fcvtzu w10, d16
-; CHECK-SD-NEXT: mov d16, v4.d[1]
-; CHECK-SD-NEXT: mov v0.b[3], w11
-; CHECK-SD-NEXT: mov v2.s[1], w9
-; CHECK-SD-NEXT: fcvtzu w9, d3
+; CHECK-SD-NEXT: mov d1, v3.d[1]
+; CHECK-SD-NEXT: mov v0.b[4], w10
+; CHECK-SD-NEXT: cmp w9, #255
+; CHECK-SD-NEXT: fcvtzu w10, d3
+; CHECK-SD-NEXT: csel w9, w9, w8, lo
+; CHECK-SD-NEXT: mov v0.b[5], w9
; CHECK-SD-NEXT: cmp w10, #255
+; CHECK-SD-NEXT: fcvtzu w9, d1
; CHECK-SD-NEXT: csel w10, w10, w8, lo
+; CHECK-SD-NEXT: mov d1, v4.d[1]
+; CHECK-SD-NEXT: mov v0.b[6], w10
; CHECK-SD-NEXT: cmp w9, #255
-; CHECK-SD-NEXT: mov w11, v2.s[1]
-; CHECK-SD-NEXT: mov v0.b[4], v2.b[0]
-; CHECK-SD-NEXT: csel w9, w9, w8, lo
-; CHECK-SD-NEXT: fmov s3, w9
-; CHECK-SD-NEXT: fcvtzu w9, d16
-; CHECK-SD-NEXT: mov d16, v5.d[1]
-; CHECK-SD-NEXT: mov v0.b[5], w11
-; CHECK-SD-NEXT: mov v3.s[1], w10
; CHECK-SD-NEXT: fcvtzu w10, d4
-; CHECK-SD-NEXT: cmp w9, #255
; CHECK-SD-NEXT: csel w9, w9, w8, lo
+; CHECK-SD-NEXT: mov v0.b[7], w9
; CHECK-SD-NEXT: cmp w10, #255
-; CHECK-SD-NEXT: mov w11, v3.s[1]
-; CHECK-SD-NEXT: mov v0.b[6], v3.b[0]
+; CHECK-SD-NEXT: fcvtzu w9, d1
; CHECK-SD-NEXT: csel w10, w10, w8, lo
-; CHECK-SD-NEXT: fmov s4, w10
-; CHECK-SD-NEXT: fcvtzu w10, d16
-; CHECK-SD-NEXT: mov v0.b[7], w11
-; CHECK-SD-NEXT: mov v4.s[1], w9
-; CHECK-SD-NEXT: fcvtzu w9, d5
-; CHECK-SD-NEXT: mov d5, v6.d[1]
+; CHECK-SD-NEXT: mov d1, v5.d[1]
+; CHECK-SD-NEXT: mov v0.b[8], w10
+; CHECK-SD-NEXT: cmp w9, #255
+; CHECK-SD-NEXT: fcvtzu w10, d5
+; CHECK-SD-NEXT: csel w9, w9, w8, lo
+; CHECK-SD-NEXT: mov v0.b[9], w9
; CHECK-SD-NEXT: cmp w10, #255
+; CHECK-SD-NEXT: fcvtzu w9, d1
; CHECK-SD-NEXT: csel w10, w10, w8, lo
+; CHECK-SD-NEXT: mov d1, v6.d[1]
+; CHECK-SD-NEXT: mov v0.b[10], w10
; CHECK-SD-NEXT: cmp w9, #255
-; CHECK-SD-NEXT: mov w11, v4.s[1]
-; CHECK-SD-NEXT: mov v0.b[8], v4.b[0]
-; CHECK-SD-NEXT: csel w9, w9, w8, lo
-; CHECK-SD-NEXT: fmov s16, w9
-; CHECK-SD-NEXT: fcvtzu w9, d5
-; CHECK-SD-NEXT: mov d5, v7.d[1]
-; CHECK-SD-NEXT: mov v0.b[9], w11
-; CHECK-SD-NEXT: mov v16.s[1], w10
; CHECK-SD-NEXT: fcvtzu w10, d6
-; CHECK-SD-NEXT: cmp w9, #255
; CHECK-SD-NEXT: csel w9, w9, w8, lo
+; CHECK-SD-NEXT: mov v0.b[11], w9
; CHECK-SD-NEXT: cmp w10, #255
-; CHECK-SD-NEXT: mov v0.b[10], v16.b[0]
-; CHECK-SD-NEXT: mov w11, v16.s[1]
+; CHECK-SD-NEXT: fcvtzu w9, d1
; CHECK-SD-NEXT: csel w10, w10, w8, lo
-; CHECK-SD-NEXT: fmov s6, w10
-; CHECK-SD-NEXT: fcvtzu w10, d7
-; CHECK-SD-NEXT: mov v0.b[11], w11
-; CHECK-SD-NEXT: mov v6.s[1], w9
-; CHECK-SD-NEXT: fcvtzu w9, d5
+; CHECK-SD-NEXT: mov d1, v7.d[1]
+; CHECK-SD-NEXT: mov v0.b[12], w10
; CHECK-SD-NEXT: cmp w9, #255
-; CHECK-SD-NEXT: mov v0.b[12], v6.b[0]
-; CHECK-SD-NEXT: mov w11, v6.s[1]
+; CHECK-SD-NEXT: fcvtzu w10, d7
; CHECK-SD-NEXT: csel w9, w9, w8, lo
+; CHECK-SD-NEXT: mov v0.b[13], w9
; CHECK-SD-NEXT: cmp w10, #255
-; CHECK-SD-NEXT: csel w8, w10, w8, lo
-; CHECK-SD-NEXT: fmov s5, w8
-; CHECK-SD-NEXT: mov v0.b[13], w11
-; CHECK-SD-NEXT: mov v5.s[1], w9
-; CHECK-SD-NEXT: mov v0.b[14], v5.b[0]
-; CHECK-SD-NEXT: mov w8, v5.s[1]
+; CHECK-SD-NEXT: fcvtzu w9, d1
+; CHECK-SD-NEXT: csel w10, w10, w8, lo
+; CHECK-SD-NEXT: mov v0.b[14], w10
+; CHECK-SD-NEXT: cmp w9, #255
+; CHECK-SD-NEXT: csel w8, w9, w8, lo
; CHECK-SD-NEXT: mov v0.b[15], w8
; CHECK-SD-NEXT: ret
;
@@ -4257,46 +4230,43 @@ define <16 x i8> @test_unsigned_v16f64_v16i8(<16 x double> %f) {
define <8 x i16> @test_unsigned_v8f64_v8i16(<8 x double> %f) {
; CHECK-SD-LABEL: test_unsigned_v8f64_v8i16:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mov d4, v3.d[1]
-; CHECK-SD-NEXT: mov d5, v2.d[1]
+; CHECK-SD-NEXT: mov d4, v0.d[1]
+; CHECK-SD-NEXT: fcvtzu w9, d0
; CHECK-SD-NEXT: mov w10, #65535 // =0xffff
-; CHECK-SD-NEXT: fcvtzu w9, d3
-; CHECK-SD-NEXT: mov d3, v1.d[1]
-; CHECK-SD-NEXT: fcvtzu w12, d2
-; CHECK-SD-NEXT: fcvtzu w14, d1
+; CHECK-SD-NEXT: fcvtzu w11, d1
+; CHECK-SD-NEXT: mov d1, v1.d[1]
; CHECK-SD-NEXT: fcvtzu w8, d4
-; CHECK-SD-NEXT: mov d4, v0.d[1]
-; CHECK-SD-NEXT: fcvtzu w11, d5
-; CHECK-SD-NEXT: fcvtzu w13, d3
; CHECK-SD-NEXT: cmp w8, w10
-; CHECK-SD-NEXT: fcvtzu w15, d4
; CHECK-SD-NEXT: csel w8, w8, w10, lo
; CHECK-SD-NEXT: cmp w9, w10
; CHECK-SD-NEXT: csel w9, w9, w10, lo
; CHECK-SD-NEXT: cmp w11, w10
-; CHECK-SD-NEXT: fmov s4, w9
+; CHECK-SD-NEXT: fmov s0, w9
; CHECK-SD-NEXT: csel w9, w11, w10, lo
-; CHECK-SD-NEXT: cmp w12, w10
-; CHECK-SD-NEXT: fcvtzu w11, d0
-; CHECK-SD-NEXT: mov v4.s[1], w8
-; CHECK-SD-NEXT: csel w8, w12, w10, lo
-; CHECK-SD-NEXT: cmp w13, w10
-; CHECK-SD-NEXT: fmov s3, w8
-; CHECK-SD-NEXT: csel w8, w13, w10, lo
-; CHECK-SD-NEXT: cmp w14, w10
-; CHECK-SD-NEXT: mov v3.s[1], w9
-; CHECK-SD-NEXT: csel w9, w14, w10, lo
-; CHECK-SD-NEXT: cmp w15, w10
-; CHECK-SD-NEXT: fmov s2, w9
-; CHECK-SD-NEXT: csel w9, w15, w10, lo
-; CHECK-SD-NEXT: cmp w11, w10
-; CHECK-SD-NEXT: mov v2.s[1], w8
-; CHECK-SD-NEXT: csel w8, w11, w10, lo
-; CHECK-SD-NEXT: fmov s1, w8
-; CHECK-SD-NEXT: adrp x8, .LCPI84_0
-; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI84_0]
-; CHECK-SD-NEXT: mov v1.s[1], w9
-; CHECK-SD-NEXT: tbl v0.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b
+; CHECK-SD-NEXT: mov v0.h[1], w8
+; CHECK-SD-NEXT: fcvtzu w8, d1
+; CHECK-SD-NEXT: mov d1, v2.d[1]
+; CHECK-SD-NEXT: mov v0.h[2], w9
+; CHECK-SD-NEXT: cmp w8, w10
+; CHECK-SD-NEXT: fcvtzu w9, d2
+; CHECK-SD-NEXT: csel w8, w8, w10, lo
+; CHECK-SD-NEXT: mov v0.h[3], w8
+; CHECK-SD-NEXT: cmp w9, w10
+; CHECK-SD-NEXT: fcvtzu w8, d1
+; CHECK-SD-NEXT: csel w9, w9, w10, lo
+; CHECK-SD-NEXT: mov d1, v3.d[1]
+; CHECK-SD-NEXT: mov v0.h[4], w9
+; CHECK-SD-NEXT: cmp w8, w10
+; CHECK-SD-NEXT: fcvtzu w9, d3
+; CHECK-SD-NEXT: csel w8, w8, w10, lo
+; CHECK-SD-NEXT: mov v0.h[5], w8
+; CHECK-SD-NEXT: cmp w9, w10
+; CHECK-SD-NEXT: fcvtzu w8, d1
+; CHECK-SD-NEXT: csel w9, w9, w10, lo
+; CHECK-SD-NEXT: mov v0.h[6], w9
+; CHECK-SD-NEXT: cmp w8, w10
+; CHECK-SD-NEXT: csel w8, w8, w10, lo
+; CHECK-SD-NEXT: mov v0.h[7], w8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_unsigned_v8f64_v8i16:
@@ -4325,83 +4295,79 @@ define <8 x i16> @test_unsigned_v8f64_v8i16(<8 x double> %f) {
define <16 x i16> @test_unsigned_v16f64_v16i16(<16 x double> %f) {
; CHECK-SD-LABEL: test_unsigned_v16f64_v16i16:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mov d16, v3.d[1]
-; CHECK-SD-NEXT: mov d17, v2.d[1]
+; CHECK-SD-NEXT: mov d16, v0.d[1]
+; CHECK-SD-NEXT: mov d17, v1.d[1]
; CHECK-SD-NEXT: mov w8, #65535 // =0xffff
-; CHECK-SD-NEXT: fcvtzu w9, d3
-; CHECK-SD-NEXT: mov d3, v1.d[1]
-; CHECK-SD-NEXT: fcvtzu w10, d1
-; CHECK-SD-NEXT: mov d1, v0.d[1]
-; CHECK-SD-NEXT: fcvtzu w11, d2
-; CHECK-SD-NEXT: fcvtzu w12, d0
-; CHECK-SD-NEXT: mov d0, v7.d[1]
-; CHECK-SD-NEXT: mov d2, v6.d[1]
-; CHECK-SD-NEXT: fcvtzu w14, d7
-; CHECK-SD-NEXT: fcvtzu w13, d16
-; CHECK-SD-NEXT: fcvtzu w16, d17
-; CHECK-SD-NEXT: fcvtzu w15, d6
-; CHECK-SD-NEXT: fcvtzu w17, d3
-; CHECK-SD-NEXT: mov d6, v5.d[1]
-; CHECK-SD-NEXT: mov d3, v4.d[1]
-; CHECK-SD-NEXT: fcvtzu w18, d1
-; CHECK-SD-NEXT: cmp w13, w8
-; CHECK-SD-NEXT: csel w13, w13, w8, lo
+; CHECK-SD-NEXT: fcvtzu w10, d0
+; CHECK-SD-NEXT: mov d0, v2.d[1]
+; CHECK-SD-NEXT: fcvtzu w11, d1
+; CHECK-SD-NEXT: mov d1, v3.d[1]
+; CHECK-SD-NEXT: fcvtzu w13, d2
+; CHECK-SD-NEXT: mov d2, v4.d[1]
+; CHECK-SD-NEXT: fcvtzu w18, d4
+; CHECK-SD-NEXT: fcvtzu w9, d16
+; CHECK-SD-NEXT: fcvtzu w12, d17
+; CHECK-SD-NEXT: fcvtzu w16, d1
+; CHECK-SD-NEXT: fcvtzu w17, d2
+; CHECK-SD-NEXT: mov d2, v5.d[1]
; CHECK-SD-NEXT: cmp w9, w8
-; CHECK-SD-NEXT: csel w9, w9, w8, lo
-; CHECK-SD-NEXT: cmp w16, w8
-; CHECK-SD-NEXT: fmov s19, w9
-; CHECK-SD-NEXT: csel w9, w16, w8, lo
+; CHECK-SD-NEXT: csel w14, w9, w8, lo
+; CHECK-SD-NEXT: cmp w10, w8
+; CHECK-SD-NEXT: fcvtzu w9, d0
+; CHECK-SD-NEXT: csel w15, w10, w8, lo
; CHECK-SD-NEXT: cmp w11, w8
-; CHECK-SD-NEXT: fcvtzu w16, d0
+; CHECK-SD-NEXT: fcvtzu w10, d3
; CHECK-SD-NEXT: csel w11, w11, w8, lo
-; CHECK-SD-NEXT: cmp w17, w8
-; CHECK-SD-NEXT: mov v19.s[1], w13
-; CHECK-SD-NEXT: csel w13, w17, w8, lo
-; CHECK-SD-NEXT: cmp w10, w8
-; CHECK-SD-NEXT: csel w10, w10, w8, lo
-; CHECK-SD-NEXT: cmp w18, w8
-; CHECK-SD-NEXT: fmov s18, w11
-; CHECK-SD-NEXT: csel w11, w18, w8, lo
; CHECK-SD-NEXT: cmp w12, w8
-; CHECK-SD-NEXT: fcvtzu w17, d2
+; CHECK-SD-NEXT: fmov s0, w15
; CHECK-SD-NEXT: csel w12, w12, w8, lo
+; CHECK-SD-NEXT: cmp w13, w8
+; CHECK-SD-NEXT: csel w13, w13, w8, lo
+; CHECK-SD-NEXT: cmp w9, w8
+; CHECK-SD-NEXT: csel w0, w9, w8, lo
+; CHECK-SD-NEXT: cmp w10, w8
+; CHECK-SD-NEXT: mov v0.h[1], w14
+; CHECK-SD-NEXT: csel w10, w10, w8, lo
; CHECK-SD-NEXT: cmp w16, w8
-; CHECK-SD-NEXT: fcvtzu w18, d6
-; CHECK-SD-NEXT: mov v18.s[1], w9
+; CHECK-SD-NEXT: fcvtzu w14, d2
; CHECK-SD-NEXT: csel w9, w16, w8, lo
-; CHECK-SD-NEXT: cmp w14, w8
-; CHECK-SD-NEXT: fmov s17, w10
-; CHECK-SD-NEXT: csel w10, w14, w8, lo
-; CHECK-SD-NEXT: fcvtzu w16, d5
-; CHECK-SD-NEXT: fmov s23, w10
; CHECK-SD-NEXT: cmp w17, w8
-; CHECK-SD-NEXT: fcvtzu w14, d3
-; CHECK-SD-NEXT: csel w10, w17, w8, lo
-; CHECK-SD-NEXT: cmp w15, w8
-; CHECK-SD-NEXT: fcvtzu w17, d4
-; CHECK-SD-NEXT: mov v17.s[1], w13
-; CHECK-SD-NEXT: mov v23.s[1], w9
-; CHECK-SD-NEXT: csel w9, w15, w8, lo
+; CHECK-SD-NEXT: mov d2, v6.d[1]
+; CHECK-SD-NEXT: csel w16, w17, w8, lo
; CHECK-SD-NEXT: cmp w18, w8
-; CHECK-SD-NEXT: fmov s22, w9
-; CHECK-SD-NEXT: csel w9, w18, w8, lo
-; CHECK-SD-NEXT: cmp w16, w8
-; CHECK-SD-NEXT: fmov s16, w12
-; CHECK-SD-NEXT: mov v22.s[1], w10
-; CHECK-SD-NEXT: csel w10, w16, w8, lo
+; CHECK-SD-NEXT: csel w17, w18, w8, lo
+; CHECK-SD-NEXT: fcvtzu w18, d5
+; CHECK-SD-NEXT: mov v0.h[2], w11
+; CHECK-SD-NEXT: fmov s1, w17
+; CHECK-SD-NEXT: fcvtzu w11, d6
+; CHECK-SD-NEXT: mov v1.h[1], w16
+; CHECK-SD-NEXT: cmp w18, w8
+; CHECK-SD-NEXT: mov v0.h[3], w12
+; CHECK-SD-NEXT: csel w15, w18, w8, lo
; CHECK-SD-NEXT: cmp w14, w8
-; CHECK-SD-NEXT: fmov s21, w10
-; CHECK-SD-NEXT: csel w10, w14, w8, lo
-; CHECK-SD-NEXT: cmp w17, w8
-; CHECK-SD-NEXT: csel w8, w17, w8, lo
-; CHECK-SD-NEXT: mov v16.s[1], w11
-; CHECK-SD-NEXT: mov v21.s[1], w9
-; CHECK-SD-NEXT: fmov s20, w8
-; CHECK-SD-NEXT: adrp x8, .LCPI85_0
-; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI85_0]
-; CHECK-SD-NEXT: mov v20.s[1], w10
-; CHECK-SD-NEXT: tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b
-; CHECK-SD-NEXT: tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b
+; CHECK-SD-NEXT: fcvtzu w12, d2
+; CHECK-SD-NEXT: csel w14, w14, w8, lo
+; CHECK-SD-NEXT: cmp w11, w8
+; CHECK-SD-NEXT: mov d2, v7.d[1]
+; CHECK-SD-NEXT: csel w11, w11, w8, lo
+; CHECK-SD-NEXT: mov v1.h[2], w15
+; CHECK-SD-NEXT: mov v0.h[4], w13
+; CHECK-SD-NEXT: cmp w12, w8
+; CHECK-SD-NEXT: csel w12, w12, w8, lo
+; CHECK-SD-NEXT: mov v1.h[3], w14
+; CHECK-SD-NEXT: mov v0.h[5], w0
+; CHECK-SD-NEXT: mov v1.h[4], w11
+; CHECK-SD-NEXT: fcvtzu w11, d7
+; CHECK-SD-NEXT: mov v0.h[6], w10
+; CHECK-SD-NEXT: mov v1.h[5], w12
+; CHECK-SD-NEXT: cmp w11, w8
+; CHECK-SD-NEXT: fcvtzu w12, d2
+; CHECK-SD-NEXT: csel w11, w11, w8, lo
+; CHECK-SD-NEXT: mov v0.h[7], w9
+; CHECK-SD-NEXT: mov v1.h[6], w11
+; CHECK-SD-NEXT: cmp w12, w8
+; CHECK-SD-NEXT: csel w8, w12, w8, lo
+; CHECK-SD-NEXT: mov v1.h[7], w8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_unsigned_v16f64_v16i16:
diff --git a/llvm/test/CodeGen/AArch64/fptrunc.ll b/llvm/test/CodeGen/AArch64/fptrunc.ll
index b4c38e9f2df3b..1f84c944d7c16 100644
--- a/llvm/test/CodeGen/AArch64/fptrunc.ll
+++ b/llvm/test/CodeGen/AArch64/fptrunc.ll
@@ -304,25 +304,15 @@ entry:
}
define <3 x half> @fptrunc_v3f64_v3f16(<3 x double> %a) {
-; CHECK-SD-LABEL: fptrunc_v3f64_v3f16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fcvt h1, d1
-; CHECK-SD-NEXT: fcvt h0, d0
-; CHECK-SD-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-SD-NEXT: fcvt h1, d2
-; CHECK-SD-NEXT: mov v0.h[2], v1.h[0]
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fptrunc_v3f64_v3f16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: fcvt h0, d0
-; CHECK-GI-NEXT: fcvt h1, d1
-; CHECK-GI-NEXT: fcvt h2, d2
-; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-GI-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fptrunc_v3f64_v3f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvt h0, d0
+; CHECK-NEXT: fcvt h1, d1
+; CHECK-NEXT: fcvt h2, d2
+; CHECK-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-NEXT: mov v0.h[2], v2.h[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%c = fptrunc <3 x double> %a to <3 x half>
ret <3 x half> %c
diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll
index 71b53c662bb22..9d4d654259a31 100644
--- a/llvm/test/CodeGen/AArch64/itofp.ll
+++ b/llvm/test/CodeGen/AArch64/itofp.ll
@@ -4143,11 +4143,11 @@ entry:
define <3 x float> @stofp_v3i128_v3f32(<3 x i128> %a) {
; CHECK-SD-LABEL: stofp_v3i128_v3f32:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: sub sp, sp, #64
-; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-SD-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
-; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT: sub sp, sp, #80
+; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 80
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w21, -24
@@ -4155,31 +4155,31 @@ define <3 x float> @stofp_v3i128_v3f32(<3 x i128> %a) {
; CHECK-SD-NEXT: .cfi_offset w30, -48
; CHECK-SD-NEXT: mov x21, x1
; CHECK-SD-NEXT: mov x22, x0
-; CHECK-SD-NEXT: mov x0, x2
-; CHECK-SD-NEXT: mov x1, x3
-; CHECK-SD-NEXT: mov x19, x5
-; CHECK-SD-NEXT: mov x20, x4
+; CHECK-SD-NEXT: mov x0, x4
+; CHECK-SD-NEXT: mov x1, x5
+; CHECK-SD-NEXT: mov x19, x3
+; CHECK-SD-NEXT: mov x20, x2
; CHECK-SD-NEXT: bl __floattisf
; CHECK-SD-NEXT: mov x0, x22
; CHECK-SD-NEXT: mov x1, x21
; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT: bl __floattisf
-; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
; CHECK-SD-NEXT: mov x0, x20
; CHECK-SD-NEXT: mov x1, x19
-; CHECK-SD-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT: bl __floattisf
; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT: mov v1.s[1], v0.s[0]
+; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v1.s[2], v0.s[0]
; CHECK-SD-NEXT: mov v0.16b, v1.16b
-; CHECK-SD-NEXT: add sp, sp, #64
+; CHECK-SD-NEXT: add sp, sp, #80
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: stofp_v3i128_v3f32:
@@ -4227,11 +4227,11 @@ entry:
define <3 x float> @utofp_v3i128_v3f32(<3 x i128> %a) {
; CHECK-SD-LABEL: utofp_v3i128_v3f32:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: sub sp, sp, #64
-; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-SD-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
-; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT: sub sp, sp, #80
+; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 80
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w21, -24
@@ -4239,31 +4239,31 @@ define <3 x float> @utofp_v3i128_v3f32(<3 x i128> %a) {
; CHECK-SD-NEXT: .cfi_offset w30, -48
; CHECK-SD-NEXT: mov x21, x1
; CHECK-SD-NEXT: mov x22, x0
-; CHECK-SD-NEXT: mov x0, x2
-; CHECK-SD-NEXT: mov x1, x3
-; CHECK-SD-NEXT: mov x19, x5
-; CHECK-SD-NEXT: mov x20, x4
+; CHECK-SD-NEXT: mov x0, x4
+; CHECK-SD-NEXT: mov x1, x5
+; CHECK-SD-NEXT: mov x19, x3
+; CHECK-SD-NEXT: mov x20, x2
; CHECK-SD-NEXT: bl __floatuntisf
; CHECK-SD-NEXT: mov x0, x22
; CHECK-SD-NEXT: mov x1, x21
; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT: bl __floatuntisf
-; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
; CHECK-SD-NEXT: mov x0, x20
; CHECK-SD-NEXT: mov x1, x19
-; CHECK-SD-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT: bl __floatuntisf
; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT: mov v1.s[1], v0.s[0]
+; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v1.s[2], v0.s[0]
; CHECK-SD-NEXT: mov v0.16b, v1.16b
-; CHECK-SD-NEXT: add sp, sp, #64
+; CHECK-SD-NEXT: add sp, sp, #80
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: utofp_v3i128_v3f32:
@@ -6035,11 +6035,11 @@ entry:
define <3 x half> @stofp_v3i128_v3f16(<3 x i128> %a) {
; CHECK-SD-NOFP16-LABEL: stofp_v3i128_v3f16:
; CHECK-SD-NOFP16: // %bb.0: // %entry
-; CHECK-SD-NOFP16-NEXT: sub sp, sp, #64
-; CHECK-SD-NOFP16-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-SD-NOFP16-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-NOFP16-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
-; CHECK-SD-NOFP16-NEXT: .cfi_def_cfa_offset 64
+; CHECK-SD-NOFP16-NEXT: sub sp, sp, #80
+; CHECK-SD-NOFP16-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NOFP16-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NOFP16-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NOFP16-NEXT: .cfi_def_cfa_offset 80
; CHECK-SD-NOFP16-NEXT: .cfi_offset w19, -8
; CHECK-SD-NOFP16-NEXT: .cfi_offset w20, -16
; CHECK-SD-NOFP16-NEXT: .cfi_offset w21, -24
@@ -6047,40 +6047,41 @@ define <3 x half> @stofp_v3i128_v3f16(<3 x i128> %a) {
; CHECK-SD-NOFP16-NEXT: .cfi_offset w30, -48
; CHECK-SD-NOFP16-NEXT: mov x21, x1
; CHECK-SD-NOFP16-NEXT: mov x22, x0
-; CHECK-SD-NOFP16-NEXT: mov x0, x2
-; CHECK-SD-NOFP16-NEXT: mov x1, x3
-; CHECK-SD-NOFP16-NEXT: mov x19, x5
-; CHECK-SD-NOFP16-NEXT: mov x20, x4
+; CHECK-SD-NOFP16-NEXT: mov x0, x4
+; CHECK-SD-NOFP16-NEXT: mov x1, x5
+; CHECK-SD-NOFP16-NEXT: mov x19, x3
+; CHECK-SD-NOFP16-NEXT: mov x20, x2
; CHECK-SD-NOFP16-NEXT: bl __floattisf
; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
; CHECK-SD-NOFP16-NEXT: mov x0, x22
; CHECK-SD-NOFP16-NEXT: mov x1, x21
-; CHECK-SD-NOFP16-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NOFP16-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NOFP16-NEXT: bl __floattisf
; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
-; CHECK-SD-NOFP16-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-SD-NOFP16-NEXT: mov x0, x20
; CHECK-SD-NOFP16-NEXT: mov x1, x19
-; CHECK-SD-NOFP16-NEXT: mov v0.h[1], v1.h[0]
; CHECK-SD-NOFP16-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NOFP16-NEXT: bl __floattisf
-; CHECK-SD-NOFP16-NEXT: fcvt h1, s0
-; CHECK-SD-NOFP16-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NOFP16-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NOFP16-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NOFP16-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NOFP16-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NOFP16-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NOFP16-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NOFP16-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NOFP16-NEXT: mov v1.h[1], v0.h[0]
+; CHECK-SD-NOFP16-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NOFP16-NEXT: mov v1.h[2], v0.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v0.16b, v1.16b
; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NOFP16-NEXT: add sp, sp, #64
+; CHECK-SD-NOFP16-NEXT: add sp, sp, #80
; CHECK-SD-NOFP16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: stofp_v3i128_v3f16:
; CHECK-SD-FP16: // %bb.0: // %entry
-; CHECK-SD-FP16-NEXT: sub sp, sp, #64
-; CHECK-SD-FP16-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-SD-FP16-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-FP16-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
-; CHECK-SD-FP16-NEXT: .cfi_def_cfa_offset 64
+; CHECK-SD-FP16-NEXT: sub sp, sp, #80
+; CHECK-SD-FP16-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-FP16-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-FP16-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-FP16-NEXT: .cfi_def_cfa_offset 80
; CHECK-SD-FP16-NEXT: .cfi_offset w19, -8
; CHECK-SD-FP16-NEXT: .cfi_offset w20, -16
; CHECK-SD-FP16-NEXT: .cfi_offset w21, -24
@@ -6088,31 +6089,32 @@ define <3 x half> @stofp_v3i128_v3f16(<3 x i128> %a) {
; CHECK-SD-FP16-NEXT: .cfi_offset w30, -48
; CHECK-SD-FP16-NEXT: mov x21, x1
; CHECK-SD-FP16-NEXT: mov x22, x0
-; CHECK-SD-FP16-NEXT: mov x0, x2
-; CHECK-SD-FP16-NEXT: mov x1, x3
-; CHECK-SD-FP16-NEXT: mov x19, x5
-; CHECK-SD-FP16-NEXT: mov x20, x4
+; CHECK-SD-FP16-NEXT: mov x0, x4
+; CHECK-SD-FP16-NEXT: mov x1, x5
+; CHECK-SD-FP16-NEXT: mov x19, x3
+; CHECK-SD-FP16-NEXT: mov x20, x2
; CHECK-SD-FP16-NEXT: bl __floattihf
; CHECK-SD-FP16-NEXT: mov x0, x22
; CHECK-SD-FP16-NEXT: mov x1, x21
; CHECK-SD-FP16-NEXT: // kill: def $h0 killed $h0 def $q0
-; CHECK-SD-FP16-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-FP16-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-FP16-NEXT: bl __floattihf
-; CHECK-SD-FP16-NEXT: ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-FP16-NEXT: // kill: def $h0 killed $h0 def $q0
; CHECK-SD-FP16-NEXT: mov x0, x20
; CHECK-SD-FP16-NEXT: mov x1, x19
-; CHECK-SD-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-SD-FP16-NEXT: // kill: def $h0 killed $h0 def $q0
; CHECK-SD-FP16-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-FP16-NEXT: bl __floattihf
; CHECK-SD-FP16-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-SD-FP16-NEXT: // kill: def $h0 killed $h0 def $q0
-; CHECK-SD-FP16-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-FP16-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-FP16-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-FP16-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-FP16-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-FP16-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-FP16-NEXT: mov v1.h[1], v0.h[0]
+; CHECK-SD-FP16-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-FP16-NEXT: mov v1.h[2], v0.h[0]
-; CHECK-SD-FP16-NEXT: fmov d0, d1
-; CHECK-SD-FP16-NEXT: add sp, sp, #64
+; CHECK-SD-FP16-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-FP16-NEXT: add sp, sp, #80
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-NOFP16-LABEL: stofp_v3i128_v3f16:
@@ -6200,11 +6202,11 @@ entry:
define <3 x half> @utofp_v3i128_v3f16(<3 x i128> %a) {
; CHECK-SD-NOFP16-LABEL: utofp_v3i128_v3f16:
; CHECK-SD-NOFP16: // %bb.0: // %entry
-; CHECK-SD-NOFP16-NEXT: sub sp, sp, #64
-; CHECK-SD-NOFP16-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-SD-NOFP16-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-NOFP16-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
-; CHECK-SD-NOFP16-NEXT: .cfi_def_cfa_offset 64
+; CHECK-SD-NOFP16-NEXT: sub sp, sp, #80
+; CHECK-SD-NOFP16-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NOFP16-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NOFP16-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NOFP16-NEXT: .cfi_def_cfa_offset 80
; CHECK-SD-NOFP16-NEXT: .cfi_offset w19, -8
; CHECK-SD-NOFP16-NEXT: .cfi_offset w20, -16
; CHECK-SD-NOFP16-NEXT: .cfi_offset w21, -24
@@ -6212,40 +6214,41 @@ define <3 x half> @utofp_v3i128_v3f16(<3 x i128> %a) {
; CHECK-SD-NOFP16-NEXT: .cfi_offset w30, -48
; CHECK-SD-NOFP16-NEXT: mov x21, x1
; CHECK-SD-NOFP16-NEXT: mov x22, x0
-; CHECK-SD-NOFP16-NEXT: mov x0, x2
-; CHECK-SD-NOFP16-NEXT: mov x1, x3
-; CHECK-SD-NOFP16-NEXT: mov x19, x5
-; CHECK-SD-NOFP16-NEXT: mov x20, x4
+; CHECK-SD-NOFP16-NEXT: mov x0, x4
+; CHECK-SD-NOFP16-NEXT: mov x1, x5
+; CHECK-SD-NOFP16-NEXT: mov x19, x3
+; CHECK-SD-NOFP16-NEXT: mov x20, x2
; CHECK-SD-NOFP16-NEXT: bl __floatuntisf
; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
; CHECK-SD-NOFP16-NEXT: mov x0, x22
; CHECK-SD-NOFP16-NEXT: mov x1, x21
-; CHECK-SD-NOFP16-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NOFP16-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NOFP16-NEXT: bl __floatuntisf
; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
-; CHECK-SD-NOFP16-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-SD-NOFP16-NEXT: mov x0, x20
; CHECK-SD-NOFP16-NEXT: mov x1, x19
-; CHECK-SD-NOFP16-NEXT: mov v0.h[1], v1.h[0]
; CHECK-SD-NOFP16-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NOFP16-NEXT: bl __floatuntisf
-; CHECK-SD-NOFP16-NEXT: fcvt h1, s0
-; CHECK-SD-NOFP16-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NOFP16-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NOFP16-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NOFP16-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NOFP16-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NOFP16-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NOFP16-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NOFP16-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NOFP16-NEXT: mov v1.h[1], v0.h[0]
+; CHECK-SD-NOFP16-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NOFP16-NEXT: mov v1.h[2], v0.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v0.16b, v1.16b
; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NOFP16-NEXT: add sp, sp, #64
+; CHECK-SD-NOFP16-NEXT: add sp, sp, #80
; CHECK-SD-NOFP16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: utofp_v3i128_v3f16:
; CHECK-SD-FP16: // %bb.0: // %entry
-; CHECK-SD-FP16-NEXT: sub sp, sp, #64
-; CHECK-SD-FP16-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-SD-FP16-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-FP16-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
-; CHECK-SD-FP16-NEXT: .cfi_def_cfa_offset 64
+; CHECK-SD-FP16-NEXT: sub sp, sp, #80
+; CHECK-SD-FP16-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-FP16-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-FP16-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-FP16-NEXT: .cfi_def_cfa_offset 80
; CHECK-SD-FP16-NEXT: .cfi_offset w19, -8
; CHECK-SD-FP16-NEXT: .cfi_offset w20, -16
; CHECK-SD-FP16-NEXT: .cfi_offset w21, -24
@@ -6253,31 +6256,32 @@ define <3 x half> @utofp_v3i128_v3f16(<3 x i128> %a) {
; CHECK-SD-FP16-NEXT: .cfi_offset w30, -48
; CHECK-SD-FP16-NEXT: mov x21, x1
; CHECK-SD-FP16-NEXT: mov x22, x0
-; CHECK-SD-FP16-NEXT: mov x0, x2
-; CHECK-SD-FP16-NEXT: mov x1, x3
-; CHECK-SD-FP16-NEXT: mov x19, x5
-; CHECK-SD-FP16-NEXT: mov x20, x4
+; CHECK-SD-FP16-NEXT: mov x0, x4
+; CHECK-SD-FP16-NEXT: mov x1, x5
+; CHECK-SD-FP16-NEXT: mov x19, x3
+; CHECK-SD-FP16-NEXT: mov x20, x2
; CHECK-SD-FP16-NEXT: bl __floatuntihf
; CHECK-SD-FP16-NEXT: mov x0, x22
; CHECK-SD-FP16-NEXT: mov x1, x21
; CHECK-SD-FP16-NEXT: // kill: def $h0 killed $h0 def $q0
-; CHECK-SD-FP16-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-FP16-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-FP16-NEXT: bl __floatuntihf
-; CHECK-SD-FP16-NEXT: ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-FP16-NEXT: // kill: def $h0 killed $h0 def $q0
; CHECK-SD-FP16-NEXT: mov x0, x20
; CHECK-SD-FP16-NEXT: mov x1, x19
-; CHECK-SD-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-SD-FP16-NEXT: // kill: def $h0 killed $h0 def $q0
; CHECK-SD-FP16-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-FP16-NEXT: bl __floatuntihf
; CHECK-SD-FP16-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-SD-FP16-NEXT: // kill: def $h0 killed $h0 def $q0
-; CHECK-SD-FP16-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-FP16-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-FP16-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-FP16-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-FP16-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-FP16-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-FP16-NEXT: mov v1.h[1], v0.h[0]
+; CHECK-SD-FP16-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-FP16-NEXT: mov v1.h[2], v0.h[0]
-; CHECK-SD-FP16-NEXT: fmov d0, d1
-; CHECK-SD-FP16-NEXT: add sp, sp, #64
+; CHECK-SD-FP16-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-FP16-NEXT: add sp, sp, #80
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-NOFP16-LABEL: utofp_v3i128_v3f16:
diff --git a/llvm/test/CodeGen/AArch64/sext.ll b/llvm/test/CodeGen/AArch64/sext.ll
index ce5b80f3e2be4..2f193cefd7c0c 100644
--- a/llvm/test/CodeGen/AArch64/sext.ll
+++ b/llvm/test/CodeGen/AArch64/sext.ll
@@ -1245,12 +1245,10 @@ entry:
define <2 x i128> @sext_v2i64_v2i128(<2 x i64> %a) {
; CHECK-SD-LABEL: sext_v2i64_v2i128:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: mov x8, v0.d[1]
-; CHECK-SD-NEXT: dup v1.2d, v0.d[1]
+; CHECK-SD-NEXT: mov x2, v0.d[1]
; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: fmov x2, d1
; CHECK-SD-NEXT: asr x1, x0, #63
-; CHECK-SD-NEXT: asr x3, x8, #63
+; CHECK-SD-NEXT: asr x3, x2, #63
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sext_v2i64_v2i128:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll
index ffef6f74f2d36..e4eda2e3a1e32 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll
@@ -100,22 +100,17 @@ define void @bitcast_v2i16(ptr %a, ptr %b) {
;
; NONEON-NOSVE-LABEL: bitcast_v2i16:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #32
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
-; NONEON-NOSVE-NEXT: ldrh w8, [x0, #2]
-; NONEON-NOSVE-NEXT: str w8, [sp, #4]
+; NONEON-NOSVE-NEXT: sub sp, sp, #16
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldrh w8, [x0]
-; NONEON-NOSVE-NEXT: str w8, [sp]
+; NONEON-NOSVE-NEXT: ldrh w9, [x0, #2]
+; NONEON-NOSVE-NEXT: strh w9, [sp, #2]
+; NONEON-NOSVE-NEXT: strh w8, [sp]
; NONEON-NOSVE-NEXT: ldr d0, [sp]
; NONEON-NOSVE-NEXT: str d0, [sp, #8]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #8]
-; NONEON-NOSVE-NEXT: strh w9, [sp, #18]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #16]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #16]
-; NONEON-NOSVE-NEXT: str d0, [sp, #24]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #24]
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #8]
; NONEON-NOSVE-NEXT: str w8, [x1]
-; NONEON-NOSVE-NEXT: add sp, sp, #32
+; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%load = load volatile <2 x i16>, ptr %a
%cast = bitcast <2 x i16> %load to <2 x half>
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
index 7d6336a43a4fd..4d524bc848de6 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
@@ -148,75 +148,38 @@ define <16 x i32> @load_sext_v16i8i32(ptr %ap) {
;
; NONEON-NOSVE-LABEL: load_sext_v16i8i32:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #160
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160
; NONEON-NOSVE-NEXT: ldr q0, [x0]
-; NONEON-NOSVE-NEXT: str q0, [sp]
+; NONEON-NOSVE-NEXT: str q0, [sp, #-96]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp]
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #27]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #62]
+; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #27]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #26]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #60]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #25]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #58]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88]
+; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #25]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #24]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #56]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #31]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #54]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80]
+; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #31]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #30]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #52]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #29]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #50]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72]
+; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #29]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #28]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #48]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #19]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #46]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64]
+; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #19]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #18]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #44]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #17]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #42]
+; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56]
+; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #17]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #16]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #94]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #40]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #23]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #38]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48]
+; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #23]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #36]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #21]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #34]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40]
+; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #21]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #20]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #32]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #92]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #90]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #88]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #86]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #84]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #82]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #80]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #78]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #76]
-; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #128]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #74]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #72]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #70]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #68]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #66]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #64]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96]
-; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #96]
-; NONEON-NOSVE-NEXT: add sp, sp, #160
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
%a = load <16 x i8>, ptr %ap
%val = sext <16 x i8> %a to <16 x i32>
@@ -291,18 +254,12 @@ define <4 x i256> @load_sext_v4i32i256(ptr %ap) {
; NONEON-NOSVE-LABEL: load_sext_v4i32i256:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldr q0, [x0]
-; NONEON-NOSVE-NEXT: str q0, [sp, #-96]!
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
+; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp]
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT: ldpsw x9, x10, [sp, #24]
-; NONEON-NOSVE-NEXT: stp x9, x10, [sp, #48]
-; NONEON-NOSVE-NEXT: ldpsw x9, x10, [sp, #16]
-; NONEON-NOSVE-NEXT: stp x9, x10, [sp, #32]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
-; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #64]
-; NONEON-NOSVE-NEXT: ldp x11, x9, [sp, #64]
-; NONEON-NOSVE-NEXT: ldp x12, x13, [sp, #80]
+; NONEON-NOSVE-NEXT: ldpsw x11, x9, [sp, #16]
+; NONEON-NOSVE-NEXT: ldpsw x12, x13, [sp, #24]
; NONEON-NOSVE-NEXT: asr x10, x9, #63
; NONEON-NOSVE-NEXT: asr x14, x11, #63
; NONEON-NOSVE-NEXT: stp x10, x10, [x8, #112]
@@ -315,7 +272,7 @@ define <4 x i256> @load_sext_v4i32i256(ptr %ap) {
; NONEON-NOSVE-NEXT: stp x13, x9, [x8, #32]
; NONEON-NOSVE-NEXT: stp x10, x10, [x8, #16]
; NONEON-NOSVE-NEXT: stp x12, x10, [x8]
-; NONEON-NOSVE-NEXT: add sp, sp, #96
+; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
%a = load <4 x i32>, ptr %ap
%val = sext <4 x i32> %a to <4 x i256>
@@ -327,52 +284,28 @@ define <2 x i256> @load_sext_v2i64i256(ptr %ap) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: mov z1.d, z0.d[1]
-; CHECK-NEXT: fmov x8, d0
-; CHECK-NEXT: fmov x9, d1
-; CHECK-NEXT: asr x8, x8, #63
-; CHECK-NEXT: fmov d3, x8
-; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: asr x9, x9, #63
-; CHECK-NEXT: fmov d4, x9
-; CHECK-NEXT: zip1 z0.d, z0.d, z3.d
-; CHECK-NEXT: mov z3.d, x9
-; CHECK-NEXT: fmov x2, d2
-; CHECK-NEXT: zip1 z1.d, z1.d, z4.d
-; CHECK-NEXT: mov z4.d, z2.d[1]
-; CHECK-NEXT: mov z5.d, z0.d[1]
-; CHECK-NEXT: mov z6.d, z3.d[1]
; CHECK-NEXT: fmov x0, d0
-; CHECK-NEXT: fmov x6, d3
-; CHECK-NEXT: mov z2.d, z1.d[1]
-; CHECK-NEXT: fmov x3, d4
-; CHECK-NEXT: fmov x1, d5
; CHECK-NEXT: fmov x4, d1
-; CHECK-NEXT: fmov x7, d6
-; CHECK-NEXT: fmov x5, d2
+; CHECK-NEXT: asr x1, x0, #63
+; CHECK-NEXT: mov x2, x1
+; CHECK-NEXT: mov x3, x1
+; CHECK-NEXT: asr x5, x4, #63
+; CHECK-NEXT: mov x6, x5
+; CHECK-NEXT: mov x7, x5
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: load_sext_v2i64i256:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #144
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 144
; NONEON-NOSVE-NEXT: ldr q0, [x0]
-; NONEON-NOSVE-NEXT: str q0, [sp]
-; NONEON-NOSVE-NEXT: ldp x8, x10, [sp]
-; NONEON-NOSVE-NEXT: asr x9, x8, #63
-; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32]
-; NONEON-NOSVE-NEXT: asr x8, x10, #63
-; NONEON-NOSVE-NEXT: stp x9, x9, [sp, #16]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16]
-; NONEON-NOSVE-NEXT: stp x10, x8, [sp, #64]
-; NONEON-NOSVE-NEXT: stp x8, x8, [sp, #48]
-; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #80]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48]
-; NONEON-NOSVE-NEXT: ldp x0, x1, [sp, #96]
-; NONEON-NOSVE-NEXT: ldp x2, x3, [sp, #80]
-; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #112]
-; NONEON-NOSVE-NEXT: ldp x4, x5, [sp, #128]
-; NONEON-NOSVE-NEXT: ldp x6, x7, [sp, #112]
-; NONEON-NOSVE-NEXT: add sp, sp, #144
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldp x0, x4, [sp], #16
+; NONEON-NOSVE-NEXT: asr x1, x0, #63
+; NONEON-NOSVE-NEXT: asr x5, x4, #63
+; NONEON-NOSVE-NEXT: mov x2, x1
+; NONEON-NOSVE-NEXT: mov x3, x1
+; NONEON-NOSVE-NEXT: mov x6, x5
+; NONEON-NOSVE-NEXT: mov x7, x5
; NONEON-NOSVE-NEXT: ret
%a = load <2 x i64>, ptr %ap
%val = sext <2 x i64> %a to <2 x i256>
@@ -410,88 +343,51 @@ define <16 x i64> @load_zext_v16i16i64(ptr %ap) {
;
; NONEON-NOSVE-LABEL: load_zext_v16i16i64:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #336
-; NONEON-NOSVE-NEXT: str x29, [sp, #320] // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 336
-; NONEON-NOSVE-NEXT: .cfi_offset w29, -16
+; NONEON-NOSVE-NEXT: sub sp, sp, #192
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 192
; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #316]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #308]
-; NONEON-NOSVE-NEXT: ldr x29, [sp, #320] // 8-byte Folded Reload
-; NONEON-NOSVE-NEXT: str wzr, [sp, #300]
; NONEON-NOSVE-NEXT: stp q0, q1, [sp]
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #292]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #284]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #276]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #40]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32]
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #50]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #48]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #268]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #260]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #54]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #52]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #56]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #42]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #42]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #120]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #40]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #88]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #46]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #112]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #46]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #104]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #44]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #66]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #64]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #152]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #72]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #70]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #68]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #58]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #96]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #34]
+; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #96]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #88]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #32]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #80]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #38]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #72]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #36]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #64]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #58]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #184]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #56]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #136]
-; NONEON-NOSVE-NEXT: ldp d2, d1, [sp, #120]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #62]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #176]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #62]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #168]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #60]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #160]
-; NONEON-NOSVE-NEXT: str d1, [sp, #328]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #104]
-; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #248]
-; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #240]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #152]
-; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #176]
-; NONEON-NOSVE-NEXT: str d0, [sp, #168]
-; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #232]
-; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #224]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #144]
-; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #224]
-; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #216]
-; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #208]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #136]
-; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #192]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #332]
-; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #200]
-; NONEON-NOSVE-NEXT: str w8, [sp, #312]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #328]
-; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #192]
-; NONEON-NOSVE-NEXT: str w8, [sp, #304]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #184]
-; NONEON-NOSVE-NEXT: str w9, [sp, #296]
-; NONEON-NOSVE-NEXT: str w8, [sp, #288]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #176]
-; NONEON-NOSVE-NEXT: ldp q5, q4, [sp, #288]
-; NONEON-NOSVE-NEXT: str w9, [sp, #280]
-; NONEON-NOSVE-NEXT: str w8, [sp, #272]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #168]
-; NONEON-NOSVE-NEXT: str w9, [sp, #264]
-; NONEON-NOSVE-NEXT: str w8, [sp, #256]
-; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #256]
-; NONEON-NOSVE-NEXT: add sp, sp, #336
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #160]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #50]
+; NONEON-NOSVE-NEXT: ldp q5, q4, [sp, #160]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #152]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #48]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #144]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #54]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #136]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #52]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #128]
+; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #128]
+; NONEON-NOSVE-NEXT: add sp, sp, #192
; NONEON-NOSVE-NEXT: ret
%a = load <16 x i16>, ptr %ap
%val = zext <16 x i16> %a to <16 x i64>
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
index c96189b960268..56149e99b15f8 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
@@ -440,25 +440,20 @@ define void @fcvt_v2f16_v2f64(ptr %a, ptr %b) {
; NONEON-NOSVE-LABEL: fcvt_v2f16_v2f64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldr w8, [x0]
-; NONEON-NOSVE-NEXT: str w8, [sp, #-48]!
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
+; NONEON-NOSVE-NEXT: str w8, [sp, #-32]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldr d0, [sp]
; NONEON-NOSVE-NEXT: str d0, [sp, #8]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #10]
-; NONEON-NOSVE-NEXT: fcvt s1, h0
-; NONEON-NOSVE-NEXT: ldr h0, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt s0, h0
-; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #16]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #16]
-; NONEON-NOSVE-NEXT: str d0, [sp, #24]
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #28]
; NONEON-NOSVE-NEXT: fcvt d1, s0
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #24]
+; NONEON-NOSVE-NEXT: ldr h0, [sp, #8]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt d0, s0
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
+; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
; NONEON-NOSVE-NEXT: str q0, [x1]
-; NONEON-NOSVE-NEXT: add sp, sp, #48
+; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
%op1 = load <2 x half>, ptr %a
%res = fpext <2 x half> %op1 to <2 x double>
@@ -480,35 +475,27 @@ define void @fcvt_v4f16_v4f64(ptr %a, ptr %b) {
;
; NONEON-NOSVE-LABEL: fcvt_v4f16_v4f64:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #80
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80
+; NONEON-NOSVE-NEXT: sub sp, sp, #48
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: ldr d0, [x0]
; NONEON-NOSVE-NEXT: str d0, [sp, #8]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #10]
-; NONEON-NOSVE-NEXT: fcvt s1, h0
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt d1, s0
; NONEON-NOSVE-NEXT: ldr h0, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt s0, h0
-; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24]
+; NONEON-NOSVE-NEXT: fcvt d0, s0
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #14]
-; NONEON-NOSVE-NEXT: fcvt s1, h0
-; NONEON-NOSVE-NEXT: ldr h0, [sp, #12]
; NONEON-NOSVE-NEXT: fcvt s0, h0
-; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #16]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #44]
; NONEON-NOSVE-NEXT: fcvt d1, s0
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #40]
-; NONEON-NOSVE-NEXT: fcvt d0, s0
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #36]
-; NONEON-NOSVE-NEXT: fcvt d1, s0
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #32]
+; NONEON-NOSVE-NEXT: ldr h0, [sp, #12]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt d0, s0
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
+; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16]
; NONEON-NOSVE-NEXT: stp q1, q0, [x1]
-; NONEON-NOSVE-NEXT: add sp, sp, #80
+; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x half>, ptr %a
%res = fpext <4 x half> %op1 to <4 x double>
@@ -537,61 +524,44 @@ define void @fcvt_v8f16_v8f64(ptr %a, ptr %b) {
;
; NONEON-NOSVE-LABEL: fcvt_v8f16_v8f64:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #160
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160
; NONEON-NOSVE-NEXT: ldr q0, [x0]
-; NONEON-NOSVE-NEXT: str q0, [sp]
+; NONEON-NOSVE-NEXT: str q0, [sp, #-96]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp]
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #26]
-; NONEON-NOSVE-NEXT: fcvt s1, h0
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt d1, s0
; NONEON-NOSVE-NEXT: ldr h0, [sp, #24]
; NONEON-NOSVE-NEXT: fcvt s0, h0
-; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #56]
+; NONEON-NOSVE-NEXT: fcvt d0, s0
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #30]
-; NONEON-NOSVE-NEXT: fcvt s1, h0
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt d1, s0
; NONEON-NOSVE-NEXT: ldr h0, [sp, #28]
; NONEON-NOSVE-NEXT: fcvt s0, h0
-; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #48]
+; NONEON-NOSVE-NEXT: fcvt d0, s0
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
-; NONEON-NOSVE-NEXT: fcvt s1, h0
+; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt d1, s0
; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
; NONEON-NOSVE-NEXT: fcvt s0, h0
-; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #40]
+; NONEON-NOSVE-NEXT: fcvt d0, s0
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
-; NONEON-NOSVE-NEXT: fcvt s1, h0
-; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
; NONEON-NOSVE-NEXT: fcvt s0, h0
-; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #32]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #92]
-; NONEON-NOSVE-NEXT: fcvt d1, s0
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #88]
-; NONEON-NOSVE-NEXT: fcvt d0, s0
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #144]
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #84]
-; NONEON-NOSVE-NEXT: fcvt d1, s0
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #80]
-; NONEON-NOSVE-NEXT: fcvt d0, s0
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #128]
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #76]
-; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #128]
; NONEON-NOSVE-NEXT: fcvt d1, s0
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #72]
-; NONEON-NOSVE-NEXT: fcvt d0, s0
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #112]
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #68]
-; NONEON-NOSVE-NEXT: fcvt d1, s0
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #64]
+; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt d0, s0
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32]
+; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q2, q3, [x1]
; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32]
-; NONEON-NOSVE-NEXT: add sp, sp, #160
+; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x half>, ptr %a
%res = fpext <8 x half> %op1 to <8 x double>
@@ -637,115 +607,79 @@ define void @fcvt_v16f16_v16f64(ptr %a, ptr %b) {
;
; NONEON-NOSVE-LABEL: fcvt_v16f16_v16f64:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #336
-; NONEON-NOSVE-NEXT: str x29, [sp, #320] // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 336
-; NONEON-NOSVE-NEXT: .cfi_offset w29, -16
+; NONEON-NOSVE-NEXT: sub sp, sp, #192
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 192
; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
-; NONEON-NOSVE-NEXT: ldr x29, [sp, #320] // 8-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q0, q1, [sp]
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #40]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32]
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #56]
-; NONEON-NOSVE-NEXT: ldr h0, [sp, #50]
-; NONEON-NOSVE-NEXT: fcvt s1, h0
-; NONEON-NOSVE-NEXT: ldr h0, [sp, #48]
-; NONEON-NOSVE-NEXT: fcvt s0, h0
-; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #96]
-; NONEON-NOSVE-NEXT: ldr h0, [sp, #54]
-; NONEON-NOSVE-NEXT: fcvt s1, h0
-; NONEON-NOSVE-NEXT: ldr h0, [sp, #52]
-; NONEON-NOSVE-NEXT: fcvt s0, h0
-; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #88]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #42]
-; NONEON-NOSVE-NEXT: fcvt s1, h0
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt d1, s0
; NONEON-NOSVE-NEXT: ldr h0, [sp, #40]
; NONEON-NOSVE-NEXT: fcvt s0, h0
-; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #80]
+; NONEON-NOSVE-NEXT: fcvt d0, s0
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #112]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #46]
-; NONEON-NOSVE-NEXT: fcvt s1, h0
-; NONEON-NOSVE-NEXT: ldr h0, [sp, #44]
-; NONEON-NOSVE-NEXT: fcvt s0, h0
-; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #72]
-; NONEON-NOSVE-NEXT: ldr h0, [sp, #66]
-; NONEON-NOSVE-NEXT: fcvt s1, h0
-; NONEON-NOSVE-NEXT: ldr h0, [sp, #64]
-; NONEON-NOSVE-NEXT: fcvt s0, h0
-; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #128]
-; NONEON-NOSVE-NEXT: ldr h0, [sp, #70]
-; NONEON-NOSVE-NEXT: fcvt s1, h0
-; NONEON-NOSVE-NEXT: ldr h0, [sp, #68]
-; NONEON-NOSVE-NEXT: fcvt s0, h0
-; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #120]
-; NONEON-NOSVE-NEXT: ldr h0, [sp, #58]
-; NONEON-NOSVE-NEXT: fcvt s1, h0
-; NONEON-NOSVE-NEXT: ldr h0, [sp, #56]
-; NONEON-NOSVE-NEXT: fcvt s0, h0
-; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #112]
-; NONEON-NOSVE-NEXT: ldr h0, [sp, #62]
-; NONEON-NOSVE-NEXT: fcvt s1, h0
-; NONEON-NOSVE-NEXT: ldr h0, [sp, #60]
; NONEON-NOSVE-NEXT: fcvt s0, h0
-; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #104]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #88]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #152]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #72]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #136]
-; NONEON-NOSVE-NEXT: ldp d2, d1, [sp, #120]
-; NONEON-NOSVE-NEXT: str d1, [sp, #328]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #104]
-; NONEON-NOSVE-NEXT: str d0, [sp, #168]
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #164]
-; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #176]
-; NONEON-NOSVE-NEXT: fcvt d1, s0
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #160]
-; NONEON-NOSVE-NEXT: fcvt d0, s0
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #240]
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #156]
; NONEON-NOSVE-NEXT: fcvt d1, s0
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #152]
+; NONEON-NOSVE-NEXT: ldr h0, [sp, #44]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt d0, s0
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #224]
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #148]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96]
+; NONEON-NOSVE-NEXT: ldr h0, [sp, #34]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt d1, s0
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #144]
+; NONEON-NOSVE-NEXT: ldr h0, [sp, #32]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt d0, s0
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #208]
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #140]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80]
+; NONEON-NOSVE-NEXT: ldr h0, [sp, #38]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt d1, s0
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #136]
+; NONEON-NOSVE-NEXT: ldr h0, [sp, #36]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt d0, s0
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #192]
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #332]
-; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #192]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64]
+; NONEON-NOSVE-NEXT: ldr h0, [sp, #58]
+; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #64]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt d1, s0
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #328]
+; NONEON-NOSVE-NEXT: ldr h0, [sp, #56]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt d0, s0
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #304]
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #188]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #176]
+; NONEON-NOSVE-NEXT: ldr h0, [sp, #62]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt d1, s0
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #184]
+; NONEON-NOSVE-NEXT: ldr h0, [sp, #60]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt d0, s0
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #288]
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #180]
-; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #288]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #160]
+; NONEON-NOSVE-NEXT: ldr h0, [sp, #50]
+; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #160]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt d1, s0
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #176]
+; NONEON-NOSVE-NEXT: ldr h0, [sp, #48]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt d0, s0
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #272]
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #172]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #144]
+; NONEON-NOSVE-NEXT: ldr h0, [sp, #54]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt d1, s0
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #168]
+; NONEON-NOSVE-NEXT: ldr h0, [sp, #52]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt d0, s0
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #256]
-; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #224]
-; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #256]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #128]
+; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #96]
+; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #128]
; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #32]
; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #64]
; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
; NONEON-NOSVE-NEXT: stp q5, q2, [x1, #96]
-; NONEON-NOSVE-NEXT: add sp, sp, #336
+; NONEON-NOSVE-NEXT: add sp, sp, #192
; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%res = fpext <16 x half> %op1 to <16 x double>
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
index 40c8ab27c0b02..75911e5ff1569 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
@@ -30,50 +30,32 @@ define void @sext_v8i1_v8i32(<8 x i1> %a, ptr %out) {
;
; NONEON-NOSVE-LABEL: sext_v8i1_v8i32:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #80
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80
+; NONEON-NOSVE-NEXT: sub sp, sp, #48
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: str d0, [sp, #8]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #22]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #20]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #18]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #16]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #30]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #28]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #26]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #24]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #32]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #46]
-; NONEON-NOSVE-NEXT: ldrh w10, [sp, #44]
-; NONEON-NOSVE-NEXT: ldrh w12, [sp, #42]
-; NONEON-NOSVE-NEXT: ldrh w14, [sp, #40]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #32]
-; NONEON-NOSVE-NEXT: ldrh w11, [sp, #34]
+; NONEON-NOSVE-NEXT: ldrb w10, [sp, #10]
+; NONEON-NOSVE-NEXT: ldrb w12, [sp, #9]
+; NONEON-NOSVE-NEXT: ldrb w14, [sp, #8]
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12]
+; NONEON-NOSVE-NEXT: ldrb w11, [sp, #13]
; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1
; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1
-; NONEON-NOSVE-NEXT: ldrh w13, [sp, #36]
-; NONEON-NOSVE-NEXT: ldrh w15, [sp, #38]
+; NONEON-NOSVE-NEXT: ldrb w13, [sp, #14]
+; NONEON-NOSVE-NEXT: ldrb w15, [sp, #15]
; NONEON-NOSVE-NEXT: sbfx w12, w12, #0, #1
; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1
-; NONEON-NOSVE-NEXT: stp w10, w8, [sp, #72]
+; NONEON-NOSVE-NEXT: stp w10, w8, [sp, #40]
; NONEON-NOSVE-NEXT: sbfx w8, w14, #0, #1
; NONEON-NOSVE-NEXT: sbfx w10, w15, #0, #1
-; NONEON-NOSVE-NEXT: stp w8, w12, [sp, #64]
+; NONEON-NOSVE-NEXT: stp w8, w12, [sp, #32]
; NONEON-NOSVE-NEXT: sbfx w12, w13, #0, #1
; NONEON-NOSVE-NEXT: sbfx w8, w11, #0, #1
-; NONEON-NOSVE-NEXT: stp w12, w10, [sp, #56]
-; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #48]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48]
+; NONEON-NOSVE-NEXT: stp w12, w10, [sp, #24]
+; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #16]
+; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16]
; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
-; NONEON-NOSVE-NEXT: add sp, sp, #80
+; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
%b = sext <8 x i1> %a to <8 x i32>
store <8 x i32> %b, ptr %out
@@ -206,14 +188,14 @@ define void @sext_v32i8_v32i16(ptr %in, ptr %out) {
;
; NONEON-NOSVE-LABEL: sext_v32i8_v32i16:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #272
-; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #176] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #192] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #208] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #224] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #240] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #256] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 272
+; NONEON-NOSVE-NEXT: sub sp, sp, #208
+; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #112] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #128] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #144] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #160] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #176] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #192] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 208
; NONEON-NOSVE-NEXT: .cfi_offset w19, -8
; NONEON-NOSVE-NEXT: .cfi_offset w20, -16
; NONEON-NOSVE-NEXT: .cfi_offset w21, -24
@@ -230,182 +212,146 @@ define void @sext_v32i8_v32i16(ptr %in, ptr %out) {
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16]
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41]
-; NONEON-NOSVE-NEXT: ldrb w29, [sp, #18]
-; NONEON-NOSVE-NEXT: ldrb w27, [sp, #16]
-; NONEON-NOSVE-NEXT: ldrb w25, [sp, #30]
-; NONEON-NOSVE-NEXT: ldrb w23, [sp, #28]
+; NONEON-NOSVE-NEXT: ldrb w17, [sp, #23]
+; NONEON-NOSVE-NEXT: ldrb w14, [sp, #20]
+; NONEON-NOSVE-NEXT: ldrb w30, [sp, #19]
+; NONEON-NOSVE-NEXT: ldrb w29, [sp, #16]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20]
-; NONEON-NOSVE-NEXT: ldrb w21, [sp, #26]
-; NONEON-NOSVE-NEXT: ldrb w19, [sp, #24]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #19]
-; NONEON-NOSVE-NEXT: ldrb w6, [sp, #38]
+; NONEON-NOSVE-NEXT: add w17, w17, w17
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18]
+; NONEON-NOSVE-NEXT: add w5, w14, w14
+; NONEON-NOSVE-NEXT: sxtb w6, w17
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #17]
+; NONEON-NOSVE-NEXT: sxtb w5, w5
; NONEON-NOSVE-NEXT: add w8, w8, w8
-; NONEON-NOSVE-NEXT: ldrb w28, [sp, #17]
-; NONEON-NOSVE-NEXT: ldrb w16, [sp, #22]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #60]
-; NONEON-NOSVE-NEXT: add w8, w29, w29
-; NONEON-NOSVE-NEXT: ldrb w4, [sp, #36]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #58]
-; NONEON-NOSVE-NEXT: add w8, w27, w27
-; NONEON-NOSVE-NEXT: ldrb w26, [sp, #31]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #56]
-; NONEON-NOSVE-NEXT: add w8, w25, w25
+; NONEON-NOSVE-NEXT: ldrb w27, [sp, #30]
+; NONEON-NOSVE-NEXT: strh w6, [sp, #78]
+; NONEON-NOSVE-NEXT: add w6, w30, w30
+; NONEON-NOSVE-NEXT: sxtb w8, w8
+; NONEON-NOSVE-NEXT: ldrb w28, [sp, #31]
+; NONEON-NOSVE-NEXT: sxtb w6, w6
; NONEON-NOSVE-NEXT: add w9, w9, w9
-; NONEON-NOSVE-NEXT: strb w8, [sp, #54]
-; NONEON-NOSVE-NEXT: add w8, w23, w23
-; NONEON-NOSVE-NEXT: ldrb w2, [sp, #34]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #52]
-; NONEON-NOSVE-NEXT: add w8, w21, w21
-; NONEON-NOSVE-NEXT: ldrb w24, [sp, #29]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #50]
-; NONEON-NOSVE-NEXT: add w8, w19, w19
-; NONEON-NOSVE-NEXT: ldrb w17, [sp, #23]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #59]
-; NONEON-NOSVE-NEXT: add w9, w28, w28
-; NONEON-NOSVE-NEXT: add w18, w16, w16
-; NONEON-NOSVE-NEXT: strb w8, [sp, #48]
-; NONEON-NOSVE-NEXT: add w8, w6, w6
-; NONEON-NOSVE-NEXT: ldrb w16, [sp, #32]
-; NONEON-NOSVE-NEXT: ldrb w22, [sp, #27]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #57]
+; NONEON-NOSVE-NEXT: strh w5, [sp, #72]
+; NONEON-NOSVE-NEXT: add w5, w29, w29
+; NONEON-NOSVE-NEXT: ldrb w25, [sp, #28]
+; NONEON-NOSVE-NEXT: ldrb w26, [sp, #29]
+; NONEON-NOSVE-NEXT: sxtb w9, w9
+; NONEON-NOSVE-NEXT: sxtb w5, w5
+; NONEON-NOSVE-NEXT: strh w8, [sp, #68]
+; NONEON-NOSVE-NEXT: add w8, w27, w27
+; NONEON-NOSVE-NEXT: ldrb w23, [sp, #26]
+; NONEON-NOSVE-NEXT: strh w6, [sp, #70]
+; NONEON-NOSVE-NEXT: add w6, w28, w28
+; NONEON-NOSVE-NEXT: sxtb w8, w8
+; NONEON-NOSVE-NEXT: ldrb w24, [sp, #27]
+; NONEON-NOSVE-NEXT: sxtb w6, w6
+; NONEON-NOSVE-NEXT: strh w9, [sp, #66]
; NONEON-NOSVE-NEXT: add w9, w26, w26
-; NONEON-NOSVE-NEXT: strb w8, [sp, #78]
-; NONEON-NOSVE-NEXT: add w8, w4, w4
+; NONEON-NOSVE-NEXT: strh w5, [sp, #64]
+; NONEON-NOSVE-NEXT: add w5, w25, w25
+; NONEON-NOSVE-NEXT: sxtb w9, w9
+; NONEON-NOSVE-NEXT: strh w8, [sp, #60]
+; NONEON-NOSVE-NEXT: sxtb w8, w5
+; NONEON-NOSVE-NEXT: ldrb w21, [sp, #24]
+; NONEON-NOSVE-NEXT: ldrb w22, [sp, #25]
+; NONEON-NOSVE-NEXT: strh w6, [sp, #62]
+; NONEON-NOSVE-NEXT: add w6, w24, w24
+; NONEON-NOSVE-NEXT: add w5, w23, w23
+; NONEON-NOSVE-NEXT: strh w9, [sp, #58]
+; NONEON-NOSVE-NEXT: sxtb w9, w6
+; NONEON-NOSVE-NEXT: strh w8, [sp, #56]
+; NONEON-NOSVE-NEXT: sxtb w8, w5
+; NONEON-NOSVE-NEXT: ldrb w4, [sp, #38]
+; NONEON-NOSVE-NEXT: ldrb w20, [sp, #39]
+; NONEON-NOSVE-NEXT: add w6, w22, w22
+; NONEON-NOSVE-NEXT: add w5, w21, w21
+; NONEON-NOSVE-NEXT: ldrb w15, [sp, #21]
+; NONEON-NOSVE-NEXT: strh w9, [sp, #54]
+; NONEON-NOSVE-NEXT: sxtb w9, w6
+; NONEON-NOSVE-NEXT: strh w8, [sp, #52]
+; NONEON-NOSVE-NEXT: sxtb w8, w5
+; NONEON-NOSVE-NEXT: ldrb w2, [sp, #36]
+; NONEON-NOSVE-NEXT: ldrb w3, [sp, #37]
+; NONEON-NOSVE-NEXT: add w6, w20, w20
+; NONEON-NOSVE-NEXT: add w4, w4, w4
+; NONEON-NOSVE-NEXT: ldrb w16, [sp, #22]
+; NONEON-NOSVE-NEXT: add w18, w15, w15
+; NONEON-NOSVE-NEXT: strh w9, [sp, #50]
+; NONEON-NOSVE-NEXT: sxtb w9, w6
+; NONEON-NOSVE-NEXT: sxtb w19, w18
+; NONEON-NOSVE-NEXT: strh w8, [sp, #48]
+; NONEON-NOSVE-NEXT: sxtb w8, w4
+; NONEON-NOSVE-NEXT: ldrb w18, [sp, #34]
+; NONEON-NOSVE-NEXT: ldrb w0, [sp, #35]
+; NONEON-NOSVE-NEXT: add w3, w3, w3
+; NONEON-NOSVE-NEXT: add w2, w2, w2
+; NONEON-NOSVE-NEXT: add w16, w16, w16
+; NONEON-NOSVE-NEXT: strh w9, [sp, #110]
+; NONEON-NOSVE-NEXT: sxtb w9, w3
+; NONEON-NOSVE-NEXT: strh w8, [sp, #108]
+; NONEON-NOSVE-NEXT: sxtb w8, w2
+; NONEON-NOSVE-NEXT: sxtb w7, w16
+; NONEON-NOSVE-NEXT: ldrb w16, [sp, #32]
+; NONEON-NOSVE-NEXT: ldrb w17, [sp, #33]
+; NONEON-NOSVE-NEXT: add w0, w0, w0
+; NONEON-NOSVE-NEXT: add w18, w18, w18
+; NONEON-NOSVE-NEXT: strh w9, [sp, #106]
+; NONEON-NOSVE-NEXT: sxtb w9, w0
+; NONEON-NOSVE-NEXT: strh w8, [sp, #104]
+; NONEON-NOSVE-NEXT: sxtb w8, w18
; NONEON-NOSVE-NEXT: ldrb w14, [sp, #46]
-; NONEON-NOSVE-NEXT: ldrb w20, [sp, #25]
-; NONEON-NOSVE-NEXT: ldrb w30, [sp, #21]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #55]
-; NONEON-NOSVE-NEXT: add w9, w24, w24
-; NONEON-NOSVE-NEXT: strb w8, [sp, #76]
-; NONEON-NOSVE-NEXT: add w8, w2, w2
-; NONEON-NOSVE-NEXT: ldrb w12, [sp, #44]
-; NONEON-NOSVE-NEXT: add w17, w17, w17
-; NONEON-NOSVE-NEXT: strb w9, [sp, #53]
-; NONEON-NOSVE-NEXT: add w9, w22, w22
-; NONEON-NOSVE-NEXT: strb w8, [sp, #74]
-; NONEON-NOSVE-NEXT: add w8, w16, w16
-; NONEON-NOSVE-NEXT: ldrb w10, [sp, #42]
-; NONEON-NOSVE-NEXT: strb w17, [sp, #63]
-; NONEON-NOSVE-NEXT: add w17, w30, w30
-; NONEON-NOSVE-NEXT: strb w9, [sp, #51]
-; NONEON-NOSVE-NEXT: add w9, w20, w20
-; NONEON-NOSVE-NEXT: ldrb w7, [sp, #39]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #72]
-; NONEON-NOSVE-NEXT: add w8, w14, w14
-; NONEON-NOSVE-NEXT: ldrb w5, [sp, #37]
-; NONEON-NOSVE-NEXT: strb w18, [sp, #62]
-; NONEON-NOSVE-NEXT: ldrb w3, [sp, #35]
-; NONEON-NOSVE-NEXT: ldrb w0, [sp, #33]
-; NONEON-NOSVE-NEXT: strb w17, [sp, #61]
; NONEON-NOSVE-NEXT: ldrb w15, [sp, #47]
+; NONEON-NOSVE-NEXT: add w17, w17, w17
+; NONEON-NOSVE-NEXT: add w16, w16, w16
+; NONEON-NOSVE-NEXT: strh w9, [sp, #102]
+; NONEON-NOSVE-NEXT: sxtb w9, w17
+; NONEON-NOSVE-NEXT: ldrb w12, [sp, #44]
+; NONEON-NOSVE-NEXT: strh w8, [sp, #100]
+; NONEON-NOSVE-NEXT: sxtb w8, w16
; NONEON-NOSVE-NEXT: ldrb w13, [sp, #45]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #49]
-; NONEON-NOSVE-NEXT: add w9, w7, w7
+; NONEON-NOSVE-NEXT: add w15, w15, w15
+; NONEON-NOSVE-NEXT: add w14, w14, w14
+; NONEON-NOSVE-NEXT: strh w9, [sp, #98]
+; NONEON-NOSVE-NEXT: sxtb w9, w15
+; NONEON-NOSVE-NEXT: strh w8, [sp, #96]
+; NONEON-NOSVE-NEXT: sxtb w8, w14
+; NONEON-NOSVE-NEXT: ldrb w10, [sp, #42]
; NONEON-NOSVE-NEXT: ldrb w11, [sp, #43]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #70]
-; NONEON-NOSVE-NEXT: add w8, w12, w12
-; NONEON-NOSVE-NEXT: strb w8, [sp, #68]
-; NONEON-NOSVE-NEXT: add w8, w10, w10
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #66]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload
-; NONEON-NOSVE-NEXT: strb w9, [sp, #79]
-; NONEON-NOSVE-NEXT: add w9, w5, w5
-; NONEON-NOSVE-NEXT: strb w9, [sp, #77]
-; NONEON-NOSVE-NEXT: add w9, w3, w3
-; NONEON-NOSVE-NEXT: add w8, w8, w8
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #64]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #95]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #75]
-; NONEON-NOSVE-NEXT: add w9, w0, w0
-; NONEON-NOSVE-NEXT: strh w8, [sp, #142]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #94]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #73]
-; NONEON-NOSVE-NEXT: add w9, w15, w15
-; NONEON-NOSVE-NEXT: strh w8, [sp, #140]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #93]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #71]
-; NONEON-NOSVE-NEXT: add w9, w13, w13
-; NONEON-NOSVE-NEXT: strh w8, [sp, #138]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #92]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #69]
-; NONEON-NOSVE-NEXT: add w9, w11, w11
-; NONEON-NOSVE-NEXT: strh w8, [sp, #136]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #91]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #67]
-; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload
-; NONEON-NOSVE-NEXT: strh w8, [sp, #134]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #90]
-; NONEON-NOSVE-NEXT: add w9, w9, w9
-; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #256] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: strh w8, [sp, #132]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #89]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #65]
-; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #240] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: strh w8, [sp, #130]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #88]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #128]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #87]
-; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #224] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: strh w8, [sp, #126]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #86]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96]
-; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #208] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: strh w8, [sp, #124]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #85]
-; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #192] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: strh w8, [sp, #122]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #84]
-; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #176] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: strh w8, [sp, #120]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #83]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #118]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #82]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #116]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #81]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #114]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #80]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #112]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #111]
-; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #112]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #174]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #110]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #172]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #109]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #170]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #108]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #168]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #107]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #166]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #106]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #164]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #105]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #162]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #104]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #160]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #103]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #158]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #102]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #156]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #101]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #154]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #100]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #152]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #99]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #150]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #98]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #148]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #97]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #146]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #96]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #144]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #144]
+; NONEON-NOSVE-NEXT: add w13, w13, w13
+; NONEON-NOSVE-NEXT: add w12, w12, w12
+; NONEON-NOSVE-NEXT: strh w9, [sp, #94]
+; NONEON-NOSVE-NEXT: sxtb w9, w13
+; NONEON-NOSVE-NEXT: strh w8, [sp, #92]
+; NONEON-NOSVE-NEXT: sxtb w8, w12
+; NONEON-NOSVE-NEXT: add w11, w11, w11
+; NONEON-NOSVE-NEXT: add w10, w10, w10
+; NONEON-NOSVE-NEXT: strh w9, [sp, #90]
+; NONEON-NOSVE-NEXT: sxtb w9, w11
+; NONEON-NOSVE-NEXT: ldr w11, [sp, #8] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: strh w8, [sp, #88]
+; NONEON-NOSVE-NEXT: sxtb w8, w10
+; NONEON-NOSVE-NEXT: ldr w10, [sp, #12] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: strh w9, [sp, #86]
+; NONEON-NOSVE-NEXT: add w11, w11, w11
+; NONEON-NOSVE-NEXT: strh w8, [sp, #84]
+; NONEON-NOSVE-NEXT: add w10, w10, w10
+; NONEON-NOSVE-NEXT: sxtb w9, w11
+; NONEON-NOSVE-NEXT: strh w7, [sp, #76]
+; NONEON-NOSVE-NEXT: sxtb w8, w10
+; NONEON-NOSVE-NEXT: strh w19, [sp, #74]
+; NONEON-NOSVE-NEXT: strh w9, [sp, #82]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #48]
+; NONEON-NOSVE-NEXT: strh w8, [sp, #80]
+; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #192] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #80]
+; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #176] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q2, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #160] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #144] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32]
-; NONEON-NOSVE-NEXT: add sp, sp, #272
+; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #128] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #112] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: add sp, sp, #208
; NONEON-NOSVE-NEXT: ret
%a = load <32 x i8>, ptr %in
%b = add <32 x i8> %a, %a
@@ -431,42 +377,24 @@ define void @sext_v8i8_v8i32(<8 x i8> %a, ptr %out) {
;
; NONEON-NOSVE-LABEL: sext_v8i8_v8i32:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #80
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80
+; NONEON-NOSVE-NEXT: sub sp, sp, #48
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: str d0, [sp, #8]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #11]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #30]
+; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #10]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #28]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #9]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #26]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40]
+; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #8]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #24]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #15]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #22]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32]
+; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #14]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #20]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #13]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #18]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24]
+; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #12]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #16]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #46]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #44]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #42]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #40]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #38]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #36]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #34]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #32]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16]
+; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16]
; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
-; NONEON-NOSVE-NEXT: add sp, sp, #80
+; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
%b = sext <8 x i8> %a to <8 x i32>
store <8 x i32>%b, ptr %out
@@ -492,75 +420,39 @@ define void @sext_v16i8_v16i32(<16 x i8> %a, ptr %out) {
;
; NONEON-NOSVE-LABEL: sext_v16i8_v16i32:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: str q0, [sp, #-160]!
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160
+; NONEON-NOSVE-NEXT: str q0, [sp, #-96]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp]
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #27]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #62]
+; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #27]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #26]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #60]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #25]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #58]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88]
+; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #25]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #24]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #56]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #31]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #54]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80]
+; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #31]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #30]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #52]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #29]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #50]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72]
+; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #29]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #28]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #48]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #19]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #46]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64]
+; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #19]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #18]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #44]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #17]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #42]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56]
+; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #17]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #16]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #94]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #40]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #23]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #38]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48]
+; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #23]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #36]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #21]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #34]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40]
+; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #21]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #20]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #32]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #92]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #90]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #88]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #86]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #84]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #82]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #80]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #78]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #76]
-; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #128]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #74]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #72]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #70]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #68]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #66]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #64]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32]
+; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q2, q3, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [x0, #32]
-; NONEON-NOSVE-NEXT: add sp, sp, #160
+; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
%b = sext <16 x i8> %a to <16 x i32>
store <16 x i32> %b, ptr %out
@@ -599,14 +491,14 @@ define void @sext_v32i8_v32i32(ptr %in, ptr %out) {
;
; NONEON-NOSVE-LABEL: sext_v32i8_v32i32:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #464
-; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #368] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #384] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #400] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #416] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #432] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #448] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 464
+; NONEON-NOSVE-NEXT: sub sp, sp, #272
+; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #176] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #192] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #208] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #224] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #240] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #256] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 272
; NONEON-NOSVE-NEXT: .cfi_offset w19, -8
; NONEON-NOSVE-NEXT: .cfi_offset w20, -16
; NONEON-NOSVE-NEXT: .cfi_offset w21, -24
@@ -621,258 +513,136 @@ define void @sext_v32i8_v32i32(ptr %in, ptr %out) {
; NONEON-NOSVE-NEXT: .cfi_offset w29, -96
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41]
-; NONEON-NOSVE-NEXT: ldrb w29, [sp, #18]
-; NONEON-NOSVE-NEXT: ldrb w27, [sp, #16]
-; NONEON-NOSVE-NEXT: ldrb w25, [sp, #30]
-; NONEON-NOSVE-NEXT: ldrb w23, [sp, #28]
+; NONEON-NOSVE-NEXT: ldrb w16, [sp, #18]
+; NONEON-NOSVE-NEXT: ldrb w17, [sp, #19]
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #44]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #45]
+; NONEON-NOSVE-NEXT: ldrb w14, [sp, #16]
+; NONEON-NOSVE-NEXT: ldrb w15, [sp, #17]
+; NONEON-NOSVE-NEXT: add w16, w16, w16
+; NONEON-NOSVE-NEXT: add w17, w17, w17
+; NONEON-NOSVE-NEXT: ldrb w30, [sp, #23]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20]
-; NONEON-NOSVE-NEXT: ldrb w21, [sp, #26]
-; NONEON-NOSVE-NEXT: ldrb w19, [sp, #24]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #19]
-; NONEON-NOSVE-NEXT: ldrb w6, [sp, #38]
+; NONEON-NOSVE-NEXT: sxtb w6, w17
+; NONEON-NOSVE-NEXT: sxtb w7, w16
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22]
+; NONEON-NOSVE-NEXT: add w5, w14, w14
+; NONEON-NOSVE-NEXT: add w18, w15, w15
+; NONEON-NOSVE-NEXT: sxtb w19, w18
+; NONEON-NOSVE-NEXT: ldrb w29, [sp, #20]
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #21]
+; NONEON-NOSVE-NEXT: stp w7, w6, [sp, #104]
+; NONEON-NOSVE-NEXT: add w6, w30, w30
+; NONEON-NOSVE-NEXT: sxtb w5, w5
; NONEON-NOSVE-NEXT: add w8, w8, w8
-; NONEON-NOSVE-NEXT: ldrb w28, [sp, #17]
-; NONEON-NOSVE-NEXT: ldrb w16, [sp, #22]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #60]
-; NONEON-NOSVE-NEXT: add w8, w29, w29
-; NONEON-NOSVE-NEXT: ldrb w4, [sp, #36]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #58]
-; NONEON-NOSVE-NEXT: add w8, w27, w27
-; NONEON-NOSVE-NEXT: ldrb w26, [sp, #31]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #56]
-; NONEON-NOSVE-NEXT: add w8, w25, w25
+; NONEON-NOSVE-NEXT: ldrb w27, [sp, #26]
+; NONEON-NOSVE-NEXT: ldrb w28, [sp, #27]
+; NONEON-NOSVE-NEXT: sxtb w6, w6
+; NONEON-NOSVE-NEXT: sxtb w8, w8
+; NONEON-NOSVE-NEXT: stp w5, w19, [sp, #96]
; NONEON-NOSVE-NEXT: add w9, w9, w9
-; NONEON-NOSVE-NEXT: strb w8, [sp, #54]
-; NONEON-NOSVE-NEXT: add w8, w23, w23
-; NONEON-NOSVE-NEXT: ldrb w2, [sp, #34]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #52]
-; NONEON-NOSVE-NEXT: add w8, w21, w21
-; NONEON-NOSVE-NEXT: ldrb w24, [sp, #29]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #50]
-; NONEON-NOSVE-NEXT: add w8, w19, w19
-; NONEON-NOSVE-NEXT: ldrb w17, [sp, #23]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #59]
-; NONEON-NOSVE-NEXT: add w9, w28, w28
-; NONEON-NOSVE-NEXT: add w18, w16, w16
-; NONEON-NOSVE-NEXT: strb w8, [sp, #48]
-; NONEON-NOSVE-NEXT: add w8, w6, w6
-; NONEON-NOSVE-NEXT: ldrb w16, [sp, #32]
-; NONEON-NOSVE-NEXT: ldrb w22, [sp, #27]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #57]
+; NONEON-NOSVE-NEXT: add w5, w29, w29
+; NONEON-NOSVE-NEXT: ldrb w25, [sp, #24]
+; NONEON-NOSVE-NEXT: ldrb w26, [sp, #25]
+; NONEON-NOSVE-NEXT: sxtb w9, w9
+; NONEON-NOSVE-NEXT: stp w8, w6, [sp, #88]
+; NONEON-NOSVE-NEXT: add w6, w28, w28
+; NONEON-NOSVE-NEXT: sxtb w5, w5
+; NONEON-NOSVE-NEXT: add w8, w27, w27
+; NONEON-NOSVE-NEXT: sxtb w6, w6
+; NONEON-NOSVE-NEXT: sxtb w8, w8
+; NONEON-NOSVE-NEXT: ldrb w23, [sp, #30]
+; NONEON-NOSVE-NEXT: ldrb w24, [sp, #31]
+; NONEON-NOSVE-NEXT: stp w5, w9, [sp, #80]
; NONEON-NOSVE-NEXT: add w9, w26, w26
-; NONEON-NOSVE-NEXT: strb w8, [sp, #78]
-; NONEON-NOSVE-NEXT: add w8, w4, w4
-; NONEON-NOSVE-NEXT: ldrb w14, [sp, #46]
-; NONEON-NOSVE-NEXT: ldrb w20, [sp, #25]
-; NONEON-NOSVE-NEXT: ldrb w30, [sp, #21]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #55]
-; NONEON-NOSVE-NEXT: add w9, w24, w24
-; NONEON-NOSVE-NEXT: strb w8, [sp, #76]
-; NONEON-NOSVE-NEXT: add w8, w2, w2
-; NONEON-NOSVE-NEXT: ldrb w12, [sp, #44]
+; NONEON-NOSVE-NEXT: add w5, w25, w25
+; NONEON-NOSVE-NEXT: stp w8, w6, [sp, #72]
+; NONEON-NOSVE-NEXT: sxtb w9, w9
+; NONEON-NOSVE-NEXT: sxtb w8, w5
+; NONEON-NOSVE-NEXT: ldrb w21, [sp, #28]
+; NONEON-NOSVE-NEXT: ldrb w22, [sp, #29]
+; NONEON-NOSVE-NEXT: add w6, w24, w24
+; NONEON-NOSVE-NEXT: add w5, w23, w23
+; NONEON-NOSVE-NEXT: ldrb w4, [sp, #34]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64]
+; NONEON-NOSVE-NEXT: sxtb w9, w6
+; NONEON-NOSVE-NEXT: sxtb w8, w5
+; NONEON-NOSVE-NEXT: ldrb w20, [sp, #35]
+; NONEON-NOSVE-NEXT: add w6, w22, w22
+; NONEON-NOSVE-NEXT: add w5, w21, w21
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56]
+; NONEON-NOSVE-NEXT: sxtb w9, w6
+; NONEON-NOSVE-NEXT: sxtb w8, w5
+; NONEON-NOSVE-NEXT: ldrb w2, [sp, #32]
+; NONEON-NOSVE-NEXT: ldrb w3, [sp, #33]
+; NONEON-NOSVE-NEXT: add w6, w20, w20
+; NONEON-NOSVE-NEXT: add w4, w4, w4
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48]
+; NONEON-NOSVE-NEXT: sxtb w9, w6
+; NONEON-NOSVE-NEXT: sxtb w8, w4
+; NONEON-NOSVE-NEXT: ldrb w18, [sp, #38]
+; NONEON-NOSVE-NEXT: ldrb w0, [sp, #39]
+; NONEON-NOSVE-NEXT: add w3, w3, w3
+; NONEON-NOSVE-NEXT: add w2, w2, w2
+; NONEON-NOSVE-NEXT: ldrb w16, [sp, #36]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #168]
+; NONEON-NOSVE-NEXT: sxtb w9, w3
+; NONEON-NOSVE-NEXT: sxtb w8, w2
+; NONEON-NOSVE-NEXT: ldrb w17, [sp, #37]
+; NONEON-NOSVE-NEXT: add w0, w0, w0
+; NONEON-NOSVE-NEXT: add w18, w18, w18
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #160]
+; NONEON-NOSVE-NEXT: sxtb w9, w0
+; NONEON-NOSVE-NEXT: sxtb w8, w18
+; NONEON-NOSVE-NEXT: ldrb w14, [sp, #42]
+; NONEON-NOSVE-NEXT: ldrb w15, [sp, #43]
; NONEON-NOSVE-NEXT: add w17, w17, w17
-; NONEON-NOSVE-NEXT: strb w9, [sp, #53]
-; NONEON-NOSVE-NEXT: add w9, w22, w22
-; NONEON-NOSVE-NEXT: strb w8, [sp, #74]
-; NONEON-NOSVE-NEXT: add w8, w16, w16
-; NONEON-NOSVE-NEXT: ldrb w10, [sp, #42]
-; NONEON-NOSVE-NEXT: strb w17, [sp, #63]
-; NONEON-NOSVE-NEXT: add w17, w30, w30
-; NONEON-NOSVE-NEXT: strb w9, [sp, #51]
-; NONEON-NOSVE-NEXT: add w9, w20, w20
-; NONEON-NOSVE-NEXT: ldrb w7, [sp, #39]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #72]
-; NONEON-NOSVE-NEXT: add w8, w14, w14
-; NONEON-NOSVE-NEXT: ldrb w5, [sp, #37]
-; NONEON-NOSVE-NEXT: strb w18, [sp, #62]
-; NONEON-NOSVE-NEXT: ldrb w3, [sp, #35]
-; NONEON-NOSVE-NEXT: ldrb w0, [sp, #33]
-; NONEON-NOSVE-NEXT: strb w17, [sp, #61]
-; NONEON-NOSVE-NEXT: ldrb w15, [sp, #47]
-; NONEON-NOSVE-NEXT: ldrb w13, [sp, #45]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #49]
-; NONEON-NOSVE-NEXT: add w9, w7, w7
-; NONEON-NOSVE-NEXT: ldrb w11, [sp, #43]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #70]
-; NONEON-NOSVE-NEXT: add w8, w12, w12
-; NONEON-NOSVE-NEXT: strb w8, [sp, #68]
-; NONEON-NOSVE-NEXT: add w8, w10, w10
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #66]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload
-; NONEON-NOSVE-NEXT: strb w9, [sp, #79]
-; NONEON-NOSVE-NEXT: add w9, w5, w5
-; NONEON-NOSVE-NEXT: strb w9, [sp, #77]
-; NONEON-NOSVE-NEXT: add w9, w3, w3
-; NONEON-NOSVE-NEXT: add w8, w8, w8
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #64]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #91]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #75]
-; NONEON-NOSVE-NEXT: add w9, w0, w0
-; NONEON-NOSVE-NEXT: strh w8, [sp, #134]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #90]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #73]
-; NONEON-NOSVE-NEXT: add w9, w15, w15
-; NONEON-NOSVE-NEXT: strh w8, [sp, #132]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #89]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #71]
-; NONEON-NOSVE-NEXT: add w9, w13, w13
-; NONEON-NOSVE-NEXT: strh w8, [sp, #130]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #88]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #69]
-; NONEON-NOSVE-NEXT: add w9, w11, w11
-; NONEON-NOSVE-NEXT: strh w8, [sp, #128]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #95]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #67]
-; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload
-; NONEON-NOSVE-NEXT: strh w8, [sp, #142]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #94]
-; NONEON-NOSVE-NEXT: add w9, w9, w9
-; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #448] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: strh w8, [sp, #140]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #93]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #65]
-; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #432] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: strh w8, [sp, #138]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #92]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #136]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #83]
-; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #416] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: strh w8, [sp, #118]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #82]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #128]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #116]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #81]
-; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #400] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: strh w8, [sp, #114]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #80]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #192]
-; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #384] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: strh w8, [sp, #112]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #87]
-; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #368] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: strh w8, [sp, #126]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #86]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #124]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #85]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #122]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #84]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #120]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #107]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #112]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #166]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #106]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #164]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #105]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #176]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #162]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #104]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #182]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #160]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #111]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #174]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #110]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #172]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #109]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #170]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #108]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #168]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #99]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #160]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #150]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #98]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #148]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #97]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #224]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #146]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #96]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #144]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #103]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #158]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #102]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #156]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #101]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #154]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #100]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #152]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #198]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #144]
-; NONEON-NOSVE-NEXT: str w8, [sp, #284]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #196]
-; NONEON-NOSVE-NEXT: str w8, [sp, #280]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #194]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #208]
-; NONEON-NOSVE-NEXT: str w8, [sp, #276]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #192]
-; NONEON-NOSVE-NEXT: str w8, [sp, #272]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #206]
-; NONEON-NOSVE-NEXT: str w8, [sp, #300]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #204]
-; NONEON-NOSVE-NEXT: str w8, [sp, #296]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #202]
-; NONEON-NOSVE-NEXT: str w8, [sp, #292]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #200]
-; NONEON-NOSVE-NEXT: str w8, [sp, #288]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #180]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #272]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #248]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #178]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #176]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #240]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #190]
-; NONEON-NOSVE-NEXT: str w8, [sp, #268]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #188]
-; NONEON-NOSVE-NEXT: str w8, [sp, #264]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #186]
-; NONEON-NOSVE-NEXT: str w8, [sp, #260]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #184]
-; NONEON-NOSVE-NEXT: str w8, [sp, #256]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #230]
-; NONEON-NOSVE-NEXT: ldp q3, q4, [sp, #240]
-; NONEON-NOSVE-NEXT: str w8, [sp, #348]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #228]
-; NONEON-NOSVE-NEXT: str w8, [sp, #344]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #226]
-; NONEON-NOSVE-NEXT: str w8, [sp, #340]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #224]
-; NONEON-NOSVE-NEXT: str w8, [sp, #336]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #238]
-; NONEON-NOSVE-NEXT: str w8, [sp, #364]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #236]
-; NONEON-NOSVE-NEXT: str w8, [sp, #360]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #234]
-; NONEON-NOSVE-NEXT: str w8, [sp, #356]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #232]
-; NONEON-NOSVE-NEXT: str w8, [sp, #352]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #214]
-; NONEON-NOSVE-NEXT: ldp q6, q7, [sp, #336]
-; NONEON-NOSVE-NEXT: str w8, [sp, #316]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #212]
-; NONEON-NOSVE-NEXT: str w8, [sp, #312]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #210]
-; NONEON-NOSVE-NEXT: str w8, [sp, #308]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #208]
-; NONEON-NOSVE-NEXT: str w8, [sp, #304]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #222]
-; NONEON-NOSVE-NEXT: str w8, [sp, #332]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #220]
-; NONEON-NOSVE-NEXT: str w8, [sp, #328]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #218]
-; NONEON-NOSVE-NEXT: str w8, [sp, #324]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #216]
-; NONEON-NOSVE-NEXT: str w8, [sp, #320]
-; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #304]
+; NONEON-NOSVE-NEXT: add w16, w16, w16
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152]
+; NONEON-NOSVE-NEXT: sxtb w9, w17
+; NONEON-NOSVE-NEXT: sxtb w8, w16
+; NONEON-NOSVE-NEXT: ldrb w12, [sp, #40]
+; NONEON-NOSVE-NEXT: ldrb w13, [sp, #41]
+; NONEON-NOSVE-NEXT: add w15, w15, w15
+; NONEON-NOSVE-NEXT: add w14, w14, w14
+; NONEON-NOSVE-NEXT: ldrb w10, [sp, #46]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144]
+; NONEON-NOSVE-NEXT: sxtb w9, w15
+; NONEON-NOSVE-NEXT: sxtb w8, w14
+; NONEON-NOSVE-NEXT: ldrb w11, [sp, #47]
+; NONEON-NOSVE-NEXT: add w13, w13, w13
+; NONEON-NOSVE-NEXT: add w12, w12, w12
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136]
+; NONEON-NOSVE-NEXT: sxtb w9, w13
+; NONEON-NOSVE-NEXT: sxtb w8, w12
+; NONEON-NOSVE-NEXT: add w11, w11, w11
+; NONEON-NOSVE-NEXT: add w10, w10, w10
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128]
+; NONEON-NOSVE-NEXT: sxtb w9, w11
+; NONEON-NOSVE-NEXT: ldr w11, [sp, #8] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: sxtb w8, w10
+; NONEON-NOSVE-NEXT: ldr w10, [sp, #12] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: add w11, w11, w11
+; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #80]
+; NONEON-NOSVE-NEXT: add w10, w10, w10
+; NONEON-NOSVE-NEXT: sxtb w11, w11
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120]
+; NONEON-NOSVE-NEXT: sxtb w8, w10
+; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #48]
+; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #144]
+; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #112]
+; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #256] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #112]
; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #240] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #32]
+; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #224] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #64]
+; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #208] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q5, q2, [x1, #96]
-; NONEON-NOSVE-NEXT: add sp, sp, #464
+; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #192] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #176] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: add sp, sp, #272
; NONEON-NOSVE-NEXT: ret
%a = load <32 x i8>, ptr %in
%b = add <32 x i8> %a, %a
@@ -944,57 +714,26 @@ define void @sext_v8i8_v8i64(<8 x i8> %a, ptr %out) {
;
; NONEON-NOSVE-LABEL: sext_v8i8_v8i64:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #176
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 176
+; NONEON-NOSVE-NEXT: sub sp, sp, #80
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80
; NONEON-NOSVE-NEXT: str d0, [sp, #8]
-; NONEON-NOSVE-NEXT: add x8, sp, #144
-; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11]
-; NONEON-NOSVE-NEXT: strh w9, [sp, #30]
-; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #10]
-; NONEON-NOSVE-NEXT: strh w9, [sp, #28]
-; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9]
-; NONEON-NOSVE-NEXT: strh w9, [sp, #26]
-; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #8]
-; NONEON-NOSVE-NEXT: strh w9, [sp, #24]
-; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15]
-; NONEON-NOSVE-NEXT: strh w9, [sp, #22]
-; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #14]
-; NONEON-NOSVE-NEXT: strh w9, [sp, #20]
-; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13]
-; NONEON-NOSVE-NEXT: strh w9, [sp, #18]
-; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #12]
-; NONEON-NOSVE-NEXT: strh w9, [sp, #16]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #42]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #40]
-; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #64]
-; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #46]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #44]
-; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #72]
-; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #34]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #32]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #64]
-; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #48]
-; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #38]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #36]
-; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #56]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #96]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #48]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #80]
-; NONEON-NOSVE-NEXT: ldpsw x9, x10, [sp, #96]
-; NONEON-NOSVE-NEXT: stp x9, x10, [sp, #144]
-; NONEON-NOSVE-NEXT: ldpsw x9, x10, [sp, #104]
-; NONEON-NOSVE-NEXT: stp x9, x10, [sp, #160]
-; NONEON-NOSVE-NEXT: ldpsw x9, x10, [sp, #80]
-; NONEON-NOSVE-NEXT: stp x9, x10, [sp, #112]
-; NONEON-NOSVE-NEXT: ldpsw x9, x10, [sp, #88]
-; NONEON-NOSVE-NEXT: stp x9, x10, [sp, #128]
-; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #112]
-; NONEON-NOSVE-NEXT: ldp q2, q3, [x8]
-; NONEON-NOSVE-NEXT: stp q1, q0, [x0, #32]
+; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #9]
+; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #8]
+; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #64]
+; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #11]
+; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #10]
+; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48]
+; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #13]
+; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #12]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #48]
+; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32]
+; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #15]
+; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #14]
+; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #16]
+; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16]
; NONEON-NOSVE-NEXT: stp q2, q3, [x0]
-; NONEON-NOSVE-NEXT: add sp, sp, #176
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #80
; NONEON-NOSVE-NEXT: ret
%b = sext <8 x i8> %a to <8 x i64>
store <8 x i64>%b, ptr %out
@@ -1034,109 +773,43 @@ define void @sext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
;
; NONEON-NOSVE-LABEL: sext_v16i8_v16i64:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #368
-; NONEON-NOSVE-NEXT: str x29, [sp, #352] // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 368
-; NONEON-NOSVE-NEXT: .cfi_offset w29, -16
-; NONEON-NOSVE-NEXT: str q0, [sp]
-; NONEON-NOSVE-NEXT: ldr x29, [sp, #352] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: str q0, [sp, #-160]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #24]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #35]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #70]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #34]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #68]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #33]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #66]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #32]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #64]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #39]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #62]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #38]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #60]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #37]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #58]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #36]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #56]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #27]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #56]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #54]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #26]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #52]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #25]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #88]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #50]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #24]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #98]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #48]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #31]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #46]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #30]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #44]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #29]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #42]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #28]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #40]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #96]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #40]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #102]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #100]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #160]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #90]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #88]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #72]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #152]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #94]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #92]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #82]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #80]
-; NONEON-NOSVE-NEXT: str d0, [sp, #360]
-; NONEON-NOSVE-NEXT: ldp d2, d0, [sp, #136]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #86]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #84]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #74]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #72]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #208]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #120]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #78]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #76]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112]
-; NONEON-NOSVE-NEXT: stp d0, d2, [sp, #192]
-; NONEON-NOSVE-NEXT: ldp d2, d0, [sp, #104]
-; NONEON-NOSVE-NEXT: str d2, [sp, #168]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #176]
-; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #216]
-; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #320]
-; NONEON-NOSVE-NEXT: ldrsw x9, [sp, #364]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #360]
-; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #336]
-; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #200]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #320]
-; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #288]
-; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #208]
-; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #304]
-; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #184]
-; NONEON-NOSVE-NEXT: ldp q3, q4, [sp, #288]
-; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #256]
-; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #192]
-; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #272]
-; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #168]
-; NONEON-NOSVE-NEXT: ldp q6, q7, [sp, #256]
-; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #224]
-; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #176]
-; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #240]
-; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #224]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
+; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #25]
+; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #24]
+; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #144]
+; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #27]
+; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #26]
+; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #128]
+; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #29]
+; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #28]
+; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #128]
+; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #112]
+; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #31]
+; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #30]
+; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #96]
+; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #17]
+; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #16]
+; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #96]
+; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #80]
+; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #19]
+; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #18]
+; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #64]
+; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #21]
+; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #20]
+; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #64]
+; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48]
+; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #23]
+; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #22]
+; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32]
+; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #32]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: stp q3, q4, [x0, #32]
; NONEON-NOSVE-NEXT: stp q6, q7, [x0, #64]
; NONEON-NOSVE-NEXT: stp q5, q2, [x0, #96]
-; NONEON-NOSVE-NEXT: add sp, sp, #368
+; NONEON-NOSVE-NEXT: add sp, sp, #160
; NONEON-NOSVE-NEXT: ret
%b = sext <16 x i8> %a to <16 x i64>
store <16 x i64> %b, ptr %out
@@ -1208,14 +881,14 @@ define void @sext_v32i8_v32i64(ptr %in, ptr %out) {
;
; NONEON-NOSVE-LABEL: sext_v32i8_v32i64:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: sub sp, sp, #752
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 848
+; NONEON-NOSVE-NEXT: sub sp, sp, #400
+; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #304] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #320] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #336] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #352] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #368] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #384] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 400
; NONEON-NOSVE-NEXT: .cfi_offset w19, -8
; NONEON-NOSVE-NEXT: .cfi_offset w20, -16
; NONEON-NOSVE-NEXT: .cfi_offset w21, -24
@@ -1230,345 +903,144 @@ define void @sext_v32i8_v32i64(ptr %in, ptr %out) {
; NONEON-NOSVE-NEXT: .cfi_offset w29, -96
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41]
-; NONEON-NOSVE-NEXT: ldrb w29, [sp, #18]
-; NONEON-NOSVE-NEXT: ldrb w27, [sp, #16]
-; NONEON-NOSVE-NEXT: ldrb w25, [sp, #30]
-; NONEON-NOSVE-NEXT: ldrb w23, [sp, #28]
+; NONEON-NOSVE-NEXT: ldrb w16, [sp, #16]
+; NONEON-NOSVE-NEXT: ldrb w17, [sp, #17]
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #46]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #47]
+; NONEON-NOSVE-NEXT: ldrb w14, [sp, #18]
+; NONEON-NOSVE-NEXT: ldrb w15, [sp, #19]
+; NONEON-NOSVE-NEXT: add w16, w16, w16
+; NONEON-NOSVE-NEXT: add w17, w17, w17
+; NONEON-NOSVE-NEXT: ldrb w30, [sp, #21]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: sxtb x19, w17
+; NONEON-NOSVE-NEXT: sxtb x20, w16
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20]
-; NONEON-NOSVE-NEXT: ldrb w21, [sp, #26]
-; NONEON-NOSVE-NEXT: ldrb w19, [sp, #24]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #19]
-; NONEON-NOSVE-NEXT: ldrb w6, [sp, #38]
+; NONEON-NOSVE-NEXT: add w7, w14, w14
+; NONEON-NOSVE-NEXT: add w18, w15, w15
+; NONEON-NOSVE-NEXT: sxtb x21, w18
+; NONEON-NOSVE-NEXT: ldrb w29, [sp, #22]
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #23]
+; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #160]
+; NONEON-NOSVE-NEXT: add w19, w30, w30
+; NONEON-NOSVE-NEXT: sxtb x7, w7
; NONEON-NOSVE-NEXT: add w8, w8, w8
-; NONEON-NOSVE-NEXT: ldrb w28, [sp, #17]
-; NONEON-NOSVE-NEXT: ldrb w16, [sp, #22]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #60]
-; NONEON-NOSVE-NEXT: add w8, w29, w29
-; NONEON-NOSVE-NEXT: ldrb w4, [sp, #36]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #58]
-; NONEON-NOSVE-NEXT: add w8, w27, w27
-; NONEON-NOSVE-NEXT: ldrb w26, [sp, #31]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #56]
-; NONEON-NOSVE-NEXT: add w8, w25, w25
+; NONEON-NOSVE-NEXT: ldrb w27, [sp, #24]
+; NONEON-NOSVE-NEXT: ldrb w28, [sp, #25]
+; NONEON-NOSVE-NEXT: sxtb x19, w19
+; NONEON-NOSVE-NEXT: sxtb x8, w8
+; NONEON-NOSVE-NEXT: stp x7, x21, [sp, #144]
; NONEON-NOSVE-NEXT: add w9, w9, w9
-; NONEON-NOSVE-NEXT: strb w8, [sp, #54]
-; NONEON-NOSVE-NEXT: add w8, w23, w23
-; NONEON-NOSVE-NEXT: ldrb w2, [sp, #34]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #52]
-; NONEON-NOSVE-NEXT: add w8, w21, w21
+; NONEON-NOSVE-NEXT: add w7, w29, w29
+; NONEON-NOSVE-NEXT: ldrb w25, [sp, #26]
+; NONEON-NOSVE-NEXT: ldrb w26, [sp, #27]
+; NONEON-NOSVE-NEXT: sxtb x9, w9
+; NONEON-NOSVE-NEXT: stp x8, x19, [sp, #128]
+; NONEON-NOSVE-NEXT: add w19, w28, w28
+; NONEON-NOSVE-NEXT: sxtb x7, w7
+; NONEON-NOSVE-NEXT: add w8, w27, w27
+; NONEON-NOSVE-NEXT: sxtb x19, w19
+; NONEON-NOSVE-NEXT: sxtb x8, w8
+; NONEON-NOSVE-NEXT: ldrb w23, [sp, #28]
; NONEON-NOSVE-NEXT: ldrb w24, [sp, #29]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #50]
-; NONEON-NOSVE-NEXT: add w8, w19, w19
-; NONEON-NOSVE-NEXT: ldrb w17, [sp, #23]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #59]
-; NONEON-NOSVE-NEXT: add w9, w28, w28
-; NONEON-NOSVE-NEXT: add w18, w16, w16
-; NONEON-NOSVE-NEXT: strb w8, [sp, #48]
-; NONEON-NOSVE-NEXT: add w8, w6, w6
-; NONEON-NOSVE-NEXT: ldrb w16, [sp, #32]
-; NONEON-NOSVE-NEXT: ldrb w22, [sp, #27]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #57]
+; NONEON-NOSVE-NEXT: stp x7, x9, [sp, #112]
; NONEON-NOSVE-NEXT: add w9, w26, w26
-; NONEON-NOSVE-NEXT: strb w8, [sp, #78]
-; NONEON-NOSVE-NEXT: add w8, w4, w4
-; NONEON-NOSVE-NEXT: ldrb w14, [sp, #46]
-; NONEON-NOSVE-NEXT: ldrb w20, [sp, #25]
-; NONEON-NOSVE-NEXT: ldrb w30, [sp, #21]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #55]
-; NONEON-NOSVE-NEXT: add w9, w24, w24
-; NONEON-NOSVE-NEXT: strb w8, [sp, #76]
-; NONEON-NOSVE-NEXT: add w8, w2, w2
-; NONEON-NOSVE-NEXT: ldrb w12, [sp, #44]
-; NONEON-NOSVE-NEXT: add w17, w17, w17
-; NONEON-NOSVE-NEXT: strb w9, [sp, #53]
-; NONEON-NOSVE-NEXT: add w9, w22, w22
-; NONEON-NOSVE-NEXT: strb w8, [sp, #74]
-; NONEON-NOSVE-NEXT: add w8, w16, w16
-; NONEON-NOSVE-NEXT: ldrb w10, [sp, #42]
-; NONEON-NOSVE-NEXT: strb w17, [sp, #63]
-; NONEON-NOSVE-NEXT: add w17, w30, w30
-; NONEON-NOSVE-NEXT: strb w9, [sp, #51]
-; NONEON-NOSVE-NEXT: add w9, w20, w20
-; NONEON-NOSVE-NEXT: ldrb w7, [sp, #39]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #72]
-; NONEON-NOSVE-NEXT: add w8, w14, w14
-; NONEON-NOSVE-NEXT: ldrb w5, [sp, #37]
-; NONEON-NOSVE-NEXT: strb w18, [sp, #62]
+; NONEON-NOSVE-NEXT: add w7, w25, w25
+; NONEON-NOSVE-NEXT: stp x8, x19, [sp, #96]
+; NONEON-NOSVE-NEXT: sxtb x9, w9
+; NONEON-NOSVE-NEXT: sxtb x8, w7
+; NONEON-NOSVE-NEXT: ldrb w6, [sp, #30]
+; NONEON-NOSVE-NEXT: ldrb w22, [sp, #31]
+; NONEON-NOSVE-NEXT: add w19, w24, w24
+; NONEON-NOSVE-NEXT: add w7, w23, w23
+; NONEON-NOSVE-NEXT: ldrb w4, [sp, #32]
+; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #80]
+; NONEON-NOSVE-NEXT: sxtb x9, w19
+; NONEON-NOSVE-NEXT: sxtb x8, w7
+; NONEON-NOSVE-NEXT: ldrb w5, [sp, #33]
+; NONEON-NOSVE-NEXT: add w19, w22, w22
+; NONEON-NOSVE-NEXT: add w6, w6, w6
+; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #64]
+; NONEON-NOSVE-NEXT: sxtb x9, w19
+; NONEON-NOSVE-NEXT: sxtb x8, w6
+; NONEON-NOSVE-NEXT: ldrb w2, [sp, #34]
; NONEON-NOSVE-NEXT: ldrb w3, [sp, #35]
-; NONEON-NOSVE-NEXT: ldrb w0, [sp, #33]
-; NONEON-NOSVE-NEXT: strb w17, [sp, #61]
-; NONEON-NOSVE-NEXT: ldrb w15, [sp, #47]
-; NONEON-NOSVE-NEXT: ldrb w13, [sp, #45]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #49]
-; NONEON-NOSVE-NEXT: add w9, w7, w7
-; NONEON-NOSVE-NEXT: ldrb w11, [sp, #43]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #70]
-; NONEON-NOSVE-NEXT: add w8, w12, w12
-; NONEON-NOSVE-NEXT: strb w8, [sp, #68]
-; NONEON-NOSVE-NEXT: add w8, w10, w10
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #66]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload
-; NONEON-NOSVE-NEXT: strb w9, [sp, #79]
-; NONEON-NOSVE-NEXT: add w9, w5, w5
-; NONEON-NOSVE-NEXT: strb w9, [sp, #77]
-; NONEON-NOSVE-NEXT: add w9, w3, w3
-; NONEON-NOSVE-NEXT: add w8, w8, w8
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #64]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #91]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #75]
-; NONEON-NOSVE-NEXT: add w9, w0, w0
-; NONEON-NOSVE-NEXT: strh w8, [sp, #134]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #90]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #73]
-; NONEON-NOSVE-NEXT: add w9, w15, w15
-; NONEON-NOSVE-NEXT: strh w8, [sp, #132]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #89]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #71]
-; NONEON-NOSVE-NEXT: add w9, w13, w13
-; NONEON-NOSVE-NEXT: strh w8, [sp, #130]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #88]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #69]
-; NONEON-NOSVE-NEXT: add w9, w11, w11
-; NONEON-NOSVE-NEXT: strh w8, [sp, #128]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #95]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #67]
-; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload
-; NONEON-NOSVE-NEXT: strh w8, [sp, #142]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #94]
-; NONEON-NOSVE-NEXT: add w9, w9, w9
-; NONEON-NOSVE-NEXT: strh w8, [sp, #140]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #93]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #65]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #138]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #92]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #136]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #83]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #118]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #82]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #128]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #116]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #81]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #114]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #80]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #192]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #112]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #87]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #126]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #86]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #124]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #85]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #122]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #84]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #120]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #107]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #112]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #166]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #106]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #164]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #105]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #176]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #162]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #104]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #178]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #160]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #111]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #174]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #110]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #172]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #109]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #170]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #108]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #168]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #99]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #160]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #150]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #98]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #148]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #97]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #224]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #146]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #96]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #144]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #103]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #158]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #102]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #156]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #101]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #154]
-; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #100]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #152]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #194]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #144]
-; NONEON-NOSVE-NEXT: str w8, [sp, #276]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #192]
-; NONEON-NOSVE-NEXT: str w8, [sp, #272]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #198]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #208]
-; NONEON-NOSVE-NEXT: str w8, [sp, #284]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #196]
-; NONEON-NOSVE-NEXT: str w8, [sp, #280]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #202]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #272]
-; NONEON-NOSVE-NEXT: str w8, [sp, #292]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #200]
-; NONEON-NOSVE-NEXT: str w8, [sp, #288]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #206]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #400]
-; NONEON-NOSVE-NEXT: str w8, [sp, #300]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #204]
-; NONEON-NOSVE-NEXT: str w8, [sp, #296]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #176]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #288]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #240]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #182]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #180]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #248]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #186]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #416]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #240]
-; NONEON-NOSVE-NEXT: str w8, [sp, #260]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #184]
-; NONEON-NOSVE-NEXT: str w8, [sp, #256]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #190]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #368]
-; NONEON-NOSVE-NEXT: str w8, [sp, #268]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #188]
-; NONEON-NOSVE-NEXT: ldrsw x9, [sp, #372]
-; NONEON-NOSVE-NEXT: str w8, [sp, #264]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #226]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #256]
-; NONEON-NOSVE-NEXT: str w8, [sp, #340]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #224]
-; NONEON-NOSVE-NEXT: str w8, [sp, #336]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #230]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #384]
-; NONEON-NOSVE-NEXT: str w8, [sp, #348]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #228]
-; NONEON-NOSVE-NEXT: str w8, [sp, #344]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #234]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #336]
-; NONEON-NOSVE-NEXT: str w8, [sp, #356]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #232]
-; NONEON-NOSVE-NEXT: str w8, [sp, #352]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #238]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #464]
-; NONEON-NOSVE-NEXT: str w8, [sp, #364]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #236]
-; NONEON-NOSVE-NEXT: str w8, [sp, #360]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #210]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #352]
-; NONEON-NOSVE-NEXT: str w8, [sp, #308]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #208]
-; NONEON-NOSVE-NEXT: str w8, [sp, #304]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #214]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #480]
-; NONEON-NOSVE-NEXT: str w8, [sp, #316]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #212]
-; NONEON-NOSVE-NEXT: str w8, [sp, #312]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #218]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #304]
-; NONEON-NOSVE-NEXT: str w8, [sp, #324]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #216]
-; NONEON-NOSVE-NEXT: str w8, [sp, #320]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #222]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #432]
-; NONEON-NOSVE-NEXT: str w8, [sp, #332]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #220]
-; NONEON-NOSVE-NEXT: str w8, [sp, #328]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #404]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #320]
-; NONEON-NOSVE-NEXT: str x8, [sp, #568]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #400]
-; NONEON-NOSVE-NEXT: str x8, [sp, #560]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #412]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #448]
-; NONEON-NOSVE-NEXT: str x8, [sp, #584]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #408]
-; NONEON-NOSVE-NEXT: str x8, [sp, #576]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #420]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #560]
-; NONEON-NOSVE-NEXT: str x8, [sp, #600]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #416]
-; NONEON-NOSVE-NEXT: str x8, [sp, #592]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #428]
-; NONEON-NOSVE-NEXT: str x8, [sp, #616]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #424]
-; NONEON-NOSVE-NEXT: str x8, [sp, #608]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #368]
-; NONEON-NOSVE-NEXT: ldp q2, q3, [sp, #592]
-; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #496]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #380]
-; NONEON-NOSVE-NEXT: str x8, [sp, #520]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #376]
-; NONEON-NOSVE-NEXT: str x8, [sp, #512]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #388]
-; NONEON-NOSVE-NEXT: ldp q4, q5, [sp, #496]
-; NONEON-NOSVE-NEXT: str x8, [sp, #536]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #384]
-; NONEON-NOSVE-NEXT: str x8, [sp, #528]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #396]
-; NONEON-NOSVE-NEXT: str x8, [sp, #552]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #392]
-; NONEON-NOSVE-NEXT: str x8, [sp, #544]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #468]
-; NONEON-NOSVE-NEXT: ldp q6, q7, [sp, #528]
-; NONEON-NOSVE-NEXT: str x8, [sp, #696]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #464]
-; NONEON-NOSVE-NEXT: str x8, [sp, #688]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #476]
-; NONEON-NOSVE-NEXT: str x8, [sp, #712]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #472]
-; NONEON-NOSVE-NEXT: str x8, [sp, #704]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #484]
-; NONEON-NOSVE-NEXT: ldp q16, q17, [sp, #688]
-; NONEON-NOSVE-NEXT: str x8, [sp, #728]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #480]
-; NONEON-NOSVE-NEXT: str x8, [sp, #720]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #492]
-; NONEON-NOSVE-NEXT: str x8, [sp, #744]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #488]
-; NONEON-NOSVE-NEXT: str x8, [sp, #736]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #436]
-; NONEON-NOSVE-NEXT: ldp q19, q20, [sp, #720]
-; NONEON-NOSVE-NEXT: str x8, [sp, #632]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #432]
-; NONEON-NOSVE-NEXT: str x8, [sp, #624]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #444]
-; NONEON-NOSVE-NEXT: str x8, [sp, #648]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #440]
-; NONEON-NOSVE-NEXT: str x8, [sp, #640]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #452]
-; NONEON-NOSVE-NEXT: ldp q22, q23, [sp, #624]
-; NONEON-NOSVE-NEXT: str x8, [sp, #664]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #448]
-; NONEON-NOSVE-NEXT: str x8, [sp, #656]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #460]
-; NONEON-NOSVE-NEXT: str x8, [sp, #680]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #456]
-; NONEON-NOSVE-NEXT: str x8, [sp, #672]
-; NONEON-NOSVE-NEXT: ldp q21, q18, [sp, #656]
+; NONEON-NOSVE-NEXT: add w5, w5, w5
+; NONEON-NOSVE-NEXT: add w4, w4, w4
+; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48]
+; NONEON-NOSVE-NEXT: sxtb x9, w5
+; NONEON-NOSVE-NEXT: sxtb x8, w4
+; NONEON-NOSVE-NEXT: ldrb w18, [sp, #36]
+; NONEON-NOSVE-NEXT: ldrb w0, [sp, #37]
+; NONEON-NOSVE-NEXT: add w3, w3, w3
+; NONEON-NOSVE-NEXT: add w2, w2, w2
+; NONEON-NOSVE-NEXT: ldrb w16, [sp, #38]
+; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #288]
+; NONEON-NOSVE-NEXT: sxtb x9, w3
+; NONEON-NOSVE-NEXT: sxtb x8, w2
+; NONEON-NOSVE-NEXT: ldrb w17, [sp, #39]
+; NONEON-NOSVE-NEXT: add w0, w0, w0
+; NONEON-NOSVE-NEXT: add w18, w18, w18
+; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #272]
+; NONEON-NOSVE-NEXT: sxtb x9, w0
+; NONEON-NOSVE-NEXT: sxtb x8, w18
+; NONEON-NOSVE-NEXT: ldrb w14, [sp, #40]
+; NONEON-NOSVE-NEXT: ldrb w15, [sp, #41]
+; NONEON-NOSVE-NEXT: add w17, w17, w17
+; NONEON-NOSVE-NEXT: add w16, w16, w16
+; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #256]
+; NONEON-NOSVE-NEXT: sxtb x9, w17
+; NONEON-NOSVE-NEXT: sxtb x8, w16
+; NONEON-NOSVE-NEXT: ldrb w12, [sp, #42]
+; NONEON-NOSVE-NEXT: ldrb w13, [sp, #43]
+; NONEON-NOSVE-NEXT: add w15, w15, w15
+; NONEON-NOSVE-NEXT: add w14, w14, w14
+; NONEON-NOSVE-NEXT: ldrb w10, [sp, #44]
+; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #240]
+; NONEON-NOSVE-NEXT: sxtb x9, w15
+; NONEON-NOSVE-NEXT: sxtb x8, w14
+; NONEON-NOSVE-NEXT: ldrb w11, [sp, #45]
+; NONEON-NOSVE-NEXT: add w13, w13, w13
+; NONEON-NOSVE-NEXT: add w12, w12, w12
+; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #224]
+; NONEON-NOSVE-NEXT: sxtb x9, w13
+; NONEON-NOSVE-NEXT: sxtb x8, w12
+; NONEON-NOSVE-NEXT: add w11, w11, w11
+; NONEON-NOSVE-NEXT: add w10, w10, w10
+; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #208]
+; NONEON-NOSVE-NEXT: sxtb x9, w11
+; NONEON-NOSVE-NEXT: ldr w11, [sp, #8] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: sxtb x8, w10
+; NONEON-NOSVE-NEXT: ldr w10, [sp, #12] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: add w11, w11, w11
+; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #144]
+; NONEON-NOSVE-NEXT: add w10, w10, w10
+; NONEON-NOSVE-NEXT: sxtb x11, w11
+; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #192]
+; NONEON-NOSVE-NEXT: sxtb x8, w10
+; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #112]
+; NONEON-NOSVE-NEXT: ldp q5, q4, [sp, #80]
+; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #176]
+; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #48]
+; NONEON-NOSVE-NEXT: ldp q17, q16, [sp, #272]
+; NONEON-NOSVE-NEXT: ldp q18, q21, [sp, #176]
+; NONEON-NOSVE-NEXT: ldp q20, q19, [sp, #240]
+; NONEON-NOSVE-NEXT: ldp q23, q22, [sp, #208]
; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #384] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q2, q3, [x1, #32]
+; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #368] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q4, q5, [x1, #64]
+; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #352] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #96]
+; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #336] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q16, q17, [x1, #128]
+; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #320] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q19, q20, [x1, #160]
+; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #304] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q22, q23, [x1, #192]
; NONEON-NOSVE-NEXT: stp q21, q18, [x1, #224]
-; NONEON-NOSVE-NEXT: add sp, sp, #752
-; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: add sp, sp, #400
; NONEON-NOSVE-NEXT: ret
%a = load <32 x i8>, ptr %in
%b = add <32 x i8> %a, %a
@@ -1636,91 +1108,70 @@ define void @sext_v16i16_v16i32(ptr %in, ptr %out) {
;
; NONEON-NOSVE-LABEL: sext_v16i16_v16i32:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #160
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
-; NONEON-NOSVE-NEXT: stp q1, q0, [sp]
+; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-96]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldrh w13, [sp, #4]
-; NONEON-NOSVE-NEXT: ldrh w14, [sp, #6]
-; NONEON-NOSVE-NEXT: ldrh w3, [sp, #2]
-; NONEON-NOSVE-NEXT: ldrh w5, [sp]
-; NONEON-NOSVE-NEXT: ldrh w2, [sp, #12]
-; NONEON-NOSVE-NEXT: ldrh w4, [sp, #14]
+; NONEON-NOSVE-NEXT: ldrh w16, [sp, #6]
+; NONEON-NOSVE-NEXT: ldrh w12, [sp, #2]
+; NONEON-NOSVE-NEXT: ldrh w3, [sp]
+; NONEON-NOSVE-NEXT: ldrh w4, [sp, #12]
+; NONEON-NOSVE-NEXT: ldrh w5, [sp, #14]
; NONEON-NOSVE-NEXT: add w13, w13, w13
-; NONEON-NOSVE-NEXT: add w14, w14, w14
-; NONEON-NOSVE-NEXT: ldrh w18, [sp, #8]
-; NONEON-NOSVE-NEXT: ldrh w0, [sp, #10]
-; NONEON-NOSVE-NEXT: strh w14, [sp, #46]
-; NONEON-NOSVE-NEXT: add w14, w3, w3
-; NONEON-NOSVE-NEXT: strh w13, [sp, #44]
-; NONEON-NOSVE-NEXT: add w13, w5, w5
+; NONEON-NOSVE-NEXT: add w16, w16, w16
+; NONEON-NOSVE-NEXT: add w12, w12, w12
+; NONEON-NOSVE-NEXT: sxth w16, w16
+; NONEON-NOSVE-NEXT: sxth w13, w13
+; NONEON-NOSVE-NEXT: add w3, w3, w3
+; NONEON-NOSVE-NEXT: sxth w12, w12
+; NONEON-NOSVE-NEXT: ldrh w0, [sp, #8]
+; NONEON-NOSVE-NEXT: ldrh w2, [sp, #10]
+; NONEON-NOSVE-NEXT: stp w13, w16, [sp, #56]
+; NONEON-NOSVE-NEXT: sxth w13, w3
+; NONEON-NOSVE-NEXT: add w16, w5, w5
+; NONEON-NOSVE-NEXT: add w3, w4, w4
+; NONEON-NOSVE-NEXT: ldrh w17, [sp, #20]
+; NONEON-NOSVE-NEXT: ldrh w18, [sp, #22]
+; NONEON-NOSVE-NEXT: stp w13, w12, [sp, #48]
+; NONEON-NOSVE-NEXT: sxth w12, w16
+; NONEON-NOSVE-NEXT: sxth w13, w3
+; NONEON-NOSVE-NEXT: add w16, w2, w2
+; NONEON-NOSVE-NEXT: add w0, w0, w0
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24]
-; NONEON-NOSVE-NEXT: strh w14, [sp, #42]
-; NONEON-NOSVE-NEXT: add w14, w4, w4
+; NONEON-NOSVE-NEXT: stp w13, w12, [sp, #40]
+; NONEON-NOSVE-NEXT: sxth w12, w16
+; NONEON-NOSVE-NEXT: sxth w13, w0
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #26]
-; NONEON-NOSVE-NEXT: strh w13, [sp, #40]
-; NONEON-NOSVE-NEXT: add w13, w2, w2
-; NONEON-NOSVE-NEXT: ldrh w17, [sp, #22]
-; NONEON-NOSVE-NEXT: strh w14, [sp, #38]
-; NONEON-NOSVE-NEXT: add w14, w0, w0
-; NONEON-NOSVE-NEXT: add w9, w9, w9
-; NONEON-NOSVE-NEXT: strh w13, [sp, #36]
-; NONEON-NOSVE-NEXT: add w13, w18, w18
-; NONEON-NOSVE-NEXT: add w8, w8, w8
-; NONEON-NOSVE-NEXT: strh w14, [sp, #34]
; NONEON-NOSVE-NEXT: ldrh w10, [sp, #28]
; NONEON-NOSVE-NEXT: ldrh w11, [sp, #30]
-; NONEON-NOSVE-NEXT: strh w13, [sp, #32]
-; NONEON-NOSVE-NEXT: ldrh w12, [sp, #16]
+; NONEON-NOSVE-NEXT: ldrh w14, [sp, #16]
; NONEON-NOSVE-NEXT: ldrh w15, [sp, #18]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT: ldrh w16, [sp, #20]
-; NONEON-NOSVE-NEXT: strh w9, [sp, #50]
-; NONEON-NOSVE-NEXT: add w14, w17, w17
-; NONEON-NOSVE-NEXT: add w12, w12, w12
-; NONEON-NOSVE-NEXT: strh w8, [sp, #48]
-; NONEON-NOSVE-NEXT: add w13, w16, w16
+; NONEON-NOSVE-NEXT: add w16, w18, w18
+; NONEON-NOSVE-NEXT: add w17, w17, w17
+; NONEON-NOSVE-NEXT: stp w13, w12, [sp, #32]
+; NONEON-NOSVE-NEXT: sxth w12, w16
+; NONEON-NOSVE-NEXT: sxth w13, w17
+; NONEON-NOSVE-NEXT: add w15, w15, w15
+; NONEON-NOSVE-NEXT: add w14, w14, w14
; NONEON-NOSVE-NEXT: add w11, w11, w11
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64]
; NONEON-NOSVE-NEXT: add w10, w10, w10
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #78]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #76]
-; NONEON-NOSVE-NEXT: strh w14, [sp, #62]
-; NONEON-NOSVE-NEXT: add w14, w15, w15
-; NONEON-NOSVE-NEXT: strh w13, [sp, #60]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #74]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #72]
-; NONEON-NOSVE-NEXT: strh w14, [sp, #58]
-; NONEON-NOSVE-NEXT: strh w12, [sp, #56]
-; NONEON-NOSVE-NEXT: strh w11, [sp, #54]
-; NONEON-NOSVE-NEXT: strh w10, [sp, #52]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #70]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #68]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #66]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #64]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #94]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #92]
-; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #96]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #90]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #88]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #86]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #84]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #82]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #80]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #128]
+; NONEON-NOSVE-NEXT: add w9, w9, w9
+; NONEON-NOSVE-NEXT: add w8, w8, w8
+; NONEON-NOSVE-NEXT: stp w13, w12, [sp, #88]
+; NONEON-NOSVE-NEXT: sxth w12, w15
+; NONEON-NOSVE-NEXT: sxth w13, w14
+; NONEON-NOSVE-NEXT: sxth w11, w11
+; NONEON-NOSVE-NEXT: sxth w10, w10
+; NONEON-NOSVE-NEXT: sxth w9, w9
+; NONEON-NOSVE-NEXT: sxth w8, w8
+; NONEON-NOSVE-NEXT: stp w13, w12, [sp, #80]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #32]
+; NONEON-NOSVE-NEXT: stp w10, w11, [sp, #72]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64]
+; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT: stp q2, q3, [x1]
; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32]
-; NONEON-NOSVE-NEXT: add sp, sp, #160
+; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
%a = load <16 x i16>, ptr %in
%b = add <16 x i16> %a, %a
@@ -1746,24 +1197,18 @@ define void @sext_v4i16_v4i64(<4 x i16> %a, ptr %out) {
;
; NONEON-NOSVE-LABEL: sext_v4i16_v4i64:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #80
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80
+; NONEON-NOSVE-NEXT: sub sp, sp, #48
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: str d0, [sp, #8]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #8]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #12]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #40]
-; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #64]
-; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #32]
-; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48]
+; NONEON-NOSVE-NEXT: ldrsh x9, [sp, #10]
+; NONEON-NOSVE-NEXT: ldrsh x8, [sp, #8]
+; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32]
+; NONEON-NOSVE-NEXT: ldrsh x9, [sp, #14]
+; NONEON-NOSVE-NEXT: ldrsh x8, [sp, #12]
+; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #16]
+; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16]
; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
-; NONEON-NOSVE-NEXT: add sp, sp, #80
+; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
%b = sext <4 x i16> %a to <4 x i64>
store <4 x i64>%b, ptr %out
@@ -1789,39 +1234,27 @@ define void @sext_v8i16_v8i64(<8 x i16> %a, ptr %out) {
;
; NONEON-NOSVE-LABEL: sext_v8i16_v8i64:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: str q0, [sp, #-160]!
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160
+; NONEON-NOSVE-NEXT: str q0, [sp, #-96]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp]
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #26]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #24]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #30]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #28]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #18]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #22]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #88]
-; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #144]
-; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #80]
-; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #128]
-; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #72]
-; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #128]
-; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #112]
-; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #64]
-; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #96]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96]
+; NONEON-NOSVE-NEXT: ldrsh x9, [sp, #26]
+; NONEON-NOSVE-NEXT: ldrsh x8, [sp, #24]
+; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #80]
+; NONEON-NOSVE-NEXT: ldrsh x9, [sp, #30]
+; NONEON-NOSVE-NEXT: ldrsh x8, [sp, #28]
+; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #64]
+; NONEON-NOSVE-NEXT: ldrsh x9, [sp, #18]
+; NONEON-NOSVE-NEXT: ldrsh x8, [sp, #16]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64]
+; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48]
+; NONEON-NOSVE-NEXT: ldrsh x9, [sp, #22]
+; NONEON-NOSVE-NEXT: ldrsh x8, [sp, #20]
+; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32]
+; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q2, q3, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [x0, #32]
-; NONEON-NOSVE-NEXT: add sp, sp, #160
+; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
%b = sext <8 x i16> %a to <8 x i64>
store <8 x i64>%b, ptr %out
@@ -1860,124 +1293,75 @@ define void @sext_v16i16_v16i64(ptr %in, ptr %out) {
;
; NONEON-NOSVE-LABEL: sext_v16i16_v16i64:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #368
-; NONEON-NOSVE-NEXT: str x29, [sp, #352] // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 368
-; NONEON-NOSVE-NEXT: .cfi_offset w29, -16
+; NONEON-NOSVE-NEXT: sub sp, sp, #160
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
-; NONEON-NOSVE-NEXT: ldr x29, [sp, #352] // 8-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q1, q0, [sp]
-; NONEON-NOSVE-NEXT: ldrh w13, [sp, #4]
-; NONEON-NOSVE-NEXT: ldrh w14, [sp, #6]
-; NONEON-NOSVE-NEXT: ldrh w3, [sp, #2]
-; NONEON-NOSVE-NEXT: ldrh w5, [sp]
-; NONEON-NOSVE-NEXT: ldrh w2, [sp, #12]
-; NONEON-NOSVE-NEXT: ldrh w4, [sp, #14]
+; NONEON-NOSVE-NEXT: ldrh w13, [sp]
+; NONEON-NOSVE-NEXT: ldrh w16, [sp, #2]
+; NONEON-NOSVE-NEXT: ldrh w12, [sp, #6]
+; NONEON-NOSVE-NEXT: ldrh w3, [sp, #4]
+; NONEON-NOSVE-NEXT: ldrh w4, [sp, #8]
+; NONEON-NOSVE-NEXT: ldrh w5, [sp, #10]
; NONEON-NOSVE-NEXT: add w13, w13, w13
-; NONEON-NOSVE-NEXT: add w14, w14, w14
-; NONEON-NOSVE-NEXT: ldrh w18, [sp, #8]
-; NONEON-NOSVE-NEXT: ldrh w0, [sp, #10]
-; NONEON-NOSVE-NEXT: strh w14, [sp, #54]
-; NONEON-NOSVE-NEXT: add w14, w3, w3
-; NONEON-NOSVE-NEXT: strh w13, [sp, #52]
-; NONEON-NOSVE-NEXT: add w13, w5, w5
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24]
-; NONEON-NOSVE-NEXT: strh w14, [sp, #50]
-; NONEON-NOSVE-NEXT: add w14, w4, w4
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #26]
-; NONEON-NOSVE-NEXT: strh w13, [sp, #48]
-; NONEON-NOSVE-NEXT: add w13, w2, w2
-; NONEON-NOSVE-NEXT: ldrh w17, [sp, #22]
-; NONEON-NOSVE-NEXT: strh w14, [sp, #46]
-; NONEON-NOSVE-NEXT: add w14, w0, w0
-; NONEON-NOSVE-NEXT: add w9, w9, w9
-; NONEON-NOSVE-NEXT: strh w13, [sp, #44]
-; NONEON-NOSVE-NEXT: add w13, w18, w18
-; NONEON-NOSVE-NEXT: add w8, w8, w8
-; NONEON-NOSVE-NEXT: strh w14, [sp, #42]
-; NONEON-NOSVE-NEXT: ldrh w10, [sp, #28]
-; NONEON-NOSVE-NEXT: ldrh w11, [sp, #30]
-; NONEON-NOSVE-NEXT: strh w13, [sp, #40]
-; NONEON-NOSVE-NEXT: ldrh w12, [sp, #16]
-; NONEON-NOSVE-NEXT: ldrh w15, [sp, #18]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #40]
-; NONEON-NOSVE-NEXT: ldrh w16, [sp, #20]
-; NONEON-NOSVE-NEXT: strh w9, [sp, #58]
-; NONEON-NOSVE-NEXT: add w14, w17, w17
+; NONEON-NOSVE-NEXT: add w16, w16, w16
; NONEON-NOSVE-NEXT: add w12, w12, w12
-; NONEON-NOSVE-NEXT: strh w8, [sp, #56]
-; NONEON-NOSVE-NEXT: add w13, w16, w16
+; NONEON-NOSVE-NEXT: sxth x16, w16
+; NONEON-NOSVE-NEXT: sxth x13, w13
+; NONEON-NOSVE-NEXT: add w3, w3, w3
+; NONEON-NOSVE-NEXT: sxth x12, w12
+; NONEON-NOSVE-NEXT: ldrh w0, [sp, #12]
+; NONEON-NOSVE-NEXT: ldrh w2, [sp, #14]
+; NONEON-NOSVE-NEXT: stp x13, x16, [sp, #80]
+; NONEON-NOSVE-NEXT: sxth x13, w3
+; NONEON-NOSVE-NEXT: add w16, w5, w5
+; NONEON-NOSVE-NEXT: add w3, w4, w4
+; NONEON-NOSVE-NEXT: ldrh w17, [sp, #16]
+; NONEON-NOSVE-NEXT: ldrh w18, [sp, #18]
+; NONEON-NOSVE-NEXT: stp x13, x12, [sp, #64]
+; NONEON-NOSVE-NEXT: sxth x12, w16
+; NONEON-NOSVE-NEXT: sxth x13, w3
+; NONEON-NOSVE-NEXT: add w16, w2, w2
+; NONEON-NOSVE-NEXT: add w0, w0, w0
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28]
+; NONEON-NOSVE-NEXT: stp x13, x12, [sp, #48]
+; NONEON-NOSVE-NEXT: sxth x12, w16
+; NONEON-NOSVE-NEXT: sxth x13, w0
+; NONEON-NOSVE-NEXT: ldrh w9, [sp, #30]
+; NONEON-NOSVE-NEXT: ldrh w10, [sp, #24]
+; NONEON-NOSVE-NEXT: ldrh w11, [sp, #26]
+; NONEON-NOSVE-NEXT: ldrh w14, [sp, #20]
+; NONEON-NOSVE-NEXT: ldrh w15, [sp, #22]
+; NONEON-NOSVE-NEXT: add w16, w18, w18
+; NONEON-NOSVE-NEXT: add w17, w17, w17
+; NONEON-NOSVE-NEXT: stp x13, x12, [sp, #32]
+; NONEON-NOSVE-NEXT: sxth x12, w16
+; NONEON-NOSVE-NEXT: sxth x13, w17
+; NONEON-NOSVE-NEXT: add w15, w15, w15
+; NONEON-NOSVE-NEXT: add w14, w14, w14
; NONEON-NOSVE-NEXT: add w11, w11, w11
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #72]
; NONEON-NOSVE-NEXT: add w10, w10, w10
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #82]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #80]
-; NONEON-NOSVE-NEXT: strh w14, [sp, #70]
-; NONEON-NOSVE-NEXT: add w14, w15, w15
-; NONEON-NOSVE-NEXT: strh w13, [sp, #68]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #86]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #84]
-; NONEON-NOSVE-NEXT: strh w14, [sp, #66]
-; NONEON-NOSVE-NEXT: strh w12, [sp, #64]
-; NONEON-NOSVE-NEXT: strh w11, [sp, #62]
-; NONEON-NOSVE-NEXT: strh w10, [sp, #60]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #56]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #74]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #72]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #78]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #76]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #88]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #120]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #98]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #96]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #102]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #100]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #184]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #104]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #160]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #90]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #88]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #94]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #92]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #168]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #152]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144]
-; NONEON-NOSVE-NEXT: str d0, [sp, #360]
-; NONEON-NOSVE-NEXT: ldp d2, d0, [sp, #136]
-; NONEON-NOSVE-NEXT: str d2, [sp, #200]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #208]
-; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #184]
-; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #256]
-; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #192]
-; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #272]
-; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #168]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #256]
-; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #224]
-; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #176]
-; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #240]
-; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #216]
-; NONEON-NOSVE-NEXT: ldp q3, q4, [sp, #224]
-; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #320]
-; NONEON-NOSVE-NEXT: ldrsw x9, [sp, #364]
-; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #360]
-; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #336]
-; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #200]
-; NONEON-NOSVE-NEXT: ldp q6, q7, [sp, #320]
-; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #288]
-; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #208]
-; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #304]
-; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #288]
+; NONEON-NOSVE-NEXT: add w9, w9, w9
+; NONEON-NOSVE-NEXT: add w8, w8, w8
+; NONEON-NOSVE-NEXT: stp x13, x12, [sp, #144]
+; NONEON-NOSVE-NEXT: sxth x12, w15
+; NONEON-NOSVE-NEXT: sxth x13, w14
+; NONEON-NOSVE-NEXT: sxth x11, w11
+; NONEON-NOSVE-NEXT: sxth x10, w10
+; NONEON-NOSVE-NEXT: sxth x9, w9
+; NONEON-NOSVE-NEXT: sxth x8, w8
+; NONEON-NOSVE-NEXT: stp x13, x12, [sp, #128]
+; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64]
+; NONEON-NOSVE-NEXT: stp x10, x11, [sp, #112]
+; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #32]
+; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #96]
+; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #128]
+; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #96]
; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #32]
; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #64]
; NONEON-NOSVE-NEXT: stp q5, q2, [x1, #96]
-; NONEON-NOSVE-NEXT: add sp, sp, #368
+; NONEON-NOSVE-NEXT: add sp, sp, #160
; NONEON-NOSVE-NEXT: ret
%a = load <16 x i16>, ptr %in
%b = add <16 x i16> %a, %a
@@ -2037,43 +1421,38 @@ define void @sext_v8i32_v8i64(ptr %in, ptr %out) {
;
; NONEON-NOSVE-LABEL: sext_v8i32_v8i64:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #160
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
-; NONEON-NOSVE-NEXT: stp q1, q0, [sp]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp]
-; NONEON-NOSVE-NEXT: ldp w12, w13, [sp, #8]
+; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-96]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
+; NONEON-NOSVE-NEXT: ldp w9, w8, [sp]
; NONEON-NOSVE-NEXT: ldp w14, w15, [sp, #16]
-; NONEON-NOSVE-NEXT: add w9, w9, w9
-; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: ldp w10, w11, [sp, #24]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40]
-; NONEON-NOSVE-NEXT: add w9, w13, w13
-; NONEON-NOSVE-NEXT: add w8, w12, w12
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32]
-; NONEON-NOSVE-NEXT: add w9, w15, w15
-; NONEON-NOSVE-NEXT: add w8, w14, w14
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56]
-; NONEON-NOSVE-NEXT: add w9, w11, w11
-; NONEON-NOSVE-NEXT: add w8, w10, w10
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #72]
-; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #112]
-; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #64]
-; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #96]
-; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #88]
-; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #96]
-; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #144]
-; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #80]
-; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #128]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #128]
+; NONEON-NOSVE-NEXT: add w8, w8, w8
+; NONEON-NOSVE-NEXT: add w9, w9, w9
+; NONEON-NOSVE-NEXT: sxtw x8, w8
+; NONEON-NOSVE-NEXT: sxtw x9, w9
+; NONEON-NOSVE-NEXT: ldp w12, w13, [sp, #8]
+; NONEON-NOSVE-NEXT: stp x9, x8, [sp, #48]
+; NONEON-NOSVE-NEXT: add w8, w15, w15
+; NONEON-NOSVE-NEXT: add w9, w14, w14
+; NONEON-NOSVE-NEXT: sxtw x8, w8
+; NONEON-NOSVE-NEXT: sxtw x9, w9
+; NONEON-NOSVE-NEXT: add w11, w11, w11
+; NONEON-NOSVE-NEXT: add w13, w13, w13
+; NONEON-NOSVE-NEXT: add w12, w12, w12
+; NONEON-NOSVE-NEXT: add w10, w10, w10
+; NONEON-NOSVE-NEXT: sxtw x13, w13
+; NONEON-NOSVE-NEXT: sxtw x12, w12
+; NONEON-NOSVE-NEXT: sxtw x11, w11
+; NONEON-NOSVE-NEXT: stp x9, x8, [sp, #80]
+; NONEON-NOSVE-NEXT: sxtw x8, w10
+; NONEON-NOSVE-NEXT: stp x12, x13, [sp, #32]
+; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #64]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #32]
+; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT: stp q2, q3, [x1]
; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32]
-; NONEON-NOSVE-NEXT: add sp, sp, #160
+; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
%a = load <8 x i32>, ptr %in
%b = add <8 x i32> %a, %a
@@ -2162,14 +1541,14 @@ define void @zext_v32i8_v32i16(ptr %in, ptr %out) {
;
; NONEON-NOSVE-LABEL: zext_v32i8_v32i16:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #272
-; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #176] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #192] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #208] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #224] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #240] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #256] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 272
+; NONEON-NOSVE-NEXT: sub sp, sp, #208
+; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #112] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #128] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #144] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #160] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #176] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #192] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 208
; NONEON-NOSVE-NEXT: .cfi_offset w19, -8
; NONEON-NOSVE-NEXT: .cfi_offset w20, -16
; NONEON-NOSVE-NEXT: .cfi_offset w21, -24
@@ -2186,182 +1565,146 @@ define void @zext_v32i8_v32i16(ptr %in, ptr %out) {
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16]
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41]
-; NONEON-NOSVE-NEXT: ldrb w29, [sp, #18]
-; NONEON-NOSVE-NEXT: ldrb w27, [sp, #16]
-; NONEON-NOSVE-NEXT: ldrb w25, [sp, #30]
-; NONEON-NOSVE-NEXT: ldrb w23, [sp, #28]
+; NONEON-NOSVE-NEXT: ldrb w17, [sp, #21]
+; NONEON-NOSVE-NEXT: ldrb w28, [sp, #16]
+; NONEON-NOSVE-NEXT: ldrb w29, [sp, #17]
+; NONEON-NOSVE-NEXT: ldrb w27, [sp, #31]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20]
-; NONEON-NOSVE-NEXT: ldrb w21, [sp, #26]
-; NONEON-NOSVE-NEXT: ldrb w19, [sp, #24]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #19]
-; NONEON-NOSVE-NEXT: ldrb w6, [sp, #38]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19]
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #18]
+; NONEON-NOSVE-NEXT: add w5, w17, w17
+; NONEON-NOSVE-NEXT: ldrb w25, [sp, #29]
+; NONEON-NOSVE-NEXT: ldrb w26, [sp, #30]
; NONEON-NOSVE-NEXT: add w8, w8, w8
-; NONEON-NOSVE-NEXT: ldrb w28, [sp, #17]
-; NONEON-NOSVE-NEXT: ldrb w16, [sp, #22]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #60]
-; NONEON-NOSVE-NEXT: add w8, w29, w29
-; NONEON-NOSVE-NEXT: ldrb w4, [sp, #36]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #58]
-; NONEON-NOSVE-NEXT: add w8, w27, w27
-; NONEON-NOSVE-NEXT: ldrb w26, [sp, #31]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #56]
-; NONEON-NOSVE-NEXT: add w8, w25, w25
+; NONEON-NOSVE-NEXT: and w5, w5, #0xff
; NONEON-NOSVE-NEXT: add w9, w9, w9
-; NONEON-NOSVE-NEXT: strb w8, [sp, #54]
-; NONEON-NOSVE-NEXT: add w8, w23, w23
-; NONEON-NOSVE-NEXT: ldrb w2, [sp, #34]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #52]
-; NONEON-NOSVE-NEXT: add w8, w21, w21
-; NONEON-NOSVE-NEXT: ldrb w24, [sp, #29]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #50]
-; NONEON-NOSVE-NEXT: add w8, w19, w19
-; NONEON-NOSVE-NEXT: ldrb w17, [sp, #23]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #59]
-; NONEON-NOSVE-NEXT: add w9, w28, w28
-; NONEON-NOSVE-NEXT: add w18, w16, w16
-; NONEON-NOSVE-NEXT: strb w8, [sp, #48]
-; NONEON-NOSVE-NEXT: add w8, w6, w6
+; NONEON-NOSVE-NEXT: and w8, w8, #0xff
+; NONEON-NOSVE-NEXT: strh w5, [sp, #74]
+; NONEON-NOSVE-NEXT: add w5, w29, w29
+; NONEON-NOSVE-NEXT: and w9, w9, #0xff
+; NONEON-NOSVE-NEXT: strh w8, [sp, #70]
+; NONEON-NOSVE-NEXT: add w8, w28, w28
+; NONEON-NOSVE-NEXT: ldrb w24, [sp, #28]
+; NONEON-NOSVE-NEXT: and w5, w5, #0xff
+; NONEON-NOSVE-NEXT: strh w9, [sp, #68]
+; NONEON-NOSVE-NEXT: add w9, w27, w27
+; NONEON-NOSVE-NEXT: and w8, w8, #0xff
+; NONEON-NOSVE-NEXT: ldrb w23, [sp, #27]
+; NONEON-NOSVE-NEXT: strh w5, [sp, #66]
+; NONEON-NOSVE-NEXT: add w5, w26, w26
+; NONEON-NOSVE-NEXT: and w9, w9, #0xff
+; NONEON-NOSVE-NEXT: strh w8, [sp, #64]
+; NONEON-NOSVE-NEXT: add w8, w25, w25
+; NONEON-NOSVE-NEXT: ldrb w22, [sp, #26]
+; NONEON-NOSVE-NEXT: strh w9, [sp, #62]
+; NONEON-NOSVE-NEXT: and w9, w5, #0xff
+; NONEON-NOSVE-NEXT: add w5, w24, w24
+; NONEON-NOSVE-NEXT: and w8, w8, #0xff
+; NONEON-NOSVE-NEXT: ldrb w21, [sp, #25]
+; NONEON-NOSVE-NEXT: strh w9, [sp, #60]
+; NONEON-NOSVE-NEXT: add w9, w23, w23
+; NONEON-NOSVE-NEXT: strh w8, [sp, #58]
+; NONEON-NOSVE-NEXT: and w8, w5, #0xff
+; NONEON-NOSVE-NEXT: ldrb w20, [sp, #24]
+; NONEON-NOSVE-NEXT: add w5, w22, w22
+; NONEON-NOSVE-NEXT: strh w8, [sp, #56]
+; NONEON-NOSVE-NEXT: and w8, w9, #0xff
+; NONEON-NOSVE-NEXT: ldrb w19, [sp, #39]
+; NONEON-NOSVE-NEXT: add w9, w21, w21
+; NONEON-NOSVE-NEXT: strh w8, [sp, #54]
+; NONEON-NOSVE-NEXT: and w8, w5, #0xff
+; NONEON-NOSVE-NEXT: ldrb w16, [sp, #23]
+; NONEON-NOSVE-NEXT: ldrb w4, [sp, #38]
+; NONEON-NOSVE-NEXT: add w5, w20, w20
+; NONEON-NOSVE-NEXT: strh w8, [sp, #52]
+; NONEON-NOSVE-NEXT: and w8, w9, #0xff
+; NONEON-NOSVE-NEXT: ldrb w15, [sp, #22]
+; NONEON-NOSVE-NEXT: ldrb w3, [sp, #37]
+; NONEON-NOSVE-NEXT: add w9, w19, w19
+; NONEON-NOSVE-NEXT: strh w8, [sp, #50]
+; NONEON-NOSVE-NEXT: and w8, w5, #0xff
+; NONEON-NOSVE-NEXT: add w0, w16, w16
+; NONEON-NOSVE-NEXT: ldrb w2, [sp, #36]
+; NONEON-NOSVE-NEXT: add w4, w4, w4
+; NONEON-NOSVE-NEXT: strh w8, [sp, #48]
+; NONEON-NOSVE-NEXT: and w8, w9, #0xff
+; NONEON-NOSVE-NEXT: add w18, w15, w15
+; NONEON-NOSVE-NEXT: and w6, w0, #0xff
+; NONEON-NOSVE-NEXT: ldrb w0, [sp, #35]
+; NONEON-NOSVE-NEXT: add w9, w3, w3
+; NONEON-NOSVE-NEXT: strh w8, [sp, #110]
+; NONEON-NOSVE-NEXT: and w8, w4, #0xff
+; NONEON-NOSVE-NEXT: and w7, w18, #0xff
+; NONEON-NOSVE-NEXT: ldrb w18, [sp, #34]
+; NONEON-NOSVE-NEXT: add w2, w2, w2
+; NONEON-NOSVE-NEXT: strh w8, [sp, #108]
+; NONEON-NOSVE-NEXT: and w8, w9, #0xff
+; NONEON-NOSVE-NEXT: ldrb w17, [sp, #33]
+; NONEON-NOSVE-NEXT: add w9, w0, w0
+; NONEON-NOSVE-NEXT: strh w8, [sp, #106]
+; NONEON-NOSVE-NEXT: and w8, w2, #0xff
; NONEON-NOSVE-NEXT: ldrb w16, [sp, #32]
-; NONEON-NOSVE-NEXT: ldrb w22, [sp, #27]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #57]
-; NONEON-NOSVE-NEXT: add w9, w26, w26
-; NONEON-NOSVE-NEXT: strb w8, [sp, #78]
-; NONEON-NOSVE-NEXT: add w8, w4, w4
-; NONEON-NOSVE-NEXT: ldrb w14, [sp, #46]
-; NONEON-NOSVE-NEXT: ldrb w20, [sp, #25]
-; NONEON-NOSVE-NEXT: ldrb w30, [sp, #21]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #55]
-; NONEON-NOSVE-NEXT: add w9, w24, w24
-; NONEON-NOSVE-NEXT: strb w8, [sp, #76]
-; NONEON-NOSVE-NEXT: add w8, w2, w2
-; NONEON-NOSVE-NEXT: ldrb w12, [sp, #44]
-; NONEON-NOSVE-NEXT: add w17, w17, w17
-; NONEON-NOSVE-NEXT: strb w9, [sp, #53]
-; NONEON-NOSVE-NEXT: add w9, w22, w22
-; NONEON-NOSVE-NEXT: strb w8, [sp, #74]
-; NONEON-NOSVE-NEXT: add w8, w16, w16
-; NONEON-NOSVE-NEXT: ldrb w10, [sp, #42]
-; NONEON-NOSVE-NEXT: strb w17, [sp, #63]
-; NONEON-NOSVE-NEXT: add w17, w30, w30
-; NONEON-NOSVE-NEXT: strb w9, [sp, #51]
-; NONEON-NOSVE-NEXT: add w9, w20, w20
-; NONEON-NOSVE-NEXT: ldrb w7, [sp, #39]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #72]
-; NONEON-NOSVE-NEXT: add w8, w14, w14
-; NONEON-NOSVE-NEXT: ldrb w5, [sp, #37]
-; NONEON-NOSVE-NEXT: strb w18, [sp, #62]
-; NONEON-NOSVE-NEXT: ldrb w3, [sp, #35]
-; NONEON-NOSVE-NEXT: ldrb w0, [sp, #33]
-; NONEON-NOSVE-NEXT: strb w17, [sp, #61]
+; NONEON-NOSVE-NEXT: add w18, w18, w18
+; NONEON-NOSVE-NEXT: strh w8, [sp, #104]
+; NONEON-NOSVE-NEXT: and w8, w9, #0xff
; NONEON-NOSVE-NEXT: ldrb w15, [sp, #47]
+; NONEON-NOSVE-NEXT: add w9, w17, w17
+; NONEON-NOSVE-NEXT: strh w8, [sp, #102]
+; NONEON-NOSVE-NEXT: and w8, w18, #0xff
+; NONEON-NOSVE-NEXT: ldrb w14, [sp, #46]
+; NONEON-NOSVE-NEXT: add w16, w16, w16
+; NONEON-NOSVE-NEXT: strh w8, [sp, #100]
+; NONEON-NOSVE-NEXT: and w8, w9, #0xff
; NONEON-NOSVE-NEXT: ldrb w13, [sp, #45]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #49]
-; NONEON-NOSVE-NEXT: add w9, w7, w7
-; NONEON-NOSVE-NEXT: ldrb w11, [sp, #43]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #70]
-; NONEON-NOSVE-NEXT: add w8, w12, w12
-; NONEON-NOSVE-NEXT: strb w8, [sp, #68]
-; NONEON-NOSVE-NEXT: add w8, w10, w10
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #66]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload
-; NONEON-NOSVE-NEXT: strb w9, [sp, #79]
-; NONEON-NOSVE-NEXT: add w9, w5, w5
-; NONEON-NOSVE-NEXT: strb w9, [sp, #77]
-; NONEON-NOSVE-NEXT: add w9, w3, w3
-; NONEON-NOSVE-NEXT: add w8, w8, w8
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #64]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #95]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #75]
-; NONEON-NOSVE-NEXT: add w9, w0, w0
-; NONEON-NOSVE-NEXT: strh w8, [sp, #142]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #94]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #73]
; NONEON-NOSVE-NEXT: add w9, w15, w15
-; NONEON-NOSVE-NEXT: strh w8, [sp, #140]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #93]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #71]
+; NONEON-NOSVE-NEXT: strh w8, [sp, #98]
+; NONEON-NOSVE-NEXT: and w8, w16, #0xff
+; NONEON-NOSVE-NEXT: ldrb w12, [sp, #44]
+; NONEON-NOSVE-NEXT: add w14, w14, w14
+; NONEON-NOSVE-NEXT: strh w8, [sp, #96]
+; NONEON-NOSVE-NEXT: and w8, w9, #0xff
+; NONEON-NOSVE-NEXT: ldrb w11, [sp, #43]
; NONEON-NOSVE-NEXT: add w9, w13, w13
-; NONEON-NOSVE-NEXT: strh w8, [sp, #138]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #92]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #69]
+; NONEON-NOSVE-NEXT: strh w8, [sp, #94]
+; NONEON-NOSVE-NEXT: and w8, w14, #0xff
+; NONEON-NOSVE-NEXT: ldrb w10, [sp, #42]
+; NONEON-NOSVE-NEXT: add w12, w12, w12
+; NONEON-NOSVE-NEXT: strh w8, [sp, #92]
+; NONEON-NOSVE-NEXT: and w8, w9, #0xff
; NONEON-NOSVE-NEXT: add w9, w11, w11
-; NONEON-NOSVE-NEXT: strh w8, [sp, #136]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #91]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #67]
+; NONEON-NOSVE-NEXT: strh w8, [sp, #90]
+; NONEON-NOSVE-NEXT: and w8, w12, #0xff
+; NONEON-NOSVE-NEXT: add w10, w10, w10
+; NONEON-NOSVE-NEXT: strh w8, [sp, #88]
+; NONEON-NOSVE-NEXT: and w8, w9, #0xff
; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload
-; NONEON-NOSVE-NEXT: strh w8, [sp, #134]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #90]
+; NONEON-NOSVE-NEXT: ldrb w30, [sp, #20]
+; NONEON-NOSVE-NEXT: strh w8, [sp, #86]
+; NONEON-NOSVE-NEXT: and w8, w10, #0xff
+; NONEON-NOSVE-NEXT: ldr w10, [sp, #12] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: strh w6, [sp, #78]
; NONEON-NOSVE-NEXT: add w9, w9, w9
-; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #256] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: strh w8, [sp, #132]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #89]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #65]
-; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #240] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: strh w8, [sp, #130]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #88]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #128]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #87]
-; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #224] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: strh w8, [sp, #126]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #86]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96]
-; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #208] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: strh w8, [sp, #124]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #85]
-; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #192] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: strh w8, [sp, #122]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #84]
-; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #176] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: strh w8, [sp, #120]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #83]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #118]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #82]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #116]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #81]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #114]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #80]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #112]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #111]
-; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #112]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #174]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #110]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #172]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #109]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #170]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #108]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #168]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #107]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #166]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #106]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #164]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #105]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #162]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #104]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #160]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #103]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #158]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #102]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #156]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #101]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #154]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #100]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #152]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #99]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #150]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #98]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #148]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #97]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #146]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #96]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #144]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #144]
+; NONEON-NOSVE-NEXT: add w6, w30, w30
+; NONEON-NOSVE-NEXT: strh w8, [sp, #84]
+; NONEON-NOSVE-NEXT: add w10, w10, w10
+; NONEON-NOSVE-NEXT: and w8, w9, #0xff
+; NONEON-NOSVE-NEXT: and w6, w6, #0xff
+; NONEON-NOSVE-NEXT: strh w8, [sp, #82]
+; NONEON-NOSVE-NEXT: and w8, w10, #0xff
+; NONEON-NOSVE-NEXT: strh w7, [sp, #76]
+; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #192] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: strh w6, [sp, #72]
+; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #176] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: strh w8, [sp, #80]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #48]
+; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #80]
+; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #160] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #144] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q2, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #128] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32]
-; NONEON-NOSVE-NEXT: add sp, sp, #272
+; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #112] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: add sp, sp, #208
; NONEON-NOSVE-NEXT: ret
%a = load <32 x i8>, ptr %in
%b = add <32 x i8> %a, %a
@@ -2387,42 +1730,24 @@ define void @zext_v8i8_v8i32(<8 x i8> %a, ptr %out) {
;
; NONEON-NOSVE-LABEL: zext_v8i8_v8i32:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #80
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80
+; NONEON-NOSVE-NEXT: sub sp, sp, #48
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: str d0, [sp, #8]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #30]
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #28]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #26]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40]
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #24]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #22]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32]
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #20]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #18]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24]
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #16]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #46]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #44]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #42]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #40]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #38]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #36]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #34]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #32]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16]
+; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16]
; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
-; NONEON-NOSVE-NEXT: add sp, sp, #80
+; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
%b = zext <8 x i8> %a to <8 x i32>
store <8 x i32>%b, ptr %out
@@ -2448,75 +1773,39 @@ define void @zext_v16i8_v16i32(<16 x i8> %a, ptr %out) {
;
; NONEON-NOSVE-LABEL: zext_v16i8_v16i32:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: str q0, [sp, #-160]!
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160
+; NONEON-NOSVE-NEXT: str q0, [sp, #-96]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp]
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #62]
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #27]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #60]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #58]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88]
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #25]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #56]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #54]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80]
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #31]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #52]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #50]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72]
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #29]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #48]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #46]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64]
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #19]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #44]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #42]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56]
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #17]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #94]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #40]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #38]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48]
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #23]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #36]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #34]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40]
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #21]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #32]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #92]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #90]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #88]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #86]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #84]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #82]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #80]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #78]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #76]
-; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #128]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #74]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #72]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #70]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #68]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #66]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #64]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32]
+; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q2, q3, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [x0, #32]
-; NONEON-NOSVE-NEXT: add sp, sp, #160
+; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
%b = zext <16 x i8> %a to <16 x i32>
store <16 x i32> %b, ptr %out
@@ -2555,14 +1844,14 @@ define void @zext_v32i8_v32i32(ptr %in, ptr %out) {
;
; NONEON-NOSVE-LABEL: zext_v32i8_v32i32:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #464
-; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #368] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #384] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #400] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #416] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #432] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #448] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 464
+; NONEON-NOSVE-NEXT: sub sp, sp, #272
+; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #176] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #192] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #208] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #224] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #240] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #256] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 272
; NONEON-NOSVE-NEXT: .cfi_offset w19, -8
; NONEON-NOSVE-NEXT: .cfi_offset w20, -16
; NONEON-NOSVE-NEXT: .cfi_offset w21, -24
@@ -2577,258 +1866,136 @@ define void @zext_v32i8_v32i32(ptr %in, ptr %out) {
; NONEON-NOSVE-NEXT: .cfi_offset w29, -96
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41]
-; NONEON-NOSVE-NEXT: ldrb w29, [sp, #18]
-; NONEON-NOSVE-NEXT: ldrb w27, [sp, #16]
-; NONEON-NOSVE-NEXT: ldrb w25, [sp, #30]
-; NONEON-NOSVE-NEXT: ldrb w23, [sp, #28]
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #44]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #45]
+; NONEON-NOSVE-NEXT: ldrb w15, [sp, #18]
+; NONEON-NOSVE-NEXT: ldrb w16, [sp, #19]
+; NONEON-NOSVE-NEXT: ldrb w17, [sp, #17]
+; NONEON-NOSVE-NEXT: ldrb w30, [sp, #16]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20]
-; NONEON-NOSVE-NEXT: ldrb w21, [sp, #26]
-; NONEON-NOSVE-NEXT: ldrb w19, [sp, #24]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #19]
-; NONEON-NOSVE-NEXT: ldrb w6, [sp, #38]
+; NONEON-NOSVE-NEXT: add w18, w15, w15
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #22]
+; NONEON-NOSVE-NEXT: add w0, w16, w16
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23]
+; NONEON-NOSVE-NEXT: and w19, w18, #0xff
+; NONEON-NOSVE-NEXT: and w7, w0, #0xff
+; NONEON-NOSVE-NEXT: add w6, w17, w17
+; NONEON-NOSVE-NEXT: ldrb w28, [sp, #20]
+; NONEON-NOSVE-NEXT: ldrb w29, [sp, #21]
+; NONEON-NOSVE-NEXT: stp w19, w7, [sp, #104]
+; NONEON-NOSVE-NEXT: add w7, w30, w30
; NONEON-NOSVE-NEXT: add w8, w8, w8
-; NONEON-NOSVE-NEXT: ldrb w28, [sp, #17]
-; NONEON-NOSVE-NEXT: ldrb w16, [sp, #22]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #60]
-; NONEON-NOSVE-NEXT: add w8, w29, w29
-; NONEON-NOSVE-NEXT: ldrb w4, [sp, #36]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #58]
-; NONEON-NOSVE-NEXT: add w8, w27, w27
-; NONEON-NOSVE-NEXT: ldrb w26, [sp, #31]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #56]
-; NONEON-NOSVE-NEXT: add w8, w25, w25
; NONEON-NOSVE-NEXT: add w9, w9, w9
-; NONEON-NOSVE-NEXT: strb w8, [sp, #54]
-; NONEON-NOSVE-NEXT: add w8, w23, w23
-; NONEON-NOSVE-NEXT: ldrb w2, [sp, #34]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #52]
-; NONEON-NOSVE-NEXT: add w8, w21, w21
-; NONEON-NOSVE-NEXT: ldrb w24, [sp, #29]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #50]
-; NONEON-NOSVE-NEXT: add w8, w19, w19
-; NONEON-NOSVE-NEXT: ldrb w17, [sp, #23]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #59]
-; NONEON-NOSVE-NEXT: add w9, w28, w28
-; NONEON-NOSVE-NEXT: add w18, w16, w16
-; NONEON-NOSVE-NEXT: strb w8, [sp, #48]
-; NONEON-NOSVE-NEXT: add w8, w6, w6
-; NONEON-NOSVE-NEXT: ldrb w16, [sp, #32]
-; NONEON-NOSVE-NEXT: ldrb w22, [sp, #27]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #57]
-; NONEON-NOSVE-NEXT: add w9, w26, w26
-; NONEON-NOSVE-NEXT: strb w8, [sp, #78]
-; NONEON-NOSVE-NEXT: add w8, w4, w4
-; NONEON-NOSVE-NEXT: ldrb w14, [sp, #46]
-; NONEON-NOSVE-NEXT: ldrb w20, [sp, #25]
-; NONEON-NOSVE-NEXT: ldrb w30, [sp, #21]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #55]
+; NONEON-NOSVE-NEXT: and w6, w6, #0xff
+; NONEON-NOSVE-NEXT: and w7, w7, #0xff
+; NONEON-NOSVE-NEXT: and w8, w8, #0xff
+; NONEON-NOSVE-NEXT: and w9, w9, #0xff
+; NONEON-NOSVE-NEXT: ldrb w26, [sp, #26]
+; NONEON-NOSVE-NEXT: ldrb w27, [sp, #27]
+; NONEON-NOSVE-NEXT: stp w7, w6, [sp, #96]
+; NONEON-NOSVE-NEXT: add w6, w29, w29
+; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #88]
+; NONEON-NOSVE-NEXT: add w8, w28, w28
+; NONEON-NOSVE-NEXT: and w6, w6, #0xff
+; NONEON-NOSVE-NEXT: and w8, w8, #0xff
+; NONEON-NOSVE-NEXT: ldrb w24, [sp, #24]
+; NONEON-NOSVE-NEXT: ldrb w25, [sp, #25]
+; NONEON-NOSVE-NEXT: add w9, w27, w27
+; NONEON-NOSVE-NEXT: stp w8, w6, [sp, #80]
+; NONEON-NOSVE-NEXT: add w6, w26, w26
+; NONEON-NOSVE-NEXT: and w9, w9, #0xff
+; NONEON-NOSVE-NEXT: ldrb w22, [sp, #30]
+; NONEON-NOSVE-NEXT: and w6, w6, #0xff
+; NONEON-NOSVE-NEXT: ldrb w23, [sp, #31]
+; NONEON-NOSVE-NEXT: add w8, w25, w25
+; NONEON-NOSVE-NEXT: stp w6, w9, [sp, #72]
; NONEON-NOSVE-NEXT: add w9, w24, w24
-; NONEON-NOSVE-NEXT: strb w8, [sp, #76]
-; NONEON-NOSVE-NEXT: add w8, w2, w2
-; NONEON-NOSVE-NEXT: ldrb w12, [sp, #44]
-; NONEON-NOSVE-NEXT: add w17, w17, w17
-; NONEON-NOSVE-NEXT: strb w9, [sp, #53]
-; NONEON-NOSVE-NEXT: add w9, w22, w22
-; NONEON-NOSVE-NEXT: strb w8, [sp, #74]
-; NONEON-NOSVE-NEXT: add w8, w16, w16
-; NONEON-NOSVE-NEXT: ldrb w10, [sp, #42]
-; NONEON-NOSVE-NEXT: strb w17, [sp, #63]
-; NONEON-NOSVE-NEXT: add w17, w30, w30
-; NONEON-NOSVE-NEXT: strb w9, [sp, #51]
+; NONEON-NOSVE-NEXT: and w8, w8, #0xff
+; NONEON-NOSVE-NEXT: and w9, w9, #0xff
+; NONEON-NOSVE-NEXT: ldrb w20, [sp, #28]
+; NONEON-NOSVE-NEXT: ldrb w21, [sp, #29]
+; NONEON-NOSVE-NEXT: add w6, w23, w23
+; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #64]
+; NONEON-NOSVE-NEXT: add w8, w22, w22
+; NONEON-NOSVE-NEXT: and w9, w6, #0xff
+; NONEON-NOSVE-NEXT: and w8, w8, #0xff
+; NONEON-NOSVE-NEXT: ldrb w4, [sp, #34]
+; NONEON-NOSVE-NEXT: ldrb w5, [sp, #35]
+; NONEON-NOSVE-NEXT: add w6, w21, w21
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56]
; NONEON-NOSVE-NEXT: add w9, w20, w20
-; NONEON-NOSVE-NEXT: ldrb w7, [sp, #39]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #72]
+; NONEON-NOSVE-NEXT: and w8, w6, #0xff
+; NONEON-NOSVE-NEXT: ldrb w2, [sp, #32]
+; NONEON-NOSVE-NEXT: and w9, w9, #0xff
+; NONEON-NOSVE-NEXT: ldrb w3, [sp, #33]
+; NONEON-NOSVE-NEXT: add w5, w5, w5
+; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #48]
+; NONEON-NOSVE-NEXT: add w8, w4, w4
+; NONEON-NOSVE-NEXT: and w9, w5, #0xff
+; NONEON-NOSVE-NEXT: and w8, w8, #0xff
+; NONEON-NOSVE-NEXT: ldrb w18, [sp, #38]
+; NONEON-NOSVE-NEXT: ldrb w0, [sp, #39]
+; NONEON-NOSVE-NEXT: add w3, w3, w3
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #168]
+; NONEON-NOSVE-NEXT: add w9, w2, w2
+; NONEON-NOSVE-NEXT: and w8, w3, #0xff
+; NONEON-NOSVE-NEXT: and w9, w9, #0xff
+; NONEON-NOSVE-NEXT: ldrb w16, [sp, #36]
+; NONEON-NOSVE-NEXT: ldrb w17, [sp, #37]
+; NONEON-NOSVE-NEXT: add w0, w0, w0
+; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #160]
+; NONEON-NOSVE-NEXT: add w8, w18, w18
+; NONEON-NOSVE-NEXT: and w9, w0, #0xff
+; NONEON-NOSVE-NEXT: ldrb w14, [sp, #42]
+; NONEON-NOSVE-NEXT: and w8, w8, #0xff
+; NONEON-NOSVE-NEXT: ldrb w15, [sp, #43]
+; NONEON-NOSVE-NEXT: add w17, w17, w17
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152]
+; NONEON-NOSVE-NEXT: add w9, w16, w16
+; NONEON-NOSVE-NEXT: and w8, w17, #0xff
+; NONEON-NOSVE-NEXT: and w9, w9, #0xff
+; NONEON-NOSVE-NEXT: ldrb w12, [sp, #40]
+; NONEON-NOSVE-NEXT: ldrb w13, [sp, #41]
+; NONEON-NOSVE-NEXT: add w15, w15, w15
+; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #144]
; NONEON-NOSVE-NEXT: add w8, w14, w14
-; NONEON-NOSVE-NEXT: ldrb w5, [sp, #37]
-; NONEON-NOSVE-NEXT: strb w18, [sp, #62]
-; NONEON-NOSVE-NEXT: ldrb w3, [sp, #35]
-; NONEON-NOSVE-NEXT: ldrb w0, [sp, #33]
-; NONEON-NOSVE-NEXT: strb w17, [sp, #61]
-; NONEON-NOSVE-NEXT: ldrb w15, [sp, #47]
-; NONEON-NOSVE-NEXT: ldrb w13, [sp, #45]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #49]
-; NONEON-NOSVE-NEXT: add w9, w7, w7
-; NONEON-NOSVE-NEXT: ldrb w11, [sp, #43]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #70]
-; NONEON-NOSVE-NEXT: add w8, w12, w12
-; NONEON-NOSVE-NEXT: strb w8, [sp, #68]
+; NONEON-NOSVE-NEXT: and w9, w15, #0xff
+; NONEON-NOSVE-NEXT: and w8, w8, #0xff
+; NONEON-NOSVE-NEXT: ldrb w10, [sp, #46]
+; NONEON-NOSVE-NEXT: ldrb w11, [sp, #47]
+; NONEON-NOSVE-NEXT: add w13, w13, w13
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136]
+; NONEON-NOSVE-NEXT: add w9, w12, w12
+; NONEON-NOSVE-NEXT: and w8, w13, #0xff
+; NONEON-NOSVE-NEXT: and w9, w9, #0xff
+; NONEON-NOSVE-NEXT: add w11, w11, w11
+; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #128]
; NONEON-NOSVE-NEXT: add w8, w10, w10
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #66]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload
-; NONEON-NOSVE-NEXT: strb w9, [sp, #79]
-; NONEON-NOSVE-NEXT: add w9, w5, w5
-; NONEON-NOSVE-NEXT: strb w9, [sp, #77]
-; NONEON-NOSVE-NEXT: add w9, w3, w3
-; NONEON-NOSVE-NEXT: add w8, w8, w8
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #64]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #91]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #75]
-; NONEON-NOSVE-NEXT: add w9, w0, w0
-; NONEON-NOSVE-NEXT: strh w8, [sp, #134]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #90]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #73]
-; NONEON-NOSVE-NEXT: add w9, w15, w15
-; NONEON-NOSVE-NEXT: strh w8, [sp, #132]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #89]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #71]
-; NONEON-NOSVE-NEXT: add w9, w13, w13
-; NONEON-NOSVE-NEXT: strh w8, [sp, #130]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #88]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #69]
-; NONEON-NOSVE-NEXT: add w9, w11, w11
-; NONEON-NOSVE-NEXT: strh w8, [sp, #128]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #95]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #67]
-; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload
-; NONEON-NOSVE-NEXT: strh w8, [sp, #142]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #94]
+; NONEON-NOSVE-NEXT: and w9, w11, #0xff
+; NONEON-NOSVE-NEXT: and w8, w8, #0xff
+; NONEON-NOSVE-NEXT: ldr w10, [sp, #8] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120]
+; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: add w10, w10, w10
+; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #80]
; NONEON-NOSVE-NEXT: add w9, w9, w9
-; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #448] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: strh w8, [sp, #140]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #93]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #65]
-; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #432] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: strh w8, [sp, #138]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #92]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #136]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #83]
-; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #416] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: strh w8, [sp, #118]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #82]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #128]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #116]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #81]
-; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #400] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: strh w8, [sp, #114]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #80]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #192]
-; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #384] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: strh w8, [sp, #112]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #87]
-; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #368] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: strh w8, [sp, #126]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #86]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #124]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #85]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #122]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #84]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #120]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #107]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #112]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #166]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #106]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #164]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #105]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #176]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #162]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #104]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #182]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #160]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #111]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #174]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #110]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #172]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #109]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #170]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #108]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #168]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #99]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #160]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #150]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #98]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #148]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #97]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #224]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #146]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #96]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #144]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #103]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #158]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #102]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #156]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #101]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #154]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #100]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #152]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #198]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #144]
-; NONEON-NOSVE-NEXT: str w8, [sp, #284]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #196]
-; NONEON-NOSVE-NEXT: str w8, [sp, #280]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #194]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #208]
-; NONEON-NOSVE-NEXT: str w8, [sp, #276]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #192]
-; NONEON-NOSVE-NEXT: str w8, [sp, #272]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #206]
-; NONEON-NOSVE-NEXT: str w8, [sp, #300]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #204]
-; NONEON-NOSVE-NEXT: str w8, [sp, #296]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #202]
-; NONEON-NOSVE-NEXT: str w8, [sp, #292]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #200]
-; NONEON-NOSVE-NEXT: str w8, [sp, #288]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #180]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #272]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #248]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #178]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #176]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #240]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #190]
-; NONEON-NOSVE-NEXT: str w8, [sp, #268]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #188]
-; NONEON-NOSVE-NEXT: str w8, [sp, #264]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #186]
-; NONEON-NOSVE-NEXT: str w8, [sp, #260]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #184]
-; NONEON-NOSVE-NEXT: str w8, [sp, #256]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #230]
-; NONEON-NOSVE-NEXT: ldp q3, q4, [sp, #240]
-; NONEON-NOSVE-NEXT: str w8, [sp, #348]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #228]
-; NONEON-NOSVE-NEXT: str w8, [sp, #344]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #226]
-; NONEON-NOSVE-NEXT: str w8, [sp, #340]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #224]
-; NONEON-NOSVE-NEXT: str w8, [sp, #336]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #238]
-; NONEON-NOSVE-NEXT: str w8, [sp, #364]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #236]
-; NONEON-NOSVE-NEXT: str w8, [sp, #360]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #234]
-; NONEON-NOSVE-NEXT: str w8, [sp, #356]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #232]
-; NONEON-NOSVE-NEXT: str w8, [sp, #352]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #214]
-; NONEON-NOSVE-NEXT: ldp q6, q7, [sp, #336]
-; NONEON-NOSVE-NEXT: str w8, [sp, #316]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #212]
-; NONEON-NOSVE-NEXT: str w8, [sp, #312]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #210]
-; NONEON-NOSVE-NEXT: str w8, [sp, #308]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #208]
-; NONEON-NOSVE-NEXT: str w8, [sp, #304]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #222]
-; NONEON-NOSVE-NEXT: str w8, [sp, #332]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #220]
-; NONEON-NOSVE-NEXT: str w8, [sp, #328]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #218]
-; NONEON-NOSVE-NEXT: str w8, [sp, #324]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #216]
-; NONEON-NOSVE-NEXT: str w8, [sp, #320]
-; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #304]
+; NONEON-NOSVE-NEXT: and w8, w10, #0xff
+; NONEON-NOSVE-NEXT: and w9, w9, #0xff
+; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #48]
+; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #112]
+; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #144]
+; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #112]
; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #256] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #32]
+; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #240] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #64]
+; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #224] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q5, q2, [x1, #96]
-; NONEON-NOSVE-NEXT: add sp, sp, #464
+; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #208] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #192] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #176] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: add sp, sp, #272
; NONEON-NOSVE-NEXT: ret
%a = load <32 x i8>, ptr %in
%b = add <32 x i8> %a, %a
@@ -2858,26 +2025,20 @@ define void @zext_v4i8_v4i64(<4 x i8> %a, ptr %out) {
;
; NONEON-NOSVE-LABEL: zext_v4i8_v4i64:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #80
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80
+; NONEON-NOSVE-NEXT: sub sp, sp, #48
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: str d0, [sp, #8]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #40]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #40]
-; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #72]
-; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #64]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #32]
-; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #56]
-; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #48]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #32]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #24]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #16]
+; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16]
; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
-; NONEON-NOSVE-NEXT: add sp, sp, #80
+; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
%b = zext <4 x i8> %a to <4 x i64>
store <4 x i64>%b, ptr %out
@@ -2904,61 +2065,30 @@ define void @zext_v8i8_v8i64(<8 x i8> %a, ptr %out) {
;
; NONEON-NOSVE-LABEL: zext_v8i8_v8i64:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #176
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 176
+; NONEON-NOSVE-NEXT: sub sp, sp, #80
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80
; NONEON-NOSVE-NEXT: str d0, [sp, #8]
-; NONEON-NOSVE-NEXT: add x8, sp, #144
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11]
-; NONEON-NOSVE-NEXT: strh w9, [sp, #30]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10]
-; NONEON-NOSVE-NEXT: strh w9, [sp, #28]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9]
-; NONEON-NOSVE-NEXT: strh w9, [sp, #26]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8]
-; NONEON-NOSVE-NEXT: strh w9, [sp, #24]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15]
-; NONEON-NOSVE-NEXT: strh w9, [sp, #22]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14]
-; NONEON-NOSVE-NEXT: strh w9, [sp, #20]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13]
-; NONEON-NOSVE-NEXT: strh w9, [sp, #18]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12]
-; NONEON-NOSVE-NEXT: strh w9, [sp, #16]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT: ldrb w10, [sp, #42]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40]
-; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #64]
-; NONEON-NOSVE-NEXT: ldrb w10, [sp, #46]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #44]
-; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #72]
-; NONEON-NOSVE-NEXT: ldrb w10, [sp, #34]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #32]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #64]
-; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #48]
-; NONEON-NOSVE-NEXT: ldrb w10, [sp, #38]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #36]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #96]
-; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #56]
-; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #96]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #48]
-; NONEON-NOSVE-NEXT: stp w10, wzr, [sp, #152]
-; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #144]
-; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #104]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #80]
-; NONEON-NOSVE-NEXT: stp w10, wzr, [sp, #168]
-; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #160]
-; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #80]
-; NONEON-NOSVE-NEXT: stp w10, wzr, [sp, #120]
-; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #112]
-; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #88]
-; NONEON-NOSVE-NEXT: stp w10, wzr, [sp, #136]
-; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #128]
-; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #112]
-; NONEON-NOSVE-NEXT: ldp q2, q3, [x8]
-; NONEON-NOSVE-NEXT: stp q1, q0, [x0, #32]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #72]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #64]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #56]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #48]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #48]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #40]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #32]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #24]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #16]
+; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16]
; NONEON-NOSVE-NEXT: stp q2, q3, [x0]
-; NONEON-NOSVE-NEXT: add sp, sp, #176
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #80
; NONEON-NOSVE-NEXT: ret
%b = zext <8 x i8> %a to <8 x i64>
store <8 x i64>%b, ptr %out
@@ -2998,129 +2128,51 @@ define void @zext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
;
; NONEON-NOSVE-LABEL: zext_v16i8_v16i64:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #368
-; NONEON-NOSVE-NEXT: str x29, [sp, #352] // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 368
-; NONEON-NOSVE-NEXT: .cfi_offset w29, -16
-; NONEON-NOSVE-NEXT: str q0, [sp]
-; NONEON-NOSVE-NEXT: ldr x29, [sp, #352] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: str q0, [sp, #-160]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #332]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #324]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #348]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #24]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #35]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #340]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #300]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #70]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #34]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #292]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #68]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #33]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #316]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #66]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #32]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #308]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #64]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #39]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #268]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #62]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #38]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #260]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #60]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #37]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #284]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #58]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #36]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #276]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #56]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #56]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #54]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #52]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #88]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #50]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #152]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #98]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #48]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #46]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #44]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #144]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #136]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #128]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #42]
+; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #128]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #120]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #40]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #96]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #40]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #102]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #100]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #160]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #90]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #88]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #72]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #152]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #94]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #92]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #82]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #80]
-; NONEON-NOSVE-NEXT: str d0, [sp, #360]
-; NONEON-NOSVE-NEXT: ldp d2, d0, [sp, #136]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #86]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #84]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #74]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #72]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #208]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #120]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #78]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #76]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #216]
-; NONEON-NOSVE-NEXT: stp d0, d2, [sp, #192]
-; NONEON-NOSVE-NEXT: ldp d2, d0, [sp, #104]
-; NONEON-NOSVE-NEXT: str w8, [sp, #320]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #364]
-; NONEON-NOSVE-NEXT: str w9, [sp, #328]
-; NONEON-NOSVE-NEXT: str w8, [sp, #344]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #360]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #176]
-; NONEON-NOSVE-NEXT: str w8, [sp, #336]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #200]
-; NONEON-NOSVE-NEXT: str d2, [sp, #168]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #320]
-; NONEON-NOSVE-NEXT: str w9, [sp, #296]
-; NONEON-NOSVE-NEXT: str w8, [sp, #288]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #208]
-; NONEON-NOSVE-NEXT: str w9, [sp, #312]
-; NONEON-NOSVE-NEXT: str w8, [sp, #304]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #184]
-; NONEON-NOSVE-NEXT: ldp q3, q4, [sp, #288]
-; NONEON-NOSVE-NEXT: str w9, [sp, #264]
-; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #252]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #192]
-; NONEON-NOSVE-NEXT: str w9, [sp, #280]
-; NONEON-NOSVE-NEXT: str w8, [sp, #272]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #168]
-; NONEON-NOSVE-NEXT: ldp q6, q7, [sp, #256]
-; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #232]
-; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #224]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #176]
-; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #244]
-; NONEON-NOSVE-NEXT: str w8, [sp, #240]
-; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #224]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #112]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #104]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #96]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17]
+; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #96]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #88]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #80]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #72]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #64]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21]
+; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #64]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #56]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #48]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #40]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #32]
+; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #32]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: stp q3, q4, [x0, #32]
; NONEON-NOSVE-NEXT: stp q6, q7, [x0, #64]
; NONEON-NOSVE-NEXT: stp q5, q2, [x0, #96]
-; NONEON-NOSVE-NEXT: add sp, sp, #368
+; NONEON-NOSVE-NEXT: add sp, sp, #160
; NONEON-NOSVE-NEXT: ret
%b = zext <16 x i8> %a to <16 x i64>
store <16 x i64> %b, ptr %out
@@ -3192,14 +2244,14 @@ define void @zext_v32i8_v32i64(ptr %in, ptr %out) {
;
; NONEON-NOSVE-LABEL: zext_v32i8_v32i64:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT: sub sp, sp, #752
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 848
+; NONEON-NOSVE-NEXT: sub sp, sp, #400
+; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #304] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #320] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #336] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #352] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #368] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #384] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 400
; NONEON-NOSVE-NEXT: .cfi_offset w19, -8
; NONEON-NOSVE-NEXT: .cfi_offset w20, -16
; NONEON-NOSVE-NEXT: .cfi_offset w21, -24
@@ -3213,379 +2265,168 @@ define void @zext_v32i8_v32i64(ptr %in, ptr %out) {
; NONEON-NOSVE-NEXT: .cfi_offset w30, -88
; NONEON-NOSVE-NEXT: .cfi_offset w29, -96
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #572]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #564]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #588]
+; NONEON-NOSVE-NEXT: str wzr, [sp, #172]
+; NONEON-NOSVE-NEXT: str wzr, [sp, #292]
+; NONEON-NOSVE-NEXT: str wzr, [sp, #300]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41]
-; NONEON-NOSVE-NEXT: ldrb w29, [sp, #18]
-; NONEON-NOSVE-NEXT: ldrb w27, [sp, #16]
-; NONEON-NOSVE-NEXT: ldrb w25, [sp, #30]
-; NONEON-NOSVE-NEXT: ldrb w23, [sp, #28]
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #46]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #47]
+; NONEON-NOSVE-NEXT: ldrb w30, [sp, #21]
+; NONEON-NOSVE-NEXT: ldrb w29, [sp, #20]
+; NONEON-NOSVE-NEXT: ldrb w27, [sp, #22]
+; NONEON-NOSVE-NEXT: ldrb w28, [sp, #23]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20]
-; NONEON-NOSVE-NEXT: ldrb w21, [sp, #26]
-; NONEON-NOSVE-NEXT: ldrb w19, [sp, #24]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18]
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #19]
-; NONEON-NOSVE-NEXT: ldrb w6, [sp, #38]
+; NONEON-NOSVE-NEXT: ldrb w26, [sp, #25]
+; NONEON-NOSVE-NEXT: ldrb w25, [sp, #24]
+; NONEON-NOSVE-NEXT: ldrb w24, [sp, #26]
; NONEON-NOSVE-NEXT: add w8, w8, w8
-; NONEON-NOSVE-NEXT: ldrb w28, [sp, #17]
-; NONEON-NOSVE-NEXT: ldrb w16, [sp, #22]
; NONEON-NOSVE-NEXT: add w9, w9, w9
-; NONEON-NOSVE-NEXT: strb w8, [sp, #60]
-; NONEON-NOSVE-NEXT: add w8, w29, w29
-; NONEON-NOSVE-NEXT: strb w8, [sp, #58]
+; NONEON-NOSVE-NEXT: ldrb w21, [sp, #27]
+; NONEON-NOSVE-NEXT: and w8, w8, #0xff
+; NONEON-NOSVE-NEXT: and w9, w9, #0xff
+; NONEON-NOSVE-NEXT: ldrb w19, [sp, #29]
+; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #140]
+; NONEON-NOSVE-NEXT: add w8, w30, w30
+; NONEON-NOSVE-NEXT: ldrb w20, [sp, #28]
+; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #148]
+; NONEON-NOSVE-NEXT: add w9, w29, w29
+; NONEON-NOSVE-NEXT: and w8, w8, #0xff
+; NONEON-NOSVE-NEXT: and w9, w9, #0xff
+; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #132]
; NONEON-NOSVE-NEXT: add w8, w27, w27
-; NONEON-NOSVE-NEXT: add w18, w16, w16
-; NONEON-NOSVE-NEXT: strb w8, [sp, #56]
-; NONEON-NOSVE-NEXT: add w8, w25, w25
-; NONEON-NOSVE-NEXT: ldrb w4, [sp, #36]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #54]
-; NONEON-NOSVE-NEXT: add w8, w23, w23
-; NONEON-NOSVE-NEXT: ldrb w26, [sp, #31]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #52]
-; NONEON-NOSVE-NEXT: add w8, w21, w21
-; NONEON-NOSVE-NEXT: ldrb w2, [sp, #34]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #50]
-; NONEON-NOSVE-NEXT: add w8, w19, w19
-; NONEON-NOSVE-NEXT: ldrb w24, [sp, #29]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #59]
+; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #124]
; NONEON-NOSVE-NEXT: add w9, w28, w28
-; NONEON-NOSVE-NEXT: ldrb w17, [sp, #23]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #48]
-; NONEON-NOSVE-NEXT: add w8, w6, w6
-; NONEON-NOSVE-NEXT: ldrb w16, [sp, #32]
-; NONEON-NOSVE-NEXT: ldrb w22, [sp, #27]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #57]
-; NONEON-NOSVE-NEXT: add w9, w26, w26
-; NONEON-NOSVE-NEXT: strb w8, [sp, #78]
-; NONEON-NOSVE-NEXT: add w8, w4, w4
-; NONEON-NOSVE-NEXT: ldrb w14, [sp, #46]
-; NONEON-NOSVE-NEXT: ldrb w20, [sp, #25]
-; NONEON-NOSVE-NEXT: ldrb w30, [sp, #21]
-; NONEON-NOSVE-NEXT: add w17, w17, w17
-; NONEON-NOSVE-NEXT: strb w9, [sp, #55]
-; NONEON-NOSVE-NEXT: add w9, w24, w24
-; NONEON-NOSVE-NEXT: ldrb w12, [sp, #44]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #76]
-; NONEON-NOSVE-NEXT: add w8, w2, w2
-; NONEON-NOSVE-NEXT: ldrb w10, [sp, #42]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #53]
-; NONEON-NOSVE-NEXT: add w9, w22, w22
-; NONEON-NOSVE-NEXT: ldrb w7, [sp, #39]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #74]
-; NONEON-NOSVE-NEXT: add w8, w16, w16
-; NONEON-NOSVE-NEXT: ldrb w5, [sp, #37]
-; NONEON-NOSVE-NEXT: strb w17, [sp, #63]
-; NONEON-NOSVE-NEXT: add w17, w30, w30
-; NONEON-NOSVE-NEXT: ldrb w3, [sp, #35]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #51]
+; NONEON-NOSVE-NEXT: and w8, w8, #0xff
+; NONEON-NOSVE-NEXT: and w9, w9, #0xff
+; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #108]
+; NONEON-NOSVE-NEXT: add w8, w26, w26
+; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #116]
+; NONEON-NOSVE-NEXT: add w9, w25, w25
+; NONEON-NOSVE-NEXT: and w8, w8, #0xff
+; NONEON-NOSVE-NEXT: and w9, w9, #0xff
+; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #100]
+; NONEON-NOSVE-NEXT: add w8, w24, w24
+; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #92]
+; NONEON-NOSVE-NEXT: add w9, w21, w21
+; NONEON-NOSVE-NEXT: and w8, w8, #0xff
+; NONEON-NOSVE-NEXT: ldrb w7, [sp, #30]
+; NONEON-NOSVE-NEXT: and w9, w9, #0xff
+; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #76]
+; NONEON-NOSVE-NEXT: add w8, w19, w19
+; NONEON-NOSVE-NEXT: ldrb w6, [sp, #31]
+; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #84]
; NONEON-NOSVE-NEXT: add w9, w20, w20
-; NONEON-NOSVE-NEXT: ldrb w0, [sp, #33]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #72]
-; NONEON-NOSVE-NEXT: add w8, w14, w14
-; NONEON-NOSVE-NEXT: ldrb w15, [sp, #47]
-; NONEON-NOSVE-NEXT: strb w18, [sp, #62]
-; NONEON-NOSVE-NEXT: ldrb w13, [sp, #45]
-; NONEON-NOSVE-NEXT: ldrb w11, [sp, #43]
-; NONEON-NOSVE-NEXT: strb w17, [sp, #61]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #49]
-; NONEON-NOSVE-NEXT: add w9, w7, w7
-; NONEON-NOSVE-NEXT: strb w8, [sp, #70]
-; NONEON-NOSVE-NEXT: add w8, w12, w12
-; NONEON-NOSVE-NEXT: strb w8, [sp, #68]
-; NONEON-NOSVE-NEXT: add w8, w10, w10
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #66]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload
-; NONEON-NOSVE-NEXT: strb w9, [sp, #79]
+; NONEON-NOSVE-NEXT: and w8, w8, #0xff
+; NONEON-NOSVE-NEXT: ldrb w4, [sp, #33]
+; NONEON-NOSVE-NEXT: and w9, w9, #0xff
+; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #68]
+; NONEON-NOSVE-NEXT: add w8, w7, w7
+; NONEON-NOSVE-NEXT: ldrb w5, [sp, #32]
+; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #60]
+; NONEON-NOSVE-NEXT: add w9, w6, w6
+; NONEON-NOSVE-NEXT: and w8, w8, #0xff
+; NONEON-NOSVE-NEXT: ldrb w16, [sp, #16]
+; NONEON-NOSVE-NEXT: ldrb w18, [sp, #17]
+; NONEON-NOSVE-NEXT: ldrb w3, [sp, #34]
+; NONEON-NOSVE-NEXT: and w9, w9, #0xff
+; NONEON-NOSVE-NEXT: str w8, [sp, #48]
+; NONEON-NOSVE-NEXT: add w8, w4, w4
+; NONEON-NOSVE-NEXT: ldrb w2, [sp, #35]
+; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #52]
; NONEON-NOSVE-NEXT: add w9, w5, w5
-; NONEON-NOSVE-NEXT: strb w9, [sp, #77]
-; NONEON-NOSVE-NEXT: add w9, w3, w3
-; NONEON-NOSVE-NEXT: add w8, w8, w8
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #64]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #91]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #75]
+; NONEON-NOSVE-NEXT: and w8, w8, #0xff
+; NONEON-NOSVE-NEXT: add w0, w16, w16
+; NONEON-NOSVE-NEXT: add w22, w18, w18
+; NONEON-NOSVE-NEXT: ldrb w18, [sp, #37]
+; NONEON-NOSVE-NEXT: and w9, w9, #0xff
+; NONEON-NOSVE-NEXT: str w8, [sp, #296]
+; NONEON-NOSVE-NEXT: add w8, w3, w3
+; NONEON-NOSVE-NEXT: and w23, w0, #0xff
+; NONEON-NOSVE-NEXT: ldrb w0, [sp, #36]
+; NONEON-NOSVE-NEXT: str w9, [sp, #288]
+; NONEON-NOSVE-NEXT: add w9, w2, w2
+; NONEON-NOSVE-NEXT: and w8, w8, #0xff
+; NONEON-NOSVE-NEXT: ldrb w17, [sp, #38]
+; NONEON-NOSVE-NEXT: and w9, w9, #0xff
+; NONEON-NOSVE-NEXT: str w8, [sp, #272]
+; NONEON-NOSVE-NEXT: add w8, w18, w18
+; NONEON-NOSVE-NEXT: ldrb w16, [sp, #39]
+; NONEON-NOSVE-NEXT: str w9, [sp, #280]
; NONEON-NOSVE-NEXT: add w9, w0, w0
-; NONEON-NOSVE-NEXT: strh w8, [sp, #134]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #90]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #73]
-; NONEON-NOSVE-NEXT: add w9, w15, w15
-; NONEON-NOSVE-NEXT: strh w8, [sp, #132]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #89]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #71]
+; NONEON-NOSVE-NEXT: and w8, w8, #0xff
+; NONEON-NOSVE-NEXT: ldrb w15, [sp, #41]
+; NONEON-NOSVE-NEXT: and w9, w9, #0xff
+; NONEON-NOSVE-NEXT: str w8, [sp, #264]
+; NONEON-NOSVE-NEXT: add w8, w17, w17
+; NONEON-NOSVE-NEXT: ldrb w14, [sp, #40]
+; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #252]
+; NONEON-NOSVE-NEXT: add w9, w16, w16
+; NONEON-NOSVE-NEXT: and w8, w8, #0xff
+; NONEON-NOSVE-NEXT: ldrb w12, [sp, #42]
+; NONEON-NOSVE-NEXT: and w9, w9, #0xff
+; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #236]
+; NONEON-NOSVE-NEXT: add w8, w15, w15
+; NONEON-NOSVE-NEXT: ldrb w13, [sp, #43]
+; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #244]
+; NONEON-NOSVE-NEXT: add w9, w14, w14
+; NONEON-NOSVE-NEXT: and w8, w8, #0xff
+; NONEON-NOSVE-NEXT: ldrb w11, [sp, #45]
+; NONEON-NOSVE-NEXT: and w9, w9, #0xff
+; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #228]
+; NONEON-NOSVE-NEXT: add w8, w12, w12
+; NONEON-NOSVE-NEXT: ldrb w10, [sp, #44]
+; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #220]
; NONEON-NOSVE-NEXT: add w9, w13, w13
-; NONEON-NOSVE-NEXT: strh w8, [sp, #130]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #88]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #69]
-; NONEON-NOSVE-NEXT: add w9, w11, w11
-; NONEON-NOSVE-NEXT: strh w8, [sp, #128]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #95]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #67]
-; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload
-; NONEON-NOSVE-NEXT: strh w8, [sp, #142]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #94]
+; NONEON-NOSVE-NEXT: and w8, w8, #0xff
+; NONEON-NOSVE-NEXT: and w9, w9, #0xff
+; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #204]
+; NONEON-NOSVE-NEXT: add w8, w11, w11
+; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #212]
+; NONEON-NOSVE-NEXT: add w9, w10, w10
+; NONEON-NOSVE-NEXT: and w8, w8, #0xff
+; NONEON-NOSVE-NEXT: and w9, w9, #0xff
+; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #196]
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #188]
+; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: and w22, w22, #0xff
+; NONEON-NOSVE-NEXT: add w8, w8, w8
+; NONEON-NOSVE-NEXT: stp wzr, w22, [sp, #164]
+; NONEON-NOSVE-NEXT: and w8, w8, #0xff
; NONEON-NOSVE-NEXT: add w9, w9, w9
-; NONEON-NOSVE-NEXT: str wzr, [sp, #580]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #140]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #93]
-; NONEON-NOSVE-NEXT: strb w9, [sp, #65]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #138]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #92]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #604]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #136]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #83]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #596]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #118]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #82]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #128]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #116]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #81]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #620]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #114]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #80]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #192]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #112]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #87]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #612]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #126]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #86]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #508]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #124]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #85]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #500]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #122]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #84]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #524]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #120]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #107]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #112]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #516]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #166]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #106]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #540]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #164]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #105]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #176]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #162]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #104]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #178]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #532]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #160]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #111]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #556]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #174]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #110]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #548]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #172]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #109]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #700]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #170]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #108]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #692]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #168]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #99]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #160]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #716]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #150]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #98]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #708]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #148]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #97]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #224]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #146]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #96]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #732]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #144]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #103]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #724]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #158]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #102]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #748]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #156]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #101]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #740]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #154]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #100]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #636]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #152]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #194]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #144]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #628]
-; NONEON-NOSVE-NEXT: str w8, [sp, #276]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #192]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #652]
-; NONEON-NOSVE-NEXT: str w8, [sp, #272]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #198]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #208]
-; NONEON-NOSVE-NEXT: str w8, [sp, #284]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #196]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #644]
-; NONEON-NOSVE-NEXT: str w8, [sp, #280]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #202]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #272]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #668]
-; NONEON-NOSVE-NEXT: str w8, [sp, #292]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #200]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #660]
-; NONEON-NOSVE-NEXT: str w8, [sp, #288]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #206]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #400]
-; NONEON-NOSVE-NEXT: str w8, [sp, #300]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #204]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #684]
-; NONEON-NOSVE-NEXT: str w8, [sp, #296]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #176]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #288]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #676]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #240]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #182]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #180]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #248]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #186]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #416]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #240]
-; NONEON-NOSVE-NEXT: str w8, [sp, #260]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #184]
-; NONEON-NOSVE-NEXT: str w8, [sp, #256]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #190]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #368]
-; NONEON-NOSVE-NEXT: str w8, [sp, #268]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #188]
-; NONEON-NOSVE-NEXT: str w8, [sp, #264]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #226]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #256]
-; NONEON-NOSVE-NEXT: str w8, [sp, #340]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #224]
-; NONEON-NOSVE-NEXT: str w8, [sp, #336]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #230]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #384]
-; NONEON-NOSVE-NEXT: str w8, [sp, #348]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #228]
-; NONEON-NOSVE-NEXT: str w8, [sp, #344]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #234]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #336]
-; NONEON-NOSVE-NEXT: str w8, [sp, #356]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #232]
-; NONEON-NOSVE-NEXT: str w8, [sp, #352]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #238]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #464]
-; NONEON-NOSVE-NEXT: str w8, [sp, #364]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #236]
-; NONEON-NOSVE-NEXT: str w8, [sp, #360]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #210]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #352]
-; NONEON-NOSVE-NEXT: str w8, [sp, #308]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #208]
-; NONEON-NOSVE-NEXT: str w8, [sp, #304]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #214]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #480]
-; NONEON-NOSVE-NEXT: str w8, [sp, #316]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #212]
-; NONEON-NOSVE-NEXT: str w8, [sp, #312]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #218]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #304]
-; NONEON-NOSVE-NEXT: str w8, [sp, #324]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #216]
-; NONEON-NOSVE-NEXT: str w8, [sp, #320]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #222]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #432]
-; NONEON-NOSVE-NEXT: str w8, [sp, #332]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #220]
-; NONEON-NOSVE-NEXT: str w8, [sp, #328]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #404]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #320]
-; NONEON-NOSVE-NEXT: str w8, [sp, #568]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #400]
-; NONEON-NOSVE-NEXT: str w8, [sp, #560]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #412]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #448]
-; NONEON-NOSVE-NEXT: str w8, [sp, #584]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #408]
-; NONEON-NOSVE-NEXT: str w8, [sp, #576]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #420]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #560]
-; NONEON-NOSVE-NEXT: str w8, [sp, #600]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #416]
-; NONEON-NOSVE-NEXT: str w8, [sp, #592]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #428]
-; NONEON-NOSVE-NEXT: str w8, [sp, #616]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #424]
-; NONEON-NOSVE-NEXT: str w8, [sp, #608]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #372]
-; NONEON-NOSVE-NEXT: ldp q2, q3, [sp, #592]
-; NONEON-NOSVE-NEXT: str w8, [sp, #504]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #368]
-; NONEON-NOSVE-NEXT: str w8, [sp, #496]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #380]
-; NONEON-NOSVE-NEXT: str w8, [sp, #520]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #376]
-; NONEON-NOSVE-NEXT: str w8, [sp, #512]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #388]
-; NONEON-NOSVE-NEXT: ldp q4, q5, [sp, #496]
-; NONEON-NOSVE-NEXT: str w8, [sp, #536]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #384]
-; NONEON-NOSVE-NEXT: str w8, [sp, #528]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #396]
-; NONEON-NOSVE-NEXT: str w8, [sp, #552]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #392]
-; NONEON-NOSVE-NEXT: str w8, [sp, #544]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #468]
-; NONEON-NOSVE-NEXT: ldp q6, q7, [sp, #528]
-; NONEON-NOSVE-NEXT: str w8, [sp, #696]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #464]
-; NONEON-NOSVE-NEXT: str w8, [sp, #688]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #476]
-; NONEON-NOSVE-NEXT: str w8, [sp, #712]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #472]
-; NONEON-NOSVE-NEXT: str w8, [sp, #704]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #484]
-; NONEON-NOSVE-NEXT: ldp q16, q17, [sp, #688]
-; NONEON-NOSVE-NEXT: str w8, [sp, #728]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #480]
-; NONEON-NOSVE-NEXT: str w8, [sp, #720]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #492]
-; NONEON-NOSVE-NEXT: str w8, [sp, #744]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #488]
-; NONEON-NOSVE-NEXT: str w8, [sp, #736]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #436]
-; NONEON-NOSVE-NEXT: ldp q19, q20, [sp, #720]
-; NONEON-NOSVE-NEXT: str w8, [sp, #632]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #432]
-; NONEON-NOSVE-NEXT: str w8, [sp, #624]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #444]
-; NONEON-NOSVE-NEXT: str w8, [sp, #648]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #440]
-; NONEON-NOSVE-NEXT: str w8, [sp, #640]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #452]
-; NONEON-NOSVE-NEXT: ldp q22, q23, [sp, #624]
-; NONEON-NOSVE-NEXT: str w8, [sp, #664]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #448]
-; NONEON-NOSVE-NEXT: str w8, [sp, #656]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #460]
-; NONEON-NOSVE-NEXT: str w8, [sp, #680]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #456]
-; NONEON-NOSVE-NEXT: str w8, [sp, #672]
-; NONEON-NOSVE-NEXT: ldp q21, q18, [sp, #656]
+; NONEON-NOSVE-NEXT: stp wzr, w23, [sp, #156]
+; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #180]
+; NONEON-NOSVE-NEXT: and w8, w9, #0xff
+; NONEON-NOSVE-NEXT: str wzr, [sp, #276]
+; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #144]
+; NONEON-NOSVE-NEXT: str wzr, [sp, #284]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #112]
+; NONEON-NOSVE-NEXT: str wzr, [sp, #260]
+; NONEON-NOSVE-NEXT: ldp q5, q4, [sp, #80]
+; NONEON-NOSVE-NEXT: str wzr, [sp, #268]
+; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #48]
+; NONEON-NOSVE-NEXT: str w8, [sp, #176]
+; NONEON-NOSVE-NEXT: ldp q17, q16, [sp, #272]
+; NONEON-NOSVE-NEXT: ldp q18, q21, [sp, #176]
+; NONEON-NOSVE-NEXT: ldp q20, q19, [sp, #240]
+; NONEON-NOSVE-NEXT: ldp q23, q22, [sp, #208]
; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #384] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q2, q3, [x1, #32]
+; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #368] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q4, q5, [x1, #64]
+; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #352] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #96]
+; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #336] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q16, q17, [x1, #128]
+; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #320] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q19, q20, [x1, #160]
+; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #304] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q22, q23, [x1, #192]
; NONEON-NOSVE-NEXT: stp q21, q18, [x1, #224]
-; NONEON-NOSVE-NEXT: add sp, sp, #752
-; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: add sp, sp, #400
; NONEON-NOSVE-NEXT: ret
%a = load <32 x i8>, ptr %in
%b = add <32 x i8> %a, %a
@@ -3653,91 +2494,70 @@ define void @zext_v16i16_v16i32(ptr %in, ptr %out) {
;
; NONEON-NOSVE-LABEL: zext_v16i16_v16i32:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #160
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
-; NONEON-NOSVE-NEXT: stp q1, q0, [sp]
-; NONEON-NOSVE-NEXT: ldrh w13, [sp, #4]
+; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-96]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldrh w14, [sp, #6]
-; NONEON-NOSVE-NEXT: ldrh w3, [sp, #2]
-; NONEON-NOSVE-NEXT: ldrh w5, [sp]
-; NONEON-NOSVE-NEXT: ldrh w2, [sp, #12]
-; NONEON-NOSVE-NEXT: ldrh w4, [sp, #14]
-; NONEON-NOSVE-NEXT: add w13, w13, w13
+; NONEON-NOSVE-NEXT: ldrh w17, [sp, #4]
+; NONEON-NOSVE-NEXT: ldrh w16, [sp, #2]
+; NONEON-NOSVE-NEXT: ldrh w3, [sp]
+; NONEON-NOSVE-NEXT: ldrh w4, [sp, #12]
+; NONEON-NOSVE-NEXT: ldrh w5, [sp, #14]
; NONEON-NOSVE-NEXT: add w14, w14, w14
-; NONEON-NOSVE-NEXT: ldrh w18, [sp, #8]
-; NONEON-NOSVE-NEXT: ldrh w0, [sp, #10]
-; NONEON-NOSVE-NEXT: strh w14, [sp, #46]
+; NONEON-NOSVE-NEXT: add w17, w17, w17
+; NONEON-NOSVE-NEXT: add w16, w16, w16
+; NONEON-NOSVE-NEXT: and w14, w14, #0xffff
+; NONEON-NOSVE-NEXT: and w17, w17, #0xffff
+; NONEON-NOSVE-NEXT: and w16, w16, #0xffff
+; NONEON-NOSVE-NEXT: stp w17, w14, [sp, #56]
; NONEON-NOSVE-NEXT: add w14, w3, w3
-; NONEON-NOSVE-NEXT: strh w13, [sp, #44]
-; NONEON-NOSVE-NEXT: add w13, w5, w5
+; NONEON-NOSVE-NEXT: ldrh w0, [sp, #8]
+; NONEON-NOSVE-NEXT: and w14, w14, #0xffff
+; NONEON-NOSVE-NEXT: ldrh w2, [sp, #10]
+; NONEON-NOSVE-NEXT: add w17, w5, w5
+; NONEON-NOSVE-NEXT: stp w14, w16, [sp, #48]
+; NONEON-NOSVE-NEXT: add w16, w4, w4
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24]
-; NONEON-NOSVE-NEXT: strh w14, [sp, #42]
-; NONEON-NOSVE-NEXT: add w14, w4, w4
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #26]
-; NONEON-NOSVE-NEXT: strh w13, [sp, #40]
-; NONEON-NOSVE-NEXT: add w13, w2, w2
-; NONEON-NOSVE-NEXT: ldrh w17, [sp, #22]
-; NONEON-NOSVE-NEXT: strh w14, [sp, #38]
-; NONEON-NOSVE-NEXT: add w14, w0, w0
-; NONEON-NOSVE-NEXT: add w9, w9, w9
-; NONEON-NOSVE-NEXT: strh w13, [sp, #36]
-; NONEON-NOSVE-NEXT: add w13, w18, w18
-; NONEON-NOSVE-NEXT: add w8, w8, w8
-; NONEON-NOSVE-NEXT: strh w14, [sp, #34]
; NONEON-NOSVE-NEXT: ldrh w10, [sp, #28]
; NONEON-NOSVE-NEXT: ldrh w11, [sp, #30]
-; NONEON-NOSVE-NEXT: strh w13, [sp, #32]
; NONEON-NOSVE-NEXT: ldrh w12, [sp, #16]
-; NONEON-NOSVE-NEXT: ldrh w15, [sp, #18]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT: ldrh w16, [sp, #20]
-; NONEON-NOSVE-NEXT: strh w9, [sp, #50]
-; NONEON-NOSVE-NEXT: add w14, w17, w17
+; NONEON-NOSVE-NEXT: ldrh w13, [sp, #18]
+; NONEON-NOSVE-NEXT: ldrh w15, [sp, #20]
+; NONEON-NOSVE-NEXT: ldrh w18, [sp, #22]
+; NONEON-NOSVE-NEXT: and w14, w17, #0xffff
+; NONEON-NOSVE-NEXT: and w16, w16, #0xffff
+; NONEON-NOSVE-NEXT: add w17, w2, w2
+; NONEON-NOSVE-NEXT: stp w16, w14, [sp, #40]
+; NONEON-NOSVE-NEXT: add w14, w0, w0
+; NONEON-NOSVE-NEXT: and w16, w17, #0xffff
+; NONEON-NOSVE-NEXT: add w17, w18, w18
+; NONEON-NOSVE-NEXT: and w14, w14, #0xffff
+; NONEON-NOSVE-NEXT: add w15, w15, w15
+; NONEON-NOSVE-NEXT: add w13, w13, w13
; NONEON-NOSVE-NEXT: add w12, w12, w12
-; NONEON-NOSVE-NEXT: strh w8, [sp, #48]
-; NONEON-NOSVE-NEXT: add w13, w16, w16
; NONEON-NOSVE-NEXT: add w11, w11, w11
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64]
; NONEON-NOSVE-NEXT: add w10, w10, w10
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #78]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #76]
-; NONEON-NOSVE-NEXT: strh w14, [sp, #62]
-; NONEON-NOSVE-NEXT: add w14, w15, w15
-; NONEON-NOSVE-NEXT: strh w13, [sp, #60]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #74]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #72]
-; NONEON-NOSVE-NEXT: strh w14, [sp, #58]
-; NONEON-NOSVE-NEXT: strh w12, [sp, #56]
-; NONEON-NOSVE-NEXT: strh w11, [sp, #54]
-; NONEON-NOSVE-NEXT: strh w10, [sp, #52]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #70]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #68]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #66]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #64]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #94]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #92]
-; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #96]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #90]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #88]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #86]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #84]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #82]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #80]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #128]
+; NONEON-NOSVE-NEXT: add w9, w9, w9
+; NONEON-NOSVE-NEXT: add w8, w8, w8
+; NONEON-NOSVE-NEXT: stp w14, w16, [sp, #32]
+; NONEON-NOSVE-NEXT: and w14, w17, #0xffff
+; NONEON-NOSVE-NEXT: and w15, w15, #0xffff
+; NONEON-NOSVE-NEXT: and w13, w13, #0xffff
+; NONEON-NOSVE-NEXT: and w12, w12, #0xffff
+; NONEON-NOSVE-NEXT: and w11, w11, #0xffff
+; NONEON-NOSVE-NEXT: and w10, w10, #0xffff
+; NONEON-NOSVE-NEXT: and w9, w9, #0xffff
+; NONEON-NOSVE-NEXT: and w8, w8, #0xffff
+; NONEON-NOSVE-NEXT: stp w15, w14, [sp, #88]
+; NONEON-NOSVE-NEXT: stp w12, w13, [sp, #80]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #32]
+; NONEON-NOSVE-NEXT: stp w10, w11, [sp, #72]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64]
+; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT: stp q2, q3, [x1]
; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32]
-; NONEON-NOSVE-NEXT: add sp, sp, #160
+; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
%a = load <16 x i16>, ptr %in
%b = add <16 x i16> %a, %a
@@ -3763,26 +2583,20 @@ define void @zext_v4i16_v4i64(<4 x i16> %a, ptr %out) {
;
; NONEON-NOSVE-LABEL: zext_v4i16_v4i64:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #80
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80
+; NONEON-NOSVE-NEXT: sub sp, sp, #48
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: str d0, [sp, #8]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #40]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #32]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #24]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #40]
-; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #72]
-; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #64]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #32]
-; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #56]
-; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #48]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #16]
+; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16]
; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
-; NONEON-NOSVE-NEXT: add sp, sp, #80
+; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
%b = zext <4 x i16> %a to <4 x i64>
store <4 x i64>%b, ptr %out
@@ -3808,43 +2622,31 @@ define void @zext_v8i16_v8i64(<8 x i16> %a, ptr %out) {
;
; NONEON-NOSVE-LABEL: zext_v8i16_v8i64:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: str q0, [sp, #-160]!
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160
+; NONEON-NOSVE-NEXT: str q0, [sp, #-96]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp]
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #26]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #88]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #30]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #80]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #72]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #18]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #64]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #56]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #22]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #48]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #40]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #88]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #152]
-; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #144]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #80]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #136]
-; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #128]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #72]
-; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #128]
-; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #120]
-; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #112]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #64]
-; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #104]
-; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #96]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #32]
+; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q2, q3, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [x0, #32]
-; NONEON-NOSVE-NEXT: add sp, sp, #160
+; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
%b = zext <8 x i16> %a to <8 x i64>
store <8 x i64>%b, ptr %out
@@ -3883,144 +2685,85 @@ define void @zext_v16i16_v16i64(ptr %in, ptr %out) {
;
; NONEON-NOSVE-LABEL: zext_v16i16_v16i64:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #368
-; NONEON-NOSVE-NEXT: str x29, [sp, #352] // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 368
-; NONEON-NOSVE-NEXT: .cfi_offset w29, -16
+; NONEON-NOSVE-NEXT: sub sp, sp, #160
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #268]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #260]
-; NONEON-NOSVE-NEXT: ldr x29, [sp, #352] // 8-byte Folded Reload
-; NONEON-NOSVE-NEXT: str wzr, [sp, #284]
+; NONEON-NOSVE-NEXT: str wzr, [sp, #92]
+; NONEON-NOSVE-NEXT: str wzr, [sp, #156]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp]
-; NONEON-NOSVE-NEXT: ldrh w13, [sp, #4]
-; NONEON-NOSVE-NEXT: ldrh w14, [sp, #6]
-; NONEON-NOSVE-NEXT: ldrh w3, [sp, #2]
-; NONEON-NOSVE-NEXT: ldrh w5, [sp]
-; NONEON-NOSVE-NEXT: ldrh w2, [sp, #12]
-; NONEON-NOSVE-NEXT: ldrh w4, [sp, #14]
-; NONEON-NOSVE-NEXT: add w13, w13, w13
+; NONEON-NOSVE-NEXT: ldrh w14, [sp]
+; NONEON-NOSVE-NEXT: ldrh w15, [sp, #2]
+; NONEON-NOSVE-NEXT: ldrh w4, [sp, #4]
+; NONEON-NOSVE-NEXT: ldrh w5, [sp, #6]
+; NONEON-NOSVE-NEXT: ldrh w2, [sp, #8]
+; NONEON-NOSVE-NEXT: ldrh w3, [sp, #10]
; NONEON-NOSVE-NEXT: add w14, w14, w14
-; NONEON-NOSVE-NEXT: ldrh w18, [sp, #8]
-; NONEON-NOSVE-NEXT: ldrh w0, [sp, #10]
-; NONEON-NOSVE-NEXT: strh w14, [sp, #54]
-; NONEON-NOSVE-NEXT: add w14, w3, w3
-; NONEON-NOSVE-NEXT: strh w13, [sp, #52]
-; NONEON-NOSVE-NEXT: add w13, w5, w5
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24]
-; NONEON-NOSVE-NEXT: strh w14, [sp, #50]
-; NONEON-NOSVE-NEXT: add w14, w4, w4
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #26]
-; NONEON-NOSVE-NEXT: strh w13, [sp, #48]
-; NONEON-NOSVE-NEXT: add w13, w2, w2
-; NONEON-NOSVE-NEXT: ldrh w17, [sp, #22]
-; NONEON-NOSVE-NEXT: strh w14, [sp, #46]
+; NONEON-NOSVE-NEXT: add w15, w15, w15
+; NONEON-NOSVE-NEXT: ldrh w18, [sp, #12]
+; NONEON-NOSVE-NEXT: and w14, w14, #0xffff
+; NONEON-NOSVE-NEXT: and w15, w15, #0xffff
+; NONEON-NOSVE-NEXT: ldrh w0, [sp, #14]
+; NONEON-NOSVE-NEXT: stp wzr, w15, [sp, #84]
+; NONEON-NOSVE-NEXT: add w15, w4, w4
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28]
+; NONEON-NOSVE-NEXT: stp wzr, w14, [sp, #76]
+; NONEON-NOSVE-NEXT: add w14, w5, w5
+; NONEON-NOSVE-NEXT: and w15, w15, #0xffff
+; NONEON-NOSVE-NEXT: and w14, w14, #0xffff
+; NONEON-NOSVE-NEXT: stp wzr, w15, [sp, #60]
+; NONEON-NOSVE-NEXT: add w15, w3, w3
+; NONEON-NOSVE-NEXT: stp wzr, w14, [sp, #68]
+; NONEON-NOSVE-NEXT: add w14, w2, w2
+; NONEON-NOSVE-NEXT: and w15, w15, #0xffff
+; NONEON-NOSVE-NEXT: and w14, w14, #0xffff
+; NONEON-NOSVE-NEXT: ldrh w9, [sp, #30]
+; NONEON-NOSVE-NEXT: ldrh w10, [sp, #24]
+; NONEON-NOSVE-NEXT: ldrh w11, [sp, #26]
+; NONEON-NOSVE-NEXT: ldrh w13, [sp, #20]
+; NONEON-NOSVE-NEXT: ldrh w12, [sp, #22]
+; NONEON-NOSVE-NEXT: ldrh w16, [sp, #16]
+; NONEON-NOSVE-NEXT: ldrh w17, [sp, #18]
+; NONEON-NOSVE-NEXT: stp wzr, w15, [sp, #52]
+; NONEON-NOSVE-NEXT: add w15, w18, w18
+; NONEON-NOSVE-NEXT: stp wzr, w14, [sp, #44]
; NONEON-NOSVE-NEXT: add w14, w0, w0
-; NONEON-NOSVE-NEXT: add w9, w9, w9
-; NONEON-NOSVE-NEXT: strh w13, [sp, #44]
-; NONEON-NOSVE-NEXT: add w13, w18, w18
-; NONEON-NOSVE-NEXT: add w8, w8, w8
-; NONEON-NOSVE-NEXT: strh w14, [sp, #42]
-; NONEON-NOSVE-NEXT: ldrh w10, [sp, #28]
-; NONEON-NOSVE-NEXT: ldrh w11, [sp, #30]
-; NONEON-NOSVE-NEXT: strh w13, [sp, #40]
-; NONEON-NOSVE-NEXT: ldrh w12, [sp, #16]
-; NONEON-NOSVE-NEXT: ldrh w15, [sp, #18]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #40]
-; NONEON-NOSVE-NEXT: add w14, w17, w17
-; NONEON-NOSVE-NEXT: ldrh w16, [sp, #20]
-; NONEON-NOSVE-NEXT: strh w9, [sp, #58]
+; NONEON-NOSVE-NEXT: and w15, w15, #0xffff
+; NONEON-NOSVE-NEXT: and w14, w14, #0xffff
+; NONEON-NOSVE-NEXT: add w13, w13, w13
+; NONEON-NOSVE-NEXT: stp wzr, w14, [sp, #36]
+; NONEON-NOSVE-NEXT: add w14, w16, w16
; NONEON-NOSVE-NEXT: add w12, w12, w12
-; NONEON-NOSVE-NEXT: strh w8, [sp, #56]
-; NONEON-NOSVE-NEXT: add w11, w11, w11
+; NONEON-NOSVE-NEXT: str w15, [sp, #32]
+; NONEON-NOSVE-NEXT: add w15, w17, w17
; NONEON-NOSVE-NEXT: add w10, w10, w10
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #72]
-; NONEON-NOSVE-NEXT: add w13, w16, w16
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #82]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #80]
-; NONEON-NOSVE-NEXT: strh w14, [sp, #70]
-; NONEON-NOSVE-NEXT: add w14, w15, w15
-; NONEON-NOSVE-NEXT: strh w13, [sp, #68]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #86]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #84]
-; NONEON-NOSVE-NEXT: strh w14, [sp, #66]
-; NONEON-NOSVE-NEXT: strh w12, [sp, #64]
-; NONEON-NOSVE-NEXT: strh w11, [sp, #62]
-; NONEON-NOSVE-NEXT: strh w10, [sp, #60]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #56]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #74]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #72]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #276]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #332]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #78]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #76]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #88]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #120]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #98]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #96]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #324]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #102]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #100]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #184]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #104]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #160]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #90]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #88]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #348]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #94]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #92]
-; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #168]
-; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #152]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #184]
-; NONEON-NOSVE-NEXT: str d0, [sp, #360]
-; NONEON-NOSVE-NEXT: ldp d2, d0, [sp, #136]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #340]
-; NONEON-NOSVE-NEXT: str w9, [sp, #264]
-; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #252]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #192]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #208]
-; NONEON-NOSVE-NEXT: str d2, [sp, #200]
-; NONEON-NOSVE-NEXT: str w9, [sp, #280]
-; NONEON-NOSVE-NEXT: str w8, [sp, #272]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #168]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #300]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #256]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #292]
-; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #232]
-; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #224]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #176]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #316]
-; NONEON-NOSVE-NEXT: str wzr, [sp, #308]
-; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #244]
-; NONEON-NOSVE-NEXT: str w8, [sp, #240]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #216]
-; NONEON-NOSVE-NEXT: ldp q3, q4, [sp, #224]
-; NONEON-NOSVE-NEXT: str w8, [sp, #320]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #364]
-; NONEON-NOSVE-NEXT: str w9, [sp, #328]
-; NONEON-NOSVE-NEXT: str w8, [sp, #344]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #360]
-; NONEON-NOSVE-NEXT: str w8, [sp, #336]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #200]
-; NONEON-NOSVE-NEXT: ldp q6, q7, [sp, #320]
-; NONEON-NOSVE-NEXT: str w9, [sp, #296]
-; NONEON-NOSVE-NEXT: str w8, [sp, #288]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #208]
-; NONEON-NOSVE-NEXT: str w9, [sp, #312]
-; NONEON-NOSVE-NEXT: str w8, [sp, #304]
-; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #288]
+; NONEON-NOSVE-NEXT: add w11, w11, w11
+; NONEON-NOSVE-NEXT: add w9, w9, w9
+; NONEON-NOSVE-NEXT: add w8, w8, w8
+; NONEON-NOSVE-NEXT: and w14, w14, #0xffff
+; NONEON-NOSVE-NEXT: and w15, w15, #0xffff
+; NONEON-NOSVE-NEXT: and w13, w13, #0xffff
+; NONEON-NOSVE-NEXT: and w12, w12, #0xffff
+; NONEON-NOSVE-NEXT: and w10, w10, #0xffff
+; NONEON-NOSVE-NEXT: and w11, w11, #0xffff
+; NONEON-NOSVE-NEXT: and w9, w9, #0xffff
+; NONEON-NOSVE-NEXT: and w8, w8, #0xffff
+; NONEON-NOSVE-NEXT: stp wzr, w15, [sp, #148]
+; NONEON-NOSVE-NEXT: stp wzr, w14, [sp, #140]
+; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64]
+; NONEON-NOSVE-NEXT: stp wzr, w12, [sp, #132]
+; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #32]
+; NONEON-NOSVE-NEXT: stp wzr, w13, [sp, #124]
+; NONEON-NOSVE-NEXT: stp wzr, w11, [sp, #116]
+; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #128]
+; NONEON-NOSVE-NEXT: stp wzr, w10, [sp, #108]
+; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #100]
+; NONEON-NOSVE-NEXT: str w8, [sp, #96]
+; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #96]
; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #32]
; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #64]
; NONEON-NOSVE-NEXT: stp q5, q2, [x1, #96]
-; NONEON-NOSVE-NEXT: add sp, sp, #368
+; NONEON-NOSVE-NEXT: add sp, sp, #160
; NONEON-NOSVE-NEXT: ret
%a = load <16 x i16>, ptr %in
%b = add <16 x i16> %a, %a
@@ -4082,47 +2825,34 @@ define void @zext_v8i32_v8i64(ptr %in, ptr %out) {
;
; NONEON-NOSVE-LABEL: zext_v8i32_v8i64:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #160
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
-; NONEON-NOSVE-NEXT: stp q1, q0, [sp]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp]
-; NONEON-NOSVE-NEXT: ldp w12, w13, [sp, #8]
-; NONEON-NOSVE-NEXT: ldp w14, w15, [sp, #16]
+; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-96]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
+; NONEON-NOSVE-NEXT: ldp w10, w11, [sp]
+; NONEON-NOSVE-NEXT: ldp w14, w15, [sp, #8]
+; NONEON-NOSVE-NEXT: ldp w12, w13, [sp, #16]
+; NONEON-NOSVE-NEXT: add w10, w10, w10
+; NONEON-NOSVE-NEXT: add w11, w11, w11
+; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24]
+; NONEON-NOSVE-NEXT: stp w10, wzr, [sp, #48]
+; NONEON-NOSVE-NEXT: add w10, w15, w15
+; NONEON-NOSVE-NEXT: stp w10, wzr, [sp, #40]
+; NONEON-NOSVE-NEXT: add w10, w14, w14
+; NONEON-NOSVE-NEXT: stp w10, wzr, [sp, #32]
+; NONEON-NOSVE-NEXT: add w10, w13, w13
; NONEON-NOSVE-NEXT: add w9, w9, w9
+; NONEON-NOSVE-NEXT: stp w10, wzr, [sp, #88]
+; NONEON-NOSVE-NEXT: add w10, w12, w12
; NONEON-NOSVE-NEXT: add w8, w8, w8
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40]
-; NONEON-NOSVE-NEXT: add w9, w13, w13
-; NONEON-NOSVE-NEXT: add w8, w12, w12
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32]
-; NONEON-NOSVE-NEXT: add w9, w15, w15
-; NONEON-NOSVE-NEXT: add w8, w14, w14
-; NONEON-NOSVE-NEXT: ldp w10, w11, [sp, #24]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56]
-; NONEON-NOSVE-NEXT: add w9, w11, w11
-; NONEON-NOSVE-NEXT: add w8, w10, w10
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #72]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #120]
-; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #112]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #64]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #104]
-; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #96]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #88]
-; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #96]
-; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #152]
-; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #144]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #80]
-; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #136]
-; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #128]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #128]
+; NONEON-NOSVE-NEXT: stp w11, wzr, [sp, #56]
+; NONEON-NOSVE-NEXT: stp w10, wzr, [sp, #80]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #32]
+; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #72]
+; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #64]
+; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT: stp q2, q3, [x1]
; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32]
-; NONEON-NOSVE-NEXT: add sp, sp, #160
+; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
%a = load <8 x i32>, ptr %in
%b = add <8 x i32> %a, %a
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
index 46a2ce6ed7109..7df362826d052 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
@@ -452,29 +452,23 @@ define void @ucvtf_v4i16_v4f64(ptr %a, ptr %b) {
;
; NONEON-NOSVE-LABEL: ucvtf_v4i16_v4f64:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #80
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80
+; NONEON-NOSVE-NEXT: sub sp, sp, #48
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: ldr d0, [x0]
; NONEON-NOSVE-NEXT: str d0, [sp, #8]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10]
+; NONEON-NOSVE-NEXT: ucvtf d1, w8
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #40]
-; NONEON-NOSVE-NEXT: ucvtf d1, w9
; NONEON-NOSVE-NEXT: ucvtf d0, w8
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #32]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT: ucvtf d1, w9
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32]
+; NONEON-NOSVE-NEXT: ucvtf d1, w8
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12]
; NONEON-NOSVE-NEXT: ucvtf d0, w8
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
+; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16]
; NONEON-NOSVE-NEXT: stp q1, q0, [x1]
-; NONEON-NOSVE-NEXT: add sp, sp, #80
+; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i16>, ptr %a
%res = uitofp <4 x i16> %op1 to <4 x double>
@@ -506,49 +500,36 @@ define void @ucvtf_v8i16_v8f64(ptr %a, ptr %b) {
;
; NONEON-NOSVE-LABEL: ucvtf_v8i16_v8f64:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #160
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160
; NONEON-NOSVE-NEXT: ldr q0, [x0]
-; NONEON-NOSVE-NEXT: str q0, [sp]
+; NONEON-NOSVE-NEXT: str q0, [sp, #-96]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp]
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #26]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26]
+; NONEON-NOSVE-NEXT: ucvtf d1, w8
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #30]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #18]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #22]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #88]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT: ucvtf d1, w9
; NONEON-NOSVE-NEXT: ucvtf d0, w8
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #80]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #144]
-; NONEON-NOSVE-NEXT: ucvtf d1, w9
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80]
+; NONEON-NOSVE-NEXT: ucvtf d1, w8
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28]
; NONEON-NOSVE-NEXT: ucvtf d0, w8
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #72]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #128]
-; NONEON-NOSVE-NEXT: ucvtf d1, w9
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64]
+; NONEON-NOSVE-NEXT: ucvtf d1, w8
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64]
; NONEON-NOSVE-NEXT: ucvtf d0, w8
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #64]
-; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #128]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #112]
-; NONEON-NOSVE-NEXT: ucvtf d1, w9
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48]
+; NONEON-NOSVE-NEXT: ucvtf d1, w8
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20]
; NONEON-NOSVE-NEXT: ucvtf d0, w8
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32]
+; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q2, q3, [x1]
; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32]
-; NONEON-NOSVE-NEXT: add sp, sp, #160
+; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i16>, ptr %a
%res = uitofp <8 x i16> %op1 to <8 x double>
@@ -602,92 +583,63 @@ define void @ucvtf_v16i16_v16f64(ptr %a, ptr %b) {
;
; NONEON-NOSVE-LABEL: ucvtf_v16i16_v16f64:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #336
-; NONEON-NOSVE-NEXT: str x29, [sp, #320] // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 336
-; NONEON-NOSVE-NEXT: .cfi_offset w29, -16
+; NONEON-NOSVE-NEXT: sub sp, sp, #192
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 192
; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
-; NONEON-NOSVE-NEXT: ldr x29, [sp, #320] // 8-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q0, q1, [sp]
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #40]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32]
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #50]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #48]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #54]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #52]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #56]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #42]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #42]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48]
+; NONEON-NOSVE-NEXT: ucvtf d1, w8
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #40]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #88]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #46]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #44]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #66]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #64]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #152]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #72]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #70]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #68]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #58]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #56]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #136]
-; NONEON-NOSVE-NEXT: ldp d2, d1, [sp, #120]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #62]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #60]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104]
-; NONEON-NOSVE-NEXT: str d1, [sp, #328]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #104]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #160]
-; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #176]
-; NONEON-NOSVE-NEXT: str d0, [sp, #168]
-; NONEON-NOSVE-NEXT: ucvtf d1, w9
; NONEON-NOSVE-NEXT: ucvtf d0, w8
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #152]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #240]
-; NONEON-NOSVE-NEXT: ucvtf d1, w9
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #46]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #112]
+; NONEON-NOSVE-NEXT: ucvtf d1, w8
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #44]
; NONEON-NOSVE-NEXT: ucvtf d0, w8
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #144]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #224]
-; NONEON-NOSVE-NEXT: ucvtf d1, w9
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #34]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96]
+; NONEON-NOSVE-NEXT: ucvtf d1, w8
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #32]
; NONEON-NOSVE-NEXT: ucvtf d0, w8
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #136]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #208]
-; NONEON-NOSVE-NEXT: ucvtf d1, w9
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #38]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80]
+; NONEON-NOSVE-NEXT: ucvtf d1, w8
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #36]
; NONEON-NOSVE-NEXT: ucvtf d0, w8
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #332]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #192]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #58]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64]
; NONEON-NOSVE-NEXT: ucvtf d1, w8
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #328]
-; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #192]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #56]
+; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #64]
; NONEON-NOSVE-NEXT: ucvtf d0, w8
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #184]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #304]
-; NONEON-NOSVE-NEXT: ucvtf d1, w9
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #62]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #176]
+; NONEON-NOSVE-NEXT: ucvtf d1, w8
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #60]
; NONEON-NOSVE-NEXT: ucvtf d0, w8
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #176]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #288]
-; NONEON-NOSVE-NEXT: ucvtf d1, w9
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #50]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #160]
+; NONEON-NOSVE-NEXT: ucvtf d1, w8
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #48]
+; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #160]
; NONEON-NOSVE-NEXT: ucvtf d0, w8
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #168]
-; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #288]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #272]
-; NONEON-NOSVE-NEXT: ucvtf d1, w9
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #54]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #144]
+; NONEON-NOSVE-NEXT: ucvtf d1, w8
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #52]
; NONEON-NOSVE-NEXT: ucvtf d0, w8
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #256]
-; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #224]
-; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #256]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #128]
+; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #96]
+; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #128]
; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #32]
; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #64]
; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
; NONEON-NOSVE-NEXT: stp q5, q2, [x1, #96]
-; NONEON-NOSVE-NEXT: add sp, sp, #336
+; NONEON-NOSVE-NEXT: add sp, sp, #192
; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%res = uitofp <16 x i16> %op1 to <16 x double>
@@ -1891,29 +1843,23 @@ define void @scvtf_v4i16_v4f64(ptr %a, ptr %b) {
;
; NONEON-NOSVE-LABEL: scvtf_v4i16_v4f64:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #80
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80
+; NONEON-NOSVE-NEXT: sub sp, sp, #48
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: ldr d0, [x0]
; NONEON-NOSVE-NEXT: str d0, [sp, #8]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10]
+; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #10]
+; NONEON-NOSVE-NEXT: scvtf d1, w8
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #8]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #12]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #40]
-; NONEON-NOSVE-NEXT: scvtf d1, w9
; NONEON-NOSVE-NEXT: scvtf d0, w8
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #32]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT: scvtf d1, w9
+; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #14]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32]
+; NONEON-NOSVE-NEXT: scvtf d1, w8
+; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #12]
; NONEON-NOSVE-NEXT: scvtf d0, w8
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
+; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16]
; NONEON-NOSVE-NEXT: stp q1, q0, [x1]
-; NONEON-NOSVE-NEXT: add sp, sp, #80
+; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i16>, ptr %a
%res = sitofp <4 x i16> %op1 to <4 x double>
@@ -1945,49 +1891,36 @@ define void @scvtf_v8i16_v8f64(ptr %a, ptr %b) {
;
; NONEON-NOSVE-LABEL: scvtf_v8i16_v8f64:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #160
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160
; NONEON-NOSVE-NEXT: ldr q0, [x0]
-; NONEON-NOSVE-NEXT: str q0, [sp]
+; NONEON-NOSVE-NEXT: str q0, [sp, #-96]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp]
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #26]
+; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #26]
+; NONEON-NOSVE-NEXT: scvtf d1, w8
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #24]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #30]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #28]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #18]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #22]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #88]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT: scvtf d1, w9
; NONEON-NOSVE-NEXT: scvtf d0, w8
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #80]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #144]
-; NONEON-NOSVE-NEXT: scvtf d1, w9
+; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #30]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80]
+; NONEON-NOSVE-NEXT: scvtf d1, w8
+; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #28]
; NONEON-NOSVE-NEXT: scvtf d0, w8
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #72]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #128]
-; NONEON-NOSVE-NEXT: scvtf d1, w9
+; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #18]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64]
+; NONEON-NOSVE-NEXT: scvtf d1, w8
+; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64]
; NONEON-NOSVE-NEXT: scvtf d0, w8
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #64]
-; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #128]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #112]
-; NONEON-NOSVE-NEXT: scvtf d1, w9
+; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #22]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48]
+; NONEON-NOSVE-NEXT: scvtf d1, w8
+; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20]
; NONEON-NOSVE-NEXT: scvtf d0, w8
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32]
+; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q2, q3, [x1]
; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32]
-; NONEON-NOSVE-NEXT: add sp, sp, #160
+; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i16>, ptr %a
%res = sitofp <8 x i16> %op1 to <8 x double>
@@ -2041,92 +1974,63 @@ define void @scvtf_v16i16_v16f64(ptr %a, ptr %b) {
;
; NONEON-NOSVE-LABEL: scvtf_v16i16_v16f64:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #336
-; NONEON-NOSVE-NEXT: str x29, [sp, #320] // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 336
-; NONEON-NOSVE-NEXT: .cfi_offset w29, -16
+; NONEON-NOSVE-NEXT: sub sp, sp, #192
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 192
; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
-; NONEON-NOSVE-NEXT: ldr x29, [sp, #320] // 8-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q0, q1, [sp]
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #40]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32]
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #50]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #48]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #54]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #52]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #56]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #42]
+; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #42]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48]
+; NONEON-NOSVE-NEXT: scvtf d1, w8
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #40]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #88]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #46]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #44]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #66]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #64]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #152]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #72]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #70]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #68]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #58]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #56]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #136]
-; NONEON-NOSVE-NEXT: ldp d2, d1, [sp, #120]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112]
-; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #62]
-; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #60]
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104]
-; NONEON-NOSVE-NEXT: str d1, [sp, #328]
-; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #104]
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #160]
-; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #176]
-; NONEON-NOSVE-NEXT: str d0, [sp, #168]
-; NONEON-NOSVE-NEXT: scvtf d1, w9
; NONEON-NOSVE-NEXT: scvtf d0, w8
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #152]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #240]
-; NONEON-NOSVE-NEXT: scvtf d1, w9
+; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #46]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #112]
+; NONEON-NOSVE-NEXT: scvtf d1, w8
+; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #44]
; NONEON-NOSVE-NEXT: scvtf d0, w8
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #144]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #224]
-; NONEON-NOSVE-NEXT: scvtf d1, w9
+; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #34]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96]
+; NONEON-NOSVE-NEXT: scvtf d1, w8
+; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #32]
; NONEON-NOSVE-NEXT: scvtf d0, w8
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #136]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #208]
-; NONEON-NOSVE-NEXT: scvtf d1, w9
+; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #38]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80]
+; NONEON-NOSVE-NEXT: scvtf d1, w8
+; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #36]
; NONEON-NOSVE-NEXT: scvtf d0, w8
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #332]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #192]
+; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #58]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64]
; NONEON-NOSVE-NEXT: scvtf d1, w8
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #328]
-; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #192]
+; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #56]
+; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #64]
; NONEON-NOSVE-NEXT: scvtf d0, w8
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #184]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #304]
-; NONEON-NOSVE-NEXT: scvtf d1, w9
+; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #62]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #176]
+; NONEON-NOSVE-NEXT: scvtf d1, w8
+; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #60]
; NONEON-NOSVE-NEXT: scvtf d0, w8
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #176]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #288]
-; NONEON-NOSVE-NEXT: scvtf d1, w9
+; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #50]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #160]
+; NONEON-NOSVE-NEXT: scvtf d1, w8
+; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #48]
+; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #160]
; NONEON-NOSVE-NEXT: scvtf d0, w8
-; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #168]
-; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #288]
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #272]
-; NONEON-NOSVE-NEXT: scvtf d1, w9
+; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #54]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #144]
+; NONEON-NOSVE-NEXT: scvtf d1, w8
+; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #52]
; NONEON-NOSVE-NEXT: scvtf d0, w8
-; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #256]
-; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #224]
-; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #256]
+; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #128]
+; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #96]
+; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #128]
; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #32]
; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #64]
; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
; NONEON-NOSVE-NEXT: stp q5, q2, [x1, #96]
-; NONEON-NOSVE-NEXT: add sp, sp, #336
+; NONEON-NOSVE-NEXT: add sp, sp, #192
; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%res = sitofp <16 x i16> %op1 to <16 x double>
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
index e8c9704940c70..e6c6003ee6c69 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
@@ -28,23 +28,17 @@ define void @alloc_v4i8(ptr %st_ptr) nounwind {
;
; NONEON-NOSVE-LABEL: alloc_v4i8:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #48
-; NONEON-NOSVE-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: sub sp, sp, #32
+; NONEON-NOSVE-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: mov x19, x0
-; NONEON-NOSVE-NEXT: add x0, sp, #28
+; NONEON-NOSVE-NEXT: add x0, sp, #12
; NONEON-NOSVE-NEXT: bl def
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #12]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #8]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
-; NONEON-NOSVE-NEXT: str d0, [sp, #16]
-; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20]
-; NONEON-NOSVE-NEXT: ldrh w9, [sp, #16]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14]
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12]
; NONEON-NOSVE-NEXT: strb w8, [x19, #1]
; NONEON-NOSVE-NEXT: strb w9, [x19]
-; NONEON-NOSVE-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT: add sp, sp, #48
+; NONEON-NOSVE-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
%alloc = alloca [4 x i8]
call void @def(ptr %alloc)
diff --git a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
index 75c5bee2ae0ab..68a9dff812329 100644
--- a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
@@ -1245,54 +1245,48 @@ define void @zext_v16i4_to_v16i32_in_loop(ptr %src, ptr %dst) {
; CHECK-NEXT: ldr x9, [x0, x8]
; CHECK-NEXT: add x8, x8, #16
; CHECK-NEXT: cmp x8, #128
-; CHECK-NEXT: and w11, w9, #0xf
-; CHECK-NEXT: ubfx w10, w9, #4, #4
-; CHECK-NEXT: fmov s1, w11
-; CHECK-NEXT: mov.b v1[1], w10
-; CHECK-NEXT: ubfx w10, w9, #8, #4
-; CHECK-NEXT: mov.b v1[2], w10
-; CHECK-NEXT: ubfx w10, w9, #12, #4
-; CHECK-NEXT: mov.b v1[3], w10
-; CHECK-NEXT: ubfx w10, w9, #16, #4
-; CHECK-NEXT: mov.b v1[4], w10
-; CHECK-NEXT: ubfx w10, w9, #20, #4
-; CHECK-NEXT: mov.b v1[5], w10
-; CHECK-NEXT: ubfx w10, w9, #24, #4
-; CHECK-NEXT: mov.b v1[6], w10
-; CHECK-NEXT: lsr w10, w9, #28
-; CHECK-NEXT: mov.b v1[7], w10
-; CHECK-NEXT: ubfx x10, x9, #32, #4
-; CHECK-NEXT: mov.b v1[8], w10
-; CHECK-NEXT: ubfx x10, x9, #36, #4
-; CHECK-NEXT: mov.b v1[9], w10
-; CHECK-NEXT: ubfx x10, x9, #40, #4
-; CHECK-NEXT: mov.b v1[10], w10
-; CHECK-NEXT: ubfx x10, x9, #44, #4
-; CHECK-NEXT: mov.b v1[11], w10
-; CHECK-NEXT: ubfx x10, x9, #48, #4
-; CHECK-NEXT: mov.b v1[12], w10
+; CHECK-NEXT: ubfx x12, x9, #48, #4
; CHECK-NEXT: ubfx x10, x9, #52, #4
-; CHECK-NEXT: mov.b v1[13], w10
-; CHECK-NEXT: ubfx x10, x9, #56, #4
-; CHECK-NEXT: lsr x9, x9, #60
-; CHECK-NEXT: mov.b v1[14], w10
-; CHECK-NEXT: mov.b v1[15], w9
-; CHECK-NEXT: ext.16b v2, v1, v1, #8
-; CHECK-NEXT: zip2.8b v3, v1, v0
-; CHECK-NEXT: zip1.8b v1, v1, v0
-; CHECK-NEXT: zip2.8b v4, v2, v0
-; CHECK-NEXT: zip1.8b v2, v2, v0
-; CHECK-NEXT: ushll.4s v3, v3, #0
+; CHECK-NEXT: ubfx x14, x9, #32, #4
+; CHECK-NEXT: ubfx w15, w9, #16, #4
+; CHECK-NEXT: ubfx x11, x9, #36, #4
+; CHECK-NEXT: ubfx w13, w9, #20, #4
+; CHECK-NEXT: fmov s1, w12
+; CHECK-NEXT: fmov s2, w14
+; CHECK-NEXT: ubfx w12, w9, #4, #4
+; CHECK-NEXT: fmov s3, w15
+; CHECK-NEXT: mov.h v1[1], w10
+; CHECK-NEXT: and w10, w9, #0xf
+; CHECK-NEXT: mov.h v2[1], w11
+; CHECK-NEXT: fmov s4, w10
+; CHECK-NEXT: ubfx x11, x9, #56, #4
+; CHECK-NEXT: mov.h v3[1], w13
+; CHECK-NEXT: ubfx x10, x9, #40, #4
+; CHECK-NEXT: mov.h v4[1], w12
+; CHECK-NEXT: ubfx w12, w9, #24, #4
+; CHECK-NEXT: mov.h v1[2], w11
+; CHECK-NEXT: ubfx w11, w9, #8, #4
+; CHECK-NEXT: mov.h v2[2], w10
+; CHECK-NEXT: lsr x10, x9, #60
+; CHECK-NEXT: mov.h v3[2], w12
+; CHECK-NEXT: ubfx x12, x9, #44, #4
+; CHECK-NEXT: mov.h v4[2], w11
+; CHECK-NEXT: lsr w11, w9, #28
+; CHECK-NEXT: ubfx w9, w9, #12, #4
+; CHECK-NEXT: mov.h v1[3], w10
+; CHECK-NEXT: mov.h v2[3], w12
+; CHECK-NEXT: mov.h v3[3], w11
+; CHECK-NEXT: mov.h v4[3], w9
; CHECK-NEXT: ushll.4s v1, v1, #0
-; CHECK-NEXT: and.16b v3, v3, v0
-; CHECK-NEXT: ushll.4s v4, v4, #0
; CHECK-NEXT: ushll.4s v2, v2, #0
+; CHECK-NEXT: ushll.4s v3, v3, #0
+; CHECK-NEXT: ushll.4s v4, v4, #0
; CHECK-NEXT: and.16b v1, v1, v0
-; CHECK-NEXT: and.16b v4, v4, v0
; CHECK-NEXT: and.16b v2, v2, v0
-; CHECK-NEXT: stp q1, q3, [x1]
-; CHECK-NEXT: stp q2, q4, [x1, #32]
-; CHECK-NEXT: add x1, x1, #64
+; CHECK-NEXT: and.16b v3, v3, v0
+; CHECK-NEXT: and.16b v4, v4, v0
+; CHECK-NEXT: stp q2, q1, [x1, #32]
+; CHECK-NEXT: stp q4, q3, [x1], #64
; CHECK-NEXT: b.ne LBB13_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
@@ -1306,59 +1300,54 @@ define void @zext_v16i4_to_v16i32_in_loop(ptr %src, ptr %dst) {
; CHECK-BE-NEXT: ldr x9, [x0, x8]
; CHECK-BE-NEXT: add x8, x8, #16
; CHECK-BE-NEXT: cmp x8, #128
-; CHECK-BE-NEXT: lsr x10, x9, #60
-; CHECK-BE-NEXT: ubfx x11, x9, #56, #4
-; CHECK-BE-NEXT: fmov s1, w10
-; CHECK-BE-NEXT: ubfx x10, x9, #52, #4
-; CHECK-BE-NEXT: mov v1.b[1], w11
-; CHECK-BE-NEXT: mov v1.b[2], w10
-; CHECK-BE-NEXT: ubfx x10, x9, #48, #4
-; CHECK-BE-NEXT: mov v1.b[3], w10
-; CHECK-BE-NEXT: ubfx x10, x9, #44, #4
-; CHECK-BE-NEXT: mov v1.b[4], w10
-; CHECK-BE-NEXT: ubfx x10, x9, #40, #4
-; CHECK-BE-NEXT: mov v1.b[5], w10
-; CHECK-BE-NEXT: ubfx x10, x9, #36, #4
-; CHECK-BE-NEXT: mov v1.b[6], w10
-; CHECK-BE-NEXT: ubfx x10, x9, #32, #4
-; CHECK-BE-NEXT: mov v1.b[7], w10
-; CHECK-BE-NEXT: lsr w10, w9, #28
-; CHECK-BE-NEXT: mov v1.b[8], w10
-; CHECK-BE-NEXT: ubfx w10, w9, #24, #4
-; CHECK-BE-NEXT: mov v1.b[9], w10
-; CHECK-BE-NEXT: ubfx w10, w9, #20, #4
-; CHECK-BE-NEXT: mov v1.b[10], w10
-; CHECK-BE-NEXT: ubfx w10, w9, #16, #4
-; CHECK-BE-NEXT: mov v1.b[11], w10
-; CHECK-BE-NEXT: ubfx w10, w9, #12, #4
-; CHECK-BE-NEXT: mov v1.b[12], w10
+; CHECK-BE-NEXT: ubfx w12, w9, #12, #4
+; CHECK-BE-NEXT: lsr w14, w9, #28
; CHECK-BE-NEXT: ubfx w10, w9, #8, #4
-; CHECK-BE-NEXT: mov v1.b[13], w10
-; CHECK-BE-NEXT: ubfx w10, w9, #4, #4
-; CHECK-BE-NEXT: and w9, w9, #0xf
-; CHECK-BE-NEXT: mov v1.b[14], w10
+; CHECK-BE-NEXT: ubfx x15, x9, #44, #4
+; CHECK-BE-NEXT: ubfx w11, w9, #24, #4
+; CHECK-BE-NEXT: ubfx x13, x9, #40, #4
+; CHECK-BE-NEXT: fmov s1, w12
+; CHECK-BE-NEXT: lsr x12, x9, #60
+; CHECK-BE-NEXT: fmov s2, w14
+; CHECK-BE-NEXT: fmov s3, w15
+; CHECK-BE-NEXT: fmov s4, w12
+; CHECK-BE-NEXT: ubfx w12, w9, #20, #4
+; CHECK-BE-NEXT: mov v1.h[1], w10
+; CHECK-BE-NEXT: ubfx x10, x9, #56, #4
+; CHECK-BE-NEXT: mov v2.h[1], w11
+; CHECK-BE-NEXT: ubfx w11, w9, #4, #4
+; CHECK-BE-NEXT: mov v3.h[1], w13
+; CHECK-BE-NEXT: mov v4.h[1], w10
+; CHECK-BE-NEXT: ubfx x10, x9, #36, #4
+; CHECK-BE-NEXT: mov v1.h[2], w11
+; CHECK-BE-NEXT: ubfx x11, x9, #52, #4
+; CHECK-BE-NEXT: mov v2.h[2], w12
+; CHECK-BE-NEXT: mov v3.h[2], w10
+; CHECK-BE-NEXT: and w10, w9, #0xf
+; CHECK-BE-NEXT: ubfx w12, w9, #16, #4
+; CHECK-BE-NEXT: mov v4.h[2], w11
+; CHECK-BE-NEXT: ubfx x11, x9, #32, #4
+; CHECK-BE-NEXT: ubfx x9, x9, #48, #4
+; CHECK-BE-NEXT: mov v1.h[3], w10
+; CHECK-BE-NEXT: mov v2.h[3], w12
; CHECK-BE-NEXT: add x10, x1, #32
-; CHECK-BE-NEXT: mov v1.b[15], w9
-; CHECK-BE-NEXT: add x9, x1, #16
-; CHECK-BE-NEXT: ext v2.16b, v1.16b, v1.16b, #8
-; CHECK-BE-NEXT: zip2 v3.8b, v1.8b, v0.8b
-; CHECK-BE-NEXT: zip1 v1.8b, v1.8b, v0.8b
-; CHECK-BE-NEXT: zip2 v4.8b, v2.8b, v0.8b
-; CHECK-BE-NEXT: zip1 v2.8b, v2.8b, v0.8b
-; CHECK-BE-NEXT: ushll v3.4s, v3.4h, #0
+; CHECK-BE-NEXT: mov v3.h[3], w11
+; CHECK-BE-NEXT: mov v4.h[3], w9
+; CHECK-BE-NEXT: add x9, x1, #48
; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-BE-NEXT: and v3.16b, v3.16b, v0.16b
-; CHECK-BE-NEXT: ushll v4.4s, v4.4h, #0
; CHECK-BE-NEXT: ushll v2.4s, v2.4h, #0
+; CHECK-BE-NEXT: ushll v3.4s, v3.4h, #0
+; CHECK-BE-NEXT: ushll v4.4s, v4.4h, #0
; CHECK-BE-NEXT: and v1.16b, v1.16b, v0.16b
-; CHECK-BE-NEXT: st1 { v3.4s }, [x9]
-; CHECK-BE-NEXT: add x9, x1, #48
-; CHECK-BE-NEXT: and v4.16b, v4.16b, v0.16b
; CHECK-BE-NEXT: and v2.16b, v2.16b, v0.16b
-; CHECK-BE-NEXT: st1 { v1.4s }, [x1]
-; CHECK-BE-NEXT: add x1, x1, #64
-; CHECK-BE-NEXT: st1 { v4.4s }, [x9]
+; CHECK-BE-NEXT: and v3.16b, v3.16b, v0.16b
+; CHECK-BE-NEXT: and v4.16b, v4.16b, v0.16b
+; CHECK-BE-NEXT: st1 { v1.4s }, [x9]
+; CHECK-BE-NEXT: add x9, x1, #16
; CHECK-BE-NEXT: st1 { v2.4s }, [x10]
+; CHECK-BE-NEXT: st1 { v4.4s }, [x1]
+; CHECK-BE-NEXT: add x1, x1, #64
+; CHECK-BE-NEXT: st1 { v3.4s }, [x9]
; CHECK-BE-NEXT: b.ne .LBB13_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
More information about the llvm-commits
mailing list