[llvm] [AArch64] Enable aggressivelyPreferBuildVectorSources (PR #142729)

David Green via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 4 00:11:44 PDT 2025


https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/142729

This helps to remove some inefficient buildvector lowering by converting extract_vector_elt(buildvector) to the original source. It seems to be a general improvement.

From 6d8e750f3d4d558251dfb2cc6c561614724a6b61 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Wed, 4 Jun 2025 08:07:26 +0100
Subject: [PATCH] [AArch64] Enable aggressivelyPreferBuildVectorSources

This helps to remove some inefficient buildvector lowering by converting
extract_vector_elt(buildvector) to the original source. It seems to be a
general improvement.
---
 llvm/lib/Target/AArch64/AArch64ISelLowering.h |    4 +
 .../GlobalISel/combine-build-vector.mir       |    8 +-
 .../GlobalISel/combine-extract-vec-elt.mir    |    4 +-
 .../GlobalISel/combine-insert-vec-elt.mir     |    2 +-
 .../test/CodeGen/AArch64/fptosi-sat-vector.ll |  576 ++-
 .../test/CodeGen/AArch64/fptoui-sat-vector.ll |  388 +-
 llvm/test/CodeGen/AArch64/fptrunc.ll          |   28 +-
 llvm/test/CodeGen/AArch64/itofp.ll            |  220 +-
 llvm/test/CodeGen/AArch64/sext.ll             |    6 +-
 ...sve-streaming-mode-fixed-length-bitcast.ll |   19 +-
 ...e-streaming-mode-fixed-length-ext-loads.ll |  260 +-
 ...aming-mode-fixed-length-fp-extend-trunc.ll |  234 +-
 ...streaming-mode-fixed-length-int-extends.ll | 3956 ++++++-----------
 ...e-streaming-mode-fixed-length-int-to-fp.ll |  376 +-
 ...-streaming-mode-fixed-length-ld2-alloca.ll |   20 +-
 llvm/test/CodeGen/AArch64/zext-to-tbl.ll      |  167 +-
 16 files changed, 2316 insertions(+), 3952 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index b2174487c2fe8..f193fa05dd161 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -449,6 +449,10 @@ class AArch64TargetLowering : public TargetLowering {
   /// Enable aggressive FMA fusion on targets that want it.
   bool enableAggressiveFMAFusion(EVT VT) const override;
 
+  bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override {
+    return true;
+  }
+
   /// Returns the size of the platform's va_list object.
   unsigned getVaListSizeInBits(const DataLayout &DL) const override;
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
index 93f6051c3bd3b..5189582d0b6ac 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
@@ -55,13 +55,9 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %arg1:_(s64) = COPY $x0
     ; CHECK-NEXT: %arg2:_(s64) = COPY $x1
-    ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
-    ; CHECK-NEXT: %one:_(s64) = G_CONSTANT i64 1
     ; CHECK-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
-    ; CHECK-NEXT: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s64)
-    ; CHECK-NEXT: %extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s64)
-    ; CHECK-NEXT: $x0 = COPY %extract(s64)
-    ; CHECK-NEXT: $x1 = COPY %extract2(s64)
+    ; CHECK-NEXT: $x0 = COPY %arg1(s64)
+    ; CHECK-NEXT: $x1 = COPY %arg2(s64)
     ; CHECK-NEXT: $q0 = COPY %bv(<2 x s64>)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %arg1:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
index e2933690c7c55..e81447a1de4b6 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
@@ -175,10 +175,8 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %arg1:_(s64) = COPY $x0
     ; CHECK-NEXT: %arg2:_(s64) = COPY $x1
-    ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
     ; CHECK-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
-    ; CHECK-NEXT: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s64)
-    ; CHECK-NEXT: $x0 = COPY %extract(s64)
+    ; CHECK-NEXT: $x0 = COPY %arg1(s64)
     ; CHECK-NEXT: $q0 = COPY %bv(<2 x s64>)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %arg1:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir
index c000a8e635bc6..86c0575961a17 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir
@@ -278,8 +278,8 @@ body:             |
     ; CHECK: liveins: $x0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 127
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
     ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s8>) = G_BUILD_VECTOR [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8)
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
     ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s8>), [[COPY]](p0) :: (store (<32 x s8>))
     ; CHECK-NEXT: RET_ReallyLR
     %3:_(s8) = G_CONSTANT i8 127
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index a33b1ef569fc3..04dfdedb42752 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -726,7 +726,7 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
 ; CHECK-SD-LABEL: test_signed_v3f128_v3i32:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    sub sp, sp, #128
-; CHECK-SD-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp x30, x23, [sp, #80] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    stp x22, x21, [sp, #96] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    stp x20, x19, [sp, #112] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 128
@@ -734,13 +734,13 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset w20, -16
 ; CHECK-SD-NEXT:    .cfi_offset w21, -24
 ; CHECK-SD-NEXT:    .cfi_offset w22, -32
+; CHECK-SD-NEXT:    .cfi_offset w23, -40
 ; CHECK-SD-NEXT:    .cfi_offset w30, -48
-; CHECK-SD-NEXT:    stp q0, q2, [sp, #48] // 32-byte Folded Spill
-; CHECK-SD-NEXT:    mov v2.16b, v1.16b
+; CHECK-SD-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    adrp x8, .LCPI16_0
-; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI16_0]
 ; CHECK-SD-NEXT:    mov v0.16b, v2.16b
+; CHECK-SD-NEXT:    stp q2, q1, [sp, #32] // 32-byte Folded Spill
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI16_0]
 ; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __getf2
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
@@ -755,15 +755,15 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
 ; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __gttf2
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    mov w21, #2147483647 // =0x7fffffff
+; CHECK-SD-NEXT:    mov w22, #2147483647 // =0x7fffffff
 ; CHECK-SD-NEXT:    cmp w0, #0
-; CHECK-SD-NEXT:    csel w19, w21, w19, gt
+; CHECK-SD-NEXT:    csel w19, w22, w19, gt
 ; CHECK-SD-NEXT:    mov v1.16b, v0.16b
 ; CHECK-SD-NEXT:    bl __unordtf2
 ; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    cmp w0, #0
-; CHECK-SD-NEXT:    csel w22, wzr, w19, ne
+; CHECK-SD-NEXT:    csel w21, wzr, w19, ne
 ; CHECK-SD-NEXT:    bl __getf2
 ; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov w19, w0
@@ -775,16 +775,13 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
 ; CHECK-SD-NEXT:    bl __gttf2
 ; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    cmp w0, #0
-; CHECK-SD-NEXT:    csel w19, w21, w19, gt
+; CHECK-SD-NEXT:    csel w19, w22, w19, gt
 ; CHECK-SD-NEXT:    mov v1.16b, v0.16b
 ; CHECK-SD-NEXT:    bl __unordtf2
-; CHECK-SD-NEXT:    cmp w0, #0
-; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    csel w8, wzr, w19, ne
-; CHECK-SD-NEXT:    fmov s0, w8
-; CHECK-SD-NEXT:    mov v0.s[1], w22
-; CHECK-SD-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    csel w23, wzr, w19, ne
 ; CHECK-SD-NEXT:    bl __getf2
 ; CHECK-SD-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov w19, w0
@@ -796,16 +793,17 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
 ; CHECK-SD-NEXT:    bl __gttf2
 ; CHECK-SD-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    cmp w0, #0
-; CHECK-SD-NEXT:    csel w19, w21, w19, gt
+; CHECK-SD-NEXT:    csel w19, w22, w19, gt
 ; CHECK-SD-NEXT:    mov v1.16b, v0.16b
 ; CHECK-SD-NEXT:    bl __unordtf2
 ; CHECK-SD-NEXT:    cmp w0, #0
-; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    csel w8, wzr, w19, ne
 ; CHECK-SD-NEXT:    ldp x20, x19, [sp, #112] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    mov v0.s[1], w23
+; CHECK-SD-NEXT:    ldp x30, x23, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v0.s[2], w21
 ; CHECK-SD-NEXT:    ldp x22, x21, [sp, #96] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    mov v0.s[2], w8
 ; CHECK-SD-NEXT:    add sp, sp, #128
 ; CHECK-SD-NEXT:    ret
 ;
@@ -4890,63 +4888,61 @@ define <16 x i16> @test_signed_v16f16_v16i16(<16 x half> %f) {
 define <8 x i8> @test_signed_v8f64_v8i8(<8 x double> %f) {
 ; CHECK-SD-LABEL: test_signed_v8f64_v8i8:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    mov d4, v3.d[1]
-; CHECK-SD-NEXT:    fcvtzs w11, d3
-; CHECK-SD-NEXT:    mov w9, #127 // =0x7f
-; CHECK-SD-NEXT:    mov d3, v1.d[1]
-; CHECK-SD-NEXT:    fcvtzs w13, d2
-; CHECK-SD-NEXT:    fcvtzs w15, d1
-; CHECK-SD-NEXT:    fcvtzs w17, d0
-; CHECK-SD-NEXT:    fcvtzs w8, d4
-; CHECK-SD-NEXT:    mov d4, v2.d[1]
-; CHECK-SD-NEXT:    mov d2, v0.d[1]
-; CHECK-SD-NEXT:    fcvtzs w14, d3
-; CHECK-SD-NEXT:    cmp w8, #127
-; CHECK-SD-NEXT:    fcvtzs w12, d4
-; CHECK-SD-NEXT:    fcvtzs w16, d2
-; CHECK-SD-NEXT:    csel w10, w8, w9, lt
-; CHECK-SD-NEXT:    mov w8, #-128 // =0xffffff80
+; CHECK-SD-NEXT:    mov d4, v0.d[1]
+; CHECK-SD-NEXT:    fcvtzs w10, d0
+; CHECK-SD-NEXT:    mov w8, #127 // =0x7f
+; CHECK-SD-NEXT:    fcvtzs w12, d1
+; CHECK-SD-NEXT:    mov d1, v1.d[1]
+; CHECK-SD-NEXT:    fcvtzs w9, d4
+; CHECK-SD-NEXT:    cmp w9, #127
+; CHECK-SD-NEXT:    csel w11, w9, w8, lt
+; CHECK-SD-NEXT:    mov w9, #-128 // =0xffffff80
+; CHECK-SD-NEXT:    cmn w11, #128
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
+; CHECK-SD-NEXT:    cmn w10, #128
+; CHECK-SD-NEXT:    csel w10, w10, w9, gt
+; CHECK-SD-NEXT:    cmp w12, #127
+; CHECK-SD-NEXT:    fmov s0, w10
+; CHECK-SD-NEXT:    fcvtzs w10, d1
+; CHECK-SD-NEXT:    mov d1, v2.d[1]
+; CHECK-SD-NEXT:    mov v0.b[1], w11
+; CHECK-SD-NEXT:    csel w11, w12, w8, lt
+; CHECK-SD-NEXT:    cmn w11, #128
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
+; CHECK-SD-NEXT:    mov v0.b[2], w11
+; CHECK-SD-NEXT:    fcvtzs w11, d2
 ; CHECK-SD-NEXT:    cmn w10, #128
-; CHECK-SD-NEXT:    csel w10, w10, w8, gt
+; CHECK-SD-NEXT:    csel w10, w10, w9, gt
 ; CHECK-SD-NEXT:    cmp w11, #127
-; CHECK-SD-NEXT:    csel w11, w11, w9, lt
+; CHECK-SD-NEXT:    mov v0.b[3], w10
+; CHECK-SD-NEXT:    fcvtzs w10, d1
+; CHECK-SD-NEXT:    csel w11, w11, w8, lt
+; CHECK-SD-NEXT:    mov d1, v3.d[1]
 ; CHECK-SD-NEXT:    cmn w11, #128
-; CHECK-SD-NEXT:    csel w11, w11, w8, gt
-; CHECK-SD-NEXT:    cmp w12, #127
-; CHECK-SD-NEXT:    csel w12, w12, w9, lt
-; CHECK-SD-NEXT:    fmov s3, w11
-; CHECK-SD-NEXT:    cmn w12, #128
-; CHECK-SD-NEXT:    csel w12, w12, w8, gt
-; CHECK-SD-NEXT:    cmp w13, #127
-; CHECK-SD-NEXT:    csel w13, w13, w9, lt
-; CHECK-SD-NEXT:    mov v3.s[1], w10
-; CHECK-SD-NEXT:    cmn w13, #128
-; CHECK-SD-NEXT:    csel w13, w13, w8, gt
-; CHECK-SD-NEXT:    cmp w14, #127
-; CHECK-SD-NEXT:    csel w14, w14, w9, lt
-; CHECK-SD-NEXT:    fmov s2, w13
-; CHECK-SD-NEXT:    cmn w14, #128
-; CHECK-SD-NEXT:    csel w14, w14, w8, gt
-; CHECK-SD-NEXT:    cmp w15, #127
-; CHECK-SD-NEXT:    csel w15, w15, w9, lt
-; CHECK-SD-NEXT:    mov v2.s[1], w12
-; CHECK-SD-NEXT:    cmn w15, #128
-; CHECK-SD-NEXT:    csel w15, w15, w8, gt
-; CHECK-SD-NEXT:    cmp w16, #127
-; CHECK-SD-NEXT:    csel w11, w16, w9, lt
-; CHECK-SD-NEXT:    fmov s1, w15
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[4], w11
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    fcvtzs w11, d3
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
+; CHECK-SD-NEXT:    cmn w10, #128
+; CHECK-SD-NEXT:    csel w10, w10, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[5], w10
+; CHECK-SD-NEXT:    cmp w11, #127
+; CHECK-SD-NEXT:    fcvtzs w10, d1
+; CHECK-SD-NEXT:    csel w11, w11, w8, lt
 ; CHECK-SD-NEXT:    cmn w11, #128
-; CHECK-SD-NEXT:    csel w10, w11, w8, gt
-; CHECK-SD-NEXT:    cmp w17, #127
-; CHECK-SD-NEXT:    csel w9, w17, w9, lt
-; CHECK-SD-NEXT:    mov v1.s[1], w14
-; CHECK-SD-NEXT:    cmn w9, #128
-; CHECK-SD-NEXT:    csel w8, w9, w8, gt
-; CHECK-SD-NEXT:    fmov s0, w8
-; CHECK-SD-NEXT:    adrp x8, .LCPI82_0
-; CHECK-SD-NEXT:    ldr d4, [x8, :lo12:.LCPI82_0]
-; CHECK-SD-NEXT:    mov v0.s[1], w10
-; CHECK-SD-NEXT:    tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.8b
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[6], w11
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    csel w8, w10, w8, lt
+; CHECK-SD-NEXT:    cmn w8, #128
+; CHECK-SD-NEXT:    csel w8, w8, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[7], w8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_signed_v8f64_v8i8:
@@ -4990,11 +4986,9 @@ define <16 x i8> @test_signed_v16f64_v16i8(<16 x double> %f) {
 ; CHECK-SD-NEXT:    mov d16, v0.d[1]
 ; CHECK-SD-NEXT:    fcvtzs w10, d0
 ; CHECK-SD-NEXT:    mov w8, #127 // =0x7f
-; CHECK-SD-NEXT:    mov d0, v1.d[1]
-; CHECK-SD-NEXT:    fcvtzs w13, d1
-; CHECK-SD-NEXT:    mov d1, v2.d[1]
+; CHECK-SD-NEXT:    fcvtzs w12, d1
+; CHECK-SD-NEXT:    mov d1, v1.d[1]
 ; CHECK-SD-NEXT:    fcvtzs w9, d16
-; CHECK-SD-NEXT:    fcvtzs w12, d0
 ; CHECK-SD-NEXT:    cmp w9, #127
 ; CHECK-SD-NEXT:    csel w11, w9, w8, lt
 ; CHECK-SD-NEXT:    mov w9, #-128 // =0xffffff80
@@ -5006,115 +5000,94 @@ define <16 x i8> @test_signed_v16f64_v16i8(<16 x double> %f) {
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
 ; CHECK-SD-NEXT:    cmp w12, #127
 ; CHECK-SD-NEXT:    fmov s0, w10
-; CHECK-SD-NEXT:    csel w10, w12, w8, lt
+; CHECK-SD-NEXT:    fcvtzs w10, d1
+; CHECK-SD-NEXT:    mov d1, v2.d[1]
+; CHECK-SD-NEXT:    mov v0.b[1], w11
+; CHECK-SD-NEXT:    csel w11, w12, w8, lt
+; CHECK-SD-NEXT:    cmn w11, #128
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
+; CHECK-SD-NEXT:    mov v0.b[2], w11
+; CHECK-SD-NEXT:    fcvtzs w11, d2
 ; CHECK-SD-NEXT:    cmn w10, #128
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
-; CHECK-SD-NEXT:    cmp w13, #127
-; CHECK-SD-NEXT:    csel w12, w13, w8, lt
-; CHECK-SD-NEXT:    mov v0.s[1], w11
-; CHECK-SD-NEXT:    fcvtzs w11, d1
-; CHECK-SD-NEXT:    cmn w12, #128
-; CHECK-SD-NEXT:    csel w12, w12, w9, gt
-; CHECK-SD-NEXT:    fmov s1, w12
-; CHECK-SD-NEXT:    fcvtzs w12, d2
-; CHECK-SD-NEXT:    mov d2, v3.d[1]
 ; CHECK-SD-NEXT:    cmp w11, #127
-; CHECK-SD-NEXT:    mov w13, v0.s[1]
-; CHECK-SD-NEXT:    mov v1.s[1], w10
-; CHECK-SD-NEXT:    csel w10, w11, w8, lt
+; CHECK-SD-NEXT:    mov v0.b[3], w10
+; CHECK-SD-NEXT:    fcvtzs w10, d1
+; CHECK-SD-NEXT:    csel w11, w11, w8, lt
+; CHECK-SD-NEXT:    mov d1, v3.d[1]
+; CHECK-SD-NEXT:    cmn w11, #128
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[4], w11
+; CHECK-SD-NEXT:    fcvtzs w11, d3
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
 ; CHECK-SD-NEXT:    cmn w10, #128
-; CHECK-SD-NEXT:    fcvtzs w11, d2
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
-; CHECK-SD-NEXT:    cmp w12, #127
-; CHECK-SD-NEXT:    mov v0.b[1], w13
-; CHECK-SD-NEXT:    csel w12, w12, w8, lt
-; CHECK-SD-NEXT:    cmn w12, #128
-; CHECK-SD-NEXT:    mov w13, v1.s[1]
-; CHECK-SD-NEXT:    csel w12, w12, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[5], w10
+; CHECK-SD-NEXT:    fcvtzs w10, d1
 ; CHECK-SD-NEXT:    cmp w11, #127
-; CHECK-SD-NEXT:    fmov s2, w12
-; CHECK-SD-NEXT:    fcvtzs w12, d3
-; CHECK-SD-NEXT:    mov d3, v4.d[1]
-; CHECK-SD-NEXT:    mov v0.b[2], v1.b[0]
-; CHECK-SD-NEXT:    mov v2.s[1], w10
-; CHECK-SD-NEXT:    csel w10, w11, w8, lt
+; CHECK-SD-NEXT:    csel w11, w11, w8, lt
+; CHECK-SD-NEXT:    mov d1, v4.d[1]
+; CHECK-SD-NEXT:    cmn w11, #128
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[6], w11
+; CHECK-SD-NEXT:    fcvtzs w11, d4
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
 ; CHECK-SD-NEXT:    cmn w10, #128
-; CHECK-SD-NEXT:    fcvtzs w11, d3
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
-; CHECK-SD-NEXT:    cmp w12, #127
-; CHECK-SD-NEXT:    mov v0.b[3], w13
-; CHECK-SD-NEXT:    csel w12, w12, w8, lt
-; CHECK-SD-NEXT:    cmn w12, #128
-; CHECK-SD-NEXT:    mov w13, v2.s[1]
-; CHECK-SD-NEXT:    csel w12, w12, w9, gt
 ; CHECK-SD-NEXT:    cmp w11, #127
-; CHECK-SD-NEXT:    fmov s3, w12
-; CHECK-SD-NEXT:    fcvtzs w12, d4
-; CHECK-SD-NEXT:    mov v0.b[4], v2.b[0]
-; CHECK-SD-NEXT:    mov d4, v5.d[1]
-; CHECK-SD-NEXT:    mov v3.s[1], w10
-; CHECK-SD-NEXT:    csel w10, w11, w8, lt
+; CHECK-SD-NEXT:    mov v0.b[7], w10
+; CHECK-SD-NEXT:    fcvtzs w10, d1
+; CHECK-SD-NEXT:    csel w11, w11, w8, lt
+; CHECK-SD-NEXT:    mov d1, v5.d[1]
+; CHECK-SD-NEXT:    cmn w11, #128
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[8], w11
+; CHECK-SD-NEXT:    fcvtzs w11, d5
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
 ; CHECK-SD-NEXT:    cmn w10, #128
-; CHECK-SD-NEXT:    mov v0.b[5], w13
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
-; CHECK-SD-NEXT:    cmp w12, #127
-; CHECK-SD-NEXT:    fcvtzs w11, d4
-; CHECK-SD-NEXT:    csel w12, w12, w8, lt
-; CHECK-SD-NEXT:    cmn w12, #128
-; CHECK-SD-NEXT:    mov w13, v3.s[1]
-; CHECK-SD-NEXT:    csel w12, w12, w9, gt
-; CHECK-SD-NEXT:    mov v0.b[6], v3.b[0]
-; CHECK-SD-NEXT:    fmov s4, w12
-; CHECK-SD-NEXT:    fcvtzs w12, d5
+; CHECK-SD-NEXT:    mov v0.b[9], w10
+; CHECK-SD-NEXT:    fcvtzs w10, d1
 ; CHECK-SD-NEXT:    cmp w11, #127
-; CHECK-SD-NEXT:    mov d5, v6.d[1]
-; CHECK-SD-NEXT:    mov v4.s[1], w10
-; CHECK-SD-NEXT:    csel w10, w11, w8, lt
-; CHECK-SD-NEXT:    mov v0.b[7], w13
-; CHECK-SD-NEXT:    cmn w10, #128
-; CHECK-SD-NEXT:    csel w10, w10, w9, gt
-; CHECK-SD-NEXT:    cmp w12, #127
-; CHECK-SD-NEXT:    fcvtzs w13, d5
-; CHECK-SD-NEXT:    csel w11, w12, w8, lt
+; CHECK-SD-NEXT:    csel w11, w11, w8, lt
+; CHECK-SD-NEXT:    mov d1, v6.d[1]
 ; CHECK-SD-NEXT:    cmn w11, #128
-; CHECK-SD-NEXT:    mov w12, v4.s[1]
-; CHECK-SD-NEXT:    mov v0.b[8], v4.b[0]
 ; CHECK-SD-NEXT:    csel w11, w11, w9, gt
-; CHECK-SD-NEXT:    fmov s5, w11
+; CHECK-SD-NEXT:    mov v0.b[10], w11
 ; CHECK-SD-NEXT:    fcvtzs w11, d6
-; CHECK-SD-NEXT:    cmp w13, #127
-; CHECK-SD-NEXT:    mov d6, v7.d[1]
-; CHECK-SD-NEXT:    mov v0.b[9], w12
-; CHECK-SD-NEXT:    mov v5.s[1], w10
-; CHECK-SD-NEXT:    csel w10, w13, w8, lt
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
 ; CHECK-SD-NEXT:    cmn w10, #128
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
 ; CHECK-SD-NEXT:    cmp w11, #127
-; CHECK-SD-NEXT:    fcvtzs w13, d6
+; CHECK-SD-NEXT:    mov v0.b[11], w10
+; CHECK-SD-NEXT:    fcvtzs w10, d1
 ; CHECK-SD-NEXT:    csel w11, w11, w8, lt
+; CHECK-SD-NEXT:    mov d1, v7.d[1]
 ; CHECK-SD-NEXT:    cmn w11, #128
-; CHECK-SD-NEXT:    mov v0.b[10], v5.b[0]
-; CHECK-SD-NEXT:    mov w12, v5.s[1]
 ; CHECK-SD-NEXT:    csel w11, w11, w9, gt
-; CHECK-SD-NEXT:    fmov s6, w11
+; CHECK-SD-NEXT:    mov v0.b[12], w11
+; CHECK-SD-NEXT:    cmp w10, #127
 ; CHECK-SD-NEXT:    fcvtzs w11, d7
-; CHECK-SD-NEXT:    cmp w13, #127
-; CHECK-SD-NEXT:    mov v0.b[11], w12
-; CHECK-SD-NEXT:    mov v6.s[1], w10
-; CHECK-SD-NEXT:    csel w10, w13, w8, lt
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
 ; CHECK-SD-NEXT:    cmn w10, #128
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[13], w10
 ; CHECK-SD-NEXT:    cmp w11, #127
-; CHECK-SD-NEXT:    csel w8, w11, w8, lt
+; CHECK-SD-NEXT:    fcvtzs w10, d1
+; CHECK-SD-NEXT:    csel w11, w11, w8, lt
+; CHECK-SD-NEXT:    cmn w11, #128
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[14], w11
+; CHECK-SD-NEXT:    cmp w10, #127
+; CHECK-SD-NEXT:    csel w8, w10, w8, lt
 ; CHECK-SD-NEXT:    cmn w8, #128
-; CHECK-SD-NEXT:    mov v0.b[12], v6.b[0]
-; CHECK-SD-NEXT:    mov w11, v6.s[1]
 ; CHECK-SD-NEXT:    csel w8, w8, w9, gt
-; CHECK-SD-NEXT:    fmov s7, w8
-; CHECK-SD-NEXT:    mov v0.b[13], w11
-; CHECK-SD-NEXT:    mov v7.s[1], w10
-; CHECK-SD-NEXT:    mov v0.b[14], v7.b[0]
-; CHECK-SD-NEXT:    mov w8, v7.s[1]
 ; CHECK-SD-NEXT:    mov v0.b[15], w8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -5179,63 +5152,60 @@ define <16 x i8> @test_signed_v16f64_v16i8(<16 x double> %f) {
 define <8 x i16> @test_signed_v8f64_v8i16(<8 x double> %f) {
 ; CHECK-SD-LABEL: test_signed_v8f64_v8i16:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    mov d4, v3.d[1]
+; CHECK-SD-NEXT:    mov d4, v0.d[1]
 ; CHECK-SD-NEXT:    mov w8, #32767 // =0x7fff
-; CHECK-SD-NEXT:    fcvtzs w11, d3
-; CHECK-SD-NEXT:    mov d3, v1.d[1]
-; CHECK-SD-NEXT:    fcvtzs w13, d2
-; CHECK-SD-NEXT:    fcvtzs w15, d1
-; CHECK-SD-NEXT:    fcvtzs w17, d0
+; CHECK-SD-NEXT:    fcvtzs w10, d0
+; CHECK-SD-NEXT:    fcvtzs w12, d1
+; CHECK-SD-NEXT:    mov d1, v1.d[1]
 ; CHECK-SD-NEXT:    fcvtzs w9, d4
-; CHECK-SD-NEXT:    mov d4, v2.d[1]
-; CHECK-SD-NEXT:    mov d2, v0.d[1]
-; CHECK-SD-NEXT:    fcvtzs w14, d3
 ; CHECK-SD-NEXT:    cmp w9, w8
-; CHECK-SD-NEXT:    fcvtzs w12, d4
-; CHECK-SD-NEXT:    fcvtzs w16, d2
-; CHECK-SD-NEXT:    csel w10, w9, w8, lt
+; CHECK-SD-NEXT:    csel w11, w9, w8, lt
 ; CHECK-SD-NEXT:    mov w9, #-32768 // =0xffff8000
+; CHECK-SD-NEXT:    cmn w11, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    cmp w10, w8
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
+; CHECK-SD-NEXT:    cmn w10, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w10, w10, w9, gt
+; CHECK-SD-NEXT:    cmp w12, w8
+; CHECK-SD-NEXT:    fmov s0, w10
+; CHECK-SD-NEXT:    fcvtzs w10, d1
+; CHECK-SD-NEXT:    mov d1, v2.d[1]
+; CHECK-SD-NEXT:    mov v0.h[1], w11
+; CHECK-SD-NEXT:    csel w11, w12, w8, lt
+; CHECK-SD-NEXT:    cmn w11, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    cmp w10, w8
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
+; CHECK-SD-NEXT:    mov v0.h[2], w11
+; CHECK-SD-NEXT:    fcvtzs w11, d2
 ; CHECK-SD-NEXT:    cmn w10, #8, lsl #12 // =32768
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
 ; CHECK-SD-NEXT:    cmp w11, w8
+; CHECK-SD-NEXT:    mov v0.h[3], w10
+; CHECK-SD-NEXT:    fcvtzs w10, d1
 ; CHECK-SD-NEXT:    csel w11, w11, w8, lt
+; CHECK-SD-NEXT:    mov d1, v3.d[1]
 ; CHECK-SD-NEXT:    cmn w11, #8, lsl #12 // =32768
 ; CHECK-SD-NEXT:    csel w11, w11, w9, gt
-; CHECK-SD-NEXT:    cmp w12, w8
-; CHECK-SD-NEXT:    csel w12, w12, w8, lt
-; CHECK-SD-NEXT:    fmov s3, w11
-; CHECK-SD-NEXT:    cmn w12, #8, lsl #12 // =32768
-; CHECK-SD-NEXT:    csel w12, w12, w9, gt
-; CHECK-SD-NEXT:    cmp w13, w8
-; CHECK-SD-NEXT:    csel w13, w13, w8, lt
-; CHECK-SD-NEXT:    mov v3.s[1], w10
-; CHECK-SD-NEXT:    cmn w13, #8, lsl #12 // =32768
-; CHECK-SD-NEXT:    csel w13, w13, w9, gt
-; CHECK-SD-NEXT:    cmp w14, w8
-; CHECK-SD-NEXT:    csel w14, w14, w8, lt
-; CHECK-SD-NEXT:    fmov s2, w13
-; CHECK-SD-NEXT:    cmn w14, #8, lsl #12 // =32768
-; CHECK-SD-NEXT:    csel w14, w14, w9, gt
-; CHECK-SD-NEXT:    cmp w15, w8
-; CHECK-SD-NEXT:    csel w15, w15, w8, lt
-; CHECK-SD-NEXT:    mov v2.s[1], w12
-; CHECK-SD-NEXT:    cmn w15, #8, lsl #12 // =32768
-; CHECK-SD-NEXT:    csel w15, w15, w9, gt
-; CHECK-SD-NEXT:    cmp w16, w8
-; CHECK-SD-NEXT:    csel w11, w16, w8, lt
-; CHECK-SD-NEXT:    fmov s1, w15
+; CHECK-SD-NEXT:    mov v0.h[4], w11
+; CHECK-SD-NEXT:    cmp w10, w8
+; CHECK-SD-NEXT:    fcvtzs w11, d3
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
+; CHECK-SD-NEXT:    cmn w10, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w10, w10, w9, gt
+; CHECK-SD-NEXT:    mov v0.h[5], w10
+; CHECK-SD-NEXT:    cmp w11, w8
+; CHECK-SD-NEXT:    fcvtzs w10, d1
+; CHECK-SD-NEXT:    csel w11, w11, w8, lt
 ; CHECK-SD-NEXT:    cmn w11, #8, lsl #12 // =32768
-; CHECK-SD-NEXT:    csel w10, w11, w9, gt
-; CHECK-SD-NEXT:    cmp w17, w8
-; CHECK-SD-NEXT:    csel w8, w17, w8, lt
-; CHECK-SD-NEXT:    mov v1.s[1], w14
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    mov v0.h[6], w11
+; CHECK-SD-NEXT:    cmp w10, w8
+; CHECK-SD-NEXT:    csel w8, w10, w8, lt
 ; CHECK-SD-NEXT:    cmn w8, #8, lsl #12 // =32768
 ; CHECK-SD-NEXT:    csel w8, w8, w9, gt
-; CHECK-SD-NEXT:    fmov s0, w8
-; CHECK-SD-NEXT:    adrp x8, .LCPI84_0
-; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI84_0]
-; CHECK-SD-NEXT:    mov v0.s[1], w10
-; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
+; CHECK-SD-NEXT:    mov v0.h[7], w8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_signed_v8f64_v8i16:
@@ -5275,116 +5245,112 @@ define <8 x i16> @test_signed_v8f64_v8i16(<8 x double> %f) {
 define <16 x i16> @test_signed_v16f64_v16i16(<16 x double> %f) {
 ; CHECK-SD-LABEL: test_signed_v16f64_v16i16:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    mov d16, v3.d[1]
-; CHECK-SD-NEXT:    mov w9, #32767 // =0x7fff
-; CHECK-SD-NEXT:    fcvtzs w11, d3
-; CHECK-SD-NEXT:    mov d3, v1.d[1]
-; CHECK-SD-NEXT:    fcvtzs w14, d2
-; CHECK-SD-NEXT:    fcvtzs w15, d1
-; CHECK-SD-NEXT:    mov d1, v7.d[1]
-; CHECK-SD-NEXT:    fcvtzs w18, d0
-; CHECK-SD-NEXT:    fcvtzs w1, d7
-; CHECK-SD-NEXT:    fcvtzs w2, d6
-; CHECK-SD-NEXT:    fcvtzs w4, d5
-; CHECK-SD-NEXT:    fcvtzs w6, d4
-; CHECK-SD-NEXT:    fcvtzs w8, d16
-; CHECK-SD-NEXT:    mov d16, v2.d[1]
-; CHECK-SD-NEXT:    mov d2, v0.d[1]
-; CHECK-SD-NEXT:    mov d0, v6.d[1]
+; CHECK-SD-NEXT:    mov d16, v0.d[1]
+; CHECK-SD-NEXT:    mov w8, #32767 // =0x7fff
+; CHECK-SD-NEXT:    fcvtzs w11, d0
+; CHECK-SD-NEXT:    mov d0, v1.d[1]
+; CHECK-SD-NEXT:    fcvtzs w12, d1
+; CHECK-SD-NEXT:    fcvtzs w15, d3
+; CHECK-SD-NEXT:    mov d1, v4.d[1]
+; CHECK-SD-NEXT:    fcvtzs w1, d5
+; CHECK-SD-NEXT:    fcvtzs w9, d16
+; CHECK-SD-NEXT:    fcvtzs w14, d0
+; CHECK-SD-NEXT:    mov d0, v2.d[1]
 ; CHECK-SD-NEXT:    fcvtzs w0, d1
-; CHECK-SD-NEXT:    cmp w8, w9
-; CHECK-SD-NEXT:    fcvtzs w13, d16
-; CHECK-SD-NEXT:    fcvtzs w17, d2
-; CHECK-SD-NEXT:    csel w10, w8, w9, lt
-; CHECK-SD-NEXT:    mov w8, #-32768 // =0xffff8000
+; CHECK-SD-NEXT:    cmp w9, w8
+; CHECK-SD-NEXT:    csel w10, w9, w8, lt
+; CHECK-SD-NEXT:    mov w9, #-32768 // =0xffff8000
 ; CHECK-SD-NEXT:    cmn w10, #8, lsl #12 // =32768
-; CHECK-SD-NEXT:    csel w10, w10, w8, gt
-; CHECK-SD-NEXT:    cmp w11, w9
-; CHECK-SD-NEXT:    csel w11, w11, w9, lt
+; CHECK-SD-NEXT:    csel w10, w10, w9, gt
+; CHECK-SD-NEXT:    cmp w11, w8
+; CHECK-SD-NEXT:    csel w11, w11, w8, lt
+; CHECK-SD-NEXT:    cmn w11, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w13, w11, w9, gt
+; CHECK-SD-NEXT:    cmp w12, w8
+; CHECK-SD-NEXT:    csel w11, w12, w8, lt
+; CHECK-SD-NEXT:    fcvtzs w12, d2
+; CHECK-SD-NEXT:    mov d2, v5.d[1]
 ; CHECK-SD-NEXT:    cmn w11, #8, lsl #12 // =32768
-; CHECK-SD-NEXT:    csel w12, w11, w8, gt
-; CHECK-SD-NEXT:    cmp w13, w9
-; CHECK-SD-NEXT:    csel w11, w13, w9, lt
-; CHECK-SD-NEXT:    fcvtzs w13, d3
+; CHECK-SD-NEXT:    csel w17, w11, w9, gt
+; CHECK-SD-NEXT:    cmp w14, w8
+; CHECK-SD-NEXT:    csel w11, w14, w8, lt
+; CHECK-SD-NEXT:    fcvtzs w14, d0
+; CHECK-SD-NEXT:    mov d0, v3.d[1]
 ; CHECK-SD-NEXT:    cmn w11, #8, lsl #12 // =32768
-; CHECK-SD-NEXT:    csel w11, w11, w8, gt
-; CHECK-SD-NEXT:    cmp w14, w9
-; CHECK-SD-NEXT:    csel w14, w14, w9, lt
-; CHECK-SD-NEXT:    cmn w14, #8, lsl #12 // =32768
-; CHECK-SD-NEXT:    csel w14, w14, w8, gt
-; CHECK-SD-NEXT:    cmp w13, w9
-; CHECK-SD-NEXT:    csel w13, w13, w9, lt
-; CHECK-SD-NEXT:    cmn w13, #8, lsl #12 // =32768
-; CHECK-SD-NEXT:    csel w13, w13, w8, gt
-; CHECK-SD-NEXT:    cmp w15, w9
-; CHECK-SD-NEXT:    csel w15, w15, w9, lt
-; CHECK-SD-NEXT:    cmn w15, #8, lsl #12 // =32768
-; CHECK-SD-NEXT:    csel w16, w15, w8, gt
-; CHECK-SD-NEXT:    cmp w17, w9
-; CHECK-SD-NEXT:    csel w15, w17, w9, lt
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    cmp w12, w8
+; CHECK-SD-NEXT:    csel w12, w12, w8, lt
+; CHECK-SD-NEXT:    cmn w12, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    fcvtzs w18, d0
+; CHECK-SD-NEXT:    fmov s0, w13
+; CHECK-SD-NEXT:    csel w16, w12, w9, gt
+; CHECK-SD-NEXT:    cmp w14, w8
+; CHECK-SD-NEXT:    csel w12, w14, w8, lt
+; CHECK-SD-NEXT:    cmn w12, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    mov v0.h[1], w10
+; CHECK-SD-NEXT:    fcvtzs w10, d2
+; CHECK-SD-NEXT:    csel w14, w12, w9, gt
+; CHECK-SD-NEXT:    cmp w15, w8
+; CHECK-SD-NEXT:    mov d2, v6.d[1]
+; CHECK-SD-NEXT:    csel w12, w15, w8, lt
+; CHECK-SD-NEXT:    cmn w12, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w12, w12, w9, gt
+; CHECK-SD-NEXT:    cmp w18, w8
+; CHECK-SD-NEXT:    mov v0.h[2], w17
+; CHECK-SD-NEXT:    csel w15, w18, w8, lt
+; CHECK-SD-NEXT:    fcvtzs w18, d4
 ; CHECK-SD-NEXT:    cmn w15, #8, lsl #12 // =32768
-; CHECK-SD-NEXT:    csel w15, w15, w8, gt
-; CHECK-SD-NEXT:    cmp w18, w9
-; CHECK-SD-NEXT:    csel w17, w18, w9, lt
-; CHECK-SD-NEXT:    cmn w17, #8, lsl #12 // =32768
-; CHECK-SD-NEXT:    csel w17, w17, w8, gt
-; CHECK-SD-NEXT:    cmp w0, w9
-; CHECK-SD-NEXT:    csel w18, w0, w9, lt
-; CHECK-SD-NEXT:    fcvtzs w0, d0
-; CHECK-SD-NEXT:    mov d0, v5.d[1]
-; CHECK-SD-NEXT:    cmn w18, #8, lsl #12 // =32768
-; CHECK-SD-NEXT:    csel w18, w18, w8, gt
-; CHECK-SD-NEXT:    cmp w1, w9
-; CHECK-SD-NEXT:    csel w1, w1, w9, lt
-; CHECK-SD-NEXT:    cmn w1, #8, lsl #12 // =32768
-; CHECK-SD-NEXT:    fcvtzs w3, d0
-; CHECK-SD-NEXT:    mov d0, v4.d[1]
-; CHECK-SD-NEXT:    csel w1, w1, w8, gt
-; CHECK-SD-NEXT:    cmp w0, w9
-; CHECK-SD-NEXT:    csel w0, w0, w9, lt
-; CHECK-SD-NEXT:    fmov s7, w1
+; CHECK-SD-NEXT:    csel w15, w15, w9, gt
+; CHECK-SD-NEXT:    cmp w0, w8
+; CHECK-SD-NEXT:    csel w0, w0, w8, lt
+; CHECK-SD-NEXT:    mov v0.h[3], w11
 ; CHECK-SD-NEXT:    cmn w0, #8, lsl #12 // =32768
-; CHECK-SD-NEXT:    csel w0, w0, w8, gt
-; CHECK-SD-NEXT:    cmp w2, w9
-; CHECK-SD-NEXT:    fcvtzs w5, d0
-; CHECK-SD-NEXT:    csel w2, w2, w9, lt
-; CHECK-SD-NEXT:    fmov s3, w12
-; CHECK-SD-NEXT:    mov v7.s[1], w18
-; CHECK-SD-NEXT:    cmn w2, #8, lsl #12 // =32768
-; CHECK-SD-NEXT:    csel w2, w2, w8, gt
-; CHECK-SD-NEXT:    cmp w3, w9
-; CHECK-SD-NEXT:    csel w3, w3, w9, lt
-; CHECK-SD-NEXT:    mov v3.s[1], w10
-; CHECK-SD-NEXT:    fmov s6, w2
-; CHECK-SD-NEXT:    cmn w3, #8, lsl #12 // =32768
-; CHECK-SD-NEXT:    fmov s2, w14
-; CHECK-SD-NEXT:    csel w3, w3, w8, gt
-; CHECK-SD-NEXT:    cmp w4, w9
-; CHECK-SD-NEXT:    csel w4, w4, w9, lt
-; CHECK-SD-NEXT:    mov v6.s[1], w0
-; CHECK-SD-NEXT:    cmn w4, #8, lsl #12 // =32768
-; CHECK-SD-NEXT:    mov v2.s[1], w11
-; CHECK-SD-NEXT:    csel w12, w4, w8, gt
-; CHECK-SD-NEXT:    cmp w5, w9
-; CHECK-SD-NEXT:    fmov s1, w16
-; CHECK-SD-NEXT:    csel w10, w5, w9, lt
-; CHECK-SD-NEXT:    fmov s5, w12
+; CHECK-SD-NEXT:    csel w0, w0, w9, gt
+; CHECK-SD-NEXT:    cmp w18, w8
+; CHECK-SD-NEXT:    csel w18, w18, w8, lt
+; CHECK-SD-NEXT:    cmn w18, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    mov v0.h[4], w16
+; CHECK-SD-NEXT:    csel w13, w18, w9, gt
+; CHECK-SD-NEXT:    cmp w1, w8
+; CHECK-SD-NEXT:    fmov s1, w13
+; CHECK-SD-NEXT:    csel w13, w1, w8, lt
+; CHECK-SD-NEXT:    cmn w13, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w13, w13, w9, gt
+; CHECK-SD-NEXT:    cmp w10, w8
+; CHECK-SD-NEXT:    mov v0.h[5], w14
+; CHECK-SD-NEXT:    mov v1.h[1], w0
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
 ; CHECK-SD-NEXT:    cmn w10, #8, lsl #12 // =32768
-; CHECK-SD-NEXT:    csel w10, w10, w8, gt
-; CHECK-SD-NEXT:    cmp w6, w9
-; CHECK-SD-NEXT:    mov v1.s[1], w13
-; CHECK-SD-NEXT:    csel w9, w6, w9, lt
-; CHECK-SD-NEXT:    mov v5.s[1], w3
-; CHECK-SD-NEXT:    fmov s0, w17
-; CHECK-SD-NEXT:    cmn w9, #8, lsl #12 // =32768
-; CHECK-SD-NEXT:    csel w8, w9, w8, gt
-; CHECK-SD-NEXT:    fmov s4, w8
-; CHECK-SD-NEXT:    mov v0.s[1], w15
-; CHECK-SD-NEXT:    adrp x8, .LCPI85_0
-; CHECK-SD-NEXT:    ldr q16, [x8, :lo12:.LCPI85_0]
-; CHECK-SD-NEXT:    mov v4.s[1], w10
-; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b
-; CHECK-SD-NEXT:    tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b
+; CHECK-SD-NEXT:    csel w10, w10, w9, gt
+; CHECK-SD-NEXT:    mov v0.h[6], w12
+; CHECK-SD-NEXT:    mov v1.h[2], w13
+; CHECK-SD-NEXT:    fcvtzs w13, d6
+; CHECK-SD-NEXT:    mov v0.h[7], w15
+; CHECK-SD-NEXT:    cmp w13, w8
+; CHECK-SD-NEXT:    mov v1.h[3], w10
+; CHECK-SD-NEXT:    fcvtzs w10, d2
+; CHECK-SD-NEXT:    csel w11, w13, w8, lt
+; CHECK-SD-NEXT:    mov d2, v7.d[1]
+; CHECK-SD-NEXT:    cmn w11, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    mov v1.h[4], w11
+; CHECK-SD-NEXT:    cmp w10, w8
+; CHECK-SD-NEXT:    fcvtzs w11, d7
+; CHECK-SD-NEXT:    csel w10, w10, w8, lt
+; CHECK-SD-NEXT:    cmn w10, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w10, w10, w9, gt
+; CHECK-SD-NEXT:    mov v1.h[5], w10
+; CHECK-SD-NEXT:    cmp w11, w8
+; CHECK-SD-NEXT:    fcvtzs w10, d2
+; CHECK-SD-NEXT:    csel w11, w11, w8, lt
+; CHECK-SD-NEXT:    cmn w11, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w11, w11, w9, gt
+; CHECK-SD-NEXT:    mov v1.h[6], w11
+; CHECK-SD-NEXT:    cmp w10, w8
+; CHECK-SD-NEXT:    csel w8, w10, w8, lt
+; CHECK-SD-NEXT:    cmn w8, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    csel w8, w8, w9, gt
+; CHECK-SD-NEXT:    mov v1.h[7], w8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_signed_v16f64_v16i16:
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
index b1b5154a57b4d..099f43edfca6e 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -645,18 +645,18 @@ define <3 x i32> @test_unsigned_v3f128_v3i32(<3 x fp128> %f) {
 ; CHECK-SD-LABEL: test_unsigned_v3f128_v3i32:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    sub sp, sp, #112
-; CHECK-SD-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp x30, x21, [sp, #80] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 112
 ; CHECK-SD-NEXT:    .cfi_offset w19, -8
 ; CHECK-SD-NEXT:    .cfi_offset w20, -16
+; CHECK-SD-NEXT:    .cfi_offset w21, -24
 ; CHECK-SD-NEXT:    .cfi_offset w30, -32
-; CHECK-SD-NEXT:    stp q0, q2, [sp, #48] // 32-byte Folded Spill
-; CHECK-SD-NEXT:    mov v2.16b, v1.16b
+; CHECK-SD-NEXT:    stp q1, q0, [sp, #48] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    adrp x8, .LCPI16_0
-; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI16_0]
 ; CHECK-SD-NEXT:    mov v0.16b, v2.16b
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI16_0]
+; CHECK-SD-NEXT:    str q2, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __getf2
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -681,13 +681,10 @@ define <3 x i32> @test_unsigned_v3f128_v3i32(<3 x fp128> %f) {
 ; CHECK-SD-NEXT:    cmp w19, #0
 ; CHECK-SD-NEXT:    csel w19, wzr, w0, lt
 ; CHECK-SD-NEXT:    bl __gttf2
-; CHECK-SD-NEXT:    cmp w0, #0
-; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    csinv w8, w19, wzr, le
-; CHECK-SD-NEXT:    fmov s0, w8
-; CHECK-SD-NEXT:    mov v0.s[1], w20
-; CHECK-SD-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    csinv w21, w19, wzr, le
 ; CHECK-SD-NEXT:    bl __getf2
 ; CHECK-SD-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov w19, w0
@@ -698,11 +695,12 @@ define <3 x i32> @test_unsigned_v3f128_v3i32(<3 x fp128> %f) {
 ; CHECK-SD-NEXT:    csel w19, wzr, w0, lt
 ; CHECK-SD-NEXT:    bl __gttf2
 ; CHECK-SD-NEXT:    cmp w0, #0
-; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    csinv w8, w19, wzr, le
+; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    mov v0.s[1], w21
+; CHECK-SD-NEXT:    ldp x30, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v0.s[2], w20
 ; CHECK-SD-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    mov v0.s[2], w8
 ; CHECK-SD-NEXT:    add sp, sp, #112
 ; CHECK-SD-NEXT:    ret
 ;
@@ -4048,46 +4046,44 @@ define <16 x i16> @test_unsigned_v16f16_v16i16(<16 x half> %f) {
 define <8 x i8> @test_unsigned_v8f64_v8i8(<8 x double> %f) {
 ; CHECK-SD-LABEL: test_unsigned_v8f64_v8i8:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    mov d4, v3.d[1]
-; CHECK-SD-NEXT:    mov d5, v2.d[1]
-; CHECK-SD-NEXT:    mov w11, #255 // =0xff
-; CHECK-SD-NEXT:    fcvtzu w9, d3
-; CHECK-SD-NEXT:    mov d3, v1.d[1]
-; CHECK-SD-NEXT:    fcvtzu w12, d2
-; CHECK-SD-NEXT:    fcvtzu w14, d1
-; CHECK-SD-NEXT:    fcvtzu w8, d4
 ; CHECK-SD-NEXT:    mov d4, v0.d[1]
-; CHECK-SD-NEXT:    fcvtzu w10, d5
-; CHECK-SD-NEXT:    fcvtzu w13, d3
-; CHECK-SD-NEXT:    cmp w8, #255
-; CHECK-SD-NEXT:    fcvtzu w15, d4
-; CHECK-SD-NEXT:    csel w8, w8, w11, lo
+; CHECK-SD-NEXT:    fcvtzu w10, d0
+; CHECK-SD-NEXT:    mov w8, #255 // =0xff
+; CHECK-SD-NEXT:    fcvtzu w11, d1
+; CHECK-SD-NEXT:    mov d1, v1.d[1]
+; CHECK-SD-NEXT:    fcvtzu w9, d4
 ; CHECK-SD-NEXT:    cmp w9, #255
-; CHECK-SD-NEXT:    csel w9, w9, w11, lo
+; CHECK-SD-NEXT:    csel w9, w9, w8, lo
 ; CHECK-SD-NEXT:    cmp w10, #255
-; CHECK-SD-NEXT:    fmov s4, w9
-; CHECK-SD-NEXT:    csel w9, w10, w11, lo
-; CHECK-SD-NEXT:    cmp w12, #255
-; CHECK-SD-NEXT:    fcvtzu w10, d0
-; CHECK-SD-NEXT:    mov v4.s[1], w8
-; CHECK-SD-NEXT:    csel w8, w12, w11, lo
-; CHECK-SD-NEXT:    cmp w13, #255
-; CHECK-SD-NEXT:    fmov s3, w8
-; CHECK-SD-NEXT:    csel w8, w13, w11, lo
-; CHECK-SD-NEXT:    cmp w14, #255
-; CHECK-SD-NEXT:    mov v3.s[1], w9
-; CHECK-SD-NEXT:    csel w9, w14, w11, lo
-; CHECK-SD-NEXT:    cmp w15, #255
-; CHECK-SD-NEXT:    fmov s2, w9
-; CHECK-SD-NEXT:    csel w9, w15, w11, lo
+; CHECK-SD-NEXT:    csel w10, w10, w8, lo
+; CHECK-SD-NEXT:    cmp w11, #255
+; CHECK-SD-NEXT:    fmov s0, w10
+; CHECK-SD-NEXT:    csel w10, w11, w8, lo
+; CHECK-SD-NEXT:    mov v0.b[1], w9
+; CHECK-SD-NEXT:    fcvtzu w9, d1
+; CHECK-SD-NEXT:    mov d1, v2.d[1]
+; CHECK-SD-NEXT:    mov v0.b[2], w10
+; CHECK-SD-NEXT:    cmp w9, #255
+; CHECK-SD-NEXT:    fcvtzu w10, d2
+; CHECK-SD-NEXT:    csel w9, w9, w8, lo
+; CHECK-SD-NEXT:    mov v0.b[3], w9
 ; CHECK-SD-NEXT:    cmp w10, #255
-; CHECK-SD-NEXT:    mov v2.s[1], w8
-; CHECK-SD-NEXT:    csel w8, w10, w11, lo
-; CHECK-SD-NEXT:    fmov s1, w8
-; CHECK-SD-NEXT:    adrp x8, .LCPI82_0
-; CHECK-SD-NEXT:    ldr d0, [x8, :lo12:.LCPI82_0]
-; CHECK-SD-NEXT:    mov v1.s[1], w9
-; CHECK-SD-NEXT:    tbl v0.8b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.8b
+; CHECK-SD-NEXT:    fcvtzu w9, d1
+; CHECK-SD-NEXT:    csel w10, w10, w8, lo
+; CHECK-SD-NEXT:    mov d1, v3.d[1]
+; CHECK-SD-NEXT:    mov v0.b[4], w10
+; CHECK-SD-NEXT:    cmp w9, #255
+; CHECK-SD-NEXT:    fcvtzu w10, d3
+; CHECK-SD-NEXT:    csel w9, w9, w8, lo
+; CHECK-SD-NEXT:    mov v0.b[5], w9
+; CHECK-SD-NEXT:    cmp w10, #255
+; CHECK-SD-NEXT:    fcvtzu w9, d1
+; CHECK-SD-NEXT:    csel w10, w10, w8, lo
+; CHECK-SD-NEXT:    mov v0.b[6], w10
+; CHECK-SD-NEXT:    cmp w9, #255
+; CHECK-SD-NEXT:    csel w8, w9, w8, lo
+; CHECK-SD-NEXT:    mov v0.b[7], w8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_unsigned_v8f64_v8i8:
@@ -4120,98 +4116,75 @@ define <16 x i8> @test_unsigned_v16f64_v16i8(<16 x double> %f) {
 ; CHECK-SD-NEXT:    mov d16, v0.d[1]
 ; CHECK-SD-NEXT:    fcvtzu w10, d0
 ; CHECK-SD-NEXT:    mov w8, #255 // =0xff
+; CHECK-SD-NEXT:    fcvtzu w11, d1
+; CHECK-SD-NEXT:    mov d1, v1.d[1]
 ; CHECK-SD-NEXT:    fcvtzu w9, d16
-; CHECK-SD-NEXT:    mov d16, v1.d[1]
 ; CHECK-SD-NEXT:    cmp w9, #255
 ; CHECK-SD-NEXT:    csel w9, w9, w8, lo
 ; CHECK-SD-NEXT:    cmp w10, #255
 ; CHECK-SD-NEXT:    csel w10, w10, w8, lo
+; CHECK-SD-NEXT:    cmp w11, #255
 ; CHECK-SD-NEXT:    fmov s0, w10
-; CHECK-SD-NEXT:    fcvtzu w10, d16
-; CHECK-SD-NEXT:    mov d16, v2.d[1]
-; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    csel w10, w11, w8, lo
+; CHECK-SD-NEXT:    mov v0.b[1], w9
 ; CHECK-SD-NEXT:    fcvtzu w9, d1
-; CHECK-SD-NEXT:    cmp w10, #255
-; CHECK-SD-NEXT:    csel w10, w10, w8, lo
+; CHECK-SD-NEXT:    mov d1, v2.d[1]
+; CHECK-SD-NEXT:    mov v0.b[2], w10
 ; CHECK-SD-NEXT:    cmp w9, #255
-; CHECK-SD-NEXT:    mov w11, v0.s[1]
-; CHECK-SD-NEXT:    csel w9, w9, w8, lo
-; CHECK-SD-NEXT:    fmov s1, w9
-; CHECK-SD-NEXT:    fcvtzu w9, d16
-; CHECK-SD-NEXT:    mov d16, v3.d[1]
-; CHECK-SD-NEXT:    mov v0.b[1], w11
-; CHECK-SD-NEXT:    mov v1.s[1], w10
 ; CHECK-SD-NEXT:    fcvtzu w10, d2
-; CHECK-SD-NEXT:    cmp w9, #255
 ; CHECK-SD-NEXT:    csel w9, w9, w8, lo
+; CHECK-SD-NEXT:    mov v0.b[3], w9
 ; CHECK-SD-NEXT:    cmp w10, #255
-; CHECK-SD-NEXT:    mov w11, v1.s[1]
-; CHECK-SD-NEXT:    mov v0.b[2], v1.b[0]
+; CHECK-SD-NEXT:    fcvtzu w9, d1
 ; CHECK-SD-NEXT:    csel w10, w10, w8, lo
-; CHECK-SD-NEXT:    fmov s2, w10
-; CHECK-SD-NEXT:    fcvtzu w10, d16
-; CHECK-SD-NEXT:    mov d16, v4.d[1]
-; CHECK-SD-NEXT:    mov v0.b[3], w11
-; CHECK-SD-NEXT:    mov v2.s[1], w9
-; CHECK-SD-NEXT:    fcvtzu w9, d3
+; CHECK-SD-NEXT:    mov d1, v3.d[1]
+; CHECK-SD-NEXT:    mov v0.b[4], w10
+; CHECK-SD-NEXT:    cmp w9, #255
+; CHECK-SD-NEXT:    fcvtzu w10, d3
+; CHECK-SD-NEXT:    csel w9, w9, w8, lo
+; CHECK-SD-NEXT:    mov v0.b[5], w9
 ; CHECK-SD-NEXT:    cmp w10, #255
+; CHECK-SD-NEXT:    fcvtzu w9, d1
 ; CHECK-SD-NEXT:    csel w10, w10, w8, lo
+; CHECK-SD-NEXT:    mov d1, v4.d[1]
+; CHECK-SD-NEXT:    mov v0.b[6], w10
 ; CHECK-SD-NEXT:    cmp w9, #255
-; CHECK-SD-NEXT:    mov w11, v2.s[1]
-; CHECK-SD-NEXT:    mov v0.b[4], v2.b[0]
-; CHECK-SD-NEXT:    csel w9, w9, w8, lo
-; CHECK-SD-NEXT:    fmov s3, w9
-; CHECK-SD-NEXT:    fcvtzu w9, d16
-; CHECK-SD-NEXT:    mov d16, v5.d[1]
-; CHECK-SD-NEXT:    mov v0.b[5], w11
-; CHECK-SD-NEXT:    mov v3.s[1], w10
 ; CHECK-SD-NEXT:    fcvtzu w10, d4
-; CHECK-SD-NEXT:    cmp w9, #255
 ; CHECK-SD-NEXT:    csel w9, w9, w8, lo
+; CHECK-SD-NEXT:    mov v0.b[7], w9
 ; CHECK-SD-NEXT:    cmp w10, #255
-; CHECK-SD-NEXT:    mov w11, v3.s[1]
-; CHECK-SD-NEXT:    mov v0.b[6], v3.b[0]
+; CHECK-SD-NEXT:    fcvtzu w9, d1
 ; CHECK-SD-NEXT:    csel w10, w10, w8, lo
-; CHECK-SD-NEXT:    fmov s4, w10
-; CHECK-SD-NEXT:    fcvtzu w10, d16
-; CHECK-SD-NEXT:    mov v0.b[7], w11
-; CHECK-SD-NEXT:    mov v4.s[1], w9
-; CHECK-SD-NEXT:    fcvtzu w9, d5
-; CHECK-SD-NEXT:    mov d5, v6.d[1]
+; CHECK-SD-NEXT:    mov d1, v5.d[1]
+; CHECK-SD-NEXT:    mov v0.b[8], w10
+; CHECK-SD-NEXT:    cmp w9, #255
+; CHECK-SD-NEXT:    fcvtzu w10, d5
+; CHECK-SD-NEXT:    csel w9, w9, w8, lo
+; CHECK-SD-NEXT:    mov v0.b[9], w9
 ; CHECK-SD-NEXT:    cmp w10, #255
+; CHECK-SD-NEXT:    fcvtzu w9, d1
 ; CHECK-SD-NEXT:    csel w10, w10, w8, lo
+; CHECK-SD-NEXT:    mov d1, v6.d[1]
+; CHECK-SD-NEXT:    mov v0.b[10], w10
 ; CHECK-SD-NEXT:    cmp w9, #255
-; CHECK-SD-NEXT:    mov w11, v4.s[1]
-; CHECK-SD-NEXT:    mov v0.b[8], v4.b[0]
-; CHECK-SD-NEXT:    csel w9, w9, w8, lo
-; CHECK-SD-NEXT:    fmov s16, w9
-; CHECK-SD-NEXT:    fcvtzu w9, d5
-; CHECK-SD-NEXT:    mov d5, v7.d[1]
-; CHECK-SD-NEXT:    mov v0.b[9], w11
-; CHECK-SD-NEXT:    mov v16.s[1], w10
 ; CHECK-SD-NEXT:    fcvtzu w10, d6
-; CHECK-SD-NEXT:    cmp w9, #255
 ; CHECK-SD-NEXT:    csel w9, w9, w8, lo
+; CHECK-SD-NEXT:    mov v0.b[11], w9
 ; CHECK-SD-NEXT:    cmp w10, #255
-; CHECK-SD-NEXT:    mov v0.b[10], v16.b[0]
-; CHECK-SD-NEXT:    mov w11, v16.s[1]
+; CHECK-SD-NEXT:    fcvtzu w9, d1
 ; CHECK-SD-NEXT:    csel w10, w10, w8, lo
-; CHECK-SD-NEXT:    fmov s6, w10
-; CHECK-SD-NEXT:    fcvtzu w10, d7
-; CHECK-SD-NEXT:    mov v0.b[11], w11
-; CHECK-SD-NEXT:    mov v6.s[1], w9
-; CHECK-SD-NEXT:    fcvtzu w9, d5
+; CHECK-SD-NEXT:    mov d1, v7.d[1]
+; CHECK-SD-NEXT:    mov v0.b[12], w10
 ; CHECK-SD-NEXT:    cmp w9, #255
-; CHECK-SD-NEXT:    mov v0.b[12], v6.b[0]
-; CHECK-SD-NEXT:    mov w11, v6.s[1]
+; CHECK-SD-NEXT:    fcvtzu w10, d7
 ; CHECK-SD-NEXT:    csel w9, w9, w8, lo
+; CHECK-SD-NEXT:    mov v0.b[13], w9
 ; CHECK-SD-NEXT:    cmp w10, #255
-; CHECK-SD-NEXT:    csel w8, w10, w8, lo
-; CHECK-SD-NEXT:    fmov s5, w8
-; CHECK-SD-NEXT:    mov v0.b[13], w11
-; CHECK-SD-NEXT:    mov v5.s[1], w9
-; CHECK-SD-NEXT:    mov v0.b[14], v5.b[0]
-; CHECK-SD-NEXT:    mov w8, v5.s[1]
+; CHECK-SD-NEXT:    fcvtzu w9, d1
+; CHECK-SD-NEXT:    csel w10, w10, w8, lo
+; CHECK-SD-NEXT:    mov v0.b[14], w10
+; CHECK-SD-NEXT:    cmp w9, #255
+; CHECK-SD-NEXT:    csel w8, w9, w8, lo
 ; CHECK-SD-NEXT:    mov v0.b[15], w8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -4257,46 +4230,43 @@ define <16 x i8> @test_unsigned_v16f64_v16i8(<16 x double> %f) {
 define <8 x i16> @test_unsigned_v8f64_v8i16(<8 x double> %f) {
 ; CHECK-SD-LABEL: test_unsigned_v8f64_v8i16:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    mov d4, v3.d[1]
-; CHECK-SD-NEXT:    mov d5, v2.d[1]
+; CHECK-SD-NEXT:    mov d4, v0.d[1]
+; CHECK-SD-NEXT:    fcvtzu w9, d0
 ; CHECK-SD-NEXT:    mov w10, #65535 // =0xffff
-; CHECK-SD-NEXT:    fcvtzu w9, d3
-; CHECK-SD-NEXT:    mov d3, v1.d[1]
-; CHECK-SD-NEXT:    fcvtzu w12, d2
-; CHECK-SD-NEXT:    fcvtzu w14, d1
+; CHECK-SD-NEXT:    fcvtzu w11, d1
+; CHECK-SD-NEXT:    mov d1, v1.d[1]
 ; CHECK-SD-NEXT:    fcvtzu w8, d4
-; CHECK-SD-NEXT:    mov d4, v0.d[1]
-; CHECK-SD-NEXT:    fcvtzu w11, d5
-; CHECK-SD-NEXT:    fcvtzu w13, d3
 ; CHECK-SD-NEXT:    cmp w8, w10
-; CHECK-SD-NEXT:    fcvtzu w15, d4
 ; CHECK-SD-NEXT:    csel w8, w8, w10, lo
 ; CHECK-SD-NEXT:    cmp w9, w10
 ; CHECK-SD-NEXT:    csel w9, w9, w10, lo
 ; CHECK-SD-NEXT:    cmp w11, w10
-; CHECK-SD-NEXT:    fmov s4, w9
+; CHECK-SD-NEXT:    fmov s0, w9
 ; CHECK-SD-NEXT:    csel w9, w11, w10, lo
-; CHECK-SD-NEXT:    cmp w12, w10
-; CHECK-SD-NEXT:    fcvtzu w11, d0
-; CHECK-SD-NEXT:    mov v4.s[1], w8
-; CHECK-SD-NEXT:    csel w8, w12, w10, lo
-; CHECK-SD-NEXT:    cmp w13, w10
-; CHECK-SD-NEXT:    fmov s3, w8
-; CHECK-SD-NEXT:    csel w8, w13, w10, lo
-; CHECK-SD-NEXT:    cmp w14, w10
-; CHECK-SD-NEXT:    mov v3.s[1], w9
-; CHECK-SD-NEXT:    csel w9, w14, w10, lo
-; CHECK-SD-NEXT:    cmp w15, w10
-; CHECK-SD-NEXT:    fmov s2, w9
-; CHECK-SD-NEXT:    csel w9, w15, w10, lo
-; CHECK-SD-NEXT:    cmp w11, w10
-; CHECK-SD-NEXT:    mov v2.s[1], w8
-; CHECK-SD-NEXT:    csel w8, w11, w10, lo
-; CHECK-SD-NEXT:    fmov s1, w8
-; CHECK-SD-NEXT:    adrp x8, .LCPI84_0
-; CHECK-SD-NEXT:    ldr q0, [x8, :lo12:.LCPI84_0]
-; CHECK-SD-NEXT:    mov v1.s[1], w9
-; CHECK-SD-NEXT:    tbl v0.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b
+; CHECK-SD-NEXT:    mov v0.h[1], w8
+; CHECK-SD-NEXT:    fcvtzu w8, d1
+; CHECK-SD-NEXT:    mov d1, v2.d[1]
+; CHECK-SD-NEXT:    mov v0.h[2], w9
+; CHECK-SD-NEXT:    cmp w8, w10
+; CHECK-SD-NEXT:    fcvtzu w9, d2
+; CHECK-SD-NEXT:    csel w8, w8, w10, lo
+; CHECK-SD-NEXT:    mov v0.h[3], w8
+; CHECK-SD-NEXT:    cmp w9, w10
+; CHECK-SD-NEXT:    fcvtzu w8, d1
+; CHECK-SD-NEXT:    csel w9, w9, w10, lo
+; CHECK-SD-NEXT:    mov d1, v3.d[1]
+; CHECK-SD-NEXT:    mov v0.h[4], w9
+; CHECK-SD-NEXT:    cmp w8, w10
+; CHECK-SD-NEXT:    fcvtzu w9, d3
+; CHECK-SD-NEXT:    csel w8, w8, w10, lo
+; CHECK-SD-NEXT:    mov v0.h[5], w8
+; CHECK-SD-NEXT:    cmp w9, w10
+; CHECK-SD-NEXT:    fcvtzu w8, d1
+; CHECK-SD-NEXT:    csel w9, w9, w10, lo
+; CHECK-SD-NEXT:    mov v0.h[6], w9
+; CHECK-SD-NEXT:    cmp w8, w10
+; CHECK-SD-NEXT:    csel w8, w8, w10, lo
+; CHECK-SD-NEXT:    mov v0.h[7], w8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_unsigned_v8f64_v8i16:
@@ -4325,83 +4295,79 @@ define <8 x i16> @test_unsigned_v8f64_v8i16(<8 x double> %f) {
 define <16 x i16> @test_unsigned_v16f64_v16i16(<16 x double> %f) {
 ; CHECK-SD-LABEL: test_unsigned_v16f64_v16i16:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    mov d16, v3.d[1]
-; CHECK-SD-NEXT:    mov d17, v2.d[1]
+; CHECK-SD-NEXT:    mov d16, v0.d[1]
+; CHECK-SD-NEXT:    mov d17, v1.d[1]
 ; CHECK-SD-NEXT:    mov w8, #65535 // =0xffff
-; CHECK-SD-NEXT:    fcvtzu w9, d3
-; CHECK-SD-NEXT:    mov d3, v1.d[1]
-; CHECK-SD-NEXT:    fcvtzu w10, d1
-; CHECK-SD-NEXT:    mov d1, v0.d[1]
-; CHECK-SD-NEXT:    fcvtzu w11, d2
-; CHECK-SD-NEXT:    fcvtzu w12, d0
-; CHECK-SD-NEXT:    mov d0, v7.d[1]
-; CHECK-SD-NEXT:    mov d2, v6.d[1]
-; CHECK-SD-NEXT:    fcvtzu w14, d7
-; CHECK-SD-NEXT:    fcvtzu w13, d16
-; CHECK-SD-NEXT:    fcvtzu w16, d17
-; CHECK-SD-NEXT:    fcvtzu w15, d6
-; CHECK-SD-NEXT:    fcvtzu w17, d3
-; CHECK-SD-NEXT:    mov d6, v5.d[1]
-; CHECK-SD-NEXT:    mov d3, v4.d[1]
-; CHECK-SD-NEXT:    fcvtzu w18, d1
-; CHECK-SD-NEXT:    cmp w13, w8
-; CHECK-SD-NEXT:    csel w13, w13, w8, lo
+; CHECK-SD-NEXT:    fcvtzu w10, d0
+; CHECK-SD-NEXT:    mov d0, v2.d[1]
+; CHECK-SD-NEXT:    fcvtzu w11, d1
+; CHECK-SD-NEXT:    mov d1, v3.d[1]
+; CHECK-SD-NEXT:    fcvtzu w13, d2
+; CHECK-SD-NEXT:    mov d2, v4.d[1]
+; CHECK-SD-NEXT:    fcvtzu w18, d4
+; CHECK-SD-NEXT:    fcvtzu w9, d16
+; CHECK-SD-NEXT:    fcvtzu w12, d17
+; CHECK-SD-NEXT:    fcvtzu w16, d1
+; CHECK-SD-NEXT:    fcvtzu w17, d2
+; CHECK-SD-NEXT:    mov d2, v5.d[1]
 ; CHECK-SD-NEXT:    cmp w9, w8
-; CHECK-SD-NEXT:    csel w9, w9, w8, lo
-; CHECK-SD-NEXT:    cmp w16, w8
-; CHECK-SD-NEXT:    fmov s19, w9
-; CHECK-SD-NEXT:    csel w9, w16, w8, lo
+; CHECK-SD-NEXT:    csel w14, w9, w8, lo
+; CHECK-SD-NEXT:    cmp w10, w8
+; CHECK-SD-NEXT:    fcvtzu w9, d0
+; CHECK-SD-NEXT:    csel w15, w10, w8, lo
 ; CHECK-SD-NEXT:    cmp w11, w8
-; CHECK-SD-NEXT:    fcvtzu w16, d0
+; CHECK-SD-NEXT:    fcvtzu w10, d3
 ; CHECK-SD-NEXT:    csel w11, w11, w8, lo
-; CHECK-SD-NEXT:    cmp w17, w8
-; CHECK-SD-NEXT:    mov v19.s[1], w13
-; CHECK-SD-NEXT:    csel w13, w17, w8, lo
-; CHECK-SD-NEXT:    cmp w10, w8
-; CHECK-SD-NEXT:    csel w10, w10, w8, lo
-; CHECK-SD-NEXT:    cmp w18, w8
-; CHECK-SD-NEXT:    fmov s18, w11
-; CHECK-SD-NEXT:    csel w11, w18, w8, lo
 ; CHECK-SD-NEXT:    cmp w12, w8
-; CHECK-SD-NEXT:    fcvtzu w17, d2
+; CHECK-SD-NEXT:    fmov s0, w15
 ; CHECK-SD-NEXT:    csel w12, w12, w8, lo
+; CHECK-SD-NEXT:    cmp w13, w8
+; CHECK-SD-NEXT:    csel w13, w13, w8, lo
+; CHECK-SD-NEXT:    cmp w9, w8
+; CHECK-SD-NEXT:    csel w0, w9, w8, lo
+; CHECK-SD-NEXT:    cmp w10, w8
+; CHECK-SD-NEXT:    mov v0.h[1], w14
+; CHECK-SD-NEXT:    csel w10, w10, w8, lo
 ; CHECK-SD-NEXT:    cmp w16, w8
-; CHECK-SD-NEXT:    fcvtzu w18, d6
-; CHECK-SD-NEXT:    mov v18.s[1], w9
+; CHECK-SD-NEXT:    fcvtzu w14, d2
 ; CHECK-SD-NEXT:    csel w9, w16, w8, lo
-; CHECK-SD-NEXT:    cmp w14, w8
-; CHECK-SD-NEXT:    fmov s17, w10
-; CHECK-SD-NEXT:    csel w10, w14, w8, lo
-; CHECK-SD-NEXT:    fcvtzu w16, d5
-; CHECK-SD-NEXT:    fmov s23, w10
 ; CHECK-SD-NEXT:    cmp w17, w8
-; CHECK-SD-NEXT:    fcvtzu w14, d3
-; CHECK-SD-NEXT:    csel w10, w17, w8, lo
-; CHECK-SD-NEXT:    cmp w15, w8
-; CHECK-SD-NEXT:    fcvtzu w17, d4
-; CHECK-SD-NEXT:    mov v17.s[1], w13
-; CHECK-SD-NEXT:    mov v23.s[1], w9
-; CHECK-SD-NEXT:    csel w9, w15, w8, lo
+; CHECK-SD-NEXT:    mov d2, v6.d[1]
+; CHECK-SD-NEXT:    csel w16, w17, w8, lo
 ; CHECK-SD-NEXT:    cmp w18, w8
-; CHECK-SD-NEXT:    fmov s22, w9
-; CHECK-SD-NEXT:    csel w9, w18, w8, lo
-; CHECK-SD-NEXT:    cmp w16, w8
-; CHECK-SD-NEXT:    fmov s16, w12
-; CHECK-SD-NEXT:    mov v22.s[1], w10
-; CHECK-SD-NEXT:    csel w10, w16, w8, lo
+; CHECK-SD-NEXT:    csel w17, w18, w8, lo
+; CHECK-SD-NEXT:    fcvtzu w18, d5
+; CHECK-SD-NEXT:    mov v0.h[2], w11
+; CHECK-SD-NEXT:    fmov s1, w17
+; CHECK-SD-NEXT:    fcvtzu w11, d6
+; CHECK-SD-NEXT:    mov v1.h[1], w16
+; CHECK-SD-NEXT:    cmp w18, w8
+; CHECK-SD-NEXT:    mov v0.h[3], w12
+; CHECK-SD-NEXT:    csel w15, w18, w8, lo
 ; CHECK-SD-NEXT:    cmp w14, w8
-; CHECK-SD-NEXT:    fmov s21, w10
-; CHECK-SD-NEXT:    csel w10, w14, w8, lo
-; CHECK-SD-NEXT:    cmp w17, w8
-; CHECK-SD-NEXT:    csel w8, w17, w8, lo
-; CHECK-SD-NEXT:    mov v16.s[1], w11
-; CHECK-SD-NEXT:    mov v21.s[1], w9
-; CHECK-SD-NEXT:    fmov s20, w8
-; CHECK-SD-NEXT:    adrp x8, .LCPI85_0
-; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI85_0]
-; CHECK-SD-NEXT:    mov v20.s[1], w10
-; CHECK-SD-NEXT:    tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b
-; CHECK-SD-NEXT:    tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b
+; CHECK-SD-NEXT:    fcvtzu w12, d2
+; CHECK-SD-NEXT:    csel w14, w14, w8, lo
+; CHECK-SD-NEXT:    cmp w11, w8
+; CHECK-SD-NEXT:    mov d2, v7.d[1]
+; CHECK-SD-NEXT:    csel w11, w11, w8, lo
+; CHECK-SD-NEXT:    mov v1.h[2], w15
+; CHECK-SD-NEXT:    mov v0.h[4], w13
+; CHECK-SD-NEXT:    cmp w12, w8
+; CHECK-SD-NEXT:    csel w12, w12, w8, lo
+; CHECK-SD-NEXT:    mov v1.h[3], w14
+; CHECK-SD-NEXT:    mov v0.h[5], w0
+; CHECK-SD-NEXT:    mov v1.h[4], w11
+; CHECK-SD-NEXT:    fcvtzu w11, d7
+; CHECK-SD-NEXT:    mov v0.h[6], w10
+; CHECK-SD-NEXT:    mov v1.h[5], w12
+; CHECK-SD-NEXT:    cmp w11, w8
+; CHECK-SD-NEXT:    fcvtzu w12, d2
+; CHECK-SD-NEXT:    csel w11, w11, w8, lo
+; CHECK-SD-NEXT:    mov v0.h[7], w9
+; CHECK-SD-NEXT:    mov v1.h[6], w11
+; CHECK-SD-NEXT:    cmp w12, w8
+; CHECK-SD-NEXT:    csel w8, w12, w8, lo
+; CHECK-SD-NEXT:    mov v1.h[7], w8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_unsigned_v16f64_v16i16:
diff --git a/llvm/test/CodeGen/AArch64/fptrunc.ll b/llvm/test/CodeGen/AArch64/fptrunc.ll
index b4c38e9f2df3b..1f84c944d7c16 100644
--- a/llvm/test/CodeGen/AArch64/fptrunc.ll
+++ b/llvm/test/CodeGen/AArch64/fptrunc.ll
@@ -304,25 +304,15 @@ entry:
 }
 
 define <3 x half> @fptrunc_v3f64_v3f16(<3 x double> %a) {
-; CHECK-SD-LABEL: fptrunc_v3f64_v3f16:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    fcvt h1, d1
-; CHECK-SD-NEXT:    fcvt h0, d0
-; CHECK-SD-NEXT:    mov v0.h[1], v1.h[0]
-; CHECK-SD-NEXT:    fcvt h1, d2
-; CHECK-SD-NEXT:    mov v0.h[2], v1.h[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: fptrunc_v3f64_v3f16:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    fcvt h0, d0
-; CHECK-GI-NEXT:    fcvt h1, d1
-; CHECK-GI-NEXT:    fcvt h2, d2
-; CHECK-GI-NEXT:    mov v0.h[1], v1.h[0]
-; CHECK-GI-NEXT:    mov v0.h[2], v2.h[0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: fptrunc_v3f64_v3f16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvt h0, d0
+; CHECK-NEXT:    fcvt h1, d1
+; CHECK-NEXT:    fcvt h2, d2
+; CHECK-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-NEXT:    mov v0.h[2], v2.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %c = fptrunc <3 x double> %a to <3 x half>
   ret <3 x half> %c
diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll
index 71b53c662bb22..9d4d654259a31 100644
--- a/llvm/test/CodeGen/AArch64/itofp.ll
+++ b/llvm/test/CodeGen/AArch64/itofp.ll
@@ -4143,11 +4143,11 @@ entry:
 define <3 x float> @stofp_v3i128_v3f32(<3 x i128> %a) {
 ; CHECK-SD-LABEL: stofp_v3i128_v3f32:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    sub sp, sp, #64
-; CHECK-SD-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-SD-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT:    sub sp, sp, #80
+; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-SD-NEXT:    .cfi_offset w19, -8
 ; CHECK-SD-NEXT:    .cfi_offset w20, -16
 ; CHECK-SD-NEXT:    .cfi_offset w21, -24
@@ -4155,31 +4155,31 @@ define <3 x float> @stofp_v3i128_v3f32(<3 x i128> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w30, -48
 ; CHECK-SD-NEXT:    mov x21, x1
 ; CHECK-SD-NEXT:    mov x22, x0
-; CHECK-SD-NEXT:    mov x0, x2
-; CHECK-SD-NEXT:    mov x1, x3
-; CHECK-SD-NEXT:    mov x19, x5
-; CHECK-SD-NEXT:    mov x20, x4
+; CHECK-SD-NEXT:    mov x0, x4
+; CHECK-SD-NEXT:    mov x1, x5
+; CHECK-SD-NEXT:    mov x19, x3
+; CHECK-SD-NEXT:    mov x20, x2
 ; CHECK-SD-NEXT:    bl __floattisf
 ; CHECK-SD-NEXT:    mov x0, x22
 ; CHECK-SD-NEXT:    mov x1, x21
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floattisf
-; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov x0, x20
 ; CHECK-SD-NEXT:    mov x1, x19
-; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floattisf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    mov v1.s[1], v0.s[0]
+; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
-; CHECK-SD-NEXT:    add sp, sp, #64
+; CHECK-SD-NEXT:    add sp, sp, #80
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: stofp_v3i128_v3f32:
@@ -4227,11 +4227,11 @@ entry:
 define <3 x float> @utofp_v3i128_v3f32(<3 x i128> %a) {
 ; CHECK-SD-LABEL: utofp_v3i128_v3f32:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    sub sp, sp, #64
-; CHECK-SD-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-SD-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT:    sub sp, sp, #80
+; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-SD-NEXT:    .cfi_offset w19, -8
 ; CHECK-SD-NEXT:    .cfi_offset w20, -16
 ; CHECK-SD-NEXT:    .cfi_offset w21, -24
@@ -4239,31 +4239,31 @@ define <3 x float> @utofp_v3i128_v3f32(<3 x i128> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w30, -48
 ; CHECK-SD-NEXT:    mov x21, x1
 ; CHECK-SD-NEXT:    mov x22, x0
-; CHECK-SD-NEXT:    mov x0, x2
-; CHECK-SD-NEXT:    mov x1, x3
-; CHECK-SD-NEXT:    mov x19, x5
-; CHECK-SD-NEXT:    mov x20, x4
+; CHECK-SD-NEXT:    mov x0, x4
+; CHECK-SD-NEXT:    mov x1, x5
+; CHECK-SD-NEXT:    mov x19, x3
+; CHECK-SD-NEXT:    mov x20, x2
 ; CHECK-SD-NEXT:    bl __floatuntisf
 ; CHECK-SD-NEXT:    mov x0, x22
 ; CHECK-SD-NEXT:    mov x1, x21
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floatuntisf
-; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov x0, x20
 ; CHECK-SD-NEXT:    mov x1, x19
-; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floatuntisf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    mov v1.s[1], v0.s[0]
+; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
-; CHECK-SD-NEXT:    add sp, sp, #64
+; CHECK-SD-NEXT:    add sp, sp, #80
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: utofp_v3i128_v3f32:
@@ -6035,11 +6035,11 @@ entry:
 define <3 x half> @stofp_v3i128_v3f16(<3 x i128> %a) {
 ; CHECK-SD-NOFP16-LABEL: stofp_v3i128_v3f16:
 ; CHECK-SD-NOFP16:       // %bb.0: // %entry
-; CHECK-SD-NOFP16-NEXT:    sub sp, sp, #64
-; CHECK-SD-NOFP16-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-SD-NOFP16-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-NOFP16-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
-; CHECK-SD-NOFP16-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-SD-NOFP16-NEXT:    sub sp, sp, #80
+; CHECK-SD-NOFP16-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NOFP16-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NOFP16-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NOFP16-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-SD-NOFP16-NEXT:    .cfi_offset w19, -8
 ; CHECK-SD-NOFP16-NEXT:    .cfi_offset w20, -16
 ; CHECK-SD-NOFP16-NEXT:    .cfi_offset w21, -24
@@ -6047,40 +6047,41 @@ define <3 x half> @stofp_v3i128_v3f16(<3 x i128> %a) {
 ; CHECK-SD-NOFP16-NEXT:    .cfi_offset w30, -48
 ; CHECK-SD-NOFP16-NEXT:    mov x21, x1
 ; CHECK-SD-NOFP16-NEXT:    mov x22, x0
-; CHECK-SD-NOFP16-NEXT:    mov x0, x2
-; CHECK-SD-NOFP16-NEXT:    mov x1, x3
-; CHECK-SD-NOFP16-NEXT:    mov x19, x5
-; CHECK-SD-NOFP16-NEXT:    mov x20, x4
+; CHECK-SD-NOFP16-NEXT:    mov x0, x4
+; CHECK-SD-NOFP16-NEXT:    mov x1, x5
+; CHECK-SD-NOFP16-NEXT:    mov x19, x3
+; CHECK-SD-NOFP16-NEXT:    mov x20, x2
 ; CHECK-SD-NOFP16-NEXT:    bl __floattisf
 ; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
 ; CHECK-SD-NOFP16-NEXT:    mov x0, x22
 ; CHECK-SD-NOFP16-NEXT:    mov x1, x21
-; CHECK-SD-NOFP16-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NOFP16-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NOFP16-NEXT:    bl __floattisf
 ; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
-; CHECK-SD-NOFP16-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NOFP16-NEXT:    mov x0, x20
 ; CHECK-SD-NOFP16-NEXT:    mov x1, x19
-; CHECK-SD-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-SD-NOFP16-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NOFP16-NEXT:    bl __floattisf
-; CHECK-SD-NOFP16-NEXT:    fcvt h1, s0
-; CHECK-SD-NOFP16-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NOFP16-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NOFP16-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NOFP16-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NOFP16-NEXT:    mov v0.h[2], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NOFP16-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NOFP16-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NOFP16-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[1], v0.h[0]
+; CHECK-SD-NOFP16-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[2], v0.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v0.16b, v1.16b
 ; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NOFP16-NEXT:    add sp, sp, #64
+; CHECK-SD-NOFP16-NEXT:    add sp, sp, #80
 ; CHECK-SD-NOFP16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: stofp_v3i128_v3f16:
 ; CHECK-SD-FP16:       // %bb.0: // %entry
-; CHECK-SD-FP16-NEXT:    sub sp, sp, #64
-; CHECK-SD-FP16-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-SD-FP16-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-FP16-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
-; CHECK-SD-FP16-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-SD-FP16-NEXT:    sub sp, sp, #80
+; CHECK-SD-FP16-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-FP16-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-FP16-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-FP16-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-SD-FP16-NEXT:    .cfi_offset w19, -8
 ; CHECK-SD-FP16-NEXT:    .cfi_offset w20, -16
 ; CHECK-SD-FP16-NEXT:    .cfi_offset w21, -24
@@ -6088,31 +6089,32 @@ define <3 x half> @stofp_v3i128_v3f16(<3 x i128> %a) {
 ; CHECK-SD-FP16-NEXT:    .cfi_offset w30, -48
 ; CHECK-SD-FP16-NEXT:    mov x21, x1
 ; CHECK-SD-FP16-NEXT:    mov x22, x0
-; CHECK-SD-FP16-NEXT:    mov x0, x2
-; CHECK-SD-FP16-NEXT:    mov x1, x3
-; CHECK-SD-FP16-NEXT:    mov x19, x5
-; CHECK-SD-FP16-NEXT:    mov x20, x4
+; CHECK-SD-FP16-NEXT:    mov x0, x4
+; CHECK-SD-FP16-NEXT:    mov x1, x5
+; CHECK-SD-FP16-NEXT:    mov x19, x3
+; CHECK-SD-FP16-NEXT:    mov x20, x2
 ; CHECK-SD-FP16-NEXT:    bl __floattihf
 ; CHECK-SD-FP16-NEXT:    mov x0, x22
 ; CHECK-SD-FP16-NEXT:    mov x1, x21
 ; CHECK-SD-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
-; CHECK-SD-FP16-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-FP16-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-FP16-NEXT:    bl __floattihf
-; CHECK-SD-FP16-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov x0, x20
 ; CHECK-SD-FP16-NEXT:    mov x1, x19
-; CHECK-SD-FP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-SD-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-FP16-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-FP16-NEXT:    bl __floattihf
 ; CHECK-SD-FP16-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-SD-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
-; CHECK-SD-FP16-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-FP16-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-FP16-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-FP16-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-FP16-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-FP16-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-FP16-NEXT:    mov v1.h[1], v0.h[0]
+; CHECK-SD-FP16-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-FP16-NEXT:    mov v1.h[2], v0.h[0]
-; CHECK-SD-FP16-NEXT:    fmov d0, d1
-; CHECK-SD-FP16-NEXT:    add sp, sp, #64
+; CHECK-SD-FP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-FP16-NEXT:    add sp, sp, #80
 ; CHECK-SD-FP16-NEXT:    ret
 ;
 ; CHECK-GI-NOFP16-LABEL: stofp_v3i128_v3f16:
@@ -6200,11 +6202,11 @@ entry:
 define <3 x half> @utofp_v3i128_v3f16(<3 x i128> %a) {
 ; CHECK-SD-NOFP16-LABEL: utofp_v3i128_v3f16:
 ; CHECK-SD-NOFP16:       // %bb.0: // %entry
-; CHECK-SD-NOFP16-NEXT:    sub sp, sp, #64
-; CHECK-SD-NOFP16-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-SD-NOFP16-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-NOFP16-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
-; CHECK-SD-NOFP16-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-SD-NOFP16-NEXT:    sub sp, sp, #80
+; CHECK-SD-NOFP16-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NOFP16-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NOFP16-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NOFP16-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-SD-NOFP16-NEXT:    .cfi_offset w19, -8
 ; CHECK-SD-NOFP16-NEXT:    .cfi_offset w20, -16
 ; CHECK-SD-NOFP16-NEXT:    .cfi_offset w21, -24
@@ -6212,40 +6214,41 @@ define <3 x half> @utofp_v3i128_v3f16(<3 x i128> %a) {
 ; CHECK-SD-NOFP16-NEXT:    .cfi_offset w30, -48
 ; CHECK-SD-NOFP16-NEXT:    mov x21, x1
 ; CHECK-SD-NOFP16-NEXT:    mov x22, x0
-; CHECK-SD-NOFP16-NEXT:    mov x0, x2
-; CHECK-SD-NOFP16-NEXT:    mov x1, x3
-; CHECK-SD-NOFP16-NEXT:    mov x19, x5
-; CHECK-SD-NOFP16-NEXT:    mov x20, x4
+; CHECK-SD-NOFP16-NEXT:    mov x0, x4
+; CHECK-SD-NOFP16-NEXT:    mov x1, x5
+; CHECK-SD-NOFP16-NEXT:    mov x19, x3
+; CHECK-SD-NOFP16-NEXT:    mov x20, x2
 ; CHECK-SD-NOFP16-NEXT:    bl __floatuntisf
 ; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
 ; CHECK-SD-NOFP16-NEXT:    mov x0, x22
 ; CHECK-SD-NOFP16-NEXT:    mov x1, x21
-; CHECK-SD-NOFP16-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NOFP16-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NOFP16-NEXT:    bl __floatuntisf
 ; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
-; CHECK-SD-NOFP16-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NOFP16-NEXT:    mov x0, x20
 ; CHECK-SD-NOFP16-NEXT:    mov x1, x19
-; CHECK-SD-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-SD-NOFP16-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NOFP16-NEXT:    bl __floatuntisf
-; CHECK-SD-NOFP16-NEXT:    fcvt h1, s0
-; CHECK-SD-NOFP16-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NOFP16-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NOFP16-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NOFP16-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NOFP16-NEXT:    mov v0.h[2], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NOFP16-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NOFP16-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NOFP16-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[1], v0.h[0]
+; CHECK-SD-NOFP16-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[2], v0.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v0.16b, v1.16b
 ; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NOFP16-NEXT:    add sp, sp, #64
+; CHECK-SD-NOFP16-NEXT:    add sp, sp, #80
 ; CHECK-SD-NOFP16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: utofp_v3i128_v3f16:
 ; CHECK-SD-FP16:       // %bb.0: // %entry
-; CHECK-SD-FP16-NEXT:    sub sp, sp, #64
-; CHECK-SD-FP16-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-SD-FP16-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-FP16-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
-; CHECK-SD-FP16-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-SD-FP16-NEXT:    sub sp, sp, #80
+; CHECK-SD-FP16-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-FP16-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-FP16-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-FP16-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-SD-FP16-NEXT:    .cfi_offset w19, -8
 ; CHECK-SD-FP16-NEXT:    .cfi_offset w20, -16
 ; CHECK-SD-FP16-NEXT:    .cfi_offset w21, -24
@@ -6253,31 +6256,32 @@ define <3 x half> @utofp_v3i128_v3f16(<3 x i128> %a) {
 ; CHECK-SD-FP16-NEXT:    .cfi_offset w30, -48
 ; CHECK-SD-FP16-NEXT:    mov x21, x1
 ; CHECK-SD-FP16-NEXT:    mov x22, x0
-; CHECK-SD-FP16-NEXT:    mov x0, x2
-; CHECK-SD-FP16-NEXT:    mov x1, x3
-; CHECK-SD-FP16-NEXT:    mov x19, x5
-; CHECK-SD-FP16-NEXT:    mov x20, x4
+; CHECK-SD-FP16-NEXT:    mov x0, x4
+; CHECK-SD-FP16-NEXT:    mov x1, x5
+; CHECK-SD-FP16-NEXT:    mov x19, x3
+; CHECK-SD-FP16-NEXT:    mov x20, x2
 ; CHECK-SD-FP16-NEXT:    bl __floatuntihf
 ; CHECK-SD-FP16-NEXT:    mov x0, x22
 ; CHECK-SD-FP16-NEXT:    mov x1, x21
 ; CHECK-SD-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
-; CHECK-SD-FP16-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-FP16-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-FP16-NEXT:    bl __floatuntihf
-; CHECK-SD-FP16-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov x0, x20
 ; CHECK-SD-FP16-NEXT:    mov x1, x19
-; CHECK-SD-FP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-SD-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-FP16-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-FP16-NEXT:    bl __floatuntihf
 ; CHECK-SD-FP16-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-SD-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
-; CHECK-SD-FP16-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-FP16-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-FP16-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-FP16-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-FP16-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-FP16-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-FP16-NEXT:    mov v1.h[1], v0.h[0]
+; CHECK-SD-FP16-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-FP16-NEXT:    mov v1.h[2], v0.h[0]
-; CHECK-SD-FP16-NEXT:    fmov d0, d1
-; CHECK-SD-FP16-NEXT:    add sp, sp, #64
+; CHECK-SD-FP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-FP16-NEXT:    add sp, sp, #80
 ; CHECK-SD-FP16-NEXT:    ret
 ;
 ; CHECK-GI-NOFP16-LABEL: utofp_v3i128_v3f16:
diff --git a/llvm/test/CodeGen/AArch64/sext.ll b/llvm/test/CodeGen/AArch64/sext.ll
index ce5b80f3e2be4..2f193cefd7c0c 100644
--- a/llvm/test/CodeGen/AArch64/sext.ll
+++ b/llvm/test/CodeGen/AArch64/sext.ll
@@ -1245,12 +1245,10 @@ entry:
 define <2 x i128> @sext_v2i64_v2i128(<2 x i64> %a) {
 ; CHECK-SD-LABEL: sext_v2i64_v2i128:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    mov x8, v0.d[1]
-; CHECK-SD-NEXT:    dup v1.2d, v0.d[1]
+; CHECK-SD-NEXT:    mov x2, v0.d[1]
 ; CHECK-SD-NEXT:    fmov x0, d0
-; CHECK-SD-NEXT:    fmov x2, d1
 ; CHECK-SD-NEXT:    asr x1, x0, #63
-; CHECK-SD-NEXT:    asr x3, x8, #63
+; CHECK-SD-NEXT:    asr x3, x2, #63
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sext_v2i64_v2i128:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll
index ffef6f74f2d36..e4eda2e3a1e32 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll
@@ -100,22 +100,17 @@ define void @bitcast_v2i16(ptr %a, ptr %b) {
 ;
 ; NONEON-NOSVE-LABEL: bitcast_v2i16:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #32
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
-; NONEON-NOSVE-NEXT:    ldrh w8, [x0, #2]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #4]
+; NONEON-NOSVE-NEXT:    sub sp, sp, #16
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    ldrh w8, [x0]
-; NONEON-NOSVE-NEXT:    str w8, [sp]
+; NONEON-NOSVE-NEXT:    ldrh w9, [x0, #2]
+; NONEON-NOSVE-NEXT:    strh w9, [sp, #2]
+; NONEON-NOSVE-NEXT:    strh w8, [sp]
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp]
 ; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #8]
-; NONEON-NOSVE-NEXT:    strh w9, [sp, #18]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldr d0, [sp, #16]
-; NONEON-NOSVE-NEXT:    str d0, [sp, #24]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #24]
+; NONEON-NOSVE-NEXT:    ldr w8, [sp, #8]
 ; NONEON-NOSVE-NEXT:    str w8, [x1]
-; NONEON-NOSVE-NEXT:    add sp, sp, #32
+; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %load = load volatile <2 x i16>, ptr %a
   %cast = bitcast <2 x i16> %load to <2 x half>
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
index 7d6336a43a4fd..4d524bc848de6 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
@@ -148,75 +148,38 @@ define <16 x i32> @load_sext_v16i8i32(ptr %ap)  {
 ;
 ; NONEON-NOSVE-LABEL: load_sext_v16i8i32:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #160
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 160
 ; NONEON-NOSVE-NEXT:    ldr q0, [x0]
-; NONEON-NOSVE-NEXT:    str q0, [sp]
+; NONEON-NOSVE-NEXT:    str q0, [sp, #-96]!
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #27]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #62]
+; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #27]
 ; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #26]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #60]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #25]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #58]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #88]
+; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #25]
 ; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #56]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #31]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #54]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #80]
+; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #31]
 ; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #30]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #52]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #29]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #50]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #72]
+; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #29]
 ; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #48]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #19]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #46]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #64]
+; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #19]
 ; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #18]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #44]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #17]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #42]
+; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #64]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #56]
+; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #17]
 ; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #94]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #40]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #23]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #38]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #48]
+; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #23]
 ; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #22]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #36]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #21]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #34]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #40]
+; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #21]
 ; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #92]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #152]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #90]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #88]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #144]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #86]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #84]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #136]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #82]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #80]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #128]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #78]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #76]
-; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #128]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #120]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #74]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #72]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #70]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #68]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #104]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #66]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #64]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #96]
-; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #96]
-; NONEON-NOSVE-NEXT:    add sp, sp, #160
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #32]
+; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #32]
+; NONEON-NOSVE-NEXT:    add sp, sp, #96
 ; NONEON-NOSVE-NEXT:    ret
   %a = load <16 x i8>, ptr %ap
   %val = sext <16 x i8> %a to <16 x i32>
@@ -291,18 +254,12 @@ define <4 x i256> @load_sext_v4i32i256(ptr %ap) {
 ; NONEON-NOSVE-LABEL: load_sext_v4i32i256:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    ldr q0, [x0]
-; NONEON-NOSVE-NEXT:    str q0, [sp, #-96]!
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
+; NONEON-NOSVE-NEXT:    str q0, [sp, #-32]!
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldpsw x9, x10, [sp, #24]
-; NONEON-NOSVE-NEXT:    stp x9, x10, [sp, #48]
-; NONEON-NOSVE-NEXT:    ldpsw x9, x10, [sp, #16]
-; NONEON-NOSVE-NEXT:    stp x9, x10, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
-; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #64]
-; NONEON-NOSVE-NEXT:    ldp x11, x9, [sp, #64]
-; NONEON-NOSVE-NEXT:    ldp x12, x13, [sp, #80]
+; NONEON-NOSVE-NEXT:    ldpsw x11, x9, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldpsw x12, x13, [sp, #24]
 ; NONEON-NOSVE-NEXT:    asr x10, x9, #63
 ; NONEON-NOSVE-NEXT:    asr x14, x11, #63
 ; NONEON-NOSVE-NEXT:    stp x10, x10, [x8, #112]
@@ -315,7 +272,7 @@ define <4 x i256> @load_sext_v4i32i256(ptr %ap) {
 ; NONEON-NOSVE-NEXT:    stp x13, x9, [x8, #32]
 ; NONEON-NOSVE-NEXT:    stp x10, x10, [x8, #16]
 ; NONEON-NOSVE-NEXT:    stp x12, x10, [x8]
-; NONEON-NOSVE-NEXT:    add sp, sp, #96
+; NONEON-NOSVE-NEXT:    add sp, sp, #32
 ; NONEON-NOSVE-NEXT:    ret
   %a = load <4 x i32>, ptr %ap
   %val = sext <4 x i32> %a to <4 x i256>
@@ -327,52 +284,28 @@ define <2 x i256> @load_sext_v2i64i256(ptr %ap) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr q0, [x0]
 ; CHECK-NEXT:    mov z1.d, z0.d[1]
-; CHECK-NEXT:    fmov x8, d0
-; CHECK-NEXT:    fmov x9, d1
-; CHECK-NEXT:    asr x8, x8, #63
-; CHECK-NEXT:    fmov d3, x8
-; CHECK-NEXT:    mov z2.d, x8
-; CHECK-NEXT:    asr x9, x9, #63
-; CHECK-NEXT:    fmov d4, x9
-; CHECK-NEXT:    zip1 z0.d, z0.d, z3.d
-; CHECK-NEXT:    mov z3.d, x9
-; CHECK-NEXT:    fmov x2, d2
-; CHECK-NEXT:    zip1 z1.d, z1.d, z4.d
-; CHECK-NEXT:    mov z4.d, z2.d[1]
-; CHECK-NEXT:    mov z5.d, z0.d[1]
-; CHECK-NEXT:    mov z6.d, z3.d[1]
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    fmov x6, d3
-; CHECK-NEXT:    mov z2.d, z1.d[1]
-; CHECK-NEXT:    fmov x3, d4
-; CHECK-NEXT:    fmov x1, d5
 ; CHECK-NEXT:    fmov x4, d1
-; CHECK-NEXT:    fmov x7, d6
-; CHECK-NEXT:    fmov x5, d2
+; CHECK-NEXT:    asr x1, x0, #63
+; CHECK-NEXT:    mov x2, x1
+; CHECK-NEXT:    mov x3, x1
+; CHECK-NEXT:    asr x5, x4, #63
+; CHECK-NEXT:    mov x6, x5
+; CHECK-NEXT:    mov x7, x5
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: load_sext_v2i64i256:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #144
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 144
 ; NONEON-NOSVE-NEXT:    ldr q0, [x0]
-; NONEON-NOSVE-NEXT:    str q0, [sp]
-; NONEON-NOSVE-NEXT:    ldp x8, x10, [sp]
-; NONEON-NOSVE-NEXT:    asr x9, x8, #63
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #32]
-; NONEON-NOSVE-NEXT:    asr x8, x10, #63
-; NONEON-NOSVE-NEXT:    stp x9, x9, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #16]
-; NONEON-NOSVE-NEXT:    stp x10, x8, [sp, #64]
-; NONEON-NOSVE-NEXT:    stp x8, x8, [sp, #48]
-; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #80]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #48]
-; NONEON-NOSVE-NEXT:    ldp x0, x1, [sp, #96]
-; NONEON-NOSVE-NEXT:    ldp x2, x3, [sp, #80]
-; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldp x4, x5, [sp, #128]
-; NONEON-NOSVE-NEXT:    ldp x6, x7, [sp, #112]
-; NONEON-NOSVE-NEXT:    add sp, sp, #144
+; NONEON-NOSVE-NEXT:    str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT:    ldp x0, x4, [sp], #16
+; NONEON-NOSVE-NEXT:    asr x1, x0, #63
+; NONEON-NOSVE-NEXT:    asr x5, x4, #63
+; NONEON-NOSVE-NEXT:    mov x2, x1
+; NONEON-NOSVE-NEXT:    mov x3, x1
+; NONEON-NOSVE-NEXT:    mov x6, x5
+; NONEON-NOSVE-NEXT:    mov x7, x5
 ; NONEON-NOSVE-NEXT:    ret
   %a = load <2 x i64>, ptr %ap
   %val = sext <2 x i64> %a to <2 x i256>
@@ -410,88 +343,51 @@ define <16 x i64> @load_zext_v16i16i64(ptr %ap)  {
 ;
 ; NONEON-NOSVE-LABEL: load_zext_v16i16i64:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #336
-; NONEON-NOSVE-NEXT:    str x29, [sp, #320] // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 336
-; NONEON-NOSVE-NEXT:    .cfi_offset w29, -16
+; NONEON-NOSVE-NEXT:    sub sp, sp, #192
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 192
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [x0]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #316]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #308]
-; NONEON-NOSVE-NEXT:    ldr x29, [sp, #320] // 8-byte Folded Reload
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #300]
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [sp]
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #292]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #284]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #276]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #40]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #50]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #48]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #268]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #260]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #96]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #54]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #52]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #56]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #88]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #42]
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #42]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #120]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #40]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #88]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #80]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #46]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #112]
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #46]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #104]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #44]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #72]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #66]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #64]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #152]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #72]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #128]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #70]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #68]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #120]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #58]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #96]
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #34]
+; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #96]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #88]
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #32]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #80]
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #38]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #72]
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #36]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #64]
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #58]
+; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #64]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #184]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #56]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #136]
-; NONEON-NOSVE-NEXT:    ldp d2, d1, [sp, #120]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #62]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #176]
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #62]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #168]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #60]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #104]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #160]
-; NONEON-NOSVE-NEXT:    str d1, [sp, #328]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #104]
-; NONEON-NOSVE-NEXT:    stp w9, wzr, [sp, #248]
-; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #240]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #152]
-; NONEON-NOSVE-NEXT:    stp d1, d2, [sp, #176]
-; NONEON-NOSVE-NEXT:    str d0, [sp, #168]
-; NONEON-NOSVE-NEXT:    stp w9, wzr, [sp, #232]
-; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #224]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #144]
-; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #224]
-; NONEON-NOSVE-NEXT:    stp w9, wzr, [sp, #216]
-; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #208]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #136]
-; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #192]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #332]
-; NONEON-NOSVE-NEXT:    stp w9, wzr, [sp, #200]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #312]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #328]
-; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #192]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #304]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #184]
-; NONEON-NOSVE-NEXT:    str w9, [sp, #296]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #288]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #176]
-; NONEON-NOSVE-NEXT:    ldp q5, q4, [sp, #288]
-; NONEON-NOSVE-NEXT:    str w9, [sp, #280]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #272]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #168]
-; NONEON-NOSVE-NEXT:    str w9, [sp, #264]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #256]
-; NONEON-NOSVE-NEXT:    ldp q7, q6, [sp, #256]
-; NONEON-NOSVE-NEXT:    add sp, sp, #336
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #160]
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #50]
+; NONEON-NOSVE-NEXT:    ldp q5, q4, [sp, #160]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #152]
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #48]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #144]
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #54]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #136]
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #52]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #128]
+; NONEON-NOSVE-NEXT:    ldp q7, q6, [sp, #128]
+; NONEON-NOSVE-NEXT:    add sp, sp, #192
 ; NONEON-NOSVE-NEXT:    ret
   %a = load <16 x i16>, ptr %ap
   %val = zext <16 x i16> %a to <16 x i64>
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
index c96189b960268..56149e99b15f8 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
@@ -440,25 +440,20 @@ define void @fcvt_v2f16_v2f64(ptr %a, ptr %b) {
 ; NONEON-NOSVE-LABEL: fcvt_v2f16_v2f64:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    ldr w8, [x0]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #-48]!
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
+; NONEON-NOSVE-NEXT:    str w8, [sp, #-32]!
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp]
 ; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldr h0, [sp, #10]
-; NONEON-NOSVE-NEXT:    fcvt s1, h0
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    fcvt s0, h0
-; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldr d0, [sp, #16]
-; NONEON-NOSVE-NEXT:    str d0, [sp, #24]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #28]
 ; NONEON-NOSVE-NEXT:    fcvt d1, s0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #24]
+; NONEON-NOSVE-NEXT:    ldr h0, [sp, #8]
+; NONEON-NOSVE-NEXT:    fcvt s0, h0
 ; NONEON-NOSVE-NEXT:    fcvt d0, s0
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
 ; NONEON-NOSVE-NEXT:    str q0, [x1]
-; NONEON-NOSVE-NEXT:    add sp, sp, #48
+; NONEON-NOSVE-NEXT:    add sp, sp, #32
 ; NONEON-NOSVE-NEXT:    ret
   %op1 = load <2 x half>, ptr %a
   %res = fpext <2 x half> %op1 to <2 x double>
@@ -480,35 +475,27 @@ define void @fcvt_v4f16_v4f64(ptr %a, ptr %b) {
 ;
 ; NONEON-NOSVE-LABEL: fcvt_v4f16_v4f64:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #80
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 80
+; NONEON-NOSVE-NEXT:    sub sp, sp, #48
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
 ; NONEON-NOSVE-NEXT:    ldr d0, [x0]
 ; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldr h0, [sp, #10]
-; NONEON-NOSVE-NEXT:    fcvt s1, h0
+; NONEON-NOSVE-NEXT:    fcvt s0, h0
+; NONEON-NOSVE-NEXT:    fcvt d1, s0
 ; NONEON-NOSVE-NEXT:    ldr h0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    fcvt s0, h0
-; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #24]
+; NONEON-NOSVE-NEXT:    fcvt d0, s0
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldr h0, [sp, #14]
-; NONEON-NOSVE-NEXT:    fcvt s1, h0
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #12]
 ; NONEON-NOSVE-NEXT:    fcvt s0, h0
-; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #44]
 ; NONEON-NOSVE-NEXT:    fcvt d1, s0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #40]
-; NONEON-NOSVE-NEXT:    fcvt d0, s0
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #36]
-; NONEON-NOSVE-NEXT:    fcvt d1, s0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #32]
+; NONEON-NOSVE-NEXT:    ldr h0, [sp, #12]
+; NONEON-NOSVE-NEXT:    fcvt s0, h0
 ; NONEON-NOSVE-NEXT:    fcvt d0, s0
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #48]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #16]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [x1]
-; NONEON-NOSVE-NEXT:    add sp, sp, #80
+; NONEON-NOSVE-NEXT:    add sp, sp, #48
 ; NONEON-NOSVE-NEXT:    ret
   %op1 = load <4 x half>, ptr %a
   %res = fpext <4 x half> %op1 to <4 x double>
@@ -537,61 +524,44 @@ define void @fcvt_v8f16_v8f64(ptr %a, ptr %b) {
 ;
 ; NONEON-NOSVE-LABEL: fcvt_v8f16_v8f64:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #160
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 160
 ; NONEON-NOSVE-NEXT:    ldr q0, [x0]
-; NONEON-NOSVE-NEXT:    str q0, [sp]
+; NONEON-NOSVE-NEXT:    str q0, [sp, #-96]!
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldr h0, [sp, #26]
-; NONEON-NOSVE-NEXT:    fcvt s1, h0
+; NONEON-NOSVE-NEXT:    fcvt s0, h0
+; NONEON-NOSVE-NEXT:    fcvt d1, s0
 ; NONEON-NOSVE-NEXT:    ldr h0, [sp, #24]
 ; NONEON-NOSVE-NEXT:    fcvt s0, h0
-; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #56]
+; NONEON-NOSVE-NEXT:    fcvt d0, s0
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
 ; NONEON-NOSVE-NEXT:    ldr h0, [sp, #30]
-; NONEON-NOSVE-NEXT:    fcvt s1, h0
+; NONEON-NOSVE-NEXT:    fcvt s0, h0
+; NONEON-NOSVE-NEXT:    fcvt d1, s0
 ; NONEON-NOSVE-NEXT:    ldr h0, [sp, #28]
 ; NONEON-NOSVE-NEXT:    fcvt s0, h0
-; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #48]
+; NONEON-NOSVE-NEXT:    fcvt d0, s0
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #64]
 ; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
-; NONEON-NOSVE-NEXT:    fcvt s1, h0
+; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #64]
+; NONEON-NOSVE-NEXT:    fcvt s0, h0
+; NONEON-NOSVE-NEXT:    fcvt d1, s0
 ; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
 ; NONEON-NOSVE-NEXT:    fcvt s0, h0
-; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #40]
+; NONEON-NOSVE-NEXT:    fcvt d0, s0
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
 ; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
-; NONEON-NOSVE-NEXT:    fcvt s1, h0
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
 ; NONEON-NOSVE-NEXT:    fcvt s0, h0
-; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #92]
-; NONEON-NOSVE-NEXT:    fcvt d1, s0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #88]
-; NONEON-NOSVE-NEXT:    fcvt d0, s0
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #144]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #84]
-; NONEON-NOSVE-NEXT:    fcvt d1, s0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #80]
-; NONEON-NOSVE-NEXT:    fcvt d0, s0
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #128]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #76]
-; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #128]
 ; NONEON-NOSVE-NEXT:    fcvt d1, s0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #72]
-; NONEON-NOSVE-NEXT:    fcvt d0, s0
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #68]
-; NONEON-NOSVE-NEXT:    fcvt d1, s0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #64]
+; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
+; NONEON-NOSVE-NEXT:    fcvt s0, h0
 ; NONEON-NOSVE-NEXT:    fcvt d0, s0
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #96]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #96]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
+; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
 ; NONEON-NOSVE-NEXT:    stp q2, q3, [x1]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [x1, #32]
-; NONEON-NOSVE-NEXT:    add sp, sp, #160
+; NONEON-NOSVE-NEXT:    add sp, sp, #96
 ; NONEON-NOSVE-NEXT:    ret
   %op1 = load <8 x half>, ptr %a
   %res = fpext <8 x half> %op1 to <8 x double>
@@ -637,115 +607,79 @@ define void @fcvt_v16f16_v16f64(ptr %a, ptr %b) {
 ;
 ; NONEON-NOSVE-LABEL: fcvt_v16f16_v16f64:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #336
-; NONEON-NOSVE-NEXT:    str x29, [sp, #320] // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 336
-; NONEON-NOSVE-NEXT:    .cfi_offset w29, -16
+; NONEON-NOSVE-NEXT:    sub sp, sp, #192
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 192
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [x0]
-; NONEON-NOSVE-NEXT:    ldr x29, [sp, #320] // 8-byte Folded Reload
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [sp]
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #40]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #16]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #56]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #50]
-; NONEON-NOSVE-NEXT:    fcvt s1, h0
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #48]
-; NONEON-NOSVE-NEXT:    fcvt s0, h0
-; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #96]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #54]
-; NONEON-NOSVE-NEXT:    fcvt s1, h0
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #52]
-; NONEON-NOSVE-NEXT:    fcvt s0, h0
-; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #88]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
 ; NONEON-NOSVE-NEXT:    ldr h0, [sp, #42]
-; NONEON-NOSVE-NEXT:    fcvt s1, h0
+; NONEON-NOSVE-NEXT:    fcvt s0, h0
+; NONEON-NOSVE-NEXT:    fcvt d1, s0
 ; NONEON-NOSVE-NEXT:    ldr h0, [sp, #40]
 ; NONEON-NOSVE-NEXT:    fcvt s0, h0
-; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #80]
+; NONEON-NOSVE-NEXT:    fcvt d0, s0
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #112]
 ; NONEON-NOSVE-NEXT:    ldr h0, [sp, #46]
-; NONEON-NOSVE-NEXT:    fcvt s1, h0
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #44]
-; NONEON-NOSVE-NEXT:    fcvt s0, h0
-; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #72]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #66]
-; NONEON-NOSVE-NEXT:    fcvt s1, h0
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #64]
-; NONEON-NOSVE-NEXT:    fcvt s0, h0
-; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #128]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #70]
-; NONEON-NOSVE-NEXT:    fcvt s1, h0
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #68]
-; NONEON-NOSVE-NEXT:    fcvt s0, h0
-; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #120]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #58]
-; NONEON-NOSVE-NEXT:    fcvt s1, h0
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #56]
-; NONEON-NOSVE-NEXT:    fcvt s0, h0
-; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #62]
-; NONEON-NOSVE-NEXT:    fcvt s1, h0
-; NONEON-NOSVE-NEXT:    ldr h0, [sp, #60]
 ; NONEON-NOSVE-NEXT:    fcvt s0, h0
-; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #104]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #88]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #152]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #72]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #136]
-; NONEON-NOSVE-NEXT:    ldp d2, d1, [sp, #120]
-; NONEON-NOSVE-NEXT:    str d1, [sp, #328]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #104]
-; NONEON-NOSVE-NEXT:    str d0, [sp, #168]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #164]
-; NONEON-NOSVE-NEXT:    stp d1, d2, [sp, #176]
-; NONEON-NOSVE-NEXT:    fcvt d1, s0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #160]
-; NONEON-NOSVE-NEXT:    fcvt d0, s0
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #240]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #156]
 ; NONEON-NOSVE-NEXT:    fcvt d1, s0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #152]
+; NONEON-NOSVE-NEXT:    ldr h0, [sp, #44]
+; NONEON-NOSVE-NEXT:    fcvt s0, h0
 ; NONEON-NOSVE-NEXT:    fcvt d0, s0
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #224]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #148]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #96]
+; NONEON-NOSVE-NEXT:    ldr h0, [sp, #34]
+; NONEON-NOSVE-NEXT:    fcvt s0, h0
 ; NONEON-NOSVE-NEXT:    fcvt d1, s0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #144]
+; NONEON-NOSVE-NEXT:    ldr h0, [sp, #32]
+; NONEON-NOSVE-NEXT:    fcvt s0, h0
 ; NONEON-NOSVE-NEXT:    fcvt d0, s0
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #208]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #140]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
+; NONEON-NOSVE-NEXT:    ldr h0, [sp, #38]
+; NONEON-NOSVE-NEXT:    fcvt s0, h0
 ; NONEON-NOSVE-NEXT:    fcvt d1, s0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #136]
+; NONEON-NOSVE-NEXT:    ldr h0, [sp, #36]
+; NONEON-NOSVE-NEXT:    fcvt s0, h0
 ; NONEON-NOSVE-NEXT:    fcvt d0, s0
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #192]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #332]
-; NONEON-NOSVE-NEXT:    ldp q4, q3, [sp, #192]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #64]
+; NONEON-NOSVE-NEXT:    ldr h0, [sp, #58]
+; NONEON-NOSVE-NEXT:    ldp q4, q3, [sp, #64]
+; NONEON-NOSVE-NEXT:    fcvt s0, h0
 ; NONEON-NOSVE-NEXT:    fcvt d1, s0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #328]
+; NONEON-NOSVE-NEXT:    ldr h0, [sp, #56]
+; NONEON-NOSVE-NEXT:    fcvt s0, h0
 ; NONEON-NOSVE-NEXT:    fcvt d0, s0
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #304]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #188]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #176]
+; NONEON-NOSVE-NEXT:    ldr h0, [sp, #62]
+; NONEON-NOSVE-NEXT:    fcvt s0, h0
 ; NONEON-NOSVE-NEXT:    fcvt d1, s0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #184]
+; NONEON-NOSVE-NEXT:    ldr h0, [sp, #60]
+; NONEON-NOSVE-NEXT:    fcvt s0, h0
 ; NONEON-NOSVE-NEXT:    fcvt d0, s0
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #288]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #180]
-; NONEON-NOSVE-NEXT:    ldp q7, q6, [sp, #288]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #160]
+; NONEON-NOSVE-NEXT:    ldr h0, [sp, #50]
+; NONEON-NOSVE-NEXT:    ldp q7, q6, [sp, #160]
+; NONEON-NOSVE-NEXT:    fcvt s0, h0
 ; NONEON-NOSVE-NEXT:    fcvt d1, s0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #176]
+; NONEON-NOSVE-NEXT:    ldr h0, [sp, #48]
+; NONEON-NOSVE-NEXT:    fcvt s0, h0
 ; NONEON-NOSVE-NEXT:    fcvt d0, s0
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #272]
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #172]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #144]
+; NONEON-NOSVE-NEXT:    ldr h0, [sp, #54]
+; NONEON-NOSVE-NEXT:    fcvt s0, h0
 ; NONEON-NOSVE-NEXT:    fcvt d1, s0
-; NONEON-NOSVE-NEXT:    ldr s0, [sp, #168]
+; NONEON-NOSVE-NEXT:    ldr h0, [sp, #52]
+; NONEON-NOSVE-NEXT:    fcvt s0, h0
 ; NONEON-NOSVE-NEXT:    fcvt d0, s0
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #256]
-; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #224]
-; NONEON-NOSVE-NEXT:    ldp q2, q5, [sp, #256]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #128]
+; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #96]
+; NONEON-NOSVE-NEXT:    ldp q2, q5, [sp, #128]
 ; NONEON-NOSVE-NEXT:    stp q3, q4, [x1, #32]
 ; NONEON-NOSVE-NEXT:    stp q6, q7, [x1, #64]
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [x1]
 ; NONEON-NOSVE-NEXT:    stp q5, q2, [x1, #96]
-; NONEON-NOSVE-NEXT:    add sp, sp, #336
+; NONEON-NOSVE-NEXT:    add sp, sp, #192
 ; NONEON-NOSVE-NEXT:    ret
   %op1 = load <16 x half>, ptr %a
   %res = fpext <16 x half> %op1 to <16 x double>
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
index 40c8ab27c0b02..75911e5ff1569 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
@@ -30,50 +30,32 @@ define void @sext_v8i1_v8i32(<8 x i1> %a, ptr %out) {
 ;
 ; NONEON-NOSVE-LABEL: sext_v8i1_v8i32:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #80
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 80
+; NONEON-NOSVE-NEXT:    sub sp, sp, #48
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
 ; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #15]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #22]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #14]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #13]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #18]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #12]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #11]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #30]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #10]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #9]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #26]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #8]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #16]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #46]
-; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #44]
-; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #42]
-; NONEON-NOSVE-NEXT:    ldrh w14, [sp, #40]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #34]
+; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #10]
+; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #9]
+; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #12]
+; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #13]
 ; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
 ; NONEON-NOSVE-NEXT:    sbfx w10, w10, #0, #1
-; NONEON-NOSVE-NEXT:    ldrh w13, [sp, #36]
-; NONEON-NOSVE-NEXT:    ldrh w15, [sp, #38]
+; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #14]
+; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #15]
 ; NONEON-NOSVE-NEXT:    sbfx w12, w12, #0, #1
 ; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
-; NONEON-NOSVE-NEXT:    stp w10, w8, [sp, #72]
+; NONEON-NOSVE-NEXT:    stp w10, w8, [sp, #40]
 ; NONEON-NOSVE-NEXT:    sbfx w8, w14, #0, #1
 ; NONEON-NOSVE-NEXT:    sbfx w10, w15, #0, #1
-; NONEON-NOSVE-NEXT:    stp w8, w12, [sp, #64]
+; NONEON-NOSVE-NEXT:    stp w8, w12, [sp, #32]
 ; NONEON-NOSVE-NEXT:    sbfx w12, w13, #0, #1
 ; NONEON-NOSVE-NEXT:    sbfx w8, w11, #0, #1
-; NONEON-NOSVE-NEXT:    stp w12, w10, [sp, #56]
-; NONEON-NOSVE-NEXT:    stp w9, w8, [sp, #48]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #48]
+; NONEON-NOSVE-NEXT:    stp w12, w10, [sp, #24]
+; NONEON-NOSVE-NEXT:    stp w9, w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #16]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [x0]
-; NONEON-NOSVE-NEXT:    add sp, sp, #80
+; NONEON-NOSVE-NEXT:    add sp, sp, #48
 ; NONEON-NOSVE-NEXT:    ret
   %b = sext <8 x i1> %a to <8 x i32>
   store <8 x i32> %b, ptr %out
@@ -206,14 +188,14 @@ define void @sext_v32i8_v32i16(ptr %in, ptr %out) {
 ;
 ; NONEON-NOSVE-LABEL: sext_v32i8_v32i16:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #272
-; NONEON-NOSVE-NEXT:    stp x29, x30, [sp, #176] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    stp x28, x27, [sp, #192] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    stp x26, x25, [sp, #208] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    stp x24, x23, [sp, #224] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    stp x22, x21, [sp, #240] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #256] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 272
+; NONEON-NOSVE-NEXT:    sub sp, sp, #208
+; NONEON-NOSVE-NEXT:    stp x29, x30, [sp, #112] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    stp x28, x27, [sp, #128] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    stp x26, x25, [sp, #144] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    stp x24, x23, [sp, #160] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    stp x22, x21, [sp, #176] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #192] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 208
 ; NONEON-NOSVE-NEXT:    .cfi_offset w19, -8
 ; NONEON-NOSVE-NEXT:    .cfi_offset w20, -16
 ; NONEON-NOSVE-NEXT:    .cfi_offset w21, -24
@@ -230,182 +212,146 @@ define void @sext_v32i8_v32i16(ptr %in, ptr %out) {
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #41]
-; NONEON-NOSVE-NEXT:    ldrb w29, [sp, #18]
-; NONEON-NOSVE-NEXT:    ldrb w27, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrb w25, [sp, #30]
-; NONEON-NOSVE-NEXT:    ldrb w23, [sp, #28]
+; NONEON-NOSVE-NEXT:    ldrb w17, [sp, #23]
+; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #20]
+; NONEON-NOSVE-NEXT:    ldrb w30, [sp, #19]
+; NONEON-NOSVE-NEXT:    ldrb w29, [sp, #16]
 ; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #8] // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    ldrb w21, [sp, #26]
-; NONEON-NOSVE-NEXT:    ldrb w19, [sp, #24]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #19]
-; NONEON-NOSVE-NEXT:    ldrb w6, [sp, #38]
+; NONEON-NOSVE-NEXT:    add w17, w17, w17
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #18]
+; NONEON-NOSVE-NEXT:    add w5, w14, w14
+; NONEON-NOSVE-NEXT:    sxtb w6, w17
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #17]
+; NONEON-NOSVE-NEXT:    sxtb w5, w5
 ; NONEON-NOSVE-NEXT:    add w8, w8, w8
-; NONEON-NOSVE-NEXT:    ldrb w28, [sp, #17]
-; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #22]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #60]
-; NONEON-NOSVE-NEXT:    add w8, w29, w29
-; NONEON-NOSVE-NEXT:    ldrb w4, [sp, #36]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #58]
-; NONEON-NOSVE-NEXT:    add w8, w27, w27
-; NONEON-NOSVE-NEXT:    ldrb w26, [sp, #31]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #56]
-; NONEON-NOSVE-NEXT:    add w8, w25, w25
+; NONEON-NOSVE-NEXT:    ldrb w27, [sp, #30]
+; NONEON-NOSVE-NEXT:    strh w6, [sp, #78]
+; NONEON-NOSVE-NEXT:    add w6, w30, w30
+; NONEON-NOSVE-NEXT:    sxtb w8, w8
+; NONEON-NOSVE-NEXT:    ldrb w28, [sp, #31]
+; NONEON-NOSVE-NEXT:    sxtb w6, w6
 ; NONEON-NOSVE-NEXT:    add w9, w9, w9
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #54]
-; NONEON-NOSVE-NEXT:    add w8, w23, w23
-; NONEON-NOSVE-NEXT:    ldrb w2, [sp, #34]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #52]
-; NONEON-NOSVE-NEXT:    add w8, w21, w21
-; NONEON-NOSVE-NEXT:    ldrb w24, [sp, #29]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #50]
-; NONEON-NOSVE-NEXT:    add w8, w19, w19
-; NONEON-NOSVE-NEXT:    ldrb w17, [sp, #23]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #59]
-; NONEON-NOSVE-NEXT:    add w9, w28, w28
-; NONEON-NOSVE-NEXT:    add w18, w16, w16
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #48]
-; NONEON-NOSVE-NEXT:    add w8, w6, w6
-; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldrb w22, [sp, #27]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #57]
+; NONEON-NOSVE-NEXT:    strh w5, [sp, #72]
+; NONEON-NOSVE-NEXT:    add w5, w29, w29
+; NONEON-NOSVE-NEXT:    ldrb w25, [sp, #28]
+; NONEON-NOSVE-NEXT:    ldrb w26, [sp, #29]
+; NONEON-NOSVE-NEXT:    sxtb w9, w9
+; NONEON-NOSVE-NEXT:    sxtb w5, w5
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #68]
+; NONEON-NOSVE-NEXT:    add w8, w27, w27
+; NONEON-NOSVE-NEXT:    ldrb w23, [sp, #26]
+; NONEON-NOSVE-NEXT:    strh w6, [sp, #70]
+; NONEON-NOSVE-NEXT:    add w6, w28, w28
+; NONEON-NOSVE-NEXT:    sxtb w8, w8
+; NONEON-NOSVE-NEXT:    ldrb w24, [sp, #27]
+; NONEON-NOSVE-NEXT:    sxtb w6, w6
+; NONEON-NOSVE-NEXT:    strh w9, [sp, #66]
 ; NONEON-NOSVE-NEXT:    add w9, w26, w26
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #78]
-; NONEON-NOSVE-NEXT:    add w8, w4, w4
+; NONEON-NOSVE-NEXT:    strh w5, [sp, #64]
+; NONEON-NOSVE-NEXT:    add w5, w25, w25
+; NONEON-NOSVE-NEXT:    sxtb w9, w9
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #60]
+; NONEON-NOSVE-NEXT:    sxtb w8, w5
+; NONEON-NOSVE-NEXT:    ldrb w21, [sp, #24]
+; NONEON-NOSVE-NEXT:    ldrb w22, [sp, #25]
+; NONEON-NOSVE-NEXT:    strh w6, [sp, #62]
+; NONEON-NOSVE-NEXT:    add w6, w24, w24
+; NONEON-NOSVE-NEXT:    add w5, w23, w23
+; NONEON-NOSVE-NEXT:    strh w9, [sp, #58]
+; NONEON-NOSVE-NEXT:    sxtb w9, w6
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #56]
+; NONEON-NOSVE-NEXT:    sxtb w8, w5
+; NONEON-NOSVE-NEXT:    ldrb w4, [sp, #38]
+; NONEON-NOSVE-NEXT:    ldrb w20, [sp, #39]
+; NONEON-NOSVE-NEXT:    add w6, w22, w22
+; NONEON-NOSVE-NEXT:    add w5, w21, w21
+; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #21]
+; NONEON-NOSVE-NEXT:    strh w9, [sp, #54]
+; NONEON-NOSVE-NEXT:    sxtb w9, w6
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #52]
+; NONEON-NOSVE-NEXT:    sxtb w8, w5
+; NONEON-NOSVE-NEXT:    ldrb w2, [sp, #36]
+; NONEON-NOSVE-NEXT:    ldrb w3, [sp, #37]
+; NONEON-NOSVE-NEXT:    add w6, w20, w20
+; NONEON-NOSVE-NEXT:    add w4, w4, w4
+; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #22]
+; NONEON-NOSVE-NEXT:    add w18, w15, w15
+; NONEON-NOSVE-NEXT:    strh w9, [sp, #50]
+; NONEON-NOSVE-NEXT:    sxtb w9, w6
+; NONEON-NOSVE-NEXT:    sxtb w19, w18
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #48]
+; NONEON-NOSVE-NEXT:    sxtb w8, w4
+; NONEON-NOSVE-NEXT:    ldrb w18, [sp, #34]
+; NONEON-NOSVE-NEXT:    ldrb w0, [sp, #35]
+; NONEON-NOSVE-NEXT:    add w3, w3, w3
+; NONEON-NOSVE-NEXT:    add w2, w2, w2
+; NONEON-NOSVE-NEXT:    add w16, w16, w16
+; NONEON-NOSVE-NEXT:    strh w9, [sp, #110]
+; NONEON-NOSVE-NEXT:    sxtb w9, w3
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #108]
+; NONEON-NOSVE-NEXT:    sxtb w8, w2
+; NONEON-NOSVE-NEXT:    sxtb w7, w16
+; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #32]
+; NONEON-NOSVE-NEXT:    ldrb w17, [sp, #33]
+; NONEON-NOSVE-NEXT:    add w0, w0, w0
+; NONEON-NOSVE-NEXT:    add w18, w18, w18
+; NONEON-NOSVE-NEXT:    strh w9, [sp, #106]
+; NONEON-NOSVE-NEXT:    sxtb w9, w0
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #104]
+; NONEON-NOSVE-NEXT:    sxtb w8, w18
 ; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #46]
-; NONEON-NOSVE-NEXT:    ldrb w20, [sp, #25]
-; NONEON-NOSVE-NEXT:    ldrb w30, [sp, #21]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #55]
-; NONEON-NOSVE-NEXT:    add w9, w24, w24
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #76]
-; NONEON-NOSVE-NEXT:    add w8, w2, w2
-; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #44]
-; NONEON-NOSVE-NEXT:    add w17, w17, w17
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #53]
-; NONEON-NOSVE-NEXT:    add w9, w22, w22
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #74]
-; NONEON-NOSVE-NEXT:    add w8, w16, w16
-; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #42]
-; NONEON-NOSVE-NEXT:    strb w17, [sp, #63]
-; NONEON-NOSVE-NEXT:    add w17, w30, w30
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #51]
-; NONEON-NOSVE-NEXT:    add w9, w20, w20
-; NONEON-NOSVE-NEXT:    ldrb w7, [sp, #39]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #72]
-; NONEON-NOSVE-NEXT:    add w8, w14, w14
-; NONEON-NOSVE-NEXT:    ldrb w5, [sp, #37]
-; NONEON-NOSVE-NEXT:    strb w18, [sp, #62]
-; NONEON-NOSVE-NEXT:    ldrb w3, [sp, #35]
-; NONEON-NOSVE-NEXT:    ldrb w0, [sp, #33]
-; NONEON-NOSVE-NEXT:    strb w17, [sp, #61]
 ; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #47]
+; NONEON-NOSVE-NEXT:    add w17, w17, w17
+; NONEON-NOSVE-NEXT:    add w16, w16, w16
+; NONEON-NOSVE-NEXT:    strh w9, [sp, #102]
+; NONEON-NOSVE-NEXT:    sxtb w9, w17
+; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #44]
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #100]
+; NONEON-NOSVE-NEXT:    sxtb w8, w16
 ; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #45]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #49]
-; NONEON-NOSVE-NEXT:    add w9, w7, w7
+; NONEON-NOSVE-NEXT:    add w15, w15, w15
+; NONEON-NOSVE-NEXT:    add w14, w14, w14
+; NONEON-NOSVE-NEXT:    strh w9, [sp, #98]
+; NONEON-NOSVE-NEXT:    sxtb w9, w15
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #96]
+; NONEON-NOSVE-NEXT:    sxtb w8, w14
+; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #42]
 ; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #43]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #70]
-; NONEON-NOSVE-NEXT:    add w8, w12, w12
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #68]
-; NONEON-NOSVE-NEXT:    add w8, w10, w10
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #66]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #12] // 4-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #79]
-; NONEON-NOSVE-NEXT:    add w9, w5, w5
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #77]
-; NONEON-NOSVE-NEXT:    add w9, w3, w3
-; NONEON-NOSVE-NEXT:    add w8, w8, w8
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #64]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #95]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #75]
-; NONEON-NOSVE-NEXT:    add w9, w0, w0
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #142]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #94]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #73]
-; NONEON-NOSVE-NEXT:    add w9, w15, w15
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #140]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #93]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #71]
-; NONEON-NOSVE-NEXT:    add w9, w13, w13
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #138]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #92]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #69]
-; NONEON-NOSVE-NEXT:    add w9, w11, w11
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #136]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #91]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #67]
-; NONEON-NOSVE-NEXT:    ldr w9, [sp, #8] // 4-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #134]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #90]
-; NONEON-NOSVE-NEXT:    add w9, w9, w9
-; NONEON-NOSVE-NEXT:    ldp x20, x19, [sp, #256] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #132]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #89]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #65]
-; NONEON-NOSVE-NEXT:    ldp x22, x21, [sp, #240] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #130]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #88]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #128]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #87]
-; NONEON-NOSVE-NEXT:    ldp x24, x23, [sp, #224] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #126]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #86]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #96]
-; NONEON-NOSVE-NEXT:    ldp x26, x25, [sp, #208] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #124]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #85]
-; NONEON-NOSVE-NEXT:    ldp x28, x27, [sp, #192] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #122]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #84]
-; NONEON-NOSVE-NEXT:    ldp x29, x30, [sp, #176] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #120]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #83]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #118]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #82]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #116]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #81]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #114]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #80]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #111]
-; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #112]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #174]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #110]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #172]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #109]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #170]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #108]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #168]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #107]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #166]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #106]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #164]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #105]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #162]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #104]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #160]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #103]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #158]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #102]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #156]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #101]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #154]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #100]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #152]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #99]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #150]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #98]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #148]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #97]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #146]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #96]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #144]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #144]
+; NONEON-NOSVE-NEXT:    add w13, w13, w13
+; NONEON-NOSVE-NEXT:    add w12, w12, w12
+; NONEON-NOSVE-NEXT:    strh w9, [sp, #94]
+; NONEON-NOSVE-NEXT:    sxtb w9, w13
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #92]
+; NONEON-NOSVE-NEXT:    sxtb w8, w12
+; NONEON-NOSVE-NEXT:    add w11, w11, w11
+; NONEON-NOSVE-NEXT:    add w10, w10, w10
+; NONEON-NOSVE-NEXT:    strh w9, [sp, #90]
+; NONEON-NOSVE-NEXT:    sxtb w9, w11
+; NONEON-NOSVE-NEXT:    ldr w11, [sp, #8] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #88]
+; NONEON-NOSVE-NEXT:    sxtb w8, w10
+; NONEON-NOSVE-NEXT:    ldr w10, [sp, #12] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT:    strh w9, [sp, #86]
+; NONEON-NOSVE-NEXT:    add w11, w11, w11
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #84]
+; NONEON-NOSVE-NEXT:    add w10, w10, w10
+; NONEON-NOSVE-NEXT:    sxtb w9, w11
+; NONEON-NOSVE-NEXT:    strh w7, [sp, #76]
+; NONEON-NOSVE-NEXT:    sxtb w8, w10
+; NONEON-NOSVE-NEXT:    strh w19, [sp, #74]
+; NONEON-NOSVE-NEXT:    strh w9, [sp, #82]
+; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #48]
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #80]
+; NONEON-NOSVE-NEXT:    ldp x20, x19, [sp, #192] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #80]
+; NONEON-NOSVE-NEXT:    ldp x22, x21, [sp, #176] // 16-byte Folded Reload
 ; NONEON-NOSVE-NEXT:    stp q2, q3, [x1]
+; NONEON-NOSVE-NEXT:    ldp x24, x23, [sp, #160] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT:    ldp x26, x25, [sp, #144] // 16-byte Folded Reload
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [x1, #32]
-; NONEON-NOSVE-NEXT:    add sp, sp, #272
+; NONEON-NOSVE-NEXT:    ldp x28, x27, [sp, #128] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT:    ldp x29, x30, [sp, #112] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT:    add sp, sp, #208
 ; NONEON-NOSVE-NEXT:    ret
   %a = load <32 x i8>, ptr %in
   %b = add <32 x i8> %a, %a
@@ -431,42 +377,24 @@ define void @sext_v8i8_v8i32(<8 x i8> %a, ptr %out) {
 ;
 ; NONEON-NOSVE-LABEL: sext_v8i8_v8i32:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #80
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 80
+; NONEON-NOSVE-NEXT:    sub sp, sp, #48
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
 ; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #11]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #30]
+; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #11]
 ; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #10]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #9]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #26]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #40]
+; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #9]
 ; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #8]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #15]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #22]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #32]
+; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #15]
 ; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #14]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #13]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #18]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #24]
+; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #13]
 ; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #12]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #46]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #44]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #72]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #42]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #40]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #64]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #38]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #36]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #56]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #34]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #32]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #48]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #48]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #16]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [x0]
-; NONEON-NOSVE-NEXT:    add sp, sp, #80
+; NONEON-NOSVE-NEXT:    add sp, sp, #48
 ; NONEON-NOSVE-NEXT:    ret
   %b = sext <8 x i8> %a to <8 x i32>
   store <8 x i32>%b, ptr %out
@@ -492,75 +420,39 @@ define void @sext_v16i8_v16i32(<16 x i8> %a, ptr %out) {
 ;
 ; NONEON-NOSVE-LABEL: sext_v16i8_v16i32:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    str q0, [sp, #-160]!
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 160
+; NONEON-NOSVE-NEXT:    str q0, [sp, #-96]!
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #27]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #62]
+; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #27]
 ; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #26]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #60]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #25]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #58]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #88]
+; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #25]
 ; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #56]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #31]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #54]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #80]
+; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #31]
 ; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #30]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #52]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #29]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #50]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #72]
+; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #29]
 ; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #48]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #19]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #46]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #64]
+; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #19]
 ; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #18]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #44]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #17]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #42]
+; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #64]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #56]
+; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #17]
 ; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #94]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #40]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #23]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #38]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #48]
+; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #23]
 ; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #22]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #36]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #21]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #34]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #40]
+; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #21]
 ; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #92]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #152]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #90]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #88]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #144]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #86]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #84]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #136]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #82]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #80]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #128]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #78]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #76]
-; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #128]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #120]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #74]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #72]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #70]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #68]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #104]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #66]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #64]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #96]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #96]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #32]
+; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
 ; NONEON-NOSVE-NEXT:    stp q2, q3, [x0]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [x0, #32]
-; NONEON-NOSVE-NEXT:    add sp, sp, #160
+; NONEON-NOSVE-NEXT:    add sp, sp, #96
 ; NONEON-NOSVE-NEXT:    ret
   %b = sext <16 x i8> %a to <16 x i32>
   store <16 x i32> %b, ptr %out
@@ -599,14 +491,14 @@ define void @sext_v32i8_v32i32(ptr %in, ptr %out) {
 ;
 ; NONEON-NOSVE-LABEL: sext_v32i8_v32i32:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #464
-; NONEON-NOSVE-NEXT:    stp x29, x30, [sp, #368] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    stp x28, x27, [sp, #384] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    stp x26, x25, [sp, #400] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    stp x24, x23, [sp, #416] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    stp x22, x21, [sp, #432] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #448] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 464
+; NONEON-NOSVE-NEXT:    sub sp, sp, #272
+; NONEON-NOSVE-NEXT:    stp x29, x30, [sp, #176] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    stp x28, x27, [sp, #192] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    stp x26, x25, [sp, #208] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    stp x24, x23, [sp, #224] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    stp x22, x21, [sp, #240] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #256] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 272
 ; NONEON-NOSVE-NEXT:    .cfi_offset w19, -8
 ; NONEON-NOSVE-NEXT:    .cfi_offset w20, -16
 ; NONEON-NOSVE-NEXT:    .cfi_offset w21, -24
@@ -621,258 +513,136 @@ define void @sext_v32i8_v32i32(ptr %in, ptr %out) {
 ; NONEON-NOSVE-NEXT:    .cfi_offset w29, -96
 ; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #40]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #41]
-; NONEON-NOSVE-NEXT:    ldrb w29, [sp, #18]
-; NONEON-NOSVE-NEXT:    ldrb w27, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrb w25, [sp, #30]
-; NONEON-NOSVE-NEXT:    ldrb w23, [sp, #28]
+; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #18]
+; NONEON-NOSVE-NEXT:    ldrb w17, [sp, #19]
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #44]
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #45]
+; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #17]
+; NONEON-NOSVE-NEXT:    add w16, w16, w16
+; NONEON-NOSVE-NEXT:    add w17, w17, w17
+; NONEON-NOSVE-NEXT:    ldrb w30, [sp, #23]
 ; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #8] // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    ldrb w21, [sp, #26]
-; NONEON-NOSVE-NEXT:    ldrb w19, [sp, #24]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #19]
-; NONEON-NOSVE-NEXT:    ldrb w6, [sp, #38]
+; NONEON-NOSVE-NEXT:    sxtb w6, w17
+; NONEON-NOSVE-NEXT:    sxtb w7, w16
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
+; NONEON-NOSVE-NEXT:    add w5, w14, w14
+; NONEON-NOSVE-NEXT:    add w18, w15, w15
+; NONEON-NOSVE-NEXT:    sxtb w19, w18
+; NONEON-NOSVE-NEXT:    ldrb w29, [sp, #20]
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #21]
+; NONEON-NOSVE-NEXT:    stp w7, w6, [sp, #104]
+; NONEON-NOSVE-NEXT:    add w6, w30, w30
+; NONEON-NOSVE-NEXT:    sxtb w5, w5
 ; NONEON-NOSVE-NEXT:    add w8, w8, w8
-; NONEON-NOSVE-NEXT:    ldrb w28, [sp, #17]
-; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #22]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #60]
-; NONEON-NOSVE-NEXT:    add w8, w29, w29
-; NONEON-NOSVE-NEXT:    ldrb w4, [sp, #36]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #58]
-; NONEON-NOSVE-NEXT:    add w8, w27, w27
-; NONEON-NOSVE-NEXT:    ldrb w26, [sp, #31]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #56]
-; NONEON-NOSVE-NEXT:    add w8, w25, w25
+; NONEON-NOSVE-NEXT:    ldrb w27, [sp, #26]
+; NONEON-NOSVE-NEXT:    ldrb w28, [sp, #27]
+; NONEON-NOSVE-NEXT:    sxtb w6, w6
+; NONEON-NOSVE-NEXT:    sxtb w8, w8
+; NONEON-NOSVE-NEXT:    stp w5, w19, [sp, #96]
 ; NONEON-NOSVE-NEXT:    add w9, w9, w9
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #54]
-; NONEON-NOSVE-NEXT:    add w8, w23, w23
-; NONEON-NOSVE-NEXT:    ldrb w2, [sp, #34]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #52]
-; NONEON-NOSVE-NEXT:    add w8, w21, w21
-; NONEON-NOSVE-NEXT:    ldrb w24, [sp, #29]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #50]
-; NONEON-NOSVE-NEXT:    add w8, w19, w19
-; NONEON-NOSVE-NEXT:    ldrb w17, [sp, #23]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #59]
-; NONEON-NOSVE-NEXT:    add w9, w28, w28
-; NONEON-NOSVE-NEXT:    add w18, w16, w16
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #48]
-; NONEON-NOSVE-NEXT:    add w8, w6, w6
-; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldrb w22, [sp, #27]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #57]
+; NONEON-NOSVE-NEXT:    add w5, w29, w29
+; NONEON-NOSVE-NEXT:    ldrb w25, [sp, #24]
+; NONEON-NOSVE-NEXT:    ldrb w26, [sp, #25]
+; NONEON-NOSVE-NEXT:    sxtb w9, w9
+; NONEON-NOSVE-NEXT:    stp w8, w6, [sp, #88]
+; NONEON-NOSVE-NEXT:    add w6, w28, w28
+; NONEON-NOSVE-NEXT:    sxtb w5, w5
+; NONEON-NOSVE-NEXT:    add w8, w27, w27
+; NONEON-NOSVE-NEXT:    sxtb w6, w6
+; NONEON-NOSVE-NEXT:    sxtb w8, w8
+; NONEON-NOSVE-NEXT:    ldrb w23, [sp, #30]
+; NONEON-NOSVE-NEXT:    ldrb w24, [sp, #31]
+; NONEON-NOSVE-NEXT:    stp w5, w9, [sp, #80]
 ; NONEON-NOSVE-NEXT:    add w9, w26, w26
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #78]
-; NONEON-NOSVE-NEXT:    add w8, w4, w4
-; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #46]
-; NONEON-NOSVE-NEXT:    ldrb w20, [sp, #25]
-; NONEON-NOSVE-NEXT:    ldrb w30, [sp, #21]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #55]
-; NONEON-NOSVE-NEXT:    add w9, w24, w24
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #76]
-; NONEON-NOSVE-NEXT:    add w8, w2, w2
-; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #44]
+; NONEON-NOSVE-NEXT:    add w5, w25, w25
+; NONEON-NOSVE-NEXT:    stp w8, w6, [sp, #72]
+; NONEON-NOSVE-NEXT:    sxtb w9, w9
+; NONEON-NOSVE-NEXT:    sxtb w8, w5
+; NONEON-NOSVE-NEXT:    ldrb w21, [sp, #28]
+; NONEON-NOSVE-NEXT:    ldrb w22, [sp, #29]
+; NONEON-NOSVE-NEXT:    add w6, w24, w24
+; NONEON-NOSVE-NEXT:    add w5, w23, w23
+; NONEON-NOSVE-NEXT:    ldrb w4, [sp, #34]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #64]
+; NONEON-NOSVE-NEXT:    sxtb w9, w6
+; NONEON-NOSVE-NEXT:    sxtb w8, w5
+; NONEON-NOSVE-NEXT:    ldrb w20, [sp, #35]
+; NONEON-NOSVE-NEXT:    add w6, w22, w22
+; NONEON-NOSVE-NEXT:    add w5, w21, w21
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #56]
+; NONEON-NOSVE-NEXT:    sxtb w9, w6
+; NONEON-NOSVE-NEXT:    sxtb w8, w5
+; NONEON-NOSVE-NEXT:    ldrb w2, [sp, #32]
+; NONEON-NOSVE-NEXT:    ldrb w3, [sp, #33]
+; NONEON-NOSVE-NEXT:    add w6, w20, w20
+; NONEON-NOSVE-NEXT:    add w4, w4, w4
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #48]
+; NONEON-NOSVE-NEXT:    sxtb w9, w6
+; NONEON-NOSVE-NEXT:    sxtb w8, w4
+; NONEON-NOSVE-NEXT:    ldrb w18, [sp, #38]
+; NONEON-NOSVE-NEXT:    ldrb w0, [sp, #39]
+; NONEON-NOSVE-NEXT:    add w3, w3, w3
+; NONEON-NOSVE-NEXT:    add w2, w2, w2
+; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #36]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #168]
+; NONEON-NOSVE-NEXT:    sxtb w9, w3
+; NONEON-NOSVE-NEXT:    sxtb w8, w2
+; NONEON-NOSVE-NEXT:    ldrb w17, [sp, #37]
+; NONEON-NOSVE-NEXT:    add w0, w0, w0
+; NONEON-NOSVE-NEXT:    add w18, w18, w18
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #160]
+; NONEON-NOSVE-NEXT:    sxtb w9, w0
+; NONEON-NOSVE-NEXT:    sxtb w8, w18
+; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #42]
+; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #43]
 ; NONEON-NOSVE-NEXT:    add w17, w17, w17
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #53]
-; NONEON-NOSVE-NEXT:    add w9, w22, w22
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #74]
-; NONEON-NOSVE-NEXT:    add w8, w16, w16
-; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #42]
-; NONEON-NOSVE-NEXT:    strb w17, [sp, #63]
-; NONEON-NOSVE-NEXT:    add w17, w30, w30
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #51]
-; NONEON-NOSVE-NEXT:    add w9, w20, w20
-; NONEON-NOSVE-NEXT:    ldrb w7, [sp, #39]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #72]
-; NONEON-NOSVE-NEXT:    add w8, w14, w14
-; NONEON-NOSVE-NEXT:    ldrb w5, [sp, #37]
-; NONEON-NOSVE-NEXT:    strb w18, [sp, #62]
-; NONEON-NOSVE-NEXT:    ldrb w3, [sp, #35]
-; NONEON-NOSVE-NEXT:    ldrb w0, [sp, #33]
-; NONEON-NOSVE-NEXT:    strb w17, [sp, #61]
-; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #47]
-; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #45]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #49]
-; NONEON-NOSVE-NEXT:    add w9, w7, w7
-; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #43]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #70]
-; NONEON-NOSVE-NEXT:    add w8, w12, w12
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #68]
-; NONEON-NOSVE-NEXT:    add w8, w10, w10
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #66]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #12] // 4-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #79]
-; NONEON-NOSVE-NEXT:    add w9, w5, w5
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #77]
-; NONEON-NOSVE-NEXT:    add w9, w3, w3
-; NONEON-NOSVE-NEXT:    add w8, w8, w8
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #64]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #91]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #75]
-; NONEON-NOSVE-NEXT:    add w9, w0, w0
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #134]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #90]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #73]
-; NONEON-NOSVE-NEXT:    add w9, w15, w15
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #132]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #89]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #71]
-; NONEON-NOSVE-NEXT:    add w9, w13, w13
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #130]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #88]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #69]
-; NONEON-NOSVE-NEXT:    add w9, w11, w11
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #128]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #95]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #67]
-; NONEON-NOSVE-NEXT:    ldr w9, [sp, #8] // 4-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #142]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #94]
-; NONEON-NOSVE-NEXT:    add w9, w9, w9
-; NONEON-NOSVE-NEXT:    ldp x20, x19, [sp, #448] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #140]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #93]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #65]
-; NONEON-NOSVE-NEXT:    ldp x22, x21, [sp, #432] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #138]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #92]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #136]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #83]
-; NONEON-NOSVE-NEXT:    ldp x24, x23, [sp, #416] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #118]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #82]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #96]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #128]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #116]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #81]
-; NONEON-NOSVE-NEXT:    ldp x26, x25, [sp, #400] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #114]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #80]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #192]
-; NONEON-NOSVE-NEXT:    ldp x28, x27, [sp, #384] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #87]
-; NONEON-NOSVE-NEXT:    ldp x29, x30, [sp, #368] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #126]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #86]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #124]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #85]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #122]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #84]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #120]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #107]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #112]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #166]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #106]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #164]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #105]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #176]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #162]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #104]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #182]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #160]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #111]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #174]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #110]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #172]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #109]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #170]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #108]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #168]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #99]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #160]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #150]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #98]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #148]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #97]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #224]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #146]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #96]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #144]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #103]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #158]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #102]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #156]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #101]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #154]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #100]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #152]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #198]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #144]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #284]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #196]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #280]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #194]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #208]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #276]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #192]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #272]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #206]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #300]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #204]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #296]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #202]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #292]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #200]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #288]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #180]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #272]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #248]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #178]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #176]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #240]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #190]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #268]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #188]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #264]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #186]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #260]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #184]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #256]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #230]
-; NONEON-NOSVE-NEXT:    ldp q3, q4, [sp, #240]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #348]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #228]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #344]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #226]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #340]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #224]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #336]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #238]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #364]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #236]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #360]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #234]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #356]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #232]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #352]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #214]
-; NONEON-NOSVE-NEXT:    ldp q6, q7, [sp, #336]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #316]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #212]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #312]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #210]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #308]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #208]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #304]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #222]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #332]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #220]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #328]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #218]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #324]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #216]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #320]
-; NONEON-NOSVE-NEXT:    ldp q5, q2, [sp, #304]
+; NONEON-NOSVE-NEXT:    add w16, w16, w16
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #152]
+; NONEON-NOSVE-NEXT:    sxtb w9, w17
+; NONEON-NOSVE-NEXT:    sxtb w8, w16
+; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #40]
+; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #41]
+; NONEON-NOSVE-NEXT:    add w15, w15, w15
+; NONEON-NOSVE-NEXT:    add w14, w14, w14
+; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #46]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #144]
+; NONEON-NOSVE-NEXT:    sxtb w9, w15
+; NONEON-NOSVE-NEXT:    sxtb w8, w14
+; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #47]
+; NONEON-NOSVE-NEXT:    add w13, w13, w13
+; NONEON-NOSVE-NEXT:    add w12, w12, w12
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #136]
+; NONEON-NOSVE-NEXT:    sxtb w9, w13
+; NONEON-NOSVE-NEXT:    sxtb w8, w12
+; NONEON-NOSVE-NEXT:    add w11, w11, w11
+; NONEON-NOSVE-NEXT:    add w10, w10, w10
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #128]
+; NONEON-NOSVE-NEXT:    sxtb w9, w11
+; NONEON-NOSVE-NEXT:    ldr w11, [sp, #8] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT:    sxtb w8, w10
+; NONEON-NOSVE-NEXT:    ldr w10, [sp, #12] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT:    add w11, w11, w11
+; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #80]
+; NONEON-NOSVE-NEXT:    add w10, w10, w10
+; NONEON-NOSVE-NEXT:    sxtb w11, w11
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #120]
+; NONEON-NOSVE-NEXT:    sxtb w8, w10
+; NONEON-NOSVE-NEXT:    ldp q4, q3, [sp, #48]
+; NONEON-NOSVE-NEXT:    ldp q7, q6, [sp, #144]
+; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #112]
+; NONEON-NOSVE-NEXT:    ldp x20, x19, [sp, #256] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT:    ldp q2, q5, [sp, #112]
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT:    ldp x22, x21, [sp, #240] // 16-byte Folded Reload
 ; NONEON-NOSVE-NEXT:    stp q3, q4, [x1, #32]
+; NONEON-NOSVE-NEXT:    ldp x24, x23, [sp, #224] // 16-byte Folded Reload
 ; NONEON-NOSVE-NEXT:    stp q6, q7, [x1, #64]
+; NONEON-NOSVE-NEXT:    ldp x26, x25, [sp, #208] // 16-byte Folded Reload
 ; NONEON-NOSVE-NEXT:    stp q5, q2, [x1, #96]
-; NONEON-NOSVE-NEXT:    add sp, sp, #464
+; NONEON-NOSVE-NEXT:    ldp x28, x27, [sp, #192] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT:    ldp x29, x30, [sp, #176] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT:    add sp, sp, #272
 ; NONEON-NOSVE-NEXT:    ret
   %a = load <32 x i8>, ptr %in
   %b = add <32 x i8> %a, %a
@@ -944,57 +714,26 @@ define void @sext_v8i8_v8i64(<8 x i8> %a, ptr %out) {
 ;
 ; NONEON-NOSVE-LABEL: sext_v8i8_v8i64:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #176
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 176
+; NONEON-NOSVE-NEXT:    sub sp, sp, #80
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 80
 ; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
-; NONEON-NOSVE-NEXT:    add x8, sp, #144
-; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #11]
-; NONEON-NOSVE-NEXT:    strh w9, [sp, #30]
-; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #10]
-; NONEON-NOSVE-NEXT:    strh w9, [sp, #28]
-; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #9]
-; NONEON-NOSVE-NEXT:    strh w9, [sp, #26]
-; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #8]
-; NONEON-NOSVE-NEXT:    strh w9, [sp, #24]
-; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #15]
-; NONEON-NOSVE-NEXT:    strh w9, [sp, #22]
-; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #14]
-; NONEON-NOSVE-NEXT:    strh w9, [sp, #20]
-; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #13]
-; NONEON-NOSVE-NEXT:    strh w9, [sp, #18]
-; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #12]
-; NONEON-NOSVE-NEXT:    strh w9, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldrsh w10, [sp, #42]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #40]
-; NONEON-NOSVE-NEXT:    stp w9, w10, [sp, #64]
-; NONEON-NOSVE-NEXT:    ldrsh w10, [sp, #46]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #44]
-; NONEON-NOSVE-NEXT:    stp w9, w10, [sp, #72]
-; NONEON-NOSVE-NEXT:    ldrsh w10, [sp, #34]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #64]
-; NONEON-NOSVE-NEXT:    stp w9, w10, [sp, #48]
-; NONEON-NOSVE-NEXT:    ldrsh w10, [sp, #38]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #36]
-; NONEON-NOSVE-NEXT:    stp w9, w10, [sp, #56]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #96]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #48]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #80]
-; NONEON-NOSVE-NEXT:    ldpsw x9, x10, [sp, #96]
-; NONEON-NOSVE-NEXT:    stp x9, x10, [sp, #144]
-; NONEON-NOSVE-NEXT:    ldpsw x9, x10, [sp, #104]
-; NONEON-NOSVE-NEXT:    stp x9, x10, [sp, #160]
-; NONEON-NOSVE-NEXT:    ldpsw x9, x10, [sp, #80]
-; NONEON-NOSVE-NEXT:    stp x9, x10, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldpsw x9, x10, [sp, #88]
-; NONEON-NOSVE-NEXT:    stp x9, x10, [sp, #128]
-; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldp q2, q3, [x8]
-; NONEON-NOSVE-NEXT:    stp q1, q0, [x0, #32]
+; NONEON-NOSVE-NEXT:    ldrsb x9, [sp, #9]
+; NONEON-NOSVE-NEXT:    ldrsb x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #64]
+; NONEON-NOSVE-NEXT:    ldrsb x9, [sp, #11]
+; NONEON-NOSVE-NEXT:    ldrsb x8, [sp, #10]
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #48]
+; NONEON-NOSVE-NEXT:    ldrsb x9, [sp, #13]
+; NONEON-NOSVE-NEXT:    ldrsb x8, [sp, #12]
+; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #48]
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #32]
+; NONEON-NOSVE-NEXT:    ldrsb x9, [sp, #15]
+; NONEON-NOSVE-NEXT:    ldrsb x8, [sp, #14]
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #16]
 ; NONEON-NOSVE-NEXT:    stp q2, q3, [x0]
-; NONEON-NOSVE-NEXT:    add sp, sp, #176
+; NONEON-NOSVE-NEXT:    stp q1, q0, [x0, #32]
+; NONEON-NOSVE-NEXT:    add sp, sp, #80
 ; NONEON-NOSVE-NEXT:    ret
   %b = sext <8 x i8> %a to <8 x i64>
   store <8 x i64>%b, ptr %out
@@ -1034,109 +773,43 @@ define void @sext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
 ;
 ; NONEON-NOSVE-LABEL: sext_v16i8_v16i64:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #368
-; NONEON-NOSVE-NEXT:    str x29, [sp, #352] // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 368
-; NONEON-NOSVE-NEXT:    .cfi_offset w29, -16
-; NONEON-NOSVE-NEXT:    str q0, [sp]
-; NONEON-NOSVE-NEXT:    ldr x29, [sp, #352] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT:    str q0, [sp, #-160]!
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 160
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #24]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #35]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #70]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #34]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #68]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #33]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #66]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #32]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #64]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #39]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #62]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #38]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #60]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #37]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #58]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #36]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #56]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #27]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #56]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #54]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #26]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #52]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #25]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #88]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #50]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #98]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #48]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #31]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #46]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #30]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #44]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #29]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #42]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #40]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #96]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #40]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #152]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #102]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #100]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #160]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #90]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #88]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #72]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #152]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #136]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #94]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #92]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #144]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #82]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #80]
-; NONEON-NOSVE-NEXT:    str d0, [sp, #360]
-; NONEON-NOSVE-NEXT:    ldp d2, d0, [sp, #136]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #120]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #86]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #84]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #128]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #74]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #72]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #208]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #120]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #104]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #78]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #76]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #112]
-; NONEON-NOSVE-NEXT:    stp d0, d2, [sp, #192]
-; NONEON-NOSVE-NEXT:    ldp d2, d0, [sp, #104]
-; NONEON-NOSVE-NEXT:    str d2, [sp, #168]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #176]
-; NONEON-NOSVE-NEXT:    ldpsw x8, x9, [sp, #216]
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #320]
-; NONEON-NOSVE-NEXT:    ldrsw x9, [sp, #364]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #360]
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #336]
-; NONEON-NOSVE-NEXT:    ldpsw x8, x9, [sp, #200]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #320]
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #288]
-; NONEON-NOSVE-NEXT:    ldpsw x8, x9, [sp, #208]
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #304]
-; NONEON-NOSVE-NEXT:    ldpsw x8, x9, [sp, #184]
-; NONEON-NOSVE-NEXT:    ldp q3, q4, [sp, #288]
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #256]
-; NONEON-NOSVE-NEXT:    ldpsw x8, x9, [sp, #192]
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #272]
-; NONEON-NOSVE-NEXT:    ldpsw x8, x9, [sp, #168]
-; NONEON-NOSVE-NEXT:    ldp q6, q7, [sp, #256]
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #224]
-; NONEON-NOSVE-NEXT:    ldpsw x8, x9, [sp, #176]
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #240]
-; NONEON-NOSVE-NEXT:    ldp q5, q2, [sp, #224]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldrsb x9, [sp, #25]
+; NONEON-NOSVE-NEXT:    ldrsb x8, [sp, #24]
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #144]
+; NONEON-NOSVE-NEXT:    ldrsb x9, [sp, #27]
+; NONEON-NOSVE-NEXT:    ldrsb x8, [sp, #26]
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #128]
+; NONEON-NOSVE-NEXT:    ldrsb x9, [sp, #29]
+; NONEON-NOSVE-NEXT:    ldrsb x8, [sp, #28]
+; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #128]
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #112]
+; NONEON-NOSVE-NEXT:    ldrsb x9, [sp, #31]
+; NONEON-NOSVE-NEXT:    ldrsb x8, [sp, #30]
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #96]
+; NONEON-NOSVE-NEXT:    ldrsb x9, [sp, #17]
+; NONEON-NOSVE-NEXT:    ldrsb x8, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldp q4, q3, [sp, #96]
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #80]
+; NONEON-NOSVE-NEXT:    ldrsb x9, [sp, #19]
+; NONEON-NOSVE-NEXT:    ldrsb x8, [sp, #18]
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #64]
+; NONEON-NOSVE-NEXT:    ldrsb x9, [sp, #21]
+; NONEON-NOSVE-NEXT:    ldrsb x8, [sp, #20]
+; NONEON-NOSVE-NEXT:    ldp q7, q6, [sp, #64]
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #48]
+; NONEON-NOSVE-NEXT:    ldrsb x9, [sp, #23]
+; NONEON-NOSVE-NEXT:    ldrsb x8, [sp, #22]
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #32]
+; NONEON-NOSVE-NEXT:    ldp q2, q5, [sp, #32]
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
 ; NONEON-NOSVE-NEXT:    stp q3, q4, [x0, #32]
 ; NONEON-NOSVE-NEXT:    stp q6, q7, [x0, #64]
 ; NONEON-NOSVE-NEXT:    stp q5, q2, [x0, #96]
-; NONEON-NOSVE-NEXT:    add sp, sp, #368
+; NONEON-NOSVE-NEXT:    add sp, sp, #160
 ; NONEON-NOSVE-NEXT:    ret
   %b = sext <16 x i8> %a to <16 x i64>
   store <16 x i64> %b, ptr %out
@@ -1208,14 +881,14 @@ define void @sext_v32i8_v32i64(ptr %in, ptr %out) {
 ;
 ; NONEON-NOSVE-LABEL: sext_v32i8_v32i64:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    stp x29, x30, [sp, #-96]! // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    stp x28, x27, [sp, #16] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    stp x26, x25, [sp, #32] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    stp x24, x23, [sp, #48] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    sub sp, sp, #752
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 848
+; NONEON-NOSVE-NEXT:    sub sp, sp, #400
+; NONEON-NOSVE-NEXT:    stp x29, x30, [sp, #304] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    stp x28, x27, [sp, #320] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    stp x26, x25, [sp, #336] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    stp x24, x23, [sp, #352] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    stp x22, x21, [sp, #368] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #384] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 400
 ; NONEON-NOSVE-NEXT:    .cfi_offset w19, -8
 ; NONEON-NOSVE-NEXT:    .cfi_offset w20, -16
 ; NONEON-NOSVE-NEXT:    .cfi_offset w21, -24
@@ -1230,345 +903,144 @@ define void @sext_v32i8_v32i64(ptr %in, ptr %out) {
 ; NONEON-NOSVE-NEXT:    .cfi_offset w29, -96
 ; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #40]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #41]
-; NONEON-NOSVE-NEXT:    ldrb w29, [sp, #18]
-; NONEON-NOSVE-NEXT:    ldrb w27, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrb w25, [sp, #30]
-; NONEON-NOSVE-NEXT:    ldrb w23, [sp, #28]
+; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldrb w17, [sp, #17]
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #46]
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #47]
+; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #18]
+; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #19]
+; NONEON-NOSVE-NEXT:    add w16, w16, w16
+; NONEON-NOSVE-NEXT:    add w17, w17, w17
+; NONEON-NOSVE-NEXT:    ldrb w30, [sp, #21]
 ; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #8] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT:    sxtb x19, w17
+; NONEON-NOSVE-NEXT:    sxtb x20, w16
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    ldrb w21, [sp, #26]
-; NONEON-NOSVE-NEXT:    ldrb w19, [sp, #24]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #19]
-; NONEON-NOSVE-NEXT:    ldrb w6, [sp, #38]
+; NONEON-NOSVE-NEXT:    add w7, w14, w14
+; NONEON-NOSVE-NEXT:    add w18, w15, w15
+; NONEON-NOSVE-NEXT:    sxtb x21, w18
+; NONEON-NOSVE-NEXT:    ldrb w29, [sp, #22]
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #23]
+; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #160]
+; NONEON-NOSVE-NEXT:    add w19, w30, w30
+; NONEON-NOSVE-NEXT:    sxtb x7, w7
 ; NONEON-NOSVE-NEXT:    add w8, w8, w8
-; NONEON-NOSVE-NEXT:    ldrb w28, [sp, #17]
-; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #22]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #60]
-; NONEON-NOSVE-NEXT:    add w8, w29, w29
-; NONEON-NOSVE-NEXT:    ldrb w4, [sp, #36]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #58]
-; NONEON-NOSVE-NEXT:    add w8, w27, w27
-; NONEON-NOSVE-NEXT:    ldrb w26, [sp, #31]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #56]
-; NONEON-NOSVE-NEXT:    add w8, w25, w25
+; NONEON-NOSVE-NEXT:    ldrb w27, [sp, #24]
+; NONEON-NOSVE-NEXT:    ldrb w28, [sp, #25]
+; NONEON-NOSVE-NEXT:    sxtb x19, w19
+; NONEON-NOSVE-NEXT:    sxtb x8, w8
+; NONEON-NOSVE-NEXT:    stp x7, x21, [sp, #144]
 ; NONEON-NOSVE-NEXT:    add w9, w9, w9
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #54]
-; NONEON-NOSVE-NEXT:    add w8, w23, w23
-; NONEON-NOSVE-NEXT:    ldrb w2, [sp, #34]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #52]
-; NONEON-NOSVE-NEXT:    add w8, w21, w21
+; NONEON-NOSVE-NEXT:    add w7, w29, w29
+; NONEON-NOSVE-NEXT:    ldrb w25, [sp, #26]
+; NONEON-NOSVE-NEXT:    ldrb w26, [sp, #27]
+; NONEON-NOSVE-NEXT:    sxtb x9, w9
+; NONEON-NOSVE-NEXT:    stp x8, x19, [sp, #128]
+; NONEON-NOSVE-NEXT:    add w19, w28, w28
+; NONEON-NOSVE-NEXT:    sxtb x7, w7
+; NONEON-NOSVE-NEXT:    add w8, w27, w27
+; NONEON-NOSVE-NEXT:    sxtb x19, w19
+; NONEON-NOSVE-NEXT:    sxtb x8, w8
+; NONEON-NOSVE-NEXT:    ldrb w23, [sp, #28]
 ; NONEON-NOSVE-NEXT:    ldrb w24, [sp, #29]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #50]
-; NONEON-NOSVE-NEXT:    add w8, w19, w19
-; NONEON-NOSVE-NEXT:    ldrb w17, [sp, #23]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #59]
-; NONEON-NOSVE-NEXT:    add w9, w28, w28
-; NONEON-NOSVE-NEXT:    add w18, w16, w16
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #48]
-; NONEON-NOSVE-NEXT:    add w8, w6, w6
-; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldrb w22, [sp, #27]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #57]
+; NONEON-NOSVE-NEXT:    stp x7, x9, [sp, #112]
 ; NONEON-NOSVE-NEXT:    add w9, w26, w26
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #78]
-; NONEON-NOSVE-NEXT:    add w8, w4, w4
-; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #46]
-; NONEON-NOSVE-NEXT:    ldrb w20, [sp, #25]
-; NONEON-NOSVE-NEXT:    ldrb w30, [sp, #21]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #55]
-; NONEON-NOSVE-NEXT:    add w9, w24, w24
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #76]
-; NONEON-NOSVE-NEXT:    add w8, w2, w2
-; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #44]
-; NONEON-NOSVE-NEXT:    add w17, w17, w17
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #53]
-; NONEON-NOSVE-NEXT:    add w9, w22, w22
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #74]
-; NONEON-NOSVE-NEXT:    add w8, w16, w16
-; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #42]
-; NONEON-NOSVE-NEXT:    strb w17, [sp, #63]
-; NONEON-NOSVE-NEXT:    add w17, w30, w30
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #51]
-; NONEON-NOSVE-NEXT:    add w9, w20, w20
-; NONEON-NOSVE-NEXT:    ldrb w7, [sp, #39]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #72]
-; NONEON-NOSVE-NEXT:    add w8, w14, w14
-; NONEON-NOSVE-NEXT:    ldrb w5, [sp, #37]
-; NONEON-NOSVE-NEXT:    strb w18, [sp, #62]
+; NONEON-NOSVE-NEXT:    add w7, w25, w25
+; NONEON-NOSVE-NEXT:    stp x8, x19, [sp, #96]
+; NONEON-NOSVE-NEXT:    sxtb x9, w9
+; NONEON-NOSVE-NEXT:    sxtb x8, w7
+; NONEON-NOSVE-NEXT:    ldrb w6, [sp, #30]
+; NONEON-NOSVE-NEXT:    ldrb w22, [sp, #31]
+; NONEON-NOSVE-NEXT:    add w19, w24, w24
+; NONEON-NOSVE-NEXT:    add w7, w23, w23
+; NONEON-NOSVE-NEXT:    ldrb w4, [sp, #32]
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #80]
+; NONEON-NOSVE-NEXT:    sxtb x9, w19
+; NONEON-NOSVE-NEXT:    sxtb x8, w7
+; NONEON-NOSVE-NEXT:    ldrb w5, [sp, #33]
+; NONEON-NOSVE-NEXT:    add w19, w22, w22
+; NONEON-NOSVE-NEXT:    add w6, w6, w6
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #64]
+; NONEON-NOSVE-NEXT:    sxtb x9, w19
+; NONEON-NOSVE-NEXT:    sxtb x8, w6
+; NONEON-NOSVE-NEXT:    ldrb w2, [sp, #34]
 ; NONEON-NOSVE-NEXT:    ldrb w3, [sp, #35]
-; NONEON-NOSVE-NEXT:    ldrb w0, [sp, #33]
-; NONEON-NOSVE-NEXT:    strb w17, [sp, #61]
-; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #47]
-; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #45]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #49]
-; NONEON-NOSVE-NEXT:    add w9, w7, w7
-; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #43]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #70]
-; NONEON-NOSVE-NEXT:    add w8, w12, w12
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #68]
-; NONEON-NOSVE-NEXT:    add w8, w10, w10
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #66]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #12] // 4-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #79]
-; NONEON-NOSVE-NEXT:    add w9, w5, w5
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #77]
-; NONEON-NOSVE-NEXT:    add w9, w3, w3
-; NONEON-NOSVE-NEXT:    add w8, w8, w8
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #64]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #91]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #75]
-; NONEON-NOSVE-NEXT:    add w9, w0, w0
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #134]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #90]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #73]
-; NONEON-NOSVE-NEXT:    add w9, w15, w15
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #132]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #89]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #71]
-; NONEON-NOSVE-NEXT:    add w9, w13, w13
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #130]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #88]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #69]
-; NONEON-NOSVE-NEXT:    add w9, w11, w11
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #128]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #95]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #67]
-; NONEON-NOSVE-NEXT:    ldr w9, [sp, #8] // 4-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #142]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #94]
-; NONEON-NOSVE-NEXT:    add w9, w9, w9
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #140]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #93]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #65]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #138]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #92]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #136]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #83]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #118]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #82]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #96]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #128]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #116]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #81]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #114]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #80]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #192]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #87]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #126]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #86]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #124]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #85]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #122]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #84]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #120]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #107]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #112]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #166]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #106]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #164]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #105]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #176]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #162]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #104]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #178]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #160]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #111]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #174]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #110]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #172]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #109]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #170]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #108]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #168]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #99]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #160]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #150]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #98]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #148]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #97]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #224]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #146]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #96]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #144]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #103]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #158]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #102]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #156]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #101]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #154]
-; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #100]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #152]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #194]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #144]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #276]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #192]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #272]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #198]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #208]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #284]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #196]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #280]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #202]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #272]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #292]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #200]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #288]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #206]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #400]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #300]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #204]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #296]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #176]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #288]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #240]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #182]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #180]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #248]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #186]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #416]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #240]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #260]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #184]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #256]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #190]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #368]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #268]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #188]
-; NONEON-NOSVE-NEXT:    ldrsw x9, [sp, #372]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #264]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #226]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #256]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #340]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #224]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #336]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #230]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #384]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #348]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #228]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #344]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #234]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #336]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #356]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #232]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #352]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #238]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #464]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #364]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #236]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #360]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #210]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #352]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #308]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #208]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #304]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #214]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #480]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #316]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #212]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #312]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #218]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #304]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #324]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #216]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #320]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #222]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #432]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #332]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #220]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #328]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #404]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #320]
-; NONEON-NOSVE-NEXT:    str x8, [sp, #568]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #400]
-; NONEON-NOSVE-NEXT:    str x8, [sp, #560]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #412]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #448]
-; NONEON-NOSVE-NEXT:    str x8, [sp, #584]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #408]
-; NONEON-NOSVE-NEXT:    str x8, [sp, #576]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #420]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #560]
-; NONEON-NOSVE-NEXT:    str x8, [sp, #600]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #416]
-; NONEON-NOSVE-NEXT:    str x8, [sp, #592]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #428]
-; NONEON-NOSVE-NEXT:    str x8, [sp, #616]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #424]
-; NONEON-NOSVE-NEXT:    str x8, [sp, #608]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #368]
-; NONEON-NOSVE-NEXT:    ldp q2, q3, [sp, #592]
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #496]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #380]
-; NONEON-NOSVE-NEXT:    str x8, [sp, #520]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #376]
-; NONEON-NOSVE-NEXT:    str x8, [sp, #512]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #388]
-; NONEON-NOSVE-NEXT:    ldp q4, q5, [sp, #496]
-; NONEON-NOSVE-NEXT:    str x8, [sp, #536]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #384]
-; NONEON-NOSVE-NEXT:    str x8, [sp, #528]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #396]
-; NONEON-NOSVE-NEXT:    str x8, [sp, #552]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #392]
-; NONEON-NOSVE-NEXT:    str x8, [sp, #544]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #468]
-; NONEON-NOSVE-NEXT:    ldp q6, q7, [sp, #528]
-; NONEON-NOSVE-NEXT:    str x8, [sp, #696]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #464]
-; NONEON-NOSVE-NEXT:    str x8, [sp, #688]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #476]
-; NONEON-NOSVE-NEXT:    str x8, [sp, #712]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #472]
-; NONEON-NOSVE-NEXT:    str x8, [sp, #704]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #484]
-; NONEON-NOSVE-NEXT:    ldp q16, q17, [sp, #688]
-; NONEON-NOSVE-NEXT:    str x8, [sp, #728]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #480]
-; NONEON-NOSVE-NEXT:    str x8, [sp, #720]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #492]
-; NONEON-NOSVE-NEXT:    str x8, [sp, #744]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #488]
-; NONEON-NOSVE-NEXT:    str x8, [sp, #736]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #436]
-; NONEON-NOSVE-NEXT:    ldp q19, q20, [sp, #720]
-; NONEON-NOSVE-NEXT:    str x8, [sp, #632]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #432]
-; NONEON-NOSVE-NEXT:    str x8, [sp, #624]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #444]
-; NONEON-NOSVE-NEXT:    str x8, [sp, #648]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #440]
-; NONEON-NOSVE-NEXT:    str x8, [sp, #640]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #452]
-; NONEON-NOSVE-NEXT:    ldp q22, q23, [sp, #624]
-; NONEON-NOSVE-NEXT:    str x8, [sp, #664]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #448]
-; NONEON-NOSVE-NEXT:    str x8, [sp, #656]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #460]
-; NONEON-NOSVE-NEXT:    str x8, [sp, #680]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #456]
-; NONEON-NOSVE-NEXT:    str x8, [sp, #672]
-; NONEON-NOSVE-NEXT:    ldp q21, q18, [sp, #656]
+; NONEON-NOSVE-NEXT:    add w5, w5, w5
+; NONEON-NOSVE-NEXT:    add w4, w4, w4
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #48]
+; NONEON-NOSVE-NEXT:    sxtb x9, w5
+; NONEON-NOSVE-NEXT:    sxtb x8, w4
+; NONEON-NOSVE-NEXT:    ldrb w18, [sp, #36]
+; NONEON-NOSVE-NEXT:    ldrb w0, [sp, #37]
+; NONEON-NOSVE-NEXT:    add w3, w3, w3
+; NONEON-NOSVE-NEXT:    add w2, w2, w2
+; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #38]
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #288]
+; NONEON-NOSVE-NEXT:    sxtb x9, w3
+; NONEON-NOSVE-NEXT:    sxtb x8, w2
+; NONEON-NOSVE-NEXT:    ldrb w17, [sp, #39]
+; NONEON-NOSVE-NEXT:    add w0, w0, w0
+; NONEON-NOSVE-NEXT:    add w18, w18, w18
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #272]
+; NONEON-NOSVE-NEXT:    sxtb x9, w0
+; NONEON-NOSVE-NEXT:    sxtb x8, w18
+; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #40]
+; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #41]
+; NONEON-NOSVE-NEXT:    add w17, w17, w17
+; NONEON-NOSVE-NEXT:    add w16, w16, w16
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #256]
+; NONEON-NOSVE-NEXT:    sxtb x9, w17
+; NONEON-NOSVE-NEXT:    sxtb x8, w16
+; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #42]
+; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #43]
+; NONEON-NOSVE-NEXT:    add w15, w15, w15
+; NONEON-NOSVE-NEXT:    add w14, w14, w14
+; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #44]
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #240]
+; NONEON-NOSVE-NEXT:    sxtb x9, w15
+; NONEON-NOSVE-NEXT:    sxtb x8, w14
+; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #45]
+; NONEON-NOSVE-NEXT:    add w13, w13, w13
+; NONEON-NOSVE-NEXT:    add w12, w12, w12
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #224]
+; NONEON-NOSVE-NEXT:    sxtb x9, w13
+; NONEON-NOSVE-NEXT:    sxtb x8, w12
+; NONEON-NOSVE-NEXT:    add w11, w11, w11
+; NONEON-NOSVE-NEXT:    add w10, w10, w10
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #208]
+; NONEON-NOSVE-NEXT:    sxtb x9, w11
+; NONEON-NOSVE-NEXT:    ldr w11, [sp, #8] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT:    sxtb x8, w10
+; NONEON-NOSVE-NEXT:    ldr w10, [sp, #12] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT:    add w11, w11, w11
+; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #144]
+; NONEON-NOSVE-NEXT:    add w10, w10, w10
+; NONEON-NOSVE-NEXT:    sxtb x11, w11
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #192]
+; NONEON-NOSVE-NEXT:    sxtb x8, w10
+; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #112]
+; NONEON-NOSVE-NEXT:    ldp q5, q4, [sp, #80]
+; NONEON-NOSVE-NEXT:    stp x8, x11, [sp, #176]
+; NONEON-NOSVE-NEXT:    ldp q7, q6, [sp, #48]
+; NONEON-NOSVE-NEXT:    ldp q17, q16, [sp, #272]
+; NONEON-NOSVE-NEXT:    ldp q18, q21, [sp, #176]
+; NONEON-NOSVE-NEXT:    ldp q20, q19, [sp, #240]
+; NONEON-NOSVE-NEXT:    ldp q23, q22, [sp, #208]
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT:    ldp x20, x19, [sp, #384] // 16-byte Folded Reload
 ; NONEON-NOSVE-NEXT:    stp q2, q3, [x1, #32]
+; NONEON-NOSVE-NEXT:    ldp x22, x21, [sp, #368] // 16-byte Folded Reload
 ; NONEON-NOSVE-NEXT:    stp q4, q5, [x1, #64]
+; NONEON-NOSVE-NEXT:    ldp x24, x23, [sp, #352] // 16-byte Folded Reload
 ; NONEON-NOSVE-NEXT:    stp q6, q7, [x1, #96]
+; NONEON-NOSVE-NEXT:    ldp x26, x25, [sp, #336] // 16-byte Folded Reload
 ; NONEON-NOSVE-NEXT:    stp q16, q17, [x1, #128]
+; NONEON-NOSVE-NEXT:    ldp x28, x27, [sp, #320] // 16-byte Folded Reload
 ; NONEON-NOSVE-NEXT:    stp q19, q20, [x1, #160]
+; NONEON-NOSVE-NEXT:    ldp x29, x30, [sp, #304] // 16-byte Folded Reload
 ; NONEON-NOSVE-NEXT:    stp q22, q23, [x1, #192]
 ; NONEON-NOSVE-NEXT:    stp q21, q18, [x1, #224]
-; NONEON-NOSVE-NEXT:    add sp, sp, #752
-; NONEON-NOSVE-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    ldp x24, x23, [sp, #48] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    ldp x26, x25, [sp, #32] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    ldp x28, x27, [sp, #16] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    ldp x29, x30, [sp], #96 // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT:    add sp, sp, #400
 ; NONEON-NOSVE-NEXT:    ret
   %a = load <32 x i8>, ptr %in
   %b = add <32 x i8> %a, %a
@@ -1636,91 +1108,70 @@ define void @sext_v16i16_v16i32(ptr %in, ptr %out) {
 ;
 ; NONEON-NOSVE-LABEL: sext_v16i16_v16i32:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #160
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 160
 ; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
-; NONEON-NOSVE-NEXT:    stp q1, q0, [sp]
+; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-96]!
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
 ; NONEON-NOSVE-NEXT:    ldrh w13, [sp, #4]
-; NONEON-NOSVE-NEXT:    ldrh w14, [sp, #6]
-; NONEON-NOSVE-NEXT:    ldrh w3, [sp, #2]
-; NONEON-NOSVE-NEXT:    ldrh w5, [sp]
-; NONEON-NOSVE-NEXT:    ldrh w2, [sp, #12]
-; NONEON-NOSVE-NEXT:    ldrh w4, [sp, #14]
+; NONEON-NOSVE-NEXT:    ldrh w16, [sp, #6]
+; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #2]
+; NONEON-NOSVE-NEXT:    ldrh w3, [sp]
+; NONEON-NOSVE-NEXT:    ldrh w4, [sp, #12]
+; NONEON-NOSVE-NEXT:    ldrh w5, [sp, #14]
 ; NONEON-NOSVE-NEXT:    add w13, w13, w13
-; NONEON-NOSVE-NEXT:    add w14, w14, w14
-; NONEON-NOSVE-NEXT:    ldrh w18, [sp, #8]
-; NONEON-NOSVE-NEXT:    ldrh w0, [sp, #10]
-; NONEON-NOSVE-NEXT:    strh w14, [sp, #46]
-; NONEON-NOSVE-NEXT:    add w14, w3, w3
-; NONEON-NOSVE-NEXT:    strh w13, [sp, #44]
-; NONEON-NOSVE-NEXT:    add w13, w5, w5
+; NONEON-NOSVE-NEXT:    add w16, w16, w16
+; NONEON-NOSVE-NEXT:    add w12, w12, w12
+; NONEON-NOSVE-NEXT:    sxth w16, w16
+; NONEON-NOSVE-NEXT:    sxth w13, w13
+; NONEON-NOSVE-NEXT:    add w3, w3, w3
+; NONEON-NOSVE-NEXT:    sxth w12, w12
+; NONEON-NOSVE-NEXT:    ldrh w0, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldrh w2, [sp, #10]
+; NONEON-NOSVE-NEXT:    stp w13, w16, [sp, #56]
+; NONEON-NOSVE-NEXT:    sxth w13, w3
+; NONEON-NOSVE-NEXT:    add w16, w5, w5
+; NONEON-NOSVE-NEXT:    add w3, w4, w4
+; NONEON-NOSVE-NEXT:    ldrh w17, [sp, #20]
+; NONEON-NOSVE-NEXT:    ldrh w18, [sp, #22]
+; NONEON-NOSVE-NEXT:    stp w13, w12, [sp, #48]
+; NONEON-NOSVE-NEXT:    sxth w12, w16
+; NONEON-NOSVE-NEXT:    sxth w13, w3
+; NONEON-NOSVE-NEXT:    add w16, w2, w2
+; NONEON-NOSVE-NEXT:    add w0, w0, w0
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    strh w14, [sp, #42]
-; NONEON-NOSVE-NEXT:    add w14, w4, w4
+; NONEON-NOSVE-NEXT:    stp w13, w12, [sp, #40]
+; NONEON-NOSVE-NEXT:    sxth w12, w16
+; NONEON-NOSVE-NEXT:    sxth w13, w0
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #26]
-; NONEON-NOSVE-NEXT:    strh w13, [sp, #40]
-; NONEON-NOSVE-NEXT:    add w13, w2, w2
-; NONEON-NOSVE-NEXT:    ldrh w17, [sp, #22]
-; NONEON-NOSVE-NEXT:    strh w14, [sp, #38]
-; NONEON-NOSVE-NEXT:    add w14, w0, w0
-; NONEON-NOSVE-NEXT:    add w9, w9, w9
-; NONEON-NOSVE-NEXT:    strh w13, [sp, #36]
-; NONEON-NOSVE-NEXT:    add w13, w18, w18
-; NONEON-NOSVE-NEXT:    add w8, w8, w8
-; NONEON-NOSVE-NEXT:    strh w14, [sp, #34]
 ; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #28]
 ; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #30]
-; NONEON-NOSVE-NEXT:    strh w13, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldrh w14, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldrh w15, [sp, #18]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldrh w16, [sp, #20]
-; NONEON-NOSVE-NEXT:    strh w9, [sp, #50]
-; NONEON-NOSVE-NEXT:    add w14, w17, w17
-; NONEON-NOSVE-NEXT:    add w12, w12, w12
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #48]
-; NONEON-NOSVE-NEXT:    add w13, w16, w16
+; NONEON-NOSVE-NEXT:    add w16, w18, w18
+; NONEON-NOSVE-NEXT:    add w17, w17, w17
+; NONEON-NOSVE-NEXT:    stp w13, w12, [sp, #32]
+; NONEON-NOSVE-NEXT:    sxth w12, w16
+; NONEON-NOSVE-NEXT:    sxth w13, w17
+; NONEON-NOSVE-NEXT:    add w15, w15, w15
+; NONEON-NOSVE-NEXT:    add w14, w14, w14
 ; NONEON-NOSVE-NEXT:    add w11, w11, w11
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #64]
 ; NONEON-NOSVE-NEXT:    add w10, w10, w10
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #78]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #76]
-; NONEON-NOSVE-NEXT:    strh w14, [sp, #62]
-; NONEON-NOSVE-NEXT:    add w14, w15, w15
-; NONEON-NOSVE-NEXT:    strh w13, [sp, #60]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #120]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #74]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #72]
-; NONEON-NOSVE-NEXT:    strh w14, [sp, #58]
-; NONEON-NOSVE-NEXT:    strh w12, [sp, #56]
-; NONEON-NOSVE-NEXT:    strh w11, [sp, #54]
-; NONEON-NOSVE-NEXT:    strh w10, [sp, #52]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #70]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #68]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #104]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #66]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #64]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #96]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #94]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #92]
-; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #96]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #152]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #90]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #88]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #144]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #86]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #84]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #136]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #82]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #80]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #128]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #128]
+; NONEON-NOSVE-NEXT:    add w9, w9, w9
+; NONEON-NOSVE-NEXT:    add w8, w8, w8
+; NONEON-NOSVE-NEXT:    stp w13, w12, [sp, #88]
+; NONEON-NOSVE-NEXT:    sxth w12, w15
+; NONEON-NOSVE-NEXT:    sxth w13, w14
+; NONEON-NOSVE-NEXT:    sxth w11, w11
+; NONEON-NOSVE-NEXT:    sxth w10, w10
+; NONEON-NOSVE-NEXT:    sxth w9, w9
+; NONEON-NOSVE-NEXT:    sxth w8, w8
+; NONEON-NOSVE-NEXT:    stp w13, w12, [sp, #80]
+; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #32]
+; NONEON-NOSVE-NEXT:    stp w10, w11, [sp, #72]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #64]
+; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
 ; NONEON-NOSVE-NEXT:    stp q2, q3, [x1]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [x1, #32]
-; NONEON-NOSVE-NEXT:    add sp, sp, #160
+; NONEON-NOSVE-NEXT:    add sp, sp, #96
 ; NONEON-NOSVE-NEXT:    ret
   %a = load <16 x i16>, ptr %in
   %b = add <16 x i16> %a, %a
@@ -1746,24 +1197,18 @@ define void @sext_v4i16_v4i64(<4 x i16> %a, ptr %out) {
 ;
 ; NONEON-NOSVE-LABEL: sext_v4i16_v4i64:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #80
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 80
+; NONEON-NOSVE-NEXT:    sub sp, sp, #48
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
 ; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #10]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #8]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #24]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #14]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #12]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldpsw x8, x9, [sp, #40]
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #64]
-; NONEON-NOSVE-NEXT:    ldpsw x8, x9, [sp, #32]
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #48]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #48]
+; NONEON-NOSVE-NEXT:    ldrsh x9, [sp, #10]
+; NONEON-NOSVE-NEXT:    ldrsh x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #32]
+; NONEON-NOSVE-NEXT:    ldrsh x9, [sp, #14]
+; NONEON-NOSVE-NEXT:    ldrsh x8, [sp, #12]
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #16]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [x0]
-; NONEON-NOSVE-NEXT:    add sp, sp, #80
+; NONEON-NOSVE-NEXT:    add sp, sp, #48
 ; NONEON-NOSVE-NEXT:    ret
   %b = sext <4 x i16> %a to <4 x i64>
   store <4 x i64>%b, ptr %out
@@ -1789,39 +1234,27 @@ define void @sext_v8i16_v8i64(<8 x i16> %a, ptr %out) {
 ;
 ; NONEON-NOSVE-LABEL: sext_v8i16_v8i64:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    str q0, [sp, #-160]!
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 160
+; NONEON-NOSVE-NEXT:    str q0, [sp, #-96]!
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #26]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #56]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #30]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #48]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #18]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #40]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #22]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #32]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT:    ldpsw x8, x9, [sp, #88]
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #144]
-; NONEON-NOSVE-NEXT:    ldpsw x8, x9, [sp, #80]
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #128]
-; NONEON-NOSVE-NEXT:    ldpsw x8, x9, [sp, #72]
-; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #128]
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldpsw x8, x9, [sp, #64]
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #96]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #96]
+; NONEON-NOSVE-NEXT:    ldrsh x9, [sp, #26]
+; NONEON-NOSVE-NEXT:    ldrsh x8, [sp, #24]
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #80]
+; NONEON-NOSVE-NEXT:    ldrsh x9, [sp, #30]
+; NONEON-NOSVE-NEXT:    ldrsh x8, [sp, #28]
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #64]
+; NONEON-NOSVE-NEXT:    ldrsh x9, [sp, #18]
+; NONEON-NOSVE-NEXT:    ldrsh x8, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #64]
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #48]
+; NONEON-NOSVE-NEXT:    ldrsh x9, [sp, #22]
+; NONEON-NOSVE-NEXT:    ldrsh x8, [sp, #20]
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #32]
+; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
 ; NONEON-NOSVE-NEXT:    stp q2, q3, [x0]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [x0, #32]
-; NONEON-NOSVE-NEXT:    add sp, sp, #160
+; NONEON-NOSVE-NEXT:    add sp, sp, #96
 ; NONEON-NOSVE-NEXT:    ret
   %b = sext <8 x i16> %a to <8 x i64>
   store <8 x i64>%b, ptr %out
@@ -1860,124 +1293,75 @@ define void @sext_v16i16_v16i64(ptr %in, ptr %out) {
 ;
 ; NONEON-NOSVE-LABEL: sext_v16i16_v16i64:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #368
-; NONEON-NOSVE-NEXT:    str x29, [sp, #352] // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 368
-; NONEON-NOSVE-NEXT:    .cfi_offset w29, -16
+; NONEON-NOSVE-NEXT:    sub sp, sp, #160
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 160
 ; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
-; NONEON-NOSVE-NEXT:    ldr x29, [sp, #352] // 8-byte Folded Reload
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp]
-; NONEON-NOSVE-NEXT:    ldrh w13, [sp, #4]
-; NONEON-NOSVE-NEXT:    ldrh w14, [sp, #6]
-; NONEON-NOSVE-NEXT:    ldrh w3, [sp, #2]
-; NONEON-NOSVE-NEXT:    ldrh w5, [sp]
-; NONEON-NOSVE-NEXT:    ldrh w2, [sp, #12]
-; NONEON-NOSVE-NEXT:    ldrh w4, [sp, #14]
+; NONEON-NOSVE-NEXT:    ldrh w13, [sp]
+; NONEON-NOSVE-NEXT:    ldrh w16, [sp, #2]
+; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #6]
+; NONEON-NOSVE-NEXT:    ldrh w3, [sp, #4]
+; NONEON-NOSVE-NEXT:    ldrh w4, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldrh w5, [sp, #10]
 ; NONEON-NOSVE-NEXT:    add w13, w13, w13
-; NONEON-NOSVE-NEXT:    add w14, w14, w14
-; NONEON-NOSVE-NEXT:    ldrh w18, [sp, #8]
-; NONEON-NOSVE-NEXT:    ldrh w0, [sp, #10]
-; NONEON-NOSVE-NEXT:    strh w14, [sp, #54]
-; NONEON-NOSVE-NEXT:    add w14, w3, w3
-; NONEON-NOSVE-NEXT:    strh w13, [sp, #52]
-; NONEON-NOSVE-NEXT:    add w13, w5, w5
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    strh w14, [sp, #50]
-; NONEON-NOSVE-NEXT:    add w14, w4, w4
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #26]
-; NONEON-NOSVE-NEXT:    strh w13, [sp, #48]
-; NONEON-NOSVE-NEXT:    add w13, w2, w2
-; NONEON-NOSVE-NEXT:    ldrh w17, [sp, #22]
-; NONEON-NOSVE-NEXT:    strh w14, [sp, #46]
-; NONEON-NOSVE-NEXT:    add w14, w0, w0
-; NONEON-NOSVE-NEXT:    add w9, w9, w9
-; NONEON-NOSVE-NEXT:    strh w13, [sp, #44]
-; NONEON-NOSVE-NEXT:    add w13, w18, w18
-; NONEON-NOSVE-NEXT:    add w8, w8, w8
-; NONEON-NOSVE-NEXT:    strh w14, [sp, #42]
-; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #28]
-; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #30]
-; NONEON-NOSVE-NEXT:    strh w13, [sp, #40]
-; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrh w15, [sp, #18]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #40]
-; NONEON-NOSVE-NEXT:    ldrh w16, [sp, #20]
-; NONEON-NOSVE-NEXT:    strh w9, [sp, #58]
-; NONEON-NOSVE-NEXT:    add w14, w17, w17
+; NONEON-NOSVE-NEXT:    add w16, w16, w16
 ; NONEON-NOSVE-NEXT:    add w12, w12, w12
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #56]
-; NONEON-NOSVE-NEXT:    add w13, w16, w16
+; NONEON-NOSVE-NEXT:    sxth x16, w16
+; NONEON-NOSVE-NEXT:    sxth x13, w13
+; NONEON-NOSVE-NEXT:    add w3, w3, w3
+; NONEON-NOSVE-NEXT:    sxth x12, w12
+; NONEON-NOSVE-NEXT:    ldrh w0, [sp, #12]
+; NONEON-NOSVE-NEXT:    ldrh w2, [sp, #14]
+; NONEON-NOSVE-NEXT:    stp x13, x16, [sp, #80]
+; NONEON-NOSVE-NEXT:    sxth x13, w3
+; NONEON-NOSVE-NEXT:    add w16, w5, w5
+; NONEON-NOSVE-NEXT:    add w3, w4, w4
+; NONEON-NOSVE-NEXT:    ldrh w17, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldrh w18, [sp, #18]
+; NONEON-NOSVE-NEXT:    stp x13, x12, [sp, #64]
+; NONEON-NOSVE-NEXT:    sxth x12, w16
+; NONEON-NOSVE-NEXT:    sxth x13, w3
+; NONEON-NOSVE-NEXT:    add w16, w2, w2
+; NONEON-NOSVE-NEXT:    add w0, w0, w0
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #28]
+; NONEON-NOSVE-NEXT:    stp x13, x12, [sp, #48]
+; NONEON-NOSVE-NEXT:    sxth x12, w16
+; NONEON-NOSVE-NEXT:    sxth x13, w0
+; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #30]
+; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #24]
+; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #26]
+; NONEON-NOSVE-NEXT:    ldrh w14, [sp, #20]
+; NONEON-NOSVE-NEXT:    ldrh w15, [sp, #22]
+; NONEON-NOSVE-NEXT:    add w16, w18, w18
+; NONEON-NOSVE-NEXT:    add w17, w17, w17
+; NONEON-NOSVE-NEXT:    stp x13, x12, [sp, #32]
+; NONEON-NOSVE-NEXT:    sxth x12, w16
+; NONEON-NOSVE-NEXT:    sxth x13, w17
+; NONEON-NOSVE-NEXT:    add w15, w15, w15
+; NONEON-NOSVE-NEXT:    add w14, w14, w14
 ; NONEON-NOSVE-NEXT:    add w11, w11, w11
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #72]
 ; NONEON-NOSVE-NEXT:    add w10, w10, w10
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #82]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #80]
-; NONEON-NOSVE-NEXT:    strh w14, [sp, #70]
-; NONEON-NOSVE-NEXT:    add w14, w15, w15
-; NONEON-NOSVE-NEXT:    strh w13, [sp, #68]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #120]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #86]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #84]
-; NONEON-NOSVE-NEXT:    strh w14, [sp, #66]
-; NONEON-NOSVE-NEXT:    strh w12, [sp, #64]
-; NONEON-NOSVE-NEXT:    strh w11, [sp, #62]
-; NONEON-NOSVE-NEXT:    strh w10, [sp, #60]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #56]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #128]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #74]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #72]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #104]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #78]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #76]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #88]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #120]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #98]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #96]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #152]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #102]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #100]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #184]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #104]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #160]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #90]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #88]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #136]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #94]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #92]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #168]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #152]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #144]
-; NONEON-NOSVE-NEXT:    str d0, [sp, #360]
-; NONEON-NOSVE-NEXT:    ldp d2, d0, [sp, #136]
-; NONEON-NOSVE-NEXT:    str d2, [sp, #200]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #208]
-; NONEON-NOSVE-NEXT:    ldpsw x8, x9, [sp, #184]
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #256]
-; NONEON-NOSVE-NEXT:    ldpsw x8, x9, [sp, #192]
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #272]
-; NONEON-NOSVE-NEXT:    ldpsw x8, x9, [sp, #168]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #256]
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #224]
-; NONEON-NOSVE-NEXT:    ldpsw x8, x9, [sp, #176]
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #240]
-; NONEON-NOSVE-NEXT:    ldpsw x8, x9, [sp, #216]
-; NONEON-NOSVE-NEXT:    ldp q3, q4, [sp, #224]
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #320]
-; NONEON-NOSVE-NEXT:    ldrsw x9, [sp, #364]
-; NONEON-NOSVE-NEXT:    ldrsw x8, [sp, #360]
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #336]
-; NONEON-NOSVE-NEXT:    ldpsw x8, x9, [sp, #200]
-; NONEON-NOSVE-NEXT:    ldp q6, q7, [sp, #320]
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #288]
-; NONEON-NOSVE-NEXT:    ldpsw x8, x9, [sp, #208]
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #304]
-; NONEON-NOSVE-NEXT:    ldp q5, q2, [sp, #288]
+; NONEON-NOSVE-NEXT:    add w9, w9, w9
+; NONEON-NOSVE-NEXT:    add w8, w8, w8
+; NONEON-NOSVE-NEXT:    stp x13, x12, [sp, #144]
+; NONEON-NOSVE-NEXT:    sxth x12, w15
+; NONEON-NOSVE-NEXT:    sxth x13, w14
+; NONEON-NOSVE-NEXT:    sxth x11, w11
+; NONEON-NOSVE-NEXT:    sxth x10, w10
+; NONEON-NOSVE-NEXT:    sxth x9, w9
+; NONEON-NOSVE-NEXT:    sxth x8, w8
+; NONEON-NOSVE-NEXT:    stp x13, x12, [sp, #128]
+; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #64]
+; NONEON-NOSVE-NEXT:    stp x10, x11, [sp, #112]
+; NONEON-NOSVE-NEXT:    ldp q4, q3, [sp, #32]
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #96]
+; NONEON-NOSVE-NEXT:    ldp q7, q6, [sp, #128]
+; NONEON-NOSVE-NEXT:    ldp q2, q5, [sp, #96]
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [x1]
 ; NONEON-NOSVE-NEXT:    stp q3, q4, [x1, #32]
 ; NONEON-NOSVE-NEXT:    stp q6, q7, [x1, #64]
 ; NONEON-NOSVE-NEXT:    stp q5, q2, [x1, #96]
-; NONEON-NOSVE-NEXT:    add sp, sp, #368
+; NONEON-NOSVE-NEXT:    add sp, sp, #160
 ; NONEON-NOSVE-NEXT:    ret
   %a = load <16 x i16>, ptr %in
   %b = add <16 x i16> %a, %a
@@ -2037,43 +1421,38 @@ define void @sext_v8i32_v8i64(ptr %in, ptr %out) {
 ;
 ; NONEON-NOSVE-LABEL: sext_v8i32_v8i64:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #160
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 160
 ; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
-; NONEON-NOSVE-NEXT:    stp q1, q0, [sp]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp]
-; NONEON-NOSVE-NEXT:    ldp w12, w13, [sp, #8]
+; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-96]!
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
+; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp]
 ; NONEON-NOSVE-NEXT:    ldp w14, w15, [sp, #16]
-; NONEON-NOSVE-NEXT:    add w9, w9, w9
-; NONEON-NOSVE-NEXT:    add w8, w8, w8
 ; NONEON-NOSVE-NEXT:    ldp w10, w11, [sp, #24]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #40]
-; NONEON-NOSVE-NEXT:    add w9, w13, w13
-; NONEON-NOSVE-NEXT:    add w8, w12, w12
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #32]
-; NONEON-NOSVE-NEXT:    add w9, w15, w15
-; NONEON-NOSVE-NEXT:    add w8, w14, w14
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #56]
-; NONEON-NOSVE-NEXT:    add w9, w11, w11
-; NONEON-NOSVE-NEXT:    add w8, w10, w10
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #48]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT:    ldpsw x8, x9, [sp, #72]
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldpsw x8, x9, [sp, #64]
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #96]
-; NONEON-NOSVE-NEXT:    ldpsw x8, x9, [sp, #88]
-; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #96]
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #144]
-; NONEON-NOSVE-NEXT:    ldpsw x8, x9, [sp, #80]
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #128]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #128]
+; NONEON-NOSVE-NEXT:    add w8, w8, w8
+; NONEON-NOSVE-NEXT:    add w9, w9, w9
+; NONEON-NOSVE-NEXT:    sxtw x8, w8
+; NONEON-NOSVE-NEXT:    sxtw x9, w9
+; NONEON-NOSVE-NEXT:    ldp w12, w13, [sp, #8]
+; NONEON-NOSVE-NEXT:    stp x9, x8, [sp, #48]
+; NONEON-NOSVE-NEXT:    add w8, w15, w15
+; NONEON-NOSVE-NEXT:    add w9, w14, w14
+; NONEON-NOSVE-NEXT:    sxtw x8, w8
+; NONEON-NOSVE-NEXT:    sxtw x9, w9
+; NONEON-NOSVE-NEXT:    add w11, w11, w11
+; NONEON-NOSVE-NEXT:    add w13, w13, w13
+; NONEON-NOSVE-NEXT:    add w12, w12, w12
+; NONEON-NOSVE-NEXT:    add w10, w10, w10
+; NONEON-NOSVE-NEXT:    sxtw x13, w13
+; NONEON-NOSVE-NEXT:    sxtw x12, w12
+; NONEON-NOSVE-NEXT:    sxtw x11, w11
+; NONEON-NOSVE-NEXT:    stp x9, x8, [sp, #80]
+; NONEON-NOSVE-NEXT:    sxtw x8, w10
+; NONEON-NOSVE-NEXT:    stp x12, x13, [sp, #32]
+; NONEON-NOSVE-NEXT:    stp x8, x11, [sp, #64]
+; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #32]
+; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
 ; NONEON-NOSVE-NEXT:    stp q2, q3, [x1]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [x1, #32]
-; NONEON-NOSVE-NEXT:    add sp, sp, #160
+; NONEON-NOSVE-NEXT:    add sp, sp, #96
 ; NONEON-NOSVE-NEXT:    ret
   %a = load <8 x i32>, ptr %in
   %b = add <8 x i32> %a, %a
@@ -2162,14 +1541,14 @@ define void @zext_v32i8_v32i16(ptr %in, ptr %out) {
 ;
 ; NONEON-NOSVE-LABEL: zext_v32i8_v32i16:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #272
-; NONEON-NOSVE-NEXT:    stp x29, x30, [sp, #176] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    stp x28, x27, [sp, #192] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    stp x26, x25, [sp, #208] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    stp x24, x23, [sp, #224] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    stp x22, x21, [sp, #240] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #256] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 272
+; NONEON-NOSVE-NEXT:    sub sp, sp, #208
+; NONEON-NOSVE-NEXT:    stp x29, x30, [sp, #112] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    stp x28, x27, [sp, #128] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    stp x26, x25, [sp, #144] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    stp x24, x23, [sp, #160] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    stp x22, x21, [sp, #176] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #192] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 208
 ; NONEON-NOSVE-NEXT:    .cfi_offset w19, -8
 ; NONEON-NOSVE-NEXT:    .cfi_offset w20, -16
 ; NONEON-NOSVE-NEXT:    .cfi_offset w21, -24
@@ -2186,182 +1565,146 @@ define void @zext_v32i8_v32i16(ptr %in, ptr %out) {
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #41]
-; NONEON-NOSVE-NEXT:    ldrb w29, [sp, #18]
-; NONEON-NOSVE-NEXT:    ldrb w27, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrb w25, [sp, #30]
-; NONEON-NOSVE-NEXT:    ldrb w23, [sp, #28]
+; NONEON-NOSVE-NEXT:    ldrb w17, [sp, #21]
+; NONEON-NOSVE-NEXT:    ldrb w28, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldrb w29, [sp, #17]
+; NONEON-NOSVE-NEXT:    ldrb w27, [sp, #31]
 ; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #8] // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    ldrb w21, [sp, #26]
-; NONEON-NOSVE-NEXT:    ldrb w19, [sp, #24]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #19]
-; NONEON-NOSVE-NEXT:    ldrb w6, [sp, #38]
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #19]
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #18]
+; NONEON-NOSVE-NEXT:    add w5, w17, w17
+; NONEON-NOSVE-NEXT:    ldrb w25, [sp, #29]
+; NONEON-NOSVE-NEXT:    ldrb w26, [sp, #30]
 ; NONEON-NOSVE-NEXT:    add w8, w8, w8
-; NONEON-NOSVE-NEXT:    ldrb w28, [sp, #17]
-; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #22]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #60]
-; NONEON-NOSVE-NEXT:    add w8, w29, w29
-; NONEON-NOSVE-NEXT:    ldrb w4, [sp, #36]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #58]
-; NONEON-NOSVE-NEXT:    add w8, w27, w27
-; NONEON-NOSVE-NEXT:    ldrb w26, [sp, #31]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #56]
-; NONEON-NOSVE-NEXT:    add w8, w25, w25
+; NONEON-NOSVE-NEXT:    and w5, w5, #0xff
 ; NONEON-NOSVE-NEXT:    add w9, w9, w9
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #54]
-; NONEON-NOSVE-NEXT:    add w8, w23, w23
-; NONEON-NOSVE-NEXT:    ldrb w2, [sp, #34]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #52]
-; NONEON-NOSVE-NEXT:    add w8, w21, w21
-; NONEON-NOSVE-NEXT:    ldrb w24, [sp, #29]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #50]
-; NONEON-NOSVE-NEXT:    add w8, w19, w19
-; NONEON-NOSVE-NEXT:    ldrb w17, [sp, #23]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #59]
-; NONEON-NOSVE-NEXT:    add w9, w28, w28
-; NONEON-NOSVE-NEXT:    add w18, w16, w16
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #48]
-; NONEON-NOSVE-NEXT:    add w8, w6, w6
+; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
+; NONEON-NOSVE-NEXT:    strh w5, [sp, #74]
+; NONEON-NOSVE-NEXT:    add w5, w29, w29
+; NONEON-NOSVE-NEXT:    and w9, w9, #0xff
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #70]
+; NONEON-NOSVE-NEXT:    add w8, w28, w28
+; NONEON-NOSVE-NEXT:    ldrb w24, [sp, #28]
+; NONEON-NOSVE-NEXT:    and w5, w5, #0xff
+; NONEON-NOSVE-NEXT:    strh w9, [sp, #68]
+; NONEON-NOSVE-NEXT:    add w9, w27, w27
+; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w23, [sp, #27]
+; NONEON-NOSVE-NEXT:    strh w5, [sp, #66]
+; NONEON-NOSVE-NEXT:    add w5, w26, w26
+; NONEON-NOSVE-NEXT:    and w9, w9, #0xff
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #64]
+; NONEON-NOSVE-NEXT:    add w8, w25, w25
+; NONEON-NOSVE-NEXT:    ldrb w22, [sp, #26]
+; NONEON-NOSVE-NEXT:    strh w9, [sp, #62]
+; NONEON-NOSVE-NEXT:    and w9, w5, #0xff
+; NONEON-NOSVE-NEXT:    add w5, w24, w24
+; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w21, [sp, #25]
+; NONEON-NOSVE-NEXT:    strh w9, [sp, #60]
+; NONEON-NOSVE-NEXT:    add w9, w23, w23
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #58]
+; NONEON-NOSVE-NEXT:    and w8, w5, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w20, [sp, #24]
+; NONEON-NOSVE-NEXT:    add w5, w22, w22
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #56]
+; NONEON-NOSVE-NEXT:    and w8, w9, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w19, [sp, #39]
+; NONEON-NOSVE-NEXT:    add w9, w21, w21
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #54]
+; NONEON-NOSVE-NEXT:    and w8, w5, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #23]
+; NONEON-NOSVE-NEXT:    ldrb w4, [sp, #38]
+; NONEON-NOSVE-NEXT:    add w5, w20, w20
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #52]
+; NONEON-NOSVE-NEXT:    and w8, w9, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #22]
+; NONEON-NOSVE-NEXT:    ldrb w3, [sp, #37]
+; NONEON-NOSVE-NEXT:    add w9, w19, w19
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #50]
+; NONEON-NOSVE-NEXT:    and w8, w5, #0xff
+; NONEON-NOSVE-NEXT:    add w0, w16, w16
+; NONEON-NOSVE-NEXT:    ldrb w2, [sp, #36]
+; NONEON-NOSVE-NEXT:    add w4, w4, w4
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #48]
+; NONEON-NOSVE-NEXT:    and w8, w9, #0xff
+; NONEON-NOSVE-NEXT:    add w18, w15, w15
+; NONEON-NOSVE-NEXT:    and w6, w0, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w0, [sp, #35]
+; NONEON-NOSVE-NEXT:    add w9, w3, w3
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #110]
+; NONEON-NOSVE-NEXT:    and w8, w4, #0xff
+; NONEON-NOSVE-NEXT:    and w7, w18, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w18, [sp, #34]
+; NONEON-NOSVE-NEXT:    add w2, w2, w2
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #108]
+; NONEON-NOSVE-NEXT:    and w8, w9, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w17, [sp, #33]
+; NONEON-NOSVE-NEXT:    add w9, w0, w0
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #106]
+; NONEON-NOSVE-NEXT:    and w8, w2, #0xff
 ; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldrb w22, [sp, #27]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #57]
-; NONEON-NOSVE-NEXT:    add w9, w26, w26
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #78]
-; NONEON-NOSVE-NEXT:    add w8, w4, w4
-; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #46]
-; NONEON-NOSVE-NEXT:    ldrb w20, [sp, #25]
-; NONEON-NOSVE-NEXT:    ldrb w30, [sp, #21]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #55]
-; NONEON-NOSVE-NEXT:    add w9, w24, w24
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #76]
-; NONEON-NOSVE-NEXT:    add w8, w2, w2
-; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #44]
-; NONEON-NOSVE-NEXT:    add w17, w17, w17
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #53]
-; NONEON-NOSVE-NEXT:    add w9, w22, w22
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #74]
-; NONEON-NOSVE-NEXT:    add w8, w16, w16
-; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #42]
-; NONEON-NOSVE-NEXT:    strb w17, [sp, #63]
-; NONEON-NOSVE-NEXT:    add w17, w30, w30
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #51]
-; NONEON-NOSVE-NEXT:    add w9, w20, w20
-; NONEON-NOSVE-NEXT:    ldrb w7, [sp, #39]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #72]
-; NONEON-NOSVE-NEXT:    add w8, w14, w14
-; NONEON-NOSVE-NEXT:    ldrb w5, [sp, #37]
-; NONEON-NOSVE-NEXT:    strb w18, [sp, #62]
-; NONEON-NOSVE-NEXT:    ldrb w3, [sp, #35]
-; NONEON-NOSVE-NEXT:    ldrb w0, [sp, #33]
-; NONEON-NOSVE-NEXT:    strb w17, [sp, #61]
+; NONEON-NOSVE-NEXT:    add w18, w18, w18
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #104]
+; NONEON-NOSVE-NEXT:    and w8, w9, #0xff
 ; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #47]
+; NONEON-NOSVE-NEXT:    add w9, w17, w17
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #102]
+; NONEON-NOSVE-NEXT:    and w8, w18, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #46]
+; NONEON-NOSVE-NEXT:    add w16, w16, w16
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #100]
+; NONEON-NOSVE-NEXT:    and w8, w9, #0xff
 ; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #45]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #49]
-; NONEON-NOSVE-NEXT:    add w9, w7, w7
-; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #43]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #70]
-; NONEON-NOSVE-NEXT:    add w8, w12, w12
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #68]
-; NONEON-NOSVE-NEXT:    add w8, w10, w10
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #66]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #12] // 4-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #79]
-; NONEON-NOSVE-NEXT:    add w9, w5, w5
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #77]
-; NONEON-NOSVE-NEXT:    add w9, w3, w3
-; NONEON-NOSVE-NEXT:    add w8, w8, w8
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #64]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #95]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #75]
-; NONEON-NOSVE-NEXT:    add w9, w0, w0
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #142]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #94]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #73]
 ; NONEON-NOSVE-NEXT:    add w9, w15, w15
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #140]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #93]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #71]
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #98]
+; NONEON-NOSVE-NEXT:    and w8, w16, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #44]
+; NONEON-NOSVE-NEXT:    add w14, w14, w14
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #96]
+; NONEON-NOSVE-NEXT:    and w8, w9, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #43]
 ; NONEON-NOSVE-NEXT:    add w9, w13, w13
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #138]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #92]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #69]
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #94]
+; NONEON-NOSVE-NEXT:    and w8, w14, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #42]
+; NONEON-NOSVE-NEXT:    add w12, w12, w12
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #92]
+; NONEON-NOSVE-NEXT:    and w8, w9, #0xff
 ; NONEON-NOSVE-NEXT:    add w9, w11, w11
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #136]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #91]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #67]
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #90]
+; NONEON-NOSVE-NEXT:    and w8, w12, #0xff
+; NONEON-NOSVE-NEXT:    add w10, w10, w10
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #88]
+; NONEON-NOSVE-NEXT:    and w8, w9, #0xff
 ; NONEON-NOSVE-NEXT:    ldr w9, [sp, #8] // 4-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #134]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #90]
+; NONEON-NOSVE-NEXT:    ldrb w30, [sp, #20]
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #86]
+; NONEON-NOSVE-NEXT:    and w8, w10, #0xff
+; NONEON-NOSVE-NEXT:    ldr w10, [sp, #12] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT:    strh w6, [sp, #78]
 ; NONEON-NOSVE-NEXT:    add w9, w9, w9
-; NONEON-NOSVE-NEXT:    ldp x20, x19, [sp, #256] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #132]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #89]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #65]
-; NONEON-NOSVE-NEXT:    ldp x22, x21, [sp, #240] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #130]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #88]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #128]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #87]
-; NONEON-NOSVE-NEXT:    ldp x24, x23, [sp, #224] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #126]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #86]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #96]
-; NONEON-NOSVE-NEXT:    ldp x26, x25, [sp, #208] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #124]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #85]
-; NONEON-NOSVE-NEXT:    ldp x28, x27, [sp, #192] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #122]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #84]
-; NONEON-NOSVE-NEXT:    ldp x29, x30, [sp, #176] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #120]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #83]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #118]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #82]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #116]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #81]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #114]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #80]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #111]
-; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #112]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #174]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #110]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #172]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #109]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #170]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #108]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #168]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #107]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #166]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #106]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #164]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #105]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #162]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #104]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #160]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #103]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #158]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #102]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #156]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #101]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #154]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #100]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #152]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #99]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #150]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #98]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #148]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #97]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #146]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #96]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #144]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #144]
+; NONEON-NOSVE-NEXT:    add w6, w30, w30
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #84]
+; NONEON-NOSVE-NEXT:    add w10, w10, w10
+; NONEON-NOSVE-NEXT:    and w8, w9, #0xff
+; NONEON-NOSVE-NEXT:    and w6, w6, #0xff
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #82]
+; NONEON-NOSVE-NEXT:    and w8, w10, #0xff
+; NONEON-NOSVE-NEXT:    strh w7, [sp, #76]
+; NONEON-NOSVE-NEXT:    ldp x20, x19, [sp, #192] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT:    strh w6, [sp, #72]
+; NONEON-NOSVE-NEXT:    ldp x22, x21, [sp, #176] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT:    strh w8, [sp, #80]
+; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #48]
+; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #80]
+; NONEON-NOSVE-NEXT:    ldp x24, x23, [sp, #160] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT:    ldp x26, x25, [sp, #144] // 16-byte Folded Reload
 ; NONEON-NOSVE-NEXT:    stp q2, q3, [x1]
+; NONEON-NOSVE-NEXT:    ldp x28, x27, [sp, #128] // 16-byte Folded Reload
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [x1, #32]
-; NONEON-NOSVE-NEXT:    add sp, sp, #272
+; NONEON-NOSVE-NEXT:    ldp x29, x30, [sp, #112] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT:    add sp, sp, #208
 ; NONEON-NOSVE-NEXT:    ret
   %a = load <32 x i8>, ptr %in
   %b = add <32 x i8> %a, %a
@@ -2387,42 +1730,24 @@ define void @zext_v8i8_v8i32(<8 x i8> %a, ptr %out) {
 ;
 ; NONEON-NOSVE-LABEL: zext_v8i8_v8i32:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #80
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 80
+; NONEON-NOSVE-NEXT:    sub sp, sp, #48
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
 ; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #11]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #30]
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #11]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #10]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #9]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #26]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #40]
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #9]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #8]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #15]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #22]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #32]
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #15]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #14]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #13]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #18]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #24]
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #13]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #12]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #46]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #44]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #72]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #42]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #40]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #64]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #38]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #36]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #56]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #34]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #32]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #48]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #48]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #16]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [x0]
-; NONEON-NOSVE-NEXT:    add sp, sp, #80
+; NONEON-NOSVE-NEXT:    add sp, sp, #48
 ; NONEON-NOSVE-NEXT:    ret
   %b = zext <8 x i8> %a to <8 x i32>
   store <8 x i32>%b, ptr %out
@@ -2448,75 +1773,39 @@ define void @zext_v16i8_v16i32(<16 x i8> %a, ptr %out) {
 ;
 ; NONEON-NOSVE-LABEL: zext_v16i8_v16i32:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    str q0, [sp, #-160]!
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 160
+; NONEON-NOSVE-NEXT:    str q0, [sp, #-96]!
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #27]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #62]
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #27]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #26]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #60]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #25]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #58]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #88]
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #25]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #56]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #31]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #54]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #80]
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #31]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #30]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #52]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #29]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #50]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #72]
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #29]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #48]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #19]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #46]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #64]
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #19]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #18]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #44]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #42]
+; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #64]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #56]
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #17]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #94]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #40]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #23]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #38]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #48]
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #23]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #36]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #34]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #40]
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #21]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #92]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #152]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #90]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #88]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #144]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #86]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #84]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #136]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #82]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #80]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #128]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #78]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #76]
-; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #128]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #120]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #74]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #72]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #70]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #68]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #104]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #66]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #64]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #96]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #96]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #32]
+; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
 ; NONEON-NOSVE-NEXT:    stp q2, q3, [x0]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [x0, #32]
-; NONEON-NOSVE-NEXT:    add sp, sp, #160
+; NONEON-NOSVE-NEXT:    add sp, sp, #96
 ; NONEON-NOSVE-NEXT:    ret
   %b = zext <16 x i8> %a to <16 x i32>
   store <16 x i32> %b, ptr %out
@@ -2555,14 +1844,14 @@ define void @zext_v32i8_v32i32(ptr %in, ptr %out) {
 ;
 ; NONEON-NOSVE-LABEL: zext_v32i8_v32i32:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #464
-; NONEON-NOSVE-NEXT:    stp x29, x30, [sp, #368] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    stp x28, x27, [sp, #384] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    stp x26, x25, [sp, #400] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    stp x24, x23, [sp, #416] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    stp x22, x21, [sp, #432] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #448] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 464
+; NONEON-NOSVE-NEXT:    sub sp, sp, #272
+; NONEON-NOSVE-NEXT:    stp x29, x30, [sp, #176] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    stp x28, x27, [sp, #192] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    stp x26, x25, [sp, #208] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    stp x24, x23, [sp, #224] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    stp x22, x21, [sp, #240] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #256] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 272
 ; NONEON-NOSVE-NEXT:    .cfi_offset w19, -8
 ; NONEON-NOSVE-NEXT:    .cfi_offset w20, -16
 ; NONEON-NOSVE-NEXT:    .cfi_offset w21, -24
@@ -2577,258 +1866,136 @@ define void @zext_v32i8_v32i32(ptr %in, ptr %out) {
 ; NONEON-NOSVE-NEXT:    .cfi_offset w29, -96
 ; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #40]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #41]
-; NONEON-NOSVE-NEXT:    ldrb w29, [sp, #18]
-; NONEON-NOSVE-NEXT:    ldrb w27, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrb w25, [sp, #30]
-; NONEON-NOSVE-NEXT:    ldrb w23, [sp, #28]
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #44]
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #45]
+; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #18]
+; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #19]
+; NONEON-NOSVE-NEXT:    ldrb w17, [sp, #17]
+; NONEON-NOSVE-NEXT:    ldrb w30, [sp, #16]
 ; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #8] // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    ldrb w21, [sp, #26]
-; NONEON-NOSVE-NEXT:    ldrb w19, [sp, #24]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #19]
-; NONEON-NOSVE-NEXT:    ldrb w6, [sp, #38]
+; NONEON-NOSVE-NEXT:    add w18, w15, w15
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #22]
+; NONEON-NOSVE-NEXT:    add w0, w16, w16
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #23]
+; NONEON-NOSVE-NEXT:    and w19, w18, #0xff
+; NONEON-NOSVE-NEXT:    and w7, w0, #0xff
+; NONEON-NOSVE-NEXT:    add w6, w17, w17
+; NONEON-NOSVE-NEXT:    ldrb w28, [sp, #20]
+; NONEON-NOSVE-NEXT:    ldrb w29, [sp, #21]
+; NONEON-NOSVE-NEXT:    stp w19, w7, [sp, #104]
+; NONEON-NOSVE-NEXT:    add w7, w30, w30
 ; NONEON-NOSVE-NEXT:    add w8, w8, w8
-; NONEON-NOSVE-NEXT:    ldrb w28, [sp, #17]
-; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #22]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #60]
-; NONEON-NOSVE-NEXT:    add w8, w29, w29
-; NONEON-NOSVE-NEXT:    ldrb w4, [sp, #36]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #58]
-; NONEON-NOSVE-NEXT:    add w8, w27, w27
-; NONEON-NOSVE-NEXT:    ldrb w26, [sp, #31]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #56]
-; NONEON-NOSVE-NEXT:    add w8, w25, w25
 ; NONEON-NOSVE-NEXT:    add w9, w9, w9
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #54]
-; NONEON-NOSVE-NEXT:    add w8, w23, w23
-; NONEON-NOSVE-NEXT:    ldrb w2, [sp, #34]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #52]
-; NONEON-NOSVE-NEXT:    add w8, w21, w21
-; NONEON-NOSVE-NEXT:    ldrb w24, [sp, #29]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #50]
-; NONEON-NOSVE-NEXT:    add w8, w19, w19
-; NONEON-NOSVE-NEXT:    ldrb w17, [sp, #23]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #59]
-; NONEON-NOSVE-NEXT:    add w9, w28, w28
-; NONEON-NOSVE-NEXT:    add w18, w16, w16
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #48]
-; NONEON-NOSVE-NEXT:    add w8, w6, w6
-; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldrb w22, [sp, #27]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #57]
-; NONEON-NOSVE-NEXT:    add w9, w26, w26
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #78]
-; NONEON-NOSVE-NEXT:    add w8, w4, w4
-; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #46]
-; NONEON-NOSVE-NEXT:    ldrb w20, [sp, #25]
-; NONEON-NOSVE-NEXT:    ldrb w30, [sp, #21]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #55]
+; NONEON-NOSVE-NEXT:    and w6, w6, #0xff
+; NONEON-NOSVE-NEXT:    and w7, w7, #0xff
+; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
+; NONEON-NOSVE-NEXT:    and w9, w9, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w26, [sp, #26]
+; NONEON-NOSVE-NEXT:    ldrb w27, [sp, #27]
+; NONEON-NOSVE-NEXT:    stp w7, w6, [sp, #96]
+; NONEON-NOSVE-NEXT:    add w6, w29, w29
+; NONEON-NOSVE-NEXT:    stp w9, w8, [sp, #88]
+; NONEON-NOSVE-NEXT:    add w8, w28, w28
+; NONEON-NOSVE-NEXT:    and w6, w6, #0xff
+; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w24, [sp, #24]
+; NONEON-NOSVE-NEXT:    ldrb w25, [sp, #25]
+; NONEON-NOSVE-NEXT:    add w9, w27, w27
+; NONEON-NOSVE-NEXT:    stp w8, w6, [sp, #80]
+; NONEON-NOSVE-NEXT:    add w6, w26, w26
+; NONEON-NOSVE-NEXT:    and w9, w9, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w22, [sp, #30]
+; NONEON-NOSVE-NEXT:    and w6, w6, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w23, [sp, #31]
+; NONEON-NOSVE-NEXT:    add w8, w25, w25
+; NONEON-NOSVE-NEXT:    stp w6, w9, [sp, #72]
 ; NONEON-NOSVE-NEXT:    add w9, w24, w24
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #76]
-; NONEON-NOSVE-NEXT:    add w8, w2, w2
-; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #44]
-; NONEON-NOSVE-NEXT:    add w17, w17, w17
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #53]
-; NONEON-NOSVE-NEXT:    add w9, w22, w22
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #74]
-; NONEON-NOSVE-NEXT:    add w8, w16, w16
-; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #42]
-; NONEON-NOSVE-NEXT:    strb w17, [sp, #63]
-; NONEON-NOSVE-NEXT:    add w17, w30, w30
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #51]
+; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
+; NONEON-NOSVE-NEXT:    and w9, w9, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w20, [sp, #28]
+; NONEON-NOSVE-NEXT:    ldrb w21, [sp, #29]
+; NONEON-NOSVE-NEXT:    add w6, w23, w23
+; NONEON-NOSVE-NEXT:    stp w9, w8, [sp, #64]
+; NONEON-NOSVE-NEXT:    add w8, w22, w22
+; NONEON-NOSVE-NEXT:    and w9, w6, #0xff
+; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w4, [sp, #34]
+; NONEON-NOSVE-NEXT:    ldrb w5, [sp, #35]
+; NONEON-NOSVE-NEXT:    add w6, w21, w21
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #56]
 ; NONEON-NOSVE-NEXT:    add w9, w20, w20
-; NONEON-NOSVE-NEXT:    ldrb w7, [sp, #39]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #72]
+; NONEON-NOSVE-NEXT:    and w8, w6, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w2, [sp, #32]
+; NONEON-NOSVE-NEXT:    and w9, w9, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w3, [sp, #33]
+; NONEON-NOSVE-NEXT:    add w5, w5, w5
+; NONEON-NOSVE-NEXT:    stp w9, w8, [sp, #48]
+; NONEON-NOSVE-NEXT:    add w8, w4, w4
+; NONEON-NOSVE-NEXT:    and w9, w5, #0xff
+; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w18, [sp, #38]
+; NONEON-NOSVE-NEXT:    ldrb w0, [sp, #39]
+; NONEON-NOSVE-NEXT:    add w3, w3, w3
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #168]
+; NONEON-NOSVE-NEXT:    add w9, w2, w2
+; NONEON-NOSVE-NEXT:    and w8, w3, #0xff
+; NONEON-NOSVE-NEXT:    and w9, w9, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #36]
+; NONEON-NOSVE-NEXT:    ldrb w17, [sp, #37]
+; NONEON-NOSVE-NEXT:    add w0, w0, w0
+; NONEON-NOSVE-NEXT:    stp w9, w8, [sp, #160]
+; NONEON-NOSVE-NEXT:    add w8, w18, w18
+; NONEON-NOSVE-NEXT:    and w9, w0, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #42]
+; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #43]
+; NONEON-NOSVE-NEXT:    add w17, w17, w17
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #152]
+; NONEON-NOSVE-NEXT:    add w9, w16, w16
+; NONEON-NOSVE-NEXT:    and w8, w17, #0xff
+; NONEON-NOSVE-NEXT:    and w9, w9, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #40]
+; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #41]
+; NONEON-NOSVE-NEXT:    add w15, w15, w15
+; NONEON-NOSVE-NEXT:    stp w9, w8, [sp, #144]
 ; NONEON-NOSVE-NEXT:    add w8, w14, w14
-; NONEON-NOSVE-NEXT:    ldrb w5, [sp, #37]
-; NONEON-NOSVE-NEXT:    strb w18, [sp, #62]
-; NONEON-NOSVE-NEXT:    ldrb w3, [sp, #35]
-; NONEON-NOSVE-NEXT:    ldrb w0, [sp, #33]
-; NONEON-NOSVE-NEXT:    strb w17, [sp, #61]
-; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #47]
-; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #45]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #49]
-; NONEON-NOSVE-NEXT:    add w9, w7, w7
-; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #43]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #70]
-; NONEON-NOSVE-NEXT:    add w8, w12, w12
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #68]
+; NONEON-NOSVE-NEXT:    and w9, w15, #0xff
+; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #46]
+; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #47]
+; NONEON-NOSVE-NEXT:    add w13, w13, w13
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #136]
+; NONEON-NOSVE-NEXT:    add w9, w12, w12
+; NONEON-NOSVE-NEXT:    and w8, w13, #0xff
+; NONEON-NOSVE-NEXT:    and w9, w9, #0xff
+; NONEON-NOSVE-NEXT:    add w11, w11, w11
+; NONEON-NOSVE-NEXT:    stp w9, w8, [sp, #128]
 ; NONEON-NOSVE-NEXT:    add w8, w10, w10
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #66]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #12] // 4-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #79]
-; NONEON-NOSVE-NEXT:    add w9, w5, w5
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #77]
-; NONEON-NOSVE-NEXT:    add w9, w3, w3
-; NONEON-NOSVE-NEXT:    add w8, w8, w8
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #64]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #91]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #75]
-; NONEON-NOSVE-NEXT:    add w9, w0, w0
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #134]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #90]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #73]
-; NONEON-NOSVE-NEXT:    add w9, w15, w15
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #132]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #89]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #71]
-; NONEON-NOSVE-NEXT:    add w9, w13, w13
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #130]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #88]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #69]
-; NONEON-NOSVE-NEXT:    add w9, w11, w11
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #128]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #95]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #67]
-; NONEON-NOSVE-NEXT:    ldr w9, [sp, #8] // 4-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #142]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #94]
+; NONEON-NOSVE-NEXT:    and w9, w11, #0xff
+; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
+; NONEON-NOSVE-NEXT:    ldr w10, [sp, #8] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #120]
+; NONEON-NOSVE-NEXT:    ldr w9, [sp, #12] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT:    add w10, w10, w10
+; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #80]
 ; NONEON-NOSVE-NEXT:    add w9, w9, w9
-; NONEON-NOSVE-NEXT:    ldp x20, x19, [sp, #448] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #140]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #93]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #65]
-; NONEON-NOSVE-NEXT:    ldp x22, x21, [sp, #432] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #138]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #92]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #136]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #83]
-; NONEON-NOSVE-NEXT:    ldp x24, x23, [sp, #416] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #118]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #82]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #96]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #128]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #116]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #81]
-; NONEON-NOSVE-NEXT:    ldp x26, x25, [sp, #400] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #114]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #80]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #192]
-; NONEON-NOSVE-NEXT:    ldp x28, x27, [sp, #384] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #87]
-; NONEON-NOSVE-NEXT:    ldp x29, x30, [sp, #368] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #126]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #86]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #124]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #85]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #122]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #84]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #120]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #107]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #112]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #166]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #106]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #164]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #105]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #176]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #162]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #104]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #182]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #160]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #111]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #174]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #110]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #172]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #109]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #170]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #108]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #168]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #99]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #160]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #150]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #98]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #148]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #97]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #224]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #146]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #96]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #144]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #103]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #158]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #102]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #156]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #101]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #154]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #100]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #152]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #198]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #144]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #284]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #196]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #280]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #194]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #208]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #276]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #192]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #272]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #206]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #300]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #204]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #296]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #202]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #292]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #200]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #288]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #180]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #272]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #248]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #178]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #176]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #240]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #190]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #268]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #188]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #264]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #186]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #260]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #184]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #256]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #230]
-; NONEON-NOSVE-NEXT:    ldp q3, q4, [sp, #240]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #348]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #228]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #344]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #226]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #340]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #224]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #336]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #238]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #364]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #236]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #360]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #234]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #356]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #232]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #352]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #214]
-; NONEON-NOSVE-NEXT:    ldp q6, q7, [sp, #336]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #316]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #212]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #312]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #210]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #308]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #208]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #304]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #222]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #332]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #220]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #328]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #218]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #324]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #216]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #320]
-; NONEON-NOSVE-NEXT:    ldp q5, q2, [sp, #304]
+; NONEON-NOSVE-NEXT:    and w8, w10, #0xff
+; NONEON-NOSVE-NEXT:    and w9, w9, #0xff
+; NONEON-NOSVE-NEXT:    ldp q4, q3, [sp, #48]
+; NONEON-NOSVE-NEXT:    stp w9, w8, [sp, #112]
+; NONEON-NOSVE-NEXT:    ldp q7, q6, [sp, #144]
+; NONEON-NOSVE-NEXT:    ldp q2, q5, [sp, #112]
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT:    ldp x20, x19, [sp, #256] // 16-byte Folded Reload
 ; NONEON-NOSVE-NEXT:    stp q3, q4, [x1, #32]
+; NONEON-NOSVE-NEXT:    ldp x22, x21, [sp, #240] // 16-byte Folded Reload
 ; NONEON-NOSVE-NEXT:    stp q6, q7, [x1, #64]
+; NONEON-NOSVE-NEXT:    ldp x24, x23, [sp, #224] // 16-byte Folded Reload
 ; NONEON-NOSVE-NEXT:    stp q5, q2, [x1, #96]
-; NONEON-NOSVE-NEXT:    add sp, sp, #464
+; NONEON-NOSVE-NEXT:    ldp x26, x25, [sp, #208] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT:    ldp x28, x27, [sp, #192] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT:    ldp x29, x30, [sp, #176] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT:    add sp, sp, #272
 ; NONEON-NOSVE-NEXT:    ret
   %a = load <32 x i8>, ptr %in
   %b = add <32 x i8> %a, %a
@@ -2858,26 +2025,20 @@ define void @zext_v4i8_v4i64(<4 x i8> %a, ptr %out) {
 ;
 ; NONEON-NOSVE-LABEL: zext_v4i8_v4i64:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #80
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 80
+; NONEON-NOSVE-NEXT:    sub sp, sp, #48
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
 ; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #10]
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #10]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #8]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #24]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #14]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #12]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #40]
-; NONEON-NOSVE-NEXT:    stp w9, wzr, [sp, #72]
-; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #64]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #32]
-; NONEON-NOSVE-NEXT:    stp w9, wzr, [sp, #56]
-; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #48]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #48]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #32]
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #14]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #24]
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #12]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #16]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [x0]
-; NONEON-NOSVE-NEXT:    add sp, sp, #80
+; NONEON-NOSVE-NEXT:    add sp, sp, #48
 ; NONEON-NOSVE-NEXT:    ret
   %b = zext <4 x i8> %a to <4 x i64>
   store <4 x i64>%b, ptr %out
@@ -2904,61 +2065,30 @@ define void @zext_v8i8_v8i64(<8 x i8> %a, ptr %out) {
 ;
 ; NONEON-NOSVE-LABEL: zext_v8i8_v8i64:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #176
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 176
+; NONEON-NOSVE-NEXT:    sub sp, sp, #80
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 80
 ; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
-; NONEON-NOSVE-NEXT:    add x8, sp, #144
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #11]
-; NONEON-NOSVE-NEXT:    strh w9, [sp, #30]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #10]
-; NONEON-NOSVE-NEXT:    strh w9, [sp, #28]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #9]
-; NONEON-NOSVE-NEXT:    strh w9, [sp, #26]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #8]
-; NONEON-NOSVE-NEXT:    strh w9, [sp, #24]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #15]
-; NONEON-NOSVE-NEXT:    strh w9, [sp, #22]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #14]
-; NONEON-NOSVE-NEXT:    strh w9, [sp, #20]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #13]
-; NONEON-NOSVE-NEXT:    strh w9, [sp, #18]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #12]
-; NONEON-NOSVE-NEXT:    strh w9, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #42]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #40]
-; NONEON-NOSVE-NEXT:    stp w9, w10, [sp, #64]
-; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #46]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #44]
-; NONEON-NOSVE-NEXT:    stp w9, w10, [sp, #72]
-; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #34]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #64]
-; NONEON-NOSVE-NEXT:    stp w9, w10, [sp, #48]
-; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #38]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #36]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #96]
-; NONEON-NOSVE-NEXT:    stp w9, w10, [sp, #56]
-; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #96]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #48]
-; NONEON-NOSVE-NEXT:    stp w10, wzr, [sp, #152]
-; NONEON-NOSVE-NEXT:    stp w9, wzr, [sp, #144]
-; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #104]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #80]
-; NONEON-NOSVE-NEXT:    stp w10, wzr, [sp, #168]
-; NONEON-NOSVE-NEXT:    stp w9, wzr, [sp, #160]
-; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #80]
-; NONEON-NOSVE-NEXT:    stp w10, wzr, [sp, #120]
-; NONEON-NOSVE-NEXT:    stp w9, wzr, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #88]
-; NONEON-NOSVE-NEXT:    stp w10, wzr, [sp, #136]
-; NONEON-NOSVE-NEXT:    stp w9, wzr, [sp, #128]
-; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldp q2, q3, [x8]
-; NONEON-NOSVE-NEXT:    stp q1, q0, [x0, #32]
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #9]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #72]
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #8]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #64]
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #11]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #56]
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #10]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #48]
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #13]
+; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #48]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #40]
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #12]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #32]
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #15]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #24]
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #14]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #16]
 ; NONEON-NOSVE-NEXT:    stp q2, q3, [x0]
-; NONEON-NOSVE-NEXT:    add sp, sp, #176
+; NONEON-NOSVE-NEXT:    stp q1, q0, [x0, #32]
+; NONEON-NOSVE-NEXT:    add sp, sp, #80
 ; NONEON-NOSVE-NEXT:    ret
   %b = zext <8 x i8> %a to <8 x i64>
   store <8 x i64>%b, ptr %out
@@ -2998,129 +2128,51 @@ define void @zext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
 ;
 ; NONEON-NOSVE-LABEL: zext_v16i8_v16i64:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #368
-; NONEON-NOSVE-NEXT:    str x29, [sp, #352] // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 368
-; NONEON-NOSVE-NEXT:    .cfi_offset w29, -16
-; NONEON-NOSVE-NEXT:    str q0, [sp]
-; NONEON-NOSVE-NEXT:    ldr x29, [sp, #352] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT:    str q0, [sp, #-160]!
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 160
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #332]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #324]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #348]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #24]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #35]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #340]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #300]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #70]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #34]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #292]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #68]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #33]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #316]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #66]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #32]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #308]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #64]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #39]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #268]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #62]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #38]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #260]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #60]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #37]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #284]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #58]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #36]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #276]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #56]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #27]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #56]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #54]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #26]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #52]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #25]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #88]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #50]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #152]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #98]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #48]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #31]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #46]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #30]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #44]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #144]
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #27]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #136]
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #26]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #128]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #29]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #42]
+; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #128]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #120]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #40]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #96]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #40]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #152]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #102]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #100]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #160]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #90]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #88]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #72]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #152]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #136]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #94]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #92]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #144]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #82]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #80]
-; NONEON-NOSVE-NEXT:    str d0, [sp, #360]
-; NONEON-NOSVE-NEXT:    ldp d2, d0, [sp, #136]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #120]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #86]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #84]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #128]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #74]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #72]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #208]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #120]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #104]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #78]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #76]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #216]
-; NONEON-NOSVE-NEXT:    stp d0, d2, [sp, #192]
-; NONEON-NOSVE-NEXT:    ldp d2, d0, [sp, #104]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #320]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #364]
-; NONEON-NOSVE-NEXT:    str w9, [sp, #328]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #344]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #360]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #176]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #336]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #200]
-; NONEON-NOSVE-NEXT:    str d2, [sp, #168]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #320]
-; NONEON-NOSVE-NEXT:    str w9, [sp, #296]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #288]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #208]
-; NONEON-NOSVE-NEXT:    str w9, [sp, #312]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #304]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #184]
-; NONEON-NOSVE-NEXT:    ldp q3, q4, [sp, #288]
-; NONEON-NOSVE-NEXT:    str w9, [sp, #264]
-; NONEON-NOSVE-NEXT:    stp wzr, w8, [sp, #252]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #192]
-; NONEON-NOSVE-NEXT:    str w9, [sp, #280]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #272]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #168]
-; NONEON-NOSVE-NEXT:    ldp q6, q7, [sp, #256]
-; NONEON-NOSVE-NEXT:    stp w9, wzr, [sp, #232]
-; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #224]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #176]
-; NONEON-NOSVE-NEXT:    stp wzr, w9, [sp, #244]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #240]
-; NONEON-NOSVE-NEXT:    ldp q5, q2, [sp, #224]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #112]
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #31]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #104]
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #30]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #96]
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
+; NONEON-NOSVE-NEXT:    ldp q4, q3, [sp, #96]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #88]
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #80]
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #19]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #72]
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #18]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #64]
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
+; NONEON-NOSVE-NEXT:    ldp q7, q6, [sp, #64]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #56]
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #48]
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #23]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #40]
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #32]
+; NONEON-NOSVE-NEXT:    ldp q2, q5, [sp, #32]
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
 ; NONEON-NOSVE-NEXT:    stp q3, q4, [x0, #32]
 ; NONEON-NOSVE-NEXT:    stp q6, q7, [x0, #64]
 ; NONEON-NOSVE-NEXT:    stp q5, q2, [x0, #96]
-; NONEON-NOSVE-NEXT:    add sp, sp, #368
+; NONEON-NOSVE-NEXT:    add sp, sp, #160
 ; NONEON-NOSVE-NEXT:    ret
   %b = zext <16 x i8> %a to <16 x i64>
   store <16 x i64> %b, ptr %out
@@ -3192,14 +2244,14 @@ define void @zext_v32i8_v32i64(ptr %in, ptr %out) {
 ;
 ; NONEON-NOSVE-LABEL: zext_v32i8_v32i64:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    stp x29, x30, [sp, #-96]! // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    stp x28, x27, [sp, #16] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    stp x26, x25, [sp, #32] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    stp x24, x23, [sp, #48] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
-; NONEON-NOSVE-NEXT:    sub sp, sp, #752
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 848
+; NONEON-NOSVE-NEXT:    sub sp, sp, #400
+; NONEON-NOSVE-NEXT:    stp x29, x30, [sp, #304] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    stp x28, x27, [sp, #320] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    stp x26, x25, [sp, #336] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    stp x24, x23, [sp, #352] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    stp x22, x21, [sp, #368] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #384] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 400
 ; NONEON-NOSVE-NEXT:    .cfi_offset w19, -8
 ; NONEON-NOSVE-NEXT:    .cfi_offset w20, -16
 ; NONEON-NOSVE-NEXT:    .cfi_offset w21, -24
@@ -3213,379 +2265,168 @@ define void @zext_v32i8_v32i64(ptr %in, ptr %out) {
 ; NONEON-NOSVE-NEXT:    .cfi_offset w30, -88
 ; NONEON-NOSVE-NEXT:    .cfi_offset w29, -96
 ; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #572]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #564]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #588]
+; NONEON-NOSVE-NEXT:    str wzr, [sp, #172]
+; NONEON-NOSVE-NEXT:    str wzr, [sp, #292]
+; NONEON-NOSVE-NEXT:    str wzr, [sp, #300]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #40]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #41]
-; NONEON-NOSVE-NEXT:    ldrb w29, [sp, #18]
-; NONEON-NOSVE-NEXT:    ldrb w27, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrb w25, [sp, #30]
-; NONEON-NOSVE-NEXT:    ldrb w23, [sp, #28]
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #46]
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #47]
+; NONEON-NOSVE-NEXT:    ldrb w30, [sp, #21]
+; NONEON-NOSVE-NEXT:    ldrb w29, [sp, #20]
+; NONEON-NOSVE-NEXT:    ldrb w27, [sp, #22]
+; NONEON-NOSVE-NEXT:    ldrb w28, [sp, #23]
 ; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #8] // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    ldrb w21, [sp, #26]
-; NONEON-NOSVE-NEXT:    ldrb w19, [sp, #24]
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #18]
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #19]
-; NONEON-NOSVE-NEXT:    ldrb w6, [sp, #38]
+; NONEON-NOSVE-NEXT:    ldrb w26, [sp, #25]
+; NONEON-NOSVE-NEXT:    ldrb w25, [sp, #24]
+; NONEON-NOSVE-NEXT:    ldrb w24, [sp, #26]
 ; NONEON-NOSVE-NEXT:    add w8, w8, w8
-; NONEON-NOSVE-NEXT:    ldrb w28, [sp, #17]
-; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #22]
 ; NONEON-NOSVE-NEXT:    add w9, w9, w9
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #60]
-; NONEON-NOSVE-NEXT:    add w8, w29, w29
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #58]
+; NONEON-NOSVE-NEXT:    ldrb w21, [sp, #27]
+; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
+; NONEON-NOSVE-NEXT:    and w9, w9, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w19, [sp, #29]
+; NONEON-NOSVE-NEXT:    stp wzr, w8, [sp, #140]
+; NONEON-NOSVE-NEXT:    add w8, w30, w30
+; NONEON-NOSVE-NEXT:    ldrb w20, [sp, #28]
+; NONEON-NOSVE-NEXT:    stp wzr, w9, [sp, #148]
+; NONEON-NOSVE-NEXT:    add w9, w29, w29
+; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
+; NONEON-NOSVE-NEXT:    and w9, w9, #0xff
+; NONEON-NOSVE-NEXT:    stp wzr, w8, [sp, #132]
 ; NONEON-NOSVE-NEXT:    add w8, w27, w27
-; NONEON-NOSVE-NEXT:    add w18, w16, w16
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #56]
-; NONEON-NOSVE-NEXT:    add w8, w25, w25
-; NONEON-NOSVE-NEXT:    ldrb w4, [sp, #36]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #54]
-; NONEON-NOSVE-NEXT:    add w8, w23, w23
-; NONEON-NOSVE-NEXT:    ldrb w26, [sp, #31]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #52]
-; NONEON-NOSVE-NEXT:    add w8, w21, w21
-; NONEON-NOSVE-NEXT:    ldrb w2, [sp, #34]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #50]
-; NONEON-NOSVE-NEXT:    add w8, w19, w19
-; NONEON-NOSVE-NEXT:    ldrb w24, [sp, #29]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #59]
+; NONEON-NOSVE-NEXT:    stp wzr, w9, [sp, #124]
 ; NONEON-NOSVE-NEXT:    add w9, w28, w28
-; NONEON-NOSVE-NEXT:    ldrb w17, [sp, #23]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #48]
-; NONEON-NOSVE-NEXT:    add w8, w6, w6
-; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldrb w22, [sp, #27]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #57]
-; NONEON-NOSVE-NEXT:    add w9, w26, w26
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #78]
-; NONEON-NOSVE-NEXT:    add w8, w4, w4
-; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #46]
-; NONEON-NOSVE-NEXT:    ldrb w20, [sp, #25]
-; NONEON-NOSVE-NEXT:    ldrb w30, [sp, #21]
-; NONEON-NOSVE-NEXT:    add w17, w17, w17
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #55]
-; NONEON-NOSVE-NEXT:    add w9, w24, w24
-; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #44]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #76]
-; NONEON-NOSVE-NEXT:    add w8, w2, w2
-; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #42]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #53]
-; NONEON-NOSVE-NEXT:    add w9, w22, w22
-; NONEON-NOSVE-NEXT:    ldrb w7, [sp, #39]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #74]
-; NONEON-NOSVE-NEXT:    add w8, w16, w16
-; NONEON-NOSVE-NEXT:    ldrb w5, [sp, #37]
-; NONEON-NOSVE-NEXT:    strb w17, [sp, #63]
-; NONEON-NOSVE-NEXT:    add w17, w30, w30
-; NONEON-NOSVE-NEXT:    ldrb w3, [sp, #35]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #51]
+; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
+; NONEON-NOSVE-NEXT:    and w9, w9, #0xff
+; NONEON-NOSVE-NEXT:    stp wzr, w8, [sp, #108]
+; NONEON-NOSVE-NEXT:    add w8, w26, w26
+; NONEON-NOSVE-NEXT:    stp wzr, w9, [sp, #116]
+; NONEON-NOSVE-NEXT:    add w9, w25, w25
+; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
+; NONEON-NOSVE-NEXT:    and w9, w9, #0xff
+; NONEON-NOSVE-NEXT:    stp wzr, w8, [sp, #100]
+; NONEON-NOSVE-NEXT:    add w8, w24, w24
+; NONEON-NOSVE-NEXT:    stp wzr, w9, [sp, #92]
+; NONEON-NOSVE-NEXT:    add w9, w21, w21
+; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w7, [sp, #30]
+; NONEON-NOSVE-NEXT:    and w9, w9, #0xff
+; NONEON-NOSVE-NEXT:    stp wzr, w8, [sp, #76]
+; NONEON-NOSVE-NEXT:    add w8, w19, w19
+; NONEON-NOSVE-NEXT:    ldrb w6, [sp, #31]
+; NONEON-NOSVE-NEXT:    stp wzr, w9, [sp, #84]
 ; NONEON-NOSVE-NEXT:    add w9, w20, w20
-; NONEON-NOSVE-NEXT:    ldrb w0, [sp, #33]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #72]
-; NONEON-NOSVE-NEXT:    add w8, w14, w14
-; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #47]
-; NONEON-NOSVE-NEXT:    strb w18, [sp, #62]
-; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #45]
-; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #43]
-; NONEON-NOSVE-NEXT:    strb w17, [sp, #61]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #49]
-; NONEON-NOSVE-NEXT:    add w9, w7, w7
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #70]
-; NONEON-NOSVE-NEXT:    add w8, w12, w12
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #68]
-; NONEON-NOSVE-NEXT:    add w8, w10, w10
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #66]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #12] // 4-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #79]
+; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w4, [sp, #33]
+; NONEON-NOSVE-NEXT:    and w9, w9, #0xff
+; NONEON-NOSVE-NEXT:    stp wzr, w8, [sp, #68]
+; NONEON-NOSVE-NEXT:    add w8, w7, w7
+; NONEON-NOSVE-NEXT:    ldrb w5, [sp, #32]
+; NONEON-NOSVE-NEXT:    stp wzr, w9, [sp, #60]
+; NONEON-NOSVE-NEXT:    add w9, w6, w6
+; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldrb w18, [sp, #17]
+; NONEON-NOSVE-NEXT:    ldrb w3, [sp, #34]
+; NONEON-NOSVE-NEXT:    and w9, w9, #0xff
+; NONEON-NOSVE-NEXT:    str w8, [sp, #48]
+; NONEON-NOSVE-NEXT:    add w8, w4, w4
+; NONEON-NOSVE-NEXT:    ldrb w2, [sp, #35]
+; NONEON-NOSVE-NEXT:    stp wzr, w9, [sp, #52]
 ; NONEON-NOSVE-NEXT:    add w9, w5, w5
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #77]
-; NONEON-NOSVE-NEXT:    add w9, w3, w3
-; NONEON-NOSVE-NEXT:    add w8, w8, w8
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #64]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #91]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #75]
+; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
+; NONEON-NOSVE-NEXT:    add w0, w16, w16
+; NONEON-NOSVE-NEXT:    add w22, w18, w18
+; NONEON-NOSVE-NEXT:    ldrb w18, [sp, #37]
+; NONEON-NOSVE-NEXT:    and w9, w9, #0xff
+; NONEON-NOSVE-NEXT:    str w8, [sp, #296]
+; NONEON-NOSVE-NEXT:    add w8, w3, w3
+; NONEON-NOSVE-NEXT:    and w23, w0, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w0, [sp, #36]
+; NONEON-NOSVE-NEXT:    str w9, [sp, #288]
+; NONEON-NOSVE-NEXT:    add w9, w2, w2
+; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w17, [sp, #38]
+; NONEON-NOSVE-NEXT:    and w9, w9, #0xff
+; NONEON-NOSVE-NEXT:    str w8, [sp, #272]
+; NONEON-NOSVE-NEXT:    add w8, w18, w18
+; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #39]
+; NONEON-NOSVE-NEXT:    str w9, [sp, #280]
 ; NONEON-NOSVE-NEXT:    add w9, w0, w0
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #134]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #90]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #73]
-; NONEON-NOSVE-NEXT:    add w9, w15, w15
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #132]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #89]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #71]
+; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #41]
+; NONEON-NOSVE-NEXT:    and w9, w9, #0xff
+; NONEON-NOSVE-NEXT:    str w8, [sp, #264]
+; NONEON-NOSVE-NEXT:    add w8, w17, w17
+; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #40]
+; NONEON-NOSVE-NEXT:    stp wzr, w9, [sp, #252]
+; NONEON-NOSVE-NEXT:    add w9, w16, w16
+; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #42]
+; NONEON-NOSVE-NEXT:    and w9, w9, #0xff
+; NONEON-NOSVE-NEXT:    stp wzr, w8, [sp, #236]
+; NONEON-NOSVE-NEXT:    add w8, w15, w15
+; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #43]
+; NONEON-NOSVE-NEXT:    stp wzr, w9, [sp, #244]
+; NONEON-NOSVE-NEXT:    add w9, w14, w14
+; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
+; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #45]
+; NONEON-NOSVE-NEXT:    and w9, w9, #0xff
+; NONEON-NOSVE-NEXT:    stp wzr, w8, [sp, #228]
+; NONEON-NOSVE-NEXT:    add w8, w12, w12
+; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #44]
+; NONEON-NOSVE-NEXT:    stp wzr, w9, [sp, #220]
 ; NONEON-NOSVE-NEXT:    add w9, w13, w13
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #130]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #88]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #69]
-; NONEON-NOSVE-NEXT:    add w9, w11, w11
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #128]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #95]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #67]
-; NONEON-NOSVE-NEXT:    ldr w9, [sp, #8] // 4-byte Folded Reload
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #142]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #94]
+; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
+; NONEON-NOSVE-NEXT:    and w9, w9, #0xff
+; NONEON-NOSVE-NEXT:    stp wzr, w8, [sp, #204]
+; NONEON-NOSVE-NEXT:    add w8, w11, w11
+; NONEON-NOSVE-NEXT:    stp wzr, w9, [sp, #212]
+; NONEON-NOSVE-NEXT:    add w9, w10, w10
+; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
+; NONEON-NOSVE-NEXT:    and w9, w9, #0xff
+; NONEON-NOSVE-NEXT:    stp wzr, w8, [sp, #196]
+; NONEON-NOSVE-NEXT:    ldr w8, [sp, #8] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT:    stp wzr, w9, [sp, #188]
+; NONEON-NOSVE-NEXT:    ldr w9, [sp, #12] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT:    and w22, w22, #0xff
+; NONEON-NOSVE-NEXT:    add w8, w8, w8
+; NONEON-NOSVE-NEXT:    stp wzr, w22, [sp, #164]
+; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
 ; NONEON-NOSVE-NEXT:    add w9, w9, w9
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #580]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #140]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #93]
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #65]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #138]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #92]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #604]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #136]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #83]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #596]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #118]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #82]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #96]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #128]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #116]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #81]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #620]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #114]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #80]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #192]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #87]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #612]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #126]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #86]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #508]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #124]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #85]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #500]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #122]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #84]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #524]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #120]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #107]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #112]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #516]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #166]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #106]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #540]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #164]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #105]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #176]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #162]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #104]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #178]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #532]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #160]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #111]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #556]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #174]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #110]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #548]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #172]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #109]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #700]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #170]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #108]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #692]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #168]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #99]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #160]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #716]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #150]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #98]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #708]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #148]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #97]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #224]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #146]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #96]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #732]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #144]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #103]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #724]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #158]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #102]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #748]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #156]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #101]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #740]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #154]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #100]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #636]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #152]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #194]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #144]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #628]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #276]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #192]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #652]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #272]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #198]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #208]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #284]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #196]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #644]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #280]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #202]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #272]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #668]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #292]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #200]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #660]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #288]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #206]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #400]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #300]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #204]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #684]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #296]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #176]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #288]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #676]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #240]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #182]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #180]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #248]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #186]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #416]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #240]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #260]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #184]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #256]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #190]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #368]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #268]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #188]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #264]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #226]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #256]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #340]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #224]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #336]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #230]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #384]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #348]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #228]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #344]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #234]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #336]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #356]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #232]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #352]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #238]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #464]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #364]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #236]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #360]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #210]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #352]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #308]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #208]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #304]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #214]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #480]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #316]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #212]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #312]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #218]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #304]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #324]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #216]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #320]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #222]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #432]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #332]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #220]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #328]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #404]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #320]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #568]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #400]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #560]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #412]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #448]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #584]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #408]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #576]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #420]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #560]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #600]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #416]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #592]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #428]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #616]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #424]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #608]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #372]
-; NONEON-NOSVE-NEXT:    ldp q2, q3, [sp, #592]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #504]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #368]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #496]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #380]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #520]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #376]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #512]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #388]
-; NONEON-NOSVE-NEXT:    ldp q4, q5, [sp, #496]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #536]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #384]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #528]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #396]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #552]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #392]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #544]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #468]
-; NONEON-NOSVE-NEXT:    ldp q6, q7, [sp, #528]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #696]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #464]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #688]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #476]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #712]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #472]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #704]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #484]
-; NONEON-NOSVE-NEXT:    ldp q16, q17, [sp, #688]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #728]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #480]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #720]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #492]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #744]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #488]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #736]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #436]
-; NONEON-NOSVE-NEXT:    ldp q19, q20, [sp, #720]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #632]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #432]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #624]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #444]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #648]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #440]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #640]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #452]
-; NONEON-NOSVE-NEXT:    ldp q22, q23, [sp, #624]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #664]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #448]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #656]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #460]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #680]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #456]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #672]
-; NONEON-NOSVE-NEXT:    ldp q21, q18, [sp, #656]
+; NONEON-NOSVE-NEXT:    stp wzr, w23, [sp, #156]
+; NONEON-NOSVE-NEXT:    stp wzr, w8, [sp, #180]
+; NONEON-NOSVE-NEXT:    and w8, w9, #0xff
+; NONEON-NOSVE-NEXT:    str wzr, [sp, #276]
+; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #144]
+; NONEON-NOSVE-NEXT:    str wzr, [sp, #284]
+; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #112]
+; NONEON-NOSVE-NEXT:    str wzr, [sp, #260]
+; NONEON-NOSVE-NEXT:    ldp q5, q4, [sp, #80]
+; NONEON-NOSVE-NEXT:    str wzr, [sp, #268]
+; NONEON-NOSVE-NEXT:    ldp q7, q6, [sp, #48]
+; NONEON-NOSVE-NEXT:    str w8, [sp, #176]
+; NONEON-NOSVE-NEXT:    ldp q17, q16, [sp, #272]
+; NONEON-NOSVE-NEXT:    ldp q18, q21, [sp, #176]
+; NONEON-NOSVE-NEXT:    ldp q20, q19, [sp, #240]
+; NONEON-NOSVE-NEXT:    ldp q23, q22, [sp, #208]
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT:    ldp x20, x19, [sp, #384] // 16-byte Folded Reload
 ; NONEON-NOSVE-NEXT:    stp q2, q3, [x1, #32]
+; NONEON-NOSVE-NEXT:    ldp x22, x21, [sp, #368] // 16-byte Folded Reload
 ; NONEON-NOSVE-NEXT:    stp q4, q5, [x1, #64]
+; NONEON-NOSVE-NEXT:    ldp x24, x23, [sp, #352] // 16-byte Folded Reload
 ; NONEON-NOSVE-NEXT:    stp q6, q7, [x1, #96]
+; NONEON-NOSVE-NEXT:    ldp x26, x25, [sp, #336] // 16-byte Folded Reload
 ; NONEON-NOSVE-NEXT:    stp q16, q17, [x1, #128]
+; NONEON-NOSVE-NEXT:    ldp x28, x27, [sp, #320] // 16-byte Folded Reload
 ; NONEON-NOSVE-NEXT:    stp q19, q20, [x1, #160]
+; NONEON-NOSVE-NEXT:    ldp x29, x30, [sp, #304] // 16-byte Folded Reload
 ; NONEON-NOSVE-NEXT:    stp q22, q23, [x1, #192]
 ; NONEON-NOSVE-NEXT:    stp q21, q18, [x1, #224]
-; NONEON-NOSVE-NEXT:    add sp, sp, #752
-; NONEON-NOSVE-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    ldp x24, x23, [sp, #48] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    ldp x26, x25, [sp, #32] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    ldp x28, x27, [sp, #16] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    ldp x29, x30, [sp], #96 // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT:    add sp, sp, #400
 ; NONEON-NOSVE-NEXT:    ret
   %a = load <32 x i8>, ptr %in
   %b = add <32 x i8> %a, %a
@@ -3653,91 +2494,70 @@ define void @zext_v16i16_v16i32(ptr %in, ptr %out) {
 ;
 ; NONEON-NOSVE-LABEL: zext_v16i16_v16i32:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #160
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 160
 ; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
-; NONEON-NOSVE-NEXT:    stp q1, q0, [sp]
-; NONEON-NOSVE-NEXT:    ldrh w13, [sp, #4]
+; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-96]!
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
 ; NONEON-NOSVE-NEXT:    ldrh w14, [sp, #6]
-; NONEON-NOSVE-NEXT:    ldrh w3, [sp, #2]
-; NONEON-NOSVE-NEXT:    ldrh w5, [sp]
-; NONEON-NOSVE-NEXT:    ldrh w2, [sp, #12]
-; NONEON-NOSVE-NEXT:    ldrh w4, [sp, #14]
-; NONEON-NOSVE-NEXT:    add w13, w13, w13
+; NONEON-NOSVE-NEXT:    ldrh w17, [sp, #4]
+; NONEON-NOSVE-NEXT:    ldrh w16, [sp, #2]
+; NONEON-NOSVE-NEXT:    ldrh w3, [sp]
+; NONEON-NOSVE-NEXT:    ldrh w4, [sp, #12]
+; NONEON-NOSVE-NEXT:    ldrh w5, [sp, #14]
 ; NONEON-NOSVE-NEXT:    add w14, w14, w14
-; NONEON-NOSVE-NEXT:    ldrh w18, [sp, #8]
-; NONEON-NOSVE-NEXT:    ldrh w0, [sp, #10]
-; NONEON-NOSVE-NEXT:    strh w14, [sp, #46]
+; NONEON-NOSVE-NEXT:    add w17, w17, w17
+; NONEON-NOSVE-NEXT:    add w16, w16, w16
+; NONEON-NOSVE-NEXT:    and w14, w14, #0xffff
+; NONEON-NOSVE-NEXT:    and w17, w17, #0xffff
+; NONEON-NOSVE-NEXT:    and w16, w16, #0xffff
+; NONEON-NOSVE-NEXT:    stp w17, w14, [sp, #56]
 ; NONEON-NOSVE-NEXT:    add w14, w3, w3
-; NONEON-NOSVE-NEXT:    strh w13, [sp, #44]
-; NONEON-NOSVE-NEXT:    add w13, w5, w5
+; NONEON-NOSVE-NEXT:    ldrh w0, [sp, #8]
+; NONEON-NOSVE-NEXT:    and w14, w14, #0xffff
+; NONEON-NOSVE-NEXT:    ldrh w2, [sp, #10]
+; NONEON-NOSVE-NEXT:    add w17, w5, w5
+; NONEON-NOSVE-NEXT:    stp w14, w16, [sp, #48]
+; NONEON-NOSVE-NEXT:    add w16, w4, w4
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    strh w14, [sp, #42]
-; NONEON-NOSVE-NEXT:    add w14, w4, w4
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #26]
-; NONEON-NOSVE-NEXT:    strh w13, [sp, #40]
-; NONEON-NOSVE-NEXT:    add w13, w2, w2
-; NONEON-NOSVE-NEXT:    ldrh w17, [sp, #22]
-; NONEON-NOSVE-NEXT:    strh w14, [sp, #38]
-; NONEON-NOSVE-NEXT:    add w14, w0, w0
-; NONEON-NOSVE-NEXT:    add w9, w9, w9
-; NONEON-NOSVE-NEXT:    strh w13, [sp, #36]
-; NONEON-NOSVE-NEXT:    add w13, w18, w18
-; NONEON-NOSVE-NEXT:    add w8, w8, w8
-; NONEON-NOSVE-NEXT:    strh w14, [sp, #34]
 ; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #28]
 ; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #30]
-; NONEON-NOSVE-NEXT:    strh w13, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrh w15, [sp, #18]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldrh w16, [sp, #20]
-; NONEON-NOSVE-NEXT:    strh w9, [sp, #50]
-; NONEON-NOSVE-NEXT:    add w14, w17, w17
+; NONEON-NOSVE-NEXT:    ldrh w13, [sp, #18]
+; NONEON-NOSVE-NEXT:    ldrh w15, [sp, #20]
+; NONEON-NOSVE-NEXT:    ldrh w18, [sp, #22]
+; NONEON-NOSVE-NEXT:    and w14, w17, #0xffff
+; NONEON-NOSVE-NEXT:    and w16, w16, #0xffff
+; NONEON-NOSVE-NEXT:    add w17, w2, w2
+; NONEON-NOSVE-NEXT:    stp w16, w14, [sp, #40]
+; NONEON-NOSVE-NEXT:    add w14, w0, w0
+; NONEON-NOSVE-NEXT:    and w16, w17, #0xffff
+; NONEON-NOSVE-NEXT:    add w17, w18, w18
+; NONEON-NOSVE-NEXT:    and w14, w14, #0xffff
+; NONEON-NOSVE-NEXT:    add w15, w15, w15
+; NONEON-NOSVE-NEXT:    add w13, w13, w13
 ; NONEON-NOSVE-NEXT:    add w12, w12, w12
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #48]
-; NONEON-NOSVE-NEXT:    add w13, w16, w16
 ; NONEON-NOSVE-NEXT:    add w11, w11, w11
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #64]
 ; NONEON-NOSVE-NEXT:    add w10, w10, w10
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #78]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #76]
-; NONEON-NOSVE-NEXT:    strh w14, [sp, #62]
-; NONEON-NOSVE-NEXT:    add w14, w15, w15
-; NONEON-NOSVE-NEXT:    strh w13, [sp, #60]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #120]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #74]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #72]
-; NONEON-NOSVE-NEXT:    strh w14, [sp, #58]
-; NONEON-NOSVE-NEXT:    strh w12, [sp, #56]
-; NONEON-NOSVE-NEXT:    strh w11, [sp, #54]
-; NONEON-NOSVE-NEXT:    strh w10, [sp, #52]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #70]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #68]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #104]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #66]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #64]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #96]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #94]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #92]
-; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #96]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #152]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #90]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #88]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #144]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #86]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #84]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #136]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #82]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #80]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #128]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #128]
+; NONEON-NOSVE-NEXT:    add w9, w9, w9
+; NONEON-NOSVE-NEXT:    add w8, w8, w8
+; NONEON-NOSVE-NEXT:    stp w14, w16, [sp, #32]
+; NONEON-NOSVE-NEXT:    and w14, w17, #0xffff
+; NONEON-NOSVE-NEXT:    and w15, w15, #0xffff
+; NONEON-NOSVE-NEXT:    and w13, w13, #0xffff
+; NONEON-NOSVE-NEXT:    and w12, w12, #0xffff
+; NONEON-NOSVE-NEXT:    and w11, w11, #0xffff
+; NONEON-NOSVE-NEXT:    and w10, w10, #0xffff
+; NONEON-NOSVE-NEXT:    and w9, w9, #0xffff
+; NONEON-NOSVE-NEXT:    and w8, w8, #0xffff
+; NONEON-NOSVE-NEXT:    stp w15, w14, [sp, #88]
+; NONEON-NOSVE-NEXT:    stp w12, w13, [sp, #80]
+; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #32]
+; NONEON-NOSVE-NEXT:    stp w10, w11, [sp, #72]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #64]
+; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
 ; NONEON-NOSVE-NEXT:    stp q2, q3, [x1]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [x1, #32]
-; NONEON-NOSVE-NEXT:    add sp, sp, #160
+; NONEON-NOSVE-NEXT:    add sp, sp, #96
 ; NONEON-NOSVE-NEXT:    ret
   %a = load <16 x i16>, ptr %in
   %b = add <16 x i16> %a, %a
@@ -3763,26 +2583,20 @@ define void @zext_v4i16_v4i64(<4 x i16> %a, ptr %out) {
 ;
 ; NONEON-NOSVE-LABEL: zext_v4i16_v4i64:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #80
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 80
+; NONEON-NOSVE-NEXT:    sub sp, sp, #48
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
 ; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #10]
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #10]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #8]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #24]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #14]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #32]
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #14]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #24]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #12]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #40]
-; NONEON-NOSVE-NEXT:    stp w9, wzr, [sp, #72]
-; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #64]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #32]
-; NONEON-NOSVE-NEXT:    stp w9, wzr, [sp, #56]
-; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #48]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #48]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #16]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [x0]
-; NONEON-NOSVE-NEXT:    add sp, sp, #80
+; NONEON-NOSVE-NEXT:    add sp, sp, #48
 ; NONEON-NOSVE-NEXT:    ret
   %b = zext <4 x i16> %a to <4 x i64>
   store <4 x i64>%b, ptr %out
@@ -3808,43 +2622,31 @@ define void @zext_v8i16_v8i64(<8 x i16> %a, ptr %out) {
 ;
 ; NONEON-NOSVE-LABEL: zext_v8i16_v8i64:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    str q0, [sp, #-160]!
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 160
+; NONEON-NOSVE-NEXT:    str q0, [sp, #-96]!
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #26]
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #26]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #88]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #56]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #30]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #80]
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #30]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #72]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #48]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #18]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #64]
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
+; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #64]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #56]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #40]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #22]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #48]
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #88]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT:    stp w9, wzr, [sp, #152]
-; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #144]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #80]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT:    stp w9, wzr, [sp, #136]
-; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #128]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #72]
-; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #128]
-; NONEON-NOSVE-NEXT:    stp w9, wzr, [sp, #120]
-; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #64]
-; NONEON-NOSVE-NEXT:    stp w9, wzr, [sp, #104]
-; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #96]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #96]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #32]
+; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
 ; NONEON-NOSVE-NEXT:    stp q2, q3, [x0]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [x0, #32]
-; NONEON-NOSVE-NEXT:    add sp, sp, #160
+; NONEON-NOSVE-NEXT:    add sp, sp, #96
 ; NONEON-NOSVE-NEXT:    ret
   %b = zext <8 x i16> %a to <8 x i64>
   store <8 x i64>%b, ptr %out
@@ -3883,144 +2685,85 @@ define void @zext_v16i16_v16i64(ptr %in, ptr %out) {
 ;
 ; NONEON-NOSVE-LABEL: zext_v16i16_v16i64:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #368
-; NONEON-NOSVE-NEXT:    str x29, [sp, #352] // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 368
-; NONEON-NOSVE-NEXT:    .cfi_offset w29, -16
+; NONEON-NOSVE-NEXT:    sub sp, sp, #160
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 160
 ; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #268]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #260]
-; NONEON-NOSVE-NEXT:    ldr x29, [sp, #352] // 8-byte Folded Reload
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #284]
+; NONEON-NOSVE-NEXT:    str wzr, [sp, #92]
+; NONEON-NOSVE-NEXT:    str wzr, [sp, #156]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp]
-; NONEON-NOSVE-NEXT:    ldrh w13, [sp, #4]
-; NONEON-NOSVE-NEXT:    ldrh w14, [sp, #6]
-; NONEON-NOSVE-NEXT:    ldrh w3, [sp, #2]
-; NONEON-NOSVE-NEXT:    ldrh w5, [sp]
-; NONEON-NOSVE-NEXT:    ldrh w2, [sp, #12]
-; NONEON-NOSVE-NEXT:    ldrh w4, [sp, #14]
-; NONEON-NOSVE-NEXT:    add w13, w13, w13
+; NONEON-NOSVE-NEXT:    ldrh w14, [sp]
+; NONEON-NOSVE-NEXT:    ldrh w15, [sp, #2]
+; NONEON-NOSVE-NEXT:    ldrh w4, [sp, #4]
+; NONEON-NOSVE-NEXT:    ldrh w5, [sp, #6]
+; NONEON-NOSVE-NEXT:    ldrh w2, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldrh w3, [sp, #10]
 ; NONEON-NOSVE-NEXT:    add w14, w14, w14
-; NONEON-NOSVE-NEXT:    ldrh w18, [sp, #8]
-; NONEON-NOSVE-NEXT:    ldrh w0, [sp, #10]
-; NONEON-NOSVE-NEXT:    strh w14, [sp, #54]
-; NONEON-NOSVE-NEXT:    add w14, w3, w3
-; NONEON-NOSVE-NEXT:    strh w13, [sp, #52]
-; NONEON-NOSVE-NEXT:    add w13, w5, w5
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    strh w14, [sp, #50]
-; NONEON-NOSVE-NEXT:    add w14, w4, w4
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #26]
-; NONEON-NOSVE-NEXT:    strh w13, [sp, #48]
-; NONEON-NOSVE-NEXT:    add w13, w2, w2
-; NONEON-NOSVE-NEXT:    ldrh w17, [sp, #22]
-; NONEON-NOSVE-NEXT:    strh w14, [sp, #46]
+; NONEON-NOSVE-NEXT:    add w15, w15, w15
+; NONEON-NOSVE-NEXT:    ldrh w18, [sp, #12]
+; NONEON-NOSVE-NEXT:    and w14, w14, #0xffff
+; NONEON-NOSVE-NEXT:    and w15, w15, #0xffff
+; NONEON-NOSVE-NEXT:    ldrh w0, [sp, #14]
+; NONEON-NOSVE-NEXT:    stp wzr, w15, [sp, #84]
+; NONEON-NOSVE-NEXT:    add w15, w4, w4
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #28]
+; NONEON-NOSVE-NEXT:    stp wzr, w14, [sp, #76]
+; NONEON-NOSVE-NEXT:    add w14, w5, w5
+; NONEON-NOSVE-NEXT:    and w15, w15, #0xffff
+; NONEON-NOSVE-NEXT:    and w14, w14, #0xffff
+; NONEON-NOSVE-NEXT:    stp wzr, w15, [sp, #60]
+; NONEON-NOSVE-NEXT:    add w15, w3, w3
+; NONEON-NOSVE-NEXT:    stp wzr, w14, [sp, #68]
+; NONEON-NOSVE-NEXT:    add w14, w2, w2
+; NONEON-NOSVE-NEXT:    and w15, w15, #0xffff
+; NONEON-NOSVE-NEXT:    and w14, w14, #0xffff
+; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #30]
+; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #24]
+; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #26]
+; NONEON-NOSVE-NEXT:    ldrh w13, [sp, #20]
+; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #22]
+; NONEON-NOSVE-NEXT:    ldrh w16, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldrh w17, [sp, #18]
+; NONEON-NOSVE-NEXT:    stp wzr, w15, [sp, #52]
+; NONEON-NOSVE-NEXT:    add w15, w18, w18
+; NONEON-NOSVE-NEXT:    stp wzr, w14, [sp, #44]
 ; NONEON-NOSVE-NEXT:    add w14, w0, w0
-; NONEON-NOSVE-NEXT:    add w9, w9, w9
-; NONEON-NOSVE-NEXT:    strh w13, [sp, #44]
-; NONEON-NOSVE-NEXT:    add w13, w18, w18
-; NONEON-NOSVE-NEXT:    add w8, w8, w8
-; NONEON-NOSVE-NEXT:    strh w14, [sp, #42]
-; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #28]
-; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #30]
-; NONEON-NOSVE-NEXT:    strh w13, [sp, #40]
-; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrh w15, [sp, #18]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #40]
-; NONEON-NOSVE-NEXT:    add w14, w17, w17
-; NONEON-NOSVE-NEXT:    ldrh w16, [sp, #20]
-; NONEON-NOSVE-NEXT:    strh w9, [sp, #58]
+; NONEON-NOSVE-NEXT:    and w15, w15, #0xffff
+; NONEON-NOSVE-NEXT:    and w14, w14, #0xffff
+; NONEON-NOSVE-NEXT:    add w13, w13, w13
+; NONEON-NOSVE-NEXT:    stp wzr, w14, [sp, #36]
+; NONEON-NOSVE-NEXT:    add w14, w16, w16
 ; NONEON-NOSVE-NEXT:    add w12, w12, w12
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #56]
-; NONEON-NOSVE-NEXT:    add w11, w11, w11
+; NONEON-NOSVE-NEXT:    str w15, [sp, #32]
+; NONEON-NOSVE-NEXT:    add w15, w17, w17
 ; NONEON-NOSVE-NEXT:    add w10, w10, w10
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #72]
-; NONEON-NOSVE-NEXT:    add w13, w16, w16
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #82]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #80]
-; NONEON-NOSVE-NEXT:    strh w14, [sp, #70]
-; NONEON-NOSVE-NEXT:    add w14, w15, w15
-; NONEON-NOSVE-NEXT:    strh w13, [sp, #68]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #120]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #86]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #84]
-; NONEON-NOSVE-NEXT:    strh w14, [sp, #66]
-; NONEON-NOSVE-NEXT:    strh w12, [sp, #64]
-; NONEON-NOSVE-NEXT:    strh w11, [sp, #62]
-; NONEON-NOSVE-NEXT:    strh w10, [sp, #60]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #56]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #128]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #74]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #72]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #276]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #332]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #104]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #78]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #76]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #88]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #120]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #98]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #96]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #324]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #152]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #102]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #100]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #184]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #104]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #160]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #90]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #88]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #348]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #136]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #94]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #92]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #168]
-; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #152]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #144]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #184]
-; NONEON-NOSVE-NEXT:    str d0, [sp, #360]
-; NONEON-NOSVE-NEXT:    ldp d2, d0, [sp, #136]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #340]
-; NONEON-NOSVE-NEXT:    str w9, [sp, #264]
-; NONEON-NOSVE-NEXT:    stp wzr, w8, [sp, #252]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #192]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #208]
-; NONEON-NOSVE-NEXT:    str d2, [sp, #200]
-; NONEON-NOSVE-NEXT:    str w9, [sp, #280]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #272]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #168]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #300]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #256]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #292]
-; NONEON-NOSVE-NEXT:    stp w9, wzr, [sp, #232]
-; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #224]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #176]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #316]
-; NONEON-NOSVE-NEXT:    str wzr, [sp, #308]
-; NONEON-NOSVE-NEXT:    stp wzr, w9, [sp, #244]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #240]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #216]
-; NONEON-NOSVE-NEXT:    ldp q3, q4, [sp, #224]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #320]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #364]
-; NONEON-NOSVE-NEXT:    str w9, [sp, #328]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #344]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #360]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #336]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #200]
-; NONEON-NOSVE-NEXT:    ldp q6, q7, [sp, #320]
-; NONEON-NOSVE-NEXT:    str w9, [sp, #296]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #288]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #208]
-; NONEON-NOSVE-NEXT:    str w9, [sp, #312]
-; NONEON-NOSVE-NEXT:    str w8, [sp, #304]
-; NONEON-NOSVE-NEXT:    ldp q5, q2, [sp, #288]
+; NONEON-NOSVE-NEXT:    add w11, w11, w11
+; NONEON-NOSVE-NEXT:    add w9, w9, w9
+; NONEON-NOSVE-NEXT:    add w8, w8, w8
+; NONEON-NOSVE-NEXT:    and w14, w14, #0xffff
+; NONEON-NOSVE-NEXT:    and w15, w15, #0xffff
+; NONEON-NOSVE-NEXT:    and w13, w13, #0xffff
+; NONEON-NOSVE-NEXT:    and w12, w12, #0xffff
+; NONEON-NOSVE-NEXT:    and w10, w10, #0xffff
+; NONEON-NOSVE-NEXT:    and w11, w11, #0xffff
+; NONEON-NOSVE-NEXT:    and w9, w9, #0xffff
+; NONEON-NOSVE-NEXT:    and w8, w8, #0xffff
+; NONEON-NOSVE-NEXT:    stp wzr, w15, [sp, #148]
+; NONEON-NOSVE-NEXT:    stp wzr, w14, [sp, #140]
+; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #64]
+; NONEON-NOSVE-NEXT:    stp wzr, w12, [sp, #132]
+; NONEON-NOSVE-NEXT:    ldp q4, q3, [sp, #32]
+; NONEON-NOSVE-NEXT:    stp wzr, w13, [sp, #124]
+; NONEON-NOSVE-NEXT:    stp wzr, w11, [sp, #116]
+; NONEON-NOSVE-NEXT:    ldp q7, q6, [sp, #128]
+; NONEON-NOSVE-NEXT:    stp wzr, w10, [sp, #108]
+; NONEON-NOSVE-NEXT:    stp wzr, w9, [sp, #100]
+; NONEON-NOSVE-NEXT:    str w8, [sp, #96]
+; NONEON-NOSVE-NEXT:    ldp q2, q5, [sp, #96]
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [x1]
 ; NONEON-NOSVE-NEXT:    stp q3, q4, [x1, #32]
 ; NONEON-NOSVE-NEXT:    stp q6, q7, [x1, #64]
 ; NONEON-NOSVE-NEXT:    stp q5, q2, [x1, #96]
-; NONEON-NOSVE-NEXT:    add sp, sp, #368
+; NONEON-NOSVE-NEXT:    add sp, sp, #160
 ; NONEON-NOSVE-NEXT:    ret
   %a = load <16 x i16>, ptr %in
   %b = add <16 x i16> %a, %a
@@ -4082,47 +2825,34 @@ define void @zext_v8i32_v8i64(ptr %in, ptr %out) {
 ;
 ; NONEON-NOSVE-LABEL: zext_v8i32_v8i64:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #160
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 160
 ; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
-; NONEON-NOSVE-NEXT:    stp q1, q0, [sp]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp]
-; NONEON-NOSVE-NEXT:    ldp w12, w13, [sp, #8]
-; NONEON-NOSVE-NEXT:    ldp w14, w15, [sp, #16]
+; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-96]!
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
+; NONEON-NOSVE-NEXT:    ldp w10, w11, [sp]
+; NONEON-NOSVE-NEXT:    ldp w14, w15, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldp w12, w13, [sp, #16]
+; NONEON-NOSVE-NEXT:    add w10, w10, w10
+; NONEON-NOSVE-NEXT:    add w11, w11, w11
+; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #24]
+; NONEON-NOSVE-NEXT:    stp w10, wzr, [sp, #48]
+; NONEON-NOSVE-NEXT:    add w10, w15, w15
+; NONEON-NOSVE-NEXT:    stp w10, wzr, [sp, #40]
+; NONEON-NOSVE-NEXT:    add w10, w14, w14
+; NONEON-NOSVE-NEXT:    stp w10, wzr, [sp, #32]
+; NONEON-NOSVE-NEXT:    add w10, w13, w13
 ; NONEON-NOSVE-NEXT:    add w9, w9, w9
+; NONEON-NOSVE-NEXT:    stp w10, wzr, [sp, #88]
+; NONEON-NOSVE-NEXT:    add w10, w12, w12
 ; NONEON-NOSVE-NEXT:    add w8, w8, w8
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #40]
-; NONEON-NOSVE-NEXT:    add w9, w13, w13
-; NONEON-NOSVE-NEXT:    add w8, w12, w12
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #32]
-; NONEON-NOSVE-NEXT:    add w9, w15, w15
-; NONEON-NOSVE-NEXT:    add w8, w14, w14
-; NONEON-NOSVE-NEXT:    ldp w10, w11, [sp, #24]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #56]
-; NONEON-NOSVE-NEXT:    add w9, w11, w11
-; NONEON-NOSVE-NEXT:    add w8, w10, w10
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #48]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #72]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT:    stp w9, wzr, [sp, #120]
-; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #64]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT:    stp w9, wzr, [sp, #104]
-; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #96]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #88]
-; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #96]
-; NONEON-NOSVE-NEXT:    stp w9, wzr, [sp, #152]
-; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #144]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #80]
-; NONEON-NOSVE-NEXT:    stp w9, wzr, [sp, #136]
-; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #128]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #128]
+; NONEON-NOSVE-NEXT:    stp w11, wzr, [sp, #56]
+; NONEON-NOSVE-NEXT:    stp w10, wzr, [sp, #80]
+; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #32]
+; NONEON-NOSVE-NEXT:    stp w9, wzr, [sp, #72]
+; NONEON-NOSVE-NEXT:    stp w8, wzr, [sp, #64]
+; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
 ; NONEON-NOSVE-NEXT:    stp q2, q3, [x1]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [x1, #32]
-; NONEON-NOSVE-NEXT:    add sp, sp, #160
+; NONEON-NOSVE-NEXT:    add sp, sp, #96
 ; NONEON-NOSVE-NEXT:    ret
   %a = load <8 x i32>, ptr %in
   %b = add <8 x i32> %a, %a
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
index 46a2ce6ed7109..7df362826d052 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
@@ -452,29 +452,23 @@ define void @ucvtf_v4i16_v4f64(ptr %a, ptr %b) {
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v4i16_v4f64:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #80
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 80
+; NONEON-NOSVE-NEXT:    sub sp, sp, #48
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
 ; NONEON-NOSVE-NEXT:    ldr d0, [x0]
 ; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #10]
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #10]
+; NONEON-NOSVE-NEXT:    ucvtf d1, w8
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #8]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #24]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #14]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #12]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #40]
-; NONEON-NOSVE-NEXT:    ucvtf d1, w9
 ; NONEON-NOSVE-NEXT:    ucvtf d0, w8
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #32]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #14]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
+; NONEON-NOSVE-NEXT:    ucvtf d1, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #12]
 ; NONEON-NOSVE-NEXT:    ucvtf d0, w8
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #48]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #16]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [x1]
-; NONEON-NOSVE-NEXT:    add sp, sp, #80
+; NONEON-NOSVE-NEXT:    add sp, sp, #48
 ; NONEON-NOSVE-NEXT:    ret
   %op1 = load <4 x i16>, ptr %a
   %res = uitofp <4 x i16> %op1 to <4 x double>
@@ -506,49 +500,36 @@ define void @ucvtf_v8i16_v8f64(ptr %a, ptr %b) {
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v8i16_v8f64:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #160
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 160
 ; NONEON-NOSVE-NEXT:    ldr q0, [x0]
-; NONEON-NOSVE-NEXT:    str q0, [sp]
+; NONEON-NOSVE-NEXT:    str q0, [sp, #-96]!
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #26]
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #26]
+; NONEON-NOSVE-NEXT:    ucvtf d1, w8
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #56]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #30]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #48]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #18]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #40]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #22]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #32]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #88]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT:    ucvtf d1, w9
 ; NONEON-NOSVE-NEXT:    ucvtf d0, w8
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #80]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #144]
-; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #30]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
+; NONEON-NOSVE-NEXT:    ucvtf d1, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #28]
 ; NONEON-NOSVE-NEXT:    ucvtf d0, w8
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #72]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #128]
-; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #64]
+; NONEON-NOSVE-NEXT:    ucvtf d1, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #64]
 ; NONEON-NOSVE-NEXT:    ucvtf d0, w8
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #64]
-; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #128]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #112]
-; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
+; NONEON-NOSVE-NEXT:    ucvtf d1, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
 ; NONEON-NOSVE-NEXT:    ucvtf d0, w8
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #96]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #96]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
+; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
 ; NONEON-NOSVE-NEXT:    stp q2, q3, [x1]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [x1, #32]
-; NONEON-NOSVE-NEXT:    add sp, sp, #160
+; NONEON-NOSVE-NEXT:    add sp, sp, #96
 ; NONEON-NOSVE-NEXT:    ret
   %op1 = load <8 x i16>, ptr %a
   %res = uitofp <8 x i16> %op1 to <8 x double>
@@ -602,92 +583,63 @@ define void @ucvtf_v16i16_v16f64(ptr %a, ptr %b) {
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v16i16_v16f64:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #336
-; NONEON-NOSVE-NEXT:    str x29, [sp, #320] // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 336
-; NONEON-NOSVE-NEXT:    .cfi_offset w29, -16
+; NONEON-NOSVE-NEXT:    sub sp, sp, #192
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 192
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [x0]
-; NONEON-NOSVE-NEXT:    ldr x29, [sp, #320] // 8-byte Folded Reload
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [sp]
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #40]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #50]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #48]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #96]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #54]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #52]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #56]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #88]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #42]
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #42]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
+; NONEON-NOSVE-NEXT:    ucvtf d1, w8
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #40]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #88]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #80]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #46]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #44]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #72]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #66]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #64]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #152]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #72]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #128]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #70]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #68]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #120]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #58]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #56]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #136]
-; NONEON-NOSVE-NEXT:    ldp d2, d1, [sp, #120]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #62]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #60]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #104]
-; NONEON-NOSVE-NEXT:    str d1, [sp, #328]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #104]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #160]
-; NONEON-NOSVE-NEXT:    stp d1, d2, [sp, #176]
-; NONEON-NOSVE-NEXT:    str d0, [sp, #168]
-; NONEON-NOSVE-NEXT:    ucvtf d1, w9
 ; NONEON-NOSVE-NEXT:    ucvtf d0, w8
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #152]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #240]
-; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #46]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #112]
+; NONEON-NOSVE-NEXT:    ucvtf d1, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #44]
 ; NONEON-NOSVE-NEXT:    ucvtf d0, w8
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #144]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #224]
-; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #34]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #96]
+; NONEON-NOSVE-NEXT:    ucvtf d1, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ucvtf d0, w8
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #136]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #208]
-; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #38]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
+; NONEON-NOSVE-NEXT:    ucvtf d1, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #36]
 ; NONEON-NOSVE-NEXT:    ucvtf d0, w8
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #332]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #192]
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #58]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #64]
 ; NONEON-NOSVE-NEXT:    ucvtf d1, w8
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #328]
-; NONEON-NOSVE-NEXT:    ldp q4, q3, [sp, #192]
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #56]
+; NONEON-NOSVE-NEXT:    ldp q4, q3, [sp, #64]
 ; NONEON-NOSVE-NEXT:    ucvtf d0, w8
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #184]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #304]
-; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #62]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #176]
+; NONEON-NOSVE-NEXT:    ucvtf d1, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #60]
 ; NONEON-NOSVE-NEXT:    ucvtf d0, w8
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #176]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #288]
-; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #50]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #160]
+; NONEON-NOSVE-NEXT:    ucvtf d1, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #48]
+; NONEON-NOSVE-NEXT:    ldp q7, q6, [sp, #160]
 ; NONEON-NOSVE-NEXT:    ucvtf d0, w8
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #168]
-; NONEON-NOSVE-NEXT:    ldp q7, q6, [sp, #288]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #272]
-; NONEON-NOSVE-NEXT:    ucvtf d1, w9
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #54]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #144]
+; NONEON-NOSVE-NEXT:    ucvtf d1, w8
+; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #52]
 ; NONEON-NOSVE-NEXT:    ucvtf d0, w8
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #256]
-; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #224]
-; NONEON-NOSVE-NEXT:    ldp q2, q5, [sp, #256]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #128]
+; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #96]
+; NONEON-NOSVE-NEXT:    ldp q2, q5, [sp, #128]
 ; NONEON-NOSVE-NEXT:    stp q3, q4, [x1, #32]
 ; NONEON-NOSVE-NEXT:    stp q6, q7, [x1, #64]
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [x1]
 ; NONEON-NOSVE-NEXT:    stp q5, q2, [x1, #96]
-; NONEON-NOSVE-NEXT:    add sp, sp, #336
+; NONEON-NOSVE-NEXT:    add sp, sp, #192
 ; NONEON-NOSVE-NEXT:    ret
   %op1 = load <16 x i16>, ptr %a
   %res = uitofp <16 x i16> %op1 to <16 x double>
@@ -1891,29 +1843,23 @@ define void @scvtf_v4i16_v4f64(ptr %a, ptr %b) {
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v4i16_v4f64:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #80
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 80
+; NONEON-NOSVE-NEXT:    sub sp, sp, #48
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
 ; NONEON-NOSVE-NEXT:    ldr d0, [x0]
 ; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #10]
+; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #10]
+; NONEON-NOSVE-NEXT:    scvtf d1, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #8]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #24]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #14]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #12]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #40]
-; NONEON-NOSVE-NEXT:    scvtf d1, w9
 ; NONEON-NOSVE-NEXT:    scvtf d0, w8
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #32]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT:    scvtf d1, w9
+; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #14]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
+; NONEON-NOSVE-NEXT:    scvtf d1, w8
+; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #12]
 ; NONEON-NOSVE-NEXT:    scvtf d0, w8
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #48]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #16]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [x1]
-; NONEON-NOSVE-NEXT:    add sp, sp, #80
+; NONEON-NOSVE-NEXT:    add sp, sp, #48
 ; NONEON-NOSVE-NEXT:    ret
   %op1 = load <4 x i16>, ptr %a
   %res = sitofp <4 x i16> %op1 to <4 x double>
@@ -1945,49 +1891,36 @@ define void @scvtf_v8i16_v8f64(ptr %a, ptr %b) {
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v8i16_v8f64:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #160
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 160
 ; NONEON-NOSVE-NEXT:    ldr q0, [x0]
-; NONEON-NOSVE-NEXT:    str q0, [sp]
+; NONEON-NOSVE-NEXT:    str q0, [sp, #-96]!
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #26]
+; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #26]
+; NONEON-NOSVE-NEXT:    scvtf d1, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #56]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #30]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #48]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #18]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #40]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #22]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #32]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #88]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #64]
-; NONEON-NOSVE-NEXT:    scvtf d1, w9
 ; NONEON-NOSVE-NEXT:    scvtf d0, w8
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #80]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #144]
-; NONEON-NOSVE-NEXT:    scvtf d1, w9
+; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #30]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
+; NONEON-NOSVE-NEXT:    scvtf d1, w8
+; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #28]
 ; NONEON-NOSVE-NEXT:    scvtf d0, w8
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #72]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #128]
-; NONEON-NOSVE-NEXT:    scvtf d1, w9
+; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #18]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #64]
+; NONEON-NOSVE-NEXT:    scvtf d1, w8
+; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #64]
 ; NONEON-NOSVE-NEXT:    scvtf d0, w8
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #64]
-; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #128]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #112]
-; NONEON-NOSVE-NEXT:    scvtf d1, w9
+; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #22]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
+; NONEON-NOSVE-NEXT:    scvtf d1, w8
+; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #20]
 ; NONEON-NOSVE-NEXT:    scvtf d0, w8
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #96]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #96]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
+; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
 ; NONEON-NOSVE-NEXT:    stp q2, q3, [x1]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [x1, #32]
-; NONEON-NOSVE-NEXT:    add sp, sp, #160
+; NONEON-NOSVE-NEXT:    add sp, sp, #96
 ; NONEON-NOSVE-NEXT:    ret
   %op1 = load <8 x i16>, ptr %a
   %res = sitofp <8 x i16> %op1 to <8 x double>
@@ -2041,92 +1974,63 @@ define void @scvtf_v16i16_v16f64(ptr %a, ptr %b) {
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v16i16_v16f64:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #336
-; NONEON-NOSVE-NEXT:    str x29, [sp, #320] // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 336
-; NONEON-NOSVE-NEXT:    .cfi_offset w29, -16
+; NONEON-NOSVE-NEXT:    sub sp, sp, #192
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 192
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [x0]
-; NONEON-NOSVE-NEXT:    ldr x29, [sp, #320] // 8-byte Folded Reload
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [sp]
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #40]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #50]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #48]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #96]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #54]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #52]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #56]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #88]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #42]
+; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #42]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
+; NONEON-NOSVE-NEXT:    scvtf d1, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #40]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #88]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #80]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #46]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #44]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #72]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #66]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #64]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #152]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #72]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #128]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #70]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #68]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #120]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #58]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #56]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #136]
-; NONEON-NOSVE-NEXT:    ldp d2, d1, [sp, #120]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #112]
-; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #62]
-; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #60]
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #104]
-; NONEON-NOSVE-NEXT:    str d1, [sp, #328]
-; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #104]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #160]
-; NONEON-NOSVE-NEXT:    stp d1, d2, [sp, #176]
-; NONEON-NOSVE-NEXT:    str d0, [sp, #168]
-; NONEON-NOSVE-NEXT:    scvtf d1, w9
 ; NONEON-NOSVE-NEXT:    scvtf d0, w8
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #152]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #240]
-; NONEON-NOSVE-NEXT:    scvtf d1, w9
+; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #46]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #112]
+; NONEON-NOSVE-NEXT:    scvtf d1, w8
+; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #44]
 ; NONEON-NOSVE-NEXT:    scvtf d0, w8
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #144]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #224]
-; NONEON-NOSVE-NEXT:    scvtf d1, w9
+; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #34]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #96]
+; NONEON-NOSVE-NEXT:    scvtf d1, w8
+; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #32]
 ; NONEON-NOSVE-NEXT:    scvtf d0, w8
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #136]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #208]
-; NONEON-NOSVE-NEXT:    scvtf d1, w9
+; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #38]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
+; NONEON-NOSVE-NEXT:    scvtf d1, w8
+; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #36]
 ; NONEON-NOSVE-NEXT:    scvtf d0, w8
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #332]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #192]
+; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #58]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #64]
 ; NONEON-NOSVE-NEXT:    scvtf d1, w8
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #328]
-; NONEON-NOSVE-NEXT:    ldp q4, q3, [sp, #192]
+; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #56]
+; NONEON-NOSVE-NEXT:    ldp q4, q3, [sp, #64]
 ; NONEON-NOSVE-NEXT:    scvtf d0, w8
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #184]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #304]
-; NONEON-NOSVE-NEXT:    scvtf d1, w9
+; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #62]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #176]
+; NONEON-NOSVE-NEXT:    scvtf d1, w8
+; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #60]
 ; NONEON-NOSVE-NEXT:    scvtf d0, w8
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #176]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #288]
-; NONEON-NOSVE-NEXT:    scvtf d1, w9
+; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #50]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #160]
+; NONEON-NOSVE-NEXT:    scvtf d1, w8
+; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #48]
+; NONEON-NOSVE-NEXT:    ldp q7, q6, [sp, #160]
 ; NONEON-NOSVE-NEXT:    scvtf d0, w8
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #168]
-; NONEON-NOSVE-NEXT:    ldp q7, q6, [sp, #288]
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #272]
-; NONEON-NOSVE-NEXT:    scvtf d1, w9
+; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #54]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #144]
+; NONEON-NOSVE-NEXT:    scvtf d1, w8
+; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #52]
 ; NONEON-NOSVE-NEXT:    scvtf d0, w8
-; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #256]
-; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #224]
-; NONEON-NOSVE-NEXT:    ldp q2, q5, [sp, #256]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #128]
+; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #96]
+; NONEON-NOSVE-NEXT:    ldp q2, q5, [sp, #128]
 ; NONEON-NOSVE-NEXT:    stp q3, q4, [x1, #32]
 ; NONEON-NOSVE-NEXT:    stp q6, q7, [x1, #64]
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [x1]
 ; NONEON-NOSVE-NEXT:    stp q5, q2, [x1, #96]
-; NONEON-NOSVE-NEXT:    add sp, sp, #336
+; NONEON-NOSVE-NEXT:    add sp, sp, #192
 ; NONEON-NOSVE-NEXT:    ret
   %op1 = load <16 x i16>, ptr %a
   %res = sitofp <16 x i16> %op1 to <16 x double>
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
index e8c9704940c70..e6c6003ee6c69 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
@@ -28,23 +28,17 @@ define void @alloc_v4i8(ptr %st_ptr) nounwind {
 ;
 ; NONEON-NOSVE-LABEL: alloc_v4i8:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #48
-; NONEON-NOSVE-NEXT:    stp x30, x19, [sp, #32] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT:    sub sp, sp, #32
+; NONEON-NOSVE-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
 ; NONEON-NOSVE-NEXT:    mov x19, x0
-; NONEON-NOSVE-NEXT:    add x0, sp, #28
+; NONEON-NOSVE-NEXT:    add x0, sp, #12
 ; NONEON-NOSVE-NEXT:    bl def
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #30]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #12]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    strh w8, [sp, #8]
-; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
-; NONEON-NOSVE-NEXT:    str d0, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #14]
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strb w8, [x19, #1]
 ; NONEON-NOSVE-NEXT:    strb w9, [x19]
-; NONEON-NOSVE-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
-; NONEON-NOSVE-NEXT:    add sp, sp, #48
+; NONEON-NOSVE-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT:    add sp, sp, #32
 ; NONEON-NOSVE-NEXT:    ret
   %alloc = alloca [4 x i8]
   call void @def(ptr %alloc)
diff --git a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
index 46a2459485987..8d39ba231ac91 100644
--- a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
@@ -1245,54 +1245,48 @@ define void @zext_v16i4_to_v16i32_in_loop(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    ldr x9, [x0, x8]
 ; CHECK-NEXT:    add x8, x8, #16
 ; CHECK-NEXT:    cmp x8, #128
-; CHECK-NEXT:    and w11, w9, #0xf
-; CHECK-NEXT:    ubfx w10, w9, #4, #4
-; CHECK-NEXT:    fmov s1, w11
-; CHECK-NEXT:    mov.b v1[1], w10
-; CHECK-NEXT:    ubfx w10, w9, #8, #4
-; CHECK-NEXT:    mov.b v1[2], w10
-; CHECK-NEXT:    ubfx w10, w9, #12, #4
-; CHECK-NEXT:    mov.b v1[3], w10
-; CHECK-NEXT:    ubfx w10, w9, #16, #4
-; CHECK-NEXT:    mov.b v1[4], w10
-; CHECK-NEXT:    ubfx w10, w9, #20, #4
-; CHECK-NEXT:    mov.b v1[5], w10
-; CHECK-NEXT:    ubfx w10, w9, #24, #4
-; CHECK-NEXT:    mov.b v1[6], w10
-; CHECK-NEXT:    lsr w10, w9, #28
-; CHECK-NEXT:    mov.b v1[7], w10
-; CHECK-NEXT:    ubfx x10, x9, #32, #4
-; CHECK-NEXT:    mov.b v1[8], w10
-; CHECK-NEXT:    ubfx x10, x9, #36, #4
-; CHECK-NEXT:    mov.b v1[9], w10
-; CHECK-NEXT:    ubfx x10, x9, #40, #4
-; CHECK-NEXT:    mov.b v1[10], w10
-; CHECK-NEXT:    ubfx x10, x9, #44, #4
-; CHECK-NEXT:    mov.b v1[11], w10
-; CHECK-NEXT:    ubfx x10, x9, #48, #4
-; CHECK-NEXT:    mov.b v1[12], w10
+; CHECK-NEXT:    ubfx x12, x9, #48, #4
 ; CHECK-NEXT:    ubfx x10, x9, #52, #4
-; CHECK-NEXT:    mov.b v1[13], w10
-; CHECK-NEXT:    ubfx x10, x9, #56, #4
-; CHECK-NEXT:    lsr x9, x9, #60
-; CHECK-NEXT:    mov.b v1[14], w10
-; CHECK-NEXT:    mov.b v1[15], w9
-; CHECK-NEXT:    ext.16b v2, v1, v1, #8
-; CHECK-NEXT:    zip2.8b v3, v1, v0
-; CHECK-NEXT:    zip1.8b v1, v1, v0
-; CHECK-NEXT:    zip2.8b v4, v2, v0
-; CHECK-NEXT:    zip1.8b v2, v2, v0
-; CHECK-NEXT:    ushll.4s v3, v3, #0
+; CHECK-NEXT:    ubfx x14, x9, #32, #4
+; CHECK-NEXT:    ubfx w15, w9, #16, #4
+; CHECK-NEXT:    ubfx x11, x9, #36, #4
+; CHECK-NEXT:    ubfx w13, w9, #20, #4
+; CHECK-NEXT:    fmov s1, w12
+; CHECK-NEXT:    fmov s2, w14
+; CHECK-NEXT:    ubfx w12, w9, #4, #4
+; CHECK-NEXT:    fmov s3, w15
+; CHECK-NEXT:    mov.h v1[1], w10
+; CHECK-NEXT:    and w10, w9, #0xf
+; CHECK-NEXT:    mov.h v2[1], w11
+; CHECK-NEXT:    fmov s4, w10
+; CHECK-NEXT:    ubfx x11, x9, #56, #4
+; CHECK-NEXT:    mov.h v3[1], w13
+; CHECK-NEXT:    ubfx x10, x9, #40, #4
+; CHECK-NEXT:    mov.h v4[1], w12
+; CHECK-NEXT:    ubfx w12, w9, #24, #4
+; CHECK-NEXT:    mov.h v1[2], w11
+; CHECK-NEXT:    ubfx w11, w9, #8, #4
+; CHECK-NEXT:    mov.h v2[2], w10
+; CHECK-NEXT:    lsr x10, x9, #60
+; CHECK-NEXT:    mov.h v3[2], w12
+; CHECK-NEXT:    ubfx x12, x9, #44, #4
+; CHECK-NEXT:    mov.h v4[2], w11
+; CHECK-NEXT:    lsr w11, w9, #28
+; CHECK-NEXT:    ubfx w9, w9, #12, #4
+; CHECK-NEXT:    mov.h v1[3], w10
+; CHECK-NEXT:    mov.h v2[3], w12
+; CHECK-NEXT:    mov.h v3[3], w11
+; CHECK-NEXT:    mov.h v4[3], w9
 ; CHECK-NEXT:    ushll.4s v1, v1, #0
-; CHECK-NEXT:    and.16b v3, v3, v0
-; CHECK-NEXT:    ushll.4s v4, v4, #0
 ; CHECK-NEXT:    ushll.4s v2, v2, #0
+; CHECK-NEXT:    ushll.4s v3, v3, #0
+; CHECK-NEXT:    ushll.4s v4, v4, #0
 ; CHECK-NEXT:    and.16b v1, v1, v0
-; CHECK-NEXT:    and.16b v4, v4, v0
 ; CHECK-NEXT:    and.16b v2, v2, v0
-; CHECK-NEXT:    stp q1, q3, [x1]
-; CHECK-NEXT:    stp q2, q4, [x1, #32]
-; CHECK-NEXT:    add x1, x1, #64
+; CHECK-NEXT:    and.16b v3, v3, v0
+; CHECK-NEXT:    and.16b v4, v4, v0
+; CHECK-NEXT:    stp q2, q1, [x1, #32]
+; CHECK-NEXT:    stp q4, q3, [x1], #64
 ; CHECK-NEXT:    b.ne LBB13_1
 ; CHECK-NEXT:  ; %bb.2: ; %exit
 ; CHECK-NEXT:    ret
@@ -1306,59 +1300,54 @@ define void @zext_v16i4_to_v16i32_in_loop(ptr %src, ptr %dst) {
 ; CHECK-BE-NEXT:    ldr x9, [x0, x8]
 ; CHECK-BE-NEXT:    add x8, x8, #16
 ; CHECK-BE-NEXT:    cmp x8, #128
-; CHECK-BE-NEXT:    lsr x10, x9, #60
-; CHECK-BE-NEXT:    ubfx x11, x9, #56, #4
-; CHECK-BE-NEXT:    fmov s1, w10
-; CHECK-BE-NEXT:    ubfx x10, x9, #52, #4
-; CHECK-BE-NEXT:    mov v1.b[1], w11
-; CHECK-BE-NEXT:    mov v1.b[2], w10
-; CHECK-BE-NEXT:    ubfx x10, x9, #48, #4
-; CHECK-BE-NEXT:    mov v1.b[3], w10
-; CHECK-BE-NEXT:    ubfx x10, x9, #44, #4
-; CHECK-BE-NEXT:    mov v1.b[4], w10
-; CHECK-BE-NEXT:    ubfx x10, x9, #40, #4
-; CHECK-BE-NEXT:    mov v1.b[5], w10
-; CHECK-BE-NEXT:    ubfx x10, x9, #36, #4
-; CHECK-BE-NEXT:    mov v1.b[6], w10
-; CHECK-BE-NEXT:    ubfx x10, x9, #32, #4
-; CHECK-BE-NEXT:    mov v1.b[7], w10
-; CHECK-BE-NEXT:    lsr w10, w9, #28
-; CHECK-BE-NEXT:    mov v1.b[8], w10
-; CHECK-BE-NEXT:    ubfx w10, w9, #24, #4
-; CHECK-BE-NEXT:    mov v1.b[9], w10
-; CHECK-BE-NEXT:    ubfx w10, w9, #20, #4
-; CHECK-BE-NEXT:    mov v1.b[10], w10
-; CHECK-BE-NEXT:    ubfx w10, w9, #16, #4
-; CHECK-BE-NEXT:    mov v1.b[11], w10
-; CHECK-BE-NEXT:    ubfx w10, w9, #12, #4
-; CHECK-BE-NEXT:    mov v1.b[12], w10
+; CHECK-BE-NEXT:    ubfx w12, w9, #12, #4
+; CHECK-BE-NEXT:    lsr w14, w9, #28
 ; CHECK-BE-NEXT:    ubfx w10, w9, #8, #4
-; CHECK-BE-NEXT:    mov v1.b[13], w10
-; CHECK-BE-NEXT:    ubfx w10, w9, #4, #4
-; CHECK-BE-NEXT:    and w9, w9, #0xf
-; CHECK-BE-NEXT:    mov v1.b[14], w10
+; CHECK-BE-NEXT:    ubfx x15, x9, #44, #4
+; CHECK-BE-NEXT:    ubfx w11, w9, #24, #4
+; CHECK-BE-NEXT:    ubfx x13, x9, #40, #4
+; CHECK-BE-NEXT:    fmov s1, w12
+; CHECK-BE-NEXT:    lsr x12, x9, #60
+; CHECK-BE-NEXT:    fmov s2, w14
+; CHECK-BE-NEXT:    fmov s3, w15
+; CHECK-BE-NEXT:    fmov s4, w12
+; CHECK-BE-NEXT:    ubfx w12, w9, #20, #4
+; CHECK-BE-NEXT:    mov v1.h[1], w10
+; CHECK-BE-NEXT:    ubfx x10, x9, #56, #4
+; CHECK-BE-NEXT:    mov v2.h[1], w11
+; CHECK-BE-NEXT:    ubfx w11, w9, #4, #4
+; CHECK-BE-NEXT:    mov v3.h[1], w13
+; CHECK-BE-NEXT:    mov v4.h[1], w10
+; CHECK-BE-NEXT:    ubfx x10, x9, #36, #4
+; CHECK-BE-NEXT:    mov v1.h[2], w11
+; CHECK-BE-NEXT:    ubfx x11, x9, #52, #4
+; CHECK-BE-NEXT:    mov v2.h[2], w12
+; CHECK-BE-NEXT:    mov v3.h[2], w10
+; CHECK-BE-NEXT:    and w10, w9, #0xf
+; CHECK-BE-NEXT:    ubfx w12, w9, #16, #4
+; CHECK-BE-NEXT:    mov v4.h[2], w11
+; CHECK-BE-NEXT:    ubfx x11, x9, #32, #4
+; CHECK-BE-NEXT:    ubfx x9, x9, #48, #4
+; CHECK-BE-NEXT:    mov v1.h[3], w10
+; CHECK-BE-NEXT:    mov v2.h[3], w12
 ; CHECK-BE-NEXT:    add x10, x1, #32
-; CHECK-BE-NEXT:    mov v1.b[15], w9
-; CHECK-BE-NEXT:    add x9, x1, #16
-; CHECK-BE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
-; CHECK-BE-NEXT:    zip2 v3.8b, v1.8b, v0.8b
-; CHECK-BE-NEXT:    zip1 v1.8b, v1.8b, v0.8b
-; CHECK-BE-NEXT:    zip2 v4.8b, v2.8b, v0.8b
-; CHECK-BE-NEXT:    zip1 v2.8b, v2.8b, v0.8b
-; CHECK-BE-NEXT:    ushll v3.4s, v3.4h, #0
+; CHECK-BE-NEXT:    mov v3.h[3], w11
+; CHECK-BE-NEXT:    mov v4.h[3], w9
+; CHECK-BE-NEXT:    add x9, x1, #48
 ; CHECK-BE-NEXT:    ushll v1.4s, v1.4h, #0
-; CHECK-BE-NEXT:    and v3.16b, v3.16b, v0.16b
-; CHECK-BE-NEXT:    ushll v4.4s, v4.4h, #0
 ; CHECK-BE-NEXT:    ushll v2.4s, v2.4h, #0
+; CHECK-BE-NEXT:    ushll v3.4s, v3.4h, #0
+; CHECK-BE-NEXT:    ushll v4.4s, v4.4h, #0
 ; CHECK-BE-NEXT:    and v1.16b, v1.16b, v0.16b
-; CHECK-BE-NEXT:    st1 { v3.4s }, [x9]
-; CHECK-BE-NEXT:    add x9, x1, #48
-; CHECK-BE-NEXT:    and v4.16b, v4.16b, v0.16b
 ; CHECK-BE-NEXT:    and v2.16b, v2.16b, v0.16b
-; CHECK-BE-NEXT:    st1 { v1.4s }, [x1]
-; CHECK-BE-NEXT:    add x1, x1, #64
-; CHECK-BE-NEXT:    st1 { v4.4s }, [x9]
+; CHECK-BE-NEXT:    and v3.16b, v3.16b, v0.16b
+; CHECK-BE-NEXT:    and v4.16b, v4.16b, v0.16b
+; CHECK-BE-NEXT:    st1 { v1.4s }, [x9]
+; CHECK-BE-NEXT:    add x9, x1, #16
 ; CHECK-BE-NEXT:    st1 { v2.4s }, [x10]
+; CHECK-BE-NEXT:    st1 { v4.4s }, [x1]
+; CHECK-BE-NEXT:    add x1, x1, #64
+; CHECK-BE-NEXT:    st1 { v3.4s }, [x9]
 ; CHECK-BE-NEXT:    b.ne .LBB13_1
 ; CHECK-BE-NEXT:  // %bb.2: // %exit
 ; CHECK-BE-NEXT:    ret



More information about the llvm-commits mailing list