[llvm] b42fe67 - [DAG] Add users of operand of simplified extract_vector_elt to worklist (#100074)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 23 08:34:12 PDT 2024


Author: David Green
Date: 2024-07-23T16:34:09+01:00
New Revision: b42fe6740ec696dca0e3dc914d2638088caa3f53

URL: https://github.com/llvm/llvm-project/commit/b42fe6740ec696dca0e3dc914d2638088caa3f53
DIFF: https://github.com/llvm/llvm-project/commit/b42fe6740ec696dca0e3dc914d2638088caa3f53.diff

LOG: [DAG] Add users of operand of simplified extract_vector_elt to worklist (#100074)

This helps ensure we revisit the remaining extract_vector_elt uses of a node
so that the node can be optimized away in cases such as extract(insert(scalartovec(x), 1), 0).
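
A rough IR-level illustration of that pattern (a hypothetical example, not one of the commit's test cases; in practice the shape usually arises while legalizing wide types such as i128, as in the tests below, rather than from literal IR):

  define i64 @extract_of_insert(i64 %a, i64 %b) {
    ; lane 0 <- %a; in the DAG this typically becomes a scalar_to_vector node
    %v0 = insertelement <2 x i64> poison, i64 %a, i64 0
    ; lane 1 <- %b; becomes an insert_vector_elt node
    %v1 = insertelement <2 x i64> %v0, i64 %b, i64 1
    ; extracting lane 0 bypasses the insert and yields %a directly
    %e = extractelement <2 x i64> %v1, i64 0
    ret i64 %e
  }

With this change, the remaining users of the insert_vector_elt are put back on the worklist when the extract is folded, so the now-unused insert/scalar_to_vector chain can be cleaned up as well; that is what removes the mov v0.d[1], x1 / fmov x0, d0 sequences in the AArch64 test diffs below.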

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
    llvm/test/CodeGen/AArch64/arm64-vabs.ll
    llvm/test/CodeGen/AArch64/cmp-select-sign.ll
    llvm/test/CodeGen/AArch64/fptoi.ll
    llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
    llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
    llvm/test/CodeGen/AArch64/nontemporal-load.ll
    llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
    llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
    llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
    llvm/test/CodeGen/AArch64/usub_sat_vec.ll
    llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
    llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
    llvm/test/CodeGen/Thumb2/mve-minmaxi.ll
    llvm/test/CodeGen/Thumb2/mve-vst3.ll
    llvm/test/CodeGen/Thumb2/mve-vst4.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index aa9032ea2574c..cd0440077f526 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -22533,6 +22533,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
   if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
       Index == VecOp.getOperand(2)) {
     SDValue Elt = VecOp.getOperand(1);
+    AddUsersToWorklist(VecOp.getNode());
     return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
   }
 

diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
index 00cc6b21ccaf8..abf2e1272d645 100644
--- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
@@ -951,10 +951,8 @@ define <1 x i128> @sext_v1x64(<1 x i64> %arg) {
 ; CHECK-SD-LABEL: sext_v1x64:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    fmov x8, d0
-; CHECK-SD-NEXT:    asr x1, x8, #63
-; CHECK-SD-NEXT:    mov.d v0[1], x1
 ; CHECK-SD-NEXT:    fmov x0, d0
+; CHECK-SD-NEXT:    asr x1, x0, #63
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sext_v1x64:

diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
index 178c229d04e47..62a79e3547b29 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -1802,28 +1802,25 @@ define <2 x i128> @uabd_i64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-NEXT:    mov.d x8, v0[1]
 ; CHECK-NEXT:    mov.d x9, v1[1]
 ; CHECK-NEXT:    fmov x10, d0
-; CHECK-NEXT:    fmov x11, d1
-; CHECK-NEXT:    asr x12, x10, #63
-; CHECK-NEXT:    asr x13, x11, #63
-; CHECK-NEXT:    subs x10, x10, x11
+; CHECK-NEXT:    fmov x12, d1
+; CHECK-NEXT:    asr x14, x10, #63
 ; CHECK-NEXT:    asr x11, x8, #63
-; CHECK-NEXT:    asr x14, x9, #63
-; CHECK-NEXT:    sbc x12, x12, x13
+; CHECK-NEXT:    asr x13, x9, #63
+; CHECK-NEXT:    asr x15, x12, #63
 ; CHECK-NEXT:    subs x8, x8, x9
-; CHECK-NEXT:    sbc x9, x11, x14
-; CHECK-NEXT:    asr x13, x12, #63
-; CHECK-NEXT:    asr x11, x9, #63
-; CHECK-NEXT:    eor x10, x10, x13
-; CHECK-NEXT:    eor x8, x8, x11
-; CHECK-NEXT:    eor x9, x9, x11
-; CHECK-NEXT:    subs x2, x8, x11
-; CHECK-NEXT:    eor x8, x12, x13
-; CHECK-NEXT:    sbc x3, x9, x11
-; CHECK-NEXT:    subs x9, x10, x13
-; CHECK-NEXT:    fmov d0, x9
-; CHECK-NEXT:    sbc x1, x8, x13
-; CHECK-NEXT:    mov.d v0[1], x1
-; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    sbc x9, x11, x13
+; CHECK-NEXT:    subs x10, x10, x12
+; CHECK-NEXT:    sbc x11, x14, x15
+; CHECK-NEXT:    asr x13, x9, #63
+; CHECK-NEXT:    asr x12, x11, #63
+; CHECK-NEXT:    eor x8, x8, x13
+; CHECK-NEXT:    eor x9, x9, x13
+; CHECK-NEXT:    eor x10, x10, x12
+; CHECK-NEXT:    eor x11, x11, x12
+; CHECK-NEXT:    subs x0, x10, x12
+; CHECK-NEXT:    sbc x1, x11, x12
+; CHECK-NEXT:    subs x2, x8, x13
+; CHECK-NEXT:    sbc x3, x9, x13
 ; CHECK-NEXT:    ret
   %aext = sext <2 x i64> %a to <2 x i128>
   %bext = sext <2 x i64> %b to <2 x i128>

diff --git a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
index 22440b79bdcd4..b4f179e992a0d 100644
--- a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
@@ -241,21 +241,18 @@ define <4 x i32> @not_sign_4xi32_3(<4 x i32> %a) {
 define <4 x i65> @sign_4xi65(<4 x i65> %a) {
 ; CHECK-LABEL: sign_4xi65:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sbfx x8, x1, #0, #1
-; CHECK-NEXT:    sbfx x9, x5, #0, #1
-; CHECK-NEXT:    sbfx x10, x3, #0, #1
-; CHECK-NEXT:    lsr x1, x8, #63
-; CHECK-NEXT:    orr x8, x8, #0x1
-; CHECK-NEXT:    lsr x3, x10, #63
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    sbfx x8, x7, #0, #1
-; CHECK-NEXT:    lsr x5, x9, #63
-; CHECK-NEXT:    orr x2, x10, #0x1
-; CHECK-NEXT:    orr x4, x9, #0x1
-; CHECK-NEXT:    lsr x7, x8, #63
-; CHECK-NEXT:    orr x6, x8, #0x1
-; CHECK-NEXT:    mov v0.d[1], x1
-; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    sbfx x8, x5, #0, #1
+; CHECK-NEXT:    sbfx x9, x3, #0, #1
+; CHECK-NEXT:    sbfx x10, x1, #0, #1
+; CHECK-NEXT:    sbfx x11, x7, #0, #1
+; CHECK-NEXT:    lsr x1, x10, #63
+; CHECK-NEXT:    lsr x3, x9, #63
+; CHECK-NEXT:    lsr x5, x8, #63
+; CHECK-NEXT:    lsr x7, x11, #63
+; CHECK-NEXT:    orr x0, x10, #0x1
+; CHECK-NEXT:    orr x2, x9, #0x1
+; CHECK-NEXT:    orr x4, x8, #0x1
+; CHECK-NEXT:    orr x6, x11, #0x1
 ; CHECK-NEXT:    ret
   %c = icmp sgt <4 x i65> %a, <i65 -1, i65 -1, i65 -1, i65 -1>
   %res = select <4 x i1> %c, <4 x i65> <i65 1, i65 1, i65 1, i65 1>, <4 x i65 > <i65 -1, i65 -1, i65 -1, i65 -1>

diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll
index 4723ac01d6021..0c880592d955b 100644
--- a/llvm/test/CodeGen/AArch64/fptoi.ll
+++ b/llvm/test/CodeGen/AArch64/fptoi.ll
@@ -2287,20 +2287,19 @@ define <2 x i128> @fptos_v2f64_v2i128(<2 x double> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w20, -16
 ; CHECK-SD-NEXT:    .cfi_offset w30, -32
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    mov d0, v0.d[1]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixdfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x19, x0
 ; CHECK-SD-NEXT:    mov x20, x1
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl __fixdfti
-; CHECK-SD-NEXT:    fmov d0, x0
-; CHECK-SD-NEXT:    mov x2, x19
-; CHECK-SD-NEXT:    mov x3, x20
+; CHECK-SD-NEXT:    mov x2, x0
+; CHECK-SD-NEXT:    mov x3, x1
+; CHECK-SD-NEXT:    mov x0, x19
+; CHECK-SD-NEXT:    mov x1, x20
 ; CHECK-SD-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NEXT:    mov v0.d[1], x1
-; CHECK-SD-NEXT:    fmov x0, d0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -2345,20 +2344,19 @@ define <2 x i128> @fptou_v2f64_v2i128(<2 x double> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w20, -16
 ; CHECK-SD-NEXT:    .cfi_offset w30, -32
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    mov d0, v0.d[1]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunsdfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x19, x0
 ; CHECK-SD-NEXT:    mov x20, x1
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl __fixunsdfti
-; CHECK-SD-NEXT:    fmov d0, x0
-; CHECK-SD-NEXT:    mov x2, x19
-; CHECK-SD-NEXT:    mov x3, x20
+; CHECK-SD-NEXT:    mov x2, x0
+; CHECK-SD-NEXT:    mov x3, x1
+; CHECK-SD-NEXT:    mov x0, x19
+; CHECK-SD-NEXT:    mov x1, x20
 ; CHECK-SD-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NEXT:    mov v0.d[1], x1
-; CHECK-SD-NEXT:    fmov x0, d0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -2407,28 +2405,26 @@ define <3 x i128> @fptos_v3f64_v3i128(<3 x double> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w30, -48
 ; CHECK-SD-NEXT:    .cfi_offset b8, -56
 ; CHECK-SD-NEXT:    .cfi_offset b9, -64
-; CHECK-SD-NEXT:    fmov d9, d0
-; CHECK-SD-NEXT:    fmov d0, d1
 ; CHECK-SD-NEXT:    fmov d8, d2
+; CHECK-SD-NEXT:    fmov d9, d1
 ; CHECK-SD-NEXT:    bl __fixdfti
-; CHECK-SD-NEXT:    fmov d0, d8
+; CHECK-SD-NEXT:    fmov d0, d9
 ; CHECK-SD-NEXT:    mov x19, x0
 ; CHECK-SD-NEXT:    mov x20, x1
 ; CHECK-SD-NEXT:    bl __fixdfti
-; CHECK-SD-NEXT:    fmov d0, d9
+; CHECK-SD-NEXT:    fmov d0, d8
 ; CHECK-SD-NEXT:    mov x21, x0
 ; CHECK-SD-NEXT:    mov x22, x1
 ; CHECK-SD-NEXT:    bl __fixdfti
-; CHECK-SD-NEXT:    fmov d0, x0
-; CHECK-SD-NEXT:    mov x2, x19
-; CHECK-SD-NEXT:    mov x3, x20
-; CHECK-SD-NEXT:    mov x4, x21
-; CHECK-SD-NEXT:    mov x5, x22
-; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    mov x4, x0
+; CHECK-SD-NEXT:    mov x5, x1
+; CHECK-SD-NEXT:    mov x0, x19
+; CHECK-SD-NEXT:    mov x1, x20
+; CHECK-SD-NEXT:    mov x2, x21
+; CHECK-SD-NEXT:    mov x3, x22
 ; CHECK-SD-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    mov v0.d[1], x1
+; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    fmov x0, d0
 ; CHECK-SD-NEXT:    ldp d9, d8, [sp], #64 // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ret
 ;
@@ -2488,28 +2484,26 @@ define <3 x i128> @fptou_v3f64_v3i128(<3 x double> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w30, -48
 ; CHECK-SD-NEXT:    .cfi_offset b8, -56
 ; CHECK-SD-NEXT:    .cfi_offset b9, -64
-; CHECK-SD-NEXT:    fmov d9, d0
-; CHECK-SD-NEXT:    fmov d0, d1
 ; CHECK-SD-NEXT:    fmov d8, d2
+; CHECK-SD-NEXT:    fmov d9, d1
 ; CHECK-SD-NEXT:    bl __fixunsdfti
-; CHECK-SD-NEXT:    fmov d0, d8
+; CHECK-SD-NEXT:    fmov d0, d9
 ; CHECK-SD-NEXT:    mov x19, x0
 ; CHECK-SD-NEXT:    mov x20, x1
 ; CHECK-SD-NEXT:    bl __fixunsdfti
-; CHECK-SD-NEXT:    fmov d0, d9
+; CHECK-SD-NEXT:    fmov d0, d8
 ; CHECK-SD-NEXT:    mov x21, x0
 ; CHECK-SD-NEXT:    mov x22, x1
 ; CHECK-SD-NEXT:    bl __fixunsdfti
-; CHECK-SD-NEXT:    fmov d0, x0
-; CHECK-SD-NEXT:    mov x2, x19
-; CHECK-SD-NEXT:    mov x3, x20
-; CHECK-SD-NEXT:    mov x4, x21
-; CHECK-SD-NEXT:    mov x5, x22
-; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    mov x4, x0
+; CHECK-SD-NEXT:    mov x5, x1
+; CHECK-SD-NEXT:    mov x0, x19
+; CHECK-SD-NEXT:    mov x1, x20
+; CHECK-SD-NEXT:    mov x2, x21
+; CHECK-SD-NEXT:    mov x3, x22
 ; CHECK-SD-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    mov v0.d[1], x1
+; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    fmov x0, d0
 ; CHECK-SD-NEXT:    ldp d9, d8, [sp], #64 // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ret
 ;
@@ -3694,20 +3688,19 @@ define <2 x i128> @fptos_v2f32_v2i128(<2 x float> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w30, -32
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    mov s0, v0.s[1]
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixsfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x19, x0
 ; CHECK-SD-NEXT:    mov x20, x1
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl __fixsfti
-; CHECK-SD-NEXT:    fmov d0, x0
-; CHECK-SD-NEXT:    mov x2, x19
-; CHECK-SD-NEXT:    mov x3, x20
+; CHECK-SD-NEXT:    mov x2, x0
+; CHECK-SD-NEXT:    mov x3, x1
+; CHECK-SD-NEXT:    mov x0, x19
+; CHECK-SD-NEXT:    mov x1, x20
 ; CHECK-SD-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NEXT:    mov v0.d[1], x1
-; CHECK-SD-NEXT:    fmov x0, d0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -3754,20 +3747,19 @@ define <2 x i128> @fptou_v2f32_v2i128(<2 x float> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w30, -32
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    mov s0, v0.s[1]
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunssfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x19, x0
 ; CHECK-SD-NEXT:    mov x20, x1
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl __fixunssfti
-; CHECK-SD-NEXT:    fmov d0, x0
-; CHECK-SD-NEXT:    mov x2, x19
-; CHECK-SD-NEXT:    mov x3, x20
+; CHECK-SD-NEXT:    mov x2, x0
+; CHECK-SD-NEXT:    mov x3, x1
+; CHECK-SD-NEXT:    mov x0, x19
+; CHECK-SD-NEXT:    mov x1, x20
 ; CHECK-SD-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NEXT:    mov v0.d[1], x1
-; CHECK-SD-NEXT:    fmov x0, d0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -3822,23 +3814,22 @@ define <3 x i128> @fptos_v3f32_v3i128(<3 x float> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x19, x0
 ; CHECK-SD-NEXT:    mov x20, x1
-; CHECK-SD-NEXT:    mov s0, v0.s[1]
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixsfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x21, x0
 ; CHECK-SD-NEXT:    mov x22, x1
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl __fixsfti
-; CHECK-SD-NEXT:    fmov d0, x0
-; CHECK-SD-NEXT:    mov x2, x21
-; CHECK-SD-NEXT:    mov x3, x22
+; CHECK-SD-NEXT:    mov x2, x0
+; CHECK-SD-NEXT:    mov x3, x1
+; CHECK-SD-NEXT:    mov x0, x21
+; CHECK-SD-NEXT:    mov x1, x22
 ; CHECK-SD-NEXT:    mov x4, x19
 ; CHECK-SD-NEXT:    mov x5, x20
-; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    mov v0.d[1], x1
+; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    fmov x0, d0
 ; CHECK-SD-NEXT:    add sp, sp, #64
 ; CHECK-SD-NEXT:    ret
 ;
@@ -3904,23 +3895,22 @@ define <3 x i128> @fptou_v3f32_v3i128(<3 x float> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x19, x0
 ; CHECK-SD-NEXT:    mov x20, x1
-; CHECK-SD-NEXT:    mov s0, v0.s[1]
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunssfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x21, x0
 ; CHECK-SD-NEXT:    mov x22, x1
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl __fixunssfti
-; CHECK-SD-NEXT:    fmov d0, x0
-; CHECK-SD-NEXT:    mov x2, x21
-; CHECK-SD-NEXT:    mov x3, x22
+; CHECK-SD-NEXT:    mov x2, x0
+; CHECK-SD-NEXT:    mov x3, x1
+; CHECK-SD-NEXT:    mov x0, x21
+; CHECK-SD-NEXT:    mov x1, x22
 ; CHECK-SD-NEXT:    mov x4, x19
 ; CHECK-SD-NEXT:    mov x5, x20
-; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    mov v0.d[1], x1
+; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    fmov x0, d0
 ; CHECK-SD-NEXT:    add sp, sp, #64
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7034,20 +7024,19 @@ define <2 x i128> @fptos_v2f16_v2i128(<2 x half> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w30, -32
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    mov h0, v0.h[1]
+; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixhfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x19, x0
 ; CHECK-SD-NEXT:    mov x20, x1
-; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 killed $q0
+; CHECK-SD-NEXT:    mov h0, v0.h[1]
 ; CHECK-SD-NEXT:    bl __fixhfti
-; CHECK-SD-NEXT:    fmov d0, x0
-; CHECK-SD-NEXT:    mov x2, x19
-; CHECK-SD-NEXT:    mov x3, x20
+; CHECK-SD-NEXT:    mov x2, x0
+; CHECK-SD-NEXT:    mov x3, x1
+; CHECK-SD-NEXT:    mov x0, x19
+; CHECK-SD-NEXT:    mov x1, x20
 ; CHECK-SD-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NEXT:    mov v0.d[1], x1
-; CHECK-SD-NEXT:    fmov x0, d0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7089,20 +7078,19 @@ define <2 x i128> @fptou_v2f16_v2i128(<2 x half> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w30, -32
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    mov h0, v0.h[1]
+; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunshfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x19, x0
 ; CHECK-SD-NEXT:    mov x20, x1
-; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 killed $q0
+; CHECK-SD-NEXT:    mov h0, v0.h[1]
 ; CHECK-SD-NEXT:    bl __fixunshfti
-; CHECK-SD-NEXT:    fmov d0, x0
-; CHECK-SD-NEXT:    mov x2, x19
-; CHECK-SD-NEXT:    mov x3, x20
+; CHECK-SD-NEXT:    mov x2, x0
+; CHECK-SD-NEXT:    mov x3, x1
+; CHECK-SD-NEXT:    mov x0, x19
+; CHECK-SD-NEXT:    mov x1, x20
 ; CHECK-SD-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NEXT:    mov v0.d[1], x1
-; CHECK-SD-NEXT:    fmov x0, d0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7147,28 +7135,27 @@ define <3 x i128> @fptos_v3f16_v3i128(<3 x half> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w30, -48
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    mov h0, v0.h[1]
+; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixhfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x19, x0
 ; CHECK-SD-NEXT:    mov x20, x1
-; CHECK-SD-NEXT:    mov h0, v0.h[2]
+; CHECK-SD-NEXT:    mov h0, v0.h[1]
 ; CHECK-SD-NEXT:    bl __fixhfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x21, x0
 ; CHECK-SD-NEXT:    mov x22, x1
-; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 killed $q0
+; CHECK-SD-NEXT:    mov h0, v0.h[2]
 ; CHECK-SD-NEXT:    bl __fixhfti
-; CHECK-SD-NEXT:    fmov d0, x0
-; CHECK-SD-NEXT:    mov x2, x19
-; CHECK-SD-NEXT:    mov x3, x20
-; CHECK-SD-NEXT:    mov x4, x21
-; CHECK-SD-NEXT:    mov x5, x22
-; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    mov x4, x0
+; CHECK-SD-NEXT:    mov x5, x1
+; CHECK-SD-NEXT:    mov x0, x19
+; CHECK-SD-NEXT:    mov x1, x20
+; CHECK-SD-NEXT:    mov x2, x21
+; CHECK-SD-NEXT:    mov x3, x22
 ; CHECK-SD-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    mov v0.d[1], x1
+; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    fmov x0, d0
 ; CHECK-SD-NEXT:    add sp, sp, #64
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7220,28 +7207,27 @@ define <3 x i128> @fptou_v3f16_v3i128(<3 x half> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w30, -48
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    mov h0, v0.h[1]
+; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunshfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x19, x0
 ; CHECK-SD-NEXT:    mov x20, x1
-; CHECK-SD-NEXT:    mov h0, v0.h[2]
+; CHECK-SD-NEXT:    mov h0, v0.h[1]
 ; CHECK-SD-NEXT:    bl __fixunshfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x21, x0
 ; CHECK-SD-NEXT:    mov x22, x1
-; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 killed $q0
+; CHECK-SD-NEXT:    mov h0, v0.h[2]
 ; CHECK-SD-NEXT:    bl __fixunshfti
-; CHECK-SD-NEXT:    fmov d0, x0
-; CHECK-SD-NEXT:    mov x2, x19
-; CHECK-SD-NEXT:    mov x3, x20
-; CHECK-SD-NEXT:    mov x4, x21
-; CHECK-SD-NEXT:    mov x5, x22
-; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    mov x4, x0
+; CHECK-SD-NEXT:    mov x5, x1
+; CHECK-SD-NEXT:    mov x0, x19
+; CHECK-SD-NEXT:    mov x1, x20
+; CHECK-SD-NEXT:    mov x2, x21
+; CHECK-SD-NEXT:    mov x3, x22
 ; CHECK-SD-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    mov v0.d[1], x1
+; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    fmov x0, d0
 ; CHECK-SD-NEXT:    add sp, sp, #64
 ; CHECK-SD-NEXT:    ret
 ;
@@ -8083,260 +8069,136 @@ entry:
 }
 
 define <2 x i128> @fptos_v2f128_v2i128(<2 x fp128> %a) {
-; CHECK-SD-LABEL: fptos_v2f128_v2i128:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    sub sp, sp, #48
-; CHECK-SD-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-SD-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
-; CHECK-SD-NEXT:    .cfi_offset w19, -8
-; CHECK-SD-NEXT:    .cfi_offset w20, -16
-; CHECK-SD-NEXT:    .cfi_offset w30, -32
-; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    mov v0.16b, v1.16b
-; CHECK-SD-NEXT:    bl __fixtfti
-; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    mov x19, x0
-; CHECK-SD-NEXT:    mov x20, x1
-; CHECK-SD-NEXT:    bl __fixtfti
-; CHECK-SD-NEXT:    fmov d0, x0
-; CHECK-SD-NEXT:    mov x2, x19
-; CHECK-SD-NEXT:    mov x3, x20
-; CHECK-SD-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NEXT:    mov v0.d[1], x1
-; CHECK-SD-NEXT:    fmov x0, d0
-; CHECK-SD-NEXT:    add sp, sp, #48
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: fptos_v2f128_v2i128:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    sub sp, sp, #48
-; CHECK-GI-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-GI-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    .cfi_def_cfa_offset 48
-; CHECK-GI-NEXT:    .cfi_offset w19, -8
-; CHECK-GI-NEXT:    .cfi_offset w20, -16
-; CHECK-GI-NEXT:    .cfi_offset w30, -32
-; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    bl __fixtfti
-; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    mov x19, x0
-; CHECK-GI-NEXT:    mov x20, x1
-; CHECK-GI-NEXT:    bl __fixtfti
-; CHECK-GI-NEXT:    mov x2, x0
-; CHECK-GI-NEXT:    mov x3, x1
-; CHECK-GI-NEXT:    mov x0, x19
-; CHECK-GI-NEXT:    mov x1, x20
-; CHECK-GI-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-GI-NEXT:    add sp, sp, #48
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: fptos_v2f128_v2i128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w20, -16
+; CHECK-NEXT:    .cfi_offset w30, -32
+; CHECK-NEXT:    str q1, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    bl __fixtfti
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    mov x19, x0
+; CHECK-NEXT:    mov x20, x1
+; CHECK-NEXT:    bl __fixtfti
+; CHECK-NEXT:    mov x2, x0
+; CHECK-NEXT:    mov x3, x1
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    mov x1, x20
+; CHECK-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #48
+; CHECK-NEXT:    ret
 entry:
   %c = fptosi <2 x fp128> %a to <2 x i128>
   ret <2 x i128> %c
 }
 
 define <2 x i128> @fptou_v2f128_v2i128(<2 x fp128> %a) {
-; CHECK-SD-LABEL: fptou_v2f128_v2i128:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    sub sp, sp, #48
-; CHECK-SD-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-SD-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
-; CHECK-SD-NEXT:    .cfi_offset w19, -8
-; CHECK-SD-NEXT:    .cfi_offset w20, -16
-; CHECK-SD-NEXT:    .cfi_offset w30, -32
-; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    mov v0.16b, v1.16b
-; CHECK-SD-NEXT:    bl __fixunstfti
-; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    mov x19, x0
-; CHECK-SD-NEXT:    mov x20, x1
-; CHECK-SD-NEXT:    bl __fixunstfti
-; CHECK-SD-NEXT:    fmov d0, x0
-; CHECK-SD-NEXT:    mov x2, x19
-; CHECK-SD-NEXT:    mov x3, x20
-; CHECK-SD-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NEXT:    mov v0.d[1], x1
-; CHECK-SD-NEXT:    fmov x0, d0
-; CHECK-SD-NEXT:    add sp, sp, #48
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: fptou_v2f128_v2i128:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    sub sp, sp, #48
-; CHECK-GI-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-GI-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    .cfi_def_cfa_offset 48
-; CHECK-GI-NEXT:    .cfi_offset w19, -8
-; CHECK-GI-NEXT:    .cfi_offset w20, -16
-; CHECK-GI-NEXT:    .cfi_offset w30, -32
-; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    bl __fixunstfti
-; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    mov x19, x0
-; CHECK-GI-NEXT:    mov x20, x1
-; CHECK-GI-NEXT:    bl __fixunstfti
-; CHECK-GI-NEXT:    mov x2, x0
-; CHECK-GI-NEXT:    mov x3, x1
-; CHECK-GI-NEXT:    mov x0, x19
-; CHECK-GI-NEXT:    mov x1, x20
-; CHECK-GI-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-GI-NEXT:    add sp, sp, #48
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: fptou_v2f128_v2i128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w20, -16
+; CHECK-NEXT:    .cfi_offset w30, -32
+; CHECK-NEXT:    str q1, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    bl __fixunstfti
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    mov x19, x0
+; CHECK-NEXT:    mov x20, x1
+; CHECK-NEXT:    bl __fixunstfti
+; CHECK-NEXT:    mov x2, x0
+; CHECK-NEXT:    mov x3, x1
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    mov x1, x20
+; CHECK-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #48
+; CHECK-NEXT:    ret
 entry:
   %c = fptoui <2 x fp128> %a to <2 x i128>
   ret <2 x i128> %c
 }
 
 define <3 x i128> @fptos_v3f128_v3i128(<3 x fp128> %a) {
-; CHECK-SD-LABEL: fptos_v3f128_v3i128:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    sub sp, sp, #80
-; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-SD-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    .cfi_def_cfa_offset 80
-; CHECK-SD-NEXT:    .cfi_offset w19, -8
-; CHECK-SD-NEXT:    .cfi_offset w20, -16
-; CHECK-SD-NEXT:    .cfi_offset w21, -24
-; CHECK-SD-NEXT:    .cfi_offset w22, -32
-; CHECK-SD-NEXT:    .cfi_offset w30, -48
-; CHECK-SD-NEXT:    stp q2, q0, [sp] // 32-byte Folded Spill
-; CHECK-SD-NEXT:    mov v0.16b, v1.16b
-; CHECK-SD-NEXT:    bl __fixtfti
-; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    mov x19, x0
-; CHECK-SD-NEXT:    mov x20, x1
-; CHECK-SD-NEXT:    bl __fixtfti
-; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    mov x21, x0
-; CHECK-SD-NEXT:    mov x22, x1
-; CHECK-SD-NEXT:    bl __fixtfti
-; CHECK-SD-NEXT:    fmov d0, x0
-; CHECK-SD-NEXT:    mov x2, x19
-; CHECK-SD-NEXT:    mov x3, x20
-; CHECK-SD-NEXT:    mov x4, x21
-; CHECK-SD-NEXT:    mov x5, x22
-; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
-; CHECK-SD-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    mov v0.d[1], x1
-; CHECK-SD-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    fmov x0, d0
-; CHECK-SD-NEXT:    add sp, sp, #80
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: fptos_v3f128_v3i128:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    sub sp, sp, #80
-; CHECK-GI-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-GI-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    .cfi_def_cfa_offset 80
-; CHECK-GI-NEXT:    .cfi_offset w19, -8
-; CHECK-GI-NEXT:    .cfi_offset w20, -16
-; CHECK-GI-NEXT:    .cfi_offset w21, -24
-; CHECK-GI-NEXT:    .cfi_offset w22, -32
-; CHECK-GI-NEXT:    .cfi_offset w30, -48
-; CHECK-GI-NEXT:    stp q1, q2, [sp] // 32-byte Folded Spill
-; CHECK-GI-NEXT:    bl __fixtfti
-; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    mov x19, x0
-; CHECK-GI-NEXT:    mov x20, x1
-; CHECK-GI-NEXT:    bl __fixtfti
-; CHECK-GI-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    mov x21, x0
-; CHECK-GI-NEXT:    mov x22, x1
-; CHECK-GI-NEXT:    bl __fixtfti
-; CHECK-GI-NEXT:    mov x4, x0
-; CHECK-GI-NEXT:    mov x5, x1
-; CHECK-GI-NEXT:    mov x0, x19
-; CHECK-GI-NEXT:    mov x1, x20
-; CHECK-GI-NEXT:    mov x2, x21
-; CHECK-GI-NEXT:    mov x3, x22
-; CHECK-GI-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
-; CHECK-GI-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    add sp, sp, #80
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: fptos_v3f128_v3i128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #80
+; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w20, -16
+; CHECK-NEXT:    .cfi_offset w21, -24
+; CHECK-NEXT:    .cfi_offset w22, -32
+; CHECK-NEXT:    .cfi_offset w30, -48
+; CHECK-NEXT:    stp q1, q2, [sp] // 32-byte Folded Spill
+; CHECK-NEXT:    bl __fixtfti
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    mov x19, x0
+; CHECK-NEXT:    mov x20, x1
+; CHECK-NEXT:    bl __fixtfti
+; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    mov x21, x0
+; CHECK-NEXT:    mov x22, x1
+; CHECK-NEXT:    bl __fixtfti
+; CHECK-NEXT:    mov x4, x0
+; CHECK-NEXT:    mov x5, x1
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    mov x1, x20
+; CHECK-NEXT:    mov x2, x21
+; CHECK-NEXT:    mov x3, x22
+; CHECK-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #80
+; CHECK-NEXT:    ret
 entry:
   %c = fptosi <3 x fp128> %a to <3 x i128>
   ret <3 x i128> %c
 }
 
 define <3 x i128> @fptou_v3f128_v3i128(<3 x fp128> %a) {
-; CHECK-SD-LABEL: fptou_v3f128_v3i128:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    sub sp, sp, #80
-; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-SD-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    .cfi_def_cfa_offset 80
-; CHECK-SD-NEXT:    .cfi_offset w19, -8
-; CHECK-SD-NEXT:    .cfi_offset w20, -16
-; CHECK-SD-NEXT:    .cfi_offset w21, -24
-; CHECK-SD-NEXT:    .cfi_offset w22, -32
-; CHECK-SD-NEXT:    .cfi_offset w30, -48
-; CHECK-SD-NEXT:    stp q2, q0, [sp] // 32-byte Folded Spill
-; CHECK-SD-NEXT:    mov v0.16b, v1.16b
-; CHECK-SD-NEXT:    bl __fixunstfti
-; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    mov x19, x0
-; CHECK-SD-NEXT:    mov x20, x1
-; CHECK-SD-NEXT:    bl __fixunstfti
-; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    mov x21, x0
-; CHECK-SD-NEXT:    mov x22, x1
-; CHECK-SD-NEXT:    bl __fixunstfti
-; CHECK-SD-NEXT:    fmov d0, x0
-; CHECK-SD-NEXT:    mov x2, x19
-; CHECK-SD-NEXT:    mov x3, x20
-; CHECK-SD-NEXT:    mov x4, x21
-; CHECK-SD-NEXT:    mov x5, x22
-; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
-; CHECK-SD-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    mov v0.d[1], x1
-; CHECK-SD-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    fmov x0, d0
-; CHECK-SD-NEXT:    add sp, sp, #80
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: fptou_v3f128_v3i128:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    sub sp, sp, #80
-; CHECK-GI-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-GI-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    .cfi_def_cfa_offset 80
-; CHECK-GI-NEXT:    .cfi_offset w19, -8
-; CHECK-GI-NEXT:    .cfi_offset w20, -16
-; CHECK-GI-NEXT:    .cfi_offset w21, -24
-; CHECK-GI-NEXT:    .cfi_offset w22, -32
-; CHECK-GI-NEXT:    .cfi_offset w30, -48
-; CHECK-GI-NEXT:    stp q1, q2, [sp] // 32-byte Folded Spill
-; CHECK-GI-NEXT:    bl __fixunstfti
-; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    mov x19, x0
-; CHECK-GI-NEXT:    mov x20, x1
-; CHECK-GI-NEXT:    bl __fixunstfti
-; CHECK-GI-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    mov x21, x0
-; CHECK-GI-NEXT:    mov x22, x1
-; CHECK-GI-NEXT:    bl __fixunstfti
-; CHECK-GI-NEXT:    mov x4, x0
-; CHECK-GI-NEXT:    mov x5, x1
-; CHECK-GI-NEXT:    mov x0, x19
-; CHECK-GI-NEXT:    mov x1, x20
-; CHECK-GI-NEXT:    mov x2, x21
-; CHECK-GI-NEXT:    mov x3, x22
-; CHECK-GI-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
-; CHECK-GI-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    add sp, sp, #80
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: fptou_v3f128_v3i128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #80
+; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w20, -16
+; CHECK-NEXT:    .cfi_offset w21, -24
+; CHECK-NEXT:    .cfi_offset w22, -32
+; CHECK-NEXT:    .cfi_offset w30, -48
+; CHECK-NEXT:    stp q1, q2, [sp] // 32-byte Folded Spill
+; CHECK-NEXT:    bl __fixunstfti
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    mov x19, x0
+; CHECK-NEXT:    mov x20, x1
+; CHECK-NEXT:    bl __fixunstfti
+; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    mov x21, x0
+; CHECK-NEXT:    mov x22, x1
+; CHECK-NEXT:    bl __fixunstfti
+; CHECK-NEXT:    mov x4, x0
+; CHECK-NEXT:    mov x5, x1
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    mov x1, x20
+; CHECK-NEXT:    mov x2, x21
+; CHECK-NEXT:    mov x3, x22
+; CHECK-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #80
+; CHECK-NEXT:    ret
 entry:
   %c = fptoui <3 x fp128> %a to <3 x i128>
   ret <3 x i128> %c

diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index d620a8851ee44..91c8b7f345e32 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -819,47 +819,43 @@ define <2 x i100> @test_signed_v2f32_v2i100(<2 x float> %f) {
 ; CHECK-NEXT:    .cfi_offset b9, -56
 ; CHECK-NEXT:    .cfi_offset b10, -64
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov s8, v0.s[1]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    fmov s0, s8
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    movi v9.2s, #241, lsl #24
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov w8, #1895825407 // =0x70ffffff
-; CHECK-NEXT:    mov x21, #-34359738368 // =0xfffffff800000000
 ; CHECK-NEXT:    fmov s10, w8
+; CHECK-NEXT:    mov x21, #-34359738368 // =0xfffffff800000000
 ; CHECK-NEXT:    mov x22, #34359738367 // =0x7ffffffff
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT:    fcmp s8, s9
+; CHECK-NEXT:    mov s8, v0.s[1]
+; CHECK-NEXT:    fcmp s0, s9
 ; CHECK-NEXT:    csel x8, xzr, x0, lt
 ; CHECK-NEXT:    csel x9, x21, x1, lt
-; CHECK-NEXT:    fcmp s8, s10
+; CHECK-NEXT:    fcmp s0, s10
 ; CHECK-NEXT:    csel x9, x22, x9, gt
 ; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
+; CHECK-NEXT:    fcmp s0, s0
+; CHECK-NEXT:    fmov s0, s8
 ; CHECK-NEXT:    csel x19, xzr, x8, vs
 ; CHECK-NEXT:    csel x20, xzr, x9, vs
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov x2, x19
-; CHECK-NEXT:    mov x3, x20
-; CHECK-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    fcmp s8, s9
 ; CHECK-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
-; CHECK-NEXT:    fcmp s0, s9
-; CHECK-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x8, x21, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s0, s10
+; CHECK-NEXT:    csel x8, xzr, x0, lt
+; CHECK-NEXT:    csel x9, x21, x1, lt
+; CHECK-NEXT:    fcmp s8, s10
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    mov x1, x20
 ; CHECK-NEXT:    ldr d10, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT:    csinv x9, x9, xzr, le
-; CHECK-NEXT:    csel x8, x22, x8, gt
-; CHECK-NEXT:    fcmp s0, s0
+; CHECK-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    csel x9, x22, x9, gt
+; CHECK-NEXT:    csinv x8, x8, xzr, le
+; CHECK-NEXT:    fcmp s8, s8
 ; CHECK-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x9, xzr, x9, vs
-; CHECK-NEXT:    csel x1, xzr, x8, vs
-; CHECK-NEXT:    fmov d0, x9
-; CHECK-NEXT:    mov v0.d[1], x1
-; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
+; CHECK-NEXT:    csel x2, xzr, x8, vs
+; CHECK-NEXT:    csel x3, xzr, x9, vs
 ; CHECK-NEXT:    add sp, sp, #80
 ; CHECK-NEXT:    ret
     %x = call <2 x i100> @llvm.fptosi.sat.v2f32.v2i100(<2 x float> %f)
@@ -885,47 +881,43 @@ define <2 x i128> @test_signed_v2f32_v2i128(<2 x float> %f) {
 ; CHECK-NEXT:    .cfi_offset b9, -56
 ; CHECK-NEXT:    .cfi_offset b10, -64
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov s8, v0.s[1]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    fmov s0, s8
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    movi v9.2s, #255, lsl #24
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov w8, #2130706431 // =0x7effffff
-; CHECK-NEXT:    mov x21, #-9223372036854775808 // =0x8000000000000000
 ; CHECK-NEXT:    fmov s10, w8
+; CHECK-NEXT:    mov x21, #-9223372036854775808 // =0x8000000000000000
 ; CHECK-NEXT:    mov x22, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT:    fcmp s8, s9
+; CHECK-NEXT:    mov s8, v0.s[1]
+; CHECK-NEXT:    fcmp s0, s9
 ; CHECK-NEXT:    csel x8, xzr, x0, lt
 ; CHECK-NEXT:    csel x9, x21, x1, lt
-; CHECK-NEXT:    fcmp s8, s10
+; CHECK-NEXT:    fcmp s0, s10
 ; CHECK-NEXT:    csel x9, x22, x9, gt
 ; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp s8, s8
+; CHECK-NEXT:    fcmp s0, s0
+; CHECK-NEXT:    fmov s0, s8
 ; CHECK-NEXT:    csel x19, xzr, x8, vs
 ; CHECK-NEXT:    csel x20, xzr, x9, vs
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov x2, x19
-; CHECK-NEXT:    mov x3, x20
-; CHECK-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    fcmp s8, s9
 ; CHECK-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
-; CHECK-NEXT:    fcmp s0, s9
-; CHECK-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x8, x21, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s0, s10
+; CHECK-NEXT:    csel x8, xzr, x0, lt
+; CHECK-NEXT:    csel x9, x21, x1, lt
+; CHECK-NEXT:    fcmp s8, s10
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    mov x1, x20
 ; CHECK-NEXT:    ldr d10, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT:    csinv x9, x9, xzr, le
-; CHECK-NEXT:    csel x8, x22, x8, gt
-; CHECK-NEXT:    fcmp s0, s0
+; CHECK-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    csel x9, x22, x9, gt
+; CHECK-NEXT:    csinv x8, x8, xzr, le
+; CHECK-NEXT:    fcmp s8, s8
 ; CHECK-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x9, xzr, x9, vs
-; CHECK-NEXT:    csel x1, xzr, x8, vs
-; CHECK-NEXT:    fmov d0, x9
-; CHECK-NEXT:    mov v0.d[1], x1
-; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
+; CHECK-NEXT:    csel x2, xzr, x8, vs
+; CHECK-NEXT:    csel x3, xzr, x9, vs
 ; CHECK-NEXT:    add sp, sp, #80
 ; CHECK-NEXT:    ret
     %x = call <2 x i128> @llvm.fptosi.sat.v2f32.v2i128(<2 x float> %f)
@@ -1068,15 +1060,15 @@ define <4 x i64> @test_signed_v4f32_v4i64(<4 x float> %f) {
 define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-LABEL: test_signed_v4f32_v4i100:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #128
-; CHECK-NEXT:    str d10, [sp, #32] // 8-byte Folded Spill
-; CHECK-NEXT:    stp d9, d8, [sp, #40] // 16-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #56] // 8-byte Folded Spill
-; CHECK-NEXT:    stp x26, x25, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x24, x23, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x22, x21, [sp, #96] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #112] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 128
+; CHECK-NEXT:    sub sp, sp, #112
+; CHECK-NEXT:    str d10, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
+; CHECK-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x26, x25, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x24, x23, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x22, x21, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 112
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24
@@ -1089,28 +1081,40 @@ define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-NEXT:    .cfi_offset b8, -80
 ; CHECK-NEXT:    .cfi_offset b9, -88
 ; CHECK-NEXT:    .cfi_offset b10, -96
-; CHECK-NEXT:    mov s8, v0.s[1]
-; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    fmov s0, s8
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    movi v9.2s, #241, lsl #24
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov w8, #1895825407 // =0x70ffffff
-; CHECK-NEXT:    mov x25, #-34359738368 // =0xfffffff800000000
 ; CHECK-NEXT:    fmov s10, w8
-; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    mov x25, #-34359738368 // =0xfffffff800000000
 ; CHECK-NEXT:    mov x26, #34359738367 // =0x7ffffffff
-; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    mov s8, v0.s[1]
+; CHECK-NEXT:    fcmp s0, s9
+; CHECK-NEXT:    csel x8, xzr, x0, lt
+; CHECK-NEXT:    csel x9, x25, x1, lt
+; CHECK-NEXT:    fcmp s0, s10
+; CHECK-NEXT:    csel x9, x26, x9, gt
+; CHECK-NEXT:    csinv x8, x8, xzr, le
+; CHECK-NEXT:    fcmp s0, s0
+; CHECK-NEXT:    fmov s0, s8
+; CHECK-NEXT:    csel x19, xzr, x8, vs
+; CHECK-NEXT:    csel x20, xzr, x9, vs
+; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    fcmp s8, s9
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
 ; CHECK-NEXT:    csel x8, xzr, x0, lt
 ; CHECK-NEXT:    csel x9, x25, x1, lt
 ; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    csel x9, x26, x9, gt
 ; CHECK-NEXT:    csinv x8, x8, xzr, le
 ; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    csel x19, xzr, x8, vs
-; CHECK-NEXT:    csel x20, xzr, x9, vs
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT:    csel x21, xzr, x8, vs
+; CHECK-NEXT:    csel x22, xzr, x9, vs
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    fcmp s0, s9
@@ -1122,48 +1126,32 @@ define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-NEXT:    csinv x8, x8, xzr, le
 ; CHECK-NEXT:    fcmp s0, s0
 ; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    csel x21, xzr, x8, vs
-; CHECK-NEXT:    csel x22, xzr, x9, vs
+; CHECK-NEXT:    csel x23, xzr, x8, vs
+; CHECK-NEXT:    csel x24, xzr, x9, vs
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT:    mov x2, x21
+; CHECK-NEXT:    mov x3, x22
+; CHECK-NEXT:    mov x4, x23
+; CHECK-NEXT:    mov x5, x24
+; CHECK-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x22, x21, [sp, #80] // 16-byte Folded Reload
 ; CHECK-NEXT:    csel x8, xzr, x0, lt
 ; CHECK-NEXT:    csel x9, x25, x1, lt
 ; CHECK-NEXT:    fcmp s8, s10
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    mov x1, x20
+; CHECK-NEXT:    ldr d10, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
 ; CHECK-NEXT:    csel x9, x26, x9, gt
 ; CHECK-NEXT:    csinv x8, x8, xzr, le
 ; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    csel x23, xzr, x8, vs
-; CHECK-NEXT:    csel x24, xzr, x9, vs
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    mov x2, x19
-; CHECK-NEXT:    mov x3, x20
-; CHECK-NEXT:    mov x4, x21
-; CHECK-NEXT:    mov x5, x22
-; CHECK-NEXT:    mov x6, x23
-; CHECK-NEXT:    fcmp s0, s9
-; CHECK-NEXT:    mov x7, x24
-; CHECK-NEXT:    ldr x30, [sp, #56] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp x20, x19, [sp, #112] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x22, x21, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x8, x25, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s0, s10
-; CHECK-NEXT:    ldp x24, x23, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr d10, [sp, #32] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp d9, d8, [sp, #40] // 16-byte Folded Reload
-; CHECK-NEXT:    csinv x9, x9, xzr, le
-; CHECK-NEXT:    csel x8, x26, x8, gt
-; CHECK-NEXT:    fcmp s0, s0
-; CHECK-NEXT:    ldp x26, x25, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x9, xzr, x9, vs
-; CHECK-NEXT:    csel x1, xzr, x8, vs
-; CHECK-NEXT:    fmov d0, x9
-; CHECK-NEXT:    mov v0.d[1], x1
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    add sp, sp, #128
+; CHECK-NEXT:    ldp x24, x23, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x26, x25, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
+; CHECK-NEXT:    csel x6, xzr, x8, vs
+; CHECK-NEXT:    csel x7, xzr, x9, vs
+; CHECK-NEXT:    add sp, sp, #112
 ; CHECK-NEXT:    ret
     %x = call <4 x i100> @llvm.fptosi.sat.v4f32.v4i100(<4 x float> %f)
     ret <4 x i100> %x
@@ -1172,15 +1160,15 @@ define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
 define <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) {
 ; CHECK-LABEL: test_signed_v4f32_v4i128:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #128
-; CHECK-NEXT:    str d10, [sp, #32] // 8-byte Folded Spill
-; CHECK-NEXT:    stp d9, d8, [sp, #40] // 16-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #56] // 8-byte Folded Spill
-; CHECK-NEXT:    stp x26, x25, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x24, x23, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x22, x21, [sp, #96] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #112] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 128
+; CHECK-NEXT:    sub sp, sp, #112
+; CHECK-NEXT:    str d10, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
+; CHECK-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x26, x25, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x24, x23, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x22, x21, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 112
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24
@@ -1193,28 +1181,40 @@ define <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) {
 ; CHECK-NEXT:    .cfi_offset b8, -80
 ; CHECK-NEXT:    .cfi_offset b9, -88
 ; CHECK-NEXT:    .cfi_offset b10, -96
-; CHECK-NEXT:    mov s8, v0.s[1]
-; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    fmov s0, s8
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    movi v9.2s, #255, lsl #24
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov w8, #2130706431 // =0x7effffff
-; CHECK-NEXT:    mov x25, #-9223372036854775808 // =0x8000000000000000
 ; CHECK-NEXT:    fmov s10, w8
-; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    mov x25, #-9223372036854775808 // =0x8000000000000000
 ; CHECK-NEXT:    mov x26, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    mov s8, v0.s[1]
+; CHECK-NEXT:    fcmp s0, s9
+; CHECK-NEXT:    csel x8, xzr, x0, lt
+; CHECK-NEXT:    csel x9, x25, x1, lt
+; CHECK-NEXT:    fcmp s0, s10
+; CHECK-NEXT:    csel x9, x26, x9, gt
+; CHECK-NEXT:    csinv x8, x8, xzr, le
+; CHECK-NEXT:    fcmp s0, s0
+; CHECK-NEXT:    fmov s0, s8
+; CHECK-NEXT:    csel x19, xzr, x8, vs
+; CHECK-NEXT:    csel x20, xzr, x9, vs
+; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    fcmp s8, s9
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
 ; CHECK-NEXT:    csel x8, xzr, x0, lt
 ; CHECK-NEXT:    csel x9, x25, x1, lt
 ; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    csel x9, x26, x9, gt
 ; CHECK-NEXT:    csinv x8, x8, xzr, le
 ; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    csel x19, xzr, x8, vs
-; CHECK-NEXT:    csel x20, xzr, x9, vs
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT:    csel x21, xzr, x8, vs
+; CHECK-NEXT:    csel x22, xzr, x9, vs
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    fcmp s0, s9
@@ -1226,48 +1226,32 @@ define <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) {
 ; CHECK-NEXT:    csinv x8, x8, xzr, le
 ; CHECK-NEXT:    fcmp s0, s0
 ; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    csel x21, xzr, x8, vs
-; CHECK-NEXT:    csel x22, xzr, x9, vs
+; CHECK-NEXT:    csel x23, xzr, x8, vs
+; CHECK-NEXT:    csel x24, xzr, x9, vs
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT:    mov x2, x21
+; CHECK-NEXT:    mov x3, x22
+; CHECK-NEXT:    mov x4, x23
+; CHECK-NEXT:    mov x5, x24
+; CHECK-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x22, x21, [sp, #80] // 16-byte Folded Reload
 ; CHECK-NEXT:    csel x8, xzr, x0, lt
 ; CHECK-NEXT:    csel x9, x25, x1, lt
 ; CHECK-NEXT:    fcmp s8, s10
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    mov x1, x20
+; CHECK-NEXT:    ldr d10, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
 ; CHECK-NEXT:    csel x9, x26, x9, gt
 ; CHECK-NEXT:    csinv x8, x8, xzr, le
 ; CHECK-NEXT:    fcmp s8, s8
-; CHECK-NEXT:    csel x23, xzr, x8, vs
-; CHECK-NEXT:    csel x24, xzr, x9, vs
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    mov x2, x19
-; CHECK-NEXT:    mov x3, x20
-; CHECK-NEXT:    mov x4, x21
-; CHECK-NEXT:    mov x5, x22
-; CHECK-NEXT:    mov x6, x23
-; CHECK-NEXT:    fcmp s0, s9
-; CHECK-NEXT:    mov x7, x24
-; CHECK-NEXT:    ldr x30, [sp, #56] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp x20, x19, [sp, #112] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x22, x21, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x8, x25, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s0, s10
-; CHECK-NEXT:    ldp x24, x23, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr d10, [sp, #32] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp d9, d8, [sp, #40] // 16-byte Folded Reload
-; CHECK-NEXT:    csinv x9, x9, xzr, le
-; CHECK-NEXT:    csel x8, x26, x8, gt
-; CHECK-NEXT:    fcmp s0, s0
-; CHECK-NEXT:    ldp x26, x25, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x9, xzr, x9, vs
-; CHECK-NEXT:    csel x1, xzr, x8, vs
-; CHECK-NEXT:    fmov d0, x9
-; CHECK-NEXT:    mov v0.d[1], x1
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    add sp, sp, #128
+; CHECK-NEXT:    ldp x24, x23, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x26, x25, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
+; CHECK-NEXT:    csel x6, xzr, x8, vs
+; CHECK-NEXT:    csel x7, xzr, x9, vs
+; CHECK-NEXT:    add sp, sp, #112
 ; CHECK-NEXT:    ret
     %x = call <4 x i128> @llvm.fptosi.sat.v4f32.v4i128(<4 x float> %f)
     ret <4 x i128> %x
@@ -1465,48 +1449,44 @@ define <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) {
 ; CHECK-NEXT:    .cfi_offset b8, -48
 ; CHECK-NEXT:    .cfi_offset b9, -56
 ; CHECK-NEXT:    .cfi_offset b10, -64
-; CHECK-NEXT:    mov d8, v0.d[1]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    fmov d0, d8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    bl __fixdfti
 ; CHECK-NEXT:    mov x8, #-4170333254945079296 // =0xc620000000000000
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov x21, #-34359738368 // =0xfffffff800000000
-; CHECK-NEXT:    mov x22, #34359738367 // =0x7ffffffff
 ; CHECK-NEXT:    fmov d9, x8
 ; CHECK-NEXT:    mov x8, #5053038781909696511 // =0x461fffffffffffff
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    mov x22, #34359738367 // =0x7ffffffff
 ; CHECK-NEXT:    fmov d10, x8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    fcmp d8, d9
+; CHECK-NEXT:    mov d8, v0.d[1]
+; CHECK-NEXT:    fcmp d0, d9
 ; CHECK-NEXT:    csel x8, xzr, x0, lt
 ; CHECK-NEXT:    csel x9, x21, x1, lt
-; CHECK-NEXT:    fcmp d8, d10
+; CHECK-NEXT:    fcmp d0, d10
 ; CHECK-NEXT:    csel x9, x22, x9, gt
 ; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp d8, d8
+; CHECK-NEXT:    fcmp d0, d0
+; CHECK-NEXT:    fmov d0, d8
 ; CHECK-NEXT:    csel x19, xzr, x8, vs
 ; CHECK-NEXT:    csel x20, xzr, x9, vs
 ; CHECK-NEXT:    bl __fixdfti
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov x2, x19
-; CHECK-NEXT:    mov x3, x20
-; CHECK-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    fcmp d8, d9
 ; CHECK-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
-; CHECK-NEXT:    fcmp d0, d9
-; CHECK-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x8, x21, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp d0, d10
+; CHECK-NEXT:    csel x8, xzr, x0, lt
+; CHECK-NEXT:    csel x9, x21, x1, lt
+; CHECK-NEXT:    fcmp d8, d10
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    mov x1, x20
 ; CHECK-NEXT:    ldr d10, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT:    csinv x9, x9, xzr, le
-; CHECK-NEXT:    csel x8, x22, x8, gt
-; CHECK-NEXT:    fcmp d0, d0
+; CHECK-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    csel x9, x22, x9, gt
+; CHECK-NEXT:    csinv x8, x8, xzr, le
+; CHECK-NEXT:    fcmp d8, d8
 ; CHECK-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x9, xzr, x9, vs
-; CHECK-NEXT:    csel x1, xzr, x8, vs
-; CHECK-NEXT:    fmov d0, x9
-; CHECK-NEXT:    mov v0.d[1], x1
-; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
+; CHECK-NEXT:    csel x2, xzr, x8, vs
+; CHECK-NEXT:    csel x3, xzr, x9, vs
 ; CHECK-NEXT:    add sp, sp, #80
 ; CHECK-NEXT:    ret
     %x = call <2 x i100> @llvm.fptosi.sat.v2f64.v2i100(<2 x double> %f)
@@ -1531,48 +1511,44 @@ define <2 x i128> @test_signed_v2f64_v2i128(<2 x double> %f) {
 ; CHECK-NEXT:    .cfi_offset b8, -48
 ; CHECK-NEXT:    .cfi_offset b9, -56
 ; CHECK-NEXT:    .cfi_offset b10, -64
-; CHECK-NEXT:    mov d8, v0.d[1]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    fmov d0, d8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    bl __fixdfti
 ; CHECK-NEXT:    mov x8, #-4044232465378705408 // =0xc7e0000000000000
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov x21, #-9223372036854775808 // =0x8000000000000000
-; CHECK-NEXT:    mov x22, #9223372036854775807 // =0x7fffffffffffffff
 ; CHECK-NEXT:    fmov d9, x8
 ; CHECK-NEXT:    mov x8, #5179139571476070399 // =0x47dfffffffffffff
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    mov x22, #9223372036854775807 // =0x7fffffffffffffff
 ; CHECK-NEXT:    fmov d10, x8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    fcmp d8, d9
+; CHECK-NEXT:    mov d8, v0.d[1]
+; CHECK-NEXT:    fcmp d0, d9
 ; CHECK-NEXT:    csel x8, xzr, x0, lt
 ; CHECK-NEXT:    csel x9, x21, x1, lt
-; CHECK-NEXT:    fcmp d8, d10
+; CHECK-NEXT:    fcmp d0, d10
 ; CHECK-NEXT:    csel x9, x22, x9, gt
 ; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    fcmp d8, d8
+; CHECK-NEXT:    fcmp d0, d0
+; CHECK-NEXT:    fmov d0, d8
 ; CHECK-NEXT:    csel x19, xzr, x8, vs
 ; CHECK-NEXT:    csel x20, xzr, x9, vs
 ; CHECK-NEXT:    bl __fixdfti
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov x2, x19
-; CHECK-NEXT:    mov x3, x20
-; CHECK-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    fcmp d8, d9
 ; CHECK-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
-; CHECK-NEXT:    fcmp d0, d9
-; CHECK-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x8, x21, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp d0, d10
+; CHECK-NEXT:    csel x8, xzr, x0, lt
+; CHECK-NEXT:    csel x9, x21, x1, lt
+; CHECK-NEXT:    fcmp d8, d10
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    mov x1, x20
 ; CHECK-NEXT:    ldr d10, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT:    csinv x9, x9, xzr, le
-; CHECK-NEXT:    csel x8, x22, x8, gt
-; CHECK-NEXT:    fcmp d0, d0
+; CHECK-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    csel x9, x22, x9, gt
+; CHECK-NEXT:    csinv x8, x8, xzr, le
+; CHECK-NEXT:    fcmp d8, d8
 ; CHECK-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x9, xzr, x9, vs
-; CHECK-NEXT:    csel x1, xzr, x8, vs
-; CHECK-NEXT:    fmov d0, x9
-; CHECK-NEXT:    mov v0.d[1], x1
-; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
+; CHECK-NEXT:    csel x2, xzr, x8, vs
+; CHECK-NEXT:    csel x3, xzr, x9, vs
 ; CHECK-NEXT:    add sp, sp, #80
 ; CHECK-NEXT:    ret
     %x = call <2 x i128> @llvm.fptosi.sat.v2f64.v2i128(<2 x double> %f)
@@ -1838,9 +1814,8 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) {
 ; CHECK-NEXT:    .cfi_offset b9, -88
 ; CHECK-NEXT:    .cfi_offset b10, -96
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov h1, v0.h[1]
+; CHECK-NEXT:    fcvt s8, h0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    fcvt s8, h1
 ; CHECK-NEXT:    fmov s0, s8
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    movi v9.2s, #241, lsl #24
@@ -1849,7 +1824,7 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) {
 ; CHECK-NEXT:    fmov s10, w8
 ; CHECK-NEXT:    mov x25, #-34359738368 // =0xfffffff800000000
 ; CHECK-NEXT:    mov x26, #34359738367 // =0x7ffffffff
-; CHECK-NEXT:    mov h0, v0.h[2]
+; CHECK-NEXT:    mov h0, v0.h[1]
 ; CHECK-NEXT:    fcmp s8, s9
 ; CHECK-NEXT:    csel x8, xzr, x0, lt
 ; CHECK-NEXT:    csel x9, x25, x1, lt
@@ -1864,7 +1839,7 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) {
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    fcmp s8, s9
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov h0, v0.h[3]
+; CHECK-NEXT:    mov h0, v0.h[2]
 ; CHECK-NEXT:    csel x8, xzr, x0, lt
 ; CHECK-NEXT:    csel x9, x25, x1, lt
 ; CHECK-NEXT:    fcmp s8, s10
@@ -1878,6 +1853,7 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) {
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    fcmp s8, s9
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    mov h0, v0.h[3]
 ; CHECK-NEXT:    csel x8, xzr, x0, lt
 ; CHECK-NEXT:    csel x9, x25, x1, lt
 ; CHECK-NEXT:    fcmp s8, s10
@@ -1890,30 +1866,27 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) {
 ; CHECK-NEXT:    fmov s0, s8
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    mov x2, x19
-; CHECK-NEXT:    mov x3, x20
-; CHECK-NEXT:    mov x4, x21
-; CHECK-NEXT:    mov x5, x22
-; CHECK-NEXT:    mov x6, x23
-; CHECK-NEXT:    mov x7, x24
-; CHECK-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x8, x25, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    ldp x22, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT:    mov x2, x21
+; CHECK-NEXT:    mov x3, x22
+; CHECK-NEXT:    mov x4, x23
+; CHECK-NEXT:    mov x5, x24
 ; CHECK-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp x24, x23, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x22, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT:    csel x8, xzr, x0, lt
+; CHECK-NEXT:    csel x9, x25, x1, lt
+; CHECK-NEXT:    fcmp s8, s10
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    mov x1, x20
 ; CHECK-NEXT:    ldr d10, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT:    csinv x9, x9, xzr, le
-; CHECK-NEXT:    csel x8, x26, x8, gt
+; CHECK-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT:    csel x9, x26, x9, gt
+; CHECK-NEXT:    csinv x8, x8, xzr, le
 ; CHECK-NEXT:    fcmp s8, s8
+; CHECK-NEXT:    ldp x24, x23, [sp, #64] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp x26, x25, [sp, #48] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x9, xzr, x9, vs
-; CHECK-NEXT:    csel x1, xzr, x8, vs
-; CHECK-NEXT:    fmov d0, x9
-; CHECK-NEXT:    mov v0.d[1], x1
-; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    csel x6, xzr, x8, vs
+; CHECK-NEXT:    csel x7, xzr, x9, vs
 ; CHECK-NEXT:    add sp, sp, #112
 ; CHECK-NEXT:    ret
     %x = call <4 x i100> @llvm.fptosi.sat.v4f16.v4i100(<4 x half> %f)
@@ -1945,9 +1918,8 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) {
 ; CHECK-NEXT:    .cfi_offset b9, -88
 ; CHECK-NEXT:    .cfi_offset b10, -96
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov h1, v0.h[1]
+; CHECK-NEXT:    fcvt s8, h0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    fcvt s8, h1
 ; CHECK-NEXT:    fmov s0, s8
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    movi v9.2s, #255, lsl #24
@@ -1956,7 +1928,7 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) {
 ; CHECK-NEXT:    fmov s10, w8
 ; CHECK-NEXT:    mov x25, #-9223372036854775808 // =0x8000000000000000
 ; CHECK-NEXT:    mov x26, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-NEXT:    mov h0, v0.h[2]
+; CHECK-NEXT:    mov h0, v0.h[1]
 ; CHECK-NEXT:    fcmp s8, s9
 ; CHECK-NEXT:    csel x8, xzr, x0, lt
 ; CHECK-NEXT:    csel x9, x25, x1, lt
@@ -1971,7 +1943,7 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) {
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    fcmp s8, s9
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov h0, v0.h[3]
+; CHECK-NEXT:    mov h0, v0.h[2]
 ; CHECK-NEXT:    csel x8, xzr, x0, lt
 ; CHECK-NEXT:    csel x9, x25, x1, lt
 ; CHECK-NEXT:    fcmp s8, s10
@@ -1985,6 +1957,7 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) {
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    fcmp s8, s9
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    mov h0, v0.h[3]
 ; CHECK-NEXT:    csel x8, xzr, x0, lt
 ; CHECK-NEXT:    csel x9, x25, x1, lt
 ; CHECK-NEXT:    fcmp s8, s10
@@ -1997,30 +1970,27 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) {
 ; CHECK-NEXT:    fmov s0, s8
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    mov x2, x19
-; CHECK-NEXT:    mov x3, x20
-; CHECK-NEXT:    mov x4, x21
-; CHECK-NEXT:    mov x5, x22
-; CHECK-NEXT:    mov x6, x23
-; CHECK-NEXT:    mov x7, x24
-; CHECK-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x8, x25, x1, lt
-; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s8, s10
-; CHECK-NEXT:    ldp x22, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT:    mov x2, x21
+; CHECK-NEXT:    mov x3, x22
+; CHECK-NEXT:    mov x4, x23
+; CHECK-NEXT:    mov x5, x24
 ; CHECK-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp x24, x23, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x22, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT:    csel x8, xzr, x0, lt
+; CHECK-NEXT:    csel x9, x25, x1, lt
+; CHECK-NEXT:    fcmp s8, s10
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    mov x1, x20
 ; CHECK-NEXT:    ldr d10, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT:    csinv x9, x9, xzr, le
-; CHECK-NEXT:    csel x8, x26, x8, gt
+; CHECK-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT:    csel x9, x26, x9, gt
+; CHECK-NEXT:    csinv x8, x8, xzr, le
 ; CHECK-NEXT:    fcmp s8, s8
+; CHECK-NEXT:    ldp x24, x23, [sp, #64] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp x26, x25, [sp, #48] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x9, xzr, x9, vs
-; CHECK-NEXT:    csel x1, xzr, x8, vs
-; CHECK-NEXT:    fmov d0, x9
-; CHECK-NEXT:    mov v0.d[1], x1
-; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    csel x6, xzr, x8, vs
+; CHECK-NEXT:    csel x7, xzr, x9, vs
 ; CHECK-NEXT:    add sp, sp, #112
 ; CHECK-NEXT:    ret
     %x = call <4 x i128> @llvm.fptosi.sat.v4f16.v4i128(<4 x half> %f)

diff  --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
index b03d145d1408d..6089d76f7820c 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -729,37 +729,33 @@ define <2 x i100> @test_unsigned_v2f32_v2i100(<2 x float> %f) {
 ; CHECK-NEXT:    .cfi_offset b8, -40
 ; CHECK-NEXT:    .cfi_offset b9, -48
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov s8, v0.s[1]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    fmov s0, s8
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl __fixunssfti
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov w8, #1904214015 // =0x717fffff
-; CHECK-NEXT:    fcmp s8, #0.0
 ; CHECK-NEXT:    mov x21, #68719476735 // =0xfffffffff
 ; CHECK-NEXT:    fmov s9, w8
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT:    mov s8, v0.s[1]
+; CHECK-NEXT:    fcmp s0, #0.0
 ; CHECK-NEXT:    csel x8, xzr, x0, lt
 ; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp s8, s9
+; CHECK-NEXT:    fcmp s0, s9
+; CHECK-NEXT:    fmov s0, s8
 ; CHECK-NEXT:    csel x19, x21, x9, gt
 ; CHECK-NEXT:    csinv x20, x8, xzr, le
 ; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov x2, x20
-; CHECK-NEXT:    mov x3, x19
-; CHECK-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    fcmp s0, #0.0
+; CHECK-NEXT:    fcmp s8, #0.0
 ; CHECK-NEXT:    csel x8, xzr, x0, lt
 ; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp s0, s9
-; CHECK-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    csel x1, x21, x9, gt
-; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    fcmp s8, s9
+; CHECK-NEXT:    mov x0, x20
+; CHECK-NEXT:    mov x1, x19
+; CHECK-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    csel x3, x21, x9, gt
 ; CHECK-NEXT:    ldp x30, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    mov v0.d[1], x1
-; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    csinv x2, x8, xzr, le
 ; CHECK-NEXT:    add sp, sp, #64
 ; CHECK-NEXT:    ret
     %x = call <2 x i100> @llvm.fptoui.sat.v2f32.v2i100(<2 x float> %f)
@@ -780,36 +776,32 @@ define <2 x i128> @test_unsigned_v2f32_v2i128(<2 x float> %f) {
 ; CHECK-NEXT:    .cfi_offset b8, -40
 ; CHECK-NEXT:    .cfi_offset b9, -48
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov s8, v0.s[1]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    fmov s0, s8
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    mov w8, #2139095039 // =0x7f7fffff
-; CHECK-NEXT:    fcmp s8, #0.0
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    mov w8, #2139095039 // =0x7f7fffff
 ; CHECK-NEXT:    fmov s9, w8
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT:    mov s8, v0.s[1]
+; CHECK-NEXT:    fcmp s0, #0.0
 ; CHECK-NEXT:    csel x8, xzr, x1, lt
 ; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp s8, s9
+; CHECK-NEXT:    fcmp s0, s9
+; CHECK-NEXT:    fmov s0, s8
 ; CHECK-NEXT:    csinv x19, x9, xzr, le
 ; CHECK-NEXT:    csinv x20, x8, xzr, le
 ; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov x2, x19
-; CHECK-NEXT:    mov x3, x20
-; CHECK-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    fcmp s8, #0.0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
-; CHECK-NEXT:    fcmp s0, #0.0
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp s0, s9
+; CHECK-NEXT:    csel x8, xzr, x1, lt
+; CHECK-NEXT:    csel x9, xzr, x0, lt
+; CHECK-NEXT:    fcmp s8, s9
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    mov x1, x20
+; CHECK-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    csinv x1, x9, xzr, le
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    mov v0.d[1], x1
-; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    csinv x2, x9, xzr, le
+; CHECK-NEXT:    csinv x3, x8, xzr, le
 ; CHECK-NEXT:    add sp, sp, #64
 ; CHECK-NEXT:    ret
     %x = call <2 x i128> @llvm.fptoui.sat.v2f32.v2i128(<2 x float> %f)
@@ -935,13 +927,13 @@ define <4 x i64> @test_unsigned_v4f32_v4i64(<4 x float> %f) {
 define <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-LABEL: test_unsigned_v4f32_v4i100:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #112
-; CHECK-NEXT:    stp d9, d8, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x30, x25, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x24, x23, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x22, x21, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 112
+; CHECK-NEXT:    sub sp, sp, #96
+; CHECK-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x30, x25, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x24, x23, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 96
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24
@@ -952,23 +944,32 @@ define <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-NEXT:    .cfi_offset w30, -64
 ; CHECK-NEXT:    .cfi_offset b8, -72
 ; CHECK-NEXT:    .cfi_offset b9, -80
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT:    bl __fixunssfti
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    mov w8, #1904214015 // =0x717fffff
+; CHECK-NEXT:    mov x25, #68719476735 // =0xfffffffff
+; CHECK-NEXT:    fmov s9, w8
 ; CHECK-NEXT:    mov s8, v0.s[1]
-; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    fcmp s0, #0.0
+; CHECK-NEXT:    csel x8, xzr, x0, lt
+; CHECK-NEXT:    csel x9, xzr, x1, lt
+; CHECK-NEXT:    fcmp s0, s9
 ; CHECK-NEXT:    fmov s0, s8
+; CHECK-NEXT:    csel x19, x25, x9, gt
+; CHECK-NEXT:    csinv x20, x8, xzr, le
 ; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    mov w8, #1904214015 // =0x717fffff
 ; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    fmov s9, w8
-; CHECK-NEXT:    mov x25, #68719476735 // =0xfffffffff
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
 ; CHECK-NEXT:    csel x8, xzr, x0, lt
 ; CHECK-NEXT:    csel x9, xzr, x1, lt
 ; CHECK-NEXT:    fcmp s8, s9
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    csel x21, x25, x9, gt
+; CHECK-NEXT:    csinv x22, x8, xzr, le
 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT:    csel x19, x25, x9, gt
-; CHECK-NEXT:    csinv x20, x8, xzr, le
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s8, v0.s[1]
@@ -977,40 +978,27 @@ define <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-NEXT:    csel x9, xzr, x1, lt
 ; CHECK-NEXT:    fcmp s0, s9
 ; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    csel x21, x25, x9, gt
-; CHECK-NEXT:    csinv x22, x8, xzr, le
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp s8, s9
 ; CHECK-NEXT:    csel x23, x25, x9, gt
 ; CHECK-NEXT:    csinv x24, x8, xzr, le
 ; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    mov x2, x20
-; CHECK-NEXT:    mov x3, x19
-; CHECK-NEXT:    mov x4, x22
-; CHECK-NEXT:    mov x5, x21
-; CHECK-NEXT:    mov x6, x24
-; CHECK-NEXT:    fcmp s0, #0.0
-; CHECK-NEXT:    mov x7, x23
-; CHECK-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x22, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT:    fcmp s8, #0.0
+; CHECK-NEXT:    mov x2, x22
+; CHECK-NEXT:    mov x3, x21
+; CHECK-NEXT:    mov x4, x24
+; CHECK-NEXT:    mov x5, x23
+; CHECK-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
 ; CHECK-NEXT:    csel x8, xzr, x0, lt
 ; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp s0, s9
-; CHECK-NEXT:    ldp x24, x23, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    csel x1, x25, x9, gt
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    ldp x30, x25, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    mov v0.d[1], x1
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    add sp, sp, #112
+; CHECK-NEXT:    fcmp s8, s9
+; CHECK-NEXT:    mov x0, x20
+; CHECK-NEXT:    mov x1, x19
+; CHECK-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT:    csel x7, x25, x9, gt
+; CHECK-NEXT:    ldp x24, x23, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x30, x25, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    csinv x6, x8, xzr, le
+; CHECK-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #96
 ; CHECK-NEXT:    ret
     %x = call <4 x i100> @llvm.fptoui.sat.v4f32.v4i100(<4 x float> %f)
     ret <4 x i100> %x
@@ -1019,13 +1007,13 @@ define <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
 define <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) {
 ; CHECK-LABEL: test_unsigned_v4f32_v4i128:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #112
-; CHECK-NEXT:    stp d9, d8, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
-; CHECK-NEXT:    stp x24, x23, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x22, x21, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 112
+; CHECK-NEXT:    sub sp, sp, #96
+; CHECK-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x24, x23, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 96
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24
@@ -1035,22 +1023,31 @@ define <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) {
 ; CHECK-NEXT:    .cfi_offset w30, -64
 ; CHECK-NEXT:    .cfi_offset b8, -72
 ; CHECK-NEXT:    .cfi_offset b9, -80
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT:    bl __fixunssfti
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    mov w8, #2139095039 // =0x7f7fffff
+; CHECK-NEXT:    fmov s9, w8
 ; CHECK-NEXT:    mov s8, v0.s[1]
-; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    fcmp s0, #0.0
+; CHECK-NEXT:    csel x8, xzr, x1, lt
+; CHECK-NEXT:    csel x9, xzr, x0, lt
+; CHECK-NEXT:    fcmp s0, s9
 ; CHECK-NEXT:    fmov s0, s8
+; CHECK-NEXT:    csinv x19, x9, xzr, le
+; CHECK-NEXT:    csinv x20, x8, xzr, le
 ; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    mov w8, #2139095039 // =0x7f7fffff
 ; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    fmov s9, w8
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
 ; CHECK-NEXT:    csel x8, xzr, x1, lt
 ; CHECK-NEXT:    csel x9, xzr, x0, lt
 ; CHECK-NEXT:    fcmp s8, s9
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    csinv x21, x9, xzr, le
+; CHECK-NEXT:    csinv x22, x8, xzr, le
 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT:    csinv x19, x9, xzr, le
-; CHECK-NEXT:    csinv x20, x8, xzr, le
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s8, v0.s[1]
@@ -1059,40 +1056,27 @@ define <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) {
 ; CHECK-NEXT:    csel x9, xzr, x0, lt
 ; CHECK-NEXT:    fcmp s0, s9
 ; CHECK-NEXT:    fmov s0, s8
-; CHECK-NEXT:    csinv x21, x9, xzr, le
-; CHECK-NEXT:    csinv x22, x8, xzr, le
+; CHECK-NEXT:    csinv x23, x9, xzr, le
+; CHECK-NEXT:    csinv x24, x8, xzr, le
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT:    mov x2, x21
+; CHECK-NEXT:    mov x3, x22
+; CHECK-NEXT:    mov x4, x23
+; CHECK-NEXT:    mov x5, x24
+; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
 ; CHECK-NEXT:    csel x8, xzr, x1, lt
 ; CHECK-NEXT:    csel x9, xzr, x0, lt
 ; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    csinv x23, x9, xzr, le
-; CHECK-NEXT:    csinv x24, x8, xzr, le
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    mov x2, x19
-; CHECK-NEXT:    mov x3, x20
-; CHECK-NEXT:    mov x4, x21
-; CHECK-NEXT:    mov x5, x22
-; CHECK-NEXT:    mov x6, x23
-; CHECK-NEXT:    fcmp s0, #0.0
-; CHECK-NEXT:    mov x7, x24
-; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x22, x21, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp s0, s9
-; CHECK-NEXT:    ldp x24, x23, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    csinv x1, x9, xzr, le
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    mov v0.d[1], x1
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    add sp, sp, #112
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    mov x1, x20
+; CHECK-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x24, x23, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    csinv x6, x9, xzr, le
+; CHECK-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    csinv x7, x8, xzr, le
+; CHECK-NEXT:    add sp, sp, #96
 ; CHECK-NEXT:    ret
     %x = call <4 x i128> @llvm.fptoui.sat.v4f32.v4i128(<4 x float> %f)
     ret <4 x i128> %x
@@ -1261,37 +1245,33 @@ define <2 x i100> @test_unsigned_v2f64_v2i100(<2 x double> %f) {
 ; CHECK-NEXT:    .cfi_offset w30, -32
 ; CHECK-NEXT:    .cfi_offset b8, -40
 ; CHECK-NEXT:    .cfi_offset b9, -48
-; CHECK-NEXT:    mov d8, v0.d[1]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    fmov d0, d8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    bl __fixunsdfti
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov x8, #5057542381537067007 // =0x462fffffffffffff
-; CHECK-NEXT:    fcmp d8, #0.0
 ; CHECK-NEXT:    mov x21, #68719476735 // =0xfffffffff
 ; CHECK-NEXT:    fmov d9, x8
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    mov d8, v0.d[1]
+; CHECK-NEXT:    fcmp d0, #0.0
 ; CHECK-NEXT:    csel x8, xzr, x0, lt
 ; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp d8, d9
+; CHECK-NEXT:    fcmp d0, d9
+; CHECK-NEXT:    fmov d0, d8
 ; CHECK-NEXT:    csel x19, x21, x9, gt
 ; CHECK-NEXT:    csinv x20, x8, xzr, le
 ; CHECK-NEXT:    bl __fixunsdfti
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov x2, x20
-; CHECK-NEXT:    mov x3, x19
-; CHECK-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    fcmp d0, #0.0
+; CHECK-NEXT:    fcmp d8, #0.0
 ; CHECK-NEXT:    csel x8, xzr, x0, lt
 ; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp d0, d9
-; CHECK-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    csel x1, x21, x9, gt
-; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    fcmp d8, d9
+; CHECK-NEXT:    mov x0, x20
+; CHECK-NEXT:    mov x1, x19
+; CHECK-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    csel x3, x21, x9, gt
 ; CHECK-NEXT:    ldp x30, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    mov v0.d[1], x1
-; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    csinv x2, x8, xzr, le
 ; CHECK-NEXT:    add sp, sp, #64
 ; CHECK-NEXT:    ret
     %x = call <2 x i100> @llvm.fptoui.sat.v2f64.v2i100(<2 x double> %f)
@@ -1311,36 +1291,32 @@ define <2 x i128> @test_unsigned_v2f64_v2i128(<2 x double> %f) {
 ; CHECK-NEXT:    .cfi_offset w30, -32
 ; CHECK-NEXT:    .cfi_offset b8, -40
 ; CHECK-NEXT:    .cfi_offset b9, -48
-; CHECK-NEXT:    mov d8, v0.d[1]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    fmov d0, d8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    bl __fixunsdfti
-; CHECK-NEXT:    mov x8, #5183643171103440895 // =0x47efffffffffffff
-; CHECK-NEXT:    fcmp d8, #0.0
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    mov x8, #5183643171103440895 // =0x47efffffffffffff
 ; CHECK-NEXT:    fmov d9, x8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    mov d8, v0.d[1]
+; CHECK-NEXT:    fcmp d0, #0.0
 ; CHECK-NEXT:    csel x8, xzr, x1, lt
 ; CHECK-NEXT:    csel x9, xzr, x0, lt
-; CHECK-NEXT:    fcmp d8, d9
+; CHECK-NEXT:    fcmp d0, d9
+; CHECK-NEXT:    fmov d0, d8
 ; CHECK-NEXT:    csinv x19, x9, xzr, le
 ; CHECK-NEXT:    csinv x20, x8, xzr, le
 ; CHECK-NEXT:    bl __fixunsdfti
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov x2, x19
-; CHECK-NEXT:    mov x3, x20
-; CHECK-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    fcmp d8, #0.0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
-; CHECK-NEXT:    fcmp d0, #0.0
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp d0, d9
+; CHECK-NEXT:    csel x8, xzr, x1, lt
+; CHECK-NEXT:    csel x9, xzr, x0, lt
+; CHECK-NEXT:    fcmp d8, d9
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    mov x1, x20
+; CHECK-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    csinv x1, x9, xzr, le
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    mov v0.d[1], x1
-; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    csinv x2, x9, xzr, le
+; CHECK-NEXT:    csinv x3, x8, xzr, le
 ; CHECK-NEXT:    add sp, sp, #64
 ; CHECK-NEXT:    ret
     %x = call <2 x i128> @llvm.fptoui.sat.v2f64.v2i128(<2 x double> %f)
@@ -1570,7 +1546,7 @@ define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) {
 ; CHECK-NEXT:    .cfi_offset b8, -72
 ; CHECK-NEXT:    .cfi_offset b9, -80
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov h1, v0.h[2]
+; CHECK-NEXT:    mov h1, v0.h[1]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    fcvt s8, h1
 ; CHECK-NEXT:    fmov s0, s8
@@ -1580,7 +1556,7 @@ define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) {
 ; CHECK-NEXT:    fcmp s8, #0.0
 ; CHECK-NEXT:    fmov s9, w8
 ; CHECK-NEXT:    mov x25, #68719476735 // =0xfffffffff
-; CHECK-NEXT:    mov h0, v0.h[1]
+; CHECK-NEXT:    mov h0, v0.h[2]
 ; CHECK-NEXT:    csel x9, xzr, x0, lt
 ; CHECK-NEXT:    csel x8, xzr, x1, lt
 ; CHECK-NEXT:    fcmp s8, s9
@@ -1589,9 +1565,8 @@ define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) {
 ; CHECK-NEXT:    csinv x20, x9, xzr, le
 ; CHECK-NEXT:    fmov s0, s8
 ; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    mov h0, v0.h[3]
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    csel x8, xzr, x0, lt
 ; CHECK-NEXT:    csel x9, xzr, x1, lt
 ; CHECK-NEXT:    fcmp s8, s9
@@ -1600,8 +1575,9 @@ define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) {
 ; CHECK-NEXT:    csinv x22, x8, xzr, le
 ; CHECK-NEXT:    fmov s0, s8
 ; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    fcmp s8, #0.0
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    fcmp s8, #0.0
+; CHECK-NEXT:    mov h0, v0.h[3]
 ; CHECK-NEXT:    csel x8, xzr, x0, lt
 ; CHECK-NEXT:    csel x9, xzr, x1, lt
 ; CHECK-NEXT:    fcmp s8, s9
@@ -1611,25 +1587,22 @@ define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) {
 ; CHECK-NEXT:    fmov s0, s8
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    mov x2, x22
-; CHECK-NEXT:    mov x3, x21
-; CHECK-NEXT:    mov x4, x20
-; CHECK-NEXT:    mov x5, x19
-; CHECK-NEXT:    mov x6, x24
-; CHECK-NEXT:    mov x7, x23
+; CHECK-NEXT:    mov x2, x20
+; CHECK-NEXT:    mov x3, x19
+; CHECK-NEXT:    mov x4, x22
+; CHECK-NEXT:    mov x5, x21
 ; CHECK-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
 ; CHECK-NEXT:    csel x8, xzr, x0, lt
 ; CHECK-NEXT:    csel x9, xzr, x1, lt
 ; CHECK-NEXT:    fcmp s8, s9
+; CHECK-NEXT:    mov x0, x24
+; CHECK-NEXT:    mov x1, x23
 ; CHECK-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    csel x7, x25, x9, gt
 ; CHECK-NEXT:    ldp x24, x23, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    csel x1, x25, x9, gt
-; CHECK-NEXT:    fmov d0, x8
 ; CHECK-NEXT:    ldp x30, x25, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    csinv x6, x8, xzr, le
 ; CHECK-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    mov v0.d[1], x1
-; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    add sp, sp, #96
 ; CHECK-NEXT:    ret
     %x = call <4 x i100> @llvm.fptoui.sat.v4f16.v4i100(<4 x half> %f)
@@ -1656,16 +1629,15 @@ define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) {
 ; CHECK-NEXT:    .cfi_offset b8, -72
 ; CHECK-NEXT:    .cfi_offset b9, -80
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov h1, v0.h[1]
+; CHECK-NEXT:    fcvt s8, h0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    fcvt s8, h1
 ; CHECK-NEXT:    fmov s0, s8
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov w8, #2139095039 // =0x7f7fffff
 ; CHECK-NEXT:    fcmp s8, #0.0
 ; CHECK-NEXT:    fmov s9, w8
-; CHECK-NEXT:    mov h0, v0.h[2]
+; CHECK-NEXT:    mov h0, v0.h[1]
 ; CHECK-NEXT:    csel x9, xzr, x1, lt
 ; CHECK-NEXT:    csel x8, xzr, x0, lt
 ; CHECK-NEXT:    fcmp s8, s9
@@ -1676,7 +1648,7 @@ define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) {
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    mov h0, v0.h[3]
+; CHECK-NEXT:    mov h0, v0.h[2]
 ; CHECK-NEXT:    csel x8, xzr, x1, lt
 ; CHECK-NEXT:    csel x9, xzr, x0, lt
 ; CHECK-NEXT:    fcmp s8, s9
@@ -1685,8 +1657,9 @@ define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) {
 ; CHECK-NEXT:    csinv x22, x8, xzr, le
 ; CHECK-NEXT:    fmov s0, s8
 ; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    fcmp s8, #0.0
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    fcmp s8, #0.0
+; CHECK-NEXT:    mov h0, v0.h[3]
 ; CHECK-NEXT:    csel x8, xzr, x1, lt
 ; CHECK-NEXT:    csel x9, xzr, x0, lt
 ; CHECK-NEXT:    fcmp s8, s9
@@ -1696,25 +1669,22 @@ define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) {
 ; CHECK-NEXT:    fmov s0, s8
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    fcmp s8, #0.0
-; CHECK-NEXT:    mov x2, x19
-; CHECK-NEXT:    mov x3, x20
-; CHECK-NEXT:    mov x4, x21
-; CHECK-NEXT:    mov x5, x22
-; CHECK-NEXT:    mov x6, x23
-; CHECK-NEXT:    mov x7, x24
-; CHECK-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    csel x8, xzr, x0, lt
-; CHECK-NEXT:    csel x9, xzr, x1, lt
-; CHECK-NEXT:    fcmp s8, s9
-; CHECK-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    mov x2, x21
+; CHECK-NEXT:    mov x3, x22
+; CHECK-NEXT:    mov x4, x23
+; CHECK-NEXT:    mov x5, x24
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    csel x8, xzr, x1, lt
+; CHECK-NEXT:    csel x9, xzr, x0, lt
+; CHECK-NEXT:    fcmp s8, s9
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    mov x1, x20
+; CHECK-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp x24, x23, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    csinv x8, x8, xzr, le
-; CHECK-NEXT:    csinv x1, x9, xzr, le
-; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    csinv x6, x9, xzr, le
 ; CHECK-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    mov v0.d[1], x1
-; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    csinv x7, x8, xzr, le
 ; CHECK-NEXT:    add sp, sp, #96
 ; CHECK-NEXT:    ret
     %x = call <4 x i128> @llvm.fptoui.sat.v4f16.v4i128(<4 x half> %f)

diff  --git a/llvm/test/CodeGen/AArch64/nontemporal-load.ll b/llvm/test/CodeGen/AArch64/nontemporal-load.ll
index e93fb40897078..ed84ec2ba9eae 100644
--- a/llvm/test/CodeGen/AArch64/nontemporal-load.ll
+++ b/llvm/test/CodeGen/AArch64/nontemporal-load.ll
@@ -473,52 +473,37 @@ define <33 x i8> @test_ldnp_v33i8(ptr %A) {
 define <4 x i65> @test_ldnp_v4i65(ptr %A) {
 ; CHECK-LABEL: test_ldnp_v4i65:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ldp x8, x9, [x0, #8]
-; CHECK-NEXT:    ldr d0, [x0]
-; CHECK-NEXT:    ldr x10, [x0, #24]
+; CHECK-NEXT:    ldp x8, x9, [x0, #16]
 ; CHECK-NEXT:    ldrb w11, [x0, #32]
-; CHECK-NEXT:    and x1, x8, #0x1
-; CHECK-NEXT:    extr x2, x9, x8, #1
-; CHECK-NEXT:    extr x4, x10, x9, #2
-; CHECK-NEXT:    mov.d v0[1], x1
-; CHECK-NEXT:    extr x6, x11, x10, #3
-; CHECK-NEXT:    ubfx x3, x9, #1, #1
-; CHECK-NEXT:    ubfx x5, x10, #2, #1
+; CHECK-NEXT:    ldp x0, x10, [x0]
 ; CHECK-NEXT:    ubfx x7, x11, #3, #1
-; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    extr x4, x9, x8, #2
+; CHECK-NEXT:    extr x6, x11, x9, #3
+; CHECK-NEXT:    ubfx x3, x8, #1, #1
+; CHECK-NEXT:    extr x2, x8, x10, #1
+; CHECK-NEXT:    ubfx x5, x9, #2, #1
+; CHECK-NEXT:    and x1, x10, #0x1
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: test_ldnp_v4i65:
 ; CHECK-BE:       // %bb.0:
-; CHECK-BE-NEXT:    ldp x10, x9, [x0]
-; CHECK-BE-NEXT:    ldrb w8, [x0, #32]
-; CHECK-BE-NEXT:    ldp x12, x11, [x0, #16]
-; CHECK-BE-NEXT:    lsr x13, x10, #56
-; CHECK-BE-NEXT:    orr x7, x8, x11, lsl #8
-; CHECK-BE-NEXT:    extr x8, x10, x9, #56
-; CHECK-BE-NEXT:    extr x11, x12, x11, #56
-; CHECK-BE-NEXT:    lsr x14, x12, #56
-; CHECK-BE-NEXT:    extr x15, x9, x12, #56
-; CHECK-BE-NEXT:    lsr x10, x10, #59
-; CHECK-BE-NEXT:    extr x1, x13, x8, #3
-; CHECK-BE-NEXT:    lsr x8, x9, #56
-; CHECK-BE-NEXT:    ubfx x12, x12, #57, #1
-; CHECK-BE-NEXT:    ubfx x9, x9, #58, #1
-; CHECK-BE-NEXT:    extr x5, x14, x11, #1
-; CHECK-BE-NEXT:    and x11, x11, #0x1
-; CHECK-BE-NEXT:    fmov d0, x10
-; CHECK-BE-NEXT:    fmov d2, x12
-; CHECK-BE-NEXT:    fmov d3, x11
-; CHECK-BE-NEXT:    fmov d1, x9
-; CHECK-BE-NEXT:    extr x3, x8, x15, #2
-; CHECK-BE-NEXT:    mov v0.d[1], x1
-; CHECK-BE-NEXT:    mov v2.d[1], x5
-; CHECK-BE-NEXT:    mov v3.d[1], x7
-; CHECK-BE-NEXT:    mov v1.d[1], x3
-; CHECK-BE-NEXT:    fmov x0, d0
-; CHECK-BE-NEXT:    fmov x4, d2
-; CHECK-BE-NEXT:    fmov x6, d3
-; CHECK-BE-NEXT:    fmov x2, d1
+; CHECK-BE-NEXT:    ldp x9, x8, [x0]
+; CHECK-BE-NEXT:    ldrb w12, [x0, #32]
+; CHECK-BE-NEXT:    ldp x10, x11, [x0, #16]
+; CHECK-BE-NEXT:    extr x13, x9, x8, #56
+; CHECK-BE-NEXT:    lsr x14, x9, #56
+; CHECK-BE-NEXT:    lsr x16, x8, #56
+; CHECK-BE-NEXT:    extr x15, x8, x10, #56
+; CHECK-BE-NEXT:    orr x7, x12, x11, lsl #8
+; CHECK-BE-NEXT:    extr x11, x10, x11, #56
+; CHECK-BE-NEXT:    lsr x12, x10, #56
+; CHECK-BE-NEXT:    extr x1, x14, x13, #3
+; CHECK-BE-NEXT:    lsr x0, x9, #59
+; CHECK-BE-NEXT:    ubfx x2, x8, #58, #1
+; CHECK-BE-NEXT:    ubfx x4, x10, #57, #1
+; CHECK-BE-NEXT:    extr x3, x16, x15, #2
+; CHECK-BE-NEXT:    extr x5, x12, x11, #1
+; CHECK-BE-NEXT:    and x6, x11, #0x1
 ; CHECK-BE-NEXT:    ret
   %lv = load <4 x i65>, ptr %A, align 8, !nontemporal !0
   ret <4 x i65> %lv

diff  --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
index 84179d3694a9d..fa0447c2c5d79 100644
--- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
@@ -483,21 +483,18 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
 define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
 ; CHECK-LABEL: v2i128:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    adds x8, x0, x4
+; CHECK-NEXT:    adcs x9, x1, x5
+; CHECK-NEXT:    asr x10, x9, #63
+; CHECK-NEXT:    eor x11, x10, #0x8000000000000000
+; CHECK-NEXT:    csel x0, x10, x8, vs
+; CHECK-NEXT:    csel x1, x11, x9, vs
 ; CHECK-NEXT:    adds x8, x2, x6
 ; CHECK-NEXT:    adcs x9, x3, x7
 ; CHECK-NEXT:    asr x10, x9, #63
 ; CHECK-NEXT:    eor x11, x10, #0x8000000000000000
 ; CHECK-NEXT:    csel x2, x10, x8, vs
 ; CHECK-NEXT:    csel x3, x11, x9, vs
-; CHECK-NEXT:    adds x8, x0, x4
-; CHECK-NEXT:    adcs x9, x1, x5
-; CHECK-NEXT:    asr x10, x9, #63
-; CHECK-NEXT:    csel x8, x10, x8, vs
-; CHECK-NEXT:    eor x11, x10, #0x8000000000000000
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    csel x1, x11, x9, vs
-; CHECK-NEXT:    mov v0.d[1], x1
-; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
   %z = call <2 x i128> @llvm.sadd.sat.v2i128(<2 x i128> %x, <2 x i128> %y)
   ret <2 x i128> %z

diff  --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
index aca9e58c1a1ba..d8b2762cf15e9 100644
--- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
@@ -486,21 +486,18 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
 define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
 ; CHECK-LABEL: v2i128:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs x8, x0, x4
+; CHECK-NEXT:    sbcs x9, x1, x5
+; CHECK-NEXT:    asr x10, x9, #63
+; CHECK-NEXT:    eor x11, x10, #0x8000000000000000
+; CHECK-NEXT:    csel x0, x10, x8, vs
+; CHECK-NEXT:    csel x1, x11, x9, vs
 ; CHECK-NEXT:    subs x8, x2, x6
 ; CHECK-NEXT:    sbcs x9, x3, x7
 ; CHECK-NEXT:    asr x10, x9, #63
 ; CHECK-NEXT:    eor x11, x10, #0x8000000000000000
 ; CHECK-NEXT:    csel x2, x10, x8, vs
 ; CHECK-NEXT:    csel x3, x11, x9, vs
-; CHECK-NEXT:    subs x8, x0, x4
-; CHECK-NEXT:    sbcs x9, x1, x5
-; CHECK-NEXT:    asr x10, x9, #63
-; CHECK-NEXT:    csel x8, x10, x8, vs
-; CHECK-NEXT:    eor x11, x10, #0x8000000000000000
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    csel x1, x11, x9, vs
-; CHECK-NEXT:    mov v0.d[1], x1
-; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
   %z = call <2 x i128> @llvm.ssub.sat.v2i128(<2 x i128> %x, <2 x i128> %y)
   ret <2 x i128> %z

diff  --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
index 3c6c1f1618a95..afc0d8704ebac 100644
--- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
@@ -477,17 +477,14 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
 define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
 ; CHECK-LABEL: v2i128:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    adds x8, x0, x4
+; CHECK-NEXT:    adcs x9, x1, x5
+; CHECK-NEXT:    csinv x0, x8, xzr, lo
+; CHECK-NEXT:    csinv x1, x9, xzr, lo
 ; CHECK-NEXT:    adds x8, x2, x6
 ; CHECK-NEXT:    adcs x9, x3, x7
 ; CHECK-NEXT:    csinv x2, x8, xzr, lo
 ; CHECK-NEXT:    csinv x3, x9, xzr, lo
-; CHECK-NEXT:    adds x8, x0, x4
-; CHECK-NEXT:    adcs x9, x1, x5
-; CHECK-NEXT:    csinv x8, x8, xzr, lo
-; CHECK-NEXT:    csinv x1, x9, xzr, lo
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    mov v0.d[1], x1
-; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
   %z = call <2 x i128> @llvm.uadd.sat.v2i128(<2 x i128> %x, <2 x i128> %y)
   ret <2 x i128> %z

diff  --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
index 363c12e12cb8b..dfcbe96ea948a 100644
--- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
@@ -475,17 +475,14 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
 define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
 ; CHECK-LABEL: v2i128:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs x8, x0, x4
+; CHECK-NEXT:    sbcs x9, x1, x5
+; CHECK-NEXT:    csel x0, xzr, x8, lo
+; CHECK-NEXT:    csel x1, xzr, x9, lo
 ; CHECK-NEXT:    subs x8, x2, x6
 ; CHECK-NEXT:    sbcs x9, x3, x7
 ; CHECK-NEXT:    csel x2, xzr, x8, lo
 ; CHECK-NEXT:    csel x3, xzr, x9, lo
-; CHECK-NEXT:    subs x8, x0, x4
-; CHECK-NEXT:    sbcs x9, x1, x5
-; CHECK-NEXT:    csel x8, xzr, x8, lo
-; CHECK-NEXT:    csel x1, xzr, x9, lo
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    mov v0.d[1], x1
-; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
   %z = call <2 x i128> @llvm.usub.sat.v2i128(<2 x i128> %x, <2 x i128> %y)
   ret <2 x i128> %z

diff  --git a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
index 570834fb67010..81b6a6940a7d6 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
@@ -2318,43 +2318,41 @@ define arm_aapcs_vfpcc <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-NEXT:    vcmp.f32 s17, s20
 ; CHECK-NEXT:    strb.w r4, [r9, #49]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    mvnlt r3, #7
+; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s17, s17
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r3, #7
+; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s17, s22
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r3, #0
+; CHECK-NEXT:    movvs r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r1, #0
+; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    vcmp.f32 s17, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    lsrl r0, r1, #28
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r1, #-1
+; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    vcmp.f32 s17, s17
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r1, #0
-; CHECK-NEXT:    vmov q0[3], q0[1], r1, r3
+; CHECK-NEXT:    movvs r2, #0
+; CHECK-NEXT:    orr.w r1, r1, r2, lsl #4
 ; CHECK-NEXT:    vcmp.f32 s17, s22
-; CHECK-NEXT:    vmov r1, s1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    vcmp.f32 s17, s20
+; CHECK-NEXT:    strd r0, r1, [r9, #16]
+; CHECK-NEXT:    it lt
+; CHECK-NEXT:    mvnlt r3, #7
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    lsrl r0, r1, #28
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r2, #-1
+; CHECK-NEXT:    movgt r3, #7
 ; CHECK-NEXT:    vcmp.f32 s17, s17
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r2, #0
-; CHECK-NEXT:    orr.w r1, r1, r2, lsl #4
-; CHECK-NEXT:    strd r0, r1, [r9, #16]
+; CHECK-NEXT:    movvs r3, #0
 ; CHECK-NEXT:    and r1, r3, #15
 ; CHECK-NEXT:    lsrl r2, r1, #28
 ; CHECK-NEXT:    strb.w r2, [r9, #24]
@@ -3687,268 +3685,257 @@ define arm_aapcs_vfpcc <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #48
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI40_0
-; CHECK-NEXT:    vmov r5, r7, d8
-; CHECK-NEXT:    mov r10, r0
-; CHECK-NEXT:    vmov r9, r3, d0
-; CHECK-NEXT:    str r0, [sp, #40] @ 4-byte Spill
-; CHECK-NEXT:    str r3, [sp, #32] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    vmov r10, r9, d8
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    vmov r7, r3, d0
+; CHECK-NEXT:    str r3, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r9
+; CHECK-NEXT:    mov r2, r7
+; CHECK-NEXT:    str r7, [sp, #44] @ 4-byte Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    vldr d0, .LCPI40_1
-; CHECK-NEXT:    mov r6, r0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r9
 ; CHECK-NEXT:    vmov r8, r3, d0
 ; CHECK-NEXT:    mov r2, r8
 ; CHECK-NEXT:    mov r11, r3
 ; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r9
 ; CHECK-NEXT:    bl __fixdfti
 ; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    str r0, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT:    strd r1, r0, [sp, #8] @ 8-byte Folded Spill
 ; CHECK-NEXT:    csel r4, r2, r4, ne
-; CHECK-NEXT:    str r1, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT:    str r3, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    mov r2, r5
-; CHECK-NEXT:    mov r3, r7
-; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    str r3, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r9
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    cmp r5, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne.w r4, #-1
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    str.w r4, [r10, #8]
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    ldr r4, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    str.w r9, [sp, #44] @ 4-byte Spill
-; CHECK-NEXT:    mov r3, r4
+; CHECK-NEXT:    str r4, [r6, #8]
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    ldr r5, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT:    mov r1, r9
+; CHECK-NEXT:    mov r2, r7
+; CHECK-NEXT:    mov r3, r5
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    mov r10, r0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r9
 ; CHECK-NEXT:    mov r2, r8
 ; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    str.w r8, [sp, #32] @ 4-byte Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r2, r5
-; CHECK-NEXT:    mov r3, r7
-; CHECK-NEXT:    csel r6, r1, r0, ne
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    cmp.w r10, #0
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    csel r7, r1, r0, ne
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r9
+; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r6, #-1
+; CHECK-NEXT:    movne.w r7, #-1
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r6, #0
-; CHECK-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    mov r3, r4
-; CHECK-NEXT:    str r6, [r0, #4]
-; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    movne r7, #0
+; CHECK-NEXT:    str r7, [r6, #4]
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    ldr r2, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    mov r1, r9
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    str r6, [sp, #16] @ 4-byte Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r9
 ; CHECK-NEXT:    mov r2, r8
 ; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    mov r10, r8
+; CHECK-NEXT:    str.w r11, [sp, #36] @ 4-byte Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r2, r5
-; CHECK-NEXT:    mov r3, r7
-; CHECK-NEXT:    csel r6, r1, r0, ne
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    csel r7, r1, r0, ne
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r9
 ; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r6, #-1
-; CHECK-NEXT:    str r5, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    str r7, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    movne.w r7, #-1
+; CHECK-NEXT:    str.w r10, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    str.w r9, [sp, #24] @ 4-byte Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    vmov r9, r8, d9
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r6, #0
-; CHECK-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT:    str r6, [r0]
-; CHECK-NEXT:    ldr r6, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT:    ldr r2, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    movne r7, #0
+; CHECK-NEXT:    str r7, [r6]
+; CHECK-NEXT:    ldr r6, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    mov r2, r6
 ; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    mov r1, r8
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    ldr.w r10, [sp, #32] @ 4-byte Reload
 ; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r2, r10
 ; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    mov r4, r10
-; CHECK-NEXT:    str.w r10, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT:    mov r5, r11
-; CHECK-NEXT:    str.w r11, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r2, r10
 ; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    mov r11, r0
 ; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    mov r1, r8
 ; CHECK-NEXT:    bl __fixdfti
-; CHECK-NEXT:    mov r10, r3
-; CHECK-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT:    str r1, [sp, #36] @ 4-byte Spill
 ; CHECK-NEXT:    cmp.w r11, #0
-; CHECK-NEXT:    str r2, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    mvneq r10, #7
+; CHECK-NEXT:    strd r2, r3, [sp, #4] @ 8-byte Folded Spill
+; CHECK-NEXT:    csel r11, r1, r11, ne
+; CHECK-NEXT:    mov r4, r0
 ; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    mov r1, r8
 ; CHECK-NEXT:    mov r2, r9
 ; CHECK-NEXT:    mov r3, r8
 ; CHECK-NEXT:    cmp r7, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r10, #7
+; CHECK-NEXT:    movne.w r11, #-1
+; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r11, #0
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    ldr r5, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r6, r10
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    csel r10, r4, r0, ne
+; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r8
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r10, #-1
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne.w r10, #0
 ; CHECK-NEXT:    ldr r7, [sp, #44] @ 4-byte Reload
 ; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    ldr r3, [sp, #40] @ 4-byte Reload
 ; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    str.w r10, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    lsrl r10, r11, #28
 ; CHECK-NEXT:    mov r2, r7
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    mov r11, r0
+; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
 ; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r2, r4
+; CHECK-NEXT:    mov r2, r6
 ; CHECK-NEXT:    mov r3, r5
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    mov r2, r9
 ; CHECK-NEXT:    mov r3, r8
 ; CHECK-NEXT:    csel r4, r1, r0, ne
-; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    ldr r0, [sp] @ 4-byte Reload
 ; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    cmp.w r11, #0
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne.w r4, #-1
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    vmov q0[3], q0[1], r4, r10
+; CHECK-NEXT:    ldr r6, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    orr.w r0, r11, r4, lsl #4
 ; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    vmov r0, s1
 ; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r6
-; CHECK-NEXT:    mov r5, r6
-; CHECK-NEXT:    str r0, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT:    strd r10, r0, [r6, #16]
 ; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    ldr.w r11, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT:    mov r3, r11
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr r7, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    ldr r6, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    ldr r7, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT:    mov r10, r0
 ; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    mov r3, r5
 ; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r6
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    csel r11, r1, r0, ne
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r11, #-1
-; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    it eq
+; CHECK-NEXT:    mvneq r0, #7
+; CHECK-NEXT:    cmp.w r10, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r11, #0
-; CHECK-NEXT:    ldr r1, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT:    mov r4, r11
-; CHECK-NEXT:    ldr r2, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    lsrl r4, r1, #28
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    str r1, [sp, #36] @ 4-byte Spill
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    movne r0, #7
 ; CHECK-NEXT:    mov r5, r0
-; CHECK-NEXT:    mov r2, r7
 ; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r3, r6
-; CHECK-NEXT:    mov r7, r6
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    mov r2, r9
 ; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    csel r6, r1, r0, ne
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r6, #-1
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r6, #0
-; CHECK-NEXT:    ldr r0, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT:    and r1, r10, #15
-; CHECK-NEXT:    ldr r2, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT:    orr.w r0, r0, r6, lsl #4
-; CHECK-NEXT:    lsrl r6, r1, #28
-; CHECK-NEXT:    strd r4, r0, [r2, #16]
-; CHECK-NEXT:    mov r8, r2
-; CHECK-NEXT:    strb r6, [r2, #24]
-; CHECK-NEXT:    ldr r5, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    and r1, r5, #15
+; CHECK-NEXT:    mov r8, r6
+; CHECK-NEXT:    lsrl r4, r1, #28
+; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    strb r4, [r6, #24]
 ; CHECK-NEXT:    ldr r6, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    ldr r5, [sp, #24] @ 4-byte Reload
 ; CHECK-NEXT:    ldr r2, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT:    ldr r3, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r5
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    ldr r3, [sp, #36] @ 4-byte Reload
 ; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r2, r7
 ; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    ldr r0, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    ldr r0, [sp, #28] @ 4-byte Reload
 ; CHECK-NEXT:    it eq
 ; CHECK-NEXT:    mvneq r0, #7
 ; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r0, #7
 ; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r5
-; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r5
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-NEXT:    and r0, r4, #15
-; CHECK-NEXT:    orr.w r0, r0, r11, lsl #4
+; CHECK-NEXT:    orr.w r0, r0, r1, lsl #4
 ; CHECK-NEXT:    str.w r0, [r8, #12]
 ; CHECK-NEXT:    add sp, #48
 ; CHECK-NEXT:    vpop {d8, d9}
@@ -5433,7 +5420,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    .pad #32
 ; CHECK-NEXT:    sub sp, #32
 ; CHECK-NEXT:    vmov q4, q0
-; CHECK-NEXT:    mov r9, r0
+; CHECK-NEXT:    mov r11, r0
 ; CHECK-NEXT:    vcvtb.f32.f16 s21, s19
 ; CHECK-NEXT:    vcvtt.f32.f16 s24, s19
 ; CHECK-NEXT:    vmov r0, s21
@@ -5442,13 +5429,13 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    vcvtb.f32.f16 s30, s18
 ; CHECK-NEXT:    vldr s20, .LCPI50_2
 ; CHECK-NEXT:    vmov r8, s24
-; CHECK-NEXT:    vmov r4, s26
+; CHECK-NEXT:    vmov r9, s26
 ; CHECK-NEXT:    vcvtt.f32.f16 s22, s18
 ; CHECK-NEXT:    vmov r6, s28
-; CHECK-NEXT:    vmov r5, s30
+; CHECK-NEXT:    vmov r7, s30
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    vldr s18, .LCPI50_3
-; CHECK-NEXT:    mov r7, r3
+; CHECK-NEXT:    mov r5, r3
 ; CHECK-NEXT:    vcmp.f32 s21, s18
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s21, s20
@@ -5464,7 +5451,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    movvs r2, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s21, s20
-; CHECK-NEXT:    str.w r2, [r9, #83]
+; CHECK-NEXT:    str.w r2, [r11, #83]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
@@ -5476,7 +5463,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str.w r1, [r9, #79]
+; CHECK-NEXT:    str.w r1, [r11, #79]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    vcmp.f32 s21, s20
@@ -5487,11 +5474,11 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
-; CHECK-NEXT:    str.w r0, [r9, #75]
-; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    str.w r0, [r11, #75]
+; CHECK-NEXT:    mov r0, r7
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    vcmp.f32 s30, s18
-; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    mov r7, r3
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s30, s20
 ; CHECK-NEXT:    it lt
@@ -5506,7 +5493,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    movvs r2, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s30, s20
-; CHECK-NEXT:    str.w r2, [r9, #58]
+; CHECK-NEXT:    str.w r2, [r11, #58]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
@@ -5518,7 +5505,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str.w r1, [r9, #54]
+; CHECK-NEXT:    str.w r1, [r11, #54]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    vcmp.f32 s30, s20
@@ -5529,7 +5516,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
-; CHECK-NEXT:    str.w r0, [r9, #50]
+; CHECK-NEXT:    str.w r0, [r11, #50]
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    vcmp.f32 s28, s18
@@ -5548,7 +5535,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    movvs r2, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s28, s20
-; CHECK-NEXT:    str.w r2, [r9, #33]
+; CHECK-NEXT:    str.w r2, [r11, #33]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
@@ -5560,7 +5547,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str.w r1, [r9, #29]
+; CHECK-NEXT:    str.w r1, [r11, #29]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    vcmp.f32 s28, s20
@@ -5571,8 +5558,8 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
-; CHECK-NEXT:    str.w r0, [r9, #25]
-; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    str.w r0, [r11, #25]
+; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    vcmp.f32 s26, s18
 ; CHECK-NEXT:    str r3, [sp, #12] @ 4-byte Spill
@@ -5590,7 +5577,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    movvs r2, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s26, s20
-; CHECK-NEXT:    str.w r2, [r9, #8]
+; CHECK-NEXT:    str.w r2, [r11, #8]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
@@ -5602,7 +5589,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str.w r1, [r9, #4]
+; CHECK-NEXT:    str.w r1, [r11, #4]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    vcmp.f32 s26, s20
@@ -5613,7 +5600,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
-; CHECK-NEXT:    str.w r0, [r9]
+; CHECK-NEXT:    str.w r0, [r11]
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    vcmp.f32 s24, s18
@@ -5633,69 +5620,68 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    movvs r6, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    mvnlt r7, #7
+; CHECK-NEXT:    mvnlt r5, #7
 ; CHECK-NEXT:    vcmp.f32 s21, s20
-; CHECK-NEXT:    mov r11, r1
+; CHECK-NEXT:    mov r9, r1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r7, #7
+; CHECK-NEXT:    movgt r5, #7
 ; CHECK-NEXT:    vcmp.f32 s21, s21
-; CHECK-NEXT:    mov r10, r2
+; CHECK-NEXT:    mov r8, r2
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r7, #0
-; CHECK-NEXT:    and r0, r7, #15
+; CHECK-NEXT:    movvs r5, #0
+; CHECK-NEXT:    and r0, r5, #15
 ; CHECK-NEXT:    orr.w r1, r0, r6, lsl #4
 ; CHECK-NEXT:    vmov r0, s22
-; CHECK-NEXT:    str.w r1, [r9, #87]
+; CHECK-NEXT:    str.w r1, [r11, #87]
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    vcmp.f32 s22, s18
-; CHECK-NEXT:    mov r8, r0
+; CHECK-NEXT:    mov r10, r0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s22, s20
-; CHECK-NEXT:    str r2, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    str r2, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r5, r1
+; CHECK-NEXT:    str r3, [sp, #20] @ 4-byte Spill
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt.w r8, #0
+; CHECK-NEXT:    movlt.w r10, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s22, s22
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r8, #-1
+; CHECK-NEXT:    movgt.w r10, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s30, s18
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs.w r8, #0
+; CHECK-NEXT:    movvs.w r10, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    mvnlt r5, #7
+; CHECK-NEXT:    mvnlt r7, #7
 ; CHECK-NEXT:    vcmp.f32 s30, s20
-; CHECK-NEXT:    mov r4, r1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s30, s30
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r5, #7
+; CHECK-NEXT:    movgt r7, #7
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r5, #0
-; CHECK-NEXT:    and r0, r5, #15
-; CHECK-NEXT:    orr.w r0, r0, r8, lsl #4
+; CHECK-NEXT:    movvs r7, #0
+; CHECK-NEXT:    and r0, r7, #15
+; CHECK-NEXT:    orr.w r0, r0, r10, lsl #4
 ; CHECK-NEXT:    vcvtt.f32.f16 s30, s17
-; CHECK-NEXT:    str.w r0, [r9, #62]
+; CHECK-NEXT:    str.w r0, [r11, #62]
 ; CHECK-NEXT:    vmov r0, s30
-; CHECK-NEXT:    mov r7, r3
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    vcmp.f32 s30, s18
-; CHECK-NEXT:    str r1, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    str r2, [sp, #16] @ 4-byte Spill
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str r2, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    str r3, [sp, #28] @ 4-byte Spill
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    vcmp.f32 s30, s20
-; CHECK-NEXT:    vcvtt.f32.f16 s16, s16
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    vcmp.f32 s30, s30
+; CHECK-NEXT:    mov r4, r1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
@@ -5715,8 +5701,9 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
 ; CHECK-NEXT:    and r0, r0, #15
+; CHECK-NEXT:    vcvtt.f32.f16 s16, s16
 ; CHECK-NEXT:    orr.w r0, r0, r1, lsl #4
-; CHECK-NEXT:    str.w r0, [r9, #37]
+; CHECK-NEXT:    str.w r0, [r11, #37]
 ; CHECK-NEXT:    vmov r0, s16
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    vcmp.f32 s16, s18
@@ -5732,26 +5719,26 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
 ; CHECK-NEXT:    vcmp.f32 s26, s18
-; CHECK-NEXT:    ldr r5, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    ldr r7, [sp, #12] @ 4-byte Reload
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    mvnlt r5, #7
+; CHECK-NEXT:    mvnlt r7, #7
 ; CHECK-NEXT:    vcmp.f32 s26, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r5, #7
+; CHECK-NEXT:    movgt r7, #7
 ; CHECK-NEXT:    vcmp.f32 s26, s26
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r5, #0
+; CHECK-NEXT:    movvs r7, #0
 ; CHECK-NEXT:    vcmp.f32 s24, s18
-; CHECK-NEXT:    and r5, r5, #15
+; CHECK-NEXT:    and r7, r7, #15
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s24, s20
-; CHECK-NEXT:    orr.w r5, r5, r0, lsl #4
-; CHECK-NEXT:    str.w r5, [r9, #12]
+; CHECK-NEXT:    orr.w r7, r7, r0, lsl #4
+; CHECK-NEXT:    str.w r7, [r11, #12]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt.w r11, #0
+; CHECK-NEXT:    movlt.w r9, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    b.w .LBB50_3
 ; CHECK-NEXT:    .p2align 2
@@ -5766,181 +5753,176 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:  .LBB50_3:
 ; CHECK-NEXT:    vcmp.f32 s24, s24
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r11, #-1
+; CHECK-NEXT:    movgt.w r9, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s24, s18
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs.w r11, #0
+; CHECK-NEXT:    movvs.w r9, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt.w r10, #0
+; CHECK-NEXT:    movlt.w r8, #0
 ; CHECK-NEXT:    vcmp.f32 s24, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    lsrl r6, r11, #28
+; CHECK-NEXT:    lsrl r6, r9, #28
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r10, #-1
+; CHECK-NEXT:    movgt.w r8, #-1
 ; CHECK-NEXT:    vcmp.f32 s24, s24
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs.w r10, #0
-; CHECK-NEXT:    orr.w r5, r11, r10, lsl #4
-; CHECK-NEXT:    str.w r5, [r9, #95]
-; CHECK-NEXT:    str.w r6, [r9, #91]
+; CHECK-NEXT:    movvs.w r8, #0
+; CHECK-NEXT:    orr.w r7, r9, r8, lsl #4
+; CHECK-NEXT:    str.w r7, [r11, #95]
+; CHECK-NEXT:    str.w r6, [r11, #91]
 ; CHECK-NEXT:    vcmp.f32 s24, s18
-; CHECK-NEXT:    ldr r6, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    mvnlt r6, #7
+; CHECK-NEXT:    mvnlt r7, #7
 ; CHECK-NEXT:    vcmp.f32 s24, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r6, #7
+; CHECK-NEXT:    movgt r7, #7
 ; CHECK-NEXT:    vcmp.f32 s24, s24
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r6, #0
-; CHECK-NEXT:    and r5, r6, #15
+; CHECK-NEXT:    movvs r7, #0
+; CHECK-NEXT:    and r7, r7, #15
 ; CHECK-NEXT:    vcmp.f32 s22, s18
-; CHECK-NEXT:    lsrl r10, r5, #28
+; CHECK-NEXT:    lsrl r8, r7, #28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s22, s20
-; CHECK-NEXT:    strb.w r10, [r9, #99]
+; CHECK-NEXT:    strb.w r8, [r11, #99]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    mvnlt r7, #7
+; CHECK-NEXT:    movlt r5, #0
+; CHECK-NEXT:    vcmp.f32 s22, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s22, s22
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r7, #7
+; CHECK-NEXT:    movgt.w r5, #-1
+; CHECK-NEXT:    vcmp.f32 s22, s22
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s22, s18
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r7, #0
+; CHECK-NEXT:    movvs r5, #0
+; CHECK-NEXT:    vcmp.f32 s22, s18
+; CHECK-NEXT:    ldr r6, [sp, #4] @ 4-byte Reload
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r4, #0
+; CHECK-NEXT:    movlt r6, #0
 ; CHECK-NEXT:    vcmp.f32 s22, s20
+; CHECK-NEXT:    lsrl r10, r5, #28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r4, #-1
+; CHECK-NEXT:    movgt.w r6, #-1
 ; CHECK-NEXT:    vcmp.f32 s22, s22
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r4, #0
-; CHECK-NEXT:    vmov q0[3], q0[1], r4, r7
+; CHECK-NEXT:    movvs r6, #0
+; CHECK-NEXT:    orr.w r7, r5, r6, lsl #4
+; CHECK-NEXT:    str.w r7, [r11, #70]
+; CHECK-NEXT:    str.w r10, [r11, #66]
 ; CHECK-NEXT:    vcmp.f32 s22, s18
-; CHECK-NEXT:    ldr r4, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    vmov r5, s1
+; CHECK-NEXT:    ldr r7, [sp, #20] @ 4-byte Reload
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r4, #0
+; CHECK-NEXT:    mvnlt r7, #7
 ; CHECK-NEXT:    vcmp.f32 s22, s20
-; CHECK-NEXT:    lsrl r8, r5, #28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r4, #-1
+; CHECK-NEXT:    movgt r7, #7
 ; CHECK-NEXT:    vcmp.f32 s22, s22
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r4, #0
-; CHECK-NEXT:    orr.w r6, r5, r4, lsl #4
+; CHECK-NEXT:    movvs r7, #0
 ; CHECK-NEXT:    and r5, r7, #15
-; CHECK-NEXT:    lsrl r4, r5, #28
-; CHECK-NEXT:    str.w r6, [r9, #70]
-; CHECK-NEXT:    str.w r8, [r9, #66]
 ; CHECK-NEXT:    vcmp.f32 s30, s18
-; CHECK-NEXT:    strb.w r4, [r9, #74]
+; CHECK-NEXT:    lsrl r6, r5, #28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    ldr r4, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    vcmp.f32 s30, s20
+; CHECK-NEXT:    mov r5, r4
+; CHECK-NEXT:    strb.w r6, [r11, #74]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    mvnlt r4, #7
+; CHECK-NEXT:    movlt r5, #0
+; CHECK-NEXT:    vcmp.f32 s30, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r4, #7
+; CHECK-NEXT:    movgt.w r5, #-1
 ; CHECK-NEXT:    vcmp.f32 s30, s30
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r4, #0
+; CHECK-NEXT:    movvs r5, #0
+; CHECK-NEXT:    ldr r4, [sp] @ 4-byte Reload
 ; CHECK-NEXT:    vcmp.f32 s30, s18
-; CHECK-NEXT:    ldr r7, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    ldr r6, [sp, #16] @ 4-byte Reload
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r7, #0
+; CHECK-NEXT:    movlt r6, #0
 ; CHECK-NEXT:    vcmp.f32 s30, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    lsrl r4, r5, #28
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r7, #-1
+; CHECK-NEXT:    movgt.w r6, #-1
 ; CHECK-NEXT:    vcmp.f32 s30, s30
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r7, #0
-; CHECK-NEXT:    vmov q0[3], q0[1], r7, r4
+; CHECK-NEXT:    movvs r6, #0
+; CHECK-NEXT:    orr.w r7, r5, r6, lsl #4
+; CHECK-NEXT:    str.w r7, [r11, #45]
+; CHECK-NEXT:    str.w r4, [r11, #41]
 ; CHECK-NEXT:    vcmp.f32 s30, s18
-; CHECK-NEXT:    ldr.w r12, [sp] @ 4-byte Reload
-; CHECK-NEXT:    vmov r5, s1
-; CHECK-NEXT:    ldr r6, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    ldr r7, [sp, #28] @ 4-byte Reload
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r6, #0
+; CHECK-NEXT:    mvnlt r7, #7
 ; CHECK-NEXT:    vcmp.f32 s30, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    lsrl r12, r5, #28
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r6, #-1
+; CHECK-NEXT:    movgt r7, #7
 ; CHECK-NEXT:    vcmp.f32 s30, s30
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r6, #0
-; CHECK-NEXT:    orr.w r7, r5, r6, lsl #4
-; CHECK-NEXT:    and r5, r4, #15
+; CHECK-NEXT:    movvs r7, #0
+; CHECK-NEXT:    and r5, r7, #15
 ; CHECK-NEXT:    vcmp.f32 s16, s18
 ; CHECK-NEXT:    lsrl r6, r5, #28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s16, s20
-; CHECK-NEXT:    str.w r7, [r9, #45]
-; CHECK-NEXT:    str.w r12, [r9, #41]
-; CHECK-NEXT:    strb.w r6, [r9, #49]
+; CHECK-NEXT:    strb.w r6, [r11, #49]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    mvnlt r3, #7
+; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s16, s16
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r3, #7
+; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s16, s18
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r3, #0
+; CHECK-NEXT:    movvs r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r1, #0
+; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    vcmp.f32 s16, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    lsrl r0, r1, #28
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r1, #-1
+; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    vcmp.f32 s16, s16
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r1, #0
-; CHECK-NEXT:    vmov q0[3], q0[1], r1, r3
+; CHECK-NEXT:    movvs r2, #0
+; CHECK-NEXT:    orr.w r1, r1, r2, lsl #4
 ; CHECK-NEXT:    vcmp.f32 s16, s18
-; CHECK-NEXT:    vmov r1, s1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    vcmp.f32 s16, s20
+; CHECK-NEXT:    strd r0, r1, [r11, #16]
+; CHECK-NEXT:    it lt
+; CHECK-NEXT:    mvnlt r3, #7
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    lsrl r0, r1, #28
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r2, #-1
+; CHECK-NEXT:    movgt r3, #7
 ; CHECK-NEXT:    vcmp.f32 s16, s16
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r2, #0
-; CHECK-NEXT:    orr.w r1, r1, r2, lsl #4
-; CHECK-NEXT:    strd r0, r1, [r9, #16]
+; CHECK-NEXT:    movvs r3, #0
 ; CHECK-NEXT:    and r1, r3, #15
 ; CHECK-NEXT:    lsrl r2, r1, #28
-; CHECK-NEXT:    strb.w r2, [r9, #24]
+; CHECK-NEXT:    strb.w r2, [r11, #24]
 ; CHECK-NEXT:    add sp, #32
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    add sp, #4

diff  --git a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
index 2b6d0da531704..5ab184a066e49 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
@@ -1879,26 +1879,16 @@ define arm_aapcs_vfpcc <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-NEXT:    vcmp.f32 s17, #0
 ; CHECK-NEXT:    lsrl r6, r5, #28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s17, s20
 ; CHECK-NEXT:    strb.w r6, [r8, #49]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r3, #0
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s17, #0
-; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r3, #15
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    vcmp.f32 s17, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r1, #-1
-; CHECK-NEXT:    vmov q0[3], q0[1], r1, r3
 ; CHECK-NEXT:    vcmp.f32 s17, #0
-; CHECK-NEXT:    vmov r1, s1
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    lsrl r0, r1, #28
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    vcmp.f32 s17, s20
@@ -1906,7 +1896,15 @@ define arm_aapcs_vfpcc <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    orr.w r1, r1, r2, lsl #4
+; CHECK-NEXT:    vcmp.f32 s17, #0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s17, s20
 ; CHECK-NEXT:    strd r0, r1, [r8, #16]
+; CHECK-NEXT:    it lt
+; CHECK-NEXT:    movlt r3, #0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    it gt
+; CHECK-NEXT:    movgt r3, #15
 ; CHECK-NEXT:    and r1, r3, #15
 ; CHECK-NEXT:    lsrl r2, r1, #28
 ; CHECK-NEXT:    strb.w r2, [r8, #24]
@@ -2925,195 +2923,191 @@ define arm_aapcs_vfpcc <2 x i100> @test_unsigned_v2f64_v2i100(<2 x double> %f) {
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI40_0
 ; CHECK-NEXT:    vmov r6, r5, d8
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    vmov r2, r9, d0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vmov r2, r7, d0
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    mov r10, r2
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    mov r9, r2
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    vldr d0, .LCPI40_1
-; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    mov r11, r0
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    vmov r11, r3, d0
-; CHECK-NEXT:    str r3, [sp, #44] @ 4-byte Spill
-; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    str r2, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT:    mov r10, r3
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r8, r0
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    str r5, [sp, #36] @ 4-byte Spill
 ; CHECK-NEXT:    bl __fixunsdfti
-; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    cmp.w r8, #0
 ; CHECK-NEXT:    strd r1, r0, [sp, #20] @ 8-byte Folded Spill
-; CHECK-NEXT:    csel r0, r2, r4, ne
+; CHECK-NEXT:    csel r0, r2, r8, ne
 ; CHECK-NEXT:    str r3, [sp, #40] @ 4-byte Spill
-; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    cmp.w r11, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne.w r0, #-1
-; CHECK-NEXT:    str.w r0, [r8, #8]
+; CHECK-NEXT:    str r0, [r4, #8]
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    str r7, [sp, #12] @ 4-byte Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr r3, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r8, r0
 ; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r11, r6
+; CHECK-NEXT:    ldr r6, [sp, #44] @ 4-byte Reload
 ; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r7, r6
+; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    mov r2, r6
 ; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r7
 ; CHECK-NEXT:    csel r0, r1, r0, ne
-; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    cmp.w r8, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne.w r0, #-1
-; CHECK-NEXT:    str.w r0, [r8, #4]
-; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    str r0, [r4, #4]
+; CHECK-NEXT:    mov r0, r11
 ; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r6, r8
-; CHECK-NEXT:    strd r8, r7, [sp, #28] @ 8-byte Folded Spill
+; CHECK-NEXT:    strd r4, r11, [sp, #28] @ 8-byte Folded Spill
+; CHECK-NEXT:    str r5, [sp, #36] @ 4-byte Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    ldr r7, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    mov r0, r11
 ; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r5, r11
-; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r10
 ; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    vmov r8, r11, d9
 ; CHECK-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r2, r9
 ; CHECK-NEXT:    csel r0, r1, r0, ne
-; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    cmp r7, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne.w r0, #-1
-; CHECK-NEXT:    str r0, [r6]
-; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    str r0, [r4]
+; CHECK-NEXT:    ldr r4, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    mov r5, r9
+; CHECK-NEXT:    mov r3, r4
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r11
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    mov r2, r6
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    mov r2, r5
-; CHECK-NEXT:    mov r3, r7
-; CHECK-NEXT:    mov r6, r5
-; CHECK-NEXT:    str r5, [sp] @ 4-byte Spill
-; CHECK-NEXT:    mov r5, r7
+; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    mov r6, r10
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    mov r10, r0
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r11
 ; CHECK-NEXT:    bl __fixunsdfti
-; CHECK-NEXT:    cmp r7, #0
-; CHECK-NEXT:    strd r0, r2, [sp, #20] @ 8-byte Folded Spill
-; CHECK-NEXT:    csel r0, r3, r7, ne
-; CHECK-NEXT:    str r1, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r0, #15
-; CHECK-NEXT:    mov r7, r0
-; CHECK-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    add.w r12, sp, #16
+; CHECK-NEXT:    cmp.w r10, #0
+; CHECK-NEXT:    stm.w r12, {r0, r2, r3} @ 12-byte Folded Spill
+; CHECK-NEXT:    csel r9, r1, r10, ne
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    mov r3, r4
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r9, #-1
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    str r5, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r10, r4
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    ldr r5, [sp, #44] @ 4-byte Reload
 ; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    mov r2, r6
-; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    str r6, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    mov r2, r5
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    csel r0, r1, r0, ne
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r0, #-1
-; CHECK-NEXT:    vmov q0[3], q0[1], r0, r7
-; CHECK-NEXT:    vmov r0, s1
+; CHECK-NEXT:    mov r2, r7
+; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    csel r4, r1, r0, ne
+; CHECK-NEXT:    ldr r0, [sp] @ 4-byte Reload
 ; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    mov r6, r10
-; CHECK-NEXT:    str.w r10, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    str.w r9, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r4, #-1
 ; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    str r4, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    lsrl r4, r9, #28
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr r7, [sp] @ 4-byte Reload
-; CHECK-NEXT:    mov r10, r0
+; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    mov r2, r7
+; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    mov r3, r6
 ; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r2, r6
-; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    csel r4, r1, r0, ne
-; CHECK-NEXT:    cmp.w r10, #0
+; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    csel r6, r1, r0, ne
+; CHECK-NEXT:    cmp r7, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r4, #-1
-; CHECK-NEXT:    ldr r5, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    mov r10, r4
-; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    movne.w r6, #-1
+; CHECK-NEXT:    ldr r5, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    orr.w r0, r9, r6, lsl #4
 ; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    lsrl r10, r5, #28
+; CHECK-NEXT:    strd r4, r0, [r5, #16]
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    ldr.w r9, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    mov r2, r9
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r11
 ; CHECK-NEXT:    ldr.w r11, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT:    mov r6, r0
-; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r9, r7
-; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    ldr.w r8, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    mov r3, r8
 ; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r10
 ; CHECK-NEXT:    csel r0, r1, r0, ne
-; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r0, #-1
-; CHECK-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    orr.w r1, r5, r0, lsl #4
-; CHECK-NEXT:    strd r10, r1, [r2, #16]
-; CHECK-NEXT:    mov r8, r2
-; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    and r1, r1, #15
-; CHECK-NEXT:    lsrl r0, r1, #28
-; CHECK-NEXT:    strb r0, [r2, #24]
+; CHECK-NEXT:    movne r0, #15
+; CHECK-NEXT:    and r1, r0, #15
+; CHECK-NEXT:    lsrl r6, r1, #28
+; CHECK-NEXT:    strb r6, [r5, #24]
 ; CHECK-NEXT:    ldr r6, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT:    ldr r7, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT:    ldrd r3, r2, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT:    ldr r4, [sp, #36] @ 4-byte Reload
 ; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r1, r4
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    mov r10, r0
+; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    mov r3, r8
 ; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    ldr r1, [sp, #40] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    csel r0, r1, r0, ne
-; CHECK-NEXT:    cmp.w r10, #0
+; CHECK-NEXT:    cmp r7, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r0, #15
+; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
 ; CHECK-NEXT:    and r0, r0, #15
-; CHECK-NEXT:    orr.w r0, r0, r4, lsl #4
-; CHECK-NEXT:    str.w r0, [r8, #12]
+; CHECK-NEXT:    orr.w r0, r0, r1, lsl #4
+; CHECK-NEXT:    str r0, [r5, #12]
 ; CHECK-NEXT:    add sp, #48
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
@@ -4216,7 +4210,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    .pad #32
 ; CHECK-NEXT:    sub sp, #32
 ; CHECK-NEXT:    vmov q4, q0
-; CHECK-NEXT:    mov r9, r0
+; CHECK-NEXT:    mov r8, r0
 ; CHECK-NEXT:    vcvtb.f32.f16 s30, s19
 ; CHECK-NEXT:    vcvtb.f32.f16 s28, s18
 ; CHECK-NEXT:    vmov r0, s30
@@ -4224,14 +4218,14 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    vcvtb.f32.f16 s24, s16
 ; CHECK-NEXT:    vcvtb.f32.f16 s26, s17
 ; CHECK-NEXT:    vldr s20, .LCPI50_1
-; CHECK-NEXT:    vmov r8, s22
-; CHECK-NEXT:    vmov r5, s28
+; CHECK-NEXT:    vmov r4, s22
+; CHECK-NEXT:    vmov r7, s28
 ; CHECK-NEXT:    vcvtt.f32.f16 s18, s18
-; CHECK-NEXT:    vmov r4, s24
+; CHECK-NEXT:    vmov r9, s24
 ; CHECK-NEXT:    vmov r6, s26
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    vcmp.f32 s30, #0
-; CHECK-NEXT:    mov r7, r3
+; CHECK-NEXT:    mov r5, r3
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s30, s20
 ; CHECK-NEXT:    it lt
@@ -4242,7 +4236,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s30, s20
-; CHECK-NEXT:    str.w r2, [r9, #83]
+; CHECK-NEXT:    str.w r2, [r8, #83]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
@@ -4250,18 +4244,18 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str.w r1, [r9, #79]
+; CHECK-NEXT:    str.w r1, [r8, #79]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    vcmp.f32 s30, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    str.w r0, [r9, #75]
-; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    str.w r0, [r8, #75]
+; CHECK-NEXT:    mov r0, r7
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    vcmp.f32 s28, #0
-; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    mov r7, r3
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s28, s20
 ; CHECK-NEXT:    it lt
@@ -4272,7 +4266,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s28, s20
-; CHECK-NEXT:    str.w r2, [r9, #58]
+; CHECK-NEXT:    str.w r2, [r8, #58]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
@@ -4280,14 +4274,14 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str.w r1, [r9, #54]
+; CHECK-NEXT:    str.w r1, [r8, #54]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    vcmp.f32 s28, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    str.w r0, [r9, #50]
+; CHECK-NEXT:    str.w r0, [r8, #50]
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    vcmp.f32 s26, #0
@@ -4302,7 +4296,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s26, s20
-; CHECK-NEXT:    str.w r2, [r9, #33]
+; CHECK-NEXT:    str.w r2, [r8, #33]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
@@ -4310,18 +4304,18 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str.w r1, [r9, #29]
+; CHECK-NEXT:    str.w r1, [r8, #29]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    vcmp.f32 s26, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    str.w r0, [r9, #25]
-; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    str.w r0, [r8, #25]
+; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    vcmp.f32 s24, #0
-; CHECK-NEXT:    mov r4, r3
+; CHECK-NEXT:    str r3, [sp, #12] @ 4-byte Spill
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s24, s20
 ; CHECK-NEXT:    it lt
@@ -4332,7 +4326,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s24, s20
-; CHECK-NEXT:    str.w r2, [r9, #8]
+; CHECK-NEXT:    str.w r2, [r8, #8]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
@@ -4340,21 +4334,21 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str.w r1, [r9, #4]
+; CHECK-NEXT:    str.w r1, [r8, #4]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    vcmp.f32 s24, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    str.w r0, [r9]
-; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    str.w r0, [r8]
+; CHECK-NEXT:    mov r0, r4
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    vcmp.f32 s22, #0
 ; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s22, s20
-; CHECK-NEXT:    str r3, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    str r3, [sp, #8] @ 4-byte Spill
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r6, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
@@ -4363,177 +4357,174 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    movgt.w r6, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r7, #0
+; CHECK-NEXT:    movlt r5, #0
 ; CHECK-NEXT:    vcmp.f32 s30, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r7, #15
-; CHECK-NEXT:    and r0, r7, #15
-; CHECK-NEXT:    mov r11, r1
+; CHECK-NEXT:    movgt r5, #15
+; CHECK-NEXT:    and r0, r5, #15
+; CHECK-NEXT:    mov r9, r1
 ; CHECK-NEXT:    orr.w r1, r0, r6, lsl #4
 ; CHECK-NEXT:    vmov r0, s18
-; CHECK-NEXT:    mov r10, r2
-; CHECK-NEXT:    str.w r1, [r9, #87]
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    str.w r1, [r8, #87]
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    vcmp.f32 s18, #0
-; CHECK-NEXT:    mov r8, r0
+; CHECK-NEXT:    mov r10, r0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s18, s20
-; CHECK-NEXT:    str r1, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT:    mov r7, r3
-; CHECK-NEXT:    str r2, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    str r2, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r5, r1
+; CHECK-NEXT:    str r3, [sp, #20] @ 4-byte Spill
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt.w r8, #0
+; CHECK-NEXT:    movlt.w r10, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s28, #0
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r8, #-1
+; CHECK-NEXT:    movgt.w r10, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s28, s20
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r5, #0
+; CHECK-NEXT:    movlt r7, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r5, #15
-; CHECK-NEXT:    and r0, r5, #15
+; CHECK-NEXT:    movgt r7, #15
+; CHECK-NEXT:    and r0, r7, #15
 ; CHECK-NEXT:    vcvtt.f32.f16 s28, s17
-; CHECK-NEXT:    orr.w r0, r0, r8, lsl #4
-; CHECK-NEXT:    str.w r0, [r9, #62]
+; CHECK-NEXT:    orr.w r0, r0, r10, lsl #4
+; CHECK-NEXT:    str.w r0, [r8, #62]
 ; CHECK-NEXT:    vmov r0, s28
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    vcmp.f32 s28, #0
-; CHECK-NEXT:    str r1, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    str r2, [sp, #16] @ 4-byte Spill
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str r2, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    str r3, [sp, #28] @ 4-byte Spill
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    vcmp.f32 s28, s20
-; CHECK-NEXT:    vcvtt.f32.f16 s16, s16
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
+; CHECK-NEXT:    mov r11, r1
 ; CHECK-NEXT:    vcmp.f32 s26, #0
 ; CHECK-NEXT:    mov r1, r0
 ; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    ldr r0, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT:    vcmp.f32 s26, s20
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
+; CHECK-NEXT:    vcmp.f32 s26, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt r0, #15
 ; CHECK-NEXT:    and r0, r0, #15
+; CHECK-NEXT:    vcvtt.f32.f16 s16, s16
 ; CHECK-NEXT:    orr.w r0, r0, r1, lsl #4
-; CHECK-NEXT:    str.w r0, [r9, #37]
+; CHECK-NEXT:    str.w r0, [r8, #37]
 ; CHECK-NEXT:    vmov r0, s16
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    vcmp.f32 s16, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s16, s20
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
+; CHECK-NEXT:    vcmp.f32 s16, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s24, #0
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
+; CHECK-NEXT:    vcmp.f32 s24, #0
+; CHECK-NEXT:    ldr r7, [sp, #12] @ 4-byte Reload
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r4, #0
+; CHECK-NEXT:    movlt r7, #0
 ; CHECK-NEXT:    vcmp.f32 s24, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r4, #15
-; CHECK-NEXT:    and r5, r4, #15
+; CHECK-NEXT:    movgt r7, #15
+; CHECK-NEXT:    and r7, r7, #15
 ; CHECK-NEXT:    vcmp.f32 s22, #0
-; CHECK-NEXT:    orr.w r5, r5, r0, lsl #4
+; CHECK-NEXT:    orr.w r7, r7, r0, lsl #4
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str.w r5, [r9, #12]
+; CHECK-NEXT:    str.w r7, [r8, #12]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt.w r11, #0
+; CHECK-NEXT:    movlt.w r9, #0
 ; CHECK-NEXT:    vcmp.f32 s22, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r11, #-1
+; CHECK-NEXT:    movgt.w r9, #-1
 ; CHECK-NEXT:    vcmp.f32 s22, #0
-; CHECK-NEXT:    lsrl r6, r11, #28
+; CHECK-NEXT:    lsrl r6, r9, #28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt.w r10, #0
+; CHECK-NEXT:    movlt r4, #0
 ; CHECK-NEXT:    vcmp.f32 s22, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r10, #-1
-; CHECK-NEXT:    orr.w r5, r11, r10, lsl #4
-; CHECK-NEXT:    str.w r5, [r9, #95]
-; CHECK-NEXT:    str.w r6, [r9, #91]
+; CHECK-NEXT:    movgt.w r4, #-1
+; CHECK-NEXT:    orr.w r7, r9, r4, lsl #4
+; CHECK-NEXT:    str.w r7, [r8, #95]
+; CHECK-NEXT:    str.w r6, [r8, #91]
 ; CHECK-NEXT:    vcmp.f32 s22, #0
-; CHECK-NEXT:    ldr r6, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r6, #0
+; CHECK-NEXT:    movlt r7, #0
 ; CHECK-NEXT:    vcmp.f32 s22, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r6, #15
-; CHECK-NEXT:    and r5, r6, #15
+; CHECK-NEXT:    movgt r7, #15
+; CHECK-NEXT:    and r7, r7, #15
 ; CHECK-NEXT:    vcmp.f32 s18, #0
-; CHECK-NEXT:    lsrl r10, r5, #28
+; CHECK-NEXT:    lsrl r4, r7, #28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    strb.w r10, [r9, #99]
+; CHECK-NEXT:    strb.w r4, [r8, #99]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r7, #0
+; CHECK-NEXT:    movlt r5, #0
 ; CHECK-NEXT:    vcmp.f32 s18, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r7, #15
+; CHECK-NEXT:    movgt.w r5, #-1
 ; CHECK-NEXT:    vcmp.f32 s18, #0
-; CHECK-NEXT:    ldr r6, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    ldr r6, [sp, #4] @ 4-byte Reload
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    lsrl r10, r5, #28
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r6, #0
 ; CHECK-NEXT:    vcmp.f32 s18, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r6, #-1
-; CHECK-NEXT:    vmov q0[3], q0[1], r6, r7
+; CHECK-NEXT:    orr.w r7, r5, r6, lsl #4
+; CHECK-NEXT:    str.w r7, [r8, #70]
+; CHECK-NEXT:    str.w r10, [r8, #66]
 ; CHECK-NEXT:    vcmp.f32 s18, #0
-; CHECK-NEXT:    vmov r5, s1
-; CHECK-NEXT:    ldr r4, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    ldr r7, [sp, #20] @ 4-byte Reload
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    lsrl r8, r5, #28
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r4, #0
+; CHECK-NEXT:    movlt r7, #0
 ; CHECK-NEXT:    vcmp.f32 s18, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r4, #-1
-; CHECK-NEXT:    orr.w r6, r5, r4, lsl #4
+; CHECK-NEXT:    movgt r7, #15
 ; CHECK-NEXT:    and r5, r7, #15
-; CHECK-NEXT:    lsrl r4, r5, #28
-; CHECK-NEXT:    str.w r6, [r9, #70]
-; CHECK-NEXT:    str.w r8, [r9, #66]
 ; CHECK-NEXT:    vcmp.f32 s28, #0
-; CHECK-NEXT:    strb.w r4, [r9, #74]
+; CHECK-NEXT:    lsrl r6, r5, #28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    ldr r7, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    vcmp.f32 s28, s20
+; CHECK-NEXT:    strb.w r6, [r8, #74]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r7, #0
+; CHECK-NEXT:    movlt.w r11, #0
+; CHECK-NEXT:    vcmp.f32 s28, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r7, #15
-; CHECK-NEXT:    mov r12, r7
+; CHECK-NEXT:    movgt.w r11, #-1
+; CHECK-NEXT:    ldr r4, [sp] @ 4-byte Reload
 ; CHECK-NEXT:    vcmp.f32 s28, #0
-; CHECK-NEXT:    ldr r7, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    ldr r6, [sp, #16] @ 4-byte Reload
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    lsrl r4, r11, #28
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r7, #0
+; CHECK-NEXT:    movlt r6, #0
 ; CHECK-NEXT:    vcmp.f32 s28, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r7, #-1
 ; CHECK-NEXT:    b.w .LBB50_2
 ; CHECK-NEXT:    .p2align 2
 ; CHECK-NEXT:  @ %bb.1:
@@ -4541,46 +4532,34 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    .long 0x717fffff @ float 1.26765052E+30
 ; CHECK-NEXT:    .p2align 1
 ; CHECK-NEXT:  .LBB50_2:
-; CHECK-NEXT:    vmov q0[3], q0[1], r7, r12
-; CHECK-NEXT:    ldr r4, [sp] @ 4-byte Reload
-; CHECK-NEXT:    vmov r5, s1
-; CHECK-NEXT:    ldr r6, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    it gt
+; CHECK-NEXT:    movgt.w r6, #-1
+; CHECK-NEXT:    orr.w r7, r11, r6, lsl #4
+; CHECK-NEXT:    str.w r7, [r8, #45]
+; CHECK-NEXT:    str.w r4, [r8, #41]
 ; CHECK-NEXT:    vcmp.f32 s28, #0
-; CHECK-NEXT:    lsrl r4, r5, #28
+; CHECK-NEXT:    ldr r7, [sp, #28] @ 4-byte Reload
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r6, #0
+; CHECK-NEXT:    movlt r7, #0
 ; CHECK-NEXT:    vcmp.f32 s28, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r6, #-1
-; CHECK-NEXT:    orr.w r7, r5, r6, lsl #4
-; CHECK-NEXT:    and r5, r12, #15
+; CHECK-NEXT:    movgt r7, #15
+; CHECK-NEXT:    and r5, r7, #15
 ; CHECK-NEXT:    vcmp.f32 s16, #0
 ; CHECK-NEXT:    lsrl r6, r5, #28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s16, s20
-; CHECK-NEXT:    str.w r7, [r9, #45]
-; CHECK-NEXT:    str.w r4, [r9, #41]
-; CHECK-NEXT:    strb.w r6, [r9, #49]
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r3, #0
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s16, #0
-; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r3, #15
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    strb.w r6, [r8, #49]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    vcmp.f32 s16, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r1, #-1
-; CHECK-NEXT:    vmov q0[3], q0[1], r1, r3
 ; CHECK-NEXT:    vcmp.f32 s16, #0
-; CHECK-NEXT:    vmov r1, s1
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    lsrl r0, r1, #28
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    vcmp.f32 s16, s20
@@ -4588,10 +4567,18 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    orr.w r1, r1, r2, lsl #4
-; CHECK-NEXT:    strd r0, r1, [r9, #16]
+; CHECK-NEXT:    vcmp.f32 s16, #0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s16, s20
+; CHECK-NEXT:    strd r0, r1, [r8, #16]
+; CHECK-NEXT:    it lt
+; CHECK-NEXT:    movlt r3, #0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    it gt
+; CHECK-NEXT:    movgt r3, #15
 ; CHECK-NEXT:    and r1, r3, #15
 ; CHECK-NEXT:    lsrl r2, r1, #28
-; CHECK-NEXT:    strb.w r2, [r9, #24]
+; CHECK-NEXT:    strb.w r2, [r8, #24]
 ; CHECK-NEXT:    add sp, #32
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    add sp, #4

diff  --git a/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll b/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll
index f2c8440b177d8..55a621eaf4c9c 100644
--- a/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll
@@ -207,10 +207,8 @@ define arm_aapcs_vfpcc <1 x i64> @smax1i64(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-NEXT:    sbcs.w r12, r3, r1
 ; CHECK-NEXT:    cset r12, lt
 ; CHECK-NEXT:    cmp.w r12, #0
-; CHECK-NEXT:    csel r1, r1, r3, ne
 ; CHECK-NEXT:    csel r0, r0, r2, ne
-; CHECK-NEXT:    vmov q0[2], q0[0], r0, r1
-; CHECK-NEXT:    vmov r0, s0
+; CHECK-NEXT:    csel r1, r1, r3, ne
 ; CHECK-NEXT:    add sp, #8
 ; CHECK-NEXT:    bx lr
   %c = call <1 x i64> @llvm.smax.v1i64(<1 x i64> %a, <1 x i64> %b)
@@ -486,10 +484,8 @@ define arm_aapcs_vfpcc <1 x i64> @umax1i64(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-NEXT:    sbcs.w r12, r3, r1
 ; CHECK-NEXT:    cset r12, lo
 ; CHECK-NEXT:    cmp.w r12, #0
-; CHECK-NEXT:    csel r1, r1, r3, ne
 ; CHECK-NEXT:    csel r0, r0, r2, ne
-; CHECK-NEXT:    vmov q0[2], q0[0], r0, r1
-; CHECK-NEXT:    vmov r0, s0
+; CHECK-NEXT:    csel r1, r1, r3, ne
 ; CHECK-NEXT:    add sp, #8
 ; CHECK-NEXT:    bx lr
   %c = call <1 x i64> @llvm.umax.v1i64(<1 x i64> %a, <1 x i64> %b)
@@ -772,10 +768,8 @@ define arm_aapcs_vfpcc <1 x i64> @smin1i64(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-NEXT:    sbcs.w r12, r1, r3
 ; CHECK-NEXT:    cset r12, lt
 ; CHECK-NEXT:    cmp.w r12, #0
-; CHECK-NEXT:    csel r1, r1, r3, ne
 ; CHECK-NEXT:    csel r0, r0, r2, ne
-; CHECK-NEXT:    vmov q0[2], q0[0], r0, r1
-; CHECK-NEXT:    vmov r0, s0
+; CHECK-NEXT:    csel r1, r1, r3, ne
 ; CHECK-NEXT:    add sp, #8
 ; CHECK-NEXT:    bx lr
   %c = call <1 x i64> @llvm.smin.v1i64(<1 x i64> %a, <1 x i64> %b)
@@ -1051,10 +1045,8 @@ define arm_aapcs_vfpcc <1 x i64> @umin1i64(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-NEXT:    sbcs.w r12, r1, r3
 ; CHECK-NEXT:    cset r12, lo
 ; CHECK-NEXT:    cmp.w r12, #0
-; CHECK-NEXT:    csel r1, r1, r3, ne
 ; CHECK-NEXT:    csel r0, r0, r2, ne
-; CHECK-NEXT:    vmov q0[2], q0[0], r0, r1
-; CHECK-NEXT:    vmov r0, s0
+; CHECK-NEXT:    csel r1, r1, r3, ne
 ; CHECK-NEXT:    add sp, #8
 ; CHECK-NEXT:    bx lr
   %c = call <1 x i64> @llvm.umin.v1i64(<1 x i64> %a, <1 x i64> %b)

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vst3.ll b/llvm/test/CodeGen/Thumb2/mve-vst3.ll
index d80dd5a673e20..85317e1fe4626 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vst3.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vst3.ll
@@ -535,22 +535,20 @@ define void @vst3_v2i8(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    .pad #16
 ; CHECK-NEXT:    sub sp, #16
 ; CHECK-NEXT:    ldrb r2, [r0]
-; CHECK-NEXT:    mov r4, sp
-; CHECK-NEXT:    ldrb r3, [r0, #1]
-; CHECK-NEXT:    ldrb.w r12, [r0, #2]
-; CHECK-NEXT:    vmov q0[2], q0[0], r2, r3
-; CHECK-NEXT:    ldrb.w lr, [r0, #3]
-; CHECK-NEXT:    vmov r2, s0
-; CHECK-NEXT:    ldrb r5, [r0, #5]
+; CHECK-NEXT:    mov r5, sp
+; CHECK-NEXT:    ldrb r3, [r0, #2]
 ; CHECK-NEXT:    vmov.16 q0[0], r2
+; CHECK-NEXT:    ldrb.w r12, [r0, #1]
+; CHECK-NEXT:    ldrb.w lr, [r0, #3]
+; CHECK-NEXT:    vmov.16 q0[1], r3
+; CHECK-NEXT:    ldrb r4, [r0, #5]
 ; CHECK-NEXT:    ldrb r0, [r0, #4]
-; CHECK-NEXT:    vmov.16 q0[1], r12
 ; CHECK-NEXT:    vmov.16 q0[2], r0
 ; CHECK-NEXT:    add r0, sp, #8
-; CHECK-NEXT:    vmov.16 q0[3], r3
+; CHECK-NEXT:    vmov.16 q0[3], r12
 ; CHECK-NEXT:    vmov.16 q0[4], lr
-; CHECK-NEXT:    vmov.16 q0[5], r5
-; CHECK-NEXT:    vstrb.16 q0, [r4]
+; CHECK-NEXT:    vmov.16 q0[5], r4
+; CHECK-NEXT:    vstrb.16 q0, [r5]
 ; CHECK-NEXT:    vstrb.16 q0, [r0]
 ; CHECK-NEXT:    vldrh.u32 q0, [r0]
 ; CHECK-NEXT:    ldr r2, [sp]

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vst4.ll b/llvm/test/CodeGen/Thumb2/mve-vst4.ll
index f3a65c40031af..b36904495e878 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vst4.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vst4.ll
@@ -238,27 +238,23 @@ define void @vst4_v2i16(ptr %src, ptr %dst) {
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .save {r4, r5, r6, lr}
 ; CHECK-NEXT:    push {r4, r5, r6, lr}
-; CHECK-NEXT:    ldrh r3, [r0, #2]
-; CHECK-NEXT:    ldrh r2, [r0]
-; CHECK-NEXT:    ldrh.w r12, [r0, #10]
-; CHECK-NEXT:    ldrh.w lr, [r0, #4]
-; CHECK-NEXT:    vmov q1[2], q1[0], r2, r3
-; CHECK-NEXT:    ldrh r4, [r0, #12]
-; CHECK-NEXT:    ldrh r5, [r0, #6]
+; CHECK-NEXT:    ldrh r2, [r0, #4]
+; CHECK-NEXT:    ldrh r3, [r0, #8]
+; CHECK-NEXT:    ldrh.w r12, [r0, #12]
+; CHECK-NEXT:    ldrh.w lr, [r0, #2]
+; CHECK-NEXT:    ldrh r4, [r0, #6]
+; CHECK-NEXT:    ldrh r5, [r0, #10]
 ; CHECK-NEXT:    ldrh r6, [r0, #14]
-; CHECK-NEXT:    ldrh r0, [r0, #8]
-; CHECK-NEXT:    vmov q0[2], q0[0], r0, r12
-; CHECK-NEXT:    vmov r0, s4
-; CHECK-NEXT:    vmov.16 q1[0], r0
-; CHECK-NEXT:    vmov r0, s0
-; CHECK-NEXT:    vmov.16 q1[1], lr
-; CHECK-NEXT:    vmov.16 q1[2], r0
-; CHECK-NEXT:    vmov.16 q1[3], r4
-; CHECK-NEXT:    vmov.16 q1[4], r3
-; CHECK-NEXT:    vmov.16 q1[5], r5
-; CHECK-NEXT:    vmov.16 q1[6], r12
-; CHECK-NEXT:    vmov.16 q1[7], r6
-; CHECK-NEXT:    vstrh.16 q1, [r1]
+; CHECK-NEXT:    ldrh r0, [r0]
+; CHECK-NEXT:    vmov.16 q0[0], r0
+; CHECK-NEXT:    vmov.16 q0[1], r2
+; CHECK-NEXT:    vmov.16 q0[2], r3
+; CHECK-NEXT:    vmov.16 q0[3], r12
+; CHECK-NEXT:    vmov.16 q0[4], lr
+; CHECK-NEXT:    vmov.16 q0[5], r4
+; CHECK-NEXT:    vmov.16 q0[6], r5
+; CHECK-NEXT:    vmov.16 q0[7], r6
+; CHECK-NEXT:    vstrh.16 q0, [r1]
 ; CHECK-NEXT:    pop {r4, r5, r6, pc}
 entry:
   %l1 = load <2 x i16>, ptr %src, align 4
@@ -475,26 +471,22 @@ define void @vst4_v2i8(ptr %src, ptr %dst) {
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .save {r4, r5, r6, lr}
 ; CHECK-NEXT:    push {r4, r5, r6, lr}
-; CHECK-NEXT:    ldrb r4, [r0, #5]
-; CHECK-NEXT:    ldrb r5, [r0, #4]
-; CHECK-NEXT:    ldrb r2, [r0]
-; CHECK-NEXT:    ldrb r3, [r0, #1]
-; CHECK-NEXT:    vmov q0[2], q0[0], r5, r4
-; CHECK-NEXT:    vmov r5, s0
-; CHECK-NEXT:    ldrb.w r12, [r0, #2]
-; CHECK-NEXT:    vmov q0[2], q0[0], r2, r3
-; CHECK-NEXT:    ldrb.w lr, [r0, #3]
-; CHECK-NEXT:    vmov r2, s0
-; CHECK-NEXT:    ldrb r6, [r0, #7]
-; CHECK-NEXT:    vmov.16 q0[0], r2
-; CHECK-NEXT:    ldrb r0, [r0, #6]
-; CHECK-NEXT:    vmov.16 q0[1], r12
-; CHECK-NEXT:    vmov.16 q0[2], r5
-; CHECK-NEXT:    vmov.16 q0[3], r0
-; CHECK-NEXT:    vmov.16 q0[4], r3
-; CHECK-NEXT:    vmov.16 q0[5], lr
-; CHECK-NEXT:    vmov.16 q0[6], r4
-; CHECK-NEXT:    vmov.16 q0[7], r6
+; CHECK-NEXT:    ldrb r4, [r0]
+; CHECK-NEXT:    ldrb r6, [r0, #2]
+; CHECK-NEXT:    vmov.16 q0[0], r4
+; CHECK-NEXT:    ldrb r2, [r0, #4]
+; CHECK-NEXT:    vmov.16 q0[1], r6
+; CHECK-NEXT:    ldrb r3, [r0, #6]
+; CHECK-NEXT:    vmov.16 q0[2], r2
+; CHECK-NEXT:    ldrb r5, [r0, #1]
+; CHECK-NEXT:    vmov.16 q0[3], r3
+; CHECK-NEXT:    ldrb.w r12, [r0, #5]
+; CHECK-NEXT:    ldrb.w lr, [r0, #7]
+; CHECK-NEXT:    vmov.16 q0[4], r5
+; CHECK-NEXT:    ldrb r0, [r0, #3]
+; CHECK-NEXT:    vmov.16 q0[5], r0
+; CHECK-NEXT:    vmov.16 q0[6], r12
+; CHECK-NEXT:    vmov.16 q0[7], lr
 ; CHECK-NEXT:    vstrb.16 q0, [r1]
 ; CHECK-NEXT:    pop {r4, r5, r6, pc}
 entry:
