[llvm] [MVE] Expand64BitShift - handle all constant shift amounts less than 32 (RFC) (PR #81261)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 9 08:05:30 PST 2024


https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/81261

Expand64BitShift was always dropping to generic shift legalization if the shift amount type was wider than i64, even when the constant shift amount was actually very small. I've adjusted the constant bounds checks to work on APInt values so the comparison can always be performed, whatever the bitwidth of the shift amount type.

This results in the MVE long shift instructions being used more often, but it also looks like it prevents some additional combines from happening - I'd appreciate any suggestions on what needs to be done here: should the Expand64BitShift logic be changed to fall back to generic shift legalization more often?

This came about while I was trying to extend the DAGTypeLegalizer::ExpandShift* helpers, where I need to consistently use the legal shift amount type instead of reusing the shift amount type from the original, wider shift.
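
To make the intent of the one-line condition change concrete, here is a minimal sketch of what the adjusted bounds check boils down to (the helper name and signature are mine for illustration, not code from ARMISelLowering.cpp):

  #include "llvm/ADT/APInt.h"
  #include <optional>

  // Hypothetical helper: decide whether Expand64BitShift should bail out to
  // the default shift legalization instead of emitting the MVE long shifts.
  static bool useDefaultShiftLegalization(const std::optional<llvm::APInt> &ConstAmt,
                                          unsigned ShAmtTypeBits) {
    if (!ConstAmt)
      // Non-constant shift amount wider than i64: give up on the long shifts.
      return ShAmtTypeBits > 64;
    // A constant amount can be compared whatever its bitwidth; the long shift
    // path only handles amounts in the range [1, 31].
    return ConstAmt->isZero() || ConstAmt->uge(32);
  }

For example, with an i128 constant shift amount of 5, the old check bailed out purely because of the 128-bit value type, whereas the APInt comparison now lets the MVE long shift path handle it.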

>From 6791a42f9463b0a9d3470f4578edadc48411e668 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Fri, 9 Feb 2024 15:54:38 +0000
Subject: [PATCH] [MVE] Expand64BitShift - handle all constant shift amounts
 less than 32 (RFC)

Expand64BitShift was always dropping to generic shift legalization if the shift amount type was wider than i64, even when the constant shift amount was actually very small. I've adjusted the constant bounds checks to work on APInt values so the comparison can always be performed, whatever the bitwidth of the shift amount type.

This results in the MVE long shift instructions being used more often, but it also looks like it prevents some additional combines from happening - I'd appreciate any suggestions on what needs to be done here: should the Expand64BitShift logic be changed to fall back to generic shift legalization more often?

This came about while I was trying to extend the DAGTypeLegalizer::ExpandShift* helpers, where I need to consistently use the legal shift amount type instead of reusing the shift amount type from the original, wider shift.
---
 llvm/lib/Target/ARM/ARMISelLowering.cpp       |    4 +-
 .../CodeGen/Thumb2/mve-fptosi-sat-vector.ll   | 1703 +++++++++--------
 .../CodeGen/Thumb2/mve-fptoui-sat-vector.ll   | 1327 ++++++-------
 3 files changed, 1545 insertions(+), 1489 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index b5c4a8a322ea7f..b98006ed0cb3f4 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -6702,8 +6702,8 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
 
     // If the shift amount is greater than 32 or has a greater bitwidth than 64
     // then do the default optimisation
-    if (ShAmt->getValueType(0).getSizeInBits() > 64 ||
-        (Con && (Con->getZExtValue() == 0 || Con->getZExtValue() >= 32)))
+    if ((!Con && ShAmt->getValueType(0).getSizeInBits() > 64) ||
+        (Con && (Con->getAPIntValue() == 0 || Con->getAPIntValue().uge(32))))
       return SDValue();
 
     // Extract the lower 32 bits of the shift amount if it's not an i32
diff --git a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
index 3ca01cfa3a8f77..570834fb670100 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
@@ -1821,44 +1821,42 @@ define arm_aapcs_vfpcc <4 x i32> @test_signed_v4f32_v4i32_duplicate(<4 x float>
 define arm_aapcs_vfpcc <4 x i50> @test_signed_v4f32_v4i50(<4 x float> %f) {
 ; CHECK-LABEL: test_signed_v4f32_v4i50:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-NEXT:    .pad #4
-; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, lr}
 ; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11}
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    vmov r0, s17
+; CHECK-NEXT:    vmov r0, s18
 ; CHECK-NEXT:    bl __aeabi_f2lz
 ; CHECK-NEXT:    mov r9, r0
 ; CHECK-NEXT:    vmov r0, s19
 ; CHECK-NEXT:    vldr s20, .LCPI28_0
-; CHECK-NEXT:    mov r7, r1
-; CHECK-NEXT:    vmov r4, s16
-; CHECK-NEXT:    vcmp.f32 s17, s20
+; CHECK-NEXT:    mov r5, r1
+; CHECK-NEXT:    vmov r6, s16
+; CHECK-NEXT:    vcmp.f32 s18, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itt lt
-; CHECK-NEXT:    movlt r7, #0
-; CHECK-NEXT:    movtlt r7, #65534
+; CHECK-NEXT:    movlt r5, #0
+; CHECK-NEXT:    movtlt r5, #65534
 ; CHECK-NEXT:    bl __aeabi_f2lz
 ; CHECK-NEXT:    vldr s22, .LCPI28_1
 ; CHECK-NEXT:    vcmp.f32 s19, s20
-; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    mov r4, r0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    mov r5, r1
-; CHECK-NEXT:    mov r0, r4
-; CHECK-NEXT:    vcmp.f32 s17, s22
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    vcmp.f32 s18, s22
 ; CHECK-NEXT:    itt lt
-; CHECK-NEXT:    movlt r5, #0
-; CHECK-NEXT:    movtlt r5, #65534
+; CHECK-NEXT:    movlt r7, #0
+; CHECK-NEXT:    movtlt r7, #65534
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itt gt
-; CHECK-NEXT:    movwgt r7, #65535
-; CHECK-NEXT:    movtgt r7, #1
+; CHECK-NEXT:    movwgt r5, #65535
+; CHECK-NEXT:    movtgt r5, #1
 ; CHECK-NEXT:    bl __aeabi_f2lz
 ; CHECK-NEXT:    vcmp.f32 s16, s20
-; CHECK-NEXT:    mov r4, r1
+; CHECK-NEXT:    mov r10, r1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s19, s22
 ; CHECK-NEXT:    it lt
@@ -1866,109 +1864,103 @@ define arm_aapcs_vfpcc <4 x i50> @test_signed_v4f32_v4i50(<4 x float> %f) {
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s16, s22
 ; CHECK-NEXT:    itt gt
-; CHECK-NEXT:    movwgt r5, #65535
-; CHECK-NEXT:    movtgt r5, #1
+; CHECK-NEXT:    movwgt r7, #65535
+; CHECK-NEXT:    movtgt r7, #1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s16, s16
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
+; CHECK-NEXT:    vcmp.f32 s16, s16
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s19, s20
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
+; CHECK-NEXT:    str.w r0, [r8]
+; CHECK-NEXT:    vmov r0, s17
+; CHECK-NEXT:    vcmp.f32 s19, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s19, s22
-; CHECK-NEXT:    str.w r0, [r8]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r6, #0
+; CHECK-NEXT:    movlt r4, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s19, s19
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r6, #-1
+; CHECK-NEXT:    movgt.w r4, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s17, s17
+; CHECK-NEXT:    vcmp.f32 s18, s20
 ; CHECK-NEXT:    itt vs
-; CHECK-NEXT:    movvs r6, #0
-; CHECK-NEXT:    movvs r5, #0
-; CHECK-NEXT:    lsls r0, r5, #22
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s17, s20
-; CHECK-NEXT:    orr.w r0, r0, r6, lsr #10
-; CHECK-NEXT:    str.w r0, [r8, #20]
-; CHECK-NEXT:    it vs
+; CHECK-NEXT:    movvs r4, #0
 ; CHECK-NEXT:    movvs r7, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt.w r9, #0
-; CHECK-NEXT:    vcmp.f32 s17, s22
+; CHECK-NEXT:    vcmp.f32 s18, s22
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r9, #-1
-; CHECK-NEXT:    vcmp.f32 s17, s17
+; CHECK-NEXT:    vcmp.f32 s18, s18
+; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    it vs
+; CHECK-NEXT:    itt vs
 ; CHECK-NEXT:    movvs.w r9, #0
-; CHECK-NEXT:    lsr.w r0, r9, #14
-; CHECK-NEXT:    orr.w r1, r0, r7, lsl #18
-; CHECK-NEXT:    vmov r0, s18
-; CHECK-NEXT:    str.w r1, [r8, #8]
+; CHECK-NEXT:    movvs r5, #0
+; CHECK-NEXT:    bfc r1, #18, #14
+; CHECK-NEXT:    vcmp.f32 s16, s20
+; CHECK-NEXT:    bfc r5, #18, #14
+; CHECK-NEXT:    mov r6, r9
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    lsll r4, r1, #22
+; CHECK-NEXT:    lsrl r6, r5, #28
+; CHECK-NEXT:    itt lt
+; CHECK-NEXT:    movwlt r10, #0
+; CHECK-NEXT:    movtlt r10, #65534
+; CHECK-NEXT:    vcmp.f32 s16, s22
+; CHECK-NEXT:    orrs r1, r5
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    itt gt
+; CHECK-NEXT:    movwgt r10, #65535
+; CHECK-NEXT:    movtgt r10, #1
+; CHECK-NEXT:    str.w r1, [r8, #20]
 ; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    vcmp.f32 s18, s20
-; CHECK-NEXT:    lsrs r2, r5, #10
+; CHECK-NEXT:    vcmp.f32 s17, s20
+; CHECK-NEXT:    orr.w r2, r6, r4
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, s22
 ; CHECK-NEXT:    itt lt
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movtlt r1, #65534
+; CHECK-NEXT:    vcmp.f32 s17, s22
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s16, s20
 ; CHECK-NEXT:    itt gt
 ; CHECK-NEXT:    movwgt r1, #65535
 ; CHECK-NEXT:    movtgt r1, #1
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s16, s22
-; CHECK-NEXT:    itt lt
-; CHECK-NEXT:    movlt r4, #0
-; CHECK-NEXT:    movtlt r4, #65534
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, s20
-; CHECK-NEXT:    itt gt
-; CHECK-NEXT:    movwgt r4, #65535
-; CHECK-NEXT:    movtgt r4, #1
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, s22
+; CHECK-NEXT:    str.w r2, [r8, #16]
+; CHECK-NEXT:    lsrs r2, r7, #10
+; CHECK-NEXT:    vcmp.f32 s17, s20
 ; CHECK-NEXT:    strb.w r2, [r8, #24]
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
+; CHECK-NEXT:    vcmp.f32 s17, s22
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, s18
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
+; CHECK-NEXT:    vcmp.f32 s17, s17
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    ubfx r2, r7, #14, #4
-; CHECK-NEXT:    vcmp.f32 s16, s16
-; CHECK-NEXT:    it vs
+; CHECK-NEXT:    itt vs
 ; CHECK-NEXT:    movvs r0, #0
-; CHECK-NEXT:    orr.w r2, r2, r0, lsl #4
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str.w r2, [r8, #12]
-; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r4, #0
-; CHECK-NEXT:    vcmp.f32 s18, s18
-; CHECK-NEXT:    bfc r4, #18, #14
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    orr.w r2, r4, r9, lsl #18
-; CHECK-NEXT:    str.w r2, [r8, #4]
-; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r1, #0
-; CHECK-NEXT:    lsrs r0, r0, #28
 ; CHECK-NEXT:    bfc r1, #18, #14
-; CHECK-NEXT:    orr.w r0, r0, r1, lsl #4
-; CHECK-NEXT:    orr.w r0, r0, r6, lsl #22
-; CHECK-NEXT:    str.w r0, [r8, #16]
+; CHECK-NEXT:    mov r2, r0
+; CHECK-NEXT:    lsrl r2, r1, #14
+; CHECK-NEXT:    vcmp.f32 s16, s16
+; CHECK-NEXT:    orr.w r1, r1, r9, lsl #4
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    strd r2, r1, [r8, #8]
+; CHECK-NEXT:    it vs
+; CHECK-NEXT:    movvs.w r10, #0
+; CHECK-NEXT:    bfc r10, #18, #14
+; CHECK-NEXT:    orr.w r0, r10, r0, lsl #18
+; CHECK-NEXT:    str.w r0, [r8, #4]
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11}
-; CHECK-NEXT:    add sp, #4
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
 ; CHECK-NEXT:    .p2align 2
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI28_0:
@@ -2120,21 +2112,22 @@ define arm_aapcs_vfpcc <4 x i64> @test_signed_v4f32_v4i64(<4 x float> %f) {
 define arm_aapcs_vfpcc <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-LABEL: test_signed_v4f32_v4i100:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
-; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-NEXT:    .pad #4
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11}
 ; CHECK-NEXT:    vmov q4, q0
-; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r9, r0
 ; CHECK-NEXT:    vmov r0, s18
-; CHECK-NEXT:    vldr s20, .LCPI30_0
-; CHECK-NEXT:    vmov r7, s19
-; CHECK-NEXT:    vmov r5, s16
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    vldr s22, .LCPI30_1
-; CHECK-NEXT:    mov r6, r3
+; CHECK-NEXT:    mov r10, r3
+; CHECK-NEXT:    vmov r3, s16
+; CHECK-NEXT:    vldr s22, .LCPI30_0
+; CHECK-NEXT:    vmov r7, s17
+; CHECK-NEXT:    vldr s20, .LCPI30_1
+; CHECK-NEXT:    vmov r4, s19
 ; CHECK-NEXT:    vcmp.f32 s18, s22
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s18, s20
@@ -2150,7 +2143,7 @@ define arm_aapcs_vfpcc <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-NEXT:    movvs r2, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s18, s20
-; CHECK-NEXT:    str.w r2, [r4, #33]
+; CHECK-NEXT:    str.w r2, [r9, #33]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
@@ -2162,7 +2155,7 @@ define arm_aapcs_vfpcc <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str.w r1, [r4, #29]
+; CHECK-NEXT:    str.w r1, [r9, #29]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    vcmp.f32 s18, s20
@@ -2173,11 +2166,11 @@ define arm_aapcs_vfpcc <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
-; CHECK-NEXT:    str.w r0, [r4, #25]
-; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    str.w r0, [r9, #25]
+; CHECK-NEXT:    mov r0, r3
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    vcmp.f32 s16, s22
-; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    mov r11, r3
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s16, s20
 ; CHECK-NEXT:    it lt
@@ -2192,7 +2185,7 @@ define arm_aapcs_vfpcc <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-NEXT:    movvs r2, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s16, s20
-; CHECK-NEXT:    str r2, [r4, #8]
+; CHECK-NEXT:    str.w r2, [r9, #8]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
@@ -2204,7 +2197,7 @@ define arm_aapcs_vfpcc <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str r1, [r4, #4]
+; CHECK-NEXT:    str.w r1, [r9, #4]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    vcmp.f32 s16, s20
@@ -2215,165 +2208,165 @@ define arm_aapcs_vfpcc <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
-; CHECK-NEXT:    str r0, [r4]
-; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    str.w r0, [r9]
+; CHECK-NEXT:    mov r0, r4
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    vcmp.f32 s19, s22
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s19, s20
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r1, #0
+; CHECK-NEXT:    movlt r6, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s19, s19
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r1, #-1
+; CHECK-NEXT:    movgt.w r6, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s19, s22
+; CHECK-NEXT:    vcmp.f32 s18, s22
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r1, #0
+; CHECK-NEXT:    movvs r6, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s19, s20
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r2, #0
+; CHECK-NEXT:    mvnlt r10, #7
+; CHECK-NEXT:    vcmp.f32 s18, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s19, s19
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r2, #-1
+; CHECK-NEXT:    movgt.w r10, #7
+; CHECK-NEXT:    vcmp.f32 s18, s18
+; CHECK-NEXT:    mov r5, r1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    lsr.w r7, r1, #28
-; CHECK-NEXT:    vcmp.f32 s19, s22
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r2, #0
-; CHECK-NEXT:    orr.w r7, r7, r2, lsl #4
+; CHECK-NEXT:    movvs.w r10, #0
+; CHECK-NEXT:    and r0, r10, #15
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    orr.w r0, r0, r6, lsl #4
+; CHECK-NEXT:    str.w r0, [r9, #37]
+; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    mov r8, r3
+; CHECK-NEXT:    bl __fixsfti
+; CHECK-NEXT:    vcmp.f32 s17, s22
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str.w r7, [r4, #45]
+; CHECK-NEXT:    vcmp.f32 s17, s20
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    vcmp.f32 s19, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s17, s17
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    vcmp.f32 s19, s19
-; CHECK-NEXT:    lsrs r2, r2, #28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s16, s22
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
-; CHECK-NEXT:    lsrs r7, r0, #28
-; CHECK-NEXT:    vcmp.f32 s19, s22
-; CHECK-NEXT:    orr.w r7, r7, r1, lsl #4
-; CHECK-NEXT:    vmov r1, s17
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s19, s20
-; CHECK-NEXT:    str.w r7, [r4, #41]
+; CHECK-NEXT:    vcmp.f32 s16, s20
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    mvnlt r3, #7
+; CHECK-NEXT:    mvnlt r11, #7
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s19, s19
+; CHECK-NEXT:    vcmp.f32 s16, s16
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r3, #7
+; CHECK-NEXT:    movgt.w r11, #7
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, s22
+; CHECK-NEXT:    vcmp.f32 s19, s22
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r3, #0
-; CHECK-NEXT:    orr.w r2, r2, r3, lsl #4
+; CHECK-NEXT:    movvs.w r11, #0
+; CHECK-NEXT:    and r7, r11, #15
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    strb.w r2, [r4, #49]
+; CHECK-NEXT:    vcmp.f32 s19, s20
+; CHECK-NEXT:    orr.w r7, r7, r0, lsl #4
+; CHECK-NEXT:    str.w r7, [r9, #12]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    mvnlt r6, #7
-; CHECK-NEXT:    vcmp.f32 s18, s20
+; CHECK-NEXT:    movlt r5, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s19, s19
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r6, #7
-; CHECK-NEXT:    vcmp.f32 s18, s18
+; CHECK-NEXT:    movgt.w r5, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s19, s22
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r6, #0
-; CHECK-NEXT:    and r2, r6, #15
-; CHECK-NEXT:    orr.w r0, r2, r0, lsl #4
-; CHECK-NEXT:    str.w r0, [r4, #37]
-; CHECK-NEXT:    mov r0, r1
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    vcmp.f32 s17, s22
+; CHECK-NEXT:    movvs r5, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s17, s20
+; CHECK-NEXT:    vcmp.f32 s19, s20
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r1, #0
+; CHECK-NEXT:    movlt r4, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s17, s17
+; CHECK-NEXT:    vcmp.f32 s19, s19
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r1, #-1
+; CHECK-NEXT:    movgt.w r4, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s17, s22
+; CHECK-NEXT:    lsrl r6, r5, #28
+; CHECK-NEXT:    vcmp.f32 s19, s22
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r1, #0
+; CHECK-NEXT:    movvs r4, #0
+; CHECK-NEXT:    orr.w r7, r5, r4, lsl #4
+; CHECK-NEXT:    str.w r7, [r9, #45]
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s17, s20
+; CHECK-NEXT:    str.w r6, [r9, #41]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r2, #0
+; CHECK-NEXT:    mvnlt r8, #7
+; CHECK-NEXT:    vcmp.f32 s19, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s17, s17
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r2, #-1
+; CHECK-NEXT:    movgt.w r8, #7
+; CHECK-NEXT:    vcmp.f32 s19, s19
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s17, s22
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r2, #0
-; CHECK-NEXT:    lsrs r7, r1, #28
+; CHECK-NEXT:    movvs.w r8, #0
+; CHECK-NEXT:    and r5, r8, #15
+; CHECK-NEXT:    vcmp.f32 s17, s22
+; CHECK-NEXT:    lsrl r4, r5, #28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s17, s20
-; CHECK-NEXT:    orr.w r7, r7, r2, lsl #4
-; CHECK-NEXT:    str r7, [r4, #20]
+; CHECK-NEXT:    strb.w r4, [r9, #49]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r0, #0
+; CHECK-NEXT:    mvnlt r3, #7
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s17, s17
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r0, #-1
+; CHECK-NEXT:    movgt r3, #7
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s17, s22
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r0, #0
+; CHECK-NEXT:    movvs r3, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s17, s20
-; CHECK-NEXT:    lsr.w r7, r0, #28
-; CHECK-NEXT:    orr.w r1, r7, r1, lsl #4
-; CHECK-NEXT:    str r1, [r4, #16]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    mvnlt r3, #7
+; CHECK-NEXT:    movlt r1, #0
+; CHECK-NEXT:    vcmp.f32 s17, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s17, s17
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r3, #7
+; CHECK-NEXT:    movgt.w r1, #-1
+; CHECK-NEXT:    vcmp.f32 s17, s17
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    lsr.w r1, r2, #28
-; CHECK-NEXT:    vcmp.f32 s16, s22
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r3, #0
-; CHECK-NEXT:    orr.w r1, r1, r3, lsl #4
+; CHECK-NEXT:    movvs r1, #0
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r3
+; CHECK-NEXT:    vcmp.f32 s17, s22
+; CHECK-NEXT:    vmov r1, s1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    strb r1, [r4, #24]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    mvnlt r5, #7
-; CHECK-NEXT:    vcmp.f32 s16, s20
+; CHECK-NEXT:    movlt r2, #0
+; CHECK-NEXT:    vcmp.f32 s17, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    lsrl r0, r1, #28
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r5, #7
-; CHECK-NEXT:    vcmp.f32 s16, s16
+; CHECK-NEXT:    movgt.w r2, #-1
+; CHECK-NEXT:    vcmp.f32 s17, s17
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r5, #0
-; CHECK-NEXT:    and r1, r5, #15
-; CHECK-NEXT:    orr.w r0, r1, r0, lsl #4
-; CHECK-NEXT:    str r0, [r4, #12]
+; CHECK-NEXT:    movvs r2, #0
+; CHECK-NEXT:    orr.w r1, r1, r2, lsl #4
+; CHECK-NEXT:    strd r0, r1, [r9, #16]
+; CHECK-NEXT:    and r1, r3, #15
+; CHECK-NEXT:    lsrl r2, r1, #28
+; CHECK-NEXT:    strb.w r2, [r9, #24]
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11}
 ; CHECK-NEXT:    add sp, #4
-; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEXT:    .p2align 2
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI30_0:
-; CHECK-NEXT:    .long 0x70ffffff @ float 6.33825262E+29
-; CHECK-NEXT:  .LCPI30_1:
 ; CHECK-NEXT:    .long 0xf1000000 @ float -6.338253E+29
+; CHECK-NEXT:  .LCPI30_1:
+; CHECK-NEXT:    .long 0x70ffffff @ float 6.33825262E+29
     %x = call <4 x i100> @llvm.fptosi.sat.v4f32.v4i100(<4 x float> %f)
     ret <4 x i100> %x
 }
@@ -3694,151 +3687,155 @@ define arm_aapcs_vfpcc <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #48
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI40_0
-; CHECK-NEXT:    vmov r6, r5, d8
-; CHECK-NEXT:    mov r11, r0
-; CHECK-NEXT:    vmov r9, r8, d0
-; CHECK-NEXT:    str.w r8, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    vmov r5, r7, d8
+; CHECK-NEXT:    mov r10, r0
+; CHECK-NEXT:    vmov r9, r3, d0
+; CHECK-NEXT:    str r0, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT:    str r3, [sp, #32] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    mov r3, r8
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    vldr d0, .LCPI40_1
-; CHECK-NEXT:    mov r10, r0
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    vmov r7, r3, d0
-; CHECK-NEXT:    str r3, [sp, #32] @ 4-byte Spill
-; CHECK-NEXT:    mov r2, r7
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    vmov r8, r3, d0
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r11, r3
 ; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    bl __fixdfti
 ; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    strd r1, r0, [sp, #8] @ 8-byte Folded Spill
+; CHECK-NEXT:    str r0, [sp, #36] @ 4-byte Spill
 ; CHECK-NEXT:    csel r4, r2, r4, ne
+; CHECK-NEXT:    str r1, [sp, #16] @ 4-byte Spill
 ; CHECK-NEXT:    str r3, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r6
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    cmp.w r10, #0
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    cmp r6, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne.w r4, #-1
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    mov r3, r8
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    str.w r11, [sp, #44] @ 4-byte Spill
-; CHECK-NEXT:    str.w r4, [r11, #8]
-; CHECK-NEXT:    str.w r9, [sp, #40] @ 4-byte Spill
-; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    str.w r4, [r10, #8]
+; CHECK-NEXT:    mov r0, r5
 ; CHECK-NEXT:    ldr r4, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r10, r7
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    str.w r9, [sp, #44] @ 4-byte Spill
 ; CHECK-NEXT:    mov r3, r4
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r10, r0
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r3, r11
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r2, r6
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    csel r7, r1, r0, ne
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    cmp.w r8, #0
+; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    csel r6, r1, r0, ne
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    cmp.w r10, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r7, #-1
+; CHECK-NEXT:    movne.w r6, #-1
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r7, #0
-; CHECK-NEXT:    str.w r7, [r11, #4]
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    ldr.w r11, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    movne r6, #0
+; CHECK-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    mov r9, r0
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r10
 ; CHECK-NEXT:    mov r3, r4
-; CHECK-NEXT:    str.w r10, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT:    str r6, [r0, #4]
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    mov r10, r8
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    ldr r1, [sp, #36] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r2, r6
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    csel r7, r1, r0, ne
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    csel r6, r1, r0, ne
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r7, #-1
-; CHECK-NEXT:    str r6, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT:    str r5, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    movne.w r6, #-1
+; CHECK-NEXT:    str r5, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    str r7, [sp, #20] @ 4-byte Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    vmov r9, r8, d9
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r7, #0
-; CHECK-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    mov r5, r11
-; CHECK-NEXT:    str r7, [r0]
-; CHECK-NEXT:    ldr r7, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT:    mov r2, r7
+; CHECK-NEXT:    movne r6, #0
+; CHECK-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT:    str r6, [r0]
+; CHECK-NEXT:    ldr r6, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT:    ldr r2, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    mov r3, r6
 ; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    mov r1, r8
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr r4, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    mov r1, r8
 ; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r4
+; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    mov r4, r10
+; CHECK-NEXT:    str.w r10, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT:    mov r5, r11
+; CHECK-NEXT:    str.w r11, [sp, #4] @ 4-byte Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    mov r11, r0
 ; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    mov r1, r8
 ; CHECK-NEXT:    bl __fixdfti
+; CHECK-NEXT:    mov r10, r3
+; CHECK-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    str r1, [sp, #36] @ 4-byte Spill
 ; CHECK-NEXT:    cmp.w r11, #0
-; CHECK-NEXT:    strd r2, r0, [sp, #4] @ 8-byte Folded Spill
-; CHECK-NEXT:    csel r10, r1, r11, ne
-; CHECK-NEXT:    str r3, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    str r2, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    it eq
+; CHECK-NEXT:    mvneq r10, #7
 ; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    mov r1, r8
 ; CHECK-NEXT:    mov r2, r9
 ; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    cmp r7, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r10, #-1
+; CHECK-NEXT:    movne.w r10, #7
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r10, #0
+; CHECK-NEXT:    ldr r7, [sp, #44] @ 4-byte Reload
 ; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    mov r3, r6
 ; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r10, #0
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr r6, [sp, #36] @ 4-byte Reload
 ; CHECK-NEXT:    mov r11, r0
 ; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r3, r4
-; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r2, r4
+; CHECK-NEXT:    mov r3, r5
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    ldr r1, [sp, #36] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    mov r2, r9
 ; CHECK-NEXT:    mov r3, r8
@@ -3852,21 +3849,21 @@ define arm_aapcs_vfpcc <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) {
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    ldr r1, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT:    lsr.w r0, r10, #28
-; CHECK-NEXT:    orr.w r0, r0, r4, lsl #4
+; CHECK-NEXT:    vmov q0[3], q0[1], r4, r10
+; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    vmov r0, s1
 ; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    mov r7, r5
-; CHECK-NEXT:    str r0, [r1, #20]
+; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    mov r5, r6
+; CHECK-NEXT:    str r0, [sp, #36] @ 4-byte Spill
 ; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    mov r2, r6
-; CHECK-NEXT:    ldr r6, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    ldr r7, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    ldr r6, [sp, #4] @ 4-byte Reload
 ; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    mov r2, r7
 ; CHECK-NEXT:    mov r3, r6
 ; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
@@ -3876,73 +3873,75 @@ define arm_aapcs_vfpcc <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) {
 ; CHECK-NEXT:    csel r11, r1, r0, ne
 ; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne.w r11, #-1
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne.w r11, #0
-; CHECK-NEXT:    ldr r5, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT:    lsr.w r0, r11, #28
-; CHECK-NEXT:    orr.w r0, r0, r10, lsl #4
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r3, r7
-; CHECK-NEXT:    str r0, [r5, #16]
+; CHECK-NEXT:    ldr r1, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    mov r4, r11
+; CHECK-NEXT:    ldr r2, [sp, #44] @ 4-byte Reload
 ; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    ldr r2, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT:    lsrl r4, r1, #28
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    str r1, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT:    mov r1, r8
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr r2, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    mov r2, r7
 ; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    mov r1, r8
 ; CHECK-NEXT:    mov r3, r6
-; CHECK-NEXT:    mov r10, r6
+; CHECK-NEXT:    mov r7, r6
 ; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    mvneq r0, #7
-; CHECK-NEXT:    cmp r7, #0
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r0, #7
-; CHECK-NEXT:    mov r6, r0
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
 ; CHECK-NEXT:    mov r2, r9
 ; CHECK-NEXT:    mov r3, r8
+; CHECK-NEXT:    csel r6, r1, r0, ne
+; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r6, #-1
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    lsr.w r0, r4, #28
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r6, #0
-; CHECK-NEXT:    orr.w r0, r0, r6, lsl #4
-; CHECK-NEXT:    strb r0, [r5, #24]
-; CHECK-NEXT:    ldr r7, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    ldr r4, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    ldr r0, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    and r1, r10, #15
 ; CHECK-NEXT:    ldr r2, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT:    ldr r3, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    orr.w r0, r0, r6, lsl #4
+; CHECK-NEXT:    lsrl r6, r1, #28
+; CHECK-NEXT:    strd r4, r0, [r2, #16]
+; CHECK-NEXT:    mov r8, r2
+; CHECK-NEXT:    strb r6, [r2, #24]
+; CHECK-NEXT:    ldr r5, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    ldr r6, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    ldr r2, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    ldr r3, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r6
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr r2, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    mov r6, r4
+; CHECK-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r3, r7
 ; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    ldr r0, [sp, #24] @ 4-byte Reload
 ; CHECK-NEXT:    it eq
 ; CHECK-NEXT:    mvneq r0, #7
-; CHECK-NEXT:    cmp.w r8, #0
+; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r0, #7
 ; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    mov r0, r5
 ; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r7
+; CHECK-NEXT:    mov r2, r5
 ; CHECK-NEXT:    mov r3, r6
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
@@ -3950,7 +3949,7 @@ define arm_aapcs_vfpcc <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) {
 ; CHECK-NEXT:    movne r4, #0
 ; CHECK-NEXT:    and r0, r4, #15
 ; CHECK-NEXT:    orr.w r0, r0, r11, lsl #4
-; CHECK-NEXT:    str r0, [r5, #12]
+; CHECK-NEXT:    str.w r0, [r8, #12]
 ; CHECK-NEXT:    add sp, #48
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
@@ -4694,107 +4693,127 @@ define arm_aapcs_vfpcc <8 x i16> @test_signed_v8f16_v8i16(<8 x half> %f) {
 define arm_aapcs_vfpcc <8 x i19> @test_signed_v8f16_v8i19(<8 x half> %f) {
 ; CHECK-LABEL: test_signed_v8f16_v8i19:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    .save {r7, lr}
-; CHECK-NEXT:    push {r7, lr}
-; CHECK-NEXT:    .vsave {d8}
-; CHECK-NEXT:    vpush {d8}
-; CHECK-NEXT:    vldr s12, .LCPI46_0
-; CHECK-NEXT:    vcvtt.f32.f16 s15, s3
-; CHECK-NEXT:    vldr s14, .LCPI46_1
-; CHECK-NEXT:    vcvtb.f32.f16 s7, s0
-; CHECK-NEXT:    vmaxnm.f32 s16, s15, s12
-; CHECK-NEXT:    vcvtb.f32.f16 s4, s1
-; CHECK-NEXT:    vcvtt.f32.f16 s8, s1
-; CHECK-NEXT:    vcvtb.f32.f16 s1, s2
-; CHECK-NEXT:    vcvtt.f32.f16 s0, s0
-; CHECK-NEXT:    vcvtt.f32.f16 s2, s2
-; CHECK-NEXT:    vcvtb.f32.f16 s3, s3
-; CHECK-NEXT:    vmaxnm.f32 s6, s4, s12
-; CHECK-NEXT:    vmaxnm.f32 s10, s8, s12
-; CHECK-NEXT:    vmaxnm.f32 s5, s1, s12
-; CHECK-NEXT:    vmaxnm.f32 s9, s7, s12
-; CHECK-NEXT:    vmaxnm.f32 s11, s0, s12
-; CHECK-NEXT:    vmaxnm.f32 s13, s2, s12
-; CHECK-NEXT:    vminnm.f32 s16, s16, s14
-; CHECK-NEXT:    vmaxnm.f32 s12, s3, s12
-; CHECK-NEXT:    vcvt.s32.f32 s16, s16
-; CHECK-NEXT:    vminnm.f32 s12, s12, s14
-; CHECK-NEXT:    vminnm.f32 s13, s13, s14
-; CHECK-NEXT:    vcvt.s32.f32 s12, s12
-; CHECK-NEXT:    vminnm.f32 s9, s9, s14
-; CHECK-NEXT:    vcvt.s32.f32 s13, s13
-; CHECK-NEXT:    vminnm.f32 s11, s11, s14
-; CHECK-NEXT:    vcvt.s32.f32 s11, s11
-; CHECK-NEXT:    vminnm.f32 s5, s5, s14
-; CHECK-NEXT:    vcvt.s32.f32 s9, s9
-; CHECK-NEXT:    vminnm.f32 s10, s10, s14
-; CHECK-NEXT:    vcmp.f32 s15, s15
-; CHECK-NEXT:    vminnm.f32 s6, s6, s14
-; CHECK-NEXT:    vmov r1, s16
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r1, #0
-; CHECK-NEXT:    lsrs r2, r1, #11
-; CHECK-NEXT:    vcmp.f32 s3, s3
-; CHECK-NEXT:    strb r2, [r0, #18]
-; CHECK-NEXT:    vmov r3, s12
+; CHECK-NEXT:    .save {r4, r5, r7, r9, r11, lr}
+; CHECK-NEXT:    push.w {r4, r5, r7, r9, r11, lr}
+; CHECK-NEXT:    vldr s4, .LCPI46_0
+; CHECK-NEXT:    vcvtb.f32.f16 s8, s1
+; CHECK-NEXT:    vcvtt.f32.f16 s12, s1
+; CHECK-NEXT:    vcvtt.f32.f16 s1, s0
+; CHECK-NEXT:    vldr s6, .LCPI46_1
+; CHECK-NEXT:    vmaxnm.f32 s5, s1, s4
+; CHECK-NEXT:    vcvtb.f32.f16 s0, s0
+; CHECK-NEXT:    vmaxnm.f32 s14, s12, s4
+; CHECK-NEXT:    vminnm.f32 s5, s5, s6
+; CHECK-NEXT:    vmaxnm.f32 s7, s0, s4
+; CHECK-NEXT:    vminnm.f32 s7, s7, s6
+; CHECK-NEXT:    vcvt.s32.f32 s5, s5
+; CHECK-NEXT:    vcvt.s32.f32 s7, s7
+; CHECK-NEXT:    vminnm.f32 s14, s14, s6
+; CHECK-NEXT:    vcvt.s32.f32 s14, s14
+; CHECK-NEXT:    vmaxnm.f32 s10, s8, s4
+; CHECK-NEXT:    vminnm.f32 s10, s10, s6
+; CHECK-NEXT:    vcmp.f32 s1, s1
+; CHECK-NEXT:    vcvt.s32.f32 s10, s10
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s0, s0
+; CHECK-NEXT:    mov.w r7, #0
+; CHECK-NEXT:    vcvtb.f32.f16 s0, s2
+; CHECK-NEXT:    mov.w r9, #0
+; CHECK-NEXT:    vmov r2, s5
+; CHECK-NEXT:    mov.w r5, #0
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r3, #0
-; CHECK-NEXT:    ubfx r2, r3, #14, #5
-; CHECK-NEXT:    vcvt.s32.f32 s5, s5
-; CHECK-NEXT:    orr.w r1, r2, r1, lsl #5
-; CHECK-NEXT:    vcmp.f32 s2, s2
-; CHECK-NEXT:    strh r1, [r0, #16]
-; CHECK-NEXT:    vmov lr, s13
+; CHECK-NEXT:    movvs r2, #0
+; CHECK-NEXT:    vmov r1, s7
+; CHECK-NEXT:    bfc r2, #19, #13
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs.w lr, #0
-; CHECK-NEXT:    ubfx r1, lr, #1, #18
-; CHECK-NEXT:    vcmp.f32 s0, s0
-; CHECK-NEXT:    orr.w r1, r1, r3, lsl #18
-; CHECK-NEXT:    vcvt.s32.f32 s10, s10
+; CHECK-NEXT:    movvs r1, #0
+; CHECK-NEXT:    vcmp.f32 s12, s12
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vmov r12, s11
-; CHECK-NEXT:    str r1, [r0, #12]
-; CHECK-NEXT:    vmov r3, s9
+; CHECK-NEXT:    vcmp.f32 s8, s8
+; CHECK-NEXT:    lsll r2, r7, #19
+; CHECK-NEXT:    bfc r1, #19, #13
+; CHECK-NEXT:    vmov r12, s14
+; CHECK-NEXT:    vmaxnm.f32 s8, s0, s4
+; CHECK-NEXT:    orr.w r1, r1, r2
+; CHECK-NEXT:    str r1, [r0]
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs.w r12, #0
-; CHECK-NEXT:    vcmp.f32 s7, s7
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s0, s0
+; CHECK-NEXT:    vcvtt.f32.f16 s0, s2
+; CHECK-NEXT:    vmaxnm.f32 s2, s0, s4
+; CHECK-NEXT:    vminnm.f32 s8, s8, s6
+; CHECK-NEXT:    vminnm.f32 s2, s2, s6
+; CHECK-NEXT:    vmov r3, s10
+; CHECK-NEXT:    vcvt.s32.f32 s2, s2
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r3, #0
+; CHECK-NEXT:    vcvt.s32.f32 s8, s8
 ; CHECK-NEXT:    bfc r3, #19, #13
-; CHECK-NEXT:    vcvt.s32.f32 s6, s6
-; CHECK-NEXT:    orr.w r3, r3, r12, lsl #19
-; CHECK-NEXT:    str r3, [r0]
-; CHECK-NEXT:    vcmp.f32 s1, s1
-; CHECK-NEXT:    vmov r3, s5
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    bfc r2, #19, #13
+; CHECK-NEXT:    mov r4, r3
+; CHECK-NEXT:    lsrl r2, r1, #7
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s0, s0
+; CHECK-NEXT:    lsrl r4, r9, #26
+; CHECK-NEXT:    vcvtt.f32.f16 s0, s3
+; CHECK-NEXT:    mov lr, r1
+; CHECK-NEXT:    orr.w r1, r4, r2
+; CHECK-NEXT:    vmov r4, s2
+; CHECK-NEXT:    vmaxnm.f32 s2, s0, s4
+; CHECK-NEXT:    vmov r2, s8
+; CHECK-NEXT:    vminnm.f32 s2, s2, s6
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r3, #0
-; CHECK-NEXT:    vcmp.f32 s8, s8
-; CHECK-NEXT:    bfc r3, #19, #13
-; CHECK-NEXT:    vmov r1, s10
+; CHECK-NEXT:    movvs r2, #0
+; CHECK-NEXT:    vcvt.s32.f32 s2, s2
+; CHECK-NEXT:    bfc r2, #19, #13
+; CHECK-NEXT:    lsll r2, r5, #12
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r1, #0
-; CHECK-NEXT:    ubfx r2, r1, #7, #12
-; CHECK-NEXT:    vcmp.f32 s4, s4
-; CHECK-NEXT:    orr.w r2, r2, r3, lsl #12
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    orr.w r2, r2, lr, lsl #31
+; CHECK-NEXT:    movvs r4, #0
+; CHECK-NEXT:    orrs r2, r1
+; CHECK-NEXT:    bfc r4, #19, #13
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    lsll r4, r1, #31
+; CHECK-NEXT:    vcmp.f32 s0, s0
+; CHECK-NEXT:    orrs r2, r4
 ; CHECK-NEXT:    str r2, [r0, #8]
-; CHECK-NEXT:    vmov r2, s6
-; CHECK-NEXT:    ubfx r3, r12, #13, #6
+; CHECK-NEXT:    orr.w r2, r7, r3, lsl #6
+; CHECK-NEXT:    vcvtb.f32.f16 s0, s3
+; CHECK-NEXT:    orr.w r3, r2, r12, lsl #25
+; CHECK-NEXT:    vmov r2, s2
+; CHECK-NEXT:    vmaxnm.f32 s2, s0, s4
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vminnm.f32 s2, s2, s6
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r2, #0
+; CHECK-NEXT:    vcvt.s32.f32 s2, s2
 ; CHECK-NEXT:    bfc r2, #19, #13
-; CHECK-NEXT:    orr.w r2, r3, r2, lsl #6
-; CHECK-NEXT:    orr.w r1, r2, r1, lsl #25
-; CHECK-NEXT:    str r1, [r0, #4]
-; CHECK-NEXT:    vpop {d8}
-; CHECK-NEXT:    pop {r7, pc}
+; CHECK-NEXT:    movs r7, #0
+; CHECK-NEXT:    vcmp.f32 s0, s0
+; CHECK-NEXT:    lsll r2, r7, #5
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    mov.w r11, #0
+; CHECK-NEXT:    vmov r7, s2
+; CHECK-NEXT:    it vs
+; CHECK-NEXT:    movvs r7, #0
+; CHECK-NEXT:    mov r4, r7
+; CHECK-NEXT:    bfc r4, #19, #13
+; CHECK-NEXT:    lsrl r4, r11, #14
+; CHECK-NEXT:    orrs r2, r4
+; CHECK-NEXT:    strh r2, [r0, #16]
+; CHECK-NEXT:    str r3, [r0, #4]
+; CHECK-NEXT:    lsrs r2, r2, #16
+; CHECK-NEXT:    strb r2, [r0, #18]
+; CHECK-NEXT:    orr.w r2, r9, lr
+; CHECK-NEXT:    orrs r2, r5
+; CHECK-NEXT:    orrs r1, r2
+; CHECK-NEXT:    orr.w r1, r1, r7, lsl #18
+; CHECK-NEXT:    str r1, [r0, #12]
+; CHECK-NEXT:    pop.w {r4, r5, r7, r9, r11, pc}
 ; CHECK-NEXT:    .p2align 2
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI46_0:
@@ -4844,42 +4863,40 @@ define arm_aapcs_vfpcc <8 x i50> @test_signed_v8f16_v8i50(<8 x half> %f) {
 ; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-NEXT:    .pad #4
 ; CHECK-NEXT:    sub sp, #4
-; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14}
-; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14}
-; CHECK-NEXT:    .pad #16
-; CHECK-NEXT:    sub sp, #16
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    vmov q4, q0
-; CHECK-NEXT:    mov r11, r0
-; CHECK-NEXT:    vcvtt.f32.f16 s28, s19
-; CHECK-NEXT:    vmov r0, s28
+; CHECK-NEXT:    mov r9, r0
+; CHECK-NEXT:    vcvtt.f32.f16 s30, s19
+; CHECK-NEXT:    vmov r0, s30
 ; CHECK-NEXT:    bl __aeabi_f2lz
 ; CHECK-NEXT:    vcvtb.f32.f16 s26, s18
-; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    mov r4, r0
 ; CHECK-NEXT:    vmov r0, s26
-; CHECK-NEXT:    vldr s22, .LCPI48_1
-; CHECK-NEXT:    vcvtb.f32.f16 s24, s16
-; CHECK-NEXT:    vcvtt.f32.f16 s18, s18
-; CHECK-NEXT:    vcmp.f32 s28, s22
-; CHECK-NEXT:    mov r4, r1
+; CHECK-NEXT:    vldr s24, .LCPI48_1
+; CHECK-NEXT:    vcvtb.f32.f16 s20, s16
+; CHECK-NEXT:    vcvtb.f32.f16 s28, s19
+; CHECK-NEXT:    vcmp.f32 s30, s24
+; CHECK-NEXT:    mov r5, r1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vmov r6, s24
-; CHECK-NEXT:    vldr s20, .LCPI48_0
-; CHECK-NEXT:    vmov r5, s18
+; CHECK-NEXT:    vmov r7, s20
+; CHECK-NEXT:    vldr s22, .LCPI48_0
+; CHECK-NEXT:    vmov r6, s28
 ; CHECK-NEXT:    itt lt
-; CHECK-NEXT:    movlt r4, #0
-; CHECK-NEXT:    movtlt r4, #65534
+; CHECK-NEXT:    movlt r5, #0
+; CHECK-NEXT:    movtlt r5, #65534
 ; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    vcmp.f32 s26, s22
-; CHECK-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    vcmp.f32 s26, s24
+; CHECK-NEXT:    mov r10, r1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s28, s20
+; CHECK-NEXT:    vcmp.f32 s30, s22
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s26, s20
+; CHECK-NEXT:    vcmp.f32 s26, s22
 ; CHECK-NEXT:    itt gt
-; CHECK-NEXT:    movwgt r4, #65535
-; CHECK-NEXT:    movtgt r4, #1
+; CHECK-NEXT:    movwgt r5, #65535
+; CHECK-NEXT:    movtgt r5, #1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
@@ -4887,263 +4904,244 @@ define arm_aapcs_vfpcc <8 x i50> @test_signed_v8f16_v8i50(<8 x half> %f) {
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
-; CHECK-NEXT:    str.w r0, [r11, #25]
-; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    str.w r0, [r9, #25]
+; CHECK-NEXT:    mov r0, r7
 ; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    vcmp.f32 s24, s22
-; CHECK-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    vcmp.f32 s20, s24
+; CHECK-NEXT:    mov r8, r1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s24, s20
+; CHECK-NEXT:    vcmp.f32 s20, s22
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s24, s24
+; CHECK-NEXT:    vcmp.f32 s20, s20
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s28, s22
+; CHECK-NEXT:    vcmp.f32 s30, s24
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
+; CHECK-NEXT:    str.w r0, [r9]
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str.w r0, [r11]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r7, #0
-; CHECK-NEXT:    vcmp.f32 s28, s20
+; CHECK-NEXT:    movlt r4, #0
+; CHECK-NEXT:    vcmp.f32 s30, s22
+; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r7, #-1
-; CHECK-NEXT:    vcmp.f32 s28, s28
+; CHECK-NEXT:    movgt.w r4, #-1
+; CHECK-NEXT:    vcmp.f32 s30, s30
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r7, #0
-; CHECK-NEXT:    str r7, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    it vs
+; CHECK-NEXT:    itt vs
 ; CHECK-NEXT:    movvs r4, #0
-; CHECK-NEXT:    lsls r0, r4, #22
-; CHECK-NEXT:    orr.w r7, r0, r7, lsr #10
-; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    movvs r5, #0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    bfc r7, #18, #14
+; CHECK-NEXT:    lsll r4, r7, #22
 ; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    vcmp.f32 s18, s22
-; CHECK-NEXT:    mov r6, r1
+; CHECK-NEXT:    vcmp.f32 s28, s24
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, s20
-; CHECK-NEXT:    itt lt
+; CHECK-NEXT:    vcmp.f32 s28, s22
+; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r6, #0
-; CHECK-NEXT:    movtlt r6, #65534
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, s18
-; CHECK-NEXT:    itt gt
-; CHECK-NEXT:    movwgt r6, #65535
-; CHECK-NEXT:    movtgt r6, #1
+; CHECK-NEXT:    vcmp.f32 s28, s28
+; CHECK-NEXT:    it gt
+; CHECK-NEXT:    movgt.w r6, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    mov r5, r0
-; CHECK-NEXT:    vcmp.f32 s18, s22
-; CHECK-NEXT:    str.w r7, [r11, #45]
+; CHECK-NEXT:    vcmp.f32 s28, s24
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r6, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r5, #0
-; CHECK-NEXT:    vcmp.f32 s18, s20
+; CHECK-NEXT:    itt lt
+; CHECK-NEXT:    movlt r1, #0
+; CHECK-NEXT:    movtlt r1, #65534
+; CHECK-NEXT:    vcmp.f32 s28, s22
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, s18
-; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r5, #-1
+; CHECK-NEXT:    vcmp.f32 s28, s28
+; CHECK-NEXT:    itt gt
+; CHECK-NEXT:    movwgt r1, #65535
+; CHECK-NEXT:    movtgt r1, #1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r5, #0
-; CHECK-NEXT:    lsrs r0, r5, #14
-; CHECK-NEXT:    orr.w r0, r0, r6, lsl #18
-; CHECK-NEXT:    vcvtt.f32.f16 s18, s17
-; CHECK-NEXT:    str.w r0, [r11, #33]
-; CHECK-NEXT:    vmov r0, s18
+; CHECK-NEXT:    movvs r1, #0
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    bfc r1, #18, #14
+; CHECK-NEXT:    vcvtt.f32.f16 s28, s18
+; CHECK-NEXT:    lsrl r2, r1, #28
+; CHECK-NEXT:    orr.w r0, r1, r7
+; CHECK-NEXT:    str.w r0, [r9, #45]
+; CHECK-NEXT:    vmov r0, s28
+; CHECK-NEXT:    orrs r4, r2
 ; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    vcmp.f32 s18, s22
-; CHECK-NEXT:    mov r9, r1
+; CHECK-NEXT:    vcmp.f32 s28, s24
+; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    itt lt
+; CHECK-NEXT:    movlt r1, #0
+; CHECK-NEXT:    movtlt r1, #65534
+; CHECK-NEXT:    vcmp.f32 s28, s22
+; CHECK-NEXT:    vcvtb.f32.f16 s18, s17
+; CHECK-NEXT:    lsrs r0, r5, #10
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    itt gt
+; CHECK-NEXT:    movwgt r1, #65535
+; CHECK-NEXT:    movtgt r1, #1
+; CHECK-NEXT:    str.w r4, [r9, #41]
+; CHECK-NEXT:    strb.w r0, [r9, #49]
+; CHECK-NEXT:    vmov r0, s18
+; CHECK-NEXT:    vcmp.f32 s28, s24
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, s20
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r0, #0
+; CHECK-NEXT:    movlt r7, #0
+; CHECK-NEXT:    vcmp.f32 s28, s22
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, s18
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r0, #-1
+; CHECK-NEXT:    movgt.w r7, #-1
+; CHECK-NEXT:    vcmp.f32 s28, s28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, s22
-; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r0, #0
+; CHECK-NEXT:    itt vs
+; CHECK-NEXT:    movvs r7, #0
+; CHECK-NEXT:    movvs r1, #0
+; CHECK-NEXT:    bfc r1, #18, #14
+; CHECK-NEXT:    mov r4, r7
+; CHECK-NEXT:    lsrl r4, r1, #14
+; CHECK-NEXT:    orr.w r6, r1, r6, lsl #4
+; CHECK-NEXT:    bl __aeabi_f2lz
+; CHECK-NEXT:    vcvtt.f32.f16 s28, s17
+; CHECK-NEXT:    mov r11, r0
+; CHECK-NEXT:    vmov r0, s28
+; CHECK-NEXT:    mov r5, r1
+; CHECK-NEXT:    vcmp.f32 s18, s24
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
 ; CHECK-NEXT:    itt lt
-; CHECK-NEXT:    movwlt r9, #0
-; CHECK-NEXT:    movtlt r9, #65534
-; CHECK-NEXT:    vcmp.f32 s18, s20
-; CHECK-NEXT:    mov r1, r0
+; CHECK-NEXT:    movlt r5, #0
+; CHECK-NEXT:    movtlt r5, #65534
+; CHECK-NEXT:    vcmp.f32 s18, s22
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itt gt
-; CHECK-NEXT:    movwgt r9, #65535
-; CHECK-NEXT:    movtgt r9, #1
-; CHECK-NEXT:    vcmp.f32 s18, s18
-; CHECK-NEXT:    vcvtt.f32.f16 s16, s16
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs.w r9, #0
-; CHECK-NEXT:    lsl.w r0, r9, #22
-; CHECK-NEXT:    orr.w r0, r0, r1, lsr #10
-; CHECK-NEXT:    str.w r0, [r11, #20]
-; CHECK-NEXT:    vmov r0, s16
+; CHECK-NEXT:    movwgt r5, #65535
+; CHECK-NEXT:    movtgt r5, #1
+; CHECK-NEXT:    str.w r6, [r9, #37]
+; CHECK-NEXT:    str.w r4, [r9, #33]
 ; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    vcmp.f32 s16, s22
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s16, s20
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt.w r8, #0
+; CHECK-NEXT:    vcmp.f32 s28, s24
+; CHECK-NEXT:    mov r4, r1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s16, s16
-; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r8, #-1
+; CHECK-NEXT:    vcmp.f32 s28, s22
+; CHECK-NEXT:    itt lt
+; CHECK-NEXT:    movlt r4, #0
+; CHECK-NEXT:    movtlt r4, #65534
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    mov r10, r1
-; CHECK-NEXT:    vcmp.f32 s16, s22
-; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs.w r8, #0
+; CHECK-NEXT:    vcmp.f32 s26, s24
+; CHECK-NEXT:    itt gt
+; CHECK-NEXT:    movwgt r4, #65535
+; CHECK-NEXT:    movtgt r4, #1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itt lt
 ; CHECK-NEXT:    movwlt r10, #0
 ; CHECK-NEXT:    movtlt r10, #65534
-; CHECK-NEXT:    vcmp.f32 s16, s20
-; CHECK-NEXT:    lsr.w r0, r8, #14
+; CHECK-NEXT:    vcmp.f32 s26, s22
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s16, s16
 ; CHECK-NEXT:    itt gt
 ; CHECK-NEXT:    movwgt r10, #65535
 ; CHECK-NEXT:    movtgt r10, #1
+; CHECK-NEXT:    vcmp.f32 s26, s26
+; CHECK-NEXT:    vcvtt.f32.f16 s16, s16
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs.w r10, #0
-; CHECK-NEXT:    orr.w r0, r0, r10, lsl #18
-; CHECK-NEXT:    str.w r0, [r11, #8]
-; CHECK-NEXT:    lsrs r0, r4, #10
-; CHECK-NEXT:    vcvtb.f32.f16 s16, s19
-; CHECK-NEXT:    strb.w r0, [r11, #49]
+; CHECK-NEXT:    bfc r10, #18, #14
+; CHECK-NEXT:    vcmp.f32 s28, s24
+; CHECK-NEXT:    orr.w r0, r10, r7, lsl #18
+; CHECK-NEXT:    str.w r0, [r9, #29]
 ; CHECK-NEXT:    vmov r0, s16
-; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    mov r7, r0
-; CHECK-NEXT:    vcmp.f32 s16, s22
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s28, s22
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r7, #0
-; CHECK-NEXT:    vcmp.f32 s16, s20
-; CHECK-NEXT:    ubfx r0, r6, #14, #4
+; CHECK-NEXT:    movlt r6, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s28, s28
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r7, #-1
-; CHECK-NEXT:    vcmp.f32 s16, s16
-; CHECK-NEXT:    vcvtb.f32.f16 s18, s17
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r7, #0
-; CHECK-NEXT:    orr.w r0, r0, r7, lsl #4
-; CHECK-NEXT:    str.w r0, [r11, #37]
-; CHECK-NEXT:    vcmp.f32 s26, s22
-; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itt lt
-; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    movtlt r0, #65534
-; CHECK-NEXT:    vcmp.f32 s26, s20
-; CHECK-NEXT:    mov r4, r1
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itt gt
-; CHECK-NEXT:    movwgt r0, #65535
-; CHECK-NEXT:    movtgt r0, #1
-; CHECK-NEXT:    vcmp.f32 s26, s26
+; CHECK-NEXT:    movgt.w r6, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r0, #0
-; CHECK-NEXT:    bfc r0, #18, #14
-; CHECK-NEXT:    orr.w r0, r0, r5, lsl #18
-; CHECK-NEXT:    str.w r0, [r11, #29]
-; CHECK-NEXT:    lsr.w r0, r9, #10
-; CHECK-NEXT:    strb.w r0, [r11, #24]
-; CHECK-NEXT:    vmov r0, s18
-; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    vcmp.f32 s18, s22
-; CHECK-NEXT:    ubfx r2, r10, #14, #4
+; CHECK-NEXT:    vcmp.f32 s18, s24
+; CHECK-NEXT:    itt vs
+; CHECK-NEXT:    movvs r6, #0
+; CHECK-NEXT:    movvs r4, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    vcmp.f32 s18, s20
+; CHECK-NEXT:    movlt.w r11, #0
+; CHECK-NEXT:    vcmp.f32 s18, s22
+; CHECK-NEXT:    mov r1, r4
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r0, #-1
+; CHECK-NEXT:    movgt.w r11, #-1
 ; CHECK-NEXT:    vcmp.f32 s18, s18
+; CHECK-NEXT:    bfc r1, #18, #14
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r0, #0
-; CHECK-NEXT:    orr.w r2, r2, r0, lsl #4
-; CHECK-NEXT:    str.w r2, [r11, #12]
-; CHECK-NEXT:    vcmp.f32 s24, s22
-; CHECK-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    itt vs
+; CHECK-NEXT:    movvs.w r11, #0
+; CHECK-NEXT:    movvs r5, #0
+; CHECK-NEXT:    vcmp.f32 s20, s24
+; CHECK-NEXT:    bfc r5, #18, #14
+; CHECK-NEXT:    mov r10, r11
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s24, s20
+; CHECK-NEXT:    lsll r6, r1, #22
+; CHECK-NEXT:    lsrl r10, r5, #28
 ; CHECK-NEXT:    itt lt
-; CHECK-NEXT:    movlt r2, #0
-; CHECK-NEXT:    movtlt r2, #65534
+; CHECK-NEXT:    movwlt r8, #0
+; CHECK-NEXT:    movtlt r8, #65534
+; CHECK-NEXT:    vcmp.f32 s20, s22
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s24, s24
 ; CHECK-NEXT:    itt gt
-; CHECK-NEXT:    movwgt r2, #65535
-; CHECK-NEXT:    movtgt r2, #1
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, s22
-; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r2, #0
+; CHECK-NEXT:    movwgt r8, #65535
+; CHECK-NEXT:    movtgt r8, #1
+; CHECK-NEXT:    orrs r1, r5
+; CHECK-NEXT:    str.w r1, [r9, #20]
+; CHECK-NEXT:    bl __aeabi_f2lz
+; CHECK-NEXT:    vcmp.f32 s16, s24
+; CHECK-NEXT:    orr.w r2, r10, r6
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, s20
 ; CHECK-NEXT:    itt lt
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movtlt r1, #65534
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s16, s22
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itt gt
 ; CHECK-NEXT:    movwgt r1, #65535
 ; CHECK-NEXT:    movtgt r1, #1
+; CHECK-NEXT:    str.w r2, [r9, #16]
+; CHECK-NEXT:    lsrs r2, r4, #10
+; CHECK-NEXT:    vcmp.f32 s16, s24
+; CHECK-NEXT:    strb.w r2, [r9, #24]
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itt lt
-; CHECK-NEXT:    movlt r4, #0
-; CHECK-NEXT:    movtlt r4, #65534
-; CHECK-NEXT:    vcmp.f32 s16, s20
-; CHECK-NEXT:    bfc r2, #18, #14
+; CHECK-NEXT:    it lt
+; CHECK-NEXT:    movlt r0, #0
+; CHECK-NEXT:    vcmp.f32 s16, s22
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itt gt
-; CHECK-NEXT:    movwgt r4, #65535
-; CHECK-NEXT:    movtgt r4, #1
+; CHECK-NEXT:    it gt
+; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    vcmp.f32 s16, s16
-; CHECK-NEXT:    orr.w r2, r2, r8, lsl #18
-; CHECK-NEXT:    str.w r2, [r11, #4]
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r4, #0
-; CHECK-NEXT:    bfc r4, #18, #14
-; CHECK-NEXT:    ldr r3, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    lsrs r2, r7, #28
-; CHECK-NEXT:    vcmp.f32 s18, s18
-; CHECK-NEXT:    orr.w r2, r2, r4, lsl #4
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    orr.w r2, r2, r3, lsl #22
-; CHECK-NEXT:    str.w r2, [r11, #41]
-; CHECK-NEXT:    it vs
+; CHECK-NEXT:    itt vs
+; CHECK-NEXT:    movvs r0, #0
 ; CHECK-NEXT:    movvs r1, #0
-; CHECK-NEXT:    lsrs r0, r0, #28
 ; CHECK-NEXT:    bfc r1, #18, #14
-; CHECK-NEXT:    orr.w r0, r0, r1, lsl #4
-; CHECK-NEXT:    ldr r1, [sp] @ 4-byte Reload
-; CHECK-NEXT:    orr.w r0, r0, r1, lsl #22
-; CHECK-NEXT:    str.w r0, [r11, #16]
-; CHECK-NEXT:    add sp, #16
-; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14}
+; CHECK-NEXT:    mov r2, r0
+; CHECK-NEXT:    lsrl r2, r1, #14
+; CHECK-NEXT:    vcmp.f32 s20, s20
+; CHECK-NEXT:    orr.w r1, r1, r11, lsl #4
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    strd r2, r1, [r9, #8]
+; CHECK-NEXT:    it vs
+; CHECK-NEXT:    movvs.w r8, #0
+; CHECK-NEXT:    bfc r8, #18, #14
+; CHECK-NEXT:    orr.w r0, r8, r0, lsl #18
+; CHECK-NEXT:    str.w r0, [r9, #4]
+; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEXT:    .p2align 2
@@ -5426,477 +5424,482 @@ define arm_aapcs_vfpcc <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) {
 define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-LABEL: test_signed_v8f16_v8i100:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT:    .pad #32
+; CHECK-NEXT:    sub sp, #32
 ; CHECK-NEXT:    vmov q4, q0
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    vcvtb.f32.f16 s30, s19
-; CHECK-NEXT:    vmov r0, s30
+; CHECK-NEXT:    mov r9, r0
+; CHECK-NEXT:    vcvtb.f32.f16 s21, s19
+; CHECK-NEXT:    vcvtt.f32.f16 s24, s19
+; CHECK-NEXT:    vmov r0, s21
+; CHECK-NEXT:    vcvtb.f32.f16 s26, s16
+; CHECK-NEXT:    vcvtb.f32.f16 s28, s17
+; CHECK-NEXT:    vcvtb.f32.f16 s30, s18
+; CHECK-NEXT:    vldr s20, .LCPI50_2
+; CHECK-NEXT:    vmov r8, s24
+; CHECK-NEXT:    vmov r4, s26
+; CHECK-NEXT:    vcvtt.f32.f16 s22, s18
+; CHECK-NEXT:    vmov r6, s28
+; CHECK-NEXT:    vmov r5, s30
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    vcvtb.f32.f16 s28, s18
-; CHECK-NEXT:    mov r5, r3
-; CHECK-NEXT:    vmov r3, s28
-; CHECK-NEXT:    vldr s24, .LCPI50_2
-; CHECK-NEXT:    vldr s20, .LCPI50_3
-; CHECK-NEXT:    vcvtt.f32.f16 s19, s19
-; CHECK-NEXT:    vcmp.f32 s30, s24
-; CHECK-NEXT:    vcvtb.f32.f16 s22, s16
+; CHECK-NEXT:    vldr s18, .LCPI50_3
+; CHECK-NEXT:    mov r7, r3
+; CHECK-NEXT:    vcmp.f32 s21, s18
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s30, s20
+; CHECK-NEXT:    vcmp.f32 s21, s20
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s30, s30
+; CHECK-NEXT:    vcmp.f32 s21, s21
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s30, s24
+; CHECK-NEXT:    vcmp.f32 s21, s18
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r2, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s30, s20
-; CHECK-NEXT:    str.w r2, [r4, #83]
+; CHECK-NEXT:    vcmp.f32 s21, s20
+; CHECK-NEXT:    str.w r2, [r9, #83]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s30, s30
+; CHECK-NEXT:    vcmp.f32 s21, s21
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s30, s24
+; CHECK-NEXT:    vcmp.f32 s21, s18
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str.w r1, [r4, #79]
+; CHECK-NEXT:    str.w r1, [r9, #79]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    vcmp.f32 s30, s20
-; CHECK-NEXT:    vcvtb.f32.f16 s26, s17
+; CHECK-NEXT:    vcmp.f32 s21, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    vcmp.f32 s30, s30
+; CHECK-NEXT:    vcmp.f32 s21, s21
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
-; CHECK-NEXT:    str.w r0, [r4, #75]
-; CHECK-NEXT:    vmov r9, s19
-; CHECK-NEXT:    vmov r8, s22
-; CHECK-NEXT:    mov r0, r3
-; CHECK-NEXT:    vmov r6, s26
+; CHECK-NEXT:    str.w r0, [r9, #75]
+; CHECK-NEXT:    mov r0, r5
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    vcmp.f32 s28, s24
-; CHECK-NEXT:    mov r7, r3
+; CHECK-NEXT:    vcmp.f32 s30, s18
+; CHECK-NEXT:    mov r5, r3
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s28, s20
+; CHECK-NEXT:    vcmp.f32 s30, s20
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s28, s28
+; CHECK-NEXT:    vcmp.f32 s30, s30
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s28, s24
+; CHECK-NEXT:    vcmp.f32 s30, s18
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r2, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s28, s20
-; CHECK-NEXT:    str.w r2, [r4, #58]
+; CHECK-NEXT:    vcmp.f32 s30, s20
+; CHECK-NEXT:    str.w r2, [r9, #58]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s28, s28
+; CHECK-NEXT:    vcmp.f32 s30, s30
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s28, s24
+; CHECK-NEXT:    vcmp.f32 s30, s18
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str.w r1, [r4, #54]
+; CHECK-NEXT:    str.w r1, [r9, #54]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    vcmp.f32 s28, s20
+; CHECK-NEXT:    vcmp.f32 s30, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    vcmp.f32 s28, s28
+; CHECK-NEXT:    vcmp.f32 s30, s30
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
-; CHECK-NEXT:    str.w r0, [r4, #50]
+; CHECK-NEXT:    str.w r0, [r9, #50]
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    vcmp.f32 s26, s24
-; CHECK-NEXT:    mov r10, r3
+; CHECK-NEXT:    vcmp.f32 s28, s18
+; CHECK-NEXT:    str r3, [sp, #24] @ 4-byte Spill
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s26, s20
+; CHECK-NEXT:    vcmp.f32 s28, s20
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s26, s26
+; CHECK-NEXT:    vcmp.f32 s28, s28
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s26, s24
+; CHECK-NEXT:    vcmp.f32 s28, s18
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r2, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s26, s20
-; CHECK-NEXT:    str.w r2, [r4, #33]
+; CHECK-NEXT:    vcmp.f32 s28, s20
+; CHECK-NEXT:    str.w r2, [r9, #33]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s26, s26
+; CHECK-NEXT:    vcmp.f32 s28, s28
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s26, s24
+; CHECK-NEXT:    vcmp.f32 s28, s18
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str.w r1, [r4, #29]
+; CHECK-NEXT:    str.w r1, [r9, #29]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    vcmp.f32 s26, s20
+; CHECK-NEXT:    vcmp.f32 s28, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    vcmp.f32 s26, s26
+; CHECK-NEXT:    vcmp.f32 s28, s28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
-; CHECK-NEXT:    str.w r0, [r4, #25]
-; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    str.w r0, [r9, #25]
+; CHECK-NEXT:    mov r0, r4
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    vcmp.f32 s22, s24
-; CHECK-NEXT:    mov r8, r3
+; CHECK-NEXT:    vcmp.f32 s26, s18
+; CHECK-NEXT:    str r3, [sp, #12] @ 4-byte Spill
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s22, s20
+; CHECK-NEXT:    vcmp.f32 s26, s20
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s22, s22
+; CHECK-NEXT:    vcmp.f32 s26, s26
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s22, s24
+; CHECK-NEXT:    vcmp.f32 s26, s18
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r2, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s22, s20
-; CHECK-NEXT:    str r2, [r4, #8]
+; CHECK-NEXT:    vcmp.f32 s26, s20
+; CHECK-NEXT:    str.w r2, [r9, #8]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s22, s22
+; CHECK-NEXT:    vcmp.f32 s26, s26
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s22, s24
+; CHECK-NEXT:    vcmp.f32 s26, s18
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str r1, [r4, #4]
+; CHECK-NEXT:    str.w r1, [r9, #4]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    vcmp.f32 s22, s20
+; CHECK-NEXT:    vcmp.f32 s26, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    vcmp.f32 s22, s22
+; CHECK-NEXT:    vcmp.f32 s26, s26
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
-; CHECK-NEXT:    str r0, [r4]
-; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    str.w r0, [r9]
+; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    vcmp.f32 s19, s24
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s19, s20
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r1, #0
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s19, s19
-; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r1, #-1
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s19, s24
-; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r1, #0
+; CHECK-NEXT:    vcmp.f32 s24, s18
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s19, s20
+; CHECK-NEXT:    vcmp.f32 s24, s20
+; CHECK-NEXT:    str r3, [sp, #8] @ 4-byte Spill
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r2, #0
+; CHECK-NEXT:    movlt r6, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s19, s19
+; CHECK-NEXT:    vcmp.f32 s24, s24
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r2, #-1
+; CHECK-NEXT:    movgt.w r6, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s19, s24
+; CHECK-NEXT:    vcmp.f32 s21, s18
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r2, #0
-; CHECK-NEXT:    lsrs r6, r1, #28
+; CHECK-NEXT:    movvs r6, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s19, s20
-; CHECK-NEXT:    orr.w r6, r6, r2, lsl #4
-; CHECK-NEXT:    str.w r6, [r4, #95]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r0, #0
+; CHECK-NEXT:    mvnlt r7, #7
+; CHECK-NEXT:    vcmp.f32 s21, s20
+; CHECK-NEXT:    mov r11, r1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s19, s19
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r0, #-1
+; CHECK-NEXT:    movgt r7, #7
+; CHECK-NEXT:    vcmp.f32 s21, s21
+; CHECK-NEXT:    mov r10, r2
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s19, s24
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r0, #0
-; CHECK-NEXT:    lsrs r6, r0, #28
-; CHECK-NEXT:    orr.w r1, r6, r1, lsl #4
-; CHECK-NEXT:    str.w r1, [r4, #91]
+; CHECK-NEXT:    movvs r7, #0
+; CHECK-NEXT:    and r0, r7, #15
+; CHECK-NEXT:    orr.w r1, r0, r6, lsl #4
+; CHECK-NEXT:    vmov r0, s22
+; CHECK-NEXT:    str.w r1, [r9, #87]
+; CHECK-NEXT:    bl __fixsfti
+; CHECK-NEXT:    vcmp.f32 s22, s18
+; CHECK-NEXT:    mov r8, r0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s22, s20
+; CHECK-NEXT:    str r2, [sp, #20] @ 4-byte Spill
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    mvnlt r3, #7
-; CHECK-NEXT:    vcmp.f32 s19, s20
+; CHECK-NEXT:    movlt.w r8, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s19, s19
+; CHECK-NEXT:    vcmp.f32 s22, s22
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r3, #7
-; CHECK-NEXT:    lsrs r1, r2, #28
-; CHECK-NEXT:    vcvtt.f32.f16 s19, s18
+; CHECK-NEXT:    movgt.w r8, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s30, s18
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r3, #0
-; CHECK-NEXT:    orr.w r2, r1, r3, lsl #4
-; CHECK-NEXT:    vmov r1, s19
-; CHECK-NEXT:    strb.w r2, [r4, #99]
-; CHECK-NEXT:    vcmp.f32 s30, s24
+; CHECK-NEXT:    movvs.w r8, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    mvnlt r5, #7
 ; CHECK-NEXT:    vcmp.f32 s30, s20
+; CHECK-NEXT:    mov r4, r1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s30, s30
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt r5, #7
-; CHECK-NEXT:    vcmp.f32 s30, s30
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r5, #0
-; CHECK-NEXT:    and r2, r5, #15
-; CHECK-NEXT:    orr.w r0, r2, r0, lsl #4
-; CHECK-NEXT:    str.w r0, [r4, #87]
-; CHECK-NEXT:    mov r0, r1
+; CHECK-NEXT:    and r0, r5, #15
+; CHECK-NEXT:    orr.w r0, r0, r8, lsl #4
+; CHECK-NEXT:    vcvtt.f32.f16 s30, s17
+; CHECK-NEXT:    str.w r0, [r9, #62]
+; CHECK-NEXT:    vmov r0, s30
+; CHECK-NEXT:    mov r7, r3
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    vcmp.f32 s19, s24
-; CHECK-NEXT:    vcvtt.f32.f16 s18, s17
+; CHECK-NEXT:    vcmp.f32 s30, s18
+; CHECK-NEXT:    str r1, [sp, #16] @ 4-byte Spill
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s19, s20
+; CHECK-NEXT:    str r2, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT:    str r3, [sp, #4] @ 4-byte Spill
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r1, #0
+; CHECK-NEXT:    movlt r0, #0
+; CHECK-NEXT:    vcmp.f32 s30, s20
+; CHECK-NEXT:    vcvtt.f32.f16 s16, s16
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s19, s19
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r1, #-1
+; CHECK-NEXT:    movgt.w r0, #-1
+; CHECK-NEXT:    vcmp.f32 s30, s30
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s19, s24
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r1, #0
+; CHECK-NEXT:    movvs r0, #0
+; CHECK-NEXT:    vcmp.f32 s28, s18
+; CHECK-NEXT:    mov r1, r0
+; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s19, s20
+; CHECK-NEXT:    ldr r0, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    vcmp.f32 s28, s20
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r2, #0
+; CHECK-NEXT:    mvnlt r0, #7
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s19, s19
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r2, #-1
+; CHECK-NEXT:    movgt r0, #7
+; CHECK-NEXT:    vcmp.f32 s28, s28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    lsr.w r6, r1, #28
-; CHECK-NEXT:    vcmp.f32 s19, s24
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r2, #0
-; CHECK-NEXT:    orr.w r6, r6, r2, lsl #4
+; CHECK-NEXT:    movvs r0, #0
+; CHECK-NEXT:    and r0, r0, #15
+; CHECK-NEXT:    orr.w r0, r0, r1, lsl #4
+; CHECK-NEXT:    str.w r0, [r9, #37]
+; CHECK-NEXT:    vmov r0, s16
+; CHECK-NEXT:    bl __fixsfti
+; CHECK-NEXT:    vcmp.f32 s16, s18
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str.w r6, [r4, #70]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    vcmp.f32 s19, s20
+; CHECK-NEXT:    vcmp.f32 s16, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    vcmp.f32 s19, s19
-; CHECK-NEXT:    lsrs r2, r2, #28
+; CHECK-NEXT:    vcmp.f32 s16, s16
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
-; CHECK-NEXT:    lsrs r6, r0, #28
-; CHECK-NEXT:    orr.w r1, r6, r1, lsl #4
-; CHECK-NEXT:    str.w r1, [r4, #66]
-; CHECK-NEXT:    vmov r1, s18
-; CHECK-NEXT:    vcmp.f32 s19, s24
-; CHECK-NEXT:    vcvtt.f32.f16 s16, s16
+; CHECK-NEXT:    vcmp.f32 s26, s18
+; CHECK-NEXT:    ldr r5, [sp, #12] @ 4-byte Reload
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s19, s20
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    mvnlt r3, #7
+; CHECK-NEXT:    mvnlt r5, #7
+; CHECK-NEXT:    vcmp.f32 s26, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s19, s19
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r3, #7
+; CHECK-NEXT:    movgt r5, #7
+; CHECK-NEXT:    vcmp.f32 s26, s26
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s28, s24
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r3, #0
-; CHECK-NEXT:    orr.w r2, r2, r3, lsl #4
+; CHECK-NEXT:    movvs r5, #0
+; CHECK-NEXT:    vcmp.f32 s24, s18
+; CHECK-NEXT:    and r5, r5, #15
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s24, s20
+; CHECK-NEXT:    orr.w r5, r5, r0, lsl #4
+; CHECK-NEXT:    str.w r5, [r9, #12]
+; CHECK-NEXT:    it lt
+; CHECK-NEXT:    movlt.w r11, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    b.w .LBB50_3
 ; CHECK-NEXT:    .p2align 2
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI50_2:
-; CHECK-NEXT:    .long 0xf1000000 @ float -6.338253E+29
+; CHECK-NEXT:    .long 0x70ffffff @ float 6.33825262E+29
 ; CHECK-NEXT:    .p2align 2
 ; CHECK-NEXT:  @ %bb.2:
 ; CHECK-NEXT:  .LCPI50_3:
-; CHECK-NEXT:    .long 0x70ffffff @ float 6.33825262E+29
+; CHECK-NEXT:    .long 0xf1000000 @ float -6.338253E+29
 ; CHECK-NEXT:    .p2align 1
 ; CHECK-NEXT:  .LBB50_3:
-; CHECK-NEXT:    strb.w r2, [r4, #74]
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    mvnlt r7, #7
-; CHECK-NEXT:    vcmp.f32 s28, s20
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s24, s24
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r7, #7
-; CHECK-NEXT:    vcmp.f32 s28, s28
+; CHECK-NEXT:    movgt.w r11, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s24, s18
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r7, #0
-; CHECK-NEXT:    and r2, r7, #15
-; CHECK-NEXT:    orr.w r0, r2, r0, lsl #4
-; CHECK-NEXT:    str.w r0, [r4, #62]
-; CHECK-NEXT:    mov r0, r1
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    vcmp.f32 s18, s24
+; CHECK-NEXT:    movvs.w r11, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, s20
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r1, #0
+; CHECK-NEXT:    movlt.w r10, #0
+; CHECK-NEXT:    vcmp.f32 s24, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, s18
+; CHECK-NEXT:    lsrl r6, r11, #28
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r1, #-1
+; CHECK-NEXT:    movgt.w r10, #-1
+; CHECK-NEXT:    vcmp.f32 s24, s24
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, s24
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r1, #0
+; CHECK-NEXT:    movvs.w r10, #0
+; CHECK-NEXT:    orr.w r5, r11, r10, lsl #4
+; CHECK-NEXT:    str.w r5, [r9, #95]
+; CHECK-NEXT:    str.w r6, [r9, #91]
+; CHECK-NEXT:    vcmp.f32 s24, s18
+; CHECK-NEXT:    ldr r6, [sp, #8] @ 4-byte Reload
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, s20
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r2, #0
+; CHECK-NEXT:    mvnlt r6, #7
+; CHECK-NEXT:    vcmp.f32 s24, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, s18
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r2, #-1
+; CHECK-NEXT:    movgt r6, #7
+; CHECK-NEXT:    vcmp.f32 s24, s24
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    lsr.w r7, r1, #28
-; CHECK-NEXT:    vcmp.f32 s18, s24
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r2, #0
-; CHECK-NEXT:    orr.w r7, r7, r2, lsl #4
+; CHECK-NEXT:    movvs r6, #0
+; CHECK-NEXT:    and r5, r6, #15
+; CHECK-NEXT:    vcmp.f32 s22, s18
+; CHECK-NEXT:    lsrl r10, r5, #28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str.w r7, [r4, #45]
+; CHECK-NEXT:    vcmp.f32 s22, s20
+; CHECK-NEXT:    strb.w r10, [r9, #99]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    vcmp.f32 s18, s20
+; CHECK-NEXT:    mvnlt r7, #7
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s22, s22
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    vcmp.f32 s18, s18
-; CHECK-NEXT:    lsrs r2, r2, #28
+; CHECK-NEXT:    movgt r7, #7
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s22, s18
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r0, #0
-; CHECK-NEXT:    lsrs r7, r0, #28
-; CHECK-NEXT:    vcmp.f32 s18, s24
-; CHECK-NEXT:    orr.w r7, r7, r1, lsl #4
-; CHECK-NEXT:    vmov r1, s16
+; CHECK-NEXT:    movvs r7, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, s20
-; CHECK-NEXT:    str.w r7, [r4, #41]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    mvnlt r3, #7
+; CHECK-NEXT:    movlt r4, #0
+; CHECK-NEXT:    vcmp.f32 s22, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, s18
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r3, #7
+; CHECK-NEXT:    movgt.w r4, #-1
+; CHECK-NEXT:    vcmp.f32 s22, s22
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s26, s24
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r3, #0
-; CHECK-NEXT:    orr.w r2, r2, r3, lsl #4
+; CHECK-NEXT:    movvs r4, #0
+; CHECK-NEXT:    vmov q0[3], q0[1], r4, r7
+; CHECK-NEXT:    vcmp.f32 s22, s18
+; CHECK-NEXT:    ldr r4, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    vmov r5, s1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    strb.w r2, [r4, #49]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    mvnlt r10, #7
-; CHECK-NEXT:    vcmp.f32 s26, s20
+; CHECK-NEXT:    movlt r4, #0
+; CHECK-NEXT:    vcmp.f32 s22, s20
+; CHECK-NEXT:    lsrl r8, r5, #28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r10, #7
-; CHECK-NEXT:    vcmp.f32 s26, s26
+; CHECK-NEXT:    movgt.w r4, #-1
+; CHECK-NEXT:    vcmp.f32 s22, s22
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs.w r10, #0
-; CHECK-NEXT:    and r2, r10, #15
-; CHECK-NEXT:    orr.w r0, r2, r0, lsl #4
-; CHECK-NEXT:    str.w r0, [r4, #37]
-; CHECK-NEXT:    mov r0, r1
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    vcmp.f32 s16, s24
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s16, s20
+; CHECK-NEXT:    movvs r4, #0
+; CHECK-NEXT:    orr.w r6, r5, r4, lsl #4
+; CHECK-NEXT:    and r5, r7, #15
+; CHECK-NEXT:    lsrl r4, r5, #28
+; CHECK-NEXT:    str.w r6, [r9, #70]
+; CHECK-NEXT:    str.w r8, [r9, #66]
+; CHECK-NEXT:    vcmp.f32 s30, s18
+; CHECK-NEXT:    strb.w r4, [r9, #74]
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    ldr r4, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    vcmp.f32 s30, s20
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r1, #0
+; CHECK-NEXT:    mvnlt r4, #7
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s16, s16
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r1, #-1
+; CHECK-NEXT:    movgt r4, #7
+; CHECK-NEXT:    vcmp.f32 s30, s30
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s16, s24
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r1, #0
+; CHECK-NEXT:    movvs r4, #0
+; CHECK-NEXT:    vcmp.f32 s30, s18
+; CHECK-NEXT:    ldr r7, [sp, #16] @ 4-byte Reload
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s16, s20
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r2, #0
+; CHECK-NEXT:    movlt r7, #0
+; CHECK-NEXT:    vcmp.f32 s30, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s16, s16
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r2, #-1
+; CHECK-NEXT:    movgt.w r7, #-1
+; CHECK-NEXT:    vcmp.f32 s30, s30
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s16, s24
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r2, #0
-; CHECK-NEXT:    lsrs r7, r1, #28
+; CHECK-NEXT:    movvs r7, #0
+; CHECK-NEXT:    vmov q0[3], q0[1], r7, r4
+; CHECK-NEXT:    vcmp.f32 s30, s18
+; CHECK-NEXT:    ldr.w r12, [sp] @ 4-byte Reload
+; CHECK-NEXT:    vmov r5, s1
+; CHECK-NEXT:    ldr r6, [sp, #28] @ 4-byte Reload
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s16, s20
-; CHECK-NEXT:    orr.w r7, r7, r2, lsl #4
-; CHECK-NEXT:    str r7, [r4, #20]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r0, #0
+; CHECK-NEXT:    movlt r6, #0
+; CHECK-NEXT:    vcmp.f32 s30, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s16, s16
+; CHECK-NEXT:    lsrl r12, r5, #28
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r0, #-1
+; CHECK-NEXT:    movgt.w r6, #-1
+; CHECK-NEXT:    vcmp.f32 s30, s30
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s16, s24
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r0, #0
+; CHECK-NEXT:    movvs r6, #0
+; CHECK-NEXT:    orr.w r7, r5, r6, lsl #4
+; CHECK-NEXT:    and r5, r4, #15
+; CHECK-NEXT:    vcmp.f32 s16, s18
+; CHECK-NEXT:    lsrl r6, r5, #28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s16, s20
-; CHECK-NEXT:    lsr.w r7, r0, #28
-; CHECK-NEXT:    orr.w r1, r7, r1, lsl #4
-; CHECK-NEXT:    str r1, [r4, #16]
+; CHECK-NEXT:    str.w r7, [r9, #45]
+; CHECK-NEXT:    str.w r12, [r9, #41]
+; CHECK-NEXT:    strb.w r6, [r9, #49]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    mvnlt r3, #7
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
@@ -5904,28 +5907,44 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt r3, #7
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    lsr.w r1, r2, #28
-; CHECK-NEXT:    vcmp.f32 s22, s24
+; CHECK-NEXT:    vcmp.f32 s16, s18
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r3, #0
-; CHECK-NEXT:    orr.w r1, r1, r3, lsl #4
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    strb r1, [r4, #24]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    mvnlt r8, #7
-; CHECK-NEXT:    vcmp.f32 s22, s20
+; CHECK-NEXT:    movlt r1, #0
+; CHECK-NEXT:    vcmp.f32 s16, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r8, #7
-; CHECK-NEXT:    vcmp.f32 s22, s22
+; CHECK-NEXT:    movgt.w r1, #-1
+; CHECK-NEXT:    vcmp.f32 s16, s16
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs.w r8, #0
-; CHECK-NEXT:    and r1, r8, #15
-; CHECK-NEXT:    orr.w r0, r1, r0, lsl #4
-; CHECK-NEXT:    str r0, [r4, #12]
+; CHECK-NEXT:    movvs r1, #0
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r3
+; CHECK-NEXT:    vcmp.f32 s16, s18
+; CHECK-NEXT:    vmov r1, s1
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    it lt
+; CHECK-NEXT:    movlt r2, #0
+; CHECK-NEXT:    vcmp.f32 s16, s20
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    lsrl r0, r1, #28
+; CHECK-NEXT:    it gt
+; CHECK-NEXT:    movgt.w r2, #-1
+; CHECK-NEXT:    vcmp.f32 s16, s16
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    it vs
+; CHECK-NEXT:    movvs r2, #0
+; CHECK-NEXT:    orr.w r1, r1, r2, lsl #4
+; CHECK-NEXT:    strd r0, r1, [r9, #16]
+; CHECK-NEXT:    and r1, r3, #15
+; CHECK-NEXT:    lsrl r2, r1, #28
+; CHECK-NEXT:    strb.w r2, [r9, #24]
+; CHECK-NEXT:    add sp, #32
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEXT:  @ %bb.4:
     %x = call <8 x i100> @llvm.fptosi.sat.v8f16.v8i100(<8 x half> %f)
     ret <8 x i100> %x
diff --git a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
index 8ea12bd1fc0deb..2b6d0da5317042 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
@@ -1506,110 +1506,110 @@ define arm_aapcs_vfpcc <4 x i50> @test_unsigned_v4f32_v4i50(<4 x float> %f) {
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    mov r8, r0
 ; CHECK-NEXT:    vmov r0, s16
-; CHECK-NEXT:    vldr s20, .LCPI28_0
-; CHECK-NEXT:    vmov r4, s17
-; CHECK-NEXT:    vmov r6, s19
 ; CHECK-NEXT:    bl __aeabi_f2ulz
-; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    vmov r0, s18
 ; CHECK-NEXT:    vcmp.f32 s16, #0
-; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    mov r9, r1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    mov r5, r1
+; CHECK-NEXT:    vmov r4, s19
+; CHECK-NEXT:    vldr s20, .LCPI28_0
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r7, #0
+; CHECK-NEXT:    movlt r6, #0
 ; CHECK-NEXT:    bl __aeabi_f2ulz
-; CHECK-NEXT:    vcmp.f32 s17, #0
-; CHECK-NEXT:    mov r10, r1
+; CHECK-NEXT:    vcmp.f32 s18, #0
+; CHECK-NEXT:    mov r5, r1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    mov r9, r0
-; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r10, r0
+; CHECK-NEXT:    mov r0, r4
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt.w r10, #0
-; CHECK-NEXT:    vcmp.f32 s17, s20
+; CHECK-NEXT:    movlt r5, #0
+; CHECK-NEXT:    vcmp.f32 s18, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itt gt
-; CHECK-NEXT:    movwgt r10, #65535
-; CHECK-NEXT:    movtgt r10, #3
+; CHECK-NEXT:    movwgt r5, #65535
+; CHECK-NEXT:    movtgt r5, #3
 ; CHECK-NEXT:    bl __aeabi_f2ulz
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vmov r0, s17
 ; CHECK-NEXT:    vcmp.f32 s19, #0
-; CHECK-NEXT:    mov r6, r1
+; CHECK-NEXT:    mov r7, r1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s19, s20
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r6, #0
+; CHECK-NEXT:    movlt r7, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s16, s20
 ; CHECK-NEXT:    itt gt
-; CHECK-NEXT:    movwgt r6, #65535
-; CHECK-NEXT:    movtgt r6, #3
+; CHECK-NEXT:    movwgt r7, #65535
+; CHECK-NEXT:    movtgt r7, #3
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s19, #0
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r7, #-1
-; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    movgt.w r6, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s19, s20
-; CHECK-NEXT:    str.w r7, [r8]
+; CHECK-NEXT:    str.w r6, [r8]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r4, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    lsl.w r0, r6, #22
-; CHECK-NEXT:    vcmp.f32 s17, #0
+; CHECK-NEXT:    vcmp.f32 s18, #0
+; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r4, #-1
-; CHECK-NEXT:    orr.w r0, r0, r4, lsr #10
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str.w r0, [r8, #20]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt.w r9, #0
-; CHECK-NEXT:    vcmp.f32 s17, s20
+; CHECK-NEXT:    movlt.w r10, #0
+; CHECK-NEXT:    vcmp.f32 s18, s20
+; CHECK-NEXT:    bfc r1, #18, #14
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r9, #-1
-; CHECK-NEXT:    lsr.w r0, r9, #14
-; CHECK-NEXT:    orr.w r1, r0, r10, lsl #18
-; CHECK-NEXT:    vmov r0, s18
-; CHECK-NEXT:    str.w r1, [r8, #8]
-; CHECK-NEXT:    bl __aeabi_f2ulz
-; CHECK-NEXT:    vcmp.f32 s18, #0
-; CHECK-NEXT:    lsrs r2, r6, #10
+; CHECK-NEXT:    movgt.w r10, #-1
+; CHECK-NEXT:    vcmp.f32 s16, #0
+; CHECK-NEXT:    bfc r5, #18, #14
+; CHECK-NEXT:    mov r6, r10
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, s20
+; CHECK-NEXT:    lsll r4, r1, #22
+; CHECK-NEXT:    lsrl r6, r5, #28
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r1, #0
+; CHECK-NEXT:    movlt.w r9, #0
+; CHECK-NEXT:    vcmp.f32 s16, s20
+; CHECK-NEXT:    orrs r1, r5
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s16, #0
 ; CHECK-NEXT:    itt gt
-; CHECK-NEXT:    movwgt r1, #65535
-; CHECK-NEXT:    movtgt r1, #3
+; CHECK-NEXT:    movwgt r9, #65535
+; CHECK-NEXT:    movtgt r9, #3
+; CHECK-NEXT:    str.w r1, [r8, #20]
+; CHECK-NEXT:    bl __aeabi_f2ulz
+; CHECK-NEXT:    vcmp.f32 s17, #0
+; CHECK-NEXT:    orr.w r2, r6, r4
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s16, s20
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r5, #0
+; CHECK-NEXT:    movlt r1, #0
+; CHECK-NEXT:    vcmp.f32 s17, s20
+; CHECK-NEXT:    bfc r9, #18, #14
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, #0
 ; CHECK-NEXT:    itt gt
-; CHECK-NEXT:    movwgt r5, #65535
-; CHECK-NEXT:    movtgt r5, #3
+; CHECK-NEXT:    movwgt r1, #65535
+; CHECK-NEXT:    movtgt r1, #3
+; CHECK-NEXT:    str.w r2, [r8, #16]
+; CHECK-NEXT:    lsrs r2, r7, #10
+; CHECK-NEXT:    vcmp.f32 s17, #0
 ; CHECK-NEXT:    strb.w r2, [r8, #24]
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    vcmp.f32 s18, s20
+; CHECK-NEXT:    vcmp.f32 s17, s20
+; CHECK-NEXT:    bfc r1, #18, #14
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    ubfx r2, r10, #14, #4
-; CHECK-NEXT:    bfc r1, #18, #14
-; CHECK-NEXT:    orr.w r2, r2, r0, lsl #4
-; CHECK-NEXT:    lsrs r0, r0, #28
-; CHECK-NEXT:    orr.w r0, r0, r1, lsl #4
-; CHECK-NEXT:    bfc r5, #18, #14
-; CHECK-NEXT:    str.w r2, [r8, #12]
-; CHECK-NEXT:    orr.w r2, r5, r9, lsl #18
-; CHECK-NEXT:    str.w r2, [r8, #4]
-; CHECK-NEXT:    orr.w r0, r0, r4, lsl #22
-; CHECK-NEXT:    str.w r0, [r8, #16]
+; CHECK-NEXT:    mov r2, r0
+; CHECK-NEXT:    orr.w r0, r9, r0, lsl #18
+; CHECK-NEXT:    lsrl r2, r1, #14
+; CHECK-NEXT:    orr.w r1, r1, r10, lsl #4
+; CHECK-NEXT:    strd r2, r1, [r8, #8]
+; CHECK-NEXT:    str.w r0, [r8, #4]
 ; CHECK-NEXT:    vpop {d8, d9, d10}
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
 ; CHECK-NEXT:    .p2align 2
@@ -1729,32 +1729,31 @@ define arm_aapcs_vfpcc <4 x i64> @test_unsigned_v4f32_v4i64(<4 x float> %f) {
 define arm_aapcs_vfpcc <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-LABEL: test_unsigned_v4f32_v4i100:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
-; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-NEXT:    .pad #4
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9, d10}
 ; CHECK-NEXT:    vpush {d8, d9, d10}
 ; CHECK-NEXT:    vmov q4, q0
-; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r8, r0
 ; CHECK-NEXT:    vmov r0, s18
-; CHECK-NEXT:    vldr s20, .LCPI30_0
-; CHECK-NEXT:    vmov r5, s16
-; CHECK-NEXT:    vmov r7, s19
 ; CHECK-NEXT:    bl __fixunssfti
+; CHECK-NEXT:    mov r9, r3
+; CHECK-NEXT:    vmov r3, s16
+; CHECK-NEXT:    vldr s20, .LCPI30_0
 ; CHECK-NEXT:    vcmp.f32 s18, #0
-; CHECK-NEXT:    mov r6, r3
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, s20
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r2, #0
+; CHECK-NEXT:    vcmp.f32 s18, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s18, #0
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s18, s20
-; CHECK-NEXT:    str.w r2, [r4, #33]
+; CHECK-NEXT:    str.w r2, [r8, #33]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
@@ -1762,18 +1761,20 @@ define arm_aapcs_vfpcc <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str.w r1, [r4, #29]
+; CHECK-NEXT:    str.w r1, [r8, #29]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    vcmp.f32 s18, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    str.w r0, [r4, #25]
-; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    str.w r0, [r8, #25]
+; CHECK-NEXT:    vmov r7, s17
+; CHECK-NEXT:    vmov r4, s19
+; CHECK-NEXT:    mov r0, r3
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    vcmp.f32 s16, #0
-; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    mov r10, r3
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s16, s20
 ; CHECK-NEXT:    it lt
@@ -1784,7 +1785,7 @@ define arm_aapcs_vfpcc <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s16, s20
-; CHECK-NEXT:    str r2, [r4, #8]
+; CHECK-NEXT:    str.w r2, [r8, #8]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
@@ -1792,126 +1793,126 @@ define arm_aapcs_vfpcc <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str r1, [r4, #4]
+; CHECK-NEXT:    str.w r1, [r8, #4]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    vcmp.f32 s16, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    str r0, [r4]
-; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    str.w r0, [r8]
+; CHECK-NEXT:    mov r0, r4
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    vcmp.f32 s19, #0
+; CHECK-NEXT:    mov r4, r0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s19, s20
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r1, #0
+; CHECK-NEXT:    movlt r4, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s19, #0
+; CHECK-NEXT:    vcmp.f32 s18, #0
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r1, #-1
+; CHECK-NEXT:    movgt.w r4, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s19, s20
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r2, #0
+; CHECK-NEXT:    movlt.w r9, #0
+; CHECK-NEXT:    vcmp.f32 s18, s20
+; CHECK-NEXT:    mov r5, r1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    lsr.w r7, r1, #28
-; CHECK-NEXT:    vcmp.f32 s19, #0
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r2, #-1
-; CHECK-NEXT:    orr.w r7, r7, r2, lsl #4
+; CHECK-NEXT:    movgt.w r9, #15
+; CHECK-NEXT:    and r0, r9, #15
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    orr.w r0, r0, r4, lsl #4
+; CHECK-NEXT:    str.w r0, [r8, #37]
+; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    mov r11, r3
+; CHECK-NEXT:    bl __fixunssfti
+; CHECK-NEXT:    vcmp.f32 s17, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str.w r7, [r4, #45]
+; CHECK-NEXT:    vcmp.f32 s17, s20
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    vcmp.f32 s19, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s16, #0
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    lsrs r7, r0, #28
-; CHECK-NEXT:    vcmp.f32 s19, #0
-; CHECK-NEXT:    orr.w r7, r7, r1, lsl #4
-; CHECK-NEXT:    vmov r1, s17
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s19, s20
-; CHECK-NEXT:    str.w r7, [r4, #41]
+; CHECK-NEXT:    vcmp.f32 s16, s20
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r3, #0
+; CHECK-NEXT:    movlt.w r10, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    lsr.w r2, r2, #28
-; CHECK-NEXT:    vcmp.f32 s18, #0
+; CHECK-NEXT:    vcmp.f32 s19, #0
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r3, #15
-; CHECK-NEXT:    orr.w r2, r2, r3, lsl #4
+; CHECK-NEXT:    movgt.w r10, #15
+; CHECK-NEXT:    and r7, r10, #15
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    strb.w r2, [r4, #49]
+; CHECK-NEXT:    vcmp.f32 s19, s20
+; CHECK-NEXT:    orr.w r7, r7, r0, lsl #4
+; CHECK-NEXT:    str.w r7, [r8, #12]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r6, #0
-; CHECK-NEXT:    vcmp.f32 s18, s20
+; CHECK-NEXT:    movlt r5, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s19, #0
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r6, #15
-; CHECK-NEXT:    and r2, r6, #15
-; CHECK-NEXT:    orr.w r0, r2, r0, lsl #4
-; CHECK-NEXT:    str.w r0, [r4, #37]
-; CHECK-NEXT:    mov r0, r1
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    vcmp.f32 s17, #0
+; CHECK-NEXT:    movgt.w r5, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s17, s20
+; CHECK-NEXT:    vcmp.f32 s19, s20
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r1, #0
+; CHECK-NEXT:    movlt r6, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s17, #0
+; CHECK-NEXT:    lsrl r4, r5, #28
+; CHECK-NEXT:    vcmp.f32 s19, #0
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r1, #-1
+; CHECK-NEXT:    movgt.w r6, #-1
+; CHECK-NEXT:    orr.w r7, r5, r6, lsl #4
+; CHECK-NEXT:    str.w r7, [r8, #45]
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s17, s20
+; CHECK-NEXT:    str.w r4, [r8, #41]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r2, #0
+; CHECK-NEXT:    movlt.w r11, #0
+; CHECK-NEXT:    vcmp.f32 s19, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s17, #0
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r2, #-1
-; CHECK-NEXT:    lsrs r7, r1, #28
+; CHECK-NEXT:    movgt.w r11, #15
+; CHECK-NEXT:    and r5, r11, #15
+; CHECK-NEXT:    vcmp.f32 s17, #0
+; CHECK-NEXT:    lsrl r6, r5, #28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s17, s20
-; CHECK-NEXT:    orr.w r7, r7, r2, lsl #4
-; CHECK-NEXT:    str r7, [r4, #20]
+; CHECK-NEXT:    strb.w r6, [r8, #49]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r0, #0
+; CHECK-NEXT:    movlt r3, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s17, #0
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    lsrs r7, r0, #28
-; CHECK-NEXT:    orr.w r1, r7, r1, lsl #4
+; CHECK-NEXT:    movgt r3, #15
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s17, s20
-; CHECK-NEXT:    str r1, [r4, #16]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r3, #0
+; CHECK-NEXT:    movlt r1, #0
+; CHECK-NEXT:    vcmp.f32 s17, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    lsr.w r1, r2, #28
-; CHECK-NEXT:    vcmp.f32 s16, #0
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r3, #15
-; CHECK-NEXT:    orr.w r1, r1, r3, lsl #4
+; CHECK-NEXT:    movgt.w r1, #-1
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r3
+; CHECK-NEXT:    vcmp.f32 s17, #0
+; CHECK-NEXT:    vmov r1, s1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    strb r1, [r4, #24]
+; CHECK-NEXT:    lsrl r0, r1, #28
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r5, #0
-; CHECK-NEXT:    vcmp.f32 s16, s20
+; CHECK-NEXT:    movlt r2, #0
+; CHECK-NEXT:    vcmp.f32 s17, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r5, #15
-; CHECK-NEXT:    and r1, r5, #15
-; CHECK-NEXT:    orr.w r0, r1, r0, lsl #4
-; CHECK-NEXT:    str r0, [r4, #12]
+; CHECK-NEXT:    movgt.w r2, #-1
+; CHECK-NEXT:    orr.w r1, r1, r2, lsl #4
+; CHECK-NEXT:    strd r0, r1, [r8, #16]
+; CHECK-NEXT:    and r1, r3, #15
+; CHECK-NEXT:    lsrl r2, r1, #28
+; CHECK-NEXT:    strb.w r2, [r8, #24]
 ; CHECK-NEXT:    vpop {d8, d9, d10}
 ; CHECK-NEXT:    add sp, #4
-; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEXT:    .p2align 2
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI30_0:
@@ -2923,195 +2924,196 @@ define arm_aapcs_vfpcc <2 x i100> @test_unsigned_v2f64_v2i100(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #48
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI40_0
-; CHECK-NEXT:    vmov r9, r5, d8
-; CHECK-NEXT:    str r0, [sp, #44] @ 4-byte Spill
-; CHECK-NEXT:    vmov r2, r3, d0
-; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    vmov r6, r5, d8
+; CHECK-NEXT:    mov r8, r0
+; CHECK-NEXT:    vmov r2, r9, d0
+; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r7, r2
-; CHECK-NEXT:    mov r6, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    mov r10, r2
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    vldr d0, .LCPI40_1
-; CHECK-NEXT:    mov r11, r0
-; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    vmov r2, r3, d0
-; CHECK-NEXT:    str r2, [sp, #40] @ 4-byte Spill
-; CHECK-NEXT:    mov r10, r3
+; CHECK-NEXT:    vmov r11, r3, d0
+; CHECK-NEXT:    str r3, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT:    mov r2, r11
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    str r5, [sp, #36] @ 4-byte Spill
 ; CHECK-NEXT:    bl __fixunsdfti
-; CHECK-NEXT:    cmp.w r8, #0
-; CHECK-NEXT:    strd r1, r0, [sp, #8] @ 8-byte Folded Spill
-; CHECK-NEXT:    csel r0, r2, r8, ne
-; CHECK-NEXT:    str r3, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    cmp.w r11, #0
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    strd r1, r0, [sp, #20] @ 8-byte Folded Spill
+; CHECK-NEXT:    csel r0, r2, r4, ne
+; CHECK-NEXT:    str r3, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT:    cmp r7, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne.w r0, #-1
-; CHECK-NEXT:    ldr r4, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    str.w r0, [r8, #8]
+; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r6
-; CHECK-NEXT:    mov r11, r7
-; CHECK-NEXT:    str r0, [r4, #8]
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    str r5, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    str r7, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT:    str r6, [sp, #32] @ 4-byte Spill
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r3, r9
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr r7, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    ldr r3, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    str.w r9, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT:    mov r2, r7
+; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    mov r7, r6
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r3, r9
 ; CHECK-NEXT:    csel r0, r1, r0, ne
-; CHECK-NEXT:    cmp.w r8, #0
+; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne.w r0, #-1
-; CHECK-NEXT:    str r0, [r4, #4]
-; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    str.w r0, [r8, #4]
+; CHECK-NEXT:    mov r0, r7
 ; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r6, r8
+; CHECK-NEXT:    strd r8, r7, [sp, #28] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    mov r6, r0
-; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    ldr r7, [sp, #44] @ 4-byte Reload
 ; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    mov r9, r7
-; CHECK-NEXT:    str.w r10, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    mov r5, r11
+; CHECK-NEXT:    mov r3, r7
 ; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    vmov r8, r11, d9
-; CHECK-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r2, r10
 ; CHECK-NEXT:    csel r0, r1, r0, ne
-; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne.w r0, #-1
-; CHECK-NEXT:    str r0, [r4]
-; CHECK-NEXT:    ldr r5, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    ldr r6, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT:    mov r2, r5
-; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    str r0, [r6]
+; CHECK-NEXT:    mov r3, r9
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r11
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    mov r4, r0
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    mov r6, r5
+; CHECK-NEXT:    str r5, [sp] @ 4-byte Spill
+; CHECK-NEXT:    mov r5, r7
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r10, r0
+; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r11
 ; CHECK-NEXT:    bl __fixunsdfti
-; CHECK-NEXT:    cmp.w r10, #0
-; CHECK-NEXT:    strd r2, r0, [sp, #4] @ 8-byte Folded Spill
-; CHECK-NEXT:    csel r7, r1, r10, ne
-; CHECK-NEXT:    str r3, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    mov r2, r5
-; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    strd r0, r2, [sp, #20] @ 8-byte Folded Spill
+; CHECK-NEXT:    csel r0, r3, r7, ne
+; CHECK-NEXT:    str r1, [sp, #4] @ 4-byte Spill
 ; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r7, #-1
-; CHECK-NEXT:    mov r4, r6
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr.w r10, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    movne r0, #15
+; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    str r0, [sp, #16] @ 4-byte Spill
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r11
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r5
 ; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r2, r5
-; CHECK-NEXT:    mov r3, r4
-; CHECK-NEXT:    csel r9, r1, r0, ne
-; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    csel r0, r1, r0, ne
+; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r9, #-1
-; CHECK-NEXT:    ldr r6, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT:    lsrs r0, r7, #28
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    vmov q0[3], q0[1], r0, r7
+; CHECK-NEXT:    vmov r0, s1
 ; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    orr.w r0, r0, r9, lsl #4
-; CHECK-NEXT:    str r0, [r6, #20]
+; CHECK-NEXT:    mov r6, r10
+; CHECK-NEXT:    str.w r10, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    str.w r9, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr r2, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    ldr r7, [sp] @ 4-byte Reload
+; CHECK-NEXT:    mov r10, r0
 ; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    mov r5, r10
+; CHECK-NEXT:    mov r1, r11
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    mov r2, r7
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r9
 ; CHECK-NEXT:    csel r4, r1, r0, ne
-; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    cmp.w r10, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne.w r4, #-1
-; CHECK-NEXT:    lsrs r0, r4, #28
-; CHECK-NEXT:    orr.w r0, r0, r7, lsl #4
-; CHECK-NEXT:    str r0, [r6, #16]
-; CHECK-NEXT:    ldr r6, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    ldr r5, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    mov r10, r4
 ; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    ldr.w r10, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT:    mov r2, r6
-; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    mov r1, r11
+; CHECK-NEXT:    lsrl r10, r5, #28
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    ldr.w r11, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    ldr.w r11, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    mov r2, r7
+; CHECK-NEXT:    mov r9, r7
+; CHECK-NEXT:    mov r3, r11
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r2, r6
-; CHECK-NEXT:    mov r3, r10
 ; CHECK-NEXT:    csel r0, r1, r0, ne
-; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    cmp r6, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r0, #15
-; CHECK-NEXT:    lsr.w r1, r9, #28
-; CHECK-NEXT:    ldr.w r9, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT:    orr.w r0, r1, r0, lsl #4
-; CHECK-NEXT:    strb.w r0, [r9, #24]
-; CHECK-NEXT:    ldr r7, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    ldr r5, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    orr.w r1, r5, r0, lsl #4
+; CHECK-NEXT:    strd r10, r1, [r2, #16]
+; CHECK-NEXT:    mov r8, r2
+; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    and r1, r1, #15
+; CHECK-NEXT:    lsrl r0, r1, #28
+; CHECK-NEXT:    strb r0, [r2, #24]
+; CHECK-NEXT:    ldr r6, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT:    ldr r7, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    ldrd r3, r2, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr r3, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    mov r10, r0
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r11
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    ldr r1, [sp, #40] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    csel r0, r1, r0, ne
-; CHECK-NEXT:    cmp.w r8, #0
+; CHECK-NEXT:    cmp.w r10, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r0, #15
 ; CHECK-NEXT:    and r0, r0, #15
 ; CHECK-NEXT:    orr.w r0, r0, r4, lsl #4
-; CHECK-NEXT:    str.w r0, [r9, #12]
+; CHECK-NEXT:    str.w r0, [r8, #12]
 ; CHECK-NEXT:    add sp, #48
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
@@ -3639,73 +3641,90 @@ define arm_aapcs_vfpcc <8 x i16> @test_unsigned_v8f16_v8i16(<8 x half> %f) {
 define arm_aapcs_vfpcc <8 x i19> @test_unsigned_v8f16_v8i19(<8 x half> %f) {
 ; CHECK-LABEL: test_unsigned_v8f16_v8i19:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    .save {r4, r5, r7, lr}
-; CHECK-NEXT:    push {r4, r5, r7, lr}
-; CHECK-NEXT:    vldr s6, .LCPI46_1
-; CHECK-NEXT:    vcvtb.f32.f16 s8, s0
-; CHECK-NEXT:    vcvtb.f32.f16 s12, s2
-; CHECK-NEXT:    vcvtb.f32.f16 s10, s1
-; CHECK-NEXT:    vcvtt.f32.f16 s14, s1
-; CHECK-NEXT:    vcvtb.f32.f16 s1, s3
-; CHECK-NEXT:    vcvtt.f32.f16 s0, s0
-; CHECK-NEXT:    vcvtt.f32.f16 s2, s2
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r9, r11, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r9, r11, lr}
 ; CHECK-NEXT:    vldr s4, .LCPI46_0
-; CHECK-NEXT:    vcvtt.f32.f16 s3, s3
-; CHECK-NEXT:    vmaxnm.f32 s8, s8, s6
-; CHECK-NEXT:    vmaxnm.f32 s10, s10, s6
-; CHECK-NEXT:    vmaxnm.f32 s0, s0, s6
-; CHECK-NEXT:    vmaxnm.f32 s12, s12, s6
-; CHECK-NEXT:    vmaxnm.f32 s14, s14, s6
-; CHECK-NEXT:    vmaxnm.f32 s2, s2, s6
-; CHECK-NEXT:    vmaxnm.f32 s1, s1, s6
-; CHECK-NEXT:    vmaxnm.f32 s6, s3, s6
-; CHECK-NEXT:    vminnm.f32 s8, s8, s4
-; CHECK-NEXT:    vminnm.f32 s10, s10, s4
-; CHECK-NEXT:    vminnm.f32 s0, s0, s4
-; CHECK-NEXT:    vminnm.f32 s12, s12, s4
-; CHECK-NEXT:    vminnm.f32 s14, s14, s4
-; CHECK-NEXT:    vminnm.f32 s2, s2, s4
-; CHECK-NEXT:    vminnm.f32 s1, s1, s4
-; CHECK-NEXT:    vminnm.f32 s4, s6, s4
-; CHECK-NEXT:    vcvt.u32.f32 s1, s1
-; CHECK-NEXT:    vcvt.u32.f32 s4, s4
-; CHECK-NEXT:    vcvt.u32.f32 s2, s2
+; CHECK-NEXT:    vcvtb.f32.f16 s14, s1
+; CHECK-NEXT:    vldr s6, .LCPI46_1
+; CHECK-NEXT:    vcvtt.f32.f16 s12, s1
+; CHECK-NEXT:    vmaxnm.f32 s14, s14, s4
+; CHECK-NEXT:    vmaxnm.f32 s12, s12, s4
+; CHECK-NEXT:    vminnm.f32 s14, s14, s6
+; CHECK-NEXT:    vminnm.f32 s12, s12, s6
 ; CHECK-NEXT:    vcvt.u32.f32 s14, s14
+; CHECK-NEXT:    vcvtb.f32.f16 s10, s0
 ; CHECK-NEXT:    vcvt.u32.f32 s12, s12
+; CHECK-NEXT:    vcvtt.f32.f16 s0, s0
+; CHECK-NEXT:    vmaxnm.f32 s0, s0, s4
+; CHECK-NEXT:    vmaxnm.f32 s10, s10, s4
+; CHECK-NEXT:    vminnm.f32 s0, s0, s6
+; CHECK-NEXT:    vminnm.f32 s10, s10, s6
 ; CHECK-NEXT:    vcvt.u32.f32 s0, s0
+; CHECK-NEXT:    movs r1, #0
 ; CHECK-NEXT:    vcvt.u32.f32 s10, s10
-; CHECK-NEXT:    vmov r1, s1
-; CHECK-NEXT:    vmov r3, s4
-; CHECK-NEXT:    vcvt.u32.f32 s8, s8
-; CHECK-NEXT:    vmov r4, s12
-; CHECK-NEXT:    vmov r5, s10
-; CHECK-NEXT:    lsrs r2, r1, #14
-; CHECK-NEXT:    orr.w r12, r2, r3, lsl #5
-; CHECK-NEXT:    vmov r3, s2
-; CHECK-NEXT:    strh.w r12, [r0, #16]
-; CHECK-NEXT:    lsrs r2, r3, #1
-; CHECK-NEXT:    orr.w lr, r2, r1, lsl #18
+; CHECK-NEXT:    vcvtt.f32.f16 s8, s2
+; CHECK-NEXT:    vcvtb.f32.f16 s2, s2
+; CHECK-NEXT:    vmaxnm.f32 s8, s8, s4
 ; CHECK-NEXT:    vmov r2, s14
-; CHECK-NEXT:    lsrs r1, r2, #7
-; CHECK-NEXT:    orr.w r1, r1, r4, lsl #12
-; CHECK-NEXT:    orr.w r1, r1, r3, lsl #31
-; CHECK-NEXT:    vmov r3, s0
-; CHECK-NEXT:    lsrs r4, r3, #13
-; CHECK-NEXT:    orr.w r4, r4, r5, lsl #6
-; CHECK-NEXT:    orr.w r2, r4, r2, lsl #25
+; CHECK-NEXT:    vmaxnm.f32 s2, s2, s4
+; CHECK-NEXT:    vmov r4, s12
+; CHECK-NEXT:    vminnm.f32 s2, s2, s6
+; CHECK-NEXT:    vcvt.u32.f32 s2, s2
+; CHECK-NEXT:    vminnm.f32 s8, s8, s6
+; CHECK-NEXT:    vcvt.u32.f32 s8, s8
+; CHECK-NEXT:    mov.w r11, #0
+; CHECK-NEXT:    vmov r12, s0
+; CHECK-NEXT:    vcvtt.f32.f16 s0, s3
+; CHECK-NEXT:    lsll r12, r1, #19
+; CHECK-NEXT:    vmaxnm.f32 s0, s0, s4
+; CHECK-NEXT:    vminnm.f32 s0, s0, s6
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    vcvt.u32.f32 s0, s0
+; CHECK-NEXT:    movs r7, #0
+; CHECK-NEXT:    mov.w r9, #0
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    orr.w r1, r1, r2, lsl #6
+; CHECK-NEXT:    lsrl r2, r5, #26
+; CHECK-NEXT:    orr.w r1, r1, r4, lsl #25
+; CHECK-NEXT:    str r1, [r0, #4]
+; CHECK-NEXT:    vmov r1, s10
+; CHECK-NEXT:    lsrl r4, r11, #7
+; CHECK-NEXT:    orr.w r1, r1, r12
+; CHECK-NEXT:    str r1, [r0]
+; CHECK-NEXT:    orr.w r1, r2, r4
+; CHECK-NEXT:    vmov r2, s2
+; CHECK-NEXT:    lsll r2, r7, #12
 ; CHECK-NEXT:    vmov r4, s8
-; CHECK-NEXT:    orr.w r3, r4, r3, lsl #19
-; CHECK-NEXT:    strd r3, r2, [r0]
-; CHECK-NEXT:    strd r1, lr, [r0, #8]
-; CHECK-NEXT:    lsr.w r1, r12, #16
-; CHECK-NEXT:    strb r1, [r0, #18]
-; CHECK-NEXT:    pop {r4, r5, r7, pc}
+; CHECK-NEXT:    orrs r2, r1
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    lsll r4, r1, #31
+; CHECK-NEXT:    orr.w r12, r2, r4
+; CHECK-NEXT:    vmov r4, s0
+; CHECK-NEXT:    vcvtb.f32.f16 s0, s3
+; CHECK-NEXT:    lsll r4, r3, #5
+; CHECK-NEXT:    vmaxnm.f32 s0, s0, s4
+; CHECK-NEXT:    vminnm.f32 s0, s0, s6
+; CHECK-NEXT:    vcvt.u32.f32 s0, s0
+; CHECK-NEXT:    vmov r2, s0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    lsrl r6, r9, #14
+; CHECK-NEXT:    orr.w r3, r6, r4
+; CHECK-NEXT:    strh r3, [r0, #16]
+; CHECK-NEXT:    str.w r12, [r0, #8]
+; CHECK-NEXT:    lsrs r3, r3, #16
+; CHECK-NEXT:    strb r3, [r0, #18]
+; CHECK-NEXT:    orr.w r3, r5, r11
+; CHECK-NEXT:    orrs r3, r7
+; CHECK-NEXT:    orrs r1, r3
+; CHECK-NEXT:    orr.w r1, r1, r2, lsl #18
+; CHECK-NEXT:    str r1, [r0, #12]
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r9, r11, pc}
 ; CHECK-NEXT:    .p2align 2
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI46_0:
-; CHECK-NEXT:    .long 0x48ffffe0 @ float 524287
-; CHECK-NEXT:  .LCPI46_1:
 ; CHECK-NEXT:    .long 0x00000000 @ float 0
+; CHECK-NEXT:  .LCPI46_1:
+; CHECK-NEXT:    .long 0x48ffffe0 @ float 524287
     %x = call <8 x i19> @llvm.fptoui.sat.v8f16.v8i19(<8 x half> %f)
     ret <8 x i19> %x
 }
@@ -3749,46 +3768,46 @@ define arm_aapcs_vfpcc <8 x i50> @test_unsigned_v8f16_v8i50(<8 x half> %f) {
 ; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-NEXT:    .pad #4
 ; CHECK-NEXT:    sub sp, #4
-; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT:    .pad #16
-; CHECK-NEXT:    sub sp, #16
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14}
+; CHECK-NEXT:    .pad #8
+; CHECK-NEXT:    sub sp, #8
 ; CHECK-NEXT:    vmov q4, q0
-; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r10, r0
 ; CHECK-NEXT:    vcvtb.f32.f16 s24, s18
 ; CHECK-NEXT:    vmov r0, s24
 ; CHECK-NEXT:    bl __aeabi_f2ulz
-; CHECK-NEXT:    vcvtt.f32.f16 s26, s19
+; CHECK-NEXT:    vcvtt.f32.f16 s28, s19
 ; CHECK-NEXT:    mov r7, r0
-; CHECK-NEXT:    vmov r0, s26
+; CHECK-NEXT:    vmov r0, s28
 ; CHECK-NEXT:    vcvtb.f32.f16 s22, s16
-; CHECK-NEXT:    vcvtt.f32.f16 s18, s18
+; CHECK-NEXT:    vcvtb.f32.f16 s26, s19
 ; CHECK-NEXT:    vcmp.f32 s24, #0
-; CHECK-NEXT:    str r1, [sp, #8] @ 4-byte Spill
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    mov r9, r1
 ; CHECK-NEXT:    vmov r5, s22
 ; CHECK-NEXT:    vldr s20, .LCPI48_0
-; CHECK-NEXT:    vmov r8, s18
+; CHECK-NEXT:    vmov r11, s26
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r7, #0
 ; CHECK-NEXT:    bl __aeabi_f2ulz
-; CHECK-NEXT:    vcmp.f32 s26, #0
-; CHECK-NEXT:    mov r10, r1
+; CHECK-NEXT:    vcmp.f32 s28, #0
+; CHECK-NEXT:    mov r4, r1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s26, s20
+; CHECK-NEXT:    vcmp.f32 s28, s20
 ; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt.w r10, #0
+; CHECK-NEXT:    movlt r4, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    mov r0, r5
 ; CHECK-NEXT:    vcmp.f32 s24, s20
 ; CHECK-NEXT:    itt gt
-; CHECK-NEXT:    movwgt r10, #65535
-; CHECK-NEXT:    movtgt r10, #3
+; CHECK-NEXT:    movwgt r4, #65535
+; CHECK-NEXT:    movtgt r4, #3
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r7, #-1
-; CHECK-NEXT:    str.w r7, [r4, #25]
+; CHECK-NEXT:    str.w r7, [r10, #25]
 ; CHECK-NEXT:    bl __aeabi_f2ulz
 ; CHECK-NEXT:    vcmp.f32 s22, #0
 ; CHECK-NEXT:    str r1, [sp, #4] @ 4-byte Spill
@@ -3797,188 +3816,184 @@ define arm_aapcs_vfpcc <8 x i50> @test_unsigned_v8f16_v8i50(<8 x half> %f) {
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s26, #0
+; CHECK-NEXT:    vcmp.f32 s28, #0
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    str r0, [r4]
+; CHECK-NEXT:    mov r7, r4
+; CHECK-NEXT:    str.w r0, [r10]
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r6, #0
-; CHECK-NEXT:    vcmp.f32 s26, s20
+; CHECK-NEXT:    vcmp.f32 s28, s20
+; CHECK-NEXT:    mov r0, r11
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r6, #-1
-; CHECK-NEXT:    lsl.w r0, r10, #22
-; CHECK-NEXT:    str r6, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    orr.w r6, r0, r6, lsr #10
-; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    bfc r7, #18, #14
+; CHECK-NEXT:    lsll r6, r7, #22
 ; CHECK-NEXT:    bl __aeabi_f2ulz
-; CHECK-NEXT:    vcmp.f32 s18, #0
-; CHECK-NEXT:    mov r5, r1
+; CHECK-NEXT:    vcmp.f32 s26, #0
+; CHECK-NEXT:    mov r5, r0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, s20
+; CHECK-NEXT:    vcmp.f32 s26, s20
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r5, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s26, #0
+; CHECK-NEXT:    it gt
+; CHECK-NEXT:    movgt.w r5, #-1
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s26, s20
+; CHECK-NEXT:    it lt
+; CHECK-NEXT:    movlt r1, #0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    itt gt
+; CHECK-NEXT:    movwgt r1, #65535
+; CHECK-NEXT:    movtgt r1, #3
+; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    bfc r1, #18, #14
+; CHECK-NEXT:    vcvtt.f32.f16 s26, s18
+; CHECK-NEXT:    lsrl r2, r1, #28
+; CHECK-NEXT:    orr.w r0, r1, r7
+; CHECK-NEXT:    str.w r0, [r10, #45]
+; CHECK-NEXT:    vmov r0, s26
+; CHECK-NEXT:    orrs r6, r2
+; CHECK-NEXT:    bl __aeabi_f2ulz
+; CHECK-NEXT:    vcmp.f32 s26, #0
 ; CHECK-NEXT:    mov r7, r0
-; CHECK-NEXT:    vcmp.f32 s18, #0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    it lt
+; CHECK-NEXT:    movlt r1, #0
+; CHECK-NEXT:    vcmp.f32 s26, s20
+; CHECK-NEXT:    vcvtb.f32.f16 s18, s17
+; CHECK-NEXT:    lsrs r0, r4, #10
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itt gt
-; CHECK-NEXT:    movwgt r5, #65535
-; CHECK-NEXT:    movtgt r5, #3
+; CHECK-NEXT:    movwgt r1, #65535
+; CHECK-NEXT:    movtgt r1, #3
+; CHECK-NEXT:    str.w r6, [r10, #41]
+; CHECK-NEXT:    strb.w r0, [r10, #49]
+; CHECK-NEXT:    vmov r0, s18
+; CHECK-NEXT:    vcmp.f32 s26, #0
+; CHECK-NEXT:    bfc r1, #18, #14
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str.w r6, [r4, #45]
-; CHECK-NEXT:    vcmp.f32 s18, s20
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r7, #0
+; CHECK-NEXT:    vcmp.f32 s26, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r7, #-1
-; CHECK-NEXT:    lsrs r0, r7, #14
-; CHECK-NEXT:    orr.w r0, r0, r5, lsl #18
-; CHECK-NEXT:    vcvtt.f32.f16 s18, s17
-; CHECK-NEXT:    str.w r0, [r4, #33]
-; CHECK-NEXT:    vmov r0, s18
+; CHECK-NEXT:    mov r4, r7
+; CHECK-NEXT:    lsrl r4, r1, #14
+; CHECK-NEXT:    orr.w r6, r1, r5, lsl #4
 ; CHECK-NEXT:    bl __aeabi_f2ulz
+; CHECK-NEXT:    vcvtt.f32.f16 s26, s17
+; CHECK-NEXT:    mov r11, r0
+; CHECK-NEXT:    vmov r0, s26
+; CHECK-NEXT:    mov r5, r1
 ; CHECK-NEXT:    vcmp.f32 s18, #0
-; CHECK-NEXT:    mov r9, r1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    it lt
+; CHECK-NEXT:    movlt r5, #0
 ; CHECK-NEXT:    vcmp.f32 s18, s20
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    itt gt
+; CHECK-NEXT:    movwgt r5, #65535
+; CHECK-NEXT:    movtgt r5, #3
+; CHECK-NEXT:    str.w r6, [r10, #37]
+; CHECK-NEXT:    str.w r4, [r10, #33]
+; CHECK-NEXT:    bl __aeabi_f2ulz
+; CHECK-NEXT:    vcmp.f32 s26, #0
+; CHECK-NEXT:    mov r6, r1
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s26, s20
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r0, #0
+; CHECK-NEXT:    movlt r6, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, #0
-; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r0, #-1
+; CHECK-NEXT:    vcmp.f32 s24, #0
+; CHECK-NEXT:    itt gt
+; CHECK-NEXT:    movwgt r6, #65535
+; CHECK-NEXT:    movtgt r6, #3
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt.w r9, #0
-; CHECK-NEXT:    vcmp.f32 s18, s20
+; CHECK-NEXT:    vcmp.f32 s24, s20
+; CHECK-NEXT:    mov r4, r0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    mov r1, r0
 ; CHECK-NEXT:    itt gt
 ; CHECK-NEXT:    movwgt r9, #65535
 ; CHECK-NEXT:    movtgt r9, #3
-; CHECK-NEXT:    lsl.w r0, r9, #22
-; CHECK-NEXT:    orr.w r0, r0, r1, lsr #10
+; CHECK-NEXT:    bfc r9, #18, #14
 ; CHECK-NEXT:    vcvtt.f32.f16 s16, s16
-; CHECK-NEXT:    str r0, [r4, #20]
+; CHECK-NEXT:    orr.w r0, r9, r7, lsl #18
+; CHECK-NEXT:    str.w r0, [r10, #29]
 ; CHECK-NEXT:    vmov r0, s16
-; CHECK-NEXT:    bl __aeabi_f2ulz
-; CHECK-NEXT:    vcmp.f32 s16, #0
-; CHECK-NEXT:    mov r11, r1
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    vcmp.f32 s26, #0
+; CHECK-NEXT:    bfc r1, #18, #14
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s16, s20
+; CHECK-NEXT:    vcmp.f32 s26, s20
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt.w r11, #0
+; CHECK-NEXT:    movlt r4, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    vcmp.f32 s16, #0
-; CHECK-NEXT:    itt gt
-; CHECK-NEXT:    movwgt r11, #65535
-; CHECK-NEXT:    movtgt r11, #3
+; CHECK-NEXT:    vcmp.f32 s18, #0
+; CHECK-NEXT:    it gt
+; CHECK-NEXT:    movgt.w r4, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s16, s20
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt.w r8, #0
+; CHECK-NEXT:    movlt.w r11, #0
+; CHECK-NEXT:    vcmp.f32 s18, s20
+; CHECK-NEXT:    bfc r5, #18, #14
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r8, #-1
-; CHECK-NEXT:    lsr.w r0, r8, #14
-; CHECK-NEXT:    vcvtb.f32.f16 s16, s19
-; CHECK-NEXT:    orr.w r0, r0, r11, lsl #18
-; CHECK-NEXT:    str r0, [r4, #8]
-; CHECK-NEXT:    lsr.w r0, r10, #10
-; CHECK-NEXT:    strb.w r0, [r4, #49]
-; CHECK-NEXT:    vmov r0, s16
+; CHECK-NEXT:    movgt.w r11, #-1
+; CHECK-NEXT:    mov r8, r11
+; CHECK-NEXT:    vcmp.f32 s22, #0
+; CHECK-NEXT:    ldr r7, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    lsll r4, r1, #22
+; CHECK-NEXT:    lsrl r8, r5, #28
+; CHECK-NEXT:    it lt
+; CHECK-NEXT:    movlt r7, #0
+; CHECK-NEXT:    vcmp.f32 s22, s20
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    itt gt
+; CHECK-NEXT:    movwgt r7, #65535
+; CHECK-NEXT:    movtgt r7, #3
+; CHECK-NEXT:    orrs r1, r5
+; CHECK-NEXT:    str.w r1, [r10, #20]
 ; CHECK-NEXT:    bl __aeabi_f2ulz
-; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    vcmp.f32 s16, #0
+; CHECK-NEXT:    orr.w r2, r8, r4
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r6, #0
+; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    vcmp.f32 s16, s20
-; CHECK-NEXT:    ubfx r0, r5, #14, #4
+; CHECK-NEXT:    bfc r7, #18, #14
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r6, #-1
-; CHECK-NEXT:    orr.w r0, r0, r6, lsl #4
-; CHECK-NEXT:    str.w r0, [r4, #37]
-; CHECK-NEXT:    vcmp.f32 s24, #0
-; CHECK-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    vcmp.f32 s24, s20
-; CHECK-NEXT:    vcvtb.f32.f16 s18, s17
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itt gt
-; CHECK-NEXT:    movwgt r0, #65535
-; CHECK-NEXT:    movtgt r0, #3
-; CHECK-NEXT:    bfc r0, #18, #14
-; CHECK-NEXT:    mov r10, r1
-; CHECK-NEXT:    orr.w r0, r0, r7, lsl #18
-; CHECK-NEXT:    str.w r0, [r4, #29]
-; CHECK-NEXT:    lsr.w r0, r9, #10
-; CHECK-NEXT:    strb r0, [r4, #24]
-; CHECK-NEXT:    vmov r0, s18
-; CHECK-NEXT:    bl __aeabi_f2ulz
-; CHECK-NEXT:    vcmp.f32 s18, #0
-; CHECK-NEXT:    ubfx r2, r11, #14, #4
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    vcmp.f32 s18, s20
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    orr.w r2, r2, r0, lsl #4
-; CHECK-NEXT:    str r2, [r4, #12]
-; CHECK-NEXT:    vcmp.f32 s22, #0
-; CHECK-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s22, s20
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r2, #0
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, #0
-; CHECK-NEXT:    itt gt
-; CHECK-NEXT:    movwgt r2, #65535
-; CHECK-NEXT:    movtgt r2, #3
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, s20
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r1, #0
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s16, #0
 ; CHECK-NEXT:    itt gt
 ; CHECK-NEXT:    movwgt r1, #65535
 ; CHECK-NEXT:    movtgt r1, #3
-; CHECK-NEXT:    bfc r2, #18, #14
+; CHECK-NEXT:    str.w r2, [r10, #16]
+; CHECK-NEXT:    lsrs r2, r6, #10
+; CHECK-NEXT:    vcmp.f32 s16, #0
+; CHECK-NEXT:    strb.w r2, [r10, #24]
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt.w r10, #0
+; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    vcmp.f32 s16, s20
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itt gt
-; CHECK-NEXT:    movwgt r10, #65535
-; CHECK-NEXT:    movtgt r10, #3
-; CHECK-NEXT:    orr.w r2, r2, r8, lsl #18
-; CHECK-NEXT:    str r2, [r4, #4]
-; CHECK-NEXT:    bfc r10, #18, #14
-; CHECK-NEXT:    ldr r3, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    lsrs r2, r6, #28
 ; CHECK-NEXT:    bfc r1, #18, #14
-; CHECK-NEXT:    orr.w r2, r2, r10, lsl #4
-; CHECK-NEXT:    lsrs r0, r0, #28
-; CHECK-NEXT:    orr.w r2, r2, r3, lsl #22
-; CHECK-NEXT:    str.w r2, [r4, #41]
-; CHECK-NEXT:    orr.w r0, r0, r1, lsl #4
-; CHECK-NEXT:    ldr r1, [sp] @ 4-byte Reload
-; CHECK-NEXT:    orr.w r0, r0, r1, lsl #22
-; CHECK-NEXT:    str r0, [r4, #16]
-; CHECK-NEXT:    add sp, #16
-; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    it gt
+; CHECK-NEXT:    movgt.w r0, #-1
+; CHECK-NEXT:    mov r2, r0
+; CHECK-NEXT:    orr.w r0, r7, r0, lsl #18
+; CHECK-NEXT:    lsrl r2, r1, #14
+; CHECK-NEXT:    orr.w r1, r1, r11, lsl #4
+; CHECK-NEXT:    strd r2, r1, [r10, #8]
+; CHECK-NEXT:    str.w r0, [r10, #4]
+; CHECK-NEXT:    add sp, #8
+; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEXT:    .p2align 2
@@ -4192,21 +4207,61 @@ define arm_aapcs_vfpcc <8 x i64> @test_unsigned_v8f16_v8i64(<8 x half> %f) {
 define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-LABEL: test_unsigned_v8f16_v8i100:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT:    .pad #32
+; CHECK-NEXT:    sub sp, #32
 ; CHECK-NEXT:    vmov q4, q0
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    vcvtb.f32.f16 s28, s19
-; CHECK-NEXT:    vmov r0, s28
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    vcvtb.f32.f16 s26, s18
-; CHECK-NEXT:    mov r5, r3
-; CHECK-NEXT:    vmov r3, s26
+; CHECK-NEXT:    mov r9, r0
+; CHECK-NEXT:    vcvtb.f32.f16 s30, s19
+; CHECK-NEXT:    vcvtb.f32.f16 s28, s18
+; CHECK-NEXT:    vmov r0, s30
+; CHECK-NEXT:    vcvtt.f32.f16 s22, s19
+; CHECK-NEXT:    vcvtb.f32.f16 s24, s16
+; CHECK-NEXT:    vcvtb.f32.f16 s26, s17
 ; CHECK-NEXT:    vldr s20, .LCPI50_1
+; CHECK-NEXT:    vmov r8, s22
+; CHECK-NEXT:    vmov r5, s28
+; CHECK-NEXT:    vcvtt.f32.f16 s18, s18
+; CHECK-NEXT:    vmov r4, s24
+; CHECK-NEXT:    vmov r6, s26
+; CHECK-NEXT:    bl __fixunssfti
+; CHECK-NEXT:    vcmp.f32 s30, #0
+; CHECK-NEXT:    mov r7, r3
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s30, s20
+; CHECK-NEXT:    it lt
+; CHECK-NEXT:    movlt r2, #0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s30, #0
+; CHECK-NEXT:    it gt
+; CHECK-NEXT:    movgt.w r2, #-1
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s30, s20
+; CHECK-NEXT:    str.w r2, [r9, #83]
+; CHECK-NEXT:    it lt
+; CHECK-NEXT:    movlt r1, #0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s30, #0
+; CHECK-NEXT:    it gt
+; CHECK-NEXT:    movgt.w r1, #-1
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    str.w r1, [r9, #79]
+; CHECK-NEXT:    it lt
+; CHECK-NEXT:    movlt r0, #0
+; CHECK-NEXT:    vcmp.f32 s30, s20
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    it gt
+; CHECK-NEXT:    movgt.w r0, #-1
+; CHECK-NEXT:    str.w r0, [r9, #75]
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    vcmp.f32 s28, #0
-; CHECK-NEXT:    vcvtt.f32.f16 s30, s19
+; CHECK-NEXT:    mov r5, r3
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s28, s20
 ; CHECK-NEXT:    it lt
@@ -4217,7 +4272,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s28, s20
-; CHECK-NEXT:    str.w r2, [r4, #83]
+; CHECK-NEXT:    str.w r2, [r9, #58]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
@@ -4225,23 +4280,18 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str.w r1, [r4, #79]
+; CHECK-NEXT:    str.w r1, [r9, #54]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    vcmp.f32 s28, s20
-; CHECK-NEXT:    vcvtb.f32.f16 s22, s16
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    vcvtb.f32.f16 s24, s17
-; CHECK-NEXT:    str.w r0, [r4, #75]
-; CHECK-NEXT:    vmov r9, s30
-; CHECK-NEXT:    vmov r8, s22
-; CHECK-NEXT:    vmov r6, s24
-; CHECK-NEXT:    mov r0, r3
+; CHECK-NEXT:    str.w r0, [r9, #50]
+; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    vcmp.f32 s26, #0
-; CHECK-NEXT:    mov r7, r3
+; CHECK-NEXT:    str r3, [sp, #24] @ 4-byte Spill
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s26, s20
 ; CHECK-NEXT:    it lt
@@ -4252,7 +4302,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s26, s20
-; CHECK-NEXT:    str.w r2, [r4, #58]
+; CHECK-NEXT:    str.w r2, [r9, #33]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
@@ -4260,18 +4310,18 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str.w r1, [r4, #54]
+; CHECK-NEXT:    str.w r1, [r9, #29]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    vcmp.f32 s26, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    str.w r0, [r4, #50]
-; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    str.w r0, [r9, #25]
+; CHECK-NEXT:    mov r0, r4
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    vcmp.f32 s24, #0
-; CHECK-NEXT:    mov r10, r3
+; CHECK-NEXT:    mov r4, r3
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s24, s20
 ; CHECK-NEXT:    it lt
@@ -4282,7 +4332,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s24, s20
-; CHECK-NEXT:    str.w r2, [r4, #33]
+; CHECK-NEXT:    str.w r2, [r9, #8]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
@@ -4290,227 +4340,200 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str.w r1, [r4, #29]
+; CHECK-NEXT:    str.w r1, [r9, #4]
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    vcmp.f32 s24, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    str.w r0, [r4, #25]
+; CHECK-NEXT:    str.w r0, [r9]
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    vcmp.f32 s22, #0
-; CHECK-NEXT:    mov r8, r3
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s22, s20
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r2, #0
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s22, #0
-; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r2, #-1
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s22, s20
-; CHECK-NEXT:    str r2, [r4, #8]
+; CHECK-NEXT:    str r3, [sp, #12] @ 4-byte Spill
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r1, #0
+; CHECK-NEXT:    movlt r6, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s22, #0
+; CHECK-NEXT:    vcmp.f32 s30, #0
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r1, #-1
+; CHECK-NEXT:    movgt.w r6, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str r1, [r4, #4]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    vcmp.f32 s22, s20
+; CHECK-NEXT:    movlt r7, #0
+; CHECK-NEXT:    vcmp.f32 s30, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    str r0, [r4]
-; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    movgt r7, #15
+; CHECK-NEXT:    and r0, r7, #15
+; CHECK-NEXT:    mov r11, r1
+; CHECK-NEXT:    orr.w r1, r0, r6, lsl #4
+; CHECK-NEXT:    vmov r0, s18
+; CHECK-NEXT:    mov r10, r2
+; CHECK-NEXT:    str.w r1, [r9, #87]
 ; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    vcmp.f32 s30, #0
+; CHECK-NEXT:    vcmp.f32 s18, #0
+; CHECK-NEXT:    mov r8, r0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s30, s20
+; CHECK-NEXT:    vcmp.f32 s18, s20
+; CHECK-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    mov r7, r3
+; CHECK-NEXT:    str r2, [sp, #20] @ 4-byte Spill
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r1, #0
+; CHECK-NEXT:    movlt.w r8, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s30, #0
+; CHECK-NEXT:    vcmp.f32 s28, #0
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r1, #-1
+; CHECK-NEXT:    movgt.w r8, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s30, s20
+; CHECK-NEXT:    vcmp.f32 s28, s20
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r2, #0
+; CHECK-NEXT:    movlt r5, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    lsr.w r6, r1, #28
-; CHECK-NEXT:    vcmp.f32 s30, #0
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r2, #-1
-; CHECK-NEXT:    orr.w r6, r6, r2, lsl #4
+; CHECK-NEXT:    movgt r5, #15
+; CHECK-NEXT:    and r0, r5, #15
+; CHECK-NEXT:    vcvtt.f32.f16 s28, s17
+; CHECK-NEXT:    orr.w r0, r0, r8, lsl #4
+; CHECK-NEXT:    str.w r0, [r9, #62]
+; CHECK-NEXT:    vmov r0, s28
+; CHECK-NEXT:    bl __fixunssfti
+; CHECK-NEXT:    vcmp.f32 s28, #0
+; CHECK-NEXT:    str r1, [sp, #16] @ 4-byte Spill
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str.w r6, [r4, #95]
+; CHECK-NEXT:    str r2, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT:    str r3, [sp, #4] @ 4-byte Spill
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    vcmp.f32 s30, s20
+; CHECK-NEXT:    vcmp.f32 s28, s20
+; CHECK-NEXT:    vcvtt.f32.f16 s16, s16
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    lsrs r6, r0, #28
-; CHECK-NEXT:    orr.w r1, r6, r1, lsl #4
-; CHECK-NEXT:    vcmp.f32 s30, #0
-; CHECK-NEXT:    str.w r1, [r4, #91]
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s30, s20
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r3, #0
-; CHECK-NEXT:    lsrs r1, r2, #28
-; CHECK-NEXT:    vcvtt.f32.f16 s30, s18
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r3, #15
-; CHECK-NEXT:    orr.w r2, r1, r3, lsl #4
-; CHECK-NEXT:    vmov r1, s30
-; CHECK-NEXT:    strb.w r2, [r4, #99]
-; CHECK-NEXT:    vcmp.f32 s28, #0
+; CHECK-NEXT:    vcmp.f32 s26, #0
+; CHECK-NEXT:    mov r1, r0
+; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    ldr r0, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    vcmp.f32 s26, s20
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r5, #0
-; CHECK-NEXT:    vcmp.f32 s28, s20
+; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r5, #15
-; CHECK-NEXT:    and r2, r5, #15
-; CHECK-NEXT:    orr.w r0, r2, r0, lsl #4
-; CHECK-NEXT:    str.w r0, [r4, #87]
-; CHECK-NEXT:    mov r0, r1
+; CHECK-NEXT:    movgt r0, #15
+; CHECK-NEXT:    and r0, r0, #15
+; CHECK-NEXT:    orr.w r0, r0, r1, lsl #4
+; CHECK-NEXT:    str.w r0, [r9, #37]
+; CHECK-NEXT:    vmov r0, s16
 ; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    vcmp.f32 s30, #0
-; CHECK-NEXT:    vcvtt.f32.f16 s18, s17
+; CHECK-NEXT:    vcmp.f32 s16, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s30, s20
+; CHECK-NEXT:    vcmp.f32 s16, s20
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r1, #0
+; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s30, #0
+; CHECK-NEXT:    vcmp.f32 s24, #0
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r1, #-1
+; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s30, s20
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r2, #0
+; CHECK-NEXT:    movlt r4, #0
+; CHECK-NEXT:    vcmp.f32 s24, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    lsr.w r6, r1, #28
-; CHECK-NEXT:    vcmp.f32 s30, #0
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r2, #-1
-; CHECK-NEXT:    orr.w r6, r6, r2, lsl #4
+; CHECK-NEXT:    movgt r4, #15
+; CHECK-NEXT:    and r5, r4, #15
+; CHECK-NEXT:    vcmp.f32 s22, #0
+; CHECK-NEXT:    orr.w r5, r5, r0, lsl #4
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str.w r6, [r4, #70]
+; CHECK-NEXT:    str.w r5, [r9, #12]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    vcmp.f32 s30, s20
+; CHECK-NEXT:    movlt.w r11, #0
+; CHECK-NEXT:    vcmp.f32 s22, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    lsrs r6, r0, #28
-; CHECK-NEXT:    orr.w r1, r6, r1, lsl #4
-; CHECK-NEXT:    str.w r1, [r4, #66]
-; CHECK-NEXT:    vmov r1, s18
-; CHECK-NEXT:    vcmp.f32 s30, #0
-; CHECK-NEXT:    lsrs r2, r2, #28
+; CHECK-NEXT:    movgt.w r11, #-1
+; CHECK-NEXT:    vcmp.f32 s22, #0
+; CHECK-NEXT:    lsrl r6, r11, #28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s30, s20
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r3, #0
+; CHECK-NEXT:    movlt.w r10, #0
+; CHECK-NEXT:    vcmp.f32 s22, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s26, #0
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r3, #15
-; CHECK-NEXT:    orr.w r2, r2, r3, lsl #4
+; CHECK-NEXT:    movgt.w r10, #-1
+; CHECK-NEXT:    orr.w r5, r11, r10, lsl #4
+; CHECK-NEXT:    str.w r5, [r9, #95]
+; CHECK-NEXT:    str.w r6, [r9, #91]
+; CHECK-NEXT:    vcmp.f32 s22, #0
+; CHECK-NEXT:    ldr r6, [sp, #12] @ 4-byte Reload
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    strb.w r2, [r4, #74]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r7, #0
-; CHECK-NEXT:    vcmp.f32 s26, s20
-; CHECK-NEXT:    vcvtt.f32.f16 s16, s16
+; CHECK-NEXT:    movlt r6, #0
+; CHECK-NEXT:    vcmp.f32 s22, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r7, #15
-; CHECK-NEXT:    and r2, r7, #15
-; CHECK-NEXT:    orr.w r0, r2, r0, lsl #4
-; CHECK-NEXT:    str.w r0, [r4, #62]
-; CHECK-NEXT:    mov r0, r1
-; CHECK-NEXT:    bl __fixunssfti
+; CHECK-NEXT:    movgt r6, #15
+; CHECK-NEXT:    and r5, r6, #15
 ; CHECK-NEXT:    vcmp.f32 s18, #0
+; CHECK-NEXT:    lsrl r10, r5, #28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, s20
+; CHECK-NEXT:    strb.w r10, [r9, #99]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r1, #0
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, #0
-; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r1, #-1
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    movlt r7, #0
 ; CHECK-NEXT:    vcmp.f32 s18, s20
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    lsr.w r7, r1, #28
-; CHECK-NEXT:    vcmp.f32 s18, #0
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r2, #-1
-; CHECK-NEXT:    orr.w r7, r7, r2, lsl #4
+; CHECK-NEXT:    movgt r7, #15
+; CHECK-NEXT:    vcmp.f32 s18, #0
+; CHECK-NEXT:    ldr r6, [sp, #8] @ 4-byte Reload
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    str.w r7, [r4, #45]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r0, #0
+; CHECK-NEXT:    movlt r6, #0
 ; CHECK-NEXT:    vcmp.f32 s18, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    lsrs r7, r0, #28
+; CHECK-NEXT:    movgt.w r6, #-1
+; CHECK-NEXT:    vmov q0[3], q0[1], r6, r7
 ; CHECK-NEXT:    vcmp.f32 s18, #0
-; CHECK-NEXT:    orr.w r7, r7, r1, lsl #4
-; CHECK-NEXT:    vmov r1, s16
+; CHECK-NEXT:    vmov r5, s1
+; CHECK-NEXT:    ldr r4, [sp, #20] @ 4-byte Reload
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, s20
-; CHECK-NEXT:    str.w r7, [r4, #41]
+; CHECK-NEXT:    lsrl r8, r5, #28
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r3, #0
+; CHECK-NEXT:    movlt r4, #0
+; CHECK-NEXT:    vcmp.f32 s18, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    lsr.w r2, r2, #28
-; CHECK-NEXT:    vcmp.f32 s24, #0
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r3, #15
-; CHECK-NEXT:    orr.w r2, r2, r3, lsl #4
+; CHECK-NEXT:    movgt.w r4, #-1
+; CHECK-NEXT:    orr.w r6, r5, r4, lsl #4
+; CHECK-NEXT:    and r5, r7, #15
+; CHECK-NEXT:    lsrl r4, r5, #28
+; CHECK-NEXT:    str.w r6, [r9, #70]
+; CHECK-NEXT:    str.w r8, [r9, #66]
+; CHECK-NEXT:    vcmp.f32 s28, #0
+; CHECK-NEXT:    strb.w r4, [r9, #74]
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    strb.w r2, [r4, #49]
+; CHECK-NEXT:    ldr r7, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    vcmp.f32 s28, s20
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt.w r10, #0
-; CHECK-NEXT:    vcmp.f32 s24, s20
+; CHECK-NEXT:    movlt r7, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r10, #15
-; CHECK-NEXT:    and r2, r10, #15
-; CHECK-NEXT:    orr.w r0, r2, r0, lsl #4
-; CHECK-NEXT:    str.w r0, [r4, #37]
-; CHECK-NEXT:    mov r0, r1
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    vcmp.f32 s16, #0
+; CHECK-NEXT:    movgt r7, #15
+; CHECK-NEXT:    mov r12, r7
+; CHECK-NEXT:    vcmp.f32 s28, #0
+; CHECK-NEXT:    ldr r7, [sp, #16] @ 4-byte Reload
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s16, s20
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r1, #0
+; CHECK-NEXT:    movlt r7, #0
+; CHECK-NEXT:    vcmp.f32 s28, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s16, #0
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r1, #-1
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s16, s20
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r2, #0
+; CHECK-NEXT:    movgt.w r7, #-1
 ; CHECK-NEXT:    b.w .LBB50_2
 ; CHECK-NEXT:    .p2align 2
 ; CHECK-NEXT:  @ %bb.1:
@@ -4518,47 +4541,61 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    .long 0x717fffff @ float 1.26765052E+30
 ; CHECK-NEXT:    .p2align 1
 ; CHECK-NEXT:  .LBB50_2:
+; CHECK-NEXT:    vmov q0[3], q0[1], r7, r12
+; CHECK-NEXT:    ldr r4, [sp] @ 4-byte Reload
+; CHECK-NEXT:    vmov r5, s1
+; CHECK-NEXT:    ldr r6, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    vcmp.f32 s28, #0
+; CHECK-NEXT:    lsrl r4, r5, #28
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    it lt
+; CHECK-NEXT:    movlt r6, #0
+; CHECK-NEXT:    vcmp.f32 s28, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s16, #0
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r2, #-1
-; CHECK-NEXT:    lsrs r7, r1, #28
+; CHECK-NEXT:    movgt.w r6, #-1
+; CHECK-NEXT:    orr.w r7, r5, r6, lsl #4
+; CHECK-NEXT:    and r5, r12, #15
+; CHECK-NEXT:    vcmp.f32 s16, #0
+; CHECK-NEXT:    lsrl r6, r5, #28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s16, s20
-; CHECK-NEXT:    orr.w r7, r7, r2, lsl #4
-; CHECK-NEXT:    str r7, [r4, #20]
+; CHECK-NEXT:    str.w r7, [r9, #45]
+; CHECK-NEXT:    str.w r4, [r9, #41]
+; CHECK-NEXT:    strb.w r6, [r9, #49]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r0, #0
+; CHECK-NEXT:    movlt r3, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s16, #0
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    lsrs r7, r0, #28
-; CHECK-NEXT:    orr.w r1, r7, r1, lsl #4
+; CHECK-NEXT:    movgt r3, #15
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s16, s20
-; CHECK-NEXT:    str r1, [r4, #16]
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r3, #0
+; CHECK-NEXT:    movlt r1, #0
+; CHECK-NEXT:    vcmp.f32 s16, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    lsr.w r1, r2, #28
-; CHECK-NEXT:    vcmp.f32 s22, #0
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r3, #15
-; CHECK-NEXT:    orr.w r1, r1, r3, lsl #4
+; CHECK-NEXT:    movgt.w r1, #-1
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r3
+; CHECK-NEXT:    vcmp.f32 s16, #0
+; CHECK-NEXT:    vmov r1, s1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    strb r1, [r4, #24]
+; CHECK-NEXT:    lsrl r0, r1, #28
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt.w r8, #0
-; CHECK-NEXT:    vcmp.f32 s22, s20
+; CHECK-NEXT:    movlt r2, #0
+; CHECK-NEXT:    vcmp.f32 s16, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r8, #15
-; CHECK-NEXT:    and r1, r8, #15
-; CHECK-NEXT:    orr.w r0, r1, r0, lsl #4
-; CHECK-NEXT:    str r0, [r4, #12]
+; CHECK-NEXT:    movgt.w r2, #-1
+; CHECK-NEXT:    orr.w r1, r1, r2, lsl #4
+; CHECK-NEXT:    strd r0, r1, [r9, #16]
+; CHECK-NEXT:    and r1, r3, #15
+; CHECK-NEXT:    lsrl r2, r1, #28
+; CHECK-NEXT:    strb.w r2, [r9, #24]
+; CHECK-NEXT:    add sp, #32
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEXT:  @ %bb.3:
     %x = call <8 x i100> @llvm.fptoui.sat.v8f16.v8i100(<8 x half> %f)
     ret <8 x i100> %x


