[llvm] df017ba - [TargetLowering] Don't use ISD::SELECT_CC in expandFP_TO_INT_SAT.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sat Apr 29 10:23:19 PDT 2023


Author: Craig Topper
Date: 2023-04-29T10:23:08-07:00
New Revision: df017ba9d33f6e0d7d518abd397b0383ba271894

URL: https://github.com/llvm/llvm-project/commit/df017ba9d33f6e0d7d518abd397b0383ba271894
DIFF: https://github.com/llvm/llvm-project/commit/df017ba9d33f6e0d7d518abd397b0383ba271894.diff

LOG: [TargetLowering] Don't use ISD::SELECT_CC in expandFP_TO_INT_SAT.

This function gets called for vectors and ISD::SELECT_CC was never
intended to support vectors. Some updates were made to support
it when this function started getting used for vectors.

Overall, using separate ISD::SETCC and ISD::SELECT looks like an
improvement even for scalar.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D149481

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
    llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll
    llvm/test/CodeGen/RISCV/double-convert.ll
    llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
    llvm/test/CodeGen/RISCV/float-convert.ll
    llvm/test/CodeGen/RISCV/float-round-conv-sat.ll
    llvm/test/CodeGen/RISCV/half-convert.ll
    llvm/test/CodeGen/RISCV/half-round-conv-sat.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
    llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
    llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
    llvm/test/CodeGen/X86/fptosi-sat-scalar.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 3580e131d7840..5763d4bd0b4a7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -10319,8 +10319,10 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
 
     // Otherwise, select 0 if Src is NaN.
     SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
-    return DAG.getSelectCC(dl, Src, Src, ZeroInt, FpToInt,
-                           ISD::CondCode::SETUO);
+    EVT SetCCVT =
+        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
+    SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
+    return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
   }
 
   SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
@@ -10334,13 +10336,16 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
 
   SDValue Select = FpToInt;
 
+  EVT SetCCVT =
+      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
+
   // If Src ULT MinFloat, select MinInt. In particular, this also selects
   // MinInt if Src is NaN.
-  Select = DAG.getSelectCC(dl, Src, MinFloatNode, MinIntNode, Select,
-                           ISD::CondCode::SETULT);
+  SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
+  Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
   // If Src OGT MaxFloat, select MaxInt.
-  Select = DAG.getSelectCC(dl, Src, MaxFloatNode, MaxIntNode, Select,
-                           ISD::CondCode::SETOGT);
+  SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
+  Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
 
   // In the unsigned case we are done, because we mapped NaN to MinInt, which
   // is already zero.
@@ -10349,7 +10354,8 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
 
   // Otherwise, select 0 if Src is NaN.
   SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
-  return DAG.getSelectCC(dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO);
+  SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
+  return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
 }
 
 SDValue TargetLowering::expandVectorSplice(SDNode *Node,

diff --git a/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll b/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
index 9544e395744a4..4b27e804e6df9 100644
--- a/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
@@ -571,71 +571,61 @@ define i50 @test_signed_i50_f32(float %f) nounwind {
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    .pad #12
 ; SOFT-NEXT:    sub sp, #12
-; SOFT-NEXT:    mov r4, r0
+; SOFT-NEXT:    mov r7, r0
 ; SOFT-NEXT:    ldr r1, .LCPI6_0
 ; SOFT-NEXT:    bl __aeabi_fcmpgt
 ; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
 ; SOFT-NEXT:    movs r0, #27
-; SOFT-NEXT:    lsls r5, r0, #27
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    lsls r1, r0, #27
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    bl __aeabi_f2lz
-; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB6_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r6, r7
+; SOFT-NEXT:    mov r4, r5
 ; SOFT-NEXT:  .LBB6_2:
-; SOFT-NEXT:    movs r7, #0
+; SOFT-NEXT:    movs r6, #0
 ; SOFT-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
 ; SOFT-NEXT:    cmp r0, #0
 ; SOFT-NEXT:    beq .LBB6_4
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mvns r6, r7
+; SOFT-NEXT:    mvns r4, r6
 ; SOFT-NEXT:  .LBB6_4:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    mov r1, r7
 ; SOFT-NEXT:    bl __aeabi_fcmpun
+; SOFT-NEXT:    mov r1, r0
 ; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bne .LBB6_6
 ; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:  .LBB6_6:
-; SOFT-NEXT:    mov r6, r0
 ; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:  .LBB6_6:
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    beq .LBB6_8
 ; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    ldr r5, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
 ; SOFT-NEXT:    b .LBB6_9
 ; SOFT-NEXT:  .LBB6_8:
-; SOFT-NEXT:    ldr r5, .LCPI6_1
+; SOFT-NEXT:    ldr r3, .LCPI6_1
 ; SOFT-NEXT:  .LBB6_9:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    ldr r1, .LCPI6_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    cmp r2, #0
 ; SOFT-NEXT:    beq .LBB6_11
 ; SOFT-NEXT:  @ %bb.10:
-; SOFT-NEXT:    ldr r5, .LCPI6_2
+; SOFT-NEXT:    ldr r3, .LCPI6_2
 ; SOFT-NEXT:  .LBB6_11:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    bl __aeabi_fcmpun
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r1, #0
 ; SOFT-NEXT:    bne .LBB6_13
 ; SOFT-NEXT:  @ %bb.12:
-; SOFT-NEXT:    mov r7, r5
+; SOFT-NEXT:    mov r6, r3
 ; SOFT-NEXT:  .LBB6_13:
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:    add sp, #12
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
@@ -699,72 +689,62 @@ define i64 @test_signed_i64_f32(float %f) nounwind {
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    .pad #12
 ; SOFT-NEXT:    sub sp, #12
-; SOFT-NEXT:    mov r4, r0
+; SOFT-NEXT:    mov r7, r0
 ; SOFT-NEXT:    ldr r1, .LCPI7_0
 ; SOFT-NEXT:    bl __aeabi_fcmpgt
 ; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
 ; SOFT-NEXT:    movs r0, #223
-; SOFT-NEXT:    lsls r5, r0, #24
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    lsls r1, r0, #24
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    bl __aeabi_f2lz
-; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB7_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r6, r7
+; SOFT-NEXT:    mov r4, r5
 ; SOFT-NEXT:  .LBB7_2:
-; SOFT-NEXT:    movs r7, #0
+; SOFT-NEXT:    movs r6, #0
 ; SOFT-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
 ; SOFT-NEXT:    cmp r0, #0
 ; SOFT-NEXT:    beq .LBB7_4
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mvns r6, r7
+; SOFT-NEXT:    mvns r4, r6
 ; SOFT-NEXT:  .LBB7_4:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    mov r1, r7
 ; SOFT-NEXT:    bl __aeabi_fcmpun
+; SOFT-NEXT:    mov r1, r0
 ; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bne .LBB7_6
 ; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:  .LBB7_6:
-; SOFT-NEXT:    mov r6, r0
 ; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:  .LBB7_6:
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    beq .LBB7_8
 ; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    ldr r5, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
 ; SOFT-NEXT:    b .LBB7_9
 ; SOFT-NEXT:  .LBB7_8:
-; SOFT-NEXT:    movs r0, #1
-; SOFT-NEXT:    lsls r5, r0, #31
+; SOFT-NEXT:    movs r2, #1
+; SOFT-NEXT:    lsls r3, r2, #31
 ; SOFT-NEXT:  .LBB7_9:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    ldr r1, .LCPI7_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    cmp r2, #0
 ; SOFT-NEXT:    beq .LBB7_11
 ; SOFT-NEXT:  @ %bb.10:
-; SOFT-NEXT:    ldr r5, .LCPI7_1
+; SOFT-NEXT:    ldr r3, .LCPI7_1
 ; SOFT-NEXT:  .LBB7_11:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    bl __aeabi_fcmpun
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r1, #0
 ; SOFT-NEXT:    bne .LBB7_13
 ; SOFT-NEXT:  @ %bb.12:
-; SOFT-NEXT:    mov r7, r5
+; SOFT-NEXT:    mov r6, r3
 ; SOFT-NEXT:  .LBB7_13:
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:    add sp, #12
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
@@ -825,134 +805,110 @@ define i100 @test_signed_i100_f32(float %f) nounwind {
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    .pad #20
 ; SOFT-NEXT:    sub sp, #20
-; SOFT-NEXT:    mov r4, r0
+; SOFT-NEXT:    mov r6, r0
 ; SOFT-NEXT:    ldr r1, .LCPI8_0
 ; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    movs r0, #241
-; SOFT-NEXT:    lsls r5, r0, #24
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    lsls r1, r0, #24
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __fixsfti
-; SOFT-NEXT:    str r1, [sp] @ 4-byte Spill
-; SOFT-NEXT:    str r2, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    str r3, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    str r2, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:    str r3, [sp] @ 4-byte Spill
+; SOFT-NEXT:    cmp r7, #0
 ; SOFT-NEXT:    bne .LBB8_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:  .LBB8_2:
-; SOFT-NEXT:    movs r6, #0
-; SOFT-NEXT:    mvns r1, r6
-; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    str r1, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT:    mov r7, r1
+; SOFT-NEXT:    movs r5, #0
+; SOFT-NEXT:    mvns r1, r5
+; SOFT-NEXT:    str r4, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    mov r4, r1
 ; SOFT-NEXT:    bne .LBB8_4
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:  .LBB8_4:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpun
+; SOFT-NEXT:    mov r3, r0
 ; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r0, r5
 ; SOFT-NEXT:    bne .LBB8_6
 ; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:  .LBB8_6:
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
 ; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    ldr r1, .LCPI8_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:  .LBB8_6:
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    ldr r6, [sp, #8] @ 4-byte Reload
 ; SOFT-NEXT:    bne .LBB8_8
 ; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT:    str r7, [sp, #4] @ 4-byte Spill
 ; SOFT-NEXT:  .LBB8_8:
-; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    ldr r7, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    ldr r4, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r2, r6
 ; SOFT-NEXT:    bne .LBB8_10
 ; SOFT-NEXT:  @ %bb.9:
-; SOFT-NEXT:    ldr r7, [sp] @ 4-byte Reload
+; SOFT-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB8_10:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    bl __aeabi_fcmpun
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    bne .LBB8_12
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    beq .LBB8_18
 ; SOFT-NEXT:  @ %bb.11:
-; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB8_19
 ; SOFT-NEXT:  .LBB8_12:
-; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    ldr r1, .LCPI8_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    bne .LBB8_14
-; SOFT-NEXT:  @ %bb.13:
-; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:  .LBB8_13:
+; SOFT-NEXT:    ldr r6, [sp, #12] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB8_14:
-; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    mov r2, r5
 ; SOFT-NEXT:    bne .LBB8_16
 ; SOFT-NEXT:  @ %bb.15:
-; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT:    mov r2, r6
 ; SOFT-NEXT:  .LBB8_16:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    bl __aeabi_fcmpun
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r7, r6
-; SOFT-NEXT:    bne .LBB8_18
+; SOFT-NEXT:    movs r4, #7
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB8_20
 ; SOFT-NEXT:  @ %bb.17:
-; SOFT-NEXT:    ldr r7, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    ldr r7, [sp] @ 4-byte Reload
+; SOFT-NEXT:    b .LBB8_21
 ; SOFT-NEXT:  .LBB8_18:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    movs r5, #7
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    bne .LBB8_20
-; SOFT-NEXT:  @ %bb.19:
-; SOFT-NEXT:    mvns r0, r5
-; SOFT-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:    mov r1, r2
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB8_12
+; SOFT-NEXT:  .LBB8_19:
+; SOFT-NEXT:    str r7, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB8_13
+; SOFT-NEXT:    b .LBB8_14
 ; SOFT-NEXT:  .LBB8_20:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    ldr r1, .LCPI8_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    bne .LBB8_22
-; SOFT-NEXT:  @ %bb.21:
-; SOFT-NEXT:    ldr r5, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT:  .LBB8_22:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    bl __aeabi_fcmpun
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    bne .LBB8_24
-; SOFT-NEXT:  @ %bb.23:
-; SOFT-NEXT:    mov r6, r5
-; SOFT-NEXT:  .LBB8_24:
-; SOFT-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    ldr r1, [sp] @ 4-byte Reload
-; SOFT-NEXT:    mov r2, r7
-; SOFT-NEXT:    mov r3, r6
+; SOFT-NEXT:    mvns r7, r4
+; SOFT-NEXT:  .LBB8_21:
+; SOFT-NEXT:    ldr r6, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB8_23
+; SOFT-NEXT:  @ %bb.22:
+; SOFT-NEXT:    mov r4, r7
+; SOFT-NEXT:  .LBB8_23:
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    bne .LBB8_25
+; SOFT-NEXT:  @ %bb.24:
+; SOFT-NEXT:    mov r5, r4
+; SOFT-NEXT:  .LBB8_25:
+; SOFT-NEXT:    mov r3, r5
 ; SOFT-NEXT:    add sp, #20
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
-; SOFT-NEXT:  @ %bb.25:
+; SOFT-NEXT:  @ %bb.26:
 ; SOFT-NEXT:  .LCPI8_0:
 ; SOFT-NEXT:    .long 1895825407 @ 0x70ffffff
 ;
@@ -1031,136 +987,111 @@ define i128 @test_signed_i128_f32(float %f) nounwind {
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    .pad #20
 ; SOFT-NEXT:    sub sp, #20
-; SOFT-NEXT:    mov r4, r0
+; SOFT-NEXT:    mov r6, r0
 ; SOFT-NEXT:    ldr r1, .LCPI9_0
 ; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    movs r0, #255
-; SOFT-NEXT:    lsls r7, r0, #24
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    lsls r1, r0, #24
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __fixsfti
 ; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    str r2, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    str r2, [sp, #12] @ 4-byte Spill
 ; SOFT-NEXT:    str r3, [sp] @ 4-byte Spill
-; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    cmp r7, #0
 ; SOFT-NEXT:    bne .LBB9_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:  .LBB9_2:
-; SOFT-NEXT:    movs r6, #0
-; SOFT-NEXT:    mvns r1, r6
-; SOFT-NEXT:    cmp r5, #0
-; SOFT-NEXT:    str r1, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT:    mov r5, r1
+; SOFT-NEXT:    movs r5, #0
+; SOFT-NEXT:    mvns r1, r5
+; SOFT-NEXT:    str r4, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    mov r4, r1
 ; SOFT-NEXT:    bne .LBB9_4
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:  .LBB9_4:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpun
+; SOFT-NEXT:    mov r3, r0
 ; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r0, r5
 ; SOFT-NEXT:    bne .LBB9_6
 ; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:  .LBB9_6:
-; SOFT-NEXT:    str r0, [sp, #12] @ 4-byte Spill
 ; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    ldr r1, .LCPI9_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r7
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:  .LBB9_6:
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    ldr r6, [sp, #8] @ 4-byte Reload
 ; SOFT-NEXT:    bne .LBB9_8
 ; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    str r7, [sp, #4] @ 4-byte Spill
 ; SOFT-NEXT:  .LBB9_8:
-; SOFT-NEXT:    cmp r5, #0
-; SOFT-NEXT:    ldr r5, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    ldr r4, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r2, r6
 ; SOFT-NEXT:    bne .LBB9_10
 ; SOFT-NEXT:  @ %bb.9:
-; SOFT-NEXT:    ldr r5, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB9_10:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    bl __aeabi_fcmpun
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    bne .LBB9_12
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    beq .LBB9_18
 ; SOFT-NEXT:  @ %bb.11:
-; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB9_19
 ; SOFT-NEXT:  .LBB9_12:
-; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    ldr r1, .LCPI9_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r7
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    bne .LBB9_14
-; SOFT-NEXT:  @ %bb.13:
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:  .LBB9_13:
+; SOFT-NEXT:    ldr r6, [sp, #12] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB9_14:
-; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    mov r2, r5
 ; SOFT-NEXT:    bne .LBB9_16
 ; SOFT-NEXT:  @ %bb.15:
-; SOFT-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT:    mov r2, r6
 ; SOFT-NEXT:  .LBB9_16:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    bl __aeabi_fcmpun
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r5, r6
-; SOFT-NEXT:    bne .LBB9_18
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB9_20
 ; SOFT-NEXT:  @ %bb.17:
-; SOFT-NEXT:    ldr r5, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    ldr r6, [sp] @ 4-byte Reload
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB9_21
+; SOFT-NEXT:    b .LBB9_22
 ; SOFT-NEXT:  .LBB9_18:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r7
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB9_20
-; SOFT-NEXT:  @ %bb.19:
-; SOFT-NEXT:    ldr r7, [sp] @ 4-byte Reload
-; SOFT-NEXT:    b .LBB9_21
+; SOFT-NEXT:    mov r1, r2
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB9_12
+; SOFT-NEXT:  .LBB9_19:
+; SOFT-NEXT:    str r7, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB9_13
+; SOFT-NEXT:    b .LBB9_14
 ; SOFT-NEXT:  .LBB9_20:
-; SOFT-NEXT:    movs r0, #1
-; SOFT-NEXT:    lsls r7, r0, #31
+; SOFT-NEXT:    movs r4, #1
+; SOFT-NEXT:    lsls r6, r4, #31
+; SOFT-NEXT:    ldr r4, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB9_22
 ; SOFT-NEXT:  .LBB9_21:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    ldr r1, .LCPI9_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB9_23
-; SOFT-NEXT:  @ %bb.22:
-; SOFT-NEXT:    ldr r7, .LCPI9_1
-; SOFT-NEXT:  .LBB9_23:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    bl __aeabi_fcmpun
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    bne .LBB9_25
-; SOFT-NEXT:  @ %bb.24:
-; SOFT-NEXT:    mov r6, r7
-; SOFT-NEXT:  .LBB9_25:
-; SOFT-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    mov r2, r5
-; SOFT-NEXT:    mov r3, r6
+; SOFT-NEXT:    ldr r6, .LCPI9_1
+; SOFT-NEXT:  .LBB9_22:
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    bne .LBB9_24
+; SOFT-NEXT:  @ %bb.23:
+; SOFT-NEXT:    mov r5, r6
+; SOFT-NEXT:  .LBB9_24:
+; SOFT-NEXT:    mov r3, r5
 ; SOFT-NEXT:    add sp, #20
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
-; SOFT-NEXT:  @ %bb.26:
+; SOFT-NEXT:  @ %bb.25:
 ; SOFT-NEXT:  .LCPI9_0:
 ; SOFT-NEXT:    .long 2130706431 @ 0x7effffff
 ; SOFT-NEXT:  .LCPI9_1:
@@ -1891,94 +1822,77 @@ define i50 @test_signed_i50_f64(double %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #20
-; SOFT-NEXT:    sub sp, #20
-; SOFT-NEXT:    mov r4, r1
-; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    .pad #12
+; SOFT-NEXT:    sub sp, #12
+; SOFT-NEXT:    mov r7, r1
+; SOFT-NEXT:    mov r6, r0
 ; SOFT-NEXT:    movs r0, #15
 ; SOFT-NEXT:    mvns r2, r0
 ; SOFT-NEXT:    ldr r3, .LCPI16_0
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    str r2, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
 ; SOFT-NEXT:    movs r0, #195
 ; SOFT-NEXT:    lsls r3, r0, #24
-; SOFT-NEXT:    movs r6, #0
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    str r3, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    movs r4, #0
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    mov r2, r4
 ; SOFT-NEXT:    bl __aeabi_dcmpge
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r1, r7
 ; SOFT-NEXT:    bl __aeabi_d2lz
-; SOFT-NEXT:    str r1, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    mov r2, r0
+; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB16_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    mov r2, r5
 ; SOFT-NEXT:  .LBB16_2:
-; SOFT-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    cmp r1, #0
+; SOFT-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    cmp r0, #0
 ; SOFT-NEXT:    bne .LBB16_4
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    str r2, [sp] @ 4-byte Spill
 ; SOFT-NEXT:    b .LBB16_5
 ; SOFT-NEXT:  .LBB16_4:
-; SOFT-NEXT:    mvns r7, r6
+; SOFT-NEXT:    mvns r0, r4
+; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
 ; SOFT-NEXT:  .LBB16_5:
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    mov r2, r5
-; SOFT-NEXT:    mov r3, r4
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    mov r2, r6
+; SOFT-NEXT:    mov r3, r7
 ; SOFT-NEXT:    bl __aeabi_dcmpun
+; SOFT-NEXT:    mov r1, r0
 ; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r0, r4
 ; SOFT-NEXT:    bne .LBB16_7
 ; SOFT-NEXT:  @ %bb.6:
-; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    ldr r0, [sp] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB16_7:
-; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT:    ldr r3, .LCPI16_0
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    ldr r3, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    bl __aeabi_dcmpge
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    bne .LBB16_9
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB16_9
 ; SOFT-NEXT:  @ %bb.8:
-; SOFT-NEXT:    ldr r0, .LCPI16_1
-; SOFT-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:    b .LBB16_10
 ; SOFT-NEXT:  .LBB16_9:
-; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    bne .LBB16_11
-; SOFT-NEXT:  @ %bb.10:
-; SOFT-NEXT:    ldr r7, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT:    b .LBB16_12
-; SOFT-NEXT:  .LBB16_11:
-; SOFT-NEXT:    ldr r7, .LCPI16_2
+; SOFT-NEXT:    ldr r3, .LCPI16_1
+; SOFT-NEXT:  .LBB16_10:
+; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    beq .LBB16_12
+; SOFT-NEXT:  @ %bb.11:
+; SOFT-NEXT:    ldr r3, .LCPI16_2
 ; SOFT-NEXT:  .LBB16_12:
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    mov r2, r5
-; SOFT-NEXT:    mov r3, r4
-; SOFT-NEXT:    bl __aeabi_dcmpun
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r1, #0
 ; SOFT-NEXT:    bne .LBB16_14
 ; SOFT-NEXT:  @ %bb.13:
-; SOFT-NEXT:    mov r6, r7
+; SOFT-NEXT:    mov r4, r3
 ; SOFT-NEXT:  .LBB16_14:
-; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    add sp, #20
+; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    add sp, #12
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.15:
@@ -2075,85 +1989,67 @@ define i64 @test_signed_i64_f64(double %f) nounwind {
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    .pad #12
 ; SOFT-NEXT:    sub sp, #12
-; SOFT-NEXT:    mov r5, r1
-; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    mov r6, r1
+; SOFT-NEXT:    mov r5, r0
 ; SOFT-NEXT:    movs r4, #0
 ; SOFT-NEXT:    mvns r2, r4
 ; SOFT-NEXT:    ldr r3, .LCPI17_0
-; SOFT-NEXT:    str r2, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    str r2, [sp, #4] @ 4-byte Spill
 ; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
 ; SOFT-NEXT:    ldr r3, .LCPI17_1
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:    mov r2, r4
 ; SOFT-NEXT:    bl __aeabi_dcmpge
 ; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:    bl __aeabi_d2lz
-; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    str r1, [sp] @ 4-byte Spill
 ; SOFT-NEXT:    cmp r7, #0
 ; SOFT-NEXT:    bne .LBB17_2
 ; SOFT-NEXT:  @ %bb.1:
 ; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:  .LBB17_2:
-; SOFT-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; SOFT-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
 ; SOFT-NEXT:    cmp r1, #0
-; SOFT-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
 ; SOFT-NEXT:    bne .LBB17_4
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
 ; SOFT-NEXT:  .LBB17_4:
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    mov r3, r5
+; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r1, r6
+; SOFT-NEXT:    mov r2, r5
+; SOFT-NEXT:    mov r3, r6
 ; SOFT-NEXT:    bl __aeabi_dcmpun
+; SOFT-NEXT:    mov r1, r0
 ; SOFT-NEXT:    cmp r0, #0
 ; SOFT-NEXT:    mov r0, r4
 ; SOFT-NEXT:    bne .LBB17_6
 ; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB17_6:
-; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    ldr r3, .LCPI17_0
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r4
-; SOFT-NEXT:    ldr r3, .LCPI17_1
-; SOFT-NEXT:    bl __aeabi_dcmpge
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    bne .LBB17_8
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB17_8
 ; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    movs r0, #1
-; SOFT-NEXT:    lsls r0, r0, #31
-; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    ldr r3, [sp] @ 4-byte Reload
+; SOFT-NEXT:    b .LBB17_9
 ; SOFT-NEXT:  .LBB17_8:
-; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    bne .LBB17_10
-; SOFT-NEXT:  @ %bb.9:
-; SOFT-NEXT:    ldr r7, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    b .LBB17_11
-; SOFT-NEXT:  .LBB17_10:
-; SOFT-NEXT:    ldr r7, .LCPI17_2
+; SOFT-NEXT:    movs r2, #1
+; SOFT-NEXT:    lsls r3, r2, #31
+; SOFT-NEXT:  .LBB17_9:
+; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    beq .LBB17_11
+; SOFT-NEXT:  @ %bb.10:
+; SOFT-NEXT:    ldr r3, .LCPI17_2
 ; SOFT-NEXT:  .LBB17_11:
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    mov r3, r5
-; SOFT-NEXT:    bl __aeabi_dcmpun
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r1, #0
 ; SOFT-NEXT:    bne .LBB17_13
 ; SOFT-NEXT:  @ %bb.12:
-; SOFT-NEXT:    mov r4, r7
+; SOFT-NEXT:    mov r4, r3
 ; SOFT-NEXT:  .LBB17_13:
-; SOFT-NEXT:    ldr r0, [sp] @ 4-byte Reload
 ; SOFT-NEXT:    mov r1, r4
 ; SOFT-NEXT:    add sp, #12
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
@@ -2262,16 +2158,16 @@ define i100 @test_signed_i100_f64(double %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #20
-; SOFT-NEXT:    sub sp, #20
+; SOFT-NEXT:    .pad #28
+; SOFT-NEXT:    sub sp, #28
 ; SOFT-NEXT:    mov r5, r1
 ; SOFT-NEXT:    mov r6, r0
 ; SOFT-NEXT:    movs r4, #0
 ; SOFT-NEXT:    mvns r2, r4
 ; SOFT-NEXT:    ldr r3, .LCPI18_0
-; SOFT-NEXT:    str r2, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT:    str r2, [sp, #24] @ 4-byte Spill
 ; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:    str r0, [sp, #20] @ 4-byte Spill
 ; SOFT-NEXT:    ldr r3, .LCPI18_1
 ; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    mov r1, r5
@@ -2281,145 +2177,99 @@ define i100 @test_signed_i100_f64(double %f) nounwind {
 ; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    mov r1, r5
 ; SOFT-NEXT:    bl __fixdfti
-; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    str r2, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    str r1, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:    str r2, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT:    str r3, [sp, #8] @ 4-byte Spill
 ; SOFT-NEXT:    cmp r7, #0
 ; SOFT-NEXT:    bne .LBB18_2
 ; SOFT-NEXT:  @ %bb.1:
 ; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:  .LBB18_2:
-; SOFT-NEXT:    str r3, [sp] @ 4-byte Spill
-; SOFT-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
 ; SOFT-NEXT:    cmp r1, #0
-; SOFT-NEXT:    ldr r7, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
 ; SOFT-NEXT:    bne .LBB18_4
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    mov r1, r0
 ; SOFT-NEXT:  .LBB18_4:
+; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
 ; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    mov r1, r5
 ; SOFT-NEXT:    mov r2, r6
 ; SOFT-NEXT:    mov r3, r5
 ; SOFT-NEXT:    bl __aeabi_dcmpun
+; SOFT-NEXT:    mov r3, r0
 ; SOFT-NEXT:    cmp r0, #0
 ; SOFT-NEXT:    mov r0, r4
 ; SOFT-NEXT:    bne .LBB18_6
 ; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB18_6:
-; SOFT-NEXT:    str r0, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT:    ldr r3, .LCPI18_0
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r4
-; SOFT-NEXT:    ldr r3, .LCPI18_1
-; SOFT-NEXT:    bl __aeabi_dcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    ldr r6, [sp, #20] @ 4-byte Reload
 ; SOFT-NEXT:    bne .LBB18_8
 ; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    str r7, [sp, #12] @ 4-byte Spill
 ; SOFT-NEXT:  .LBB18_8:
-; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    ldr r7, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    ldr r2, [sp, #24] @ 4-byte Reload
 ; SOFT-NEXT:    bne .LBB18_10
 ; SOFT-NEXT:  @ %bb.9:
-; SOFT-NEXT:    ldr r7, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB18_10:
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    mov r3, r5
-; SOFT-NEXT:    bl __aeabi_dcmpun
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    bne .LBB18_12
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    beq .LBB18_18
 ; SOFT-NEXT:  @ %bb.11:
-; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB18_19
 ; SOFT-NEXT:  .LBB18_12:
-; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT:    ldr r3, .LCPI18_0
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r4
-; SOFT-NEXT:    ldr r3, .LCPI18_1
-; SOFT-NEXT:    bl __aeabi_dcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r6, #0
 ; SOFT-NEXT:    bne .LBB18_14
-; SOFT-NEXT:  @ %bb.13:
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:  .LBB18_13:
+; SOFT-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    str r2, [sp, #24] @ 4-byte Spill
 ; SOFT-NEXT:  .LBB18_14:
-; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    ldr r7, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    mov r2, r4
 ; SOFT-NEXT:    bne .LBB18_16
 ; SOFT-NEXT:  @ %bb.15:
-; SOFT-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    ldr r2, [sp, #24] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB18_16:
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    mov r3, r5
-; SOFT-NEXT:    bl __aeabi_dcmpun
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    bne .LBB18_18
+; SOFT-NEXT:    movs r5, #7
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB18_20
 ; SOFT-NEXT:  @ %bb.17:
-; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    beq .LBB18_21
+; SOFT-NEXT:    b .LBB18_22
 ; SOFT-NEXT:  .LBB18_18:
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT:    ldr r3, .LCPI18_0
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    str r0, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r4
-; SOFT-NEXT:    ldr r3, .LCPI18_1
-; SOFT-NEXT:    bl __aeabi_dcmpge
-; SOFT-NEXT:    movs r7, #7
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB18_20
-; SOFT-NEXT:  @ %bb.19:
-; SOFT-NEXT:    ldr r0, [sp] @ 4-byte Reload
-; SOFT-NEXT:    b .LBB18_21
+; SOFT-NEXT:    mov r1, r2
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB18_12
+; SOFT-NEXT:  .LBB18_19:
+; SOFT-NEXT:    str r7, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    beq .LBB18_13
+; SOFT-NEXT:    b .LBB18_14
 ; SOFT-NEXT:  .LBB18_20:
-; SOFT-NEXT:    mvns r0, r7
+; SOFT-NEXT:    mvns r7, r5
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB18_22
 ; SOFT-NEXT:  .LBB18_21:
-; SOFT-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT:    cmp r1, #0
-; SOFT-NEXT:    bne .LBB18_23
-; SOFT-NEXT:  @ %bb.22:
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:  .LBB18_23:
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    mov r3, r5
-; SOFT-NEXT:    bl __aeabi_dcmpun
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    bne .LBB18_25
-; SOFT-NEXT:  @ %bb.24:
-; SOFT-NEXT:    mov r4, r7
-; SOFT-NEXT:  .LBB18_25:
-; SOFT-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    mov r5, r7
+; SOFT-NEXT:  .LBB18_22:
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    bne .LBB18_24
+; SOFT-NEXT:  @ %bb.23:
+; SOFT-NEXT:    mov r4, r5
+; SOFT-NEXT:  .LBB18_24:
 ; SOFT-NEXT:    mov r3, r4
-; SOFT-NEXT:    add sp, #20
+; SOFT-NEXT:    add sp, #28
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
-; SOFT-NEXT:  @ %bb.26:
+; SOFT-NEXT:  @ %bb.25:
 ; SOFT-NEXT:  .LCPI18_0:
 ; SOFT-NEXT:    .long 1176502271 @ 0x461fffff
 ; SOFT-NEXT:  .LCPI18_1:
@@ -2569,16 +2419,16 @@ define i128 @test_signed_i128_f64(double %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #20
-; SOFT-NEXT:    sub sp, #20
+; SOFT-NEXT:    .pad #28
+; SOFT-NEXT:    sub sp, #28
 ; SOFT-NEXT:    mov r5, r1
 ; SOFT-NEXT:    mov r6, r0
 ; SOFT-NEXT:    movs r4, #0
 ; SOFT-NEXT:    mvns r2, r4
 ; SOFT-NEXT:    ldr r3, .LCPI19_0
-; SOFT-NEXT:    str r2, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT:    str r2, [sp, #24] @ 4-byte Spill
 ; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    str r0, [sp, #20] @ 4-byte Spill
 ; SOFT-NEXT:    ldr r3, .LCPI19_1
 ; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    mov r1, r5
@@ -2588,145 +2438,99 @@ define i128 @test_signed_i128_f64(double %f) nounwind {
 ; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    mov r1, r5
 ; SOFT-NEXT:    bl __fixdfti
-; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    str r2, [sp] @ 4-byte Spill
-; SOFT-NEXT:    str r3, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:    str r1, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:    str r2, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT:    str r3, [sp, #8] @ 4-byte Spill
 ; SOFT-NEXT:    cmp r7, #0
 ; SOFT-NEXT:    bne .LBB19_2
 ; SOFT-NEXT:  @ %bb.1:
 ; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:  .LBB19_2:
-; SOFT-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
 ; SOFT-NEXT:    cmp r1, #0
-; SOFT-NEXT:    ldr r7, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
 ; SOFT-NEXT:    bne .LBB19_4
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    mov r1, r0
 ; SOFT-NEXT:  .LBB19_4:
+; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
 ; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    mov r1, r5
 ; SOFT-NEXT:    mov r2, r6
 ; SOFT-NEXT:    mov r3, r5
 ; SOFT-NEXT:    bl __aeabi_dcmpun
+; SOFT-NEXT:    mov r3, r0
 ; SOFT-NEXT:    cmp r0, #0
 ; SOFT-NEXT:    mov r0, r4
 ; SOFT-NEXT:    bne .LBB19_6
 ; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB19_6:
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT:    ldr r3, .LCPI19_0
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r4
-; SOFT-NEXT:    ldr r3, .LCPI19_1
-; SOFT-NEXT:    bl __aeabi_dcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    ldr r6, [sp, #20] @ 4-byte Reload
 ; SOFT-NEXT:    bne .LBB19_8
 ; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    str r7, [sp, #12] @ 4-byte Spill
 ; SOFT-NEXT:  .LBB19_8:
-; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    ldr r7, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    ldr r2, [sp, #24] @ 4-byte Reload
 ; SOFT-NEXT:    bne .LBB19_10
 ; SOFT-NEXT:  @ %bb.9:
-; SOFT-NEXT:    ldr r7, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB19_10:
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    mov r3, r5
-; SOFT-NEXT:    bl __aeabi_dcmpun
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    bne .LBB19_12
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    beq .LBB19_18
 ; SOFT-NEXT:  @ %bb.11:
-; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB19_19
 ; SOFT-NEXT:  .LBB19_12:
-; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT:    ldr r3, .LCPI19_0
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r4
-; SOFT-NEXT:    ldr r3, .LCPI19_1
-; SOFT-NEXT:    bl __aeabi_dcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r6, #0
 ; SOFT-NEXT:    bne .LBB19_14
-; SOFT-NEXT:  @ %bb.13:
-; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT:  .LBB19_13:
+; SOFT-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    str r2, [sp, #24] @ 4-byte Spill
 ; SOFT-NEXT:  .LBB19_14:
-; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    ldr r7, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    mov r2, r4
 ; SOFT-NEXT:    bne .LBB19_16
 ; SOFT-NEXT:  @ %bb.15:
-; SOFT-NEXT:    ldr r7, [sp] @ 4-byte Reload
+; SOFT-NEXT:    ldr r2, [sp, #24] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB19_16:
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    mov r3, r5
-; SOFT-NEXT:    bl __aeabi_dcmpun
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    bne .LBB19_18
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB19_20
 ; SOFT-NEXT:  @ %bb.17:
-; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    ldr r5, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB19_21
+; SOFT-NEXT:    b .LBB19_22
 ; SOFT-NEXT:  .LBB19_18:
-; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT:    ldr r3, .LCPI19_0
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r4
-; SOFT-NEXT:    ldr r3, .LCPI19_1
-; SOFT-NEXT:    bl __aeabi_dcmpge
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    bne .LBB19_20
-; SOFT-NEXT:  @ %bb.19:
-; SOFT-NEXT:    movs r0, #1
-; SOFT-NEXT:    lsls r0, r0, #31
-; SOFT-NEXT:    str r0, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT:  .LBB19_20:
+; SOFT-NEXT:    mov r1, r2
 ; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    bne .LBB19_22
-; SOFT-NEXT:  @ %bb.21:
-; SOFT-NEXT:    ldr r7, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT:    b .LBB19_23
+; SOFT-NEXT:    bne .LBB19_12
+; SOFT-NEXT:  .LBB19_19:
+; SOFT-NEXT:    str r7, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    beq .LBB19_13
+; SOFT-NEXT:    b .LBB19_14
+; SOFT-NEXT:  .LBB19_20:
+; SOFT-NEXT:    movs r5, #1
+; SOFT-NEXT:    lsls r5, r5, #31
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    beq .LBB19_22
+; SOFT-NEXT:  .LBB19_21:
+; SOFT-NEXT:    ldr r5, .LCPI19_2
 ; SOFT-NEXT:  .LBB19_22:
-; SOFT-NEXT:    ldr r7, .LCPI19_2
-; SOFT-NEXT:  .LBB19_23:
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    mov r3, r5
-; SOFT-NEXT:    bl __aeabi_dcmpun
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    bne .LBB19_25
-; SOFT-NEXT:  @ %bb.24:
-; SOFT-NEXT:    mov r4, r7
-; SOFT-NEXT:  .LBB19_25:
-; SOFT-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    ldr r2, [sp] @ 4-byte Reload
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    bne .LBB19_24
+; SOFT-NEXT:  @ %bb.23:
+; SOFT-NEXT:    mov r4, r5
+; SOFT-NEXT:  .LBB19_24:
 ; SOFT-NEXT:    mov r3, r4
-; SOFT-NEXT:    add sp, #20
+; SOFT-NEXT:    add sp, #28
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
-; SOFT-NEXT:  @ %bb.26:
+; SOFT-NEXT:  @ %bb.25:
 ; SOFT-NEXT:  .LCPI19_0:
 ; SOFT-NEXT:    .long 1205862399 @ 0x47dfffff
 ; SOFT-NEXT:  .LCPI19_1:
@@ -3485,71 +3289,61 @@ define i50 @test_signed_i50_f16(half %f) nounwind {
 ; SOFT-NEXT:    sub sp, #12
 ; SOFT-NEXT:    uxth r0, r0
 ; SOFT-NEXT:    bl __aeabi_h2f
-; SOFT-NEXT:    mov r4, r0
+; SOFT-NEXT:    mov r7, r0
 ; SOFT-NEXT:    ldr r1, .LCPI26_0
 ; SOFT-NEXT:    bl __aeabi_fcmpgt
 ; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
 ; SOFT-NEXT:    movs r0, #27
-; SOFT-NEXT:    lsls r5, r0, #27
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    lsls r1, r0, #27
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    bl __aeabi_f2lz
-; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB26_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r6, r7
+; SOFT-NEXT:    mov r4, r5
 ; SOFT-NEXT:  .LBB26_2:
-; SOFT-NEXT:    movs r7, #0
+; SOFT-NEXT:    movs r6, #0
 ; SOFT-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
 ; SOFT-NEXT:    cmp r0, #0
 ; SOFT-NEXT:    beq .LBB26_4
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mvns r6, r7
+; SOFT-NEXT:    mvns r4, r6
 ; SOFT-NEXT:  .LBB26_4:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    mov r1, r7
 ; SOFT-NEXT:    bl __aeabi_fcmpun
+; SOFT-NEXT:    mov r1, r0
 ; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bne .LBB26_6
 ; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:  .LBB26_6:
-; SOFT-NEXT:    mov r6, r0
 ; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:  .LBB26_6:
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    beq .LBB26_8
 ; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    ldr r5, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
 ; SOFT-NEXT:    b .LBB26_9
 ; SOFT-NEXT:  .LBB26_8:
-; SOFT-NEXT:    ldr r5, .LCPI26_1
+; SOFT-NEXT:    ldr r3, .LCPI26_1
 ; SOFT-NEXT:  .LBB26_9:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    ldr r1, .LCPI26_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    cmp r2, #0
 ; SOFT-NEXT:    beq .LBB26_11
 ; SOFT-NEXT:  @ %bb.10:
-; SOFT-NEXT:    ldr r5, .LCPI26_2
+; SOFT-NEXT:    ldr r3, .LCPI26_2
 ; SOFT-NEXT:  .LBB26_11:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    bl __aeabi_fcmpun
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r1, #0
 ; SOFT-NEXT:    bne .LBB26_13
 ; SOFT-NEXT:  @ %bb.12:
-; SOFT-NEXT:    mov r7, r5
+; SOFT-NEXT:    mov r6, r3
 ; SOFT-NEXT:  .LBB26_13:
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:    add sp, #12
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
@@ -3660,72 +3454,62 @@ define i64 @test_signed_i64_f16(half %f) nounwind {
 ; SOFT-NEXT:    sub sp, #12
 ; SOFT-NEXT:    uxth r0, r0
 ; SOFT-NEXT:    bl __aeabi_h2f
-; SOFT-NEXT:    mov r4, r0
+; SOFT-NEXT:    mov r7, r0
 ; SOFT-NEXT:    ldr r1, .LCPI27_0
 ; SOFT-NEXT:    bl __aeabi_fcmpgt
 ; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
 ; SOFT-NEXT:    movs r0, #223
-; SOFT-NEXT:    lsls r5, r0, #24
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    lsls r1, r0, #24
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    bl __aeabi_f2lz
-; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB27_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r6, r7
+; SOFT-NEXT:    mov r4, r5
 ; SOFT-NEXT:  .LBB27_2:
-; SOFT-NEXT:    movs r7, #0
+; SOFT-NEXT:    movs r6, #0
 ; SOFT-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
 ; SOFT-NEXT:    cmp r0, #0
 ; SOFT-NEXT:    beq .LBB27_4
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mvns r6, r7
+; SOFT-NEXT:    mvns r4, r6
 ; SOFT-NEXT:  .LBB27_4:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    mov r1, r7
 ; SOFT-NEXT:    bl __aeabi_fcmpun
+; SOFT-NEXT:    mov r1, r0
 ; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bne .LBB27_6
 ; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:  .LBB27_6:
-; SOFT-NEXT:    mov r6, r0
 ; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:  .LBB27_6:
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    beq .LBB27_8
 ; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    ldr r5, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
 ; SOFT-NEXT:    b .LBB27_9
 ; SOFT-NEXT:  .LBB27_8:
-; SOFT-NEXT:    movs r0, #1
-; SOFT-NEXT:    lsls r5, r0, #31
+; SOFT-NEXT:    movs r2, #1
+; SOFT-NEXT:    lsls r3, r2, #31
 ; SOFT-NEXT:  .LBB27_9:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    ldr r1, .LCPI27_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    cmp r2, #0
 ; SOFT-NEXT:    beq .LBB27_11
 ; SOFT-NEXT:  @ %bb.10:
-; SOFT-NEXT:    ldr r5, .LCPI27_1
+; SOFT-NEXT:    ldr r3, .LCPI27_1
 ; SOFT-NEXT:  .LBB27_11:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    bl __aeabi_fcmpun
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r1, #0
 ; SOFT-NEXT:    bne .LBB27_13
 ; SOFT-NEXT:  @ %bb.12:
-; SOFT-NEXT:    mov r7, r5
+; SOFT-NEXT:    mov r6, r3
 ; SOFT-NEXT:  .LBB27_13:
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:    add sp, #12
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
@@ -3834,134 +3618,110 @@ define i100 @test_signed_i100_f16(half %f) nounwind {
 ; SOFT-NEXT:    sub sp, #20
 ; SOFT-NEXT:    uxth r0, r0
 ; SOFT-NEXT:    bl __aeabi_h2f
-; SOFT-NEXT:    mov r4, r0
+; SOFT-NEXT:    mov r6, r0
 ; SOFT-NEXT:    ldr r1, .LCPI28_0
 ; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    movs r0, #241
-; SOFT-NEXT:    lsls r5, r0, #24
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    lsls r1, r0, #24
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __fixsfti
-; SOFT-NEXT:    str r1, [sp] @ 4-byte Spill
-; SOFT-NEXT:    str r2, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    str r3, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    str r2, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:    str r3, [sp] @ 4-byte Spill
+; SOFT-NEXT:    cmp r7, #0
 ; SOFT-NEXT:    bne .LBB28_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:  .LBB28_2:
-; SOFT-NEXT:    movs r6, #0
-; SOFT-NEXT:    mvns r1, r6
-; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    str r1, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT:    mov r7, r1
+; SOFT-NEXT:    movs r5, #0
+; SOFT-NEXT:    mvns r1, r5
+; SOFT-NEXT:    str r4, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    mov r4, r1
 ; SOFT-NEXT:    bne .LBB28_4
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:  .LBB28_4:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpun
+; SOFT-NEXT:    mov r3, r0
 ; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r0, r5
 ; SOFT-NEXT:    bne .LBB28_6
 ; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:  .LBB28_6:
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    ldr r1, .LCPI28_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r7, r0
 ; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:  .LBB28_6:
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    ldr r6, [sp, #8] @ 4-byte Reload
 ; SOFT-NEXT:    bne .LBB28_8
 ; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT:    str r7, [sp, #4] @ 4-byte Spill
 ; SOFT-NEXT:  .LBB28_8:
-; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    ldr r7, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    ldr r4, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r2, r6
 ; SOFT-NEXT:    bne .LBB28_10
 ; SOFT-NEXT:  @ %bb.9:
-; SOFT-NEXT:    ldr r7, [sp] @ 4-byte Reload
+; SOFT-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB28_10:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    bl __aeabi_fcmpun
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    bne .LBB28_12
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    beq .LBB28_18
 ; SOFT-NEXT:  @ %bb.11:
-; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB28_19
 ; SOFT-NEXT:  .LBB28_12:
-; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    ldr r1, .LCPI28_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    bne .LBB28_14
-; SOFT-NEXT:  @ %bb.13:
-; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:  .LBB28_13:
+; SOFT-NEXT:    ldr r6, [sp, #12] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB28_14:
-; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    mov r2, r5
 ; SOFT-NEXT:    bne .LBB28_16
 ; SOFT-NEXT:  @ %bb.15:
-; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT:    mov r2, r6
 ; SOFT-NEXT:  .LBB28_16:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    bl __aeabi_fcmpun
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r7, r6
-; SOFT-NEXT:    bne .LBB28_18
+; SOFT-NEXT:    movs r4, #7
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB28_20
 ; SOFT-NEXT:  @ %bb.17:
-; SOFT-NEXT:    ldr r7, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    ldr r7, [sp] @ 4-byte Reload
+; SOFT-NEXT:    b .LBB28_21
 ; SOFT-NEXT:  .LBB28_18:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    movs r5, #7
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    bne .LBB28_20
-; SOFT-NEXT:  @ %bb.19:
-; SOFT-NEXT:    mvns r0, r5
-; SOFT-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:    mov r1, r2
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB28_12
+; SOFT-NEXT:  .LBB28_19:
+; SOFT-NEXT:    str r7, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB28_13
+; SOFT-NEXT:    b .LBB28_14
 ; SOFT-NEXT:  .LBB28_20:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    ldr r1, .LCPI28_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    bne .LBB28_22
-; SOFT-NEXT:  @ %bb.21:
-; SOFT-NEXT:    ldr r5, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT:  .LBB28_22:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    bl __aeabi_fcmpun
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    bne .LBB28_24
-; SOFT-NEXT:  @ %bb.23:
-; SOFT-NEXT:    mov r6, r5
-; SOFT-NEXT:  .LBB28_24:
-; SOFT-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    ldr r1, [sp] @ 4-byte Reload
-; SOFT-NEXT:    mov r2, r7
-; SOFT-NEXT:    mov r3, r6
+; SOFT-NEXT:    mvns r7, r4
+; SOFT-NEXT:  .LBB28_21:
+; SOFT-NEXT:    ldr r6, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB28_23
+; SOFT-NEXT:  @ %bb.22:
+; SOFT-NEXT:    mov r4, r7
+; SOFT-NEXT:  .LBB28_23:
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    bne .LBB28_25
+; SOFT-NEXT:  @ %bb.24:
+; SOFT-NEXT:    mov r5, r4
+; SOFT-NEXT:  .LBB28_25:
+; SOFT-NEXT:    mov r3, r5
 ; SOFT-NEXT:    add sp, #20
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
-; SOFT-NEXT:  @ %bb.25:
+; SOFT-NEXT:  @ %bb.26:
 ; SOFT-NEXT:  .LCPI28_0:
 ; SOFT-NEXT:    .long 1895825407 @ 0x70ffffff
 ;
@@ -4112,136 +3872,111 @@ define i128 @test_signed_i128_f16(half %f) nounwind {
 ; SOFT-NEXT:    sub sp, #20
 ; SOFT-NEXT:    uxth r0, r0
 ; SOFT-NEXT:    bl __aeabi_h2f
-; SOFT-NEXT:    mov r4, r0
+; SOFT-NEXT:    mov r6, r0
 ; SOFT-NEXT:    ldr r1, .LCPI29_0
 ; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    movs r0, #255
-; SOFT-NEXT:    lsls r7, r0, #24
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    lsls r1, r0, #24
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __fixsfti
 ; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    str r2, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    str r2, [sp, #12] @ 4-byte Spill
 ; SOFT-NEXT:    str r3, [sp] @ 4-byte Spill
-; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    cmp r7, #0
 ; SOFT-NEXT:    bne .LBB29_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:  .LBB29_2:
-; SOFT-NEXT:    movs r6, #0
-; SOFT-NEXT:    mvns r1, r6
-; SOFT-NEXT:    cmp r5, #0
-; SOFT-NEXT:    str r1, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT:    mov r5, r1
+; SOFT-NEXT:    movs r5, #0
+; SOFT-NEXT:    mvns r1, r5
+; SOFT-NEXT:    str r4, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    mov r4, r1
 ; SOFT-NEXT:    bne .LBB29_4
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:  .LBB29_4:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpun
+; SOFT-NEXT:    mov r3, r0
 ; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r0, r5
 ; SOFT-NEXT:    bne .LBB29_6
 ; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:  .LBB29_6:
-; SOFT-NEXT:    str r0, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    ldr r1, .LCPI29_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r5, r0
 ; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r7
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:  .LBB29_6:
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    ldr r6, [sp, #8] @ 4-byte Reload
 ; SOFT-NEXT:    bne .LBB29_8
 ; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    str r7, [sp, #4] @ 4-byte Spill
 ; SOFT-NEXT:  .LBB29_8:
-; SOFT-NEXT:    cmp r5, #0
-; SOFT-NEXT:    ldr r5, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    ldr r4, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r2, r6
 ; SOFT-NEXT:    bne .LBB29_10
 ; SOFT-NEXT:  @ %bb.9:
-; SOFT-NEXT:    ldr r5, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB29_10:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    bl __aeabi_fcmpun
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    bne .LBB29_12
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    beq .LBB29_18
 ; SOFT-NEXT:  @ %bb.11:
-; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB29_19
 ; SOFT-NEXT:  .LBB29_12:
-; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    ldr r1, .LCPI29_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r7
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    bne .LBB29_14
-; SOFT-NEXT:  @ %bb.13:
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:  .LBB29_13:
+; SOFT-NEXT:    ldr r6, [sp, #12] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB29_14:
-; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    mov r2, r5
 ; SOFT-NEXT:    bne .LBB29_16
 ; SOFT-NEXT:  @ %bb.15:
-; SOFT-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT:    mov r2, r6
 ; SOFT-NEXT:  .LBB29_16:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    bl __aeabi_fcmpun
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r5, r6
-; SOFT-NEXT:    bne .LBB29_18
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB29_20
 ; SOFT-NEXT:  @ %bb.17:
-; SOFT-NEXT:    ldr r5, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    ldr r6, [sp] @ 4-byte Reload
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB29_21
+; SOFT-NEXT:    b .LBB29_22
 ; SOFT-NEXT:  .LBB29_18:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r7
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB29_20
-; SOFT-NEXT:  @ %bb.19:
-; SOFT-NEXT:    ldr r7, [sp] @ 4-byte Reload
-; SOFT-NEXT:    b .LBB29_21
+; SOFT-NEXT:    mov r1, r2
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB29_12
+; SOFT-NEXT:  .LBB29_19:
+; SOFT-NEXT:    str r7, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB29_13
+; SOFT-NEXT:    b .LBB29_14
 ; SOFT-NEXT:  .LBB29_20:
-; SOFT-NEXT:    movs r0, #1
-; SOFT-NEXT:    lsls r7, r0, #31
+; SOFT-NEXT:    movs r4, #1
+; SOFT-NEXT:    lsls r6, r4, #31
+; SOFT-NEXT:    ldr r4, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB29_22
 ; SOFT-NEXT:  .LBB29_21:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    ldr r1, .LCPI29_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB29_23
-; SOFT-NEXT:  @ %bb.22:
-; SOFT-NEXT:    ldr r7, .LCPI29_1
-; SOFT-NEXT:  .LBB29_23:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    bl __aeabi_fcmpun
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    bne .LBB29_25
-; SOFT-NEXT:  @ %bb.24:
-; SOFT-NEXT:    mov r6, r7
-; SOFT-NEXT:  .LBB29_25:
-; SOFT-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    mov r2, r5
-; SOFT-NEXT:    mov r3, r6
+; SOFT-NEXT:    ldr r6, .LCPI29_1
+; SOFT-NEXT:  .LBB29_22:
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    bne .LBB29_24
+; SOFT-NEXT:  @ %bb.23:
+; SOFT-NEXT:    mov r5, r6
+; SOFT-NEXT:  .LBB29_24:
+; SOFT-NEXT:    mov r3, r5
 ; SOFT-NEXT:    add sp, #20
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
-; SOFT-NEXT:  @ %bb.26:
+; SOFT-NEXT:  @ %bb.25:
 ; SOFT-NEXT:  .LCPI29_0:
 ; SOFT-NEXT:    .long 2130706431 @ 0x7effffff
 ; SOFT-NEXT:  .LCPI29_1:

diff  --git a/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll b/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll
index 0fc1eabe9b9f0..3438fb113015c 100644
--- a/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll
@@ -443,48 +443,45 @@ define i50 @test_signed_i50_f32(float %f) nounwind {
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    .pad #4
 ; SOFT-NEXT:    sub sp, #4
-; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    mov r7, r0
 ; SOFT-NEXT:    ldr r1, .LCPI6_0
 ; SOFT-NEXT:    bl __aeabi_fcmpgt
 ; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    movs r7, #0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    movs r6, #0
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpge
 ; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    bl __aeabi_f2ulz
 ; SOFT-NEXT:    cmp r5, #0
-; SOFT-NEXT:    bne .LBB6_2
+; SOFT-NEXT:    beq .LBB6_5
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB6_6
 ; SOFT-NEXT:  .LBB6_2:
-; SOFT-NEXT:    mov r5, r1
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB6_7
+; SOFT-NEXT:  .LBB6_3:
 ; SOFT-NEXT:    cmp r4, #0
-; SOFT-NEXT:    beq .LBB6_4
-; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mvns r0, r7
+; SOFT-NEXT:    bne .LBB6_8
 ; SOFT-NEXT:  .LBB6_4:
-; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    ldr r1, .LCPI6_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r7
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    bne .LBB6_6
-; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    add sp, #4
+; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:  .LBB6_5:
+; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB6_2
 ; SOFT-NEXT:  .LBB6_6:
+; SOFT-NEXT:    mvns r0, r6
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB6_3
+; SOFT-NEXT:  .LBB6_7:
+; SOFT-NEXT:    mov r1, r5
 ; SOFT-NEXT:    cmp r4, #0
-; SOFT-NEXT:    beq .LBB6_8
-; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    ldr r5, .LCPI6_1
+; SOFT-NEXT:    beq .LBB6_4
 ; SOFT-NEXT:  .LBB6_8:
-; SOFT-NEXT:    ldr r0, [sp] @ 4-byte Reload
-; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    ldr r1, .LCPI6_1
 ; SOFT-NEXT:    add sp, #4
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
@@ -529,54 +526,50 @@ define i64 @test_signed_i64_f32(float %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #12
-; SOFT-NEXT:    sub sp, #12
-; SOFT-NEXT:    mov r4, r0
+; SOFT-NEXT:    .pad #4
+; SOFT-NEXT:    sub sp, #4
+; SOFT-NEXT:    mov r7, r0
 ; SOFT-NEXT:    ldr r1, .LCPI7_0
 ; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    movs r7, #0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    mov r4, r0
+; SOFT-NEXT:    movs r6, #0
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpge
 ; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    bl __aeabi_f2ulz
-; SOFT-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    mov r3, r0
 ; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB7_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r3, r5
 ; SOFT-NEXT:  .LBB7_2:
-; SOFT-NEXT:    mvns r5, r7
-; SOFT-NEXT:    cmp r6, #0
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    bne .LBB7_4
+; SOFT-NEXT:    mvns r2, r6
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r0, r2
+; SOFT-NEXT:    beq .LBB7_7
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r1, r0
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB7_8
 ; SOFT-NEXT:  .LBB7_4:
-; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    ldr r1, .LCPI7_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r7
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    bne .LBB7_6
-; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:  .LBB7_5:
+; SOFT-NEXT:    mov r2, r1
 ; SOFT-NEXT:  .LBB7_6:
-; SOFT-NEXT:    cmp r6, #0
-; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    bne .LBB7_8
-; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    ldr r5, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    mov r1, r2
+; SOFT-NEXT:    add sp, #4
+; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:  .LBB7_7:
+; SOFT-NEXT:    mov r0, r3
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB7_4
 ; SOFT-NEXT:  .LBB7_8:
 ; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    add sp, #12
-; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB7_5
+; SOFT-NEXT:    b .LBB7_6
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.9:
 ; SOFT-NEXT:  .LCPI7_0:
@@ -622,94 +615,76 @@ define i100 @test_signed_i100_f32(float %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #20
-; SOFT-NEXT:    sub sp, #20
-; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    .pad #4
+; SOFT-NEXT:    sub sp, #4
+; SOFT-NEXT:    mov r7, r0
 ; SOFT-NEXT:    ldr r1, .LCPI8_0
 ; SOFT-NEXT:    bl __aeabi_fcmpgt
 ; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    movs r6, #0
-; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    bl __fixunssfti
-; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    str r2, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    mov r7, r1
+; SOFT-NEXT:    str r2, [sp] @ 4-byte Spill
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB8_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    mov r0, r5
 ; SOFT-NEXT:  .LBB8_2:
-; SOFT-NEXT:    str r3, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT:    mvns r7, r6
+; SOFT-NEXT:    mvns r2, r6
 ; SOFT-NEXT:    cmp r4, #0
-; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    mov r6, r2
 ; SOFT-NEXT:    bne .LBB8_4
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r1, r0
+; SOFT-NEXT:    mov r6, r0
 ; SOFT-NEXT:  .LBB8_4:
-; SOFT-NEXT:    str r1, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    ldr r1, .LCPI8_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB8_6
 ; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    mov r7, r5
 ; SOFT-NEXT:  .LBB8_6:
 ; SOFT-NEXT:    cmp r4, #0
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    bne .LBB8_8
+; SOFT-NEXT:    mov r1, r2
+; SOFT-NEXT:    beq .LBB8_13
 ; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB8_14
 ; SOFT-NEXT:  .LBB8_8:
-; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    ldr r1, .LCPI8_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    bne .LBB8_10
-; SOFT-NEXT:  @ %bb.9:
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB8_15
+; SOFT-NEXT:  .LBB8_9:
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB8_16
 ; SOFT-NEXT:  .LBB8_10:
 ; SOFT-NEXT:    cmp r4, #0
-; SOFT-NEXT:    bne .LBB8_12
-; SOFT-NEXT:  @ %bb.11:
-; SOFT-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    beq .LBB8_12
+; SOFT-NEXT:  .LBB8_11:
+; SOFT-NEXT:    movs r3, #15
 ; SOFT-NEXT:  .LBB8_12:
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    ldr r1, .LCPI8_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    ldr r3, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT:    bne .LBB8_14
-; SOFT-NEXT:  @ %bb.13:
-; SOFT-NEXT:    mov r3, r0
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    add sp, #4
+; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:  .LBB8_13:
+; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB8_8
 ; SOFT-NEXT:  .LBB8_14:
+; SOFT-NEXT:    str r5, [sp] @ 4-byte Spill
 ; SOFT-NEXT:    cmp r4, #0
-; SOFT-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    beq .LBB8_16
-; SOFT-NEXT:  @ %bb.15:
-; SOFT-NEXT:    movs r3, #15
+; SOFT-NEXT:    bne .LBB8_9
+; SOFT-NEXT:  .LBB8_15:
+; SOFT-NEXT:    ldr r2, [sp] @ 4-byte Reload
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB8_10
 ; SOFT-NEXT:  .LBB8_16:
-; SOFT-NEXT:    mov r2, r7
-; SOFT-NEXT:    add sp, #20
-; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:    mov r3, r5
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB8_11
+; SOFT-NEXT:    b .LBB8_12
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.17:
 ; SOFT-NEXT:  .LCPI8_0:
@@ -771,96 +746,75 @@ define i128 @test_signed_i128_f32(float %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #20
-; SOFT-NEXT:    sub sp, #20
-; SOFT-NEXT:    mov r4, r0
+; SOFT-NEXT:    .pad #4
+; SOFT-NEXT:    sub sp, #4
+; SOFT-NEXT:    mov r7, r0
 ; SOFT-NEXT:    ldr r1, .LCPI9_0
 ; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    movs r7, #0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    mov r4, r0
+; SOFT-NEXT:    movs r6, #0
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    bl __fixunssfti
-; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    str r2, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    str r3, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    mov r7, r1
+; SOFT-NEXT:    str r3, [sp] @ 4-byte Spill
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB9_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r0, r5
 ; SOFT-NEXT:  .LBB9_2:
-; SOFT-NEXT:    mvns r6, r7
-; SOFT-NEXT:    cmp r5, #0
-; SOFT-NEXT:    mov r1, r6
+; SOFT-NEXT:    mvns r6, r6
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r3, r6
 ; SOFT-NEXT:    bne .LBB9_4
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r1, r0
+; SOFT-NEXT:    mov r3, r0
 ; SOFT-NEXT:  .LBB9_4:
-; SOFT-NEXT:    str r1, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    ldr r1, .LCPI9_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r7
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB9_6
 ; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    mov r7, r5
 ; SOFT-NEXT:  .LBB9_6:
-; SOFT-NEXT:    cmp r5, #0
-; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:    bne .LBB9_8
 ; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:  .LBB9_8:
-; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    ldr r1, .LCPI9_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    mov r0, r4
 ; SOFT-NEXT:    mov r1, r7
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:  .LBB9_8:
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB9_10
 ; SOFT-NEXT:  @ %bb.9:
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    mov r2, r5
 ; SOFT-NEXT:  .LBB9_10:
-; SOFT-NEXT:    cmp r5, #0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    bne .LBB9_12
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r7, r6
+; SOFT-NEXT:    beq .LBB9_15
 ; SOFT-NEXT:  @ %bb.11:
-; SOFT-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB9_16
 ; SOFT-NEXT:  .LBB9_12:
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    ldr r1, .LCPI9_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r7
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    bne .LBB9_14
-; SOFT-NEXT:  @ %bb.13:
-; SOFT-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT:  .LBB9_13:
+; SOFT-NEXT:    ldr r6, [sp] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB9_14:
-; SOFT-NEXT:    cmp r5, #0
-; SOFT-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    bne .LBB9_16
-; SOFT-NEXT:  @ %bb.15:
-; SOFT-NEXT:    ldr r6, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT:  .LBB9_16:
+; SOFT-NEXT:    mov r0, r3
+; SOFT-NEXT:    mov r2, r7
 ; SOFT-NEXT:    mov r3, r6
-; SOFT-NEXT:    add sp, #20
+; SOFT-NEXT:    add sp, #4
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:  .LBB9_15:
+; SOFT-NEXT:    mov r7, r2
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB9_12
+; SOFT-NEXT:  .LBB9_16:
+; SOFT-NEXT:    str r5, [sp] @ 4-byte Spill
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB9_13
+; SOFT-NEXT:    b .LBB9_14
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.17:
 ; SOFT-NEXT:  .LCPI9_0:
@@ -1431,64 +1385,54 @@ define i50 @test_signed_i50_f64(double %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #12
-; SOFT-NEXT:    sub sp, #12
-; SOFT-NEXT:    mov r4, r1
-; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    .pad #4
+; SOFT-NEXT:    sub sp, #4
+; SOFT-NEXT:    mov r7, r1
+; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    movs r0, #7
 ; SOFT-NEXT:    mvns r2, r0
 ; SOFT-NEXT:    ldr r3, .LCPI16_0
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    str r2, [sp] @ 4-byte Spill
+; SOFT-NEXT:    mov r0, r4
 ; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    movs r7, #0
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    mov r2, r7
-; SOFT-NEXT:    mov r3, r7
+; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT:    movs r6, #0
+; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    mov r2, r6
+; SOFT-NEXT:    mov r3, r6
 ; SOFT-NEXT:    bl __aeabi_dcmpge
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r1, r7
 ; SOFT-NEXT:    bl __aeabi_d2ulz
-; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB16_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r0, r5
 ; SOFT-NEXT:  .LBB16_2:
-; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    ldr r2, [sp] @ 4-byte Reload
 ; SOFT-NEXT:    cmp r2, #0
-; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    beq .LBB16_4
+; SOFT-NEXT:    bne .LBB16_6
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mvns r0, r7
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB16_7
 ; SOFT-NEXT:  .LBB16_4:
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    ldr r2, [sp] @ 4-byte Reload
-; SOFT-NEXT:    ldr r3, .LCPI16_0
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    mov r2, r7
-; SOFT-NEXT:    mov r3, r7
-; SOFT-NEXT:    bl __aeabi_dcmpge
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    bne .LBB16_6
-; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    mov r1, r0
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    bne .LBB16_8
+; SOFT-NEXT:  .LBB16_5:
+; SOFT-NEXT:    add sp, #4
+; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:  .LBB16_6:
-; SOFT-NEXT:    cmp r6, #0
-; SOFT-NEXT:    beq .LBB16_8
-; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    ldr r1, .LCPI16_1
+; SOFT-NEXT:    mvns r0, r6
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB16_4
+; SOFT-NEXT:  .LBB16_7:
+; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    beq .LBB16_5
 ; SOFT-NEXT:  .LBB16_8:
-; SOFT-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    add sp, #12
+; SOFT-NEXT:    ldr r1, .LCPI16_1
+; SOFT-NEXT:    add sp, #4
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.9:
@@ -1554,66 +1498,56 @@ define i64 @test_signed_i64_f64(double %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #20
-; SOFT-NEXT:    sub sp, #20
-; SOFT-NEXT:    mov r5, r1
-; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    .pad #4
+; SOFT-NEXT:    sub sp, #4
+; SOFT-NEXT:    mov r7, r1
+; SOFT-NEXT:    mov r5, r0
 ; SOFT-NEXT:    movs r6, #0
-; SOFT-NEXT:    mvns r2, r6
+; SOFT-NEXT:    mvns r4, r6
 ; SOFT-NEXT:    ldr r3, .LCPI17_0
-; SOFT-NEXT:    str r2, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT:    mov r2, r4
 ; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r1, r7
 ; SOFT-NEXT:    mov r2, r6
 ; SOFT-NEXT:    mov r3, r6
 ; SOFT-NEXT:    bl __aeabi_dcmpge
-; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r1, r7
 ; SOFT-NEXT:    bl __aeabi_d2ulz
-; SOFT-NEXT:    str r1, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r2, r0
+; SOFT-NEXT:    cmp r6, #0
 ; SOFT-NEXT:    bne .LBB17_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r2, r6
 ; SOFT-NEXT:  .LBB17_2:
-; SOFT-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    cmp r1, #0
-; SOFT-NEXT:    ldr r4, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    bne .LBB17_4
+; SOFT-NEXT:    ldr r3, [sp] @ 4-byte Reload
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    beq .LBB17_7
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r1, r0
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    beq .LBB17_8
 ; SOFT-NEXT:  .LBB17_4:
-; SOFT-NEXT:    str r1, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r4
-; SOFT-NEXT:    ldr r3, .LCPI17_0
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    mov r3, r6
-; SOFT-NEXT:    bl __aeabi_dcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r3, #0
 ; SOFT-NEXT:    bne .LBB17_6
-; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:  .LBB17_5:
+; SOFT-NEXT:    mov r4, r1
 ; SOFT-NEXT:  .LBB17_6:
-; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    bne .LBB17_8
-; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    ldr r4, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT:  .LBB17_8:
 ; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    add sp, #20
+; SOFT-NEXT:    add sp, #4
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:  .LBB17_7:
+; SOFT-NEXT:    mov r0, r2
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB17_4
+; SOFT-NEXT:  .LBB17_8:
+; SOFT-NEXT:    mov r1, r6
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    beq .LBB17_5
+; SOFT-NEXT:    b .LBB17_6
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.9:
 ; SOFT-NEXT:  .LCPI17_0:
@@ -1693,117 +1627,82 @@ define i100 @test_signed_i100_f64(double %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #28
-; SOFT-NEXT:    sub sp, #28
-; SOFT-NEXT:    mov r4, r1
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    movs r5, #0
-; SOFT-NEXT:    mvns r2, r5
+; SOFT-NEXT:    .pad #4
+; SOFT-NEXT:    sub sp, #4
+; SOFT-NEXT:    mov r7, r1
+; SOFT-NEXT:    mov r4, r0
+; SOFT-NEXT:    movs r6, #0
+; SOFT-NEXT:    mvns r2, r6
 ; SOFT-NEXT:    ldr r3, .LCPI18_0
-; SOFT-NEXT:    str r2, [sp, #20] @ 4-byte Spill
+; SOFT-NEXT:    str r2, [sp] @ 4-byte Spill
 ; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    str r0, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    mov r2, r5
-; SOFT-NEXT:    mov r3, r5
+; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    mov r2, r6
+; SOFT-NEXT:    mov r3, r6
 ; SOFT-NEXT:    bl __aeabi_dcmpge
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r1, r7
 ; SOFT-NEXT:    bl __fixunsdfti
-; SOFT-NEXT:    str r1, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    str r2, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    mov r4, r0
+; SOFT-NEXT:    mov r7, r1
+; SOFT-NEXT:    cmp r6, #0
 ; SOFT-NEXT:    bne .LBB18_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    mov r4, r6
 ; SOFT-NEXT:  .LBB18_2:
-; SOFT-NEXT:    str r3, [sp, #24] @ 4-byte Spill
-; SOFT-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT:    cmp r1, #0
-; SOFT-NEXT:    ldr r7, [sp, #20] @ 4-byte Reload
-; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    ldr r0, [sp] @ 4-byte Reload
 ; SOFT-NEXT:    bne .LBB18_4
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r1, r0
+; SOFT-NEXT:    mov r0, r4
 ; SOFT-NEXT:  .LBB18_4:
-; SOFT-NEXT:    str r1, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    mov r2, r7
-; SOFT-NEXT:    ldr r3, .LCPI18_0
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    mov r2, r5
-; SOFT-NEXT:    mov r3, r5
-; SOFT-NEXT:    bl __aeabi_dcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r6, #0
 ; SOFT-NEXT:    bne .LBB18_6
 ; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    mov r7, r6
 ; SOFT-NEXT:  .LBB18_6:
-; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    bne .LBB18_8
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    ldr r4, [sp] @ 4-byte Reload
+; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    beq .LBB18_13
 ; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    beq .LBB18_14
 ; SOFT-NEXT:  .LBB18_8:
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    mov r2, r7
-; SOFT-NEXT:    ldr r3, .LCPI18_0
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    mov r2, r5
-; SOFT-NEXT:    mov r3, r5
-; SOFT-NEXT:    bl __aeabi_dcmpge
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    bne .LBB18_10
-; SOFT-NEXT:  @ %bb.9:
-; SOFT-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB18_15
+; SOFT-NEXT:  .LBB18_9:
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    beq .LBB18_16
 ; SOFT-NEXT:  .LBB18_10:
-; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    bne .LBB18_12
-; SOFT-NEXT:  @ %bb.11:
-; SOFT-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB18_12
+; SOFT-NEXT:  .LBB18_11:
+; SOFT-NEXT:    movs r3, #15
 ; SOFT-NEXT:  .LBB18_12:
-; SOFT-NEXT:    str r0, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    mov r2, r7
-; SOFT-NEXT:    ldr r3, .LCPI18_0
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    mov r2, r5
-; SOFT-NEXT:    mov r3, r5
-; SOFT-NEXT:    bl __aeabi_dcmpge
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    ldr r3, [sp, #24] @ 4-byte Reload
-; SOFT-NEXT:    bne .LBB18_14
-; SOFT-NEXT:  @ %bb.13:
-; SOFT-NEXT:    mov r3, r0
+; SOFT-NEXT:    mov r2, r4
+; SOFT-NEXT:    add sp, #4
+; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:  .LBB18_13:
+; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB18_8
 ; SOFT-NEXT:  .LBB18_14:
-; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    beq .LBB18_16
-; SOFT-NEXT:  @ %bb.15:
-; SOFT-NEXT:    movs r3, #15
+; SOFT-NEXT:    mov r2, r6
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB18_9
+; SOFT-NEXT:  .LBB18_15:
+; SOFT-NEXT:    mov r4, r2
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB18_10
 ; SOFT-NEXT:  .LBB18_16:
-; SOFT-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT:    add sp, #28
-; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:    mov r3, r6
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB18_11
+; SOFT-NEXT:    b .LBB18_12
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.17:
 ; SOFT-NEXT:  .LCPI18_0:
@@ -1915,117 +1814,77 @@ define i128 @test_signed_i128_f64(double %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #28
-; SOFT-NEXT:    sub sp, #28
-; SOFT-NEXT:    mov r5, r1
-; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    .pad #4
+; SOFT-NEXT:    sub sp, #4
+; SOFT-NEXT:    mov r7, r1
+; SOFT-NEXT:    mov r5, r0
 ; SOFT-NEXT:    movs r6, #0
-; SOFT-NEXT:    mvns r2, r6
+; SOFT-NEXT:    mvns r4, r6
 ; SOFT-NEXT:    ldr r3, .LCPI19_0
-; SOFT-NEXT:    str r2, [sp, #24] @ 4-byte Spill
+; SOFT-NEXT:    mov r2, r4
 ; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    str r0, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r1, r7
 ; SOFT-NEXT:    mov r2, r6
 ; SOFT-NEXT:    mov r3, r6
 ; SOFT-NEXT:    bl __aeabi_dcmpge
-; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r1, r7
 ; SOFT-NEXT:    bl __fixunsdfti
-; SOFT-NEXT:    str r1, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    str r2, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT:    str r3, [sp, #20] @ 4-byte Spill
-; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r7, r1
+; SOFT-NEXT:    cmp r6, #0
 ; SOFT-NEXT:    bne .LBB19_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r5, r6
 ; SOFT-NEXT:  .LBB19_2:
-; SOFT-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT:    cmp r1, #0
-; SOFT-NEXT:    ldr r4, [sp, #24] @ 4-byte Reload
-; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    mov r0, r4
 ; SOFT-NEXT:    bne .LBB19_4
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r1, r0
+; SOFT-NEXT:    mov r0, r5
 ; SOFT-NEXT:  .LBB19_4:
-; SOFT-NEXT:    str r1, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r4
-; SOFT-NEXT:    ldr r3, .LCPI19_0
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    mov r3, r6
-; SOFT-NEXT:    bl __aeabi_dcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r6, #0
 ; SOFT-NEXT:    bne .LBB19_6
-; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:  .LBB19_6:
-; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    bne .LBB19_8
-; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:  .LBB19_8:
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r4
-; SOFT-NEXT:    ldr r3, .LCPI19_0
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    mov r3, r6
-; SOFT-NEXT:    bl __aeabi_dcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:  @ %bb.5:
+; SOFT-NEXT:    mov r7, r6
+; SOFT-NEXT:  .LBB19_6:
+; SOFT-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; SOFT-NEXT:    cmp r1, #0
+; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    bne .LBB19_8
+; SOFT-NEXT:  @ %bb.7:
+; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:  .LBB19_8:
+; SOFT-NEXT:    cmp r6, #0
 ; SOFT-NEXT:    bne .LBB19_10
 ; SOFT-NEXT:  @ %bb.9:
-; SOFT-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:    mov r2, r6
 ; SOFT-NEXT:  .LBB19_10:
-; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    ldr r5, [sp] @ 4-byte Reload
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    mov r5, r4
 ; SOFT-NEXT:    bne .LBB19_12
 ; SOFT-NEXT:  @ %bb.11:
-; SOFT-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT:    mov r5, r2
 ; SOFT-NEXT:  .LBB19_12:
-; SOFT-NEXT:    str r0, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r4
-; SOFT-NEXT:    ldr r3, .LCPI19_0
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    mov r3, r6
-; SOFT-NEXT:    bl __aeabi_dcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r6, #0
 ; SOFT-NEXT:    bne .LBB19_14
 ; SOFT-NEXT:  @ %bb.13:
-; SOFT-NEXT:    str r0, [sp, #20] @ 4-byte Spill
+; SOFT-NEXT:    mov r3, r6
 ; SOFT-NEXT:  .LBB19_14:
-; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT:    ldr r2, [sp] @ 4-byte Reload
+; SOFT-NEXT:    cmp r2, #0
 ; SOFT-NEXT:    bne .LBB19_16
 ; SOFT-NEXT:  @ %bb.15:
-; SOFT-NEXT:    ldr r4, [sp, #20] @ 4-byte Reload
+; SOFT-NEXT:    mov r4, r3
 ; SOFT-NEXT:  .LBB19_16:
+; SOFT-NEXT:    mov r2, r5
 ; SOFT-NEXT:    mov r3, r4
-; SOFT-NEXT:    add sp, #28
+; SOFT-NEXT:    add sp, #4
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.17:
@@ -2617,48 +2476,45 @@ define i50 @test_signed_i50_f16(half %f) nounwind {
 ; SOFT-NEXT:    sub sp, #4
 ; SOFT-NEXT:    uxth r0, r0
 ; SOFT-NEXT:    bl __aeabi_h2f
-; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    mov r7, r0
 ; SOFT-NEXT:    ldr r1, .LCPI26_0
 ; SOFT-NEXT:    bl __aeabi_fcmpgt
 ; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    movs r7, #0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    movs r6, #0
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpge
 ; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    bl __aeabi_f2ulz
 ; SOFT-NEXT:    cmp r5, #0
-; SOFT-NEXT:    bne .LBB26_2
+; SOFT-NEXT:    beq .LBB26_5
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB26_6
 ; SOFT-NEXT:  .LBB26_2:
-; SOFT-NEXT:    mov r5, r1
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB26_7
+; SOFT-NEXT:  .LBB26_3:
 ; SOFT-NEXT:    cmp r4, #0
-; SOFT-NEXT:    beq .LBB26_4
-; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mvns r0, r7
+; SOFT-NEXT:    bne .LBB26_8
 ; SOFT-NEXT:  .LBB26_4:
-; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    ldr r1, .LCPI26_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r7
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    bne .LBB26_6
-; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    add sp, #4
+; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:  .LBB26_5:
+; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB26_2
 ; SOFT-NEXT:  .LBB26_6:
+; SOFT-NEXT:    mvns r0, r6
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB26_3
+; SOFT-NEXT:  .LBB26_7:
+; SOFT-NEXT:    mov r1, r5
 ; SOFT-NEXT:    cmp r4, #0
-; SOFT-NEXT:    beq .LBB26_8
-; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    ldr r5, .LCPI26_1
+; SOFT-NEXT:    beq .LBB26_4
 ; SOFT-NEXT:  .LBB26_8:
-; SOFT-NEXT:    ldr r0, [sp] @ 4-byte Reload
-; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    ldr r1, .LCPI26_1
 ; SOFT-NEXT:    add sp, #4
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
@@ -2733,56 +2589,52 @@ define i64 @test_signed_i64_f16(half %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #12
-; SOFT-NEXT:    sub sp, #12
+; SOFT-NEXT:    .pad #4
+; SOFT-NEXT:    sub sp, #4
 ; SOFT-NEXT:    uxth r0, r0
 ; SOFT-NEXT:    bl __aeabi_h2f
-; SOFT-NEXT:    mov r4, r0
+; SOFT-NEXT:    mov r7, r0
 ; SOFT-NEXT:    ldr r1, .LCPI27_0
 ; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    movs r7, #0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    mov r4, r0
+; SOFT-NEXT:    movs r6, #0
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpge
 ; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    bl __aeabi_f2ulz
-; SOFT-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    mov r3, r0
 ; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB27_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r3, r5
 ; SOFT-NEXT:  .LBB27_2:
-; SOFT-NEXT:    mvns r5, r7
-; SOFT-NEXT:    cmp r6, #0
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    bne .LBB27_4
+; SOFT-NEXT:    mvns r2, r6
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r0, r2
+; SOFT-NEXT:    beq .LBB27_7
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r1, r0
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB27_8
 ; SOFT-NEXT:  .LBB27_4:
-; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    ldr r1, .LCPI27_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r7
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    bne .LBB27_6
-; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:  .LBB27_5:
+; SOFT-NEXT:    mov r2, r1
 ; SOFT-NEXT:  .LBB27_6:
-; SOFT-NEXT:    cmp r6, #0
-; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    bne .LBB27_8
-; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    ldr r5, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    mov r1, r2
+; SOFT-NEXT:    add sp, #4
+; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:  .LBB27_7:
+; SOFT-NEXT:    mov r0, r3
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB27_4
 ; SOFT-NEXT:  .LBB27_8:
 ; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    add sp, #12
-; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB27_5
+; SOFT-NEXT:    b .LBB27_6
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.9:
 ; SOFT-NEXT:  .LCPI27_0:
@@ -2863,96 +2715,78 @@ define i100 @test_signed_i100_f16(half %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #20
-; SOFT-NEXT:    sub sp, #20
+; SOFT-NEXT:    .pad #4
+; SOFT-NEXT:    sub sp, #4
 ; SOFT-NEXT:    uxth r0, r0
 ; SOFT-NEXT:    bl __aeabi_h2f
-; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r7, r0
 ; SOFT-NEXT:    ldr r1, .LCPI28_0
 ; SOFT-NEXT:    bl __aeabi_fcmpgt
 ; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    movs r6, #0
-; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    bl __fixunssfti
-; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    str r2, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    mov r7, r1
+; SOFT-NEXT:    str r2, [sp] @ 4-byte Spill
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB28_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    mov r0, r5
 ; SOFT-NEXT:  .LBB28_2:
-; SOFT-NEXT:    str r3, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT:    mvns r7, r6
+; SOFT-NEXT:    mvns r2, r6
 ; SOFT-NEXT:    cmp r4, #0
-; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    mov r6, r2
 ; SOFT-NEXT:    bne .LBB28_4
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r1, r0
+; SOFT-NEXT:    mov r6, r0
 ; SOFT-NEXT:  .LBB28_4:
-; SOFT-NEXT:    str r1, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    ldr r1, .LCPI28_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB28_6
 ; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    mov r7, r5
 ; SOFT-NEXT:  .LBB28_6:
 ; SOFT-NEXT:    cmp r4, #0
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    bne .LBB28_8
+; SOFT-NEXT:    mov r1, r2
+; SOFT-NEXT:    beq .LBB28_13
 ; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB28_14
 ; SOFT-NEXT:  .LBB28_8:
-; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    ldr r1, .LCPI28_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    bne .LBB28_10
-; SOFT-NEXT:  @ %bb.9:
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB28_15
+; SOFT-NEXT:  .LBB28_9:
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB28_16
 ; SOFT-NEXT:  .LBB28_10:
 ; SOFT-NEXT:    cmp r4, #0
-; SOFT-NEXT:    bne .LBB28_12
-; SOFT-NEXT:  @ %bb.11:
-; SOFT-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    beq .LBB28_12
+; SOFT-NEXT:  .LBB28_11:
+; SOFT-NEXT:    movs r3, #15
 ; SOFT-NEXT:  .LBB28_12:
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    ldr r1, .LCPI28_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    ldr r3, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT:    bne .LBB28_14
-; SOFT-NEXT:  @ %bb.13:
-; SOFT-NEXT:    mov r3, r0
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    add sp, #4
+; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:  .LBB28_13:
+; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB28_8
 ; SOFT-NEXT:  .LBB28_14:
+; SOFT-NEXT:    str r5, [sp] @ 4-byte Spill
 ; SOFT-NEXT:    cmp r4, #0
-; SOFT-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    beq .LBB28_16
-; SOFT-NEXT:  @ %bb.15:
-; SOFT-NEXT:    movs r3, #15
+; SOFT-NEXT:    bne .LBB28_9
+; SOFT-NEXT:  .LBB28_15:
+; SOFT-NEXT:    ldr r2, [sp] @ 4-byte Reload
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB28_10
 ; SOFT-NEXT:  .LBB28_16:
-; SOFT-NEXT:    mov r2, r7
-; SOFT-NEXT:    add sp, #20
-; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:    mov r3, r5
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB28_11
+; SOFT-NEXT:    b .LBB28_12
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.17:
 ; SOFT-NEXT:  .LCPI28_0:
@@ -3065,98 +2899,77 @@ define i128 @test_signed_i128_f16(half %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #20
-; SOFT-NEXT:    sub sp, #20
+; SOFT-NEXT:    .pad #4
+; SOFT-NEXT:    sub sp, #4
 ; SOFT-NEXT:    uxth r0, r0
 ; SOFT-NEXT:    bl __aeabi_h2f
-; SOFT-NEXT:    mov r4, r0
+; SOFT-NEXT:    mov r7, r0
 ; SOFT-NEXT:    ldr r1, .LCPI29_0
 ; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    movs r7, #0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    mov r4, r0
+; SOFT-NEXT:    movs r6, #0
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    bl __fixunssfti
-; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    str r2, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    str r3, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    mov r7, r1
+; SOFT-NEXT:    str r3, [sp] @ 4-byte Spill
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB29_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r0, r5
 ; SOFT-NEXT:  .LBB29_2:
-; SOFT-NEXT:    mvns r6, r7
-; SOFT-NEXT:    cmp r5, #0
-; SOFT-NEXT:    mov r1, r6
+; SOFT-NEXT:    mvns r6, r6
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r3, r6
 ; SOFT-NEXT:    bne .LBB29_4
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r1, r0
+; SOFT-NEXT:    mov r3, r0
 ; SOFT-NEXT:  .LBB29_4:
-; SOFT-NEXT:    str r1, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    ldr r1, .LCPI29_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r7
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB29_6
 ; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    mov r7, r5
 ; SOFT-NEXT:  .LBB29_6:
-; SOFT-NEXT:    cmp r5, #0
-; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:    bne .LBB29_8
 ; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:  .LBB29_8:
-; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    ldr r1, .LCPI29_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    mov r0, r4
 ; SOFT-NEXT:    mov r1, r7
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:  .LBB29_8:
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB29_10
 ; SOFT-NEXT:  @ %bb.9:
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    mov r2, r5
 ; SOFT-NEXT:  .LBB29_10:
-; SOFT-NEXT:    cmp r5, #0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    bne .LBB29_12
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r7, r6
+; SOFT-NEXT:    beq .LBB29_15
 ; SOFT-NEXT:  @ %bb.11:
-; SOFT-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB29_16
 ; SOFT-NEXT:  .LBB29_12:
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    ldr r1, .LCPI29_0
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r7
-; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    bne .LBB29_14
-; SOFT-NEXT:  @ %bb.13:
-; SOFT-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT:  .LBB29_13:
+; SOFT-NEXT:    ldr r6, [sp] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB29_14:
-; SOFT-NEXT:    cmp r5, #0
-; SOFT-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    bne .LBB29_16
-; SOFT-NEXT:  @ %bb.15:
-; SOFT-NEXT:    ldr r6, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT:  .LBB29_16:
+; SOFT-NEXT:    mov r0, r3
+; SOFT-NEXT:    mov r2, r7
 ; SOFT-NEXT:    mov r3, r6
-; SOFT-NEXT:    add sp, #20
+; SOFT-NEXT:    add sp, #4
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:  .LBB29_15:
+; SOFT-NEXT:    mov r7, r2
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB29_12
+; SOFT-NEXT:  .LBB29_16:
+; SOFT-NEXT:    str r5, [sp] @ 4-byte Spill
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB29_13
+; SOFT-NEXT:    b .LBB29_14
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.17:
 ; SOFT-NEXT:  .LCPI29_0:

diff  --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll
index b4f9e3b9e2a79..f207d8b615ff8 100644
--- a/llvm/test/CodeGen/RISCV/double-convert.ll
+++ b/llvm/test/CodeGen/RISCV/double-convert.ll
@@ -538,27 +538,27 @@ define i64 @fcvt_l_d_sat(double %a) nounwind {
 ; RV32IFD-NEXT:    fmv.d fs0, fa0
 ; RV32IFD-NEXT:    fle.d s0, fa5, fa0
 ; RV32IFD-NEXT:    call __fixdfdi at plt
-; RV32IFD-NEXT:    lui a3, 524288
-; RV32IFD-NEXT:    bnez s0, .LBB12_2
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    lui a2, 524288
+; RV32IFD-NEXT:    beqz s0, .LBB12_2
 ; RV32IFD-NEXT:  # %bb.1: # %start
-; RV32IFD-NEXT:    lui a1, 524288
+; RV32IFD-NEXT:    mv a2, a1
 ; RV32IFD-NEXT:  .LBB12_2: # %start
-; RV32IFD-NEXT:    lui a2, %hi(.LCPI12_1)
-; RV32IFD-NEXT:    fld fa5, %lo(.LCPI12_1)(a2)
-; RV32IFD-NEXT:    flt.d a2, fa5, fs0
-; RV32IFD-NEXT:    beqz a2, .LBB12_4
+; RV32IFD-NEXT:    lui a1, %hi(.LCPI12_1)
+; RV32IFD-NEXT:    fld fa5, %lo(.LCPI12_1)(a1)
+; RV32IFD-NEXT:    flt.d a3, fa5, fs0
+; RV32IFD-NEXT:    beqz a3, .LBB12_4
 ; RV32IFD-NEXT:  # %bb.3:
-; RV32IFD-NEXT:    addi a1, a3, -1
+; RV32IFD-NEXT:    addi a2, a4, -1
 ; RV32IFD-NEXT:  .LBB12_4: # %start
-; RV32IFD-NEXT:    feq.d a3, fs0, fs0
-; RV32IFD-NEXT:    seqz a3, a3
-; RV32IFD-NEXT:    addi a3, a3, -1
-; RV32IFD-NEXT:    and a1, a3, a1
-; RV32IFD-NEXT:    neg a2, a2
-; RV32IFD-NEXT:    neg a4, s0
-; RV32IFD-NEXT:    and a0, a4, a0
-; RV32IFD-NEXT:    or a0, a2, a0
+; RV32IFD-NEXT:    feq.d a1, fs0, fs0
+; RV32IFD-NEXT:    neg a4, a1
+; RV32IFD-NEXT:    and a1, a4, a2
+; RV32IFD-NEXT:    neg a2, a3
+; RV32IFD-NEXT:    neg a3, s0
 ; RV32IFD-NEXT:    and a0, a3, a0
+; RV32IFD-NEXT:    or a0, a2, a0
+; RV32IFD-NEXT:    and a0, a4, a0
 ; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
@@ -584,32 +584,30 @@ define i64 @fcvt_l_d_sat(double %a) nounwind {
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s6, 0(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a1
 ; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    lui s2, 278016
-; RV32I-NEXT:    addi s2, s2, -1
+; RV32I-NEXT:    lui a3, 278016
+; RV32I-NEXT:    addi a3, a3, -1
 ; RV32I-NEXT:    li a2, -1
-; RV32I-NEXT:    mv a3, s2
 ; RV32I-NEXT:    call __gtdf2 at plt
-; RV32I-NEXT:    mv s4, a0
+; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    lui a3, 802304
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    li a2, 0
 ; RV32I-NEXT:    call __gedf2 at plt
-; RV32I-NEXT:    mv s6, a0
+; RV32I-NEXT:    mv s3, a0
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    call __fixdfdi at plt
-; RV32I-NEXT:    mv s3, a0
+; RV32I-NEXT:    mv s4, a0
 ; RV32I-NEXT:    mv s5, a1
 ; RV32I-NEXT:    lui a0, 524288
-; RV32I-NEXT:    bgez s6, .LBB12_2
+; RV32I-NEXT:    bgez s3, .LBB12_2
 ; RV32I-NEXT:  # %bb.1: # %start
 ; RV32I-NEXT:    lui s5, 524288
 ; RV32I-NEXT:  .LBB12_2: # %start
-; RV32I-NEXT:    blez s4, .LBB12_4
+; RV32I-NEXT:    blez s2, .LBB12_4
 ; RV32I-NEXT:  # %bb.3: # %start
 ; RV32I-NEXT:    addi s5, a0, -1
 ; RV32I-NEXT:  .LBB12_4: # %start
@@ -620,32 +618,14 @@ define i64 @fcvt_l_d_sat(double %a) nounwind {
 ; RV32I-NEXT:    call __unorddf2 at plt
 ; RV32I-NEXT:    snez a0, a0
 ; RV32I-NEXT:    addi a0, a0, -1
-; RV32I-NEXT:    and s4, a0, s5
-; RV32I-NEXT:    lui a3, 802304
-; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s0
-; RV32I-NEXT:    li a2, 0
-; RV32I-NEXT:    call __gedf2 at plt
-; RV32I-NEXT:    slti a0, a0, 0
-; RV32I-NEXT:    addi a0, a0, -1
-; RV32I-NEXT:    and s3, a0, s3
-; RV32I-NEXT:    li a2, -1
-; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s0
-; RV32I-NEXT:    mv a3, s2
-; RV32I-NEXT:    call __gtdf2 at plt
-; RV32I-NEXT:    sgtz a0, a0
-; RV32I-NEXT:    neg a0, a0
-; RV32I-NEXT:    or s2, a0, s3
-; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s0
-; RV32I-NEXT:    mv a2, s1
-; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:    call __unorddf2 at plt
-; RV32I-NEXT:    snez a0, a0
-; RV32I-NEXT:    addi a0, a0, -1
-; RV32I-NEXT:    and a0, a0, s2
-; RV32I-NEXT:    mv a1, s4
+; RV32I-NEXT:    and a1, a0, s5
+; RV32I-NEXT:    slti a2, s3, 0
+; RV32I-NEXT:    addi a2, a2, -1
+; RV32I-NEXT:    and a2, a2, s4
+; RV32I-NEXT:    sgtz a3, s2
+; RV32I-NEXT:    neg a3, a3
+; RV32I-NEXT:    or a2, a3, a2
+; RV32I-NEXT:    and a0, a0, a2
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
@@ -653,7 +633,6 @@ define i64 @fcvt_l_d_sat(double %a) nounwind {
 ; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s6, 0(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -785,54 +764,33 @@ define i64 @fcvt_lu_d_sat(double %a) nounwind {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a1
 ; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    lui s2, 278272
-; RV32I-NEXT:    addi s2, s2, -1
+; RV32I-NEXT:    lui a3, 278272
+; RV32I-NEXT:    addi a3, a3, -1
 ; RV32I-NEXT:    li a2, -1
-; RV32I-NEXT:    mv a3, s2
 ; RV32I-NEXT:    call __gtdf2 at plt
 ; RV32I-NEXT:    sgtz a0, a0
-; RV32I-NEXT:    neg s4, a0
+; RV32I-NEXT:    neg s2, a0
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    li a2, 0
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __gedf2 at plt
 ; RV32I-NEXT:    slti a0, a0, 0
-; RV32I-NEXT:    addi s5, a0, -1
+; RV32I-NEXT:    addi s3, a0, -1
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    call __fixunsdfdi at plt
-; RV32I-NEXT:    mv s3, a1
-; RV32I-NEXT:    and a0, s5, a0
-; RV32I-NEXT:    or s4, s4, a0
-; RV32I-NEXT:    li a2, -1
-; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s0
-; RV32I-NEXT:    mv a3, s2
-; RV32I-NEXT:    call __gtdf2 at plt
-; RV32I-NEXT:    sgtz a0, a0
-; RV32I-NEXT:    neg s2, a0
-; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s0
-; RV32I-NEXT:    li a2, 0
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    call __gedf2 at plt
-; RV32I-NEXT:    slti a0, a0, 0
-; RV32I-NEXT:    addi a0, a0, -1
-; RV32I-NEXT:    and a0, a0, s3
-; RV32I-NEXT:    or a1, s2, a0
-; RV32I-NEXT:    mv a0, s4
+; RV32I-NEXT:    and a0, s3, a0
+; RV32I-NEXT:    or a0, s2, a0
+; RV32I-NEXT:    and a1, s3, a1
+; RV32I-NEXT:    or a1, s2, a1
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -1288,13 +1246,12 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind {
 ; RV32IFD-NEXT:    fld fa5, %lo(.LCPI26_0)(a0)
 ; RV32IFD-NEXT:    lui a0, %hi(.LCPI26_1)
 ; RV32IFD-NEXT:    fld fa4, %lo(.LCPI26_1)(a0)
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    neg a0, a0
 ; RV32IFD-NEXT:    fmax.d fa5, fa0, fa5
 ; RV32IFD-NEXT:    fmin.d fa5, fa5, fa4
-; RV32IFD-NEXT:    fcvt.w.d a0, fa5, rtz
-; RV32IFD-NEXT:    feq.d a1, fa0, fa0
-; RV32IFD-NEXT:    seqz a1, a1
-; RV32IFD-NEXT:    addi a1, a1, -1
-; RV32IFD-NEXT:    and a0, a1, a0
+; RV32IFD-NEXT:    fcvt.w.d a1, fa5, rtz
+; RV32IFD-NEXT:    and a0, a0, a1
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: fcvt_w_s_sat_i16:
@@ -1303,13 +1260,12 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind {
 ; RV64IFD-NEXT:    fld fa5, %lo(.LCPI26_0)(a0)
 ; RV64IFD-NEXT:    lui a0, %hi(.LCPI26_1)
 ; RV64IFD-NEXT:    fld fa4, %lo(.LCPI26_1)(a0)
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
 ; RV64IFD-NEXT:    fmax.d fa5, fa0, fa5
+; RV64IFD-NEXT:    neg a0, a0
 ; RV64IFD-NEXT:    fmin.d fa5, fa5, fa4
-; RV64IFD-NEXT:    fcvt.l.d a0, fa5, rtz
-; RV64IFD-NEXT:    feq.d a1, fa0, fa0
-; RV64IFD-NEXT:    seqz a1, a1
-; RV64IFD-NEXT:    addi a1, a1, -1
-; RV64IFD-NEXT:    and a0, a1, a0
+; RV64IFD-NEXT:    fcvt.l.d a1, fa5, rtz
+; RV64IFD-NEXT:    and a0, a0, a1
 ; RV64IFD-NEXT:    ret
 ;
 ; RV32I-LABEL: fcvt_w_s_sat_i16:
@@ -1595,13 +1551,12 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind {
 ; RV32IFD-NEXT:    fld fa5, %lo(.LCPI30_0)(a0)
 ; RV32IFD-NEXT:    lui a0, %hi(.LCPI30_1)
 ; RV32IFD-NEXT:    fld fa4, %lo(.LCPI30_1)(a0)
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    neg a0, a0
 ; RV32IFD-NEXT:    fmax.d fa5, fa0, fa5
 ; RV32IFD-NEXT:    fmin.d fa5, fa5, fa4
-; RV32IFD-NEXT:    fcvt.w.d a0, fa5, rtz
-; RV32IFD-NEXT:    feq.d a1, fa0, fa0
-; RV32IFD-NEXT:    seqz a1, a1
-; RV32IFD-NEXT:    addi a1, a1, -1
-; RV32IFD-NEXT:    and a0, a1, a0
+; RV32IFD-NEXT:    fcvt.w.d a1, fa5, rtz
+; RV32IFD-NEXT:    and a0, a0, a1
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: fcvt_w_s_sat_i8:
@@ -1610,13 +1565,12 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind {
 ; RV64IFD-NEXT:    fld fa5, %lo(.LCPI30_0)(a0)
 ; RV64IFD-NEXT:    lui a0, %hi(.LCPI30_1)
 ; RV64IFD-NEXT:    fld fa4, %lo(.LCPI30_1)(a0)
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
 ; RV64IFD-NEXT:    fmax.d fa5, fa0, fa5
+; RV64IFD-NEXT:    neg a0, a0
 ; RV64IFD-NEXT:    fmin.d fa5, fa5, fa4
-; RV64IFD-NEXT:    fcvt.l.d a0, fa5, rtz
-; RV64IFD-NEXT:    feq.d a1, fa0, fa0
-; RV64IFD-NEXT:    seqz a1, a1
-; RV64IFD-NEXT:    addi a1, a1, -1
-; RV64IFD-NEXT:    and a0, a1, a0
+; RV64IFD-NEXT:    fcvt.l.d a1, fa5, rtz
+; RV64IFD-NEXT:    and a0, a0, a1
 ; RV64IFD-NEXT:    ret
 ;
 ; RV32I-LABEL: fcvt_w_s_sat_i8:

diff  --git a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
index fa67e2a4616b4..03e646b7ac173 100644
--- a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
+++ b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
@@ -31,27 +31,27 @@ define i64 @test_floor_si64(double %x) nounwind {
 ; RV32IFD-NEXT:    fmv.d fs0, fa0
 ; RV32IFD-NEXT:    fle.d s0, fa5, fa0
 ; RV32IFD-NEXT:    call __fixdfdi at plt
-; RV32IFD-NEXT:    lui a3, 524288
-; RV32IFD-NEXT:    bnez s0, .LBB1_2
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    lui a2, 524288
+; RV32IFD-NEXT:    beqz s0, .LBB1_2
 ; RV32IFD-NEXT:  # %bb.1:
-; RV32IFD-NEXT:    lui a1, 524288
+; RV32IFD-NEXT:    mv a2, a1
 ; RV32IFD-NEXT:  .LBB1_2:
-; RV32IFD-NEXT:    lui a2, %hi(.LCPI1_1)
-; RV32IFD-NEXT:    fld fa5, %lo(.LCPI1_1)(a2)
-; RV32IFD-NEXT:    flt.d a2, fa5, fs0
-; RV32IFD-NEXT:    beqz a2, .LBB1_4
+; RV32IFD-NEXT:    lui a1, %hi(.LCPI1_1)
+; RV32IFD-NEXT:    fld fa5, %lo(.LCPI1_1)(a1)
+; RV32IFD-NEXT:    flt.d a3, fa5, fs0
+; RV32IFD-NEXT:    beqz a3, .LBB1_4
 ; RV32IFD-NEXT:  # %bb.3:
-; RV32IFD-NEXT:    addi a1, a3, -1
+; RV32IFD-NEXT:    addi a2, a4, -1
 ; RV32IFD-NEXT:  .LBB1_4:
-; RV32IFD-NEXT:    feq.d a3, fs0, fs0
-; RV32IFD-NEXT:    seqz a3, a3
-; RV32IFD-NEXT:    addi a3, a3, -1
-; RV32IFD-NEXT:    and a1, a3, a1
-; RV32IFD-NEXT:    neg a2, a2
-; RV32IFD-NEXT:    neg a4, s0
-; RV32IFD-NEXT:    and a0, a4, a0
-; RV32IFD-NEXT:    or a0, a2, a0
+; RV32IFD-NEXT:    feq.d a1, fs0, fs0
+; RV32IFD-NEXT:    neg a4, a1
+; RV32IFD-NEXT:    and a1, a4, a2
+; RV32IFD-NEXT:    neg a2, a3
+; RV32IFD-NEXT:    neg a3, s0
 ; RV32IFD-NEXT:    and a0, a3, a0
+; RV32IFD-NEXT:    or a0, a2, a0
+; RV32IFD-NEXT:    and a0, a4, a0
 ; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
@@ -151,27 +151,27 @@ define i64 @test_ceil_si64(double %x) nounwind {
 ; RV32IFD-NEXT:    fmv.d fs0, fa0
 ; RV32IFD-NEXT:    fle.d s0, fa5, fa0
 ; RV32IFD-NEXT:    call __fixdfdi at plt
-; RV32IFD-NEXT:    lui a3, 524288
-; RV32IFD-NEXT:    bnez s0, .LBB5_2
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    lui a2, 524288
+; RV32IFD-NEXT:    beqz s0, .LBB5_2
 ; RV32IFD-NEXT:  # %bb.1:
-; RV32IFD-NEXT:    lui a1, 524288
+; RV32IFD-NEXT:    mv a2, a1
 ; RV32IFD-NEXT:  .LBB5_2:
-; RV32IFD-NEXT:    lui a2, %hi(.LCPI5_1)
-; RV32IFD-NEXT:    fld fa5, %lo(.LCPI5_1)(a2)
-; RV32IFD-NEXT:    flt.d a2, fa5, fs0
-; RV32IFD-NEXT:    beqz a2, .LBB5_4
+; RV32IFD-NEXT:    lui a1, %hi(.LCPI5_1)
+; RV32IFD-NEXT:    fld fa5, %lo(.LCPI5_1)(a1)
+; RV32IFD-NEXT:    flt.d a3, fa5, fs0
+; RV32IFD-NEXT:    beqz a3, .LBB5_4
 ; RV32IFD-NEXT:  # %bb.3:
-; RV32IFD-NEXT:    addi a1, a3, -1
+; RV32IFD-NEXT:    addi a2, a4, -1
 ; RV32IFD-NEXT:  .LBB5_4:
-; RV32IFD-NEXT:    feq.d a3, fs0, fs0
-; RV32IFD-NEXT:    seqz a3, a3
-; RV32IFD-NEXT:    addi a3, a3, -1
-; RV32IFD-NEXT:    and a1, a3, a1
-; RV32IFD-NEXT:    neg a2, a2
-; RV32IFD-NEXT:    neg a4, s0
-; RV32IFD-NEXT:    and a0, a4, a0
-; RV32IFD-NEXT:    or a0, a2, a0
+; RV32IFD-NEXT:    feq.d a1, fs0, fs0
+; RV32IFD-NEXT:    neg a4, a1
+; RV32IFD-NEXT:    and a1, a4, a2
+; RV32IFD-NEXT:    neg a2, a3
+; RV32IFD-NEXT:    neg a3, s0
 ; RV32IFD-NEXT:    and a0, a3, a0
+; RV32IFD-NEXT:    or a0, a2, a0
+; RV32IFD-NEXT:    and a0, a4, a0
 ; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
@@ -271,27 +271,27 @@ define i64 @test_trunc_si64(double %x) nounwind {
 ; RV32IFD-NEXT:    fmv.d fs0, fa0
 ; RV32IFD-NEXT:    fle.d s0, fa5, fa0
 ; RV32IFD-NEXT:    call __fixdfdi at plt
-; RV32IFD-NEXT:    lui a3, 524288
-; RV32IFD-NEXT:    bnez s0, .LBB9_2
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    lui a2, 524288
+; RV32IFD-NEXT:    beqz s0, .LBB9_2
 ; RV32IFD-NEXT:  # %bb.1:
-; RV32IFD-NEXT:    lui a1, 524288
+; RV32IFD-NEXT:    mv a2, a1
 ; RV32IFD-NEXT:  .LBB9_2:
-; RV32IFD-NEXT:    lui a2, %hi(.LCPI9_1)
-; RV32IFD-NEXT:    fld fa5, %lo(.LCPI9_1)(a2)
-; RV32IFD-NEXT:    flt.d a2, fa5, fs0
-; RV32IFD-NEXT:    beqz a2, .LBB9_4
+; RV32IFD-NEXT:    lui a1, %hi(.LCPI9_1)
+; RV32IFD-NEXT:    fld fa5, %lo(.LCPI9_1)(a1)
+; RV32IFD-NEXT:    flt.d a3, fa5, fs0
+; RV32IFD-NEXT:    beqz a3, .LBB9_4
 ; RV32IFD-NEXT:  # %bb.3:
-; RV32IFD-NEXT:    addi a1, a3, -1
+; RV32IFD-NEXT:    addi a2, a4, -1
 ; RV32IFD-NEXT:  .LBB9_4:
-; RV32IFD-NEXT:    feq.d a3, fs0, fs0
-; RV32IFD-NEXT:    seqz a3, a3
-; RV32IFD-NEXT:    addi a3, a3, -1
-; RV32IFD-NEXT:    and a1, a3, a1
-; RV32IFD-NEXT:    neg a2, a2
-; RV32IFD-NEXT:    neg a4, s0
-; RV32IFD-NEXT:    and a0, a4, a0
-; RV32IFD-NEXT:    or a0, a2, a0
+; RV32IFD-NEXT:    feq.d a1, fs0, fs0
+; RV32IFD-NEXT:    neg a4, a1
+; RV32IFD-NEXT:    and a1, a4, a2
+; RV32IFD-NEXT:    neg a2, a3
+; RV32IFD-NEXT:    neg a3, s0
 ; RV32IFD-NEXT:    and a0, a3, a0
+; RV32IFD-NEXT:    or a0, a2, a0
+; RV32IFD-NEXT:    and a0, a4, a0
 ; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
@@ -391,27 +391,27 @@ define i64 @test_round_si64(double %x) nounwind {
 ; RV32IFD-NEXT:    fmv.d fs0, fa0
 ; RV32IFD-NEXT:    fle.d s0, fa5, fa0
 ; RV32IFD-NEXT:    call __fixdfdi at plt
-; RV32IFD-NEXT:    lui a3, 524288
-; RV32IFD-NEXT:    bnez s0, .LBB13_2
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    lui a2, 524288
+; RV32IFD-NEXT:    beqz s0, .LBB13_2
 ; RV32IFD-NEXT:  # %bb.1:
-; RV32IFD-NEXT:    lui a1, 524288
+; RV32IFD-NEXT:    mv a2, a1
 ; RV32IFD-NEXT:  .LBB13_2:
-; RV32IFD-NEXT:    lui a2, %hi(.LCPI13_1)
-; RV32IFD-NEXT:    fld fa5, %lo(.LCPI13_1)(a2)
-; RV32IFD-NEXT:    flt.d a2, fa5, fs0
-; RV32IFD-NEXT:    beqz a2, .LBB13_4
+; RV32IFD-NEXT:    lui a1, %hi(.LCPI13_1)
+; RV32IFD-NEXT:    fld fa5, %lo(.LCPI13_1)(a1)
+; RV32IFD-NEXT:    flt.d a3, fa5, fs0
+; RV32IFD-NEXT:    beqz a3, .LBB13_4
 ; RV32IFD-NEXT:  # %bb.3:
-; RV32IFD-NEXT:    addi a1, a3, -1
+; RV32IFD-NEXT:    addi a2, a4, -1
 ; RV32IFD-NEXT:  .LBB13_4:
-; RV32IFD-NEXT:    feq.d a3, fs0, fs0
-; RV32IFD-NEXT:    seqz a3, a3
-; RV32IFD-NEXT:    addi a3, a3, -1
-; RV32IFD-NEXT:    and a1, a3, a1
-; RV32IFD-NEXT:    neg a2, a2
-; RV32IFD-NEXT:    neg a4, s0
-; RV32IFD-NEXT:    and a0, a4, a0
-; RV32IFD-NEXT:    or a0, a2, a0
+; RV32IFD-NEXT:    feq.d a1, fs0, fs0
+; RV32IFD-NEXT:    neg a4, a1
+; RV32IFD-NEXT:    and a1, a4, a2
+; RV32IFD-NEXT:    neg a2, a3
+; RV32IFD-NEXT:    neg a3, s0
 ; RV32IFD-NEXT:    and a0, a3, a0
+; RV32IFD-NEXT:    or a0, a2, a0
+; RV32IFD-NEXT:    and a0, a4, a0
 ; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
@@ -511,27 +511,27 @@ define i64 @test_roundeven_si64(double %x) nounwind {
 ; RV32IFD-NEXT:    fmv.d fs0, fa0
 ; RV32IFD-NEXT:    fle.d s0, fa5, fa0
 ; RV32IFD-NEXT:    call __fixdfdi at plt
-; RV32IFD-NEXT:    lui a3, 524288
-; RV32IFD-NEXT:    bnez s0, .LBB17_2
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    lui a2, 524288
+; RV32IFD-NEXT:    beqz s0, .LBB17_2
 ; RV32IFD-NEXT:  # %bb.1:
-; RV32IFD-NEXT:    lui a1, 524288
+; RV32IFD-NEXT:    mv a2, a1
 ; RV32IFD-NEXT:  .LBB17_2:
-; RV32IFD-NEXT:    lui a2, %hi(.LCPI17_1)
-; RV32IFD-NEXT:    fld fa5, %lo(.LCPI17_1)(a2)
-; RV32IFD-NEXT:    flt.d a2, fa5, fs0
-; RV32IFD-NEXT:    beqz a2, .LBB17_4
+; RV32IFD-NEXT:    lui a1, %hi(.LCPI17_1)
+; RV32IFD-NEXT:    fld fa5, %lo(.LCPI17_1)(a1)
+; RV32IFD-NEXT:    flt.d a3, fa5, fs0
+; RV32IFD-NEXT:    beqz a3, .LBB17_4
 ; RV32IFD-NEXT:  # %bb.3:
-; RV32IFD-NEXT:    addi a1, a3, -1
+; RV32IFD-NEXT:    addi a2, a4, -1
 ; RV32IFD-NEXT:  .LBB17_4:
-; RV32IFD-NEXT:    feq.d a3, fs0, fs0
-; RV32IFD-NEXT:    seqz a3, a3
-; RV32IFD-NEXT:    addi a3, a3, -1
-; RV32IFD-NEXT:    and a1, a3, a1
-; RV32IFD-NEXT:    neg a2, a2
-; RV32IFD-NEXT:    neg a4, s0
-; RV32IFD-NEXT:    and a0, a4, a0
-; RV32IFD-NEXT:    or a0, a2, a0
+; RV32IFD-NEXT:    feq.d a1, fs0, fs0
+; RV32IFD-NEXT:    neg a4, a1
+; RV32IFD-NEXT:    and a1, a4, a2
+; RV32IFD-NEXT:    neg a2, a3
+; RV32IFD-NEXT:    neg a3, s0
 ; RV32IFD-NEXT:    and a0, a3, a0
+; RV32IFD-NEXT:    or a0, a2, a0
+; RV32IFD-NEXT:    and a0, a4, a0
 ; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload

diff  --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll
index fac48ff0ea826..fc9017ea64edc 100644
--- a/llvm/test/CodeGen/RISCV/float-convert.ll
+++ b/llvm/test/CodeGen/RISCV/float-convert.ll
@@ -520,27 +520,27 @@ define i64 @fcvt_l_s_sat(float %a) nounwind {
 ; RV32IF-NEXT:    fmv.w.x fa5, a0
 ; RV32IF-NEXT:    fle.s s0, fa5, fa0
 ; RV32IF-NEXT:    call __fixsfdi at plt
-; RV32IF-NEXT:    lui a3, 524288
-; RV32IF-NEXT:    bnez s0, .LBB12_2
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    lui a2, 524288
+; RV32IF-NEXT:    beqz s0, .LBB12_2
 ; RV32IF-NEXT:  # %bb.1: # %start
-; RV32IF-NEXT:    lui a1, 524288
+; RV32IF-NEXT:    mv a2, a1
 ; RV32IF-NEXT:  .LBB12_2: # %start
-; RV32IF-NEXT:    lui a2, %hi(.LCPI12_0)
-; RV32IF-NEXT:    flw fa5, %lo(.LCPI12_0)(a2)
-; RV32IF-NEXT:    flt.s a2, fa5, fs0
-; RV32IF-NEXT:    beqz a2, .LBB12_4
+; RV32IF-NEXT:    lui a1, %hi(.LCPI12_0)
+; RV32IF-NEXT:    flw fa5, %lo(.LCPI12_0)(a1)
+; RV32IF-NEXT:    flt.s a3, fa5, fs0
+; RV32IF-NEXT:    beqz a3, .LBB12_4
 ; RV32IF-NEXT:  # %bb.3:
-; RV32IF-NEXT:    addi a1, a3, -1
+; RV32IF-NEXT:    addi a2, a4, -1
 ; RV32IF-NEXT:  .LBB12_4: # %start
-; RV32IF-NEXT:    feq.s a3, fs0, fs0
-; RV32IF-NEXT:    seqz a3, a3
-; RV32IF-NEXT:    addi a3, a3, -1
-; RV32IF-NEXT:    and a1, a3, a1
-; RV32IF-NEXT:    neg a2, a2
-; RV32IF-NEXT:    neg a4, s0
-; RV32IF-NEXT:    and a0, a4, a0
-; RV32IF-NEXT:    or a0, a2, a0
+; RV32IF-NEXT:    feq.s a1, fs0, fs0
+; RV32IF-NEXT:    neg a4, a1
+; RV32IF-NEXT:    and a1, a4, a2
+; RV32IF-NEXT:    neg a2, a3
+; RV32IF-NEXT:    neg a3, s0
 ; RV32IF-NEXT:    and a0, a3, a0
+; RV32IF-NEXT:    or a0, a2, a0
+; RV32IF-NEXT:    and a0, a4, a0
 ; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
@@ -565,59 +565,49 @@ define i64 @fcvt_l_s_sat(float %a) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lui a1, 913408
 ; RV32I-NEXT:    call __gesf2 at plt
-; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    call __fixsfdi at plt
-; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    mv s3, a1
-; RV32I-NEXT:    lui s4, 524288
-; RV32I-NEXT:    bgez s2, .LBB12_2
+; RV32I-NEXT:    lui s5, 524288
+; RV32I-NEXT:    bgez s1, .LBB12_2
 ; RV32I-NEXT:  # %bb.1: # %start
 ; RV32I-NEXT:    lui s3, 524288
 ; RV32I-NEXT:  .LBB12_2: # %start
-; RV32I-NEXT:    lui s2, 389120
-; RV32I-NEXT:    addi s2, s2, -1
+; RV32I-NEXT:    lui a1, 389120
+; RV32I-NEXT:    addi a1, a1, -1
 ; RV32I-NEXT:    mv a0, s0
-; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    call __gtsf2 at plt
+; RV32I-NEXT:    mv s4, a0
 ; RV32I-NEXT:    blez a0, .LBB12_4
 ; RV32I-NEXT:  # %bb.3: # %start
-; RV32I-NEXT:    addi s3, s4, -1
+; RV32I-NEXT:    addi s3, s5, -1
 ; RV32I-NEXT:  .LBB12_4: # %start
 ; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    call __unordsf2 at plt
 ; RV32I-NEXT:    snez a0, a0
 ; RV32I-NEXT:    addi a0, a0, -1
-; RV32I-NEXT:    and s3, a0, s3
-; RV32I-NEXT:    lui a1, 913408
-; RV32I-NEXT:    mv a0, s0
-; RV32I-NEXT:    call __gesf2 at plt
-; RV32I-NEXT:    slti a0, a0, 0
-; RV32I-NEXT:    addi a0, a0, -1
-; RV32I-NEXT:    and s1, a0, s1
-; RV32I-NEXT:    mv a0, s0
-; RV32I-NEXT:    mv a1, s2
-; RV32I-NEXT:    call __gtsf2 at plt
-; RV32I-NEXT:    sgtz a0, a0
-; RV32I-NEXT:    neg a0, a0
-; RV32I-NEXT:    or s1, a0, s1
-; RV32I-NEXT:    mv a0, s0
-; RV32I-NEXT:    mv a1, s0
-; RV32I-NEXT:    call __unordsf2 at plt
-; RV32I-NEXT:    snez a0, a0
-; RV32I-NEXT:    addi a0, a0, -1
-; RV32I-NEXT:    and a0, a0, s1
-; RV32I-NEXT:    mv a1, s3
+; RV32I-NEXT:    and a1, a0, s3
+; RV32I-NEXT:    slti a2, s1, 0
+; RV32I-NEXT:    addi a2, a2, -1
+; RV32I-NEXT:    and a2, a2, s2
+; RV32I-NEXT:    sgtz a3, s4
+; RV32I-NEXT:    neg a3, a3
+; RV32I-NEXT:    or a2, a3, a2
+; RV32I-NEXT:    and a0, a0, a2
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -756,27 +746,15 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind {
 ; RV32I-NEXT:    call __fixunssfdi at plt
 ; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    and s3, s2, a0
-; RV32I-NEXT:    lui s2, 391168
-; RV32I-NEXT:    addi s2, s2, -1
-; RV32I-NEXT:    mv a0, s0
-; RV32I-NEXT:    mv a1, s2
-; RV32I-NEXT:    call __gtsf2 at plt
-; RV32I-NEXT:    sgtz a0, a0
-; RV32I-NEXT:    neg a0, a0
-; RV32I-NEXT:    or s3, a0, s3
-; RV32I-NEXT:    mv a0, s0
-; RV32I-NEXT:    li a1, 0
-; RV32I-NEXT:    call __gesf2 at plt
-; RV32I-NEXT:    slti a0, a0, 0
-; RV32I-NEXT:    addi a0, a0, -1
-; RV32I-NEXT:    and s1, a0, s1
+; RV32I-NEXT:    lui a1, 391168
+; RV32I-NEXT:    addi a1, a1, -1
 ; RV32I-NEXT:    mv a0, s0
-; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    call __gtsf2 at plt
 ; RV32I-NEXT:    sgtz a0, a0
 ; RV32I-NEXT:    neg a1, a0
-; RV32I-NEXT:    or a1, a1, s1
-; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    or a0, a1, s3
+; RV32I-NEXT:    and a2, s2, s1
+; RV32I-NEXT:    or a1, a1, a2
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
@@ -1146,32 +1124,30 @@ define signext i16 @fcvt_w_s_i16(float %a) nounwind {
 define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind {
 ; RV32IF-LABEL: fcvt_w_s_sat_i16:
 ; RV32IF:       # %bb.0: # %start
-; RV32IF-NEXT:    lui a0, %hi(.LCPI24_0)
-; RV32IF-NEXT:    flw fa5, %lo(.LCPI24_0)(a0)
-; RV32IF-NEXT:    lui a0, 815104
-; RV32IF-NEXT:    fmv.w.x fa4, a0
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    neg a0, a0
+; RV32IF-NEXT:    lui a1, %hi(.LCPI24_0)
+; RV32IF-NEXT:    flw fa5, %lo(.LCPI24_0)(a1)
+; RV32IF-NEXT:    lui a1, 815104
+; RV32IF-NEXT:    fmv.w.x fa4, a1
 ; RV32IF-NEXT:    fmax.s fa4, fa0, fa4
 ; RV32IF-NEXT:    fmin.s fa5, fa4, fa5
-; RV32IF-NEXT:    fcvt.w.s a0, fa5, rtz
-; RV32IF-NEXT:    feq.s a1, fa0, fa0
-; RV32IF-NEXT:    seqz a1, a1
-; RV32IF-NEXT:    addi a1, a1, -1
-; RV32IF-NEXT:    and a0, a1, a0
+; RV32IF-NEXT:    fcvt.w.s a1, fa5, rtz
+; RV32IF-NEXT:    and a0, a0, a1
 ; RV32IF-NEXT:    ret
 ;
 ; RV64IF-LABEL: fcvt_w_s_sat_i16:
 ; RV64IF:       # %bb.0: # %start
-; RV64IF-NEXT:    lui a0, %hi(.LCPI24_0)
-; RV64IF-NEXT:    flw fa5, %lo(.LCPI24_0)(a0)
-; RV64IF-NEXT:    lui a0, 815104
-; RV64IF-NEXT:    fmv.w.x fa4, a0
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    lui a1, %hi(.LCPI24_0)
+; RV64IF-NEXT:    flw fa5, %lo(.LCPI24_0)(a1)
+; RV64IF-NEXT:    lui a1, 815104
+; RV64IF-NEXT:    fmv.w.x fa4, a1
 ; RV64IF-NEXT:    fmax.s fa4, fa0, fa4
+; RV64IF-NEXT:    neg a0, a0
 ; RV64IF-NEXT:    fmin.s fa5, fa4, fa5
-; RV64IF-NEXT:    fcvt.l.s a0, fa5, rtz
-; RV64IF-NEXT:    feq.s a1, fa0, fa0
-; RV64IF-NEXT:    seqz a1, a1
-; RV64IF-NEXT:    addi a1, a1, -1
-; RV64IF-NEXT:    and a0, a1, a0
+; RV64IF-NEXT:    fcvt.l.s a1, fa5, rtz
+; RV64IF-NEXT:    and a0, a0, a1
 ; RV64IF-NEXT:    ret
 ;
 ; RV32I-LABEL: fcvt_w_s_sat_i16:
@@ -1430,32 +1406,30 @@ define signext i8 @fcvt_w_s_i8(float %a) nounwind {
 define signext i8 @fcvt_w_s_sat_i8(float %a) nounwind {
 ; RV32IF-LABEL: fcvt_w_s_sat_i8:
 ; RV32IF:       # %bb.0: # %start
-; RV32IF-NEXT:    lui a0, 798720
-; RV32IF-NEXT:    fmv.w.x fa5, a0
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    neg a0, a0
+; RV32IF-NEXT:    lui a1, 798720
+; RV32IF-NEXT:    fmv.w.x fa5, a1
 ; RV32IF-NEXT:    fmax.s fa5, fa0, fa5
-; RV32IF-NEXT:    lui a0, 274400
-; RV32IF-NEXT:    fmv.w.x fa4, a0
+; RV32IF-NEXT:    lui a1, 274400
+; RV32IF-NEXT:    fmv.w.x fa4, a1
 ; RV32IF-NEXT:    fmin.s fa5, fa5, fa4
-; RV32IF-NEXT:    fcvt.w.s a0, fa5, rtz
-; RV32IF-NEXT:    feq.s a1, fa0, fa0
-; RV32IF-NEXT:    seqz a1, a1
-; RV32IF-NEXT:    addi a1, a1, -1
-; RV32IF-NEXT:    and a0, a1, a0
+; RV32IF-NEXT:    fcvt.w.s a1, fa5, rtz
+; RV32IF-NEXT:    and a0, a0, a1
 ; RV32IF-NEXT:    ret
 ;
 ; RV64IF-LABEL: fcvt_w_s_sat_i8:
 ; RV64IF:       # %bb.0: # %start
-; RV64IF-NEXT:    lui a0, 798720
-; RV64IF-NEXT:    fmv.w.x fa5, a0
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    neg a0, a0
+; RV64IF-NEXT:    lui a1, 798720
+; RV64IF-NEXT:    fmv.w.x fa5, a1
 ; RV64IF-NEXT:    fmax.s fa5, fa0, fa5
-; RV64IF-NEXT:    lui a0, 274400
-; RV64IF-NEXT:    fmv.w.x fa4, a0
+; RV64IF-NEXT:    lui a1, 274400
+; RV64IF-NEXT:    fmv.w.x fa4, a1
 ; RV64IF-NEXT:    fmin.s fa5, fa5, fa4
-; RV64IF-NEXT:    fcvt.l.s a0, fa5, rtz
-; RV64IF-NEXT:    feq.s a1, fa0, fa0
-; RV64IF-NEXT:    seqz a1, a1
-; RV64IF-NEXT:    addi a1, a1, -1
-; RV64IF-NEXT:    and a0, a1, a0
+; RV64IF-NEXT:    fcvt.l.s a1, fa5, rtz
+; RV64IF-NEXT:    and a0, a0, a1
 ; RV64IF-NEXT:    ret
 ;
 ; RV32I-LABEL: fcvt_w_s_sat_i8:

diff  --git a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll
index a87fd92d5780e..4e85ab16c047b 100644
--- a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll
+++ b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll
@@ -41,27 +41,27 @@ define i64 @test_floor_si64(float %x) nounwind {
 ; RV32IF-NEXT:    fle.s s0, fa5, fs0
 ; RV32IF-NEXT:    fmv.s fa0, fs0
 ; RV32IF-NEXT:    call __fixsfdi at plt
-; RV32IF-NEXT:    lui a3, 524288
-; RV32IF-NEXT:    bnez s0, .LBB1_4
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    lui a2, 524288
+; RV32IF-NEXT:    beqz s0, .LBB1_4
 ; RV32IF-NEXT:  # %bb.3:
-; RV32IF-NEXT:    lui a1, 524288
+; RV32IF-NEXT:    mv a2, a1
 ; RV32IF-NEXT:  .LBB1_4:
-; RV32IF-NEXT:    lui a2, %hi(.LCPI1_0)
-; RV32IF-NEXT:    flw fa5, %lo(.LCPI1_0)(a2)
-; RV32IF-NEXT:    flt.s a2, fa5, fs0
-; RV32IF-NEXT:    beqz a2, .LBB1_6
+; RV32IF-NEXT:    lui a1, %hi(.LCPI1_0)
+; RV32IF-NEXT:    flw fa5, %lo(.LCPI1_0)(a1)
+; RV32IF-NEXT:    flt.s a3, fa5, fs0
+; RV32IF-NEXT:    beqz a3, .LBB1_6
 ; RV32IF-NEXT:  # %bb.5:
-; RV32IF-NEXT:    addi a1, a3, -1
+; RV32IF-NEXT:    addi a2, a4, -1
 ; RV32IF-NEXT:  .LBB1_6:
-; RV32IF-NEXT:    feq.s a3, fs0, fs0
-; RV32IF-NEXT:    seqz a3, a3
-; RV32IF-NEXT:    addi a3, a3, -1
-; RV32IF-NEXT:    and a1, a3, a1
-; RV32IF-NEXT:    neg a4, s0
-; RV32IF-NEXT:    and a0, a4, a0
-; RV32IF-NEXT:    neg a2, a2
+; RV32IF-NEXT:    feq.s a1, fs0, fs0
+; RV32IF-NEXT:    neg a4, a1
+; RV32IF-NEXT:    and a1, a4, a2
+; RV32IF-NEXT:    neg a2, s0
+; RV32IF-NEXT:    and a0, a2, a0
+; RV32IF-NEXT:    neg a2, a3
 ; RV32IF-NEXT:    or a0, a2, a0
-; RV32IF-NEXT:    and a0, a3, a0
+; RV32IF-NEXT:    and a0, a4, a0
 ; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
@@ -182,27 +182,27 @@ define i64 @test_ceil_si64(float %x) nounwind {
 ; RV32IF-NEXT:    fle.s s0, fa5, fs0
 ; RV32IF-NEXT:    fmv.s fa0, fs0
 ; RV32IF-NEXT:    call __fixsfdi at plt
-; RV32IF-NEXT:    lui a3, 524288
-; RV32IF-NEXT:    bnez s0, .LBB5_4
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    lui a2, 524288
+; RV32IF-NEXT:    beqz s0, .LBB5_4
 ; RV32IF-NEXT:  # %bb.3:
-; RV32IF-NEXT:    lui a1, 524288
+; RV32IF-NEXT:    mv a2, a1
 ; RV32IF-NEXT:  .LBB5_4:
-; RV32IF-NEXT:    lui a2, %hi(.LCPI5_0)
-; RV32IF-NEXT:    flw fa5, %lo(.LCPI5_0)(a2)
-; RV32IF-NEXT:    flt.s a2, fa5, fs0
-; RV32IF-NEXT:    beqz a2, .LBB5_6
+; RV32IF-NEXT:    lui a1, %hi(.LCPI5_0)
+; RV32IF-NEXT:    flw fa5, %lo(.LCPI5_0)(a1)
+; RV32IF-NEXT:    flt.s a3, fa5, fs0
+; RV32IF-NEXT:    beqz a3, .LBB5_6
 ; RV32IF-NEXT:  # %bb.5:
-; RV32IF-NEXT:    addi a1, a3, -1
+; RV32IF-NEXT:    addi a2, a4, -1
 ; RV32IF-NEXT:  .LBB5_6:
-; RV32IF-NEXT:    feq.s a3, fs0, fs0
-; RV32IF-NEXT:    seqz a3, a3
-; RV32IF-NEXT:    addi a3, a3, -1
-; RV32IF-NEXT:    and a1, a3, a1
-; RV32IF-NEXT:    neg a4, s0
-; RV32IF-NEXT:    and a0, a4, a0
-; RV32IF-NEXT:    neg a2, a2
+; RV32IF-NEXT:    feq.s a1, fs0, fs0
+; RV32IF-NEXT:    neg a4, a1
+; RV32IF-NEXT:    and a1, a4, a2
+; RV32IF-NEXT:    neg a2, s0
+; RV32IF-NEXT:    and a0, a2, a0
+; RV32IF-NEXT:    neg a2, a3
 ; RV32IF-NEXT:    or a0, a2, a0
-; RV32IF-NEXT:    and a0, a3, a0
+; RV32IF-NEXT:    and a0, a4, a0
 ; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
@@ -323,27 +323,27 @@ define i64 @test_trunc_si64(float %x) nounwind {
 ; RV32IF-NEXT:    fle.s s0, fa5, fs0
 ; RV32IF-NEXT:    fmv.s fa0, fs0
 ; RV32IF-NEXT:    call __fixsfdi at plt
-; RV32IF-NEXT:    lui a3, 524288
-; RV32IF-NEXT:    bnez s0, .LBB9_4
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    lui a2, 524288
+; RV32IF-NEXT:    beqz s0, .LBB9_4
 ; RV32IF-NEXT:  # %bb.3:
-; RV32IF-NEXT:    lui a1, 524288
+; RV32IF-NEXT:    mv a2, a1
 ; RV32IF-NEXT:  .LBB9_4:
-; RV32IF-NEXT:    lui a2, %hi(.LCPI9_0)
-; RV32IF-NEXT:    flw fa5, %lo(.LCPI9_0)(a2)
-; RV32IF-NEXT:    flt.s a2, fa5, fs0
-; RV32IF-NEXT:    beqz a2, .LBB9_6
+; RV32IF-NEXT:    lui a1, %hi(.LCPI9_0)
+; RV32IF-NEXT:    flw fa5, %lo(.LCPI9_0)(a1)
+; RV32IF-NEXT:    flt.s a3, fa5, fs0
+; RV32IF-NEXT:    beqz a3, .LBB9_6
 ; RV32IF-NEXT:  # %bb.5:
-; RV32IF-NEXT:    addi a1, a3, -1
+; RV32IF-NEXT:    addi a2, a4, -1
 ; RV32IF-NEXT:  .LBB9_6:
-; RV32IF-NEXT:    feq.s a3, fs0, fs0
-; RV32IF-NEXT:    seqz a3, a3
-; RV32IF-NEXT:    addi a3, a3, -1
-; RV32IF-NEXT:    and a1, a3, a1
-; RV32IF-NEXT:    neg a4, s0
-; RV32IF-NEXT:    and a0, a4, a0
-; RV32IF-NEXT:    neg a2, a2
+; RV32IF-NEXT:    feq.s a1, fs0, fs0
+; RV32IF-NEXT:    neg a4, a1
+; RV32IF-NEXT:    and a1, a4, a2
+; RV32IF-NEXT:    neg a2, s0
+; RV32IF-NEXT:    and a0, a2, a0
+; RV32IF-NEXT:    neg a2, a3
 ; RV32IF-NEXT:    or a0, a2, a0
-; RV32IF-NEXT:    and a0, a3, a0
+; RV32IF-NEXT:    and a0, a4, a0
 ; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
@@ -464,27 +464,27 @@ define i64 @test_round_si64(float %x) nounwind {
 ; RV32IF-NEXT:    fle.s s0, fa5, fs0
 ; RV32IF-NEXT:    fmv.s fa0, fs0
 ; RV32IF-NEXT:    call __fixsfdi at plt
-; RV32IF-NEXT:    lui a3, 524288
-; RV32IF-NEXT:    bnez s0, .LBB13_4
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    lui a2, 524288
+; RV32IF-NEXT:    beqz s0, .LBB13_4
 ; RV32IF-NEXT:  # %bb.3:
-; RV32IF-NEXT:    lui a1, 524288
+; RV32IF-NEXT:    mv a2, a1
 ; RV32IF-NEXT:  .LBB13_4:
-; RV32IF-NEXT:    lui a2, %hi(.LCPI13_0)
-; RV32IF-NEXT:    flw fa5, %lo(.LCPI13_0)(a2)
-; RV32IF-NEXT:    flt.s a2, fa5, fs0
-; RV32IF-NEXT:    beqz a2, .LBB13_6
+; RV32IF-NEXT:    lui a1, %hi(.LCPI13_0)
+; RV32IF-NEXT:    flw fa5, %lo(.LCPI13_0)(a1)
+; RV32IF-NEXT:    flt.s a3, fa5, fs0
+; RV32IF-NEXT:    beqz a3, .LBB13_6
 ; RV32IF-NEXT:  # %bb.5:
-; RV32IF-NEXT:    addi a1, a3, -1
+; RV32IF-NEXT:    addi a2, a4, -1
 ; RV32IF-NEXT:  .LBB13_6:
-; RV32IF-NEXT:    feq.s a3, fs0, fs0
-; RV32IF-NEXT:    seqz a3, a3
-; RV32IF-NEXT:    addi a3, a3, -1
-; RV32IF-NEXT:    and a1, a3, a1
-; RV32IF-NEXT:    neg a4, s0
-; RV32IF-NEXT:    and a0, a4, a0
-; RV32IF-NEXT:    neg a2, a2
+; RV32IF-NEXT:    feq.s a1, fs0, fs0
+; RV32IF-NEXT:    neg a4, a1
+; RV32IF-NEXT:    and a1, a4, a2
+; RV32IF-NEXT:    neg a2, s0
+; RV32IF-NEXT:    and a0, a2, a0
+; RV32IF-NEXT:    neg a2, a3
 ; RV32IF-NEXT:    or a0, a2, a0
-; RV32IF-NEXT:    and a0, a3, a0
+; RV32IF-NEXT:    and a0, a4, a0
 ; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
@@ -605,27 +605,27 @@ define i64 @test_roundeven_si64(float %x) nounwind {
 ; RV32IF-NEXT:    fle.s s0, fa5, fs0
 ; RV32IF-NEXT:    fmv.s fa0, fs0
 ; RV32IF-NEXT:    call __fixsfdi at plt
-; RV32IF-NEXT:    lui a3, 524288
-; RV32IF-NEXT:    bnez s0, .LBB17_4
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    lui a2, 524288
+; RV32IF-NEXT:    beqz s0, .LBB17_4
 ; RV32IF-NEXT:  # %bb.3:
-; RV32IF-NEXT:    lui a1, 524288
+; RV32IF-NEXT:    mv a2, a1
 ; RV32IF-NEXT:  .LBB17_4:
-; RV32IF-NEXT:    lui a2, %hi(.LCPI17_0)
-; RV32IF-NEXT:    flw fa5, %lo(.LCPI17_0)(a2)
-; RV32IF-NEXT:    flt.s a2, fa5, fs0
-; RV32IF-NEXT:    beqz a2, .LBB17_6
+; RV32IF-NEXT:    lui a1, %hi(.LCPI17_0)
+; RV32IF-NEXT:    flw fa5, %lo(.LCPI17_0)(a1)
+; RV32IF-NEXT:    flt.s a3, fa5, fs0
+; RV32IF-NEXT:    beqz a3, .LBB17_6
 ; RV32IF-NEXT:  # %bb.5:
-; RV32IF-NEXT:    addi a1, a3, -1
+; RV32IF-NEXT:    addi a2, a4, -1
 ; RV32IF-NEXT:  .LBB17_6:
-; RV32IF-NEXT:    feq.s a3, fs0, fs0
-; RV32IF-NEXT:    seqz a3, a3
-; RV32IF-NEXT:    addi a3, a3, -1
-; RV32IF-NEXT:    and a1, a3, a1
-; RV32IF-NEXT:    neg a4, s0
-; RV32IF-NEXT:    and a0, a4, a0
-; RV32IF-NEXT:    neg a2, a2
+; RV32IF-NEXT:    feq.s a1, fs0, fs0
+; RV32IF-NEXT:    neg a4, a1
+; RV32IF-NEXT:    and a1, a4, a2
+; RV32IF-NEXT:    neg a2, s0
+; RV32IF-NEXT:    and a0, a2, a0
+; RV32IF-NEXT:    neg a2, a3
 ; RV32IF-NEXT:    or a0, a2, a0
-; RV32IF-NEXT:    and a0, a3, a0
+; RV32IF-NEXT:    and a0, a4, a0
 ; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload

diff  --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll
index 5b2ffe503a66a..62beaadef003c 100644
--- a/llvm/test/CodeGen/RISCV/half-convert.ll
+++ b/llvm/test/CodeGen/RISCV/half-convert.ll
@@ -84,65 +84,61 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
 ; RV32IZFH-LABEL: fcvt_si_h_sat:
 ; RV32IZFH:       # %bb.0: # %start
 ; RV32IZFH-NEXT:    fcvt.s.h fa5, fa0
-; RV32IZFH-NEXT:    lui a0, %hi(.LCPI1_0)
-; RV32IZFH-NEXT:    flw fa4, %lo(.LCPI1_0)(a0)
-; RV32IZFH-NEXT:    lui a0, 815104
-; RV32IZFH-NEXT:    fmv.w.x fa3, a0
-; RV32IZFH-NEXT:    fmax.s fa3, fa5, fa3
-; RV32IZFH-NEXT:    fmin.s fa4, fa3, fa4
-; RV32IZFH-NEXT:    fcvt.w.s a0, fa4, rtz
-; RV32IZFH-NEXT:    feq.s a1, fa5, fa5
-; RV32IZFH-NEXT:    seqz a1, a1
-; RV32IZFH-NEXT:    addi a1, a1, -1
-; RV32IZFH-NEXT:    and a0, a1, a0
+; RV32IZFH-NEXT:    feq.s a0, fa5, fa5
+; RV32IZFH-NEXT:    neg a0, a0
+; RV32IZFH-NEXT:    lui a1, %hi(.LCPI1_0)
+; RV32IZFH-NEXT:    flw fa4, %lo(.LCPI1_0)(a1)
+; RV32IZFH-NEXT:    lui a1, 815104
+; RV32IZFH-NEXT:    fmv.w.x fa3, a1
+; RV32IZFH-NEXT:    fmax.s fa5, fa5, fa3
+; RV32IZFH-NEXT:    fmin.s fa5, fa5, fa4
+; RV32IZFH-NEXT:    fcvt.w.s a1, fa5, rtz
+; RV32IZFH-NEXT:    and a0, a0, a1
 ; RV32IZFH-NEXT:    ret
 ;
 ; RV64IZFH-LABEL: fcvt_si_h_sat:
 ; RV64IZFH:       # %bb.0: # %start
 ; RV64IZFH-NEXT:    fcvt.s.h fa5, fa0
-; RV64IZFH-NEXT:    lui a0, %hi(.LCPI1_0)
-; RV64IZFH-NEXT:    flw fa4, %lo(.LCPI1_0)(a0)
-; RV64IZFH-NEXT:    lui a0, 815104
-; RV64IZFH-NEXT:    fmv.w.x fa3, a0
-; RV64IZFH-NEXT:    fmax.s fa3, fa5, fa3
-; RV64IZFH-NEXT:    fmin.s fa4, fa3, fa4
-; RV64IZFH-NEXT:    fcvt.l.s a0, fa4, rtz
-; RV64IZFH-NEXT:    feq.s a1, fa5, fa5
-; RV64IZFH-NEXT:    seqz a1, a1
-; RV64IZFH-NEXT:    addi a1, a1, -1
-; RV64IZFH-NEXT:    and a0, a1, a0
+; RV64IZFH-NEXT:    feq.s a0, fa5, fa5
+; RV64IZFH-NEXT:    lui a1, %hi(.LCPI1_0)
+; RV64IZFH-NEXT:    flw fa4, %lo(.LCPI1_0)(a1)
+; RV64IZFH-NEXT:    lui a1, 815104
+; RV64IZFH-NEXT:    fmv.w.x fa3, a1
+; RV64IZFH-NEXT:    fmax.s fa5, fa5, fa3
+; RV64IZFH-NEXT:    neg a0, a0
+; RV64IZFH-NEXT:    fmin.s fa5, fa5, fa4
+; RV64IZFH-NEXT:    fcvt.l.s a1, fa5, rtz
+; RV64IZFH-NEXT:    and a0, a0, a1
 ; RV64IZFH-NEXT:    ret
 ;
 ; RV32IDZFH-LABEL: fcvt_si_h_sat:
 ; RV32IDZFH:       # %bb.0: # %start
 ; RV32IDZFH-NEXT:    fcvt.s.h fa5, fa0
-; RV32IDZFH-NEXT:    lui a0, %hi(.LCPI1_0)
-; RV32IDZFH-NEXT:    flw fa4, %lo(.LCPI1_0)(a0)
-; RV32IDZFH-NEXT:    lui a0, 815104
-; RV32IDZFH-NEXT:    fmv.w.x fa3, a0
-; RV32IDZFH-NEXT:    fmax.s fa3, fa5, fa3
-; RV32IDZFH-NEXT:    fmin.s fa4, fa3, fa4
-; RV32IDZFH-NEXT:    fcvt.w.s a0, fa4, rtz
-; RV32IDZFH-NEXT:    feq.s a1, fa5, fa5
-; RV32IDZFH-NEXT:    seqz a1, a1
-; RV32IDZFH-NEXT:    addi a1, a1, -1
-; RV32IDZFH-NEXT:    and a0, a1, a0
+; RV32IDZFH-NEXT:    feq.s a0, fa5, fa5
+; RV32IDZFH-NEXT:    neg a0, a0
+; RV32IDZFH-NEXT:    lui a1, %hi(.LCPI1_0)
+; RV32IDZFH-NEXT:    flw fa4, %lo(.LCPI1_0)(a1)
+; RV32IDZFH-NEXT:    lui a1, 815104
+; RV32IDZFH-NEXT:    fmv.w.x fa3, a1
+; RV32IDZFH-NEXT:    fmax.s fa5, fa5, fa3
+; RV32IDZFH-NEXT:    fmin.s fa5, fa5, fa4
+; RV32IDZFH-NEXT:    fcvt.w.s a1, fa5, rtz
+; RV32IDZFH-NEXT:    and a0, a0, a1
 ; RV32IDZFH-NEXT:    ret
 ;
 ; RV64IDZFH-LABEL: fcvt_si_h_sat:
 ; RV64IDZFH:       # %bb.0: # %start
 ; RV64IDZFH-NEXT:    fcvt.s.h fa5, fa0
-; RV64IDZFH-NEXT:    lui a0, %hi(.LCPI1_0)
-; RV64IDZFH-NEXT:    flw fa4, %lo(.LCPI1_0)(a0)
-; RV64IDZFH-NEXT:    lui a0, 815104
-; RV64IDZFH-NEXT:    fmv.w.x fa3, a0
-; RV64IDZFH-NEXT:    fmax.s fa3, fa5, fa3
-; RV64IDZFH-NEXT:    fmin.s fa4, fa3, fa4
-; RV64IDZFH-NEXT:    fcvt.l.s a0, fa4, rtz
-; RV64IDZFH-NEXT:    feq.s a1, fa5, fa5
-; RV64IDZFH-NEXT:    seqz a1, a1
-; RV64IDZFH-NEXT:    addi a1, a1, -1
-; RV64IDZFH-NEXT:    and a0, a1, a0
+; RV64IDZFH-NEXT:    feq.s a0, fa5, fa5
+; RV64IDZFH-NEXT:    lui a1, %hi(.LCPI1_0)
+; RV64IDZFH-NEXT:    flw fa4, %lo(.LCPI1_0)(a1)
+; RV64IDZFH-NEXT:    lui a1, 815104
+; RV64IDZFH-NEXT:    fmv.w.x fa3, a1
+; RV64IDZFH-NEXT:    fmax.s fa5, fa5, fa3
+; RV64IDZFH-NEXT:    neg a0, a0
+; RV64IDZFH-NEXT:    fmin.s fa5, fa5, fa4
+; RV64IDZFH-NEXT:    fcvt.l.s a1, fa5, rtz
+; RV64IDZFH-NEXT:    and a0, a0, a1
 ; RV64IDZFH-NEXT:    ret
 ;
 ; RV32I-LABEL: fcvt_si_h_sat:
@@ -234,33 +230,31 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
 ; CHECK32-IZFHMIN-LABEL: fcvt_si_h_sat:
 ; CHECK32-IZFHMIN:       # %bb.0: # %start
 ; CHECK32-IZFHMIN-NEXT:    fcvt.s.h fa5, fa0
-; CHECK32-IZFHMIN-NEXT:    lui a0, %hi(.LCPI1_0)
-; CHECK32-IZFHMIN-NEXT:    flw fa4, %lo(.LCPI1_0)(a0)
-; CHECK32-IZFHMIN-NEXT:    lui a0, 815104
-; CHECK32-IZFHMIN-NEXT:    fmv.w.x fa3, a0
-; CHECK32-IZFHMIN-NEXT:    fmax.s fa3, fa5, fa3
-; CHECK32-IZFHMIN-NEXT:    fmin.s fa4, fa3, fa4
-; CHECK32-IZFHMIN-NEXT:    fcvt.w.s a0, fa4, rtz
-; CHECK32-IZFHMIN-NEXT:    feq.s a1, fa5, fa5
-; CHECK32-IZFHMIN-NEXT:    seqz a1, a1
-; CHECK32-IZFHMIN-NEXT:    addi a1, a1, -1
-; CHECK32-IZFHMIN-NEXT:    and a0, a1, a0
+; CHECK32-IZFHMIN-NEXT:    feq.s a0, fa5, fa5
+; CHECK32-IZFHMIN-NEXT:    neg a0, a0
+; CHECK32-IZFHMIN-NEXT:    lui a1, %hi(.LCPI1_0)
+; CHECK32-IZFHMIN-NEXT:    flw fa4, %lo(.LCPI1_0)(a1)
+; CHECK32-IZFHMIN-NEXT:    lui a1, 815104
+; CHECK32-IZFHMIN-NEXT:    fmv.w.x fa3, a1
+; CHECK32-IZFHMIN-NEXT:    fmax.s fa5, fa5, fa3
+; CHECK32-IZFHMIN-NEXT:    fmin.s fa5, fa5, fa4
+; CHECK32-IZFHMIN-NEXT:    fcvt.w.s a1, fa5, rtz
+; CHECK32-IZFHMIN-NEXT:    and a0, a0, a1
 ; CHECK32-IZFHMIN-NEXT:    ret
 ;
 ; CHECK64-IZFHMIN-LABEL: fcvt_si_h_sat:
 ; CHECK64-IZFHMIN:       # %bb.0: # %start
 ; CHECK64-IZFHMIN-NEXT:    fcvt.s.h fa5, fa0
-; CHECK64-IZFHMIN-NEXT:    lui a0, %hi(.LCPI1_0)
-; CHECK64-IZFHMIN-NEXT:    flw fa4, %lo(.LCPI1_0)(a0)
-; CHECK64-IZFHMIN-NEXT:    lui a0, 815104
-; CHECK64-IZFHMIN-NEXT:    fmv.w.x fa3, a0
-; CHECK64-IZFHMIN-NEXT:    fmax.s fa3, fa5, fa3
-; CHECK64-IZFHMIN-NEXT:    fmin.s fa4, fa3, fa4
-; CHECK64-IZFHMIN-NEXT:    fcvt.l.s a0, fa4, rtz
-; CHECK64-IZFHMIN-NEXT:    feq.s a1, fa5, fa5
-; CHECK64-IZFHMIN-NEXT:    seqz a1, a1
-; CHECK64-IZFHMIN-NEXT:    addi a1, a1, -1
-; CHECK64-IZFHMIN-NEXT:    and a0, a1, a0
+; CHECK64-IZFHMIN-NEXT:    feq.s a0, fa5, fa5
+; CHECK64-IZFHMIN-NEXT:    lui a1, %hi(.LCPI1_0)
+; CHECK64-IZFHMIN-NEXT:    flw fa4, %lo(.LCPI1_0)(a1)
+; CHECK64-IZFHMIN-NEXT:    lui a1, 815104
+; CHECK64-IZFHMIN-NEXT:    fmv.w.x fa3, a1
+; CHECK64-IZFHMIN-NEXT:    fmax.s fa5, fa5, fa3
+; CHECK64-IZFHMIN-NEXT:    neg a0, a0
+; CHECK64-IZFHMIN-NEXT:    fmin.s fa5, fa5, fa4
+; CHECK64-IZFHMIN-NEXT:    fcvt.l.s a1, fa5, rtz
+; CHECK64-IZFHMIN-NEXT:    and a0, a0, a1
 ; CHECK64-IZFHMIN-NEXT:    ret
 start:
   %0 = tail call i16 @llvm.fptosi.sat.i16.f16(half %a)
@@ -1024,27 +1018,27 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ; RV32IZFH-NEXT:    fle.s s0, fa5, fs0
 ; RV32IZFH-NEXT:    fmv.s fa0, fs0
 ; RV32IZFH-NEXT:    call __fixsfdi at plt
-; RV32IZFH-NEXT:    lui a3, 524288
-; RV32IZFH-NEXT:    bnez s0, .LBB10_2
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    lui a2, 524288
+; RV32IZFH-NEXT:    beqz s0, .LBB10_2
 ; RV32IZFH-NEXT:  # %bb.1: # %start
-; RV32IZFH-NEXT:    lui a1, 524288
+; RV32IZFH-NEXT:    mv a2, a1
 ; RV32IZFH-NEXT:  .LBB10_2: # %start
-; RV32IZFH-NEXT:    lui a2, %hi(.LCPI10_0)
-; RV32IZFH-NEXT:    flw fa5, %lo(.LCPI10_0)(a2)
-; RV32IZFH-NEXT:    flt.s a2, fa5, fs0
-; RV32IZFH-NEXT:    beqz a2, .LBB10_4
+; RV32IZFH-NEXT:    lui a1, %hi(.LCPI10_0)
+; RV32IZFH-NEXT:    flw fa5, %lo(.LCPI10_0)(a1)
+; RV32IZFH-NEXT:    flt.s a3, fa5, fs0
+; RV32IZFH-NEXT:    beqz a3, .LBB10_4
 ; RV32IZFH-NEXT:  # %bb.3:
-; RV32IZFH-NEXT:    addi a1, a3, -1
+; RV32IZFH-NEXT:    addi a2, a4, -1
 ; RV32IZFH-NEXT:  .LBB10_4: # %start
-; RV32IZFH-NEXT:    feq.s a3, fs0, fs0
-; RV32IZFH-NEXT:    seqz a3, a3
-; RV32IZFH-NEXT:    addi a3, a3, -1
-; RV32IZFH-NEXT:    and a1, a3, a1
-; RV32IZFH-NEXT:    neg a2, a2
-; RV32IZFH-NEXT:    neg a4, s0
-; RV32IZFH-NEXT:    and a0, a4, a0
-; RV32IZFH-NEXT:    or a0, a2, a0
+; RV32IZFH-NEXT:    feq.s a1, fs0, fs0
+; RV32IZFH-NEXT:    neg a4, a1
+; RV32IZFH-NEXT:    and a1, a4, a2
+; RV32IZFH-NEXT:    neg a2, a3
+; RV32IZFH-NEXT:    neg a3, s0
 ; RV32IZFH-NEXT:    and a0, a3, a0
+; RV32IZFH-NEXT:    or a0, a2, a0
+; RV32IZFH-NEXT:    and a0, a4, a0
 ; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
@@ -1072,27 +1066,27 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ; RV32IDZFH-NEXT:    fle.s s0, fa5, fs0
 ; RV32IDZFH-NEXT:    fmv.s fa0, fs0
 ; RV32IDZFH-NEXT:    call __fixsfdi at plt
-; RV32IDZFH-NEXT:    lui a3, 524288
-; RV32IDZFH-NEXT:    bnez s0, .LBB10_2
+; RV32IDZFH-NEXT:    lui a4, 524288
+; RV32IDZFH-NEXT:    lui a2, 524288
+; RV32IDZFH-NEXT:    beqz s0, .LBB10_2
 ; RV32IDZFH-NEXT:  # %bb.1: # %start
-; RV32IDZFH-NEXT:    lui a1, 524288
+; RV32IDZFH-NEXT:    mv a2, a1
 ; RV32IDZFH-NEXT:  .LBB10_2: # %start
-; RV32IDZFH-NEXT:    lui a2, %hi(.LCPI10_0)
-; RV32IDZFH-NEXT:    flw fa5, %lo(.LCPI10_0)(a2)
-; RV32IDZFH-NEXT:    flt.s a2, fa5, fs0
-; RV32IDZFH-NEXT:    beqz a2, .LBB10_4
+; RV32IDZFH-NEXT:    lui a1, %hi(.LCPI10_0)
+; RV32IDZFH-NEXT:    flw fa5, %lo(.LCPI10_0)(a1)
+; RV32IDZFH-NEXT:    flt.s a3, fa5, fs0
+; RV32IDZFH-NEXT:    beqz a3, .LBB10_4
 ; RV32IDZFH-NEXT:  # %bb.3:
-; RV32IDZFH-NEXT:    addi a1, a3, -1
+; RV32IDZFH-NEXT:    addi a2, a4, -1
 ; RV32IDZFH-NEXT:  .LBB10_4: # %start
-; RV32IDZFH-NEXT:    feq.s a3, fs0, fs0
-; RV32IDZFH-NEXT:    seqz a3, a3
-; RV32IDZFH-NEXT:    addi a3, a3, -1
-; RV32IDZFH-NEXT:    and a1, a3, a1
-; RV32IDZFH-NEXT:    neg a2, a2
-; RV32IDZFH-NEXT:    neg a4, s0
-; RV32IDZFH-NEXT:    and a0, a4, a0
-; RV32IDZFH-NEXT:    or a0, a2, a0
+; RV32IDZFH-NEXT:    feq.s a1, fs0, fs0
+; RV32IDZFH-NEXT:    neg a4, a1
+; RV32IDZFH-NEXT:    and a1, a4, a2
+; RV32IDZFH-NEXT:    neg a2, a3
+; RV32IDZFH-NEXT:    neg a3, s0
 ; RV32IDZFH-NEXT:    and a0, a3, a0
+; RV32IDZFH-NEXT:    or a0, a2, a0
+; RV32IDZFH-NEXT:    and a0, a4, a0
 ; RV32IDZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IDZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IDZFH-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
@@ -1117,62 +1111,52 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    slli a0, a0, 16
 ; RV32I-NEXT:    srli a0, a0, 16
 ; RV32I-NEXT:    call __extendhfsf2 at plt
-; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lui a1, 913408
 ; RV32I-NEXT:    call __gesf2 at plt
-; RV32I-NEXT:    mv s3, a0
-; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call __fixsfdi at plt
-; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    lui s4, 524288
-; RV32I-NEXT:    bgez s3, .LBB10_2
+; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    mv s3, a1
+; RV32I-NEXT:    lui s5, 524288
+; RV32I-NEXT:    bgez s0, .LBB10_2
 ; RV32I-NEXT:  # %bb.1: # %start
-; RV32I-NEXT:    lui s2, 524288
+; RV32I-NEXT:    lui s3, 524288
 ; RV32I-NEXT:  .LBB10_2: # %start
-; RV32I-NEXT:    lui s3, 389120
-; RV32I-NEXT:    addi s3, s3, -1
-; RV32I-NEXT:    mv a0, s0
-; RV32I-NEXT:    mv a1, s3
+; RV32I-NEXT:    lui a1, 389120
+; RV32I-NEXT:    addi a1, a1, -1
+; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call __gtsf2 at plt
+; RV32I-NEXT:    mv s4, a0
 ; RV32I-NEXT:    blez a0, .LBB10_4
 ; RV32I-NEXT:  # %bb.3: # %start
-; RV32I-NEXT:    addi s2, s4, -1
+; RV32I-NEXT:    addi s3, s5, -1
 ; RV32I-NEXT:  .LBB10_4: # %start
-; RV32I-NEXT:    mv a0, s0
-; RV32I-NEXT:    mv a1, s0
-; RV32I-NEXT:    call __unordsf2 at plt
-; RV32I-NEXT:    snez a0, a0
-; RV32I-NEXT:    addi a0, a0, -1
-; RV32I-NEXT:    and s2, a0, s2
-; RV32I-NEXT:    mv a0, s0
-; RV32I-NEXT:    mv a1, s3
-; RV32I-NEXT:    call __gtsf2 at plt
-; RV32I-NEXT:    sgtz a0, a0
-; RV32I-NEXT:    neg s3, a0
-; RV32I-NEXT:    lui a1, 913408
-; RV32I-NEXT:    mv a0, s0
-; RV32I-NEXT:    call __gesf2 at plt
-; RV32I-NEXT:    slti a0, a0, 0
-; RV32I-NEXT:    addi a0, a0, -1
-; RV32I-NEXT:    and a0, a0, s1
-; RV32I-NEXT:    or s1, s3, a0
-; RV32I-NEXT:    mv a0, s0
-; RV32I-NEXT:    mv a1, s0
+; RV32I-NEXT:    mv a0, s1
+; RV32I-NEXT:    mv a1, s1
 ; RV32I-NEXT:    call __unordsf2 at plt
 ; RV32I-NEXT:    snez a0, a0
 ; RV32I-NEXT:    addi a0, a0, -1
-; RV32I-NEXT:    and a0, a0, s1
-; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    and a1, a0, s3
+; RV32I-NEXT:    sgtz a2, s4
+; RV32I-NEXT:    neg a2, a2
+; RV32I-NEXT:    slti a3, s0, 0
+; RV32I-NEXT:    addi a3, a3, -1
+; RV32I-NEXT:    and a3, a3, s2
+; RV32I-NEXT:    or a2, a2, a3
+; RV32I-NEXT:    and a0, a0, a2
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -1233,27 +1217,27 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ; RV32IFZFHMIN-NEXT:    fle.s s0, fa5, fs0
 ; RV32IFZFHMIN-NEXT:    fmv.s fa0, fs0
 ; RV32IFZFHMIN-NEXT:    call __fixsfdi at plt
-; RV32IFZFHMIN-NEXT:    lui a3, 524288
-; RV32IFZFHMIN-NEXT:    bnez s0, .LBB10_2
+; RV32IFZFHMIN-NEXT:    lui a4, 524288
+; RV32IFZFHMIN-NEXT:    lui a2, 524288
+; RV32IFZFHMIN-NEXT:    beqz s0, .LBB10_2
 ; RV32IFZFHMIN-NEXT:  # %bb.1: # %start
-; RV32IFZFHMIN-NEXT:    lui a1, 524288
+; RV32IFZFHMIN-NEXT:    mv a2, a1
 ; RV32IFZFHMIN-NEXT:  .LBB10_2: # %start
-; RV32IFZFHMIN-NEXT:    lui a2, %hi(.LCPI10_0)
-; RV32IFZFHMIN-NEXT:    flw fa5, %lo(.LCPI10_0)(a2)
-; RV32IFZFHMIN-NEXT:    flt.s a2, fa5, fs0
-; RV32IFZFHMIN-NEXT:    beqz a2, .LBB10_4
+; RV32IFZFHMIN-NEXT:    lui a1, %hi(.LCPI10_0)
+; RV32IFZFHMIN-NEXT:    flw fa5, %lo(.LCPI10_0)(a1)
+; RV32IFZFHMIN-NEXT:    flt.s a3, fa5, fs0
+; RV32IFZFHMIN-NEXT:    beqz a3, .LBB10_4
 ; RV32IFZFHMIN-NEXT:  # %bb.3:
-; RV32IFZFHMIN-NEXT:    addi a1, a3, -1
+; RV32IFZFHMIN-NEXT:    addi a2, a4, -1
 ; RV32IFZFHMIN-NEXT:  .LBB10_4: # %start
-; RV32IFZFHMIN-NEXT:    feq.s a3, fs0, fs0
-; RV32IFZFHMIN-NEXT:    seqz a3, a3
-; RV32IFZFHMIN-NEXT:    addi a3, a3, -1
-; RV32IFZFHMIN-NEXT:    and a1, a3, a1
-; RV32IFZFHMIN-NEXT:    neg a2, a2
-; RV32IFZFHMIN-NEXT:    neg a4, s0
-; RV32IFZFHMIN-NEXT:    and a0, a4, a0
-; RV32IFZFHMIN-NEXT:    or a0, a2, a0
+; RV32IFZFHMIN-NEXT:    feq.s a1, fs0, fs0
+; RV32IFZFHMIN-NEXT:    neg a4, a1
+; RV32IFZFHMIN-NEXT:    and a1, a4, a2
+; RV32IFZFHMIN-NEXT:    neg a2, a3
+; RV32IFZFHMIN-NEXT:    neg a3, s0
 ; RV32IFZFHMIN-NEXT:    and a0, a3, a0
+; RV32IFZFHMIN-NEXT:    or a0, a2, a0
+; RV32IFZFHMIN-NEXT:    and a0, a4, a0
 ; RV32IFZFHMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IFZFHMIN-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IFZFHMIN-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
@@ -1282,27 +1266,27 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ; RV32IDZFHMIN-NEXT:    fle.s s0, fa5, fs0
 ; RV32IDZFHMIN-NEXT:    fmv.s fa0, fs0
 ; RV32IDZFHMIN-NEXT:    call __fixsfdi at plt
-; RV32IDZFHMIN-NEXT:    lui a3, 524288
-; RV32IDZFHMIN-NEXT:    bnez s0, .LBB10_2
+; RV32IDZFHMIN-NEXT:    lui a4, 524288
+; RV32IDZFHMIN-NEXT:    lui a2, 524288
+; RV32IDZFHMIN-NEXT:    beqz s0, .LBB10_2
 ; RV32IDZFHMIN-NEXT:  # %bb.1: # %start
-; RV32IDZFHMIN-NEXT:    lui a1, 524288
+; RV32IDZFHMIN-NEXT:    mv a2, a1
 ; RV32IDZFHMIN-NEXT:  .LBB10_2: # %start
-; RV32IDZFHMIN-NEXT:    lui a2, %hi(.LCPI10_0)
-; RV32IDZFHMIN-NEXT:    flw fa5, %lo(.LCPI10_0)(a2)
-; RV32IDZFHMIN-NEXT:    flt.s a2, fa5, fs0
-; RV32IDZFHMIN-NEXT:    beqz a2, .LBB10_4
+; RV32IDZFHMIN-NEXT:    lui a1, %hi(.LCPI10_0)
+; RV32IDZFHMIN-NEXT:    flw fa5, %lo(.LCPI10_0)(a1)
+; RV32IDZFHMIN-NEXT:    flt.s a3, fa5, fs0
+; RV32IDZFHMIN-NEXT:    beqz a3, .LBB10_4
 ; RV32IDZFHMIN-NEXT:  # %bb.3:
-; RV32IDZFHMIN-NEXT:    addi a1, a3, -1
+; RV32IDZFHMIN-NEXT:    addi a2, a4, -1
 ; RV32IDZFHMIN-NEXT:  .LBB10_4: # %start
-; RV32IDZFHMIN-NEXT:    feq.s a3, fs0, fs0
-; RV32IDZFHMIN-NEXT:    seqz a3, a3
-; RV32IDZFHMIN-NEXT:    addi a3, a3, -1
-; RV32IDZFHMIN-NEXT:    and a1, a3, a1
-; RV32IDZFHMIN-NEXT:    neg a2, a2
-; RV32IDZFHMIN-NEXT:    neg a4, s0
-; RV32IDZFHMIN-NEXT:    and a0, a4, a0
-; RV32IDZFHMIN-NEXT:    or a0, a2, a0
+; RV32IDZFHMIN-NEXT:    feq.s a1, fs0, fs0
+; RV32IDZFHMIN-NEXT:    neg a4, a1
+; RV32IDZFHMIN-NEXT:    and a1, a4, a2
+; RV32IDZFHMIN-NEXT:    neg a2, a3
+; RV32IDZFHMIN-NEXT:    neg a3, s0
 ; RV32IDZFHMIN-NEXT:    and a0, a3, a0
+; RV32IDZFHMIN-NEXT:    or a0, a2, a0
+; RV32IDZFHMIN-NEXT:    and a0, a4, a0
 ; RV32IDZFHMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IDZFHMIN-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IDZFHMIN-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
@@ -1456,53 +1440,36 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
 ;
 ; RV32I-LABEL: fcvt_lu_h_sat:
 ; RV32I:       # %bb.0: # %start
-; RV32I-NEXT:    addi sp, sp, -32
-; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    slli a0, a0, 16
 ; RV32I-NEXT:    srli a0, a0, 16
 ; RV32I-NEXT:    call __extendhfsf2 at plt
 ; RV32I-NEXT:    mv s0, a0
-; RV32I-NEXT:    lui s1, 391168
-; RV32I-NEXT:    addi s1, s1, -1
-; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    lui a1, 391168
+; RV32I-NEXT:    addi a1, a1, -1
 ; RV32I-NEXT:    call __gtsf2 at plt
 ; RV32I-NEXT:    sgtz a0, a0
-; RV32I-NEXT:    neg s3, a0
+; RV32I-NEXT:    neg s1, a0
 ; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __gesf2 at plt
 ; RV32I-NEXT:    slti a0, a0, 0
-; RV32I-NEXT:    addi s4, a0, -1
+; RV32I-NEXT:    addi s2, a0, -1
 ; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    call __fixunssfdi at plt
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    and a0, s4, a0
-; RV32I-NEXT:    or s3, s3, a0
-; RV32I-NEXT:    mv a0, s0
-; RV32I-NEXT:    mv a1, s1
-; RV32I-NEXT:    call __gtsf2 at plt
-; RV32I-NEXT:    sgtz a0, a0
-; RV32I-NEXT:    neg s1, a0
-; RV32I-NEXT:    mv a0, s0
-; RV32I-NEXT:    li a1, 0
-; RV32I-NEXT:    call __gesf2 at plt
-; RV32I-NEXT:    slti a0, a0, 0
-; RV32I-NEXT:    addi a0, a0, -1
-; RV32I-NEXT:    and a1, a0, s2
+; RV32I-NEXT:    and a0, s2, a0
+; RV32I-NEXT:    or a0, s1, a0
+; RV32I-NEXT:    and a1, s2, a1
 ; RV32I-NEXT:    or a1, s1, a1
-; RV32I-NEXT:    mv a0, s3
-; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    addi sp, sp, 32
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: fcvt_lu_h_sat:
@@ -2772,65 +2739,61 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
 ; RV32IZFH-LABEL: fcvt_w_s_sat_i16:
 ; RV32IZFH:       # %bb.0: # %start
 ; RV32IZFH-NEXT:    fcvt.s.h fa5, fa0
-; RV32IZFH-NEXT:    lui a0, %hi(.LCPI32_0)
-; RV32IZFH-NEXT:    flw fa4, %lo(.LCPI32_0)(a0)
-; RV32IZFH-NEXT:    lui a0, 815104
-; RV32IZFH-NEXT:    fmv.w.x fa3, a0
-; RV32IZFH-NEXT:    fmax.s fa3, fa5, fa3
-; RV32IZFH-NEXT:    fmin.s fa4, fa3, fa4
-; RV32IZFH-NEXT:    fcvt.w.s a0, fa4, rtz
-; RV32IZFH-NEXT:    feq.s a1, fa5, fa5
-; RV32IZFH-NEXT:    seqz a1, a1
-; RV32IZFH-NEXT:    addi a1, a1, -1
-; RV32IZFH-NEXT:    and a0, a1, a0
+; RV32IZFH-NEXT:    feq.s a0, fa5, fa5
+; RV32IZFH-NEXT:    neg a0, a0
+; RV32IZFH-NEXT:    lui a1, %hi(.LCPI32_0)
+; RV32IZFH-NEXT:    flw fa4, %lo(.LCPI32_0)(a1)
+; RV32IZFH-NEXT:    lui a1, 815104
+; RV32IZFH-NEXT:    fmv.w.x fa3, a1
+; RV32IZFH-NEXT:    fmax.s fa5, fa5, fa3
+; RV32IZFH-NEXT:    fmin.s fa5, fa5, fa4
+; RV32IZFH-NEXT:    fcvt.w.s a1, fa5, rtz
+; RV32IZFH-NEXT:    and a0, a0, a1
 ; RV32IZFH-NEXT:    ret
 ;
 ; RV64IZFH-LABEL: fcvt_w_s_sat_i16:
 ; RV64IZFH:       # %bb.0: # %start
 ; RV64IZFH-NEXT:    fcvt.s.h fa5, fa0
-; RV64IZFH-NEXT:    lui a0, %hi(.LCPI32_0)
-; RV64IZFH-NEXT:    flw fa4, %lo(.LCPI32_0)(a0)
-; RV64IZFH-NEXT:    lui a0, 815104
-; RV64IZFH-NEXT:    fmv.w.x fa3, a0
-; RV64IZFH-NEXT:    fmax.s fa3, fa5, fa3
-; RV64IZFH-NEXT:    fmin.s fa4, fa3, fa4
-; RV64IZFH-NEXT:    fcvt.l.s a0, fa4, rtz
-; RV64IZFH-NEXT:    feq.s a1, fa5, fa5
-; RV64IZFH-NEXT:    seqz a1, a1
-; RV64IZFH-NEXT:    addi a1, a1, -1
-; RV64IZFH-NEXT:    and a0, a1, a0
+; RV64IZFH-NEXT:    feq.s a0, fa5, fa5
+; RV64IZFH-NEXT:    lui a1, %hi(.LCPI32_0)
+; RV64IZFH-NEXT:    flw fa4, %lo(.LCPI32_0)(a1)
+; RV64IZFH-NEXT:    lui a1, 815104
+; RV64IZFH-NEXT:    fmv.w.x fa3, a1
+; RV64IZFH-NEXT:    fmax.s fa5, fa5, fa3
+; RV64IZFH-NEXT:    neg a0, a0
+; RV64IZFH-NEXT:    fmin.s fa5, fa5, fa4
+; RV64IZFH-NEXT:    fcvt.l.s a1, fa5, rtz
+; RV64IZFH-NEXT:    and a0, a0, a1
 ; RV64IZFH-NEXT:    ret
 ;
 ; RV32IDZFH-LABEL: fcvt_w_s_sat_i16:
 ; RV32IDZFH:       # %bb.0: # %start
 ; RV32IDZFH-NEXT:    fcvt.s.h fa5, fa0
-; RV32IDZFH-NEXT:    lui a0, %hi(.LCPI32_0)
-; RV32IDZFH-NEXT:    flw fa4, %lo(.LCPI32_0)(a0)
-; RV32IDZFH-NEXT:    lui a0, 815104
-; RV32IDZFH-NEXT:    fmv.w.x fa3, a0
-; RV32IDZFH-NEXT:    fmax.s fa3, fa5, fa3
-; RV32IDZFH-NEXT:    fmin.s fa4, fa3, fa4
-; RV32IDZFH-NEXT:    fcvt.w.s a0, fa4, rtz
-; RV32IDZFH-NEXT:    feq.s a1, fa5, fa5
-; RV32IDZFH-NEXT:    seqz a1, a1
-; RV32IDZFH-NEXT:    addi a1, a1, -1
-; RV32IDZFH-NEXT:    and a0, a1, a0
+; RV32IDZFH-NEXT:    feq.s a0, fa5, fa5
+; RV32IDZFH-NEXT:    neg a0, a0
+; RV32IDZFH-NEXT:    lui a1, %hi(.LCPI32_0)
+; RV32IDZFH-NEXT:    flw fa4, %lo(.LCPI32_0)(a1)
+; RV32IDZFH-NEXT:    lui a1, 815104
+; RV32IDZFH-NEXT:    fmv.w.x fa3, a1
+; RV32IDZFH-NEXT:    fmax.s fa5, fa5, fa3
+; RV32IDZFH-NEXT:    fmin.s fa5, fa5, fa4
+; RV32IDZFH-NEXT:    fcvt.w.s a1, fa5, rtz
+; RV32IDZFH-NEXT:    and a0, a0, a1
 ; RV32IDZFH-NEXT:    ret
 ;
 ; RV64IDZFH-LABEL: fcvt_w_s_sat_i16:
 ; RV64IDZFH:       # %bb.0: # %start
 ; RV64IDZFH-NEXT:    fcvt.s.h fa5, fa0
-; RV64IDZFH-NEXT:    lui a0, %hi(.LCPI32_0)
-; RV64IDZFH-NEXT:    flw fa4, %lo(.LCPI32_0)(a0)
-; RV64IDZFH-NEXT:    lui a0, 815104
-; RV64IDZFH-NEXT:    fmv.w.x fa3, a0
-; RV64IDZFH-NEXT:    fmax.s fa3, fa5, fa3
-; RV64IDZFH-NEXT:    fmin.s fa4, fa3, fa4
-; RV64IDZFH-NEXT:    fcvt.l.s a0, fa4, rtz
-; RV64IDZFH-NEXT:    feq.s a1, fa5, fa5
-; RV64IDZFH-NEXT:    seqz a1, a1
-; RV64IDZFH-NEXT:    addi a1, a1, -1
-; RV64IDZFH-NEXT:    and a0, a1, a0
+; RV64IDZFH-NEXT:    feq.s a0, fa5, fa5
+; RV64IDZFH-NEXT:    lui a1, %hi(.LCPI32_0)
+; RV64IDZFH-NEXT:    flw fa4, %lo(.LCPI32_0)(a1)
+; RV64IDZFH-NEXT:    lui a1, 815104
+; RV64IDZFH-NEXT:    fmv.w.x fa3, a1
+; RV64IDZFH-NEXT:    fmax.s fa5, fa5, fa3
+; RV64IDZFH-NEXT:    neg a0, a0
+; RV64IDZFH-NEXT:    fmin.s fa5, fa5, fa4
+; RV64IDZFH-NEXT:    fcvt.l.s a1, fa5, rtz
+; RV64IDZFH-NEXT:    and a0, a0, a1
 ; RV64IDZFH-NEXT:    ret
 ;
 ; RV32I-LABEL: fcvt_w_s_sat_i16:
@@ -2926,33 +2889,31 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
 ; CHECK32-IZFHMIN-LABEL: fcvt_w_s_sat_i16:
 ; CHECK32-IZFHMIN:       # %bb.0: # %start
 ; CHECK32-IZFHMIN-NEXT:    fcvt.s.h fa5, fa0
-; CHECK32-IZFHMIN-NEXT:    lui a0, %hi(.LCPI32_0)
-; CHECK32-IZFHMIN-NEXT:    flw fa4, %lo(.LCPI32_0)(a0)
-; CHECK32-IZFHMIN-NEXT:    lui a0, 815104
-; CHECK32-IZFHMIN-NEXT:    fmv.w.x fa3, a0
-; CHECK32-IZFHMIN-NEXT:    fmax.s fa3, fa5, fa3
-; CHECK32-IZFHMIN-NEXT:    fmin.s fa4, fa3, fa4
-; CHECK32-IZFHMIN-NEXT:    fcvt.w.s a0, fa4, rtz
-; CHECK32-IZFHMIN-NEXT:    feq.s a1, fa5, fa5
-; CHECK32-IZFHMIN-NEXT:    seqz a1, a1
-; CHECK32-IZFHMIN-NEXT:    addi a1, a1, -1
-; CHECK32-IZFHMIN-NEXT:    and a0, a1, a0
+; CHECK32-IZFHMIN-NEXT:    feq.s a0, fa5, fa5
+; CHECK32-IZFHMIN-NEXT:    neg a0, a0
+; CHECK32-IZFHMIN-NEXT:    lui a1, %hi(.LCPI32_0)
+; CHECK32-IZFHMIN-NEXT:    flw fa4, %lo(.LCPI32_0)(a1)
+; CHECK32-IZFHMIN-NEXT:    lui a1, 815104
+; CHECK32-IZFHMIN-NEXT:    fmv.w.x fa3, a1
+; CHECK32-IZFHMIN-NEXT:    fmax.s fa5, fa5, fa3
+; CHECK32-IZFHMIN-NEXT:    fmin.s fa5, fa5, fa4
+; CHECK32-IZFHMIN-NEXT:    fcvt.w.s a1, fa5, rtz
+; CHECK32-IZFHMIN-NEXT:    and a0, a0, a1
 ; CHECK32-IZFHMIN-NEXT:    ret
 ;
 ; CHECK64-IZFHMIN-LABEL: fcvt_w_s_sat_i16:
 ; CHECK64-IZFHMIN:       # %bb.0: # %start
 ; CHECK64-IZFHMIN-NEXT:    fcvt.s.h fa5, fa0
-; CHECK64-IZFHMIN-NEXT:    lui a0, %hi(.LCPI32_0)
-; CHECK64-IZFHMIN-NEXT:    flw fa4, %lo(.LCPI32_0)(a0)
-; CHECK64-IZFHMIN-NEXT:    lui a0, 815104
-; CHECK64-IZFHMIN-NEXT:    fmv.w.x fa3, a0
-; CHECK64-IZFHMIN-NEXT:    fmax.s fa3, fa5, fa3
-; CHECK64-IZFHMIN-NEXT:    fmin.s fa4, fa3, fa4
-; CHECK64-IZFHMIN-NEXT:    fcvt.l.s a0, fa4, rtz
-; CHECK64-IZFHMIN-NEXT:    feq.s a1, fa5, fa5
-; CHECK64-IZFHMIN-NEXT:    seqz a1, a1
-; CHECK64-IZFHMIN-NEXT:    addi a1, a1, -1
-; CHECK64-IZFHMIN-NEXT:    and a0, a1, a0
+; CHECK64-IZFHMIN-NEXT:    feq.s a0, fa5, fa5
+; CHECK64-IZFHMIN-NEXT:    lui a1, %hi(.LCPI32_0)
+; CHECK64-IZFHMIN-NEXT:    flw fa4, %lo(.LCPI32_0)(a1)
+; CHECK64-IZFHMIN-NEXT:    lui a1, 815104
+; CHECK64-IZFHMIN-NEXT:    fmv.w.x fa3, a1
+; CHECK64-IZFHMIN-NEXT:    fmax.s fa5, fa5, fa3
+; CHECK64-IZFHMIN-NEXT:    neg a0, a0
+; CHECK64-IZFHMIN-NEXT:    fmin.s fa5, fa5, fa4
+; CHECK64-IZFHMIN-NEXT:    fcvt.l.s a1, fa5, rtz
+; CHECK64-IZFHMIN-NEXT:    and a0, a0, a1
 ; CHECK64-IZFHMIN-NEXT:    ret
 start:
   %0 = tail call i16 @llvm.fptosi.sat.i16.f16(half %a)
@@ -3236,65 +3197,61 @@ define signext i8 @fcvt_w_s_sat_i8(half %a) nounwind {
 ; RV32IZFH-LABEL: fcvt_w_s_sat_i8:
 ; RV32IZFH:       # %bb.0: # %start
 ; RV32IZFH-NEXT:    fcvt.s.h fa5, fa0
-; RV32IZFH-NEXT:    lui a0, 798720
-; RV32IZFH-NEXT:    fmv.w.x fa4, a0
-; RV32IZFH-NEXT:    fmax.s fa4, fa5, fa4
-; RV32IZFH-NEXT:    lui a0, 274400
-; RV32IZFH-NEXT:    fmv.w.x fa3, a0
-; RV32IZFH-NEXT:    fmin.s fa4, fa4, fa3
-; RV32IZFH-NEXT:    fcvt.w.s a0, fa4, rtz
-; RV32IZFH-NEXT:    feq.s a1, fa5, fa5
-; RV32IZFH-NEXT:    seqz a1, a1
-; RV32IZFH-NEXT:    addi a1, a1, -1
-; RV32IZFH-NEXT:    and a0, a1, a0
+; RV32IZFH-NEXT:    feq.s a0, fa5, fa5
+; RV32IZFH-NEXT:    neg a0, a0
+; RV32IZFH-NEXT:    lui a1, 798720
+; RV32IZFH-NEXT:    fmv.w.x fa4, a1
+; RV32IZFH-NEXT:    fmax.s fa5, fa5, fa4
+; RV32IZFH-NEXT:    lui a1, 274400
+; RV32IZFH-NEXT:    fmv.w.x fa4, a1
+; RV32IZFH-NEXT:    fmin.s fa5, fa5, fa4
+; RV32IZFH-NEXT:    fcvt.w.s a1, fa5, rtz
+; RV32IZFH-NEXT:    and a0, a0, a1
 ; RV32IZFH-NEXT:    ret
 ;
 ; RV64IZFH-LABEL: fcvt_w_s_sat_i8:
 ; RV64IZFH:       # %bb.0: # %start
 ; RV64IZFH-NEXT:    fcvt.s.h fa5, fa0
-; RV64IZFH-NEXT:    lui a0, 798720
-; RV64IZFH-NEXT:    fmv.w.x fa4, a0
-; RV64IZFH-NEXT:    fmax.s fa4, fa5, fa4
-; RV64IZFH-NEXT:    lui a0, 274400
-; RV64IZFH-NEXT:    fmv.w.x fa3, a0
-; RV64IZFH-NEXT:    fmin.s fa4, fa4, fa3
-; RV64IZFH-NEXT:    fcvt.l.s a0, fa4, rtz
-; RV64IZFH-NEXT:    feq.s a1, fa5, fa5
-; RV64IZFH-NEXT:    seqz a1, a1
-; RV64IZFH-NEXT:    addi a1, a1, -1
-; RV64IZFH-NEXT:    and a0, a1, a0
+; RV64IZFH-NEXT:    feq.s a0, fa5, fa5
+; RV64IZFH-NEXT:    neg a0, a0
+; RV64IZFH-NEXT:    lui a1, 798720
+; RV64IZFH-NEXT:    fmv.w.x fa4, a1
+; RV64IZFH-NEXT:    fmax.s fa5, fa5, fa4
+; RV64IZFH-NEXT:    lui a1, 274400
+; RV64IZFH-NEXT:    fmv.w.x fa4, a1
+; RV64IZFH-NEXT:    fmin.s fa5, fa5, fa4
+; RV64IZFH-NEXT:    fcvt.l.s a1, fa5, rtz
+; RV64IZFH-NEXT:    and a0, a0, a1
 ; RV64IZFH-NEXT:    ret
 ;
 ; RV32IDZFH-LABEL: fcvt_w_s_sat_i8:
 ; RV32IDZFH:       # %bb.0: # %start
 ; RV32IDZFH-NEXT:    fcvt.s.h fa5, fa0
-; RV32IDZFH-NEXT:    lui a0, 798720
-; RV32IDZFH-NEXT:    fmv.w.x fa4, a0
-; RV32IDZFH-NEXT:    fmax.s fa4, fa5, fa4
-; RV32IDZFH-NEXT:    lui a0, 274400
-; RV32IDZFH-NEXT:    fmv.w.x fa3, a0
-; RV32IDZFH-NEXT:    fmin.s fa4, fa4, fa3
-; RV32IDZFH-NEXT:    fcvt.w.s a0, fa4, rtz
-; RV32IDZFH-NEXT:    feq.s a1, fa5, fa5
-; RV32IDZFH-NEXT:    seqz a1, a1
-; RV32IDZFH-NEXT:    addi a1, a1, -1
-; RV32IDZFH-NEXT:    and a0, a1, a0
+; RV32IDZFH-NEXT:    feq.s a0, fa5, fa5
+; RV32IDZFH-NEXT:    neg a0, a0
+; RV32IDZFH-NEXT:    lui a1, 798720
+; RV32IDZFH-NEXT:    fmv.w.x fa4, a1
+; RV32IDZFH-NEXT:    fmax.s fa5, fa5, fa4
+; RV32IDZFH-NEXT:    lui a1, 274400
+; RV32IDZFH-NEXT:    fmv.w.x fa4, a1
+; RV32IDZFH-NEXT:    fmin.s fa5, fa5, fa4
+; RV32IDZFH-NEXT:    fcvt.w.s a1, fa5, rtz
+; RV32IDZFH-NEXT:    and a0, a0, a1
 ; RV32IDZFH-NEXT:    ret
 ;
 ; RV64IDZFH-LABEL: fcvt_w_s_sat_i8:
 ; RV64IDZFH:       # %bb.0: # %start
 ; RV64IDZFH-NEXT:    fcvt.s.h fa5, fa0
-; RV64IDZFH-NEXT:    lui a0, 798720
-; RV64IDZFH-NEXT:    fmv.w.x fa4, a0
-; RV64IDZFH-NEXT:    fmax.s fa4, fa5, fa4
-; RV64IDZFH-NEXT:    lui a0, 274400
-; RV64IDZFH-NEXT:    fmv.w.x fa3, a0
-; RV64IDZFH-NEXT:    fmin.s fa4, fa4, fa3
-; RV64IDZFH-NEXT:    fcvt.l.s a0, fa4, rtz
-; RV64IDZFH-NEXT:    feq.s a1, fa5, fa5
-; RV64IDZFH-NEXT:    seqz a1, a1
-; RV64IDZFH-NEXT:    addi a1, a1, -1
-; RV64IDZFH-NEXT:    and a0, a1, a0
+; RV64IDZFH-NEXT:    feq.s a0, fa5, fa5
+; RV64IDZFH-NEXT:    neg a0, a0
+; RV64IDZFH-NEXT:    lui a1, 798720
+; RV64IDZFH-NEXT:    fmv.w.x fa4, a1
+; RV64IDZFH-NEXT:    fmax.s fa5, fa5, fa4
+; RV64IDZFH-NEXT:    lui a1, 274400
+; RV64IDZFH-NEXT:    fmv.w.x fa4, a1
+; RV64IDZFH-NEXT:    fmin.s fa5, fa5, fa4
+; RV64IDZFH-NEXT:    fcvt.l.s a1, fa5, rtz
+; RV64IDZFH-NEXT:    and a0, a0, a1
 ; RV64IDZFH-NEXT:    ret
 ;
 ; RV32I-LABEL: fcvt_w_s_sat_i8:
@@ -3386,33 +3343,31 @@ define signext i8 @fcvt_w_s_sat_i8(half %a) nounwind {
 ; CHECK32-IZFHMIN-LABEL: fcvt_w_s_sat_i8:
 ; CHECK32-IZFHMIN:       # %bb.0: # %start
 ; CHECK32-IZFHMIN-NEXT:    fcvt.s.h fa5, fa0
-; CHECK32-IZFHMIN-NEXT:    lui a0, 798720
-; CHECK32-IZFHMIN-NEXT:    fmv.w.x fa4, a0
-; CHECK32-IZFHMIN-NEXT:    fmax.s fa4, fa5, fa4
-; CHECK32-IZFHMIN-NEXT:    lui a0, 274400
-; CHECK32-IZFHMIN-NEXT:    fmv.w.x fa3, a0
-; CHECK32-IZFHMIN-NEXT:    fmin.s fa4, fa4, fa3
-; CHECK32-IZFHMIN-NEXT:    fcvt.w.s a0, fa4, rtz
-; CHECK32-IZFHMIN-NEXT:    feq.s a1, fa5, fa5
-; CHECK32-IZFHMIN-NEXT:    seqz a1, a1
-; CHECK32-IZFHMIN-NEXT:    addi a1, a1, -1
-; CHECK32-IZFHMIN-NEXT:    and a0, a1, a0
+; CHECK32-IZFHMIN-NEXT:    feq.s a0, fa5, fa5
+; CHECK32-IZFHMIN-NEXT:    neg a0, a0
+; CHECK32-IZFHMIN-NEXT:    lui a1, 798720
+; CHECK32-IZFHMIN-NEXT:    fmv.w.x fa4, a1
+; CHECK32-IZFHMIN-NEXT:    fmax.s fa5, fa5, fa4
+; CHECK32-IZFHMIN-NEXT:    lui a1, 274400
+; CHECK32-IZFHMIN-NEXT:    fmv.w.x fa4, a1
+; CHECK32-IZFHMIN-NEXT:    fmin.s fa5, fa5, fa4
+; CHECK32-IZFHMIN-NEXT:    fcvt.w.s a1, fa5, rtz
+; CHECK32-IZFHMIN-NEXT:    and a0, a0, a1
 ; CHECK32-IZFHMIN-NEXT:    ret
 ;
 ; CHECK64-IZFHMIN-LABEL: fcvt_w_s_sat_i8:
 ; CHECK64-IZFHMIN:       # %bb.0: # %start
 ; CHECK64-IZFHMIN-NEXT:    fcvt.s.h fa5, fa0
-; CHECK64-IZFHMIN-NEXT:    lui a0, 798720
-; CHECK64-IZFHMIN-NEXT:    fmv.w.x fa4, a0
-; CHECK64-IZFHMIN-NEXT:    fmax.s fa4, fa5, fa4
-; CHECK64-IZFHMIN-NEXT:    lui a0, 274400
-; CHECK64-IZFHMIN-NEXT:    fmv.w.x fa3, a0
-; CHECK64-IZFHMIN-NEXT:    fmin.s fa4, fa4, fa3
-; CHECK64-IZFHMIN-NEXT:    fcvt.l.s a0, fa4, rtz
-; CHECK64-IZFHMIN-NEXT:    feq.s a1, fa5, fa5
-; CHECK64-IZFHMIN-NEXT:    seqz a1, a1
-; CHECK64-IZFHMIN-NEXT:    addi a1, a1, -1
-; CHECK64-IZFHMIN-NEXT:    and a0, a1, a0
+; CHECK64-IZFHMIN-NEXT:    feq.s a0, fa5, fa5
+; CHECK64-IZFHMIN-NEXT:    neg a0, a0
+; CHECK64-IZFHMIN-NEXT:    lui a1, 798720
+; CHECK64-IZFHMIN-NEXT:    fmv.w.x fa4, a1
+; CHECK64-IZFHMIN-NEXT:    fmax.s fa5, fa5, fa4
+; CHECK64-IZFHMIN-NEXT:    lui a1, 274400
+; CHECK64-IZFHMIN-NEXT:    fmv.w.x fa4, a1
+; CHECK64-IZFHMIN-NEXT:    fmin.s fa5, fa5, fa4
+; CHECK64-IZFHMIN-NEXT:    fcvt.l.s a1, fa5, rtz
+; CHECK64-IZFHMIN-NEXT:    and a0, a0, a1
 ; CHECK64-IZFHMIN-NEXT:    ret
 start:
   %0 = tail call i8 @llvm.fptosi.sat.i8.f16(half %a)

diff  --git a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll
index 63af17b80a41b..de3930a157128 100644
--- a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll
+++ b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll
@@ -67,27 +67,27 @@ define i64 @test_floor_si64(half %x) nounwind {
 ; RV32IZFH-NEXT:    fle.s s0, fa5, fs0
 ; RV32IZFH-NEXT:    fmv.s fa0, fs0
 ; RV32IZFH-NEXT:    call __fixsfdi at plt
-; RV32IZFH-NEXT:    lui a3, 524288
-; RV32IZFH-NEXT:    bnez s0, .LBB1_4
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    lui a2, 524288
+; RV32IZFH-NEXT:    beqz s0, .LBB1_4
 ; RV32IZFH-NEXT:  # %bb.3:
-; RV32IZFH-NEXT:    lui a1, 524288
+; RV32IZFH-NEXT:    mv a2, a1
 ; RV32IZFH-NEXT:  .LBB1_4:
-; RV32IZFH-NEXT:    lui a2, %hi(.LCPI1_1)
-; RV32IZFH-NEXT:    flw fa5, %lo(.LCPI1_1)(a2)
-; RV32IZFH-NEXT:    flt.s a2, fa5, fs0
-; RV32IZFH-NEXT:    beqz a2, .LBB1_6
+; RV32IZFH-NEXT:    lui a1, %hi(.LCPI1_1)
+; RV32IZFH-NEXT:    flw fa5, %lo(.LCPI1_1)(a1)
+; RV32IZFH-NEXT:    flt.s a3, fa5, fs0
+; RV32IZFH-NEXT:    beqz a3, .LBB1_6
 ; RV32IZFH-NEXT:  # %bb.5:
-; RV32IZFH-NEXT:    addi a1, a3, -1
+; RV32IZFH-NEXT:    addi a2, a4, -1
 ; RV32IZFH-NEXT:  .LBB1_6:
-; RV32IZFH-NEXT:    feq.s a3, fs0, fs0
-; RV32IZFH-NEXT:    seqz a3, a3
-; RV32IZFH-NEXT:    addi a3, a3, -1
-; RV32IZFH-NEXT:    and a1, a3, a1
-; RV32IZFH-NEXT:    neg a4, s0
-; RV32IZFH-NEXT:    and a0, a4, a0
-; RV32IZFH-NEXT:    neg a2, a2
+; RV32IZFH-NEXT:    feq.s a1, fs0, fs0
+; RV32IZFH-NEXT:    neg a4, a1
+; RV32IZFH-NEXT:    and a1, a4, a2
+; RV32IZFH-NEXT:    neg a2, s0
+; RV32IZFH-NEXT:    and a0, a2, a0
+; RV32IZFH-NEXT:    neg a2, a3
 ; RV32IZFH-NEXT:    or a0, a2, a0
-; RV32IZFH-NEXT:    and a0, a3, a0
+; RV32IZFH-NEXT:    and a0, a4, a0
 ; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
@@ -127,27 +127,27 @@ define i64 @test_floor_si64(half %x) nounwind {
 ; RV32IZFHMIN-NEXT:    fle.s s0, fa5, fs0
 ; RV32IZFHMIN-NEXT:    fmv.s fa0, fs0
 ; RV32IZFHMIN-NEXT:    call __fixsfdi at plt
-; RV32IZFHMIN-NEXT:    lui a3, 524288
-; RV32IZFHMIN-NEXT:    bnez s0, .LBB1_4
+; RV32IZFHMIN-NEXT:    lui a4, 524288
+; RV32IZFHMIN-NEXT:    lui a2, 524288
+; RV32IZFHMIN-NEXT:    beqz s0, .LBB1_4
 ; RV32IZFHMIN-NEXT:  # %bb.3:
-; RV32IZFHMIN-NEXT:    lui a1, 524288
+; RV32IZFHMIN-NEXT:    mv a2, a1
 ; RV32IZFHMIN-NEXT:  .LBB1_4:
-; RV32IZFHMIN-NEXT:    lui a2, %hi(.LCPI1_0)
-; RV32IZFHMIN-NEXT:    flw fa5, %lo(.LCPI1_0)(a2)
-; RV32IZFHMIN-NEXT:    flt.s a2, fa5, fs0
-; RV32IZFHMIN-NEXT:    beqz a2, .LBB1_6
+; RV32IZFHMIN-NEXT:    lui a1, %hi(.LCPI1_0)
+; RV32IZFHMIN-NEXT:    flw fa5, %lo(.LCPI1_0)(a1)
+; RV32IZFHMIN-NEXT:    flt.s a3, fa5, fs0
+; RV32IZFHMIN-NEXT:    beqz a3, .LBB1_6
 ; RV32IZFHMIN-NEXT:  # %bb.5:
-; RV32IZFHMIN-NEXT:    addi a1, a3, -1
+; RV32IZFHMIN-NEXT:    addi a2, a4, -1
 ; RV32IZFHMIN-NEXT:  .LBB1_6:
-; RV32IZFHMIN-NEXT:    feq.s a3, fs0, fs0
-; RV32IZFHMIN-NEXT:    seqz a3, a3
-; RV32IZFHMIN-NEXT:    addi a3, a3, -1
-; RV32IZFHMIN-NEXT:    and a1, a3, a1
-; RV32IZFHMIN-NEXT:    neg a4, s0
-; RV32IZFHMIN-NEXT:    and a0, a4, a0
-; RV32IZFHMIN-NEXT:    neg a2, a2
+; RV32IZFHMIN-NEXT:    feq.s a1, fs0, fs0
+; RV32IZFHMIN-NEXT:    neg a4, a1
+; RV32IZFHMIN-NEXT:    and a1, a4, a2
+; RV32IZFHMIN-NEXT:    neg a2, s0
+; RV32IZFHMIN-NEXT:    and a0, a2, a0
+; RV32IZFHMIN-NEXT:    neg a2, a3
 ; RV32IZFHMIN-NEXT:    or a0, a2, a0
-; RV32IZFHMIN-NEXT:    and a0, a3, a0
+; RV32IZFHMIN-NEXT:    and a0, a4, a0
 ; RV32IZFHMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFHMIN-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFHMIN-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
@@ -407,27 +407,27 @@ define i64 @test_ceil_si64(half %x) nounwind {
 ; RV32IZFH-NEXT:    fle.s s0, fa5, fs0
 ; RV32IZFH-NEXT:    fmv.s fa0, fs0
 ; RV32IZFH-NEXT:    call __fixsfdi at plt
-; RV32IZFH-NEXT:    lui a3, 524288
-; RV32IZFH-NEXT:    bnez s0, .LBB5_4
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    lui a2, 524288
+; RV32IZFH-NEXT:    beqz s0, .LBB5_4
 ; RV32IZFH-NEXT:  # %bb.3:
-; RV32IZFH-NEXT:    lui a1, 524288
+; RV32IZFH-NEXT:    mv a2, a1
 ; RV32IZFH-NEXT:  .LBB5_4:
-; RV32IZFH-NEXT:    lui a2, %hi(.LCPI5_1)
-; RV32IZFH-NEXT:    flw fa5, %lo(.LCPI5_1)(a2)
-; RV32IZFH-NEXT:    flt.s a2, fa5, fs0
-; RV32IZFH-NEXT:    beqz a2, .LBB5_6
+; RV32IZFH-NEXT:    lui a1, %hi(.LCPI5_1)
+; RV32IZFH-NEXT:    flw fa5, %lo(.LCPI5_1)(a1)
+; RV32IZFH-NEXT:    flt.s a3, fa5, fs0
+; RV32IZFH-NEXT:    beqz a3, .LBB5_6
 ; RV32IZFH-NEXT:  # %bb.5:
-; RV32IZFH-NEXT:    addi a1, a3, -1
+; RV32IZFH-NEXT:    addi a2, a4, -1
 ; RV32IZFH-NEXT:  .LBB5_6:
-; RV32IZFH-NEXT:    feq.s a3, fs0, fs0
-; RV32IZFH-NEXT:    seqz a3, a3
-; RV32IZFH-NEXT:    addi a3, a3, -1
-; RV32IZFH-NEXT:    and a1, a3, a1
-; RV32IZFH-NEXT:    neg a4, s0
-; RV32IZFH-NEXT:    and a0, a4, a0
-; RV32IZFH-NEXT:    neg a2, a2
+; RV32IZFH-NEXT:    feq.s a1, fs0, fs0
+; RV32IZFH-NEXT:    neg a4, a1
+; RV32IZFH-NEXT:    and a1, a4, a2
+; RV32IZFH-NEXT:    neg a2, s0
+; RV32IZFH-NEXT:    and a0, a2, a0
+; RV32IZFH-NEXT:    neg a2, a3
 ; RV32IZFH-NEXT:    or a0, a2, a0
-; RV32IZFH-NEXT:    and a0, a3, a0
+; RV32IZFH-NEXT:    and a0, a4, a0
 ; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
@@ -467,27 +467,27 @@ define i64 @test_ceil_si64(half %x) nounwind {
 ; RV32IZFHMIN-NEXT:    fle.s s0, fa5, fs0
 ; RV32IZFHMIN-NEXT:    fmv.s fa0, fs0
 ; RV32IZFHMIN-NEXT:    call __fixsfdi at plt
-; RV32IZFHMIN-NEXT:    lui a3, 524288
-; RV32IZFHMIN-NEXT:    bnez s0, .LBB5_4
+; RV32IZFHMIN-NEXT:    lui a4, 524288
+; RV32IZFHMIN-NEXT:    lui a2, 524288
+; RV32IZFHMIN-NEXT:    beqz s0, .LBB5_4
 ; RV32IZFHMIN-NEXT:  # %bb.3:
-; RV32IZFHMIN-NEXT:    lui a1, 524288
+; RV32IZFHMIN-NEXT:    mv a2, a1
 ; RV32IZFHMIN-NEXT:  .LBB5_4:
-; RV32IZFHMIN-NEXT:    lui a2, %hi(.LCPI5_0)
-; RV32IZFHMIN-NEXT:    flw fa5, %lo(.LCPI5_0)(a2)
-; RV32IZFHMIN-NEXT:    flt.s a2, fa5, fs0
-; RV32IZFHMIN-NEXT:    beqz a2, .LBB5_6
+; RV32IZFHMIN-NEXT:    lui a1, %hi(.LCPI5_0)
+; RV32IZFHMIN-NEXT:    flw fa5, %lo(.LCPI5_0)(a1)
+; RV32IZFHMIN-NEXT:    flt.s a3, fa5, fs0
+; RV32IZFHMIN-NEXT:    beqz a3, .LBB5_6
 ; RV32IZFHMIN-NEXT:  # %bb.5:
-; RV32IZFHMIN-NEXT:    addi a1, a3, -1
+; RV32IZFHMIN-NEXT:    addi a2, a4, -1
 ; RV32IZFHMIN-NEXT:  .LBB5_6:
-; RV32IZFHMIN-NEXT:    feq.s a3, fs0, fs0
-; RV32IZFHMIN-NEXT:    seqz a3, a3
-; RV32IZFHMIN-NEXT:    addi a3, a3, -1
-; RV32IZFHMIN-NEXT:    and a1, a3, a1
-; RV32IZFHMIN-NEXT:    neg a4, s0
-; RV32IZFHMIN-NEXT:    and a0, a4, a0
-; RV32IZFHMIN-NEXT:    neg a2, a2
+; RV32IZFHMIN-NEXT:    feq.s a1, fs0, fs0
+; RV32IZFHMIN-NEXT:    neg a4, a1
+; RV32IZFHMIN-NEXT:    and a1, a4, a2
+; RV32IZFHMIN-NEXT:    neg a2, s0
+; RV32IZFHMIN-NEXT:    and a0, a2, a0
+; RV32IZFHMIN-NEXT:    neg a2, a3
 ; RV32IZFHMIN-NEXT:    or a0, a2, a0
-; RV32IZFHMIN-NEXT:    and a0, a3, a0
+; RV32IZFHMIN-NEXT:    and a0, a4, a0
 ; RV32IZFHMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFHMIN-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFHMIN-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
@@ -747,27 +747,27 @@ define i64 @test_trunc_si64(half %x) nounwind {
 ; RV32IZFH-NEXT:    fle.s s0, fa5, fs0
 ; RV32IZFH-NEXT:    fmv.s fa0, fs0
 ; RV32IZFH-NEXT:    call __fixsfdi at plt
-; RV32IZFH-NEXT:    lui a3, 524288
-; RV32IZFH-NEXT:    bnez s0, .LBB9_4
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    lui a2, 524288
+; RV32IZFH-NEXT:    beqz s0, .LBB9_4
 ; RV32IZFH-NEXT:  # %bb.3:
-; RV32IZFH-NEXT:    lui a1, 524288
+; RV32IZFH-NEXT:    mv a2, a1
 ; RV32IZFH-NEXT:  .LBB9_4:
-; RV32IZFH-NEXT:    lui a2, %hi(.LCPI9_1)
-; RV32IZFH-NEXT:    flw fa5, %lo(.LCPI9_1)(a2)
-; RV32IZFH-NEXT:    flt.s a2, fa5, fs0
-; RV32IZFH-NEXT:    beqz a2, .LBB9_6
+; RV32IZFH-NEXT:    lui a1, %hi(.LCPI9_1)
+; RV32IZFH-NEXT:    flw fa5, %lo(.LCPI9_1)(a1)
+; RV32IZFH-NEXT:    flt.s a3, fa5, fs0
+; RV32IZFH-NEXT:    beqz a3, .LBB9_6
 ; RV32IZFH-NEXT:  # %bb.5:
-; RV32IZFH-NEXT:    addi a1, a3, -1
+; RV32IZFH-NEXT:    addi a2, a4, -1
 ; RV32IZFH-NEXT:  .LBB9_6:
-; RV32IZFH-NEXT:    feq.s a3, fs0, fs0
-; RV32IZFH-NEXT:    seqz a3, a3
-; RV32IZFH-NEXT:    addi a3, a3, -1
-; RV32IZFH-NEXT:    and a1, a3, a1
-; RV32IZFH-NEXT:    neg a4, s0
-; RV32IZFH-NEXT:    and a0, a4, a0
-; RV32IZFH-NEXT:    neg a2, a2
+; RV32IZFH-NEXT:    feq.s a1, fs0, fs0
+; RV32IZFH-NEXT:    neg a4, a1
+; RV32IZFH-NEXT:    and a1, a4, a2
+; RV32IZFH-NEXT:    neg a2, s0
+; RV32IZFH-NEXT:    and a0, a2, a0
+; RV32IZFH-NEXT:    neg a2, a3
 ; RV32IZFH-NEXT:    or a0, a2, a0
-; RV32IZFH-NEXT:    and a0, a3, a0
+; RV32IZFH-NEXT:    and a0, a4, a0
 ; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
@@ -807,27 +807,27 @@ define i64 @test_trunc_si64(half %x) nounwind {
 ; RV32IZFHMIN-NEXT:    fle.s s0, fa5, fs0
 ; RV32IZFHMIN-NEXT:    fmv.s fa0, fs0
 ; RV32IZFHMIN-NEXT:    call __fixsfdi at plt
-; RV32IZFHMIN-NEXT:    lui a3, 524288
-; RV32IZFHMIN-NEXT:    bnez s0, .LBB9_4
+; RV32IZFHMIN-NEXT:    lui a4, 524288
+; RV32IZFHMIN-NEXT:    lui a2, 524288
+; RV32IZFHMIN-NEXT:    beqz s0, .LBB9_4
 ; RV32IZFHMIN-NEXT:  # %bb.3:
-; RV32IZFHMIN-NEXT:    lui a1, 524288
+; RV32IZFHMIN-NEXT:    mv a2, a1
 ; RV32IZFHMIN-NEXT:  .LBB9_4:
-; RV32IZFHMIN-NEXT:    lui a2, %hi(.LCPI9_0)
-; RV32IZFHMIN-NEXT:    flw fa5, %lo(.LCPI9_0)(a2)
-; RV32IZFHMIN-NEXT:    flt.s a2, fa5, fs0
-; RV32IZFHMIN-NEXT:    beqz a2, .LBB9_6
+; RV32IZFHMIN-NEXT:    lui a1, %hi(.LCPI9_0)
+; RV32IZFHMIN-NEXT:    flw fa5, %lo(.LCPI9_0)(a1)
+; RV32IZFHMIN-NEXT:    flt.s a3, fa5, fs0
+; RV32IZFHMIN-NEXT:    beqz a3, .LBB9_6
 ; RV32IZFHMIN-NEXT:  # %bb.5:
-; RV32IZFHMIN-NEXT:    addi a1, a3, -1
+; RV32IZFHMIN-NEXT:    addi a2, a4, -1
 ; RV32IZFHMIN-NEXT:  .LBB9_6:
-; RV32IZFHMIN-NEXT:    feq.s a3, fs0, fs0
-; RV32IZFHMIN-NEXT:    seqz a3, a3
-; RV32IZFHMIN-NEXT:    addi a3, a3, -1
-; RV32IZFHMIN-NEXT:    and a1, a3, a1
-; RV32IZFHMIN-NEXT:    neg a4, s0
-; RV32IZFHMIN-NEXT:    and a0, a4, a0
-; RV32IZFHMIN-NEXT:    neg a2, a2
+; RV32IZFHMIN-NEXT:    feq.s a1, fs0, fs0
+; RV32IZFHMIN-NEXT:    neg a4, a1
+; RV32IZFHMIN-NEXT:    and a1, a4, a2
+; RV32IZFHMIN-NEXT:    neg a2, s0
+; RV32IZFHMIN-NEXT:    and a0, a2, a0
+; RV32IZFHMIN-NEXT:    neg a2, a3
 ; RV32IZFHMIN-NEXT:    or a0, a2, a0
-; RV32IZFHMIN-NEXT:    and a0, a3, a0
+; RV32IZFHMIN-NEXT:    and a0, a4, a0
 ; RV32IZFHMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFHMIN-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFHMIN-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
@@ -1087,27 +1087,27 @@ define i64 @test_round_si64(half %x) nounwind {
 ; RV32IZFH-NEXT:    fle.s s0, fa5, fs0
 ; RV32IZFH-NEXT:    fmv.s fa0, fs0
 ; RV32IZFH-NEXT:    call __fixsfdi at plt
-; RV32IZFH-NEXT:    lui a3, 524288
-; RV32IZFH-NEXT:    bnez s0, .LBB13_4
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    lui a2, 524288
+; RV32IZFH-NEXT:    beqz s0, .LBB13_4
 ; RV32IZFH-NEXT:  # %bb.3:
-; RV32IZFH-NEXT:    lui a1, 524288
+; RV32IZFH-NEXT:    mv a2, a1
 ; RV32IZFH-NEXT:  .LBB13_4:
-; RV32IZFH-NEXT:    lui a2, %hi(.LCPI13_1)
-; RV32IZFH-NEXT:    flw fa5, %lo(.LCPI13_1)(a2)
-; RV32IZFH-NEXT:    flt.s a2, fa5, fs0
-; RV32IZFH-NEXT:    beqz a2, .LBB13_6
+; RV32IZFH-NEXT:    lui a1, %hi(.LCPI13_1)
+; RV32IZFH-NEXT:    flw fa5, %lo(.LCPI13_1)(a1)
+; RV32IZFH-NEXT:    flt.s a3, fa5, fs0
+; RV32IZFH-NEXT:    beqz a3, .LBB13_6
 ; RV32IZFH-NEXT:  # %bb.5:
-; RV32IZFH-NEXT:    addi a1, a3, -1
+; RV32IZFH-NEXT:    addi a2, a4, -1
 ; RV32IZFH-NEXT:  .LBB13_6:
-; RV32IZFH-NEXT:    feq.s a3, fs0, fs0
-; RV32IZFH-NEXT:    seqz a3, a3
-; RV32IZFH-NEXT:    addi a3, a3, -1
-; RV32IZFH-NEXT:    and a1, a3, a1
-; RV32IZFH-NEXT:    neg a4, s0
-; RV32IZFH-NEXT:    and a0, a4, a0
-; RV32IZFH-NEXT:    neg a2, a2
+; RV32IZFH-NEXT:    feq.s a1, fs0, fs0
+; RV32IZFH-NEXT:    neg a4, a1
+; RV32IZFH-NEXT:    and a1, a4, a2
+; RV32IZFH-NEXT:    neg a2, s0
+; RV32IZFH-NEXT:    and a0, a2, a0
+; RV32IZFH-NEXT:    neg a2, a3
 ; RV32IZFH-NEXT:    or a0, a2, a0
-; RV32IZFH-NEXT:    and a0, a3, a0
+; RV32IZFH-NEXT:    and a0, a4, a0
 ; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
@@ -1147,27 +1147,27 @@ define i64 @test_round_si64(half %x) nounwind {
 ; RV32IZFHMIN-NEXT:    fle.s s0, fa5, fs0
 ; RV32IZFHMIN-NEXT:    fmv.s fa0, fs0
 ; RV32IZFHMIN-NEXT:    call __fixsfdi at plt
-; RV32IZFHMIN-NEXT:    lui a3, 524288
-; RV32IZFHMIN-NEXT:    bnez s0, .LBB13_4
+; RV32IZFHMIN-NEXT:    lui a4, 524288
+; RV32IZFHMIN-NEXT:    lui a2, 524288
+; RV32IZFHMIN-NEXT:    beqz s0, .LBB13_4
 ; RV32IZFHMIN-NEXT:  # %bb.3:
-; RV32IZFHMIN-NEXT:    lui a1, 524288
+; RV32IZFHMIN-NEXT:    mv a2, a1
 ; RV32IZFHMIN-NEXT:  .LBB13_4:
-; RV32IZFHMIN-NEXT:    lui a2, %hi(.LCPI13_0)
-; RV32IZFHMIN-NEXT:    flw fa5, %lo(.LCPI13_0)(a2)
-; RV32IZFHMIN-NEXT:    flt.s a2, fa5, fs0
-; RV32IZFHMIN-NEXT:    beqz a2, .LBB13_6
+; RV32IZFHMIN-NEXT:    lui a1, %hi(.LCPI13_0)
+; RV32IZFHMIN-NEXT:    flw fa5, %lo(.LCPI13_0)(a1)
+; RV32IZFHMIN-NEXT:    flt.s a3, fa5, fs0
+; RV32IZFHMIN-NEXT:    beqz a3, .LBB13_6
 ; RV32IZFHMIN-NEXT:  # %bb.5:
-; RV32IZFHMIN-NEXT:    addi a1, a3, -1
+; RV32IZFHMIN-NEXT:    addi a2, a4, -1
 ; RV32IZFHMIN-NEXT:  .LBB13_6:
-; RV32IZFHMIN-NEXT:    feq.s a3, fs0, fs0
-; RV32IZFHMIN-NEXT:    seqz a3, a3
-; RV32IZFHMIN-NEXT:    addi a3, a3, -1
-; RV32IZFHMIN-NEXT:    and a1, a3, a1
-; RV32IZFHMIN-NEXT:    neg a4, s0
-; RV32IZFHMIN-NEXT:    and a0, a4, a0
-; RV32IZFHMIN-NEXT:    neg a2, a2
+; RV32IZFHMIN-NEXT:    feq.s a1, fs0, fs0
+; RV32IZFHMIN-NEXT:    neg a4, a1
+; RV32IZFHMIN-NEXT:    and a1, a4, a2
+; RV32IZFHMIN-NEXT:    neg a2, s0
+; RV32IZFHMIN-NEXT:    and a0, a2, a0
+; RV32IZFHMIN-NEXT:    neg a2, a3
 ; RV32IZFHMIN-NEXT:    or a0, a2, a0
-; RV32IZFHMIN-NEXT:    and a0, a3, a0
+; RV32IZFHMIN-NEXT:    and a0, a4, a0
 ; RV32IZFHMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFHMIN-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFHMIN-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
@@ -1427,27 +1427,27 @@ define i64 @test_roundeven_si64(half %x) nounwind {
 ; RV32IZFH-NEXT:    fle.s s0, fa5, fs0
 ; RV32IZFH-NEXT:    fmv.s fa0, fs0
 ; RV32IZFH-NEXT:    call __fixsfdi at plt
-; RV32IZFH-NEXT:    lui a3, 524288
-; RV32IZFH-NEXT:    bnez s0, .LBB17_4
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    lui a2, 524288
+; RV32IZFH-NEXT:    beqz s0, .LBB17_4
 ; RV32IZFH-NEXT:  # %bb.3:
-; RV32IZFH-NEXT:    lui a1, 524288
+; RV32IZFH-NEXT:    mv a2, a1
 ; RV32IZFH-NEXT:  .LBB17_4:
-; RV32IZFH-NEXT:    lui a2, %hi(.LCPI17_1)
-; RV32IZFH-NEXT:    flw fa5, %lo(.LCPI17_1)(a2)
-; RV32IZFH-NEXT:    flt.s a2, fa5, fs0
-; RV32IZFH-NEXT:    beqz a2, .LBB17_6
+; RV32IZFH-NEXT:    lui a1, %hi(.LCPI17_1)
+; RV32IZFH-NEXT:    flw fa5, %lo(.LCPI17_1)(a1)
+; RV32IZFH-NEXT:    flt.s a3, fa5, fs0
+; RV32IZFH-NEXT:    beqz a3, .LBB17_6
 ; RV32IZFH-NEXT:  # %bb.5:
-; RV32IZFH-NEXT:    addi a1, a3, -1
+; RV32IZFH-NEXT:    addi a2, a4, -1
 ; RV32IZFH-NEXT:  .LBB17_6:
-; RV32IZFH-NEXT:    feq.s a3, fs0, fs0
-; RV32IZFH-NEXT:    seqz a3, a3
-; RV32IZFH-NEXT:    addi a3, a3, -1
-; RV32IZFH-NEXT:    and a1, a3, a1
-; RV32IZFH-NEXT:    neg a4, s0
-; RV32IZFH-NEXT:    and a0, a4, a0
-; RV32IZFH-NEXT:    neg a2, a2
+; RV32IZFH-NEXT:    feq.s a1, fs0, fs0
+; RV32IZFH-NEXT:    neg a4, a1
+; RV32IZFH-NEXT:    and a1, a4, a2
+; RV32IZFH-NEXT:    neg a2, s0
+; RV32IZFH-NEXT:    and a0, a2, a0
+; RV32IZFH-NEXT:    neg a2, a3
 ; RV32IZFH-NEXT:    or a0, a2, a0
-; RV32IZFH-NEXT:    and a0, a3, a0
+; RV32IZFH-NEXT:    and a0, a4, a0
 ; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
@@ -1487,27 +1487,27 @@ define i64 @test_roundeven_si64(half %x) nounwind {
 ; RV32IZFHMIN-NEXT:    fle.s s0, fa5, fs0
 ; RV32IZFHMIN-NEXT:    fmv.s fa0, fs0
 ; RV32IZFHMIN-NEXT:    call __fixsfdi at plt
-; RV32IZFHMIN-NEXT:    lui a3, 524288
-; RV32IZFHMIN-NEXT:    bnez s0, .LBB17_4
+; RV32IZFHMIN-NEXT:    lui a4, 524288
+; RV32IZFHMIN-NEXT:    lui a2, 524288
+; RV32IZFHMIN-NEXT:    beqz s0, .LBB17_4
 ; RV32IZFHMIN-NEXT:  # %bb.3:
-; RV32IZFHMIN-NEXT:    lui a1, 524288
+; RV32IZFHMIN-NEXT:    mv a2, a1
 ; RV32IZFHMIN-NEXT:  .LBB17_4:
-; RV32IZFHMIN-NEXT:    lui a2, %hi(.LCPI17_0)
-; RV32IZFHMIN-NEXT:    flw fa5, %lo(.LCPI17_0)(a2)
-; RV32IZFHMIN-NEXT:    flt.s a2, fa5, fs0
-; RV32IZFHMIN-NEXT:    beqz a2, .LBB17_6
+; RV32IZFHMIN-NEXT:    lui a1, %hi(.LCPI17_0)
+; RV32IZFHMIN-NEXT:    flw fa5, %lo(.LCPI17_0)(a1)
+; RV32IZFHMIN-NEXT:    flt.s a3, fa5, fs0
+; RV32IZFHMIN-NEXT:    beqz a3, .LBB17_6
 ; RV32IZFHMIN-NEXT:  # %bb.5:
-; RV32IZFHMIN-NEXT:    addi a1, a3, -1
+; RV32IZFHMIN-NEXT:    addi a2, a4, -1
 ; RV32IZFHMIN-NEXT:  .LBB17_6:
-; RV32IZFHMIN-NEXT:    feq.s a3, fs0, fs0
-; RV32IZFHMIN-NEXT:    seqz a3, a3
-; RV32IZFHMIN-NEXT:    addi a3, a3, -1
-; RV32IZFHMIN-NEXT:    and a1, a3, a1
-; RV32IZFHMIN-NEXT:    neg a4, s0
-; RV32IZFHMIN-NEXT:    and a0, a4, a0
-; RV32IZFHMIN-NEXT:    neg a2, a2
+; RV32IZFHMIN-NEXT:    feq.s a1, fs0, fs0
+; RV32IZFHMIN-NEXT:    neg a4, a1
+; RV32IZFHMIN-NEXT:    and a1, a4, a2
+; RV32IZFHMIN-NEXT:    neg a2, s0
+; RV32IZFHMIN-NEXT:    and a0, a2, a0
+; RV32IZFHMIN-NEXT:    neg a2, a3
 ; RV32IZFHMIN-NEXT:    or a0, a2, a0
-; RV32IZFHMIN-NEXT:    and a0, a3, a0
+; RV32IZFHMIN-NEXT:    and a0, a4, a0
 ; RV32IZFHMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFHMIN-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFHMIN-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
index ab3e01e2ff615..c6e880769e5ec 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
@@ -191,68 +191,62 @@ define void @fp2si_v2f64_v2i8(ptr %x, ptr %y) {
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-NEXT:    vle64.v v8, (a0)
-; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vfmv.f.s fa5, v8
 ; RV32-NEXT:    lui a0, %hi(.LCPI10_0)
-; RV32-NEXT:    fld fa5, %lo(.LCPI10_0)(a0)
+; RV32-NEXT:    fld fa4, %lo(.LCPI10_0)(a0)
 ; RV32-NEXT:    lui a0, %hi(.LCPI10_1)
-; RV32-NEXT:    fld fa4, %lo(.LCPI10_1)(a0)
-; RV32-NEXT:    vslidedown.vi v9, v8, 1
-; RV32-NEXT:    vfmv.f.s fa3, v9
-; RV32-NEXT:    fmax.d fa2, fa3, fa5
-; RV32-NEXT:    fmin.d fa2, fa2, fa4
-; RV32-NEXT:    fcvt.w.d a0, fa2, rtz
-; RV32-NEXT:    feq.d a2, fa3, fa3
-; RV32-NEXT:    seqz a2, a2
-; RV32-NEXT:    addi a2, a2, -1
-; RV32-NEXT:    and a0, a2, a0
+; RV32-NEXT:    fld fa3, %lo(.LCPI10_1)(a0)
+; RV32-NEXT:    feq.d a0, fa5, fa5
+; RV32-NEXT:    neg a0, a0
+; RV32-NEXT:    fmax.d fa5, fa5, fa4
+; RV32-NEXT:    fmin.d fa5, fa5, fa3
+; RV32-NEXT:    fcvt.w.d a2, fa5, rtz
+; RV32-NEXT:    and a0, a0, a2
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v8, v8, 1
+; RV32-NEXT:    vfmv.f.s fa5, v8
+; RV32-NEXT:    feq.d a2, fa5, fa5
+; RV32-NEXT:    neg a2, a2
+; RV32-NEXT:    fmax.d fa5, fa5, fa4
+; RV32-NEXT:    fmin.d fa5, fa5, fa3
+; RV32-NEXT:    fcvt.w.d a3, fa5, rtz
+; RV32-NEXT:    and a2, a2, a3
 ; RV32-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a0
-; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; RV32-NEXT:    vfmv.f.s fa3, v8
-; RV32-NEXT:    fmax.d fa5, fa3, fa5
-; RV32-NEXT:    fmin.d fa5, fa5, fa4
-; RV32-NEXT:    fcvt.w.d a0, fa5, rtz
-; RV32-NEXT:    feq.d a2, fa3, fa3
-; RV32-NEXT:    seqz a2, a2
-; RV32-NEXT:    addi a2, a2, -1
-; RV32-NEXT:    and a0, a2, a0
-; RV32-NEXT:    vsetivli zero, 2, e8, mf8, tu, ma
-; RV32-NEXT:    vmv.s.x v9, a0
-; RV32-NEXT:    vse8.v v9, (a1)
+; RV32-NEXT:    vmv.v.x v8, a2
+; RV32-NEXT:    vsetvli zero, zero, e8, mf8, tu, ma
+; RV32-NEXT:    vmv.s.x v8, a0
+; RV32-NEXT:    vse8.v v8, (a1)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: fp2si_v2f64_v2i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV64-NEXT:    vle64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT:    vfmv.f.s fa5, v8
 ; RV64-NEXT:    lui a0, %hi(.LCPI10_0)
-; RV64-NEXT:    fld fa5, %lo(.LCPI10_0)(a0)
+; RV64-NEXT:    fld fa4, %lo(.LCPI10_0)(a0)
 ; RV64-NEXT:    lui a0, %hi(.LCPI10_1)
-; RV64-NEXT:    fld fa4, %lo(.LCPI10_1)(a0)
-; RV64-NEXT:    vslidedown.vi v9, v8, 1
-; RV64-NEXT:    vfmv.f.s fa3, v9
-; RV64-NEXT:    fmax.d fa2, fa3, fa5
-; RV64-NEXT:    fmin.d fa2, fa2, fa4
-; RV64-NEXT:    fcvt.l.d a0, fa2, rtz
-; RV64-NEXT:    feq.d a2, fa3, fa3
-; RV64-NEXT:    seqz a2, a2
-; RV64-NEXT:    addi a2, a2, -1
-; RV64-NEXT:    and a0, a2, a0
+; RV64-NEXT:    fld fa3, %lo(.LCPI10_1)(a0)
+; RV64-NEXT:    feq.d a0, fa5, fa5
+; RV64-NEXT:    neg a0, a0
+; RV64-NEXT:    fmax.d fa5, fa5, fa4
+; RV64-NEXT:    fmin.d fa5, fa5, fa3
+; RV64-NEXT:    fcvt.l.d a2, fa5, rtz
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v8, v8, 1
+; RV64-NEXT:    vfmv.f.s fa5, v8
+; RV64-NEXT:    feq.d a2, fa5, fa5
+; RV64-NEXT:    neg a2, a2
+; RV64-NEXT:    fmax.d fa5, fa5, fa4
+; RV64-NEXT:    fmin.d fa5, fa5, fa3
+; RV64-NEXT:    fcvt.l.d a3, fa5, rtz
+; RV64-NEXT:    and a2, a2, a3
 ; RV64-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; RV64-NEXT:    vmv.v.x v9, a0
-; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; RV64-NEXT:    vfmv.f.s fa3, v8
-; RV64-NEXT:    fmax.d fa5, fa3, fa5
-; RV64-NEXT:    fmin.d fa5, fa5, fa4
-; RV64-NEXT:    fcvt.l.d a0, fa5, rtz
-; RV64-NEXT:    feq.d a2, fa3, fa3
-; RV64-NEXT:    seqz a2, a2
-; RV64-NEXT:    addi a2, a2, -1
-; RV64-NEXT:    and a0, a2, a0
-; RV64-NEXT:    vsetivli zero, 2, e8, mf8, tu, ma
-; RV64-NEXT:    vmv.s.x v9, a0
-; RV64-NEXT:    vse8.v v9, (a1)
+; RV64-NEXT:    vmv.v.x v8, a2
+; RV64-NEXT:    vsetvli zero, zero, e8, mf8, tu, ma
+; RV64-NEXT:    vmv.s.x v8, a0
+; RV64-NEXT:    vse8.v v8, (a1)
 ; RV64-NEXT:    ret
   %a = load <2 x double>, ptr %x
   %d = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double> %a)
@@ -324,89 +318,81 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
 ; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV32-NEXT:    vle64.v v8, (a0)
+; RV32-NEXT:    vfmv.f.s fa3, v8
 ; RV32-NEXT:    lui a0, %hi(.LCPI12_0)
 ; RV32-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
 ; RV32-NEXT:    lui a0, %hi(.LCPI12_1)
 ; RV32-NEXT:    fld fa4, %lo(.LCPI12_1)(a0)
-; RV32-NEXT:    vfmv.f.s fa3, v8
-; RV32-NEXT:    fmax.d fa2, fa3, fa5
-; RV32-NEXT:    fmin.d fa2, fa2, fa4
-; RV32-NEXT:    fcvt.w.d a0, fa2, rtz
-; RV32-NEXT:    feq.d a2, fa3, fa3
-; RV32-NEXT:    seqz a2, a2
-; RV32-NEXT:    addi a2, a2, -1
-; RV32-NEXT:    and a0, a2, a0
+; RV32-NEXT:    feq.d a0, fa3, fa3
+; RV32-NEXT:    neg a0, a0
+; RV32-NEXT:    fmax.d fa3, fa3, fa5
+; RV32-NEXT:    fmin.d fa3, fa3, fa4
+; RV32-NEXT:    fcvt.w.d a2, fa3, rtz
+; RV32-NEXT:    and a0, a0, a2
 ; RV32-NEXT:    sb a0, 8(sp)
 ; RV32-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
 ; RV32-NEXT:    vslidedown.vi v12, v8, 7
 ; RV32-NEXT:    vfmv.f.s fa3, v12
-; RV32-NEXT:    fmax.d fa2, fa3, fa5
-; RV32-NEXT:    fmin.d fa2, fa2, fa4
-; RV32-NEXT:    fcvt.w.d a0, fa2, rtz
-; RV32-NEXT:    feq.d a2, fa3, fa3
-; RV32-NEXT:    seqz a2, a2
-; RV32-NEXT:    addi a2, a2, -1
-; RV32-NEXT:    and a0, a2, a0
+; RV32-NEXT:    feq.d a0, fa3, fa3
+; RV32-NEXT:    neg a0, a0
+; RV32-NEXT:    fmax.d fa3, fa3, fa5
+; RV32-NEXT:    fmin.d fa3, fa3, fa4
+; RV32-NEXT:    fcvt.w.d a2, fa3, rtz
+; RV32-NEXT:    and a0, a0, a2
 ; RV32-NEXT:    sb a0, 15(sp)
 ; RV32-NEXT:    vslidedown.vi v12, v8, 6
 ; RV32-NEXT:    vfmv.f.s fa3, v12
-; RV32-NEXT:    fmax.d fa2, fa3, fa5
-; RV32-NEXT:    fmin.d fa2, fa2, fa4
-; RV32-NEXT:    fcvt.w.d a0, fa2, rtz
-; RV32-NEXT:    feq.d a2, fa3, fa3
-; RV32-NEXT:    seqz a2, a2
-; RV32-NEXT:    addi a2, a2, -1
-; RV32-NEXT:    and a0, a2, a0
+; RV32-NEXT:    feq.d a0, fa3, fa3
+; RV32-NEXT:    neg a0, a0
+; RV32-NEXT:    fmax.d fa3, fa3, fa5
+; RV32-NEXT:    fmin.d fa3, fa3, fa4
+; RV32-NEXT:    fcvt.w.d a2, fa3, rtz
+; RV32-NEXT:    and a0, a0, a2
 ; RV32-NEXT:    sb a0, 14(sp)
 ; RV32-NEXT:    vslidedown.vi v12, v8, 5
 ; RV32-NEXT:    vfmv.f.s fa3, v12
-; RV32-NEXT:    fmax.d fa2, fa3, fa5
-; RV32-NEXT:    fmin.d fa2, fa2, fa4
-; RV32-NEXT:    fcvt.w.d a0, fa2, rtz
-; RV32-NEXT:    feq.d a2, fa3, fa3
-; RV32-NEXT:    seqz a2, a2
-; RV32-NEXT:    addi a2, a2, -1
-; RV32-NEXT:    and a0, a2, a0
+; RV32-NEXT:    feq.d a0, fa3, fa3
+; RV32-NEXT:    neg a0, a0
+; RV32-NEXT:    fmax.d fa3, fa3, fa5
+; RV32-NEXT:    fmin.d fa3, fa3, fa4
+; RV32-NEXT:    fcvt.w.d a2, fa3, rtz
+; RV32-NEXT:    and a0, a0, a2
 ; RV32-NEXT:    sb a0, 13(sp)
 ; RV32-NEXT:    vslidedown.vi v12, v8, 4
 ; RV32-NEXT:    vfmv.f.s fa3, v12
-; RV32-NEXT:    fmax.d fa2, fa3, fa5
-; RV32-NEXT:    fmin.d fa2, fa2, fa4
-; RV32-NEXT:    fcvt.w.d a0, fa2, rtz
-; RV32-NEXT:    feq.d a2, fa3, fa3
-; RV32-NEXT:    seqz a2, a2
-; RV32-NEXT:    addi a2, a2, -1
-; RV32-NEXT:    and a0, a2, a0
+; RV32-NEXT:    feq.d a0, fa3, fa3
+; RV32-NEXT:    neg a0, a0
+; RV32-NEXT:    fmax.d fa3, fa3, fa5
+; RV32-NEXT:    fmin.d fa3, fa3, fa4
+; RV32-NEXT:    fcvt.w.d a2, fa3, rtz
+; RV32-NEXT:    and a0, a0, a2
 ; RV32-NEXT:    sb a0, 12(sp)
 ; RV32-NEXT:    vslidedown.vi v12, v8, 3
 ; RV32-NEXT:    vfmv.f.s fa3, v12
-; RV32-NEXT:    fmax.d fa2, fa3, fa5
-; RV32-NEXT:    fmin.d fa2, fa2, fa4
-; RV32-NEXT:    fcvt.w.d a0, fa2, rtz
-; RV32-NEXT:    feq.d a2, fa3, fa3
-; RV32-NEXT:    seqz a2, a2
-; RV32-NEXT:    addi a2, a2, -1
-; RV32-NEXT:    and a0, a2, a0
+; RV32-NEXT:    feq.d a0, fa3, fa3
+; RV32-NEXT:    neg a0, a0
+; RV32-NEXT:    fmax.d fa3, fa3, fa5
+; RV32-NEXT:    fmin.d fa3, fa3, fa4
+; RV32-NEXT:    fcvt.w.d a2, fa3, rtz
+; RV32-NEXT:    and a0, a0, a2
 ; RV32-NEXT:    sb a0, 11(sp)
 ; RV32-NEXT:    vslidedown.vi v12, v8, 2
 ; RV32-NEXT:    vfmv.f.s fa3, v12
-; RV32-NEXT:    fmax.d fa2, fa3, fa5
-; RV32-NEXT:    fmin.d fa2, fa2, fa4
-; RV32-NEXT:    fcvt.w.d a0, fa2, rtz
-; RV32-NEXT:    feq.d a2, fa3, fa3
-; RV32-NEXT:    seqz a2, a2
-; RV32-NEXT:    addi a2, a2, -1
-; RV32-NEXT:    and a0, a2, a0
+; RV32-NEXT:    feq.d a0, fa3, fa3
+; RV32-NEXT:    neg a0, a0
+; RV32-NEXT:    fmax.d fa3, fa3, fa5
+; RV32-NEXT:    fmin.d fa3, fa3, fa4
+; RV32-NEXT:    fcvt.w.d a2, fa3, rtz
+; RV32-NEXT:    and a0, a0, a2
 ; RV32-NEXT:    sb a0, 10(sp)
 ; RV32-NEXT:    vslidedown.vi v8, v8, 1
 ; RV32-NEXT:    vfmv.f.s fa3, v8
+; RV32-NEXT:    feq.d a0, fa3, fa3
+; RV32-NEXT:    neg a0, a0
 ; RV32-NEXT:    fmax.d fa5, fa3, fa5
 ; RV32-NEXT:    fmin.d fa5, fa5, fa4
-; RV32-NEXT:    fcvt.w.d a0, fa5, rtz
-; RV32-NEXT:    feq.d a2, fa3, fa3
-; RV32-NEXT:    seqz a2, a2
-; RV32-NEXT:    addi a2, a2, -1
-; RV32-NEXT:    and a0, a2, a0
+; RV32-NEXT:    fcvt.w.d a2, fa5, rtz
+; RV32-NEXT:    and a0, a0, a2
 ; RV32-NEXT:    sb a0, 9(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
@@ -421,89 +407,81 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
 ; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT:    vle64.v v8, (a0)
+; RV64-NEXT:    vfmv.f.s fa3, v8
 ; RV64-NEXT:    lui a0, %hi(.LCPI12_0)
 ; RV64-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
 ; RV64-NEXT:    lui a0, %hi(.LCPI12_1)
 ; RV64-NEXT:    fld fa4, %lo(.LCPI12_1)(a0)
-; RV64-NEXT:    vfmv.f.s fa3, v8
-; RV64-NEXT:    fmax.d fa2, fa3, fa5
-; RV64-NEXT:    fmin.d fa2, fa2, fa4
-; RV64-NEXT:    fcvt.l.d a0, fa2, rtz
-; RV64-NEXT:    feq.d a2, fa3, fa3
-; RV64-NEXT:    seqz a2, a2
-; RV64-NEXT:    addiw a2, a2, -1
-; RV64-NEXT:    and a0, a2, a0
+; RV64-NEXT:    feq.d a0, fa3, fa3
+; RV64-NEXT:    negw a0, a0
+; RV64-NEXT:    fmax.d fa3, fa3, fa5
+; RV64-NEXT:    fmin.d fa3, fa3, fa4
+; RV64-NEXT:    fcvt.l.d a2, fa3, rtz
+; RV64-NEXT:    and a0, a0, a2
 ; RV64-NEXT:    sb a0, 8(sp)
 ; RV64-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
 ; RV64-NEXT:    vslidedown.vi v12, v8, 7
 ; RV64-NEXT:    vfmv.f.s fa3, v12
-; RV64-NEXT:    fmax.d fa2, fa3, fa5
-; RV64-NEXT:    fmin.d fa2, fa2, fa4
-; RV64-NEXT:    fcvt.l.d a0, fa2, rtz
-; RV64-NEXT:    feq.d a2, fa3, fa3
-; RV64-NEXT:    seqz a2, a2
-; RV64-NEXT:    addiw a2, a2, -1
-; RV64-NEXT:    and a0, a2, a0
+; RV64-NEXT:    feq.d a0, fa3, fa3
+; RV64-NEXT:    negw a0, a0
+; RV64-NEXT:    fmax.d fa3, fa3, fa5
+; RV64-NEXT:    fmin.d fa3, fa3, fa4
+; RV64-NEXT:    fcvt.l.d a2, fa3, rtz
+; RV64-NEXT:    and a0, a0, a2
 ; RV64-NEXT:    sb a0, 15(sp)
 ; RV64-NEXT:    vslidedown.vi v12, v8, 6
 ; RV64-NEXT:    vfmv.f.s fa3, v12
-; RV64-NEXT:    fmax.d fa2, fa3, fa5
-; RV64-NEXT:    fmin.d fa2, fa2, fa4
-; RV64-NEXT:    fcvt.l.d a0, fa2, rtz
-; RV64-NEXT:    feq.d a2, fa3, fa3
-; RV64-NEXT:    seqz a2, a2
-; RV64-NEXT:    addiw a2, a2, -1
-; RV64-NEXT:    and a0, a2, a0
+; RV64-NEXT:    feq.d a0, fa3, fa3
+; RV64-NEXT:    negw a0, a0
+; RV64-NEXT:    fmax.d fa3, fa3, fa5
+; RV64-NEXT:    fmin.d fa3, fa3, fa4
+; RV64-NEXT:    fcvt.l.d a2, fa3, rtz
+; RV64-NEXT:    and a0, a0, a2
 ; RV64-NEXT:    sb a0, 14(sp)
 ; RV64-NEXT:    vslidedown.vi v12, v8, 5
 ; RV64-NEXT:    vfmv.f.s fa3, v12
-; RV64-NEXT:    fmax.d fa2, fa3, fa5
-; RV64-NEXT:    fmin.d fa2, fa2, fa4
-; RV64-NEXT:    fcvt.l.d a0, fa2, rtz
-; RV64-NEXT:    feq.d a2, fa3, fa3
-; RV64-NEXT:    seqz a2, a2
-; RV64-NEXT:    addiw a2, a2, -1
-; RV64-NEXT:    and a0, a2, a0
+; RV64-NEXT:    feq.d a0, fa3, fa3
+; RV64-NEXT:    negw a0, a0
+; RV64-NEXT:    fmax.d fa3, fa3, fa5
+; RV64-NEXT:    fmin.d fa3, fa3, fa4
+; RV64-NEXT:    fcvt.l.d a2, fa3, rtz
+; RV64-NEXT:    and a0, a0, a2
 ; RV64-NEXT:    sb a0, 13(sp)
 ; RV64-NEXT:    vslidedown.vi v12, v8, 4
 ; RV64-NEXT:    vfmv.f.s fa3, v12
-; RV64-NEXT:    fmax.d fa2, fa3, fa5
-; RV64-NEXT:    fmin.d fa2, fa2, fa4
-; RV64-NEXT:    fcvt.l.d a0, fa2, rtz
-; RV64-NEXT:    feq.d a2, fa3, fa3
-; RV64-NEXT:    seqz a2, a2
-; RV64-NEXT:    addiw a2, a2, -1
-; RV64-NEXT:    and a0, a2, a0
+; RV64-NEXT:    feq.d a0, fa3, fa3
+; RV64-NEXT:    negw a0, a0
+; RV64-NEXT:    fmax.d fa3, fa3, fa5
+; RV64-NEXT:    fmin.d fa3, fa3, fa4
+; RV64-NEXT:    fcvt.l.d a2, fa3, rtz
+; RV64-NEXT:    and a0, a0, a2
 ; RV64-NEXT:    sb a0, 12(sp)
 ; RV64-NEXT:    vslidedown.vi v12, v8, 3
 ; RV64-NEXT:    vfmv.f.s fa3, v12
-; RV64-NEXT:    fmax.d fa2, fa3, fa5
-; RV64-NEXT:    fmin.d fa2, fa2, fa4
-; RV64-NEXT:    fcvt.l.d a0, fa2, rtz
-; RV64-NEXT:    feq.d a2, fa3, fa3
-; RV64-NEXT:    seqz a2, a2
-; RV64-NEXT:    addiw a2, a2, -1
-; RV64-NEXT:    and a0, a2, a0
+; RV64-NEXT:    feq.d a0, fa3, fa3
+; RV64-NEXT:    negw a0, a0
+; RV64-NEXT:    fmax.d fa3, fa3, fa5
+; RV64-NEXT:    fmin.d fa3, fa3, fa4
+; RV64-NEXT:    fcvt.l.d a2, fa3, rtz
+; RV64-NEXT:    and a0, a0, a2
 ; RV64-NEXT:    sb a0, 11(sp)
 ; RV64-NEXT:    vslidedown.vi v12, v8, 2
 ; RV64-NEXT:    vfmv.f.s fa3, v12
-; RV64-NEXT:    fmax.d fa2, fa3, fa5
-; RV64-NEXT:    fmin.d fa2, fa2, fa4
-; RV64-NEXT:    fcvt.l.d a0, fa2, rtz
-; RV64-NEXT:    feq.d a2, fa3, fa3
-; RV64-NEXT:    seqz a2, a2
-; RV64-NEXT:    addiw a2, a2, -1
-; RV64-NEXT:    and a0, a2, a0
+; RV64-NEXT:    feq.d a0, fa3, fa3
+; RV64-NEXT:    negw a0, a0
+; RV64-NEXT:    fmax.d fa3, fa3, fa5
+; RV64-NEXT:    fmin.d fa3, fa3, fa4
+; RV64-NEXT:    fcvt.l.d a2, fa3, rtz
+; RV64-NEXT:    and a0, a0, a2
 ; RV64-NEXT:    sb a0, 10(sp)
 ; RV64-NEXT:    vslidedown.vi v8, v8, 1
 ; RV64-NEXT:    vfmv.f.s fa3, v8
+; RV64-NEXT:    feq.d a0, fa3, fa3
+; RV64-NEXT:    negw a0, a0
 ; RV64-NEXT:    fmax.d fa5, fa3, fa5
 ; RV64-NEXT:    fmin.d fa5, fa5, fa4
-; RV64-NEXT:    fcvt.l.d a0, fa5, rtz
-; RV64-NEXT:    feq.d a2, fa3, fa3
-; RV64-NEXT:    seqz a2, a2
-; RV64-NEXT:    addiw a2, a2, -1
-; RV64-NEXT:    and a0, a2, a0
+; RV64-NEXT:    fcvt.l.d a2, fa5, rtz
+; RV64-NEXT:    and a0, a0, a2
 ; RV64-NEXT:    sb a0, 9(sp)
 ; RV64-NEXT:    addi a0, sp, 8
 ; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma

diff  --git a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
index 172f3aa0e32d6..3ca01cfa3a8f7 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
@@ -355,148 +355,126 @@ define arm_aapcs_vfpcc <2 x i32> @test_signed_v2f64_v2i32(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #32
-; CHECK-NEXT:    sub sp, #32
+; CHECK-NEXT:    .pad #24
+; CHECK-NEXT:    sub sp, #24
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI9_0
-; CHECK-NEXT:    vmov r9, r8, d9
-; CHECK-NEXT:    vmov r11, r10, d0
-; CHECK-NEXT:    str.w r11, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    str.w r10, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    vldr d0, .LCPI9_1
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    vmov r5, r3, d0
-; CHECK-NEXT:    str r3, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT:    str r5, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    vmov r8, r7, d9
+; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    strd r2, r3, [sp, #8] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    vldr d0, .LCPI9_1
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    lsrs r4, r0, #5
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    vmov r6, r5, d0
+; CHECK-NEXT:    str r4, [sp, #20] @ 4-byte Spill
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    mov r11, r0
+; CHECK-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r5
 ; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r0, #-2147483648
-; CHECK-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    cmp r1, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    mvnne r0, #-2147483648
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    vmov r7, r6, d8
-; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    movne.w r11, #-2147483648
+; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r10
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    str r4, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    mov r2, r5
-; CHECK-NEXT:    ldr r5, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    mov r11, r0
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r11, #-2147483648
-; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    mov r10, r1
-; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    movne r0, #1
+; CHECK-NEXT:    str r0, [sp, #16] @ 4-byte Spill
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r3, r7
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    mvnne r11, #-2147483648
 ; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    vmov r10, r7, d8
+; CHECK-NEXT:    mov r8, r0
 ; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r8, #1
+; CHECK-NEXT:    cmp.w r8, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne.w r11, #0
-; CHECK-NEXT:    ldrd r2, r3, [sp, #20] @ 8-byte Folded Reload
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r5, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r5, #-1
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    movne r6, #1
+; CHECK-NEXT:    ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    lsr.w r9, r0, #5
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    bl __aeabi_d2lz
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    mov r4, r1
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r5, #-2147483648
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    mvnne r5, #-2147483648
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r5, #0
-; CHECK-NEXT:    ldrd r2, r3, [sp, #20] @ 8-byte Folded Reload
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    ldr r3, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    movne r0, #1
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r10, #-1
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r6
-; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r10, #0
-; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r5, r11
+; CHECK-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r2, #-1
+; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r2, #0
+; CHECK-NEXT:    cmp.w r8, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r2, #0
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r4, #-1
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r4, #0
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r10, #0
-; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    vmov q0[2], q0[0], r11, r0
-; CHECK-NEXT:    vmov q0[3], q0[1], r10, r5
-; CHECK-NEXT:    add sp, #32
+; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    vmov q0[3], q0[1], r4, r2
+; CHECK-NEXT:    add sp, #24
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI9_0:
-; CHECK-NEXT:    .long 4290772992 @ double 2147483647
-; CHECK-NEXT:    .long 1105199103
-; CHECK-NEXT:  .LCPI9_1:
 ; CHECK-NEXT:    .long 0 @ double -2147483648
 ; CHECK-NEXT:    .long 3252682752
+; CHECK-NEXT:  .LCPI9_1:
+; CHECK-NEXT:    .long 4290772992 @ double 2147483647
+; CHECK-NEXT:    .long 1105199103
     %x = call <2 x i32> @llvm.fptosi.sat.v2f64.v2i32(<2 x double> %f)
     ret <2 x i32> %x
 }
@@ -2768,148 +2746,126 @@ define arm_aapcs_vfpcc <2 x i8> @test_signed_v2f64_v2i8(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #32
-; CHECK-NEXT:    sub sp, #32
+; CHECK-NEXT:    .pad #24
+; CHECK-NEXT:    sub sp, #24
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI33_0
-; CHECK-NEXT:    vmov r9, r8, d9
-; CHECK-NEXT:    vmov r11, r10, d0
-; CHECK-NEXT:    str.w r11, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    str.w r10, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    vldr d0, .LCPI33_1
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    vmov r5, r3, d0
-; CHECK-NEXT:    str r3, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT:    str r5, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    vmov r8, r7, d9
+; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    strd r2, r3, [sp, #8] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    vldr d0, .LCPI33_1
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    lsrs r4, r0, #5
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    vmov r6, r5, d0
+; CHECK-NEXT:    str r4, [sp, #20] @ 4-byte Spill
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    mov r11, r0
+; CHECK-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r5
 ; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    mvneq r0, #127
-; CHECK-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    cmp r1, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r0, #127
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    vmov r7, r6, d8
-; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    mvnne r11, #127
+; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r10
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    str r4, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    mov r2, r5
-; CHECK-NEXT:    ldr r5, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    mov r11, r0
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    mvneq r11, #127
-; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    mov r10, r1
-; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    movne r0, #1
+; CHECK-NEXT:    str r0, [sp, #16] @ 4-byte Spill
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r3, r7
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne.w r11, #127
 ; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    vmov r10, r7, d8
+; CHECK-NEXT:    mov r8, r0
 ; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r8, #1
+; CHECK-NEXT:    cmp.w r8, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne.w r11, #0
-; CHECK-NEXT:    ldrd r2, r3, [sp, #20] @ 8-byte Folded Reload
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r5, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r5, #-1
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    movne r6, #1
+; CHECK-NEXT:    ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    lsr.w r9, r0, #5
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    bl __aeabi_d2lz
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    mov r4, r1
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    mvnne r5, #127
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r5, #127
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r5, #0
-; CHECK-NEXT:    ldrd r2, r3, [sp, #20] @ 8-byte Folded Reload
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    ldr r3, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    movne r0, #1
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r10, #-1
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r6
-; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r10, #0
-; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r5, r11
+; CHECK-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r2, #-1
+; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r2, #0
+; CHECK-NEXT:    cmp.w r8, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r2, #0
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r4, #-1
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r4, #0
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r10, #0
-; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    vmov q0[2], q0[0], r11, r0
-; CHECK-NEXT:    vmov q0[3], q0[1], r10, r5
-; CHECK-NEXT:    add sp, #32
+; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    vmov q0[3], q0[1], r4, r2
+; CHECK-NEXT:    add sp, #24
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI33_0:
-; CHECK-NEXT:    .long 0 @ double 127
-; CHECK-NEXT:    .long 1080016896
-; CHECK-NEXT:  .LCPI33_1:
 ; CHECK-NEXT:    .long 0 @ double -128
 ; CHECK-NEXT:    .long 3227516928
+; CHECK-NEXT:  .LCPI33_1:
+; CHECK-NEXT:    .long 0 @ double 127
+; CHECK-NEXT:    .long 1080016896
     %x = call <2 x i8> @llvm.fptosi.sat.v2f64.v2i8(<2 x double> %f)
     ret <2 x i8> %x
 }
@@ -2923,147 +2879,129 @@ define arm_aapcs_vfpcc <2 x i13> @test_signed_v2f64_v2i13(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #32
-; CHECK-NEXT:    sub sp, #32
+; CHECK-NEXT:    .pad #24
+; CHECK-NEXT:    sub sp, #24
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI34_0
-; CHECK-NEXT:    vmov r10, r11, d9
-; CHECK-NEXT:    vmov r8, r5, d0
-; CHECK-NEXT:    str.w r8, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r10
-; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    mov r2, r8
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    str r5, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    vldr d0, .LCPI34_1
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r10
-; CHECK-NEXT:    mov r1, r11
+; CHECK-NEXT:    vmov r8, r7, d9
 ; CHECK-NEXT:    vmov r2, r3, d0
-; CHECK-NEXT:    strd r3, r2, [sp, #16] @ 8-byte Folded Spill
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    strd r2, r3, [sp, #8] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r9, r0
-; CHECK-NEXT:    mov r0, r10
-; CHECK-NEXT:    mov r1, r11
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    vldr d0, .LCPI34_1
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    lsrs r4, r0, #5
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    vmov r6, r5, d0
+; CHECK-NEXT:    str r4, [sp, #20] @ 4-byte Spill
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT:    cmp.w r9, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r1, #-1
 ; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    movne.w r1, #-1
+; CHECK-NEXT:    mov r11, r0
 ; CHECK-NEXT:    mov r4, r1
-; CHECK-NEXT:    mov r0, r10
-; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    vmov r7, r6, d8
-; CHECK-NEXT:    bl __aeabi_dcmpun
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r6
 ; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    str r4, [sp, #12] @ 4-byte Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldrd r5, r2, [sp, #16] @ 8-byte Folded Reload
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    mov r8, r1
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    mov r9, r0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r8, #-1
-; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r6
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r7
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r8, #0
-; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    movne r0, #1
+; CHECK-NEXT:    str r0, [sp, #16] @ 4-byte Spill
 ; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r3, r7
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r8, #0
-; CHECK-NEXT:    ldr r4, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    mov r2, r4
-; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    vmov r10, r7, d8
+; CHECK-NEXT:    mov r8, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    itt eq
-; CHECK-NEXT:    movweq r9, #61440
-; CHECK-NEXT:    movteq r9, #65535
-; CHECK-NEXT:    ldrd r2, r3, [sp, #24] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r8, #1
+; CHECK-NEXT:    cmp.w r8, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    str r4, [sp, #4] @ 4-byte Spill
 ; CHECK-NEXT:    mov r0, r10
-; CHECK-NEXT:    mov r1, r11
+; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr r3, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    mov r5, r0
-; CHECK-NEXT:    mov r0, r10
-; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    mov r2, r4
-; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT:    itt eq
-; CHECK-NEXT:    movweq r0, #61440
-; CHECK-NEXT:    movteq r0, #65535
-; CHECK-NEXT:    cmp r5, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movwne r0, #4095
+; CHECK-NEXT:    movne r6, #1
+; CHECK-NEXT:    ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    lsr.w r9, r0, #5
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    bl __aeabi_d2lz
+; CHECK-NEXT:    mov r4, r1
 ; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r4, #-1
 ; CHECK-NEXT:    mov r0, r10
-; CHECK-NEXT:    mov r1, r11
+; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r4, #0
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r5, #0
-; CHECK-NEXT:    ldrd r2, r3, [sp, #24] @ 8-byte Folded Reload
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    movne r0, #1
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r6
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movwne r9, #4095
-; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    movwne r5, #61440
+; CHECK-NEXT:    movtne r5, #65535
+; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    movwne r11, #61440
+; CHECK-NEXT:    movtne r11, #65535
+; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movwne r11, #4095
+; CHECK-NEXT:    cmp.w r8, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r11, #0
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movwne r5, #4095
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r9, #0
-; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    vmov q0[2], q0[0], r9, r5
-; CHECK-NEXT:    vmov q0[3], q0[1], r8, r0
-; CHECK-NEXT:    add sp, #32
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r5, r11
+; CHECK-NEXT:    vmov q0[3], q0[1], r4, r0
+; CHECK-NEXT:    add sp, #24
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI34_0:
-; CHECK-NEXT:    .long 0 @ double 4095
-; CHECK-NEXT:    .long 1085275648
-; CHECK-NEXT:  .LCPI34_1:
 ; CHECK-NEXT:    .long 0 @ double -4096
 ; CHECK-NEXT:    .long 3232759808
+; CHECK-NEXT:  .LCPI34_1:
+; CHECK-NEXT:    .long 0 @ double 4095
+; CHECK-NEXT:    .long 1085275648
     %x = call <2 x i13> @llvm.fptosi.sat.v2f64.v2i13(<2 x double> %f)
     ret <2 x i13> %x
 }
@@ -3077,147 +3015,129 @@ define arm_aapcs_vfpcc <2 x i16> @test_signed_v2f64_v2i16(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #32
-; CHECK-NEXT:    sub sp, #32
+; CHECK-NEXT:    .pad #24
+; CHECK-NEXT:    sub sp, #24
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI35_0
-; CHECK-NEXT:    vmov r10, r11, d9
-; CHECK-NEXT:    vmov r8, r5, d0
-; CHECK-NEXT:    str.w r8, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r10
-; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    mov r2, r8
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    str r5, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    vldr d0, .LCPI35_1
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r10
-; CHECK-NEXT:    mov r1, r11
+; CHECK-NEXT:    vmov r8, r7, d9
 ; CHECK-NEXT:    vmov r2, r3, d0
-; CHECK-NEXT:    strd r3, r2, [sp, #16] @ 8-byte Folded Spill
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    strd r2, r3, [sp, #8] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r9, r0
-; CHECK-NEXT:    mov r0, r10
-; CHECK-NEXT:    mov r1, r11
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    vldr d0, .LCPI35_1
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    lsrs r4, r0, #5
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    vmov r6, r5, d0
+; CHECK-NEXT:    str r4, [sp, #20] @ 4-byte Spill
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT:    cmp.w r9, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r1, #-1
 ; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    movne.w r1, #-1
+; CHECK-NEXT:    mov r11, r0
 ; CHECK-NEXT:    mov r4, r1
-; CHECK-NEXT:    mov r0, r10
-; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    vmov r7, r6, d8
-; CHECK-NEXT:    bl __aeabi_dcmpun
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r6
 ; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    str r4, [sp, #12] @ 4-byte Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldrd r5, r2, [sp, #16] @ 8-byte Folded Reload
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    mov r8, r1
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    mov r9, r0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r8, #-1
-; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r6
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r7
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r8, #0
-; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    movne r0, #1
+; CHECK-NEXT:    str r0, [sp, #16] @ 4-byte Spill
 ; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r3, r7
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r8, #0
-; CHECK-NEXT:    ldr r4, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    mov r2, r4
-; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    vmov r10, r7, d8
+; CHECK-NEXT:    mov r8, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    itt eq
-; CHECK-NEXT:    movweq r9, #32768
-; CHECK-NEXT:    movteq r9, #65535
-; CHECK-NEXT:    ldrd r2, r3, [sp, #24] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r8, #1
+; CHECK-NEXT:    cmp.w r8, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    str r4, [sp, #4] @ 4-byte Spill
 ; CHECK-NEXT:    mov r0, r10
-; CHECK-NEXT:    mov r1, r11
+; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr r3, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    mov r5, r0
-; CHECK-NEXT:    mov r0, r10
-; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    mov r2, r4
-; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT:    itt eq
-; CHECK-NEXT:    movweq r0, #32768
-; CHECK-NEXT:    movteq r0, #65535
-; CHECK-NEXT:    cmp r5, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movwne r0, #32767
+; CHECK-NEXT:    movne r6, #1
+; CHECK-NEXT:    ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    lsr.w r9, r0, #5
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    bl __aeabi_d2lz
+; CHECK-NEXT:    mov r4, r1
 ; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r4, #-1
 ; CHECK-NEXT:    mov r0, r10
-; CHECK-NEXT:    mov r1, r11
+; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r4, #0
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r5, #0
-; CHECK-NEXT:    ldrd r2, r3, [sp, #24] @ 8-byte Folded Reload
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    movne r0, #1
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r6
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movwne r9, #32767
-; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    movwne r5, #32768
+; CHECK-NEXT:    movtne r5, #65535
+; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    movwne r11, #32768
+; CHECK-NEXT:    movtne r11, #65535
+; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movwne r11, #32767
+; CHECK-NEXT:    cmp.w r8, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r11, #0
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movwne r5, #32767
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r9, #0
-; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    vmov q0[2], q0[0], r9, r5
-; CHECK-NEXT:    vmov q0[3], q0[1], r8, r0
-; CHECK-NEXT:    add sp, #32
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r5, r11
+; CHECK-NEXT:    vmov q0[3], q0[1], r4, r0
+; CHECK-NEXT:    add sp, #24
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI35_0:
-; CHECK-NEXT:    .long 0 @ double 32767
-; CHECK-NEXT:    .long 1088421824
-; CHECK-NEXT:  .LCPI35_1:
 ; CHECK-NEXT:    .long 0 @ double -32768
 ; CHECK-NEXT:    .long 3235905536
+; CHECK-NEXT:  .LCPI35_1:
+; CHECK-NEXT:    .long 0 @ double 32767
+; CHECK-NEXT:    .long 1088421824
     %x = call <2 x i16> @llvm.fptosi.sat.v2f64.v2i16(<2 x double> %f)
     ret <2 x i16> %x
 }
@@ -3231,163 +3151,129 @@ define arm_aapcs_vfpcc <2 x i19> @test_signed_v2f64_v2i19(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #48
-; CHECK-NEXT:    sub sp, #48
+; CHECK-NEXT:    .pad #16
+; CHECK-NEXT:    sub sp, #16
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI36_0
-; CHECK-NEXT:    vmov r6, r5, d9
-; CHECK-NEXT:    vmov r8, r3, d0
-; CHECK-NEXT:    str r3, [sp, #36] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r8
-; CHECK-NEXT:    str.w r8, [sp, #44] @ 4-byte Spill
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    vldr d0, .LCPI36_1
-; CHECK-NEXT:    mov r10, r0
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    vmov r7, r6, d9
 ; CHECK-NEXT:    vmov r2, r3, d0
-; CHECK-NEXT:    strd r2, r3, [sp, #16] @ 8-byte Folded Spill
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r11, r0
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    str r0, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT:    cmp.w r11, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r1, #-1
-; CHECK-NEXT:    cmp.w r10, #0
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r1, #0
-; CHECK-NEXT:    mov r4, r1
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r2, r6
-; CHECK-NEXT:    mov r3, r1
-; CHECK-NEXT:    str r5, [sp, #40] @ 4-byte Spill
-; CHECK-NEXT:    vmov r7, r9, d8
-; CHECK-NEXT:    mov r5, r6
-; CHECK-NEXT:    bl __aeabi_dcmpun
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    str r4, [sp, #32] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    ldr r4, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r9
-; CHECK-NEXT:    mov r2, r8
-; CHECK-NEXT:    mov r6, r9
-; CHECK-NEXT:    str.w r9, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    mov r3, r4
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    mov r1, r9
-; CHECK-NEXT:    ldr.w r9, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    ldr.w r8, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    str r0, [sp, #8] @ 4-byte Spill
 ; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    mov r3, r8
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    strd r2, r3, [sp, #8] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r4, r0
 ; CHECK-NEXT:    mov r0, r7
 ; CHECK-NEXT:    mov r1, r6
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    mov r11, r0
-; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    mov r6, r5
-; CHECK-NEXT:    mov r10, r1
+; CHECK-NEXT:    vldr d0, .LCPI36_1
+; CHECK-NEXT:    mov r9, r0
+; CHECK-NEXT:    vmov r8, r0, d8
+; CHECK-NEXT:    vmov r11, r10, d0
+; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-NEXT:    clz r0, r4
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    ittt ne
+; CHECK-NEXT:    movwne r9, #0
+; CHECK-NEXT:    movtne r9, #65532
+; CHECK-NEXT:    movne.w r1, #-1
+; CHECK-NEXT:    mov r5, r1
+; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r4, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    itt eq
-; CHECK-NEXT:    movweq r11, #0
-; CHECK-NEXT:    movteq r11, #65532
-; CHECK-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT:    mov r3, r4
+; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r2, r7
+; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r4, #1
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r6, #1
+; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movwne r11, #65535
-; CHECK-NEXT:    movtne r11, #3
-; CHECK-NEXT:    str r5, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    ldr r5, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT:    ldr r2, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    movwne r9, #65535
+; CHECK-NEXT:    movtne r9, #3
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    ldr r4, [sp] @ 4-byte Reload
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    str r5, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r1, r4
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    mov r6, r8
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r4, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    itt eq
-; CHECK-NEXT:    moveq r4, #0
-; CHECK-NEXT:    movteq r4, #65532
-; CHECK-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    mov r5, r0
 ; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r5, #1
+; CHECK-NEXT:    ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r11, r4
+; CHECK-NEXT:    lsr.w r10, r0, #5
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    bl __aeabi_d2lz
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    cmp.w r10, #0
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    movtne r4, #65532
+; CHECK-NEXT:    cmp r5, #0
 ; CHECK-NEXT:    itt ne
 ; CHECK-NEXT:    movwne r4, #65535
 ; CHECK-NEXT:    movtne r4, #3
-; CHECK-NEXT:    ldr r5, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    ldr r2, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT:    ldr r3, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    mov r3, r6
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r10, #-1
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    cmp.w r8, #0
+; CHECK-NEXT:    cmp.w r10, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r10, #0
+; CHECK-NEXT:    movne.w r7, #-1
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r11
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r7, #0
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r10, #0
-; CHECK-NEXT:    ldr r0, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT:    ldr r1, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT:    mov r2, r0
-; CHECK-NEXT:    mov r3, r1
-; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    movne r0, #1
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r5
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    movne r7, #0
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r9, #0
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r11, #0
-; CHECK-NEXT:    ldr r0, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT:    vmov q0[2], q0[0], r11, r4
-; CHECK-NEXT:    vmov q0[3], q0[1], r10, r0
-; CHECK-NEXT:    add sp, #48
+; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r4, r9
+; CHECK-NEXT:    vmov q0[3], q0[1], r7, r0
+; CHECK-NEXT:    add sp, #16
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI36_0:
-; CHECK-NEXT:    .long 0 @ double 262143
-; CHECK-NEXT:    .long 1091567608
-; CHECK-NEXT:  .LCPI36_1:
 ; CHECK-NEXT:    .long 0 @ double -262144
 ; CHECK-NEXT:    .long 3239051264
+; CHECK-NEXT:  .LCPI36_1:
+; CHECK-NEXT:    .long 0 @ double 262143
+; CHECK-NEXT:    .long 1091567608
     %x = call <2 x i19> @llvm.fptosi.sat.v2f64.v2i19(<2 x double> %f)
     ret <2 x i19> %x
 }
@@ -3401,148 +3287,126 @@ define arm_aapcs_vfpcc <2 x i32> @test_signed_v2f64_v2i32_duplicate(<2 x double>
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #32
-; CHECK-NEXT:    sub sp, #32
+; CHECK-NEXT:    .pad #24
+; CHECK-NEXT:    sub sp, #24
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI37_0
-; CHECK-NEXT:    vmov r9, r8, d9
-; CHECK-NEXT:    vmov r11, r10, d0
-; CHECK-NEXT:    str.w r11, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    str.w r10, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    vldr d0, .LCPI37_1
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    vmov r5, r3, d0
-; CHECK-NEXT:    str r3, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT:    str r5, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    vmov r8, r7, d9
+; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    strd r2, r3, [sp, #8] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    vldr d0, .LCPI37_1
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    lsrs r4, r0, #5
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    vmov r6, r5, d0
+; CHECK-NEXT:    str r4, [sp, #20] @ 4-byte Spill
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    mov r11, r0
+; CHECK-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r5
 ; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r0, #-2147483648
-; CHECK-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    cmp r1, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    mvnne r0, #-2147483648
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    vmov r7, r6, d8
-; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    movne.w r11, #-2147483648
+; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r10
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    str r4, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    mov r2, r5
-; CHECK-NEXT:    ldr r5, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    mov r11, r0
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r11, #-2147483648
-; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    mov r10, r1
-; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    movne r0, #1
+; CHECK-NEXT:    str r0, [sp, #16] @ 4-byte Spill
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r3, r7
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    mvnne r11, #-2147483648
 ; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    vmov r10, r7, d8
+; CHECK-NEXT:    mov r8, r0
 ; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r8, #1
+; CHECK-NEXT:    cmp.w r8, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne.w r11, #0
-; CHECK-NEXT:    ldrd r2, r3, [sp, #20] @ 8-byte Folded Reload
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r5, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r5, #-1
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    movne r6, #1
+; CHECK-NEXT:    ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    lsr.w r9, r0, #5
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    bl __aeabi_d2lz
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    mov r4, r1
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r5, #-2147483648
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    mvnne r5, #-2147483648
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r5, #0
-; CHECK-NEXT:    ldrd r2, r3, [sp, #20] @ 8-byte Folded Reload
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    ldr r3, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    movne r0, #1
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r10, #-1
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r6
-; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r10, #0
-; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r5, r11
+; CHECK-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r2, #-1
+; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r2, #0
+; CHECK-NEXT:    cmp.w r8, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r2, #0
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r4, #-1
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r4, #0
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r10, #0
-; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    vmov q0[2], q0[0], r11, r0
-; CHECK-NEXT:    vmov q0[3], q0[1], r10, r5
-; CHECK-NEXT:    add sp, #32
+; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    vmov q0[3], q0[1], r4, r2
+; CHECK-NEXT:    add sp, #24
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI37_0:
-; CHECK-NEXT:    .long 4290772992 @ double 2147483647
-; CHECK-NEXT:    .long 1105199103
-; CHECK-NEXT:  .LCPI37_1:
 ; CHECK-NEXT:    .long 0 @ double -2147483648
 ; CHECK-NEXT:    .long 3252682752
+; CHECK-NEXT:  .LCPI37_1:
+; CHECK-NEXT:    .long 4290772992 @ double 2147483647
+; CHECK-NEXT:    .long 1105199103
     %x = call <2 x i32> @llvm.fptosi.sat.v2f64.v2i32(<2 x double> %f)
     ret <2 x i32> %x
 }
@@ -3556,160 +3420,130 @@ define arm_aapcs_vfpcc <2 x i50> @test_signed_v2f64_v2i50(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #32
-; CHECK-NEXT:    sub sp, #32
+; CHECK-NEXT:    .pad #16
+; CHECK-NEXT:    sub sp, #16
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI38_0
-; CHECK-NEXT:    vmov r8, r5, d9
+; CHECK-NEXT:    vmov r7, r6, d9
 ; CHECK-NEXT:    vmov r2, r3, d0
-; CHECK-NEXT:    str r2, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    str r3, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    vldr d0, .LCPI38_1
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    vmov r10, r9, d0
-; CHECK-NEXT:    str.w r10, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT:    str.w r9, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    strd r2, r3, [sp, #8] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r11, r0
-; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    mov r1, r6
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    cmp.w r11, #0
-; CHECK-NEXT:    str r1, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT:    csel r0, r0, r11, ne
-; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    vldr d0, .LCPI38_1
+; CHECK-NEXT:    mov r9, r0
+; CHECK-NEXT:    vmov r8, r0, d8
+; CHECK-NEXT:    vmov r11, r10, d0
+; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-NEXT:    clz r0, r4
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    movtne r1, #65534
+; CHECK-NEXT:    mov r5, r1
+; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    mov r3, r10
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    movne.w r9, #0
+; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r8
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    vmov r6, r7, d8
-; CHECK-NEXT:    mov r11, r5
-; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r2, r7
+; CHECK-NEXT:    mov r3, r6
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    str r4, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    ldr r5, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    ldr r4, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    mov r2, r5
-; CHECK-NEXT:    mov r3, r4
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r9, r0
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    mov r10, r1
-; CHECK-NEXT:    str r0, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT:    cmp.w r9, #0
-; CHECK-NEXT:    itt eq
-; CHECK-NEXT:    movweq r10, #0
-; CHECK-NEXT:    movteq r10, #65534
-; CHECK-NEXT:    ldr r0, [sp] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    mov r2, r5
-; CHECK-NEXT:    mov r3, r4
+; CHECK-NEXT:    movne r4, #1
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r9, #-1
+; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r6, #1
+; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movwne r10, #65535
-; CHECK-NEXT:    movtne r10, #1
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr.w r9, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    mov r5, r11
-; CHECK-NEXT:    ldr.w r11, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-NEXT:    movwne r5, #65535
+; CHECK-NEXT:    movtne r5, #1
+; CHECK-NEXT:    str r5, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r9, #0
+; CHECK-NEXT:    ldr r4, [sp] @ 4-byte Reload
 ; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r3, r9
 ; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    ldr r0, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    itt eq
-; CHECK-NEXT:    moveq r0, #0
-; CHECK-NEXT:    movteq r0, #65534
-; CHECK-NEXT:    ldr r1, [sp] @ 4-byte Reload
-; CHECK-NEXT:    mov r3, r4
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movwne r0, #65535
-; CHECK-NEXT:    movtne r0, #1
-; CHECK-NEXT:    ldr r2, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    str r0, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    mov r1, r4
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    str r0, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r2, r6
-; CHECK-NEXT:    mov r3, r7
-; CHECK-NEXT:    csel r4, r1, r0, ne
-; CHECK-NEXT:    ldr r0, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r5, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r4, #-1
-; CHECK-NEXT:    bl __aeabi_dcmpun
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    movne r5, #1
+; CHECK-NEXT:    ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
 ; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r8
-; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r11, r4
+; CHECK-NEXT:    lsr.w r10, r0, #5
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    bl __aeabi_d2lz
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    cmp.w r10, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    movne r7, #0
+; CHECK-NEXT:    movtne r7, #65534
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    movwne r7, #65535
+; CHECK-NEXT:    movtne r7, #1
+; CHECK-NEXT:    cmp.w r10, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r11
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r4, #-1
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    ldr r5, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    mov r2, r6
-; CHECK-NEXT:    mov r3, r7
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r5, #0
-; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    movne r0, #1
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r10, #0
-; CHECK-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    vmov q0[2], q0[0], r4, r0
-; CHECK-NEXT:    vmov q0[3], q0[1], r10, r5
-; CHECK-NEXT:    add sp, #32
+; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r4, r9
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r7, #0
+; CHECK-NEXT:    vmov q0[3], q0[1], r7, r1
+; CHECK-NEXT:    add sp, #16
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI38_0:
-; CHECK-NEXT:    .long 4294967280 @ double 562949953421311
-; CHECK-NEXT:    .long 1124073471
-; CHECK-NEXT:  .LCPI38_1:
 ; CHECK-NEXT:    .long 0 @ double -562949953421312
 ; CHECK-NEXT:    .long 3271557120
+; CHECK-NEXT:  .LCPI38_1:
+; CHECK-NEXT:    .long 4294967280 @ double 562949953421311
+; CHECK-NEXT:    .long 1124073471
     %x = call <2 x i50> @llvm.fptosi.sat.v2f64.v2i50(<2 x double> %f)
     ret <2 x i50> %x
 }
@@ -3723,145 +3557,126 @@ define arm_aapcs_vfpcc <2 x i64> @test_signed_v2f64_v2i64(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #32
-; CHECK-NEXT:    sub sp, #32
+; CHECK-NEXT:    .pad #24
+; CHECK-NEXT:    sub sp, #24
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI39_0
-; CHECK-NEXT:    vmov r8, r5, d9
-; CHECK-NEXT:    vmov r11, r3, d0
-; CHECK-NEXT:    str r3, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    str.w r11, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    vldr d0, .LCPI39_1
-; CHECK-NEXT:    mov r9, r0
-; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    vmov r8, r7, d9
 ; CHECK-NEXT:    vmov r2, r3, d0
-; CHECK-NEXT:    str r2, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT:    str r3, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    strd r2, r3, [sp, #8] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r10, r0
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    vldr d0, .LCPI39_1
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    lsrs r4, r0, #5
 ; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    vmov r6, r5, d0
+; CHECK-NEXT:    str r4, [sp, #20] @ 4-byte Spill
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    cmp.w r10, #0
-; CHECK-NEXT:    str r1, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    csel r0, r0, r10, ne
-; CHECK-NEXT:    cmp.w r9, #0
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r0, #-1
-; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r11, r0
+; CHECK-NEXT:    str r1, [sp, #4] @ 4-byte Spill
 ; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r6
 ; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    vmov r7, r6, d8
-; CHECK-NEXT:    bl __aeabi_dcmpun
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    ldr r3, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    str r4, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    movne.w r11, #0
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr r4, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    ldr.w r10, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r2, r4
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r11, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    cmp.w r11, #0
-; CHECK-NEXT:    mov r9, r1
-; CHECK-NEXT:    csel r11, r0, r11, ne
-; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r7
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #1
+; CHECK-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r3, r7
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne.w r11, #-1
 ; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    vmov r10, r7, d8
+; CHECK-NEXT:    mov r8, r0
 ; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r8, #1
+; CHECK-NEXT:    cmp.w r8, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne.w r11, #0
-; CHECK-NEXT:    ldrd r2, r3, [sp, #24] @ 8-byte Folded Reload
-; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r4
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r0, #-2147483648
-; CHECK-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    mov r2, r8
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    cmp r1, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    mvnne r0, #-2147483648
-; CHECK-NEXT:    mov r10, r0
-; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    movne r6, #1
+; CHECK-NEXT:    ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    lsr.w r9, r0, #5
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    bl __aeabi_d2lz
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    mov r4, r1
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r5, #-1
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r10, #0
-; CHECK-NEXT:    ldrd r2, r3, [sp, #24] @ 8-byte Folded Reload
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr r3, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    mov r5, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r4
-; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    movne r0, #1
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r9, #-2147483648
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r6
-; CHECK-NEXT:    cmp r5, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    mvnne r9, #-2147483648
-; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r5, r11
+; CHECK-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r2, #-2147483648
+; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    mvnne r2, #-2147483648
+; CHECK-NEXT:    cmp.w r8, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r2, #0
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r4, #-2147483648
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    mvnne r4, #-2147483648
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r9, #0
-; CHECK-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    vmov q0[2], q0[0], r11, r0
-; CHECK-NEXT:    vmov q0[3], q0[1], r9, r10
-; CHECK-NEXT:    add sp, #32
+; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    vmov q0[3], q0[1], r4, r2
+; CHECK-NEXT:    add sp, #24
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI39_0:
-; CHECK-NEXT:    .long 4294967295 @ double 9.2233720368547748E+18
-; CHECK-NEXT:    .long 1138753535
-; CHECK-NEXT:  .LCPI39_1:
 ; CHECK-NEXT:    .long 0 @ double -9.2233720368547758E+18
 ; CHECK-NEXT:    .long 3286237184
+; CHECK-NEXT:  .LCPI39_1:
+; CHECK-NEXT:    .long 4294967295 @ double 9.2233720368547748E+18
+; CHECK-NEXT:    .long 1138753535
     %x = call <2 x i64> @llvm.fptosi.sat.v2f64.v2i64(<2 x double> %f)
     ret <2 x i64> %x
 }

diff  --git a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
index d7a5b63261e79..8ea12bd1fc0de 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
@@ -328,114 +328,95 @@ define arm_aapcs_vfpcc <2 x i32> @test_unsigned_v2f64_v2i32(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #32
-; CHECK-NEXT:    sub sp, #32
+; CHECK-NEXT:    .pad #16
+; CHECK-NEXT:    sub sp, #16
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI9_0
-; CHECK-NEXT:    vmov r5, r4, d9
-; CHECK-NEXT:    vmov r10, r9, d0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    vldr d0, .LCPI9_1
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    str r0, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    vmov r2, r11, d0
-; CHECK-NEXT:    str r2, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT:    str.w r11, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    vmov r6, r7, d9
+; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    strd r3, r2, [sp, #4] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    vmov r7, r6, d8
-; CHECK-NEXT:    str r1, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    cmp.w r8, #0
-; CHECK-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT:    csel r0, r0, r8, ne
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    vldr d0, .LCPI9_1
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    mov r10, r1
+; CHECK-NEXT:    vmov r9, r8, d0
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    vmov r11, r4, d8
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r0, #-1
-; CHECK-NEXT:    str r0, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    str.w r10, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT:    mov r8, r9
-; CHECK-NEXT:    str.w r9, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r8
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r9, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    cmp.w r9, #0
-; CHECK-NEXT:    str r1, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT:    csel r9, r0, r9, ne
-; CHECK-NEXT:    ldr r0, [sp] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r2, r9
 ; CHECK-NEXT:    mov r3, r8
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r9, #-1
+; CHECK-NEXT:    movne r6, #1
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r5, #-1
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr.w r11, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    ldr.w r10, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csel r5, r1, r0, ne
-; CHECK-NEXT:    cmp.w r8, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    movne r7, #1
 ; CHECK-NEXT:    ldrd r3, r2, [sp, #4] @ 8-byte Folded Reload
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r8, r4
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csel r0, r1, r0, ne
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    lsrs r4, r0, #5
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    bl __aeabi_d2ulz
 ; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r0, #0
-; CHECK-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT:    vmov q0[2], q0[0], r9, r1
-; CHECK-NEXT:    vmov q0[3], q0[1], r0, r5
-; CHECK-NEXT:    add sp, #32
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r5
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r10, #0
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r10, #0
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r10
+; CHECK-NEXT:    add sp, #16
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI9_0:
-; CHECK-NEXT:    .long 4292870144 @ double 4294967295
-; CHECK-NEXT:    .long 1106247679
-; CHECK-NEXT:  .LCPI9_1:
 ; CHECK-NEXT:    .long 0 @ double 0
 ; CHECK-NEXT:    .long 0
+; CHECK-NEXT:  .LCPI9_1:
+; CHECK-NEXT:    .long 4292870144 @ double 4294967295
+; CHECK-NEXT:    .long 1106247679
     %x = call <2 x i32> @llvm.fptoui.sat.v2f64.v2i32(<2 x double> %f)
     ret <2 x i32> %x
 }
@@ -2224,114 +2205,95 @@ define arm_aapcs_vfpcc <2 x i8> @test_unsigned_v2f64_v2i8(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #32
-; CHECK-NEXT:    sub sp, #32
+; CHECK-NEXT:    .pad #16
+; CHECK-NEXT:    sub sp, #16
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI33_0
-; CHECK-NEXT:    vmov r5, r4, d9
-; CHECK-NEXT:    vmov r10, r9, d0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    vldr d0, .LCPI33_1
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    str r0, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    vmov r2, r11, d0
-; CHECK-NEXT:    str r2, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT:    str.w r11, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    vmov r6, r7, d9
+; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    strd r3, r2, [sp, #4] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    vmov r7, r6, d8
-; CHECK-NEXT:    str r1, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    cmp.w r8, #0
-; CHECK-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT:    csel r0, r0, r8, ne
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    vldr d0, .LCPI33_1
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    mov r10, r1
+; CHECK-NEXT:    vmov r9, r8, d0
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    vmov r11, r4, d8
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r0, #255
-; CHECK-NEXT:    str r0, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    str.w r10, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT:    mov r8, r9
-; CHECK-NEXT:    str.w r9, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r8
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r9, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    cmp.w r9, #0
-; CHECK-NEXT:    str r1, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT:    csel r9, r0, r9, ne
-; CHECK-NEXT:    ldr r0, [sp] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r2, r9
 ; CHECK-NEXT:    mov r3, r8
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r9, #255
+; CHECK-NEXT:    movne r6, #1
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r5, #255
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr.w r11, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    ldr.w r10, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csel r5, r1, r0, ne
-; CHECK-NEXT:    cmp.w r8, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    movne r7, #1
 ; CHECK-NEXT:    ldrd r3, r2, [sp, #4] @ 8-byte Folded Reload
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r8, r4
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csel r0, r1, r0, ne
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    lsrs r4, r0, #5
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    bl __aeabi_d2ulz
 ; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r0, #0
-; CHECK-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT:    vmov q0[2], q0[0], r9, r1
-; CHECK-NEXT:    vmov q0[3], q0[1], r0, r5
-; CHECK-NEXT:    add sp, #32
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #255
+; CHECK-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r5
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r10, #0
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r10, #0
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r10
+; CHECK-NEXT:    add sp, #16
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI33_0:
-; CHECK-NEXT:    .long 0 @ double 255
-; CHECK-NEXT:    .long 1081073664
-; CHECK-NEXT:  .LCPI33_1:
 ; CHECK-NEXT:    .long 0 @ double 0
 ; CHECK-NEXT:    .long 0
+; CHECK-NEXT:  .LCPI33_1:
+; CHECK-NEXT:    .long 0 @ double 255
+; CHECK-NEXT:    .long 1081073664
     %x = call <2 x i8> @llvm.fptoui.sat.v2f64.v2i8(<2 x double> %f)
     ret <2 x i8> %x
 }
@@ -2345,114 +2307,95 @@ define arm_aapcs_vfpcc <2 x i13> @test_unsigned_v2f64_v2i13(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #32
-; CHECK-NEXT:    sub sp, #32
+; CHECK-NEXT:    .pad #16
+; CHECK-NEXT:    sub sp, #16
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI34_0
-; CHECK-NEXT:    vmov r5, r4, d9
-; CHECK-NEXT:    vmov r10, r9, d0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    vldr d0, .LCPI34_1
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    str r0, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    vmov r2, r11, d0
-; CHECK-NEXT:    str r2, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT:    str.w r11, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    vmov r6, r7, d9
+; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    strd r3, r2, [sp, #4] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    vmov r7, r6, d8
-; CHECK-NEXT:    str r1, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    cmp.w r8, #0
-; CHECK-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT:    csel r0, r0, r8, ne
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    vldr d0, .LCPI34_1
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    mov r10, r1
+; CHECK-NEXT:    vmov r9, r8, d0
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    vmov r11, r4, d8
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movwne r0, #8191
-; CHECK-NEXT:    str r0, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    str.w r10, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT:    mov r8, r9
-; CHECK-NEXT:    str.w r9, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r8
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r9, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    cmp.w r9, #0
-; CHECK-NEXT:    str r1, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT:    csel r9, r0, r9, ne
-; CHECK-NEXT:    ldr r0, [sp] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r2, r9
 ; CHECK-NEXT:    mov r3, r8
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movwne r9, #8191
+; CHECK-NEXT:    movne r6, #1
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movwne r5, #8191
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr.w r11, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    ldr.w r10, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csel r5, r1, r0, ne
-; CHECK-NEXT:    cmp.w r8, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    movne r7, #1
 ; CHECK-NEXT:    ldrd r3, r2, [sp, #4] @ 8-byte Folded Reload
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r8, r4
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csel r0, r1, r0, ne
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    lsrs r4, r0, #5
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    bl __aeabi_d2ulz
 ; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r0, #0
-; CHECK-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT:    vmov q0[2], q0[0], r9, r1
-; CHECK-NEXT:    vmov q0[3], q0[1], r0, r5
-; CHECK-NEXT:    add sp, #32
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movwne r0, #8191
+; CHECK-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r5
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r10, #0
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r10, #0
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r10
+; CHECK-NEXT:    add sp, #16
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI34_0:
-; CHECK-NEXT:    .long 0 @ double 8191
-; CHECK-NEXT:    .long 1086324480
-; CHECK-NEXT:  .LCPI34_1:
 ; CHECK-NEXT:    .long 0 @ double 0
 ; CHECK-NEXT:    .long 0
+; CHECK-NEXT:  .LCPI34_1:
+; CHECK-NEXT:    .long 0 @ double 8191
+; CHECK-NEXT:    .long 1086324480
     %x = call <2 x i13> @llvm.fptoui.sat.v2f64.v2i13(<2 x double> %f)
     ret <2 x i13> %x
 }
@@ -2466,114 +2409,95 @@ define arm_aapcs_vfpcc <2 x i16> @test_unsigned_v2f64_v2i16(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #32
-; CHECK-NEXT:    sub sp, #32
+; CHECK-NEXT:    .pad #16
+; CHECK-NEXT:    sub sp, #16
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI35_0
-; CHECK-NEXT:    vmov r5, r4, d9
-; CHECK-NEXT:    vmov r10, r9, d0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    vldr d0, .LCPI35_1
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    str r0, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    vmov r2, r11, d0
-; CHECK-NEXT:    str r2, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT:    str.w r11, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    vmov r6, r7, d9
+; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    strd r3, r2, [sp, #4] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    vmov r7, r6, d8
-; CHECK-NEXT:    str r1, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    cmp.w r8, #0
-; CHECK-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT:    csel r0, r0, r8, ne
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    vldr d0, .LCPI35_1
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    mov r10, r1
+; CHECK-NEXT:    vmov r9, r8, d0
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    vmov r11, r4, d8
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movwne r0, #65535
-; CHECK-NEXT:    str r0, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    str.w r10, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT:    mov r8, r9
-; CHECK-NEXT:    str.w r9, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r8
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r9, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    cmp.w r9, #0
-; CHECK-NEXT:    str r1, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT:    csel r9, r0, r9, ne
-; CHECK-NEXT:    ldr r0, [sp] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r2, r9
 ; CHECK-NEXT:    mov r3, r8
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movwne r9, #65535
+; CHECK-NEXT:    movne r6, #1
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movwne r5, #65535
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr.w r11, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    ldr.w r10, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csel r5, r1, r0, ne
-; CHECK-NEXT:    cmp.w r8, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    movne r7, #1
 ; CHECK-NEXT:    ldrd r3, r2, [sp, #4] @ 8-byte Folded Reload
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r8, r4
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csel r0, r1, r0, ne
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    lsrs r4, r0, #5
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    bl __aeabi_d2ulz
 ; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r0, #0
-; CHECK-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT:    vmov q0[2], q0[0], r9, r1
-; CHECK-NEXT:    vmov q0[3], q0[1], r0, r5
-; CHECK-NEXT:    add sp, #32
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movwne r0, #65535
+; CHECK-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r5
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r10, #0
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r10, #0
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r10
+; CHECK-NEXT:    add sp, #16
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI35_0:
-; CHECK-NEXT:    .long 0 @ double 65535
-; CHECK-NEXT:    .long 1089470432
-; CHECK-NEXT:  .LCPI35_1:
 ; CHECK-NEXT:    .long 0 @ double 0
 ; CHECK-NEXT:    .long 0
+; CHECK-NEXT:  .LCPI35_1:
+; CHECK-NEXT:    .long 0 @ double 65535
+; CHECK-NEXT:    .long 1089470432
     %x = call <2 x i16> @llvm.fptoui.sat.v2f64.v2i16(<2 x double> %f)
     ret <2 x i16> %x
 }
@@ -2587,105 +2511,84 @@ define arm_aapcs_vfpcc <2 x i19> @test_unsigned_v2f64_v2i19(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #32
-; CHECK-NEXT:    sub sp, #32
+; CHECK-NEXT:    .pad #16
+; CHECK-NEXT:    sub sp, #16
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI36_0
-; CHECK-NEXT:    vmov r5, r4, d9
-; CHECK-NEXT:    vmov r10, r9, d0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vmov r11, r10, d8
+; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    mov r1, r10
+; CHECK-NEXT:    strd r2, r3, [sp, #8] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    vldr d0, .LCPI36_1
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    str r0, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    vmov r2, r11, d0
-; CHECK-NEXT:    str r2, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT:    str.w r11, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    vmov r5, r7, d0
+; CHECK-NEXT:    mov r1, r10
+; CHECK-NEXT:    vmov r8, r6, d9
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r4, #1
+; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    mov r3, r7
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    vmov r7, r6, d8
-; CHECK-NEXT:    str r1, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    cmp.w r8, #0
-; CHECK-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT:    csel r0, r0, r8, ne
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movwne r0, #65535
-; CHECK-NEXT:    movtne r0, #7
-; CHECK-NEXT:    str r0, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    str.w r10, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT:    mov r8, r9
-; CHECK-NEXT:    str.w r9, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    clz r0, r0
 ; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r9, r0
-; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    clz r0, r0
 ; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    lsr.w r9, r0, #5
+; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    bl __aeabi_d2ulz
+; CHECK-NEXT:    mov r5, r0
 ; CHECK-NEXT:    cmp.w r9, #0
-; CHECK-NEXT:    str r1, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT:    csel r9, r0, r9, ne
-; CHECK-NEXT:    ldr r0, [sp] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movwne r9, #65535
-; CHECK-NEXT:    movtne r9, #7
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr.w r11, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    ldr.w r10, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csel r5, r1, r0, ne
-; CHECK-NEXT:    cmp.w r8, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r5, #0
-; CHECK-NEXT:    ldrd r3, r2, [sp, #4] @ 8-byte Folded Reload
-; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r6
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csel r0, r1, r0, ne
-; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    mov r1, r10
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r6, #1
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    movwne r5, #65535
+; CHECK-NEXT:    movtne r5, #7
+; CHECK-NEXT:    bl __aeabi_d2ulz
+; CHECK-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r0, #0
-; CHECK-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT:    vmov q0[2], q0[0], r9, r1
-; CHECK-NEXT:    vmov q0[3], q0[1], r0, r5
-; CHECK-NEXT:    add sp, #32
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    movwne r0, #65535
+; CHECK-NEXT:    movtne r0, #7
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r7, #0
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r7, #0
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r5
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r7
+; CHECK-NEXT:    add sp, #16
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -2710,114 +2613,95 @@ define arm_aapcs_vfpcc <2 x i32> @test_unsigned_v2f64_v2i32_duplicate(<2 x doubl
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #32
-; CHECK-NEXT:    sub sp, #32
+; CHECK-NEXT:    .pad #16
+; CHECK-NEXT:    sub sp, #16
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI37_0
-; CHECK-NEXT:    vmov r5, r4, d9
-; CHECK-NEXT:    vmov r10, r9, d0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    vldr d0, .LCPI37_1
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    str r0, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    vmov r2, r11, d0
-; CHECK-NEXT:    str r2, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT:    str.w r11, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    vmov r6, r7, d9
+; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    strd r3, r2, [sp, #4] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    vmov r7, r6, d8
-; CHECK-NEXT:    str r1, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    cmp.w r8, #0
-; CHECK-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT:    csel r0, r0, r8, ne
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    vldr d0, .LCPI37_1
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    mov r10, r1
+; CHECK-NEXT:    vmov r9, r8, d0
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    vmov r11, r4, d8
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r0, #-1
-; CHECK-NEXT:    str r0, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    str.w r10, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT:    mov r8, r9
-; CHECK-NEXT:    str.w r9, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r8
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r9, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    cmp.w r9, #0
-; CHECK-NEXT:    str r1, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT:    csel r9, r0, r9, ne
-; CHECK-NEXT:    ldr r0, [sp] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r2, r9
 ; CHECK-NEXT:    mov r3, r8
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r9, #-1
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr.w r11, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    ldr.w r10, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csel r5, r1, r0, ne
-; CHECK-NEXT:    cmp.w r8, #0
+; CHECK-NEXT:    movne r6, #1
+; CHECK-NEXT:    cmp r6, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r5, #0
-; CHECK-NEXT:    ldrd r3, r2, [sp, #4] @ 8-byte Folded Reload
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    movne.w r5, #-1
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csel r0, r1, r0, ne
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r7, #1
+; CHECK-NEXT:    ldrd r3, r2, [sp, #4] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r8, r4
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    lsrs r4, r0, #5
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    bl __aeabi_d2ulz
 ; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r0, #0
-; CHECK-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT:    vmov q0[2], q0[0], r9, r1
-; CHECK-NEXT:    vmov q0[3], q0[1], r0, r5
-; CHECK-NEXT:    add sp, #32
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r5
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r10, #0
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r10, #0
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r10
+; CHECK-NEXT:    add sp, #16
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI37_0:
-; CHECK-NEXT:    .long 4292870144 @ double 4294967295
-; CHECK-NEXT:    .long 1106247679
-; CHECK-NEXT:  .LCPI37_1:
 ; CHECK-NEXT:    .long 0 @ double 0
 ; CHECK-NEXT:    .long 0
+; CHECK-NEXT:  .LCPI37_1:
+; CHECK-NEXT:    .long 4292870144 @ double 4294967295
+; CHECK-NEXT:    .long 1106247679
     %x = call <2 x i32> @llvm.fptoui.sat.v2f64.v2i32(<2 x double> %f)
     ret <2 x i32> %x
 }
@@ -2831,105 +2715,84 @@ define arm_aapcs_vfpcc <2 x i50> @test_unsigned_v2f64_v2i50(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #32
-; CHECK-NEXT:    sub sp, #32
+; CHECK-NEXT:    .pad #16
+; CHECK-NEXT:    sub sp, #16
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI38_0
-; CHECK-NEXT:    vmov r5, r4, d9
-; CHECK-NEXT:    vmov r10, r9, d0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vmov r11, r10, d8
+; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    mov r1, r10
+; CHECK-NEXT:    strd r2, r3, [sp, #8] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    vldr d0, .LCPI38_1
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    str r0, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    vmov r2, r11, d0
-; CHECK-NEXT:    str r2, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT:    str.w r11, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    vmov r5, r7, d0
+; CHECK-NEXT:    mov r1, r10
+; CHECK-NEXT:    vmov r8, r6, d9
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r4, #1
+; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    mov r3, r7
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    vmov r7, r6, d8
-; CHECK-NEXT:    cmp.w r8, #0
-; CHECK-NEXT:    str r0, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    csel r0, r1, r8, ne
-; CHECK-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    mov r8, r9
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movwne r0, #65535
-; CHECK-NEXT:    movtne r0, #3
-; CHECK-NEXT:    str r0, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    str.w r10, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT:    str.w r9, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    clz r0, r0
 ; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r9, r0
-; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    clz r0, r0
 ; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    lsr.w r9, r0, #5
+; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    mov r7, r1
 ; CHECK-NEXT:    cmp.w r9, #0
-; CHECK-NEXT:    ldr r0, [sp] @ 4-byte Reload
-; CHECK-NEXT:    csel r9, r1, r9, ne
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movwne r9, #65535
-; CHECK-NEXT:    movtne r9, #3
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr.w r11, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    ldr.w r10, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csel r5, r1, r0, ne
-; CHECK-NEXT:    cmp.w r8, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r5, #-1
-; CHECK-NEXT:    ldrd r3, r2, [sp, #4] @ 8-byte Folded Reload
-; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    movne r7, #0
+; CHECK-NEXT:    ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r6
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csel r0, r1, r0, ne
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    mov r1, r10
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r6, #1
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    movwne r7, #65535
+; CHECK-NEXT:    movtne r7, #3
+; CHECK-NEXT:    bl __aeabi_d2ulz
+; CHECK-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    movwne r1, #65535
+; CHECK-NEXT:    movtne r1, #3
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r5, #-1
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #0
 ; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne.w r0, #-1
 ; CHECK-NEXT:    vmov q0[2], q0[0], r0, r5
-; CHECK-NEXT:    ldr r0, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT:    vmov q0[3], q0[1], r9, r0
-; CHECK-NEXT:    add sp, #32
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r7
+; CHECK-NEXT:    add sp, #16
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -2954,114 +2817,95 @@ define arm_aapcs_vfpcc <2 x i64> @test_unsigned_v2f64_v2i64(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #32
-; CHECK-NEXT:    sub sp, #32
+; CHECK-NEXT:    .pad #16
+; CHECK-NEXT:    sub sp, #16
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI39_0
-; CHECK-NEXT:    vmov r5, r4, d9
-; CHECK-NEXT:    vmov r10, r9, d0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    vldr d0, .LCPI39_1
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    str r0, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    vmov r2, r11, d0
-; CHECK-NEXT:    str r2, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT:    str.w r11, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    vmov r6, r7, d9
+; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    strd r3, r2, [sp, #4] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    vmov r7, r6, d8
-; CHECK-NEXT:    str r1, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    cmp.w r8, #0
-; CHECK-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT:    csel r0, r0, r8, ne
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    vldr d0, .LCPI39_1
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    mov r10, r1
+; CHECK-NEXT:    vmov r9, r8, d0
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    vmov r11, r4, d8
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r0, #-1
-; CHECK-NEXT:    str r0, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    str.w r10, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT:    mov r8, r9
-; CHECK-NEXT:    str.w r9, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r8
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r9, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    cmp.w r9, #0
-; CHECK-NEXT:    str r1, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT:    csel r9, r0, r9, ne
-; CHECK-NEXT:    ldr r0, [sp] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r2, r9
 ; CHECK-NEXT:    mov r3, r8
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r9, #-1
+; CHECK-NEXT:    movne r6, #1
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r5, #-1
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    ldr.w r11, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    ldr.w r10, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csel r5, r1, r0, ne
-; CHECK-NEXT:    cmp.w r8, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r5, #-1
+; CHECK-NEXT:    movne r7, #1
 ; CHECK-NEXT:    ldrd r3, r2, [sp, #4] @ 8-byte Folded Reload
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r8, r4
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csel r0, r1, r0, ne
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    lsrs r4, r0, #5
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    bl __aeabi_d2ulz
 ; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #0
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne.w r0, #-1
-; CHECK-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT:    vmov q0[2], q0[0], r9, r1
-; CHECK-NEXT:    vmov q0[3], q0[1], r0, r5
-; CHECK-NEXT:    add sp, #32
+; CHECK-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r5
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r10, #0
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r10, #-1
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r1, #-1
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r10
+; CHECK-NEXT:    add sp, #16
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI39_0:
-; CHECK-NEXT:    .long 4294967295 @ double 1.844674407370955E+19
-; CHECK-NEXT:    .long 1139802111
-; CHECK-NEXT:  .LCPI39_1:
 ; CHECK-NEXT:    .long 0 @ double 0
 ; CHECK-NEXT:    .long 0
+; CHECK-NEXT:  .LCPI39_1:
+; CHECK-NEXT:    .long 4294967295 @ double 1.844674407370955E+19
+; CHECK-NEXT:    .long 1139802111
     %x = call <2 x i64> @llvm.fptoui.sat.v2f64.v2i64(<2 x double> %f)
     ret <2 x i64> %x
 }

diff  --git a/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll b/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll
index 5a60a9e00aa3f..76e15ca0bf919 100644
--- a/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll
@@ -183,7 +183,7 @@ define i13 @test_signed_i13_f32(float %f) nounwind {
 ; X86-X87-NEXT:    fnstsw %ax
 ; X86-X87-NEXT:    # kill: def $ah killed $ah killed $ax
 ; X86-X87-NEXT:    sahf
-; X86-X87-NEXT:    movw $-4096, %cx # imm = 0xF000
+; X86-X87-NEXT:    movl $61440, %ecx # imm = 0xF000
 ; X86-X87-NEXT:    jb .LBB2_2
 ; X86-X87-NEXT:  # %bb.1:
 ; X86-X87-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
@@ -255,7 +255,7 @@ define i16 @test_signed_i16_f32(float %f) nounwind {
 ; X86-X87-NEXT:    fnstsw %ax
 ; X86-X87-NEXT:    # kill: def $ah killed $ah killed $ax
 ; X86-X87-NEXT:    sahf
-; X86-X87-NEXT:    movw $-32768, %cx # imm = 0x8000
+; X86-X87-NEXT:    movl $32768, %ecx # imm = 0x8000
 ; X86-X87-NEXT:    jb .LBB3_2
 ; X86-X87-NEXT:  # %bb.1:
 ; X86-X87-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
@@ -1203,7 +1203,7 @@ define i13 @test_signed_i13_f64(double %f) nounwind {
 ; X86-X87-NEXT:    fnstsw %ax
 ; X86-X87-NEXT:    # kill: def $ah killed $ah killed $ax
 ; X86-X87-NEXT:    sahf
-; X86-X87-NEXT:    movw $-4096, %cx # imm = 0xF000
+; X86-X87-NEXT:    movl $61440, %ecx # imm = 0xF000
 ; X86-X87-NEXT:    jb .LBB12_2
 ; X86-X87-NEXT:  # %bb.1:
 ; X86-X87-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
@@ -1275,7 +1275,7 @@ define i16 @test_signed_i16_f64(double %f) nounwind {
 ; X86-X87-NEXT:    fnstsw %ax
 ; X86-X87-NEXT:    # kill: def $ah killed $ah killed $ax
 ; X86-X87-NEXT:    sahf
-; X86-X87-NEXT:    movw $-32768, %cx # imm = 0x8000
+; X86-X87-NEXT:    movl $32768, %ecx # imm = 0x8000
 ; X86-X87-NEXT:    jb .LBB13_2
 ; X86-X87-NEXT:  # %bb.1:
 ; X86-X87-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
@@ -2268,7 +2268,7 @@ define i13 @test_signed_i13_f16(half %f) nounwind {
 ; X86-X87-NEXT:    fnstsw %ax
 ; X86-X87-NEXT:    # kill: def $ah killed $ah killed $ax
 ; X86-X87-NEXT:    sahf
-; X86-X87-NEXT:    movw $-4096, %cx # imm = 0xF000
+; X86-X87-NEXT:    movl $61440, %ecx # imm = 0xF000
 ; X86-X87-NEXT:    jb .LBB22_2
 ; X86-X87-NEXT:  # %bb.1:
 ; X86-X87-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
@@ -2364,7 +2364,7 @@ define i16 @test_signed_i16_f16(half %f) nounwind {
 ; X86-X87-NEXT:    fnstsw %ax
 ; X86-X87-NEXT:    # kill: def $ah killed $ah killed $ax
 ; X86-X87-NEXT:    sahf
-; X86-X87-NEXT:    movw $-32768, %cx # imm = 0x8000
+; X86-X87-NEXT:    movl $32768, %ecx # imm = 0x8000
 ; X86-X87-NEXT:    jb .LBB23_2
 ; X86-X87-NEXT:  # %bb.1:
 ; X86-X87-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
@@ -3495,7 +3495,7 @@ define i13 @test_signed_i13_f80(x86_fp80 %f) nounwind {
 ; X86-X87-NEXT:    fnstsw %ax
 ; X86-X87-NEXT:    # kill: def $ah killed $ah killed $ax
 ; X86-X87-NEXT:    sahf
-; X86-X87-NEXT:    movw $-4096, %cx # imm = 0xF000
+; X86-X87-NEXT:    movl $61440, %ecx # imm = 0xF000
 ; X86-X87-NEXT:    jb .LBB32_2
 ; X86-X87-NEXT:  # %bb.1:
 ; X86-X87-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
@@ -3536,24 +3536,22 @@ define i13 @test_signed_i13_f80(x86_fp80 %f) nounwind {
 ; X86-SSE-NEXT:    fldcw {{[0-9]+}}(%esp)
 ; X86-SSE-NEXT:    fists {{[0-9]+}}(%esp)
 ; X86-SSE-NEXT:    fldcw {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-SSE-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
 ; X86-SSE-NEXT:    fxch %st(1)
 ; X86-SSE-NEXT:    fucomi %st(1), %st
 ; X86-SSE-NEXT:    fstp %st(1)
-; X86-SSE-NEXT:    movw $-4096, %ax # imm = 0xF000
-; X86-SSE-NEXT:    jb .LBB32_2
-; X86-SSE-NEXT:  # %bb.1:
-; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-SSE-NEXT:  .LBB32_2:
+; X86-SSE-NEXT:    movl $61440, %ecx # imm = 0xF000
+; X86-SSE-NEXT:    cmovael %eax, %ecx
 ; X86-SSE-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
 ; X86-SSE-NEXT:    fxch %st(1)
 ; X86-SSE-NEXT:    fucomi %st(1), %st
 ; X86-SSE-NEXT:    fstp %st(1)
-; X86-SSE-NEXT:    movl $4095, %ecx # imm = 0xFFF
-; X86-SSE-NEXT:    cmovbel %eax, %ecx
+; X86-SSE-NEXT:    movl $4095, %edx # imm = 0xFFF
+; X86-SSE-NEXT:    cmovbel %ecx, %edx
 ; X86-SSE-NEXT:    xorl %eax, %eax
 ; X86-SSE-NEXT:    fucompi %st(0), %st
-; X86-SSE-NEXT:    cmovnpl %ecx, %eax
+; X86-SSE-NEXT:    cmovnpl %edx, %eax
 ; X86-SSE-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-SSE-NEXT:    addl $8, %esp
 ; X86-SSE-NEXT:    retl
@@ -3568,24 +3566,22 @@ define i13 @test_signed_i13_f80(x86_fp80 %f) nounwind {
 ; X64-NEXT:    fldcw -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    fists -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    fldcw -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movzwl -{{[0-9]+}}(%rsp), %eax
 ; X64-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
 ; X64-NEXT:    fxch %st(1)
 ; X64-NEXT:    fucomi %st(1), %st
 ; X64-NEXT:    fstp %st(1)
-; X64-NEXT:    movw $-4096, %ax # imm = 0xF000
-; X64-NEXT:    jb .LBB32_2
-; X64-NEXT:  # %bb.1:
-; X64-NEXT:    movzwl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:  .LBB32_2:
+; X64-NEXT:    movl $61440, %ecx # imm = 0xF000
+; X64-NEXT:    cmovael %eax, %ecx
 ; X64-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
 ; X64-NEXT:    fxch %st(1)
 ; X64-NEXT:    fucomi %st(1), %st
 ; X64-NEXT:    fstp %st(1)
-; X64-NEXT:    movl $4095, %ecx # imm = 0xFFF
-; X64-NEXT:    cmovbel %eax, %ecx
+; X64-NEXT:    movl $4095, %edx # imm = 0xFFF
+; X64-NEXT:    cmovbel %ecx, %edx
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    fucompi %st(0), %st
-; X64-NEXT:    cmovnpl %ecx, %eax
+; X64-NEXT:    cmovnpl %edx, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
     %x = call i13 @llvm.fptosi.sat.i13.f80(x86_fp80 %f)
@@ -3611,7 +3607,7 @@ define i16 @test_signed_i16_f80(x86_fp80 %f) nounwind {
 ; X86-X87-NEXT:    fnstsw %ax
 ; X86-X87-NEXT:    # kill: def $ah killed $ah killed $ax
 ; X86-X87-NEXT:    sahf
-; X86-X87-NEXT:    movw $-32768, %cx # imm = 0x8000
+; X86-X87-NEXT:    movl $32768, %ecx # imm = 0x8000
 ; X86-X87-NEXT:    jb .LBB33_2
 ; X86-X87-NEXT:  # %bb.1:
 ; X86-X87-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
@@ -3652,24 +3648,22 @@ define i16 @test_signed_i16_f80(x86_fp80 %f) nounwind {
 ; X86-SSE-NEXT:    fldcw {{[0-9]+}}(%esp)
 ; X86-SSE-NEXT:    fists {{[0-9]+}}(%esp)
 ; X86-SSE-NEXT:    fldcw {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-SSE-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
 ; X86-SSE-NEXT:    fxch %st(1)
 ; X86-SSE-NEXT:    fucomi %st(1), %st
 ; X86-SSE-NEXT:    fstp %st(1)
-; X86-SSE-NEXT:    movw $-32768, %ax # imm = 0x8000
-; X86-SSE-NEXT:    jb .LBB33_2
-; X86-SSE-NEXT:  # %bb.1:
-; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-SSE-NEXT:  .LBB33_2:
+; X86-SSE-NEXT:    movl $32768, %ecx # imm = 0x8000
+; X86-SSE-NEXT:    cmovael %eax, %ecx
 ; X86-SSE-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
 ; X86-SSE-NEXT:    fxch %st(1)
 ; X86-SSE-NEXT:    fucomi %st(1), %st
 ; X86-SSE-NEXT:    fstp %st(1)
-; X86-SSE-NEXT:    movl $32767, %ecx # imm = 0x7FFF
-; X86-SSE-NEXT:    cmovbel %eax, %ecx
+; X86-SSE-NEXT:    movl $32767, %edx # imm = 0x7FFF
+; X86-SSE-NEXT:    cmovbel %ecx, %edx
 ; X86-SSE-NEXT:    xorl %eax, %eax
 ; X86-SSE-NEXT:    fucompi %st(0), %st
-; X86-SSE-NEXT:    cmovnpl %ecx, %eax
+; X86-SSE-NEXT:    cmovnpl %edx, %eax
 ; X86-SSE-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-SSE-NEXT:    addl $8, %esp
 ; X86-SSE-NEXT:    retl
@@ -3684,24 +3678,22 @@ define i16 @test_signed_i16_f80(x86_fp80 %f) nounwind {
 ; X64-NEXT:    fldcw -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    fists -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    fldcw -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movzwl -{{[0-9]+}}(%rsp), %eax
 ; X64-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
 ; X64-NEXT:    fxch %st(1)
 ; X64-NEXT:    fucomi %st(1), %st
 ; X64-NEXT:    fstp %st(1)
-; X64-NEXT:    movw $-32768, %ax # imm = 0x8000
-; X64-NEXT:    jb .LBB33_2
-; X64-NEXT:  # %bb.1:
-; X64-NEXT:    movzwl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:  .LBB33_2:
+; X64-NEXT:    movl $32768, %ecx # imm = 0x8000
+; X64-NEXT:    cmovael %eax, %ecx
 ; X64-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
 ; X64-NEXT:    fxch %st(1)
 ; X64-NEXT:    fucomi %st(1), %st
 ; X64-NEXT:    fstp %st(1)
-; X64-NEXT:    movl $32767, %ecx # imm = 0x7FFF
-; X64-NEXT:    cmovbel %eax, %ecx
+; X64-NEXT:    movl $32767, %edx # imm = 0x7FFF
+; X64-NEXT:    cmovbel %ecx, %edx
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    fucompi %st(0), %st
-; X64-NEXT:    cmovnpl %ecx, %eax
+; X64-NEXT:    cmovnpl %edx, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
     %x = call i16 @llvm.fptosi.sat.i16.f80(x86_fp80 %f)


        


More information about the llvm-commits mailing list