[llvm] bbdf243 - [DAGCombine] Fold redundant select

Samuel Parker via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 2 01:43:37 PST 2023


Author: Samuel Parker
Date: 2023-02-02T09:43:21Z
New Revision: bbdf24357932b064f2aa18ea1356b474e0220dde

URL: https://github.com/llvm/llvm-project/commit/bbdf24357932b064f2aa18ea1356b474e0220dde
DIFF: https://github.com/llvm/llvm-project/commit/bbdf24357932b064f2aa18ea1356b474e0220dde.diff

LOG: [DAGCombine] Fold redundant select

If two chained selects share a true/false value and are controlled by
two setcc nodes that can never both be true, we can fold away one of
the selects. So, the following:

  (select (setcc X, const0, eq), Y,
    (select (setcc X, const1, eq), Z, Y))

can be combined to:

  (select (setcc X, const1, eq), Z, Y)
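
To see why this is sound: when const0 != const1, X == const0 implies
X != const1, so on that path the inner select already produces Y and the
outer select is redundant. Below is a minimal standalone C++ model of the
scalar semantics (the helper names beforeFold/afterFold are hypothetical;
this is an illustration, not the SelectionDAG code):

    #include <cassert>
    #include <cstdint>

    // Before the fold: nested selects sharing the false value Y.
    static int64_t beforeFold(int64_t X, int64_t C0, int64_t C1,
                              int64_t Y, int64_t Z) {
      return (X == C0) ? Y : ((X == C1) ? Z : Y);
    }

    // After the fold: the outer select is gone.
    static int64_t afterFold(int64_t X, int64_t C1, int64_t Y, int64_t Z) {
      return (X == C1) ? Z : Y;
    }

    int main() {
      // Exhaustive check over a small range, with C0 = 0 and C1 = 1.
      for (int64_t X = -4; X <= 4; ++X)
        assert(beforeFold(X, 0, 1, 10, 20) == afterFold(X, 1, 10, 20));
      return 0;
    }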

Differential Revision: https://reviews.llvm.org/D142535

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/ARM/fpclamptosat.ll
    llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
    llvm/test/CodeGen/RISCV/fpclamptosat.ll
    llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
    llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
    llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
    llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
    llvm/test/CodeGen/X86/fpclamptosat.ll
    llvm/test/CodeGen/X86/fpclamptosat_vec.ll
    llvm/test/CodeGen/X86/sdiv_fix_sat.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9463b82cb8d2c..b61a2eef9dc21 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10893,6 +10893,73 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
       }
     }
 
+    // If we have a chain of two selects that share a true/false value and
+    // are controlled by two setcc nodes that can never both be true, we can
+    // fold away N.
+    // select (setcc X), Y, (select (setcc X), Z, Y) -> select (setcc X), Z, Y
+    auto IsSelect = [](SDValue Op) {
+      return Op->getOpcode() == ISD::SELECT;
+    };
+    if ((IsSelect(N1) || IsSelect(N2)) && (N1.getOpcode() != N2.getOpcode())) {
+      auto AreSame = [](SDValue Op0, SDValue Op1) {
+        if (Op0 == Op1)
+          return true;
+        auto *C0 = dyn_cast<ConstantSDNode>(Op0);
+        auto *C1 = dyn_cast<ConstantSDNode>(Op1);
+        return C0 && C1 &&
+               APInt::isSameValue(C0->getAPIntValue(), C1->getAPIntValue());
+      };
+
+      SDValue OtherSelect;
+      bool SelectsShareOp = false;
+      if (IsSelect(N1)) {
+        OtherSelect = N1;
+        SelectsShareOp = AreSame(OtherSelect.getOperand(1), N2);
+      } else {
+        OtherSelect = N2;
+        SelectsShareOp = AreSame(OtherSelect.getOperand(2), N1);
+      }
+
+      auto CanNeverBeEqual = [](SDValue SetCC0, SDValue SetCC1) {
+        if (SetCC0->getOpcode() != ISD::SETCC ||
+            SetCC1->getOpcode() != ISD::SETCC ||
+            SetCC0->getOperand(0) != SetCC1->getOperand(0))
+          return false;
+
+        ISD::CondCode CC0 = cast<CondCodeSDNode>(SetCC0.getOperand(2))->get();
+        ISD::CondCode CC1 = cast<CondCodeSDNode>(SetCC1.getOperand(2))->get();
+        auto *C0 = dyn_cast<ConstantSDNode>(SetCC0.getOperand(1));
+        auto *C1 = dyn_cast<ConstantSDNode>(SetCC1.getOperand(1));
+        if (!C0 || !C1)
+          return false;
+
+        bool ConstantsAreSame =
+          APInt::isSameValue(C0->getAPIntValue(), C1->getAPIntValue());
+        auto IsEqual = [](ISD::CondCode CC) {
+          return CC == ISD::SETEQ;
+        };
+        auto IsNotEqual = [](ISD::CondCode CC) {
+          return CC == ISD::SETLT || CC == ISD::SETULT ||
+                 CC == ISD::SETGT || CC == ISD::SETUGT ||
+                 CC == ISD::SETNE;
+        };
+
+        if (ConstantsAreSame && IsNotEqual(CC0) && IsEqual(CC1))
+          return true;
+        if (ConstantsAreSame && IsNotEqual(CC1) && IsEqual(CC0))
+          return true;
+        if (!ConstantsAreSame && IsEqual(CC0) && IsEqual(CC1))
+          return true;
+
+        return false;
+      };
+
+      SDValue SetCC0 = N0;
+      SDValue SetCC1 = OtherSelect.getOperand(0);
+      if (SelectsShareOp && CanNeverBeEqual(SetCC0, SetCC1))
+        return OtherSelect;
+    }
+
     if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
         (!LegalOperations &&
          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {

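The CanNeverBeEqual lambda above accepts two mutually exclusive shapes:
the same constant compared with one equality and one strict or not-equal
predicate, or two different constants both compared for equality. A small
standalone C++ sketch of that predicate logic (a model under those
assumptions, with hypothetical names, not the DAG code):

    #include <cassert>
    #include <cstdint>

    // Two compares of the same X can never both be true when either:
    //  (a) they use the same constant and one is ==, the other a strict
    //      or != predicate, or
    //  (b) they use different constants and both are ==.
    static bool caseA(int64_t X, int64_t C) {
      return !((X == C) && (X < C)); // likewise for >, !=, unsigned forms
    }

    static bool caseB(int64_t X, int64_t C0, int64_t C1) {
      assert(C0 != C1);
      return !((X == C0) && (X == C1));
    }

    int main() {
      for (int64_t X = -4; X <= 4; ++X) {
        assert(caseA(X, 2));
        assert(caseB(X, 0, 1));
      }
      return 0;
    }
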
diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll
index 91c1a21964309..1f5e3051e92c1 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll
@@ -3099,130 +3099,117 @@ define i64 @stest_f64i64_mm(double %x) {
 ; SOFT:       @ %bb.0: @ %entry
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #12
-; SOFT-NEXT:    sub sp, #12
+; SOFT-NEXT:    .pad #20
+; SOFT-NEXT:    sub sp, #20
 ; SOFT-NEXT:    bl __fixdfti
-; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    str r0, [sp, #16] @ 4-byte Spill
 ; SOFT-NEXT:    mov r4, r1
-; SOFT-NEXT:    ldr r5, .LCPI45_0
-; SOFT-NEXT:    cmp r1, r5
+; SOFT-NEXT:    ldr r0, .LCPI45_0
+; SOFT-NEXT:    cmp r1, r0
+; SOFT-NEXT:    mov r5, r1
 ; SOFT-NEXT:    blo .LBB45_2
 ; SOFT-NEXT:  @ %bb.1: @ %entry
-; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    ldr r5, .LCPI45_0
 ; SOFT-NEXT:  .LBB45_2: @ %entry
 ; SOFT-NEXT:    cmp r3, #0
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r1, r4
 ; SOFT-NEXT:    bmi .LBB45_4
 ; SOFT-NEXT:  @ %bb.3: @ %entry
-; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    ldr r1, .LCPI45_0
 ; SOFT-NEXT:  .LBB45_4: @ %entry
-; SOFT-NEXT:    str r2, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    mov r7, r2
-; SOFT-NEXT:    orrs r7, r3
+; SOFT-NEXT:    mov r0, r2
+; SOFT-NEXT:    orrs r0, r3
+; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
 ; SOFT-NEXT:    beq .LBB45_6
 ; SOFT-NEXT:  @ %bb.5: @ %entry
-; SOFT-NEXT:    mov r1, r0
+; SOFT-NEXT:    mov r5, r1
 ; SOFT-NEXT:  .LBB45_6: @ %entry
 ; SOFT-NEXT:    movs r0, #0
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    mvns r2, r0
-; SOFT-NEXT:    cmp r4, r5
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    blo .LBB45_8
+; SOFT-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    mov r7, r3
+; SOFT-NEXT:    bmi .LBB45_8
 ; SOFT-NEXT:  @ %bb.7: @ %entry
-; SOFT-NEXT:    mov r0, r2
+; SOFT-NEXT:    ldr r7, [sp, #12] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB45_8: @ %entry
-; SOFT-NEXT:    cmp r4, r5
-; SOFT-NEXT:    mov r4, r6
-; SOFT-NEXT:    bne .LBB45_26
+; SOFT-NEXT:    movs r1, #1
+; SOFT-NEXT:    lsls r1, r1, #31
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    mov r6, r5
+; SOFT-NEXT:    bge .LBB45_10
 ; SOFT-NEXT:  @ %bb.9: @ %entry
-; SOFT-NEXT:    cmp r3, #0
-; SOFT-NEXT:    bpl .LBB45_27
+; SOFT-NEXT:    mov r6, r1
 ; SOFT-NEXT:  .LBB45_10: @ %entry
-; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    beq .LBB45_12
-; SOFT-NEXT:  .LBB45_11: @ %entry
-; SOFT-NEXT:    mov r4, r6
+; SOFT-NEXT:    cmp r5, r1
+; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    bhi .LBB45_12
+; SOFT-NEXT:  @ %bb.11: @ %entry
+; SOFT-NEXT:    mov r0, r1
 ; SOFT-NEXT:  .LBB45_12: @ %entry
-; SOFT-NEXT:    movs r0, #1
-; SOFT-NEXT:    lsls r5, r0, #31
-; SOFT-NEXT:    cmp r1, r5
-; SOFT-NEXT:    mov r2, r4
-; SOFT-NEXT:    ldr r6, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    bhi .LBB45_14
+; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    asrs r0, r3, #31
+; SOFT-NEXT:    ands r0, r2
+; SOFT-NEXT:    ands r0, r7
+; SOFT-NEXT:    adds r0, r0, #1
+; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT:    beq .LBB45_14
 ; SOFT-NEXT:  @ %bb.13: @ %entry
-; SOFT-NEXT:    mov r2, r6
+; SOFT-NEXT:    str r6, [sp, #8] @ 4-byte Spill
 ; SOFT-NEXT:  .LBB45_14: @ %entry
-; SOFT-NEXT:    cmp r1, r5
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    beq .LBB45_16
+; SOFT-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT:    mvns r0, r0
+; SOFT-NEXT:    ldr r2, .LCPI45_0
+; SOFT-NEXT:    cmp r4, r2
+; SOFT-NEXT:    ldr r6, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    blo .LBB45_16
 ; SOFT-NEXT:  @ %bb.15: @ %entry
-; SOFT-NEXT:    mov r0, r2
+; SOFT-NEXT:    mov r6, r0
 ; SOFT-NEXT:  .LBB45_16: @ %entry
-; SOFT-NEXT:    cmp r3, #0
-; SOFT-NEXT:    mov r7, r3
-; SOFT-NEXT:    bpl .LBB45_28
+; SOFT-NEXT:    cmp r4, r2
+; SOFT-NEXT:    ldr r4, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    beq .LBB45_18
 ; SOFT-NEXT:  @ %bb.17: @ %entry
-; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    blt .LBB45_29
+; SOFT-NEXT:    mov r4, r6
 ; SOFT-NEXT:  .LBB45_18: @ %entry
 ; SOFT-NEXT:    cmp r3, #0
-; SOFT-NEXT:    beq .LBB45_20
-; SOFT-NEXT:  .LBB45_19:
-; SOFT-NEXT:    asrs r3, r3, #31
-; SOFT-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    ands r3, r2
+; SOFT-NEXT:    bmi .LBB45_20
+; SOFT-NEXT:  @ %bb.19: @ %entry
+; SOFT-NEXT:    str r0, [sp, #16] @ 4-byte Spill
 ; SOFT-NEXT:  .LBB45_20: @ %entry
-; SOFT-NEXT:    ands r3, r7
-; SOFT-NEXT:    adds r2, r3, #1
+; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:    cmp r0, #0
 ; SOFT-NEXT:    beq .LBB45_22
 ; SOFT-NEXT:  @ %bb.21: @ %entry
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    ldr r4, [sp, #16] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB45_22: @ %entry
-; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    mov r3, r1
-; SOFT-NEXT:    blt .LBB45_30
+; SOFT-NEXT:    cmp r5, r1
+; SOFT-NEXT:    mov r2, r4
+; SOFT-NEXT:    bhi .LBB45_24
 ; SOFT-NEXT:  @ %bb.23: @ %entry
-; SOFT-NEXT:    cmp r1, r5
-; SOFT-NEXT:    bls .LBB45_31
+; SOFT-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB45_24: @ %entry
-; SOFT-NEXT:    cmp r2, #0
-; SOFT-NEXT:    bne .LBB45_32
-; SOFT-NEXT:  .LBB45_25: @ %entry
-; SOFT-NEXT:    add sp, #12
-; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:    cmp r5, r1
+; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    beq .LBB45_26
+; SOFT-NEXT:  @ %bb.25: @ %entry
+; SOFT-NEXT:    mov r0, r2
 ; SOFT-NEXT:  .LBB45_26: @ %entry
-; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    cmp r3, #0
-; SOFT-NEXT:    bmi .LBB45_10
-; SOFT-NEXT:  .LBB45_27: @ %entry
-; SOFT-NEXT:    mov r6, r2
 ; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    bne .LBB45_11
-; SOFT-NEXT:    b .LBB45_12
+; SOFT-NEXT:    bge .LBB45_28
+; SOFT-NEXT:  @ %bb.27: @ %entry
+; SOFT-NEXT:    ldr r4, [sp, #12] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB45_28: @ %entry
-; SOFT-NEXT:    mov r7, r6
-; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    bge .LBB45_18
-; SOFT-NEXT:  .LBB45_29: @ %entry
-; SOFT-NEXT:    mov r4, r6
-; SOFT-NEXT:    cmp r3, #0
-; SOFT-NEXT:    bne .LBB45_19
-; SOFT-NEXT:    b .LBB45_20
+; SOFT-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; SOFT-NEXT:    cmp r1, #0
+; SOFT-NEXT:    beq .LBB45_30
+; SOFT-NEXT:  @ %bb.29: @ %entry
+; SOFT-NEXT:    mov r0, r4
 ; SOFT-NEXT:  .LBB45_30: @ %entry
-; SOFT-NEXT:    mov r3, r5
-; SOFT-NEXT:    cmp r1, r5
-; SOFT-NEXT:    bhi .LBB45_24
-; SOFT-NEXT:  .LBB45_31: @ %entry
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    cmp r2, #0
-; SOFT-NEXT:    beq .LBB45_25
-; SOFT-NEXT:  .LBB45_32: @ %entry
-; SOFT-NEXT:    mov r1, r3
-; SOFT-NEXT:    add sp, #12
+; SOFT-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    add sp, #20
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
-; SOFT-NEXT:  @ %bb.33:
+; SOFT-NEXT:  @ %bb.31:
 ; SOFT-NEXT:  .LCPI45_0:
 ; SOFT-NEXT:    .long 2147483647 @ 0x7fffffff
 ;
@@ -3244,46 +3231,43 @@ define i64 @stest_f64i64_mm(double %x) {
 ; VFP2-NEXT:    mov.w r5, #0
 ; VFP2-NEXT:    it mi
 ; VFP2-NEXT:    movmi r4, r12
-; VFP2-NEXT:    orrs.w r9, r2, r3
+; VFP2-NEXT:    orrs.w r7, r2, r3
 ; VFP2-NEXT:    it eq
 ; VFP2-NEXT:    moveq r4, r1
 ; VFP2-NEXT:    cmp r3, #0
 ; VFP2-NEXT:    it mi
 ; VFP2-NEXT:    movmi r5, r3
-; VFP2-NEXT:    cmp.w r5, #-1
-; VFP2-NEXT:    mov.w r7, #-2147483648
+; VFP2-NEXT:    and.w r2, r2, r3, asr #31
 ; VFP2-NEXT:    mov.w r1, #-2147483648
+; VFP2-NEXT:    cmp.w r5, #-1
+; VFP2-NEXT:    mov.w r6, #-2147483648
+; VFP2-NEXT:    and.w r2, r2, r5
 ; VFP2-NEXT:    it gt
-; VFP2-NEXT:    movgt r7, r4
+; VFP2-NEXT:    movgt r6, r4
 ; VFP2-NEXT:    cmp.w r4, #-2147483648
-; VFP2-NEXT:    mov r6, r3
 ; VFP2-NEXT:    it hi
 ; VFP2-NEXT:    movhi r1, r4
-; VFP2-NEXT:    cmp r3, #0
-; VFP2-NEXT:    it ne
-; VFP2-NEXT:    andne.w r6, r2, r6, asr #31
-; VFP2-NEXT:    and.w r2, r6, r5
-; VFP2-NEXT:    mov.w r6, #-1
 ; VFP2-NEXT:    adds r2, #1
 ; VFP2-NEXT:    it ne
-; VFP2-NEXT:    movne r1, r7
-; VFP2-NEXT:    mov.w r7, #-1
+; VFP2-NEXT:    movne r1, r6
+; VFP2-NEXT:    mov.w r6, #-1
 ; VFP2-NEXT:    cmp r12, r8
 ; VFP2-NEXT:    it lo
-; VFP2-NEXT:    movlo r7, r0
-; VFP2-NEXT:    mov.w lr, #0
+; VFP2-NEXT:    movlo r6, r0
 ; VFP2-NEXT:    it eq
-; VFP2-NEXT:    moveq r7, r0
+; VFP2-NEXT:    moveq r6, r0
 ; VFP2-NEXT:    cmp r3, #0
+; VFP2-NEXT:    mov.w r9, #-1
 ; VFP2-NEXT:    it pl
-; VFP2-NEXT:    movpl r0, r6
-; VFP2-NEXT:    cmp.w r9, #0
+; VFP2-NEXT:    movpl r0, r9
+; VFP2-NEXT:    cmp r7, #0
 ; VFP2-NEXT:    mov.w r3, #0
 ; VFP2-NEXT:    it eq
-; VFP2-NEXT:    moveq r0, r7
+; VFP2-NEXT:    moveq r0, r6
 ; VFP2-NEXT:    cmp.w r4, #-2147483648
 ; VFP2-NEXT:    it hi
 ; VFP2-NEXT:    movhi r3, r0
+; VFP2-NEXT:    mov.w lr, #0
 ; VFP2-NEXT:    it eq
 ; VFP2-NEXT:    moveq r3, r0
 ; VFP2-NEXT:    cmp.w r5, #-1
@@ -3297,10 +3281,8 @@ define i64 @stest_f64i64_mm(double %x) {
 ;
 ; FULL-LABEL: stest_f64i64_mm:
 ; FULL:       @ %bb.0: @ %entry
-; FULL-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
-; FULL-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
-; FULL-NEXT:    .pad #4
-; FULL-NEXT:    sub sp, #4
+; FULL-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; FULL-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
 ; FULL-NEXT:    bl __fixdfti
 ; FULL-NEXT:    mvn r12, #-2147483648
 ; FULL-NEXT:    cmp r1, r12
@@ -3313,17 +3295,14 @@ define i64 @stest_f64i64_mm(double %x) {
 ; FULL-NEXT:    cmp r3, #0
 ; FULL-NEXT:    mov.w r7, #-2147483648
 ; FULL-NEXT:    csel r6, r3, lr, mi
-; FULL-NEXT:    mov r5, r3
+; FULL-NEXT:    and.w r2, r2, r3, asr #31
 ; FULL-NEXT:    cmp.w r6, #-1
-; FULL-NEXT:    csel r9, r4, r7, gt
+; FULL-NEXT:    and.w r2, r2, r6
+; FULL-NEXT:    csel r5, r4, r7, gt
 ; FULL-NEXT:    cmp.w r4, #-2147483648
 ; FULL-NEXT:    csel r7, r4, r7, hi
-; FULL-NEXT:    cmp r3, #0
-; FULL-NEXT:    it ne
-; FULL-NEXT:    andne.w r5, r2, r5, asr #31
-; FULL-NEXT:    and.w r2, r5, r6
-; FULL-NEXT:    adds r5, r2, #1
-; FULL-NEXT:    csel r2, r7, r9, eq
+; FULL-NEXT:    adds r2, #1
+; FULL-NEXT:    csel r5, r7, r5, eq
 ; FULL-NEXT:    mov.w r7, #-1
 ; FULL-NEXT:    cmp r1, r12
 ; FULL-NEXT:    csel r1, r0, r7, lo
@@ -3337,11 +3316,10 @@ define i64 @stest_f64i64_mm(double %x) {
 ; FULL-NEXT:    csel r1, r0, r1, eq
 ; FULL-NEXT:    cmp.w r6, #-1
 ; FULL-NEXT:    csel r0, r0, lr, gt
-; FULL-NEXT:    cmp r5, #0
+; FULL-NEXT:    cmp r2, #0
 ; FULL-NEXT:    csel r0, r1, r0, eq
-; FULL-NEXT:    mov r1, r2
-; FULL-NEXT:    add sp, #4
-; FULL-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+; FULL-NEXT:    mov r1, r5
+; FULL-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
 entry:
   %conv = fptosi double %x to i128
   %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
@@ -3693,7 +3671,6 @@ define i64 @stest_f32i64_mm(float %x) {
 ; SOFT-NEXT:    bl __fixsfti
 ; SOFT-NEXT:    str r0, [sp, #16] @ 4-byte Spill
 ; SOFT-NEXT:    mov r4, r1
-; SOFT-NEXT:    mov r7, r3
 ; SOFT-NEXT:    ldr r0, .LCPI48_0
 ; SOFT-NEXT:    cmp r1, r0
 ; SOFT-NEXT:    mov r5, r1
@@ -3701,114 +3678,105 @@ define i64 @stest_f32i64_mm(float %x) {
 ; SOFT-NEXT:  @ %bb.1: @ %entry
 ; SOFT-NEXT:    ldr r5, .LCPI48_0
 ; SOFT-NEXT:  .LBB48_2: @ %entry
-; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    cmp r3, #0
 ; SOFT-NEXT:    mov r1, r4
 ; SOFT-NEXT:    bmi .LBB48_4
 ; SOFT-NEXT:  @ %bb.3: @ %entry
 ; SOFT-NEXT:    ldr r1, .LCPI48_0
 ; SOFT-NEXT:  .LBB48_4: @ %entry
-; SOFT-NEXT:    str r2, [sp] @ 4-byte Spill
 ; SOFT-NEXT:    mov r0, r2
-; SOFT-NEXT:    orrs r0, r7
+; SOFT-NEXT:    orrs r0, r3
 ; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
 ; SOFT-NEXT:    beq .LBB48_6
 ; SOFT-NEXT:  @ %bb.5: @ %entry
 ; SOFT-NEXT:    mov r5, r1
 ; SOFT-NEXT:  .LBB48_6: @ %entry
-; SOFT-NEXT:    movs r1, #0
-; SOFT-NEXT:    str r1, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    mov r2, r7
+; SOFT-NEXT:    movs r0, #0
+; SOFT-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    mov r7, r3
 ; SOFT-NEXT:    bmi .LBB48_8
 ; SOFT-NEXT:  @ %bb.7: @ %entry
-; SOFT-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT:    ldr r7, [sp, #12] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB48_8: @ %entry
 ; SOFT-NEXT:    movs r1, #1
 ; SOFT-NEXT:    lsls r1, r1, #31
-; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    cmp r7, #0
 ; SOFT-NEXT:    mov r6, r5
 ; SOFT-NEXT:    bge .LBB48_10
 ; SOFT-NEXT:  @ %bb.9: @ %entry
 ; SOFT-NEXT:    mov r6, r1
 ; SOFT-NEXT:  .LBB48_10: @ %entry
 ; SOFT-NEXT:    cmp r5, r1
-; SOFT-NEXT:    mov r3, r5
+; SOFT-NEXT:    mov r0, r5
 ; SOFT-NEXT:    bhi .LBB48_12
 ; SOFT-NEXT:  @ %bb.11: @ %entry
-; SOFT-NEXT:    mov r3, r1
+; SOFT-NEXT:    mov r0, r1
 ; SOFT-NEXT:  .LBB48_12: @ %entry
-; SOFT-NEXT:    str r3, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    bne .LBB48_14
-; SOFT-NEXT:  @ %bb.13: @ %entry
-; SOFT-NEXT:    mov r3, r7
-; SOFT-NEXT:    b .LBB48_15
-; SOFT-NEXT:  .LBB48_14:
-; SOFT-NEXT:    asrs r3, r7, #31
-; SOFT-NEXT:    ldr r0, [sp] @ 4-byte Reload
-; SOFT-NEXT:    ands r3, r0
-; SOFT-NEXT:  .LBB48_15: @ %entry
-; SOFT-NEXT:    ands r3, r2
-; SOFT-NEXT:    adds r0, r3, #1
+; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    asrs r0, r3, #31
+; SOFT-NEXT:    ands r0, r2
+; SOFT-NEXT:    ands r0, r7
+; SOFT-NEXT:    adds r0, r0, #1
 ; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
-; SOFT-NEXT:    beq .LBB48_17
-; SOFT-NEXT:  @ %bb.16: @ %entry
+; SOFT-NEXT:    beq .LBB48_14
+; SOFT-NEXT:  @ %bb.13: @ %entry
 ; SOFT-NEXT:    str r6, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:  .LBB48_17: @ %entry
-; SOFT-NEXT:    ldr r3, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT:    mvns r6, r3
-; SOFT-NEXT:    ldr r0, .LCPI48_0
-; SOFT-NEXT:    cmp r4, r0
-; SOFT-NEXT:    ldr r3, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT:    blo .LBB48_19
-; SOFT-NEXT:  @ %bb.18: @ %entry
-; SOFT-NEXT:    mov r3, r6
-; SOFT-NEXT:  .LBB48_19: @ %entry
-; SOFT-NEXT:    cmp r4, r0
+; SOFT-NEXT:  .LBB48_14: @ %entry
+; SOFT-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT:    mvns r0, r0
+; SOFT-NEXT:    ldr r2, .LCPI48_0
+; SOFT-NEXT:    cmp r4, r2
+; SOFT-NEXT:    ldr r6, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    blo .LBB48_16
+; SOFT-NEXT:  @ %bb.15: @ %entry
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:  .LBB48_16: @ %entry
+; SOFT-NEXT:    cmp r4, r2
 ; SOFT-NEXT:    ldr r4, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT:    beq .LBB48_21
-; SOFT-NEXT:  @ %bb.20: @ %entry
-; SOFT-NEXT:    mov r4, r3
-; SOFT-NEXT:  .LBB48_21: @ %entry
-; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    bmi .LBB48_23
-; SOFT-NEXT:  @ %bb.22: @ %entry
-; SOFT-NEXT:    str r6, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT:  .LBB48_23: @ %entry
+; SOFT-NEXT:    beq .LBB48_18
+; SOFT-NEXT:  @ %bb.17: @ %entry
+; SOFT-NEXT:    mov r4, r6
+; SOFT-NEXT:  .LBB48_18: @ %entry
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    bmi .LBB48_20
+; SOFT-NEXT:  @ %bb.19: @ %entry
+; SOFT-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT:  .LBB48_20: @ %entry
 ; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB48_25
-; SOFT-NEXT:  @ %bb.24: @ %entry
+; SOFT-NEXT:    beq .LBB48_22
+; SOFT-NEXT:  @ %bb.21: @ %entry
 ; SOFT-NEXT:    ldr r4, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT:  .LBB48_25: @ %entry
+; SOFT-NEXT:  .LBB48_22: @ %entry
 ; SOFT-NEXT:    cmp r5, r1
-; SOFT-NEXT:    mov r3, r4
-; SOFT-NEXT:    bhi .LBB48_27
-; SOFT-NEXT:  @ %bb.26: @ %entry
-; SOFT-NEXT:    ldr r3, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT:  .LBB48_27: @ %entry
+; SOFT-NEXT:    mov r2, r4
+; SOFT-NEXT:    bhi .LBB48_24
+; SOFT-NEXT:  @ %bb.23: @ %entry
+; SOFT-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT:  .LBB48_24: @ %entry
 ; SOFT-NEXT:    cmp r5, r1
 ; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    beq .LBB48_29
-; SOFT-NEXT:  @ %bb.28: @ %entry
-; SOFT-NEXT:    mov r0, r3
-; SOFT-NEXT:  .LBB48_29: @ %entry
-; SOFT-NEXT:    cmp r2, #0
-; SOFT-NEXT:    bge .LBB48_31
-; SOFT-NEXT:  @ %bb.30: @ %entry
+; SOFT-NEXT:    beq .LBB48_26
+; SOFT-NEXT:  @ %bb.25: @ %entry
+; SOFT-NEXT:    mov r0, r2
+; SOFT-NEXT:  .LBB48_26: @ %entry
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bge .LBB48_28
+; SOFT-NEXT:  @ %bb.27: @ %entry
 ; SOFT-NEXT:    ldr r4, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT:  .LBB48_31: @ %entry
+; SOFT-NEXT:  .LBB48_28: @ %entry
 ; SOFT-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; SOFT-NEXT:    cmp r1, #0
-; SOFT-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    beq .LBB48_33
-; SOFT-NEXT:  @ %bb.32: @ %entry
+; SOFT-NEXT:    beq .LBB48_30
+; SOFT-NEXT:  @ %bb.29: @ %entry
 ; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:  .LBB48_33: @ %entry
+; SOFT-NEXT:  .LBB48_30: @ %entry
+; SOFT-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
 ; SOFT-NEXT:    add sp, #20
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
-; SOFT-NEXT:  @ %bb.34:
+; SOFT-NEXT:  @ %bb.31:
 ; SOFT-NEXT:  .LCPI48_0:
 ; SOFT-NEXT:    .long 2147483647 @ 0x7fffffff
 ;
@@ -3830,46 +3798,43 @@ define i64 @stest_f32i64_mm(float %x) {
 ; VFP2-NEXT:    mov.w r5, #0
 ; VFP2-NEXT:    it mi
 ; VFP2-NEXT:    movmi r4, r12
-; VFP2-NEXT:    orrs.w r9, r2, r3
+; VFP2-NEXT:    orrs.w r7, r2, r3
 ; VFP2-NEXT:    it eq
 ; VFP2-NEXT:    moveq r4, r1
 ; VFP2-NEXT:    cmp r3, #0
 ; VFP2-NEXT:    it mi
 ; VFP2-NEXT:    movmi r5, r3
-; VFP2-NEXT:    cmp.w r5, #-1
-; VFP2-NEXT:    mov.w r7, #-2147483648
+; VFP2-NEXT:    and.w r2, r2, r3, asr #31
 ; VFP2-NEXT:    mov.w r1, #-2147483648
+; VFP2-NEXT:    cmp.w r5, #-1
+; VFP2-NEXT:    mov.w r6, #-2147483648
+; VFP2-NEXT:    and.w r2, r2, r5
 ; VFP2-NEXT:    it gt
-; VFP2-NEXT:    movgt r7, r4
+; VFP2-NEXT:    movgt r6, r4
 ; VFP2-NEXT:    cmp.w r4, #-2147483648
-; VFP2-NEXT:    mov r6, r3
 ; VFP2-NEXT:    it hi
 ; VFP2-NEXT:    movhi r1, r4
-; VFP2-NEXT:    cmp r3, #0
-; VFP2-NEXT:    it ne
-; VFP2-NEXT:    andne.w r6, r2, r6, asr #31
-; VFP2-NEXT:    and.w r2, r6, r5
-; VFP2-NEXT:    mov.w r6, #-1
 ; VFP2-NEXT:    adds r2, #1
 ; VFP2-NEXT:    it ne
-; VFP2-NEXT:    movne r1, r7
-; VFP2-NEXT:    mov.w r7, #-1
+; VFP2-NEXT:    movne r1, r6
+; VFP2-NEXT:    mov.w r6, #-1
 ; VFP2-NEXT:    cmp r12, r8
 ; VFP2-NEXT:    it lo
-; VFP2-NEXT:    movlo r7, r0
-; VFP2-NEXT:    mov.w lr, #0
+; VFP2-NEXT:    movlo r6, r0
 ; VFP2-NEXT:    it eq
-; VFP2-NEXT:    moveq r7, r0
+; VFP2-NEXT:    moveq r6, r0
 ; VFP2-NEXT:    cmp r3, #0
+; VFP2-NEXT:    mov.w r9, #-1
 ; VFP2-NEXT:    it pl
-; VFP2-NEXT:    movpl r0, r6
-; VFP2-NEXT:    cmp.w r9, #0
+; VFP2-NEXT:    movpl r0, r9
+; VFP2-NEXT:    cmp r7, #0
 ; VFP2-NEXT:    mov.w r3, #0
 ; VFP2-NEXT:    it eq
-; VFP2-NEXT:    moveq r0, r7
+; VFP2-NEXT:    moveq r0, r6
 ; VFP2-NEXT:    cmp.w r4, #-2147483648
 ; VFP2-NEXT:    it hi
 ; VFP2-NEXT:    movhi r3, r0
+; VFP2-NEXT:    mov.w lr, #0
 ; VFP2-NEXT:    it eq
 ; VFP2-NEXT:    moveq r3, r0
 ; VFP2-NEXT:    cmp.w r5, #-1
@@ -3883,10 +3848,8 @@ define i64 @stest_f32i64_mm(float %x) {
 ;
 ; FULL-LABEL: stest_f32i64_mm:
 ; FULL:       @ %bb.0: @ %entry
-; FULL-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
-; FULL-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
-; FULL-NEXT:    .pad #4
-; FULL-NEXT:    sub sp, #4
+; FULL-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; FULL-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
 ; FULL-NEXT:    bl __fixsfti
 ; FULL-NEXT:    mvn r12, #-2147483648
 ; FULL-NEXT:    cmp r1, r12
@@ -3899,17 +3862,14 @@ define i64 @stest_f32i64_mm(float %x) {
 ; FULL-NEXT:    cmp r3, #0
 ; FULL-NEXT:    mov.w r7, #-2147483648
 ; FULL-NEXT:    csel r6, r3, lr, mi
-; FULL-NEXT:    mov r5, r3
+; FULL-NEXT:    and.w r2, r2, r3, asr #31
 ; FULL-NEXT:    cmp.w r6, #-1
-; FULL-NEXT:    csel r9, r4, r7, gt
+; FULL-NEXT:    and.w r2, r2, r6
+; FULL-NEXT:    csel r5, r4, r7, gt
 ; FULL-NEXT:    cmp.w r4, #-2147483648
 ; FULL-NEXT:    csel r7, r4, r7, hi
-; FULL-NEXT:    cmp r3, #0
-; FULL-NEXT:    it ne
-; FULL-NEXT:    andne.w r5, r2, r5, asr #31
-; FULL-NEXT:    and.w r2, r5, r6
-; FULL-NEXT:    adds r5, r2, #1
-; FULL-NEXT:    csel r2, r7, r9, eq
+; FULL-NEXT:    adds r2, #1
+; FULL-NEXT:    csel r5, r7, r5, eq
 ; FULL-NEXT:    mov.w r7, #-1
 ; FULL-NEXT:    cmp r1, r12
 ; FULL-NEXT:    csel r1, r0, r7, lo
@@ -3923,11 +3883,10 @@ define i64 @stest_f32i64_mm(float %x) {
 ; FULL-NEXT:    csel r1, r0, r1, eq
 ; FULL-NEXT:    cmp.w r6, #-1
 ; FULL-NEXT:    csel r0, r0, lr, gt
-; FULL-NEXT:    cmp r5, #0
+; FULL-NEXT:    cmp r2, #0
 ; FULL-NEXT:    csel r0, r1, r0, eq
-; FULL-NEXT:    mov r1, r2
-; FULL-NEXT:    add sp, #4
-; FULL-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+; FULL-NEXT:    mov r1, r5
+; FULL-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
 entry:
   %conv = fptosi float %x to i128
   %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
@@ -4275,7 +4234,6 @@ define i64 @stest_f16i64_mm(half %x) {
 ; SOFT-NEXT:    bl __fixsfti
 ; SOFT-NEXT:    str r0, [sp, #16] @ 4-byte Spill
 ; SOFT-NEXT:    mov r4, r1
-; SOFT-NEXT:    mov r7, r3
 ; SOFT-NEXT:    ldr r0, .LCPI51_0
 ; SOFT-NEXT:    cmp r1, r0
 ; SOFT-NEXT:    mov r5, r1
@@ -4283,114 +4241,105 @@ define i64 @stest_f16i64_mm(half %x) {
 ; SOFT-NEXT:  @ %bb.1: @ %entry
 ; SOFT-NEXT:    ldr r5, .LCPI51_0
 ; SOFT-NEXT:  .LBB51_2: @ %entry
-; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    cmp r3, #0
 ; SOFT-NEXT:    mov r1, r4
 ; SOFT-NEXT:    bmi .LBB51_4
 ; SOFT-NEXT:  @ %bb.3: @ %entry
 ; SOFT-NEXT:    ldr r1, .LCPI51_0
 ; SOFT-NEXT:  .LBB51_4: @ %entry
-; SOFT-NEXT:    str r2, [sp] @ 4-byte Spill
 ; SOFT-NEXT:    mov r0, r2
-; SOFT-NEXT:    orrs r0, r7
+; SOFT-NEXT:    orrs r0, r3
 ; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
 ; SOFT-NEXT:    beq .LBB51_6
 ; SOFT-NEXT:  @ %bb.5: @ %entry
 ; SOFT-NEXT:    mov r5, r1
 ; SOFT-NEXT:  .LBB51_6: @ %entry
-; SOFT-NEXT:    movs r1, #0
-; SOFT-NEXT:    str r1, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    mov r2, r7
+; SOFT-NEXT:    movs r0, #0
+; SOFT-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    mov r7, r3
 ; SOFT-NEXT:    bmi .LBB51_8
 ; SOFT-NEXT:  @ %bb.7: @ %entry
-; SOFT-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT:    ldr r7, [sp, #12] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB51_8: @ %entry
 ; SOFT-NEXT:    movs r1, #1
 ; SOFT-NEXT:    lsls r1, r1, #31
-; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    cmp r7, #0
 ; SOFT-NEXT:    mov r6, r5
 ; SOFT-NEXT:    bge .LBB51_10
 ; SOFT-NEXT:  @ %bb.9: @ %entry
 ; SOFT-NEXT:    mov r6, r1
 ; SOFT-NEXT:  .LBB51_10: @ %entry
 ; SOFT-NEXT:    cmp r5, r1
-; SOFT-NEXT:    mov r3, r5
+; SOFT-NEXT:    mov r0, r5
 ; SOFT-NEXT:    bhi .LBB51_12
 ; SOFT-NEXT:  @ %bb.11: @ %entry
-; SOFT-NEXT:    mov r3, r1
+; SOFT-NEXT:    mov r0, r1
 ; SOFT-NEXT:  .LBB51_12: @ %entry
-; SOFT-NEXT:    str r3, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    bne .LBB51_14
-; SOFT-NEXT:  @ %bb.13: @ %entry
-; SOFT-NEXT:    mov r3, r7
-; SOFT-NEXT:    b .LBB51_15
-; SOFT-NEXT:  .LBB51_14:
-; SOFT-NEXT:    asrs r3, r7, #31
-; SOFT-NEXT:    ldr r0, [sp] @ 4-byte Reload
-; SOFT-NEXT:    ands r3, r0
-; SOFT-NEXT:  .LBB51_15: @ %entry
-; SOFT-NEXT:    ands r3, r2
-; SOFT-NEXT:    adds r0, r3, #1
+; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    asrs r0, r3, #31
+; SOFT-NEXT:    ands r0, r2
+; SOFT-NEXT:    ands r0, r7
+; SOFT-NEXT:    adds r0, r0, #1
 ; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
-; SOFT-NEXT:    beq .LBB51_17
-; SOFT-NEXT:  @ %bb.16: @ %entry
+; SOFT-NEXT:    beq .LBB51_14
+; SOFT-NEXT:  @ %bb.13: @ %entry
 ; SOFT-NEXT:    str r6, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:  .LBB51_17: @ %entry
-; SOFT-NEXT:    ldr r3, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT:    mvns r6, r3
-; SOFT-NEXT:    ldr r0, .LCPI51_0
-; SOFT-NEXT:    cmp r4, r0
-; SOFT-NEXT:    ldr r3, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT:    blo .LBB51_19
-; SOFT-NEXT:  @ %bb.18: @ %entry
-; SOFT-NEXT:    mov r3, r6
-; SOFT-NEXT:  .LBB51_19: @ %entry
-; SOFT-NEXT:    cmp r4, r0
+; SOFT-NEXT:  .LBB51_14: @ %entry
+; SOFT-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT:    mvns r0, r0
+; SOFT-NEXT:    ldr r2, .LCPI51_0
+; SOFT-NEXT:    cmp r4, r2
+; SOFT-NEXT:    ldr r6, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    blo .LBB51_16
+; SOFT-NEXT:  @ %bb.15: @ %entry
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:  .LBB51_16: @ %entry
+; SOFT-NEXT:    cmp r4, r2
 ; SOFT-NEXT:    ldr r4, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT:    beq .LBB51_21
-; SOFT-NEXT:  @ %bb.20: @ %entry
-; SOFT-NEXT:    mov r4, r3
-; SOFT-NEXT:  .LBB51_21: @ %entry
-; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    bmi .LBB51_23
-; SOFT-NEXT:  @ %bb.22: @ %entry
-; SOFT-NEXT:    str r6, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT:  .LBB51_23: @ %entry
+; SOFT-NEXT:    beq .LBB51_18
+; SOFT-NEXT:  @ %bb.17: @ %entry
+; SOFT-NEXT:    mov r4, r6
+; SOFT-NEXT:  .LBB51_18: @ %entry
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    bmi .LBB51_20
+; SOFT-NEXT:  @ %bb.19: @ %entry
+; SOFT-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT:  .LBB51_20: @ %entry
 ; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB51_25
-; SOFT-NEXT:  @ %bb.24: @ %entry
+; SOFT-NEXT:    beq .LBB51_22
+; SOFT-NEXT:  @ %bb.21: @ %entry
 ; SOFT-NEXT:    ldr r4, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT:  .LBB51_25: @ %entry
+; SOFT-NEXT:  .LBB51_22: @ %entry
 ; SOFT-NEXT:    cmp r5, r1
-; SOFT-NEXT:    mov r3, r4
-; SOFT-NEXT:    bhi .LBB51_27
-; SOFT-NEXT:  @ %bb.26: @ %entry
-; SOFT-NEXT:    ldr r3, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT:  .LBB51_27: @ %entry
+; SOFT-NEXT:    mov r2, r4
+; SOFT-NEXT:    bhi .LBB51_24
+; SOFT-NEXT:  @ %bb.23: @ %entry
+; SOFT-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT:  .LBB51_24: @ %entry
 ; SOFT-NEXT:    cmp r5, r1
 ; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    beq .LBB51_29
-; SOFT-NEXT:  @ %bb.28: @ %entry
-; SOFT-NEXT:    mov r0, r3
-; SOFT-NEXT:  .LBB51_29: @ %entry
-; SOFT-NEXT:    cmp r2, #0
-; SOFT-NEXT:    bge .LBB51_31
-; SOFT-NEXT:  @ %bb.30: @ %entry
+; SOFT-NEXT:    beq .LBB51_26
+; SOFT-NEXT:  @ %bb.25: @ %entry
+; SOFT-NEXT:    mov r0, r2
+; SOFT-NEXT:  .LBB51_26: @ %entry
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bge .LBB51_28
+; SOFT-NEXT:  @ %bb.27: @ %entry
 ; SOFT-NEXT:    ldr r4, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT:  .LBB51_31: @ %entry
+; SOFT-NEXT:  .LBB51_28: @ %entry
 ; SOFT-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; SOFT-NEXT:    cmp r1, #0
-; SOFT-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    beq .LBB51_33
-; SOFT-NEXT:  @ %bb.32: @ %entry
+; SOFT-NEXT:    beq .LBB51_30
+; SOFT-NEXT:  @ %bb.29: @ %entry
 ; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:  .LBB51_33: @ %entry
+; SOFT-NEXT:  .LBB51_30: @ %entry
+; SOFT-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
 ; SOFT-NEXT:    add sp, #20
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
-; SOFT-NEXT:  @ %bb.34:
+; SOFT-NEXT:  @ %bb.31:
 ; SOFT-NEXT:  .LCPI51_0:
 ; SOFT-NEXT:    .long 2147483647 @ 0x7fffffff
 ;
@@ -4415,46 +4364,43 @@ define i64 @stest_f16i64_mm(half %x) {
 ; VFP2-NEXT:    mov.w r5, #0
 ; VFP2-NEXT:    it mi
 ; VFP2-NEXT:    movmi r4, r12
-; VFP2-NEXT:    orrs.w r9, r2, r3
+; VFP2-NEXT:    orrs.w r7, r2, r3
 ; VFP2-NEXT:    it eq
 ; VFP2-NEXT:    moveq r4, r1
 ; VFP2-NEXT:    cmp r3, #0
 ; VFP2-NEXT:    it mi
 ; VFP2-NEXT:    movmi r5, r3
-; VFP2-NEXT:    cmp.w r5, #-1
-; VFP2-NEXT:    mov.w r7, #-2147483648
+; VFP2-NEXT:    and.w r2, r2, r3, asr #31
 ; VFP2-NEXT:    mov.w r1, #-2147483648
+; VFP2-NEXT:    cmp.w r5, #-1
+; VFP2-NEXT:    mov.w r6, #-2147483648
+; VFP2-NEXT:    and.w r2, r2, r5
 ; VFP2-NEXT:    it gt
-; VFP2-NEXT:    movgt r7, r4
+; VFP2-NEXT:    movgt r6, r4
 ; VFP2-NEXT:    cmp.w r4, #-2147483648
-; VFP2-NEXT:    mov r6, r3
 ; VFP2-NEXT:    it hi
 ; VFP2-NEXT:    movhi r1, r4
-; VFP2-NEXT:    cmp r3, #0
-; VFP2-NEXT:    it ne
-; VFP2-NEXT:    andne.w r6, r2, r6, asr #31
-; VFP2-NEXT:    and.w r2, r6, r5
-; VFP2-NEXT:    mov.w r6, #-1
 ; VFP2-NEXT:    adds r2, #1
 ; VFP2-NEXT:    it ne
-; VFP2-NEXT:    movne r1, r7
-; VFP2-NEXT:    mov.w r7, #-1
+; VFP2-NEXT:    movne r1, r6
+; VFP2-NEXT:    mov.w r6, #-1
 ; VFP2-NEXT:    cmp r12, r8
 ; VFP2-NEXT:    it lo
-; VFP2-NEXT:    movlo r7, r0
-; VFP2-NEXT:    mov.w lr, #0
+; VFP2-NEXT:    movlo r6, r0
 ; VFP2-NEXT:    it eq
-; VFP2-NEXT:    moveq r7, r0
+; VFP2-NEXT:    moveq r6, r0
 ; VFP2-NEXT:    cmp r3, #0
+; VFP2-NEXT:    mov.w r9, #-1
 ; VFP2-NEXT:    it pl
-; VFP2-NEXT:    movpl r0, r6
-; VFP2-NEXT:    cmp.w r9, #0
+; VFP2-NEXT:    movpl r0, r9
+; VFP2-NEXT:    cmp r7, #0
 ; VFP2-NEXT:    mov.w r3, #0
 ; VFP2-NEXT:    it eq
-; VFP2-NEXT:    moveq r0, r7
+; VFP2-NEXT:    moveq r0, r6
 ; VFP2-NEXT:    cmp.w r4, #-2147483648
 ; VFP2-NEXT:    it hi
 ; VFP2-NEXT:    movhi r3, r0
+; VFP2-NEXT:    mov.w lr, #0
 ; VFP2-NEXT:    it eq
 ; VFP2-NEXT:    moveq r3, r0
 ; VFP2-NEXT:    cmp.w r5, #-1
@@ -4468,10 +4414,8 @@ define i64 @stest_f16i64_mm(half %x) {
 ;
 ; FULL-LABEL: stest_f16i64_mm:
 ; FULL:       @ %bb.0: @ %entry
-; FULL-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
-; FULL-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
-; FULL-NEXT:    .pad #4
-; FULL-NEXT:    sub sp, #4
+; FULL-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; FULL-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
 ; FULL-NEXT:    vmov.f16 r0, s0
 ; FULL-NEXT:    vmov s0, r0
 ; FULL-NEXT:    bl __fixhfti
@@ -4486,17 +4430,14 @@ define i64 @stest_f16i64_mm(half %x) {
 ; FULL-NEXT:    cmp r3, #0
 ; FULL-NEXT:    mov.w r7, #-2147483648
 ; FULL-NEXT:    csel r6, r3, lr, mi
-; FULL-NEXT:    mov r5, r3
+; FULL-NEXT:    and.w r2, r2, r3, asr #31
 ; FULL-NEXT:    cmp.w r6, #-1
-; FULL-NEXT:    csel r9, r4, r7, gt
+; FULL-NEXT:    and.w r2, r2, r6
+; FULL-NEXT:    csel r5, r4, r7, gt
 ; FULL-NEXT:    cmp.w r4, #-2147483648
 ; FULL-NEXT:    csel r7, r4, r7, hi
-; FULL-NEXT:    cmp r3, #0
-; FULL-NEXT:    it ne
-; FULL-NEXT:    andne.w r5, r2, r5, asr #31
-; FULL-NEXT:    and.w r2, r5, r6
-; FULL-NEXT:    adds r5, r2, #1
-; FULL-NEXT:    csel r2, r7, r9, eq
+; FULL-NEXT:    adds r2, #1
+; FULL-NEXT:    csel r5, r7, r5, eq
 ; FULL-NEXT:    mov.w r7, #-1
 ; FULL-NEXT:    cmp r1, r12
 ; FULL-NEXT:    csel r1, r0, r7, lo
@@ -4510,11 +4451,10 @@ define i64 @stest_f16i64_mm(half %x) {
 ; FULL-NEXT:    csel r1, r0, r1, eq
 ; FULL-NEXT:    cmp.w r6, #-1
 ; FULL-NEXT:    csel r0, r0, lr, gt
-; FULL-NEXT:    cmp r5, #0
+; FULL-NEXT:    cmp r2, #0
 ; FULL-NEXT:    csel r0, r1, r0, eq
-; FULL-NEXT:    mov r1, r2
-; FULL-NEXT:    add sp, #4
-; FULL-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+; FULL-NEXT:    mov r1, r5
+; FULL-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
 entry:
   %conv = fptosi half %x to i128
   %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)

diff --git a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
index a4d470b72d4ea..db6f33128236a 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
@@ -3671,95 +3671,93 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT:    vorr d0, d9, d9
 ; CHECK-NEXT:    bl __fixdfti
 ; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    cmn r1, #-2147483647
+; CHECK-NEXT:    mvn r0, #-2147483648
+; CHECK-NEXT:    mvn r5, #-2147483648
+; CHECK-NEXT:    movlo r0, r1
 ; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    mov r0, r3
-; CHECK-NEXT:    mov r10, #0
-; CHECK-NEXT:    andne r0, r2, r0, asr #31
 ; CHECK-NEXT:    mov r11, r1
-; CHECK-NEXT:    movmi r10, r3
-; CHECK-NEXT:    and r1, r0, r10
-; CHECK-NEXT:    cmn r11, #-2147483647
-; CHECK-NEXT:    mvn r0, #-2147483648
-; CHECK-NEXT:    movlo r0, r11
+; CHECK-NEXT:    movmi r5, r1
+; CHECK-NEXT:    orrs r1, r2, r3
+; CHECK-NEXT:    mov r8, #0
+; CHECK-NEXT:    moveq r5, r0
 ; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    mvn r8, #-2147483648
+; CHECK-NEXT:    and r0, r2, r3, asr #31
+; CHECK-NEXT:    movmi r8, r3
+; CHECK-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    and r1, r0, r8
 ; CHECK-NEXT:    vorr d0, d8, d8
-; CHECK-NEXT:    movmi r8, r11
-; CHECK-NEXT:    orrs r2, r2, r3
-; CHECK-NEXT:    moveq r8, r0
-; CHECK-NEXT:    cmn r10, #1
+; CHECK-NEXT:    cmn r8, #1
 ; CHECK-NEXT:    mov r0, #-2147483648
-; CHECK-NEXT:    mov r9, #-2147483648
-; CHECK-NEXT:    movgt r0, r8
-; CHECK-NEXT:    cmp r8, #-2147483648
-; CHECK-NEXT:    movhi r9, r8
+; CHECK-NEXT:    mov r10, #-2147483648
+; CHECK-NEXT:    movgt r0, r5
+; CHECK-NEXT:    cmp r5, #-2147483648
+; CHECK-NEXT:    movhi r10, r5
 ; CHECK-NEXT:    cmn r1, #1
-; CHECK-NEXT:    mov r6, r3
-; CHECK-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    mov r9, r3
 ; CHECK-NEXT:    mvn r7, #-2147483648
-; CHECK-NEXT:    str r2, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    movne r9, r0
+; CHECK-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    movne r10, r0
 ; CHECK-NEXT:    bl __fixdfti
 ; CHECK-NEXT:    cmn r1, #-2147483647
-; CHECK-NEXT:    mvn r5, #0
-; CHECK-NEXT:    movlo r5, r0
+; CHECK-NEXT:    mvn r6, #0
+; CHECK-NEXT:    movlo r6, r0
 ; CHECK-NEXT:    mvn r4, #0
-; CHECK-NEXT:    moveq r5, r0
+; CHECK-NEXT:    moveq r6, r0
 ; CHECK-NEXT:    cmp r3, #0
 ; CHECK-NEXT:    movpl r0, r4
 ; CHECK-NEXT:    orrs r12, r2, r3
-; CHECK-NEXT:    moveq r0, r5
+; CHECK-NEXT:    moveq r0, r6
 ; CHECK-NEXT:    cmn r1, #-2147483647
-; CHECK-NEXT:    mvn r5, #-2147483648
-; CHECK-NEXT:    movlo r5, r1
+; CHECK-NEXT:    mvn r6, #-2147483648
+; CHECK-NEXT:    and r2, r2, r3, asr #31
+; CHECK-NEXT:    movlo r6, r1
 ; CHECK-NEXT:    cmp r3, #0
 ; CHECK-NEXT:    movmi r7, r1
 ; CHECK-NEXT:    cmp r12, #0
-; CHECK-NEXT:    moveq r7, r5
+; CHECK-NEXT:    moveq r7, r6
 ; CHECK-NEXT:    cmp r7, #-2147483648
-; CHECK-NEXT:    mov r1, #0
-; CHECK-NEXT:    ldr r5, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    movhi r1, r0
 ; CHECK-NEXT:    mov r12, #0
-; CHECK-NEXT:    moveq r1, r0
-; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    movhi r12, r0
 ; CHECK-NEXT:    mvn r6, #0
-; CHECK-NEXT:    movmi r6, r5
+; CHECK-NEXT:    moveq r12, r0
+; CHECK-NEXT:    cmp r9, #0
+; CHECK-NEXT:    movmi r6, r1
 ; CHECK-NEXT:    cmn r11, #-2147483647
-; CHECK-NEXT:    movlo r4, r5
-; CHECK-NEXT:    moveq r4, r5
-; CHECK-NEXT:    ldr r5, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    ldr r5, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    movlo r4, r1
+; CHECK-NEXT:    moveq r4, r1
+; CHECK-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
 ; CHECK-NEXT:    movne r4, r6
-; CHECK-NEXT:    cmp r8, #-2147483648
+; CHECK-NEXT:    cmp r5, #-2147483648
 ; CHECK-NEXT:    mov r6, #0
+; CHECK-NEXT:    mov r5, #0
 ; CHECK-NEXT:    movhi r6, r4
 ; CHECK-NEXT:    moveq r6, r4
-; CHECK-NEXT:    cmn r10, #1
-; CHECK-NEXT:    movle r4, r12
-; CHECK-NEXT:    cmn r5, #1
+; CHECK-NEXT:    cmn r8, #1
+; CHECK-NEXT:    movle r4, r5
+; CHECK-NEXT:    cmn r1, #1
 ; CHECK-NEXT:    moveq r4, r6
 ; CHECK-NEXT:    cmp r3, #0
 ; CHECK-NEXT:    mov r6, #0
 ; CHECK-NEXT:    vmov.32 d1[0], r4
 ; CHECK-NEXT:    movmi r6, r3
 ; CHECK-NEXT:    cmn r6, #1
-; CHECK-NEXT:    movle r0, r12
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    andne r3, r2, r3, asr #31
-; CHECK-NEXT:    and r2, r3, r6
+; CHECK-NEXT:    and r2, r2, r6
+; CHECK-NEXT:    movle r0, r5
 ; CHECK-NEXT:    cmn r2, #1
-; CHECK-NEXT:    moveq r0, r1
-; CHECK-NEXT:    cmn r6, #1
 ; CHECK-NEXT:    mov r1, #-2147483648
+; CHECK-NEXT:    moveq r0, r12
+; CHECK-NEXT:    cmn r6, #1
 ; CHECK-NEXT:    vmov.32 d0[0], r0
 ; CHECK-NEXT:    movgt r1, r7
 ; CHECK-NEXT:    cmp r7, #-2147483648
 ; CHECK-NEXT:    mov r0, #-2147483648
-; CHECK-NEXT:    vmov.32 d1[1], r9
 ; CHECK-NEXT:    movls r7, r0
 ; CHECK-NEXT:    cmn r2, #1
+; CHECK-NEXT:    vmov.32 d1[1], r10
 ; CHECK-NEXT:    movne r7, r1
 ; CHECK-NEXT:    vmov.32 d0[1], r7
 ; CHECK-NEXT:    add sp, sp, #16
@@ -3947,95 +3945,93 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT:    vmov.f32 s0, s17
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    mov r0, r3
-; CHECK-NEXT:    mov r10, #0
 ; CHECK-NEXT:    vmov.f32 s0, s16
-; CHECK-NEXT:    andne r0, r2, r0, asr #31
-; CHECK-NEXT:    mov r11, r1
-; CHECK-NEXT:    movmi r10, r3
-; CHECK-NEXT:    and r1, r0, r10
-; CHECK-NEXT:    cmn r11, #-2147483647
+; CHECK-NEXT:    cmn r1, #-2147483647
 ; CHECK-NEXT:    mvn r0, #-2147483648
-; CHECK-NEXT:    mvn r8, #-2147483648
-; CHECK-NEXT:    movlo r0, r11
+; CHECK-NEXT:    movlo r0, r1
 ; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    movmi r8, r11
-; CHECK-NEXT:    orrs r2, r2, r3
-; CHECK-NEXT:    moveq r8, r0
-; CHECK-NEXT:    cmn r10, #1
+; CHECK-NEXT:    mvn r5, #-2147483648
+; CHECK-NEXT:    mov r11, r1
+; CHECK-NEXT:    movmi r5, r1
+; CHECK-NEXT:    orrs r1, r2, r3
+; CHECK-NEXT:    moveq r5, r0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    mov r8, #0
+; CHECK-NEXT:    and r0, r2, r3, asr #31
+; CHECK-NEXT:    movmi r8, r3
+; CHECK-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    and r1, r0, r8
+; CHECK-NEXT:    cmn r8, #1
 ; CHECK-NEXT:    mov r0, #-2147483648
-; CHECK-NEXT:    mov r9, #-2147483648
-; CHECK-NEXT:    movgt r0, r8
-; CHECK-NEXT:    cmp r8, #-2147483648
-; CHECK-NEXT:    movhi r9, r8
+; CHECK-NEXT:    mov r10, #-2147483648
+; CHECK-NEXT:    movgt r0, r5
+; CHECK-NEXT:    cmp r5, #-2147483648
+; CHECK-NEXT:    movhi r10, r5
 ; CHECK-NEXT:    cmn r1, #1
-; CHECK-NEXT:    mov r6, r3
-; CHECK-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    mov r9, r3
 ; CHECK-NEXT:    mvn r7, #-2147483648
-; CHECK-NEXT:    str r2, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    movne r9, r0
+; CHECK-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    movne r10, r0
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    cmn r1, #-2147483647
-; CHECK-NEXT:    mvn r5, #0
-; CHECK-NEXT:    movlo r5, r0
+; CHECK-NEXT:    mvn r6, #0
+; CHECK-NEXT:    movlo r6, r0
 ; CHECK-NEXT:    mvn r4, #0
-; CHECK-NEXT:    moveq r5, r0
+; CHECK-NEXT:    moveq r6, r0
 ; CHECK-NEXT:    cmp r3, #0
 ; CHECK-NEXT:    movpl r0, r4
 ; CHECK-NEXT:    orrs r12, r2, r3
-; CHECK-NEXT:    moveq r0, r5
+; CHECK-NEXT:    moveq r0, r6
 ; CHECK-NEXT:    cmn r1, #-2147483647
-; CHECK-NEXT:    mvn r5, #-2147483648
-; CHECK-NEXT:    movlo r5, r1
+; CHECK-NEXT:    mvn r6, #-2147483648
+; CHECK-NEXT:    and r2, r2, r3, asr #31
+; CHECK-NEXT:    movlo r6, r1
 ; CHECK-NEXT:    cmp r3, #0
 ; CHECK-NEXT:    movmi r7, r1
 ; CHECK-NEXT:    cmp r12, #0
-; CHECK-NEXT:    moveq r7, r5
+; CHECK-NEXT:    moveq r7, r6
 ; CHECK-NEXT:    cmp r7, #-2147483648
-; CHECK-NEXT:    mov r1, #0
-; CHECK-NEXT:    ldr r5, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    movhi r1, r0
 ; CHECK-NEXT:    mov r12, #0
-; CHECK-NEXT:    moveq r1, r0
-; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    movhi r12, r0
 ; CHECK-NEXT:    mvn r6, #0
-; CHECK-NEXT:    movmi r6, r5
+; CHECK-NEXT:    moveq r12, r0
+; CHECK-NEXT:    cmp r9, #0
+; CHECK-NEXT:    movmi r6, r1
 ; CHECK-NEXT:    cmn r11, #-2147483647
-; CHECK-NEXT:    movlo r4, r5
-; CHECK-NEXT:    moveq r4, r5
-; CHECK-NEXT:    ldr r5, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    ldr r5, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    movlo r4, r1
+; CHECK-NEXT:    moveq r4, r1
+; CHECK-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
 ; CHECK-NEXT:    movne r4, r6
-; CHECK-NEXT:    cmp r8, #-2147483648
+; CHECK-NEXT:    cmp r5, #-2147483648
 ; CHECK-NEXT:    mov r6, #0
+; CHECK-NEXT:    mov r5, #0
 ; CHECK-NEXT:    movhi r6, r4
 ; CHECK-NEXT:    moveq r6, r4
-; CHECK-NEXT:    cmn r10, #1
-; CHECK-NEXT:    movle r4, r12
-; CHECK-NEXT:    cmn r5, #1
+; CHECK-NEXT:    cmn r8, #1
+; CHECK-NEXT:    movle r4, r5
+; CHECK-NEXT:    cmn r1, #1
 ; CHECK-NEXT:    moveq r4, r6
 ; CHECK-NEXT:    cmp r3, #0
 ; CHECK-NEXT:    mov r6, #0
 ; CHECK-NEXT:    vmov.32 d1[0], r4
 ; CHECK-NEXT:    movmi r6, r3
 ; CHECK-NEXT:    cmn r6, #1
-; CHECK-NEXT:    movle r0, r12
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    andne r3, r2, r3, asr #31
-; CHECK-NEXT:    and r2, r3, r6
+; CHECK-NEXT:    and r2, r2, r6
+; CHECK-NEXT:    movle r0, r5
 ; CHECK-NEXT:    cmn r2, #1
-; CHECK-NEXT:    moveq r0, r1
-; CHECK-NEXT:    cmn r6, #1
 ; CHECK-NEXT:    mov r1, #-2147483648
+; CHECK-NEXT:    moveq r0, r12
+; CHECK-NEXT:    cmn r6, #1
 ; CHECK-NEXT:    vmov.32 d0[0], r0
 ; CHECK-NEXT:    movgt r1, r7
 ; CHECK-NEXT:    cmp r7, #-2147483648
 ; CHECK-NEXT:    mov r0, #-2147483648
-; CHECK-NEXT:    vmov.32 d1[1], r9
 ; CHECK-NEXT:    movls r7, r0
 ; CHECK-NEXT:    cmn r2, #1
+; CHECK-NEXT:    vmov.32 d1[1], r10
 ; CHECK-NEXT:    movne r7, r1
 ; CHECK-NEXT:    vmov.32 d0[1], r7
 ; CHECK-NEXT:    add sp, sp, #16
@@ -4224,100 +4220,100 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    vmov s0, r0
 ; CHECK-NEON-NEXT:    bl __fixsfti
-; CHECK-NEON-NEXT:    str r0, [sp, #12] @ 4-byte Spill
-; CHECK-NEON-NEXT:    cmp r3, #0
-; CHECK-NEON-NEXT:    mov r0, r3
-; CHECK-NEON-NEXT:    mov r10, #0
-; CHECK-NEON-NEXT:    andne r0, r2, r0, asr #31
-; CHECK-NEON-NEXT:    mov r11, r1
-; CHECK-NEON-NEXT:    movmi r10, r3
-; CHECK-NEON-NEXT:    and r1, r0, r10
-; CHECK-NEON-NEXT:    cmn r11, #-2147483647
+; CHECK-NEON-NEXT:    mov r5, r0
+; CHECK-NEON-NEXT:    cmn r1, #-2147483647
 ; CHECK-NEON-NEXT:    mvn r0, #-2147483648
-; CHECK-NEON-NEXT:    movlo r0, r11
+; CHECK-NEON-NEXT:    mvn r11, #-2147483648
+; CHECK-NEON-NEXT:    movlo r0, r1
 ; CHECK-NEON-NEXT:    cmp r3, #0
-; CHECK-NEON-NEXT:    mvn r8, #-2147483648
-; CHECK-NEON-NEXT:    mov r9, #-2147483648
-; CHECK-NEON-NEXT:    movmi r8, r11
-; CHECK-NEON-NEXT:    orrs r2, r2, r3
-; CHECK-NEON-NEXT:    moveq r8, r0
-; CHECK-NEON-NEXT:    cmn r10, #1
+; CHECK-NEON-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-NEON-NEXT:    movmi r11, r1
+; CHECK-NEON-NEXT:    orrs r1, r2, r3
+; CHECK-NEON-NEXT:    mov r8, #0
+; CHECK-NEON-NEXT:    moveq r11, r0
+; CHECK-NEON-NEXT:    cmp r3, #0
+; CHECK-NEON-NEXT:    and r0, r2, r3, asr #31
+; CHECK-NEON-NEXT:    movmi r8, r3
+; CHECK-NEON-NEXT:    str r1, [sp, #12] @ 4-byte Spill
+; CHECK-NEON-NEXT:    and r1, r0, r8
+; CHECK-NEON-NEXT:    cmn r8, #1
 ; CHECK-NEON-NEXT:    mov r0, #-2147483648
-; CHECK-NEON-NEXT:    mov r6, r3
-; CHECK-NEON-NEXT:    movgt r0, r8
-; CHECK-NEON-NEXT:    cmp r8, #-2147483648
-; CHECK-NEON-NEXT:    movhi r9, r8
+; CHECK-NEON-NEXT:    movgt r0, r11
+; CHECK-NEON-NEXT:    cmp r11, #-2147483648
+; CHECK-NEON-NEXT:    mov r2, #-2147483648
+; CHECK-NEON-NEXT:    mov r9, r3
+; CHECK-NEON-NEXT:    movhi r2, r11
 ; CHECK-NEON-NEXT:    cmn r1, #1
-; CHECK-NEON-NEXT:    movne r9, r0
+; CHECK-NEON-NEXT:    movne r2, r0
 ; CHECK-NEON-NEXT:    vmov r0, s16
-; CHECK-NEON-NEXT:    str r1, [sp, #8] @ 4-byte Spill
-; CHECK-NEON-NEXT:    mvn r7, #-2147483648
+; CHECK-NEON-NEXT:    mvn r10, #-2147483648
+; CHECK-NEON-NEXT:    str r1, [sp] @ 4-byte Spill
 ; CHECK-NEON-NEXT:    str r2, [sp, #4] @ 4-byte Spill
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    vmov s0, r0
 ; CHECK-NEON-NEXT:    bl __fixsfti
 ; CHECK-NEON-NEXT:    cmn r1, #-2147483647
-; CHECK-NEON-NEXT:    mvn r5, #0
-; CHECK-NEON-NEXT:    movlo r5, r0
+; CHECK-NEON-NEXT:    mvn r6, #0
+; CHECK-NEON-NEXT:    movlo r6, r0
 ; CHECK-NEON-NEXT:    mvn r4, #0
-; CHECK-NEON-NEXT:    moveq r5, r0
+; CHECK-NEON-NEXT:    moveq r6, r0
 ; CHECK-NEON-NEXT:    cmp r3, #0
 ; CHECK-NEON-NEXT:    movpl r0, r4
 ; CHECK-NEON-NEXT:    orrs r12, r2, r3
-; CHECK-NEON-NEXT:    moveq r0, r5
+; CHECK-NEON-NEXT:    moveq r0, r6
 ; CHECK-NEON-NEXT:    cmn r1, #-2147483647
-; CHECK-NEON-NEXT:    mvn r5, #-2147483648
-; CHECK-NEON-NEXT:    movlo r5, r1
+; CHECK-NEON-NEXT:    mvn r6, #-2147483648
+; CHECK-NEON-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
+; CHECK-NEON-NEXT:    movlo r6, r1
 ; CHECK-NEON-NEXT:    cmp r3, #0
-; CHECK-NEON-NEXT:    movmi r7, r1
+; CHECK-NEON-NEXT:    movmi r10, r1
 ; CHECK-NEON-NEXT:    cmp r12, #0
-; CHECK-NEON-NEXT:    moveq r7, r5
-; CHECK-NEON-NEXT:    cmp r7, #-2147483648
+; CHECK-NEON-NEXT:    moveq r10, r6
+; CHECK-NEON-NEXT:    cmp r10, #-2147483648
 ; CHECK-NEON-NEXT:    mov r1, #0
-; CHECK-NEON-NEXT:    ldr r5, [sp, #12] @ 4-byte Reload
+; CHECK-NEON-NEXT:    mvn r6, #0
 ; CHECK-NEON-NEXT:    movhi r1, r0
-; CHECK-NEON-NEXT:    mov r12, #0
+; CHECK-NEON-NEXT:    and r2, r2, r3, asr #31
 ; CHECK-NEON-NEXT:    moveq r1, r0
-; CHECK-NEON-NEXT:    cmp r6, #0
-; CHECK-NEON-NEXT:    mvn r6, #0
+; CHECK-NEON-NEXT:    cmp r9, #0
 ; CHECK-NEON-NEXT:    movmi r6, r5
-; CHECK-NEON-NEXT:    cmn r11, #-2147483647
+; CHECK-NEON-NEXT:    cmn r7, #-2147483647
 ; CHECK-NEON-NEXT:    movlo r4, r5
+; CHECK-NEON-NEXT:    ldr r7, [sp] @ 4-byte Reload
 ; CHECK-NEON-NEXT:    moveq r4, r5
-; CHECK-NEON-NEXT:    ldr r5, [sp, #4] @ 4-byte Reload
+; CHECK-NEON-NEXT:    ldr r5, [sp, #12] @ 4-byte Reload
 ; CHECK-NEON-NEXT:    cmp r5, #0
-; CHECK-NEON-NEXT:    ldr r5, [sp, #8] @ 4-byte Reload
+; CHECK-NEON-NEXT:    mov r5, #0
 ; CHECK-NEON-NEXT:    movne r4, r6
-; CHECK-NEON-NEXT:    cmp r8, #-2147483648
+; CHECK-NEON-NEXT:    cmp r11, #-2147483648
 ; CHECK-NEON-NEXT:    mov r6, #0
 ; CHECK-NEON-NEXT:    movhi r6, r4
 ; CHECK-NEON-NEXT:    moveq r6, r4
-; CHECK-NEON-NEXT:    cmn r10, #1
-; CHECK-NEON-NEXT:    movle r4, r12
-; CHECK-NEON-NEXT:    cmn r5, #1
+; CHECK-NEON-NEXT:    cmn r8, #1
+; CHECK-NEON-NEXT:    movle r4, r5
+; CHECK-NEON-NEXT:    cmn r7, #1
 ; CHECK-NEON-NEXT:    moveq r4, r6
 ; CHECK-NEON-NEXT:    cmp r3, #0
 ; CHECK-NEON-NEXT:    mov r6, #0
 ; CHECK-NEON-NEXT:    vmov.32 d1[0], r4
 ; CHECK-NEON-NEXT:    movmi r6, r3
 ; CHECK-NEON-NEXT:    cmn r6, #1
-; CHECK-NEON-NEXT:    movle r0, r12
-; CHECK-NEON-NEXT:    cmp r3, #0
-; CHECK-NEON-NEXT:    andne r3, r2, r3, asr #31
-; CHECK-NEON-NEXT:    and r2, r3, r6
+; CHECK-NEON-NEXT:    and r2, r2, r6
+; CHECK-NEON-NEXT:    movle r0, r5
 ; CHECK-NEON-NEXT:    cmn r2, #1
 ; CHECK-NEON-NEXT:    moveq r0, r1
 ; CHECK-NEON-NEXT:    cmn r6, #1
 ; CHECK-NEON-NEXT:    mov r1, #-2147483648
 ; CHECK-NEON-NEXT:    vmov.32 d0[0], r0
-; CHECK-NEON-NEXT:    movgt r1, r7
-; CHECK-NEON-NEXT:    cmp r7, #-2147483648
+; CHECK-NEON-NEXT:    movgt r1, r10
+; CHECK-NEON-NEXT:    cmp r10, #-2147483648
 ; CHECK-NEON-NEXT:    mov r0, #-2147483648
-; CHECK-NEON-NEXT:    vmov.32 d1[1], r9
-; CHECK-NEON-NEXT:    movls r7, r0
+; CHECK-NEON-NEXT:    movls r10, r0
+; CHECK-NEON-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-NEON-NEXT:    cmn r2, #1
-; CHECK-NEON-NEXT:    movne r7, r1
-; CHECK-NEON-NEXT:    vmov.32 d0[1], r7
+; CHECK-NEON-NEXT:    movne r10, r1
+; CHECK-NEON-NEXT:    vmov.32 d1[1], r0
+; CHECK-NEON-NEXT:    vmov.32 d0[1], r10
 ; CHECK-NEON-NEXT:    add sp, sp, #16
 ; CHECK-NEON-NEXT:    vpop {d8}
 ; CHECK-NEON-NEXT:    add sp, sp, #4
@@ -4338,96 +4334,94 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
 ; CHECK-FP16-NEXT:    vmov s0, r0
 ; CHECK-FP16-NEXT:    bl __fixhfti
 ; CHECK-FP16-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-FP16-NEXT:    cmn r1, #-2147483647
+; CHECK-FP16-NEXT:    mvn r0, #-2147483648
+; CHECK-FP16-NEXT:    mvn r5, #-2147483648
+; CHECK-FP16-NEXT:    movlo r0, r1
 ; CHECK-FP16-NEXT:    cmp r3, #0
-; CHECK-FP16-NEXT:    mov r0, r3
-; CHECK-FP16-NEXT:    mov r10, #0
-; CHECK-FP16-NEXT:    andne r0, r2, r0, asr #31
 ; CHECK-FP16-NEXT:    mov r11, r1
-; CHECK-FP16-NEXT:    movmi r10, r3
-; CHECK-FP16-NEXT:    and r1, r0, r10
-; CHECK-FP16-NEXT:    cmn r11, #-2147483647
-; CHECK-FP16-NEXT:    mvn r0, #-2147483648
-; CHECK-FP16-NEXT:    movlo r0, r11
+; CHECK-FP16-NEXT:    movmi r5, r1
+; CHECK-FP16-NEXT:    orrs r1, r2, r3
+; CHECK-FP16-NEXT:    mov r8, #0
+; CHECK-FP16-NEXT:    moveq r5, r0
 ; CHECK-FP16-NEXT:    cmp r3, #0
-; CHECK-FP16-NEXT:    mvn r8, #-2147483648
-; CHECK-FP16-NEXT:    mov r9, #-2147483648
-; CHECK-FP16-NEXT:    movmi r8, r11
-; CHECK-FP16-NEXT:    orrs r2, r2, r3
-; CHECK-FP16-NEXT:    moveq r8, r0
-; CHECK-FP16-NEXT:    cmn r10, #1
+; CHECK-FP16-NEXT:    and r0, r2, r3, asr #31
+; CHECK-FP16-NEXT:    movmi r8, r3
+; CHECK-FP16-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-FP16-NEXT:    and r1, r0, r8
+; CHECK-FP16-NEXT:    cmn r8, #1
 ; CHECK-FP16-NEXT:    mov r0, #-2147483648
-; CHECK-FP16-NEXT:    mov r6, r3
-; CHECK-FP16-NEXT:    movgt r0, r8
-; CHECK-FP16-NEXT:    cmp r8, #-2147483648
-; CHECK-FP16-NEXT:    movhi r9, r8
+; CHECK-FP16-NEXT:    movgt r0, r5
+; CHECK-FP16-NEXT:    cmp r5, #-2147483648
+; CHECK-FP16-NEXT:    mov r10, #-2147483648
+; CHECK-FP16-NEXT:    mov r9, r3
+; CHECK-FP16-NEXT:    movhi r10, r5
 ; CHECK-FP16-NEXT:    cmn r1, #1
-; CHECK-FP16-NEXT:    movne r9, r0
+; CHECK-FP16-NEXT:    movne r10, r0
 ; CHECK-FP16-NEXT:    vmov.u16 r0, d8[0]
-; CHECK-FP16-NEXT:    str r1, [sp, #8] @ 4-byte Spill
 ; CHECK-FP16-NEXT:    mvn r7, #-2147483648
-; CHECK-FP16-NEXT:    str r2, [sp, #4] @ 4-byte Spill
+; CHECK-FP16-NEXT:    str r1, [sp, #4] @ 4-byte Spill
 ; CHECK-FP16-NEXT:    vmov s0, r0
 ; CHECK-FP16-NEXT:    bl __fixhfti
 ; CHECK-FP16-NEXT:    cmn r1, #-2147483647
-; CHECK-FP16-NEXT:    mvn r5, #0
-; CHECK-FP16-NEXT:    movlo r5, r0
+; CHECK-FP16-NEXT:    mvn r6, #0
+; CHECK-FP16-NEXT:    movlo r6, r0
 ; CHECK-FP16-NEXT:    mvn r4, #0
-; CHECK-FP16-NEXT:    moveq r5, r0
+; CHECK-FP16-NEXT:    moveq r6, r0
 ; CHECK-FP16-NEXT:    cmp r3, #0
 ; CHECK-FP16-NEXT:    movpl r0, r4
 ; CHECK-FP16-NEXT:    orrs r12, r2, r3
-; CHECK-FP16-NEXT:    moveq r0, r5
+; CHECK-FP16-NEXT:    moveq r0, r6
 ; CHECK-FP16-NEXT:    cmn r1, #-2147483647
-; CHECK-FP16-NEXT:    mvn r5, #-2147483648
-; CHECK-FP16-NEXT:    movlo r5, r1
+; CHECK-FP16-NEXT:    mvn r6, #-2147483648
+; CHECK-FP16-NEXT:    and r2, r2, r3, asr #31
+; CHECK-FP16-NEXT:    movlo r6, r1
 ; CHECK-FP16-NEXT:    cmp r3, #0
 ; CHECK-FP16-NEXT:    movmi r7, r1
 ; CHECK-FP16-NEXT:    cmp r12, #0
-; CHECK-FP16-NEXT:    moveq r7, r5
+; CHECK-FP16-NEXT:    moveq r7, r6
 ; CHECK-FP16-NEXT:    cmp r7, #-2147483648
-; CHECK-FP16-NEXT:    mov r1, #0
-; CHECK-FP16-NEXT:    ldr r5, [sp, #12] @ 4-byte Reload
-; CHECK-FP16-NEXT:    movhi r1, r0
 ; CHECK-FP16-NEXT:    mov r12, #0
-; CHECK-FP16-NEXT:    moveq r1, r0
-; CHECK-FP16-NEXT:    cmp r6, #0
+; CHECK-FP16-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-FP16-NEXT:    movhi r12, r0
 ; CHECK-FP16-NEXT:    mvn r6, #0
-; CHECK-FP16-NEXT:    movmi r6, r5
+; CHECK-FP16-NEXT:    moveq r12, r0
+; CHECK-FP16-NEXT:    cmp r9, #0
+; CHECK-FP16-NEXT:    movmi r6, r1
 ; CHECK-FP16-NEXT:    cmn r11, #-2147483647
-; CHECK-FP16-NEXT:    movlo r4, r5
-; CHECK-FP16-NEXT:    moveq r4, r5
-; CHECK-FP16-NEXT:    ldr r5, [sp, #4] @ 4-byte Reload
-; CHECK-FP16-NEXT:    cmp r5, #0
-; CHECK-FP16-NEXT:    ldr r5, [sp, #8] @ 4-byte Reload
+; CHECK-FP16-NEXT:    movlo r4, r1
+; CHECK-FP16-NEXT:    moveq r4, r1
+; CHECK-FP16-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-FP16-NEXT:    cmp r1, #0
+; CHECK-FP16-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
 ; CHECK-FP16-NEXT:    movne r4, r6
-; CHECK-FP16-NEXT:    cmp r8, #-2147483648
+; CHECK-FP16-NEXT:    cmp r5, #-2147483648
 ; CHECK-FP16-NEXT:    mov r6, #0
+; CHECK-FP16-NEXT:    mov r5, #0
 ; CHECK-FP16-NEXT:    movhi r6, r4
 ; CHECK-FP16-NEXT:    moveq r6, r4
-; CHECK-FP16-NEXT:    cmn r10, #1
-; CHECK-FP16-NEXT:    movle r4, r12
-; CHECK-FP16-NEXT:    cmn r5, #1
+; CHECK-FP16-NEXT:    cmn r8, #1
+; CHECK-FP16-NEXT:    movle r4, r5
+; CHECK-FP16-NEXT:    cmn r1, #1
 ; CHECK-FP16-NEXT:    moveq r4, r6
 ; CHECK-FP16-NEXT:    cmp r3, #0
 ; CHECK-FP16-NEXT:    mov r6, #0
 ; CHECK-FP16-NEXT:    vmov.32 d1[0], r4
 ; CHECK-FP16-NEXT:    movmi r6, r3
 ; CHECK-FP16-NEXT:    cmn r6, #1
-; CHECK-FP16-NEXT:    movle r0, r12
-; CHECK-FP16-NEXT:    cmp r3, #0
-; CHECK-FP16-NEXT:    andne r3, r2, r3, asr #31
-; CHECK-FP16-NEXT:    and r2, r3, r6
+; CHECK-FP16-NEXT:    and r2, r2, r6
+; CHECK-FP16-NEXT:    movle r0, r5
 ; CHECK-FP16-NEXT:    cmn r2, #1
-; CHECK-FP16-NEXT:    moveq r0, r1
-; CHECK-FP16-NEXT:    cmn r6, #1
 ; CHECK-FP16-NEXT:    mov r1, #-2147483648
+; CHECK-FP16-NEXT:    moveq r0, r12
+; CHECK-FP16-NEXT:    cmn r6, #1
 ; CHECK-FP16-NEXT:    vmov.32 d0[0], r0
 ; CHECK-FP16-NEXT:    movgt r1, r7
 ; CHECK-FP16-NEXT:    cmp r7, #-2147483648
 ; CHECK-FP16-NEXT:    mov r0, #-2147483648
-; CHECK-FP16-NEXT:    vmov.32 d1[1], r9
 ; CHECK-FP16-NEXT:    movls r7, r0
 ; CHECK-FP16-NEXT:    cmn r2, #1
+; CHECK-FP16-NEXT:    vmov.32 d1[1], r10
 ; CHECK-FP16-NEXT:    movne r7, r1
 ; CHECK-FP16-NEXT:    vmov.32 d0[1], r7
 ; CHECK-FP16-NEXT:    add sp, sp, #16

diff  --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
index 7eb7e14353329..a7d424190bbcd 100644
--- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
@@ -2972,50 +2972,47 @@ define i64 @stest_f64i64_mm(double %x) {
 ; RV32IF-NEXT:    mv a1, a0
 ; RV32IF-NEXT:    addi a0, sp, 8
 ; RV32IF-NEXT:    call __fixdfti@plt
-; RV32IF-NEXT:    lw a0, 20(sp)
+; RV32IF-NEXT:    lw a1, 20(sp)
 ; RV32IF-NEXT:    lw t0, 8(sp)
 ; RV32IF-NEXT:    lw a4, 12(sp)
-; RV32IF-NEXT:    lw a1, 16(sp)
+; RV32IF-NEXT:    lw a0, 16(sp)
 ; RV32IF-NEXT:    lui a3, 524288
-; RV32IF-NEXT:    addi a6, a3, -1
+; RV32IF-NEXT:    addi a5, a3, -1
 ; RV32IF-NEXT:    mv a2, t0
-; RV32IF-NEXT:    beq a4, a6, .LBB45_2
+; RV32IF-NEXT:    beq a4, a5, .LBB45_2
 ; RV32IF-NEXT:  # %bb.1: # %entry
-; RV32IF-NEXT:    sltu a2, a4, a6
+; RV32IF-NEXT:    sltu a2, a4, a5
 ; RV32IF-NEXT:    addi a2, a2, -1
 ; RV32IF-NEXT:    or a2, a2, t0
 ; RV32IF-NEXT:  .LBB45_2: # %entry
-; RV32IF-NEXT:    or a7, a1, a0
-; RV32IF-NEXT:    slti a5, a0, 0
+; RV32IF-NEXT:    or a7, a0, a1
+; RV32IF-NEXT:    slti a6, a1, 0
 ; RV32IF-NEXT:    bnez a7, .LBB45_16
 ; RV32IF-NEXT:  # %bb.3: # %entry
 ; RV32IF-NEXT:    mv t0, a4
-; RV32IF-NEXT:    bgez a0, .LBB45_17
+; RV32IF-NEXT:    bgez a1, .LBB45_17
 ; RV32IF-NEXT:  .LBB45_4: # %entry
-; RV32IF-NEXT:    bgeu a4, a6, .LBB45_18
+; RV32IF-NEXT:    bgeu a4, a5, .LBB45_18
 ; RV32IF-NEXT:  .LBB45_5: # %entry
 ; RV32IF-NEXT:    beqz a7, .LBB45_7
 ; RV32IF-NEXT:  .LBB45_6: # %entry
 ; RV32IF-NEXT:    mv a4, t0
 ; RV32IF-NEXT:  .LBB45_7: # %entry
-; RV32IF-NEXT:    srai a6, a0, 31
-; RV32IF-NEXT:    and a1, a6, a1
-; RV32IF-NEXT:    seqz a6, a0
-; RV32IF-NEXT:    neg a5, a5
-; RV32IF-NEXT:    and a5, a5, a0
-; RV32IF-NEXT:    addi a6, a6, -1
-; RV32IF-NEXT:    mv a0, a4
+; RV32IF-NEXT:    neg a5, a6
+; RV32IF-NEXT:    and a5, a5, a1
+; RV32IF-NEXT:    srai a1, a1, 31
+; RV32IF-NEXT:    mv t0, a4
 ; RV32IF-NEXT:    bgez a5, .LBB45_9
 ; RV32IF-NEXT:  # %bb.8: # %entry
-; RV32IF-NEXT:    lui a0, 524288
+; RV32IF-NEXT:    lui t0, 524288
 ; RV32IF-NEXT:  .LBB45_9: # %entry
-; RV32IF-NEXT:    and a6, a6, a1
+; RV32IF-NEXT:    and a0, a1, a0
 ; RV32IF-NEXT:    mv a1, a4
 ; RV32IF-NEXT:    bltu a3, a4, .LBB45_11
 ; RV32IF-NEXT:  # %bb.10: # %entry
 ; RV32IF-NEXT:    lui a1, 524288
 ; RV32IF-NEXT:  .LBB45_11: # %entry
-; RV32IF-NEXT:    and a6, a6, a5
+; RV32IF-NEXT:    and a6, a0, a5
 ; RV32IF-NEXT:    li a7, -1
 ; RV32IF-NEXT:    bne a6, a7, .LBB45_19
 ; RV32IF-NEXT:  # %bb.12: # %entry
@@ -3032,19 +3029,19 @@ define i64 @stest_f64i64_mm(double %x) {
 ; RV32IF-NEXT:    addi sp, sp, 32
 ; RV32IF-NEXT:    ret
 ; RV32IF-NEXT:  .LBB45_16: # %entry
-; RV32IF-NEXT:    addi a2, a5, -1
+; RV32IF-NEXT:    addi a2, a6, -1
 ; RV32IF-NEXT:    or a2, a2, t0
 ; RV32IF-NEXT:    mv t0, a4
-; RV32IF-NEXT:    bltz a0, .LBB45_4
+; RV32IF-NEXT:    bltz a1, .LBB45_4
 ; RV32IF-NEXT:  .LBB45_17: # %entry
-; RV32IF-NEXT:    mv t0, a6
-; RV32IF-NEXT:    bltu a4, a6, .LBB45_5
+; RV32IF-NEXT:    mv t0, a5
+; RV32IF-NEXT:    bltu a4, a5, .LBB45_5
 ; RV32IF-NEXT:  .LBB45_18: # %entry
-; RV32IF-NEXT:    mv a4, a6
+; RV32IF-NEXT:    mv a4, a5
 ; RV32IF-NEXT:    bnez a7, .LBB45_6
 ; RV32IF-NEXT:    j .LBB45_7
 ; RV32IF-NEXT:  .LBB45_19: # %entry
-; RV32IF-NEXT:    mv a1, a0
+; RV32IF-NEXT:    mv a1, t0
 ; RV32IF-NEXT:    mv a0, a2
 ; RV32IF-NEXT:    beq a4, a3, .LBB45_13
 ; RV32IF-NEXT:  .LBB45_20: # %entry
@@ -3111,50 +3108,47 @@ define i64 @stest_f64i64_mm(double %x) {
 ; RV32IFD-NEXT:    .cfi_offset ra, -4
 ; RV32IFD-NEXT:    addi a0, sp, 8
 ; RV32IFD-NEXT:    call __fixdfti@plt
-; RV32IFD-NEXT:    lw a0, 20(sp)
+; RV32IFD-NEXT:    lw a1, 20(sp)
 ; RV32IFD-NEXT:    lw t0, 8(sp)
 ; RV32IFD-NEXT:    lw a4, 12(sp)
-; RV32IFD-NEXT:    lw a1, 16(sp)
+; RV32IFD-NEXT:    lw a0, 16(sp)
 ; RV32IFD-NEXT:    lui a3, 524288
-; RV32IFD-NEXT:    addi a6, a3, -1
+; RV32IFD-NEXT:    addi a5, a3, -1
 ; RV32IFD-NEXT:    mv a2, t0
-; RV32IFD-NEXT:    beq a4, a6, .LBB45_2
+; RV32IFD-NEXT:    beq a4, a5, .LBB45_2
 ; RV32IFD-NEXT:  # %bb.1: # %entry
-; RV32IFD-NEXT:    sltu a2, a4, a6
+; RV32IFD-NEXT:    sltu a2, a4, a5
 ; RV32IFD-NEXT:    addi a2, a2, -1
 ; RV32IFD-NEXT:    or a2, a2, t0
 ; RV32IFD-NEXT:  .LBB45_2: # %entry
-; RV32IFD-NEXT:    or a7, a1, a0
-; RV32IFD-NEXT:    slti a5, a0, 0
+; RV32IFD-NEXT:    or a7, a0, a1
+; RV32IFD-NEXT:    slti a6, a1, 0
 ; RV32IFD-NEXT:    bnez a7, .LBB45_16
 ; RV32IFD-NEXT:  # %bb.3: # %entry
 ; RV32IFD-NEXT:    mv t0, a4
-; RV32IFD-NEXT:    bgez a0, .LBB45_17
+; RV32IFD-NEXT:    bgez a1, .LBB45_17
 ; RV32IFD-NEXT:  .LBB45_4: # %entry
-; RV32IFD-NEXT:    bgeu a4, a6, .LBB45_18
+; RV32IFD-NEXT:    bgeu a4, a5, .LBB45_18
 ; RV32IFD-NEXT:  .LBB45_5: # %entry
 ; RV32IFD-NEXT:    beqz a7, .LBB45_7
 ; RV32IFD-NEXT:  .LBB45_6: # %entry
 ; RV32IFD-NEXT:    mv a4, t0
 ; RV32IFD-NEXT:  .LBB45_7: # %entry
-; RV32IFD-NEXT:    srai a6, a0, 31
-; RV32IFD-NEXT:    and a1, a6, a1
-; RV32IFD-NEXT:    seqz a6, a0
-; RV32IFD-NEXT:    neg a5, a5
-; RV32IFD-NEXT:    and a5, a5, a0
-; RV32IFD-NEXT:    addi a6, a6, -1
-; RV32IFD-NEXT:    mv a0, a4
+; RV32IFD-NEXT:    neg a5, a6
+; RV32IFD-NEXT:    and a5, a5, a1
+; RV32IFD-NEXT:    srai a1, a1, 31
+; RV32IFD-NEXT:    mv t0, a4
 ; RV32IFD-NEXT:    bgez a5, .LBB45_9
 ; RV32IFD-NEXT:  # %bb.8: # %entry
-; RV32IFD-NEXT:    lui a0, 524288
+; RV32IFD-NEXT:    lui t0, 524288
 ; RV32IFD-NEXT:  .LBB45_9: # %entry
-; RV32IFD-NEXT:    and a6, a6, a1
+; RV32IFD-NEXT:    and a0, a1, a0
 ; RV32IFD-NEXT:    mv a1, a4
 ; RV32IFD-NEXT:    bltu a3, a4, .LBB45_11
 ; RV32IFD-NEXT:  # %bb.10: # %entry
 ; RV32IFD-NEXT:    lui a1, 524288
 ; RV32IFD-NEXT:  .LBB45_11: # %entry
-; RV32IFD-NEXT:    and a6, a6, a5
+; RV32IFD-NEXT:    and a6, a0, a5
 ; RV32IFD-NEXT:    li a7, -1
 ; RV32IFD-NEXT:    bne a6, a7, .LBB45_19
 ; RV32IFD-NEXT:  # %bb.12: # %entry
@@ -3171,19 +3165,19 @@ define i64 @stest_f64i64_mm(double %x) {
 ; RV32IFD-NEXT:    addi sp, sp, 32
 ; RV32IFD-NEXT:    ret
 ; RV32IFD-NEXT:  .LBB45_16: # %entry
-; RV32IFD-NEXT:    addi a2, a5, -1
+; RV32IFD-NEXT:    addi a2, a6, -1
 ; RV32IFD-NEXT:    or a2, a2, t0
 ; RV32IFD-NEXT:    mv t0, a4
-; RV32IFD-NEXT:    bltz a0, .LBB45_4
+; RV32IFD-NEXT:    bltz a1, .LBB45_4
 ; RV32IFD-NEXT:  .LBB45_17: # %entry
-; RV32IFD-NEXT:    mv t0, a6
-; RV32IFD-NEXT:    bltu a4, a6, .LBB45_5
+; RV32IFD-NEXT:    mv t0, a5
+; RV32IFD-NEXT:    bltu a4, a5, .LBB45_5
 ; RV32IFD-NEXT:  .LBB45_18: # %entry
-; RV32IFD-NEXT:    mv a4, a6
+; RV32IFD-NEXT:    mv a4, a5
 ; RV32IFD-NEXT:    bnez a7, .LBB45_6
 ; RV32IFD-NEXT:    j .LBB45_7
 ; RV32IFD-NEXT:  .LBB45_19: # %entry
-; RV32IFD-NEXT:    mv a1, a0
+; RV32IFD-NEXT:    mv a1, t0
 ; RV32IFD-NEXT:    mv a0, a2
 ; RV32IFD-NEXT:    beq a4, a3, .LBB45_13
 ; RV32IFD-NEXT:  .LBB45_20: # %entry
@@ -3246,11 +3240,7 @@ define i64 @utest_f64i64_mm(double %x) {
 ; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    .cfi_offset ra, -8
 ; RV64-NEXT:    call __fixunsdfti@plt
-; RV64-NEXT:    snez a2, a1
-; RV64-NEXT:    addi a2, a2, -1
-; RV64-NEXT:    and a0, a2, a0
-; RV64-NEXT:    addi a1, a1, -1
-; RV64-NEXT:    seqz a1, a1
+; RV64-NEXT:    snez a1, a1
 ; RV64-NEXT:    addi a1, a1, -1
 ; RV64-NEXT:    and a0, a1, a0
 ; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
@@ -3374,12 +3364,8 @@ define i64 @ustest_f64i64_mm(double %x) {
 ; RV64-NEXT:  # %bb.1: # %entry
 ; RV64-NEXT:    li a2, 1
 ; RV64-NEXT:  .LBB47_2: # %entry
-; RV64-NEXT:    slti a3, a1, 1
-; RV64-NEXT:    neg a3, a3
-; RV64-NEXT:    and a0, a3, a0
-; RV64-NEXT:    addi a1, a1, -1
-; RV64-NEXT:    seqz a1, a1
-; RV64-NEXT:    addi a1, a1, -1
+; RV64-NEXT:    slti a1, a1, 1
+; RV64-NEXT:    neg a1, a1
 ; RV64-NEXT:    and a0, a1, a0
 ; RV64-NEXT:    beqz a2, .LBB47_4
 ; RV64-NEXT:  # %bb.3: # %entry
@@ -3476,50 +3462,47 @@ define i64 @stest_f32i64_mm(float %x) {
 ; RV32-NEXT:    .cfi_offset ra, -4
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    call __fixsfti@plt
-; RV32-NEXT:    lw a0, 20(sp)
+; RV32-NEXT:    lw a1, 20(sp)
 ; RV32-NEXT:    lw t0, 8(sp)
 ; RV32-NEXT:    lw a4, 12(sp)
-; RV32-NEXT:    lw a1, 16(sp)
+; RV32-NEXT:    lw a0, 16(sp)
 ; RV32-NEXT:    lui a3, 524288
-; RV32-NEXT:    addi a6, a3, -1
+; RV32-NEXT:    addi a5, a3, -1
 ; RV32-NEXT:    mv a2, t0
-; RV32-NEXT:    beq a4, a6, .LBB48_2
+; RV32-NEXT:    beq a4, a5, .LBB48_2
 ; RV32-NEXT:  # %bb.1: # %entry
-; RV32-NEXT:    sltu a2, a4, a6
+; RV32-NEXT:    sltu a2, a4, a5
 ; RV32-NEXT:    addi a2, a2, -1
 ; RV32-NEXT:    or a2, a2, t0
 ; RV32-NEXT:  .LBB48_2: # %entry
-; RV32-NEXT:    or a7, a1, a0
-; RV32-NEXT:    slti a5, a0, 0
+; RV32-NEXT:    or a7, a0, a1
+; RV32-NEXT:    slti a6, a1, 0
 ; RV32-NEXT:    bnez a7, .LBB48_16
 ; RV32-NEXT:  # %bb.3: # %entry
 ; RV32-NEXT:    mv t0, a4
-; RV32-NEXT:    bgez a0, .LBB48_17
+; RV32-NEXT:    bgez a1, .LBB48_17
 ; RV32-NEXT:  .LBB48_4: # %entry
-; RV32-NEXT:    bgeu a4, a6, .LBB48_18
+; RV32-NEXT:    bgeu a4, a5, .LBB48_18
 ; RV32-NEXT:  .LBB48_5: # %entry
 ; RV32-NEXT:    beqz a7, .LBB48_7
 ; RV32-NEXT:  .LBB48_6: # %entry
 ; RV32-NEXT:    mv a4, t0
 ; RV32-NEXT:  .LBB48_7: # %entry
-; RV32-NEXT:    srai a6, a0, 31
-; RV32-NEXT:    and a1, a6, a1
-; RV32-NEXT:    seqz a6, a0
-; RV32-NEXT:    neg a5, a5
-; RV32-NEXT:    and a5, a5, a0
-; RV32-NEXT:    addi a6, a6, -1
-; RV32-NEXT:    mv a0, a4
+; RV32-NEXT:    neg a5, a6
+; RV32-NEXT:    and a5, a5, a1
+; RV32-NEXT:    srai a1, a1, 31
+; RV32-NEXT:    mv t0, a4
 ; RV32-NEXT:    bgez a5, .LBB48_9
 ; RV32-NEXT:  # %bb.8: # %entry
-; RV32-NEXT:    lui a0, 524288
+; RV32-NEXT:    lui t0, 524288
 ; RV32-NEXT:  .LBB48_9: # %entry
-; RV32-NEXT:    and a6, a6, a1
+; RV32-NEXT:    and a0, a1, a0
 ; RV32-NEXT:    mv a1, a4
 ; RV32-NEXT:    bltu a3, a4, .LBB48_11
 ; RV32-NEXT:  # %bb.10: # %entry
 ; RV32-NEXT:    lui a1, 524288
 ; RV32-NEXT:  .LBB48_11: # %entry
-; RV32-NEXT:    and a6, a6, a5
+; RV32-NEXT:    and a6, a0, a5
 ; RV32-NEXT:    li a7, -1
 ; RV32-NEXT:    bne a6, a7, .LBB48_19
 ; RV32-NEXT:  # %bb.12: # %entry
@@ -3536,19 +3519,19 @@ define i64 @stest_f32i64_mm(float %x) {
 ; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ; RV32-NEXT:  .LBB48_16: # %entry
-; RV32-NEXT:    addi a2, a5, -1
+; RV32-NEXT:    addi a2, a6, -1
 ; RV32-NEXT:    or a2, a2, t0
 ; RV32-NEXT:    mv t0, a4
-; RV32-NEXT:    bltz a0, .LBB48_4
+; RV32-NEXT:    bltz a1, .LBB48_4
 ; RV32-NEXT:  .LBB48_17: # %entry
-; RV32-NEXT:    mv t0, a6
-; RV32-NEXT:    bltu a4, a6, .LBB48_5
+; RV32-NEXT:    mv t0, a5
+; RV32-NEXT:    bltu a4, a5, .LBB48_5
 ; RV32-NEXT:  .LBB48_18: # %entry
-; RV32-NEXT:    mv a4, a6
+; RV32-NEXT:    mv a4, a5
 ; RV32-NEXT:    bnez a7, .LBB48_6
 ; RV32-NEXT:    j .LBB48_7
 ; RV32-NEXT:  .LBB48_19: # %entry
-; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    mv a1, t0
 ; RV32-NEXT:    mv a0, a2
 ; RV32-NEXT:    beq a4, a3, .LBB48_13
 ; RV32-NEXT:  .LBB48_20: # %entry
@@ -3609,11 +3592,7 @@ define i64 @utest_f32i64_mm(float %x) {
 ; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    .cfi_offset ra, -8
 ; RV64-NEXT:    call __fixunssfti@plt
-; RV64-NEXT:    snez a2, a1
-; RV64-NEXT:    addi a2, a2, -1
-; RV64-NEXT:    and a0, a2, a0
-; RV64-NEXT:    addi a1, a1, -1
-; RV64-NEXT:    seqz a1, a1
+; RV64-NEXT:    snez a1, a1
 ; RV64-NEXT:    addi a1, a1, -1
 ; RV64-NEXT:    and a0, a1, a0
 ; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
@@ -3708,12 +3687,8 @@ define i64 @ustest_f32i64_mm(float %x) {
 ; RV64-NEXT:  # %bb.1: # %entry
 ; RV64-NEXT:    li a2, 1
 ; RV64-NEXT:  .LBB50_2: # %entry
-; RV64-NEXT:    slti a3, a1, 1
-; RV64-NEXT:    neg a3, a3
-; RV64-NEXT:    and a0, a3, a0
-; RV64-NEXT:    addi a1, a1, -1
-; RV64-NEXT:    seqz a1, a1
-; RV64-NEXT:    addi a1, a1, -1
+; RV64-NEXT:    slti a1, a1, 1
+; RV64-NEXT:    neg a1, a1
 ; RV64-NEXT:    and a0, a1, a0
 ; RV64-NEXT:    beqz a2, .LBB50_4
 ; RV64-NEXT:  # %bb.3: # %entry
@@ -3743,50 +3718,47 @@ define i64 @stest_f16i64_mm(half %x) {
 ; RV32-NEXT:    call __extendhfsf2@plt
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    call __fixsfti@plt
-; RV32-NEXT:    lw a0, 20(sp)
+; RV32-NEXT:    lw a1, 20(sp)
 ; RV32-NEXT:    lw t0, 8(sp)
 ; RV32-NEXT:    lw a4, 12(sp)
-; RV32-NEXT:    lw a1, 16(sp)
+; RV32-NEXT:    lw a0, 16(sp)
 ; RV32-NEXT:    lui a3, 524288
-; RV32-NEXT:    addi a6, a3, -1
+; RV32-NEXT:    addi a5, a3, -1
 ; RV32-NEXT:    mv a2, t0
-; RV32-NEXT:    beq a4, a6, .LBB51_2
+; RV32-NEXT:    beq a4, a5, .LBB51_2
 ; RV32-NEXT:  # %bb.1: # %entry
-; RV32-NEXT:    sltu a2, a4, a6
+; RV32-NEXT:    sltu a2, a4, a5
 ; RV32-NEXT:    addi a2, a2, -1
 ; RV32-NEXT:    or a2, a2, t0
 ; RV32-NEXT:  .LBB51_2: # %entry
-; RV32-NEXT:    or a7, a1, a0
-; RV32-NEXT:    slti a5, a0, 0
+; RV32-NEXT:    or a7, a0, a1
+; RV32-NEXT:    slti a6, a1, 0
 ; RV32-NEXT:    bnez a7, .LBB51_16
 ; RV32-NEXT:  # %bb.3: # %entry
 ; RV32-NEXT:    mv t0, a4
-; RV32-NEXT:    bgez a0, .LBB51_17
+; RV32-NEXT:    bgez a1, .LBB51_17
 ; RV32-NEXT:  .LBB51_4: # %entry
-; RV32-NEXT:    bgeu a4, a6, .LBB51_18
+; RV32-NEXT:    bgeu a4, a5, .LBB51_18
 ; RV32-NEXT:  .LBB51_5: # %entry
 ; RV32-NEXT:    beqz a7, .LBB51_7
 ; RV32-NEXT:  .LBB51_6: # %entry
 ; RV32-NEXT:    mv a4, t0
 ; RV32-NEXT:  .LBB51_7: # %entry
-; RV32-NEXT:    srai a6, a0, 31
-; RV32-NEXT:    and a1, a6, a1
-; RV32-NEXT:    seqz a6, a0
-; RV32-NEXT:    neg a5, a5
-; RV32-NEXT:    and a5, a5, a0
-; RV32-NEXT:    addi a6, a6, -1
-; RV32-NEXT:    mv a0, a4
+; RV32-NEXT:    neg a5, a6
+; RV32-NEXT:    and a5, a5, a1
+; RV32-NEXT:    srai a1, a1, 31
+; RV32-NEXT:    mv t0, a4
 ; RV32-NEXT:    bgez a5, .LBB51_9
 ; RV32-NEXT:  # %bb.8: # %entry
-; RV32-NEXT:    lui a0, 524288
+; RV32-NEXT:    lui t0, 524288
 ; RV32-NEXT:  .LBB51_9: # %entry
-; RV32-NEXT:    and a6, a6, a1
+; RV32-NEXT:    and a0, a1, a0
 ; RV32-NEXT:    mv a1, a4
 ; RV32-NEXT:    bltu a3, a4, .LBB51_11
 ; RV32-NEXT:  # %bb.10: # %entry
 ; RV32-NEXT:    lui a1, 524288
 ; RV32-NEXT:  .LBB51_11: # %entry
-; RV32-NEXT:    and a6, a6, a5
+; RV32-NEXT:    and a6, a0, a5
 ; RV32-NEXT:    li a7, -1
 ; RV32-NEXT:    bne a6, a7, .LBB51_19
 ; RV32-NEXT:  # %bb.12: # %entry
@@ -3803,19 +3775,19 @@ define i64 @stest_f16i64_mm(half %x) {
 ; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ; RV32-NEXT:  .LBB51_16: # %entry
-; RV32-NEXT:    addi a2, a5, -1
+; RV32-NEXT:    addi a2, a6, -1
 ; RV32-NEXT:    or a2, a2, t0
 ; RV32-NEXT:    mv t0, a4
-; RV32-NEXT:    bltz a0, .LBB51_4
+; RV32-NEXT:    bltz a1, .LBB51_4
 ; RV32-NEXT:  .LBB51_17: # %entry
-; RV32-NEXT:    mv t0, a6
-; RV32-NEXT:    bltu a4, a6, .LBB51_5
+; RV32-NEXT:    mv t0, a5
+; RV32-NEXT:    bltu a4, a5, .LBB51_5
 ; RV32-NEXT:  .LBB51_18: # %entry
-; RV32-NEXT:    mv a4, a6
+; RV32-NEXT:    mv a4, a5
 ; RV32-NEXT:    bnez a7, .LBB51_6
 ; RV32-NEXT:    j .LBB51_7
 ; RV32-NEXT:  .LBB51_19: # %entry
-; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    mv a1, t0
 ; RV32-NEXT:    mv a0, a2
 ; RV32-NEXT:    beq a4, a3, .LBB51_13
 ; RV32-NEXT:  .LBB51_20: # %entry
@@ -3922,11 +3894,7 @@ define i64 @utesth_f16i64_mm(half %x) {
 ; RV64-NEXT:    fmv.x.w a0, fa0
 ; RV64-NEXT:    call __extendhfsf2@plt
 ; RV64-NEXT:    call __fixunssfti@plt
-; RV64-NEXT:    snez a2, a1
-; RV64-NEXT:    addi a2, a2, -1
-; RV64-NEXT:    and a0, a2, a0
-; RV64-NEXT:    addi a1, a1, -1
-; RV64-NEXT:    seqz a1, a1
+; RV64-NEXT:    snez a1, a1
 ; RV64-NEXT:    addi a1, a1, -1
 ; RV64-NEXT:    and a0, a1, a0
 ; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
@@ -4025,12 +3993,8 @@ define i64 @ustest_f16i64_mm(half %x) {
 ; RV64-NEXT:  # %bb.1: # %entry
 ; RV64-NEXT:    li a2, 1
 ; RV64-NEXT:  .LBB53_2: # %entry
-; RV64-NEXT:    slti a3, a1, 1
-; RV64-NEXT:    neg a3, a3
-; RV64-NEXT:    and a0, a3, a0
-; RV64-NEXT:    addi a1, a1, -1
-; RV64-NEXT:    seqz a1, a1
-; RV64-NEXT:    addi a1, a1, -1
+; RV64-NEXT:    slti a1, a1, 1
+; RV64-NEXT:    neg a1, a1
 ; RV64-NEXT:    and a0, a1, a0
 ; RV64-NEXT:    beqz a2, .LBB53_4
 ; RV64-NEXT:  # %bb.3: # %entry

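(For context: each deleted seqz-based compare-and-mask sequence above came from the outer select of a redundant select chain. As a rough illustration, here is a hypothetical .ll reproducer, not one of the tests modified by this patch, whose lowering produces the SELECT/SETCC shape the new combine removes:

define i64 @redundant_select(i64 %x, i64 %y, i64 %z) {
  ; The outer condition (x == 0) and the inner condition (x == 1) can
  ; never be true at the same time, and both selects fall back to %y.
  %c0 = icmp eq i64 %x, 0
  %c1 = icmp eq i64 %x, 1
  %inner = select i1 %c1, i64 %z, i64 %y
  ; Whenever %c0 is true, %c1 is false, so %inner already yields %y and
  ; %outer always equals %inner; the combine folds %outer away.
  %outer = select i1 %c0, i64 %y, i64 %inner
  ret i64 %outer
}

After the fold only the inner compare and select survive, which is why the updated CHECK lines each lose one compare-and-mask sequence.)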
diff  --git a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
index 77faf67002eac..de9a54da50821 100644
--- a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
@@ -5572,20 +5572,12 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NOV-NEXT:    mv s1, a1
 ; CHECK-NOV-NEXT:    fmv.d fa0, fs0
 ; CHECK-NOV-NEXT:    call __fixunsdfti@plt
-; CHECK-NOV-NEXT:    snez a2, a1
-; CHECK-NOV-NEXT:    addi a2, a2, -1
-; CHECK-NOV-NEXT:    and a0, a2, a0
-; CHECK-NOV-NEXT:    addi a1, a1, -1
-; CHECK-NOV-NEXT:    seqz a1, a1
+; CHECK-NOV-NEXT:    snez a1, a1
 ; CHECK-NOV-NEXT:    addi a1, a1, -1
 ; CHECK-NOV-NEXT:    and a0, a1, a0
 ; CHECK-NOV-NEXT:    snez a1, s1
 ; CHECK-NOV-NEXT:    addi a1, a1, -1
 ; CHECK-NOV-NEXT:    and a1, a1, s0
-; CHECK-NOV-NEXT:    addi s1, s1, -1
-; CHECK-NOV-NEXT:    seqz a2, s1
-; CHECK-NOV-NEXT:    addi a2, a2, -1
-; CHECK-NOV-NEXT:    and a1, a2, a1
 ; CHECK-NOV-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
@@ -5623,15 +5615,7 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
 ; CHECK-V-NEXT:    snez a2, s1
 ; CHECK-V-NEXT:    addi a2, a2, -1
 ; CHECK-V-NEXT:    and a2, a2, s0
-; CHECK-V-NEXT:    addi s1, s1, -1
-; CHECK-V-NEXT:    seqz a3, s1
-; CHECK-V-NEXT:    addi a3, a3, -1
-; CHECK-V-NEXT:    and a2, a3, a2
-; CHECK-V-NEXT:    snez a3, a1
-; CHECK-V-NEXT:    addi a3, a3, -1
-; CHECK-V-NEXT:    and a0, a3, a0
-; CHECK-V-NEXT:    addi a1, a1, -1
-; CHECK-V-NEXT:    seqz a1, a1
+; CHECK-V-NEXT:    snez a1, a1
 ; CHECK-V-NEXT:    addi a1, a1, -1
 ; CHECK-V-NEXT:    and a0, a1, a0
 ; CHECK-V-NEXT:    sd a0, 24(sp)
@@ -5677,42 +5661,35 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NOV-NEXT:    mv s1, a1
 ; CHECK-NOV-NEXT:    fmv.d fa0, fs0
 ; CHECK-NOV-NEXT:    call __fixdfti@plt
-; CHECK-NOV-NEXT:    mv a2, a1
+; CHECK-NOV-NEXT:    mv a3, a1
 ; CHECK-NOV-NEXT:    blez a1, .LBB47_2
 ; CHECK-NOV-NEXT:  # %bb.1: # %entry
-; CHECK-NOV-NEXT:    li a2, 1
+; CHECK-NOV-NEXT:    li a3, 1
 ; CHECK-NOV-NEXT:  .LBB47_2: # %entry
 ; CHECK-NOV-NEXT:    mv a4, s1
 ; CHECK-NOV-NEXT:    blez s1, .LBB47_4
 ; CHECK-NOV-NEXT:  # %bb.3: # %entry
 ; CHECK-NOV-NEXT:    li a4, 1
 ; CHECK-NOV-NEXT:  .LBB47_4: # %entry
-; CHECK-NOV-NEXT:    slti a3, a1, 1
-; CHECK-NOV-NEXT:    neg a3, a3
-; CHECK-NOV-NEXT:    and a3, a3, a0
-; CHECK-NOV-NEXT:    addi a1, a1, -1
-; CHECK-NOV-NEXT:    seqz a1, a1
-; CHECK-NOV-NEXT:    addi a1, a1, -1
-; CHECK-NOV-NEXT:    slti a0, s1, 1
-; CHECK-NOV-NEXT:    neg a0, a0
-; CHECK-NOV-NEXT:    and a0, a0, s0
-; CHECK-NOV-NEXT:    addi s1, s1, -1
-; CHECK-NOV-NEXT:    seqz a5, s1
-; CHECK-NOV-NEXT:    addi a5, a5, -1
-; CHECK-NOV-NEXT:    and a0, a5, a0
+; CHECK-NOV-NEXT:    slti a1, a1, 1
+; CHECK-NOV-NEXT:    neg a1, a1
+; CHECK-NOV-NEXT:    slti a2, s1, 1
+; CHECK-NOV-NEXT:    neg a2, a2
+; CHECK-NOV-NEXT:    and a2, a2, s0
 ; CHECK-NOV-NEXT:    beqz a4, .LBB47_6
 ; CHECK-NOV-NEXT:  # %bb.5: # %entry
 ; CHECK-NOV-NEXT:    sgtz a4, a4
 ; CHECK-NOV-NEXT:    neg a4, a4
-; CHECK-NOV-NEXT:    and a0, a4, a0
+; CHECK-NOV-NEXT:    and a2, a4, a2
 ; CHECK-NOV-NEXT:  .LBB47_6: # %entry
-; CHECK-NOV-NEXT:    and a1, a1, a3
-; CHECK-NOV-NEXT:    beqz a2, .LBB47_8
+; CHECK-NOV-NEXT:    and a1, a1, a0
+; CHECK-NOV-NEXT:    beqz a3, .LBB47_8
 ; CHECK-NOV-NEXT:  # %bb.7: # %entry
-; CHECK-NOV-NEXT:    sgtz a2, a2
-; CHECK-NOV-NEXT:    neg a2, a2
-; CHECK-NOV-NEXT:    and a1, a2, a1
+; CHECK-NOV-NEXT:    sgtz a0, a3
+; CHECK-NOV-NEXT:    neg a0, a0
+; CHECK-NOV-NEXT:    and a1, a0, a1
 ; CHECK-NOV-NEXT:  .LBB47_8: # %entry
+; CHECK-NOV-NEXT:    mv a0, a2
 ; CHECK-NOV-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
@@ -5753,37 +5730,29 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
 ; CHECK-V-NEXT:    li a2, 1
 ; CHECK-V-NEXT:  .LBB47_2: # %entry
 ; CHECK-V-NEXT:    slti a3, s0, 1
-; CHECK-V-NEXT:    neg a3, a3
-; CHECK-V-NEXT:    and a3, a3, s1
-; CHECK-V-NEXT:    addi a4, s0, -1
-; CHECK-V-NEXT:    seqz a4, a4
-; CHECK-V-NEXT:    addi a4, a4, -1
-; CHECK-V-NEXT:    slti a5, a1, 1
-; CHECK-V-NEXT:    neg a5, a5
-; CHECK-V-NEXT:    addi a1, a1, -1
-; CHECK-V-NEXT:    seqz a6, a1
+; CHECK-V-NEXT:    neg a4, a3
+; CHECK-V-NEXT:    slti a1, a1, 1
 ; CHECK-V-NEXT:    blez s0, .LBB47_4
 ; CHECK-V-NEXT:  # %bb.3: # %entry
 ; CHECK-V-NEXT:    li s0, 1
 ; CHECK-V-NEXT:  .LBB47_4: # %entry
-; CHECK-V-NEXT:    and a1, a5, a0
-; CHECK-V-NEXT:    addi a5, a6, -1
-; CHECK-V-NEXT:    and a0, a4, a3
+; CHECK-V-NEXT:    neg a3, a1
+; CHECK-V-NEXT:    and a1, a4, s1
 ; CHECK-V-NEXT:    beqz s0, .LBB47_6
 ; CHECK-V-NEXT:  # %bb.5: # %entry
-; CHECK-V-NEXT:    sgtz a3, s0
-; CHECK-V-NEXT:    neg a3, a3
-; CHECK-V-NEXT:    and a0, a3, a0
+; CHECK-V-NEXT:    sgtz a4, s0
+; CHECK-V-NEXT:    neg a4, a4
+; CHECK-V-NEXT:    and a1, a4, a1
 ; CHECK-V-NEXT:  .LBB47_6: # %entry
-; CHECK-V-NEXT:    and a1, a5, a1
+; CHECK-V-NEXT:    and a0, a3, a0
 ; CHECK-V-NEXT:    beqz a2, .LBB47_8
 ; CHECK-V-NEXT:  # %bb.7: # %entry
 ; CHECK-V-NEXT:    sgtz a2, a2
 ; CHECK-V-NEXT:    neg a2, a2
-; CHECK-V-NEXT:    and a1, a2, a1
+; CHECK-V-NEXT:    and a0, a2, a0
 ; CHECK-V-NEXT:  .LBB47_8: # %entry
-; CHECK-V-NEXT:    sd a1, 24(sp)
-; CHECK-V-NEXT:    sd a0, 32(sp)
+; CHECK-V-NEXT:    sd a0, 24(sp)
+; CHECK-V-NEXT:    sd a1, 32(sp)
 ; CHECK-V-NEXT:    addi a0, sp, 24
 ; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-V-NEXT:    vle64.v v8, (a0)
@@ -6062,20 +6031,12 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NOV-NEXT:    mv s1, a1
 ; CHECK-NOV-NEXT:    fmv.s fa0, fs0
 ; CHECK-NOV-NEXT:    call __fixunssfti@plt
-; CHECK-NOV-NEXT:    snez a2, a1
-; CHECK-NOV-NEXT:    addi a2, a2, -1
-; CHECK-NOV-NEXT:    and a0, a2, a0
-; CHECK-NOV-NEXT:    addi a1, a1, -1
-; CHECK-NOV-NEXT:    seqz a1, a1
+; CHECK-NOV-NEXT:    snez a1, a1
 ; CHECK-NOV-NEXT:    addi a1, a1, -1
 ; CHECK-NOV-NEXT:    and a0, a1, a0
 ; CHECK-NOV-NEXT:    snez a1, s1
 ; CHECK-NOV-NEXT:    addi a1, a1, -1
 ; CHECK-NOV-NEXT:    and a1, a1, s0
-; CHECK-NOV-NEXT:    addi s1, s1, -1
-; CHECK-NOV-NEXT:    seqz a2, s1
-; CHECK-NOV-NEXT:    addi a2, a2, -1
-; CHECK-NOV-NEXT:    and a1, a2, a1
 ; CHECK-NOV-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
@@ -6113,15 +6074,7 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
 ; CHECK-V-NEXT:    snez a2, s1
 ; CHECK-V-NEXT:    addi a2, a2, -1
 ; CHECK-V-NEXT:    and a2, a2, s0
-; CHECK-V-NEXT:    addi s1, s1, -1
-; CHECK-V-NEXT:    seqz a3, s1
-; CHECK-V-NEXT:    addi a3, a3, -1
-; CHECK-V-NEXT:    and a2, a3, a2
-; CHECK-V-NEXT:    snez a3, a1
-; CHECK-V-NEXT:    addi a3, a3, -1
-; CHECK-V-NEXT:    and a0, a3, a0
-; CHECK-V-NEXT:    addi a1, a1, -1
-; CHECK-V-NEXT:    seqz a1, a1
+; CHECK-V-NEXT:    snez a1, a1
 ; CHECK-V-NEXT:    addi a1, a1, -1
 ; CHECK-V-NEXT:    and a0, a1, a0
 ; CHECK-V-NEXT:    sd a0, 24(sp)
@@ -6167,42 +6120,35 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NOV-NEXT:    mv s1, a1
 ; CHECK-NOV-NEXT:    fmv.s fa0, fs0
 ; CHECK-NOV-NEXT:    call __fixsfti@plt
-; CHECK-NOV-NEXT:    mv a2, a1
+; CHECK-NOV-NEXT:    mv a3, a1
 ; CHECK-NOV-NEXT:    blez a1, .LBB50_2
 ; CHECK-NOV-NEXT:  # %bb.1: # %entry
-; CHECK-NOV-NEXT:    li a2, 1
+; CHECK-NOV-NEXT:    li a3, 1
 ; CHECK-NOV-NEXT:  .LBB50_2: # %entry
 ; CHECK-NOV-NEXT:    mv a4, s1
 ; CHECK-NOV-NEXT:    blez s1, .LBB50_4
 ; CHECK-NOV-NEXT:  # %bb.3: # %entry
 ; CHECK-NOV-NEXT:    li a4, 1
 ; CHECK-NOV-NEXT:  .LBB50_4: # %entry
-; CHECK-NOV-NEXT:    slti a3, a1, 1
-; CHECK-NOV-NEXT:    neg a3, a3
-; CHECK-NOV-NEXT:    and a3, a3, a0
-; CHECK-NOV-NEXT:    addi a1, a1, -1
-; CHECK-NOV-NEXT:    seqz a1, a1
-; CHECK-NOV-NEXT:    addi a1, a1, -1
-; CHECK-NOV-NEXT:    slti a0, s1, 1
-; CHECK-NOV-NEXT:    neg a0, a0
-; CHECK-NOV-NEXT:    and a0, a0, s0
-; CHECK-NOV-NEXT:    addi s1, s1, -1
-; CHECK-NOV-NEXT:    seqz a5, s1
-; CHECK-NOV-NEXT:    addi a5, a5, -1
-; CHECK-NOV-NEXT:    and a0, a5, a0
+; CHECK-NOV-NEXT:    slti a1, a1, 1
+; CHECK-NOV-NEXT:    neg a1, a1
+; CHECK-NOV-NEXT:    slti a2, s1, 1
+; CHECK-NOV-NEXT:    neg a2, a2
+; CHECK-NOV-NEXT:    and a2, a2, s0
 ; CHECK-NOV-NEXT:    beqz a4, .LBB50_6
 ; CHECK-NOV-NEXT:  # %bb.5: # %entry
 ; CHECK-NOV-NEXT:    sgtz a4, a4
 ; CHECK-NOV-NEXT:    neg a4, a4
-; CHECK-NOV-NEXT:    and a0, a4, a0
+; CHECK-NOV-NEXT:    and a2, a4, a2
 ; CHECK-NOV-NEXT:  .LBB50_6: # %entry
-; CHECK-NOV-NEXT:    and a1, a1, a3
-; CHECK-NOV-NEXT:    beqz a2, .LBB50_8
+; CHECK-NOV-NEXT:    and a1, a1, a0
+; CHECK-NOV-NEXT:    beqz a3, .LBB50_8
 ; CHECK-NOV-NEXT:  # %bb.7: # %entry
-; CHECK-NOV-NEXT:    sgtz a2, a2
-; CHECK-NOV-NEXT:    neg a2, a2
-; CHECK-NOV-NEXT:    and a1, a2, a1
+; CHECK-NOV-NEXT:    sgtz a0, a3
+; CHECK-NOV-NEXT:    neg a0, a0
+; CHECK-NOV-NEXT:    and a1, a0, a1
 ; CHECK-NOV-NEXT:  .LBB50_8: # %entry
+; CHECK-NOV-NEXT:    mv a0, a2
 ; CHECK-NOV-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
@@ -6243,37 +6189,29 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
 ; CHECK-V-NEXT:    li a2, 1
 ; CHECK-V-NEXT:  .LBB50_2: # %entry
 ; CHECK-V-NEXT:    slti a3, s0, 1
-; CHECK-V-NEXT:    neg a3, a3
-; CHECK-V-NEXT:    and a3, a3, s1
-; CHECK-V-NEXT:    addi a4, s0, -1
-; CHECK-V-NEXT:    seqz a4, a4
-; CHECK-V-NEXT:    addi a4, a4, -1
-; CHECK-V-NEXT:    slti a5, a1, 1
-; CHECK-V-NEXT:    neg a5, a5
-; CHECK-V-NEXT:    addi a1, a1, -1
-; CHECK-V-NEXT:    seqz a6, a1
+; CHECK-V-NEXT:    neg a4, a3
+; CHECK-V-NEXT:    slti a1, a1, 1
 ; CHECK-V-NEXT:    blez s0, .LBB50_4
 ; CHECK-V-NEXT:  # %bb.3: # %entry
 ; CHECK-V-NEXT:    li s0, 1
 ; CHECK-V-NEXT:  .LBB50_4: # %entry
-; CHECK-V-NEXT:    and a1, a5, a0
-; CHECK-V-NEXT:    addi a5, a6, -1
-; CHECK-V-NEXT:    and a0, a4, a3
+; CHECK-V-NEXT:    neg a3, a1
+; CHECK-V-NEXT:    and a1, a4, s1
 ; CHECK-V-NEXT:    beqz s0, .LBB50_6
 ; CHECK-V-NEXT:  # %bb.5: # %entry
-; CHECK-V-NEXT:    sgtz a3, s0
-; CHECK-V-NEXT:    neg a3, a3
-; CHECK-V-NEXT:    and a0, a3, a0
+; CHECK-V-NEXT:    sgtz a4, s0
+; CHECK-V-NEXT:    neg a4, a4
+; CHECK-V-NEXT:    and a1, a4, a1
 ; CHECK-V-NEXT:  .LBB50_6: # %entry
-; CHECK-V-NEXT:    and a1, a5, a1
+; CHECK-V-NEXT:    and a0, a3, a0
 ; CHECK-V-NEXT:    beqz a2, .LBB50_8
 ; CHECK-V-NEXT:  # %bb.7: # %entry
 ; CHECK-V-NEXT:    sgtz a2, a2
 ; CHECK-V-NEXT:    neg a2, a2
-; CHECK-V-NEXT:    and a1, a2, a1
+; CHECK-V-NEXT:    and a0, a2, a0
 ; CHECK-V-NEXT:  .LBB50_8: # %entry
-; CHECK-V-NEXT:    sd a1, 24(sp)
-; CHECK-V-NEXT:    sd a0, 32(sp)
+; CHECK-V-NEXT:    sd a0, 24(sp)
+; CHECK-V-NEXT:    sd a1, 32(sp)
 ; CHECK-V-NEXT:    addi a0, sp, 24
 ; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-V-NEXT:    vle64.v v8, (a0)
@@ -6547,20 +6485,12 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
 ; CHECK-NOV-NEXT:    mv a0, s0
 ; CHECK-NOV-NEXT:    call __extendhfsf2@plt
 ; CHECK-NOV-NEXT:    call __fixunssfti@plt
-; CHECK-NOV-NEXT:    snez a2, a1
-; CHECK-NOV-NEXT:    addi a2, a2, -1
-; CHECK-NOV-NEXT:    and a0, a2, a0
-; CHECK-NOV-NEXT:    addi a1, a1, -1
-; CHECK-NOV-NEXT:    seqz a1, a1
+; CHECK-NOV-NEXT:    snez a1, a1
 ; CHECK-NOV-NEXT:    addi a1, a1, -1
 ; CHECK-NOV-NEXT:    and a0, a1, a0
 ; CHECK-NOV-NEXT:    snez a1, s2
 ; CHECK-NOV-NEXT:    addi a1, a1, -1
 ; CHECK-NOV-NEXT:    and a1, a1, s1
-; CHECK-NOV-NEXT:    addi s2, s2, -1
-; CHECK-NOV-NEXT:    seqz a2, s2
-; CHECK-NOV-NEXT:    addi a2, a2, -1
-; CHECK-NOV-NEXT:    and a1, a2, a1
 ; CHECK-NOV-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
@@ -6589,20 +6519,12 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
 ; CHECK-V-NEXT:    mv a0, s0
 ; CHECK-V-NEXT:    call __extendhfsf2@plt
 ; CHECK-V-NEXT:    call __fixunssfti@plt
-; CHECK-V-NEXT:    snez a2, a1
-; CHECK-V-NEXT:    addi a2, a2, -1
-; CHECK-V-NEXT:    and a0, a2, a0
-; CHECK-V-NEXT:    addi a1, a1, -1
-; CHECK-V-NEXT:    seqz a1, a1
+; CHECK-V-NEXT:    snez a1, a1
 ; CHECK-V-NEXT:    addi a1, a1, -1
 ; CHECK-V-NEXT:    and a0, a1, a0
 ; CHECK-V-NEXT:    snez a1, s2
 ; CHECK-V-NEXT:    addi a1, a1, -1
 ; CHECK-V-NEXT:    and a1, a1, s1
-; CHECK-V-NEXT:    addi s2, s2, -1
-; CHECK-V-NEXT:    seqz a2, s2
-; CHECK-V-NEXT:    addi a2, a2, -1
-; CHECK-V-NEXT:    and a1, a2, a1
 ; CHECK-V-NEXT:    sd a1, 8(sp)
 ; CHECK-V-NEXT:    sd a0, 0(sp)
 ; CHECK-V-NEXT:    addi a0, sp, 8
@@ -6646,42 +6568,35 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
 ; CHECK-NOV-NEXT:    mv a0, s2
 ; CHECK-NOV-NEXT:    call __extendhfsf2@plt
 ; CHECK-NOV-NEXT:    call __fixsfti@plt
-; CHECK-NOV-NEXT:    mv a2, a1
+; CHECK-NOV-NEXT:    mv a3, a1
 ; CHECK-NOV-NEXT:    blez a1, .LBB53_2
 ; CHECK-NOV-NEXT:  # %bb.1: # %entry
-; CHECK-NOV-NEXT:    li a2, 1
+; CHECK-NOV-NEXT:    li a3, 1
 ; CHECK-NOV-NEXT:  .LBB53_2: # %entry
 ; CHECK-NOV-NEXT:    mv a4, s1
 ; CHECK-NOV-NEXT:    blez s1, .LBB53_4
 ; CHECK-NOV-NEXT:  # %bb.3: # %entry
 ; CHECK-NOV-NEXT:    li a4, 1
 ; CHECK-NOV-NEXT:  .LBB53_4: # %entry
-; CHECK-NOV-NEXT:    slti a3, a1, 1
-; CHECK-NOV-NEXT:    neg a3, a3
-; CHECK-NOV-NEXT:    and a3, a3, a0
-; CHECK-NOV-NEXT:    addi a1, a1, -1
-; CHECK-NOV-NEXT:    seqz a1, a1
-; CHECK-NOV-NEXT:    addi a1, a1, -1
-; CHECK-NOV-NEXT:    slti a0, s1, 1
-; CHECK-NOV-NEXT:    neg a0, a0
-; CHECK-NOV-NEXT:    and a0, a0, s0
-; CHECK-NOV-NEXT:    addi s1, s1, -1
-; CHECK-NOV-NEXT:    seqz a5, s1
-; CHECK-NOV-NEXT:    addi a5, a5, -1
-; CHECK-NOV-NEXT:    and a0, a5, a0
+; CHECK-NOV-NEXT:    slti a1, a1, 1
+; CHECK-NOV-NEXT:    neg a1, a1
+; CHECK-NOV-NEXT:    slti a2, s1, 1
+; CHECK-NOV-NEXT:    neg a2, a2
+; CHECK-NOV-NEXT:    and a2, a2, s0
 ; CHECK-NOV-NEXT:    beqz a4, .LBB53_6
 ; CHECK-NOV-NEXT:  # %bb.5: # %entry
 ; CHECK-NOV-NEXT:    sgtz a4, a4
 ; CHECK-NOV-NEXT:    neg a4, a4
-; CHECK-NOV-NEXT:    and a0, a4, a0
+; CHECK-NOV-NEXT:    and a2, a4, a2
 ; CHECK-NOV-NEXT:  .LBB53_6: # %entry
-; CHECK-NOV-NEXT:    and a1, a1, a3
-; CHECK-NOV-NEXT:    beqz a2, .LBB53_8
+; CHECK-NOV-NEXT:    and a1, a1, a0
+; CHECK-NOV-NEXT:    beqz a3, .LBB53_8
 ; CHECK-NOV-NEXT:  # %bb.7: # %entry
-; CHECK-NOV-NEXT:    sgtz a2, a2
-; CHECK-NOV-NEXT:    neg a2, a2
-; CHECK-NOV-NEXT:    and a1, a2, a1
+; CHECK-NOV-NEXT:    sgtz a0, a3
+; CHECK-NOV-NEXT:    neg a0, a0
+; CHECK-NOV-NEXT:    and a1, a0, a1
 ; CHECK-NOV-NEXT:  .LBB53_8: # %entry
+; CHECK-NOV-NEXT:    mv a0, a2
 ; CHECK-NOV-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
@@ -6719,34 +6634,26 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
 ; CHECK-V-NEXT:  # %bb.3: # %entry
 ; CHECK-V-NEXT:    li a4, 1
 ; CHECK-V-NEXT:  .LBB53_4: # %entry
-; CHECK-V-NEXT:    slti a3, a1, 1
-; CHECK-V-NEXT:    neg a3, a3
-; CHECK-V-NEXT:    and a3, a3, a0
-; CHECK-V-NEXT:    addi a1, a1, -1
-; CHECK-V-NEXT:    seqz a1, a1
-; CHECK-V-NEXT:    addi a1, a1, -1
-; CHECK-V-NEXT:    slti a0, s1, 1
-; CHECK-V-NEXT:    neg a0, a0
-; CHECK-V-NEXT:    and a0, a0, s0
-; CHECK-V-NEXT:    addi s1, s1, -1
-; CHECK-V-NEXT:    seqz a5, s1
-; CHECK-V-NEXT:    addi a5, a5, -1
-; CHECK-V-NEXT:    and a0, a5, a0
+; CHECK-V-NEXT:    slti a1, a1, 1
+; CHECK-V-NEXT:    neg a3, a1
+; CHECK-V-NEXT:    slti a1, s1, 1
+; CHECK-V-NEXT:    neg a1, a1
+; CHECK-V-NEXT:    and a1, a1, s0
 ; CHECK-V-NEXT:    beqz a4, .LBB53_6
 ; CHECK-V-NEXT:  # %bb.5: # %entry
 ; CHECK-V-NEXT:    sgtz a4, a4
 ; CHECK-V-NEXT:    neg a4, a4
-; CHECK-V-NEXT:    and a0, a4, a0
+; CHECK-V-NEXT:    and a1, a4, a1
 ; CHECK-V-NEXT:  .LBB53_6: # %entry
-; CHECK-V-NEXT:    and a1, a1, a3
+; CHECK-V-NEXT:    and a0, a3, a0
 ; CHECK-V-NEXT:    beqz a2, .LBB53_8
 ; CHECK-V-NEXT:  # %bb.7: # %entry
 ; CHECK-V-NEXT:    sgtz a2, a2
 ; CHECK-V-NEXT:    neg a2, a2
-; CHECK-V-NEXT:    and a1, a2, a1
+; CHECK-V-NEXT:    and a0, a2, a0
 ; CHECK-V-NEXT:  .LBB53_8: # %entry
-; CHECK-V-NEXT:    sd a1, 8(sp)
-; CHECK-V-NEXT:    sd a0, 0(sp)
+; CHECK-V-NEXT:    sd a0, 8(sp)
+; CHECK-V-NEXT:    sd a1, 0(sp)
 ; CHECK-V-NEXT:    addi a0, sp, 8
 ; CHECK-V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-V-NEXT:    vle64.v v9, (a0)

diff  --git a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
index d3297d2a18089..d6376d3c884b1 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
@@ -1817,100 +1817,94 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vmov r0, r1, d9
 ; CHECK-NEXT:    bl __fixdfti
+; CHECK-NEXT:    mvn r11, #-2147483648
+; CHECK-NEXT:    cmp r1, r11
 ; CHECK-NEXT:    mov r10, r0
-; CHECK-NEXT:    movs r0, #0
+; CHECK-NEXT:    csel r0, r1, r11, lo
 ; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    mov r6, r1
-; CHECK-NEXT:    csel r1, r3, r0, mi
-; CHECK-NEXT:    mov r0, r3
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    andne.w r0, r2, r0, asr #31
-; CHECK-NEXT:    mvn r11, #-2147483648
-; CHECK-NEXT:    ands r0, r1
-; CHECK-NEXT:    cmp r6, r11
+; CHECK-NEXT:    mov r9, r1
 ; CHECK-NEXT:    mov r5, r3
-; CHECK-NEXT:    add.w r3, r0, #1
-; CHECK-NEXT:    csel r0, r6, r11, lo
+; CHECK-NEXT:    csel r1, r1, r11, mi
+; CHECK-NEXT:    orrs r3, r2
+; CHECK-NEXT:    str r3, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    csel r4, r0, r1, eq
+; CHECK-NEXT:    movs r0, #0
 ; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    csel r7, r6, r11, mi
-; CHECK-NEXT:    orrs r2, r5
+; CHECK-NEXT:    mov.w r1, #-2147483648
+; CHECK-NEXT:    csel r8, r5, r0, mi
+; CHECK-NEXT:    and.w r0, r2, r5, asr #31
+; CHECK-NEXT:    and.w r0, r0, r8
+; CHECK-NEXT:    cmp.w r8, #-1
+; CHECK-NEXT:    add.w r2, r0, #1
+; CHECK-NEXT:    csel r0, r4, r1, gt
+; CHECK-NEXT:    cmp.w r4, #-2147483648
 ; CHECK-NEXT:    str r2, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    csel r8, r0, r7, eq
-; CHECK-NEXT:    mov.w r2, #-2147483648
-; CHECK-NEXT:    cmp.w r1, #-1
-; CHECK-NEXT:    csel r0, r8, r2, gt
-; CHECK-NEXT:    cmp.w r8, #-2147483648
-; CHECK-NEXT:    str r1, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT:    csel r1, r8, r2, hi
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    mov.w r9, #0
+; CHECK-NEXT:    csel r1, r4, r1, hi
+; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    csel r0, r1, r0, eq
-; CHECK-NEXT:    str r3, [sp, #8] @ 4-byte Spill
 ; CHECK-NEXT:    str r0, [sp, #20] @ 4-byte Spill
 ; CHECK-NEXT:    vmov r0, r1, d8
 ; CHECK-NEXT:    bl __fixdfti
 ; CHECK-NEXT:    cmp r1, r11
-; CHECK-NEXT:    mov lr, r0
+; CHECK-NEXT:    mov r12, r0
 ; CHECK-NEXT:    csel r7, r1, r11, lo
 ; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    mov r0, r3
-; CHECK-NEXT:    csel r4, r1, r11, mi
-; CHECK-NEXT:    orrs r3, r2
-; CHECK-NEXT:    str r3, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    csel r7, r7, r4, eq
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csel r4, r0, r9, mi
-; CHECK-NEXT:    mov.w r3, #-2147483648
-; CHECK-NEXT:    cmp.w r4, #-1
-; CHECK-NEXT:    csel r9, r7, r3, gt
-; CHECK-NEXT:    cmp.w r7, #-2147483648
-; CHECK-NEXT:    csel r12, r7, r3, hi
-; CHECK-NEXT:    mov r3, r0
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    andne.w r3, r2, r3, asr #31
-; CHECK-NEXT:    and.w r2, r3, r4
-; CHECK-NEXT:    mov.w r3, #-1
+; CHECK-NEXT:    csel r6, r1, r11, mi
+; CHECK-NEXT:    orrs.w r0, r2, r3
+; CHECK-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    csel r6, r7, r6, eq
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    mov.w r0, #0
+; CHECK-NEXT:    csel r7, r3, r0, mi
+; CHECK-NEXT:    and.w r2, r2, r3, asr #31
+; CHECK-NEXT:    cmp.w r7, #-1
+; CHECK-NEXT:    mov.w r0, #-2147483648
+; CHECK-NEXT:    csel lr, r6, r0, gt
+; CHECK-NEXT:    cmp.w r6, #-2147483648
+; CHECK-NEXT:    and.w r2, r2, r7
+; CHECK-NEXT:    csel r0, r6, r0, hi
 ; CHECK-NEXT:    adds r2, #1
 ; CHECK-NEXT:    str r2, [sp] @ 4-byte Spill
-; CHECK-NEXT:    csel r12, r12, r9, eq
-; CHECK-NEXT:    cmp r6, r11
-; CHECK-NEXT:    csel r6, r10, r3, lo
-; CHECK-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    csel r6, r10, r6, eq
+; CHECK-NEXT:    csel r0, r0, lr, eq
+; CHECK-NEXT:    mov.w lr, #-1
+; CHECK-NEXT:    cmp r9, r11
+; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    csel r2, r10, lr, lo
+; CHECK-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    csel r2, r10, r2, eq
 ; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    csel r5, r10, r3, mi
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csel r5, r6, r5, eq
-; CHECK-NEXT:    cmp.w r8, #-2147483648
-; CHECK-NEXT:    mov.w r8, #0
-; CHECK-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    csel r6, r5, r8, hi
-; CHECK-NEXT:    csel r6, r5, r6, eq
-; CHECK-NEXT:    cmp.w r2, #-1
-; CHECK-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT:    csel r5, r5, r8, gt
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    csel r5, r6, r5, eq
-; CHECK-NEXT:    cmp r1, r11
-; CHECK-NEXT:    csel r1, lr, r3, lo
-; CHECK-NEXT:    csel r1, lr, r1, eq
+; CHECK-NEXT:    csel r5, r10, lr, mi
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csel r0, lr, r3, mi
-; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csel r2, r2, r5, eq
+; CHECK-NEXT:    cmp.w r4, #-2147483648
+; CHECK-NEXT:    mov.w r5, #0
+; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    csel r4, r2, r5, hi
+; CHECK-NEXT:    csel r4, r2, r4, eq
+; CHECK-NEXT:    cmp.w r8, #-1
+; CHECK-NEXT:    csel r2, r2, r5, gt
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    csel r2, r4, r2, eq
+; CHECK-NEXT:    cmp r1, r11
+; CHECK-NEXT:    csel r1, r12, lr, lo
+; CHECK-NEXT:    csel r1, r12, r1, eq
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    ldr r3, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    csel r0, r12, lr, mi
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    ldr r3, [sp] @ 4-byte Reload
 ; CHECK-NEXT:    csel r0, r1, r0, eq
-; CHECK-NEXT:    cmp.w r7, #-2147483648
-; CHECK-NEXT:    csel r1, r0, r8, hi
-; CHECK-NEXT:    ldr r2, [sp] @ 4-byte Reload
+; CHECK-NEXT:    cmp.w r6, #-2147483648
+; CHECK-NEXT:    csel r1, r0, r5, hi
 ; CHECK-NEXT:    csel r1, r0, r1, eq
-; CHECK-NEXT:    cmp.w r4, #-1
-; CHECK-NEXT:    csel r0, r0, r8, gt
-; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    cmp.w r7, #-1
+; CHECK-NEXT:    csel r0, r0, r5, gt
+; CHECK-NEXT:    cmp r3, #0
 ; CHECK-NEXT:    csel r0, r1, r0, eq
-; CHECK-NEXT:    vmov q0[2], q0[0], r0, r5
+; CHECK-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r2
 ; CHECK-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    vmov q0[3], q0[1], r12, r0
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r0
 ; CHECK-NEXT:    add sp, #24
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
@@ -2074,105 +2068,96 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-NEXT:    .pad #28
-; CHECK-NEXT:    sub sp, #28
-; CHECK-NEXT:    vmov r4, r0, d0
+; CHECK-NEXT:    .pad #20
+; CHECK-NEXT:    sub sp, #20
+; CHECK-NEXT:    vmov r7, r0, d0
 ; CHECK-NEXT:    bl __fixsfti
+; CHECK-NEXT:    mvn r9, #-2147483648
+; CHECK-NEXT:    cmp r1, r9
 ; CHECK-NEXT:    mov r11, r0
-; CHECK-NEXT:    movs r0, #0
+; CHECK-NEXT:    csel r0, r1, r9, lo
 ; CHECK-NEXT:    cmp r3, #0
 ; CHECK-NEXT:    mov r6, r1
-; CHECK-NEXT:    csel r1, r3, r0, mi
-; CHECK-NEXT:    mov r0, r3
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    andne.w r0, r2, r0, asr #31
-; CHECK-NEXT:    mvn r10, #-2147483648
-; CHECK-NEXT:    ands r0, r1
-; CHECK-NEXT:    cmp r6, r10
 ; CHECK-NEXT:    mov r5, r3
-; CHECK-NEXT:    add.w r3, r0, #1
-; CHECK-NEXT:    csel r0, r6, r10, lo
+; CHECK-NEXT:    csel r1, r1, r9, mi
+; CHECK-NEXT:    orrs r3, r2
+; CHECK-NEXT:    mov.w r8, #-2147483648
+; CHECK-NEXT:    csel r4, r0, r1, eq
+; CHECK-NEXT:    movs r0, #0
 ; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    csel r7, r6, r10, mi
-; CHECK-NEXT:    orrs r2, r5
-; CHECK-NEXT:    str r2, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT:    csel r8, r0, r7, eq
-; CHECK-NEXT:    mov.w r2, #-2147483648
-; CHECK-NEXT:    cmp.w r1, #-1
-; CHECK-NEXT:    csel r0, r8, r2, gt
-; CHECK-NEXT:    cmp.w r8, #-2147483648
-; CHECK-NEXT:    str r1, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    csel r1, r8, r2, hi
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    mov.w r9, #0
-; CHECK-NEXT:    csel r0, r1, r0, eq
 ; CHECK-NEXT:    str r3, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    str r0, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    csel r10, r5, r0, mi
+; CHECK-NEXT:    and.w r0, r2, r5, asr #31
+; CHECK-NEXT:    and.w r0, r0, r10
+; CHECK-NEXT:    cmp.w r10, #-1
+; CHECK-NEXT:    add.w r2, r0, #1
+; CHECK-NEXT:    csel r0, r4, r8, gt
+; CHECK-NEXT:    cmp.w r4, #-2147483648
+; CHECK-NEXT:    str r2, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    csel r1, r4, r8, hi
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csel r0, r1, r0, eq
+; CHECK-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r7
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    cmp r1, r10
+; CHECK-NEXT:    cmp r1, r9
 ; CHECK-NEXT:    mov lr, r0
-; CHECK-NEXT:    csel r7, r1, r10, lo
+; CHECK-NEXT:    csel r12, r1, r9, lo
 ; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    mov r0, r3
-; CHECK-NEXT:    csel r4, r1, r10, mi
-; CHECK-NEXT:    orrs.w r3, r2, r0
-; CHECK-NEXT:    str r3, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT:    csel r7, r7, r4, eq
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csel r4, r0, r9, mi
-; CHECK-NEXT:    mov.w r3, #-2147483648
-; CHECK-NEXT:    cmp.w r4, #-1
-; CHECK-NEXT:    csel r9, r7, r3, gt
+; CHECK-NEXT:    csel r7, r1, r9, mi
+; CHECK-NEXT:    orrs.w r0, r2, r3
+; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    csel r7, r12, r7, eq
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    mov.w r0, #0
+; CHECK-NEXT:    csel r12, r3, r0, mi
+; CHECK-NEXT:    and.w r2, r2, r3, asr #31
+; CHECK-NEXT:    cmp.w r12, #-1
+; CHECK-NEXT:    and.w r2, r2, r12
+; CHECK-NEXT:    csel r0, r7, r8, gt
 ; CHECK-NEXT:    cmp.w r7, #-2147483648
-; CHECK-NEXT:    csel r12, r7, r3, hi
-; CHECK-NEXT:    mov r3, r0
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    andne.w r3, r2, r3, asr #31
-; CHECK-NEXT:    and.w r2, r3, r4
-; CHECK-NEXT:    mov.w r3, #-1
+; CHECK-NEXT:    csel r8, r7, r8, hi
 ; CHECK-NEXT:    adds r2, #1
-; CHECK-NEXT:    str r2, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    csel r12, r12, r9, eq
-; CHECK-NEXT:    cmp r6, r10
-; CHECK-NEXT:    csel r6, r11, r3, lo
-; CHECK-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    csel r8, r8, r0, eq
+; CHECK-NEXT:    mov.w r0, #-1
+; CHECK-NEXT:    cmp r6, r9
+; CHECK-NEXT:    str r2, [sp] @ 4-byte Spill
+; CHECK-NEXT:    csel r6, r11, r0, lo
+; CHECK-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
 ; CHECK-NEXT:    csel r6, r11, r6, eq
 ; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    csel r5, r11, r3, mi
+; CHECK-NEXT:    csel r5, r11, r0, mi
 ; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    csel r5, r6, r5, eq
-; CHECK-NEXT:    cmp.w r8, #-2147483648
-; CHECK-NEXT:    mov.w r8, #0
-; CHECK-NEXT:    ldr r2, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    csel r6, r5, r8, hi
-; CHECK-NEXT:    csel r6, r5, r6, eq
-; CHECK-NEXT:    cmp.w r2, #-1
-; CHECK-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    csel r5, r5, r8, gt
-; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    cmp.w r4, #-2147483648
+; CHECK-NEXT:    mov.w r6, #0
 ; CHECK-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT:    csel r5, r6, r5, eq
-; CHECK-NEXT:    cmp r1, r10
-; CHECK-NEXT:    csel r1, lr, r3, lo
+; CHECK-NEXT:    csel r4, r5, r6, hi
+; CHECK-NEXT:    csel r4, r5, r4, eq
+; CHECK-NEXT:    cmp.w r10, #-1
+; CHECK-NEXT:    csel r5, r5, r6, gt
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csel r4, r4, r5, eq
+; CHECK-NEXT:    cmp r1, r9
+; CHECK-NEXT:    csel r1, lr, r0, lo
+; CHECK-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
 ; CHECK-NEXT:    csel r1, lr, r1, eq
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csel r0, lr, r3, mi
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csel r0, lr, r0, mi
 ; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    csel r0, r1, r0, eq
 ; CHECK-NEXT:    cmp.w r7, #-2147483648
-; CHECK-NEXT:    csel r1, r0, r8, hi
-; CHECK-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    csel r1, r0, r6, hi
+; CHECK-NEXT:    ldr r2, [sp] @ 4-byte Reload
 ; CHECK-NEXT:    csel r1, r0, r1, eq
-; CHECK-NEXT:    cmp.w r4, #-1
-; CHECK-NEXT:    csel r0, r0, r8, gt
+; CHECK-NEXT:    cmp.w r12, #-1
+; CHECK-NEXT:    csel r0, r0, r6, gt
 ; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    csel r0, r1, r0, eq
-; CHECK-NEXT:    vmov q0[2], q0[0], r0, r5
-; CHECK-NEXT:    ldr r0, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT:    vmov q0[3], q0[1], r12, r0
-; CHECK-NEXT:    add sp, #28
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r4
+; CHECK-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[3], q0[1], r8, r0
+; CHECK-NEXT:    add sp, #20
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 entry:
   %conv = fptosi <2 x float> %x to <2 x i128>
@@ -2335,100 +2320,91 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEXT:    vmov.u16 r0, q0[1]
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    bl __fixhfti
+; CHECK-NEXT:    mvn r11, #-2147483648
+; CHECK-NEXT:    cmp r1, r11
 ; CHECK-NEXT:    mov r10, r0
-; CHECK-NEXT:    movs r0, #0
+; CHECK-NEXT:    csel r0, r1, r11, lo
 ; CHECK-NEXT:    cmp r3, #0
 ; CHECK-NEXT:    mov r6, r1
-; CHECK-NEXT:    csel r1, r3, r0, mi
-; CHECK-NEXT:    mov r0, r3
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    andne.w r0, r2, r0, asr #31
-; CHECK-NEXT:    mvn r11, #-2147483648
-; CHECK-NEXT:    ands r0, r1
-; CHECK-NEXT:    cmp r6, r11
 ; CHECK-NEXT:    mov r5, r3
-; CHECK-NEXT:    add.w r3, r0, #1
-; CHECK-NEXT:    csel r0, r6, r11, lo
+; CHECK-NEXT:    csel r1, r1, r11, mi
+; CHECK-NEXT:    orrs r3, r2
+; CHECK-NEXT:    mov.w r9, #-2147483648
+; CHECK-NEXT:    csel r4, r0, r1, eq
+; CHECK-NEXT:    movs r0, #0
 ; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    csel r7, r6, r11, mi
-; CHECK-NEXT:    orrs r2, r5
+; CHECK-NEXT:    str r3, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    csel r8, r5, r0, mi
+; CHECK-NEXT:    and.w r0, r2, r5, asr #31
+; CHECK-NEXT:    and.w r0, r0, r8
+; CHECK-NEXT:    cmp.w r8, #-1
+; CHECK-NEXT:    add.w r2, r0, #1
+; CHECK-NEXT:    csel r0, r4, r9, gt
+; CHECK-NEXT:    cmp.w r4, #-2147483648
 ; CHECK-NEXT:    str r2, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    csel r8, r0, r7, eq
-; CHECK-NEXT:    mov.w r2, #-2147483648
-; CHECK-NEXT:    cmp.w r1, #-1
-; CHECK-NEXT:    csel r0, r8, r2, gt
-; CHECK-NEXT:    cmp.w r8, #-2147483648
-; CHECK-NEXT:    str r1, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT:    csel r1, r8, r2, hi
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    mov.w r9, #0
+; CHECK-NEXT:    csel r1, r4, r9, hi
+; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    csel r0, r1, r0, eq
-; CHECK-NEXT:    str r3, [sp, #8] @ 4-byte Spill
 ; CHECK-NEXT:    str r0, [sp, #20] @ 4-byte Spill
 ; CHECK-NEXT:    vmov.u16 r0, q4[0]
 ; CHECK-NEXT:    bl __fixhfti
 ; CHECK-NEXT:    cmp r1, r11
 ; CHECK-NEXT:    mov lr, r0
-; CHECK-NEXT:    csel r7, r1, r11, lo
+; CHECK-NEXT:    csel r12, r1, r11, lo
 ; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    mov r0, r3
-; CHECK-NEXT:    csel r4, r1, r11, mi
-; CHECK-NEXT:    orrs r3, r2
-; CHECK-NEXT:    str r3, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    csel r7, r7, r4, eq
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csel r4, r0, r9, mi
-; CHECK-NEXT:    mov.w r3, #-2147483648
-; CHECK-NEXT:    cmp.w r4, #-1
-; CHECK-NEXT:    csel r9, r7, r3, gt
+; CHECK-NEXT:    csel r7, r1, r11, mi
+; CHECK-NEXT:    orrs.w r0, r2, r3
+; CHECK-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    csel r7, r12, r7, eq
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    mov.w r0, #0
+; CHECK-NEXT:    csel r12, r3, r0, mi
+; CHECK-NEXT:    and.w r2, r2, r3, asr #31
+; CHECK-NEXT:    cmp.w r12, #-1
+; CHECK-NEXT:    and.w r2, r2, r12
+; CHECK-NEXT:    csel r0, r7, r9, gt
 ; CHECK-NEXT:    cmp.w r7, #-2147483648
-; CHECK-NEXT:    csel r12, r7, r3, hi
-; CHECK-NEXT:    mov r3, r0
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    andne.w r3, r2, r3, asr #31
-; CHECK-NEXT:    and.w r2, r3, r4
-; CHECK-NEXT:    mov.w r3, #-1
+; CHECK-NEXT:    csel r9, r7, r9, hi
 ; CHECK-NEXT:    adds r2, #1
-; CHECK-NEXT:    str r2, [sp] @ 4-byte Spill
-; CHECK-NEXT:    csel r12, r12, r9, eq
+; CHECK-NEXT:    csel r9, r9, r0, eq
+; CHECK-NEXT:    mov.w r0, #-1
 ; CHECK-NEXT:    cmp r6, r11
-; CHECK-NEXT:    csel r6, r10, r3, lo
-; CHECK-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    str r2, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    csel r6, r10, r0, lo
+; CHECK-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
 ; CHECK-NEXT:    csel r6, r10, r6, eq
 ; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    csel r5, r10, r3, mi
+; CHECK-NEXT:    csel r5, r10, r0, mi
 ; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    csel r5, r6, r5, eq
-; CHECK-NEXT:    cmp.w r8, #-2147483648
-; CHECK-NEXT:    mov.w r8, #0
-; CHECK-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    csel r6, r5, r8, hi
-; CHECK-NEXT:    csel r6, r5, r6, eq
-; CHECK-NEXT:    cmp.w r2, #-1
-; CHECK-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT:    csel r5, r5, r8, gt
+; CHECK-NEXT:    cmp.w r4, #-2147483648
+; CHECK-NEXT:    mov.w r6, #0
+; CHECK-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    csel r4, r5, r6, hi
+; CHECK-NEXT:    csel r4, r5, r4, eq
+; CHECK-NEXT:    cmp.w r8, #-1
+; CHECK-NEXT:    csel r5, r5, r6, gt
 ; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    csel r5, r6, r5, eq
+; CHECK-NEXT:    csel r4, r4, r5, eq
 ; CHECK-NEXT:    cmp r1, r11
-; CHECK-NEXT:    csel r1, lr, r3, lo
+; CHECK-NEXT:    csel r1, lr, r0, lo
+; CHECK-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
 ; CHECK-NEXT:    csel r1, lr, r1, eq
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csel r0, lr, r3, mi
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csel r0, lr, r0, mi
 ; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    csel r0, r1, r0, eq
 ; CHECK-NEXT:    cmp.w r7, #-2147483648
-; CHECK-NEXT:    csel r1, r0, r8, hi
-; CHECK-NEXT:    ldr r2, [sp] @ 4-byte Reload
+; CHECK-NEXT:    csel r1, r0, r6, hi
+; CHECK-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
 ; CHECK-NEXT:    csel r1, r0, r1, eq
-; CHECK-NEXT:    cmp.w r4, #-1
-; CHECK-NEXT:    csel r0, r0, r8, gt
+; CHECK-NEXT:    cmp.w r12, #-1
+; CHECK-NEXT:    csel r0, r0, r6, gt
 ; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    csel r0, r1, r0, eq
-; CHECK-NEXT:    vmov q0[2], q0[0], r0, r5
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r4
 ; CHECK-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    vmov q0[3], q0[1], r12, r0
+; CHECK-NEXT:    vmov q0[3], q0[1], r9, r0
 ; CHECK-NEXT:    add sp, #24
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4

diff  --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
index 9a39c62591e1b..e539f00d8e518 100644
--- a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
@@ -574,16 +574,11 @@ define i64 @utest_f64i64_cse_combine(double %x) #0 {
 ; CHECK-NEXT:    i32.const 16
 ; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    global.set __stack_pointer
-; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 3
 ; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 2
 ; CHECK-NEXT:    i64.eqz
 ; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    i64.const 1
-; CHECK-NEXT:    i64.eq
-; CHECK-NEXT:    i64.select
 ; CHECK-NEXT:    # fallthrough-return
 entry:
   %conv = fptoui double %x to i128
@@ -677,7 +672,6 @@ define i64 @ustest_f64i64_cse_combine(double %x) #0 {
 ; CHECK-NEXT:    i32.const 16
 ; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    global.set __stack_pointer
-; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 3
 ; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 2
@@ -685,10 +679,6 @@ define i64 @ustest_f64i64_cse_combine(double %x) #0 {
 ; CHECK-NEXT:    i64.lt_s
 ; CHECK-NEXT:    local.tee 1
 ; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    i64.const 1
-; CHECK-NEXT:    i64.eq
-; CHECK-NEXT:    i64.select
 ; CHECK-NEXT:    local.tee 3
 ; CHECK-NEXT:    local.get 3
 ; CHECK-NEXT:    i64.const 0
@@ -867,7 +857,6 @@ define i64 @ustest_f32i64_cse_combine(float %x) #0 {
 ; CHECK-NEXT:    i32.const 16
 ; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    global.set __stack_pointer
-; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 3
 ; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 2
@@ -875,10 +864,6 @@ define i64 @ustest_f32i64_cse_combine(float %x) #0 {
 ; CHECK-NEXT:    i64.lt_s
 ; CHECK-NEXT:    local.tee 1
 ; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    i64.const 1
-; CHECK-NEXT:    i64.eq
-; CHECK-NEXT:    i64.select
 ; CHECK-NEXT:    local.tee 3
 ; CHECK-NEXT:    local.get 3
 ; CHECK-NEXT:    i64.const 0
@@ -1509,16 +1494,11 @@ define i64 @utest_f64i64_mm(double %x) {
 ; CHECK-NEXT:    i32.const 16
 ; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    global.set __stack_pointer
-; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 3
 ; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 2
 ; CHECK-NEXT:    i64.eqz
 ; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    i64.const 1
-; CHECK-NEXT:    i64.eq
-; CHECK-NEXT:    i64.select
 ; CHECK-NEXT:    # fallthrough-return
 entry:
   %conv = fptoui double %x to i128
@@ -1552,7 +1532,6 @@ define i64 @ustest_f64i64_mm(double %x) {
 ; CHECK-NEXT:    i32.const 16
 ; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    global.set __stack_pointer
-; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 3
 ; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 2
@@ -1560,10 +1539,6 @@ define i64 @ustest_f64i64_mm(double %x) {
 ; CHECK-NEXT:    i64.lt_s
 ; CHECK-NEXT:    local.tee 1
 ; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    i64.const 1
-; CHECK-NEXT:    i64.eq
-; CHECK-NEXT:    i64.select
 ; CHECK-NEXT:    local.tee 3
 ; CHECK-NEXT:    local.get 3
 ; CHECK-NEXT:    i64.const 0
@@ -1627,16 +1602,11 @@ define i64 @utest_f32i64_mm(float %x) {
 ; CHECK-NEXT:    i32.const 16
 ; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    global.set __stack_pointer
-; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 3
 ; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 2
 ; CHECK-NEXT:    i64.eqz
 ; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    i64.const 1
-; CHECK-NEXT:    i64.eq
-; CHECK-NEXT:    i64.select
 ; CHECK-NEXT:    # fallthrough-return
 entry:
   %conv = fptoui float %x to i128
@@ -1670,7 +1640,6 @@ define i64 @ustest_f32i64_mm(float %x) {
 ; CHECK-NEXT:    i32.const 16
 ; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    global.set __stack_pointer
-; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 3
 ; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 2
@@ -1678,10 +1647,6 @@ define i64 @ustest_f32i64_mm(float %x) {
 ; CHECK-NEXT:    i64.lt_s
 ; CHECK-NEXT:    local.tee 1
 ; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    i64.const 1
-; CHECK-NEXT:    i64.eq
-; CHECK-NEXT:    i64.select
 ; CHECK-NEXT:    local.tee 3
 ; CHECK-NEXT:    local.get 3
 ; CHECK-NEXT:    i64.const 0
@@ -1749,16 +1714,11 @@ define i64 @utesth_f16i64_mm(half %x) {
 ; CHECK-NEXT:    i32.const 16
 ; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    global.set __stack_pointer
-; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 3
 ; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 2
 ; CHECK-NEXT:    i64.eqz
 ; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    i64.const 1
-; CHECK-NEXT:    i64.eq
-; CHECK-NEXT:    i64.select
 ; CHECK-NEXT:    # fallthrough-return
 entry:
   %conv = fptoui half %x to i128
@@ -1794,7 +1754,6 @@ define i64 @ustest_f16i64_mm(half %x) {
 ; CHECK-NEXT:    i32.const 16
 ; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    global.set __stack_pointer
-; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 3
 ; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 2
@@ -1802,10 +1761,6 @@ define i64 @ustest_f16i64_mm(half %x) {
 ; CHECK-NEXT:    i64.lt_s
 ; CHECK-NEXT:    local.tee 1
 ; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    i64.const 1
-; CHECK-NEXT:    i64.eq
-; CHECK-NEXT:    i64.select
 ; CHECK-NEXT:    local.tee 3
 ; CHECK-NEXT:    local.get 3
 ; CHECK-NEXT:    i64.const 0

diff  --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
index 007802dd0c035..facd15f3270d3 100644
--- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
@@ -2309,27 +2309,17 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT:    i32.const 32
 ; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    global.set __stack_pointer
-; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 5
 ; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 4
 ; CHECK-NEXT:    i64.eqz
 ; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    local.get 4
-; CHECK-NEXT:    i64.const 1
-; CHECK-NEXT:    i64.eq
-; CHECK-NEXT:    i64.select
 ; CHECK-NEXT:    i64x2.splat
-; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 3
 ; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 2
 ; CHECK-NEXT:    i64.eqz
 ; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    i64.const 1
-; CHECK-NEXT:    i64.eq
-; CHECK-NEXT:    i64.select
 ; CHECK-NEXT:    i64x2.replace_lane 1
 ; CHECK-NEXT:    # fallthrough-return
 entry:
@@ -2381,7 +2371,6 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT:    i32.const 32
 ; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    global.set __stack_pointer
-; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 5
 ; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 4
@@ -2389,10 +2378,6 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT:    i64.lt_s
 ; CHECK-NEXT:    local.tee 1
 ; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    local.get 4
-; CHECK-NEXT:    i64.const 1
-; CHECK-NEXT:    i64.eq
-; CHECK-NEXT:    i64.select
 ; CHECK-NEXT:    local.tee 5
 ; CHECK-NEXT:    local.get 5
 ; CHECK-NEXT:    i64.const 0
@@ -2408,7 +2393,6 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT:    i64.eqz
 ; CHECK-NEXT:    i64.select
 ; CHECK-NEXT:    i64x2.splat
-; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 3
 ; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 2
@@ -2416,10 +2400,6 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT:    i64.lt_s
 ; CHECK-NEXT:    local.tee 1
 ; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    i64.const 1
-; CHECK-NEXT:    i64.eq
-; CHECK-NEXT:    i64.select
 ; CHECK-NEXT:    local.tee 4
 ; CHECK-NEXT:    local.get 4
 ; CHECK-NEXT:    i64.const 0
@@ -2611,27 +2591,17 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT:    i32.const 32
 ; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    global.set __stack_pointer
-; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 5
 ; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 4
 ; CHECK-NEXT:    i64.eqz
 ; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    local.get 4
-; CHECK-NEXT:    i64.const 1
-; CHECK-NEXT:    i64.eq
-; CHECK-NEXT:    i64.select
 ; CHECK-NEXT:    i64x2.splat
-; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 3
 ; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 2
 ; CHECK-NEXT:    i64.eqz
 ; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    i64.const 1
-; CHECK-NEXT:    i64.eq
-; CHECK-NEXT:    i64.select
 ; CHECK-NEXT:    i64x2.replace_lane 1
 ; CHECK-NEXT:    # fallthrough-return
 entry:
@@ -2683,7 +2653,6 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT:    i32.const 32
 ; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    global.set __stack_pointer
-; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 5
 ; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 4
@@ -2691,10 +2660,6 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT:    i64.lt_s
 ; CHECK-NEXT:    local.tee 1
 ; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    local.get 4
-; CHECK-NEXT:    i64.const 1
-; CHECK-NEXT:    i64.eq
-; CHECK-NEXT:    i64.select
 ; CHECK-NEXT:    local.tee 5
 ; CHECK-NEXT:    local.get 5
 ; CHECK-NEXT:    i64.const 0
@@ -2710,7 +2675,6 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT:    i64.eqz
 ; CHECK-NEXT:    i64.select
 ; CHECK-NEXT:    i64x2.splat
-; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 3
 ; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 2
@@ -2718,10 +2682,6 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT:    i64.lt_s
 ; CHECK-NEXT:    local.tee 1
 ; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    i64.const 1
-; CHECK-NEXT:    i64.eq
-; CHECK-NEXT:    i64.select
 ; CHECK-NEXT:    local.tee 4
 ; CHECK-NEXT:    local.get 4
 ; CHECK-NEXT:    i64.const 0
@@ -2917,27 +2877,17 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEXT:    i32.const 32
 ; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    global.set __stack_pointer
-; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 6
 ; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 5
 ; CHECK-NEXT:    i64.eqz
 ; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    local.get 5
-; CHECK-NEXT:    i64.const 1
-; CHECK-NEXT:    i64.eq
-; CHECK-NEXT:    i64.select
 ; CHECK-NEXT:    i64x2.splat
-; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 4
 ; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 3
 ; CHECK-NEXT:    i64.eqz
 ; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    local.get 3
-; CHECK-NEXT:    i64.const 1
-; CHECK-NEXT:    i64.eq
-; CHECK-NEXT:    i64.select
 ; CHECK-NEXT:    i64x2.replace_lane 1
 ; CHECK-NEXT:    # fallthrough-return
 entry:
@@ -2991,7 +2941,6 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEXT:    i32.const 32
 ; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    global.set __stack_pointer
-; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 6
 ; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 5
@@ -2999,10 +2948,6 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEXT:    i64.lt_s
 ; CHECK-NEXT:    local.tee 2
 ; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    local.get 5
-; CHECK-NEXT:    i64.const 1
-; CHECK-NEXT:    i64.eq
-; CHECK-NEXT:    i64.select
 ; CHECK-NEXT:    local.tee 6
 ; CHECK-NEXT:    local.get 6
 ; CHECK-NEXT:    i64.const 0
@@ -3018,7 +2963,6 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEXT:    i64.eqz
 ; CHECK-NEXT:    i64.select
 ; CHECK-NEXT:    i64x2.splat
-; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 4
 ; CHECK-NEXT:    i64.const 0
 ; CHECK-NEXT:    local.get 3
@@ -3026,10 +2970,6 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEXT:    i64.lt_s
 ; CHECK-NEXT:    local.tee 2
 ; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    local.get 3
-; CHECK-NEXT:    i64.const 1
-; CHECK-NEXT:    i64.eq
-; CHECK-NEXT:    i64.select
 ; CHECK-NEXT:    local.tee 5
 ; CHECK-NEXT:    local.get 5
 ; CHECK-NEXT:    i64.const 0

diff  --git a/llvm/test/CodeGen/X86/fpclamptosat.ll b/llvm/test/CodeGen/X86/fpclamptosat.ll
index ac6d9d359e5eb..87e9a587d31ea 100644
--- a/llvm/test/CodeGen/X86/fpclamptosat.ll
+++ b/llvm/test/CodeGen/X86/fpclamptosat.ll
@@ -1081,8 +1081,6 @@ define i64 @utest_f64i64_mm(double %x) {
 ; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    testq %rdx, %rdx
 ; CHECK-NEXT:    cmovneq %rcx, %rax
-; CHECK-NEXT:    cmpq $1, %rdx
-; CHECK-NEXT:    cmoveq %rcx, %rax
 ; CHECK-NEXT:    popq %rcx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
@@ -1101,11 +1099,9 @@ define i64 @ustest_f64i64_mm(double %x) {
 ; CHECK-NEXT:    callq __fixdfti@PLT
 ; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    testq %rdx, %rdx
+; CHECK-NEXT:    cmovgq %rcx, %rax
 ; CHECK-NEXT:    movl $1, %esi
 ; CHECK-NEXT:    cmovleq %rdx, %rsi
-; CHECK-NEXT:    cmovgq %rcx, %rax
-; CHECK-NEXT:    cmpq $1, %rdx
-; CHECK-NEXT:    cmoveq %rcx, %rax
 ; CHECK-NEXT:    testq %rsi, %rsi
 ; CHECK-NEXT:    cmovsq %rcx, %rax
 ; CHECK-NEXT:    popq %rcx
@@ -1147,8 +1143,6 @@ define i64 @utest_f32i64_mm(float %x) {
 ; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    testq %rdx, %rdx
 ; CHECK-NEXT:    cmovneq %rcx, %rax
-; CHECK-NEXT:    cmpq $1, %rdx
-; CHECK-NEXT:    cmoveq %rcx, %rax
 ; CHECK-NEXT:    popq %rcx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
@@ -1167,11 +1161,9 @@ define i64 @ustest_f32i64_mm(float %x) {
 ; CHECK-NEXT:    callq __fixsfti@PLT
 ; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    testq %rdx, %rdx
+; CHECK-NEXT:    cmovgq %rcx, %rax
 ; CHECK-NEXT:    movl $1, %esi
 ; CHECK-NEXT:    cmovleq %rdx, %rsi
-; CHECK-NEXT:    cmovgq %rcx, %rax
-; CHECK-NEXT:    cmpq $1, %rdx
-; CHECK-NEXT:    cmoveq %rcx, %rax
 ; CHECK-NEXT:    testq %rsi, %rsi
 ; CHECK-NEXT:    cmovsq %rcx, %rax
 ; CHECK-NEXT:    popq %rcx
@@ -1221,8 +1213,6 @@ define i64 @utesth_f16i64_mm(half %x) {
 ; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    testq %rdx, %rdx
 ; CHECK-NEXT:    cmovneq %rcx, %rax
-; CHECK-NEXT:    cmpq $1, %rdx
-; CHECK-NEXT:    cmoveq %rcx, %rax
 ; CHECK-NEXT:    popq %rcx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
@@ -1241,11 +1231,9 @@ define i64 @ustest_f16i64_mm(half %x) {
 ; CHECK-NEXT:    callq __fixhfti@PLT
 ; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    testq %rdx, %rdx
+; CHECK-NEXT:    cmovgq %rcx, %rax
 ; CHECK-NEXT:    movl $1, %esi
 ; CHECK-NEXT:    cmovleq %rdx, %rsi
-; CHECK-NEXT:    cmovgq %rcx, %rax
-; CHECK-NEXT:    cmpq $1, %rdx
-; CHECK-NEXT:    cmoveq %rcx, %rax
 ; CHECK-NEXT:    testq %rsi, %rsi
 ; CHECK-NEXT:    cmovsq %rcx, %rax
 ; CHECK-NEXT:    popq %rcx

diff  --git a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll
index c351c1b82cf19..22fbc1c55b3d6 100644
--- a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll
@@ -2762,12 +2762,8 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    testq %rdx, %rdx
 ; CHECK-NEXT:    cmovneq %rcx, %rax
-; CHECK-NEXT:    cmpq $1, %rdx
-; CHECK-NEXT:    cmoveq %rcx, %rax
 ; CHECK-NEXT:    testq %r14, %r14
 ; CHECK-NEXT:    cmovneq %rcx, %rbx
-; CHECK-NEXT:    cmpq $1, %r14
-; CHECK-NEXT:    cmoveq %rcx, %rbx
 ; CHECK-NEXT:    movq %rbx, %xmm0
 ; CHECK-NEXT:    movq %rax, %xmm1
 ; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
@@ -2805,20 +2801,15 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT:    callq __fixdfti@PLT
 ; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    testq %rdx, %rdx
-; CHECK-NEXT:    movl $1, %esi
-; CHECK-NEXT:    movl $1, %edi
-; CHECK-NEXT:    cmovleq %rdx, %rdi
 ; CHECK-NEXT:    cmovgq %rcx, %rax
-; CHECK-NEXT:    cmpq $1, %rdx
-; CHECK-NEXT:    cmoveq %rcx, %rax
+; CHECK-NEXT:    movl $1, %esi
+; CHECK-NEXT:    cmovgq %rsi, %rdx
 ; CHECK-NEXT:    testq %r14, %r14
-; CHECK-NEXT:    cmovleq %r14, %rsi
 ; CHECK-NEXT:    cmovgq %rcx, %rbx
-; CHECK-NEXT:    cmpq $1, %r14
-; CHECK-NEXT:    cmoveq %rcx, %rbx
+; CHECK-NEXT:    cmovleq %r14, %rsi
 ; CHECK-NEXT:    testq %rsi, %rsi
 ; CHECK-NEXT:    cmovsq %rcx, %rbx
-; CHECK-NEXT:    testq %rdi, %rdi
+; CHECK-NEXT:    testq %rdx, %rdx
 ; CHECK-NEXT:    cmovsq %rcx, %rax
 ; CHECK-NEXT:    movq %rax, %xmm0
 ; CHECK-NEXT:    movq %rbx, %xmm1
@@ -2926,12 +2917,8 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    testq %rdx, %rdx
 ; CHECK-NEXT:    cmovneq %rcx, %rax
-; CHECK-NEXT:    cmpq $1, %rdx
-; CHECK-NEXT:    cmoveq %rcx, %rax
 ; CHECK-NEXT:    testq %r14, %r14
 ; CHECK-NEXT:    cmovneq %rcx, %rbx
-; CHECK-NEXT:    cmpq $1, %r14
-; CHECK-NEXT:    cmoveq %rcx, %rbx
 ; CHECK-NEXT:    movq %rbx, %xmm0
 ; CHECK-NEXT:    movq %rax, %xmm1
 ; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
@@ -2969,20 +2956,15 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT:    callq __fixsfti@PLT
 ; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    testq %rdx, %rdx
-; CHECK-NEXT:    movl $1, %esi
-; CHECK-NEXT:    movl $1, %edi
-; CHECK-NEXT:    cmovleq %rdx, %rdi
 ; CHECK-NEXT:    cmovgq %rcx, %rax
-; CHECK-NEXT:    cmpq $1, %rdx
-; CHECK-NEXT:    cmoveq %rcx, %rax
+; CHECK-NEXT:    movl $1, %esi
+; CHECK-NEXT:    cmovgq %rsi, %rdx
 ; CHECK-NEXT:    testq %r14, %r14
-; CHECK-NEXT:    cmovleq %r14, %rsi
 ; CHECK-NEXT:    cmovgq %rcx, %rbx
-; CHECK-NEXT:    cmpq $1, %r14
-; CHECK-NEXT:    cmoveq %rcx, %rbx
+; CHECK-NEXT:    cmovleq %r14, %rsi
 ; CHECK-NEXT:    testq %rsi, %rsi
 ; CHECK-NEXT:    cmovsq %rcx, %rbx
-; CHECK-NEXT:    testq %rdi, %rdi
+; CHECK-NEXT:    testq %rdx, %rdx
 ; CHECK-NEXT:    cmovsq %rcx, %rax
 ; CHECK-NEXT:    movq %rax, %xmm0
 ; CHECK-NEXT:    movq %rbx, %xmm1
@@ -3091,12 +3073,8 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    testq %rdx, %rdx
 ; CHECK-NEXT:    cmovneq %rcx, %rax
-; CHECK-NEXT:    cmpq $1, %rdx
-; CHECK-NEXT:    cmoveq %rcx, %rax
 ; CHECK-NEXT:    testq %r14, %r14
 ; CHECK-NEXT:    cmovneq %rcx, %rbx
-; CHECK-NEXT:    cmpq $1, %r14
-; CHECK-NEXT:    cmoveq %rcx, %rbx
 ; CHECK-NEXT:    movq %rbx, %xmm0
 ; CHECK-NEXT:    movq %rax, %xmm1
 ; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
@@ -3134,20 +3112,15 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEXT:    callq __fixhfti@PLT
 ; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    testq %rdx, %rdx
-; CHECK-NEXT:    movl $1, %esi
-; CHECK-NEXT:    movl $1, %edi
-; CHECK-NEXT:    cmovleq %rdx, %rdi
 ; CHECK-NEXT:    cmovgq %rcx, %rax
-; CHECK-NEXT:    cmpq $1, %rdx
-; CHECK-NEXT:    cmoveq %rcx, %rax
+; CHECK-NEXT:    movl $1, %esi
+; CHECK-NEXT:    cmovgq %rsi, %rdx
 ; CHECK-NEXT:    testq %r14, %r14
-; CHECK-NEXT:    cmovleq %r14, %rsi
 ; CHECK-NEXT:    cmovgq %rcx, %rbx
-; CHECK-NEXT:    cmpq $1, %r14
-; CHECK-NEXT:    cmoveq %rcx, %rbx
+; CHECK-NEXT:    cmovleq %r14, %rsi
 ; CHECK-NEXT:    testq %rsi, %rsi
 ; CHECK-NEXT:    cmovsq %rcx, %rbx
-; CHECK-NEXT:    testq %rdi, %rdi
+; CHECK-NEXT:    testq %rdx, %rdx
 ; CHECK-NEXT:    cmovsq %rcx, %rax
 ; CHECK-NEXT:    movq %rax, %xmm0
 ; CHECK-NEXT:    movq %rbx, %xmm1

diff  --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
index b7387651f8c44..f0ce2312c1c68 100644
--- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
@@ -370,123 +370,117 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $88, %esp
+; X86-NEXT:    subl $64, %esp
 ; X86-NEXT:    movl 8(%ebp), %ecx
-; X86-NEXT:    movl 12(%ebp), %eax
-; X86-NEXT:    movl 20(%ebp), %edi
-; X86-NEXT:    sarl $31, %edi
-; X86-NEXT:    movl %eax, %ebx
-; X86-NEXT:    sarl $31, %ebx
-; X86-NEXT:    movl %ebx, %edx
-; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    shldl $31, %eax, %edx
-; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    shldl $31, %ecx, %eax
-; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    movl 12(%ebp), %ebx
+; X86-NEXT:    movl 20(%ebp), %edx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    sarl $31, %esi
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    sarl $31, %eax
+; X86-NEXT:    movl %eax, %edi
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shldl $31, %ebx, %edi
+; X86-NEXT:    shldl $31, %ecx, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    shll $31, %ecx
 ; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl 20(%ebp)
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    pushl %edx
 ; X86-NEXT:    pushl 16(%ebp)
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %ebx
-; X86-NEXT:    pushl %edx
-; X86-NEXT:    pushl %esi
 ; X86-NEXT:    pushl %ecx
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    pushl %eax
-; X86-NEXT:    calll __divti3
+; X86-NEXT:    calll __modti3
 ; X86-NEXT:    addl $32, %esp
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    subl $1, %esi
-; X86-NEXT:    movl %ecx, %eax
-; X86-NEXT:    sbbl $0, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl %edx, %eax
-; X86-NEXT:    sbbl $0, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    sbbl $0, %ebx
-; X86-NEXT:    testl %edi, %edi
+; X86-NEXT:    testl %esi, %esi
 ; X86-NEXT:    sets %al
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT:    testl %ecx, %ecx
-; X86-NEXT:    sets %dl
-; X86-NEXT:    xorb %al, %dl
-; X86-NEXT:    movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    sets %cl
+; X86-NEXT:    xorb %al, %cl
+; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    orl %ecx, %eax
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %edi
+; X86-NEXT:    setne %bh
+; X86-NEXT:    movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    pushl %esi
 ; X86-NEXT:    pushl 20(%ebp)
 ; X86-NEXT:    pushl 16(%ebp)
-; X86-NEXT:    pushl %ecx
-; X86-NEXT:    pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    pushl %edx
+; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
 ; X86-NEXT:    pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
 ; X86-NEXT:    pushl %eax
-; X86-NEXT:    calll __modti3
+; X86-NEXT:    calll __divti3
 ; X86-NEXT:    addl $32, %esp
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    orl %eax, %ecx
-; X86-NEXT:    setne %al
-; X86-NEXT:    testb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X86-NEXT:    testl %ebx, %ebx
-; X86-NEXT:    movl $0, %eax
-; X86-NEXT:    cmovsl %ebx, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT:    cmovsl %edx, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl $-1, %eax
-; X86-NEXT:    cmovsl %esi, %eax
-; X86-NEXT:    movl %ebx, %edi
-; X86-NEXT:    sarl $31, %edi
-; X86-NEXT:    andl %ecx, %edi
-; X86-NEXT:    testl %ebx, %ebx
-; X86-NEXT:    cmovel %ebx, %edi
-; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    subl $1, %eax
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    sbbl $0, %edx
+; X86-NEXT:    setb %bl
+; X86-NEXT:    testb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    cmovel %ecx, %eax
+; X86-NEXT:    cmovel %esi, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    addb $255, %bl
+; X86-NEXT:    sbbl $0, %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:    sbbl $0, %ecx
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
+; X86-NEXT:    testb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmovel %esi, %ecx
 ; X86-NEXT:    cmpl $2147483647, %edx # imm = 0x7FFFFFFF
-; X86-NEXT:    movl $2147483647, %edx # imm = 0x7FFFFFFF
-; X86-NEXT:    cmovbl %ecx, %edx
+; X86-NEXT:    movl $2147483647, %esi # imm = 0x7FFFFFFF
+; X86-NEXT:    cmovbl %edx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl $-1, %esi
+; X86-NEXT:    cmovnsl %eax, %esi
 ; X86-NEXT:    testl %ecx, %ecx
-; X86-NEXT:    movl $-1, %ecx
-; X86-NEXT:    cmovsl %ecx, %esi
-; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X86-NEXT:    cmovnel %eax, %esi
-; X86-NEXT:    cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl $2147483647, %ebx # imm = 0x7FFFFFFF
+; X86-NEXT:    cmovnsl %ebx, %edx
+; X86-NEXT:    movl $-1, %ebx
+; X86-NEXT:    cmovnsl %ebx, %eax
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    cmovsl %ecx, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    orl %ecx, %edi
+; X86-NEXT:    cmovel %esi, %eax
+; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
 ; X86-NEXT:    cmpl $-2147483647, %edx # imm = 0x80000001
-; X86-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
-; X86-NEXT:    cmovael %edx, %eax
-; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl $-2147483648, %esi # imm = 0x80000000
+; X86-NEXT:    cmovael %edx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    sarl $31, %esi
+; X86-NEXT:    andl %eax, %esi
+; X86-NEXT:    testl %ebx, %ebx
+; X86-NEXT:    movl $-2147483648, %edi # imm = 0x80000000
+; X86-NEXT:    cmovsl %edi, %edx
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    cmovsl %edi, %eax
 ; X86-NEXT:    sarl $31, %ecx
-; X86-NEXT:    andl %esi, %ecx
-; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT:    movl $-2147483648, %ebx # imm = 0x80000000
-; X86-NEXT:    cmovsl %ebx, %edx
-; X86-NEXT:    movl $0, %ebx
-; X86-NEXT:    cmovsl %ebx, %esi
-; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X86-NEXT:    cmpl $-1, %edi
-; X86-NEXT:    cmovel %ecx, %esi
-; X86-NEXT:    cmovel %eax, %edx
-; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    andl %ebx, %ecx
+; X86-NEXT:    cmpl $-1, %ecx
+; X86-NEXT:    cmovel %esi, %eax
+; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
 ; X86-NEXT:    leal -12(%ebp), %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi
@@ -842,100 +836,78 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $256, %esp # imm = 0x100
-; X86-NEXT:    movl 16(%ebp), %edi
-; X86-NEXT:    movl 32(%ebp), %eax
-; X86-NEXT:    movl %eax, %esi
-; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    subl $192, %esp
+; X86-NEXT:    movl 24(%ebp), %ebx
+; X86-NEXT:    movl 12(%ebp), %ecx
+; X86-NEXT:    movl 28(%ebp), %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    sarl $31, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, %esi
 ; X86-NEXT:    sarl $31, %esi
 ; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl %edi, %ebx
-; X86-NEXT:    sarl $31, %ebx
-; X86-NEXT:    leal (%edi,%edi), %eax
-; X86-NEXT:    shrl $31, %edi
-; X86-NEXT:    shldl $31, %eax, %edi
+; X86-NEXT:    leal (%ecx,%ecx), %eax
+; X86-NEXT:    shrl $31, %ecx
+; X86-NEXT:    shldl $31, %eax, %ecx
+; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    pushl %ecx
-; X86-NEXT:    pushl %ebx
-; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl $0
-; X86-NEXT:    pushl %eax
-; X86-NEXT:    calll __divti3
-; X86-NEXT:    addl $32, %esp
-; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    pushl %esi
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %edx
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    pushl 32(%ebp)
-; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    pushl %ebx
-; X86-NEXT:    pushl %ebx
-; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %ecx
 ; X86-NEXT:    pushl $0
 ; X86-NEXT:    pushl %eax
 ; X86-NEXT:    calll __modti3
 ; X86-NEXT:    addl $32, %esp
-; X86-NEXT:    movl 36(%ebp), %edx
-; X86-NEXT:    movl %edx, %ebx
-; X86-NEXT:    sarl $31, %ebx
-; X86-NEXT:    movl 20(%ebp), %ecx
+; X86-NEXT:    movl 40(%ebp), %ecx
 ; X86-NEXT:    movl %ecx, %esi
 ; X86-NEXT:    sarl $31, %esi
-; X86-NEXT:    leal (%ecx,%ecx), %eax
-; X86-NEXT:    shrl $31, %ecx
-; X86-NEXT:    shldl $31, %eax, %ecx
-; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    pushl %ebx
-; X86-NEXT:    pushl %ebx
-; X86-NEXT:    pushl %ebx
-; X86-NEXT:    pushl %edx
 ; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:    sarl $31, %edi
+; X86-NEXT:    leal (%ebx,%ebx), %eax
+; X86-NEXT:    shrl $31, %ebx
+; X86-NEXT:    shldl $31, %eax, %ebx
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    pushl %esi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    pushl %ecx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl $0
 ; X86-NEXT:    pushl %eax
 ; X86-NEXT:    calll __modti3
 ; X86-NEXT:    addl $32, %esp
-; X86-NEXT:    movl 28(%ebp), %edx
-; X86-NEXT:    movl %edx, %edi
-; X86-NEXT:    sarl $31, %edi
-; X86-NEXT:    movl 12(%ebp), %ecx
-; X86-NEXT:    movl %ecx, %esi
-; X86-NEXT:    sarl $31, %esi
-; X86-NEXT:    leal (%ecx,%ecx), %eax
-; X86-NEXT:    shrl $31, %ecx
-; X86-NEXT:    shldl $31, %eax, %ecx
-; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    pushl 40(%ebp)
 ; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %edx
-; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    pushl %ecx
+; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl $0
 ; X86-NEXT:    pushl %eax
 ; X86-NEXT:    calll __divti3
 ; X86-NEXT:    addl $32, %esp
-; X86-NEXT:    movl 40(%ebp), %edx
+; X86-NEXT:    movl 36(%ebp), %edx
 ; X86-NEXT:    movl %edx, %esi
 ; X86-NEXT:    sarl $31, %esi
-; X86-NEXT:    movl 24(%ebp), %ecx
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 20(%ebp), %ecx
 ; X86-NEXT:    movl %ecx, %edi
 ; X86-NEXT:    sarl $31, %edi
 ; X86-NEXT:    leal (%ecx,%ecx), %eax
 ; X86-NEXT:    shrl $31, %ecx
 ; X86-NEXT:    shldl $31, %eax, %ecx
-; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, %ebx
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    pushl %esi
@@ -949,27 +921,51 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X86-NEXT:    calll __modti3
 ; X86-NEXT:    addl $32, %esp
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    pushl 40(%ebp)
+; X86-NEXT:    pushl 36(%ebp)
 ; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl $0
 ; X86-NEXT:    pushl %eax
 ; X86-NEXT:    calll __divti3
 ; X86-NEXT:    addl $32, %esp
+; X86-NEXT:    movl 32(%ebp), %edx
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    sarl $31, %edi
+; X86-NEXT:    movl 16(%ebp), %ecx
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:    sarl $31, %esi
+; X86-NEXT:    leal (%ecx,%ecx), %eax
+; X86-NEXT:    shrl $31, %ecx
+; X86-NEXT:    shldl $31, %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    pushl %ebx
-; X86-NEXT:    pushl %ebx
-; X86-NEXT:    pushl %ebx
-; X86-NEXT:    pushl 36(%ebp)
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %edx
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    pushl %ecx
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    calll __modti3
+; X86-NEXT:    addl $32, %esp
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    pushl %ecx
+; X86-NEXT:    pushl %ecx
+; X86-NEXT:    pushl %ecx
+; X86-NEXT:    pushl 28(%ebp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
 ; X86-NEXT:    pushl $0
 ; X86-NEXT:    pushl %eax
@@ -977,327 +973,282 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X86-NEXT:    addl $32, %esp
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    subl $1, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    sbbl $0, %edi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl %esi, %edx
-; X86-NEXT:    sbbl $0, %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    sbbl $0, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl $0, %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %ecx, %eax
-; X86-NEXT:    sbbl $0, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl $0, %ecx
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    sets %al
 ; X86-NEXT:    testl %ebx, %ebx
-; X86-NEXT:    sets %bl
-; X86-NEXT:    testl %edi, %edi
-; X86-NEXT:    sets %bh
-; X86-NEXT:    xorb %bl, %bh
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT:    orl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    sets %dl
+; X86-NEXT:    xorb %al, %dl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    orl %edi, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    orl %eax, %ebx
 ; X86-NEXT:    setne %al
-; X86-NEXT:    testb %bh, %al
-; X86-NEXT:    cmovel %esi, %edx
-; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testb %dl, %al
+; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
 ; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT:    cmovel %ecx, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    sarl $31, %edx
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    andl %edi, %eax
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    cmovsl %ecx, %ebx
+; X86-NEXT:    movl $-1, %esi
+; X86-NEXT:    cmovsl (%esp), %esi # 4-byte Folded Reload
+; X86-NEXT:    negl %edi
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    sbbl %edi, %edi
+; X86-NEXT:    orl (%esp), %edi # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    cmovnel %esi, %edi
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    cmovel %esi, %eax
+; X86-NEXT:    cmpl $-1, %eax
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    cmovel %edi, %ecx
+; X86-NEXT:    testl %ebx, %ebx
+; X86-NEXT:    cmovsl %esi, %edi
+; X86-NEXT:    movl $-1, %esi
+; X86-NEXT:    cmovsl %esi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    andl %ebx, %edx
+; X86-NEXT:    cmpl $-1, %edx
+; X86-NEXT:    cmovnel %eax, %edx
+; X86-NEXT:    cmovel %ecx, %edi
+; X86-NEXT:    shrdl $1, %edx, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    pushl %ecx
+; X86-NEXT:    pushl %ecx
+; X86-NEXT:    pushl %ecx
+; X86-NEXT:    pushl 32(%ebp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    calll __divti3
+; X86-NEXT:    addl $32, %esp
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    subl $1, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    sbbl $0, %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl %edx, %eax
-; X86-NEXT:    sbbl $0, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %esi, %eax
 ; X86-NEXT:    sbbl $0, %eax
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT:    sets %bl
-; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT:    sets %bh
-; X86-NEXT:    xorb %bl, %bh
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT:    orl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl $0, %edi
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    sets %al
+; X86-NEXT:    testl %ebx, %ebx
+; X86-NEXT:    sets %dl
+; X86-NEXT:    xorb %al, %dl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    orl %edi, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    orl %eax, %ebx
 ; X86-NEXT:    setne %al
-; X86-NEXT:    testb %bh, %al
-; X86-NEXT:    cmovel %esi, %ecx
-; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT:    cmovel %edx, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testb %dl, %al
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    cmovel %esi, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
 ; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    sarl $31, %esi
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:    andl %ecx, %ebx
+; X86-NEXT:    testl %edi, %edi
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    cmovsl %edi, %eax
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $-1, %eax
+; X86-NEXT:    cmovsl (%esp), %eax # 4-byte Folded Reload
+; X86-NEXT:    negl %ecx
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    orl (%esp), %ecx # 4-byte Folded Reload
+; X86-NEXT:    orl %edx, %edi
+; X86-NEXT:    cmovnel %eax, %ecx
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    cmovel %edi, %ebx
+; X86-NEXT:    cmpl $-1, %ebx
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    cmovel %ecx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    cmovsl %edi, %ecx
+; X86-NEXT:    movl %ecx, %edi
+; X86-NEXT:    movl $-1, %ecx
+; X86-NEXT:    cmovsl %ecx, %ebx
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    andl %edx, %esi
+; X86-NEXT:    cmpl $-1, %esi
+; X86-NEXT:    cmovnel %ebx, %esi
+; X86-NEXT:    cmovel %eax, %edi
+; X86-NEXT:    shrdl $1, %esi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    subl $1, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT:    movl %edi, %esi
-; X86-NEXT:    sbbl $0, %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    sbbl $0, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl $0, %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    sbbl $0, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl $0, %ebx
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
 ; X86-NEXT:    sets %al
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT:    testl %ecx, %ecx
-; X86-NEXT:    sets %bl
-; X86-NEXT:    xorb %al, %bl
-; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    pushl %edx
-; X86-NEXT:    pushl %edx
-; X86-NEXT:    pushl %edx
-; X86-NEXT:    pushl 28(%ebp)
-; X86-NEXT:    pushl %ecx
-; X86-NEXT:    pushl %ecx
-; X86-NEXT:    pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT:    pushl $0
-; X86-NEXT:    pushl %eax
-; X86-NEXT:    calll __modti3
-; X86-NEXT:    addl $32, %esp
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    sets %dl
+; X86-NEXT:    xorb %al, %dl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    orl %eax, %esi
 ; X86-NEXT:    setne %al
-; X86-NEXT:    testb %bl, %al
-; X86-NEXT:    cmovel %edi, %esi
-; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    testb %dl, %al
+; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl (%esp), %edx # 4-byte Reload
+; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, (%esp) # 4-byte Spill
+; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    sarl $31, %esi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl %ecx, %eax
+; X86-NEXT:    testl %ebx, %ebx
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    cmovsl %ebx, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $-1, %edi
+; X86-NEXT:    cmovsl %edx, %edi
+; X86-NEXT:    negl %ecx
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    sbbl %edx, %edx
+; X86-NEXT:    orl (%esp), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    cmovnel %edi, %edx
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    cmovel %edi, %eax
+; X86-NEXT:    cmpl $-1, %eax
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    cmovel %edx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    testl %ebx, %ebx
+; X86-NEXT:    cmovsl %edi, %edx
+; X86-NEXT:    movl $-1, %edi
+; X86-NEXT:    cmovsl %edi, %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    andl %ebx, %esi
+; X86-NEXT:    cmpl $-1, %esi
+; X86-NEXT:    cmovnel %eax, %esi
+; X86-NEXT:    cmovel %ecx, %edx
+; X86-NEXT:    shrdl $1, %esi, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    subl $1, %eax
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    subl $1, %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl %esi, %eax
-; X86-NEXT:    sbbl $0, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl $0, %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    sbbl $0, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    sbbl $0, %edx
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl $0, %ecx
 ; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT:    sets %bl
+; X86-NEXT:    sets %al
 ; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT:    sets %bh
-; X86-NEXT:    xorb %bl, %bh
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT:    orl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    sets %bl
+; X86-NEXT:    xorb %al, %bl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    orl %edi, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    orl %eax, %edi
 ; X86-NEXT:    setne %al
-; X86-NEXT:    testb %bh, %al
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT:    cmovel %esi, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb %bl, %al
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
 ; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    testl %edx, %edx
-; X86-NEXT:    movl $0, %eax
-; X86-NEXT:    cmovsl %edx, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl $-1, %eax
-; X86-NEXT:    cmovsl %ecx, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    sarl $31, %ecx
-; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl %ecx, %edx
-; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT:    testl %eax, %eax
-; X86-NEXT:    cmovel %eax, %edx
-; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl $0, %ecx
-; X86-NEXT:    cmovsl %eax, %ecx
-; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl $-1, %eax
-; X86-NEXT:    cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT:    movl %eax, %edi
-; X86-NEXT:    sarl $31, %edi
-; X86-NEXT:    movl %edi, %ecx
-; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT:    testl %eax, %eax
-; X86-NEXT:    cmovel %eax, %ecx
-; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl $0, %ecx
-; X86-NEXT:    cmovsl %eax, %ecx
-; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl $-1, %eax
-; X86-NEXT:    cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X86-NEXT:    movl %ecx, %ebx
 ; X86-NEXT:    sarl $31, %ebx
 ; X86-NEXT:    movl %ebx, %eax
-; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    andl %esi, %eax
 ; X86-NEXT:    testl %ecx, %ecx
-; X86-NEXT:    cmovel %ecx, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl $0, %eax
-; X86-NEXT:    cmovsl %ecx, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl %ecx, %edx
-; X86-NEXT:    movl $-1, %esi
-; X86-NEXT:    cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    sarl $31, %ecx
-; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT:    testl %eax, %eax
-; X86-NEXT:    cmovel %eax, %ecx
-; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT:    andl %eax, %ebx
-; X86-NEXT:    negl %eax
-; X86-NEXT:    movl $0, %ecx
-; X86-NEXT:    sbbl %ecx, %ecx
-; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT:    cmovnel %esi, %ecx
-; X86-NEXT:    movl $0, %edx
-; X86-NEXT:    cmovel %edx, %ebx
-; X86-NEXT:    cmpl $-1, %ebx
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    cmovsl %ecx, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl $-1, %edi
+; X86-NEXT:    cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    negl %esi
 ; X86-NEXT:    movl $0, %esi
-; X86-NEXT:    cmovel %ecx, %esi
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT:    testl %eax, %eax
-; X86-NEXT:    cmovsl %edx, %ecx
-; X86-NEXT:    movl $-1, %edx
-; X86-NEXT:    cmovsl %edx, %ebx
-; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    sbbl %esi, %esi
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    orl (%esp), %ecx # 4-byte Folded Reload
+; X86-NEXT:    cmovnel %edi, %esi
+; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    cmovel %edi, %eax
 ; X86-NEXT:    cmpl $-1, %eax
-; X86-NEXT:    cmovel %esi, %ecx
-; X86-NEXT:    cmovnel %ebx, %eax
-; X86-NEXT:    shldl $31, %ecx, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT:    andl %eax, %edi
-; X86-NEXT:    negl %eax
-; X86-NEXT:    movl $0, %eax
-; X86-NEXT:    sbbl %eax, %eax
-; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT:    cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT:    movl $0, %esi
-; X86-NEXT:    cmovel %esi, %edi
-; X86-NEXT:    cmpl $-1, %edi
-; X86-NEXT:    movl $0, %edx
-; X86-NEXT:    cmovel %eax, %edx
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT:    testl %ecx, %ecx
-; X86-NEXT:    cmovsl %esi, %eax
-; X86-NEXT:    movl $-1, %ebx
-; X86-NEXT:    cmovsl %ebx, %edi
-; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT:    cmpl $-1, %ecx
-; X86-NEXT:    cmovel %edx, %eax
-; X86-NEXT:    cmovnel %edi, %ecx
-; X86-NEXT:    shldl $31, %eax, %ecx
-; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT:    andl %eax, %edx
-; X86-NEXT:    negl %eax
-; X86-NEXT:    movl $0, %eax
-; X86-NEXT:    sbbl %eax, %eax
-; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT:    cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT:    cmovel %esi, %edx
-; X86-NEXT:    cmpl $-1, %edx
 ; X86-NEXT:    movl $0, %ecx
-; X86-NEXT:    cmovel %eax, %ecx
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT:    testl %ebx, %ebx
-; X86-NEXT:    cmovsl %esi, %eax
+; X86-NEXT:    cmovel %esi, %ecx
+; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    cmovsl %edi, %esi
 ; X86-NEXT:    movl $-1, %edi
-; X86-NEXT:    cmovsl %edi, %edx
+; X86-NEXT:    cmovsl %edi, %eax
+; X86-NEXT:    andl (%esp), %ebx # 4-byte Folded Reload
 ; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
 ; X86-NEXT:    cmpl $-1, %ebx
-; X86-NEXT:    cmovel %ecx, %eax
-; X86-NEXT:    cmovnel %edx, %ebx
-; X86-NEXT:    shldl $31, %eax, %ebx
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT:    andl %eax, %edi
-; X86-NEXT:    negl %eax
-; X86-NEXT:    movl $0, %eax
-; X86-NEXT:    sbbl %eax, %eax
-; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT:    cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT:    cmovel %esi, %edi
-; X86-NEXT:    cmpl $-1, %edi
-; X86-NEXT:    movl $0, %ecx
-; X86-NEXT:    cmovel %eax, %ecx
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT:    testl %edx, %edx
-; X86-NEXT:    cmovsl %esi, %eax
-; X86-NEXT:    movl $-1, %esi
-; X86-NEXT:    cmovsl %esi, %edi
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT:    andl %edx, %esi
-; X86-NEXT:    cmpl $-1, %esi
-; X86-NEXT:    cmovel %ecx, %eax
-; X86-NEXT:    cmovnel %edi, %esi
-; X86-NEXT:    shldl $31, %eax, %esi
+; X86-NEXT:    cmovnel %eax, %ebx
+; X86-NEXT:    cmovel %ecx, %esi
+; X86-NEXT:    shrdl $1, %ebx, %esi
 ; X86-NEXT:    movl 8(%ebp), %eax
 ; X86-NEXT:    movl %esi, 12(%eax)
-; X86-NEXT:    movl %ebx, 8(%eax)
+; X86-NEXT:    movl %edx, 8(%eax)
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X86-NEXT:    movl %ecx, 4(%eax)
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
