[llvm] 2a5e1da - Revert "[ARM] Stop gluing ALU nodes to branches / selects" (#118232)

via llvm-commits llvm-commits at lists.llvm.org
Sun Dec 1 14:02:31 PST 2024


Author: Martin Storsjö
Date: 2024-12-02T00:02:25+02:00
New Revision: 2a5e1da57a42fa2fc081bbc11970871a1eecb3b3

URL: https://github.com/llvm/llvm-project/commit/2a5e1da57a42fa2fc081bbc11970871a1eecb3b3
DIFF: https://github.com/llvm/llvm-project/commit/2a5e1da57a42fa2fc081bbc11970871a1eecb3b3.diff

LOG: Revert "[ARM] Stop gluing ALU nodes to branches / selects" (#118232)

Reverts llvm/llvm-project#116970.

This change broke Wine compiled for armv7, causing segfaults when
starting Wine. See llvm/llvm-project#116970 for more detailed discussion
about the issue.

Added: 
    

Modified: 
    llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
    llvm/lib/Target/ARM/ARMISelLowering.cpp
    llvm/lib/Target/ARM/ARMISelLowering.h
    llvm/lib/Target/ARM/ARMInstrFormats.td
    llvm/lib/Target/ARM/ARMInstrInfo.td
    llvm/lib/Target/ARM/ARMInstrThumb.td
    llvm/lib/Target/ARM/ARMInstrThumb2.td
    llvm/lib/Target/ARM/ARMInstrVFP.td
    llvm/test/CodeGen/ARM/add-like-or.ll
    llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll
    llvm/test/CodeGen/ARM/atomic-64bit.ll
    llvm/test/CodeGen/ARM/atomic-ops-v8.ll
    llvm/test/CodeGen/ARM/atomicrmw-cond-sub-clamp.ll
    llvm/test/CodeGen/ARM/atomicrmw-uinc-udec-wrap.ll
    llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
    llvm/test/CodeGen/ARM/bfi.ll
    llvm/test/CodeGen/ARM/cmov_fp16.ll
    llvm/test/CodeGen/ARM/cse-call.ll
    llvm/test/CodeGen/ARM/cttz.ll
    llvm/test/CodeGen/ARM/fadd-select-fneg-combine.ll
    llvm/test/CodeGen/ARM/fcmp-xo.ll
    llvm/test/CodeGen/ARM/fpclamptosat.ll
    llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
    llvm/test/CodeGen/ARM/fpscr-multi-use.ll
    llvm/test/CodeGen/ARM/fptoi-sat-store.ll
    llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
    llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll
    llvm/test/CodeGen/ARM/funnel-shift-rot.ll
    llvm/test/CodeGen/ARM/funnel-shift.ll
    llvm/test/CodeGen/ARM/ifcvt1.ll
    llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
    llvm/test/CodeGen/ARM/neon_vabd.ll
    llvm/test/CodeGen/ARM/overflow-intrinsic-optimizations.ll
    llvm/test/CodeGen/ARM/sadd_sat.ll
    llvm/test/CodeGen/ARM/sadd_sat_plus.ll
    llvm/test/CodeGen/ARM/select.ll
    llvm/test/CodeGen/ARM/select_const.ll
    llvm/test/CodeGen/ARM/shift-i64.ll
    llvm/test/CodeGen/ARM/ssub_sat.ll
    llvm/test/CodeGen/ARM/ssub_sat_plus.ll
    llvm/test/CodeGen/ARM/sub-cmp-peephole.ll
    llvm/test/CodeGen/ARM/uadd_sat.ll
    llvm/test/CodeGen/ARM/uadd_sat_plus.ll
    llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll
    llvm/test/CodeGen/ARM/umulo-64-legalisation-lowering.ll
    llvm/test/CodeGen/ARM/usub_sat.ll
    llvm/test/CodeGen/ARM/usub_sat_plus.ll
    llvm/test/CodeGen/ARM/vselect_imax.ll
    llvm/test/CodeGen/ARM/wide-compares.ll
    llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll
    llvm/test/CodeGen/Thumb/select.ll
    llvm/test/CodeGen/Thumb/smul_fix_sat.ll
    llvm/test/CodeGen/Thumb/stack-guard-xo.ll
    llvm/test/CodeGen/Thumb/umul_fix_sat.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/arm_cmplx_dot_prod_f32.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll
    llvm/test/CodeGen/Thumb2/float-ops.ll
    llvm/test/CodeGen/Thumb2/mve-blockplacement.ll
    llvm/test/CodeGen/Thumb2/mve-doublereduct.ll
    llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
    llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
    llvm/test/CodeGen/Thumb2/mve-fmas.ll
    llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
    llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
    llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
    llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll
    llvm/test/CodeGen/Thumb2/mve-laneinterleaving.ll
    llvm/test/CodeGen/Thumb2/mve-minmaxi.ll
    llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll
    llvm/test/CodeGen/Thumb2/mve-pred-ext.ll
    llvm/test/CodeGen/Thumb2/mve-pred-or.ll
    llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll
    llvm/test/CodeGen/Thumb2/mve-pred-vselect.ll
    llvm/test/CodeGen/Thumb2/mve-pred-xor.ll
    llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
    llvm/test/CodeGen/Thumb2/mve-vcmpf.ll
    llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
    llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll
    llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll
    llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll
    llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
    llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index fd024ffdeefde4..73ee8cf81adcd6 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -111,6 +111,13 @@ class ARMDAGToDAGISel : public SelectionDAGISel {
   bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
   bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
 
+  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
+    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
+    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
+    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
+    return true;
+  }
+
   bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                              SDValue &Offset, SDValue &Opc);
   bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
@@ -4116,15 +4123,17 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
     SDValue Chain = N->getOperand(0);
     SDValue N1 = N->getOperand(1);
     SDValue N2 = N->getOperand(2);
-    SDValue Flags = N->getOperand(3);
+    SDValue N3 = N->getOperand(3);
+    SDValue InGlue = N->getOperand(4);
     assert(N1.getOpcode() == ISD::BasicBlock);
     assert(N2.getOpcode() == ISD::Constant);
+    assert(N3.getOpcode() == ISD::Register);
 
     unsigned CC = (unsigned)N2->getAsZExtVal();
 
-    if (Flags.getOpcode() == ARMISD::CMPZ) {
-      if (Flags.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
-        SDValue Int = Flags.getOperand(0);
+    if (InGlue.getOpcode() == ARMISD::CMPZ) {
+      if (InGlue.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
+        SDValue Int = InGlue.getOperand(0);
         uint64_t ID = Int->getConstantOperandVal(1);
 
         // Handle low-overhead loops.
@@ -4146,15 +4155,15 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
 
           ReplaceUses(N, LoopEnd);
           CurDAG->RemoveDeadNode(N);
-          CurDAG->RemoveDeadNode(Flags.getNode());
+          CurDAG->RemoveDeadNode(InGlue.getNode());
           CurDAG->RemoveDeadNode(Int.getNode());
           return;
         }
       }
 
       bool SwitchEQNEToPLMI;
-      SelectCMPZ(Flags.getNode(), SwitchEQNEToPLMI);
-      Flags = N->getOperand(3);
+      SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI);
+      InGlue = N->getOperand(4);
 
       if (SwitchEQNEToPLMI) {
         switch ((ARMCC::CondCodes)CC) {
@@ -4170,18 +4179,25 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
     }
 
     SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
-    Chain = CurDAG->getCopyToReg(Chain, dl, ARM::CPSR, Flags, SDValue());
-    SDValue Ops[] = {N1, Tmp2, CurDAG->getRegister(ARM::CPSR, MVT::i32), Chain,
-                     Chain.getValue(1)};
-    CurDAG->SelectNodeTo(N, Opc, MVT::Other, Ops);
+    SDValue Ops[] = { N1, Tmp2, N3, Chain, InGlue };
+    SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
+                                             MVT::Glue, Ops);
+    Chain = SDValue(ResNode, 0);
+    if (N->getNumValues() == 2) {
+      InGlue = SDValue(ResNode, 1);
+      ReplaceUses(SDValue(N, 1), InGlue);
+    }
+    ReplaceUses(SDValue(N, 0),
+                SDValue(Chain.getNode(), Chain.getResNo()));
+    CurDAG->RemoveDeadNode(N);
     return;
   }
 
   case ARMISD::CMPZ: {
     // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
     //   This allows us to avoid materializing the expensive negative constant.
-    //   The CMPZ #0 is useless and will be peepholed away but we need to keep
-    //   it for its flags output.
+    //   The CMPZ #0 is useless and will be peepholed away but we need to keep it
+    //   for its glue output.
     SDValue X = N->getOperand(0);
     auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
     if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
@@ -4208,7 +4224,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
       }
       if (Add) {
         SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
-        CurDAG->MorphNodeTo(N, ARMISD::CMPZ, N->getVTList(), Ops2);
+        CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
       }
     }
     // Other cases are autogenerated.
@@ -4216,11 +4232,11 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
   }
 
   case ARMISD::CMOV: {
-    SDValue Flags = N->getOperand(3);
+    SDValue InGlue = N->getOperand(4);
 
-    if (Flags.getOpcode() == ARMISD::CMPZ) {
+    if (InGlue.getOpcode() == ARMISD::CMPZ) {
       bool SwitchEQNEToPLMI;
-      SelectCMPZ(Flags.getNode(), SwitchEQNEToPLMI);
+      SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI);
 
       if (SwitchEQNEToPLMI) {
         SDValue ARMcc = N->getOperand(2);
@@ -4237,9 +4253,10 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
         }
         SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
         SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
-                         N->getOperand(3)};
+                         N->getOperand(3), N->getOperand(4)};
         CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
       }
+
     }
     // Other cases are autogenerated.
     break;

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index c9250e4ed3422c..6b290135c5bcba 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -4924,11 +4924,14 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
       CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) &&
       LHS.getConstantOperandVal(1) < 31) {
     unsigned ShiftAmt = LHS.getConstantOperandVal(1) + 1;
-    SDValue Shift =
-        DAG.getNode(ARMISD::LSLS, dl, DAG.getVTList(MVT::i32, FlagsVT),
-                    LHS.getOperand(0), DAG.getConstant(ShiftAmt, dl, MVT::i32));
+    SDValue Shift = DAG.getNode(ARMISD::LSLS, dl,
+                                DAG.getVTList(MVT::i32, MVT::i32),
+                                LHS.getOperand(0),
+                                DAG.getConstant(ShiftAmt, dl, MVT::i32));
+    SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
+                                     Shift.getValue(1), SDValue());
     ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32);
-    return Shift.getValue(1);
+    return Chain.getValue(1);
   }
 
   ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
@@ -4960,7 +4963,7 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
     break;
   }
   ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
-  return DAG.getNode(CompareType, dl, FlagsVT, LHS, RHS);
+  return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
 }
 
 /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
@@ -4975,7 +4978,24 @@ SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
   else
     Flags = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0, dl,
                         FlagsVT, LHS);
-  return DAG.getNode(ARMISD::FMSTAT, dl, FlagsVT, Flags);
+  return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Flags);
+}
+
+/// duplicateCmp - Glue values can have only one use, so this function
+/// duplicates a comparison node.
+SDValue
+ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
+  unsigned Opc = Cmp.getOpcode();
+  SDLoc DL(Cmp);
+  if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
+    return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
+
+  assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
+  SDValue Flags = Cmp.getOperand(0);
+  assert((Flags.getOpcode() == ARMISD::CMPFP ||
+          Flags.getOpcode() == ARMISD::CMPFPw0) &&
+         "unexpected operand of FMSTAT");
+  return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Flags);
 }
 
 // This function returns three things: the arithmetic computation itself
@@ -5003,7 +5023,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
   case ISD::SADDO:
     ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
     Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
-    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value, LHS);
+    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
     break;
   case ISD::UADDO:
     ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
@@ -5012,17 +5032,17 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
     Value = DAG.getNode(ARMISD::ADDC, dl,
                         DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
                 .getValue(0);
-    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value, LHS);
+    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
     break;
   case ISD::SSUBO:
     ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
     Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
-    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, LHS, RHS);
+    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
     break;
   case ISD::USUBO:
     ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
     Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
-    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, LHS, RHS);
+    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
     break;
   case ISD::UMULO:
     // We generate a UMUL_LOHI and then check if the high word is 0.
@@ -5030,7 +5050,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
     Value = DAG.getNode(ISD::UMUL_LOHI, dl,
                         DAG.getVTList(Op.getValueType(), Op.getValueType()),
                         LHS, RHS);
-    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
+    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
                               DAG.getConstant(0, dl, MVT::i32));
     Value = Value.getValue(0); // We only want the low 32 bits for the result.
     break;
@@ -5041,7 +5061,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
     Value = DAG.getNode(ISD::SMUL_LOHI, dl,
                         DAG.getVTList(Op.getValueType(), Op.getValueType()),
                         LHS, RHS);
-    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
+    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
                               DAG.getNode(ISD::SRA, dl, Op.getValueType(),
                                           Value.getValue(0),
                                           DAG.getConstant(31, dl, MVT::i32)));
@@ -5061,14 +5081,15 @@ ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
   SDValue Value, OverflowCmp;
   SDValue ARMcc;
   std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
+  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   SDLoc dl(Op);
   // We use 0 and 1 as false and true values.
   SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
   SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
   EVT VT = Op.getValueType();
 
-  SDValue Overflow =
-      DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, ARMcc, OverflowCmp);
+  SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
+                                 ARMcc, CCR, OverflowCmp);
 
   SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
   return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
@@ -5205,9 +5226,11 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
     SDValue Value, OverflowCmp;
     SDValue ARMcc;
     std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
+    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
     EVT VT = Op.getValueType();
 
-    return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, OverflowCmp, DAG);
+    return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
+                   OverflowCmp, DAG);
   }
 
   // Convert:
@@ -5235,9 +5258,14 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
         False = SelectTrue;
       }
 
-      if (True.getNode() && False.getNode())
-        return getCMOV(dl, Op.getValueType(), True, False, Cond.getOperand(2),
-                       Cond.getOperand(3), DAG);
+      if (True.getNode() && False.getNode()) {
+        EVT VT = Op.getValueType();
+        SDValue ARMcc = Cond.getOperand(2);
+        SDValue CCR = Cond.getOperand(3);
+        SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
+        assert(True.getValueType() == VT);
+        return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
+      }
     }
   }
 
@@ -5302,8 +5330,8 @@ static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
 }
 
 SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
-                                   SDValue TrueVal, SDValue ARMcc,
-                                   SDValue Flags, SelectionDAG &DAG) const {
+                                   SDValue TrueVal, SDValue ARMcc, SDValue CCR,
+                                   SDValue Cmp, SelectionDAG &DAG) const {
   if (!Subtarget->hasFP64() && VT == MVT::f64) {
     FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
                            DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
@@ -5316,13 +5344,15 @@ SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
     SDValue FalseHigh = FalseVal.getValue(1);
 
     SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
-                              ARMcc, Flags);
+                              ARMcc, CCR, Cmp);
     SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
-                               ARMcc, Flags);
+                               ARMcc, CCR, duplicateCmp(Cmp, DAG));
 
     return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
+  } else {
+    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
+                       Cmp);
   }
-  return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, Flags);
 }
 
 static bool isGTorGE(ISD::CondCode CC) {
@@ -5595,11 +5625,12 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
     }
 
     SDValue ARMcc;
+    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
     // Choose GE over PL, which vsel does now support
     if (ARMcc->getAsZExtVal() == ARMCC::PL)
       ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32);
-    return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, Cmp, DAG);
+    return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
   }
 
   ARMCC::CondCodes CondCode, CondCode2;
@@ -5629,10 +5660,13 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
 
   SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
-  SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, Cmp, DAG);
+  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+  SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
   if (CondCode2 != ARMCC::AL) {
     SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
-    Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, Cmp, DAG);
+    // FIXME: Needs another CMP because flag can have but one use.
+    SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
+    Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
   }
   return Result;
 }
@@ -5733,8 +5767,9 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
       RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
                         bitcastf32Toi32(RHS, DAG), Mask);
       SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
-      return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
-                         Cmp);
+      SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+      return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
+                         Chain, Dest, ARMcc, CCR, Cmp);
     }
 
     SDValue LHS1, LHS2;
@@ -5745,8 +5780,9 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
     RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
     ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
     ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
+    SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
     SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
-    return DAG.getNode(ARMISD::BCC_i64, dl, MVT::Other, Ops);
+    return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
   }
 
   return SDValue();
@@ -5780,8 +5816,9 @@ SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
         (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
     CondCode = ARMCC::getOppositeCondition(CondCode);
     ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
+    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
 
-    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
+    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
                        OverflowCmp);
   }
 
@@ -5833,15 +5870,18 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
       CondCode = ARMCC::getOppositeCondition(CondCode);
       ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
     }
+    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
 
-    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
+    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
                        OverflowCmp);
   }
 
   if (LHS.getValueType() == MVT::i32) {
     SDValue ARMcc;
     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
-    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, Cmp);
+    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
+                       Chain, Dest, ARMcc, CCR, Cmp);
   }
 
   if (getTargetMachine().Options.UnsafeFPMath &&
@@ -5856,12 +5896,14 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
 
   SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
-  SDValue Ops[] = {Chain, Dest, ARMcc, Cmp};
-  SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Ops);
+  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+  SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
+  SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
+  SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
   if (CondCode2 != ARMCC::AL) {
     ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
-    SDValue Ops[] = {Res, Dest, ARMcc, Cmp};
-    Res = DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Ops);
+    SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
+    Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
   }
   return Res;
 }
@@ -6366,6 +6408,7 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
   SDValue ShOpHi = Op.getOperand(1);
   SDValue ShAmt  = Op.getOperand(2);
   SDValue ARMcc;
+  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
 
   assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
@@ -6380,8 +6423,8 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
   SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
   SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
                             ISD::SETGE, ARMcc, DAG, dl);
-  SDValue Lo =
-      DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift, ARMcc, CmpLo);
+  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift,
+                           ARMcc, CCR, CmpLo);
 
   SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
   SDValue HiBigShift = Opc == ISD::SRA
@@ -6390,8 +6433,8 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
                            : DAG.getConstant(0, dl, VT);
   SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
                             ISD::SETGE, ARMcc, DAG, dl);
-  SDValue Hi =
-      DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CmpHi);
+  SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
+                           ARMcc, CCR, CmpHi);
 
   SDValue Ops[2] = { Lo, Hi };
   return DAG.getMergeValues(Ops, dl);
@@ -6409,6 +6452,7 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
   SDValue ShOpHi = Op.getOperand(1);
   SDValue ShAmt  = Op.getOperand(2);
   SDValue ARMcc;
+  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
 
   assert(Op.getOpcode() == ISD::SHL_PARTS);
   SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
@@ -6422,14 +6466,14 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
   SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
   SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
                             ISD::SETGE, ARMcc, DAG, dl);
-  SDValue Hi =
-      DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CmpHi);
+  SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
+                           ARMcc, CCR, CmpHi);
 
   SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
                           ISD::SETGE, ARMcc, DAG, dl);
   SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
   SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
-                           DAG.getConstant(0, dl, VT), ARMcc, CmpLo);
+                           DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo);
 
   SDValue Ops[2] = { Lo, Hi };
   return DAG.getMergeValues(Ops, dl);
@@ -7016,8 +7060,11 @@ static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) {
   SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
   SDValue ARMcc = DAG.getConstant(
       IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
+  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+  SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR,
+                                   Cmp.getValue(1), SDValue());
   return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
-                     Cmp.getValue(1));
+                     CCR, Chain.getValue(1));
 }
 
 /// isVMOVModifiedImm - Check if the specified splat value corresponds to a
@@ -10566,14 +10613,21 @@ SDValue ARMTargetLowering::LowerFSETCC(SDValue Op, SelectionDAG &DAG) const {
   ARMCC::CondCodes CondCode, CondCode2;
   FPCCToARMCC(CC, CondCode, CondCode2);
 
+  // FIXME: Chain is not handled correctly here. Currently the FPSCR is implicit
+  // in CMPFP and CMPFPE, but instead it should be made explicit by these
+  // instructions using a chain instead of glue. This would also fix the problem
+  // here (and also in LowerSELECT_CC) where we generate two comparisons when
+  // CondCode2 != AL.
   SDValue True = DAG.getConstant(1, dl, VT);
   SDValue False =  DAG.getConstant(0, dl, VT);
   SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
+  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
-  SDValue Result = getCMOV(dl, VT, False, True, ARMcc, Cmp, DAG);
+  SDValue Result = getCMOV(dl, VT, False, True, ARMcc, CCR, Cmp, DAG);
   if (CondCode2 != ARMCC::AL) {
     ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
-    Result = getCMOV(dl, VT, Result, True, ARMcc, Cmp, DAG);
+    Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
+    Result = getCMOV(dl, VT, Result, True, ARMcc, CCR, Cmp, DAG);
   }
   return DAG.getMergeValues({Result, Chain}, dl);
 }
@@ -15003,7 +15057,7 @@ static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG) {
 }
 
 // Check that N is CMPZ(CSINC(0, 0, CC, X)),
-//              or CMPZ(CMOV(1, 0, CC, X))
+//              or CMPZ(CMOV(1, 0, CC, $cpsr, X))
 // return X if valid.
 static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) {
   if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1)))
@@ -15027,22 +15081,22 @@ static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) {
   if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(0)) &&
       isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
     CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);
-    return CSInc.getOperand(3);
+    return CSInc.getOperand(4);
   }
   if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(1)) &&
       isNullConstant(CSInc.getOperand(0)) && CSInc->hasOneUse()) {
     CC = ARMCC::getOppositeCondition(
         (ARMCC::CondCodes)CSInc.getConstantOperandVal(2));
-    return CSInc.getOperand(3);
+    return CSInc.getOperand(4);
   }
   return SDValue();
 }
 
 static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG) {
   // Given CMPZ(CSINC(C, 0, 0, EQ), 0), we can just use C directly. As in
-  //       t92: flags = ARMISD::CMPZ t74, 0
+  //       t92: glue = ARMISD::CMPZ t74, 0
   //     t93: i32 = ARMISD::CSINC 0, 0, 1, t92
-  //   t96: flags = ARMISD::CMPZ t93, 0
+  //   t96: glue = ARMISD::CMPZ t93, 0
   // t114: i32 = ARMISD::CSINV 0, 0, 0, t96
   ARMCC::CondCodes Cond;
   if (SDValue C = IsCMPZCSINC(N, Cond))
@@ -18133,7 +18187,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D
   SDValue Op0 = CMOV->getOperand(0);
   SDValue Op1 = CMOV->getOperand(1);
   auto CC = CMOV->getConstantOperandAPInt(2).getLimitedValue();
-  SDValue CmpZ = CMOV->getOperand(3);
+  SDValue CmpZ = CMOV->getOperand(4);
 
   // The compare must be against zero.
   if (!isNullConstant(CmpZ->getOperand(1)))
@@ -18377,11 +18431,12 @@ static SDValue PerformHWLoopCombine(SDNode *N,
 /// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
 SDValue
 ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
-  SDValue Cmp = N->getOperand(3);
+  SDValue Cmp = N->getOperand(4);
   if (Cmp.getOpcode() != ARMISD::CMPZ)
     // Only looking at NE cases.
     return SDValue();
 
+  EVT VT = N->getValueType(0);
   SDLoc dl(N);
   SDValue LHS = Cmp.getOperand(0);
   SDValue RHS = Cmp.getOperand(1);
@@ -18390,17 +18445,17 @@ ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
   SDValue ARMcc = N->getOperand(2);
   ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();
 
-  // (brcond Chain BB ne (cmpz (and (cmov 0 1 CC Flags) 1) 0))
-  // -> (brcond Chain BB CC Flags)
+  // (brcond Chain BB ne CPSR (cmpz (and (cmov 0 1 CC CPSR Cmp) 1) 0))
+  // -> (brcond Chain BB CC CPSR Cmp)
   if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
       LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
       LHS->getOperand(0)->hasOneUse() &&
       isNullConstant(LHS->getOperand(0)->getOperand(0)) &&
       isOneConstant(LHS->getOperand(0)->getOperand(1)) &&
       isOneConstant(LHS->getOperand(1)) && isNullConstant(RHS)) {
-    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, BB,
-                       LHS->getOperand(0)->getOperand(2),
-                       LHS->getOperand(0)->getOperand(3));
+    return DAG.getNode(
+        ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2),
+        LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4));
   }
 
   return SDValue();
@@ -18409,7 +18464,7 @@ ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
 /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
 SDValue
 ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
-  SDValue Cmp = N->getOperand(3);
+  SDValue Cmp = N->getOperand(4);
   if (Cmp.getOpcode() != ARMISD::CMPZ)
     // Only looking at EQ and NE cases.
     return SDValue();
@@ -18449,38 +18504,42 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
   /// FIXME: Turn this into a target neutral optimization?
   SDValue Res;
   if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
-    Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc, Cmp);
+    Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
+                      N->getOperand(3), Cmp);
   } else if (CC == ARMCC::EQ && TrueVal == RHS) {
     SDValue ARMcc;
     SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
-    Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc, NewCmp);
+    Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
+                      N->getOperand(3), NewCmp);
   }
 
-  // (cmov F T ne (cmpz (cmov 0 1 CC Flags) 0))
-  // -> (cmov F T CC Flags)
+  // (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0))
+  // -> (cmov F T CC CPSR Cmp)
   if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse() &&
       isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
       isNullConstant(RHS)) {
     return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
-                       LHS->getOperand(2), LHS->getOperand(3));
+                       LHS->getOperand(2), LHS->getOperand(3),
+                       LHS->getOperand(4));
   }
 
   if (!VT.isInteger())
       return SDValue();
 
   // Fold away an unneccessary CMPZ/CMOV
-  // CMOV A, B, C1, (CMPZ (CMOV 1, 0, C2, D), 0) ->
-  // if C1==EQ -> CMOV A, B, C2, D
-  // if C1==NE -> CMOV A, B, NOT(C2), D
+  // CMOV A, B, C1, $cpsr, (CMPZ (CMOV 1, 0, C2, D), 0) ->
+  // if C1==EQ -> CMOV A, B, C2, $cpsr, D
+  // if C1==NE -> CMOV A, B, NOT(C2), $cpsr, D
   if (N->getConstantOperandVal(2) == ARMCC::EQ ||
       N->getConstantOperandVal(2) == ARMCC::NE) {
     ARMCC::CondCodes Cond;
-    if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) {
+    if (SDValue C = IsCMPZCSINC(N->getOperand(4).getNode(), Cond)) {
       if (N->getConstantOperandVal(2) == ARMCC::NE)
         Cond = ARMCC::getOppositeCondition(Cond);
       return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
                          N->getOperand(1),
-                         DAG.getConstant(Cond, SDLoc(N), MVT::i32), C);
+                         DAG.getTargetConstant(Cond, SDLoc(N), MVT::i32),
+                         N->getOperand(3), C);
     }
   }
 
@@ -18520,8 +18579,10 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
       // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1
       SDValue Sub =
           DAG.getNode(ARMISD::SUBC, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
+      SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
+                                          Sub.getValue(1), SDValue());
       Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc,
-                        Sub.getValue(1));
+                        N->getOperand(3), CPSRGlue.getValue(1));
       FalseVal = Sub;
     }
   } else if (isNullConstant(TrueVal)) {
@@ -18532,9 +18593,11 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
       // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1
       SDValue Sub =
           DAG.getNode(ARMISD::SUBC, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
+      SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
+                                          Sub.getValue(1), SDValue());
       Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal,
                         DAG.getConstant(ARMCC::NE, dl, MVT::i32),
-                        Sub.getValue(1));
+                        N->getOperand(3), CPSRGlue.getValue(1));
       FalseVal = Sub;
     }
   }

diff  --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 4fa600e0cfcc40..344a0ad91e5178 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -978,11 +978,13 @@ class VectorType;
     bool isUnsupportedFloatingType(EVT VT) const;
 
     SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
-                    SDValue ARMcc, SDValue Flags, SelectionDAG &DAG) const;
+                    SDValue ARMcc, SDValue CCR, SDValue Cmp,
+                    SelectionDAG &DAG) const;
     SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                       SDValue &ARMcc, SelectionDAG &DAG, const SDLoc &dl) const;
     SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
                       const SDLoc &dl, bool Signaling = false) const;
+    SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const;
 
     SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;
 

diff  --git a/llvm/lib/Target/ARM/ARMInstrFormats.td b/llvm/lib/Target/ARM/ARMInstrFormats.td
index 041601748b1f73..d0678f378da1ea 100644
--- a/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -167,6 +167,16 @@ def pred : PredicateOperand<OtherVT, (ops i32imm, i32imm),
   let DecoderMethod = "DecodePredicateOperand";
 }
 
+// Selectable predicate operand for CMOV instructions. We can't use a normal
+// predicate because the default values interfere with instruction selection. In
+// all other respects it is identical though: pseudo-instruction expansion
+// relies on the MachineOperands being compatible.
+def cmovpred : Operand<i32>, PredicateOp,
+               ComplexPattern<i32, 2, "SelectCMOVPred"> {
+  let MIOperandInfo = (ops i32imm, i32imm);
+  let PrintMethod = "printPredicateOperand";
+}
+
 // Conditional code result for instructions whose 's' bit is set, e.g. subs.
 def CCOutOperand : AsmOperandClass {
   let Name = "CCOut";
@@ -1124,9 +1134,6 @@ class ARMV5MOPat<dag pattern, dag result> : Pat<pattern, result> {
 class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> {
   list<Predicate> Predicates = [IsARM, HasV6];
 }
-class ARMV6T2Pat<dag pattern, dag result> : Pat<pattern, result> {
-  list<Predicate> Predicates = [IsARM, HasV6T2];
-}
 class VFPPat<dag pattern, dag result> : Pat<pattern, result> {
   list<Predicate> Predicates = [HasVFP2];
 }

diff  --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 718cb964ab7c3b..db38b43279b866 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -14,9 +14,6 @@
 // ARM specific DAG Nodes.
 //
 
-/// Value type used for "condition code" operands.
-defvar CondCodeVT = i32;
-
 /// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
 defvar FlagsVT = i32;
 
@@ -32,19 +29,12 @@ def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>;
 
 def SDT_ARMcall    : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
 
-def SDT_ARMCMov : SDTypeProfile<1, 4, [
-  /* any */                // result
-  SDTCisSameAs<1, 0>,      // value on false
-  SDTCisSameAs<2, 0>,      // value on true
-  SDTCisVT<3, CondCodeVT>, // condition code
-  SDTCisVT<4, FlagsVT>,    // in flags
-]>;
+def SDT_ARMCMov    : SDTypeProfile<1, 3,
+                                   [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+                                    SDTCisVT<3, i32>]>;
 
-def SDT_ARMBrcond : SDTypeProfile<0, 2, [
-  SDTCisVT<0, OtherVT>,    // target basic block
-  SDTCisVT<1, CondCodeVT>, // condition code
-  SDTCisVT<2, FlagsVT>,    // in flags
-]>;
+def SDT_ARMBrcond  : SDTypeProfile<0, 2,
+                                   [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>;
 
 def SDT_ARMBrJT    : SDTypeProfile<0, 2,
                                   [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
@@ -63,11 +53,7 @@ def SDT_ARMAnd     : SDTypeProfile<1, 2,
                                    [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
                                     SDTCisVT<2, i32>]>;
 
-def SDT_ARMCmp : SDTypeProfile<1, 2, [
-  SDTCisVT<0, FlagsVT>, // out flags
-  SDTCisInt<1>,         // lhs
-  SDTCisSameAs<2, 1>    // rhs
-]>;
+def SDT_ARMCmp     : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
 
 def SDT_ARMPICAdd  : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
                                           SDTCisPtrTy<1>, SDTCisVT<2, i32>]>;
@@ -138,17 +124,15 @@ def ARMSmlaldx       : SDNode<"ARMISD::SMLALDX", SDT_LongMac>;
 def ARMSmlsld        : SDNode<"ARMISD::SMLSLD", SDT_LongMac>;
 def ARMSmlsldx       : SDNode<"ARMISD::SMLSLDX", SDT_LongMac>;
 
-def SDT_ARMCSel : SDTypeProfile<1, 4, [
-  /* any */                // result
-  SDTCisSameAs<1, 0>,      // lhs
-  SDTCisSameAs<2, 0>,      // rhs
-  SDTCisVT<3, CondCodeVT>, // condition code
-  SDTCisVT<3, FlagsVT>     // in flags
-]>;
+def SDT_ARMCSel      : SDTypeProfile<1, 3,
+                                   [SDTCisSameAs<0, 1>,
+                                    SDTCisSameAs<0, 2>,
+                                    SDTCisInt<3>,
+                                    SDTCisVT<3, i32>]>;
 
-def ARMcsinv : SDNode<"ARMISD::CSINV", SDT_ARMCSel>;
-def ARMcsneg : SDNode<"ARMISD::CSNEG", SDT_ARMCSel>;
-def ARMcsinc : SDNode<"ARMISD::CSINC", SDT_ARMCSel>;
+def ARMcsinv         : SDNode<"ARMISD::CSINV", SDT_ARMCSel, [SDNPOptInGlue]>;
+def ARMcsneg         : SDNode<"ARMISD::CSNEG", SDT_ARMCSel, [SDNPOptInGlue]>;
+def ARMcsinc         : SDNode<"ARMISD::CSINC", SDT_ARMCSel, [SDNPOptInGlue]>;
 
 def SDT_MulHSR       : SDTypeProfile<1, 3, [SDTCisVT<0,i32>,
                                             SDTCisSameAs<0, 1>,
@@ -189,13 +173,15 @@ def ARMseretglue     : SDNode<"ARMISD::SERET_GLUE", SDTNone,
                               [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
 def ARMintretglue    : SDNode<"ARMISD::INTRET_GLUE", SDT_ARMcall,
                               [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-def ARMcmov          : SDNode<"ARMISD::CMOV", SDT_ARMCMov>;
+def ARMcmov          : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
+                              [SDNPInGlue]>;
 
 def ARMssat   : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>;
 
 def ARMusat   : SDNode<"ARMISD::USAT", SDTIntSatNoShOp, []>;
 
-def ARMbrcond        : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond, [SDNPHasChain]>;
+def ARMbrcond        : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond,
+                              [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
 
 def ARMbrjt          : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT,
                               [SDNPHasChain]>;
@@ -205,11 +191,14 @@ def ARMbr2jt         : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT,
 def ARMBcci64        : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64,
                               [SDNPHasChain]>;
 
-def ARMcmp           : SDNode<"ARMISD::CMP", SDT_ARMCmp>;
+def ARMcmp           : SDNode<"ARMISD::CMP", SDT_ARMCmp,
+                              [SDNPOutGlue]>;
 
-def ARMcmn           : SDNode<"ARMISD::CMN", SDT_ARMCmp>;
+def ARMcmn           : SDNode<"ARMISD::CMN", SDT_ARMCmp,
+                              [SDNPOutGlue]>;
 
-def ARMcmpZ          : SDNode<"ARMISD::CMPZ", SDT_ARMCmp, [SDNPCommutative]>;
+def ARMcmpZ          : SDNode<"ARMISD::CMPZ", SDT_ARMCmp,
+                              [SDNPOutGlue, SDNPCommutative]>;
 
 def ARMpic_add       : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>;
 
@@ -1787,7 +1776,7 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc,
                      string rrDecoderMethod = ""> {
   def ri : AI1<opcod, (outs), (ins GPR:$Rn, mod_imm:$imm), DPFrm, iii,
                opc, "\t$Rn, $imm",
-               [(set CPSR, (opnode GPR:$Rn, mod_imm:$imm))]>,
+               [(opnode GPR:$Rn, mod_imm:$imm)]>,
            Sched<[WriteCMP, ReadALU]> {
     bits<4> Rn;
     bits<12> imm;
@@ -1801,7 +1790,7 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc,
   }
   def rr : AI1<opcod, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, iir,
                opc, "\t$Rn, $Rm",
-               [(set CPSR, (opnode GPR:$Rn, GPR:$Rm))]>,
+               [(opnode GPR:$Rn, GPR:$Rm)]>,
            Sched<[WriteCMP, ReadALU, ReadALU]> {
     bits<4> Rn;
     bits<4> Rm;
@@ -1819,7 +1808,7 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc,
   def rsi : AI1<opcod, (outs),
                (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, iis,
                opc, "\t$Rn, $shift",
-               [(set CPSR, (opnode GPR:$Rn, so_reg_imm:$shift))]>,
+               [(opnode GPR:$Rn, so_reg_imm:$shift)]>,
             Sched<[WriteCMPsi, ReadALU]> {
     bits<4> Rn;
     bits<12> shift;
@@ -1836,7 +1825,7 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc,
   def rsr : AI1<opcod, (outs),
                (ins GPRnopc:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, iis,
                opc, "\t$Rn, $shift",
-               [(set CPSR, (opnode GPRnopc:$Rn, so_reg_reg:$shift))]>,
+               [(opnode GPRnopc:$Rn, so_reg_reg:$shift)]>,
             Sched<[WriteCMPsr, ReadALU]> {
     bits<4> Rn;
     bits<12> shift;
@@ -4954,7 +4943,7 @@ def : ARMPat<(ARMcmpZ so_reg_reg:$rhs, 0),
 let isCompare = 1, Defs = [CPSR] in {
 def CMNri : AI1<0b1011, (outs), (ins GPR:$Rn, mod_imm:$imm), DPFrm, IIC_iCMPi,
                 "cmn", "\t$Rn, $imm",
-                [(set CPSR, (ARMcmn GPR:$Rn, mod_imm:$imm))]>,
+                [(ARMcmn GPR:$Rn, mod_imm:$imm)]>,
                 Sched<[WriteCMP, ReadALU]> {
   bits<4> Rn;
   bits<12> imm;
@@ -4970,8 +4959,8 @@ def CMNri : AI1<0b1011, (outs), (ins GPR:$Rn, mod_imm:$imm), DPFrm, IIC_iCMPi,
 // CMN register-register/shift
 def CMNzrr : AI1<0b1011, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, IIC_iCMPr,
                  "cmn", "\t$Rn, $Rm",
-                 [(set CPSR, (BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
-                   GPR:$Rn, GPR:$Rm))]>, Sched<[WriteCMP, ReadALU, ReadALU]> {
+                 [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
+                   GPR:$Rn, GPR:$Rm)]>, Sched<[WriteCMP, ReadALU, ReadALU]> {
   bits<4> Rn;
   bits<4> Rm;
   let isCommutable = 1;
@@ -4988,8 +4977,8 @@ def CMNzrr : AI1<0b1011, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, IIC_iCMPr,
 def CMNzrsi : AI1<0b1011, (outs),
                   (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, IIC_iCMPsr,
                   "cmn", "\t$Rn, $shift",
-                  [(set CPSR, (BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
-                    GPR:$Rn, so_reg_imm:$shift))]>,
+                  [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
+                    GPR:$Rn, so_reg_imm:$shift)]>,
                     Sched<[WriteCMPsi, ReadALU]> {
   bits<4> Rn;
   bits<12> shift;
@@ -5007,8 +4996,8 @@ def CMNzrsi : AI1<0b1011, (outs),
 def CMNzrsr : AI1<0b1011, (outs),
                   (ins GPRnopc:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, IIC_iCMPsr,
                   "cmn", "\t$Rn, $shift",
-                  [(set CPSR, (BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
-                    GPRnopc:$Rn, so_reg_reg:$shift))]>,
+                  [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
+                    GPRnopc:$Rn, so_reg_reg:$shift)]>,
                     Sched<[WriteCMPsr, ReadALU]> {
   bits<4> Rn;
   bits<12> shift;
@@ -5063,74 +5052,65 @@ let hasSideEffects = 0 in {
 
 let isCommutable = 1, isSelect = 1 in
 def MOVCCr : ARMPseudoInst<(outs GPR:$Rd),
-                           (ins GPR:$false, GPR:$Rm, pred:$p),
-                           4, IIC_iCMOVr, []>,
+                           (ins GPR:$false, GPR:$Rm, cmovpred:$p),
+                           4, IIC_iCMOVr,
+                           [(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm,
+                                                   cmovpred:$p))]>,
              RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
 
 def MOVCCsi : ARMPseudoInst<(outs GPR:$Rd),
-                            (ins GPR:$false, so_reg_imm:$shift, pred:$p),
-                            4, IIC_iCMOVsr, []>,
+                            (ins GPR:$false, so_reg_imm:$shift, cmovpred:$p),
+                            4, IIC_iCMOVsr,
+                            [(set GPR:$Rd,
+                                  (ARMcmov GPR:$false, so_reg_imm:$shift,
+                                           cmovpred:$p))]>,
       RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
 def MOVCCsr : ARMPseudoInst<(outs GPR:$Rd),
-                            (ins GPR:$false, so_reg_reg:$shift, pred:$p),
-                           4, IIC_iCMOVsr, []>,
+                            (ins GPR:$false, so_reg_reg:$shift, cmovpred:$p),
+                           4, IIC_iCMOVsr,
+  [(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_reg:$shift,
+                            cmovpred:$p))]>,
       RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
 
 
 let isMoveImm = 1 in
 def MOVCCi16
     : ARMPseudoInst<(outs GPR:$Rd),
-                    (ins GPR:$false, imm0_65535_expr:$imm, pred:$p),
-                    4, IIC_iMOVi, []>,
+                    (ins GPR:$false, imm0_65535_expr:$imm, cmovpred:$p),
+                    4, IIC_iMOVi,
+                    [(set GPR:$Rd, (ARMcmov GPR:$false, imm0_65535:$imm,
+                                            cmovpred:$p))]>,
       RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>,
       Sched<[WriteALU]>;
 
 let isMoveImm = 1 in
 def MOVCCi : ARMPseudoInst<(outs GPR:$Rd),
-                           (ins GPR:$false, mod_imm:$imm, pred:$p),
-                           4, IIC_iCMOVi, []>,
+                           (ins GPR:$false, mod_imm:$imm, cmovpred:$p),
+                           4, IIC_iCMOVi,
+                           [(set GPR:$Rd, (ARMcmov GPR:$false, mod_imm:$imm,
+                                                   cmovpred:$p))]>,
       RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
 
 // Two instruction predicate mov immediate.
 let isMoveImm = 1 in
 def MOVCCi32imm
     : ARMPseudoInst<(outs GPR:$Rd),
-                    (ins GPR:$false, i32imm:$src, pred:$p),
-                    8, IIC_iCMOVix2, []>,
+                    (ins GPR:$false, i32imm:$src, cmovpred:$p),
+                    8, IIC_iCMOVix2,
+                    [(set GPR:$Rd, (ARMcmov GPR:$false, imm:$src,
+                                            cmovpred:$p))]>,
       RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>;
 
 let isMoveImm = 1 in
 def MVNCCi : ARMPseudoInst<(outs GPR:$Rd),
-                           (ins GPR:$false, mod_imm:$imm, pred:$p),
-                           4, IIC_iCMOVi, []>,
+                           (ins GPR:$false, mod_imm:$imm, cmovpred:$p),
+                           4, IIC_iCMOVi,
+                           [(set GPR:$Rd, (ARMcmov GPR:$false, mod_imm_not:$imm,
+                                                   cmovpred:$p))]>,
                 RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
 
 } // hasSideEffects
 
-// The following patterns have to be defined out-of-line because the number
-// of instruction operands does not match the number of SDNode operands
-// (`pred` counts as one operand).
-
-def : ARMPat<(ARMcmov i32:$false, i32:$Rm, imm:$cc, CPSR),
-             (MOVCCr $false, $Rm, imm:$cc, CPSR)>;
-
-def : ARMPat<(ARMcmov i32:$false, so_reg_imm:$shift, imm:$cc, CPSR),
-             (MOVCCsi $false, so_reg_imm:$shift, imm:$cc, CPSR)>;
-
-def : ARMPat<(ARMcmov i32:$false, so_reg_reg:$shift, imm:$cc, CPSR),
-             (MOVCCsr $false, so_reg_reg:$shift, imm:$cc, CPSR)>;
-
-def : ARMV6T2Pat<(ARMcmov i32:$false, imm0_65535:$imm, imm:$cc, CPSR),
-                 (MOVCCi16 $false, imm0_65535:$imm, imm:$cc, CPSR)>;
-
-def : ARMPat<(ARMcmov i32:$false, mod_imm:$imm, imm:$cc, CPSR),
-             (MOVCCi $false, mod_imm:$imm, imm:$cc, CPSR)>;
-
-def : ARMPat<(ARMcmov i32:$false, mod_imm_not:$imm, imm:$cc, CPSR),
-             (MVNCCi $false, mod_imm_not:$imm, imm:$cc, CPSR)>;
-
-def : ARMV6T2Pat<(ARMcmov i32:$false, imm:$src, imm:$cc, CPSR),
-                 (MOVCCi32imm $false, imm:$src, imm:$cc, CPSR)>;
 
 //===----------------------------------------------------------------------===//
 // Atomic operations intrinsics

diff  --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td
index cc7fc743fe4f92..b92f42874bbddb 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -23,7 +23,8 @@ def imm_sr_XFORM: SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant((Imm == 32 ? 0 : Imm), SDLoc(N), MVT::i32);
 }]>;
 def ThumbSRImmAsmOperand: ImmAsmOperand<1,32> { let Name = "ImmThumbSR"; }
-def imm_sr : Operand<i32>, ImmLeaf<i32, [{
+def imm_sr : Operand<i32>, PatLeaf<(imm), [{
+  uint64_t Imm = N->getZExtValue();
   return Imm > 0 && Imm <= 32;
 }], imm_sr_XFORM> {
   let PrintMethod = "printThumbSRImm";
@@ -1107,14 +1108,13 @@ let isCompare = 1, Defs = [CPSR] in {
 //  T1pIDPEncode<0b1011, (outs), (ins tGPR:$lhs, tGPR:$rhs),
 //               IIC_iCMPr,
 //               "cmn", "\t$lhs, $rhs",
-//               [(set CPSR, (ARMcmp tGPR:$lhs, (ineg tGPR:$rhs)))]>;
+//               [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>;
 
 def tCMNz :                     // A8.6.33
   T1pIDPEncode<0b1011, (outs), (ins tGPR:$Rn, tGPR:$Rm),
                IIC_iCMPr,
                "cmn", "\t$Rn, $Rm",
-               [(set CPSR, (ARMcmpZ tGPR:$Rn, (ineg tGPR:$Rm)))]>,
-  Sched<[WriteCMP]>;
+               [(ARMcmpZ tGPR:$Rn, (ineg tGPR:$Rm))]>, Sched<[WriteCMP]>;
 
 } // isCompare = 1, Defs = [CPSR]
 
@@ -1122,7 +1122,7 @@ def tCMNz :                     // A8.6.33
 let isCompare = 1, Defs = [CPSR] in {
 def tCMPi8 : T1pI<(outs), (ins tGPR:$Rn, imm0_255:$imm8), IIC_iCMPi,
                   "cmp", "\t$Rn, $imm8",
-                  [(set CPSR, (ARMcmp tGPR:$Rn, imm0_255:$imm8))]>,
+                  [(ARMcmp tGPR:$Rn, imm0_255:$imm8)]>,
              T1General<{1,0,1,?,?}>, Sched<[WriteCMP]> {
   // A8.6.35
   bits<3> Rn;
@@ -1136,7 +1136,7 @@ def tCMPr :                     // A8.6.36 T1
   T1pIDPEncode<0b1010, (outs), (ins tGPR:$Rn, tGPR:$Rm),
                IIC_iCMPr,
                "cmp", "\t$Rn, $Rm",
-               [(set CPSR, (ARMcmp tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteCMP]>;
+               [(ARMcmp tGPR:$Rn, tGPR:$Rm)]>, Sched<[WriteCMP]>;
 
 def tCMPhir : T1pI<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_iCMPr,
                    "cmp", "\t$Rn, $Rm", []>,
@@ -1423,7 +1423,7 @@ let isCompare = 1, isCommutable = 1, Defs = [CPSR] in
 def tTST :                      // A8.6.230
   T1pIDPEncode<0b1000, (outs), (ins tGPR:$Rn, tGPR:$Rm), IIC_iTSTr,
                "tst", "\t$Rn, $Rm",
-               [(set CPSR, (ARMcmpZ (and_su tGPR:$Rn, tGPR:$Rm), 0))]>,
+               [(ARMcmpZ (and_su tGPR:$Rn, tGPR:$Rm), 0)]>,
                Sched<[WriteALU]>;
 
 // A8.8.247  UDF - Undefined (Encoding T1)
@@ -1466,11 +1466,9 @@ def tUXTH :                     // A8.6.264
 // Expanded after instruction selection into a branch sequence.
 let usesCustomInserter = 1 in  // Expanded after instruction selection.
   def tMOVCCr_pseudo :
-  PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$p),
-             NoItinerary, []>;
-
-def : Pat<(ARMcmov tGPR:$false, tGPR:$true, imm:$cc, CPSR),
-          (tMOVCCr_pseudo $false, $true, imm:$cc, CPSR)>;
+  PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, cmovpred:$p),
+             NoItinerary,
+             [(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, cmovpred:$p))]>;
 
 // tLEApcrel - Load a pc-relative address into a register without offending the
 // assembler.

diff  --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 99617e53d657a9..aa5c0a58897688 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -1144,8 +1144,7 @@ let isCompare = 1, Defs = [CPSR] in {
    def ri : T2OneRegCmpImm<
                 (outs), (ins LHSGPR:$Rn, t2_so_imm:$imm), iii,
                 opc, ".w\t$Rn, $imm",
-                [(set CPSR, (opnode LHSGPR:$Rn, t2_so_imm:$imm))]>,
-            Sched<[WriteCMP]> {
+                [(opnode LHSGPR:$Rn, t2_so_imm:$imm)]>, Sched<[WriteCMP]> {
      let Inst{31-27} = 0b11110;
      let Inst{25} = 0;
      let Inst{24-21} = opcod;
@@ -1157,8 +1156,7 @@ let isCompare = 1, Defs = [CPSR] in {
    def rr : T2TwoRegCmp<
                 (outs), (ins LHSGPR:$Rn, rGPR:$Rm), iir,
                 opc, ".w\t$Rn, $Rm",
-                [(set CPSR, (opnode LHSGPR:$Rn, rGPR:$Rm))]>,
-            Sched<[WriteCMP]> {
+                [(opnode LHSGPR:$Rn, rGPR:$Rm)]>, Sched<[WriteCMP]> {
      let Inst{31-27} = 0b11101;
      let Inst{26-25} = 0b01;
      let Inst{24-21} = opcod;
@@ -1172,7 +1170,7 @@ let isCompare = 1, Defs = [CPSR] in {
    def rs : T2OneRegCmpShiftedReg<
                 (outs), (ins LHSGPR:$Rn, t2_so_reg:$ShiftedRm), iis,
                 opc, ".w\t$Rn, $ShiftedRm",
-                [(set CPSR, (opnode LHSGPR:$Rn, t2_so_reg:$ShiftedRm))]>,
+                [(opnode LHSGPR:$Rn, t2_so_reg:$ShiftedRm)]>,
                 Sched<[WriteCMPsi]> {
      let Inst{31-27} = 0b11101;
      let Inst{26-25} = 0b01;
@@ -3479,7 +3477,7 @@ let isCompare = 1, Defs = [CPSR] in {
    def t2CMNri : T2OneRegCmpImm<
                 (outs), (ins GPRnopc:$Rn, t2_so_imm:$imm), IIC_iCMPi,
                 "cmn", ".w\t$Rn, $imm",
-                [(set CPSR, (ARMcmn GPRnopc:$Rn, (ineg t2_so_imm:$imm)))]>,
+                [(ARMcmn GPRnopc:$Rn, (ineg t2_so_imm:$imm))]>,
                 Sched<[WriteCMP, ReadALU]> {
      let Inst{31-27} = 0b11110;
      let Inst{25} = 0;
@@ -3492,9 +3490,8 @@ let isCompare = 1, Defs = [CPSR] in {
    def t2CMNzrr : T2TwoRegCmp<
                 (outs), (ins GPRnopc:$Rn, rGPR:$Rm), IIC_iCMPr,
                 "cmn", ".w\t$Rn, $Rm",
-                [(set CPSR, (BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
-                  GPRnopc:$Rn, rGPR:$Rm))]>,
-                  Sched<[WriteCMP, ReadALU, ReadALU]> {
+                [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
+                  GPRnopc:$Rn, rGPR:$Rm)]>, Sched<[WriteCMP, ReadALU, ReadALU]> {
      let Inst{31-27} = 0b11101;
      let Inst{26-25} = 0b01;
      let Inst{24-21} = 0b1000;
@@ -3508,8 +3505,8 @@ let isCompare = 1, Defs = [CPSR] in {
    def t2CMNzrs : T2OneRegCmpShiftedReg<
                 (outs), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm), IIC_iCMPsi,
                 "cmn", ".w\t$Rn, $ShiftedRm",
-                [(set CPSR, (BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
-                  GPRnopc:$Rn, t2_so_reg:$ShiftedRm))]>,
+                [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
+                  GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]>,
                   Sched<[WriteCMPsi, ReadALU, ReadALU]> {
      let Inst{31-27} = 0b11101;
      let Inst{26-25} = 0b01;
@@ -3545,84 +3542,67 @@ let hasSideEffects = 0 in {
 
 let isCommutable = 1, isSelect = 1 in
 def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd),
-                            (ins rGPR:$false, rGPR:$Rm, pred:$p),
-                            4, IIC_iCMOVr, []>,
+                            (ins rGPR:$false, rGPR:$Rm, cmovpred:$p),
+                            4, IIC_iCMOVr,
+                            [(set rGPR:$Rd, (ARMcmov rGPR:$false, rGPR:$Rm,
+                                                     cmovpred:$p))]>,
                RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
 
 let isMoveImm = 1 in
 def t2MOVCCi
     : t2PseudoInst<(outs rGPR:$Rd),
-                   (ins rGPR:$false, t2_so_imm:$imm, pred:$p),
-                   4, IIC_iCMOVi, []>,
+                   (ins rGPR:$false, t2_so_imm:$imm, cmovpred:$p),
+                   4, IIC_iCMOVi,
+                   [(set rGPR:$Rd, (ARMcmov rGPR:$false,t2_so_imm:$imm,
+                                            cmovpred:$p))]>,
       RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
 
 let isCodeGenOnly = 1 in {
 let isMoveImm = 1 in
 def t2MOVCCi16
     : t2PseudoInst<(outs rGPR:$Rd),
-                   (ins rGPR:$false, imm0_65535_expr:$imm, pred:$p),
-                   4, IIC_iCMOVi, []>,
+                   (ins  rGPR:$false, imm0_65535_expr:$imm, cmovpred:$p),
+                   4, IIC_iCMOVi,
+                   [(set rGPR:$Rd, (ARMcmov rGPR:$false, imm0_65535:$imm,
+                                            cmovpred:$p))]>,
       RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
 
 let isMoveImm = 1 in
 def t2MVNCCi
     : t2PseudoInst<(outs rGPR:$Rd),
-                   (ins rGPR:$false, t2_so_imm:$imm, pred:$p),
-                   4, IIC_iCMOVi, []>,
+                   (ins rGPR:$false, t2_so_imm:$imm, cmovpred:$p),
+                   4, IIC_iCMOVi,
+                   [(set rGPR:$Rd,
+                         (ARMcmov rGPR:$false, t2_so_imm_not:$imm,
+                                  cmovpred:$p))]>,
       RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
 
-class MOVCCShPseudo
+class MOVCCShPseudo<SDPatternOperator opnode, Operand ty>
     : t2PseudoInst<(outs rGPR:$Rd),
-                   (ins rGPR:$false, rGPR:$Rm, i32imm:$imm, pred:$p),
-                   4, IIC_iCMOVsi, []>,
+                   (ins rGPR:$false, rGPR:$Rm, i32imm:$imm, cmovpred:$p),
+                   4, IIC_iCMOVsi,
+                   [(set rGPR:$Rd, (ARMcmov rGPR:$false,
+                                            (opnode rGPR:$Rm, (i32 ty:$imm)),
+                                            cmovpred:$p))]>,
       RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
 
-def t2MOVCClsl : MOVCCShPseudo;
-def t2MOVCClsr : MOVCCShPseudo;
-def t2MOVCCasr : MOVCCShPseudo;
-def t2MOVCCror : MOVCCShPseudo;
+def t2MOVCClsl : MOVCCShPseudo<shl,  imm0_31>;
+def t2MOVCClsr : MOVCCShPseudo<srl,  imm_sr>;
+def t2MOVCCasr : MOVCCShPseudo<sra,  imm_sr>;
+def t2MOVCCror : MOVCCShPseudo<rotr, imm0_31>;
 
 let isMoveImm = 1 in
 def t2MOVCCi32imm
     : t2PseudoInst<(outs rGPR:$dst),
-                   (ins rGPR:$false, i32imm:$src, pred:$p),
-                   8, IIC_iCMOVix2, []>,
+                   (ins rGPR:$false, i32imm:$src, cmovpred:$p),
+                   8, IIC_iCMOVix2,
+                   [(set rGPR:$dst, (ARMcmov rGPR:$false, imm:$src,
+                                             cmovpred:$p))]>,
       RegConstraint<"$false = $dst">;
 } // isCodeGenOnly = 1
 
 } // hasSideEffects
 
-// The following patterns have to be defined out-of-line because the number
-// of instruction operands does not match the number of SDNode operands
-// (`pred` counts as one operand).
-
-def : T2Pat<(ARMcmov i32:$false, i32:$Rm, imm:$cc, CPSR),
-            (t2MOVCCr $false, $Rm, imm:$cc, CPSR)>;
-
-def : T2Pat<(ARMcmov i32:$false, t2_so_imm:$imm, imm:$cc, CPSR),
-            (t2MOVCCi $false, t2_so_imm:$imm, imm:$cc, CPSR)>;
-
-def : T2Pat<(ARMcmov i32:$false, imm0_65535:$imm, imm:$cc, CPSR),
-            (t2MOVCCi16 $false, imm0_65535:$imm, imm:$cc, CPSR)>;
-
-def : T2Pat<(ARMcmov i32:$false, t2_so_imm_not:$imm, imm:$cc, CPSR),
-            (t2MVNCCi $false, t2_so_imm_not:$imm, imm:$cc, CPSR)>;
-
-def : T2Pat<(ARMcmov i32:$false, (shl i32:$Rm, imm0_31:$imm), imm:$cc, CPSR),
-            (t2MOVCClsl $false, $Rm, imm0_31:$imm, imm:$cc, CPSR)>;
-
-def : T2Pat<(ARMcmov i32:$false, (srl i32:$Rm, imm_sr:$imm), imm:$cc, CPSR),
-            (t2MOVCClsr $false, $Rm, imm_sr:$imm, imm:$cc, CPSR)>;
-
-def : T2Pat<(ARMcmov i32:$false, (sra i32:$Rm, imm_sr:$imm), imm:$cc, CPSR),
-            (t2MOVCCasr $false, $Rm, imm_sr:$imm, imm:$cc, CPSR)>;
-
-def : T2Pat<(ARMcmov i32:$false, (rotr i32:$Rm, imm0_31:$imm), imm:$cc, CPSR),
-            (t2MOVCCror $false, $Rm, imm0_31:$imm, imm:$cc, CPSR)>;
-
-def : T2Pat<(ARMcmov i32:$false, imm:$src, imm:$cc, CPSR),
-            (t2MOVCCi32imm $false, imm:$src, imm:$cc, CPSR)>;
-
 //===----------------------------------------------------------------------===//
 // Atomic operations intrinsics
 //
@@ -5726,53 +5706,51 @@ def t2CSINC : CS<"csinc", 0b1001>;
 def t2CSINV : CS<"csinv", 0b1010>;
 def t2CSNEG : CS<"csneg", 0b1011>;
 
-def ARMcsinc_su
-    : PatFrag<(ops node:$lhs, node:$rhs, node:$cc, node:$flags),
-              (ARMcsinc node:$lhs, node:$rhs, node:$cc, node:$flags), [{
+def ARMcsinc_su : PatFrag<(ops node:$lhs, node:$rhs, node:$cond),
+                          (ARMcsinc node:$lhs, node:$rhs, node:$cond), [{
   return N->hasOneUse();
 }]>;
 
 let Predicates = [HasV8_1MMainline] in {
   multiclass CSPats<SDNode Node, Instruction Insn> {
-    def : T2Pat<(Node GPRwithZR:$tval, GPRwithZR:$fval, imm:$cc, CPSR),
-                (Insn GPRwithZR:$tval, GPRwithZR:$fval, imm:$cc)>;
-    def : T2Pat<(Node (i32 0), GPRwithZR:$fval, imm:$cc, CPSR),
-                (Insn ZR, GPRwithZR:$fval, imm:$cc)>;
-    def : T2Pat<(Node GPRwithZR:$tval, (i32 0), imm:$cc, CPSR),
-                (Insn GPRwithZR:$tval, ZR, imm:$cc)>;
-    def : T2Pat<(Node (i32 0), (i32 0), imm:$cc, CPSR),
-                (Insn ZR, ZR, imm:$cc)>;
+    def : T2Pat<(Node GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm),
+                (Insn GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm)>;
+    def : T2Pat<(Node (i32 0), GPRwithZR:$fval, imm0_31:$imm),
+                (Insn ZR, GPRwithZR:$fval, imm0_31:$imm)>;
+    def : T2Pat<(Node GPRwithZR:$tval, (i32 0), imm0_31:$imm),
+                (Insn GPRwithZR:$tval, ZR, imm0_31:$imm)>;
+    def : T2Pat<(Node (i32 0), (i32 0), imm0_31:$imm),
+                (Insn ZR, ZR, imm0_31:$imm)>;
   }
 
   defm : CSPats<ARMcsinc, t2CSINC>;
   defm : CSPats<ARMcsinv, t2CSINV>;
   defm : CSPats<ARMcsneg, t2CSNEG>;
 
-  def : T2Pat<(ARMcmov (i32 1), (i32 0), imm:$cc, CPSR),
-              (t2CSINC ZR, ZR, imm:$cc)>;
-  def : T2Pat<(ARMcmov (i32 -1), (i32 0), imm:$cc, CPSR),
-              (t2CSINV ZR, ZR, imm:$cc)>;
-  def : T2Pat<(ARMcmov (i32 0), (i32 1), imm:$cc, CPSR),
-              (t2CSINC ZR, ZR, (inv_cond_XFORM imm:$cc))>;
-  def : T2Pat<(ARMcmov (i32 0), (i32 -1), imm:$cc, CPSR),
-              (t2CSINV ZR, ZR, (inv_cond_XFORM imm:$cc))>;
+  def : T2Pat<(ARMcmov (i32 1), (i32 0), cmovpred:$imm),
+              (t2CSINC ZR, ZR, imm0_31:$imm)>;
+  def : T2Pat<(ARMcmov (i32 -1), (i32 0), cmovpred:$imm),
+              (t2CSINV ZR, ZR, imm0_31:$imm)>;
+  def : T2Pat<(ARMcmov (i32 0), (i32 1), cmovpred:$imm),
+              (t2CSINC ZR, ZR, (inv_cond_XFORM imm:$imm))>;
+  def : T2Pat<(ARMcmov (i32 0), (i32 -1), cmovpred:$imm),
+              (t2CSINV ZR, ZR, (inv_cond_XFORM imm:$imm))>;
 
   multiclass ModifiedV8_1CSEL<Instruction Insn, dag modvalue> {
-    def : T2Pat<(ARMcmov modvalue, GPRwithZR:$tval, imm:$cc, CPSR),
-                (Insn GPRwithZR:$tval, GPRwithZR:$fval, imm:$cc)>;
-    def : T2Pat<(ARMcmov GPRwithZR:$tval, modvalue, imm:$cc, CPSR),
+    def : T2Pat<(ARMcmov modvalue, GPRwithZR:$tval, cmovpred:$imm),
+                (Insn GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm)>;
+    def : T2Pat<(ARMcmov GPRwithZR:$tval, modvalue, cmovpred:$imm),
                 (Insn GPRwithZR:$tval, GPRwithZR:$fval,
-                         (i32 (inv_cond_XFORM imm:$cc)))>;
+                         (i32 (inv_cond_XFORM imm:$imm)))>;
   }
   defm : ModifiedV8_1CSEL<t2CSINC, (add rGPR:$fval, 1)>;
   defm : ModifiedV8_1CSEL<t2CSINV, (xor rGPR:$fval, -1)>;
   defm : ModifiedV8_1CSEL<t2CSNEG, (sub 0, rGPR:$fval)>;
 
-  def : T2Pat<(ARMcmov (topbitsallzero32:$Rn), (i32 1), imm:$cc, CPSR),
-              (t2CSINC $Rn, ZR, (inv_cond_XFORM imm:$cc))>;
-  def : T2Pat<(and (topbitsallzero32:$Rn),
-                   (ARMcsinc_su (i32 0), (i32 0), imm:$cc, CPSR)),
-              (t2CSEL ZR, $Rn, imm:$cc)>;
+  def : T2Pat<(ARMcmov (topbitsallzero32:$Rn), (i32 1), cmovpred:$imm),
+              (t2CSINC $Rn, ZR, (inv_cond_XFORM imm:$imm))>;
+  def : T2Pat<(and (topbitsallzero32:$Rn), (ARMcsinc_su (i32 0), (i32 0), cmovpred:$imm)),
+              (t2CSEL ZR, $Rn, $imm)>;
 }
 
 // CS aliases.

diff  --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index 754517f3bc4d5b..a29753909ea992 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -34,10 +34,10 @@ def arm_cmpfpe  : SDNode<"ARMISD::CMPFPE",   SDT_CMPFP>;
 def arm_cmpfpe0 : SDNode<"ARMISD::CMPFPEw0", SDT_CMPFP0>;
 
 def arm_fmstat : SDNode<"ARMISD::FMSTAT",
-  SDTypeProfile<1, 1, [
-    SDTCisVT<0, FlagsVT>, // out flags
-    SDTCisVT<1, FlagsVT>  // in flags
-  ]>
+  SDTypeProfile<0, 1, [
+    SDTCisVT<0, FlagsVT> // in flags
+  ]>,
+  [SDNPOutGlue] // TODO: Change Glue to a normal result.
 >;
 
 def arm_fmdrr  : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
@@ -562,21 +562,19 @@ multiclass vsel_inst<string op, bits<2> opc, int CC> {
     def H : AHbInp<0b11100, opc, 0,
                    (outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
                    NoItinerary, !strconcat("vsel", op, ".f16\t$Sd, $Sn, $Sm"),
-                   [(set (f16 HPR:$Sd),
-                         (ARMcmov (f16 HPR:$Sm), (f16 HPR:$Sn), CC, CPSR))]>,
+                   [(set (f16 HPR:$Sd), (ARMcmov (f16 HPR:$Sm), (f16 HPR:$Sn), CC))]>,
                    Requires<[HasFullFP16]>;
 
     def S : ASbInp<0b11100, opc, 0,
                    (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
                    NoItinerary, !strconcat("vsel", op, ".f32\t$Sd, $Sn, $Sm"),
-                   [(set SPR:$Sd, (ARMcmov SPR:$Sm, SPR:$Sn, CC, CPSR))]>,
+                   [(set SPR:$Sd, (ARMcmov SPR:$Sm, SPR:$Sn, CC))]>,
                    Requires<[HasFPARMv8]>;
 
     def D : ADbInp<0b11100, opc, 0,
                    (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
                    NoItinerary, !strconcat("vsel", op, ".f64\t$Dd, $Dn, $Dm"),
-                   [(set DPR:$Dd,
-                         (ARMcmov (f64 DPR:$Dm), (f64 DPR:$Dn), CC, CPSR))]>,
+                   [(set DPR:$Dd, (ARMcmov (f64 DPR:$Dm), (f64 DPR:$Dn), CC))]>,
                    Requires<[HasFPARMv8, HasDPVFP]>;
   }
 }
@@ -2463,35 +2461,25 @@ def : Pat<(fneg (f16 (fma (fneg (f16 HPR:$Sn)), (f16 HPR:$Sm), (f16 HPR:$Sdin)))
 //
 
 let hasSideEffects = 0 in {
-def VMOVDcc  : PseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, pred:$p),
-                          IIC_fpUNA64, []>,
+def VMOVDcc  : PseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, cmovpred:$p),
+                    IIC_fpUNA64,
+                    [(set (f64 DPR:$Dd),
+                          (ARMcmov DPR:$Dn, DPR:$Dm, cmovpred:$p))]>,
                RegConstraint<"$Dn = $Dd">, Requires<[HasFPRegs64]>;
 
-def VMOVScc  : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, pred:$p),
-                          IIC_fpUNA32, []>,
+def VMOVScc  : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, cmovpred:$p),
+                    IIC_fpUNA32,
+                    [(set (f32 SPR:$Sd),
+                          (ARMcmov SPR:$Sn, SPR:$Sm, cmovpred:$p))]>,
                RegConstraint<"$Sn = $Sd">, Requires<[HasFPRegs]>;
 
-def VMOVHcc  : PseudoInst<(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm, pred:$p),
-                          IIC_fpUNA16, []>,
+def VMOVHcc  : PseudoInst<(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm, cmovpred:$p),
+                    IIC_fpUNA16,
+                    [(set (f16 HPR:$Sd),
+                          (ARMcmov (f16 HPR:$Sn), (f16 HPR:$Sm), cmovpred:$p))]>,
                RegConstraint<"$Sd = $Sn">, Requires<[HasFPRegs]>;
 } // hasSideEffects
 
-// The following patterns have to be defined out-of-line because the number
-// of instruction operands does not match the number of SDNode operands
-// (`pred` counts as one operand).
-
-def : Pat<(ARMcmov f64:$Dn, f64:$Dm, imm:$cc, CPSR),
-          (VMOVDcc $Dn, $Dm, imm:$cc, CPSR)>,
-      Requires<[HasFPRegs64]>;
-
-def : Pat<(ARMcmov f32:$Sn, f32:$Sm, imm:$cc, CPSR),
-          (VMOVScc $Sn, $Sm, imm:$cc, CPSR)>,
-      Requires<[HasFPRegs]>;
-
-def : Pat<(ARMcmov f16:$Sn, f16:$Sm, imm:$cc, CPSR),
-          (VMOVHcc $Sn, $Sm, imm:$cc, CPSR)>,
-      Requires<[HasFPRegs]>; // FIXME: Shouldn't this be HasFPRegs16?
-
 //===----------------------------------------------------------------------===//
 // Move from VFP System Register to ARM core register.
 //
@@ -2522,7 +2510,7 @@ let DecoderMethod = "DecodeForVMRSandVMSR" in {
      Rt = 0b1111 /* apsr_nzcv */ in
  def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
                          "vmrs", "\tAPSR_nzcv, fpscr",
-                         [(set CPSR, (arm_fmstat FPSCR_NZCV))]>;
+                         [(arm_fmstat FPSCR_NZCV)]>;
 
  // Application level FPSCR -> GPR
  let hasSideEffects = 1, Uses = [FPSCR], Predicates = [HasFPRegs] in

diff  --git a/llvm/test/CodeGen/ARM/add-like-or.ll b/llvm/test/CodeGen/ARM/add-like-or.ll
index c0ddee83880410..5de03a92afeb42 100644
--- a/llvm/test/CodeGen/ARM/add-like-or.ll
+++ b/llvm/test/CodeGen/ARM/add-like-or.ll
@@ -29,8 +29,8 @@ define i32 @test_add_i3(i1 %tst, i32 %a, i32 %b) {
 ; CHECK-T2:       @ %bb.0:
 ; CHECK-T2-NEXT:    .save {r4, lr}
 ; CHECK-T2-NEXT:    push {r4, lr}
-; CHECK-T2-NEXT:    bic r4, r2, #3
 ; CHECK-T2-NEXT:    lsls r0, r0, #31
+; CHECK-T2-NEXT:    bic r4, r2, #3
 ; CHECK-T2-NEXT:    it ne
 ; CHECK-T2-NEXT:    bicne r4, r1, #6
 ; CHECK-T2-NEXT:    mov r0, r4
@@ -144,12 +144,12 @@ define i32 @test_add_i12(i32 %a, i32 %b, i1 %tst) {
 ;
 ; CHECK-A-LABEL: test_add_i12:
 ; CHECK-A:       @ %bb.0:
-; CHECK-A-NEXT:    bfc r0, #0, #12
 ; CHECK-A-NEXT:    bfc r1, #0, #13
+; CHECK-A-NEXT:    bfc r0, #0, #12
 ; CHECK-A-NEXT:    tst r2, #1
-; CHECK-A-NEXT:    movne r1, r0
-; CHECK-A-NEXT:    movw r0, #854
-; CHECK-A-NEXT:    orr r0, r1, r0
+; CHECK-A-NEXT:    moveq r0, r1
+; CHECK-A-NEXT:    movw r1, #854
+; CHECK-A-NEXT:    orr r0, r0, r1
 ; CHECK-A-NEXT:    bx lr
   %tmp = and i32 %a, -4096
   %tmp1 = and i32 %b, -8192

diff  --git a/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll b/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll
index 75416475289f31..b6adc995091cea 100644
--- a/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll
@@ -1965,34 +1965,32 @@ define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %t
 ; ARM-ENABLE-NEXT:  @ %bb.1: @ %bb3
 ; ARM-ENABLE-NEXT:    push {r4, r7, lr}
 ; ARM-ENABLE-NEXT:    add r7, sp, #4
-; ARM-ENABLE-NEXT:    sub r4, sp, #24
+; ARM-ENABLE-NEXT:    sub r4, sp, #16
 ; ARM-ENABLE-NEXT:    bfc r4, #0, #4
 ; ARM-ENABLE-NEXT:    mov sp, r4
 ; ARM-ENABLE-NEXT:    ldr r1, [r7, #8]
-; ARM-ENABLE-NEXT:    vmov.f64 d16, #1.000000e+00
 ; ARM-ENABLE-NEXT:    mov r2, r3
 ; ARM-ENABLE-NEXT:    vst1.64 {d8, d9}, [r4:128]
-; ARM-ENABLE-NEXT:    vmov d9, r3, r1
 ; ARM-ENABLE-NEXT:    vmov s16, r0
 ; ARM-ENABLE-NEXT:    mov r0, r3
+; ARM-ENABLE-NEXT:    vmov d9, r3, r1
 ; ARM-ENABLE-NEXT:    mov r3, r1
-; ARM-ENABLE-NEXT:    vstr d10, [r4, #16]
-; ARM-ENABLE-NEXT:    vadd.f64 d10, d9, d16
 ; ARM-ENABLE-NEXT:    bl _pow
 ; ARM-ENABLE-NEXT:    vmov.f32 s0, #1.000000e+00
 ; ARM-ENABLE-NEXT:    mov r4, sp
-; ARM-ENABLE-NEXT:    vmov.f64 d17, d9
-; ARM-ENABLE-NEXT:    vmov d16, r0, r1
+; ARM-ENABLE-NEXT:    vmov.f64 d16, #1.000000e+00
+; ARM-ENABLE-NEXT:    vadd.f64 d16, d9, d16
 ; ARM-ENABLE-NEXT:    vcmp.f32 s16, s0
 ; ARM-ENABLE-NEXT:    vmrs APSR_nzcv, fpscr
-; ARM-ENABLE-NEXT:    vadd.f64 d16, d16, d16
-; ARM-ENABLE-NEXT:    vmovgt.f64 d17, d10
-; ARM-ENABLE-NEXT:    vcmp.f64 d17, d9
+; ARM-ENABLE-NEXT:    vmov d17, r0, r1
+; ARM-ENABLE-NEXT:    vmov.f64 d18, d9
+; ARM-ENABLE-NEXT:    vadd.f64 d17, d17, d17
+; ARM-ENABLE-NEXT:    vmovgt.f64 d18, d16
+; ARM-ENABLE-NEXT:    vcmp.f64 d18, d9
 ; ARM-ENABLE-NEXT:    vmrs APSR_nzcv, fpscr
-; ARM-ENABLE-NEXT:    vmovne.f64 d9, d16
+; ARM-ENABLE-NEXT:    vmovne.f64 d9, d17
 ; ARM-ENABLE-NEXT:    vcvt.f32.f64 s0, d9
 ; ARM-ENABLE-NEXT:    vld1.64 {d8, d9}, [r4:128]
-; ARM-ENABLE-NEXT:    vldr d10, [r4, #16]
 ; ARM-ENABLE-NEXT:    sub sp, r7, #4
 ; ARM-ENABLE-NEXT:    pop {r4, r7, lr}
 ; ARM-ENABLE-NEXT:    vmov r0, s0
@@ -2014,33 +2012,32 @@ define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %t
 ; ARM-DISABLE-NEXT:  @ %bb.0: @ %bb
 ; ARM-DISABLE-NEXT:    push {r4, r7, lr}
 ; ARM-DISABLE-NEXT:    add r7, sp, #4
-; ARM-DISABLE-NEXT:    sub r4, sp, #24
+; ARM-DISABLE-NEXT:    sub r4, sp, #16
 ; ARM-DISABLE-NEXT:    bfc r4, #0, #4
 ; ARM-DISABLE-NEXT:    mov sp, r4
 ; ARM-DISABLE-NEXT:    tst r2, #1
 ; ARM-DISABLE-NEXT:    vst1.64 {d8, d9}, [r4:128]
-; ARM-DISABLE-NEXT:    vstr d10, [r4, #16]
 ; ARM-DISABLE-NEXT:    beq LBB12_2
 ; ARM-DISABLE-NEXT:  @ %bb.1: @ %bb3
 ; ARM-DISABLE-NEXT:    ldr r1, [r7, #8]
-; ARM-DISABLE-NEXT:    vmov.f64 d16, #1.000000e+00
-; ARM-DISABLE-NEXT:    mov r2, r3
-; ARM-DISABLE-NEXT:    vmov d9, r3, r1
 ; ARM-DISABLE-NEXT:    vmov s16, r0
 ; ARM-DISABLE-NEXT:    mov r0, r3
+; ARM-DISABLE-NEXT:    mov r2, r3
+; ARM-DISABLE-NEXT:    vmov d9, r3, r1
 ; ARM-DISABLE-NEXT:    mov r3, r1
-; ARM-DISABLE-NEXT:    vadd.f64 d10, d9, d16
 ; ARM-DISABLE-NEXT:    bl _pow
 ; ARM-DISABLE-NEXT:    vmov.f32 s0, #1.000000e+00
-; ARM-DISABLE-NEXT:    vmov.f64 d17, d9
-; ARM-DISABLE-NEXT:    vmov d16, r0, r1
+; ARM-DISABLE-NEXT:    vmov.f64 d16, #1.000000e+00
+; ARM-DISABLE-NEXT:    vadd.f64 d16, d9, d16
 ; ARM-DISABLE-NEXT:    vcmp.f32 s16, s0
 ; ARM-DISABLE-NEXT:    vmrs APSR_nzcv, fpscr
-; ARM-DISABLE-NEXT:    vadd.f64 d16, d16, d16
-; ARM-DISABLE-NEXT:    vmovgt.f64 d17, d10
-; ARM-DISABLE-NEXT:    vcmp.f64 d17, d9
+; ARM-DISABLE-NEXT:    vmov d17, r0, r1
+; ARM-DISABLE-NEXT:    vmov.f64 d18, d9
+; ARM-DISABLE-NEXT:    vadd.f64 d17, d17, d17
+; ARM-DISABLE-NEXT:    vmovgt.f64 d18, d16
+; ARM-DISABLE-NEXT:    vcmp.f64 d18, d9
 ; ARM-DISABLE-NEXT:    vmrs APSR_nzcv, fpscr
-; ARM-DISABLE-NEXT:    vmovne.f64 d9, d16
+; ARM-DISABLE-NEXT:    vmovne.f64 d9, d17
 ; ARM-DISABLE-NEXT:    vcvt.f32.f64 s0, d9
 ; ARM-DISABLE-NEXT:    b LBB12_3
 ; ARM-DISABLE-NEXT:  LBB12_2:
@@ -2049,7 +2046,6 @@ define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %t
 ; ARM-DISABLE-NEXT:    mov r4, sp
 ; ARM-DISABLE-NEXT:    vld1.64 {d8, d9}, [r4:128]
 ; ARM-DISABLE-NEXT:    vmov r0, s0
-; ARM-DISABLE-NEXT:    vldr d10, [r4, #16]
 ; ARM-DISABLE-NEXT:    sub sp, r7, #4
 ; ARM-DISABLE-NEXT:    pop {r4, r7, pc}
 ; ARM-DISABLE-NEXT:    .p2align 2
@@ -2068,36 +2064,34 @@ define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %t
 ; THUMB-ENABLE-NEXT:  @ %bb.1: @ %bb3
 ; THUMB-ENABLE-NEXT:    push {r4, r7, lr}
 ; THUMB-ENABLE-NEXT:    add r7, sp, #4
-; THUMB-ENABLE-NEXT:    sub.w r4, sp, #24
+; THUMB-ENABLE-NEXT:    sub.w r4, sp, #16
 ; THUMB-ENABLE-NEXT:    bfc r4, #0, #4
 ; THUMB-ENABLE-NEXT:    mov sp, r4
 ; THUMB-ENABLE-NEXT:    ldr r1, [r7, #8]
-; THUMB-ENABLE-NEXT:    vmov.f64 d16, #1.000000e+00
 ; THUMB-ENABLE-NEXT:    mov r2, r3
 ; THUMB-ENABLE-NEXT:    vst1.64 {d8, d9}, [r4:128]
-; THUMB-ENABLE-NEXT:    vmov d9, r3, r1
 ; THUMB-ENABLE-NEXT:    vmov s16, r0
 ; THUMB-ENABLE-NEXT:    mov r0, r3
+; THUMB-ENABLE-NEXT:    vmov d9, r3, r1
 ; THUMB-ENABLE-NEXT:    mov r3, r1
-; THUMB-ENABLE-NEXT:    vstr d10, [r4, #16]
-; THUMB-ENABLE-NEXT:    vadd.f64 d10, d9, d16
 ; THUMB-ENABLE-NEXT:    bl _pow
 ; THUMB-ENABLE-NEXT:    vmov.f32 s0, #1.000000e+00
 ; THUMB-ENABLE-NEXT:    mov r4, sp
-; THUMB-ENABLE-NEXT:    vmov.f64 d17, d9
-; THUMB-ENABLE-NEXT:    vmov d16, r0, r1
+; THUMB-ENABLE-NEXT:    vmov.f64 d16, #1.000000e+00
+; THUMB-ENABLE-NEXT:    vmov.f64 d18, d9
 ; THUMB-ENABLE-NEXT:    vcmp.f32 s16, s0
+; THUMB-ENABLE-NEXT:    vadd.f64 d16, d9, d16
 ; THUMB-ENABLE-NEXT:    vmrs APSR_nzcv, fpscr
 ; THUMB-ENABLE-NEXT:    it gt
-; THUMB-ENABLE-NEXT:    vmovgt.f64 d17, d10
-; THUMB-ENABLE-NEXT:    vcmp.f64 d17, d9
-; THUMB-ENABLE-NEXT:    vadd.f64 d16, d16, d16
+; THUMB-ENABLE-NEXT:    vmovgt.f64 d18, d16
+; THUMB-ENABLE-NEXT:    vcmp.f64 d18, d9
+; THUMB-ENABLE-NEXT:    vmov d17, r0, r1
 ; THUMB-ENABLE-NEXT:    vmrs APSR_nzcv, fpscr
+; THUMB-ENABLE-NEXT:    vadd.f64 d17, d17, d17
 ; THUMB-ENABLE-NEXT:    it ne
-; THUMB-ENABLE-NEXT:    vmovne.f64 d9, d16
+; THUMB-ENABLE-NEXT:    vmovne.f64 d9, d17
 ; THUMB-ENABLE-NEXT:    vcvt.f32.f64 s0, d9
 ; THUMB-ENABLE-NEXT:    vld1.64 {d8, d9}, [r4:128]
-; THUMB-ENABLE-NEXT:    vldr d10, [r4, #16]
 ; THUMB-ENABLE-NEXT:    subs r4, r7, #4
 ; THUMB-ENABLE-NEXT:    mov sp, r4
 ; THUMB-ENABLE-NEXT:    pop.w {r4, r7, lr}
@@ -2120,35 +2114,34 @@ define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %t
 ; THUMB-DISABLE-NEXT:  @ %bb.0: @ %bb
 ; THUMB-DISABLE-NEXT:    push {r4, r7, lr}
 ; THUMB-DISABLE-NEXT:    add r7, sp, #4
-; THUMB-DISABLE-NEXT:    sub.w r4, sp, #24
+; THUMB-DISABLE-NEXT:    sub.w r4, sp, #16
 ; THUMB-DISABLE-NEXT:    bfc r4, #0, #4
 ; THUMB-DISABLE-NEXT:    mov sp, r4
 ; THUMB-DISABLE-NEXT:    lsls r1, r2, #31
 ; THUMB-DISABLE-NEXT:    vst1.64 {d8, d9}, [r4:128]
-; THUMB-DISABLE-NEXT:    vstr d10, [r4, #16]
 ; THUMB-DISABLE-NEXT:    beq LBB12_2
 ; THUMB-DISABLE-NEXT:  @ %bb.1: @ %bb3
 ; THUMB-DISABLE-NEXT:    ldr r1, [r7, #8]
-; THUMB-DISABLE-NEXT:    vmov.f64 d16, #1.000000e+00
-; THUMB-DISABLE-NEXT:    mov r2, r3
-; THUMB-DISABLE-NEXT:    vmov d9, r3, r1
 ; THUMB-DISABLE-NEXT:    vmov s16, r0
 ; THUMB-DISABLE-NEXT:    mov r0, r3
+; THUMB-DISABLE-NEXT:    mov r2, r3
+; THUMB-DISABLE-NEXT:    vmov d9, r3, r1
 ; THUMB-DISABLE-NEXT:    mov r3, r1
-; THUMB-DISABLE-NEXT:    vadd.f64 d10, d9, d16
 ; THUMB-DISABLE-NEXT:    bl _pow
 ; THUMB-DISABLE-NEXT:    vmov.f32 s0, #1.000000e+00
-; THUMB-DISABLE-NEXT:    vmov.f64 d17, d9
-; THUMB-DISABLE-NEXT:    vmov d16, r0, r1
+; THUMB-DISABLE-NEXT:    vmov.f64 d16, #1.000000e+00
+; THUMB-DISABLE-NEXT:    vmov.f64 d18, d9
 ; THUMB-DISABLE-NEXT:    vcmp.f32 s16, s0
+; THUMB-DISABLE-NEXT:    vadd.f64 d16, d9, d16
 ; THUMB-DISABLE-NEXT:    vmrs APSR_nzcv, fpscr
 ; THUMB-DISABLE-NEXT:    it gt
-; THUMB-DISABLE-NEXT:    vmovgt.f64 d17, d10
-; THUMB-DISABLE-NEXT:    vcmp.f64 d17, d9
-; THUMB-DISABLE-NEXT:    vadd.f64 d16, d16, d16
+; THUMB-DISABLE-NEXT:    vmovgt.f64 d18, d16
+; THUMB-DISABLE-NEXT:    vcmp.f64 d18, d9
+; THUMB-DISABLE-NEXT:    vmov d17, r0, r1
 ; THUMB-DISABLE-NEXT:    vmrs APSR_nzcv, fpscr
+; THUMB-DISABLE-NEXT:    vadd.f64 d17, d17, d17
 ; THUMB-DISABLE-NEXT:    it ne
-; THUMB-DISABLE-NEXT:    vmovne.f64 d9, d16
+; THUMB-DISABLE-NEXT:    vmovne.f64 d9, d17
 ; THUMB-DISABLE-NEXT:    vcvt.f32.f64 s0, d9
 ; THUMB-DISABLE-NEXT:    b LBB12_3
 ; THUMB-DISABLE-NEXT:  LBB12_2:
@@ -2156,9 +2149,8 @@ define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %t
 ; THUMB-DISABLE-NEXT:  LBB12_3: @ %bb13
 ; THUMB-DISABLE-NEXT:    mov r4, sp
 ; THUMB-DISABLE-NEXT:    vld1.64 {d8, d9}, [r4:128]
-; THUMB-DISABLE-NEXT:    vmov r0, s0
-; THUMB-DISABLE-NEXT:    vldr d10, [r4, #16]
 ; THUMB-DISABLE-NEXT:    subs r4, r7, #4
+; THUMB-DISABLE-NEXT:    vmov r0, s0
 ; THUMB-DISABLE-NEXT:    mov sp, r4
 ; THUMB-DISABLE-NEXT:    pop {r4, r7, pc}
 ; THUMB-DISABLE-NEXT:    .p2align 2

diff  --git a/llvm/test/CodeGen/ARM/atomic-64bit.ll b/llvm/test/CodeGen/ARM/atomic-64bit.ll
index ca9939c0f8c552..ab9e1dfd1cfb19 100644
--- a/llvm/test/CodeGen/ARM/atomic-64bit.ll
+++ b/llvm/test/CodeGen/ARM/atomic-64bit.ll
@@ -278,9 +278,12 @@ define i64 @test10(ptr %ptr, i64 %val) {
 ; CHECK-BE: subs {{[^,]+}}, r2, [[REG2]]
 ; CHECK-LE: sbcs {{[^,]+}}, r2, [[REG2]]
 ; CHECK-BE: sbcs {{[^,]+}}, r1, [[REG1]]
-; CHECK: movge   [[OUT_HI]], [[REG2]]
+; CHECK: mov     [[CMP:[a-z0-9]+]], #0
+; CHECK: movwge  [[CMP]], #1
+; CHECK: cmp     [[CMP]], #0
+; CHECK: movne   [[OUT_HI]], [[REG2]]
 ; CHECK: mov     [[OUT_LO:[a-z0-9]+]], r1
-; CHECK: movge   [[OUT_LO]], [[REG1]]
+; CHECK: movne   [[OUT_LO]], [[REG1]]
 ; CHECK: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
 ; CHECK: cmp
 ; CHECK: bne
@@ -294,10 +297,12 @@ define i64 @test10(ptr %ptr, i64 %val) {
 ; CHECK-THUMB-BE: subs.w {{[^,]+}}, r3, [[REG2]]
 ; CHECK-THUMB-LE: sbcs.w {{[^,]+}}, r3, [[REG2]]
 ; CHECK-THUMB-BE: sbcs.w {{[^,]+}}, r2, [[REG1]]
+; CHECK-THUMB: mov.w     [[CMP:[a-z0-9]+]], #0
+; CHECK-THUMB: movge.w   [[CMP]], #1
+; CHECK-THUMB: cmp.w     [[CMP]], #0
 ; CHECK-THUMB: mov       [[OUT_HI:[a-z0-9]+]], r3
-; CHECK-THUMB: itt     ge
-; CHECK-THUMB: movge   [[OUT_HI]], [[REG2]]
-; CHECK-THUMB: movge   [[OUT_LO]], [[REG1]]
+; CHECK-THUMB: movne   [[OUT_HI]], [[REG2]]
+; CHECK-THUMB: movne   [[OUT_LO]], [[REG1]]
 ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
@@ -318,9 +323,12 @@ define i64 @test11(ptr %ptr, i64 %val) {
 ; CHECK-BE: subs    {{[^,]+}}, r2, [[REG2]]
 ; CHECK-LE: sbcs    {{[^,]+}}, r2, [[REG2]]
 ; CHECK-BE: sbcs    {{[^,]+}}, r1, [[REG1]]
-; CHECK: movhs   [[OUT_HI]], [[REG2]]
+; CHECK: mov     [[CMP:[a-z0-9]+]], #0
+; CHECK: movwhs  [[CMP]], #1
+; CHECK: cmp     [[CMP]], #0
+; CHECK: movne   [[OUT_HI]], [[REG2]]
 ; CHECK: mov     [[OUT_LO:[a-z0-9]+]], r1
-; CHECK: movhs   [[OUT_LO]], [[REG1]]
+; CHECK: movne   [[OUT_LO]], [[REG1]]
 ; CHECK: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
 ; CHECK: cmp
 ; CHECK: bne
@@ -334,10 +342,12 @@ define i64 @test11(ptr %ptr, i64 %val) {
 ; CHECK-THUMB-BE: subs.w {{[^,]+}}, r3, [[REG2]]
 ; CHECK-THUMB-LE: sbcs.w {{[^,]+}}, r3, [[REG2]]
 ; CHECK-THUMB-BE: sbcs.w {{[^,]+}}, r2, [[REG1]]
+; CHECK-THUMB: mov.w     [[CMP:[a-z0-9]+]], #0
+; CHECK-THUMB: movhs.w   [[CMP]], #1
+; CHECK-THUMB: cmp.w     [[CMP]], #0
 ; CHECK-THUMB: mov       [[OUT_HI:[a-z0-9]+]], r3
-; CHECK-THUMB: itt     hs
-; CHECK-THUMB: movhs   [[OUT_HI]], [[REG2]]
-; CHECK-THUMB: movhs   [[OUT_LO]], [[REG1]]
+; CHECK-THUMB: movne   [[OUT_HI]], [[REG2]]
+; CHECK-THUMB: movne   [[OUT_LO]], [[REG1]]
 ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
@@ -358,9 +368,12 @@ define i64 @test12(ptr %ptr, i64 %val) {
 ; CHECK-BE: subs    {{[^,]+}}, r2, [[REG2]]
 ; CHECK-LE: sbcs    {{[^,]+}}, r2, [[REG2]]
 ; CHECK-BE: sbcs    {{[^,]+}}, r1, [[REG1]]
-; CHECK: movlt   [[OUT_HI]], [[REG2]]
+; CHECK: mov     [[CMP:[a-z0-9]+]], #0
+; CHECK: movwlt  [[CMP]], #1
+; CHECK: cmp     [[CMP]], #0
+; CHECK: movne   [[OUT_HI]], [[REG2]]
 ; CHECK: mov     [[OUT_LO:[a-z0-9]+]], r1
-; CHECK: movlt   [[OUT_LO]], [[REG1]]
+; CHECK: movne   [[OUT_LO]], [[REG1]]
 ; CHECK: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
 ; CHECK: cmp
 ; CHECK: bne
@@ -374,10 +387,12 @@ define i64 @test12(ptr %ptr, i64 %val) {
 ; CHECK-THUMB-BE: subs.w {{[^,]+}}, r3, [[REG2]]
 ; CHECK-THUMB-LE: sbcs.w {{[^,]+}}, r3, [[REG2]]
 ; CHECK-THUMB-BE: sbcs.w {{[^,]+}}, r2, [[REG1]]
+; CHECK-THUMB: mov.w     [[CMP:[a-z0-9]+]], #0
+; CHECK-THUMB: movlt.w   [[CMP]], #1
+; CHECK-THUMB: cmp.w     [[CMP]], #0
 ; CHECK-THUMB: mov       [[OUT_HI:[a-z0-9]+]], r3
-; CHECK-THUMB: itt     lt
-; CHECK-THUMB: movlt   [[OUT_HI]], [[REG2]]
-; CHECK-THUMB: movlt   [[OUT_LO]], [[REG1]]
+; CHECK-THUMB: movne   [[OUT_HI]], [[REG2]]
+; CHECK-THUMB: movne   [[OUT_LO]], [[REG1]]
 ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
@@ -398,9 +413,12 @@ define i64 @test13(ptr %ptr, i64 %val) {
 ; CHECK-BE: subs    {{[^,]+}}, r2, [[REG2]]
 ; CHECK-LE: sbcs    {{[^,]+}}, r2, [[REG2]]
 ; CHECK-BE: sbcs    {{[^,]+}}, r1, [[REG1]]
-; CHECK: movlo   [[OUT_HI]], [[REG2]]
+; CHECK: mov     [[CMP:[a-z0-9]+]], #0
+; CHECK: movwlo  [[CMP]], #1
+; CHECK: cmp     [[CMP]], #0
+; CHECK: movne   [[OUT_HI]], [[REG2]]
 ; CHECK: mov     [[OUT_LO:[a-z0-9]+]], r1
-; CHECK: movlo   [[OUT_LO]], [[REG1]]
+; CHECK: movne   [[OUT_LO]], [[REG1]]
 ; CHECK: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
 ; CHECK: cmp
 ; CHECK: bne
@@ -414,10 +432,12 @@ define i64 @test13(ptr %ptr, i64 %val) {
 ; CHECK-THUMB-BE: subs.w {{[^,]+}}, r3, [[REG2]]
 ; CHECK-THUMB-LE: sbcs.w {{[^,]+}}, r3, [[REG2]]
 ; CHECK-THUMB-BE: sbcs.w {{[^,]+}}, r2, [[REG1]]
+; CHECK-THUMB: mov.w     [[CMP:[a-z0-9]+]], #0
+; CHECK-THUMB: movlo.w   [[CMP]], #1
+; CHECK-THUMB: cmp.w     [[CMP]], #0
 ; CHECK-THUMB: mov       [[OUT_HI:[a-z0-9]+]], r3
-; CHECK-THUMB: itt     lo
-; CHECK-THUMB: movlo   [[OUT_HI]], [[REG2]]
-; CHECK-THUMB: movlo   [[OUT_LO]], [[REG1]]
+; CHECK-THUMB: movne   [[OUT_HI]], [[REG2]]
+; CHECK-THUMB: movne   [[OUT_LO]], [[REG1]]
 ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne

diff  --git a/llvm/test/CodeGen/ARM/atomic-ops-v8.ll b/llvm/test/CodeGen/ARM/atomic-ops-v8.ll
index d48b070aa862e3..0a467c2b70acf2 100644
--- a/llvm/test/CodeGen/ARM/atomic-ops-v8.ll
+++ b/llvm/test/CodeGen/ARM/atomic-ops-v8.ll
@@ -672,9 +672,12 @@ define void @test_atomic_load_min_i64(i64 %offset) nounwind {
 ; CHECK-ARM-LE: sbcs {{[^,]+}}, r1, [[OLD2]]
 ; CHECK-ARM-BE: subs {{[^,]+}}, r1, [[OLD2]]
 ; CHECK-ARM-BE: sbcs {{[^,]+}}, r0, [[OLD1]]
-; CHECK-ARM: movge [[MINHI]], [[OLD2]]
+; CHECK-ARM: mov [[CMP:r[0-9]+|lr]], #0
+; CHECK-ARM: movwge [[CMP:r[0-9]+|lr]], #1
+; CHECK-ARM: cmp [[CMP:r[0-9]+|lr]], #0
+; CHECK-ARM: movne [[MINHI]], [[OLD2]]
 ; CHECK-ARM: mov [[MINLO:r[0-9]+]], r0
-; CHECK-ARM: movge [[MINLO]], [[OLD1]]
+; CHECK-ARM: movne [[MINLO]], [[OLD1]]
 ; CHECK-ARM: stlexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]]
 ; CHECK-THUMB: stlexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
@@ -782,9 +785,12 @@ define void @test_atomic_load_max_i64(i64 %offset) nounwind {
 ; CHECK-ARM-LE: sbcs {{[^,]+}}, r1, [[OLD2]]
 ; CHECK-ARM-BE: subs {{[^,]+}}, r1, [[OLD2]]
 ; CHECK-ARM-BE: sbcs {{[^,]+}}, r0, [[OLD1]]
-; CHECK-ARM: movlt [[MINHI]], [[OLD2]]
+; CHECK-ARM: mov [[CMP:r[0-9]+|lr]], #0
+; CHECK-ARM: movwlt [[CMP:r[0-9]+|lr]], #1
+; CHECK-ARM: cmp [[CMP:r[0-9]+|lr]], #0
+; CHECK-ARM: movne [[MINHI]], [[OLD2]]
 ; CHECK-ARM: mov [[MINLO:r[0-9]+]], r0
-; CHECK-ARM: movlt [[MINLO]], [[OLD1]]
+; CHECK-ARM: movne [[MINLO]], [[OLD1]]
 ; CHECK-ARM: strexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]]
 ; CHECK-THUMB: strexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
@@ -892,9 +898,12 @@ define void @test_atomic_load_umin_i64(i64 %offset) nounwind {
 ; CHECK-ARM-LE: sbcs {{[^,]+}}, r1, [[OLD2]]
 ; CHECK-ARM-BE: subs {{[^,]+}}, r1, [[OLD2]]
 ; CHECK-ARM-BE: sbcs {{[^,]+}}, r0, [[OLD1]]
-; CHECK-ARM: movhs [[MINHI]], [[OLD2]]
+; CHECK-ARM: mov [[CMP:r[0-9]+|lr]], #0
+; CHECK-ARM: movwhs [[CMP:r[0-9]+|lr]], #1
+; CHECK-ARM: cmp [[CMP:r[0-9]+|lr]], #0
+; CHECK-ARM: movne [[MINHI]], [[OLD2]]
 ; CHECK-ARM: mov [[MINLO:r[0-9]+]], r0
-; CHECK-ARM: movhs [[MINLO]], [[OLD1]]
+; CHECK-ARM: movne [[MINLO]], [[OLD1]]
 ; CHECK-ARM: stlexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]]
 ; CHECK-THUMB: stlexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
@@ -1002,9 +1011,12 @@ define void @test_atomic_load_umax_i64(i64 %offset) nounwind {
 ; CHECK-ARM-LE: sbcs {{[^,]+}}, r1, [[OLD2]]
 ; CHECK-ARM-BE: subs {{[^,]+}}, r1, [[OLD2]]
 ; CHECK-ARM-BE: sbcs {{[^,]+}}, r0, [[OLD1]]
-; CHECK-ARM: movlo [[MINHI]], [[OLD2]]
+; CHECK-ARM: mov [[CMP:r[0-9]+|lr]], #0
+; CHECK-ARM: movwlo [[CMP:r[0-9]+|lr]], #1
+; CHECK-ARM: cmp [[CMP:r[0-9]+|lr]], #0
+; CHECK-ARM: movne [[MINHI]], [[OLD2]]
 ; CHECK-ARM: mov [[MINLO:r[0-9]+]], r0
-; CHECK-ARM: movlo [[MINLO]], [[OLD1]]
+; CHECK-ARM: movne [[MINLO]], [[OLD1]]
 ; CHECK-ARM: stlexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]]
 ; CHECK-THUMB: stlexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0

diff  --git a/llvm/test/CodeGen/ARM/atomicrmw-cond-sub-clamp.ll b/llvm/test/CodeGen/ARM/atomicrmw-cond-sub-clamp.ll
index 8706728c4b8416..62711ee6834898 100644
--- a/llvm/test/CodeGen/ARM/atomicrmw-cond-sub-clamp.ll
+++ b/llvm/test/CodeGen/ARM/atomicrmw-cond-sub-clamp.ll
@@ -68,23 +68,27 @@ define i32 @atomicrmw_usub_cond_i32(ptr %ptr, i32 %val) {
 define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) {
 ; CHECK-LABEL: atomicrmw_usub_cond_i64:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    .save {r4, r5, r11, lr}
-; CHECK-NEXT:    push {r4, r5, r11, lr}
-; CHECK-NEXT:    mov r12, r0
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, r11, lr}
 ; CHECK-NEXT:    dmb ish
 ; CHECK-NEXT:  .LBB3_1: @ %atomicrmw.start
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldrexd r0, r1, [r12]
-; CHECK-NEXT:    subs r4, r0, r2
-; CHECK-NEXT:    sbcs r5, r1, r3
-; CHECK-NEXT:    movlo r5, r1
-; CHECK-NEXT:    movlo r4, r0
-; CHECK-NEXT:    strexd lr, r4, r5, [r12]
-; CHECK-NEXT:    cmp lr, #0
+; CHECK-NEXT:    ldrexd r4, r5, [r0]
+; CHECK-NEXT:    mov r1, #0
+; CHECK-NEXT:    subs r6, r4, r2
+; CHECK-NEXT:    sbcs r7, r5, r3
+; CHECK-NEXT:    movwhs r1, #1
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    moveq r7, r5
+; CHECK-NEXT:    moveq r6, r4
+; CHECK-NEXT:    strexd r1, r6, r7, [r0]
+; CHECK-NEXT:    cmp r1, #0
 ; CHECK-NEXT:    bne .LBB3_1
 ; CHECK-NEXT:  @ %bb.2: @ %atomicrmw.end
+; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    mov r1, r5
 ; CHECK-NEXT:    dmb ish
-; CHECK-NEXT:    pop {r4, r5, r11, pc}
+; CHECK-NEXT:    pop {r4, r5, r6, r7, r11, pc}
   %result = atomicrmw usub_cond ptr %ptr, i64 %val seq_cst
   ret i64 %result
 }
@@ -160,7 +164,7 @@ define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) {
 ; CHECK-NEXT:    subs r6, r4, r2
 ; CHECK-NEXT:    sbcs r7, r5, r3
 ; CHECK-NEXT:    adc r1, r12, #0
-; CHECK-NEXT:    teq r1, #1
+; CHECK-NEXT:    eors r1, r1, #1
 ; CHECK-NEXT:    movwne r7, #0
 ; CHECK-NEXT:    movwne r6, #0
 ; CHECK-NEXT:    strexd r1, r6, r7, [r0]

diff  --git a/llvm/test/CodeGen/ARM/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/ARM/atomicrmw-uinc-udec-wrap.ll
index 433fb325a7349f..243ec4deecdb84 100644
--- a/llvm/test/CodeGen/ARM/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/ARM/atomicrmw-uinc-udec-wrap.ll
@@ -69,25 +69,29 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
 define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
 ; CHECK-LABEL: atomicrmw_uinc_wrap_i64:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    .save {r4, r6, r7, lr}
-; CHECK-NEXT:    push {r4, r6, r7, lr}
-; CHECK-NEXT:    mov r12, r0
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, r11, lr}
 ; CHECK-NEXT:    dmb ish
 ; CHECK-NEXT:  .LBB3_1: @ %atomicrmw.start
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldrexd r0, r1, [r12]
-; CHECK-NEXT:    adds r6, r0, #1
-; CHECK-NEXT:    adc r7, r1, #0
-; CHECK-NEXT:    subs r4, r0, r2
-; CHECK-NEXT:    sbcs r4, r1, r3
-; CHECK-NEXT:    movwhs r7, #0
-; CHECK-NEXT:    movwhs r6, #0
-; CHECK-NEXT:    strexd r4, r6, r7, [r12]
-; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    ldrexd r4, r5, [r0]
+; CHECK-NEXT:    adds r6, r4, #1
+; CHECK-NEXT:    adc r7, r5, #0
+; CHECK-NEXT:    subs r1, r4, r2
+; CHECK-NEXT:    sbcs r1, r5, r3
+; CHECK-NEXT:    mov r1, #0
+; CHECK-NEXT:    movwhs r1, #1
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    movwne r7, #0
+; CHECK-NEXT:    movwne r6, #0
+; CHECK-NEXT:    strexd r1, r6, r7, [r0]
+; CHECK-NEXT:    cmp r1, #0
 ; CHECK-NEXT:    bne .LBB3_1
 ; CHECK-NEXT:  @ %bb.2: @ %atomicrmw.end
+; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    mov r1, r5
 ; CHECK-NEXT:    dmb ish
-; CHECK-NEXT:    pop {r4, r6, r7, pc}
+; CHECK-NEXT:    pop {r4, r5, r6, r7, r11, pc}
   %result = atomicrmw uinc_wrap ptr %ptr, i64 %val seq_cst
   ret i64 %result
 }
@@ -98,8 +102,8 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
 ; CHECK-NEXT:    dmb ish
 ; CHECK-NEXT:  .LBB4_1: @ %atomicrmw.start
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldrexb r12, [r0]
 ; CHECK-NEXT:    uxtb r3, r1
+; CHECK-NEXT:    ldrexb r12, [r0]
 ; CHECK-NEXT:    cmp r12, r3
 ; CHECK-NEXT:    mov r3, r1
 ; CHECK-NEXT:    subls r3, r12, #1
@@ -122,8 +126,8 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
 ; CHECK-NEXT:    dmb ish
 ; CHECK-NEXT:  .LBB5_1: @ %atomicrmw.start
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldrexh r12, [r0]
 ; CHECK-NEXT:    uxth r3, r1
+; CHECK-NEXT:    ldrexh r12, [r0]
 ; CHECK-NEXT:    cmp r12, r3
 ; CHECK-NEXT:    mov r3, r1
 ; CHECK-NEXT:    subls r3, r12, #1

diff  --git a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
index f633315822cc3d..a38ade7cdbf06b 100644
--- a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
+++ b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
@@ -1422,8 +1422,8 @@ define i8 @test_max_i8() {
 ; CHECK-ARM8-NEXT:    @ Child Loop BB7_2 Depth 2
 ; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    sxtb r0, r1
-; CHECK-ARM8-NEXT:    cmp r0, #1
 ; CHECK-ARM8-NEXT:    mov r12, #1
+; CHECK-ARM8-NEXT:    cmp r0, #1
 ; CHECK-ARM8-NEXT:    movgt r12, r1
 ; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i8
 ; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i8
@@ -1468,8 +1468,8 @@ define i8 @test_max_i8() {
 ; CHECK-ARM6-NEXT:    @ Child Loop BB7_2 Depth 2
 ; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    sxtb r0, r1
-; CHECK-ARM6-NEXT:    cmp r0, #1
 ; CHECK-ARM6-NEXT:    mov r12, #1
+; CHECK-ARM6-NEXT:    cmp r0, #1
 ; CHECK-ARM6-NEXT:    movgt r12, r1
 ; CHECK-ARM6-NEXT:    ldr r3, .LCPI7_0
 ; CHECK-ARM6-NEXT:    uxtb r1, r1
@@ -1518,8 +1518,8 @@ define i8 @test_max_i8() {
 ; CHECK-THUMB7-NEXT:    @ Child Loop BB7_2 Depth 2
 ; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    sxtb r0, r1
-; CHECK-THUMB7-NEXT:    cmp r0, #1
 ; CHECK-THUMB7-NEXT:    mov.w r12, #1
+; CHECK-THUMB7-NEXT:    cmp r0, #1
 ; CHECK-THUMB7-NEXT:    it gt
 ; CHECK-THUMB7-NEXT:    movgt r12, r1
 ; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i8
@@ -1643,8 +1643,8 @@ define i8 @test_min_i8() {
 ; CHECK-ARM8-NEXT:    @ Child Loop BB8_2 Depth 2
 ; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    sxtb r0, r1
-; CHECK-ARM8-NEXT:    cmp r0, #2
 ; CHECK-ARM8-NEXT:    mov r12, #1
+; CHECK-ARM8-NEXT:    cmp r0, #2
 ; CHECK-ARM8-NEXT:    movlt r12, r1
 ; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i8
 ; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i8
@@ -1689,8 +1689,8 @@ define i8 @test_min_i8() {
 ; CHECK-ARM6-NEXT:    @ Child Loop BB8_2 Depth 2
 ; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    sxtb r0, r1
-; CHECK-ARM6-NEXT:    cmp r0, #2
 ; CHECK-ARM6-NEXT:    mov r12, #1
+; CHECK-ARM6-NEXT:    cmp r0, #2
 ; CHECK-ARM6-NEXT:    movlt r12, r1
 ; CHECK-ARM6-NEXT:    ldr r3, .LCPI8_0
 ; CHECK-ARM6-NEXT:    uxtb r1, r1
@@ -1739,8 +1739,8 @@ define i8 @test_min_i8() {
 ; CHECK-THUMB7-NEXT:    @ Child Loop BB8_2 Depth 2
 ; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    sxtb r0, r1
-; CHECK-THUMB7-NEXT:    cmp r0, #2
 ; CHECK-THUMB7-NEXT:    mov.w r12, #1
+; CHECK-THUMB7-NEXT:    cmp r0, #2
 ; CHECK-THUMB7-NEXT:    it lt
 ; CHECK-THUMB7-NEXT:    movlt r12, r1
 ; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i8
@@ -1866,8 +1866,8 @@ define i8 @test_umax_i8() {
 ; CHECK-ARM8-NEXT:    @ Child Loop BB9_2 Depth 2
 ; CHECK-ARM8-NEXT:    ldr r12, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    uxtb r1, r12
-; CHECK-ARM8-NEXT:    cmp r1, #1
 ; CHECK-ARM8-NEXT:    mov lr, #1
+; CHECK-ARM8-NEXT:    cmp r1, #1
 ; CHECK-ARM8-NEXT:    movhi lr, r12
 ; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i8
 ; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i8
@@ -1913,8 +1913,8 @@ define i8 @test_umax_i8() {
 ; CHECK-ARM6-NEXT:    @ Child Loop BB9_2 Depth 2
 ; CHECK-ARM6-NEXT:    ldr r12, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    uxtb r1, r12
-; CHECK-ARM6-NEXT:    cmp r1, #1
 ; CHECK-ARM6-NEXT:    mov lr, #1
+; CHECK-ARM6-NEXT:    cmp r1, #1
 ; CHECK-ARM6-NEXT:    movhi lr, r12
 ; CHECK-ARM6-NEXT:    ldr r3, .LCPI9_0
 ; CHECK-ARM6-NEXT:    uxtb r12, r12
@@ -1964,8 +1964,8 @@ define i8 @test_umax_i8() {
 ; CHECK-THUMB7-NEXT:    @ Child Loop BB9_2 Depth 2
 ; CHECK-THUMB7-NEXT:    ldr r4, [sp, #4] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    uxtb r1, r4
-; CHECK-THUMB7-NEXT:    cmp r1, #1
 ; CHECK-THUMB7-NEXT:    mov.w r12, #1
+; CHECK-THUMB7-NEXT:    cmp r1, #1
 ; CHECK-THUMB7-NEXT:    it hi
 ; CHECK-THUMB7-NEXT:    movhi r12, r4
 ; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i8
@@ -2091,8 +2091,8 @@ define i8 @test_umin_i8() {
 ; CHECK-ARM8-NEXT:    @ Child Loop BB10_2 Depth 2
 ; CHECK-ARM8-NEXT:    ldr r12, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    uxtb r1, r12
-; CHECK-ARM8-NEXT:    cmp r1, #2
 ; CHECK-ARM8-NEXT:    mov lr, #1
+; CHECK-ARM8-NEXT:    cmp r1, #2
 ; CHECK-ARM8-NEXT:    movlo lr, r12
 ; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i8
 ; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i8
@@ -2138,8 +2138,8 @@ define i8 @test_umin_i8() {
 ; CHECK-ARM6-NEXT:    @ Child Loop BB10_2 Depth 2
 ; CHECK-ARM6-NEXT:    ldr r12, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    uxtb r1, r12
-; CHECK-ARM6-NEXT:    cmp r1, #2
 ; CHECK-ARM6-NEXT:    mov lr, #1
+; CHECK-ARM6-NEXT:    cmp r1, #2
 ; CHECK-ARM6-NEXT:    movlo lr, r12
 ; CHECK-ARM6-NEXT:    ldr r3, .LCPI10_0
 ; CHECK-ARM6-NEXT:    uxtb r12, r12
@@ -2189,8 +2189,8 @@ define i8 @test_umin_i8() {
 ; CHECK-THUMB7-NEXT:    @ Child Loop BB10_2 Depth 2
 ; CHECK-THUMB7-NEXT:    ldr r4, [sp, #4] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    uxtb r1, r4
-; CHECK-THUMB7-NEXT:    cmp r1, #2
 ; CHECK-THUMB7-NEXT:    mov.w r12, #1
+; CHECK-THUMB7-NEXT:    cmp r1, #2
 ; CHECK-THUMB7-NEXT:    it lo
 ; CHECK-THUMB7-NEXT:    movlo r12, r4
 ; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i8
@@ -3709,8 +3709,8 @@ define i16 @test_max_i16() {
 ; CHECK-ARM8-NEXT:    @ Child Loop BB18_2 Depth 2
 ; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    sxth r0, r1
-; CHECK-ARM8-NEXT:    cmp r0, #1
 ; CHECK-ARM8-NEXT:    mov r12, #1
+; CHECK-ARM8-NEXT:    cmp r0, #1
 ; CHECK-ARM8-NEXT:    movgt r12, r1
 ; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i16
 ; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i16
@@ -3755,8 +3755,8 @@ define i16 @test_max_i16() {
 ; CHECK-ARM6-NEXT:    @ Child Loop BB18_2 Depth 2
 ; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    sxth r0, r1
-; CHECK-ARM6-NEXT:    cmp r0, #1
 ; CHECK-ARM6-NEXT:    mov r12, #1
+; CHECK-ARM6-NEXT:    cmp r0, #1
 ; CHECK-ARM6-NEXT:    movgt r12, r1
 ; CHECK-ARM6-NEXT:    ldr r3, .LCPI18_0
 ; CHECK-ARM6-NEXT:    uxth r1, r1
@@ -3805,8 +3805,8 @@ define i16 @test_max_i16() {
 ; CHECK-THUMB7-NEXT:    @ Child Loop BB18_2 Depth 2
 ; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    sxth r0, r1
-; CHECK-THUMB7-NEXT:    cmp r0, #1
 ; CHECK-THUMB7-NEXT:    mov.w r12, #1
+; CHECK-THUMB7-NEXT:    cmp r0, #1
 ; CHECK-THUMB7-NEXT:    it gt
 ; CHECK-THUMB7-NEXT:    movgt r12, r1
 ; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i16
@@ -3930,8 +3930,8 @@ define i16 @test_min_i16() {
 ; CHECK-ARM8-NEXT:    @ Child Loop BB19_2 Depth 2
 ; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    sxth r0, r1
-; CHECK-ARM8-NEXT:    cmp r0, #2
 ; CHECK-ARM8-NEXT:    mov r12, #1
+; CHECK-ARM8-NEXT:    cmp r0, #2
 ; CHECK-ARM8-NEXT:    movlt r12, r1
 ; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i16
 ; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i16
@@ -3976,8 +3976,8 @@ define i16 @test_min_i16() {
 ; CHECK-ARM6-NEXT:    @ Child Loop BB19_2 Depth 2
 ; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    sxth r0, r1
-; CHECK-ARM6-NEXT:    cmp r0, #2
 ; CHECK-ARM6-NEXT:    mov r12, #1
+; CHECK-ARM6-NEXT:    cmp r0, #2
 ; CHECK-ARM6-NEXT:    movlt r12, r1
 ; CHECK-ARM6-NEXT:    ldr r3, .LCPI19_0
 ; CHECK-ARM6-NEXT:    uxth r1, r1
@@ -4026,8 +4026,8 @@ define i16 @test_min_i16() {
 ; CHECK-THUMB7-NEXT:    @ Child Loop BB19_2 Depth 2
 ; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    sxth r0, r1
-; CHECK-THUMB7-NEXT:    cmp r0, #2
 ; CHECK-THUMB7-NEXT:    mov.w r12, #1
+; CHECK-THUMB7-NEXT:    cmp r0, #2
 ; CHECK-THUMB7-NEXT:    it lt
 ; CHECK-THUMB7-NEXT:    movlt r12, r1
 ; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i16
@@ -4153,8 +4153,8 @@ define i16 @test_umax_i16() {
 ; CHECK-ARM8-NEXT:    @ Child Loop BB20_2 Depth 2
 ; CHECK-ARM8-NEXT:    ldr r12, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    uxth r1, r12
-; CHECK-ARM8-NEXT:    cmp r1, #1
 ; CHECK-ARM8-NEXT:    mov lr, #1
+; CHECK-ARM8-NEXT:    cmp r1, #1
 ; CHECK-ARM8-NEXT:    movhi lr, r12
 ; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i16
 ; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i16
@@ -4200,8 +4200,8 @@ define i16 @test_umax_i16() {
 ; CHECK-ARM6-NEXT:    @ Child Loop BB20_2 Depth 2
 ; CHECK-ARM6-NEXT:    ldr r12, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    uxth r1, r12
-; CHECK-ARM6-NEXT:    cmp r1, #1
 ; CHECK-ARM6-NEXT:    mov lr, #1
+; CHECK-ARM6-NEXT:    cmp r1, #1
 ; CHECK-ARM6-NEXT:    movhi lr, r12
 ; CHECK-ARM6-NEXT:    ldr r3, .LCPI20_0
 ; CHECK-ARM6-NEXT:    uxth r12, r12
@@ -4251,8 +4251,8 @@ define i16 @test_umax_i16() {
 ; CHECK-THUMB7-NEXT:    @ Child Loop BB20_2 Depth 2
 ; CHECK-THUMB7-NEXT:    ldr r4, [sp, #4] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    uxth r1, r4
-; CHECK-THUMB7-NEXT:    cmp r1, #1
 ; CHECK-THUMB7-NEXT:    mov.w r12, #1
+; CHECK-THUMB7-NEXT:    cmp r1, #1
 ; CHECK-THUMB7-NEXT:    it hi
 ; CHECK-THUMB7-NEXT:    movhi r12, r4
 ; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i16
@@ -4378,8 +4378,8 @@ define i16 @test_umin_i16() {
 ; CHECK-ARM8-NEXT:    @ Child Loop BB21_2 Depth 2
 ; CHECK-ARM8-NEXT:    ldr r12, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    uxth r1, r12
-; CHECK-ARM8-NEXT:    cmp r1, #2
 ; CHECK-ARM8-NEXT:    mov lr, #1
+; CHECK-ARM8-NEXT:    cmp r1, #2
 ; CHECK-ARM8-NEXT:    movlo lr, r12
 ; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i16
 ; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i16
@@ -4425,8 +4425,8 @@ define i16 @test_umin_i16() {
 ; CHECK-ARM6-NEXT:    @ Child Loop BB21_2 Depth 2
 ; CHECK-ARM6-NEXT:    ldr r12, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    uxth r1, r12
-; CHECK-ARM6-NEXT:    cmp r1, #2
 ; CHECK-ARM6-NEXT:    mov lr, #1
+; CHECK-ARM6-NEXT:    cmp r1, #2
 ; CHECK-ARM6-NEXT:    movlo lr, r12
 ; CHECK-ARM6-NEXT:    ldr r3, .LCPI21_0
 ; CHECK-ARM6-NEXT:    uxth r12, r12
@@ -4476,8 +4476,8 @@ define i16 @test_umin_i16() {
 ; CHECK-THUMB7-NEXT:    @ Child Loop BB21_2 Depth 2
 ; CHECK-THUMB7-NEXT:    ldr r4, [sp, #4] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    uxth r1, r4
-; CHECK-THUMB7-NEXT:    cmp r1, #2
 ; CHECK-THUMB7-NEXT:    mov.w r12, #1
+; CHECK-THUMB7-NEXT:    cmp r1, #2
 ; CHECK-THUMB7-NEXT:    it lo
 ; CHECK-THUMB7-NEXT:    movlo r12, r4
 ; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i16
@@ -5939,8 +5939,8 @@ define i32 @test_max_i32() {
 ; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
 ; CHECK-ARM8-NEXT:    @ Child Loop BB29_2 Depth 2
 ; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
-; CHECK-ARM8-NEXT:    cmp r1, #1
 ; CHECK-ARM8-NEXT:    mov r12, #1
+; CHECK-ARM8-NEXT:    cmp r1, #1
 ; CHECK-ARM8-NEXT:    movgt r12, r1
 ; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i32
 ; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i32
@@ -5982,8 +5982,8 @@ define i32 @test_max_i32() {
 ; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
 ; CHECK-ARM6-NEXT:    @ Child Loop BB29_2 Depth 2
 ; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
-; CHECK-ARM6-NEXT:    cmp r1, #1
 ; CHECK-ARM6-NEXT:    mov r12, #1
+; CHECK-ARM6-NEXT:    cmp r1, #1
 ; CHECK-ARM6-NEXT:    movgt r12, r1
 ; CHECK-ARM6-NEXT:    ldr r3, .LCPI29_0
 ; CHECK-ARM6-NEXT:  .LBB29_2: @ %atomicrmw.start
@@ -6029,8 +6029,8 @@ define i32 @test_max_i32() {
 ; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
 ; CHECK-THUMB7-NEXT:    @ Child Loop BB29_2 Depth 2
 ; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
-; CHECK-THUMB7-NEXT:    cmp r1, #1
 ; CHECK-THUMB7-NEXT:    mov.w r12, #1
+; CHECK-THUMB7-NEXT:    cmp r1, #1
 ; CHECK-THUMB7-NEXT:    it gt
 ; CHECK-THUMB7-NEXT:    movgt r12, r1
 ; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i32
@@ -6148,8 +6148,8 @@ define i32 @test_min_i32() {
 ; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
 ; CHECK-ARM8-NEXT:    @ Child Loop BB30_2 Depth 2
 ; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
-; CHECK-ARM8-NEXT:    cmp r1, #2
 ; CHECK-ARM8-NEXT:    mov r12, #1
+; CHECK-ARM8-NEXT:    cmp r1, #2
 ; CHECK-ARM8-NEXT:    movlt r12, r1
 ; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i32
 ; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i32
@@ -6191,8 +6191,8 @@ define i32 @test_min_i32() {
 ; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
 ; CHECK-ARM6-NEXT:    @ Child Loop BB30_2 Depth 2
 ; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
-; CHECK-ARM6-NEXT:    cmp r1, #2
 ; CHECK-ARM6-NEXT:    mov r12, #1
+; CHECK-ARM6-NEXT:    cmp r1, #2
 ; CHECK-ARM6-NEXT:    movlt r12, r1
 ; CHECK-ARM6-NEXT:    ldr r3, .LCPI30_0
 ; CHECK-ARM6-NEXT:  .LBB30_2: @ %atomicrmw.start
@@ -6238,8 +6238,8 @@ define i32 @test_min_i32() {
 ; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
 ; CHECK-THUMB7-NEXT:    @ Child Loop BB30_2 Depth 2
 ; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
-; CHECK-THUMB7-NEXT:    cmp r1, #2
 ; CHECK-THUMB7-NEXT:    mov.w r12, #1
+; CHECK-THUMB7-NEXT:    cmp r1, #2
 ; CHECK-THUMB7-NEXT:    it lt
 ; CHECK-THUMB7-NEXT:    movlt r12, r1
 ; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i32
@@ -6357,8 +6357,8 @@ define i32 @test_umax_i32() {
 ; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
 ; CHECK-ARM8-NEXT:    @ Child Loop BB31_2 Depth 2
 ; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
-; CHECK-ARM8-NEXT:    cmp r1, #1
 ; CHECK-ARM8-NEXT:    mov r12, #1
+; CHECK-ARM8-NEXT:    cmp r1, #1
 ; CHECK-ARM8-NEXT:    movhi r12, r1
 ; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i32
 ; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i32
@@ -6400,8 +6400,8 @@ define i32 @test_umax_i32() {
 ; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
 ; CHECK-ARM6-NEXT:    @ Child Loop BB31_2 Depth 2
 ; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
-; CHECK-ARM6-NEXT:    cmp r1, #1
 ; CHECK-ARM6-NEXT:    mov r12, #1
+; CHECK-ARM6-NEXT:    cmp r1, #1
 ; CHECK-ARM6-NEXT:    movhi r12, r1
 ; CHECK-ARM6-NEXT:    ldr r3, .LCPI31_0
 ; CHECK-ARM6-NEXT:  .LBB31_2: @ %atomicrmw.start
@@ -6447,8 +6447,8 @@ define i32 @test_umax_i32() {
 ; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
 ; CHECK-THUMB7-NEXT:    @ Child Loop BB31_2 Depth 2
 ; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
-; CHECK-THUMB7-NEXT:    cmp r1, #1
 ; CHECK-THUMB7-NEXT:    mov.w r12, #1
+; CHECK-THUMB7-NEXT:    cmp r1, #1
 ; CHECK-THUMB7-NEXT:    it hi
 ; CHECK-THUMB7-NEXT:    movhi r12, r1
 ; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i32
@@ -6566,8 +6566,8 @@ define i32 @test_umin_i32() {
 ; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
 ; CHECK-ARM8-NEXT:    @ Child Loop BB32_2 Depth 2
 ; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
-; CHECK-ARM8-NEXT:    cmp r1, #2
 ; CHECK-ARM8-NEXT:    mov r12, #1
+; CHECK-ARM8-NEXT:    cmp r1, #2
 ; CHECK-ARM8-NEXT:    movlo r12, r1
 ; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i32
 ; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i32
@@ -6609,8 +6609,8 @@ define i32 @test_umin_i32() {
 ; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
 ; CHECK-ARM6-NEXT:    @ Child Loop BB32_2 Depth 2
 ; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
-; CHECK-ARM6-NEXT:    cmp r1, #2
 ; CHECK-ARM6-NEXT:    mov r12, #1
+; CHECK-ARM6-NEXT:    cmp r1, #2
 ; CHECK-ARM6-NEXT:    movlo r12, r1
 ; CHECK-ARM6-NEXT:    ldr r3, .LCPI32_0
 ; CHECK-ARM6-NEXT:  .LBB32_2: @ %atomicrmw.start
@@ -6656,8 +6656,8 @@ define i32 @test_umin_i32() {
 ; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
 ; CHECK-THUMB7-NEXT:    @ Child Loop BB32_2 Depth 2
 ; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
-; CHECK-THUMB7-NEXT:    cmp r1, #2
 ; CHECK-THUMB7-NEXT:    mov.w r12, #1
+; CHECK-THUMB7-NEXT:    cmp r1, #2
 ; CHECK-THUMB7-NEXT:    it lo
 ; CHECK-THUMB7-NEXT:    movlo r12, r1
 ; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i32
@@ -8342,7 +8342,8 @@ define i64 @test_max_i64() {
 ; CHECK-ARM8-NEXT:    mov r0, #0
 ; CHECK-ARM8-NEXT:    movwlt r0, #1
 ; CHECK-ARM8-NEXT:    mov r10, #1
-; CHECK-ARM8-NEXT:    movlt r10, r2
+; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    movne r10, r2
 ; CHECK-ARM8-NEXT:    cmp r0, #0
 ; CHECK-ARM8-NEXT:    movne r0, r1
 ; CHECK-ARM8-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
@@ -8409,7 +8410,8 @@ define i64 @test_max_i64() {
 ; CHECK-ARM6-NEXT:    mov r0, #0
 ; CHECK-ARM6-NEXT:    movlt r0, #1
 ; CHECK-ARM6-NEXT:    mov r10, #1
-; CHECK-ARM6-NEXT:    movlt r10, r2
+; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    movne r10, r2
 ; CHECK-ARM6-NEXT:    cmp r0, #0
 ; CHECK-ARM6-NEXT:    movne r0, r1
 ; CHECK-ARM6-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
@@ -8481,8 +8483,9 @@ define i64 @test_max_i64() {
 ; CHECK-THUMB7-NEXT:    mov r8, r2
 ; CHECK-THUMB7-NEXT:    mov r9, r1
 ; CHECK-THUMB7-NEXT:    mov.w r10, #1
-; CHECK-THUMB7-NEXT:    it lt
-; CHECK-THUMB7-NEXT:    movlt r10, r2
+; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    it ne
+; CHECK-THUMB7-NEXT:    movne r10, r2
 ; CHECK-THUMB7-NEXT:    cmp r0, #0
 ; CHECK-THUMB7-NEXT:    it ne
 ; CHECK-THUMB7-NEXT:    movne r0, r1
@@ -8578,7 +8581,7 @@ define i64 @test_max_i64() {
 ; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #52] @ 4-byte Reload
 ; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #28] @ 4-byte Spill
 ; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #32] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT:    blt .LBB40_5
+; CHECK-THUMB8BASE-NEXT:    cbnz r1, .LBB40_5
 ; CHECK-THUMB8BASE-NEXT:  @ %bb.4: @ %atomicrmw.start
 ; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB40_1 Depth=1
 ; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #48] @ 4-byte Reload
@@ -8655,7 +8658,8 @@ define i64 @test_min_i64() {
 ; CHECK-ARM8-NEXT:    mov r0, #0
 ; CHECK-ARM8-NEXT:    movwlt r0, #1
 ; CHECK-ARM8-NEXT:    mov r10, #1
-; CHECK-ARM8-NEXT:    movlt r10, r2
+; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    movne r10, r2
 ; CHECK-ARM8-NEXT:    cmp r0, #0
 ; CHECK-ARM8-NEXT:    movne r0, r1
 ; CHECK-ARM8-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
@@ -8722,7 +8726,8 @@ define i64 @test_min_i64() {
 ; CHECK-ARM6-NEXT:    mov r0, #0
 ; CHECK-ARM6-NEXT:    movlt r0, #1
 ; CHECK-ARM6-NEXT:    mov r10, #1
-; CHECK-ARM6-NEXT:    movlt r10, r2
+; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    movne r10, r2
 ; CHECK-ARM6-NEXT:    cmp r0, #0
 ; CHECK-ARM6-NEXT:    movne r0, r1
 ; CHECK-ARM6-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
@@ -8794,8 +8799,9 @@ define i64 @test_min_i64() {
 ; CHECK-THUMB7-NEXT:    it lt
 ; CHECK-THUMB7-NEXT:    movlt r0, #1
 ; CHECK-THUMB7-NEXT:    mov.w r10, #1
-; CHECK-THUMB7-NEXT:    it lt
-; CHECK-THUMB7-NEXT:    movlt r10, r2
+; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    it ne
+; CHECK-THUMB7-NEXT:    movne r10, r2
 ; CHECK-THUMB7-NEXT:    cmp r0, #0
 ; CHECK-THUMB7-NEXT:    it ne
 ; CHECK-THUMB7-NEXT:    movne r0, r1
@@ -8891,7 +8897,7 @@ define i64 @test_min_i64() {
 ; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #52] @ 4-byte Reload
 ; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #28] @ 4-byte Spill
 ; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #32] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT:    blt .LBB41_5
+; CHECK-THUMB8BASE-NEXT:    cbnz r1, .LBB41_5
 ; CHECK-THUMB8BASE-NEXT:  @ %bb.4: @ %atomicrmw.start
 ; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB41_1 Depth=1
 ; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
@@ -8968,7 +8974,8 @@ define i64 @test_umax_i64() {
 ; CHECK-ARM8-NEXT:    mov r0, #0
 ; CHECK-ARM8-NEXT:    movwlo r0, #1
 ; CHECK-ARM8-NEXT:    mov r10, #1
-; CHECK-ARM8-NEXT:    movlo r10, r2
+; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    movne r10, r2
 ; CHECK-ARM8-NEXT:    cmp r0, #0
 ; CHECK-ARM8-NEXT:    movne r0, r1
 ; CHECK-ARM8-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
@@ -9035,7 +9042,8 @@ define i64 @test_umax_i64() {
 ; CHECK-ARM6-NEXT:    mov r0, #0
 ; CHECK-ARM6-NEXT:    movlo r0, #1
 ; CHECK-ARM6-NEXT:    mov r10, #1
-; CHECK-ARM6-NEXT:    movlo r10, r2
+; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    movne r10, r2
 ; CHECK-ARM6-NEXT:    cmp r0, #0
 ; CHECK-ARM6-NEXT:    movne r0, r1
 ; CHECK-ARM6-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
@@ -9107,8 +9115,9 @@ define i64 @test_umax_i64() {
 ; CHECK-THUMB7-NEXT:    mov r8, r2
 ; CHECK-THUMB7-NEXT:    mov r9, r1
 ; CHECK-THUMB7-NEXT:    mov.w r10, #1
-; CHECK-THUMB7-NEXT:    it lo
-; CHECK-THUMB7-NEXT:    movlo r10, r2
+; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    it ne
+; CHECK-THUMB7-NEXT:    movne r10, r2
 ; CHECK-THUMB7-NEXT:    cmp r0, #0
 ; CHECK-THUMB7-NEXT:    it ne
 ; CHECK-THUMB7-NEXT:    movne r0, r1
@@ -9204,7 +9213,7 @@ define i64 @test_umax_i64() {
 ; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #52] @ 4-byte Reload
 ; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #28] @ 4-byte Spill
 ; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #32] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT:    blo .LBB42_5
+; CHECK-THUMB8BASE-NEXT:    cbnz r1, .LBB42_5
 ; CHECK-THUMB8BASE-NEXT:  @ %bb.4: @ %atomicrmw.start
 ; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB42_1 Depth=1
 ; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #48] @ 4-byte Reload
@@ -9281,7 +9290,8 @@ define i64 @test_umin_i64() {
 ; CHECK-ARM8-NEXT:    mov r0, #0
 ; CHECK-ARM8-NEXT:    movwlo r0, #1
 ; CHECK-ARM8-NEXT:    mov r10, #1
-; CHECK-ARM8-NEXT:    movlo r10, r2
+; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    movne r10, r2
 ; CHECK-ARM8-NEXT:    cmp r0, #0
 ; CHECK-ARM8-NEXT:    movne r0, r1
 ; CHECK-ARM8-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
@@ -9348,7 +9358,8 @@ define i64 @test_umin_i64() {
 ; CHECK-ARM6-NEXT:    mov r0, #0
 ; CHECK-ARM6-NEXT:    movlo r0, #1
 ; CHECK-ARM6-NEXT:    mov r10, #1
-; CHECK-ARM6-NEXT:    movlo r10, r2
+; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    movne r10, r2
 ; CHECK-ARM6-NEXT:    cmp r0, #0
 ; CHECK-ARM6-NEXT:    movne r0, r1
 ; CHECK-ARM6-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
@@ -9420,8 +9431,9 @@ define i64 @test_umin_i64() {
 ; CHECK-THUMB7-NEXT:    it lo
 ; CHECK-THUMB7-NEXT:    movlo r0, #1
 ; CHECK-THUMB7-NEXT:    mov.w r10, #1
-; CHECK-THUMB7-NEXT:    it lo
-; CHECK-THUMB7-NEXT:    movlo r10, r2
+; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    it ne
+; CHECK-THUMB7-NEXT:    movne r10, r2
 ; CHECK-THUMB7-NEXT:    cmp r0, #0
 ; CHECK-THUMB7-NEXT:    it ne
 ; CHECK-THUMB7-NEXT:    movne r0, r1
@@ -9517,7 +9529,7 @@ define i64 @test_umin_i64() {
 ; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #52] @ 4-byte Reload
 ; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #28] @ 4-byte Spill
 ; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #32] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT:    blo .LBB43_5
+; CHECK-THUMB8BASE-NEXT:    cbnz r1, .LBB43_5
 ; CHECK-THUMB8BASE-NEXT:  @ %bb.4: @ %atomicrmw.start
 ; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB43_1 Depth=1
 ; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload

diff  --git a/llvm/test/CodeGen/ARM/bfi.ll b/llvm/test/CodeGen/ARM/bfi.ll
index 5aeb99695a5fe8..91a74e535a2218 100644
--- a/llvm/test/CodeGen/ARM/bfi.ll
+++ b/llvm/test/CodeGen/ARM/bfi.ll
@@ -204,11 +204,10 @@ define i32 @f12(i32 %x, i32 %y) {
 define i32 @f13(i32 %x, i32 %y) {
 ; CHECK-LABEL: f13:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    and r0, r0, #4
-; CHECK-NEXT:    bic r1, r1, #255
-; CHECK-NEXT:    cmp r0, #42
-; CHECK-NEXT:    orrne r1, r1, #16
-; CHECK-NEXT:    mov r0, r1
+; CHECK-NEXT:    and r2, r0, #4
+; CHECK-NEXT:    bic r0, r1, #255
+; CHECK-NEXT:    cmp r2, #42
+; CHECK-NEXT:    orrne r0, r0, #16
 ; CHECK-NEXT:    bx lr
   %y2 = and i32 %y, 4294967040 ; 0xFFFFFF00
   %and = and i32 %x, 4

diff  --git a/llvm/test/CodeGen/ARM/cmov_fp16.ll b/llvm/test/CodeGen/ARM/cmov_fp16.ll
index fb8da3724ede8f..805955d3e83069 100644
--- a/llvm/test/CodeGen/ARM/cmov_fp16.ll
+++ b/llvm/test/CodeGen/ARM/cmov_fp16.ll
@@ -5,12 +5,12 @@
 define i32 @test_ne(i32 %x, i32 %y, i32 %a, i32 %b) {
 ; CHECK-LABEL: test_ne:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov s0, r0
+; CHECK-NEXT:    vmov s0, r1
 ; CHECK-NEXT:    cmp r2, r3
-; CHECK-NEXT:    vmov s2, r1
+; CHECK-NEXT:    vmov s2, r0
 ; CHECK-NEXT:    vcvt.f16.u32 s0, s0
 ; CHECK-NEXT:    vcvt.f16.u32 s2, s2
-; CHECK-NEXT:    vseleq.f16 s0, s2, s0
+; CHECK-NEXT:    vseleq.f16 s0, s0, s2
 ; CHECK-NEXT:    vmov.f16 r0, s0
 ; CHECK-NEXT:    bx lr
 entry:
@@ -26,12 +26,12 @@ entry:
 define i32 @test_eq(i32 %x, i32 %y, i32 %a, i32 %b) {
 ; CHECK-LABEL: test_eq:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov s0, r1
+; CHECK-NEXT:    vmov s0, r0
 ; CHECK-NEXT:    cmp r2, r3
-; CHECK-NEXT:    vmov s2, r0
+; CHECK-NEXT:    vmov s2, r1
 ; CHECK-NEXT:    vcvt.f16.u32 s0, s0
 ; CHECK-NEXT:    vcvt.f16.u32 s2, s2
-; CHECK-NEXT:    vseleq.f16 s0, s2, s0
+; CHECK-NEXT:    vseleq.f16 s0, s0, s2
 ; CHECK-NEXT:    vmov.f16 r0, s0
 ; CHECK-NEXT:    bx lr
 entry:
@@ -47,12 +47,12 @@ entry:
 define i32 @test_gt(i32 %x, i32 %y, i32 %a, i32 %b) {
 ; CHECK-LABEL: test_gt:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov s0, r1
+; CHECK-NEXT:    vmov s0, r0
 ; CHECK-NEXT:    cmp r2, r3
-; CHECK-NEXT:    vmov s2, r0
+; CHECK-NEXT:    vmov s2, r1
 ; CHECK-NEXT:    vcvt.f16.u32 s0, s0
 ; CHECK-NEXT:    vcvt.f16.u32 s2, s2
-; CHECK-NEXT:    vselgt.f16 s0, s2, s0
+; CHECK-NEXT:    vselgt.f16 s0, s0, s2
 ; CHECK-NEXT:    vmov.f16 r0, s0
 ; CHECK-NEXT:    bx lr
 entry:
@@ -68,12 +68,12 @@ entry:
 define i32 @test_ge(i32 %x, i32 %y, i32 %a, i32 %b) {
 ; CHECK-LABEL: test_ge:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov s0, r1
+; CHECK-NEXT:    vmov s0, r0
 ; CHECK-NEXT:    cmp r2, r3
-; CHECK-NEXT:    vmov s2, r0
+; CHECK-NEXT:    vmov s2, r1
 ; CHECK-NEXT:    vcvt.f16.u32 s0, s0
 ; CHECK-NEXT:    vcvt.f16.u32 s2, s2
-; CHECK-NEXT:    vselge.f16 s0, s2, s0
+; CHECK-NEXT:    vselge.f16 s0, s0, s2
 ; CHECK-NEXT:    vmov.f16 r0, s0
 ; CHECK-NEXT:    bx lr
 entry:
@@ -89,12 +89,12 @@ entry:
 define i32 @test_lt(i32 %x, i32 %y, i32 %a, i32 %b) {
 ; CHECK-LABEL: test_lt:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov s0, r0
+; CHECK-NEXT:    vmov s0, r1
 ; CHECK-NEXT:    cmp r2, r3
-; CHECK-NEXT:    vmov s2, r1
+; CHECK-NEXT:    vmov s2, r0
 ; CHECK-NEXT:    vcvt.f16.u32 s0, s0
 ; CHECK-NEXT:    vcvt.f16.u32 s2, s2
-; CHECK-NEXT:    vselge.f16 s0, s2, s0
+; CHECK-NEXT:    vselge.f16 s0, s0, s2
 ; CHECK-NEXT:    vmov.f16 r0, s0
 ; CHECK-NEXT:    bx lr
 entry:
@@ -110,12 +110,12 @@ entry:
 define i32 @test_le(i32 %x, i32 %y, i32 %a, i32 %b) {
 ; CHECK-LABEL: test_le:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov s0, r0
+; CHECK-NEXT:    vmov s0, r1
 ; CHECK-NEXT:    cmp r2, r3
-; CHECK-NEXT:    vmov s2, r1
+; CHECK-NEXT:    vmov s2, r0
 ; CHECK-NEXT:    vcvt.f16.u32 s0, s0
 ; CHECK-NEXT:    vcvt.f16.u32 s2, s2
-; CHECK-NEXT:    vselgt.f16 s0, s2, s0
+; CHECK-NEXT:    vselgt.f16 s0, s0, s2
 ; CHECK-NEXT:    vmov.f16 r0, s0
 ; CHECK-NEXT:    bx lr
 entry:
@@ -131,25 +131,25 @@ entry:
 define i32 @test_hi(i32 %x, i32 %y, i32 %a, i32 %b) {
 ; CHECK-THUMB-LABEL: test_hi:
 ; CHECK-THUMB:       @ %bb.0: @ %entry
-; CHECK-THUMB-NEXT:    vmov s2, r1
+; CHECK-THUMB-NEXT:    vmov s0, r1
 ; CHECK-THUMB-NEXT:    cmp r2, r3
-; CHECK-THUMB-NEXT:    vmov s0, r0
-; CHECK-THUMB-NEXT:    vcvt.f16.u32 s2, s2
+; CHECK-THUMB-NEXT:    vmov s2, r0
 ; CHECK-THUMB-NEXT:    vcvt.f16.u32 s0, s0
+; CHECK-THUMB-NEXT:    vcvt.f16.u32 s2, s2
 ; CHECK-THUMB-NEXT:    it hi
-; CHECK-THUMB-NEXT:    vmovhi.f32 s2, s0
-; CHECK-THUMB-NEXT:    vmov.f16 r0, s2
+; CHECK-THUMB-NEXT:    vmovhi.f32 s0, s2
+; CHECK-THUMB-NEXT:    vmov.f16 r0, s0
 ; CHECK-THUMB-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: test_hi:
 ; CHECK-ARM:       @ %bb.0: @ %entry
-; CHECK-ARM-NEXT:    vmov s0, r0
+; CHECK-ARM-NEXT:    vmov s0, r1
 ; CHECK-ARM-NEXT:    cmp r2, r3
-; CHECK-ARM-NEXT:    vmov s2, r1
+; CHECK-ARM-NEXT:    vmov s2, r0
 ; CHECK-ARM-NEXT:    vcvt.f16.u32 s0, s0
 ; CHECK-ARM-NEXT:    vcvt.f16.u32 s2, s2
-; CHECK-ARM-NEXT:    vmovhi.f32 s2, s0
-; CHECK-ARM-NEXT:    vmov.f16 r0, s2
+; CHECK-ARM-NEXT:    vmovhi.f32 s0, s2
+; CHECK-ARM-NEXT:    vmov.f16 r0, s0
 ; CHECK-ARM-NEXT:    bx lr
 entry:
   %x.half = uitofp i32 %x to half
@@ -164,25 +164,25 @@ entry:
 define i32 @test_hs(i32 %x, i32 %y, i32 %a, i32 %b) {
 ; CHECK-THUMB-LABEL: test_hs:
 ; CHECK-THUMB:       @ %bb.0: @ %entry
-; CHECK-THUMB-NEXT:    vmov s2, r1
+; CHECK-THUMB-NEXT:    vmov s0, r1
 ; CHECK-THUMB-NEXT:    cmp r2, r3
-; CHECK-THUMB-NEXT:    vmov s0, r0
-; CHECK-THUMB-NEXT:    vcvt.f16.u32 s2, s2
+; CHECK-THUMB-NEXT:    vmov s2, r0
 ; CHECK-THUMB-NEXT:    vcvt.f16.u32 s0, s0
+; CHECK-THUMB-NEXT:    vcvt.f16.u32 s2, s2
 ; CHECK-THUMB-NEXT:    it hs
-; CHECK-THUMB-NEXT:    vmovhs.f32 s2, s0
-; CHECK-THUMB-NEXT:    vmov.f16 r0, s2
+; CHECK-THUMB-NEXT:    vmovhs.f32 s0, s2
+; CHECK-THUMB-NEXT:    vmov.f16 r0, s0
 ; CHECK-THUMB-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: test_hs:
 ; CHECK-ARM:       @ %bb.0: @ %entry
-; CHECK-ARM-NEXT:    vmov s0, r0
+; CHECK-ARM-NEXT:    vmov s0, r1
 ; CHECK-ARM-NEXT:    cmp r2, r3
-; CHECK-ARM-NEXT:    vmov s2, r1
+; CHECK-ARM-NEXT:    vmov s2, r0
 ; CHECK-ARM-NEXT:    vcvt.f16.u32 s0, s0
 ; CHECK-ARM-NEXT:    vcvt.f16.u32 s2, s2
-; CHECK-ARM-NEXT:    vmovhs.f32 s2, s0
-; CHECK-ARM-NEXT:    vmov.f16 r0, s2
+; CHECK-ARM-NEXT:    vmovhs.f32 s0, s2
+; CHECK-ARM-NEXT:    vmov.f16 r0, s0
 ; CHECK-ARM-NEXT:    bx lr
 entry:
   %x.half = uitofp i32 %x to half
@@ -197,25 +197,25 @@ entry:
 define i32 @test_lo(i32 %x, i32 %y, i32 %a, i32 %b) {
 ; CHECK-THUMB-LABEL: test_lo:
 ; CHECK-THUMB:       @ %bb.0: @ %entry
-; CHECK-THUMB-NEXT:    vmov s2, r1
+; CHECK-THUMB-NEXT:    vmov s0, r1
 ; CHECK-THUMB-NEXT:    cmp r2, r3
-; CHECK-THUMB-NEXT:    vmov s0, r0
-; CHECK-THUMB-NEXT:    vcvt.f16.u32 s2, s2
+; CHECK-THUMB-NEXT:    vmov s2, r0
 ; CHECK-THUMB-NEXT:    vcvt.f16.u32 s0, s0
+; CHECK-THUMB-NEXT:    vcvt.f16.u32 s2, s2
 ; CHECK-THUMB-NEXT:    it lo
-; CHECK-THUMB-NEXT:    vmovlo.f32 s2, s0
-; CHECK-THUMB-NEXT:    vmov.f16 r0, s2
+; CHECK-THUMB-NEXT:    vmovlo.f32 s0, s2
+; CHECK-THUMB-NEXT:    vmov.f16 r0, s0
 ; CHECK-THUMB-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: test_lo:
 ; CHECK-ARM:       @ %bb.0: @ %entry
-; CHECK-ARM-NEXT:    vmov s0, r0
+; CHECK-ARM-NEXT:    vmov s0, r1
 ; CHECK-ARM-NEXT:    cmp r2, r3
-; CHECK-ARM-NEXT:    vmov s2, r1
+; CHECK-ARM-NEXT:    vmov s2, r0
 ; CHECK-ARM-NEXT:    vcvt.f16.u32 s0, s0
 ; CHECK-ARM-NEXT:    vcvt.f16.u32 s2, s2
-; CHECK-ARM-NEXT:    vmovlo.f32 s2, s0
-; CHECK-ARM-NEXT:    vmov.f16 r0, s2
+; CHECK-ARM-NEXT:    vmovlo.f32 s0, s2
+; CHECK-ARM-NEXT:    vmov.f16 r0, s0
 ; CHECK-ARM-NEXT:    bx lr
 entry:
   %x.half = uitofp i32 %x to half
@@ -230,25 +230,25 @@ entry:
 define i32 @test_ls(i32 %x, i32 %y, i32 %a, i32 %b) {
 ; CHECK-THUMB-LABEL: test_ls:
 ; CHECK-THUMB:       @ %bb.0: @ %entry
-; CHECK-THUMB-NEXT:    vmov s2, r1
+; CHECK-THUMB-NEXT:    vmov s0, r1
 ; CHECK-THUMB-NEXT:    cmp r2, r3
-; CHECK-THUMB-NEXT:    vmov s0, r0
-; CHECK-THUMB-NEXT:    vcvt.f16.u32 s2, s2
+; CHECK-THUMB-NEXT:    vmov s2, r0
 ; CHECK-THUMB-NEXT:    vcvt.f16.u32 s0, s0
+; CHECK-THUMB-NEXT:    vcvt.f16.u32 s2, s2
 ; CHECK-THUMB-NEXT:    it ls
-; CHECK-THUMB-NEXT:    vmovls.f32 s2, s0
-; CHECK-THUMB-NEXT:    vmov.f16 r0, s2
+; CHECK-THUMB-NEXT:    vmovls.f32 s0, s2
+; CHECK-THUMB-NEXT:    vmov.f16 r0, s0
 ; CHECK-THUMB-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: test_ls:
 ; CHECK-ARM:       @ %bb.0: @ %entry
-; CHECK-ARM-NEXT:    vmov s0, r0
+; CHECK-ARM-NEXT:    vmov s0, r1
 ; CHECK-ARM-NEXT:    cmp r2, r3
-; CHECK-ARM-NEXT:    vmov s2, r1
+; CHECK-ARM-NEXT:    vmov s2, r0
 ; CHECK-ARM-NEXT:    vcvt.f16.u32 s0, s0
 ; CHECK-ARM-NEXT:    vcvt.f16.u32 s2, s2
-; CHECK-ARM-NEXT:    vmovls.f32 s2, s0
-; CHECK-ARM-NEXT:    vmov.f16 r0, s2
+; CHECK-ARM-NEXT:    vmovls.f32 s0, s2
+; CHECK-ARM-NEXT:    vmov.f16 r0, s0
 ; CHECK-ARM-NEXT:    bx lr
 entry:
   %x.half = uitofp i32 %x to half

diff  --git a/llvm/test/CodeGen/ARM/cse-call.ll b/llvm/test/CodeGen/ARM/cse-call.ll
index 25fa477e5c2d38..71cfa3b9da9302 100644
--- a/llvm/test/CodeGen/ARM/cse-call.ll
+++ b/llvm/test/CodeGen/ARM/cse-call.ll
@@ -25,9 +25,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 ; CHECK-T1: cmp
 
 ; CHECK: S_trimzeros
-; CHECK-T1: S_trimzeros
-; CHECK-NOT: moveq
-; CHECK-T1-NOT: beq
+; CHECK: cmp
 ; CHECK: strlen
 
 @F_floatmul.man1 = external global [200 x i8], align 1

diff  --git a/llvm/test/CodeGen/ARM/cttz.ll b/llvm/test/CodeGen/ARM/cttz.ll
index 76adc61c5971fd..d9663a1c148fc5 100644
--- a/llvm/test/CodeGen/ARM/cttz.ll
+++ b/llvm/test/CodeGen/ARM/cttz.ll
@@ -223,39 +223,42 @@ define i64 @test_i64(i64 %a) {
 ; CHECK-6M:       @ %bb.0:
 ; CHECK-6M-NEXT:    .save {r4, r5, r7, lr}
 ; CHECK-6M-NEXT:    push {r4, r5, r7, lr}
-; CHECK-6M-NEXT:    mov r2, r0
 ; CHECK-6M-NEXT:    ldr r5, .LCPI3_0
-; CHECK-6M-NEXT:    adr r3, .LCPI3_1
-; CHECK-6M-NEXT:    movs r0, #32
-; CHECK-6M-NEXT:    cmp r1, #0
-; CHECK-6M-NEXT:    mov r4, r0
-; CHECK-6M-NEXT:    beq .LBB3_2
+; CHECK-6M-NEXT:    adr r4, .LCPI3_1
+; CHECK-6M-NEXT:    movs r3, #32
+; CHECK-6M-NEXT:    cmp r0, #0
+; CHECK-6M-NEXT:    mov r2, r3
+; CHECK-6M-NEXT:    bne .LBB3_5
 ; CHECK-6M-NEXT:  @ %bb.1:
-; CHECK-6M-NEXT:    rsbs r4, r1, #0
-; CHECK-6M-NEXT:    ands r4, r1
-; CHECK-6M-NEXT:    muls r4, r5, r4
-; CHECK-6M-NEXT:    lsrs r1, r4, #27
-; CHECK-6M-NEXT:    ldrb r4, [r3, r1]
+; CHECK-6M-NEXT:    cmp r1, #0
+; CHECK-6M-NEXT:    bne .LBB3_6
 ; CHECK-6M-NEXT:  .LBB3_2:
-; CHECK-6M-NEXT:    adds r4, #32
-; CHECK-6M-NEXT:    rsbs r1, r2, #0
-; CHECK-6M-NEXT:    ands r1, r2
-; CHECK-6M-NEXT:    muls r5, r1, r5
-; CHECK-6M-NEXT:    lsrs r1, r5, #27
-; CHECK-6M-NEXT:    cmp r2, #0
-; CHECK-6M-NEXT:    bne .LBB3_5
-; CHECK-6M-NEXT:  @ %bb.3:
-; CHECK-6M-NEXT:    beq .LBB3_6
+; CHECK-6M-NEXT:    cmp r0, #0
+; CHECK-6M-NEXT:    bne .LBB3_4
+; CHECK-6M-NEXT:  .LBB3_3:
+; CHECK-6M-NEXT:    adds r3, #32
+; CHECK-6M-NEXT:    mov r2, r3
 ; CHECK-6M-NEXT:  .LBB3_4:
 ; CHECK-6M-NEXT:    movs r1, #0
+; CHECK-6M-NEXT:    mov r0, r2
 ; CHECK-6M-NEXT:    pop {r4, r5, r7, pc}
 ; CHECK-6M-NEXT:  .LBB3_5:
-; CHECK-6M-NEXT:    ldrb r0, [r3, r1]
-; CHECK-6M-NEXT:    bne .LBB3_4
+; CHECK-6M-NEXT:    rsbs r2, r0, #0
+; CHECK-6M-NEXT:    ands r2, r0
+; CHECK-6M-NEXT:    muls r2, r5, r2
+; CHECK-6M-NEXT:    lsrs r2, r2, #27
+; CHECK-6M-NEXT:    ldrb r2, [r4, r2]
+; CHECK-6M-NEXT:    cmp r1, #0
+; CHECK-6M-NEXT:    beq .LBB3_2
 ; CHECK-6M-NEXT:  .LBB3_6:
-; CHECK-6M-NEXT:    mov r0, r4
-; CHECK-6M-NEXT:    movs r1, #0
-; CHECK-6M-NEXT:    pop {r4, r5, r7, pc}
+; CHECK-6M-NEXT:    rsbs r3, r1, #0
+; CHECK-6M-NEXT:    ands r3, r1
+; CHECK-6M-NEXT:    muls r5, r3, r5
+; CHECK-6M-NEXT:    lsrs r1, r5, #27
+; CHECK-6M-NEXT:    ldrb r3, [r4, r1]
+; CHECK-6M-NEXT:    cmp r0, #0
+; CHECK-6M-NEXT:    beq .LBB3_3
+; CHECK-6M-NEXT:    b .LBB3_4
 ; CHECK-6M-NEXT:    .p2align 2
 ; CHECK-6M-NEXT:  @ %bb.7:
 ; CHECK-6M-NEXT:  .LCPI3_0:
@@ -267,39 +270,40 @@ define i64 @test_i64(i64 %a) {
 ; CHECK-8MBASE:       @ %bb.0:
 ; CHECK-8MBASE-NEXT:    .save {r4, r5, r7, lr}
 ; CHECK-8MBASE-NEXT:    push {r4, r5, r7, lr}
-; CHECK-8MBASE-NEXT:    mov r2, r0
 ; CHECK-8MBASE-NEXT:    movw r5, #46385
 ; CHECK-8MBASE-NEXT:    movt r5, #1916
-; CHECK-8MBASE-NEXT:    adr r3, .LCPI3_0
-; CHECK-8MBASE-NEXT:    movs r0, #32
-; CHECK-8MBASE-NEXT:    mov r4, r0
-; CHECK-8MBASE-NEXT:    cbz r1, .LBB3_2
+; CHECK-8MBASE-NEXT:    adr r4, .LCPI3_0
+; CHECK-8MBASE-NEXT:    movs r3, #32
+; CHECK-8MBASE-NEXT:    mov r2, r3
+; CHECK-8MBASE-NEXT:    cbnz r0, .LBB3_5
 ; CHECK-8MBASE-NEXT:  @ %bb.1:
-; CHECK-8MBASE-NEXT:    rsbs r4, r1, #0
-; CHECK-8MBASE-NEXT:    ands r4, r1
-; CHECK-8MBASE-NEXT:    muls r4, r5, r4
-; CHECK-8MBASE-NEXT:    lsrs r1, r4, #27
-; CHECK-8MBASE-NEXT:    ldrb r4, [r3, r1]
+; CHECK-8MBASE-NEXT:    cbnz r1, .LBB3_6
 ; CHECK-8MBASE-NEXT:  .LBB3_2:
-; CHECK-8MBASE-NEXT:    adds r4, #32
-; CHECK-8MBASE-NEXT:    rsbs r1, r2, #0
-; CHECK-8MBASE-NEXT:    ands r1, r2
-; CHECK-8MBASE-NEXT:    muls r5, r1, r5
-; CHECK-8MBASE-NEXT:    lsrs r1, r5, #27
-; CHECK-8MBASE-NEXT:    cmp r2, #0
-; CHECK-8MBASE-NEXT:    bne .LBB3_5
-; CHECK-8MBASE-NEXT:  @ %bb.3:
-; CHECK-8MBASE-NEXT:    beq .LBB3_6
+; CHECK-8MBASE-NEXT:    cbnz r0, .LBB3_4
+; CHECK-8MBASE-NEXT:  .LBB3_3:
+; CHECK-8MBASE-NEXT:    adds r3, #32
+; CHECK-8MBASE-NEXT:    mov r2, r3
 ; CHECK-8MBASE-NEXT:  .LBB3_4:
 ; CHECK-8MBASE-NEXT:    movs r1, #0
+; CHECK-8MBASE-NEXT:    mov r0, r2
 ; CHECK-8MBASE-NEXT:    pop {r4, r5, r7, pc}
 ; CHECK-8MBASE-NEXT:  .LBB3_5:
-; CHECK-8MBASE-NEXT:    ldrb r0, [r3, r1]
-; CHECK-8MBASE-NEXT:    bne .LBB3_4
+; CHECK-8MBASE-NEXT:    rsbs r2, r0, #0
+; CHECK-8MBASE-NEXT:    ands r2, r0
+; CHECK-8MBASE-NEXT:    muls r2, r5, r2
+; CHECK-8MBASE-NEXT:    lsrs r2, r2, #27
+; CHECK-8MBASE-NEXT:    ldrb r2, [r4, r2]
+; CHECK-8MBASE-NEXT:    cmp r1, #0
+; CHECK-8MBASE-NEXT:    beq .LBB3_2
 ; CHECK-8MBASE-NEXT:  .LBB3_6:
-; CHECK-8MBASE-NEXT:    mov r0, r4
-; CHECK-8MBASE-NEXT:    movs r1, #0
-; CHECK-8MBASE-NEXT:    pop {r4, r5, r7, pc}
+; CHECK-8MBASE-NEXT:    rsbs r3, r1, #0
+; CHECK-8MBASE-NEXT:    ands r3, r1
+; CHECK-8MBASE-NEXT:    muls r5, r3, r5
+; CHECK-8MBASE-NEXT:    lsrs r1, r5, #27
+; CHECK-8MBASE-NEXT:    ldrb r3, [r4, r1]
+; CHECK-8MBASE-NEXT:    cmp r0, #0
+; CHECK-8MBASE-NEXT:    beq .LBB3_3
+; CHECK-8MBASE-NEXT:    b .LBB3_4
 ; CHECK-8MBASE-NEXT:    .p2align 2
 ; CHECK-8MBASE-NEXT:  @ %bb.7:
 ; CHECK-8MBASE-NEXT:  .LCPI3_0:
@@ -490,39 +494,42 @@ define i64 @test_i64_zero_undef(i64 %a) {
 ; CHECK-6M:       @ %bb.0:
 ; CHECK-6M-NEXT:    .save {r4, r5, r7, lr}
 ; CHECK-6M-NEXT:    push {r4, r5, r7, lr}
-; CHECK-6M-NEXT:    mov r2, r0
 ; CHECK-6M-NEXT:    ldr r5, .LCPI7_0
-; CHECK-6M-NEXT:    adr r3, .LCPI7_1
-; CHECK-6M-NEXT:    movs r0, #32
-; CHECK-6M-NEXT:    cmp r1, #0
-; CHECK-6M-NEXT:    mov r4, r0
-; CHECK-6M-NEXT:    beq .LBB7_2
+; CHECK-6M-NEXT:    adr r4, .LCPI7_1
+; CHECK-6M-NEXT:    movs r3, #32
+; CHECK-6M-NEXT:    cmp r0, #0
+; CHECK-6M-NEXT:    mov r2, r3
+; CHECK-6M-NEXT:    bne .LBB7_5
 ; CHECK-6M-NEXT:  @ %bb.1:
-; CHECK-6M-NEXT:    rsbs r4, r1, #0
-; CHECK-6M-NEXT:    ands r4, r1
-; CHECK-6M-NEXT:    muls r4, r5, r4
-; CHECK-6M-NEXT:    lsrs r1, r4, #27
-; CHECK-6M-NEXT:    ldrb r4, [r3, r1]
+; CHECK-6M-NEXT:    cmp r1, #0
+; CHECK-6M-NEXT:    bne .LBB7_6
 ; CHECK-6M-NEXT:  .LBB7_2:
-; CHECK-6M-NEXT:    adds r4, #32
-; CHECK-6M-NEXT:    rsbs r1, r2, #0
-; CHECK-6M-NEXT:    ands r1, r2
-; CHECK-6M-NEXT:    muls r5, r1, r5
-; CHECK-6M-NEXT:    lsrs r1, r5, #27
-; CHECK-6M-NEXT:    cmp r2, #0
-; CHECK-6M-NEXT:    bne .LBB7_5
-; CHECK-6M-NEXT:  @ %bb.3:
-; CHECK-6M-NEXT:    beq .LBB7_6
+; CHECK-6M-NEXT:    cmp r0, #0
+; CHECK-6M-NEXT:    bne .LBB7_4
+; CHECK-6M-NEXT:  .LBB7_3:
+; CHECK-6M-NEXT:    adds r3, #32
+; CHECK-6M-NEXT:    mov r2, r3
 ; CHECK-6M-NEXT:  .LBB7_4:
 ; CHECK-6M-NEXT:    movs r1, #0
+; CHECK-6M-NEXT:    mov r0, r2
 ; CHECK-6M-NEXT:    pop {r4, r5, r7, pc}
 ; CHECK-6M-NEXT:  .LBB7_5:
-; CHECK-6M-NEXT:    ldrb r0, [r3, r1]
-; CHECK-6M-NEXT:    bne .LBB7_4
+; CHECK-6M-NEXT:    rsbs r2, r0, #0
+; CHECK-6M-NEXT:    ands r2, r0
+; CHECK-6M-NEXT:    muls r2, r5, r2
+; CHECK-6M-NEXT:    lsrs r2, r2, #27
+; CHECK-6M-NEXT:    ldrb r2, [r4, r2]
+; CHECK-6M-NEXT:    cmp r1, #0
+; CHECK-6M-NEXT:    beq .LBB7_2
 ; CHECK-6M-NEXT:  .LBB7_6:
-; CHECK-6M-NEXT:    mov r0, r4
-; CHECK-6M-NEXT:    movs r1, #0
-; CHECK-6M-NEXT:    pop {r4, r5, r7, pc}
+; CHECK-6M-NEXT:    rsbs r3, r1, #0
+; CHECK-6M-NEXT:    ands r3, r1
+; CHECK-6M-NEXT:    muls r5, r3, r5
+; CHECK-6M-NEXT:    lsrs r1, r5, #27
+; CHECK-6M-NEXT:    ldrb r3, [r4, r1]
+; CHECK-6M-NEXT:    cmp r0, #0
+; CHECK-6M-NEXT:    beq .LBB7_3
+; CHECK-6M-NEXT:    b .LBB7_4
 ; CHECK-6M-NEXT:    .p2align 2
 ; CHECK-6M-NEXT:  @ %bb.7:
 ; CHECK-6M-NEXT:  .LCPI7_0:
@@ -534,39 +541,40 @@ define i64 @test_i64_zero_undef(i64 %a) {
 ; CHECK-8MBASE:       @ %bb.0:
 ; CHECK-8MBASE-NEXT:    .save {r4, r5, r7, lr}
 ; CHECK-8MBASE-NEXT:    push {r4, r5, r7, lr}
-; CHECK-8MBASE-NEXT:    mov r2, r0
 ; CHECK-8MBASE-NEXT:    movw r5, #46385
 ; CHECK-8MBASE-NEXT:    movt r5, #1916
-; CHECK-8MBASE-NEXT:    adr r3, .LCPI7_0
-; CHECK-8MBASE-NEXT:    movs r0, #32
-; CHECK-8MBASE-NEXT:    mov r4, r0
-; CHECK-8MBASE-NEXT:    cbz r1, .LBB7_2
+; CHECK-8MBASE-NEXT:    adr r4, .LCPI7_0
+; CHECK-8MBASE-NEXT:    movs r3, #32
+; CHECK-8MBASE-NEXT:    mov r2, r3
+; CHECK-8MBASE-NEXT:    cbnz r0, .LBB7_5
 ; CHECK-8MBASE-NEXT:  @ %bb.1:
-; CHECK-8MBASE-NEXT:    rsbs r4, r1, #0
-; CHECK-8MBASE-NEXT:    ands r4, r1
-; CHECK-8MBASE-NEXT:    muls r4, r5, r4
-; CHECK-8MBASE-NEXT:    lsrs r1, r4, #27
-; CHECK-8MBASE-NEXT:    ldrb r4, [r3, r1]
+; CHECK-8MBASE-NEXT:    cbnz r1, .LBB7_6
 ; CHECK-8MBASE-NEXT:  .LBB7_2:
-; CHECK-8MBASE-NEXT:    adds r4, #32
-; CHECK-8MBASE-NEXT:    rsbs r1, r2, #0
-; CHECK-8MBASE-NEXT:    ands r1, r2
-; CHECK-8MBASE-NEXT:    muls r5, r1, r5
-; CHECK-8MBASE-NEXT:    lsrs r1, r5, #27
-; CHECK-8MBASE-NEXT:    cmp r2, #0
-; CHECK-8MBASE-NEXT:    bne .LBB7_5
-; CHECK-8MBASE-NEXT:  @ %bb.3:
-; CHECK-8MBASE-NEXT:    beq .LBB7_6
+; CHECK-8MBASE-NEXT:    cbnz r0, .LBB7_4
+; CHECK-8MBASE-NEXT:  .LBB7_3:
+; CHECK-8MBASE-NEXT:    adds r3, #32
+; CHECK-8MBASE-NEXT:    mov r2, r3
 ; CHECK-8MBASE-NEXT:  .LBB7_4:
 ; CHECK-8MBASE-NEXT:    movs r1, #0
+; CHECK-8MBASE-NEXT:    mov r0, r2
 ; CHECK-8MBASE-NEXT:    pop {r4, r5, r7, pc}
 ; CHECK-8MBASE-NEXT:  .LBB7_5:
-; CHECK-8MBASE-NEXT:    ldrb r0, [r3, r1]
-; CHECK-8MBASE-NEXT:    bne .LBB7_4
+; CHECK-8MBASE-NEXT:    rsbs r2, r0, #0
+; CHECK-8MBASE-NEXT:    ands r2, r0
+; CHECK-8MBASE-NEXT:    muls r2, r5, r2
+; CHECK-8MBASE-NEXT:    lsrs r2, r2, #27
+; CHECK-8MBASE-NEXT:    ldrb r2, [r4, r2]
+; CHECK-8MBASE-NEXT:    cmp r1, #0
+; CHECK-8MBASE-NEXT:    beq .LBB7_2
 ; CHECK-8MBASE-NEXT:  .LBB7_6:
-; CHECK-8MBASE-NEXT:    mov r0, r4
-; CHECK-8MBASE-NEXT:    movs r1, #0
-; CHECK-8MBASE-NEXT:    pop {r4, r5, r7, pc}
+; CHECK-8MBASE-NEXT:    rsbs r3, r1, #0
+; CHECK-8MBASE-NEXT:    ands r3, r1
+; CHECK-8MBASE-NEXT:    muls r5, r3, r5
+; CHECK-8MBASE-NEXT:    lsrs r1, r5, #27
+; CHECK-8MBASE-NEXT:    ldrb r3, [r4, r1]
+; CHECK-8MBASE-NEXT:    cmp r0, #0
+; CHECK-8MBASE-NEXT:    beq .LBB7_3
+; CHECK-8MBASE-NEXT:    b .LBB7_4
 ; CHECK-8MBASE-NEXT:    .p2align 2
 ; CHECK-8MBASE-NEXT:  @ %bb.7:
 ; CHECK-8MBASE-NEXT:  .LCPI7_0:

diff  --git a/llvm/test/CodeGen/ARM/fadd-select-fneg-combine.ll b/llvm/test/CodeGen/ARM/fadd-select-fneg-combine.ll
index 90bb02fdc0cd4b..3f2b40460917e4 100644
--- a/llvm/test/CodeGen/ARM/fadd-select-fneg-combine.ll
+++ b/llvm/test/CodeGen/ARM/fadd-select-fneg-combine.ll
@@ -4,12 +4,12 @@
 define float @fadd_select_fneg_fneg_f32(i32 %arg0, float %x, float %y, float %z) {
 ; CHECK-LABEL: fadd_select_fneg_fneg_f32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmov s0, r2
+; CHECK-NEXT:    vmov s0, r3
 ; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    vmov s2, r2
 ; CHECK-NEXT:    vmov s4, r1
-; CHECK-NEXT:    vmov s2, r3
-; CHECK-NEXT:    vseleq.f32 s0, s4, s0
-; CHECK-NEXT:    vsub.f32 s0, s2, s0
+; CHECK-NEXT:    vseleq.f32 s2, s4, s2
+; CHECK-NEXT:    vsub.f32 s0, s0, s2
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bx lr
   %cmp = icmp eq i32 %arg0, 0
@@ -248,10 +248,10 @@ define half @fadd_select_fsub_select_f16(i32 %arg0, half %x, half %y, half %z) {
 define half @fadd_select_fneg_negk_f16(i32 %arg0, half %x, half %y) {
 ; CHECK-LABEL: fadd_select_fneg_negk_f16:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmov.f16 s0, #4.000000e+00
-; CHECK-NEXT:    vmov.f16 s2, r1
+; CHECK-NEXT:    vmov.f16 s0, r1
+; CHECK-NEXT:    vmov.f16 s2, #4.000000e+00
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    vseleq.f16 s0, s2, s0
+; CHECK-NEXT:    vseleq.f16 s0, s0, s2
 ; CHECK-NEXT:    vmov.f16 s2, r2
 ; CHECK-NEXT:    vsub.f16 s0, s2, s0
 ; CHECK-NEXT:    vmov r0, s0
@@ -266,10 +266,10 @@ define half @fadd_select_fneg_negk_f16(i32 %arg0, half %x, half %y) {
 define half @fadd_select_fneg_posk_f16(i32 %arg0, half %x, half %y) {
 ; CHECK-LABEL: fadd_select_fneg_posk_f16:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmov.f16 s0, #-4.000000e+00
-; CHECK-NEXT:    vmov.f16 s2, r1
+; CHECK-NEXT:    vmov.f16 s0, r1
+; CHECK-NEXT:    vmov.f16 s2, #-4.000000e+00
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    vseleq.f16 s0, s2, s0
+; CHECK-NEXT:    vseleq.f16 s0, s0, s2
 ; CHECK-NEXT:    vmov.f16 s2, r2
 ; CHECK-NEXT:    vsub.f16 s0, s2, s0
 ; CHECK-NEXT:    vmov r0, s0

diff  --git a/llvm/test/CodeGen/ARM/fcmp-xo.ll b/llvm/test/CodeGen/ARM/fcmp-xo.ll
index ad39cb744620de..908dbd7a11a6b6 100644
--- a/llvm/test/CodeGen/ARM/fcmp-xo.ll
+++ b/llvm/test/CodeGen/ARM/fcmp-xo.ll
@@ -69,12 +69,12 @@ define arm_aapcs_vfpcc float @float128(float %a0) local_unnamed_addr {
 define arm_aapcs_vfpcc double @double1(double %a0) local_unnamed_addr {
 ; CHECK-LABEL: double1:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmov.f64 d16, #1.000000e+00
-; CHECK-NEXT:    vcmp.f64 d16, d0
+; CHECK-NEXT:    vmov.f64 d18, #1.000000e+00
+; CHECK-NEXT:    vcmp.f64 d18, d0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vmov.f64 d17, #5.000000e-01
-; CHECK-NEXT:    vmov.f64 d18, #-5.000000e-01
-; CHECK-NEXT:    vselgt.f64 d0, d18, d17
+; CHECK-NEXT:    vmov.f64 d16, #5.000000e-01
+; CHECK-NEXT:    vmov.f64 d17, #-5.000000e-01
+; CHECK-NEXT:    vselgt.f64 d0, d17, d16
 ; CHECK-NEXT:    bx lr
   %1 = fcmp nsz olt double %a0, 1.000000e+00
   %2 = select i1 %1, double -5.000000e-01, double 5.000000e-01
@@ -87,12 +87,12 @@ define arm_aapcs_vfpcc double @double128(double %a0) local_unnamed_addr {
 ; CHECK-NEXT:    movs r0, #0
 ; CHECK-NEXT:    movs r1, #0
 ; CHECK-NEXT:    movt r0, #16480
-; CHECK-NEXT:    vmov.f64 d17, #5.000000e-01
-; CHECK-NEXT:    vmov d16, r1, r0
-; CHECK-NEXT:    vcmp.f64 d16, d0
+; CHECK-NEXT:    vmov.f64 d16, #5.000000e-01
+; CHECK-NEXT:    vmov d18, r1, r0
+; CHECK-NEXT:    vcmp.f64 d18, d0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vmov.f64 d18, #-5.000000e-01
-; CHECK-NEXT:    vselgt.f64 d0, d18, d17
+; CHECK-NEXT:    vmov.f64 d17, #-5.000000e-01
+; CHECK-NEXT:    vselgt.f64 d0, d17, d16
 ; CHECK-NEXT:    bx lr
   %1 = fcmp nsz olt double %a0, 128.000000e+00
   %2 = select i1 %1, double -5.000000e-01, double 5.000000e-01

diff  --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll
index 478b98dfac80f3..8bd8aa7b34dec2 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll
@@ -8,24 +8,27 @@
 define i32 @stest_f64i32(double %x) {
 ; SOFT-LABEL: stest_f64i32:
 ; SOFT:       @ %bb.0: @ %entry
-; SOFT-NEXT:    .save {r4, lr}
-; SOFT-NEXT:    push {r4, lr}
+; SOFT-NEXT:    .save {r4, r5, r7, lr}
+; SOFT-NEXT:    push {r4, r5, r7, lr}
 ; SOFT-NEXT:    bl __aeabi_d2lz
-; SOFT-NEXT:    movs r2, #0
-; SOFT-NEXT:    ldr r3, .LCPI0_0
-; SOFT-NEXT:    subs r4, r0, r3
-; SOFT-NEXT:    mov r4, r1
-; SOFT-NEXT:    sbcs r4, r2
-; SOFT-NEXT:    blt .LBB0_2
+; SOFT-NEXT:    movs r2, #1
+; SOFT-NEXT:    movs r3, #0
+; SOFT-NEXT:    ldr r4, .LCPI0_0
+; SOFT-NEXT:    subs r5, r0, r4
+; SOFT-NEXT:    mov r5, r1
+; SOFT-NEXT:    sbcs r5, r3
+; SOFT-NEXT:    mov r5, r2
+; SOFT-NEXT:    bge .LBB0_7
 ; SOFT-NEXT:  @ %bb.1: @ %entry
-; SOFT-NEXT:    mov r1, r2
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB0_8
 ; SOFT-NEXT:  .LBB0_2: @ %entry
-; SOFT-NEXT:    blt .LBB0_4
-; SOFT-NEXT:  @ %bb.3: @ %entry
-; SOFT-NEXT:    mov r0, r3
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB0_4
+; SOFT-NEXT:  .LBB0_3: @ %entry
+; SOFT-NEXT:    mov r0, r4
 ; SOFT-NEXT:  .LBB0_4: @ %entry
-; SOFT-NEXT:    mvns r3, r2
-; SOFT-NEXT:    movs r2, #1
+; SOFT-NEXT:    mvns r3, r3
 ; SOFT-NEXT:    lsls r2, r2, #31
 ; SOFT-NEXT:    subs r4, r2, r0
 ; SOFT-NEXT:    sbcs r3, r1
@@ -33,9 +36,18 @@ define i32 @stest_f64i32(double %x) {
 ; SOFT-NEXT:  @ %bb.5: @ %entry
 ; SOFT-NEXT:    mov r0, r2
 ; SOFT-NEXT:  .LBB0_6: @ %entry
-; SOFT-NEXT:    pop {r4, pc}
+; SOFT-NEXT:    pop {r4, r5, r7, pc}
+; SOFT-NEXT:  .LBB0_7: @ %entry
+; SOFT-NEXT:    mov r5, r3
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB0_2
+; SOFT-NEXT:  .LBB0_8: @ %entry
+; SOFT-NEXT:    mov r1, r3
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB0_3
+; SOFT-NEXT:    b .LBB0_4
 ; SOFT-NEXT:    .p2align 2
-; SOFT-NEXT:  @ %bb.7:
+; SOFT-NEXT:  @ %bb.9:
 ; SOFT-NEXT:  .LCPI0_0:
 ; SOFT-NEXT:    .long 2147483647 @ 0x7fffffff
 ;
@@ -123,33 +135,45 @@ define i32 @ustest_f64i32(double %x) {
 ; SOFT-NEXT:    .save {r4, lr}
 ; SOFT-NEXT:    push {r4, lr}
 ; SOFT-NEXT:    bl __aeabi_d2lz
-; SOFT-NEXT:    movs r2, #0
-; SOFT-NEXT:    mvns r3, r2
+; SOFT-NEXT:    movs r2, #1
+; SOFT-NEXT:    movs r3, #0
 ; SOFT-NEXT:    adds r4, r0, #1
 ; SOFT-NEXT:    mov r4, r1
-; SOFT-NEXT:    sbcs r4, r2
-; SOFT-NEXT:    blt .LBB2_2
+; SOFT-NEXT:    sbcs r4, r3
+; SOFT-NEXT:    mov r4, r2
+; SOFT-NEXT:    bge .LBB2_7
 ; SOFT-NEXT:  @ %bb.1: @ %entry
-; SOFT-NEXT:    mov r1, r2
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB2_8
 ; SOFT-NEXT:  .LBB2_2: @ %entry
-; SOFT-NEXT:    blt .LBB2_4
-; SOFT-NEXT:  @ %bb.3: @ %entry
-; SOFT-NEXT:    mov r0, r3
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB2_4
+; SOFT-NEXT:  .LBB2_3: @ %entry
+; SOFT-NEXT:    mvns r0, r3
 ; SOFT-NEXT:  .LBB2_4: @ %entry
-; SOFT-NEXT:    rsbs r3, r0, #0
-; SOFT-NEXT:    mov r3, r2
-; SOFT-NEXT:    sbcs r3, r1
-; SOFT-NEXT:    blt .LBB2_7
+; SOFT-NEXT:    rsbs r4, r0, #0
+; SOFT-NEXT:    mov r4, r3
+; SOFT-NEXT:    sbcs r4, r1
+; SOFT-NEXT:    bge .LBB2_9
 ; SOFT-NEXT:  @ %bb.5: @ %entry
 ; SOFT-NEXT:    cmp r2, #0
-; SOFT-NEXT:    beq .LBB2_8
+; SOFT-NEXT:    beq .LBB2_10
 ; SOFT-NEXT:  .LBB2_6: @ %entry
 ; SOFT-NEXT:    pop {r4, pc}
-; SOFT-NEXT:  .LBB2_7:
-; SOFT-NEXT:    movs r2, #1
+; SOFT-NEXT:  .LBB2_7: @ %entry
+; SOFT-NEXT:    mov r4, r3
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB2_2
+; SOFT-NEXT:  .LBB2_8: @ %entry
+; SOFT-NEXT:    mov r1, r3
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB2_3
+; SOFT-NEXT:    b .LBB2_4
+; SOFT-NEXT:  .LBB2_9: @ %entry
+; SOFT-NEXT:    mov r2, r3
 ; SOFT-NEXT:    cmp r2, #0
 ; SOFT-NEXT:    bne .LBB2_6
-; SOFT-NEXT:  .LBB2_8: @ %entry
+; SOFT-NEXT:  .LBB2_10: @ %entry
 ; SOFT-NEXT:    mov r0, r2
 ; SOFT-NEXT:    pop {r4, pc}
 ;
@@ -196,24 +220,27 @@ entry:
 define i32 @stest_f32i32(float %x) {
 ; SOFT-LABEL: stest_f32i32:
 ; SOFT:       @ %bb.0: @ %entry
-; SOFT-NEXT:    .save {r4, lr}
-; SOFT-NEXT:    push {r4, lr}
+; SOFT-NEXT:    .save {r4, r5, r7, lr}
+; SOFT-NEXT:    push {r4, r5, r7, lr}
 ; SOFT-NEXT:    bl __aeabi_f2lz
-; SOFT-NEXT:    movs r2, #0
-; SOFT-NEXT:    ldr r3, .LCPI3_0
-; SOFT-NEXT:    subs r4, r0, r3
-; SOFT-NEXT:    mov r4, r1
-; SOFT-NEXT:    sbcs r4, r2
-; SOFT-NEXT:    blt .LBB3_2
+; SOFT-NEXT:    movs r2, #1
+; SOFT-NEXT:    movs r3, #0
+; SOFT-NEXT:    ldr r4, .LCPI3_0
+; SOFT-NEXT:    subs r5, r0, r4
+; SOFT-NEXT:    mov r5, r1
+; SOFT-NEXT:    sbcs r5, r3
+; SOFT-NEXT:    mov r5, r2
+; SOFT-NEXT:    bge .LBB3_7
 ; SOFT-NEXT:  @ %bb.1: @ %entry
-; SOFT-NEXT:    mov r1, r2
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB3_8
 ; SOFT-NEXT:  .LBB3_2: @ %entry
-; SOFT-NEXT:    blt .LBB3_4
-; SOFT-NEXT:  @ %bb.3: @ %entry
-; SOFT-NEXT:    mov r0, r3
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB3_4
+; SOFT-NEXT:  .LBB3_3: @ %entry
+; SOFT-NEXT:    mov r0, r4
 ; SOFT-NEXT:  .LBB3_4: @ %entry
-; SOFT-NEXT:    mvns r3, r2
-; SOFT-NEXT:    movs r2, #1
+; SOFT-NEXT:    mvns r3, r3
 ; SOFT-NEXT:    lsls r2, r2, #31
 ; SOFT-NEXT:    subs r4, r2, r0
 ; SOFT-NEXT:    sbcs r3, r1
@@ -221,9 +248,18 @@ define i32 @stest_f32i32(float %x) {
 ; SOFT-NEXT:  @ %bb.5: @ %entry
 ; SOFT-NEXT:    mov r0, r2
 ; SOFT-NEXT:  .LBB3_6: @ %entry
-; SOFT-NEXT:    pop {r4, pc}
+; SOFT-NEXT:    pop {r4, r5, r7, pc}
+; SOFT-NEXT:  .LBB3_7: @ %entry
+; SOFT-NEXT:    mov r5, r3
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB3_2
+; SOFT-NEXT:  .LBB3_8: @ %entry
+; SOFT-NEXT:    mov r1, r3
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB3_3
+; SOFT-NEXT:    b .LBB3_4
 ; SOFT-NEXT:    .p2align 2
-; SOFT-NEXT:  @ %bb.7:
+; SOFT-NEXT:  @ %bb.9:
 ; SOFT-NEXT:  .LCPI3_0:
 ; SOFT-NEXT:    .long 2147483647 @ 0x7fffffff
 ;
@@ -276,33 +312,45 @@ define i32 @ustest_f32i32(float %x) {
 ; SOFT-NEXT:    .save {r4, lr}
 ; SOFT-NEXT:    push {r4, lr}
 ; SOFT-NEXT:    bl __aeabi_f2lz
-; SOFT-NEXT:    movs r2, #0
-; SOFT-NEXT:    mvns r3, r2
+; SOFT-NEXT:    movs r2, #1
+; SOFT-NEXT:    movs r3, #0
 ; SOFT-NEXT:    adds r4, r0, #1
 ; SOFT-NEXT:    mov r4, r1
-; SOFT-NEXT:    sbcs r4, r2
-; SOFT-NEXT:    blt .LBB5_2
+; SOFT-NEXT:    sbcs r4, r3
+; SOFT-NEXT:    mov r4, r2
+; SOFT-NEXT:    bge .LBB5_7
 ; SOFT-NEXT:  @ %bb.1: @ %entry
-; SOFT-NEXT:    mov r1, r2
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB5_8
 ; SOFT-NEXT:  .LBB5_2: @ %entry
-; SOFT-NEXT:    blt .LBB5_4
-; SOFT-NEXT:  @ %bb.3: @ %entry
-; SOFT-NEXT:    mov r0, r3
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB5_4
+; SOFT-NEXT:  .LBB5_3: @ %entry
+; SOFT-NEXT:    mvns r0, r3
 ; SOFT-NEXT:  .LBB5_4: @ %entry
-; SOFT-NEXT:    rsbs r3, r0, #0
-; SOFT-NEXT:    mov r3, r2
-; SOFT-NEXT:    sbcs r3, r1
-; SOFT-NEXT:    blt .LBB5_7
+; SOFT-NEXT:    rsbs r4, r0, #0
+; SOFT-NEXT:    mov r4, r3
+; SOFT-NEXT:    sbcs r4, r1
+; SOFT-NEXT:    bge .LBB5_9
 ; SOFT-NEXT:  @ %bb.5: @ %entry
 ; SOFT-NEXT:    cmp r2, #0
-; SOFT-NEXT:    beq .LBB5_8
+; SOFT-NEXT:    beq .LBB5_10
 ; SOFT-NEXT:  .LBB5_6: @ %entry
 ; SOFT-NEXT:    pop {r4, pc}
-; SOFT-NEXT:  .LBB5_7:
-; SOFT-NEXT:    movs r2, #1
+; SOFT-NEXT:  .LBB5_7: @ %entry
+; SOFT-NEXT:    mov r4, r3
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB5_2
+; SOFT-NEXT:  .LBB5_8: @ %entry
+; SOFT-NEXT:    mov r1, r3
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB5_3
+; SOFT-NEXT:    b .LBB5_4
+; SOFT-NEXT:  .LBB5_9: @ %entry
+; SOFT-NEXT:    mov r2, r3
 ; SOFT-NEXT:    cmp r2, #0
 ; SOFT-NEXT:    bne .LBB5_6
-; SOFT-NEXT:  .LBB5_8: @ %entry
+; SOFT-NEXT:  .LBB5_10: @ %entry
 ; SOFT-NEXT:    mov r0, r2
 ; SOFT-NEXT:    pop {r4, pc}
 ;
@@ -324,26 +372,29 @@ entry:
 define i32 @stest_f16i32(half %x) {
 ; SOFT-LABEL: stest_f16i32:
 ; SOFT:       @ %bb.0: @ %entry
-; SOFT-NEXT:    .save {r4, lr}
-; SOFT-NEXT:    push {r4, lr}
+; SOFT-NEXT:    .save {r4, r5, r7, lr}
+; SOFT-NEXT:    push {r4, r5, r7, lr}
 ; SOFT-NEXT:    uxth r0, r0
 ; SOFT-NEXT:    bl __aeabi_h2f
 ; SOFT-NEXT:    bl __aeabi_f2lz
-; SOFT-NEXT:    movs r2, #0
-; SOFT-NEXT:    ldr r3, .LCPI6_0
-; SOFT-NEXT:    subs r4, r0, r3
-; SOFT-NEXT:    mov r4, r1
-; SOFT-NEXT:    sbcs r4, r2
-; SOFT-NEXT:    blt .LBB6_2
+; SOFT-NEXT:    movs r2, #1
+; SOFT-NEXT:    movs r3, #0
+; SOFT-NEXT:    ldr r4, .LCPI6_0
+; SOFT-NEXT:    subs r5, r0, r4
+; SOFT-NEXT:    mov r5, r1
+; SOFT-NEXT:    sbcs r5, r3
+; SOFT-NEXT:    mov r5, r2
+; SOFT-NEXT:    bge .LBB6_7
 ; SOFT-NEXT:  @ %bb.1: @ %entry
-; SOFT-NEXT:    mov r1, r2
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB6_8
 ; SOFT-NEXT:  .LBB6_2: @ %entry
-; SOFT-NEXT:    blt .LBB6_4
-; SOFT-NEXT:  @ %bb.3: @ %entry
-; SOFT-NEXT:    mov r0, r3
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB6_4
+; SOFT-NEXT:  .LBB6_3: @ %entry
+; SOFT-NEXT:    mov r0, r4
 ; SOFT-NEXT:  .LBB6_4: @ %entry
-; SOFT-NEXT:    mvns r3, r2
-; SOFT-NEXT:    movs r2, #1
+; SOFT-NEXT:    mvns r3, r3
 ; SOFT-NEXT:    lsls r2, r2, #31
 ; SOFT-NEXT:    subs r4, r2, r0
 ; SOFT-NEXT:    sbcs r3, r1
@@ -351,9 +402,18 @@ define i32 @stest_f16i32(half %x) {
 ; SOFT-NEXT:  @ %bb.5: @ %entry
 ; SOFT-NEXT:    mov r0, r2
 ; SOFT-NEXT:  .LBB6_6: @ %entry
-; SOFT-NEXT:    pop {r4, pc}
+; SOFT-NEXT:    pop {r4, r5, r7, pc}
+; SOFT-NEXT:  .LBB6_7: @ %entry
+; SOFT-NEXT:    mov r5, r3
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB6_2
+; SOFT-NEXT:  .LBB6_8: @ %entry
+; SOFT-NEXT:    mov r1, r3
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB6_3
+; SOFT-NEXT:    b .LBB6_4
 ; SOFT-NEXT:    .p2align 2
-; SOFT-NEXT:  @ %bb.7:
+; SOFT-NEXT:  @ %bb.9:
 ; SOFT-NEXT:  .LCPI6_0:
 ; SOFT-NEXT:    .long 2147483647 @ 0x7fffffff
 ;
@@ -432,33 +492,45 @@ define i32 @ustest_f16i32(half %x) {
 ; SOFT-NEXT:    uxth r0, r0
 ; SOFT-NEXT:    bl __aeabi_h2f
 ; SOFT-NEXT:    bl __aeabi_f2lz
-; SOFT-NEXT:    movs r2, #0
-; SOFT-NEXT:    mvns r3, r2
+; SOFT-NEXT:    movs r2, #1
+; SOFT-NEXT:    movs r3, #0
 ; SOFT-NEXT:    adds r4, r0, #1
 ; SOFT-NEXT:    mov r4, r1
-; SOFT-NEXT:    sbcs r4, r2
-; SOFT-NEXT:    blt .LBB8_2
+; SOFT-NEXT:    sbcs r4, r3
+; SOFT-NEXT:    mov r4, r2
+; SOFT-NEXT:    bge .LBB8_7
 ; SOFT-NEXT:  @ %bb.1: @ %entry
-; SOFT-NEXT:    mov r1, r2
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB8_8
 ; SOFT-NEXT:  .LBB8_2: @ %entry
-; SOFT-NEXT:    blt .LBB8_4
-; SOFT-NEXT:  @ %bb.3: @ %entry
-; SOFT-NEXT:    mov r0, r3
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB8_4
+; SOFT-NEXT:  .LBB8_3: @ %entry
+; SOFT-NEXT:    mvns r0, r3
 ; SOFT-NEXT:  .LBB8_4: @ %entry
-; SOFT-NEXT:    rsbs r3, r0, #0
-; SOFT-NEXT:    mov r3, r2
-; SOFT-NEXT:    sbcs r3, r1
-; SOFT-NEXT:    blt .LBB8_7
+; SOFT-NEXT:    rsbs r4, r0, #0
+; SOFT-NEXT:    mov r4, r3
+; SOFT-NEXT:    sbcs r4, r1
+; SOFT-NEXT:    bge .LBB8_9
 ; SOFT-NEXT:  @ %bb.5: @ %entry
 ; SOFT-NEXT:    cmp r2, #0
-; SOFT-NEXT:    beq .LBB8_8
+; SOFT-NEXT:    beq .LBB8_10
 ; SOFT-NEXT:  .LBB8_6: @ %entry
 ; SOFT-NEXT:    pop {r4, pc}
-; SOFT-NEXT:  .LBB8_7:
-; SOFT-NEXT:    movs r2, #1
+; SOFT-NEXT:  .LBB8_7: @ %entry
+; SOFT-NEXT:    mov r4, r3
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB8_2
+; SOFT-NEXT:  .LBB8_8: @ %entry
+; SOFT-NEXT:    mov r1, r3
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB8_3
+; SOFT-NEXT:    b .LBB8_4
+; SOFT-NEXT:  .LBB8_9: @ %entry
+; SOFT-NEXT:    mov r2, r3
 ; SOFT-NEXT:    cmp r2, #0
 ; SOFT-NEXT:    bne .LBB8_6
-; SOFT-NEXT:  .LBB8_8: @ %entry
+; SOFT-NEXT:  .LBB8_10: @ %entry
 ; SOFT-NEXT:    mov r0, r2
 ; SOFT-NEXT:    pop {r4, pc}
 ;
@@ -918,62 +990,86 @@ define i64 @stest_f64i64(double %x) {
 ; SOFT:       @ %bb.0: @ %entry
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #4
-; SOFT-NEXT:    sub sp, #4
+; SOFT-NEXT:    .pad #12
+; SOFT-NEXT:    sub sp, #12
 ; SOFT-NEXT:    bl __fixdfti
-; SOFT-NEXT:    movs r4, #0
-; SOFT-NEXT:    mvns r5, r4
-; SOFT-NEXT:    ldr r6, .LCPI18_0
-; SOFT-NEXT:    adds r7, r0, #1
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    movs r4, #1
+; SOFT-NEXT:    movs r5, #0
+; SOFT-NEXT:    ldr r0, .LCPI18_0
+; SOFT-NEXT:    adds r7, r6, #1
 ; SOFT-NEXT:    mov r7, r1
-; SOFT-NEXT:    sbcs r7, r6
+; SOFT-NEXT:    sbcs r7, r0
 ; SOFT-NEXT:    mov r7, r2
-; SOFT-NEXT:    sbcs r7, r4
+; SOFT-NEXT:    sbcs r7, r5
 ; SOFT-NEXT:    mov r7, r3
-; SOFT-NEXT:    sbcs r7, r4
-; SOFT-NEXT:    bge .LBB18_8
+; SOFT-NEXT:    sbcs r7, r5
+; SOFT-NEXT:    mov r7, r4
+; SOFT-NEXT:    bge .LBB18_13
 ; SOFT-NEXT:  @ %bb.1: @ %entry
-; SOFT-NEXT:    bge .LBB18_9
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB18_14
 ; SOFT-NEXT:  .LBB18_2: @ %entry
-; SOFT-NEXT:    bge .LBB18_10
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB18_4
 ; SOFT-NEXT:  .LBB18_3: @ %entry
-; SOFT-NEXT:    blt .LBB18_5
+; SOFT-NEXT:    mov r2, r5
 ; SOFT-NEXT:  .LBB18_4: @ %entry
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:  .LBB18_5: @ %entry
-; SOFT-NEXT:    movs r6, #1
-; SOFT-NEXT:    lsls r6, r6, #31
-; SOFT-NEXT:    rsbs r7, r0, #0
-; SOFT-NEXT:    mov r7, r6
+; SOFT-NEXT:    str r2, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB18_6
+; SOFT-NEXT:  @ %bb.5: @ %entry
+; SOFT-NEXT:    mov r1, r0
+; SOFT-NEXT:  .LBB18_6: @ %entry
+; SOFT-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    mvns r0, r5
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB18_8
+; SOFT-NEXT:  @ %bb.7: @ %entry
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:  .LBB18_8: @ %entry
+; SOFT-NEXT:    lsls r3, r4, #31
+; SOFT-NEXT:    rsbs r7, r6, #0
+; SOFT-NEXT:    mov r7, r3
 ; SOFT-NEXT:    sbcs r7, r1
-; SOFT-NEXT:    mov r7, r5
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
 ; SOFT-NEXT:    sbcs r7, r2
-; SOFT-NEXT:    sbcs r5, r3
-; SOFT-NEXT:    bge .LBB18_11
-; SOFT-NEXT:  @ %bb.6: @ %entry
-; SOFT-NEXT:    bge .LBB18_12
-; SOFT-NEXT:  .LBB18_7: @ %entry
-; SOFT-NEXT:    add sp, #4
-; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
-; SOFT-NEXT:  .LBB18_8: @ %entry
-; SOFT-NEXT:    mov r3, r4
-; SOFT-NEXT:    blt .LBB18_2
-; SOFT-NEXT:  .LBB18_9: @ %entry
-; SOFT-NEXT:    mov r2, r4
-; SOFT-NEXT:    blt .LBB18_3
+; SOFT-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:    sbcs r0, r2
+; SOFT-NEXT:    bge .LBB18_15
+; SOFT-NEXT:  @ %bb.9: @ %entry
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB18_16
 ; SOFT-NEXT:  .LBB18_10: @ %entry
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    bge .LBB18_4
-; SOFT-NEXT:    b .LBB18_5
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB18_12
 ; SOFT-NEXT:  .LBB18_11: @ %entry
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    blt .LBB18_7
+; SOFT-NEXT:    mov r1, r3
 ; SOFT-NEXT:  .LBB18_12: @ %entry
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    add sp, #4
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    add sp, #12
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:  .LBB18_13: @ %entry
+; SOFT-NEXT:    mov r7, r5
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB18_2
+; SOFT-NEXT:  .LBB18_14: @ %entry
+; SOFT-NEXT:    mov r3, r5
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB18_3
+; SOFT-NEXT:    b .LBB18_4
+; SOFT-NEXT:  .LBB18_15: @ %entry
+; SOFT-NEXT:    mov r4, r5
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB18_10
+; SOFT-NEXT:  .LBB18_16: @ %entry
+; SOFT-NEXT:    mov r6, r5
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB18_11
+; SOFT-NEXT:    b .LBB18_12
 ; SOFT-NEXT:    .p2align 2
-; SOFT-NEXT:  @ %bb.13:
+; SOFT-NEXT:  @ %bb.17:
 ; SOFT-NEXT:  .LCPI18_0:
 ; SOFT-NEXT:    .long 2147483647 @ 0x7fffffff
 ;
@@ -1004,9 +1100,13 @@ define i64 @stest_f64i64(double %x) {
 ; VFP2-NEXT:    sbcs.w r5, lr, r1
 ; VFP2-NEXT:    sbcs.w r4, r2, r4
 ; VFP2-NEXT:    sbcs r2, r3
-; VFP2-NEXT:    itt ge
-; VFP2-NEXT:    movge r0, r12
-; VFP2-NEXT:    movge r1, lr
+; VFP2-NEXT:    mov.w r2, #0
+; VFP2-NEXT:    it lt
+; VFP2-NEXT:    movlt r2, #1
+; VFP2-NEXT:    cmp r2, #0
+; VFP2-NEXT:    itt eq
+; VFP2-NEXT:    moveq r0, r12
+; VFP2-NEXT:    moveq r1, lr
 ; VFP2-NEXT:    pop {r4, r5, r7, pc}
 ;
 ; FULL-LABEL: stest_f64i64:
@@ -1031,9 +1131,11 @@ define i64 @stest_f64i64(double %x) {
 ; FULL-NEXT:    sbcs.w r4, r12, r1
 ; FULL-NEXT:    sbcs.w r2, r3, r2
 ; FULL-NEXT:    sbcs.w r2, r3, r5
-; FULL-NEXT:    it ge
-; FULL-NEXT:    movge r0, #0
-; FULL-NEXT:    csel r1, r1, r12, lt
+; FULL-NEXT:    cset r2, lt
+; FULL-NEXT:    cmp r2, #0
+; FULL-NEXT:    it eq
+; FULL-NEXT:    moveq r0, #0
+; FULL-NEXT:    csel r1, r1, r12, ne
 ; FULL-NEXT:    pop {r4, r5, r7, pc}
 entry:
   %conv = fptosi double %x to i128
@@ -1054,15 +1156,24 @@ define i64 @utest_f64i64(double %x) {
 ; SOFT-NEXT:    movs r4, #0
 ; SOFT-NEXT:    subs r2, r2, #1
 ; SOFT-NEXT:    sbcs r3, r4
-; SOFT-NEXT:    bhs .LBB19_3
+; SOFT-NEXT:    blo .LBB19_2
 ; SOFT-NEXT:  @ %bb.1: @ %entry
-; SOFT-NEXT:    bhs .LBB19_4
-; SOFT-NEXT:  .LBB19_2: @ %entry
-; SOFT-NEXT:    pop {r4, pc}
+; SOFT-NEXT:    mov r2, r4
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    beq .LBB19_3
+; SOFT-NEXT:    b .LBB19_4
+; SOFT-NEXT:  .LBB19_2:
+; SOFT-NEXT:    movs r2, #1
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    bne .LBB19_4
 ; SOFT-NEXT:  .LBB19_3: @ %entry
 ; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    blo .LBB19_2
 ; SOFT-NEXT:  .LBB19_4: @ %entry
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    beq .LBB19_6
+; SOFT-NEXT:  @ %bb.5: @ %entry
+; SOFT-NEXT:    pop {r4, pc}
+; SOFT-NEXT:  .LBB19_6: @ %entry
 ; SOFT-NEXT:    mov r1, r4
 ; SOFT-NEXT:    pop {r4, pc}
 ;
@@ -1074,9 +1185,13 @@ define i64 @utest_f64i64(double %x) {
 ; VFP2-NEXT:    subs r2, #1
 ; VFP2-NEXT:    mov.w r12, #0
 ; VFP2-NEXT:    sbcs r2, r3, #0
-; VFP2-NEXT:    itt hs
-; VFP2-NEXT:    movhs r0, r12
-; VFP2-NEXT:    movhs r1, r12
+; VFP2-NEXT:    mov.w r2, #0
+; VFP2-NEXT:    it lo
+; VFP2-NEXT:    movlo r2, #1
+; VFP2-NEXT:    cmp r2, #0
+; VFP2-NEXT:    itt eq
+; VFP2-NEXT:    moveq r0, r12
+; VFP2-NEXT:    moveq r1, r12
 ; VFP2-NEXT:    pop {r7, pc}
 ;
 ; FULL-LABEL: utest_f64i64:
@@ -1085,10 +1200,12 @@ define i64 @utest_f64i64(double %x) {
 ; FULL-NEXT:    push {r7, lr}
 ; FULL-NEXT:    bl __fixunsdfti
 ; FULL-NEXT:    subs r2, #1
-; FULL-NEXT:    mov.w r12, #0
 ; FULL-NEXT:    sbcs r2, r3, #0
-; FULL-NEXT:    csel r0, r0, r12, lo
-; FULL-NEXT:    csel r1, r1, r12, lo
+; FULL-NEXT:    mov.w r3, #0
+; FULL-NEXT:    cset r2, lo
+; FULL-NEXT:    cmp r2, #0
+; FULL-NEXT:    csel r0, r0, r3, ne
+; FULL-NEXT:    csel r1, r1, r3, ne
 ; FULL-NEXT:    pop {r7, pc}
 entry:
   %conv = fptoui double %x to i128
@@ -1109,16 +1226,23 @@ define i64 @ustest_f64i64(double %x) {
 ; SOFT-NEXT:    subs r6, r2, #1
 ; SOFT-NEXT:    mov r6, r3
 ; SOFT-NEXT:    sbcs r6, r5
-; SOFT-NEXT:    bge .LBB20_9
-; SOFT-NEXT:  @ %bb.1: @ %entry
+; SOFT-NEXT:    mov r6, r4
 ; SOFT-NEXT:    bge .LBB20_10
+; SOFT-NEXT:  @ %bb.1: @ %entry
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    beq .LBB20_11
 ; SOFT-NEXT:  .LBB20_2: @ %entry
-; SOFT-NEXT:    bge .LBB20_11
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    beq .LBB20_12
 ; SOFT-NEXT:  .LBB20_3: @ %entry
-; SOFT-NEXT:    blt .LBB20_5
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    beq .LBB20_13
 ; SOFT-NEXT:  .LBB20_4: @ %entry
-; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB20_6
 ; SOFT-NEXT:  .LBB20_5: @ %entry
+; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:  .LBB20_6: @ %entry
 ; SOFT-NEXT:    rsbs r6, r0, #0
 ; SOFT-NEXT:    mov r6, r5
 ; SOFT-NEXT:    sbcs r6, r1
@@ -1126,32 +1250,41 @@ define i64 @ustest_f64i64(double %x) {
 ; SOFT-NEXT:    sbcs r6, r2
 ; SOFT-NEXT:    mov r2, r5
 ; SOFT-NEXT:    sbcs r2, r3
-; SOFT-NEXT:    bge .LBB20_12
-; SOFT-NEXT:  @ %bb.6: @ %entry
+; SOFT-NEXT:    bge .LBB20_14
+; SOFT-NEXT:  @ %bb.7: @ %entry
 ; SOFT-NEXT:    cmp r4, #0
-; SOFT-NEXT:    beq .LBB20_13
-; SOFT-NEXT:  .LBB20_7: @ %entry
-; SOFT-NEXT:    beq .LBB20_14
+; SOFT-NEXT:    beq .LBB20_15
 ; SOFT-NEXT:  .LBB20_8: @ %entry
-; SOFT-NEXT:    pop {r4, r5, r6, pc}
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB20_16
 ; SOFT-NEXT:  .LBB20_9: @ %entry
-; SOFT-NEXT:    mov r3, r5
-; SOFT-NEXT:    blt .LBB20_2
+; SOFT-NEXT:    pop {r4, r5, r6, pc}
 ; SOFT-NEXT:  .LBB20_10: @ %entry
-; SOFT-NEXT:    mov r2, r4
-; SOFT-NEXT:    blt .LBB20_3
+; SOFT-NEXT:    mov r6, r5
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB20_2
 ; SOFT-NEXT:  .LBB20_11: @ %entry
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    bge .LBB20_4
-; SOFT-NEXT:    b .LBB20_5
+; SOFT-NEXT:    mov r3, r5
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB20_3
 ; SOFT-NEXT:  .LBB20_12: @ %entry
+; SOFT-NEXT:    mov r2, r4
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB20_4
+; SOFT-NEXT:  .LBB20_13: @ %entry
+; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    beq .LBB20_5
+; SOFT-NEXT:    b .LBB20_6
+; SOFT-NEXT:  .LBB20_14: @ %entry
 ; SOFT-NEXT:    mov r4, r5
 ; SOFT-NEXT:    cmp r4, #0
-; SOFT-NEXT:    bne .LBB20_7
-; SOFT-NEXT:  .LBB20_13: @ %entry
-; SOFT-NEXT:    mov r0, r4
 ; SOFT-NEXT:    bne .LBB20_8
-; SOFT-NEXT:  .LBB20_14: @ %entry
+; SOFT-NEXT:  .LBB20_15: @ %entry
+; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB20_9
+; SOFT-NEXT:  .LBB20_16: @ %entry
 ; SOFT-NEXT:    mov r1, r4
 ; SOFT-NEXT:    pop {r4, r5, r6, pc}
 ;
@@ -1163,11 +1296,15 @@ define i64 @ustest_f64i64(double %x) {
 ; VFP2-NEXT:    subs.w lr, r2, #1
 ; VFP2-NEXT:    mov.w r12, #0
 ; VFP2-NEXT:    sbcs lr, r3, #0
-; VFP2-NEXT:    itttt ge
-; VFP2-NEXT:    movge r3, r12
-; VFP2-NEXT:    movge r2, #1
-; VFP2-NEXT:    movge r1, r12
-; VFP2-NEXT:    movge r0, r12
+; VFP2-NEXT:    mov.w lr, #0
+; VFP2-NEXT:    it lt
+; VFP2-NEXT:    movlt.w lr, #1
+; VFP2-NEXT:    cmp.w lr, #0
+; VFP2-NEXT:    itttt eq
+; VFP2-NEXT:    moveq r3, r12
+; VFP2-NEXT:    moveq r2, #1
+; VFP2-NEXT:    moveq r1, r12
+; VFP2-NEXT:    moveq r0, r12
 ; VFP2-NEXT:    rsbs.w lr, r0, #0
 ; VFP2-NEXT:    sbcs.w lr, r12, r1
 ; VFP2-NEXT:    sbcs.w r2, r12, r2
@@ -1185,18 +1322,20 @@ define i64 @ustest_f64i64(double %x) {
 ; FULL-NEXT:    .save {r7, lr}
 ; FULL-NEXT:    push {r7, lr}
 ; FULL-NEXT:    bl __fixdfti
-; FULL-NEXT:    subs.w lr, r2, #1
-; FULL-NEXT:    mov.w r12, #0
-; FULL-NEXT:    sbcs lr, r3, #0
-; FULL-NEXT:    it ge
-; FULL-NEXT:    movge r2, #1
-; FULL-NEXT:    csel r0, r0, r12, lt
-; FULL-NEXT:    csel lr, r3, r12, lt
-; FULL-NEXT:    csel r1, r1, r12, lt
+; FULL-NEXT:    subs.w r12, r2, #1
+; FULL-NEXT:    mov.w lr, #0
+; FULL-NEXT:    sbcs r12, r3, #0
+; FULL-NEXT:    cset r12, lt
+; FULL-NEXT:    cmp.w r12, #0
+; FULL-NEXT:    it eq
+; FULL-NEXT:    moveq r2, #1
+; FULL-NEXT:    csel r0, r0, lr, ne
+; FULL-NEXT:    csel r12, r3, lr, ne
+; FULL-NEXT:    csel r1, r1, lr, ne
 ; FULL-NEXT:    rsbs r3, r0, #0
-; FULL-NEXT:    sbcs.w r3, r12, r1
-; FULL-NEXT:    sbcs.w r2, r12, r2
-; FULL-NEXT:    sbcs.w r2, r12, lr
+; FULL-NEXT:    sbcs.w r3, lr, r1
+; FULL-NEXT:    sbcs.w r2, lr, r2
+; FULL-NEXT:    sbcs.w r2, lr, r12
 ; FULL-NEXT:    cset r2, lt
 ; FULL-NEXT:    cmp r2, #0
 ; FULL-NEXT:    csel r0, r0, r2, ne
@@ -1217,62 +1356,86 @@ define i64 @stest_f32i64(float %x) {
 ; SOFT:       @ %bb.0: @ %entry
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #4
-; SOFT-NEXT:    sub sp, #4
+; SOFT-NEXT:    .pad #12
+; SOFT-NEXT:    sub sp, #12
 ; SOFT-NEXT:    bl __fixsfti
-; SOFT-NEXT:    movs r4, #0
-; SOFT-NEXT:    mvns r5, r4
-; SOFT-NEXT:    ldr r6, .LCPI21_0
-; SOFT-NEXT:    adds r7, r0, #1
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    movs r4, #1
+; SOFT-NEXT:    movs r5, #0
+; SOFT-NEXT:    ldr r0, .LCPI21_0
+; SOFT-NEXT:    adds r7, r6, #1
 ; SOFT-NEXT:    mov r7, r1
-; SOFT-NEXT:    sbcs r7, r6
+; SOFT-NEXT:    sbcs r7, r0
 ; SOFT-NEXT:    mov r7, r2
-; SOFT-NEXT:    sbcs r7, r4
+; SOFT-NEXT:    sbcs r7, r5
 ; SOFT-NEXT:    mov r7, r3
-; SOFT-NEXT:    sbcs r7, r4
-; SOFT-NEXT:    bge .LBB21_8
+; SOFT-NEXT:    sbcs r7, r5
+; SOFT-NEXT:    mov r7, r4
+; SOFT-NEXT:    bge .LBB21_13
 ; SOFT-NEXT:  @ %bb.1: @ %entry
-; SOFT-NEXT:    bge .LBB21_9
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB21_14
 ; SOFT-NEXT:  .LBB21_2: @ %entry
-; SOFT-NEXT:    bge .LBB21_10
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB21_4
 ; SOFT-NEXT:  .LBB21_3: @ %entry
-; SOFT-NEXT:    blt .LBB21_5
+; SOFT-NEXT:    mov r2, r5
 ; SOFT-NEXT:  .LBB21_4: @ %entry
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:  .LBB21_5: @ %entry
-; SOFT-NEXT:    movs r6, #1
-; SOFT-NEXT:    lsls r6, r6, #31
-; SOFT-NEXT:    rsbs r7, r0, #0
-; SOFT-NEXT:    mov r7, r6
+; SOFT-NEXT:    str r2, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB21_6
+; SOFT-NEXT:  @ %bb.5: @ %entry
+; SOFT-NEXT:    mov r1, r0
+; SOFT-NEXT:  .LBB21_6: @ %entry
+; SOFT-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    mvns r0, r5
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB21_8
+; SOFT-NEXT:  @ %bb.7: @ %entry
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:  .LBB21_8: @ %entry
+; SOFT-NEXT:    lsls r3, r4, #31
+; SOFT-NEXT:    rsbs r7, r6, #0
+; SOFT-NEXT:    mov r7, r3
 ; SOFT-NEXT:    sbcs r7, r1
-; SOFT-NEXT:    mov r7, r5
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
 ; SOFT-NEXT:    sbcs r7, r2
-; SOFT-NEXT:    sbcs r5, r3
-; SOFT-NEXT:    bge .LBB21_11
-; SOFT-NEXT:  @ %bb.6: @ %entry
-; SOFT-NEXT:    bge .LBB21_12
-; SOFT-NEXT:  .LBB21_7: @ %entry
-; SOFT-NEXT:    add sp, #4
-; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
-; SOFT-NEXT:  .LBB21_8: @ %entry
-; SOFT-NEXT:    mov r3, r4
-; SOFT-NEXT:    blt .LBB21_2
-; SOFT-NEXT:  .LBB21_9: @ %entry
-; SOFT-NEXT:    mov r2, r4
-; SOFT-NEXT:    blt .LBB21_3
+; SOFT-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:    sbcs r0, r2
+; SOFT-NEXT:    bge .LBB21_15
+; SOFT-NEXT:  @ %bb.9: @ %entry
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB21_16
 ; SOFT-NEXT:  .LBB21_10: @ %entry
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    bge .LBB21_4
-; SOFT-NEXT:    b .LBB21_5
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB21_12
 ; SOFT-NEXT:  .LBB21_11: @ %entry
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    blt .LBB21_7
+; SOFT-NEXT:    mov r1, r3
 ; SOFT-NEXT:  .LBB21_12: @ %entry
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    add sp, #4
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    add sp, #12
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:  .LBB21_13: @ %entry
+; SOFT-NEXT:    mov r7, r5
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB21_2
+; SOFT-NEXT:  .LBB21_14: @ %entry
+; SOFT-NEXT:    mov r3, r5
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB21_3
+; SOFT-NEXT:    b .LBB21_4
+; SOFT-NEXT:  .LBB21_15: @ %entry
+; SOFT-NEXT:    mov r4, r5
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB21_10
+; SOFT-NEXT:  .LBB21_16: @ %entry
+; SOFT-NEXT:    mov r6, r5
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB21_11
+; SOFT-NEXT:    b .LBB21_12
 ; SOFT-NEXT:    .p2align 2
-; SOFT-NEXT:  @ %bb.13:
+; SOFT-NEXT:  @ %bb.17:
 ; SOFT-NEXT:  .LCPI21_0:
 ; SOFT-NEXT:    .long 2147483647 @ 0x7fffffff
 ;
@@ -1303,9 +1466,13 @@ define i64 @stest_f32i64(float %x) {
 ; VFP2-NEXT:    sbcs.w r5, lr, r1
 ; VFP2-NEXT:    sbcs.w r4, r2, r4
 ; VFP2-NEXT:    sbcs r2, r3
-; VFP2-NEXT:    itt ge
-; VFP2-NEXT:    movge r0, r12
-; VFP2-NEXT:    movge r1, lr
+; VFP2-NEXT:    mov.w r2, #0
+; VFP2-NEXT:    it lt
+; VFP2-NEXT:    movlt r2, #1
+; VFP2-NEXT:    cmp r2, #0
+; VFP2-NEXT:    itt eq
+; VFP2-NEXT:    moveq r0, r12
+; VFP2-NEXT:    moveq r1, lr
 ; VFP2-NEXT:    pop {r4, r5, r7, pc}
 ;
 ; FULL-LABEL: stest_f32i64:
@@ -1330,9 +1497,11 @@ define i64 @stest_f32i64(float %x) {
 ; FULL-NEXT:    sbcs.w r4, r12, r1
 ; FULL-NEXT:    sbcs.w r2, r3, r2
 ; FULL-NEXT:    sbcs.w r2, r3, r5
-; FULL-NEXT:    it ge
-; FULL-NEXT:    movge r0, #0
-; FULL-NEXT:    csel r1, r1, r12, lt
+; FULL-NEXT:    cset r2, lt
+; FULL-NEXT:    cmp r2, #0
+; FULL-NEXT:    it eq
+; FULL-NEXT:    moveq r0, #0
+; FULL-NEXT:    csel r1, r1, r12, ne
 ; FULL-NEXT:    pop {r4, r5, r7, pc}
 entry:
   %conv = fptosi float %x to i128
@@ -1353,15 +1522,24 @@ define i64 @utest_f32i64(float %x) {
 ; SOFT-NEXT:    movs r4, #0
 ; SOFT-NEXT:    subs r2, r2, #1
 ; SOFT-NEXT:    sbcs r3, r4
-; SOFT-NEXT:    bhs .LBB22_3
+; SOFT-NEXT:    blo .LBB22_2
 ; SOFT-NEXT:  @ %bb.1: @ %entry
-; SOFT-NEXT:    bhs .LBB22_4
-; SOFT-NEXT:  .LBB22_2: @ %entry
-; SOFT-NEXT:    pop {r4, pc}
+; SOFT-NEXT:    mov r2, r4
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    beq .LBB22_3
+; SOFT-NEXT:    b .LBB22_4
+; SOFT-NEXT:  .LBB22_2:
+; SOFT-NEXT:    movs r2, #1
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    bne .LBB22_4
 ; SOFT-NEXT:  .LBB22_3: @ %entry
 ; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    blo .LBB22_2
 ; SOFT-NEXT:  .LBB22_4: @ %entry
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    beq .LBB22_6
+; SOFT-NEXT:  @ %bb.5: @ %entry
+; SOFT-NEXT:    pop {r4, pc}
+; SOFT-NEXT:  .LBB22_6: @ %entry
 ; SOFT-NEXT:    mov r1, r4
 ; SOFT-NEXT:    pop {r4, pc}
 ;
@@ -1373,9 +1551,13 @@ define i64 @utest_f32i64(float %x) {
 ; VFP2-NEXT:    subs r2, #1
 ; VFP2-NEXT:    mov.w r12, #0
 ; VFP2-NEXT:    sbcs r2, r3, #0
-; VFP2-NEXT:    itt hs
-; VFP2-NEXT:    movhs r0, r12
-; VFP2-NEXT:    movhs r1, r12
+; VFP2-NEXT:    mov.w r2, #0
+; VFP2-NEXT:    it lo
+; VFP2-NEXT:    movlo r2, #1
+; VFP2-NEXT:    cmp r2, #0
+; VFP2-NEXT:    itt eq
+; VFP2-NEXT:    moveq r0, r12
+; VFP2-NEXT:    moveq r1, r12
 ; VFP2-NEXT:    pop {r7, pc}
 ;
 ; FULL-LABEL: utest_f32i64:
@@ -1384,10 +1566,12 @@ define i64 @utest_f32i64(float %x) {
 ; FULL-NEXT:    push {r7, lr}
 ; FULL-NEXT:    bl __fixunssfti
 ; FULL-NEXT:    subs r2, #1
-; FULL-NEXT:    mov.w r12, #0
 ; FULL-NEXT:    sbcs r2, r3, #0
-; FULL-NEXT:    csel r0, r0, r12, lo
-; FULL-NEXT:    csel r1, r1, r12, lo
+; FULL-NEXT:    mov.w r3, #0
+; FULL-NEXT:    cset r2, lo
+; FULL-NEXT:    cmp r2, #0
+; FULL-NEXT:    csel r0, r0, r3, ne
+; FULL-NEXT:    csel r1, r1, r3, ne
 ; FULL-NEXT:    pop {r7, pc}
 entry:
   %conv = fptoui float %x to i128
@@ -1408,16 +1592,23 @@ define i64 @ustest_f32i64(float %x) {
 ; SOFT-NEXT:    subs r6, r2, #1
 ; SOFT-NEXT:    mov r6, r3
 ; SOFT-NEXT:    sbcs r6, r5
-; SOFT-NEXT:    bge .LBB23_9
-; SOFT-NEXT:  @ %bb.1: @ %entry
+; SOFT-NEXT:    mov r6, r4
 ; SOFT-NEXT:    bge .LBB23_10
+; SOFT-NEXT:  @ %bb.1: @ %entry
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    beq .LBB23_11
 ; SOFT-NEXT:  .LBB23_2: @ %entry
-; SOFT-NEXT:    bge .LBB23_11
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    beq .LBB23_12
 ; SOFT-NEXT:  .LBB23_3: @ %entry
-; SOFT-NEXT:    blt .LBB23_5
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    beq .LBB23_13
 ; SOFT-NEXT:  .LBB23_4: @ %entry
-; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB23_6
 ; SOFT-NEXT:  .LBB23_5: @ %entry
+; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:  .LBB23_6: @ %entry
 ; SOFT-NEXT:    rsbs r6, r0, #0
 ; SOFT-NEXT:    mov r6, r5
 ; SOFT-NEXT:    sbcs r6, r1
@@ -1425,32 +1616,41 @@ define i64 @ustest_f32i64(float %x) {
 ; SOFT-NEXT:    sbcs r6, r2
 ; SOFT-NEXT:    mov r2, r5
 ; SOFT-NEXT:    sbcs r2, r3
-; SOFT-NEXT:    bge .LBB23_12
-; SOFT-NEXT:  @ %bb.6: @ %entry
+; SOFT-NEXT:    bge .LBB23_14
+; SOFT-NEXT:  @ %bb.7: @ %entry
 ; SOFT-NEXT:    cmp r4, #0
-; SOFT-NEXT:    beq .LBB23_13
-; SOFT-NEXT:  .LBB23_7: @ %entry
-; SOFT-NEXT:    beq .LBB23_14
+; SOFT-NEXT:    beq .LBB23_15
 ; SOFT-NEXT:  .LBB23_8: @ %entry
-; SOFT-NEXT:    pop {r4, r5, r6, pc}
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB23_16
 ; SOFT-NEXT:  .LBB23_9: @ %entry
-; SOFT-NEXT:    mov r3, r5
-; SOFT-NEXT:    blt .LBB23_2
+; SOFT-NEXT:    pop {r4, r5, r6, pc}
 ; SOFT-NEXT:  .LBB23_10: @ %entry
-; SOFT-NEXT:    mov r2, r4
-; SOFT-NEXT:    blt .LBB23_3
+; SOFT-NEXT:    mov r6, r5
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB23_2
 ; SOFT-NEXT:  .LBB23_11: @ %entry
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    bge .LBB23_4
-; SOFT-NEXT:    b .LBB23_5
+; SOFT-NEXT:    mov r3, r5
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB23_3
 ; SOFT-NEXT:  .LBB23_12: @ %entry
+; SOFT-NEXT:    mov r2, r4
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB23_4
+; SOFT-NEXT:  .LBB23_13: @ %entry
+; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    beq .LBB23_5
+; SOFT-NEXT:    b .LBB23_6
+; SOFT-NEXT:  .LBB23_14: @ %entry
 ; SOFT-NEXT:    mov r4, r5
 ; SOFT-NEXT:    cmp r4, #0
-; SOFT-NEXT:    bne .LBB23_7
-; SOFT-NEXT:  .LBB23_13: @ %entry
-; SOFT-NEXT:    mov r0, r4
 ; SOFT-NEXT:    bne .LBB23_8
-; SOFT-NEXT:  .LBB23_14: @ %entry
+; SOFT-NEXT:  .LBB23_15: @ %entry
+; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB23_9
+; SOFT-NEXT:  .LBB23_16: @ %entry
 ; SOFT-NEXT:    mov r1, r4
 ; SOFT-NEXT:    pop {r4, r5, r6, pc}
 ;
@@ -1462,11 +1662,15 @@ define i64 @ustest_f32i64(float %x) {
 ; VFP2-NEXT:    subs.w lr, r2, #1
 ; VFP2-NEXT:    mov.w r12, #0
 ; VFP2-NEXT:    sbcs lr, r3, #0
-; VFP2-NEXT:    itttt ge
-; VFP2-NEXT:    movge r3, r12
-; VFP2-NEXT:    movge r2, #1
-; VFP2-NEXT:    movge r1, r12
-; VFP2-NEXT:    movge r0, r12
+; VFP2-NEXT:    mov.w lr, #0
+; VFP2-NEXT:    it lt
+; VFP2-NEXT:    movlt.w lr, #1
+; VFP2-NEXT:    cmp.w lr, #0
+; VFP2-NEXT:    itttt eq
+; VFP2-NEXT:    moveq r3, r12
+; VFP2-NEXT:    moveq r2, #1
+; VFP2-NEXT:    moveq r1, r12
+; VFP2-NEXT:    moveq r0, r12
 ; VFP2-NEXT:    rsbs.w lr, r0, #0
 ; VFP2-NEXT:    sbcs.w lr, r12, r1
 ; VFP2-NEXT:    sbcs.w r2, r12, r2
@@ -1484,18 +1688,20 @@ define i64 @ustest_f32i64(float %x) {
 ; FULL-NEXT:    .save {r7, lr}
 ; FULL-NEXT:    push {r7, lr}
 ; FULL-NEXT:    bl __fixsfti
-; FULL-NEXT:    subs.w lr, r2, #1
-; FULL-NEXT:    mov.w r12, #0
-; FULL-NEXT:    sbcs lr, r3, #0
-; FULL-NEXT:    it ge
-; FULL-NEXT:    movge r2, #1
-; FULL-NEXT:    csel r0, r0, r12, lt
-; FULL-NEXT:    csel lr, r3, r12, lt
-; FULL-NEXT:    csel r1, r1, r12, lt
+; FULL-NEXT:    subs.w r12, r2, #1
+; FULL-NEXT:    mov.w lr, #0
+; FULL-NEXT:    sbcs r12, r3, #0
+; FULL-NEXT:    cset r12, lt
+; FULL-NEXT:    cmp.w r12, #0
+; FULL-NEXT:    it eq
+; FULL-NEXT:    moveq r2, #1
+; FULL-NEXT:    csel r0, r0, lr, ne
+; FULL-NEXT:    csel r12, r3, lr, ne
+; FULL-NEXT:    csel r1, r1, lr, ne
 ; FULL-NEXT:    rsbs r3, r0, #0
-; FULL-NEXT:    sbcs.w r3, r12, r1
-; FULL-NEXT:    sbcs.w r2, r12, r2
-; FULL-NEXT:    sbcs.w r2, r12, lr
+; FULL-NEXT:    sbcs.w r3, lr, r1
+; FULL-NEXT:    sbcs.w r2, lr, r2
+; FULL-NEXT:    sbcs.w r2, lr, r12
 ; FULL-NEXT:    cset r2, lt
 ; FULL-NEXT:    cmp r2, #0
 ; FULL-NEXT:    csel r0, r0, r2, ne
@@ -1516,64 +1722,88 @@ define i64 @stest_f16i64(half %x) {
 ; SOFT:       @ %bb.0: @ %entry
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #4
-; SOFT-NEXT:    sub sp, #4
+; SOFT-NEXT:    .pad #12
+; SOFT-NEXT:    sub sp, #12
 ; SOFT-NEXT:    uxth r0, r0
 ; SOFT-NEXT:    bl __aeabi_h2f
 ; SOFT-NEXT:    bl __fixsfti
-; SOFT-NEXT:    movs r4, #0
-; SOFT-NEXT:    mvns r5, r4
-; SOFT-NEXT:    ldr r6, .LCPI24_0
-; SOFT-NEXT:    adds r7, r0, #1
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    movs r4, #1
+; SOFT-NEXT:    movs r5, #0
+; SOFT-NEXT:    ldr r0, .LCPI24_0
+; SOFT-NEXT:    adds r7, r6, #1
 ; SOFT-NEXT:    mov r7, r1
-; SOFT-NEXT:    sbcs r7, r6
+; SOFT-NEXT:    sbcs r7, r0
 ; SOFT-NEXT:    mov r7, r2
-; SOFT-NEXT:    sbcs r7, r4
+; SOFT-NEXT:    sbcs r7, r5
 ; SOFT-NEXT:    mov r7, r3
-; SOFT-NEXT:    sbcs r7, r4
-; SOFT-NEXT:    bge .LBB24_8
+; SOFT-NEXT:    sbcs r7, r5
+; SOFT-NEXT:    mov r7, r4
+; SOFT-NEXT:    bge .LBB24_13
 ; SOFT-NEXT:  @ %bb.1: @ %entry
-; SOFT-NEXT:    bge .LBB24_9
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB24_14
 ; SOFT-NEXT:  .LBB24_2: @ %entry
-; SOFT-NEXT:    bge .LBB24_10
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB24_4
 ; SOFT-NEXT:  .LBB24_3: @ %entry
-; SOFT-NEXT:    blt .LBB24_5
+; SOFT-NEXT:    mov r2, r5
 ; SOFT-NEXT:  .LBB24_4: @ %entry
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:  .LBB24_5: @ %entry
-; SOFT-NEXT:    movs r6, #1
-; SOFT-NEXT:    lsls r6, r6, #31
-; SOFT-NEXT:    rsbs r7, r0, #0
-; SOFT-NEXT:    mov r7, r6
+; SOFT-NEXT:    str r2, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB24_6
+; SOFT-NEXT:  @ %bb.5: @ %entry
+; SOFT-NEXT:    mov r1, r0
+; SOFT-NEXT:  .LBB24_6: @ %entry
+; SOFT-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    mvns r0, r5
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB24_8
+; SOFT-NEXT:  @ %bb.7: @ %entry
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:  .LBB24_8: @ %entry
+; SOFT-NEXT:    lsls r3, r4, #31
+; SOFT-NEXT:    rsbs r7, r6, #0
+; SOFT-NEXT:    mov r7, r3
 ; SOFT-NEXT:    sbcs r7, r1
-; SOFT-NEXT:    mov r7, r5
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
 ; SOFT-NEXT:    sbcs r7, r2
-; SOFT-NEXT:    sbcs r5, r3
-; SOFT-NEXT:    bge .LBB24_11
-; SOFT-NEXT:  @ %bb.6: @ %entry
-; SOFT-NEXT:    bge .LBB24_12
-; SOFT-NEXT:  .LBB24_7: @ %entry
-; SOFT-NEXT:    add sp, #4
-; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
-; SOFT-NEXT:  .LBB24_8: @ %entry
-; SOFT-NEXT:    mov r3, r4
-; SOFT-NEXT:    blt .LBB24_2
-; SOFT-NEXT:  .LBB24_9: @ %entry
-; SOFT-NEXT:    mov r2, r4
-; SOFT-NEXT:    blt .LBB24_3
+; SOFT-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:    sbcs r0, r2
+; SOFT-NEXT:    bge .LBB24_15
+; SOFT-NEXT:  @ %bb.9: @ %entry
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB24_16
 ; SOFT-NEXT:  .LBB24_10: @ %entry
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    bge .LBB24_4
-; SOFT-NEXT:    b .LBB24_5
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB24_12
 ; SOFT-NEXT:  .LBB24_11: @ %entry
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    blt .LBB24_7
+; SOFT-NEXT:    mov r1, r3
 ; SOFT-NEXT:  .LBB24_12: @ %entry
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    add sp, #4
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    add sp, #12
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:  .LBB24_13: @ %entry
+; SOFT-NEXT:    mov r7, r5
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB24_2
+; SOFT-NEXT:  .LBB24_14: @ %entry
+; SOFT-NEXT:    mov r3, r5
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB24_3
+; SOFT-NEXT:    b .LBB24_4
+; SOFT-NEXT:  .LBB24_15: @ %entry
+; SOFT-NEXT:    mov r4, r5
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB24_10
+; SOFT-NEXT:  .LBB24_16: @ %entry
+; SOFT-NEXT:    mov r6, r5
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB24_11
+; SOFT-NEXT:    b .LBB24_12
 ; SOFT-NEXT:    .p2align 2
-; SOFT-NEXT:  @ %bb.13:
+; SOFT-NEXT:  @ %bb.17:
 ; SOFT-NEXT:  .LCPI24_0:
 ; SOFT-NEXT:    .long 2147483647 @ 0x7fffffff
 ;
@@ -1607,9 +1837,13 @@ define i64 @stest_f16i64(half %x) {
 ; VFP2-NEXT:    sbcs.w r5, lr, r1
 ; VFP2-NEXT:    sbcs.w r4, r2, r4
 ; VFP2-NEXT:    sbcs r2, r3
-; VFP2-NEXT:    itt ge
-; VFP2-NEXT:    movge r0, r12
-; VFP2-NEXT:    movge r1, lr
+; VFP2-NEXT:    mov.w r2, #0
+; VFP2-NEXT:    it lt
+; VFP2-NEXT:    movlt r2, #1
+; VFP2-NEXT:    cmp r2, #0
+; VFP2-NEXT:    itt eq
+; VFP2-NEXT:    moveq r0, r12
+; VFP2-NEXT:    moveq r1, lr
 ; VFP2-NEXT:    pop {r4, r5, r7, pc}
 ;
 ; FULL-LABEL: stest_f16i64:
@@ -1636,9 +1870,11 @@ define i64 @stest_f16i64(half %x) {
 ; FULL-NEXT:    sbcs.w r4, r12, r1
 ; FULL-NEXT:    sbcs.w r2, r3, r2
 ; FULL-NEXT:    sbcs.w r2, r3, r5
-; FULL-NEXT:    it ge
-; FULL-NEXT:    movge r0, #0
-; FULL-NEXT:    csel r1, r1, r12, lt
+; FULL-NEXT:    cset r2, lt
+; FULL-NEXT:    cmp r2, #0
+; FULL-NEXT:    it eq
+; FULL-NEXT:    moveq r0, #0
+; FULL-NEXT:    csel r1, r1, r12, ne
 ; FULL-NEXT:    pop {r4, r5, r7, pc}
 entry:
   %conv = fptosi half %x to i128
@@ -1661,15 +1897,24 @@ define i64 @utesth_f16i64(half %x) {
 ; SOFT-NEXT:    movs r4, #0
 ; SOFT-NEXT:    subs r2, r2, #1
 ; SOFT-NEXT:    sbcs r3, r4
-; SOFT-NEXT:    bhs .LBB25_3
+; SOFT-NEXT:    blo .LBB25_2
 ; SOFT-NEXT:  @ %bb.1: @ %entry
-; SOFT-NEXT:    bhs .LBB25_4
-; SOFT-NEXT:  .LBB25_2: @ %entry
-; SOFT-NEXT:    pop {r4, pc}
+; SOFT-NEXT:    mov r2, r4
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    beq .LBB25_3
+; SOFT-NEXT:    b .LBB25_4
+; SOFT-NEXT:  .LBB25_2:
+; SOFT-NEXT:    movs r2, #1
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    bne .LBB25_4
 ; SOFT-NEXT:  .LBB25_3: @ %entry
 ; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    blo .LBB25_2
 ; SOFT-NEXT:  .LBB25_4: @ %entry
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    beq .LBB25_6
+; SOFT-NEXT:  @ %bb.5: @ %entry
+; SOFT-NEXT:    pop {r4, pc}
+; SOFT-NEXT:  .LBB25_6: @ %entry
 ; SOFT-NEXT:    mov r1, r4
 ; SOFT-NEXT:    pop {r4, pc}
 ;
@@ -1684,9 +1929,13 @@ define i64 @utesth_f16i64(half %x) {
 ; VFP2-NEXT:    subs r2, #1
 ; VFP2-NEXT:    mov.w r12, #0
 ; VFP2-NEXT:    sbcs r2, r3, #0
-; VFP2-NEXT:    itt hs
-; VFP2-NEXT:    movhs r0, r12
-; VFP2-NEXT:    movhs r1, r12
+; VFP2-NEXT:    mov.w r2, #0
+; VFP2-NEXT:    it lo
+; VFP2-NEXT:    movlo r2, #1
+; VFP2-NEXT:    cmp r2, #0
+; VFP2-NEXT:    itt eq
+; VFP2-NEXT:    moveq r0, r12
+; VFP2-NEXT:    moveq r1, r12
 ; VFP2-NEXT:    pop {r7, pc}
 ;
 ; FULL-LABEL: utesth_f16i64:
@@ -1697,10 +1946,12 @@ define i64 @utesth_f16i64(half %x) {
 ; FULL-NEXT:    vmov s0, r0
 ; FULL-NEXT:    bl __fixunshfti
 ; FULL-NEXT:    subs r2, #1
-; FULL-NEXT:    mov.w r12, #0
 ; FULL-NEXT:    sbcs r2, r3, #0
-; FULL-NEXT:    csel r0, r0, r12, lo
-; FULL-NEXT:    csel r1, r1, r12, lo
+; FULL-NEXT:    mov.w r3, #0
+; FULL-NEXT:    cset r2, lo
+; FULL-NEXT:    cmp r2, #0
+; FULL-NEXT:    csel r0, r0, r3, ne
+; FULL-NEXT:    csel r1, r1, r3, ne
 ; FULL-NEXT:    pop {r7, pc}
 entry:
   %conv = fptoui half %x to i128
@@ -1723,16 +1974,23 @@ define i64 @ustest_f16i64(half %x) {
 ; SOFT-NEXT:    subs r6, r2, #1
 ; SOFT-NEXT:    mov r6, r3
 ; SOFT-NEXT:    sbcs r6, r5
-; SOFT-NEXT:    bge .LBB26_9
-; SOFT-NEXT:  @ %bb.1: @ %entry
+; SOFT-NEXT:    mov r6, r4
 ; SOFT-NEXT:    bge .LBB26_10
+; SOFT-NEXT:  @ %bb.1: @ %entry
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    beq .LBB26_11
 ; SOFT-NEXT:  .LBB26_2: @ %entry
-; SOFT-NEXT:    bge .LBB26_11
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    beq .LBB26_12
 ; SOFT-NEXT:  .LBB26_3: @ %entry
-; SOFT-NEXT:    blt .LBB26_5
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    beq .LBB26_13
 ; SOFT-NEXT:  .LBB26_4: @ %entry
-; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB26_6
 ; SOFT-NEXT:  .LBB26_5: @ %entry
+; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:  .LBB26_6: @ %entry
 ; SOFT-NEXT:    rsbs r6, r0, #0
 ; SOFT-NEXT:    mov r6, r5
 ; SOFT-NEXT:    sbcs r6, r1
@@ -1740,32 +1998,41 @@ define i64 @ustest_f16i64(half %x) {
 ; SOFT-NEXT:    sbcs r6, r2
 ; SOFT-NEXT:    mov r2, r5
 ; SOFT-NEXT:    sbcs r2, r3
-; SOFT-NEXT:    bge .LBB26_12
-; SOFT-NEXT:  @ %bb.6: @ %entry
+; SOFT-NEXT:    bge .LBB26_14
+; SOFT-NEXT:  @ %bb.7: @ %entry
 ; SOFT-NEXT:    cmp r4, #0
-; SOFT-NEXT:    beq .LBB26_13
-; SOFT-NEXT:  .LBB26_7: @ %entry
-; SOFT-NEXT:    beq .LBB26_14
+; SOFT-NEXT:    beq .LBB26_15
 ; SOFT-NEXT:  .LBB26_8: @ %entry
-; SOFT-NEXT:    pop {r4, r5, r6, pc}
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB26_16
 ; SOFT-NEXT:  .LBB26_9: @ %entry
-; SOFT-NEXT:    mov r3, r5
-; SOFT-NEXT:    blt .LBB26_2
+; SOFT-NEXT:    pop {r4, r5, r6, pc}
 ; SOFT-NEXT:  .LBB26_10: @ %entry
-; SOFT-NEXT:    mov r2, r4
-; SOFT-NEXT:    blt .LBB26_3
+; SOFT-NEXT:    mov r6, r5
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB26_2
 ; SOFT-NEXT:  .LBB26_11: @ %entry
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    bge .LBB26_4
-; SOFT-NEXT:    b .LBB26_5
+; SOFT-NEXT:    mov r3, r5
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB26_3
 ; SOFT-NEXT:  .LBB26_12: @ %entry
+; SOFT-NEXT:    mov r2, r4
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB26_4
+; SOFT-NEXT:  .LBB26_13: @ %entry
+; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    beq .LBB26_5
+; SOFT-NEXT:    b .LBB26_6
+; SOFT-NEXT:  .LBB26_14: @ %entry
 ; SOFT-NEXT:    mov r4, r5
 ; SOFT-NEXT:    cmp r4, #0
-; SOFT-NEXT:    bne .LBB26_7
-; SOFT-NEXT:  .LBB26_13: @ %entry
-; SOFT-NEXT:    mov r0, r4
 ; SOFT-NEXT:    bne .LBB26_8
-; SOFT-NEXT:  .LBB26_14: @ %entry
+; SOFT-NEXT:  .LBB26_15: @ %entry
+; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB26_9
+; SOFT-NEXT:  .LBB26_16: @ %entry
 ; SOFT-NEXT:    mov r1, r4
 ; SOFT-NEXT:    pop {r4, r5, r6, pc}
 ;
@@ -1780,11 +2047,15 @@ define i64 @ustest_f16i64(half %x) {
 ; VFP2-NEXT:    subs.w lr, r2, #1
 ; VFP2-NEXT:    mov.w r12, #0
 ; VFP2-NEXT:    sbcs lr, r3, #0
-; VFP2-NEXT:    itttt ge
-; VFP2-NEXT:    movge r3, r12
-; VFP2-NEXT:    movge r2, #1
-; VFP2-NEXT:    movge r1, r12
-; VFP2-NEXT:    movge r0, r12
+; VFP2-NEXT:    mov.w lr, #0
+; VFP2-NEXT:    it lt
+; VFP2-NEXT:    movlt.w lr, #1
+; VFP2-NEXT:    cmp.w lr, #0
+; VFP2-NEXT:    itttt eq
+; VFP2-NEXT:    moveq r3, r12
+; VFP2-NEXT:    moveq r2, #1
+; VFP2-NEXT:    moveq r1, r12
+; VFP2-NEXT:    moveq r0, r12
 ; VFP2-NEXT:    rsbs.w lr, r0, #0
 ; VFP2-NEXT:    sbcs.w lr, r12, r1
 ; VFP2-NEXT:    sbcs.w r2, r12, r2
@@ -1804,18 +2075,20 @@ define i64 @ustest_f16i64(half %x) {
 ; FULL-NEXT:    vmov.f16 r0, s0
 ; FULL-NEXT:    vmov s0, r0
 ; FULL-NEXT:    bl __fixhfti
-; FULL-NEXT:    subs.w lr, r2, #1
-; FULL-NEXT:    mov.w r12, #0
-; FULL-NEXT:    sbcs lr, r3, #0
-; FULL-NEXT:    it ge
-; FULL-NEXT:    movge r2, #1
-; FULL-NEXT:    csel r0, r0, r12, lt
-; FULL-NEXT:    csel lr, r3, r12, lt
-; FULL-NEXT:    csel r1, r1, r12, lt
+; FULL-NEXT:    subs.w r12, r2, #1
+; FULL-NEXT:    mov.w lr, #0
+; FULL-NEXT:    sbcs r12, r3, #0
+; FULL-NEXT:    cset r12, lt
+; FULL-NEXT:    cmp.w r12, #0
+; FULL-NEXT:    it eq
+; FULL-NEXT:    moveq r2, #1
+; FULL-NEXT:    csel r0, r0, lr, ne
+; FULL-NEXT:    csel r12, r3, lr, ne
+; FULL-NEXT:    csel r1, r1, lr, ne
 ; FULL-NEXT:    rsbs r3, r0, #0
-; FULL-NEXT:    sbcs.w r3, r12, r1
-; FULL-NEXT:    sbcs.w r2, r12, r2
-; FULL-NEXT:    sbcs.w r2, r12, lr
+; FULL-NEXT:    sbcs.w r3, lr, r1
+; FULL-NEXT:    sbcs.w r2, lr, r2
+; FULL-NEXT:    sbcs.w r2, lr, r12
 ; FULL-NEXT:    cset r2, lt
 ; FULL-NEXT:    cmp r2, #0
 ; FULL-NEXT:    csel r0, r0, r2, ne
@@ -1848,15 +2121,16 @@ define i32 @stest_f64i32_mm(double %x) {
 ; SOFT-NEXT:    subs r5, r0, r4
 ; SOFT-NEXT:    mov r5, r1
 ; SOFT-NEXT:    sbcs r5, r3
+; SOFT-NEXT:    mov r5, r2
 ; SOFT-NEXT:    bge .LBB27_7
 ; SOFT-NEXT:  @ %bb.1: @ %entry
-; SOFT-NEXT:    mov r4, r2
-; SOFT-NEXT:    bge .LBB27_8
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB27_8
 ; SOFT-NEXT:  .LBB27_2: @ %entry
-; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB27_4
 ; SOFT-NEXT:  .LBB27_3: @ %entry
-; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    mov r0, r4
 ; SOFT-NEXT:  .LBB27_4: @ %entry
 ; SOFT-NEXT:    mvns r3, r3
 ; SOFT-NEXT:    lsls r2, r2, #31
@@ -1868,12 +2142,12 @@ define i32 @stest_f64i32_mm(double %x) {
 ; SOFT-NEXT:  .LBB27_6: @ %entry
 ; SOFT-NEXT:    pop {r4, r5, r7, pc}
 ; SOFT-NEXT:  .LBB27_7: @ %entry
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r4, r2
-; SOFT-NEXT:    blt .LBB27_2
+; SOFT-NEXT:    mov r5, r3
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB27_2
 ; SOFT-NEXT:  .LBB27_8: @ %entry
-; SOFT-NEXT:    mov r4, r3
-; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    beq .LBB27_3
 ; SOFT-NEXT:    b .LBB27_4
 ; SOFT-NEXT:    .p2align 2
@@ -1887,17 +2161,16 @@ define i32 @stest_f64i32_mm(double %x) {
 ; VFP2-NEXT:    push {r7, lr}
 ; VFP2-NEXT:    vmov r0, r1, d0
 ; VFP2-NEXT:    bl __aeabi_d2lz
-; VFP2-NEXT:    mvn r2, #-2147483648
-; VFP2-NEXT:    subs r3, r0, r2
-; VFP2-NEXT:    sbcs r3, r1, #0
-; VFP2-NEXT:    it ge
-; VFP2-NEXT:    movge r0, r2
+; VFP2-NEXT:    mvn r12, #-2147483648
+; VFP2-NEXT:    subs.w r3, r0, r12
 ; VFP2-NEXT:    mov.w r2, #0
+; VFP2-NEXT:    sbcs r3, r1, #0
 ; VFP2-NEXT:    it lt
 ; VFP2-NEXT:    movlt r2, #1
 ; VFP2-NEXT:    cmp r2, #0
-; VFP2-NEXT:    it ne
+; VFP2-NEXT:    ite ne
 ; VFP2-NEXT:    movne r2, r1
+; VFP2-NEXT:    moveq r0, r12
 ; VFP2-NEXT:    mov.w r1, #-1
 ; VFP2-NEXT:    rsbs.w r3, r0, #-2147483648
 ; VFP2-NEXT:    sbcs r1, r2
@@ -1961,19 +2234,24 @@ define i32 @ustest_f64i32_mm(double %x) {
 ; SOFT-NEXT:    .save {r7, lr}
 ; SOFT-NEXT:    push {r7, lr}
 ; SOFT-NEXT:    bl __aeabi_d2lz
-; SOFT-NEXT:    mov r2, r0
-; SOFT-NEXT:    movs r0, #0
+; SOFT-NEXT:    asrs r3, r1, #31
+; SOFT-NEXT:    ands r3, r1
+; SOFT-NEXT:    movs r2, #0
 ; SOFT-NEXT:    cmp r1, #1
-; SOFT-NEXT:    blt .LBB29_2
+; SOFT-NEXT:    bge .LBB29_3
 ; SOFT-NEXT:  @ %bb.1: @ %entry
-; SOFT-NEXT:    mvns r2, r0
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    bpl .LBB29_4
 ; SOFT-NEXT:  .LBB29_2: @ %entry
-; SOFT-NEXT:    asrs r3, r1, #31
-; SOFT-NEXT:    ands r3, r1
-; SOFT-NEXT:    bmi .LBB29_4
-; SOFT-NEXT:  @ %bb.3: @ %entry
 ; SOFT-NEXT:    mov r0, r2
+; SOFT-NEXT:    pop {r7, pc}
+; SOFT-NEXT:  .LBB29_3: @ %entry
+; SOFT-NEXT:    mvns r0, r2
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    bmi .LBB29_2
 ; SOFT-NEXT:  .LBB29_4: @ %entry
+; SOFT-NEXT:    mov r2, r0
+; SOFT-NEXT:    mov r0, r2
 ; SOFT-NEXT:    pop {r7, pc}
 ;
 ; VFP2-LABEL: ustest_f64i32_mm:
@@ -2015,15 +2293,16 @@ define i32 @stest_f32i32_mm(float %x) {
 ; SOFT-NEXT:    subs r5, r0, r4
 ; SOFT-NEXT:    mov r5, r1
 ; SOFT-NEXT:    sbcs r5, r3
+; SOFT-NEXT:    mov r5, r2
 ; SOFT-NEXT:    bge .LBB30_7
 ; SOFT-NEXT:  @ %bb.1: @ %entry
-; SOFT-NEXT:    mov r4, r2
-; SOFT-NEXT:    bge .LBB30_8
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB30_8
 ; SOFT-NEXT:  .LBB30_2: @ %entry
-; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB30_4
 ; SOFT-NEXT:  .LBB30_3: @ %entry
-; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    mov r0, r4
 ; SOFT-NEXT:  .LBB30_4: @ %entry
 ; SOFT-NEXT:    mvns r3, r3
 ; SOFT-NEXT:    lsls r2, r2, #31
@@ -2035,12 +2314,12 @@ define i32 @stest_f32i32_mm(float %x) {
 ; SOFT-NEXT:  .LBB30_6: @ %entry
 ; SOFT-NEXT:    pop {r4, r5, r7, pc}
 ; SOFT-NEXT:  .LBB30_7: @ %entry
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r4, r2
-; SOFT-NEXT:    blt .LBB30_2
+; SOFT-NEXT:    mov r5, r3
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB30_2
 ; SOFT-NEXT:  .LBB30_8: @ %entry
-; SOFT-NEXT:    mov r4, r3
-; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    beq .LBB30_3
 ; SOFT-NEXT:    b .LBB30_4
 ; SOFT-NEXT:    .p2align 2
@@ -2135,15 +2414,16 @@ define i32 @stest_f16i32_mm(half %x) {
 ; SOFT-NEXT:    subs r5, r0, r4
 ; SOFT-NEXT:    mov r5, r1
 ; SOFT-NEXT:    sbcs r5, r3
+; SOFT-NEXT:    mov r5, r2
 ; SOFT-NEXT:    bge .LBB33_7
 ; SOFT-NEXT:  @ %bb.1: @ %entry
-; SOFT-NEXT:    mov r4, r2
-; SOFT-NEXT:    bge .LBB33_8
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB33_8
 ; SOFT-NEXT:  .LBB33_2: @ %entry
-; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB33_4
 ; SOFT-NEXT:  .LBB33_3: @ %entry
-; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    mov r0, r4
 ; SOFT-NEXT:  .LBB33_4: @ %entry
 ; SOFT-NEXT:    mvns r3, r3
 ; SOFT-NEXT:    lsls r2, r2, #31
@@ -2155,12 +2435,12 @@ define i32 @stest_f16i32_mm(half %x) {
 ; SOFT-NEXT:  .LBB33_6: @ %entry
 ; SOFT-NEXT:    pop {r4, r5, r7, pc}
 ; SOFT-NEXT:  .LBB33_7: @ %entry
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r4, r2
-; SOFT-NEXT:    blt .LBB33_2
+; SOFT-NEXT:    mov r5, r3
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB33_2
 ; SOFT-NEXT:  .LBB33_8: @ %entry
-; SOFT-NEXT:    mov r4, r3
-; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    beq .LBB33_3
 ; SOFT-NEXT:    b .LBB33_4
 ; SOFT-NEXT:    .p2align 2
@@ -2693,78 +2973,81 @@ define i64 @stest_f64i64_mm(double %x) {
 ; SOFT:       @ %bb.0: @ %entry
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #12
-; SOFT-NEXT:    sub sp, #12
+; SOFT-NEXT:    .pad #4
+; SOFT-NEXT:    sub sp, #4
 ; SOFT-NEXT:    bl __fixdfti
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    movs r0, #1
+; SOFT-NEXT:    movs r4, #1
 ; SOFT-NEXT:    movs r5, #0
 ; SOFT-NEXT:    ldr r6, .LCPI45_0
-; SOFT-NEXT:    adds r4, r7, #1
-; SOFT-NEXT:    mov r4, r1
-; SOFT-NEXT:    sbcs r4, r6
-; SOFT-NEXT:    mov r4, r2
-; SOFT-NEXT:    sbcs r4, r5
-; SOFT-NEXT:    mov r4, r3
-; SOFT-NEXT:    sbcs r4, r5
-; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    blt .LBB45_2
+; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT:    adds r0, r0, #1
+; SOFT-NEXT:    mov r0, r1
+; SOFT-NEXT:    sbcs r0, r6
+; SOFT-NEXT:    mov r0, r2
+; SOFT-NEXT:    sbcs r0, r5
+; SOFT-NEXT:    mov r0, r3
+; SOFT-NEXT:    sbcs r0, r5
+; SOFT-NEXT:    mov r7, r4
+; SOFT-NEXT:    bge .LBB45_12
 ; SOFT-NEXT:  @ %bb.1: @ %entry
-; SOFT-NEXT:    mov r4, r5
-; SOFT-NEXT:  .LBB45_2: @ %entry
-; SOFT-NEXT:    mvns r6, r5
-; SOFT-NEXT:    cmp r4, #0
-; SOFT-NEXT:    beq .LBB45_12
-; SOFT-NEXT:  @ %bb.3: @ %entry
+; SOFT-NEXT:    cmp r7, #0
 ; SOFT-NEXT:    beq .LBB45_13
-; SOFT-NEXT:  .LBB45_4: @ %entry
-; SOFT-NEXT:    str r2, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:  .LBB45_2: @ %entry
+; SOFT-NEXT:    cmp r7, #0
 ; SOFT-NEXT:    beq .LBB45_14
+; SOFT-NEXT:  .LBB45_3: @ %entry
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB45_5
+; SOFT-NEXT:  .LBB45_4: @ %entry
+; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:  .LBB45_5: @ %entry
-; SOFT-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    mvns r0, r5
+; SOFT-NEXT:    cmp r7, #0
 ; SOFT-NEXT:    bne .LBB45_7
-; SOFT-NEXT:  .LBB45_6: @ %entry
-; SOFT-NEXT:    mov r7, r6
+; SOFT-NEXT:  @ %bb.6: @ %entry
+; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
 ; SOFT-NEXT:  .LBB45_7: @ %entry
-; SOFT-NEXT:    lsls r3, r0, #31
-; SOFT-NEXT:    rsbs r4, r7, #0
-; SOFT-NEXT:    mov r4, r3
-; SOFT-NEXT:    sbcs r4, r1
-; SOFT-NEXT:    mov r4, r6
-; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    sbcs r4, r2
-; SOFT-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    sbcs r6, r2
+; SOFT-NEXT:    lsls r6, r4, #31
+; SOFT-NEXT:    ldr r7, [sp] @ 4-byte Reload
+; SOFT-NEXT:    rsbs r7, r7, #0
+; SOFT-NEXT:    mov r7, r6
+; SOFT-NEXT:    sbcs r7, r1
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    sbcs r7, r2
+; SOFT-NEXT:    sbcs r0, r3
 ; SOFT-NEXT:    bge .LBB45_15
 ; SOFT-NEXT:  @ %bb.8: @ %entry
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    beq .LBB45_16
 ; SOFT-NEXT:  .LBB45_9: @ %entry
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    bne .LBB45_11
 ; SOFT-NEXT:  .LBB45_10: @ %entry
-; SOFT-NEXT:    mov r1, r3
+; SOFT-NEXT:    str r4, [sp] @ 4-byte Spill
 ; SOFT-NEXT:  .LBB45_11: @ %entry
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    add sp, #12
+; SOFT-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; SOFT-NEXT:    add sp, #4
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:  .LBB45_12: @ %entry
-; SOFT-NEXT:    mov r3, r4
-; SOFT-NEXT:    bne .LBB45_4
+; SOFT-NEXT:    mov r7, r5
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB45_2
 ; SOFT-NEXT:  .LBB45_13: @ %entry
-; SOFT-NEXT:    mov r2, r4
-; SOFT-NEXT:    str r2, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    bne .LBB45_5
+; SOFT-NEXT:    mov r3, r7
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB45_3
 ; SOFT-NEXT:  .LBB45_14: @ %entry
-; SOFT-NEXT:    ldr r1, .LCPI45_0
-; SOFT-NEXT:    str r3, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    beq .LBB45_6
-; SOFT-NEXT:    b .LBB45_7
+; SOFT-NEXT:    mov r2, r7
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB45_4
+; SOFT-NEXT:    b .LBB45_5
 ; SOFT-NEXT:  .LBB45_15: @ %entry
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    mov r4, r5
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    bne .LBB45_9
 ; SOFT-NEXT:  .LBB45_16: @ %entry
-; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    mov r1, r6
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    beq .LBB45_10
 ; SOFT-NEXT:    b .LBB45_11
 ; SOFT-NEXT:    .p2align 2
@@ -2803,8 +3086,8 @@ define i64 @stest_f64i64_mm(double %x) {
 ; VFP2-NEXT:    movlt.w r12, #1
 ; VFP2-NEXT:    cmp.w r12, #0
 ; VFP2-NEXT:    itt eq
-; VFP2-NEXT:    moveq r0, r12
 ; VFP2-NEXT:    moveq r1, lr
+; VFP2-NEXT:    moveq r0, r12
 ; VFP2-NEXT:    pop {r4, r5, r7, pc}
 ;
 ; FULL-LABEL: stest_f64i64_mm:
@@ -2831,8 +3114,8 @@ define i64 @stest_f64i64_mm(double %x) {
 ; FULL-NEXT:    sbcs.w r2, r3, r5
 ; FULL-NEXT:    cset r2, lt
 ; FULL-NEXT:    cmp r2, #0
-; FULL-NEXT:    csel r0, r0, r2, ne
 ; FULL-NEXT:    csel r1, r1, r12, ne
+; FULL-NEXT:    csel r0, r0, r2, ne
 ; FULL-NEXT:    pop {r4, r5, r7, pc}
 entry:
   %conv = fptosi double %x to i128
@@ -2856,6 +3139,7 @@ define i64 @utest_f64i64_mm(double %x) {
 ; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    beq .LBB46_5
 ; SOFT-NEXT:  .LBB46_2: @ %entry
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    beq .LBB46_6
 ; SOFT-NEXT:  .LBB46_3: @ %entry
 ; SOFT-NEXT:    pop {r4, pc}
@@ -2865,6 +3149,7 @@ define i64 @utest_f64i64_mm(double %x) {
 ; SOFT-NEXT:    bne .LBB46_2
 ; SOFT-NEXT:  .LBB46_5: @ %entry
 ; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    bne .LBB46_3
 ; SOFT-NEXT:  .LBB46_6: @ %entry
 ; SOFT-NEXT:    mov r1, r4
@@ -2908,8 +3193,8 @@ entry:
 define i64 @ustest_f64i64_mm(double %x) {
 ; SOFT-LABEL: ustest_f64i64_mm:
 ; SOFT:       @ %bb.0: @ %entry
-; SOFT-NEXT:    .save {r4, lr}
-; SOFT-NEXT:    push {r4, lr}
+; SOFT-NEXT:    .save {r4, r5, r7, lr}
+; SOFT-NEXT:    push {r4, r5, r7, lr}
 ; SOFT-NEXT:    bl __fixdfti
 ; SOFT-NEXT:    mov r4, r1
 ; SOFT-NEXT:    movs r1, #0
@@ -2918,42 +3203,46 @@ define i64 @ustest_f64i64_mm(double %x) {
 ; SOFT-NEXT:    sbcs r2, r1
 ; SOFT-NEXT:    blt .LBB47_2
 ; SOFT-NEXT:  @ %bb.1: @ %entry
-; SOFT-NEXT:    mov r2, r1
-; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    mov r5, r1
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    beq .LBB47_3
 ; SOFT-NEXT:    b .LBB47_4
 ; SOFT-NEXT:  .LBB47_2:
-; SOFT-NEXT:    movs r2, #1
-; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    movs r5, #1
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB47_4
 ; SOFT-NEXT:  .LBB47_3: @ %entry
-; SOFT-NEXT:    mov r4, r2
+; SOFT-NEXT:    mov r3, r5
 ; SOFT-NEXT:  .LBB47_4: @ %entry
-; SOFT-NEXT:    beq .LBB47_10
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB47_6
 ; SOFT-NEXT:  @ %bb.5: @ %entry
-; SOFT-NEXT:    bne .LBB47_7
+; SOFT-NEXT:    mov r0, r5
 ; SOFT-NEXT:  .LBB47_6: @ %entry
-; SOFT-NEXT:    mov r3, r2
-; SOFT-NEXT:  .LBB47_7: @ %entry
 ; SOFT-NEXT:    cmp r3, #0
 ; SOFT-NEXT:    mov r2, r1
-; SOFT-NEXT:    bpl .LBB47_11
-; SOFT-NEXT:  @ %bb.8: @ %entry
+; SOFT-NEXT:    bpl .LBB47_10
+; SOFT-NEXT:  @ %bb.7: @ %entry
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB47_11
+; SOFT-NEXT:  .LBB47_8: @ %entry
+; SOFT-NEXT:    cmp r3, #0
 ; SOFT-NEXT:    bpl .LBB47_12
 ; SOFT-NEXT:  .LBB47_9: @ %entry
 ; SOFT-NEXT:    mov r0, r2
-; SOFT-NEXT:    pop {r4, pc}
+; SOFT-NEXT:    pop {r4, r5, r7, pc}
 ; SOFT-NEXT:  .LBB47_10: @ %entry
-; SOFT-NEXT:    mov r0, r2
-; SOFT-NEXT:    beq .LBB47_6
-; SOFT-NEXT:    b .LBB47_7
-; SOFT-NEXT:  .LBB47_11: @ %entry
 ; SOFT-NEXT:    mov r2, r0
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB47_8
+; SOFT-NEXT:  .LBB47_11: @ %entry
+; SOFT-NEXT:    mov r4, r5
+; SOFT-NEXT:    cmp r3, #0
 ; SOFT-NEXT:    bmi .LBB47_9
 ; SOFT-NEXT:  .LBB47_12: @ %entry
 ; SOFT-NEXT:    mov r1, r4
 ; SOFT-NEXT:    mov r0, r2
-; SOFT-NEXT:    pop {r4, pc}
+; SOFT-NEXT:    pop {r4, r5, r7, pc}
 ;
 ; VFP2-LABEL: ustest_f64i64_mm:
 ; VFP2:       @ %bb.0: @ %entry
@@ -2966,13 +3255,17 @@ define i64 @ustest_f64i64_mm(double %x) {
 ; VFP2-NEXT:    it lt
 ; VFP2-NEXT:    movlt.w r12, #1
 ; VFP2-NEXT:    cmp.w r12, #0
-; VFP2-NEXT:    itte eq
-; VFP2-NEXT:    moveq r1, r12
+; VFP2-NEXT:    itt eq
 ; VFP2-NEXT:    moveq r0, r12
-; VFP2-NEXT:    movne r12, r3
-; VFP2-NEXT:    cmp.w r12, #0
-; VFP2-NEXT:    itt mi
+; VFP2-NEXT:    moveq r3, r12
+; VFP2-NEXT:    cmp r3, #0
+; VFP2-NEXT:    it mi
 ; VFP2-NEXT:    movmi r0, #0
+; VFP2-NEXT:    cmp.w r12, #0
+; VFP2-NEXT:    it eq
+; VFP2-NEXT:    moveq r1, r12
+; VFP2-NEXT:    cmp r3, #0
+; VFP2-NEXT:    it mi
 ; VFP2-NEXT:    movmi r1, #0
 ; VFP2-NEXT:    pop {r7, pc}
 ;
@@ -2985,12 +3278,15 @@ define i64 @ustest_f64i64_mm(double %x) {
 ; FULL-NEXT:    sbcs r2, r3, #0
 ; FULL-NEXT:    cset r2, lt
 ; FULL-NEXT:    cmp r2, #0
-; FULL-NEXT:    csel r1, r1, r2, ne
+; FULL-NEXT:    csel r3, r3, r2, ne
 ; FULL-NEXT:    csel r0, r0, r2, ne
-; FULL-NEXT:    csel r2, r3, r2, ne
-; FULL-NEXT:    cmp r2, #0
-; FULL-NEXT:    itt mi
+; FULL-NEXT:    cmp r3, #0
+; FULL-NEXT:    it mi
 ; FULL-NEXT:    movmi r0, #0
+; FULL-NEXT:    cmp r2, #0
+; FULL-NEXT:    csel r1, r1, r2, ne
+; FULL-NEXT:    cmp r3, #0
+; FULL-NEXT:    it mi
 ; FULL-NEXT:    movmi r1, #0
 ; FULL-NEXT:    pop {r7, pc}
 entry:
@@ -3006,78 +3302,81 @@ define i64 @stest_f32i64_mm(float %x) {
 ; SOFT:       @ %bb.0: @ %entry
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #12
-; SOFT-NEXT:    sub sp, #12
+; SOFT-NEXT:    .pad #4
+; SOFT-NEXT:    sub sp, #4
 ; SOFT-NEXT:    bl __fixsfti
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    movs r0, #1
+; SOFT-NEXT:    movs r4, #1
 ; SOFT-NEXT:    movs r5, #0
 ; SOFT-NEXT:    ldr r6, .LCPI48_0
-; SOFT-NEXT:    adds r4, r7, #1
-; SOFT-NEXT:    mov r4, r1
-; SOFT-NEXT:    sbcs r4, r6
-; SOFT-NEXT:    mov r4, r2
-; SOFT-NEXT:    sbcs r4, r5
-; SOFT-NEXT:    mov r4, r3
-; SOFT-NEXT:    sbcs r4, r5
-; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    blt .LBB48_2
+; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT:    adds r0, r0, #1
+; SOFT-NEXT:    mov r0, r1
+; SOFT-NEXT:    sbcs r0, r6
+; SOFT-NEXT:    mov r0, r2
+; SOFT-NEXT:    sbcs r0, r5
+; SOFT-NEXT:    mov r0, r3
+; SOFT-NEXT:    sbcs r0, r5
+; SOFT-NEXT:    mov r7, r4
+; SOFT-NEXT:    bge .LBB48_12
 ; SOFT-NEXT:  @ %bb.1: @ %entry
-; SOFT-NEXT:    mov r4, r5
-; SOFT-NEXT:  .LBB48_2: @ %entry
-; SOFT-NEXT:    mvns r6, r5
-; SOFT-NEXT:    cmp r4, #0
-; SOFT-NEXT:    beq .LBB48_12
-; SOFT-NEXT:  @ %bb.3: @ %entry
+; SOFT-NEXT:    cmp r7, #0
 ; SOFT-NEXT:    beq .LBB48_13
-; SOFT-NEXT:  .LBB48_4: @ %entry
-; SOFT-NEXT:    str r2, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:  .LBB48_2: @ %entry
+; SOFT-NEXT:    cmp r7, #0
 ; SOFT-NEXT:    beq .LBB48_14
+; SOFT-NEXT:  .LBB48_3: @ %entry
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB48_5
+; SOFT-NEXT:  .LBB48_4: @ %entry
+; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:  .LBB48_5: @ %entry
-; SOFT-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    mvns r0, r5
+; SOFT-NEXT:    cmp r7, #0
 ; SOFT-NEXT:    bne .LBB48_7
-; SOFT-NEXT:  .LBB48_6: @ %entry
-; SOFT-NEXT:    mov r7, r6
+; SOFT-NEXT:  @ %bb.6: @ %entry
+; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
 ; SOFT-NEXT:  .LBB48_7: @ %entry
-; SOFT-NEXT:    lsls r3, r0, #31
-; SOFT-NEXT:    rsbs r4, r7, #0
-; SOFT-NEXT:    mov r4, r3
-; SOFT-NEXT:    sbcs r4, r1
-; SOFT-NEXT:    mov r4, r6
-; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    sbcs r4, r2
-; SOFT-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    sbcs r6, r2
+; SOFT-NEXT:    lsls r6, r4, #31
+; SOFT-NEXT:    ldr r7, [sp] @ 4-byte Reload
+; SOFT-NEXT:    rsbs r7, r7, #0
+; SOFT-NEXT:    mov r7, r6
+; SOFT-NEXT:    sbcs r7, r1
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    sbcs r7, r2
+; SOFT-NEXT:    sbcs r0, r3
 ; SOFT-NEXT:    bge .LBB48_15
 ; SOFT-NEXT:  @ %bb.8: @ %entry
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    beq .LBB48_16
 ; SOFT-NEXT:  .LBB48_9: @ %entry
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    bne .LBB48_11
 ; SOFT-NEXT:  .LBB48_10: @ %entry
-; SOFT-NEXT:    mov r1, r3
+; SOFT-NEXT:    str r4, [sp] @ 4-byte Spill
 ; SOFT-NEXT:  .LBB48_11: @ %entry
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    add sp, #12
+; SOFT-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; SOFT-NEXT:    add sp, #4
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:  .LBB48_12: @ %entry
-; SOFT-NEXT:    mov r3, r4
-; SOFT-NEXT:    bne .LBB48_4
+; SOFT-NEXT:    mov r7, r5
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB48_2
 ; SOFT-NEXT:  .LBB48_13: @ %entry
-; SOFT-NEXT:    mov r2, r4
-; SOFT-NEXT:    str r2, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    bne .LBB48_5
+; SOFT-NEXT:    mov r3, r7
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB48_3
 ; SOFT-NEXT:  .LBB48_14: @ %entry
-; SOFT-NEXT:    ldr r1, .LCPI48_0
-; SOFT-NEXT:    str r3, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    beq .LBB48_6
-; SOFT-NEXT:    b .LBB48_7
+; SOFT-NEXT:    mov r2, r7
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB48_4
+; SOFT-NEXT:    b .LBB48_5
 ; SOFT-NEXT:  .LBB48_15: @ %entry
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    mov r4, r5
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    bne .LBB48_9
 ; SOFT-NEXT:  .LBB48_16: @ %entry
-; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    mov r1, r6
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    beq .LBB48_10
 ; SOFT-NEXT:    b .LBB48_11
 ; SOFT-NEXT:    .p2align 2
@@ -3116,8 +3415,8 @@ define i64 @stest_f32i64_mm(float %x) {
 ; VFP2-NEXT:    movlt.w r12, #1
 ; VFP2-NEXT:    cmp.w r12, #0
 ; VFP2-NEXT:    itt eq
-; VFP2-NEXT:    moveq r0, r12
 ; VFP2-NEXT:    moveq r1, lr
+; VFP2-NEXT:    moveq r0, r12
 ; VFP2-NEXT:    pop {r4, r5, r7, pc}
 ;
 ; FULL-LABEL: stest_f32i64_mm:
@@ -3144,8 +3443,8 @@ define i64 @stest_f32i64_mm(float %x) {
 ; FULL-NEXT:    sbcs.w r2, r3, r5
 ; FULL-NEXT:    cset r2, lt
 ; FULL-NEXT:    cmp r2, #0
-; FULL-NEXT:    csel r0, r0, r2, ne
 ; FULL-NEXT:    csel r1, r1, r12, ne
+; FULL-NEXT:    csel r0, r0, r2, ne
 ; FULL-NEXT:    pop {r4, r5, r7, pc}
 entry:
   %conv = fptosi float %x to i128
@@ -3169,6 +3468,7 @@ define i64 @utest_f32i64_mm(float %x) {
 ; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    beq .LBB49_5
 ; SOFT-NEXT:  .LBB49_2: @ %entry
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    beq .LBB49_6
 ; SOFT-NEXT:  .LBB49_3: @ %entry
 ; SOFT-NEXT:    pop {r4, pc}
@@ -3178,6 +3478,7 @@ define i64 @utest_f32i64_mm(float %x) {
 ; SOFT-NEXT:    bne .LBB49_2
 ; SOFT-NEXT:  .LBB49_5: @ %entry
 ; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    bne .LBB49_3
 ; SOFT-NEXT:  .LBB49_6: @ %entry
 ; SOFT-NEXT:    mov r1, r4
@@ -3221,8 +3522,8 @@ entry:
 define i64 @ustest_f32i64_mm(float %x) {
 ; SOFT-LABEL: ustest_f32i64_mm:
 ; SOFT:       @ %bb.0: @ %entry
-; SOFT-NEXT:    .save {r4, lr}
-; SOFT-NEXT:    push {r4, lr}
+; SOFT-NEXT:    .save {r4, r5, r7, lr}
+; SOFT-NEXT:    push {r4, r5, r7, lr}
 ; SOFT-NEXT:    bl __fixsfti
 ; SOFT-NEXT:    mov r4, r1
 ; SOFT-NEXT:    movs r1, #0
@@ -3231,42 +3532,46 @@ define i64 @ustest_f32i64_mm(float %x) {
 ; SOFT-NEXT:    sbcs r2, r1
 ; SOFT-NEXT:    blt .LBB50_2
 ; SOFT-NEXT:  @ %bb.1: @ %entry
-; SOFT-NEXT:    mov r2, r1
-; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    mov r5, r1
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    beq .LBB50_3
 ; SOFT-NEXT:    b .LBB50_4
 ; SOFT-NEXT:  .LBB50_2:
-; SOFT-NEXT:    movs r2, #1
-; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    movs r5, #1
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB50_4
 ; SOFT-NEXT:  .LBB50_3: @ %entry
-; SOFT-NEXT:    mov r4, r2
+; SOFT-NEXT:    mov r3, r5
 ; SOFT-NEXT:  .LBB50_4: @ %entry
-; SOFT-NEXT:    beq .LBB50_10
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB50_6
 ; SOFT-NEXT:  @ %bb.5: @ %entry
-; SOFT-NEXT:    bne .LBB50_7
+; SOFT-NEXT:    mov r0, r5
 ; SOFT-NEXT:  .LBB50_6: @ %entry
-; SOFT-NEXT:    mov r3, r2
-; SOFT-NEXT:  .LBB50_7: @ %entry
 ; SOFT-NEXT:    cmp r3, #0
 ; SOFT-NEXT:    mov r2, r1
-; SOFT-NEXT:    bpl .LBB50_11
-; SOFT-NEXT:  @ %bb.8: @ %entry
+; SOFT-NEXT:    bpl .LBB50_10
+; SOFT-NEXT:  @ %bb.7: @ %entry
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB50_11
+; SOFT-NEXT:  .LBB50_8: @ %entry
+; SOFT-NEXT:    cmp r3, #0
 ; SOFT-NEXT:    bpl .LBB50_12
 ; SOFT-NEXT:  .LBB50_9: @ %entry
 ; SOFT-NEXT:    mov r0, r2
-; SOFT-NEXT:    pop {r4, pc}
+; SOFT-NEXT:    pop {r4, r5, r7, pc}
 ; SOFT-NEXT:  .LBB50_10: @ %entry
-; SOFT-NEXT:    mov r0, r2
-; SOFT-NEXT:    beq .LBB50_6
-; SOFT-NEXT:    b .LBB50_7
-; SOFT-NEXT:  .LBB50_11: @ %entry
 ; SOFT-NEXT:    mov r2, r0
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB50_8
+; SOFT-NEXT:  .LBB50_11: @ %entry
+; SOFT-NEXT:    mov r4, r5
+; SOFT-NEXT:    cmp r3, #0
 ; SOFT-NEXT:    bmi .LBB50_9
 ; SOFT-NEXT:  .LBB50_12: @ %entry
 ; SOFT-NEXT:    mov r1, r4
 ; SOFT-NEXT:    mov r0, r2
-; SOFT-NEXT:    pop {r4, pc}
+; SOFT-NEXT:    pop {r4, r5, r7, pc}
 ;
 ; VFP2-LABEL: ustest_f32i64_mm:
 ; VFP2:       @ %bb.0: @ %entry
@@ -3279,13 +3584,17 @@ define i64 @ustest_f32i64_mm(float %x) {
 ; VFP2-NEXT:    it lt
 ; VFP2-NEXT:    movlt.w r12, #1
 ; VFP2-NEXT:    cmp.w r12, #0
-; VFP2-NEXT:    itte eq
-; VFP2-NEXT:    moveq r1, r12
+; VFP2-NEXT:    itt eq
 ; VFP2-NEXT:    moveq r0, r12
-; VFP2-NEXT:    movne r12, r3
-; VFP2-NEXT:    cmp.w r12, #0
-; VFP2-NEXT:    itt mi
+; VFP2-NEXT:    moveq r3, r12
+; VFP2-NEXT:    cmp r3, #0
+; VFP2-NEXT:    it mi
 ; VFP2-NEXT:    movmi r0, #0
+; VFP2-NEXT:    cmp.w r12, #0
+; VFP2-NEXT:    it eq
+; VFP2-NEXT:    moveq r1, r12
+; VFP2-NEXT:    cmp r3, #0
+; VFP2-NEXT:    it mi
 ; VFP2-NEXT:    movmi r1, #0
 ; VFP2-NEXT:    pop {r7, pc}
 ;
@@ -3298,12 +3607,15 @@ define i64 @ustest_f32i64_mm(float %x) {
 ; FULL-NEXT:    sbcs r2, r3, #0
 ; FULL-NEXT:    cset r2, lt
 ; FULL-NEXT:    cmp r2, #0
-; FULL-NEXT:    csel r1, r1, r2, ne
+; FULL-NEXT:    csel r3, r3, r2, ne
 ; FULL-NEXT:    csel r0, r0, r2, ne
-; FULL-NEXT:    csel r2, r3, r2, ne
-; FULL-NEXT:    cmp r2, #0
-; FULL-NEXT:    itt mi
+; FULL-NEXT:    cmp r3, #0
+; FULL-NEXT:    it mi
 ; FULL-NEXT:    movmi r0, #0
+; FULL-NEXT:    cmp r2, #0
+; FULL-NEXT:    csel r1, r1, r2, ne
+; FULL-NEXT:    cmp r3, #0
+; FULL-NEXT:    it mi
 ; FULL-NEXT:    movmi r1, #0
 ; FULL-NEXT:    pop {r7, pc}
 entry:
@@ -3319,80 +3631,83 @@ define i64 @stest_f16i64_mm(half %x) {
 ; SOFT:       @ %bb.0: @ %entry
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #12
-; SOFT-NEXT:    sub sp, #12
+; SOFT-NEXT:    .pad #4
+; SOFT-NEXT:    sub sp, #4
 ; SOFT-NEXT:    uxth r0, r0
 ; SOFT-NEXT:    bl __aeabi_h2f
 ; SOFT-NEXT:    bl __fixsfti
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    movs r0, #1
+; SOFT-NEXT:    movs r4, #1
 ; SOFT-NEXT:    movs r5, #0
 ; SOFT-NEXT:    ldr r6, .LCPI51_0
-; SOFT-NEXT:    adds r4, r7, #1
-; SOFT-NEXT:    mov r4, r1
-; SOFT-NEXT:    sbcs r4, r6
-; SOFT-NEXT:    mov r4, r2
-; SOFT-NEXT:    sbcs r4, r5
-; SOFT-NEXT:    mov r4, r3
-; SOFT-NEXT:    sbcs r4, r5
-; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    blt .LBB51_2
+; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT:    adds r0, r0, #1
+; SOFT-NEXT:    mov r0, r1
+; SOFT-NEXT:    sbcs r0, r6
+; SOFT-NEXT:    mov r0, r2
+; SOFT-NEXT:    sbcs r0, r5
+; SOFT-NEXT:    mov r0, r3
+; SOFT-NEXT:    sbcs r0, r5
+; SOFT-NEXT:    mov r7, r4
+; SOFT-NEXT:    bge .LBB51_12
 ; SOFT-NEXT:  @ %bb.1: @ %entry
-; SOFT-NEXT:    mov r4, r5
-; SOFT-NEXT:  .LBB51_2: @ %entry
-; SOFT-NEXT:    mvns r6, r5
-; SOFT-NEXT:    cmp r4, #0
-; SOFT-NEXT:    beq .LBB51_12
-; SOFT-NEXT:  @ %bb.3: @ %entry
+; SOFT-NEXT:    cmp r7, #0
 ; SOFT-NEXT:    beq .LBB51_13
-; SOFT-NEXT:  .LBB51_4: @ %entry
-; SOFT-NEXT:    str r2, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:  .LBB51_2: @ %entry
+; SOFT-NEXT:    cmp r7, #0
 ; SOFT-NEXT:    beq .LBB51_14
+; SOFT-NEXT:  .LBB51_3: @ %entry
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB51_5
+; SOFT-NEXT:  .LBB51_4: @ %entry
+; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:  .LBB51_5: @ %entry
-; SOFT-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    mvns r0, r5
+; SOFT-NEXT:    cmp r7, #0
 ; SOFT-NEXT:    bne .LBB51_7
-; SOFT-NEXT:  .LBB51_6: @ %entry
-; SOFT-NEXT:    mov r7, r6
+; SOFT-NEXT:  @ %bb.6: @ %entry
+; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
 ; SOFT-NEXT:  .LBB51_7: @ %entry
-; SOFT-NEXT:    lsls r3, r0, #31
-; SOFT-NEXT:    rsbs r4, r7, #0
-; SOFT-NEXT:    mov r4, r3
-; SOFT-NEXT:    sbcs r4, r1
-; SOFT-NEXT:    mov r4, r6
-; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    sbcs r4, r2
-; SOFT-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    sbcs r6, r2
+; SOFT-NEXT:    lsls r6, r4, #31
+; SOFT-NEXT:    ldr r7, [sp] @ 4-byte Reload
+; SOFT-NEXT:    rsbs r7, r7, #0
+; SOFT-NEXT:    mov r7, r6
+; SOFT-NEXT:    sbcs r7, r1
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    sbcs r7, r2
+; SOFT-NEXT:    sbcs r0, r3
 ; SOFT-NEXT:    bge .LBB51_15
 ; SOFT-NEXT:  @ %bb.8: @ %entry
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    beq .LBB51_16
 ; SOFT-NEXT:  .LBB51_9: @ %entry
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    bne .LBB51_11
 ; SOFT-NEXT:  .LBB51_10: @ %entry
-; SOFT-NEXT:    mov r1, r3
+; SOFT-NEXT:    str r4, [sp] @ 4-byte Spill
 ; SOFT-NEXT:  .LBB51_11: @ %entry
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    add sp, #12
+; SOFT-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; SOFT-NEXT:    add sp, #4
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:  .LBB51_12: @ %entry
-; SOFT-NEXT:    mov r3, r4
-; SOFT-NEXT:    bne .LBB51_4
+; SOFT-NEXT:    mov r7, r5
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB51_2
 ; SOFT-NEXT:  .LBB51_13: @ %entry
-; SOFT-NEXT:    mov r2, r4
-; SOFT-NEXT:    str r2, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    bne .LBB51_5
+; SOFT-NEXT:    mov r3, r7
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB51_3
 ; SOFT-NEXT:  .LBB51_14: @ %entry
-; SOFT-NEXT:    ldr r1, .LCPI51_0
-; SOFT-NEXT:    str r3, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    beq .LBB51_6
-; SOFT-NEXT:    b .LBB51_7
+; SOFT-NEXT:    mov r2, r7
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB51_4
+; SOFT-NEXT:    b .LBB51_5
 ; SOFT-NEXT:  .LBB51_15: @ %entry
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    mov r4, r5
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    bne .LBB51_9
 ; SOFT-NEXT:  .LBB51_16: @ %entry
-; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    mov r1, r6
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    beq .LBB51_10
 ; SOFT-NEXT:    b .LBB51_11
 ; SOFT-NEXT:    .p2align 2
@@ -3434,8 +3749,8 @@ define i64 @stest_f16i64_mm(half %x) {
 ; VFP2-NEXT:    movlt.w r12, #1
 ; VFP2-NEXT:    cmp.w r12, #0
 ; VFP2-NEXT:    itt eq
-; VFP2-NEXT:    moveq r0, r12
 ; VFP2-NEXT:    moveq r1, lr
+; VFP2-NEXT:    moveq r0, r12
 ; VFP2-NEXT:    pop {r4, r5, r7, pc}
 ;
 ; FULL-LABEL: stest_f16i64_mm:
@@ -3464,8 +3779,8 @@ define i64 @stest_f16i64_mm(half %x) {
 ; FULL-NEXT:    sbcs.w r2, r3, r5
 ; FULL-NEXT:    cset r2, lt
 ; FULL-NEXT:    cmp r2, #0
-; FULL-NEXT:    csel r0, r0, r2, ne
 ; FULL-NEXT:    csel r1, r1, r12, ne
+; FULL-NEXT:    csel r0, r0, r2, ne
 ; FULL-NEXT:    pop {r4, r5, r7, pc}
 entry:
   %conv = fptosi half %x to i128
@@ -3491,6 +3806,7 @@ define i64 @utesth_f16i64_mm(half %x) {
 ; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    beq .LBB52_5
 ; SOFT-NEXT:  .LBB52_2: @ %entry
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    beq .LBB52_6
 ; SOFT-NEXT:  .LBB52_3: @ %entry
 ; SOFT-NEXT:    pop {r4, pc}
@@ -3500,6 +3816,7 @@ define i64 @utesth_f16i64_mm(half %x) {
 ; SOFT-NEXT:    bne .LBB52_2
 ; SOFT-NEXT:  .LBB52_5: @ %entry
 ; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    bne .LBB52_3
 ; SOFT-NEXT:  .LBB52_6: @ %entry
 ; SOFT-NEXT:    mov r1, r4
@@ -3548,8 +3865,8 @@ entry:
 define i64 @ustest_f16i64_mm(half %x) {
 ; SOFT-LABEL: ustest_f16i64_mm:
 ; SOFT:       @ %bb.0: @ %entry
-; SOFT-NEXT:    .save {r4, lr}
-; SOFT-NEXT:    push {r4, lr}
+; SOFT-NEXT:    .save {r4, r5, r7, lr}
+; SOFT-NEXT:    push {r4, r5, r7, lr}
 ; SOFT-NEXT:    uxth r0, r0
 ; SOFT-NEXT:    bl __aeabi_h2f
 ; SOFT-NEXT:    bl __fixsfti
@@ -3560,42 +3877,46 @@ define i64 @ustest_f16i64_mm(half %x) {
 ; SOFT-NEXT:    sbcs r2, r1
 ; SOFT-NEXT:    blt .LBB53_2
 ; SOFT-NEXT:  @ %bb.1: @ %entry
-; SOFT-NEXT:    mov r2, r1
-; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    mov r5, r1
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    beq .LBB53_3
 ; SOFT-NEXT:    b .LBB53_4
 ; SOFT-NEXT:  .LBB53_2:
-; SOFT-NEXT:    movs r2, #1
-; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    movs r5, #1
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB53_4
 ; SOFT-NEXT:  .LBB53_3: @ %entry
-; SOFT-NEXT:    mov r4, r2
+; SOFT-NEXT:    mov r3, r5
 ; SOFT-NEXT:  .LBB53_4: @ %entry
-; SOFT-NEXT:    beq .LBB53_10
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB53_6
 ; SOFT-NEXT:  @ %bb.5: @ %entry
-; SOFT-NEXT:    bne .LBB53_7
+; SOFT-NEXT:    mov r0, r5
 ; SOFT-NEXT:  .LBB53_6: @ %entry
-; SOFT-NEXT:    mov r3, r2
-; SOFT-NEXT:  .LBB53_7: @ %entry
 ; SOFT-NEXT:    cmp r3, #0
 ; SOFT-NEXT:    mov r2, r1
-; SOFT-NEXT:    bpl .LBB53_11
-; SOFT-NEXT:  @ %bb.8: @ %entry
+; SOFT-NEXT:    bpl .LBB53_10
+; SOFT-NEXT:  @ %bb.7: @ %entry
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB53_11
+; SOFT-NEXT:  .LBB53_8: @ %entry
+; SOFT-NEXT:    cmp r3, #0
 ; SOFT-NEXT:    bpl .LBB53_12
 ; SOFT-NEXT:  .LBB53_9: @ %entry
 ; SOFT-NEXT:    mov r0, r2
-; SOFT-NEXT:    pop {r4, pc}
+; SOFT-NEXT:    pop {r4, r5, r7, pc}
 ; SOFT-NEXT:  .LBB53_10: @ %entry
-; SOFT-NEXT:    mov r0, r2
-; SOFT-NEXT:    beq .LBB53_6
-; SOFT-NEXT:    b .LBB53_7
-; SOFT-NEXT:  .LBB53_11: @ %entry
 ; SOFT-NEXT:    mov r2, r0
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB53_8
+; SOFT-NEXT:  .LBB53_11: @ %entry
+; SOFT-NEXT:    mov r4, r5
+; SOFT-NEXT:    cmp r3, #0
 ; SOFT-NEXT:    bmi .LBB53_9
 ; SOFT-NEXT:  .LBB53_12: @ %entry
 ; SOFT-NEXT:    mov r1, r4
 ; SOFT-NEXT:    mov r0, r2
-; SOFT-NEXT:    pop {r4, pc}
+; SOFT-NEXT:    pop {r4, r5, r7, pc}
 ;
 ; VFP2-LABEL: ustest_f16i64_mm:
 ; VFP2:       @ %bb.0: @ %entry
@@ -3611,13 +3932,17 @@ define i64 @ustest_f16i64_mm(half %x) {
 ; VFP2-NEXT:    it lt
 ; VFP2-NEXT:    movlt.w r12, #1
 ; VFP2-NEXT:    cmp.w r12, #0
-; VFP2-NEXT:    itte eq
-; VFP2-NEXT:    moveq r1, r12
+; VFP2-NEXT:    itt eq
 ; VFP2-NEXT:    moveq r0, r12
-; VFP2-NEXT:    movne r12, r3
-; VFP2-NEXT:    cmp.w r12, #0
-; VFP2-NEXT:    itt mi
+; VFP2-NEXT:    moveq r3, r12
+; VFP2-NEXT:    cmp r3, #0
+; VFP2-NEXT:    it mi
 ; VFP2-NEXT:    movmi r0, #0
+; VFP2-NEXT:    cmp.w r12, #0
+; VFP2-NEXT:    it eq
+; VFP2-NEXT:    moveq r1, r12
+; VFP2-NEXT:    cmp r3, #0
+; VFP2-NEXT:    it mi
 ; VFP2-NEXT:    movmi r1, #0
 ; VFP2-NEXT:    pop {r7, pc}
 ;
@@ -3632,12 +3957,15 @@ define i64 @ustest_f16i64_mm(half %x) {
 ; FULL-NEXT:    sbcs r2, r3, #0
 ; FULL-NEXT:    cset r2, lt
 ; FULL-NEXT:    cmp r2, #0
-; FULL-NEXT:    csel r1, r1, r2, ne
+; FULL-NEXT:    csel r3, r3, r2, ne
 ; FULL-NEXT:    csel r0, r0, r2, ne
-; FULL-NEXT:    csel r2, r3, r2, ne
-; FULL-NEXT:    cmp r2, #0
-; FULL-NEXT:    itt mi
+; FULL-NEXT:    cmp r3, #0
+; FULL-NEXT:    it mi
 ; FULL-NEXT:    movmi r0, #0
+; FULL-NEXT:    cmp r2, #0
+; FULL-NEXT:    csel r1, r1, r2, ne
+; FULL-NEXT:    cmp r3, #0
+; FULL-NEXT:    it mi
 ; FULL-NEXT:    movmi r1, #0
 ; FULL-NEXT:    pop {r7, pc}
 entry:
@@ -3686,15 +4014,16 @@ define void @unroll_maxmin(ptr nocapture %0, ptr nocapture readonly %1, i32 %2)
 ; SOFT-NEXT:    mov r2, r1
 ; SOFT-NEXT:    ldr r3, [sp, #16] @ 4-byte Reload
 ; SOFT-NEXT:    sbcs r2, r3
+; SOFT-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
 ; SOFT-NEXT:    bge .LBB54_14
 ; SOFT-NEXT:  @ %bb.3: @ in Loop: Header=BB54_2 Depth=1
-; SOFT-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    bge .LBB54_15
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    beq .LBB54_15
 ; SOFT-NEXT:  .LBB54_4: @ in Loop: Header=BB54_2 Depth=1
 ; SOFT-NEXT:    cmp r2, #0
 ; SOFT-NEXT:    bne .LBB54_6
 ; SOFT-NEXT:  .LBB54_5: @ in Loop: Header=BB54_2 Depth=1
-; SOFT-NEXT:    mov r1, r2
+; SOFT-NEXT:    ldr r0, .LCPI54_0
 ; SOFT-NEXT:  .LBB54_6: @ in Loop: Header=BB54_2 Depth=1
 ; SOFT-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
 ; SOFT-NEXT:    subs r2, r2, r0
@@ -3720,6 +4049,7 @@ define void @unroll_maxmin(ptr nocapture %0, ptr nocapture readonly %1, i32 %2)
 ; SOFT-NEXT:    cmp r2, #0
 ; SOFT-NEXT:    beq .LBB54_17
 ; SOFT-NEXT:  .LBB54_10: @ in Loop: Header=BB54_2 Depth=1
+; SOFT-NEXT:    cmp r2, #0
 ; SOFT-NEXT:    bne .LBB54_12
 ; SOFT-NEXT:  .LBB54_11: @ in Loop: Header=BB54_2 Depth=1
 ; SOFT-NEXT:    ldr r0, .LCPI54_0
@@ -3733,12 +4063,12 @@ define void @unroll_maxmin(ptr nocapture %0, ptr nocapture readonly %1, i32 %2)
 ; SOFT-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
 ; SOFT-NEXT:    b .LBB54_1
 ; SOFT-NEXT:  .LBB54_14: @ in Loop: Header=BB54_2 Depth=1
-; SOFT-NEXT:    ldr r0, .LCPI54_0
-; SOFT-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    blt .LBB54_4
-; SOFT-NEXT:  .LBB54_15: @ in Loop: Header=BB54_2 Depth=1
 ; SOFT-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
 ; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    bne .LBB54_4
+; SOFT-NEXT:  .LBB54_15: @ in Loop: Header=BB54_2 Depth=1
+; SOFT-NEXT:    mov r1, r2
+; SOFT-NEXT:    cmp r2, #0
 ; SOFT-NEXT:    beq .LBB54_5
 ; SOFT-NEXT:    b .LBB54_6
 ; SOFT-NEXT:  .LBB54_16: @ in Loop: Header=BB54_2 Depth=1
@@ -3747,6 +4077,7 @@ define void @unroll_maxmin(ptr nocapture %0, ptr nocapture readonly %1, i32 %2)
 ; SOFT-NEXT:    bne .LBB54_10
 ; SOFT-NEXT:  .LBB54_17: @ in Loop: Header=BB54_2 Depth=1
 ; SOFT-NEXT:    mov r1, r2
+; SOFT-NEXT:    cmp r2, #0
 ; SOFT-NEXT:    beq .LBB54_11
 ; SOFT-NEXT:    b .LBB54_12
 ; SOFT-NEXT:  .LBB54_18:
@@ -3849,17 +4180,18 @@ define void @unroll_minmax(ptr nocapture %0, ptr nocapture readonly %1, i32 %2)
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #12
-; SOFT-NEXT:    sub sp, #12
+; SOFT-NEXT:    .pad #20
+; SOFT-NEXT:    sub sp, #20
 ; SOFT-NEXT:    mov r4, r1
 ; SOFT-NEXT:    mov r5, r0
 ; SOFT-NEXT:    movs r0, #0
-; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT:    str r0, [sp, #16] @ 4-byte Spill
 ; SOFT-NEXT:    mvns r0, r0
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    str r0, [sp, #12] @ 4-byte Spill
 ; SOFT-NEXT:    movs r0, #1
 ; SOFT-NEXT:    lsls r1, r0, #31
-; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
 ; SOFT-NEXT:    lsls r7, r0, #10
 ; SOFT-NEXT:    b .LBB55_2
 ; SOFT-NEXT:  .LBB55_1: @ in Loop: Header=BB55_2 Depth=1
@@ -3867,7 +4199,7 @@ define void @unroll_minmax(ptr nocapture %0, ptr nocapture readonly %1, i32 %2)
 ; SOFT-NEXT:    adds r4, #8
 ; SOFT-NEXT:    adds r5, #8
 ; SOFT-NEXT:    subs r7, r7, #2
-; SOFT-NEXT:    beq .LBB55_14
+; SOFT-NEXT:    beq .LBB55_18
 ; SOFT-NEXT:  .LBB55_2: @ =>This Inner Loop Header: Depth=1
 ; SOFT-NEXT:    ldr r0, [r4]
 ; SOFT-NEXT:    movs r1, #79
@@ -3875,21 +4207,24 @@ define void @unroll_minmax(ptr nocapture %0, ptr nocapture readonly %1, i32 %2)
 ; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:    bl __aeabi_fmul
 ; SOFT-NEXT:    bl __aeabi_f2lz
-; SOFT-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    subs r2, r2, r0
 ; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    subs r2, r2, r0
+; SOFT-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
 ; SOFT-NEXT:    sbcs r2, r1
-; SOFT-NEXT:    blt .LBB55_4
+; SOFT-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:    bge .LBB55_14
 ; SOFT-NEXT:  @ %bb.3: @ in Loop: Header=BB55_2 Depth=1
-; SOFT-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    beq .LBB55_15
 ; SOFT-NEXT:  .LBB55_4: @ in Loop: Header=BB55_2 Depth=1
-; SOFT-NEXT:    blt .LBB55_6
-; SOFT-NEXT:  @ %bb.5: @ in Loop: Header=BB55_2 Depth=1
-; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    bne .LBB55_6
+; SOFT-NEXT:  .LBB55_5: @ in Loop: Header=BB55_2 Depth=1
+; SOFT-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB55_6: @ in Loop: Header=BB55_2 Depth=1
 ; SOFT-NEXT:    ldr r2, .LCPI55_0
 ; SOFT-NEXT:    subs r2, r0, r2
-; SOFT-NEXT:    ldr r2, [sp] @ 4-byte Reload
+; SOFT-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
 ; SOFT-NEXT:    sbcs r1, r2
 ; SOFT-NEXT:    blt .LBB55_8
 ; SOFT-NEXT:  @ %bb.7: @ in Loop: Header=BB55_2 Depth=1
@@ -3900,31 +4235,52 @@ define void @unroll_minmax(ptr nocapture %0, ptr nocapture readonly %1, i32 %2)
 ; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:    bl __aeabi_fmul
 ; SOFT-NEXT:    bl __aeabi_f2lz
-; SOFT-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    subs r2, r2, r0
 ; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    subs r2, r2, r0
+; SOFT-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
 ; SOFT-NEXT:    sbcs r2, r1
-; SOFT-NEXT:    blt .LBB55_10
+; SOFT-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:    bge .LBB55_16
 ; SOFT-NEXT:  @ %bb.9: @ in Loop: Header=BB55_2 Depth=1
-; SOFT-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    beq .LBB55_17
 ; SOFT-NEXT:  .LBB55_10: @ in Loop: Header=BB55_2 Depth=1
-; SOFT-NEXT:    blt .LBB55_12
-; SOFT-NEXT:  @ %bb.11: @ in Loop: Header=BB55_2 Depth=1
-; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    bne .LBB55_12
+; SOFT-NEXT:  .LBB55_11: @ in Loop: Header=BB55_2 Depth=1
+; SOFT-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB55_12: @ in Loop: Header=BB55_2 Depth=1
 ; SOFT-NEXT:    ldr r2, .LCPI55_0
 ; SOFT-NEXT:    subs r2, r0, r2
-; SOFT-NEXT:    ldr r2, [sp] @ 4-byte Reload
+; SOFT-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
 ; SOFT-NEXT:    sbcs r1, r2
 ; SOFT-NEXT:    blt .LBB55_1
 ; SOFT-NEXT:  @ %bb.13: @ in Loop: Header=BB55_2 Depth=1
 ; SOFT-NEXT:    ldr r0, .LCPI55_0
 ; SOFT-NEXT:    b .LBB55_1
-; SOFT-NEXT:  .LBB55_14:
-; SOFT-NEXT:    add sp, #12
+; SOFT-NEXT:  .LBB55_14: @ in Loop: Header=BB55_2 Depth=1
+; SOFT-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    bne .LBB55_4
+; SOFT-NEXT:  .LBB55_15: @ in Loop: Header=BB55_2 Depth=1
+; SOFT-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    beq .LBB55_5
+; SOFT-NEXT:    b .LBB55_6
+; SOFT-NEXT:  .LBB55_16: @ in Loop: Header=BB55_2 Depth=1
+; SOFT-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    bne .LBB55_10
+; SOFT-NEXT:  .LBB55_17: @ in Loop: Header=BB55_2 Depth=1
+; SOFT-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    beq .LBB55_11
+; SOFT-NEXT:    b .LBB55_12
+; SOFT-NEXT:  .LBB55_18:
+; SOFT-NEXT:    add sp, #20
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
-; SOFT-NEXT:  @ %bb.15:
+; SOFT-NEXT:  @ %bb.19:
 ; SOFT-NEXT:  .LCPI55_0:
 ; SOFT-NEXT:    .long 2147483647 @ 0x7fffffff
 ;

diff  --git a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
index 4d091c23026581..78090083a00264 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
@@ -9,58 +9,58 @@ define <2 x i32> @stest_f64i32(<2 x double> %x) {
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .save {r4, r5, r11, lr}
 ; CHECK-NEXT:    push {r4, r5, r11, lr}
-; CHECK-NEXT:    .vsave {d8, d9}
-; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11}
 ; CHECK-NEXT:    vorr q4, q0, q0
-; CHECK-NEXT:    vmov r0, r1, d9
+; CHECK-NEXT:    vmov r0, r1, d8
 ; CHECK-NEXT:    bl __aeabi_d2lz
 ; CHECK-NEXT:    mov r4, r0
 ; CHECK-NEXT:    mov r5, r1
-; CHECK-NEXT:    vmov r0, r1, d8
-; CHECK-NEXT:    vmov.32 d9[0], r4
+; CHECK-NEXT:    vmov r0, r1, d9
+; CHECK-NEXT:    adr r2, .LCPI0_0
+; CHECK-NEXT:    vld1.64 {d8, d9}, [r2:128]
+; CHECK-NEXT:    vmov.32 d10[0], r4
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    vmov.32 d8[0], r0
 ; CHECK-NEXT:    mvn r3, #-2147483648
 ; CHECK-NEXT:    subs r4, r4, r3
-; CHECK-NEXT:    adr r2, .LCPI0_0
-; CHECK-NEXT:    vmov.32 d9[1], r5
-; CHECK-NEXT:    sbcs r5, r5, #0
-; CHECK-NEXT:    mov r5, #0
-; CHECK-NEXT:    mvn r4, #0
-; CHECK-NEXT:    movwlt r5, #1
-; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    mvnne r5, #0
+; CHECK-NEXT:    sbcs r4, r5, #0
+; CHECK-NEXT:    vmov.32 d11[0], r0
+; CHECK-NEXT:    mov r4, #0
+; CHECK-NEXT:    mov r2, #0
+; CHECK-NEXT:    movwlt r4, #1
 ; CHECK-NEXT:    subs r0, r0, r3
 ; CHECK-NEXT:    sbcs r0, r1, #0
-; CHECK-NEXT:    vmov.32 d8[1], r1
+; CHECK-NEXT:    vmov.32 d11[1], r1
 ; CHECK-NEXT:    mov r0, #0
 ; CHECK-NEXT:    vmov.i32 q10, #0x80000000
 ; CHECK-NEXT:    movwlt r0, #1
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    vdup.32 d19, r5
 ; CHECK-NEXT:    mvnne r0, #0
-; CHECK-NEXT:    vld1.64 {d16, d17}, [r2:128]
-; CHECK-NEXT:    mov r2, #0
-; CHECK-NEXT:    vdup.32 d18, r0
-; CHECK-NEXT:    vbit q8, q4, q9
-; CHECK-NEXT:    vmov r0, r1, d17
-; CHECK-NEXT:    vmov r3, r5, d16
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    vmov.32 d10[1], r5
+; CHECK-NEXT:    mvnne r4, #0
+; CHECK-NEXT:    vdup.32 d17, r0
+; CHECK-NEXT:    vdup.32 d16, r4
+; CHECK-NEXT:    mvn r4, #0
+; CHECK-NEXT:    vbsl q8, q5, q4
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r3, r5, d17
 ; CHECK-NEXT:    rsbs r0, r0, #-2147483648
 ; CHECK-NEXT:    sbcs r0, r4, r1
 ; CHECK-NEXT:    mov r0, #0
 ; CHECK-NEXT:    movwlt r0, #1
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mvnne r0, #0
 ; CHECK-NEXT:    rsbs r1, r3, #-2147483648
 ; CHECK-NEXT:    sbcs r1, r4, r5
-; CHECK-NEXT:    vdup.32 d19, r0
 ; CHECK-NEXT:    movwlt r2, #1
 ; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    mvnne r2, #0
-; CHECK-NEXT:    vdup.32 d18, r2
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    vdup.32 d19, r2
+; CHECK-NEXT:    mvnne r0, #0
+; CHECK-NEXT:    vdup.32 d18, r0
 ; CHECK-NEXT:    vbif q8, q10, q9
 ; CHECK-NEXT:    vmovn.i64 d0, q8
-; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    vpop {d8, d9, d10, d11}
 ; CHECK-NEXT:    pop {r4, r5, r11, pc}
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.1:
@@ -95,21 +95,21 @@ define <2 x i32> @utest_f64i32(<2 x double> %x) {
 ; CHECK-NEXT:    vmov.32 d9[0], r4
 ; CHECK-NEXT:    bl __aeabi_d2ulz
 ; CHECK-NEXT:    mvn r3, #0
-; CHECK-NEXT:    subs r4, r4, r3
-; CHECK-NEXT:    sbcs r5, r5, #0
 ; CHECK-NEXT:    vmov.32 d8[0], r0
-; CHECK-NEXT:    mov r5, #0
-; CHECK-NEXT:    mov r2, #0
-; CHECK-NEXT:    movwlo r5, #1
-; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    mvnne r5, #0
 ; CHECK-NEXT:    subs r0, r0, r3
+; CHECK-NEXT:    mov r2, #0
 ; CHECK-NEXT:    sbcs r0, r1, #0
-; CHECK-NEXT:    vdup.32 d17, r5
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    movwlo r0, #1
+; CHECK-NEXT:    subs r1, r4, r3
+; CHECK-NEXT:    sbcs r1, r5, #0
 ; CHECK-NEXT:    movwlo r2, #1
 ; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    mvnne r2, #0
-; CHECK-NEXT:    vdup.32 d16, r2
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    vdup.32 d17, r2
+; CHECK-NEXT:    mvnne r0, #0
+; CHECK-NEXT:    vdup.32 d16, r0
 ; CHECK-NEXT:    vand q9, q4, q8
 ; CHECK-NEXT:    vorn q8, q9, q8
 ; CHECK-NEXT:    vmovn.i64 d0, q8
@@ -131,49 +131,49 @@ define <2 x i32> @ustest_f64i32(<2 x double> %x) {
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
 ; CHECK-NEXT:    vorr q4, q0, q0
-; CHECK-NEXT:    vmov r0, r1, d9
+; CHECK-NEXT:    vmov r0, r1, d8
 ; CHECK-NEXT:    bl __aeabi_d2lz
 ; CHECK-NEXT:    mov r4, r0
 ; CHECK-NEXT:    mov r5, r1
-; CHECK-NEXT:    vmov r0, r1, d8
-; CHECK-NEXT:    vmov.32 d9[0], r4
+; CHECK-NEXT:    vmov r0, r1, d9
+; CHECK-NEXT:    vmov.32 d8[0], r4
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    vmov.32 d8[0], r0
 ; CHECK-NEXT:    mvn r3, #0
 ; CHECK-NEXT:    subs r4, r4, r3
+; CHECK-NEXT:    sbcs r4, r5, #0
+; CHECK-NEXT:    vmov.32 d9[0], r0
+; CHECK-NEXT:    mov r4, #0
 ; CHECK-NEXT:    vmov.i64 q9, #0xffffffff
-; CHECK-NEXT:    vmov.32 d9[1], r5
-; CHECK-NEXT:    sbcs r5, r5, #0
-; CHECK-NEXT:    mov r5, #0
-; CHECK-NEXT:    mov r2, #0
-; CHECK-NEXT:    movwlt r5, #1
-; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    mvnne r5, #0
+; CHECK-NEXT:    movwlt r4, #1
 ; CHECK-NEXT:    subs r0, r0, r3
 ; CHECK-NEXT:    sbcs r0, r1, #0
-; CHECK-NEXT:    vmov.32 d8[1], r1
+; CHECK-NEXT:    vmov.32 d9[1], r1
 ; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    mov r2, #0
 ; CHECK-NEXT:    movwlt r0, #1
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    vdup.32 d17, r5
 ; CHECK-NEXT:    mvnne r0, #0
-; CHECK-NEXT:    vdup.32 d16, r0
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    vmov.32 d8[1], r5
+; CHECK-NEXT:    mvnne r4, #0
+; CHECK-NEXT:    vdup.32 d17, r0
+; CHECK-NEXT:    vdup.32 d16, r4
 ; CHECK-NEXT:    vbsl q8, q4, q9
-; CHECK-NEXT:    vmov r0, r1, d17
-; CHECK-NEXT:    vmov r3, r5, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r3, r5, d17
 ; CHECK-NEXT:    rsbs r0, r0, #0
 ; CHECK-NEXT:    rscs r0, r1, #0
 ; CHECK-NEXT:    mov r0, #0
 ; CHECK-NEXT:    movwlt r0, #1
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mvnne r0, #0
 ; CHECK-NEXT:    rsbs r1, r3, #0
 ; CHECK-NEXT:    rscs r1, r5, #0
-; CHECK-NEXT:    vmov.32 d19[0], r0
 ; CHECK-NEXT:    movwlt r2, #1
 ; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    mvnne r2, #0
-; CHECK-NEXT:    vmov.32 d18[0], r2
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    vmov.32 d19[0], r2
+; CHECK-NEXT:    mvnne r0, #0
+; CHECK-NEXT:    vmov.32 d18[0], r0
 ; CHECK-NEXT:    vand q8, q8, q9
 ; CHECK-NEXT:    vmovn.i64 d0, q8
 ; CHECK-NEXT:    vpop {d8, d9}
@@ -195,103 +195,106 @@ define <4 x i32> @stest_f32i32(<4 x float> %x) {
 ; CHECK-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-NEXT:    .pad #4
 ; CHECK-NEXT:    sub sp, sp, #4
-; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11}
 ; CHECK-NEXT:    vorr q4, q0, q0
-; CHECK-NEXT:    vmov r0, s19
+; CHECK-NEXT:    vmov r0, s16
 ; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    vmov r0, s18
-; CHECK-NEXT:    mov r7, r1
-; CHECK-NEXT:    adr r1, .LCPI3_0
-; CHECK-NEXT:    vld1.64 {d10, d11}, [r1:128]
-; CHECK-NEXT:    vmov r5, s17
-; CHECK-NEXT:    mov r4, #0
-; CHECK-NEXT:    mvn r9, #-2147483648
-; CHECK-NEXT:    vmov.32 d13[0], r6
+; CHECK-NEXT:    mov r8, r1
+; CHECK-NEXT:    vmov r6, s17
+; CHECK-NEXT:    vmov r10, s19
+; CHECK-NEXT:    vmov.32 d8[0], r7
 ; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    subs r2, r6, r9
-; CHECK-NEXT:    vmov.32 d12[0], r0
-; CHECK-NEXT:    sbcs r2, r7, #0
-; CHECK-NEXT:    vmov r8, s16
-; CHECK-NEXT:    mov r2, #0
-; CHECK-NEXT:    vmov.32 d13[1], r7
-; CHECK-NEXT:    movwlt r2, #1
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    mvnne r2, #0
-; CHECK-NEXT:    subs r0, r0, r9
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    vmov.32 d10[0], r0
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r4, r1
+; CHECK-NEXT:    bl __aeabi_f2lz
+; CHECK-NEXT:    mov r9, r0
+; CHECK-NEXT:    vmov.32 d9[0], r0
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r11, r1
+; CHECK-NEXT:    bl __aeabi_f2lz
+; CHECK-NEXT:    mvn r6, #-2147483648
+; CHECK-NEXT:    subs r3, r7, r6
+; CHECK-NEXT:    sbcs r3, r8, #0
+; CHECK-NEXT:    vmov.32 d11[0], r0
+; CHECK-NEXT:    mov r3, #0
+; CHECK-NEXT:    adr r2, .LCPI3_0
+; CHECK-NEXT:    movwlt r3, #1
+; CHECK-NEXT:    subs r7, r5, r6
+; CHECK-NEXT:    sbcs r7, r4, #0
+; CHECK-NEXT:    vmov.32 d11[1], r1
+; CHECK-NEXT:    mov r7, #0
+; CHECK-NEXT:    movwlt r7, #1
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    mvnne r7, #0
+; CHECK-NEXT:    subs r0, r0, r6
 ; CHECK-NEXT:    sbcs r0, r1, #0
-; CHECK-NEXT:    vdup.32 d17, r2
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r2:128]
 ; CHECK-NEXT:    mov r0, #0
-; CHECK-NEXT:    vmov.32 d12[1], r1
+; CHECK-NEXT:    mov r2, #0
 ; CHECK-NEXT:    movwlt r0, #1
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    mvnne r0, #0
-; CHECK-NEXT:    vdup.32 d16, r0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    vorr q4, q8, q8
-; CHECK-NEXT:    vbsl q4, q6, q5
-; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    vmov.32 d13[0], r0
-; CHECK-NEXT:    subs r0, r0, r9
-; CHECK-NEXT:    sbcs r0, r1, #0
-; CHECK-NEXT:    mov r6, #0
-; CHECK-NEXT:    movwlt r6, #1
-; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    vmov r11, r10, d8
-; CHECK-NEXT:    vmov.32 d13[1], r1
-; CHECK-NEXT:    mvnne r6, #0
-; CHECK-NEXT:    vmov r5, r7, d9
-; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    vmov.32 d12[0], r0
-; CHECK-NEXT:    subs r0, r0, r9
-; CHECK-NEXT:    sbcs r0, r1, #0
+; CHECK-NEXT:    vmov.32 d10[1], r4
+; CHECK-NEXT:    vdup.32 d17, r0
+; CHECK-NEXT:    subs r0, r9, r6
+; CHECK-NEXT:    sbcs r0, r11, #0
+; CHECK-NEXT:    vdup.32 d16, r7
 ; CHECK-NEXT:    mov r0, #0
-; CHECK-NEXT:    vdup.32 d17, r6
+; CHECK-NEXT:    vbsl q8, q5, q9
 ; CHECK-NEXT:    movwlt r0, #1
 ; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    vmov.32 d9[1], r11
 ; CHECK-NEXT:    mvnne r0, #0
-; CHECK-NEXT:    vmov.32 d12[1], r1
-; CHECK-NEXT:    rsbs r3, r11, #-2147483648
-; CHECK-NEXT:    vdup.32 d16, r0
-; CHECK-NEXT:    mvn r0, #0
-; CHECK-NEXT:    vbsl q8, q6, q5
-; CHECK-NEXT:    adr r1, .LCPI3_1
-; CHECK-NEXT:    vld1.64 {d18, d19}, [r1:128]
-; CHECK-NEXT:    sbcs r3, r0, r10
-; CHECK-NEXT:    mov r3, #0
-; CHECK-NEXT:    vmov r1, r2, d17
-; CHECK-NEXT:    movwlt r3, #1
 ; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    mvn r6, #0
+; CHECK-NEXT:    vdup.32 d21, r0
 ; CHECK-NEXT:    mvnne r3, #0
-; CHECK-NEXT:    rsbs r6, r5, #-2147483648
-; CHECK-NEXT:    vmov r6, r5, d16
-; CHECK-NEXT:    sbcs r7, r0, r7
+; CHECK-NEXT:    vmov.32 d8[1], r8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vdup.32 d20, r3
+; CHECK-NEXT:    vbit q9, q4, q10
+; CHECK-NEXT:    adr r5, .LCPI3_1
+; CHECK-NEXT:    vld1.64 {d20, d21}, [r5:128]
+; CHECK-NEXT:    vmov r5, r4, d17
+; CHECK-NEXT:    vmov r3, r7, d18
+; CHECK-NEXT:    rsbs r0, r0, #-2147483648
+; CHECK-NEXT:    sbcs r0, r6, r1
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    movwlt r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mvnne r0, #0
+; CHECK-NEXT:    rsbs r1, r3, #-2147483648
+; CHECK-NEXT:    vmov r1, r3, d19
+; CHECK-NEXT:    sbcs r7, r6, r7
 ; CHECK-NEXT:    mov r7, #0
 ; CHECK-NEXT:    movwlt r7, #1
+; CHECK-NEXT:    rsbs r5, r5, #-2147483648
+; CHECK-NEXT:    sbcs r5, r6, r4
+; CHECK-NEXT:    mov r5, #0
+; CHECK-NEXT:    movwlt r5, #1
+; CHECK-NEXT:    rsbs r1, r1, #-2147483648
+; CHECK-NEXT:    sbcs r1, r6, r3
+; CHECK-NEXT:    movwlt r2, #1
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    mvnne r2, #0
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    mvnne r5, #0
 ; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    vdup.32 d25, r5
 ; CHECK-NEXT:    mvnne r7, #0
-; CHECK-NEXT:    vdup.32 d23, r7
-; CHECK-NEXT:    vdup.32 d22, r3
-; CHECK-NEXT:    vbsl q11, q4, q9
-; CHECK-NEXT:    vmovn.i64 d1, q11
-; CHECK-NEXT:    rsbs r1, r1, #-2147483648
-; CHECK-NEXT:    sbcs r1, r0, r2
-; CHECK-NEXT:    mov r1, #0
-; CHECK-NEXT:    movwlt r1, #1
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    mvnne r1, #0
-; CHECK-NEXT:    rsbs r2, r6, #-2147483648
-; CHECK-NEXT:    sbcs r0, r0, r5
-; CHECK-NEXT:    vdup.32 d21, r1
-; CHECK-NEXT:    movwlt r4, #1
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    mvnne r4, #0
-; CHECK-NEXT:    vdup.32 d20, r4
-; CHECK-NEXT:    vbif q8, q9, q10
-; CHECK-NEXT:    vmovn.i64 d0, q8
-; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT:    vdup.32 d23, r2
+; CHECK-NEXT:    vdup.32 d24, r0
+; CHECK-NEXT:    vbif q8, q10, q12
+; CHECK-NEXT:    vdup.32 d22, r7
+; CHECK-NEXT:    vbif q9, q10, q11
+; CHECK-NEXT:    vmovn.i64 d1, q8
+; CHECK-NEXT:    vmovn.i64 d0, q9
+; CHECK-NEXT:    vpop {d8, d9, d10, d11}
 ; CHECK-NEXT:    add sp, sp, #4
 ; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEXT:    .p2align 4
@@ -326,52 +329,52 @@ define <4 x i32> @utest_f32i32(<4 x float> %x) {
 ; CHECK-NEXT:    vorr q4, q0, q0
 ; CHECK-NEXT:    vmov r0, s17
 ; CHECK-NEXT:    bl __aeabi_f2ulz
-; CHECK-NEXT:    mov r10, r0
+; CHECK-NEXT:    mov r9, r0
 ; CHECK-NEXT:    vmov r0, s16
 ; CHECK-NEXT:    mov r8, r1
-; CHECK-NEXT:    vmov r5, s19
+; CHECK-NEXT:    vmov r6, s19
 ; CHECK-NEXT:    vmov r7, s18
-; CHECK-NEXT:    vmov.32 d9[0], r10
+; CHECK-NEXT:    vmov.32 d9[0], r9
 ; CHECK-NEXT:    bl __aeabi_f2ulz
-; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    mov r5, r0
 ; CHECK-NEXT:    vmov.32 d8[0], r0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r9, r1
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r4, r1
 ; CHECK-NEXT:    bl __aeabi_f2ulz
-; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    vmov.32 d11[0], r0
 ; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r4, r1
+; CHECK-NEXT:    mov r10, r1
 ; CHECK-NEXT:    bl __aeabi_f2ulz
-; CHECK-NEXT:    mvn r3, #0
+; CHECK-NEXT:    mvn r7, #0
+; CHECK-NEXT:    subs r2, r5, r7
+; CHECK-NEXT:    sbcs r2, r4, #0
 ; CHECK-NEXT:    vmov.32 d10[0], r0
-; CHECK-NEXT:    subs r0, r0, r3
 ; CHECK-NEXT:    mov r2, #0
+; CHECK-NEXT:    mov r3, #0
+; CHECK-NEXT:    movwlo r2, #1
+; CHECK-NEXT:    subs r0, r0, r7
 ; CHECK-NEXT:    sbcs r0, r1, #0
 ; CHECK-NEXT:    mov r0, #0
 ; CHECK-NEXT:    movwlo r0, #1
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    mvnne r0, #0
-; CHECK-NEXT:    subs r1, r5, r3
-; CHECK-NEXT:    sbcs r1, r4, #0
+; CHECK-NEXT:    subs r1, r6, r7
+; CHECK-NEXT:    sbcs r1, r10, #0
 ; CHECK-NEXT:    mov r1, #0
 ; CHECK-NEXT:    movwlo r1, #1
+; CHECK-NEXT:    subs r7, r9, r7
+; CHECK-NEXT:    sbcs r7, r8, #0
+; CHECK-NEXT:    movwlo r3, #1
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    mvnne r3, #0
 ; CHECK-NEXT:    cmp r1, #0
 ; CHECK-NEXT:    mvnne r1, #0
-; CHECK-NEXT:    subs r7, r10, r3
-; CHECK-NEXT:    sbcs r7, r8, #0
-; CHECK-NEXT:    vdup.32 d19, r1
-; CHECK-NEXT:    mov r7, #0
-; CHECK-NEXT:    vdup.32 d18, r0
-; CHECK-NEXT:    movwlo r7, #1
-; CHECK-NEXT:    cmp r7, #0
-; CHECK-NEXT:    mvnne r7, #0
-; CHECK-NEXT:    subs r3, r6, r3
-; CHECK-NEXT:    sbcs r3, r9, #0
-; CHECK-NEXT:    vdup.32 d17, r7
-; CHECK-NEXT:    movwlo r2, #1
 ; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    vdup.32 d19, r1
 ; CHECK-NEXT:    mvnne r2, #0
+; CHECK-NEXT:    vdup.32 d17, r3
+; CHECK-NEXT:    vdup.32 d18, r0
 ; CHECK-NEXT:    vand q10, q5, q9
 ; CHECK-NEXT:    vdup.32 d16, r2
 ; CHECK-NEXT:    vand q11, q4, q8
@@ -397,96 +400,97 @@ define <4 x i32> @ustest_f32i32(<4 x float> %x) {
 ; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
 ; CHECK-NEXT:    vorr q4, q0, q0
-; CHECK-NEXT:    vmov r0, s19
+; CHECK-NEXT:    vmov r0, s17
 ; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    mov r6, r0
-; CHECK-NEXT:    vmov r0, s18
-; CHECK-NEXT:    mov r7, r1
-; CHECK-NEXT:    vmov r5, s17
-; CHECK-NEXT:    vmov r8, s16
-; CHECK-NEXT:    vmov.32 d9[0], r6
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    vmov r0, s16
+; CHECK-NEXT:    mov r6, r1
 ; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    mvn r9, #0
-; CHECK-NEXT:    subs r2, r6, r9
-; CHECK-NEXT:    sbcs r2, r7, #0
-; CHECK-NEXT:    vmov.32 d8[0], r0
+; CHECK-NEXT:    mov r2, r0
+; CHECK-NEXT:    vmov r0, s18
+; CHECK-NEXT:    vmov.32 d16[0], r2
+; CHECK-NEXT:    mvn r4, #0
+; CHECK-NEXT:    subs r2, r2, r4
+; CHECK-NEXT:    vmov r8, s19
+; CHECK-NEXT:    sbcs r2, r1, #0
+; CHECK-NEXT:    vmov.32 d17[0], r5
 ; CHECK-NEXT:    mov r2, #0
 ; CHECK-NEXT:    vmov.i64 q5, #0xffffffff
 ; CHECK-NEXT:    movwlt r2, #1
+; CHECK-NEXT:    subs r3, r5, r4
+; CHECK-NEXT:    sbcs r3, r6, #0
+; CHECK-NEXT:    vmov.32 d17[1], r6
+; CHECK-NEXT:    mov r3, #0
+; CHECK-NEXT:    mov r7, #0
+; CHECK-NEXT:    movwlt r3, #1
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    mvnne r3, #0
 ; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    vdup.32 d19, r3
 ; CHECK-NEXT:    mvnne r2, #0
-; CHECK-NEXT:    subs r0, r0, r9
-; CHECK-NEXT:    sbcs r0, r1, #0
-; CHECK-NEXT:    vmov.32 d9[1], r7
-; CHECK-NEXT:    mov r0, #0
-; CHECK-NEXT:    mov r4, #0
-; CHECK-NEXT:    movwlt r0, #1
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    vmov.32 d8[1], r1
-; CHECK-NEXT:    mvnne r0, #0
-; CHECK-NEXT:    vdup.32 d17, r2
-; CHECK-NEXT:    vdup.32 d16, r0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    vbif q4, q5, q8
+; CHECK-NEXT:    vdup.32 d18, r2
+; CHECK-NEXT:    vmov.32 d16[1], r1
+; CHECK-NEXT:    vorr q4, q9, q9
+; CHECK-NEXT:    vbsl q4, q8, q5
+; CHECK-NEXT:    vmov r10, r9, d8
 ; CHECK-NEXT:    bl __aeabi_f2lz
 ; CHECK-NEXT:    mov r5, r0
-; CHECK-NEXT:    vmov.32 d13[0], r0
+; CHECK-NEXT:    vmov.32 d12[0], r0
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r6, r1
-; CHECK-NEXT:    vmov r7, r10, d8
 ; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    subs r2, r5, r9
-; CHECK-NEXT:    vmov.32 d12[0], r0
+; CHECK-NEXT:    subs r2, r5, r4
+; CHECK-NEXT:    vmov.32 d13[0], r0
 ; CHECK-NEXT:    sbcs r2, r6, #0
 ; CHECK-NEXT:    mov r2, #0
-; CHECK-NEXT:    vmov.32 d13[1], r6
 ; CHECK-NEXT:    movwlt r2, #1
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    mvnne r2, #0
-; CHECK-NEXT:    subs r0, r0, r9
+; CHECK-NEXT:    subs r0, r0, r4
 ; CHECK-NEXT:    sbcs r0, r1, #0
-; CHECK-NEXT:    vdup.32 d17, r2
+; CHECK-NEXT:    vmov.32 d13[1], r1
 ; CHECK-NEXT:    mov r0, #0
-; CHECK-NEXT:    vmov.32 d12[1], r1
+; CHECK-NEXT:    vmov r5, r4, d9
 ; CHECK-NEXT:    movwlt r0, #1
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    mvnne r0, #0
-; CHECK-NEXT:    vmov r2, r3, d9
-; CHECK-NEXT:    vdup.32 d16, r0
-; CHECK-NEXT:    rsbs r7, r7, #0
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    vmov.32 d12[1], r6
+; CHECK-NEXT:    mvnne r2, #0
+; CHECK-NEXT:    vdup.32 d17, r0
+; CHECK-NEXT:    rsbs r0, r10, #0
+; CHECK-NEXT:    vdup.32 d16, r2
+; CHECK-NEXT:    rscs r0, r9, #0
 ; CHECK-NEXT:    vbsl q8, q6, q5
-; CHECK-NEXT:    rscs r7, r10, #0
-; CHECK-NEXT:    mov r7, #0
-; CHECK-NEXT:    movwlt r7, #1
-; CHECK-NEXT:    cmp r7, #0
-; CHECK-NEXT:    vmov r0, r1, d17
-; CHECK-NEXT:    mvnne r7, #0
-; CHECK-NEXT:    vmov r6, r5, d16
-; CHECK-NEXT:    rsbs r0, r0, #0
-; CHECK-NEXT:    rscs r0, r1, #0
 ; CHECK-NEXT:    mov r0, #0
 ; CHECK-NEXT:    movwlt r0, #1
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mvnne r0, #0
-; CHECK-NEXT:    rsbs r1, r2, #0
-; CHECK-NEXT:    rscs r1, r3, #0
-; CHECK-NEXT:    vmov.32 d19[0], r0
+; CHECK-NEXT:    vmov r1, r2, d16
+; CHECK-NEXT:    vmov r3, r6, d17
+; CHECK-NEXT:    rsbs r1, r1, #0
+; CHECK-NEXT:    rscs r1, r2, #0
 ; CHECK-NEXT:    mov r1, #0
 ; CHECK-NEXT:    movwlt r1, #1
+; CHECK-NEXT:    rsbs r2, r3, #0
+; CHECK-NEXT:    rscs r2, r6, #0
+; CHECK-NEXT:    mov r2, #0
+; CHECK-NEXT:    movwlt r2, #1
+; CHECK-NEXT:    rsbs r3, r5, #0
+; CHECK-NEXT:    rscs r3, r4, #0
+; CHECK-NEXT:    movwlt r7, #1
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    mvnne r7, #0
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    mvnne r2, #0
 ; CHECK-NEXT:    cmp r1, #0
 ; CHECK-NEXT:    mvnne r1, #0
-; CHECK-NEXT:    rsbs r0, r6, #0
-; CHECK-NEXT:    rscs r0, r5, #0
-; CHECK-NEXT:    vmov.32 d21[0], r1
-; CHECK-NEXT:    movwlt r4, #1
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    vmov.32 d20[0], r7
-; CHECK-NEXT:    mvnne r4, #0
-; CHECK-NEXT:    vmov.32 d18[0], r4
-; CHECK-NEXT:    vand q10, q4, q10
-; CHECK-NEXT:    vand q8, q8, q9
-; CHECK-NEXT:    vmovn.i64 d1, q10
-; CHECK-NEXT:    vmovn.i64 d0, q8
+; CHECK-NEXT:    vmov.32 d21[0], r2
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    vmov.32 d20[0], r1
+; CHECK-NEXT:    mvnne r0, #0
+; CHECK-NEXT:    vmov.32 d19[0], r7
+; CHECK-NEXT:    vand q8, q8, q10
+; CHECK-NEXT:    vmov.32 d18[0], r0
+; CHECK-NEXT:    vmovn.i64 d1, q8
+; CHECK-NEXT:    vand q9, q4, q9
+; CHECK-NEXT:    vmovn.i64 d0, q9
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
 ; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
 entry:
@@ -506,108 +510,112 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
 ; CHECK-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-NEON-NEXT:    .pad #4
 ; CHECK-NEON-NEXT:    sub sp, sp, #4
-; CHECK-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEON-NEXT:    vmov r0, s2
+; CHECK-NEON-NEXT:    .vsave {d8, d9, d10, d11}
+; CHECK-NEON-NEXT:    vpush {d8, d9, d10, d11}
+; CHECK-NEON-NEXT:    vmov r0, s0
 ; CHECK-NEON-NEXT:    vmov.f32 s16, s3
-; CHECK-NEON-NEXT:    vmov.f32 s18, s1
-; CHECK-NEON-NEXT:    vmov.f32 s20, s0
+; CHECK-NEON-NEXT:    vmov.f32 s18, s2
+; CHECK-NEON-NEXT:    vmov.f32 s20, s1
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    bl __aeabi_f2lz
-; CHECK-NEON-NEXT:    mov r6, r0
-; CHECK-NEON-NEXT:    vmov r0, s16
-; CHECK-NEON-NEXT:    mov r5, r1
+; CHECK-NEON-NEXT:    mov r9, r0
+; CHECK-NEON-NEXT:    vmov r0, s18
+; CHECK-NEON-NEXT:    vmov r10, s16
+; CHECK-NEON-NEXT:    mov r8, r1
+; CHECK-NEON-NEXT:    vmov r6, s20
+; CHECK-NEON-NEXT:    vmov.32 d8[0], r9
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    bl __aeabi_f2lz
-; CHECK-NEON-NEXT:    vmov r2, s18
-; CHECK-NEON-NEXT:    adr r3, .LCPI6_0
-; CHECK-NEON-NEXT:    vld1.64 {d8, d9}, [r3:128]
-; CHECK-NEON-NEXT:    mvn r9, #-2147483648
-; CHECK-NEON-NEXT:    subs r3, r6, r9
-; CHECK-NEON-NEXT:    mov r4, #0
-; CHECK-NEON-NEXT:    sbcs r3, r5, #0
-; CHECK-NEON-NEXT:    vmov.32 d15[0], r0
-; CHECK-NEON-NEXT:    movwlt r4, #1
-; CHECK-NEON-NEXT:    cmp r4, #0
-; CHECK-NEON-NEXT:    mvnne r4, #0
-; CHECK-NEON-NEXT:    subs r0, r0, r9
-; CHECK-NEON-NEXT:    sbcs r0, r1, #0
-; CHECK-NEON-NEXT:    vmov.32 d14[0], r6
-; CHECK-NEON-NEXT:    mov r0, #0
-; CHECK-NEON-NEXT:    vmov r8, s20
-; CHECK-NEON-NEXT:    movwlt r0, #1
-; CHECK-NEON-NEXT:    cmp r0, #0
-; CHECK-NEON-NEXT:    mvnne r0, #0
-; CHECK-NEON-NEXT:    vmov.32 d15[1], r1
-; CHECK-NEON-NEXT:    mov r7, #0
-; CHECK-NEON-NEXT:    vdup.32 d11, r0
-; CHECK-NEON-NEXT:    vmov.32 d14[1], r5
-; CHECK-NEON-NEXT:    mov r0, r2
+; CHECK-NEON-NEXT:    mov r5, r0
+; CHECK-NEON-NEXT:    vmov.32 d10[0], r0
+; CHECK-NEON-NEXT:    mov r0, r6
+; CHECK-NEON-NEXT:    mov r4, r1
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
-; CHECK-NEON-NEXT:    vdup.32 d10, r4
 ; CHECK-NEON-NEXT:    bl __aeabi_f2lz
-; CHECK-NEON-NEXT:    vmov.32 d13[0], r0
-; CHECK-NEON-NEXT:    subs r0, r0, r9
-; CHECK-NEON-NEXT:    vbsl q5, q7, q4
-; CHECK-NEON-NEXT:    sbcs r0, r1, #0
-; CHECK-NEON-NEXT:    mov r6, #0
-; CHECK-NEON-NEXT:    mov r0, r8
-; CHECK-NEON-NEXT:    movwlt r6, #1
-; CHECK-NEON-NEXT:    cmp r6, #0
-; CHECK-NEON-NEXT:    vmov r11, r10, d10
-; CHECK-NEON-NEXT:    vmov.32 d13[1], r1
-; CHECK-NEON-NEXT:    mvnne r6, #0
-; CHECK-NEON-NEXT:    vmov r5, r4, d11
+; CHECK-NEON-NEXT:    mov r11, r0
+; CHECK-NEON-NEXT:    vmov.32 d9[0], r0
+; CHECK-NEON-NEXT:    mov r0, r10
+; CHECK-NEON-NEXT:    mov r7, r1
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    bl __aeabi_f2lz
-; CHECK-NEON-NEXT:    vmov.32 d12[0], r0
-; CHECK-NEON-NEXT:    subs r0, r0, r9
+; CHECK-NEON-NEXT:    mvn r6, #-2147483648
+; CHECK-NEON-NEXT:    subs r3, r9, r6
+; CHECK-NEON-NEXT:    sbcs r3, r8, #0
+; CHECK-NEON-NEXT:    vmov.32 d11[0], r0
+; CHECK-NEON-NEXT:    mov r3, #0
+; CHECK-NEON-NEXT:    adr r2, .LCPI6_0
+; CHECK-NEON-NEXT:    movwlt r3, #1
+; CHECK-NEON-NEXT:    subs r5, r5, r6
+; CHECK-NEON-NEXT:    sbcs r5, r4, #0
+; CHECK-NEON-NEXT:    vmov.32 d11[1], r1
+; CHECK-NEON-NEXT:    mov r5, #0
+; CHECK-NEON-NEXT:    movwlt r5, #1
+; CHECK-NEON-NEXT:    cmp r5, #0
+; CHECK-NEON-NEXT:    mvnne r5, #0
+; CHECK-NEON-NEXT:    subs r0, r0, r6
 ; CHECK-NEON-NEXT:    sbcs r0, r1, #0
+; CHECK-NEON-NEXT:    vld1.64 {d18, d19}, [r2:128]
 ; CHECK-NEON-NEXT:    mov r0, #0
-; CHECK-NEON-NEXT:    vdup.32 d17, r6
+; CHECK-NEON-NEXT:    mov r2, #0
 ; CHECK-NEON-NEXT:    movwlt r0, #1
 ; CHECK-NEON-NEXT:    cmp r0, #0
 ; CHECK-NEON-NEXT:    mvnne r0, #0
-; CHECK-NEON-NEXT:    vmov.32 d12[1], r1
-; CHECK-NEON-NEXT:    rsbs r3, r11, #-2147483648
-; CHECK-NEON-NEXT:    vdup.32 d16, r0
-; CHECK-NEON-NEXT:    mvn r0, #0
-; CHECK-NEON-NEXT:    vbsl q8, q6, q4
-; CHECK-NEON-NEXT:    adr r1, .LCPI6_1
-; CHECK-NEON-NEXT:    vld1.64 {d18, d19}, [r1:128]
-; CHECK-NEON-NEXT:    sbcs r3, r0, r10
-; CHECK-NEON-NEXT:    mov r3, #0
-; CHECK-NEON-NEXT:    vmov r1, r2, d17
-; CHECK-NEON-NEXT:    movwlt r3, #1
+; CHECK-NEON-NEXT:    vmov.32 d10[1], r4
+; CHECK-NEON-NEXT:    vdup.32 d17, r0
+; CHECK-NEON-NEXT:    subs r0, r11, r6
+; CHECK-NEON-NEXT:    sbcs r0, r7, #0
+; CHECK-NEON-NEXT:    vdup.32 d16, r5
+; CHECK-NEON-NEXT:    mov r0, #0
+; CHECK-NEON-NEXT:    vbsl q8, q5, q9
+; CHECK-NEON-NEXT:    movwlt r0, #1
+; CHECK-NEON-NEXT:    cmp r0, #0
+; CHECK-NEON-NEXT:    vmov.32 d9[1], r7
+; CHECK-NEON-NEXT:    mvnne r0, #0
 ; CHECK-NEON-NEXT:    cmp r3, #0
+; CHECK-NEON-NEXT:    mvn r6, #0
+; CHECK-NEON-NEXT:    vdup.32 d21, r0
 ; CHECK-NEON-NEXT:    mvnne r3, #0
-; CHECK-NEON-NEXT:    rsbs r6, r5, #-2147483648
-; CHECK-NEON-NEXT:    sbcs r6, r0, r4
-; CHECK-NEON-NEXT:    vmov r5, r4, d16
-; CHECK-NEON-NEXT:    mov r6, #0
-; CHECK-NEON-NEXT:    movwlt r6, #1
-; CHECK-NEON-NEXT:    cmp r6, #0
-; CHECK-NEON-NEXT:    mvnne r6, #0
-; CHECK-NEON-NEXT:    vdup.32 d23, r6
-; CHECK-NEON-NEXT:    vdup.32 d22, r3
-; CHECK-NEON-NEXT:    vbsl q11, q5, q9
-; CHECK-NEON-NEXT:    vmovn.i64 d1, q11
-; CHECK-NEON-NEXT:    rsbs r1, r1, #-2147483648
-; CHECK-NEON-NEXT:    sbcs r1, r0, r2
-; CHECK-NEON-NEXT:    mov r1, #0
-; CHECK-NEON-NEXT:    movwlt r1, #1
-; CHECK-NEON-NEXT:    cmp r1, #0
-; CHECK-NEON-NEXT:    mvnne r1, #0
-; CHECK-NEON-NEXT:    rsbs r2, r5, #-2147483648
-; CHECK-NEON-NEXT:    sbcs r0, r0, r4
-; CHECK-NEON-NEXT:    vdup.32 d21, r1
+; CHECK-NEON-NEXT:    vmov.32 d8[1], r8
+; CHECK-NEON-NEXT:    vmov r0, r1, d16
+; CHECK-NEON-NEXT:    vdup.32 d20, r3
+; CHECK-NEON-NEXT:    vbit q9, q4, q10
+; CHECK-NEON-NEXT:    adr r5, .LCPI6_1
+; CHECK-NEON-NEXT:    vld1.64 {d20, d21}, [r5:128]
+; CHECK-NEON-NEXT:    vmov r5, r4, d17
+; CHECK-NEON-NEXT:    vmov r3, r7, d18
+; CHECK-NEON-NEXT:    rsbs r0, r0, #-2147483648
+; CHECK-NEON-NEXT:    sbcs r0, r6, r1
+; CHECK-NEON-NEXT:    mov r0, #0
+; CHECK-NEON-NEXT:    movwlt r0, #1
+; CHECK-NEON-NEXT:    cmp r0, #0
+; CHECK-NEON-NEXT:    mvnne r0, #0
+; CHECK-NEON-NEXT:    rsbs r1, r3, #-2147483648
+; CHECK-NEON-NEXT:    vmov r1, r3, d19
+; CHECK-NEON-NEXT:    sbcs r7, r6, r7
+; CHECK-NEON-NEXT:    mov r7, #0
 ; CHECK-NEON-NEXT:    movwlt r7, #1
+; CHECK-NEON-NEXT:    rsbs r5, r5, #-2147483648
+; CHECK-NEON-NEXT:    sbcs r5, r6, r4
+; CHECK-NEON-NEXT:    mov r5, #0
+; CHECK-NEON-NEXT:    movwlt r5, #1
+; CHECK-NEON-NEXT:    rsbs r1, r1, #-2147483648
+; CHECK-NEON-NEXT:    sbcs r1, r6, r3
+; CHECK-NEON-NEXT:    movwlt r2, #1
+; CHECK-NEON-NEXT:    cmp r2, #0
+; CHECK-NEON-NEXT:    mvnne r2, #0
+; CHECK-NEON-NEXT:    cmp r5, #0
+; CHECK-NEON-NEXT:    mvnne r5, #0
 ; CHECK-NEON-NEXT:    cmp r7, #0
+; CHECK-NEON-NEXT:    vdup.32 d25, r5
 ; CHECK-NEON-NEXT:    mvnne r7, #0
-; CHECK-NEON-NEXT:    vdup.32 d20, r7
-; CHECK-NEON-NEXT:    vbif q8, q9, q10
-; CHECK-NEON-NEXT:    vmovn.i64 d0, q8
-; CHECK-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEON-NEXT:    vdup.32 d23, r2
+; CHECK-NEON-NEXT:    vdup.32 d24, r0
+; CHECK-NEON-NEXT:    vbif q8, q10, q12
+; CHECK-NEON-NEXT:    vdup.32 d22, r7
+; CHECK-NEON-NEXT:    vbif q9, q10, q11
+; CHECK-NEON-NEXT:    vmovn.i64 d1, q8
+; CHECK-NEON-NEXT:    vmovn.i64 d0, q9
+; CHECK-NEON-NEXT:    vpop {d8, d9, d10, d11}
 ; CHECK-NEON-NEXT:    add sp, sp, #4
 ; CHECK-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEON-NEXT:    .p2align 4
@@ -627,104 +635,111 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
 ; CHECK-FP16:       @ %bb.0: @ %entry
 ; CHECK-FP16-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
 ; CHECK-FP16-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-FP16-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
-; CHECK-FP16-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
-; CHECK-FP16-NEXT:    vmov.u16 r0, d0[3]
+; CHECK-FP16-NEXT:    .vsave {d10, d11, d12, d13}
+; CHECK-FP16-NEXT:    vpush {d10, d11, d12, d13}
+; CHECK-FP16-NEXT:    .vsave {d8}
+; CHECK-FP16-NEXT:    vpush {d8}
+; CHECK-FP16-NEXT:    vmov.u16 r0, d0[0]
 ; CHECK-FP16-NEXT:    vorr d8, d0, d0
-; CHECK-FP16-NEXT:    vmov.u16 r8, d0[0]
-; CHECK-FP16-NEXT:    vmov.u16 r9, d0[1]
+; CHECK-FP16-NEXT:    vmov.u16 r6, d0[1]
 ; CHECK-FP16-NEXT:    vmov s0, r0
 ; CHECK-FP16-NEXT:    bl __fixhfdi
 ; CHECK-FP16-NEXT:    mov r4, r0
 ; CHECK-FP16-NEXT:    vmov.u16 r0, d8[2]
-; CHECK-FP16-NEXT:    mov r5, r1
-; CHECK-FP16-NEXT:    vmov.32 d9[0], r4
+; CHECK-FP16-NEXT:    mov r8, r1
+; CHECK-FP16-NEXT:    vmov.32 d10[0], r4
+; CHECK-FP16-NEXT:    vmov s0, r0
+; CHECK-FP16-NEXT:    bl __fixhfdi
+; CHECK-FP16-NEXT:    vmov s0, r6
+; CHECK-FP16-NEXT:    mov r5, r0
+; CHECK-FP16-NEXT:    mov r7, r1
+; CHECK-FP16-NEXT:    vmov.32 d12[0], r0
+; CHECK-FP16-NEXT:    bl __fixhfdi
+; CHECK-FP16-NEXT:    mov r9, r0
+; CHECK-FP16-NEXT:    vmov.u16 r0, d8[3]
+; CHECK-FP16-NEXT:    mov r10, r1
+; CHECK-FP16-NEXT:    vmov.32 d11[0], r9
 ; CHECK-FP16-NEXT:    vmov s0, r0
 ; CHECK-FP16-NEXT:    bl __fixhfdi
+; CHECK-FP16-NEXT:    mvn r6, #-2147483648
+; CHECK-FP16-NEXT:    subs r3, r4, r6
+; CHECK-FP16-NEXT:    sbcs r3, r8, #0
+; CHECK-FP16-NEXT:    vmov.32 d13[0], r0
+; CHECK-FP16-NEXT:    mov r3, #0
 ; CHECK-FP16-NEXT:    adr r2, .LCPI6_0
-; CHECK-FP16-NEXT:    mvn r10, #-2147483648
-; CHECK-FP16-NEXT:    vld1.64 {d10, d11}, [r2:128]
-; CHECK-FP16-NEXT:    subs r2, r4, r10
-; CHECK-FP16-NEXT:    sbcs r2, r5, #0
-; CHECK-FP16-NEXT:    vmov s0, r9
-; CHECK-FP16-NEXT:    mov r2, #0
-; CHECK-FP16-NEXT:    vmov.32 d8[0], r0
-; CHECK-FP16-NEXT:    movwlt r2, #1
-; CHECK-FP16-NEXT:    cmp r2, #0
-; CHECK-FP16-NEXT:    mvnne r2, #0
-; CHECK-FP16-NEXT:    subs r0, r0, r10
+; CHECK-FP16-NEXT:    movwlt r3, #1
+; CHECK-FP16-NEXT:    subs r5, r5, r6
+; CHECK-FP16-NEXT:    sbcs r5, r7, #0
+; CHECK-FP16-NEXT:    vmov.32 d13[1], r1
+; CHECK-FP16-NEXT:    mov r5, #0
+; CHECK-FP16-NEXT:    movwlt r5, #1
+; CHECK-FP16-NEXT:    cmp r5, #0
+; CHECK-FP16-NEXT:    mvnne r5, #0
+; CHECK-FP16-NEXT:    subs r0, r0, r6
 ; CHECK-FP16-NEXT:    sbcs r0, r1, #0
-; CHECK-FP16-NEXT:    vmov.32 d9[1], r5
+; CHECK-FP16-NEXT:    vld1.64 {d18, d19}, [r2:128]
 ; CHECK-FP16-NEXT:    mov r0, #0
-; CHECK-FP16-NEXT:    mov r6, #0
+; CHECK-FP16-NEXT:    mov r2, #0
 ; CHECK-FP16-NEXT:    movwlt r0, #1
 ; CHECK-FP16-NEXT:    cmp r0, #0
-; CHECK-FP16-NEXT:    vmov.32 d8[1], r1
 ; CHECK-FP16-NEXT:    mvnne r0, #0
-; CHECK-FP16-NEXT:    vdup.32 d17, r2
-; CHECK-FP16-NEXT:    vdup.32 d16, r0
-; CHECK-FP16-NEXT:    vbif q4, q5, q8
-; CHECK-FP16-NEXT:    bl __fixhfdi
-; CHECK-FP16-NEXT:    vmov.32 d13[0], r0
-; CHECK-FP16-NEXT:    subs r0, r0, r10
-; CHECK-FP16-NEXT:    vmov s0, r8
-; CHECK-FP16-NEXT:    sbcs r0, r1, #0
-; CHECK-FP16-NEXT:    mov r7, #0
-; CHECK-FP16-NEXT:    vmov r9, r8, d8
-; CHECK-FP16-NEXT:    movwlt r7, #1
-; CHECK-FP16-NEXT:    cmp r7, #0
-; CHECK-FP16-NEXT:    vmov.32 d13[1], r1
-; CHECK-FP16-NEXT:    vmov r5, r4, d9
-; CHECK-FP16-NEXT:    mvnne r7, #0
-; CHECK-FP16-NEXT:    bl __fixhfdi
-; CHECK-FP16-NEXT:    vmov.32 d12[0], r0
-; CHECK-FP16-NEXT:    subs r0, r0, r10
-; CHECK-FP16-NEXT:    sbcs r0, r1, #0
+; CHECK-FP16-NEXT:    vmov.32 d12[1], r7
+; CHECK-FP16-NEXT:    vdup.32 d17, r0
+; CHECK-FP16-NEXT:    subs r0, r9, r6
+; CHECK-FP16-NEXT:    sbcs r0, r10, #0
+; CHECK-FP16-NEXT:    vdup.32 d16, r5
 ; CHECK-FP16-NEXT:    mov r0, #0
-; CHECK-FP16-NEXT:    vdup.32 d17, r7
+; CHECK-FP16-NEXT:    vbsl q8, q6, q9
 ; CHECK-FP16-NEXT:    movwlt r0, #1
 ; CHECK-FP16-NEXT:    cmp r0, #0
+; CHECK-FP16-NEXT:    vmov.32 d11[1], r10
 ; CHECK-FP16-NEXT:    mvnne r0, #0
-; CHECK-FP16-NEXT:    vmov.32 d12[1], r1
-; CHECK-FP16-NEXT:    rsbs r3, r9, #-2147483648
-; CHECK-FP16-NEXT:    vdup.32 d16, r0
-; CHECK-FP16-NEXT:    mvn r0, #0
-; CHECK-FP16-NEXT:    vbsl q8, q6, q5
-; CHECK-FP16-NEXT:    adr r1, .LCPI6_1
-; CHECK-FP16-NEXT:    vld1.64 {d18, d19}, [r1:128]
-; CHECK-FP16-NEXT:    sbcs r3, r0, r8
-; CHECK-FP16-NEXT:    mov r3, #0
-; CHECK-FP16-NEXT:    vmov r1, r2, d17
-; CHECK-FP16-NEXT:    movwlt r3, #1
 ; CHECK-FP16-NEXT:    cmp r3, #0
+; CHECK-FP16-NEXT:    mvn r6, #0
+; CHECK-FP16-NEXT:    vdup.32 d21, r0
 ; CHECK-FP16-NEXT:    mvnne r3, #0
-; CHECK-FP16-NEXT:    rsbs r7, r5, #-2147483648
-; CHECK-FP16-NEXT:    sbcs r7, r0, r4
-; CHECK-FP16-NEXT:    vmov r5, r4, d16
+; CHECK-FP16-NEXT:    vmov.32 d10[1], r8
+; CHECK-FP16-NEXT:    vmov r0, r1, d16
+; CHECK-FP16-NEXT:    vdup.32 d20, r3
+; CHECK-FP16-NEXT:    vbit q9, q5, q10
+; CHECK-FP16-NEXT:    adr r5, .LCPI6_1
+; CHECK-FP16-NEXT:    vld1.64 {d20, d21}, [r5:128]
+; CHECK-FP16-NEXT:    vmov r5, r4, d17
+; CHECK-FP16-NEXT:    vmov r3, r7, d18
+; CHECK-FP16-NEXT:    rsbs r0, r0, #-2147483648
+; CHECK-FP16-NEXT:    sbcs r0, r6, r1
+; CHECK-FP16-NEXT:    mov r0, #0
+; CHECK-FP16-NEXT:    movwlt r0, #1
+; CHECK-FP16-NEXT:    cmp r0, #0
+; CHECK-FP16-NEXT:    mvnne r0, #0
+; CHECK-FP16-NEXT:    rsbs r1, r3, #-2147483648
+; CHECK-FP16-NEXT:    vmov r1, r3, d19
+; CHECK-FP16-NEXT:    sbcs r7, r6, r7
 ; CHECK-FP16-NEXT:    mov r7, #0
 ; CHECK-FP16-NEXT:    movwlt r7, #1
+; CHECK-FP16-NEXT:    rsbs r5, r5, #-2147483648
+; CHECK-FP16-NEXT:    sbcs r5, r6, r4
+; CHECK-FP16-NEXT:    mov r5, #0
+; CHECK-FP16-NEXT:    movwlt r5, #1
+; CHECK-FP16-NEXT:    rsbs r1, r1, #-2147483648
+; CHECK-FP16-NEXT:    sbcs r1, r6, r3
+; CHECK-FP16-NEXT:    movwlt r2, #1
+; CHECK-FP16-NEXT:    cmp r2, #0
+; CHECK-FP16-NEXT:    mvnne r2, #0
+; CHECK-FP16-NEXT:    cmp r5, #0
+; CHECK-FP16-NEXT:    mvnne r5, #0
 ; CHECK-FP16-NEXT:    cmp r7, #0
+; CHECK-FP16-NEXT:    vdup.32 d25, r5
 ; CHECK-FP16-NEXT:    mvnne r7, #0
-; CHECK-FP16-NEXT:    vdup.32 d23, r7
-; CHECK-FP16-NEXT:    vdup.32 d22, r3
-; CHECK-FP16-NEXT:    vbsl q11, q4, q9
-; CHECK-FP16-NEXT:    vmovn.i64 d1, q11
-; CHECK-FP16-NEXT:    rsbs r1, r1, #-2147483648
-; CHECK-FP16-NEXT:    sbcs r1, r0, r2
-; CHECK-FP16-NEXT:    mov r1, #0
-; CHECK-FP16-NEXT:    movwlt r1, #1
-; CHECK-FP16-NEXT:    cmp r1, #0
-; CHECK-FP16-NEXT:    mvnne r1, #0
-; CHECK-FP16-NEXT:    rsbs r2, r5, #-2147483648
-; CHECK-FP16-NEXT:    sbcs r0, r0, r4
-; CHECK-FP16-NEXT:    vdup.32 d21, r1
-; CHECK-FP16-NEXT:    movwlt r6, #1
-; CHECK-FP16-NEXT:    cmp r6, #0
-; CHECK-FP16-NEXT:    mvnne r6, #0
-; CHECK-FP16-NEXT:    vdup.32 d20, r6
-; CHECK-FP16-NEXT:    vbif q8, q9, q10
-; CHECK-FP16-NEXT:    vmovn.i64 d0, q8
-; CHECK-FP16-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
+; CHECK-FP16-NEXT:    vdup.32 d23, r2
+; CHECK-FP16-NEXT:    vdup.32 d24, r0
+; CHECK-FP16-NEXT:    vbif q8, q10, q12
+; CHECK-FP16-NEXT:    vdup.32 d22, r7
+; CHECK-FP16-NEXT:    vbif q9, q10, q11
+; CHECK-FP16-NEXT:    vmovn.i64 d1, q8
+; CHECK-FP16-NEXT:    vmovn.i64 d0, q9
+; CHECK-FP16-NEXT:    vpop {d8}
+; CHECK-FP16-NEXT:    vpop {d10, d11, d12, d13}
 ; CHECK-FP16-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
 ; CHECK-FP16-NEXT:    .p2align 4
 ; CHECK-FP16-NEXT:  @ %bb.1:
@@ -751,8 +766,8 @@ entry:
 define <4 x i32> @utesth_f16i32(<4 x half> %x) {
 ; CHECK-NEON-LABEL: utesth_f16i32:
 ; CHECK-NEON:       @ %bb.0: @ %entry
-; CHECK-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
 ; CHECK-NEON-NEXT:    .vsave {d12, d13}
 ; CHECK-NEON-NEXT:    vpush {d12, d13}
 ; CHECK-NEON-NEXT:    .vsave {d8, d9, d10}
@@ -763,7 +778,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
 ; CHECK-NEON-NEXT:    vmov.f32 s20, s0
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    bl __aeabi_f2ulz
-; CHECK-NEON-NEXT:    mov r4, r0
+; CHECK-NEON-NEXT:    mov r10, r0
 ; CHECK-NEON-NEXT:    vmov r0, s18
 ; CHECK-NEON-NEXT:    mov r8, r1
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
@@ -779,37 +794,37 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
 ; CHECK-NEON-NEXT:    vmov r0, s16
 ; CHECK-NEON-NEXT:    mov r7, r1
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
-; CHECK-NEON-NEXT:    vmov.32 d9[0], r4
+; CHECK-NEON-NEXT:    vmov.32 d9[0], r10
 ; CHECK-NEON-NEXT:    bl __aeabi_f2ulz
-; CHECK-NEON-NEXT:    mvn r3, #0
+; CHECK-NEON-NEXT:    mvn r4, #0
+; CHECK-NEON-NEXT:    subs r2, r5, r4
+; CHECK-NEON-NEXT:    sbcs r2, r7, #0
 ; CHECK-NEON-NEXT:    vmov.32 d8[0], r0
-; CHECK-NEON-NEXT:    subs r0, r0, r3
 ; CHECK-NEON-NEXT:    mov r2, #0
+; CHECK-NEON-NEXT:    mov r3, #0
+; CHECK-NEON-NEXT:    movwlo r2, #1
+; CHECK-NEON-NEXT:    subs r0, r0, r4
 ; CHECK-NEON-NEXT:    sbcs r0, r1, #0
 ; CHECK-NEON-NEXT:    mov r0, #0
 ; CHECK-NEON-NEXT:    movwlo r0, #1
 ; CHECK-NEON-NEXT:    cmp r0, #0
 ; CHECK-NEON-NEXT:    mvnne r0, #0
-; CHECK-NEON-NEXT:    subs r1, r4, r3
+; CHECK-NEON-NEXT:    subs r1, r10, r4
 ; CHECK-NEON-NEXT:    sbcs r1, r8, #0
 ; CHECK-NEON-NEXT:    mov r1, #0
 ; CHECK-NEON-NEXT:    movwlo r1, #1
+; CHECK-NEON-NEXT:    subs r7, r6, r4
+; CHECK-NEON-NEXT:    sbcs r7, r9, #0
+; CHECK-NEON-NEXT:    movwlo r3, #1
+; CHECK-NEON-NEXT:    cmp r3, #0
+; CHECK-NEON-NEXT:    mvnne r3, #0
 ; CHECK-NEON-NEXT:    cmp r1, #0
 ; CHECK-NEON-NEXT:    mvnne r1, #0
-; CHECK-NEON-NEXT:    subs r6, r6, r3
-; CHECK-NEON-NEXT:    sbcs r6, r9, #0
-; CHECK-NEON-NEXT:    vdup.32 d19, r1
-; CHECK-NEON-NEXT:    mov r6, #0
-; CHECK-NEON-NEXT:    vdup.32 d18, r0
-; CHECK-NEON-NEXT:    movwlo r6, #1
-; CHECK-NEON-NEXT:    cmp r6, #0
-; CHECK-NEON-NEXT:    mvnne r6, #0
-; CHECK-NEON-NEXT:    subs r3, r5, r3
-; CHECK-NEON-NEXT:    sbcs r3, r7, #0
-; CHECK-NEON-NEXT:    vdup.32 d17, r6
-; CHECK-NEON-NEXT:    movwlo r2, #1
 ; CHECK-NEON-NEXT:    cmp r2, #0
+; CHECK-NEON-NEXT:    vdup.32 d19, r1
 ; CHECK-NEON-NEXT:    mvnne r2, #0
+; CHECK-NEON-NEXT:    vdup.32 d17, r3
+; CHECK-NEON-NEXT:    vdup.32 d18, r0
 ; CHECK-NEON-NEXT:    vand q10, q4, q9
 ; CHECK-NEON-NEXT:    vdup.32 d16, r2
 ; CHECK-NEON-NEXT:    vand q11, q6, q8
@@ -819,12 +834,12 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
 ; CHECK-NEON-NEXT:    vmovn.i64 d0, q8
 ; CHECK-NEON-NEXT:    vpop {d8, d9, d10}
 ; CHECK-NEON-NEXT:    vpop {d12, d13}
-; CHECK-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
 ;
 ; CHECK-FP16-LABEL: utesth_f16i32:
 ; CHECK-FP16:       @ %bb.0: @ %entry
-; CHECK-FP16-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-FP16-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-FP16-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-FP16-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
 ; CHECK-FP16-NEXT:    .vsave {d8, d9, d10, d11}
 ; CHECK-FP16-NEXT:    vpush {d8, d9, d10, d11}
 ; CHECK-FP16-NEXT:    vmov.u16 r0, d0[1]
@@ -832,52 +847,52 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
 ; CHECK-FP16-NEXT:    vmov.u16 r5, d0[3]
 ; CHECK-FP16-NEXT:    vmov s0, r0
 ; CHECK-FP16-NEXT:    bl __fixunshfdi
-; CHECK-FP16-NEXT:    mov r4, r0
+; CHECK-FP16-NEXT:    mov r10, r0
 ; CHECK-FP16-NEXT:    vmov.u16 r0, d8[0]
 ; CHECK-FP16-NEXT:    mov r8, r1
-; CHECK-FP16-NEXT:    vmov.32 d11[0], r4
+; CHECK-FP16-NEXT:    vmov.32 d11[0], r10
 ; CHECK-FP16-NEXT:    vmov s0, r0
 ; CHECK-FP16-NEXT:    bl __fixunshfdi
 ; CHECK-FP16-NEXT:    vmov s0, r5
 ; CHECK-FP16-NEXT:    mov r6, r0
-; CHECK-FP16-NEXT:    mov r9, r1
+; CHECK-FP16-NEXT:    mov r7, r1
 ; CHECK-FP16-NEXT:    vmov.32 d10[0], r0
 ; CHECK-FP16-NEXT:    bl __fixunshfdi
 ; CHECK-FP16-NEXT:    mov r5, r0
 ; CHECK-FP16-NEXT:    vmov.u16 r0, d8[2]
-; CHECK-FP16-NEXT:    mov r7, r1
+; CHECK-FP16-NEXT:    mov r9, r1
 ; CHECK-FP16-NEXT:    vmov.32 d9[0], r5
 ; CHECK-FP16-NEXT:    vmov s0, r0
 ; CHECK-FP16-NEXT:    bl __fixunshfdi
-; CHECK-FP16-NEXT:    mvn r3, #0
+; CHECK-FP16-NEXT:    mvn r4, #0
+; CHECK-FP16-NEXT:    subs r2, r6, r4
+; CHECK-FP16-NEXT:    sbcs r2, r7, #0
 ; CHECK-FP16-NEXT:    vmov.32 d8[0], r0
-; CHECK-FP16-NEXT:    subs r0, r0, r3
 ; CHECK-FP16-NEXT:    mov r2, #0
+; CHECK-FP16-NEXT:    mov r3, #0
+; CHECK-FP16-NEXT:    movwlo r2, #1
+; CHECK-FP16-NEXT:    subs r0, r0, r4
 ; CHECK-FP16-NEXT:    sbcs r0, r1, #0
 ; CHECK-FP16-NEXT:    mov r0, #0
 ; CHECK-FP16-NEXT:    movwlo r0, #1
 ; CHECK-FP16-NEXT:    cmp r0, #0
 ; CHECK-FP16-NEXT:    mvnne r0, #0
-; CHECK-FP16-NEXT:    subs r1, r5, r3
-; CHECK-FP16-NEXT:    sbcs r1, r7, #0
+; CHECK-FP16-NEXT:    subs r1, r5, r4
+; CHECK-FP16-NEXT:    sbcs r1, r9, #0
 ; CHECK-FP16-NEXT:    mov r1, #0
 ; CHECK-FP16-NEXT:    movwlo r1, #1
+; CHECK-FP16-NEXT:    subs r7, r10, r4
+; CHECK-FP16-NEXT:    sbcs r7, r8, #0
+; CHECK-FP16-NEXT:    movwlo r3, #1
+; CHECK-FP16-NEXT:    cmp r3, #0
+; CHECK-FP16-NEXT:    mvnne r3, #0
 ; CHECK-FP16-NEXT:    cmp r1, #0
 ; CHECK-FP16-NEXT:    mvnne r1, #0
-; CHECK-FP16-NEXT:    subs r7, r4, r3
-; CHECK-FP16-NEXT:    sbcs r7, r8, #0
-; CHECK-FP16-NEXT:    vdup.32 d19, r1
-; CHECK-FP16-NEXT:    mov r7, #0
-; CHECK-FP16-NEXT:    vdup.32 d18, r0
-; CHECK-FP16-NEXT:    movwlo r7, #1
-; CHECK-FP16-NEXT:    cmp r7, #0
-; CHECK-FP16-NEXT:    mvnne r7, #0
-; CHECK-FP16-NEXT:    subs r3, r6, r3
-; CHECK-FP16-NEXT:    sbcs r3, r9, #0
-; CHECK-FP16-NEXT:    vdup.32 d17, r7
-; CHECK-FP16-NEXT:    movwlo r2, #1
 ; CHECK-FP16-NEXT:    cmp r2, #0
+; CHECK-FP16-NEXT:    vdup.32 d19, r1
 ; CHECK-FP16-NEXT:    mvnne r2, #0
+; CHECK-FP16-NEXT:    vdup.32 d17, r3
+; CHECK-FP16-NEXT:    vdup.32 d18, r0
 ; CHECK-FP16-NEXT:    vand q10, q4, q9
 ; CHECK-FP16-NEXT:    vdup.32 d16, r2
 ; CHECK-FP16-NEXT:    vand q11, q5, q8
@@ -886,7 +901,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
 ; CHECK-FP16-NEXT:    vmovn.i64 d1, q9
 ; CHECK-FP16-NEXT:    vmovn.i64 d0, q8
 ; CHECK-FP16-NEXT:    vpop {d8, d9, d10, d11}
-; CHECK-FP16-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-FP16-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
 entry:
   %conv = fptoui <4 x half> %x to <4 x i64>
   %0 = icmp ult <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
@@ -902,207 +917,211 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
 ; CHECK-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
 ; CHECK-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
 ; CHECK-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
-; CHECK-NEON-NEXT:    vmov r0, s3
-; CHECK-NEON-NEXT:    vmov.f32 s16, s2
-; CHECK-NEON-NEXT:    vmov.f32 s18, s1
+; CHECK-NEON-NEXT:    vmov r0, s1
+; CHECK-NEON-NEXT:    vmov.f32 s16, s3
+; CHECK-NEON-NEXT:    vmov.f32 s18, s2
 ; CHECK-NEON-NEXT:    vmov.f32 s20, s0
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    bl __aeabi_f2lz
-; CHECK-NEON-NEXT:    mov r5, r0
-; CHECK-NEON-NEXT:    vmov r0, s16
-; CHECK-NEON-NEXT:    mov r6, r1
+; CHECK-NEON-NEXT:    mov r6, r0
+; CHECK-NEON-NEXT:    vmov r0, s20
+; CHECK-NEON-NEXT:    mov r7, r1
+; CHECK-NEON-NEXT:    vmov r5, s18
+; CHECK-NEON-NEXT:    vmov r8, s16
+; CHECK-NEON-NEXT:    vmov.32 d9[0], r6
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
-; CHECK-NEON-NEXT:    vmov r8, s20
-; CHECK-NEON-NEXT:    vmov.32 d13[0], r5
 ; CHECK-NEON-NEXT:    bl __aeabi_f2lz
-; CHECK-NEON-NEXT:    vmov r2, s18
-; CHECK-NEON-NEXT:    vmov.32 d12[0], r0
+; CHECK-NEON-NEXT:    vmov.32 d8[0], r0
 ; CHECK-NEON-NEXT:    mvn r9, #0
 ; CHECK-NEON-NEXT:    subs r0, r0, r9
+; CHECK-NEON-NEXT:    mov r4, #0
 ; CHECK-NEON-NEXT:    sbcs r0, r1, #0
-; CHECK-NEON-NEXT:    vmov.32 d13[1], r6
+; CHECK-NEON-NEXT:    vmov.32 d9[1], r7
 ; CHECK-NEON-NEXT:    mov r0, #0
-; CHECK-NEON-NEXT:    mov r7, #0
 ; CHECK-NEON-NEXT:    movwlt r0, #1
 ; CHECK-NEON-NEXT:    cmp r0, #0
-; CHECK-NEON-NEXT:    vmov.32 d12[1], r1
+; CHECK-NEON-NEXT:    vmov.32 d8[1], r1
 ; CHECK-NEON-NEXT:    mvnne r0, #0
-; CHECK-NEON-NEXT:    subs r1, r5, r9
-; CHECK-NEON-NEXT:    sbcs r1, r6, #0
+; CHECK-NEON-NEXT:    subs r1, r6, r9
+; CHECK-NEON-NEXT:    sbcs r1, r7, #0
 ; CHECK-NEON-NEXT:    mov r1, #0
 ; CHECK-NEON-NEXT:    movwlt r1, #1
 ; CHECK-NEON-NEXT:    cmp r1, #0
 ; CHECK-NEON-NEXT:    mvnne r1, #0
-; CHECK-NEON-NEXT:    vdup.32 d9, r1
-; CHECK-NEON-NEXT:    vdup.32 d8, r0
-; CHECK-NEON-NEXT:    mov r0, r2
+; CHECK-NEON-NEXT:    vdup.32 d13, r1
+; CHECK-NEON-NEXT:    vdup.32 d12, r0
+; CHECK-NEON-NEXT:    mov r0, r5
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    vmov.i64 q5, #0xffffffff
-; CHECK-NEON-NEXT:    vbsl q4, q6, q5
+; CHECK-NEON-NEXT:    vbif q4, q5, q6
 ; CHECK-NEON-NEXT:    bl __aeabi_f2lz
 ; CHECK-NEON-NEXT:    mov r5, r0
-; CHECK-NEON-NEXT:    vmov.32 d13[0], r0
+; CHECK-NEON-NEXT:    vmov.32 d12[0], r0
 ; CHECK-NEON-NEXT:    mov r0, r8
 ; CHECK-NEON-NEXT:    mov r6, r1
-; CHECK-NEON-NEXT:    vmov r4, r10, d8
+; CHECK-NEON-NEXT:    vmov r7, r10, d8
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    bl __aeabi_f2lz
 ; CHECK-NEON-NEXT:    subs r2, r5, r9
-; CHECK-NEON-NEXT:    vmov.32 d12[0], r0
+; CHECK-NEON-NEXT:    vmov.32 d13[0], r0
 ; CHECK-NEON-NEXT:    sbcs r2, r6, #0
 ; CHECK-NEON-NEXT:    mov r2, #0
-; CHECK-NEON-NEXT:    vmov.32 d13[1], r6
 ; CHECK-NEON-NEXT:    movwlt r2, #1
-; CHECK-NEON-NEXT:    cmp r2, #0
-; CHECK-NEON-NEXT:    mvnne r2, #0
 ; CHECK-NEON-NEXT:    subs r0, r0, r9
 ; CHECK-NEON-NEXT:    sbcs r0, r1, #0
-; CHECK-NEON-NEXT:    vdup.32 d17, r2
+; CHECK-NEON-NEXT:    vmov.32 d13[1], r1
 ; CHECK-NEON-NEXT:    mov r0, #0
-; CHECK-NEON-NEXT:    vmov.32 d12[1], r1
 ; CHECK-NEON-NEXT:    movwlt r0, #1
 ; CHECK-NEON-NEXT:    cmp r0, #0
 ; CHECK-NEON-NEXT:    mvnne r0, #0
-; CHECK-NEON-NEXT:    vmov r2, r3, d9
-; CHECK-NEON-NEXT:    vdup.32 d16, r0
-; CHECK-NEON-NEXT:    rsbs r6, r4, #0
+; CHECK-NEON-NEXT:    cmp r2, #0
+; CHECK-NEON-NEXT:    vmov.32 d12[1], r6
+; CHECK-NEON-NEXT:    mvnne r2, #0
+; CHECK-NEON-NEXT:    vdup.32 d17, r0
+; CHECK-NEON-NEXT:    rsbs r0, r7, #0
+; CHECK-NEON-NEXT:    vdup.32 d16, r2
+; CHECK-NEON-NEXT:    vmov r7, r5, d9
 ; CHECK-NEON-NEXT:    vbsl q8, q6, q5
-; CHECK-NEON-NEXT:    rscs r6, r10, #0
-; CHECK-NEON-NEXT:    mov r6, #0
-; CHECK-NEON-NEXT:    movwlt r6, #1
-; CHECK-NEON-NEXT:    cmp r6, #0
-; CHECK-NEON-NEXT:    vmov r0, r1, d17
-; CHECK-NEON-NEXT:    mvnne r6, #0
-; CHECK-NEON-NEXT:    vmov r5, r4, d16
-; CHECK-NEON-NEXT:    rsbs r0, r0, #0
-; CHECK-NEON-NEXT:    rscs r0, r1, #0
+; CHECK-NEON-NEXT:    rscs r0, r10, #0
 ; CHECK-NEON-NEXT:    mov r0, #0
 ; CHECK-NEON-NEXT:    movwlt r0, #1
-; CHECK-NEON-NEXT:    cmp r0, #0
-; CHECK-NEON-NEXT:    mvnne r0, #0
-; CHECK-NEON-NEXT:    rsbs r1, r2, #0
-; CHECK-NEON-NEXT:    rscs r1, r3, #0
-; CHECK-NEON-NEXT:    vmov.32 d19[0], r0
+; CHECK-NEON-NEXT:    vmov r1, r2, d16
+; CHECK-NEON-NEXT:    vmov r3, r6, d17
+; CHECK-NEON-NEXT:    rsbs r1, r1, #0
+; CHECK-NEON-NEXT:    rscs r1, r2, #0
 ; CHECK-NEON-NEXT:    mov r1, #0
 ; CHECK-NEON-NEXT:    movwlt r1, #1
+; CHECK-NEON-NEXT:    rsbs r2, r3, #0
+; CHECK-NEON-NEXT:    rscs r2, r6, #0
+; CHECK-NEON-NEXT:    mov r2, #0
+; CHECK-NEON-NEXT:    movwlt r2, #1
+; CHECK-NEON-NEXT:    rsbs r3, r7, #0
+; CHECK-NEON-NEXT:    rscs r3, r5, #0
+; CHECK-NEON-NEXT:    movwlt r4, #1
+; CHECK-NEON-NEXT:    cmp r4, #0
+; CHECK-NEON-NEXT:    mvnne r4, #0
+; CHECK-NEON-NEXT:    cmp r2, #0
+; CHECK-NEON-NEXT:    mvnne r2, #0
 ; CHECK-NEON-NEXT:    cmp r1, #0
 ; CHECK-NEON-NEXT:    mvnne r1, #0
-; CHECK-NEON-NEXT:    rsbs r0, r5, #0
-; CHECK-NEON-NEXT:    rscs r0, r4, #0
-; CHECK-NEON-NEXT:    vmov.32 d21[0], r1
-; CHECK-NEON-NEXT:    movwlt r7, #1
-; CHECK-NEON-NEXT:    cmp r7, #0
-; CHECK-NEON-NEXT:    vmov.32 d20[0], r6
-; CHECK-NEON-NEXT:    mvnne r7, #0
-; CHECK-NEON-NEXT:    vmov.32 d18[0], r7
-; CHECK-NEON-NEXT:    vand q10, q4, q10
-; CHECK-NEON-NEXT:    vand q8, q8, q9
-; CHECK-NEON-NEXT:    vmovn.i64 d1, q10
-; CHECK-NEON-NEXT:    vmovn.i64 d0, q8
+; CHECK-NEON-NEXT:    vmov.32 d21[0], r2
+; CHECK-NEON-NEXT:    cmp r0, #0
+; CHECK-NEON-NEXT:    vmov.32 d20[0], r1
+; CHECK-NEON-NEXT:    mvnne r0, #0
+; CHECK-NEON-NEXT:    vmov.32 d19[0], r4
+; CHECK-NEON-NEXT:    vand q8, q8, q10
+; CHECK-NEON-NEXT:    vmov.32 d18[0], r0
+; CHECK-NEON-NEXT:    vmovn.i64 d1, q8
+; CHECK-NEON-NEXT:    vand q9, q4, q9
+; CHECK-NEON-NEXT:    vmovn.i64 d0, q9
 ; CHECK-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
 ; CHECK-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
 ;
 ; CHECK-FP16-LABEL: ustest_f16i32:
 ; CHECK-FP16:       @ %bb.0: @ %entry
-; CHECK-FP16-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-FP16-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-FP16-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
-; CHECK-FP16-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
-; CHECK-FP16-NEXT:    vmov.u16 r0, d0[3]
+; CHECK-FP16-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-FP16-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-FP16-NEXT:    .vsave {d10, d11, d12, d13, d14, d15}
+; CHECK-FP16-NEXT:    vpush {d10, d11, d12, d13, d14, d15}
+; CHECK-FP16-NEXT:    .vsave {d8}
+; CHECK-FP16-NEXT:    vpush {d8}
+; CHECK-FP16-NEXT:    vmov.u16 r0, d0[1]
 ; CHECK-FP16-NEXT:    vorr d8, d0, d0
-; CHECK-FP16-NEXT:    vmov.u16 r8, d0[0]
-; CHECK-FP16-NEXT:    vmov.u16 r9, d0[1]
+; CHECK-FP16-NEXT:    vmov.u16 r8, d0[2]
+; CHECK-FP16-NEXT:    vmov.u16 r9, d0[3]
 ; CHECK-FP16-NEXT:    vmov s0, r0
 ; CHECK-FP16-NEXT:    bl __fixhfdi
 ; CHECK-FP16-NEXT:    mov r4, r0
-; CHECK-FP16-NEXT:    vmov.u16 r0, d8[2]
+; CHECK-FP16-NEXT:    vmov.u16 r0, d8[0]
 ; CHECK-FP16-NEXT:    mov r5, r1
-; CHECK-FP16-NEXT:    vmov.32 d9[0], r4
+; CHECK-FP16-NEXT:    vmov.32 d11[0], r4
 ; CHECK-FP16-NEXT:    vmov s0, r0
 ; CHECK-FP16-NEXT:    bl __fixhfdi
-; CHECK-FP16-NEXT:    mvn r10, #0
-; CHECK-FP16-NEXT:    subs r2, r4, r10
-; CHECK-FP16-NEXT:    sbcs r2, r5, #0
-; CHECK-FP16-NEXT:    vmov.32 d8[0], r0
-; CHECK-FP16-NEXT:    mov r2, #0
-; CHECK-FP16-NEXT:    vmov s0, r9
-; CHECK-FP16-NEXT:    movwlt r2, #1
-; CHECK-FP16-NEXT:    cmp r2, #0
-; CHECK-FP16-NEXT:    mvnne r2, #0
-; CHECK-FP16-NEXT:    subs r0, r0, r10
+; CHECK-FP16-NEXT:    vmov.32 d10[0], r0
+; CHECK-FP16-NEXT:    mvn r7, #0
+; CHECK-FP16-NEXT:    subs r0, r0, r7
+; CHECK-FP16-NEXT:    vmov.i64 q6, #0xffffffff
 ; CHECK-FP16-NEXT:    sbcs r0, r1, #0
-; CHECK-FP16-NEXT:    vmov.32 d9[1], r5
+; CHECK-FP16-NEXT:    vmov.32 d11[1], r5
 ; CHECK-FP16-NEXT:    mov r0, #0
-; CHECK-FP16-NEXT:    vmov.i64 q5, #0xffffffff
+; CHECK-FP16-NEXT:    vmov s0, r8
 ; CHECK-FP16-NEXT:    movwlt r0, #1
 ; CHECK-FP16-NEXT:    cmp r0, #0
-; CHECK-FP16-NEXT:    vmov.32 d8[1], r1
+; CHECK-FP16-NEXT:    vmov.32 d10[1], r1
 ; CHECK-FP16-NEXT:    mvnne r0, #0
+; CHECK-FP16-NEXT:    subs r1, r4, r7
 ; CHECK-FP16-NEXT:    mov r6, #0
-; CHECK-FP16-NEXT:    vdup.32 d17, r2
+; CHECK-FP16-NEXT:    sbcs r1, r5, #0
+; CHECK-FP16-NEXT:    vmov s16, r9
+; CHECK-FP16-NEXT:    mov r1, #0
+; CHECK-FP16-NEXT:    movwlt r1, #1
+; CHECK-FP16-NEXT:    cmp r1, #0
+; CHECK-FP16-NEXT:    mvnne r1, #0
+; CHECK-FP16-NEXT:    vdup.32 d17, r1
 ; CHECK-FP16-NEXT:    vdup.32 d16, r0
-; CHECK-FP16-NEXT:    vbif q4, q5, q8
+; CHECK-FP16-NEXT:    vbif q5, q6, q8
+; CHECK-FP16-NEXT:    vmov r9, r8, d10
 ; CHECK-FP16-NEXT:    bl __fixhfdi
-; CHECK-FP16-NEXT:    vmov s0, r8
+; CHECK-FP16-NEXT:    vmov.f32 s0, s16
 ; CHECK-FP16-NEXT:    mov r4, r0
 ; CHECK-FP16-NEXT:    mov r5, r1
-; CHECK-FP16-NEXT:    vmov.32 d13[0], r0
-; CHECK-FP16-NEXT:    vmov r7, r8, d8
+; CHECK-FP16-NEXT:    vmov.32 d14[0], r0
 ; CHECK-FP16-NEXT:    bl __fixhfdi
-; CHECK-FP16-NEXT:    subs r2, r4, r10
-; CHECK-FP16-NEXT:    vmov.32 d12[0], r0
+; CHECK-FP16-NEXT:    subs r2, r4, r7
+; CHECK-FP16-NEXT:    vmov.32 d15[0], r0
 ; CHECK-FP16-NEXT:    sbcs r2, r5, #0
 ; CHECK-FP16-NEXT:    mov r2, #0
-; CHECK-FP16-NEXT:    vmov.32 d13[1], r5
 ; CHECK-FP16-NEXT:    movwlt r2, #1
-; CHECK-FP16-NEXT:    cmp r2, #0
-; CHECK-FP16-NEXT:    mvnne r2, #0
-; CHECK-FP16-NEXT:    subs r0, r0, r10
+; CHECK-FP16-NEXT:    subs r0, r0, r7
 ; CHECK-FP16-NEXT:    sbcs r0, r1, #0
-; CHECK-FP16-NEXT:    vdup.32 d17, r2
+; CHECK-FP16-NEXT:    vmov.32 d15[1], r1
 ; CHECK-FP16-NEXT:    mov r0, #0
-; CHECK-FP16-NEXT:    vmov.32 d12[1], r1
 ; CHECK-FP16-NEXT:    movwlt r0, #1
 ; CHECK-FP16-NEXT:    cmp r0, #0
 ; CHECK-FP16-NEXT:    mvnne r0, #0
-; CHECK-FP16-NEXT:    vmov r2, r3, d9
-; CHECK-FP16-NEXT:    vdup.32 d16, r0
-; CHECK-FP16-NEXT:    rsbs r7, r7, #0
-; CHECK-FP16-NEXT:    vbsl q8, q6, q5
-; CHECK-FP16-NEXT:    rscs r7, r8, #0
-; CHECK-FP16-NEXT:    mov r7, #0
-; CHECK-FP16-NEXT:    movwlt r7, #1
-; CHECK-FP16-NEXT:    cmp r7, #0
-; CHECK-FP16-NEXT:    vmov r0, r1, d17
-; CHECK-FP16-NEXT:    mvnne r7, #0
-; CHECK-FP16-NEXT:    vmov r5, r4, d16
-; CHECK-FP16-NEXT:    rsbs r0, r0, #0
-; CHECK-FP16-NEXT:    rscs r0, r1, #0
+; CHECK-FP16-NEXT:    cmp r2, #0
+; CHECK-FP16-NEXT:    vmov.32 d14[1], r5
+; CHECK-FP16-NEXT:    mvnne r2, #0
+; CHECK-FP16-NEXT:    vmov r5, r4, d11
+; CHECK-FP16-NEXT:    vdup.32 d17, r0
+; CHECK-FP16-NEXT:    rsbs r0, r9, #0
+; CHECK-FP16-NEXT:    vdup.32 d16, r2
+; CHECK-FP16-NEXT:    rscs r0, r8, #0
+; CHECK-FP16-NEXT:    vbsl q8, q7, q6
 ; CHECK-FP16-NEXT:    mov r0, #0
 ; CHECK-FP16-NEXT:    movwlt r0, #1
-; CHECK-FP16-NEXT:    cmp r0, #0
-; CHECK-FP16-NEXT:    mvnne r0, #0
-; CHECK-FP16-NEXT:    rsbs r1, r2, #0
-; CHECK-FP16-NEXT:    rscs r1, r3, #0
-; CHECK-FP16-NEXT:    vmov.32 d19[0], r0
+; CHECK-FP16-NEXT:    vmov r1, r2, d16
+; CHECK-FP16-NEXT:    vmov r3, r7, d17
+; CHECK-FP16-NEXT:    rsbs r1, r1, #0
+; CHECK-FP16-NEXT:    rscs r1, r2, #0
 ; CHECK-FP16-NEXT:    mov r1, #0
 ; CHECK-FP16-NEXT:    movwlt r1, #1
-; CHECK-FP16-NEXT:    cmp r1, #0
-; CHECK-FP16-NEXT:    mvnne r1, #0
-; CHECK-FP16-NEXT:    rsbs r0, r5, #0
-; CHECK-FP16-NEXT:    rscs r0, r4, #0
-; CHECK-FP16-NEXT:    vmov.32 d21[0], r1
+; CHECK-FP16-NEXT:    rsbs r2, r3, #0
+; CHECK-FP16-NEXT:    rscs r2, r7, #0
+; CHECK-FP16-NEXT:    mov r2, #0
+; CHECK-FP16-NEXT:    movwlt r2, #1
+; CHECK-FP16-NEXT:    rsbs r3, r5, #0
+; CHECK-FP16-NEXT:    rscs r3, r4, #0
 ; CHECK-FP16-NEXT:    movwlt r6, #1
 ; CHECK-FP16-NEXT:    cmp r6, #0
-; CHECK-FP16-NEXT:    vmov.32 d20[0], r7
 ; CHECK-FP16-NEXT:    mvnne r6, #0
-; CHECK-FP16-NEXT:    vmov.32 d18[0], r6
-; CHECK-FP16-NEXT:    vand q10, q4, q10
-; CHECK-FP16-NEXT:    vand q8, q8, q9
-; CHECK-FP16-NEXT:    vmovn.i64 d1, q10
-; CHECK-FP16-NEXT:    vmovn.i64 d0, q8
-; CHECK-FP16-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
-; CHECK-FP16-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+; CHECK-FP16-NEXT:    cmp r2, #0
+; CHECK-FP16-NEXT:    mvnne r2, #0
+; CHECK-FP16-NEXT:    cmp r1, #0
+; CHECK-FP16-NEXT:    mvnne r1, #0
+; CHECK-FP16-NEXT:    vmov.32 d21[0], r2
+; CHECK-FP16-NEXT:    cmp r0, #0
+; CHECK-FP16-NEXT:    vmov.32 d20[0], r1
+; CHECK-FP16-NEXT:    mvnne r0, #0
+; CHECK-FP16-NEXT:    vmov.32 d19[0], r6
+; CHECK-FP16-NEXT:    vand q8, q8, q10
+; CHECK-FP16-NEXT:    vmov.32 d18[0], r0
+; CHECK-FP16-NEXT:    vmovn.i64 d1, q8
+; CHECK-FP16-NEXT:    vand q9, q5, q9
+; CHECK-FP16-NEXT:    vmovn.i64 d0, q9
+; CHECK-FP16-NEXT:    vpop {d8}
+; CHECK-FP16-NEXT:    vpop {d10, d11, d12, d13, d14, d15}
+; CHECK-FP16-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
 entry:
   %conv = fptosi <4 x half> %x to <4 x i64>
   %0 = icmp slt <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
@@ -1624,56 +1643,59 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) {
 ; CHECK-NEXT:    vorr q4, q0, q0
 ; CHECK-NEXT:    vorr d0, d9, d9
 ; CHECK-NEXT:    bl __fixdfti
-; CHECK-NEXT:    mov r4, r1
-; CHECK-NEXT:    mvn r9, #0
-; CHECK-NEXT:    subs r1, r0, r9
-; CHECK-NEXT:    mvn r5, #-2147483648
-; CHECK-NEXT:    sbcs r1, r4, r5
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    mvn r8, #0
+; CHECK-NEXT:    subs r0, r0, r8
+; CHECK-NEXT:    mvn r6, #-2147483648
+; CHECK-NEXT:    sbcs r0, r1, r6
+; CHECK-NEXT:    mov r10, r1
+; CHECK-NEXT:    sbcs r0, r2, #0
 ; CHECK-NEXT:    vorr d0, d8, d8
-; CHECK-NEXT:    sbcs r1, r2, #0
+; CHECK-NEXT:    sbcs r0, r3, #0
 ; CHECK-NEXT:    mov r7, #0
-; CHECK-NEXT:    sbcs r1, r3, #0
-; CHECK-NEXT:    mov r8, #-2147483648
-; CHECK-NEXT:    mov r1, #0
-; CHECK-NEXT:    mov r10, #0
-; CHECK-NEXT:    movwlt r1, #1
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    moveq r3, r1
-; CHECK-NEXT:    movne r1, r2
-; CHECK-NEXT:    moveq r4, r5
-; CHECK-NEXT:    moveq r0, r9
-; CHECK-NEXT:    rsbs r2, r0, #0
-; CHECK-NEXT:    rscs r2, r4, #-2147483648
-; CHECK-NEXT:    sbcs r1, r9, r1
-; CHECK-NEXT:    sbcs r1, r9, r3
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    mov r9, #0
+; CHECK-NEXT:    movwlt r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    moveq r3, r0
+; CHECK-NEXT:    movne r0, r2
+; CHECK-NEXT:    moveq r10, r6
+; CHECK-NEXT:    moveq r5, r8
+; CHECK-NEXT:    rsbs r1, r5, #0
+; CHECK-NEXT:    rscs r1, r10, #-2147483648
+; CHECK-NEXT:    sbcs r0, r8, r0
+; CHECK-NEXT:    sbcs r0, r8, r3
 ; CHECK-NEXT:    movwlt r7, #1
 ; CHECK-NEXT:    cmp r7, #0
-; CHECK-NEXT:    movne r7, r0
-; CHECK-NEXT:    moveq r4, r8
+; CHECK-NEXT:    moveq r5, r7
 ; CHECK-NEXT:    bl __fixdfti
-; CHECK-NEXT:    subs r6, r0, r9
-; CHECK-NEXT:    vmov.32 d1[0], r7
-; CHECK-NEXT:    sbcs r6, r1, r5
-; CHECK-NEXT:    sbcs r6, r2, #0
-; CHECK-NEXT:    sbcs r6, r3, #0
-; CHECK-NEXT:    mov r6, #0
-; CHECK-NEXT:    movwlt r6, #1
-; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    moveq r3, r6
-; CHECK-NEXT:    movne r6, r2
-; CHECK-NEXT:    movne r5, r1
-; CHECK-NEXT:    moveq r0, r9
+; CHECK-NEXT:    subs r4, r0, r8
+; CHECK-NEXT:    vmov.32 d1[0], r5
+; CHECK-NEXT:    sbcs r4, r1, r6
+; CHECK-NEXT:    sbcs r4, r2, #0
+; CHECK-NEXT:    sbcs r4, r3, #0
+; CHECK-NEXT:    mov r4, #0
+; CHECK-NEXT:    movwlt r4, #1
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    moveq r3, r4
+; CHECK-NEXT:    movne r6, r1
+; CHECK-NEXT:    movne r4, r2
+; CHECK-NEXT:    moveq r0, r8
 ; CHECK-NEXT:    rsbs r1, r0, #0
-; CHECK-NEXT:    rscs r1, r5, #-2147483648
-; CHECK-NEXT:    sbcs r1, r9, r6
-; CHECK-NEXT:    sbcs r1, r9, r3
-; CHECK-NEXT:    movwlt r10, #1
-; CHECK-NEXT:    cmp r10, #0
-; CHECK-NEXT:    movne r10, r0
-; CHECK-NEXT:    moveq r5, r8
-; CHECK-NEXT:    vmov.32 d0[0], r10
-; CHECK-NEXT:    vmov.32 d1[1], r4
-; CHECK-NEXT:    vmov.32 d0[1], r5
+; CHECK-NEXT:    rscs r1, r6, #-2147483648
+; CHECK-NEXT:    sbcs r1, r8, r4
+; CHECK-NEXT:    sbcs r1, r8, r3
+; CHECK-NEXT:    movwlt r9, #1
+; CHECK-NEXT:    cmp r9, #0
+; CHECK-NEXT:    moveq r0, r9
+; CHECK-NEXT:    mov r1, #-2147483648
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    vmov.32 d0[0], r0
+; CHECK-NEXT:    moveq r10, r1
+; CHECK-NEXT:    cmp r9, #0
+; CHECK-NEXT:    vmov.32 d1[1], r10
+; CHECK-NEXT:    moveq r6, r1
+; CHECK-NEXT:    vmov.32 d0[1], r6
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
 entry:
@@ -1689,36 +1711,39 @@ entry:
 define <2 x i64> @utest_f64i64(<2 x double> %x) {
 ; CHECK-LABEL: utest_f64i64:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r6, lr}
-; CHECK-NEXT:    push {r4, r5, r6, lr}
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, r11, lr}
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
 ; CHECK-NEXT:    vorr q4, q0, q0
 ; CHECK-NEXT:    vorr d0, d9, d9
 ; CHECK-NEXT:    bl __fixunsdfti
-; CHECK-NEXT:    mov r4, r1
-; CHECK-NEXT:    subs r1, r2, #1
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    subs r0, r2, #1
 ; CHECK-NEXT:    vorr d0, d8, d8
-; CHECK-NEXT:    sbcs r1, r3, #0
+; CHECK-NEXT:    sbcs r0, r3, #0
+; CHECK-NEXT:    mov r7, #0
+; CHECK-NEXT:    mov r4, r1
+; CHECK-NEXT:    movwlo r7, #1
+; CHECK-NEXT:    cmp r7, #0
 ; CHECK-NEXT:    mov r6, #0
-; CHECK-NEXT:    mov r5, #0
-; CHECK-NEXT:    movwlo r6, #1
-; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    moveq r4, r6
-; CHECK-NEXT:    movne r6, r0
+; CHECK-NEXT:    moveq r5, r7
 ; CHECK-NEXT:    bl __fixunsdfti
 ; CHECK-NEXT:    subs r2, r2, #1
-; CHECK-NEXT:    vmov.32 d1[0], r6
+; CHECK-NEXT:    vmov.32 d1[0], r5
 ; CHECK-NEXT:    sbcs r2, r3, #0
-; CHECK-NEXT:    movwlo r5, #1
-; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    moveq r0, r5
-; CHECK-NEXT:    movne r5, r1
+; CHECK-NEXT:    movwlo r6, #1
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    moveq r0, r6
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    movne r7, r4
 ; CHECK-NEXT:    vmov.32 d0[0], r0
-; CHECK-NEXT:    vmov.32 d1[1], r4
-; CHECK-NEXT:    vmov.32 d0[1], r5
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    vmov.32 d1[1], r7
+; CHECK-NEXT:    movne r6, r1
+; CHECK-NEXT:    vmov.32 d0[1], r6
 ; CHECK-NEXT:    vpop {d8, d9}
-; CHECK-NEXT:    pop {r4, r5, r6, pc}
+; CHECK-NEXT:    pop {r4, r5, r6, r7, r11, pc}
 entry:
   %conv = fptoui <2 x double> %x to <2 x i128>
   %0 = icmp ult <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
@@ -1730,26 +1755,26 @@ entry:
 define <2 x i64> @ustest_f64i64(<2 x double> %x) {
 ; CHECK-LABEL: ustest_f64i64:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
 ; CHECK-NEXT:    vorr q4, q0, q0
 ; CHECK-NEXT:    vorr d0, d9, d9
 ; CHECK-NEXT:    bl __fixdfti
-; CHECK-NEXT:    mov r4, r1
+; CHECK-NEXT:    mov r8, r1
 ; CHECK-NEXT:    subs r1, r2, #1
 ; CHECK-NEXT:    sbcs r1, r3, #0
-; CHECK-NEXT:    mov r8, #1
-; CHECK-NEXT:    mov r1, #0
-; CHECK-NEXT:    movge r2, r8
-; CHECK-NEXT:    movwlt r1, #1
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    moveq r3, r1
-; CHECK-NEXT:    moveq r4, r1
-; CHECK-NEXT:    movne r1, r0
-; CHECK-NEXT:    rsbs r0, r1, #0
-; CHECK-NEXT:    rscs r0, r4, #0
+; CHECK-NEXT:    mov r6, #0
+; CHECK-NEXT:    movwlt r6, #1
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    mov r9, #1
+; CHECK-NEXT:    moveq r3, r6
+; CHECK-NEXT:    moveq r8, r6
+; CHECK-NEXT:    moveq r2, r9
+; CHECK-NEXT:    movne r6, r0
+; CHECK-NEXT:    rsbs r0, r6, #0
+; CHECK-NEXT:    rscs r0, r8, #0
 ; CHECK-NEXT:    vorr d0, d8, d8
 ; CHECK-NEXT:    rscs r0, r2, #0
 ; CHECK-NEXT:    mov r7, #0
@@ -1757,32 +1782,34 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) {
 ; CHECK-NEXT:    mov r5, #0
 ; CHECK-NEXT:    movwlt r7, #1
 ; CHECK-NEXT:    cmp r7, #0
-; CHECK-NEXT:    moveq r4, r7
-; CHECK-NEXT:    movne r7, r1
+; CHECK-NEXT:    moveq r6, r7
 ; CHECK-NEXT:    bl __fixdfti
-; CHECK-NEXT:    subs r6, r2, #1
-; CHECK-NEXT:    vmov.32 d1[0], r7
-; CHECK-NEXT:    sbcs r6, r3, #0
-; CHECK-NEXT:    movlt r8, r2
-; CHECK-NEXT:    mov r2, #0
-; CHECK-NEXT:    movwlt r2, #1
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    moveq r3, r2
-; CHECK-NEXT:    moveq r1, r2
-; CHECK-NEXT:    movne r2, r0
-; CHECK-NEXT:    rsbs r0, r2, #0
+; CHECK-NEXT:    subs r4, r2, #1
+; CHECK-NEXT:    vmov.32 d1[0], r6
+; CHECK-NEXT:    sbcs r4, r3, #0
+; CHECK-NEXT:    mov r4, #0
+; CHECK-NEXT:    movwlt r4, #1
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    movne r9, r2
+; CHECK-NEXT:    moveq r3, r4
+; CHECK-NEXT:    moveq r1, r4
+; CHECK-NEXT:    movne r4, r0
+; CHECK-NEXT:    rsbs r0, r4, #0
 ; CHECK-NEXT:    rscs r0, r1, #0
-; CHECK-NEXT:    rscs r0, r8, #0
+; CHECK-NEXT:    rscs r0, r9, #0
 ; CHECK-NEXT:    rscs r0, r3, #0
 ; CHECK-NEXT:    movwlt r5, #1
 ; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    moveq r2, r5
+; CHECK-NEXT:    moveq r4, r5
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    movne r7, r8
+; CHECK-NEXT:    vmov.32 d0[0], r4
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    vmov.32 d1[1], r7
 ; CHECK-NEXT:    movne r5, r1
-; CHECK-NEXT:    vmov.32 d0[0], r2
-; CHECK-NEXT:    vmov.32 d1[1], r4
 ; CHECK-NEXT:    vmov.32 d0[1], r5
 ; CHECK-NEXT:    vpop {d8, d9}
-; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
 entry:
   %conv = fptosi <2 x double> %x to <2 x i128>
   %0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
@@ -1803,56 +1830,59 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) {
 ; CHECK-NEXT:    vmov.f64 d8, d0
 ; CHECK-NEXT:    vmov.f32 s0, s17
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    mov r4, r1
-; CHECK-NEXT:    mvn r9, #0
-; CHECK-NEXT:    subs r1, r0, r9
-; CHECK-NEXT:    mvn r5, #-2147483648
-; CHECK-NEXT:    sbcs r1, r4, r5
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    mvn r8, #0
+; CHECK-NEXT:    subs r0, r0, r8
+; CHECK-NEXT:    mvn r6, #-2147483648
+; CHECK-NEXT:    sbcs r0, r1, r6
 ; CHECK-NEXT:    vmov.f32 s0, s16
-; CHECK-NEXT:    sbcs r1, r2, #0
+; CHECK-NEXT:    sbcs r0, r2, #0
+; CHECK-NEXT:    mov r10, r1
+; CHECK-NEXT:    sbcs r0, r3, #0
 ; CHECK-NEXT:    mov r7, #0
-; CHECK-NEXT:    sbcs r1, r3, #0
-; CHECK-NEXT:    mov r8, #-2147483648
-; CHECK-NEXT:    mov r1, #0
-; CHECK-NEXT:    mov r10, #0
-; CHECK-NEXT:    movwlt r1, #1
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    moveq r3, r1
-; CHECK-NEXT:    movne r1, r2
-; CHECK-NEXT:    moveq r4, r5
-; CHECK-NEXT:    moveq r0, r9
-; CHECK-NEXT:    rsbs r2, r0, #0
-; CHECK-NEXT:    rscs r2, r4, #-2147483648
-; CHECK-NEXT:    sbcs r1, r9, r1
-; CHECK-NEXT:    sbcs r1, r9, r3
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    mov r9, #0
+; CHECK-NEXT:    movwlt r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    moveq r3, r0
+; CHECK-NEXT:    movne r0, r2
+; CHECK-NEXT:    moveq r10, r6
+; CHECK-NEXT:    moveq r5, r8
+; CHECK-NEXT:    rsbs r1, r5, #0
+; CHECK-NEXT:    rscs r1, r10, #-2147483648
+; CHECK-NEXT:    sbcs r0, r8, r0
+; CHECK-NEXT:    sbcs r0, r8, r3
 ; CHECK-NEXT:    movwlt r7, #1
 ; CHECK-NEXT:    cmp r7, #0
-; CHECK-NEXT:    movne r7, r0
-; CHECK-NEXT:    moveq r4, r8
+; CHECK-NEXT:    moveq r5, r7
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    subs r6, r0, r9
-; CHECK-NEXT:    vmov.32 d1[0], r7
-; CHECK-NEXT:    sbcs r6, r1, r5
-; CHECK-NEXT:    sbcs r6, r2, #0
-; CHECK-NEXT:    sbcs r6, r3, #0
-; CHECK-NEXT:    mov r6, #0
-; CHECK-NEXT:    movwlt r6, #1
-; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    moveq r3, r6
-; CHECK-NEXT:    movne r6, r2
-; CHECK-NEXT:    movne r5, r1
-; CHECK-NEXT:    moveq r0, r9
+; CHECK-NEXT:    subs r4, r0, r8
+; CHECK-NEXT:    vmov.32 d1[0], r5
+; CHECK-NEXT:    sbcs r4, r1, r6
+; CHECK-NEXT:    sbcs r4, r2, #0
+; CHECK-NEXT:    sbcs r4, r3, #0
+; CHECK-NEXT:    mov r4, #0
+; CHECK-NEXT:    movwlt r4, #1
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    moveq r3, r4
+; CHECK-NEXT:    movne r6, r1
+; CHECK-NEXT:    movne r4, r2
+; CHECK-NEXT:    moveq r0, r8
 ; CHECK-NEXT:    rsbs r1, r0, #0
-; CHECK-NEXT:    rscs r1, r5, #-2147483648
-; CHECK-NEXT:    sbcs r1, r9, r6
-; CHECK-NEXT:    sbcs r1, r9, r3
-; CHECK-NEXT:    movwlt r10, #1
-; CHECK-NEXT:    cmp r10, #0
-; CHECK-NEXT:    movne r10, r0
-; CHECK-NEXT:    moveq r5, r8
-; CHECK-NEXT:    vmov.32 d0[0], r10
-; CHECK-NEXT:    vmov.32 d1[1], r4
-; CHECK-NEXT:    vmov.32 d0[1], r5
+; CHECK-NEXT:    rscs r1, r6, #-2147483648
+; CHECK-NEXT:    sbcs r1, r8, r4
+; CHECK-NEXT:    sbcs r1, r8, r3
+; CHECK-NEXT:    movwlt r9, #1
+; CHECK-NEXT:    cmp r9, #0
+; CHECK-NEXT:    moveq r0, r9
+; CHECK-NEXT:    mov r1, #-2147483648
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    vmov.32 d0[0], r0
+; CHECK-NEXT:    moveq r10, r1
+; CHECK-NEXT:    cmp r9, #0
+; CHECK-NEXT:    vmov.32 d1[1], r10
+; CHECK-NEXT:    moveq r6, r1
+; CHECK-NEXT:    vmov.32 d0[1], r6
 ; CHECK-NEXT:    vpop {d8}
 ; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
 entry:
@@ -1868,36 +1898,39 @@ entry:
 define <2 x i64> @utest_f32i64(<2 x float> %x) {
 ; CHECK-LABEL: utest_f32i64:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r6, lr}
-; CHECK-NEXT:    push {r4, r5, r6, lr}
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, r11, lr}
 ; CHECK-NEXT:    .vsave {d8}
 ; CHECK-NEXT:    vpush {d8}
 ; CHECK-NEXT:    vmov.f64 d8, d0
 ; CHECK-NEXT:    vmov.f32 s0, s17
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    vmov.f32 s0, s16
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    subs r0, r2, #1
+; CHECK-NEXT:    mov r7, #0
+; CHECK-NEXT:    sbcs r0, r3, #0
 ; CHECK-NEXT:    mov r4, r1
-; CHECK-NEXT:    subs r1, r2, #1
+; CHECK-NEXT:    movwlo r7, #1
+; CHECK-NEXT:    cmp r7, #0
 ; CHECK-NEXT:    mov r6, #0
-; CHECK-NEXT:    sbcs r1, r3, #0
-; CHECK-NEXT:    mov r5, #0
-; CHECK-NEXT:    movwlo r6, #1
-; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    moveq r4, r6
-; CHECK-NEXT:    movne r6, r0
+; CHECK-NEXT:    moveq r5, r7
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    subs r2, r2, #1
-; CHECK-NEXT:    vmov.32 d1[0], r6
+; CHECK-NEXT:    vmov.32 d1[0], r5
 ; CHECK-NEXT:    sbcs r2, r3, #0
-; CHECK-NEXT:    movwlo r5, #1
-; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    moveq r0, r5
-; CHECK-NEXT:    movne r5, r1
+; CHECK-NEXT:    movwlo r6, #1
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    moveq r0, r6
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    movne r7, r4
 ; CHECK-NEXT:    vmov.32 d0[0], r0
-; CHECK-NEXT:    vmov.32 d1[1], r4
-; CHECK-NEXT:    vmov.32 d0[1], r5
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    vmov.32 d1[1], r7
+; CHECK-NEXT:    movne r6, r1
+; CHECK-NEXT:    vmov.32 d0[1], r6
 ; CHECK-NEXT:    vpop {d8}
-; CHECK-NEXT:    pop {r4, r5, r6, pc}
+; CHECK-NEXT:    pop {r4, r5, r6, r7, r11, pc}
 entry:
   %conv = fptoui <2 x float> %x to <2 x i128>
   %0 = icmp ult <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
@@ -1909,59 +1942,61 @@ entry:
 define <2 x i64> @ustest_f32i64(<2 x float> %x) {
 ; CHECK-LABEL: ustest_f32i64:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
 ; CHECK-NEXT:    .vsave {d8}
 ; CHECK-NEXT:    vpush {d8}
 ; CHECK-NEXT:    vmov.f64 d8, d0
 ; CHECK-NEXT:    vmov.f32 s0, s17
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    mov r4, r1
+; CHECK-NEXT:    mov r8, r1
 ; CHECK-NEXT:    subs r1, r2, #1
-; CHECK-NEXT:    sbcs r1, r3, #0
-; CHECK-NEXT:    mov r8, #1
-; CHECK-NEXT:    mov r1, #0
 ; CHECK-NEXT:    vmov.f32 s0, s16
-; CHECK-NEXT:    movge r2, r8
-; CHECK-NEXT:    movwlt r1, #1
-; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    sbcs r1, r3, #0
+; CHECK-NEXT:    mov r6, #0
+; CHECK-NEXT:    mov r9, #1
+; CHECK-NEXT:    movwlt r6, #1
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    moveq r3, r6
+; CHECK-NEXT:    moveq r8, r6
+; CHECK-NEXT:    moveq r2, r9
+; CHECK-NEXT:    movne r6, r0
+; CHECK-NEXT:    rsbs r0, r6, #0
 ; CHECK-NEXT:    mov r7, #0
-; CHECK-NEXT:    moveq r3, r1
-; CHECK-NEXT:    moveq r4, r1
-; CHECK-NEXT:    movne r1, r0
-; CHECK-NEXT:    rsbs r0, r1, #0
-; CHECK-NEXT:    rscs r0, r4, #0
+; CHECK-NEXT:    rscs r0, r8, #0
 ; CHECK-NEXT:    mov r5, #0
 ; CHECK-NEXT:    rscs r0, r2, #0
 ; CHECK-NEXT:    rscs r0, r3, #0
 ; CHECK-NEXT:    movwlt r7, #1
 ; CHECK-NEXT:    cmp r7, #0
-; CHECK-NEXT:    moveq r4, r7
-; CHECK-NEXT:    movne r7, r1
+; CHECK-NEXT:    moveq r6, r7
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    subs r6, r2, #1
-; CHECK-NEXT:    vmov.32 d1[0], r7
-; CHECK-NEXT:    sbcs r6, r3, #0
-; CHECK-NEXT:    movlt r8, r2
-; CHECK-NEXT:    mov r2, #0
-; CHECK-NEXT:    movwlt r2, #1
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    moveq r3, r2
-; CHECK-NEXT:    moveq r1, r2
-; CHECK-NEXT:    movne r2, r0
-; CHECK-NEXT:    rsbs r0, r2, #0
+; CHECK-NEXT:    subs r4, r2, #1
+; CHECK-NEXT:    vmov.32 d1[0], r6
+; CHECK-NEXT:    sbcs r4, r3, #0
+; CHECK-NEXT:    mov r4, #0
+; CHECK-NEXT:    movwlt r4, #1
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    movne r9, r2
+; CHECK-NEXT:    moveq r3, r4
+; CHECK-NEXT:    moveq r1, r4
+; CHECK-NEXT:    movne r4, r0
+; CHECK-NEXT:    rsbs r0, r4, #0
 ; CHECK-NEXT:    rscs r0, r1, #0
-; CHECK-NEXT:    rscs r0, r8, #0
+; CHECK-NEXT:    rscs r0, r9, #0
 ; CHECK-NEXT:    rscs r0, r3, #0
 ; CHECK-NEXT:    movwlt r5, #1
 ; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    moveq r2, r5
+; CHECK-NEXT:    moveq r4, r5
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    movne r7, r8
+; CHECK-NEXT:    vmov.32 d0[0], r4
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    vmov.32 d1[1], r7
 ; CHECK-NEXT:    movne r5, r1
-; CHECK-NEXT:    vmov.32 d0[0], r2
-; CHECK-NEXT:    vmov.32 d1[1], r4
 ; CHECK-NEXT:    vmov.32 d0[1], r5
 ; CHECK-NEXT:    vpop {d8}
-; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
 entry:
   %conv = fptosi <2 x float> %x to <2 x i128>
   %0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
@@ -1975,11 +2010,13 @@ entry:
 define <2 x i64> @stest_f16i64(<2 x half> %x) {
 ; CHECK-NEON-LABEL: stest_f16i64:
 ; CHECK-NEON:       @ %bb.0: @ %entry
-; CHECK-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-NEON-NEXT:    .vsave {d8}
-; CHECK-NEON-NEXT:    vpush {d8}
-; CHECK-NEON-NEXT:    vmov r0, s0
+; CHECK-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEON-NEXT:    .pad #4
+; CHECK-NEON-NEXT:    sub sp, sp, #4
+; CHECK-NEON-NEXT:    .vsave {d8}
+; CHECK-NEON-NEXT:    vpush {d8}
+; CHECK-NEON-NEXT:    vmov r0, s0
 ; CHECK-NEON-NEXT:    vmov.f32 s16, s1
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    mov r8, r0
@@ -1987,58 +2024,62 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) {
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    vmov s0, r0
 ; CHECK-NEON-NEXT:    bl __fixsfti
-; CHECK-NEON-NEXT:    mov r4, r1
+; CHECK-NEON-NEXT:    mov r5, r0
 ; CHECK-NEON-NEXT:    mvn r9, #0
-; CHECK-NEON-NEXT:    subs r1, r0, r9
-; CHECK-NEON-NEXT:    mvn r6, #-2147483648
-; CHECK-NEON-NEXT:    sbcs r1, r4, r6
+; CHECK-NEON-NEXT:    subs r0, r0, r9
+; CHECK-NEON-NEXT:    mvn r7, #-2147483648
+; CHECK-NEON-NEXT:    sbcs r0, r1, r7
+; CHECK-NEON-NEXT:    mov r11, r1
+; CHECK-NEON-NEXT:    sbcs r0, r2, #0
 ; CHECK-NEON-NEXT:    vmov s0, r8
-; CHECK-NEON-NEXT:    sbcs r1, r2, #0
-; CHECK-NEON-NEXT:    mov r5, #0
-; CHECK-NEON-NEXT:    sbcs r1, r3, #0
-; CHECK-NEON-NEXT:    mov r8, #-2147483648
-; CHECK-NEON-NEXT:    mov r1, #0
+; CHECK-NEON-NEXT:    sbcs r0, r3, #0
+; CHECK-NEON-NEXT:    mov r6, #0
+; CHECK-NEON-NEXT:    mov r0, #0
 ; CHECK-NEON-NEXT:    mov r10, #0
-; CHECK-NEON-NEXT:    movwlt r1, #1
-; CHECK-NEON-NEXT:    cmp r1, #0
-; CHECK-NEON-NEXT:    moveq r3, r1
-; CHECK-NEON-NEXT:    movne r1, r2
-; CHECK-NEON-NEXT:    moveq r4, r6
-; CHECK-NEON-NEXT:    moveq r0, r9
-; CHECK-NEON-NEXT:    rsbs r2, r0, #0
-; CHECK-NEON-NEXT:    rscs r2, r4, #-2147483648
-; CHECK-NEON-NEXT:    sbcs r1, r9, r1
-; CHECK-NEON-NEXT:    sbcs r1, r9, r3
-; CHECK-NEON-NEXT:    movwlt r5, #1
-; CHECK-NEON-NEXT:    cmp r5, #0
-; CHECK-NEON-NEXT:    movne r5, r0
-; CHECK-NEON-NEXT:    moveq r4, r8
+; CHECK-NEON-NEXT:    movwlt r0, #1
+; CHECK-NEON-NEXT:    cmp r0, #0
+; CHECK-NEON-NEXT:    moveq r3, r0
+; CHECK-NEON-NEXT:    movne r0, r2
+; CHECK-NEON-NEXT:    moveq r11, r7
+; CHECK-NEON-NEXT:    moveq r5, r9
+; CHECK-NEON-NEXT:    rsbs r1, r5, #0
+; CHECK-NEON-NEXT:    rscs r1, r11, #-2147483648
+; CHECK-NEON-NEXT:    sbcs r0, r9, r0
+; CHECK-NEON-NEXT:    sbcs r0, r9, r3
+; CHECK-NEON-NEXT:    movwlt r6, #1
+; CHECK-NEON-NEXT:    cmp r6, #0
+; CHECK-NEON-NEXT:    moveq r5, r6
 ; CHECK-NEON-NEXT:    bl __fixsfti
-; CHECK-NEON-NEXT:    subs r7, r0, r9
+; CHECK-NEON-NEXT:    subs r4, r0, r9
 ; CHECK-NEON-NEXT:    vmov.32 d1[0], r5
-; CHECK-NEON-NEXT:    sbcs r7, r1, r6
-; CHECK-NEON-NEXT:    sbcs r7, r2, #0
-; CHECK-NEON-NEXT:    sbcs r7, r3, #0
-; CHECK-NEON-NEXT:    mov r7, #0
-; CHECK-NEON-NEXT:    movwlt r7, #1
-; CHECK-NEON-NEXT:    cmp r7, #0
-; CHECK-NEON-NEXT:    moveq r3, r7
-; CHECK-NEON-NEXT:    movne r7, r2
-; CHECK-NEON-NEXT:    movne r6, r1
+; CHECK-NEON-NEXT:    sbcs r4, r1, r7
+; CHECK-NEON-NEXT:    sbcs r4, r2, #0
+; CHECK-NEON-NEXT:    sbcs r4, r3, #0
+; CHECK-NEON-NEXT:    mov r4, #0
+; CHECK-NEON-NEXT:    movwlt r4, #1
+; CHECK-NEON-NEXT:    cmp r4, #0
+; CHECK-NEON-NEXT:    moveq r3, r4
+; CHECK-NEON-NEXT:    movne r7, r1
+; CHECK-NEON-NEXT:    movne r4, r2
 ; CHECK-NEON-NEXT:    moveq r0, r9
 ; CHECK-NEON-NEXT:    rsbs r1, r0, #0
-; CHECK-NEON-NEXT:    rscs r1, r6, #-2147483648
-; CHECK-NEON-NEXT:    sbcs r1, r9, r7
+; CHECK-NEON-NEXT:    rscs r1, r7, #-2147483648
+; CHECK-NEON-NEXT:    sbcs r1, r9, r4
 ; CHECK-NEON-NEXT:    sbcs r1, r9, r3
 ; CHECK-NEON-NEXT:    movwlt r10, #1
 ; CHECK-NEON-NEXT:    cmp r10, #0
-; CHECK-NEON-NEXT:    movne r10, r0
-; CHECK-NEON-NEXT:    moveq r6, r8
-; CHECK-NEON-NEXT:    vmov.32 d0[0], r10
-; CHECK-NEON-NEXT:    vmov.32 d1[1], r4
-; CHECK-NEON-NEXT:    vmov.32 d0[1], r6
+; CHECK-NEON-NEXT:    moveq r0, r10
+; CHECK-NEON-NEXT:    mov r1, #-2147483648
+; CHECK-NEON-NEXT:    cmp r6, #0
+; CHECK-NEON-NEXT:    vmov.32 d0[0], r0
+; CHECK-NEON-NEXT:    moveq r11, r1
+; CHECK-NEON-NEXT:    cmp r10, #0
+; CHECK-NEON-NEXT:    vmov.32 d1[1], r11
+; CHECK-NEON-NEXT:    moveq r7, r1
+; CHECK-NEON-NEXT:    vmov.32 d0[1], r7
 ; CHECK-NEON-NEXT:    vpop {d8}
-; CHECK-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+; CHECK-NEON-NEXT:    add sp, sp, #4
+; CHECK-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; CHECK-FP16-LABEL: stest_f16i64:
 ; CHECK-FP16:       @ %bb.0: @ %entry
@@ -2048,56 +2089,59 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) {
 ; CHECK-FP16-NEXT:    vmov.u16 r7, d0[0]
 ; CHECK-FP16-NEXT:    vmov s0, r0
 ; CHECK-FP16-NEXT:    bl __fixhfti
-; CHECK-FP16-NEXT:    mov r4, r1
-; CHECK-FP16-NEXT:    mvn r9, #0
-; CHECK-FP16-NEXT:    subs r1, r0, r9
-; CHECK-FP16-NEXT:    mvn r5, #-2147483648
-; CHECK-FP16-NEXT:    sbcs r1, r4, r5
+; CHECK-FP16-NEXT:    mov r5, r0
+; CHECK-FP16-NEXT:    mvn r8, #0
+; CHECK-FP16-NEXT:    subs r0, r0, r8
+; CHECK-FP16-NEXT:    mvn r6, #-2147483648
+; CHECK-FP16-NEXT:    sbcs r0, r1, r6
+; CHECK-FP16-NEXT:    mov r10, r1
+; CHECK-FP16-NEXT:    sbcs r0, r2, #0
 ; CHECK-FP16-NEXT:    vmov s0, r7
-; CHECK-FP16-NEXT:    sbcs r1, r2, #0
+; CHECK-FP16-NEXT:    sbcs r0, r3, #0
 ; CHECK-FP16-NEXT:    mov r7, #0
-; CHECK-FP16-NEXT:    sbcs r1, r3, #0
-; CHECK-FP16-NEXT:    mov r8, #-2147483648
-; CHECK-FP16-NEXT:    mov r1, #0
-; CHECK-FP16-NEXT:    mov r10, #0
-; CHECK-FP16-NEXT:    movwlt r1, #1
-; CHECK-FP16-NEXT:    cmp r1, #0
-; CHECK-FP16-NEXT:    moveq r3, r1
-; CHECK-FP16-NEXT:    movne r1, r2
-; CHECK-FP16-NEXT:    moveq r4, r5
-; CHECK-FP16-NEXT:    moveq r0, r9
-; CHECK-FP16-NEXT:    rsbs r2, r0, #0
-; CHECK-FP16-NEXT:    rscs r2, r4, #-2147483648
-; CHECK-FP16-NEXT:    sbcs r1, r9, r1
-; CHECK-FP16-NEXT:    sbcs r1, r9, r3
+; CHECK-FP16-NEXT:    mov r0, #0
+; CHECK-FP16-NEXT:    mov r9, #0
+; CHECK-FP16-NEXT:    movwlt r0, #1
+; CHECK-FP16-NEXT:    cmp r0, #0
+; CHECK-FP16-NEXT:    moveq r3, r0
+; CHECK-FP16-NEXT:    movne r0, r2
+; CHECK-FP16-NEXT:    moveq r10, r6
+; CHECK-FP16-NEXT:    moveq r5, r8
+; CHECK-FP16-NEXT:    rsbs r1, r5, #0
+; CHECK-FP16-NEXT:    rscs r1, r10, #-2147483648
+; CHECK-FP16-NEXT:    sbcs r0, r8, r0
+; CHECK-FP16-NEXT:    sbcs r0, r8, r3
 ; CHECK-FP16-NEXT:    movwlt r7, #1
 ; CHECK-FP16-NEXT:    cmp r7, #0
-; CHECK-FP16-NEXT:    movne r7, r0
-; CHECK-FP16-NEXT:    moveq r4, r8
+; CHECK-FP16-NEXT:    moveq r5, r7
 ; CHECK-FP16-NEXT:    bl __fixhfti
-; CHECK-FP16-NEXT:    subs r6, r0, r9
-; CHECK-FP16-NEXT:    vmov.32 d1[0], r7
-; CHECK-FP16-NEXT:    sbcs r6, r1, r5
-; CHECK-FP16-NEXT:    sbcs r6, r2, #0
-; CHECK-FP16-NEXT:    sbcs r6, r3, #0
-; CHECK-FP16-NEXT:    mov r6, #0
-; CHECK-FP16-NEXT:    movwlt r6, #1
-; CHECK-FP16-NEXT:    cmp r6, #0
-; CHECK-FP16-NEXT:    moveq r3, r6
-; CHECK-FP16-NEXT:    movne r6, r2
-; CHECK-FP16-NEXT:    movne r5, r1
-; CHECK-FP16-NEXT:    moveq r0, r9
+; CHECK-FP16-NEXT:    subs r4, r0, r8
+; CHECK-FP16-NEXT:    vmov.32 d1[0], r5
+; CHECK-FP16-NEXT:    sbcs r4, r1, r6
+; CHECK-FP16-NEXT:    sbcs r4, r2, #0
+; CHECK-FP16-NEXT:    sbcs r4, r3, #0
+; CHECK-FP16-NEXT:    mov r4, #0
+; CHECK-FP16-NEXT:    movwlt r4, #1
+; CHECK-FP16-NEXT:    cmp r4, #0
+; CHECK-FP16-NEXT:    moveq r3, r4
+; CHECK-FP16-NEXT:    movne r6, r1
+; CHECK-FP16-NEXT:    movne r4, r2
+; CHECK-FP16-NEXT:    moveq r0, r8
 ; CHECK-FP16-NEXT:    rsbs r1, r0, #0
-; CHECK-FP16-NEXT:    rscs r1, r5, #-2147483648
-; CHECK-FP16-NEXT:    sbcs r1, r9, r6
-; CHECK-FP16-NEXT:    sbcs r1, r9, r3
-; CHECK-FP16-NEXT:    movwlt r10, #1
-; CHECK-FP16-NEXT:    cmp r10, #0
-; CHECK-FP16-NEXT:    movne r10, r0
-; CHECK-FP16-NEXT:    moveq r5, r8
-; CHECK-FP16-NEXT:    vmov.32 d0[0], r10
-; CHECK-FP16-NEXT:    vmov.32 d1[1], r4
-; CHECK-FP16-NEXT:    vmov.32 d0[1], r5
+; CHECK-FP16-NEXT:    rscs r1, r6, #-2147483648
+; CHECK-FP16-NEXT:    sbcs r1, r8, r4
+; CHECK-FP16-NEXT:    sbcs r1, r8, r3
+; CHECK-FP16-NEXT:    movwlt r9, #1
+; CHECK-FP16-NEXT:    cmp r9, #0
+; CHECK-FP16-NEXT:    moveq r0, r9
+; CHECK-FP16-NEXT:    mov r1, #-2147483648
+; CHECK-FP16-NEXT:    cmp r7, #0
+; CHECK-FP16-NEXT:    vmov.32 d0[0], r0
+; CHECK-FP16-NEXT:    moveq r10, r1
+; CHECK-FP16-NEXT:    cmp r9, #0
+; CHECK-FP16-NEXT:    vmov.32 d1[1], r10
+; CHECK-FP16-NEXT:    moveq r6, r1
+; CHECK-FP16-NEXT:    vmov.32 d0[1], r6
 ; CHECK-FP16-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
 entry:
   %conv = fptosi <2 x half> %x to <2 x i128>
@@ -2112,8 +2156,8 @@ entry:
 define <2 x i64> @utesth_f16i64(<2 x half> %x) {
 ; CHECK-NEON-LABEL: utesth_f16i64:
 ; CHECK-NEON:       @ %bb.0: @ %entry
-; CHECK-NEON-NEXT:    .save {r4, r5, r6, lr}
-; CHECK-NEON-NEXT:    push {r4, r5, r6, lr}
+; CHECK-NEON-NEXT:    .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEON-NEXT:    push {r4, r5, r6, r7, r11, lr}
 ; CHECK-NEON-NEXT:    .vsave {d8}
 ; CHECK-NEON-NEXT:    vpush {d8}
 ; CHECK-NEON-NEXT:    vmov r0, s0
@@ -2124,60 +2168,66 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    vmov s0, r0
 ; CHECK-NEON-NEXT:    bl __fixunssfti
-; CHECK-NEON-NEXT:    mov r4, r1
-; CHECK-NEON-NEXT:    subs r1, r2, #1
+; CHECK-NEON-NEXT:    mov r6, r0
+; CHECK-NEON-NEXT:    subs r0, r2, #1
 ; CHECK-NEON-NEXT:    vmov s0, r5
-; CHECK-NEON-NEXT:    sbcs r1, r3, #0
+; CHECK-NEON-NEXT:    sbcs r0, r3, #0
 ; CHECK-NEON-NEXT:    mov r5, #0
-; CHECK-NEON-NEXT:    mov r6, #0
+; CHECK-NEON-NEXT:    mov r4, r1
 ; CHECK-NEON-NEXT:    movwlo r5, #1
 ; CHECK-NEON-NEXT:    cmp r5, #0
-; CHECK-NEON-NEXT:    moveq r4, r5
-; CHECK-NEON-NEXT:    movne r5, r0
+; CHECK-NEON-NEXT:    mov r7, #0
+; CHECK-NEON-NEXT:    moveq r6, r5
 ; CHECK-NEON-NEXT:    bl __fixunssfti
 ; CHECK-NEON-NEXT:    subs r2, r2, #1
-; CHECK-NEON-NEXT:    vmov.32 d1[0], r5
+; CHECK-NEON-NEXT:    vmov.32 d1[0], r6
 ; CHECK-NEON-NEXT:    sbcs r2, r3, #0
-; CHECK-NEON-NEXT:    movwlo r6, #1
-; CHECK-NEON-NEXT:    cmp r6, #0
-; CHECK-NEON-NEXT:    moveq r0, r6
-; CHECK-NEON-NEXT:    movne r6, r1
+; CHECK-NEON-NEXT:    movwlo r7, #1
+; CHECK-NEON-NEXT:    cmp r7, #0
+; CHECK-NEON-NEXT:    moveq r0, r7
+; CHECK-NEON-NEXT:    cmp r5, #0
+; CHECK-NEON-NEXT:    movne r5, r4
 ; CHECK-NEON-NEXT:    vmov.32 d0[0], r0
-; CHECK-NEON-NEXT:    vmov.32 d1[1], r4
-; CHECK-NEON-NEXT:    vmov.32 d0[1], r6
+; CHECK-NEON-NEXT:    cmp r7, #0
+; CHECK-NEON-NEXT:    vmov.32 d1[1], r5
+; CHECK-NEON-NEXT:    movne r7, r1
+; CHECK-NEON-NEXT:    vmov.32 d0[1], r7
 ; CHECK-NEON-NEXT:    vpop {d8}
-; CHECK-NEON-NEXT:    pop {r4, r5, r6, pc}
+; CHECK-NEON-NEXT:    pop {r4, r5, r6, r7, r11, pc}
 ;
 ; CHECK-FP16-LABEL: utesth_f16i64:
 ; CHECK-FP16:       @ %bb.0: @ %entry
-; CHECK-FP16-NEXT:    .save {r4, r5, r6, lr}
-; CHECK-FP16-NEXT:    push {r4, r5, r6, lr}
+; CHECK-FP16-NEXT:    .save {r4, r5, r6, r7, r11, lr}
+; CHECK-FP16-NEXT:    push {r4, r5, r6, r7, r11, lr}
 ; CHECK-FP16-NEXT:    vmov.u16 r0, d0[1]
-; CHECK-FP16-NEXT:    vmov.u16 r6, d0[0]
+; CHECK-FP16-NEXT:    vmov.u16 r7, d0[0]
 ; CHECK-FP16-NEXT:    vmov s0, r0
 ; CHECK-FP16-NEXT:    bl __fixunshfti
+; CHECK-FP16-NEXT:    mov r5, r0
+; CHECK-FP16-NEXT:    subs r0, r2, #1
+; CHECK-FP16-NEXT:    vmov s0, r7
+; CHECK-FP16-NEXT:    sbcs r0, r3, #0
+; CHECK-FP16-NEXT:    mov r7, #0
 ; CHECK-FP16-NEXT:    mov r4, r1
-; CHECK-FP16-NEXT:    subs r1, r2, #1
-; CHECK-FP16-NEXT:    vmov s0, r6
-; CHECK-FP16-NEXT:    sbcs r1, r3, #0
+; CHECK-FP16-NEXT:    movwlo r7, #1
+; CHECK-FP16-NEXT:    cmp r7, #0
 ; CHECK-FP16-NEXT:    mov r6, #0
-; CHECK-FP16-NEXT:    mov r5, #0
-; CHECK-FP16-NEXT:    movwlo r6, #1
-; CHECK-FP16-NEXT:    cmp r6, #0
-; CHECK-FP16-NEXT:    moveq r4, r6
-; CHECK-FP16-NEXT:    movne r6, r0
+; CHECK-FP16-NEXT:    moveq r5, r7
 ; CHECK-FP16-NEXT:    bl __fixunshfti
 ; CHECK-FP16-NEXT:    subs r2, r2, #1
-; CHECK-FP16-NEXT:    vmov.32 d1[0], r6
+; CHECK-FP16-NEXT:    vmov.32 d1[0], r5
 ; CHECK-FP16-NEXT:    sbcs r2, r3, #0
-; CHECK-FP16-NEXT:    movwlo r5, #1
-; CHECK-FP16-NEXT:    cmp r5, #0
-; CHECK-FP16-NEXT:    moveq r0, r5
-; CHECK-FP16-NEXT:    movne r5, r1
+; CHECK-FP16-NEXT:    movwlo r6, #1
+; CHECK-FP16-NEXT:    cmp r6, #0
+; CHECK-FP16-NEXT:    moveq r0, r6
+; CHECK-FP16-NEXT:    cmp r7, #0
+; CHECK-FP16-NEXT:    movne r7, r4
 ; CHECK-FP16-NEXT:    vmov.32 d0[0], r0
-; CHECK-FP16-NEXT:    vmov.32 d1[1], r4
-; CHECK-FP16-NEXT:    vmov.32 d0[1], r5
-; CHECK-FP16-NEXT:    pop {r4, r5, r6, pc}
+; CHECK-FP16-NEXT:    cmp r6, #0
+; CHECK-FP16-NEXT:    vmov.32 d1[1], r7
+; CHECK-FP16-NEXT:    movne r6, r1
+; CHECK-FP16-NEXT:    vmov.32 d0[1], r6
+; CHECK-FP16-NEXT:    pop {r4, r5, r6, r7, r11, pc}
 entry:
   %conv = fptoui <2 x half> %x to <2 x i128>
   %0 = icmp ult <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
@@ -2189,8 +2239,8 @@ entry:
 define <2 x i64> @ustest_f16i64(<2 x half> %x) {
 ; CHECK-NEON-LABEL: ustest_f16i64:
 ; CHECK-NEON:       @ %bb.0: @ %entry
-; CHECK-NEON-NEXT:    .save {r4, r5, r6, r7, r8, lr}
-; CHECK-NEON-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; CHECK-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
 ; CHECK-NEON-NEXT:    .vsave {d8}
 ; CHECK-NEON-NEXT:    vpush {d8}
 ; CHECK-NEON-NEXT:    vmov r0, s0
@@ -2201,106 +2251,110 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) {
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    vmov s0, r0
 ; CHECK-NEON-NEXT:    bl __fixsfti
-; CHECK-NEON-NEXT:    mov r4, r1
+; CHECK-NEON-NEXT:    mov r8, r1
 ; CHECK-NEON-NEXT:    subs r1, r2, #1
-; CHECK-NEON-NEXT:    sbcs r1, r3, #0
-; CHECK-NEON-NEXT:    mov r8, #1
-; CHECK-NEON-NEXT:    mov r1, #0
-; CHECK-NEON-NEXT:    movge r2, r8
-; CHECK-NEON-NEXT:    movwlt r1, #1
-; CHECK-NEON-NEXT:    cmp r1, #0
-; CHECK-NEON-NEXT:    moveq r3, r1
-; CHECK-NEON-NEXT:    moveq r4, r1
-; CHECK-NEON-NEXT:    movne r1, r0
-; CHECK-NEON-NEXT:    rsbs r0, r1, #0
-; CHECK-NEON-NEXT:    rscs r0, r4, #0
 ; CHECK-NEON-NEXT:    vmov s0, r5
-; CHECK-NEON-NEXT:    rscs r0, r2, #0
+; CHECK-NEON-NEXT:    sbcs r1, r3, #0
+; CHECK-NEON-NEXT:    mov r5, #0
+; CHECK-NEON-NEXT:    mov r9, #1
+; CHECK-NEON-NEXT:    movwlt r5, #1
+; CHECK-NEON-NEXT:    cmp r5, #0
+; CHECK-NEON-NEXT:    moveq r3, r5
+; CHECK-NEON-NEXT:    moveq r8, r5
+; CHECK-NEON-NEXT:    moveq r2, r9
+; CHECK-NEON-NEXT:    movne r5, r0
+; CHECK-NEON-NEXT:    rsbs r0, r5, #0
 ; CHECK-NEON-NEXT:    mov r7, #0
+; CHECK-NEON-NEXT:    rscs r0, r8, #0
+; CHECK-NEON-NEXT:    mov r6, #0
+; CHECK-NEON-NEXT:    rscs r0, r2, #0
 ; CHECK-NEON-NEXT:    rscs r0, r3, #0
-; CHECK-NEON-NEXT:    mov r5, #0
 ; CHECK-NEON-NEXT:    movwlt r7, #1
 ; CHECK-NEON-NEXT:    cmp r7, #0
-; CHECK-NEON-NEXT:    moveq r4, r7
-; CHECK-NEON-NEXT:    movne r7, r1
+; CHECK-NEON-NEXT:    moveq r5, r7
 ; CHECK-NEON-NEXT:    bl __fixsfti
-; CHECK-NEON-NEXT:    subs r6, r2, #1
-; CHECK-NEON-NEXT:    vmov.32 d1[0], r7
-; CHECK-NEON-NEXT:    sbcs r6, r3, #0
-; CHECK-NEON-NEXT:    movlt r8, r2
-; CHECK-NEON-NEXT:    mov r2, #0
-; CHECK-NEON-NEXT:    movwlt r2, #1
-; CHECK-NEON-NEXT:    cmp r2, #0
-; CHECK-NEON-NEXT:    moveq r3, r2
-; CHECK-NEON-NEXT:    moveq r1, r2
-; CHECK-NEON-NEXT:    movne r2, r0
-; CHECK-NEON-NEXT:    rsbs r0, r2, #0
+; CHECK-NEON-NEXT:    subs r4, r2, #1
+; CHECK-NEON-NEXT:    vmov.32 d1[0], r5
+; CHECK-NEON-NEXT:    sbcs r4, r3, #0
+; CHECK-NEON-NEXT:    mov r4, #0
+; CHECK-NEON-NEXT:    movwlt r4, #1
+; CHECK-NEON-NEXT:    cmp r4, #0
+; CHECK-NEON-NEXT:    movne r9, r2
+; CHECK-NEON-NEXT:    moveq r3, r4
+; CHECK-NEON-NEXT:    moveq r1, r4
+; CHECK-NEON-NEXT:    movne r4, r0
+; CHECK-NEON-NEXT:    rsbs r0, r4, #0
 ; CHECK-NEON-NEXT:    rscs r0, r1, #0
-; CHECK-NEON-NEXT:    rscs r0, r8, #0
+; CHECK-NEON-NEXT:    rscs r0, r9, #0
 ; CHECK-NEON-NEXT:    rscs r0, r3, #0
-; CHECK-NEON-NEXT:    movwlt r5, #1
-; CHECK-NEON-NEXT:    cmp r5, #0
-; CHECK-NEON-NEXT:    moveq r2, r5
-; CHECK-NEON-NEXT:    movne r5, r1
-; CHECK-NEON-NEXT:    vmov.32 d0[0], r2
-; CHECK-NEON-NEXT:    vmov.32 d1[1], r4
-; CHECK-NEON-NEXT:    vmov.32 d0[1], r5
+; CHECK-NEON-NEXT:    movwlt r6, #1
+; CHECK-NEON-NEXT:    cmp r6, #0
+; CHECK-NEON-NEXT:    moveq r4, r6
+; CHECK-NEON-NEXT:    cmp r7, #0
+; CHECK-NEON-NEXT:    movne r7, r8
+; CHECK-NEON-NEXT:    vmov.32 d0[0], r4
+; CHECK-NEON-NEXT:    cmp r6, #0
+; CHECK-NEON-NEXT:    vmov.32 d1[1], r7
+; CHECK-NEON-NEXT:    movne r6, r1
+; CHECK-NEON-NEXT:    vmov.32 d0[1], r6
 ; CHECK-NEON-NEXT:    vpop {d8}
-; CHECK-NEON-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+; CHECK-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
 ;
 ; CHECK-FP16-LABEL: ustest_f16i64:
 ; CHECK-FP16:       @ %bb.0: @ %entry
-; CHECK-FP16-NEXT:    .save {r4, r5, r6, r7, r8, lr}
-; CHECK-FP16-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; CHECK-FP16-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-FP16-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
 ; CHECK-FP16-NEXT:    vmov.u16 r0, d0[1]
-; CHECK-FP16-NEXT:    vmov.u16 r5, d0[0]
+; CHECK-FP16-NEXT:    vmov.u16 r4, d0[0]
 ; CHECK-FP16-NEXT:    vmov s0, r0
 ; CHECK-FP16-NEXT:    bl __fixhfti
-; CHECK-FP16-NEXT:    mov r4, r1
+; CHECK-FP16-NEXT:    mov r8, r1
 ; CHECK-FP16-NEXT:    subs r1, r2, #1
 ; CHECK-FP16-NEXT:    sbcs r1, r3, #0
-; CHECK-FP16-NEXT:    mov r8, #1
-; CHECK-FP16-NEXT:    mov r1, #0
-; CHECK-FP16-NEXT:    movge r2, r8
-; CHECK-FP16-NEXT:    movwlt r1, #1
-; CHECK-FP16-NEXT:    cmp r1, #0
-; CHECK-FP16-NEXT:    moveq r3, r1
-; CHECK-FP16-NEXT:    moveq r4, r1
-; CHECK-FP16-NEXT:    movne r1, r0
-; CHECK-FP16-NEXT:    rsbs r0, r1, #0
-; CHECK-FP16-NEXT:    rscs r0, r4, #0
-; CHECK-FP16-NEXT:    vmov s0, r5
+; CHECK-FP16-NEXT:    mov r6, #0
+; CHECK-FP16-NEXT:    movwlt r6, #1
+; CHECK-FP16-NEXT:    cmp r6, #0
+; CHECK-FP16-NEXT:    mov r9, #1
+; CHECK-FP16-NEXT:    moveq r3, r6
+; CHECK-FP16-NEXT:    moveq r8, r6
+; CHECK-FP16-NEXT:    moveq r2, r9
+; CHECK-FP16-NEXT:    movne r6, r0
+; CHECK-FP16-NEXT:    rsbs r0, r6, #0
+; CHECK-FP16-NEXT:    rscs r0, r8, #0
+; CHECK-FP16-NEXT:    vmov s0, r4
 ; CHECK-FP16-NEXT:    rscs r0, r2, #0
 ; CHECK-FP16-NEXT:    mov r7, #0
 ; CHECK-FP16-NEXT:    rscs r0, r3, #0
 ; CHECK-FP16-NEXT:    mov r5, #0
 ; CHECK-FP16-NEXT:    movwlt r7, #1
 ; CHECK-FP16-NEXT:    cmp r7, #0
-; CHECK-FP16-NEXT:    moveq r4, r7
-; CHECK-FP16-NEXT:    movne r7, r1
+; CHECK-FP16-NEXT:    moveq r6, r7
 ; CHECK-FP16-NEXT:    bl __fixhfti
-; CHECK-FP16-NEXT:    subs r6, r2, #1
-; CHECK-FP16-NEXT:    vmov.32 d1[0], r7
-; CHECK-FP16-NEXT:    sbcs r6, r3, #0
-; CHECK-FP16-NEXT:    movlt r8, r2
-; CHECK-FP16-NEXT:    mov r2, #0
-; CHECK-FP16-NEXT:    movwlt r2, #1
-; CHECK-FP16-NEXT:    cmp r2, #0
-; CHECK-FP16-NEXT:    moveq r3, r2
-; CHECK-FP16-NEXT:    moveq r1, r2
-; CHECK-FP16-NEXT:    movne r2, r0
-; CHECK-FP16-NEXT:    rsbs r0, r2, #0
+; CHECK-FP16-NEXT:    subs r4, r2, #1
+; CHECK-FP16-NEXT:    vmov.32 d1[0], r6
+; CHECK-FP16-NEXT:    sbcs r4, r3, #0
+; CHECK-FP16-NEXT:    mov r4, #0
+; CHECK-FP16-NEXT:    movwlt r4, #1
+; CHECK-FP16-NEXT:    cmp r4, #0
+; CHECK-FP16-NEXT:    movne r9, r2
+; CHECK-FP16-NEXT:    moveq r3, r4
+; CHECK-FP16-NEXT:    moveq r1, r4
+; CHECK-FP16-NEXT:    movne r4, r0
+; CHECK-FP16-NEXT:    rsbs r0, r4, #0
 ; CHECK-FP16-NEXT:    rscs r0, r1, #0
-; CHECK-FP16-NEXT:    rscs r0, r8, #0
+; CHECK-FP16-NEXT:    rscs r0, r9, #0
 ; CHECK-FP16-NEXT:    rscs r0, r3, #0
 ; CHECK-FP16-NEXT:    movwlt r5, #1
 ; CHECK-FP16-NEXT:    cmp r5, #0
-; CHECK-FP16-NEXT:    moveq r2, r5
+; CHECK-FP16-NEXT:    moveq r4, r5
+; CHECK-FP16-NEXT:    cmp r7, #0
+; CHECK-FP16-NEXT:    movne r7, r8
+; CHECK-FP16-NEXT:    vmov.32 d0[0], r4
+; CHECK-FP16-NEXT:    cmp r5, #0
+; CHECK-FP16-NEXT:    vmov.32 d1[1], r7
 ; CHECK-FP16-NEXT:    movne r5, r1
-; CHECK-FP16-NEXT:    vmov.32 d0[0], r2
-; CHECK-FP16-NEXT:    vmov.32 d1[1], r4
 ; CHECK-FP16-NEXT:    vmov.32 d0[1], r5
-; CHECK-FP16-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+; CHECK-FP16-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
 entry:
   %conv = fptosi <2 x half> %x to <2 x i128>
   %0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
@@ -2326,34 +2380,34 @@ define <2 x i32> @stest_f64i32_mm(<2 x double> %x) {
 ; CHECK-NEXT:    vmov r0, r1, d8
 ; CHECK-NEXT:    bl __aeabi_d2lz
 ; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r8, r1
-; CHECK-NEXT:    vmov r0, r1, d9
-; CHECK-NEXT:    mvn r6, #-2147483648
-; CHECK-NEXT:    subs r2, r4, r6
-; CHECK-NEXT:    mov r5, #0
-; CHECK-NEXT:    sbcs r2, r8, #0
-; CHECK-NEXT:    mov r7, #0
-; CHECK-NEXT:    movge r4, r6
-; CHECK-NEXT:    movwlt r5, #1
+; CHECK-NEXT:    vmov r0, r2, d9
+; CHECK-NEXT:    mvn r5, #-2147483648
+; CHECK-NEXT:    subs r3, r4, r5
+; CHECK-NEXT:    sbcs r3, r1, #0
+; CHECK-NEXT:    mvn r7, #0
+; CHECK-NEXT:    mov r3, #0
+; CHECK-NEXT:    mov r8, #-2147483648
+; CHECK-NEXT:    movwlt r3, #1
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    movne r3, r1
+; CHECK-NEXT:    moveq r4, r5
+; CHECK-NEXT:    rsbs r1, r4, #-2147483648
+; CHECK-NEXT:    mov r6, #0
+; CHECK-NEXT:    sbcs r1, r7, r3
+; CHECK-NEXT:    movge r4, r8
+; CHECK-NEXT:    mov r1, r2
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    subs r2, r0, r6
-; CHECK-NEXT:    sbcs r2, r1, #0
-; CHECK-NEXT:    movlt r6, r0
-; CHECK-NEXT:    movwlt r7, #1
-; CHECK-NEXT:    cmp r7, #0
-; CHECK-NEXT:    mov r0, #-2147483648
-; CHECK-NEXT:    movne r7, r1
-; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    movne r5, r8
-; CHECK-NEXT:    rsbs r2, r4, #-2147483648
-; CHECK-NEXT:    mvn r1, #0
-; CHECK-NEXT:    sbcs r2, r1, r5
-; CHECK-NEXT:    movge r4, r0
-; CHECK-NEXT:    rsbs r2, r6, #-2147483648
+; CHECK-NEXT:    subs r2, r0, r5
 ; CHECK-NEXT:    vmov.32 d0[0], r4
-; CHECK-NEXT:    sbcs r1, r1, r7
-; CHECK-NEXT:    movge r6, r0
-; CHECK-NEXT:    vmov.32 d0[1], r6
+; CHECK-NEXT:    sbcs r2, r1, #0
+; CHECK-NEXT:    movwlt r6, #1
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    movne r6, r1
+; CHECK-NEXT:    movne r5, r0
+; CHECK-NEXT:    rsbs r0, r5, #-2147483648
+; CHECK-NEXT:    sbcs r0, r7, r6
+; CHECK-NEXT:    movge r5, r8
+; CHECK-NEXT:    vmov.32 d0[1], r5
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, pc}
 entry:
@@ -2406,16 +2460,16 @@ define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
 ; CHECK-NEXT:    vmov r0, r1, d8
 ; CHECK-NEXT:    bl __aeabi_d2lz
 ; CHECK-NEXT:    vmov r2, r12, d9
-; CHECK-NEXT:    mvn r4, #0
-; CHECK-NEXT:    subs r5, r0, r4
-; CHECK-NEXT:    mov r3, #0
-; CHECK-NEXT:    sbcs r5, r1, #0
+; CHECK-NEXT:    mvn r5, #0
+; CHECK-NEXT:    subs r3, r0, r5
 ; CHECK-NEXT:    mov r6, #0
-; CHECK-NEXT:    movge r0, r4
+; CHECK-NEXT:    sbcs r3, r1, #0
+; CHECK-NEXT:    mov r4, #0
+; CHECK-NEXT:    mov r3, #0
 ; CHECK-NEXT:    movwlt r3, #1
 ; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    mov r5, #0
 ; CHECK-NEXT:    movne r3, r1
+; CHECK-NEXT:    moveq r0, r5
 ; CHECK-NEXT:    rsbs r1, r0, #0
 ; CHECK-NEXT:    rscs r1, r3, #0
 ; CHECK-NEXT:    movwlt r6, #1
@@ -2424,20 +2478,20 @@ define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
 ; CHECK-NEXT:    mov r0, r2
 ; CHECK-NEXT:    mov r1, r12
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    subs r2, r0, r4
+; CHECK-NEXT:    subs r2, r0, r5
 ; CHECK-NEXT:    vmov.32 d0[0], r6
 ; CHECK-NEXT:    sbcs r2, r1, #0
-; CHECK-NEXT:    movlt r4, r0
-; CHECK-NEXT:    mov r0, #0
-; CHECK-NEXT:    movwlt r0, #1
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    movne r0, r1
-; CHECK-NEXT:    rsbs r1, r4, #0
-; CHECK-NEXT:    rscs r0, r0, #0
-; CHECK-NEXT:    movwlt r5, #1
-; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    movne r5, r4
-; CHECK-NEXT:    vmov.32 d0[1], r5
+; CHECK-NEXT:    mov r2, #0
+; CHECK-NEXT:    movwlt r2, #1
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    movne r2, r1
+; CHECK-NEXT:    movne r5, r0
+; CHECK-NEXT:    rsbs r0, r5, #0
+; CHECK-NEXT:    rscs r0, r2, #0
+; CHECK-NEXT:    movwlt r4, #1
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    movne r4, r5
+; CHECK-NEXT:    vmov.32 d0[1], r4
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    pop {r4, r5, r6, pc}
 entry:
@@ -2451,81 +2505,72 @@ entry:
 define <4 x i32> @stest_f32i32_mm(<4 x float> %x) {
 ; CHECK-LABEL: stest_f32i32_mm:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-NEXT:    .pad #4
-; CHECK-NEXT:    sub sp, sp, #4
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #8
-; CHECK-NEXT:    sub sp, sp, #8
 ; CHECK-NEXT:    vorr q4, q0, q0
+; CHECK-NEXT:    mov r8, #-2147483648
+; CHECK-NEXT:    mvn r9, #0
+; CHECK-NEXT:    mov r10, #0
 ; CHECK-NEXT:    vmov r0, s19
+; CHECK-NEXT:    vmov r5, s16
 ; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    vmov r2, s18
-; CHECK-NEXT:    mov r11, r0
-; CHECK-NEXT:    vmov r0, s17
-; CHECK-NEXT:    mvn r6, #-2147483648
-; CHECK-NEXT:    mov r3, #-2147483648
-; CHECK-NEXT:    mvn r10, #0
-; CHECK-NEXT:    vmov r7, s16
-; CHECK-NEXT:    mov r4, #0
-; CHECK-NEXT:    str r2, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    subs r2, r11, r6
-; CHECK-NEXT:    sbcs r2, r1, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mvn r7, #-2147483648
+; CHECK-NEXT:    subs r0, r0, r7
+; CHECK-NEXT:    sbcs r0, r1, #0
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    movwlt r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    movne r0, r1
+; CHECK-NEXT:    moveq r4, r7
+; CHECK-NEXT:    rsbs r1, r4, #-2147483648
+; CHECK-NEXT:    sbcs r0, r9, r0
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    movge r4, r8
+; CHECK-NEXT:    bl __aeabi_f2lz
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    subs r0, r0, r7
+; CHECK-NEXT:    sbcs r0, r1, #0
 ; CHECK-NEXT:    mov r2, #0
-; CHECK-NEXT:    movge r11, r6
+; CHECK-NEXT:    vmov r0, s18
 ; CHECK-NEXT:    movwlt r2, #1
 ; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    movne r2, r1
-; CHECK-NEXT:    rsbs r1, r11, #-2147483648
-; CHECK-NEXT:    sbcs r1, r10, r2
-; CHECK-NEXT:    movge r11, r3
-; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    mov r5, r0
-; CHECK-NEXT:    subs r0, r0, r6
-; CHECK-NEXT:    sbcs r0, r1, #0
-; CHECK-NEXT:    mov r9, #0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    movge r5, r6
-; CHECK-NEXT:    movwlt r9, #1
-; CHECK-NEXT:    cmp r9, #0
-; CHECK-NEXT:    movne r9, r1
+; CHECK-NEXT:    moveq r5, r7
+; CHECK-NEXT:    rsbs r1, r5, #-2147483648
+; CHECK-NEXT:    sbcs r1, r9, r2
+; CHECK-NEXT:    movge r5, r8
 ; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    mov r7, r0
-; CHECK-NEXT:    subs r0, r0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    subs r0, r0, r7
 ; CHECK-NEXT:    sbcs r0, r1, #0
-; CHECK-NEXT:    mov r8, #0
-; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    movge r7, r6
-; CHECK-NEXT:    movwlt r8, #1
-; CHECK-NEXT:    cmp r8, #0
-; CHECK-NEXT:    movne r8, r1
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    movwlt r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    movne r0, r1
+; CHECK-NEXT:    moveq r6, r7
+; CHECK-NEXT:    rsbs r1, r6, #-2147483648
+; CHECK-NEXT:    sbcs r0, r9, r0
+; CHECK-NEXT:    vmov r0, s17
+; CHECK-NEXT:    movge r6, r8
 ; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    subs r2, r0, r6
+; CHECK-NEXT:    subs r2, r0, r7
+; CHECK-NEXT:    vmov.32 d1[0], r6
 ; CHECK-NEXT:    sbcs r2, r1, #0
-; CHECK-NEXT:    movlt r6, r0
-; CHECK-NEXT:    movwlt r4, #1
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    movne r4, r1
-; CHECK-NEXT:    rsbs r0, r6, #-2147483648
-; CHECK-NEXT:    sbcs r0, r10, r4
-; CHECK-NEXT:    mov r1, #-2147483648
-; CHECK-NEXT:    movge r6, r1
+; CHECK-NEXT:    movwlt r10, #1
+; CHECK-NEXT:    cmp r10, #0
+; CHECK-NEXT:    movne r10, r1
+; CHECK-NEXT:    movne r7, r0
 ; CHECK-NEXT:    rsbs r0, r7, #-2147483648
-; CHECK-NEXT:    sbcs r0, r10, r8
-; CHECK-NEXT:    vmov.32 d1[0], r6
-; CHECK-NEXT:    movge r7, r1
-; CHECK-NEXT:    rsbs r0, r5, #-2147483648
-; CHECK-NEXT:    vmov.32 d0[0], r7
-; CHECK-NEXT:    sbcs r0, r10, r9
-; CHECK-NEXT:    movge r5, r1
-; CHECK-NEXT:    vmov.32 d1[1], r11
-; CHECK-NEXT:    vmov.32 d0[1], r5
-; CHECK-NEXT:    add sp, sp, #8
+; CHECK-NEXT:    vmov.32 d0[0], r5
+; CHECK-NEXT:    sbcs r0, r9, r10
+; CHECK-NEXT:    vmov.32 d1[1], r4
+; CHECK-NEXT:    movge r7, r8
+; CHECK-NEXT:    vmov.32 d0[1], r7
 ; CHECK-NEXT:    vpop {d8, d9}
-; CHECK-NEXT:    add sp, sp, #4
-; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
 entry:
   %conv = fptosi <4 x float> %x to <4 x i64>
   %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
@@ -2591,33 +2636,33 @@ define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
 ; CHECK-NEXT:    vmov r0, s19
 ; CHECK-NEXT:    bl __aeabi_f2lz
 ; CHECK-NEXT:    vmov r2, s16
-; CHECK-NEXT:    mvn r6, #0
-; CHECK-NEXT:    subs r3, r0, r6
+; CHECK-NEXT:    mvn r7, #0
+; CHECK-NEXT:    subs r3, r0, r7
 ; CHECK-NEXT:    mov r4, #0
 ; CHECK-NEXT:    sbcs r3, r1, #0
-; CHECK-NEXT:    vmov r8, s17
+; CHECK-NEXT:    mov r10, #0
 ; CHECK-NEXT:    mov r3, #0
-; CHECK-NEXT:    movge r0, r6
+; CHECK-NEXT:    vmov r9, s18
 ; CHECK-NEXT:    movwlt r3, #1
 ; CHECK-NEXT:    cmp r3, #0
 ; CHECK-NEXT:    movne r3, r1
+; CHECK-NEXT:    moveq r0, r7
 ; CHECK-NEXT:    rsbs r1, r0, #0
+; CHECK-NEXT:    vmov r8, s17
 ; CHECK-NEXT:    rscs r1, r3, #0
-; CHECK-NEXT:    vmov r9, s18
 ; CHECK-NEXT:    movwlt r4, #1
 ; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    movne r4, r0
-; CHECK-NEXT:    mov r10, #0
 ; CHECK-NEXT:    mov r0, r2
 ; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    subs r2, r0, r6
+; CHECK-NEXT:    subs r2, r0, r7
 ; CHECK-NEXT:    mov r5, #0
 ; CHECK-NEXT:    sbcs r2, r1, #0
 ; CHECK-NEXT:    mov r2, #0
-; CHECK-NEXT:    movge r0, r6
 ; CHECK-NEXT:    movwlt r2, #1
 ; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    movne r2, r1
+; CHECK-NEXT:    moveq r0, r7
 ; CHECK-NEXT:    rsbs r1, r0, #0
 ; CHECK-NEXT:    rscs r1, r2, #0
 ; CHECK-NEXT:    movwlt r5, #1
@@ -2625,36 +2670,36 @@ define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
 ; CHECK-NEXT:    movne r5, r0
 ; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    subs r2, r0, r6
-; CHECK-NEXT:    mov r7, #0
+; CHECK-NEXT:    subs r2, r0, r7
+; CHECK-NEXT:    mov r6, #0
 ; CHECK-NEXT:    sbcs r2, r1, #0
 ; CHECK-NEXT:    mov r2, #0
-; CHECK-NEXT:    movge r0, r6
 ; CHECK-NEXT:    movwlt r2, #1
 ; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    movne r2, r1
+; CHECK-NEXT:    moveq r0, r7
 ; CHECK-NEXT:    rsbs r1, r0, #0
 ; CHECK-NEXT:    rscs r1, r2, #0
-; CHECK-NEXT:    movwlt r7, #1
-; CHECK-NEXT:    cmp r7, #0
-; CHECK-NEXT:    movne r7, r0
+; CHECK-NEXT:    movwlt r6, #1
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    movne r6, r0
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    subs r2, r0, r6
-; CHECK-NEXT:    vmov.32 d1[0], r7
+; CHECK-NEXT:    subs r2, r0, r7
+; CHECK-NEXT:    vmov.32 d1[0], r6
 ; CHECK-NEXT:    sbcs r2, r1, #0
-; CHECK-NEXT:    movlt r6, r0
-; CHECK-NEXT:    mov r0, #0
-; CHECK-NEXT:    movwlt r0, #1
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    movne r0, r1
-; CHECK-NEXT:    rsbs r1, r6, #0
-; CHECK-NEXT:    rscs r0, r0, #0
+; CHECK-NEXT:    mov r2, #0
 ; CHECK-NEXT:    vmov.32 d0[0], r5
+; CHECK-NEXT:    movwlt r2, #1
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    movne r2, r1
+; CHECK-NEXT:    movne r7, r0
+; CHECK-NEXT:    rsbs r0, r7, #0
+; CHECK-NEXT:    vmov.32 d1[1], r4
+; CHECK-NEXT:    rscs r0, r2, #0
 ; CHECK-NEXT:    movwlt r10, #1
 ; CHECK-NEXT:    cmp r10, #0
-; CHECK-NEXT:    vmov.32 d1[1], r4
-; CHECK-NEXT:    movne r10, r6
+; CHECK-NEXT:    movne r10, r7
 ; CHECK-NEXT:    vmov.32 d0[1], r10
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
@@ -2669,164 +2714,150 @@ entry:
 define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
 ; CHECK-NEON-LABEL: stest_f16i32_mm:
 ; CHECK-NEON:       @ %bb.0: @ %entry
-; CHECK-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-NEON-NEXT:    .pad #4
-; CHECK-NEON-NEXT:    sub sp, sp, #4
+; CHECK-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
 ; CHECK-NEON-NEXT:    .vsave {d8, d9, d10}
 ; CHECK-NEON-NEXT:    vpush {d8, d9, d10}
-; CHECK-NEON-NEXT:    .pad #8
-; CHECK-NEON-NEXT:    sub sp, sp, #8
 ; CHECK-NEON-NEXT:    vmov r0, s3
-; CHECK-NEON-NEXT:    vmov.f32 s16, s2
-; CHECK-NEON-NEXT:    vmov.f32 s18, s1
+; CHECK-NEON-NEXT:    vmov.f32 s18, s2
+; CHECK-NEON-NEXT:    vmov.f32 s16, s1
 ; CHECK-NEON-NEXT:    vmov.f32 s20, s0
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    bl __aeabi_f2lz
-; CHECK-NEON-NEXT:    vmov r2, s16
-; CHECK-NEON-NEXT:    mov r11, r0
-; CHECK-NEON-NEXT:    vmov r0, s18
-; CHECK-NEON-NEXT:    mvn r6, #-2147483648
-; CHECK-NEON-NEXT:    mov r3, #-2147483648
-; CHECK-NEON-NEXT:    mvn r10, #0
-; CHECK-NEON-NEXT:    vmov r7, s20
-; CHECK-NEON-NEXT:    mov r4, #0
-; CHECK-NEON-NEXT:    str r2, [sp, #4] @ 4-byte Spill
-; CHECK-NEON-NEXT:    subs r2, r11, r6
+; CHECK-NEON-NEXT:    mov r4, r0
+; CHECK-NEON-NEXT:    vmov r0, s20
+; CHECK-NEON-NEXT:    mvn r7, #-2147483648
+; CHECK-NEON-NEXT:    subs r2, r4, r7
 ; CHECK-NEON-NEXT:    sbcs r2, r1, #0
+; CHECK-NEON-NEXT:    mov r8, #-2147483648
 ; CHECK-NEON-NEXT:    mov r2, #0
-; CHECK-NEON-NEXT:    movge r11, r6
+; CHECK-NEON-NEXT:    mvn r9, #0
 ; CHECK-NEON-NEXT:    movwlt r2, #1
 ; CHECK-NEON-NEXT:    cmp r2, #0
 ; CHECK-NEON-NEXT:    movne r2, r1
-; CHECK-NEON-NEXT:    rsbs r1, r11, #-2147483648
-; CHECK-NEON-NEXT:    sbcs r1, r10, r2
-; CHECK-NEON-NEXT:    movge r11, r3
+; CHECK-NEON-NEXT:    moveq r4, r7
+; CHECK-NEON-NEXT:    rsbs r1, r4, #-2147483648
+; CHECK-NEON-NEXT:    mov r10, #0
+; CHECK-NEON-NEXT:    sbcs r1, r9, r2
+; CHECK-NEON-NEXT:    movge r4, r8
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    bl __aeabi_f2lz
 ; CHECK-NEON-NEXT:    mov r5, r0
-; CHECK-NEON-NEXT:    subs r0, r0, r6
+; CHECK-NEON-NEXT:    subs r0, r0, r7
 ; CHECK-NEON-NEXT:    sbcs r0, r1, #0
-; CHECK-NEON-NEXT:    mov r8, #0
-; CHECK-NEON-NEXT:    mov r0, r7
-; CHECK-NEON-NEXT:    movge r5, r6
-; CHECK-NEON-NEXT:    movwlt r8, #1
-; CHECK-NEON-NEXT:    cmp r8, #0
-; CHECK-NEON-NEXT:    movne r8, r1
+; CHECK-NEON-NEXT:    mov r2, #0
+; CHECK-NEON-NEXT:    vmov r0, s18
+; CHECK-NEON-NEXT:    movwlt r2, #1
+; CHECK-NEON-NEXT:    cmp r2, #0
+; CHECK-NEON-NEXT:    movne r2, r1
+; CHECK-NEON-NEXT:    moveq r5, r7
+; CHECK-NEON-NEXT:    rsbs r1, r5, #-2147483648
+; CHECK-NEON-NEXT:    sbcs r1, r9, r2
+; CHECK-NEON-NEXT:    movge r5, r8
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    bl __aeabi_f2lz
-; CHECK-NEON-NEXT:    mov r7, r0
-; CHECK-NEON-NEXT:    subs r0, r0, r6
+; CHECK-NEON-NEXT:    mov r6, r0
+; CHECK-NEON-NEXT:    subs r0, r0, r7
 ; CHECK-NEON-NEXT:    sbcs r0, r1, #0
-; CHECK-NEON-NEXT:    mov r9, #0
-; CHECK-NEON-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEON-NEXT:    movge r7, r6
-; CHECK-NEON-NEXT:    movwlt r9, #1
-; CHECK-NEON-NEXT:    cmp r9, #0
-; CHECK-NEON-NEXT:    movne r9, r1
+; CHECK-NEON-NEXT:    mov r0, #0
+; CHECK-NEON-NEXT:    movwlt r0, #1
+; CHECK-NEON-NEXT:    cmp r0, #0
+; CHECK-NEON-NEXT:    movne r0, r1
+; CHECK-NEON-NEXT:    moveq r6, r7
+; CHECK-NEON-NEXT:    rsbs r1, r6, #-2147483648
+; CHECK-NEON-NEXT:    sbcs r0, r9, r0
+; CHECK-NEON-NEXT:    vmov r0, s16
+; CHECK-NEON-NEXT:    movge r6, r8
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    bl __aeabi_f2lz
-; CHECK-NEON-NEXT:    subs r2, r0, r6
+; CHECK-NEON-NEXT:    subs r2, r0, r7
+; CHECK-NEON-NEXT:    vmov.32 d1[0], r6
 ; CHECK-NEON-NEXT:    sbcs r2, r1, #0
-; CHECK-NEON-NEXT:    movlt r6, r0
-; CHECK-NEON-NEXT:    movwlt r4, #1
-; CHECK-NEON-NEXT:    cmp r4, #0
-; CHECK-NEON-NEXT:    movne r4, r1
-; CHECK-NEON-NEXT:    rsbs r0, r6, #-2147483648
-; CHECK-NEON-NEXT:    sbcs r0, r10, r4
-; CHECK-NEON-NEXT:    mov r1, #-2147483648
-; CHECK-NEON-NEXT:    movge r6, r1
+; CHECK-NEON-NEXT:    movwlt r10, #1
+; CHECK-NEON-NEXT:    cmp r10, #0
+; CHECK-NEON-NEXT:    movne r10, r1
+; CHECK-NEON-NEXT:    movne r7, r0
 ; CHECK-NEON-NEXT:    rsbs r0, r7, #-2147483648
-; CHECK-NEON-NEXT:    sbcs r0, r10, r9
-; CHECK-NEON-NEXT:    vmov.32 d1[0], r6
-; CHECK-NEON-NEXT:    movge r7, r1
-; CHECK-NEON-NEXT:    rsbs r0, r5, #-2147483648
-; CHECK-NEON-NEXT:    vmov.32 d0[0], r7
-; CHECK-NEON-NEXT:    sbcs r0, r10, r8
-; CHECK-NEON-NEXT:    movge r5, r1
-; CHECK-NEON-NEXT:    vmov.32 d1[1], r11
-; CHECK-NEON-NEXT:    vmov.32 d0[1], r5
-; CHECK-NEON-NEXT:    add sp, sp, #8
+; CHECK-NEON-NEXT:    vmov.32 d0[0], r5
+; CHECK-NEON-NEXT:    sbcs r0, r9, r10
+; CHECK-NEON-NEXT:    vmov.32 d1[1], r4
+; CHECK-NEON-NEXT:    movge r7, r8
+; CHECK-NEON-NEXT:    vmov.32 d0[1], r7
 ; CHECK-NEON-NEXT:    vpop {d8, d9, d10}
-; CHECK-NEON-NEXT:    add sp, sp, #4
-; CHECK-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+; CHECK-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
 ;
 ; CHECK-FP16-LABEL: stest_f16i32_mm:
 ; CHECK-FP16:       @ %bb.0: @ %entry
-; CHECK-FP16-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-FP16-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-FP16-NEXT:    .pad #4
-; CHECK-FP16-NEXT:    sub sp, sp, #4
+; CHECK-FP16-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-FP16-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
 ; CHECK-FP16-NEXT:    .vsave {d8, d9}
 ; CHECK-FP16-NEXT:    vpush {d8, d9}
 ; CHECK-FP16-NEXT:    vmov.u16 r0, d0[3]
-; CHECK-FP16-NEXT:    vmov.u16 r4, d0[2]
-; CHECK-FP16-NEXT:    vmov.u16 r5, d0[0]
-; CHECK-FP16-NEXT:    vmov.u16 r6, d0[1]
+; CHECK-FP16-NEXT:    vorr d8, d0, d0
+; CHECK-FP16-NEXT:    vmov.u16 r5, d0[2]
+; CHECK-FP16-NEXT:    vmov.u16 r6, d0[0]
 ; CHECK-FP16-NEXT:    vmov s0, r0
 ; CHECK-FP16-NEXT:    bl __fixhfdi
-; CHECK-FP16-NEXT:    mov r10, r0
+; CHECK-FP16-NEXT:    mov r4, r0
 ; CHECK-FP16-NEXT:    mvn r7, #-2147483648
 ; CHECK-FP16-NEXT:    subs r0, r0, r7
 ; CHECK-FP16-NEXT:    vmov s0, r6
 ; CHECK-FP16-NEXT:    sbcs r0, r1, #0
-; CHECK-FP16-NEXT:    mov r2, #-2147483648
+; CHECK-FP16-NEXT:    mov r8, #-2147483648
 ; CHECK-FP16-NEXT:    mov r0, #0
-; CHECK-FP16-NEXT:    movge r10, r7
+; CHECK-FP16-NEXT:    mvn r9, #0
 ; CHECK-FP16-NEXT:    movwlt r0, #1
 ; CHECK-FP16-NEXT:    cmp r0, #0
 ; CHECK-FP16-NEXT:    movne r0, r1
-; CHECK-FP16-NEXT:    rsbs r1, r10, #-2147483648
-; CHECK-FP16-NEXT:    mvn r9, #0
+; CHECK-FP16-NEXT:    moveq r4, r7
+; CHECK-FP16-NEXT:    rsbs r1, r4, #-2147483648
+; CHECK-FP16-NEXT:    mov r10, #0
 ; CHECK-FP16-NEXT:    sbcs r0, r9, r0
-; CHECK-FP16-NEXT:    vmov s16, r4
-; CHECK-FP16-NEXT:    mov r11, #0
 ; CHECK-FP16-NEXT:    vmov s18, r5
-; CHECK-FP16-NEXT:    movge r10, r2
+; CHECK-FP16-NEXT:    movge r4, r8
 ; CHECK-FP16-NEXT:    bl __fixhfdi
 ; CHECK-FP16-NEXT:    vmov.f32 s0, s18
 ; CHECK-FP16-NEXT:    mov r5, r0
 ; CHECK-FP16-NEXT:    subs r0, r0, r7
-; CHECK-FP16-NEXT:    mov r4, #0
 ; CHECK-FP16-NEXT:    sbcs r0, r1, #0
-; CHECK-FP16-NEXT:    movge r5, r7
-; CHECK-FP16-NEXT:    movwlt r4, #1
-; CHECK-FP16-NEXT:    cmp r4, #0
-; CHECK-FP16-NEXT:    movne r4, r1
+; CHECK-FP16-NEXT:    mov r0, #0
+; CHECK-FP16-NEXT:    movwlt r0, #1
+; CHECK-FP16-NEXT:    cmp r0, #0
+; CHECK-FP16-NEXT:    movne r0, r1
+; CHECK-FP16-NEXT:    moveq r5, r7
+; CHECK-FP16-NEXT:    rsbs r1, r5, #-2147483648
+; CHECK-FP16-NEXT:    sbcs r0, r9, r0
+; CHECK-FP16-NEXT:    movge r5, r8
 ; CHECK-FP16-NEXT:    bl __fixhfdi
-; CHECK-FP16-NEXT:    vmov.f32 s0, s16
 ; CHECK-FP16-NEXT:    mov r6, r0
 ; CHECK-FP16-NEXT:    subs r0, r0, r7
-; CHECK-FP16-NEXT:    mov r8, #0
 ; CHECK-FP16-NEXT:    sbcs r0, r1, #0
-; CHECK-FP16-NEXT:    movge r6, r7
-; CHECK-FP16-NEXT:    movwlt r8, #1
-; CHECK-FP16-NEXT:    cmp r8, #0
-; CHECK-FP16-NEXT:    movne r8, r1
+; CHECK-FP16-NEXT:    mov r0, #0
+; CHECK-FP16-NEXT:    movwlt r0, #1
+; CHECK-FP16-NEXT:    cmp r0, #0
+; CHECK-FP16-NEXT:    movne r0, r1
+; CHECK-FP16-NEXT:    vmov.u16 r1, d8[1]
+; CHECK-FP16-NEXT:    moveq r6, r7
+; CHECK-FP16-NEXT:    vmov s0, r1
+; CHECK-FP16-NEXT:    rsbs r1, r6, #-2147483648
+; CHECK-FP16-NEXT:    sbcs r0, r9, r0
+; CHECK-FP16-NEXT:    movge r6, r8
 ; CHECK-FP16-NEXT:    bl __fixhfdi
 ; CHECK-FP16-NEXT:    subs r2, r0, r7
+; CHECK-FP16-NEXT:    vmov.32 d1[0], r6
 ; CHECK-FP16-NEXT:    sbcs r2, r1, #0
-; CHECK-FP16-NEXT:    movlt r7, r0
-; CHECK-FP16-NEXT:    movwlt r11, #1
-; CHECK-FP16-NEXT:    cmp r11, #0
-; CHECK-FP16-NEXT:    movne r11, r1
+; CHECK-FP16-NEXT:    movwlt r10, #1
+; CHECK-FP16-NEXT:    cmp r10, #0
+; CHECK-FP16-NEXT:    movne r10, r1
+; CHECK-FP16-NEXT:    movne r7, r0
 ; CHECK-FP16-NEXT:    rsbs r0, r7, #-2147483648
-; CHECK-FP16-NEXT:    sbcs r0, r9, r11
-; CHECK-FP16-NEXT:    mov r1, #-2147483648
-; CHECK-FP16-NEXT:    movge r7, r1
-; CHECK-FP16-NEXT:    rsbs r0, r6, #-2147483648
-; CHECK-FP16-NEXT:    sbcs r0, r9, r8
-; CHECK-FP16-NEXT:    vmov.32 d1[0], r7
-; CHECK-FP16-NEXT:    movge r6, r1
-; CHECK-FP16-NEXT:    rsbs r0, r5, #-2147483648
-; CHECK-FP16-NEXT:    vmov.32 d0[0], r6
-; CHECK-FP16-NEXT:    sbcs r0, r9, r4
-; CHECK-FP16-NEXT:    movge r5, r1
-; CHECK-FP16-NEXT:    vmov.32 d1[1], r10
-; CHECK-FP16-NEXT:    vmov.32 d0[1], r5
+; CHECK-FP16-NEXT:    vmov.32 d0[0], r5
+; CHECK-FP16-NEXT:    sbcs r0, r9, r10
+; CHECK-FP16-NEXT:    vmov.32 d1[1], r4
+; CHECK-FP16-NEXT:    movge r7, r8
+; CHECK-FP16-NEXT:    vmov.32 d0[1], r7
 ; CHECK-FP16-NEXT:    vpop {d8, d9}
-; CHECK-FP16-NEXT:    add sp, sp, #4
-; CHECK-FP16-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+; CHECK-FP16-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
 entry:
   %conv = fptosi <4 x half> %x to <4 x i64>
   %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
@@ -2944,34 +2975,34 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    bl __aeabi_f2lz
 ; CHECK-NEON-NEXT:    vmov r2, s20
-; CHECK-NEON-NEXT:    mvn r6, #0
-; CHECK-NEON-NEXT:    subs r3, r0, r6
+; CHECK-NEON-NEXT:    mvn r7, #0
+; CHECK-NEON-NEXT:    subs r3, r0, r7
 ; CHECK-NEON-NEXT:    mov r4, #0
 ; CHECK-NEON-NEXT:    sbcs r3, r1, #0
-; CHECK-NEON-NEXT:    vmov r8, s18
+; CHECK-NEON-NEXT:    mov r10, #0
 ; CHECK-NEON-NEXT:    mov r3, #0
-; CHECK-NEON-NEXT:    movge r0, r6
+; CHECK-NEON-NEXT:    vmov r8, s18
 ; CHECK-NEON-NEXT:    movwlt r3, #1
 ; CHECK-NEON-NEXT:    cmp r3, #0
 ; CHECK-NEON-NEXT:    movne r3, r1
+; CHECK-NEON-NEXT:    moveq r0, r7
 ; CHECK-NEON-NEXT:    rsbs r1, r0, #0
-; CHECK-NEON-NEXT:    rscs r1, r3, #0
 ; CHECK-NEON-NEXT:    vmov r9, s16
+; CHECK-NEON-NEXT:    rscs r1, r3, #0
 ; CHECK-NEON-NEXT:    movwlt r4, #1
 ; CHECK-NEON-NEXT:    cmp r4, #0
 ; CHECK-NEON-NEXT:    movne r4, r0
-; CHECK-NEON-NEXT:    mov r10, #0
 ; CHECK-NEON-NEXT:    mov r0, r2
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    bl __aeabi_f2lz
-; CHECK-NEON-NEXT:    subs r2, r0, r6
+; CHECK-NEON-NEXT:    subs r2, r0, r7
 ; CHECK-NEON-NEXT:    mov r5, #0
 ; CHECK-NEON-NEXT:    sbcs r2, r1, #0
 ; CHECK-NEON-NEXT:    mov r2, #0
-; CHECK-NEON-NEXT:    movge r0, r6
 ; CHECK-NEON-NEXT:    movwlt r2, #1
 ; CHECK-NEON-NEXT:    cmp r2, #0
 ; CHECK-NEON-NEXT:    movne r2, r1
+; CHECK-NEON-NEXT:    moveq r0, r7
 ; CHECK-NEON-NEXT:    rsbs r1, r0, #0
 ; CHECK-NEON-NEXT:    rscs r1, r2, #0
 ; CHECK-NEON-NEXT:    movwlt r5, #1
@@ -2980,37 +3011,37 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
 ; CHECK-NEON-NEXT:    mov r0, r9
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    bl __aeabi_f2lz
-; CHECK-NEON-NEXT:    subs r2, r0, r6
-; CHECK-NEON-NEXT:    mov r7, #0
+; CHECK-NEON-NEXT:    subs r2, r0, r7
+; CHECK-NEON-NEXT:    mov r6, #0
 ; CHECK-NEON-NEXT:    sbcs r2, r1, #0
 ; CHECK-NEON-NEXT:    mov r2, #0
-; CHECK-NEON-NEXT:    movge r0, r6
 ; CHECK-NEON-NEXT:    movwlt r2, #1
 ; CHECK-NEON-NEXT:    cmp r2, #0
 ; CHECK-NEON-NEXT:    movne r2, r1
+; CHECK-NEON-NEXT:    moveq r0, r7
 ; CHECK-NEON-NEXT:    rsbs r1, r0, #0
 ; CHECK-NEON-NEXT:    rscs r1, r2, #0
-; CHECK-NEON-NEXT:    movwlt r7, #1
-; CHECK-NEON-NEXT:    cmp r7, #0
-; CHECK-NEON-NEXT:    movne r7, r0
+; CHECK-NEON-NEXT:    movwlt r6, #1
+; CHECK-NEON-NEXT:    cmp r6, #0
+; CHECK-NEON-NEXT:    movne r6, r0
 ; CHECK-NEON-NEXT:    mov r0, r8
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    bl __aeabi_f2lz
-; CHECK-NEON-NEXT:    subs r2, r0, r6
-; CHECK-NEON-NEXT:    vmov.32 d1[0], r7
+; CHECK-NEON-NEXT:    subs r2, r0, r7
+; CHECK-NEON-NEXT:    vmov.32 d1[0], r6
 ; CHECK-NEON-NEXT:    sbcs r2, r1, #0
-; CHECK-NEON-NEXT:    movlt r6, r0
-; CHECK-NEON-NEXT:    mov r0, #0
-; CHECK-NEON-NEXT:    movwlt r0, #1
-; CHECK-NEON-NEXT:    cmp r0, #0
-; CHECK-NEON-NEXT:    movne r0, r1
-; CHECK-NEON-NEXT:    rsbs r1, r6, #0
-; CHECK-NEON-NEXT:    rscs r0, r0, #0
+; CHECK-NEON-NEXT:    mov r2, #0
 ; CHECK-NEON-NEXT:    vmov.32 d0[0], r5
+; CHECK-NEON-NEXT:    movwlt r2, #1
+; CHECK-NEON-NEXT:    cmp r2, #0
+; CHECK-NEON-NEXT:    movne r2, r1
+; CHECK-NEON-NEXT:    movne r7, r0
+; CHECK-NEON-NEXT:    rsbs r0, r7, #0
+; CHECK-NEON-NEXT:    vmov.32 d1[1], r4
+; CHECK-NEON-NEXT:    rscs r0, r2, #0
 ; CHECK-NEON-NEXT:    movwlt r10, #1
 ; CHECK-NEON-NEXT:    cmp r10, #0
-; CHECK-NEON-NEXT:    vmov.32 d1[1], r4
-; CHECK-NEON-NEXT:    movne r10, r6
+; CHECK-NEON-NEXT:    movne r10, r7
 ; CHECK-NEON-NEXT:    vmov.32 d0[1], r10
 ; CHECK-NEON-NEXT:    vpop {d8, d9, d10}
 ; CHECK-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
@@ -3023,75 +3054,75 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
 ; CHECK-FP16-NEXT:    vpush {d8, d9}
 ; CHECK-FP16-NEXT:    vmov.u16 r0, d0[3]
 ; CHECK-FP16-NEXT:    vorr d8, d0, d0
-; CHECK-FP16-NEXT:    vmov.u16 r5, d0[0]
+; CHECK-FP16-NEXT:    vmov.u16 r6, d0[0]
+; CHECK-FP16-NEXT:    vmov.u16 r7, d0[2]
 ; CHECK-FP16-NEXT:    vmov s0, r0
 ; CHECK-FP16-NEXT:    bl __fixhfdi
 ; CHECK-FP16-NEXT:    vmov.u16 r2, d8[1]
-; CHECK-FP16-NEXT:    mvn r4, #0
-; CHECK-FP16-NEXT:    vmov.u16 r3, d8[2]
-; CHECK-FP16-NEXT:    vmov s0, r5
+; CHECK-FP16-NEXT:    mvn r5, #0
+; CHECK-FP16-NEXT:    vmov s0, r6
 ; CHECK-FP16-NEXT:    mov r6, #0
 ; CHECK-FP16-NEXT:    mov r8, #0
+; CHECK-FP16-NEXT:    vmov s18, r7
 ; CHECK-FP16-NEXT:    vmov s16, r2
-; CHECK-FP16-NEXT:    subs r2, r0, r4
+; CHECK-FP16-NEXT:    subs r2, r0, r5
 ; CHECK-FP16-NEXT:    sbcs r2, r1, #0
-; CHECK-FP16-NEXT:    vmov s18, r3
 ; CHECK-FP16-NEXT:    mov r2, #0
-; CHECK-FP16-NEXT:    movge r0, r4
 ; CHECK-FP16-NEXT:    movwlt r2, #1
 ; CHECK-FP16-NEXT:    cmp r2, #0
 ; CHECK-FP16-NEXT:    movne r2, r1
+; CHECK-FP16-NEXT:    moveq r0, r5
 ; CHECK-FP16-NEXT:    rsbs r1, r0, #0
 ; CHECK-FP16-NEXT:    rscs r1, r2, #0
 ; CHECK-FP16-NEXT:    movwlt r6, #1
 ; CHECK-FP16-NEXT:    cmp r6, #0
 ; CHECK-FP16-NEXT:    movne r6, r0
 ; CHECK-FP16-NEXT:    bl __fixhfdi
-; CHECK-FP16-NEXT:    subs r2, r0, r4
+; CHECK-FP16-NEXT:    subs r2, r0, r5
 ; CHECK-FP16-NEXT:    vmov.f32 s0, s18
 ; CHECK-FP16-NEXT:    sbcs r2, r1, #0
 ; CHECK-FP16-NEXT:    mov r7, #0
 ; CHECK-FP16-NEXT:    mov r2, #0
-; CHECK-FP16-NEXT:    movge r0, r4
 ; CHECK-FP16-NEXT:    movwlt r2, #1
 ; CHECK-FP16-NEXT:    cmp r2, #0
 ; CHECK-FP16-NEXT:    movne r2, r1
+; CHECK-FP16-NEXT:    moveq r0, r5
 ; CHECK-FP16-NEXT:    rsbs r1, r0, #0
 ; CHECK-FP16-NEXT:    rscs r1, r2, #0
 ; CHECK-FP16-NEXT:    movwlt r7, #1
 ; CHECK-FP16-NEXT:    cmp r7, #0
 ; CHECK-FP16-NEXT:    movne r7, r0
 ; CHECK-FP16-NEXT:    bl __fixhfdi
-; CHECK-FP16-NEXT:    subs r2, r0, r4
+; CHECK-FP16-NEXT:    subs r2, r0, r5
 ; CHECK-FP16-NEXT:    vmov.f32 s0, s16
 ; CHECK-FP16-NEXT:    sbcs r2, r1, #0
-; CHECK-FP16-NEXT:    mov r5, #0
+; CHECK-FP16-NEXT:    mov r4, #0
 ; CHECK-FP16-NEXT:    mov r2, #0
-; CHECK-FP16-NEXT:    movge r0, r4
 ; CHECK-FP16-NEXT:    movwlt r2, #1
 ; CHECK-FP16-NEXT:    cmp r2, #0
 ; CHECK-FP16-NEXT:    movne r2, r1
+; CHECK-FP16-NEXT:    moveq r0, r5
 ; CHECK-FP16-NEXT:    rsbs r1, r0, #0
 ; CHECK-FP16-NEXT:    rscs r1, r2, #0
-; CHECK-FP16-NEXT:    movwlt r5, #1
-; CHECK-FP16-NEXT:    cmp r5, #0
-; CHECK-FP16-NEXT:    movne r5, r0
+; CHECK-FP16-NEXT:    movwlt r4, #1
+; CHECK-FP16-NEXT:    cmp r4, #0
+; CHECK-FP16-NEXT:    movne r4, r0
 ; CHECK-FP16-NEXT:    bl __fixhfdi
-; CHECK-FP16-NEXT:    subs r2, r0, r4
-; CHECK-FP16-NEXT:    vmov.32 d1[0], r5
+; CHECK-FP16-NEXT:    subs r2, r0, r5
+; CHECK-FP16-NEXT:    vmov.32 d1[0], r4
 ; CHECK-FP16-NEXT:    sbcs r2, r1, #0
-; CHECK-FP16-NEXT:    movlt r4, r0
-; CHECK-FP16-NEXT:    mov r0, #0
-; CHECK-FP16-NEXT:    movwlt r0, #1
-; CHECK-FP16-NEXT:    cmp r0, #0
-; CHECK-FP16-NEXT:    movne r0, r1
-; CHECK-FP16-NEXT:    rsbs r1, r4, #0
-; CHECK-FP16-NEXT:    rscs r0, r0, #0
+; CHECK-FP16-NEXT:    mov r2, #0
 ; CHECK-FP16-NEXT:    vmov.32 d0[0], r7
+; CHECK-FP16-NEXT:    movwlt r2, #1
+; CHECK-FP16-NEXT:    cmp r2, #0
+; CHECK-FP16-NEXT:    movne r2, r1
+; CHECK-FP16-NEXT:    movne r5, r0
+; CHECK-FP16-NEXT:    rsbs r0, r5, #0
+; CHECK-FP16-NEXT:    vmov.32 d1[1], r6
+; CHECK-FP16-NEXT:    rscs r0, r2, #0
 ; CHECK-FP16-NEXT:    movwlt r8, #1
 ; CHECK-FP16-NEXT:    cmp r8, #0
-; CHECK-FP16-NEXT:    vmov.32 d1[1], r6
-; CHECK-FP16-NEXT:    movne r8, r4
+; CHECK-FP16-NEXT:    movne r8, r5
 ; CHECK-FP16-NEXT:    vmov.32 d0[1], r8
 ; CHECK-FP16-NEXT:    vpop {d8, d9}
 ; CHECK-FP16-NEXT:    pop {r4, r5, r6, r7, r8, pc}
@@ -3599,56 +3630,59 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT:    vorr q4, q0, q0
 ; CHECK-NEXT:    vorr d0, d9, d9
 ; CHECK-NEXT:    bl __fixdfti
-; CHECK-NEXT:    mov r4, r1
-; CHECK-NEXT:    mvn r9, #0
-; CHECK-NEXT:    subs r1, r0, r9
-; CHECK-NEXT:    mvn r5, #-2147483648
-; CHECK-NEXT:    sbcs r1, r4, r5
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    mvn r8, #0
+; CHECK-NEXT:    subs r0, r0, r8
+; CHECK-NEXT:    mvn r6, #-2147483648
+; CHECK-NEXT:    sbcs r0, r1, r6
+; CHECK-NEXT:    mov r10, r1
+; CHECK-NEXT:    sbcs r0, r2, #0
 ; CHECK-NEXT:    vorr d0, d8, d8
-; CHECK-NEXT:    sbcs r1, r2, #0
+; CHECK-NEXT:    sbcs r0, r3, #0
 ; CHECK-NEXT:    mov r7, #0
-; CHECK-NEXT:    sbcs r1, r3, #0
-; CHECK-NEXT:    mov r8, #-2147483648
-; CHECK-NEXT:    mov r1, #0
-; CHECK-NEXT:    mov r10, #0
-; CHECK-NEXT:    movwlt r1, #1
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    moveq r3, r1
-; CHECK-NEXT:    movne r1, r2
-; CHECK-NEXT:    moveq r4, r5
-; CHECK-NEXT:    moveq r0, r9
-; CHECK-NEXT:    rsbs r2, r0, #0
-; CHECK-NEXT:    rscs r2, r4, #-2147483648
-; CHECK-NEXT:    sbcs r1, r9, r1
-; CHECK-NEXT:    sbcs r1, r9, r3
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    mov r9, #0
+; CHECK-NEXT:    movwlt r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    moveq r3, r0
+; CHECK-NEXT:    movne r0, r2
+; CHECK-NEXT:    moveq r10, r6
+; CHECK-NEXT:    moveq r5, r8
+; CHECK-NEXT:    rsbs r1, r5, #0
+; CHECK-NEXT:    rscs r1, r10, #-2147483648
+; CHECK-NEXT:    sbcs r0, r8, r0
+; CHECK-NEXT:    sbcs r0, r8, r3
 ; CHECK-NEXT:    movwlt r7, #1
 ; CHECK-NEXT:    cmp r7, #0
-; CHECK-NEXT:    movne r7, r0
-; CHECK-NEXT:    moveq r4, r8
+; CHECK-NEXT:    moveq r5, r7
 ; CHECK-NEXT:    bl __fixdfti
-; CHECK-NEXT:    subs r6, r0, r9
-; CHECK-NEXT:    vmov.32 d1[0], r7
-; CHECK-NEXT:    sbcs r6, r1, r5
-; CHECK-NEXT:    sbcs r6, r2, #0
-; CHECK-NEXT:    sbcs r6, r3, #0
-; CHECK-NEXT:    mov r6, #0
-; CHECK-NEXT:    movwlt r6, #1
-; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    moveq r3, r6
-; CHECK-NEXT:    movne r6, r2
-; CHECK-NEXT:    movne r5, r1
-; CHECK-NEXT:    moveq r0, r9
+; CHECK-NEXT:    subs r4, r0, r8
+; CHECK-NEXT:    vmov.32 d1[0], r5
+; CHECK-NEXT:    sbcs r4, r1, r6
+; CHECK-NEXT:    sbcs r4, r2, #0
+; CHECK-NEXT:    sbcs r4, r3, #0
+; CHECK-NEXT:    mov r4, #0
+; CHECK-NEXT:    movwlt r4, #1
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    moveq r3, r4
+; CHECK-NEXT:    movne r6, r1
+; CHECK-NEXT:    movne r4, r2
+; CHECK-NEXT:    moveq r0, r8
 ; CHECK-NEXT:    rsbs r1, r0, #0
-; CHECK-NEXT:    rscs r1, r5, #-2147483648
-; CHECK-NEXT:    sbcs r1, r9, r6
-; CHECK-NEXT:    sbcs r1, r9, r3
-; CHECK-NEXT:    movwlt r10, #1
-; CHECK-NEXT:    cmp r10, #0
-; CHECK-NEXT:    movne r10, r0
-; CHECK-NEXT:    moveq r5, r8
-; CHECK-NEXT:    vmov.32 d0[0], r10
-; CHECK-NEXT:    vmov.32 d1[1], r4
-; CHECK-NEXT:    vmov.32 d0[1], r5
+; CHECK-NEXT:    rscs r1, r6, #-2147483648
+; CHECK-NEXT:    sbcs r1, r8, r4
+; CHECK-NEXT:    sbcs r1, r8, r3
+; CHECK-NEXT:    movwlt r9, #1
+; CHECK-NEXT:    cmp r9, #0
+; CHECK-NEXT:    moveq r0, r9
+; CHECK-NEXT:    mov r1, #-2147483648
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    vmov.32 d0[0], r0
+; CHECK-NEXT:    moveq r10, r1
+; CHECK-NEXT:    cmp r9, #0
+; CHECK-NEXT:    vmov.32 d1[1], r10
+; CHECK-NEXT:    moveq r6, r1
+; CHECK-NEXT:    vmov.32 d0[1], r6
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
 entry:
@@ -3662,36 +3696,39 @@ entry:
 define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
 ; CHECK-LABEL: utest_f64i64_mm:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r6, lr}
-; CHECK-NEXT:    push {r4, r5, r6, lr}
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, r11, lr}
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
 ; CHECK-NEXT:    vorr q4, q0, q0
 ; CHECK-NEXT:    vorr d0, d9, d9
 ; CHECK-NEXT:    bl __fixunsdfti
-; CHECK-NEXT:    mov r4, r1
-; CHECK-NEXT:    subs r1, r2, #1
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    subs r0, r2, #1
 ; CHECK-NEXT:    vorr d0, d8, d8
-; CHECK-NEXT:    sbcs r1, r3, #0
+; CHECK-NEXT:    sbcs r0, r3, #0
+; CHECK-NEXT:    mov r7, #0
+; CHECK-NEXT:    mov r4, r1
+; CHECK-NEXT:    movwlo r7, #1
+; CHECK-NEXT:    cmp r7, #0
 ; CHECK-NEXT:    mov r6, #0
-; CHECK-NEXT:    mov r5, #0
-; CHECK-NEXT:    movwlo r6, #1
-; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    moveq r4, r6
-; CHECK-NEXT:    movne r6, r0
+; CHECK-NEXT:    moveq r5, r7
 ; CHECK-NEXT:    bl __fixunsdfti
 ; CHECK-NEXT:    subs r2, r2, #1
-; CHECK-NEXT:    vmov.32 d1[0], r6
+; CHECK-NEXT:    vmov.32 d1[0], r5
 ; CHECK-NEXT:    sbcs r2, r3, #0
-; CHECK-NEXT:    movwlo r5, #1
-; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    moveq r0, r5
-; CHECK-NEXT:    movne r5, r1
+; CHECK-NEXT:    movwlo r6, #1
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    moveq r0, r6
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    movne r7, r4
 ; CHECK-NEXT:    vmov.32 d0[0], r0
-; CHECK-NEXT:    vmov.32 d1[1], r4
-; CHECK-NEXT:    vmov.32 d0[1], r5
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    vmov.32 d1[1], r7
+; CHECK-NEXT:    movne r6, r1
+; CHECK-NEXT:    vmov.32 d0[1], r6
 ; CHECK-NEXT:    vpop {d8, d9}
-; CHECK-NEXT:    pop {r4, r5, r6, pc}
+; CHECK-NEXT:    pop {r4, r5, r6, r7, r11, pc}
 entry:
   %conv = fptoui <2 x double> %x to <2 x i128>
   %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
@@ -3702,45 +3739,49 @@ entry:
 define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
 ; CHECK-LABEL: ustest_f64i64_mm:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r6, lr}
-; CHECK-NEXT:    push {r4, r5, r6, lr}
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, r8, lr}
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
 ; CHECK-NEXT:    vorr q4, q0, q0
-; CHECK-NEXT:    vorr d0, d9, d9
 ; CHECK-NEXT:    bl __fixdfti
-; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    subs r0, r2, #1
+; CHECK-NEXT:    vorr d0, d9, d9
 ; CHECK-NEXT:    sbcs r0, r3, #0
-; CHECK-NEXT:    vorr d0, d8, d8
-; CHECK-NEXT:    mov r0, #0
-; CHECK-NEXT:    mov r4, r1
-; CHECK-NEXT:    movwlt r0, #1
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    moveq r5, r0
-; CHECK-NEXT:    moveq r4, r0
-; CHECK-NEXT:    movne r0, r3
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r6, #0
-; CHECK-NEXT:    movwmi r4, #0
-; CHECK-NEXT:    movwmi r5, #0
+; CHECK-NEXT:    mov r7, #0
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    movwlt r7, #1
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    moveq r6, r7
+; CHECK-NEXT:    moveq r5, r7
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    mov r8, r1
+; CHECK-NEXT:    mov r4, #0
+; CHECK-NEXT:    movwmi r6, #0
 ; CHECK-NEXT:    bl __fixdfti
 ; CHECK-NEXT:    subs r2, r2, #1
-; CHECK-NEXT:    vmov.32 d1[0], r5
+; CHECK-NEXT:    vmov.32 d0[0], r6
 ; CHECK-NEXT:    sbcs r2, r3, #0
-; CHECK-NEXT:    movwlt r6, #1
-; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    moveq r1, r6
-; CHECK-NEXT:    moveq r0, r6
-; CHECK-NEXT:    movne r6, r3
-; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    movwlt r4, #1
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    moveq r0, r4
+; CHECK-NEXT:    moveq r3, r4
+; CHECK-NEXT:    cmp r3, #0
 ; CHECK-NEXT:    movwmi r0, #0
-; CHECK-NEXT:    movwmi r1, #0
-; CHECK-NEXT:    vmov.32 d0[0], r0
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    movne r4, r1
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    vmov.32 d1[0], r0
+; CHECK-NEXT:    movwmi r4, #0
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    movne r7, r8
+; CHECK-NEXT:    cmp r5, #0
 ; CHECK-NEXT:    vmov.32 d1[1], r4
-; CHECK-NEXT:    vmov.32 d0[1], r1
+; CHECK-NEXT:    movwmi r7, #0
+; CHECK-NEXT:    vmov.32 d0[1], r7
 ; CHECK-NEXT:    vpop {d8, d9}
-; CHECK-NEXT:    pop {r4, r5, r6, pc}
+; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, pc}
 entry:
   %conv = fptosi <2 x double> %x to <2 x i128>
   %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
@@ -3759,56 +3800,59 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT:    vmov.f64 d8, d0
 ; CHECK-NEXT:    vmov.f32 s0, s17
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    mov r4, r1
-; CHECK-NEXT:    mvn r9, #0
-; CHECK-NEXT:    subs r1, r0, r9
-; CHECK-NEXT:    mvn r5, #-2147483648
-; CHECK-NEXT:    sbcs r1, r4, r5
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    mvn r8, #0
+; CHECK-NEXT:    subs r0, r0, r8
+; CHECK-NEXT:    mvn r6, #-2147483648
+; CHECK-NEXT:    sbcs r0, r1, r6
 ; CHECK-NEXT:    vmov.f32 s0, s16
-; CHECK-NEXT:    sbcs r1, r2, #0
+; CHECK-NEXT:    sbcs r0, r2, #0
+; CHECK-NEXT:    mov r10, r1
+; CHECK-NEXT:    sbcs r0, r3, #0
 ; CHECK-NEXT:    mov r7, #0
-; CHECK-NEXT:    sbcs r1, r3, #0
-; CHECK-NEXT:    mov r8, #-2147483648
-; CHECK-NEXT:    mov r1, #0
-; CHECK-NEXT:    mov r10, #0
-; CHECK-NEXT:    movwlt r1, #1
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    moveq r3, r1
-; CHECK-NEXT:    movne r1, r2
-; CHECK-NEXT:    moveq r4, r5
-; CHECK-NEXT:    moveq r0, r9
-; CHECK-NEXT:    rsbs r2, r0, #0
-; CHECK-NEXT:    rscs r2, r4, #-2147483648
-; CHECK-NEXT:    sbcs r1, r9, r1
-; CHECK-NEXT:    sbcs r1, r9, r3
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    mov r9, #0
+; CHECK-NEXT:    movwlt r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    moveq r3, r0
+; CHECK-NEXT:    movne r0, r2
+; CHECK-NEXT:    moveq r10, r6
+; CHECK-NEXT:    moveq r5, r8
+; CHECK-NEXT:    rsbs r1, r5, #0
+; CHECK-NEXT:    rscs r1, r10, #-2147483648
+; CHECK-NEXT:    sbcs r0, r8, r0
+; CHECK-NEXT:    sbcs r0, r8, r3
 ; CHECK-NEXT:    movwlt r7, #1
 ; CHECK-NEXT:    cmp r7, #0
-; CHECK-NEXT:    movne r7, r0
-; CHECK-NEXT:    moveq r4, r8
+; CHECK-NEXT:    moveq r5, r7
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    subs r6, r0, r9
-; CHECK-NEXT:    vmov.32 d1[0], r7
-; CHECK-NEXT:    sbcs r6, r1, r5
-; CHECK-NEXT:    sbcs r6, r2, #0
-; CHECK-NEXT:    sbcs r6, r3, #0
-; CHECK-NEXT:    mov r6, #0
-; CHECK-NEXT:    movwlt r6, #1
-; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    moveq r3, r6
-; CHECK-NEXT:    movne r6, r2
-; CHECK-NEXT:    movne r5, r1
-; CHECK-NEXT:    moveq r0, r9
+; CHECK-NEXT:    subs r4, r0, r8
+; CHECK-NEXT:    vmov.32 d1[0], r5
+; CHECK-NEXT:    sbcs r4, r1, r6
+; CHECK-NEXT:    sbcs r4, r2, #0
+; CHECK-NEXT:    sbcs r4, r3, #0
+; CHECK-NEXT:    mov r4, #0
+; CHECK-NEXT:    movwlt r4, #1
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    moveq r3, r4
+; CHECK-NEXT:    movne r6, r1
+; CHECK-NEXT:    movne r4, r2
+; CHECK-NEXT:    moveq r0, r8
 ; CHECK-NEXT:    rsbs r1, r0, #0
-; CHECK-NEXT:    rscs r1, r5, #-2147483648
-; CHECK-NEXT:    sbcs r1, r9, r6
-; CHECK-NEXT:    sbcs r1, r9, r3
-; CHECK-NEXT:    movwlt r10, #1
-; CHECK-NEXT:    cmp r10, #0
-; CHECK-NEXT:    movne r10, r0
-; CHECK-NEXT:    moveq r5, r8
-; CHECK-NEXT:    vmov.32 d0[0], r10
-; CHECK-NEXT:    vmov.32 d1[1], r4
-; CHECK-NEXT:    vmov.32 d0[1], r5
+; CHECK-NEXT:    rscs r1, r6, #-2147483648
+; CHECK-NEXT:    sbcs r1, r8, r4
+; CHECK-NEXT:    sbcs r1, r8, r3
+; CHECK-NEXT:    movwlt r9, #1
+; CHECK-NEXT:    cmp r9, #0
+; CHECK-NEXT:    moveq r0, r9
+; CHECK-NEXT:    mov r1, #-2147483648
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    vmov.32 d0[0], r0
+; CHECK-NEXT:    moveq r10, r1
+; CHECK-NEXT:    cmp r9, #0
+; CHECK-NEXT:    vmov.32 d1[1], r10
+; CHECK-NEXT:    moveq r6, r1
+; CHECK-NEXT:    vmov.32 d0[1], r6
 ; CHECK-NEXT:    vpop {d8}
 ; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
 entry:
@@ -3822,36 +3866,39 @@ entry:
 define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
 ; CHECK-LABEL: utest_f32i64_mm:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r6, lr}
-; CHECK-NEXT:    push {r4, r5, r6, lr}
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, r11, lr}
 ; CHECK-NEXT:    .vsave {d8}
 ; CHECK-NEXT:    vpush {d8}
 ; CHECK-NEXT:    vmov.f64 d8, d0
 ; CHECK-NEXT:    vmov.f32 s0, s17
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    vmov.f32 s0, s16
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    subs r0, r2, #1
+; CHECK-NEXT:    mov r7, #0
+; CHECK-NEXT:    sbcs r0, r3, #0
 ; CHECK-NEXT:    mov r4, r1
-; CHECK-NEXT:    subs r1, r2, #1
+; CHECK-NEXT:    movwlo r7, #1
+; CHECK-NEXT:    cmp r7, #0
 ; CHECK-NEXT:    mov r6, #0
-; CHECK-NEXT:    sbcs r1, r3, #0
-; CHECK-NEXT:    mov r5, #0
-; CHECK-NEXT:    movwlo r6, #1
-; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    moveq r4, r6
-; CHECK-NEXT:    movne r6, r0
+; CHECK-NEXT:    moveq r5, r7
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    subs r2, r2, #1
-; CHECK-NEXT:    vmov.32 d1[0], r6
+; CHECK-NEXT:    vmov.32 d1[0], r5
 ; CHECK-NEXT:    sbcs r2, r3, #0
-; CHECK-NEXT:    movwlo r5, #1
-; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    moveq r0, r5
-; CHECK-NEXT:    movne r5, r1
+; CHECK-NEXT:    movwlo r6, #1
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    moveq r0, r6
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    movne r7, r4
 ; CHECK-NEXT:    vmov.32 d0[0], r0
-; CHECK-NEXT:    vmov.32 d1[1], r4
-; CHECK-NEXT:    vmov.32 d0[1], r5
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    vmov.32 d1[1], r7
+; CHECK-NEXT:    movne r6, r1
+; CHECK-NEXT:    vmov.32 d0[1], r6
 ; CHECK-NEXT:    vpop {d8}
-; CHECK-NEXT:    pop {r4, r5, r6, pc}
+; CHECK-NEXT:    pop {r4, r5, r6, r7, r11, pc}
 entry:
   %conv = fptoui <2 x float> %x to <2 x i128>
   %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
@@ -3862,45 +3909,49 @@ entry:
 define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
 ; CHECK-LABEL: ustest_f32i64_mm:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r6, lr}
-; CHECK-NEXT:    push {r4, r5, r6, lr}
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, r8, lr}
 ; CHECK-NEXT:    .vsave {d8}
 ; CHECK-NEXT:    vpush {d8}
 ; CHECK-NEXT:    vmov.f64 d8, d0
-; CHECK-NEXT:    vmov.f32 s0, s17
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    vmov.f32 s0, s16
-; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    vmov.f32 s0, s17
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    subs r0, r2, #1
-; CHECK-NEXT:    mov r4, r1
+; CHECK-NEXT:    mov r7, #0
 ; CHECK-NEXT:    sbcs r0, r3, #0
-; CHECK-NEXT:    mov r6, #0
-; CHECK-NEXT:    mov r0, #0
-; CHECK-NEXT:    movwlt r0, #1
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    moveq r5, r0
-; CHECK-NEXT:    moveq r4, r0
-; CHECK-NEXT:    movne r0, r3
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    movwmi r4, #0
-; CHECK-NEXT:    movwmi r5, #0
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    movwlt r7, #1
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    moveq r6, r7
+; CHECK-NEXT:    moveq r5, r7
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    mov r8, r1
+; CHECK-NEXT:    mov r4, #0
+; CHECK-NEXT:    movwmi r6, #0
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    subs r2, r2, #1
-; CHECK-NEXT:    vmov.32 d1[0], r5
+; CHECK-NEXT:    vmov.32 d0[0], r6
 ; CHECK-NEXT:    sbcs r2, r3, #0
-; CHECK-NEXT:    movwlt r6, #1
-; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    moveq r1, r6
-; CHECK-NEXT:    moveq r0, r6
-; CHECK-NEXT:    movne r6, r3
-; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    movwlt r4, #1
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    moveq r0, r4
+; CHECK-NEXT:    moveq r3, r4
+; CHECK-NEXT:    cmp r3, #0
 ; CHECK-NEXT:    movwmi r0, #0
-; CHECK-NEXT:    movwmi r1, #0
-; CHECK-NEXT:    vmov.32 d0[0], r0
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    movne r4, r1
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    vmov.32 d1[0], r0
+; CHECK-NEXT:    movwmi r4, #0
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    movne r7, r8
+; CHECK-NEXT:    cmp r5, #0
 ; CHECK-NEXT:    vmov.32 d1[1], r4
-; CHECK-NEXT:    vmov.32 d0[1], r1
+; CHECK-NEXT:    movwmi r7, #0
+; CHECK-NEXT:    vmov.32 d0[1], r7
 ; CHECK-NEXT:    vpop {d8}
-; CHECK-NEXT:    pop {r4, r5, r6, pc}
+; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, pc}
 entry:
   %conv = fptosi <2 x float> %x to <2 x i128>
   %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
@@ -3912,8 +3963,10 @@ entry:
 define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEON-LABEL: stest_f16i64_mm:
 ; CHECK-NEON:       @ %bb.0: @ %entry
-; CHECK-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEON-NEXT:    .pad #4
+; CHECK-NEON-NEXT:    sub sp, sp, #4
 ; CHECK-NEON-NEXT:    .vsave {d8}
 ; CHECK-NEON-NEXT:    vpush {d8}
 ; CHECK-NEON-NEXT:    vmov r0, s0
@@ -3924,58 +3977,62 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    vmov s0, r0
 ; CHECK-NEON-NEXT:    bl __fixsfti
-; CHECK-NEON-NEXT:    mov r4, r1
+; CHECK-NEON-NEXT:    mov r5, r0
 ; CHECK-NEON-NEXT:    mvn r9, #0
-; CHECK-NEON-NEXT:    subs r1, r0, r9
-; CHECK-NEON-NEXT:    mvn r6, #-2147483648
-; CHECK-NEON-NEXT:    sbcs r1, r4, r6
+; CHECK-NEON-NEXT:    subs r0, r0, r9
+; CHECK-NEON-NEXT:    mvn r7, #-2147483648
+; CHECK-NEON-NEXT:    sbcs r0, r1, r7
+; CHECK-NEON-NEXT:    mov r11, r1
+; CHECK-NEON-NEXT:    sbcs r0, r2, #0
 ; CHECK-NEON-NEXT:    vmov s0, r8
-; CHECK-NEON-NEXT:    sbcs r1, r2, #0
-; CHECK-NEON-NEXT:    mov r5, #0
-; CHECK-NEON-NEXT:    sbcs r1, r3, #0
-; CHECK-NEON-NEXT:    mov r8, #-2147483648
-; CHECK-NEON-NEXT:    mov r1, #0
+; CHECK-NEON-NEXT:    sbcs r0, r3, #0
+; CHECK-NEON-NEXT:    mov r6, #0
+; CHECK-NEON-NEXT:    mov r0, #0
 ; CHECK-NEON-NEXT:    mov r10, #0
-; CHECK-NEON-NEXT:    movwlt r1, #1
-; CHECK-NEON-NEXT:    cmp r1, #0
-; CHECK-NEON-NEXT:    moveq r3, r1
-; CHECK-NEON-NEXT:    movne r1, r2
-; CHECK-NEON-NEXT:    moveq r4, r6
-; CHECK-NEON-NEXT:    moveq r0, r9
-; CHECK-NEON-NEXT:    rsbs r2, r0, #0
-; CHECK-NEON-NEXT:    rscs r2, r4, #-2147483648
-; CHECK-NEON-NEXT:    sbcs r1, r9, r1
-; CHECK-NEON-NEXT:    sbcs r1, r9, r3
-; CHECK-NEON-NEXT:    movwlt r5, #1
-; CHECK-NEON-NEXT:    cmp r5, #0
-; CHECK-NEON-NEXT:    movne r5, r0
-; CHECK-NEON-NEXT:    moveq r4, r8
+; CHECK-NEON-NEXT:    movwlt r0, #1
+; CHECK-NEON-NEXT:    cmp r0, #0
+; CHECK-NEON-NEXT:    moveq r3, r0
+; CHECK-NEON-NEXT:    movne r0, r2
+; CHECK-NEON-NEXT:    moveq r11, r7
+; CHECK-NEON-NEXT:    moveq r5, r9
+; CHECK-NEON-NEXT:    rsbs r1, r5, #0
+; CHECK-NEON-NEXT:    rscs r1, r11, #-2147483648
+; CHECK-NEON-NEXT:    sbcs r0, r9, r0
+; CHECK-NEON-NEXT:    sbcs r0, r9, r3
+; CHECK-NEON-NEXT:    movwlt r6, #1
+; CHECK-NEON-NEXT:    cmp r6, #0
+; CHECK-NEON-NEXT:    moveq r5, r6
 ; CHECK-NEON-NEXT:    bl __fixsfti
-; CHECK-NEON-NEXT:    subs r7, r0, r9
+; CHECK-NEON-NEXT:    subs r4, r0, r9
 ; CHECK-NEON-NEXT:    vmov.32 d1[0], r5
-; CHECK-NEON-NEXT:    sbcs r7, r1, r6
-; CHECK-NEON-NEXT:    sbcs r7, r2, #0
-; CHECK-NEON-NEXT:    sbcs r7, r3, #0
-; CHECK-NEON-NEXT:    mov r7, #0
-; CHECK-NEON-NEXT:    movwlt r7, #1
-; CHECK-NEON-NEXT:    cmp r7, #0
-; CHECK-NEON-NEXT:    moveq r3, r7
-; CHECK-NEON-NEXT:    movne r7, r2
-; CHECK-NEON-NEXT:    movne r6, r1
+; CHECK-NEON-NEXT:    sbcs r4, r1, r7
+; CHECK-NEON-NEXT:    sbcs r4, r2, #0
+; CHECK-NEON-NEXT:    sbcs r4, r3, #0
+; CHECK-NEON-NEXT:    mov r4, #0
+; CHECK-NEON-NEXT:    movwlt r4, #1
+; CHECK-NEON-NEXT:    cmp r4, #0
+; CHECK-NEON-NEXT:    moveq r3, r4
+; CHECK-NEON-NEXT:    movne r7, r1
+; CHECK-NEON-NEXT:    movne r4, r2
 ; CHECK-NEON-NEXT:    moveq r0, r9
 ; CHECK-NEON-NEXT:    rsbs r1, r0, #0
-; CHECK-NEON-NEXT:    rscs r1, r6, #-2147483648
-; CHECK-NEON-NEXT:    sbcs r1, r9, r7
+; CHECK-NEON-NEXT:    rscs r1, r7, #-2147483648
+; CHECK-NEON-NEXT:    sbcs r1, r9, r4
 ; CHECK-NEON-NEXT:    sbcs r1, r9, r3
 ; CHECK-NEON-NEXT:    movwlt r10, #1
 ; CHECK-NEON-NEXT:    cmp r10, #0
-; CHECK-NEON-NEXT:    movne r10, r0
-; CHECK-NEON-NEXT:    moveq r6, r8
-; CHECK-NEON-NEXT:    vmov.32 d0[0], r10
-; CHECK-NEON-NEXT:    vmov.32 d1[1], r4
-; CHECK-NEON-NEXT:    vmov.32 d0[1], r6
+; CHECK-NEON-NEXT:    moveq r0, r10
+; CHECK-NEON-NEXT:    mov r1, #-2147483648
+; CHECK-NEON-NEXT:    cmp r6, #0
+; CHECK-NEON-NEXT:    vmov.32 d0[0], r0
+; CHECK-NEON-NEXT:    moveq r11, r1
+; CHECK-NEON-NEXT:    cmp r10, #0
+; CHECK-NEON-NEXT:    vmov.32 d1[1], r11
+; CHECK-NEON-NEXT:    moveq r7, r1
+; CHECK-NEON-NEXT:    vmov.32 d0[1], r7
 ; CHECK-NEON-NEXT:    vpop {d8}
-; CHECK-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+; CHECK-NEON-NEXT:    add sp, sp, #4
+; CHECK-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; CHECK-FP16-LABEL: stest_f16i64_mm:
 ; CHECK-FP16:       @ %bb.0: @ %entry
@@ -3985,56 +4042,59 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
 ; CHECK-FP16-NEXT:    vmov.u16 r7, d0[0]
 ; CHECK-FP16-NEXT:    vmov s0, r0
 ; CHECK-FP16-NEXT:    bl __fixhfti
-; CHECK-FP16-NEXT:    mov r4, r1
-; CHECK-FP16-NEXT:    mvn r9, #0
-; CHECK-FP16-NEXT:    subs r1, r0, r9
-; CHECK-FP16-NEXT:    mvn r5, #-2147483648
-; CHECK-FP16-NEXT:    sbcs r1, r4, r5
+; CHECK-FP16-NEXT:    mov r5, r0
+; CHECK-FP16-NEXT:    mvn r8, #0
+; CHECK-FP16-NEXT:    subs r0, r0, r8
+; CHECK-FP16-NEXT:    mvn r6, #-2147483648
+; CHECK-FP16-NEXT:    sbcs r0, r1, r6
+; CHECK-FP16-NEXT:    mov r10, r1
+; CHECK-FP16-NEXT:    sbcs r0, r2, #0
 ; CHECK-FP16-NEXT:    vmov s0, r7
-; CHECK-FP16-NEXT:    sbcs r1, r2, #0
+; CHECK-FP16-NEXT:    sbcs r0, r3, #0
 ; CHECK-FP16-NEXT:    mov r7, #0
-; CHECK-FP16-NEXT:    sbcs r1, r3, #0
-; CHECK-FP16-NEXT:    mov r8, #-2147483648
-; CHECK-FP16-NEXT:    mov r1, #0
-; CHECK-FP16-NEXT:    mov r10, #0
-; CHECK-FP16-NEXT:    movwlt r1, #1
-; CHECK-FP16-NEXT:    cmp r1, #0
-; CHECK-FP16-NEXT:    moveq r3, r1
-; CHECK-FP16-NEXT:    movne r1, r2
-; CHECK-FP16-NEXT:    moveq r4, r5
-; CHECK-FP16-NEXT:    moveq r0, r9
-; CHECK-FP16-NEXT:    rsbs r2, r0, #0
-; CHECK-FP16-NEXT:    rscs r2, r4, #-2147483648
-; CHECK-FP16-NEXT:    sbcs r1, r9, r1
-; CHECK-FP16-NEXT:    sbcs r1, r9, r3
+; CHECK-FP16-NEXT:    mov r0, #0
+; CHECK-FP16-NEXT:    mov r9, #0
+; CHECK-FP16-NEXT:    movwlt r0, #1
+; CHECK-FP16-NEXT:    cmp r0, #0
+; CHECK-FP16-NEXT:    moveq r3, r0
+; CHECK-FP16-NEXT:    movne r0, r2
+; CHECK-FP16-NEXT:    moveq r10, r6
+; CHECK-FP16-NEXT:    moveq r5, r8
+; CHECK-FP16-NEXT:    rsbs r1, r5, #0
+; CHECK-FP16-NEXT:    rscs r1, r10, #-2147483648
+; CHECK-FP16-NEXT:    sbcs r0, r8, r0
+; CHECK-FP16-NEXT:    sbcs r0, r8, r3
 ; CHECK-FP16-NEXT:    movwlt r7, #1
 ; CHECK-FP16-NEXT:    cmp r7, #0
-; CHECK-FP16-NEXT:    movne r7, r0
-; CHECK-FP16-NEXT:    moveq r4, r8
+; CHECK-FP16-NEXT:    moveq r5, r7
 ; CHECK-FP16-NEXT:    bl __fixhfti
-; CHECK-FP16-NEXT:    subs r6, r0, r9
-; CHECK-FP16-NEXT:    vmov.32 d1[0], r7
-; CHECK-FP16-NEXT:    sbcs r6, r1, r5
-; CHECK-FP16-NEXT:    sbcs r6, r2, #0
-; CHECK-FP16-NEXT:    sbcs r6, r3, #0
-; CHECK-FP16-NEXT:    mov r6, #0
-; CHECK-FP16-NEXT:    movwlt r6, #1
-; CHECK-FP16-NEXT:    cmp r6, #0
-; CHECK-FP16-NEXT:    moveq r3, r6
-; CHECK-FP16-NEXT:    movne r6, r2
-; CHECK-FP16-NEXT:    movne r5, r1
-; CHECK-FP16-NEXT:    moveq r0, r9
+; CHECK-FP16-NEXT:    subs r4, r0, r8
+; CHECK-FP16-NEXT:    vmov.32 d1[0], r5
+; CHECK-FP16-NEXT:    sbcs r4, r1, r6
+; CHECK-FP16-NEXT:    sbcs r4, r2, #0
+; CHECK-FP16-NEXT:    sbcs r4, r3, #0
+; CHECK-FP16-NEXT:    mov r4, #0
+; CHECK-FP16-NEXT:    movwlt r4, #1
+; CHECK-FP16-NEXT:    cmp r4, #0
+; CHECK-FP16-NEXT:    moveq r3, r4
+; CHECK-FP16-NEXT:    movne r6, r1
+; CHECK-FP16-NEXT:    movne r4, r2
+; CHECK-FP16-NEXT:    moveq r0, r8
 ; CHECK-FP16-NEXT:    rsbs r1, r0, #0
-; CHECK-FP16-NEXT:    rscs r1, r5, #-2147483648
-; CHECK-FP16-NEXT:    sbcs r1, r9, r6
-; CHECK-FP16-NEXT:    sbcs r1, r9, r3
-; CHECK-FP16-NEXT:    movwlt r10, #1
-; CHECK-FP16-NEXT:    cmp r10, #0
-; CHECK-FP16-NEXT:    movne r10, r0
-; CHECK-FP16-NEXT:    moveq r5, r8
-; CHECK-FP16-NEXT:    vmov.32 d0[0], r10
-; CHECK-FP16-NEXT:    vmov.32 d1[1], r4
-; CHECK-FP16-NEXT:    vmov.32 d0[1], r5
+; CHECK-FP16-NEXT:    rscs r1, r6, #-2147483648
+; CHECK-FP16-NEXT:    sbcs r1, r8, r4
+; CHECK-FP16-NEXT:    sbcs r1, r8, r3
+; CHECK-FP16-NEXT:    movwlt r9, #1
+; CHECK-FP16-NEXT:    cmp r9, #0
+; CHECK-FP16-NEXT:    moveq r0, r9
+; CHECK-FP16-NEXT:    mov r1, #-2147483648
+; CHECK-FP16-NEXT:    cmp r7, #0
+; CHECK-FP16-NEXT:    vmov.32 d0[0], r0
+; CHECK-FP16-NEXT:    moveq r10, r1
+; CHECK-FP16-NEXT:    cmp r9, #0
+; CHECK-FP16-NEXT:    vmov.32 d1[1], r10
+; CHECK-FP16-NEXT:    moveq r6, r1
+; CHECK-FP16-NEXT:    vmov.32 d0[1], r6
 ; CHECK-FP16-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
 entry:
   %conv = fptosi <2 x half> %x to <2 x i128>
@@ -4047,8 +4107,8 @@ entry:
 define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEON-LABEL: utesth_f16i64_mm:
 ; CHECK-NEON:       @ %bb.0: @ %entry
-; CHECK-NEON-NEXT:    .save {r4, r5, r6, lr}
-; CHECK-NEON-NEXT:    push {r4, r5, r6, lr}
+; CHECK-NEON-NEXT:    .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEON-NEXT:    push {r4, r5, r6, r7, r11, lr}
 ; CHECK-NEON-NEXT:    .vsave {d8}
 ; CHECK-NEON-NEXT:    vpush {d8}
 ; CHECK-NEON-NEXT:    vmov r0, s0
@@ -4059,60 +4119,66 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    vmov s0, r0
 ; CHECK-NEON-NEXT:    bl __fixunssfti
-; CHECK-NEON-NEXT:    mov r4, r1
-; CHECK-NEON-NEXT:    subs r1, r2, #1
+; CHECK-NEON-NEXT:    mov r6, r0
+; CHECK-NEON-NEXT:    subs r0, r2, #1
 ; CHECK-NEON-NEXT:    vmov s0, r5
-; CHECK-NEON-NEXT:    sbcs r1, r3, #0
+; CHECK-NEON-NEXT:    sbcs r0, r3, #0
 ; CHECK-NEON-NEXT:    mov r5, #0
-; CHECK-NEON-NEXT:    mov r6, #0
+; CHECK-NEON-NEXT:    mov r4, r1
 ; CHECK-NEON-NEXT:    movwlo r5, #1
 ; CHECK-NEON-NEXT:    cmp r5, #0
-; CHECK-NEON-NEXT:    moveq r4, r5
-; CHECK-NEON-NEXT:    movne r5, r0
+; CHECK-NEON-NEXT:    mov r7, #0
+; CHECK-NEON-NEXT:    moveq r6, r5
 ; CHECK-NEON-NEXT:    bl __fixunssfti
 ; CHECK-NEON-NEXT:    subs r2, r2, #1
-; CHECK-NEON-NEXT:    vmov.32 d1[0], r5
+; CHECK-NEON-NEXT:    vmov.32 d1[0], r6
 ; CHECK-NEON-NEXT:    sbcs r2, r3, #0
-; CHECK-NEON-NEXT:    movwlo r6, #1
-; CHECK-NEON-NEXT:    cmp r6, #0
-; CHECK-NEON-NEXT:    moveq r0, r6
-; CHECK-NEON-NEXT:    movne r6, r1
+; CHECK-NEON-NEXT:    movwlo r7, #1
+; CHECK-NEON-NEXT:    cmp r7, #0
+; CHECK-NEON-NEXT:    moveq r0, r7
+; CHECK-NEON-NEXT:    cmp r5, #0
+; CHECK-NEON-NEXT:    movne r5, r4
 ; CHECK-NEON-NEXT:    vmov.32 d0[0], r0
-; CHECK-NEON-NEXT:    vmov.32 d1[1], r4
-; CHECK-NEON-NEXT:    vmov.32 d0[1], r6
+; CHECK-NEON-NEXT:    cmp r7, #0
+; CHECK-NEON-NEXT:    vmov.32 d1[1], r5
+; CHECK-NEON-NEXT:    movne r7, r1
+; CHECK-NEON-NEXT:    vmov.32 d0[1], r7
 ; CHECK-NEON-NEXT:    vpop {d8}
-; CHECK-NEON-NEXT:    pop {r4, r5, r6, pc}
+; CHECK-NEON-NEXT:    pop {r4, r5, r6, r7, r11, pc}
 ;
 ; CHECK-FP16-LABEL: utesth_f16i64_mm:
 ; CHECK-FP16:       @ %bb.0: @ %entry
-; CHECK-FP16-NEXT:    .save {r4, r5, r6, lr}
-; CHECK-FP16-NEXT:    push {r4, r5, r6, lr}
+; CHECK-FP16-NEXT:    .save {r4, r5, r6, r7, r11, lr}
+; CHECK-FP16-NEXT:    push {r4, r5, r6, r7, r11, lr}
 ; CHECK-FP16-NEXT:    vmov.u16 r0, d0[1]
-; CHECK-FP16-NEXT:    vmov.u16 r6, d0[0]
+; CHECK-FP16-NEXT:    vmov.u16 r7, d0[0]
 ; CHECK-FP16-NEXT:    vmov s0, r0
 ; CHECK-FP16-NEXT:    bl __fixunshfti
+; CHECK-FP16-NEXT:    mov r5, r0
+; CHECK-FP16-NEXT:    subs r0, r2, #1
+; CHECK-FP16-NEXT:    vmov s0, r7
+; CHECK-FP16-NEXT:    sbcs r0, r3, #0
+; CHECK-FP16-NEXT:    mov r7, #0
 ; CHECK-FP16-NEXT:    mov r4, r1
-; CHECK-FP16-NEXT:    subs r1, r2, #1
-; CHECK-FP16-NEXT:    vmov s0, r6
-; CHECK-FP16-NEXT:    sbcs r1, r3, #0
+; CHECK-FP16-NEXT:    movwlo r7, #1
+; CHECK-FP16-NEXT:    cmp r7, #0
 ; CHECK-FP16-NEXT:    mov r6, #0
-; CHECK-FP16-NEXT:    mov r5, #0
-; CHECK-FP16-NEXT:    movwlo r6, #1
-; CHECK-FP16-NEXT:    cmp r6, #0
-; CHECK-FP16-NEXT:    moveq r4, r6
-; CHECK-FP16-NEXT:    movne r6, r0
+; CHECK-FP16-NEXT:    moveq r5, r7
 ; CHECK-FP16-NEXT:    bl __fixunshfti
 ; CHECK-FP16-NEXT:    subs r2, r2, #1
-; CHECK-FP16-NEXT:    vmov.32 d1[0], r6
+; CHECK-FP16-NEXT:    vmov.32 d1[0], r5
 ; CHECK-FP16-NEXT:    sbcs r2, r3, #0
-; CHECK-FP16-NEXT:    movwlo r5, #1
-; CHECK-FP16-NEXT:    cmp r5, #0
-; CHECK-FP16-NEXT:    moveq r0, r5
-; CHECK-FP16-NEXT:    movne r5, r1
+; CHECK-FP16-NEXT:    movwlo r6, #1
+; CHECK-FP16-NEXT:    cmp r6, #0
+; CHECK-FP16-NEXT:    moveq r0, r6
+; CHECK-FP16-NEXT:    cmp r7, #0
+; CHECK-FP16-NEXT:    movne r7, r4
 ; CHECK-FP16-NEXT:    vmov.32 d0[0], r0
-; CHECK-FP16-NEXT:    vmov.32 d1[1], r4
-; CHECK-FP16-NEXT:    vmov.32 d0[1], r5
-; CHECK-FP16-NEXT:    pop {r4, r5, r6, pc}
+; CHECK-FP16-NEXT:    cmp r6, #0
+; CHECK-FP16-NEXT:    vmov.32 d1[1], r7
+; CHECK-FP16-NEXT:    movne r6, r1
+; CHECK-FP16-NEXT:    vmov.32 d0[1], r6
+; CHECK-FP16-NEXT:    pop {r4, r5, r6, r7, r11, pc}
 entry:
   %conv = fptoui <2 x half> %x to <2 x i128>
   %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
@@ -4123,90 +4189,100 @@ entry:
 define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEON-LABEL: ustest_f16i64_mm:
 ; CHECK-NEON:       @ %bb.0: @ %entry
-; CHECK-NEON-NEXT:    .save {r4, r5, r6, r7, r11, lr}
-; CHECK-NEON-NEXT:    push {r4, r5, r6, r7, r11, lr}
+; CHECK-NEON-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; CHECK-NEON-NEXT:    push {r4, r5, r6, r7, r8, lr}
 ; CHECK-NEON-NEXT:    .vsave {d8}
 ; CHECK-NEON-NEXT:    vpush {d8}
-; CHECK-NEON-NEXT:    vmov r0, s0
-; CHECK-NEON-NEXT:    vmov.f32 s16, s1
+; CHECK-NEON-NEXT:    vmov r0, s1
+; CHECK-NEON-NEXT:    vmov.f32 s16, s0
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
-; CHECK-NEON-NEXT:    mov r6, r0
+; CHECK-NEON-NEXT:    mov r7, r0
 ; CHECK-NEON-NEXT:    vmov r0, s16
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    vmov s0, r0
 ; CHECK-NEON-NEXT:    bl __fixsfti
-; CHECK-NEON-NEXT:    mov r5, r0
+; CHECK-NEON-NEXT:    mov r6, r0
 ; CHECK-NEON-NEXT:    subs r0, r2, #1
+; CHECK-NEON-NEXT:    vmov s0, r7
 ; CHECK-NEON-NEXT:    sbcs r0, r3, #0
-; CHECK-NEON-NEXT:    vmov s0, r6
-; CHECK-NEON-NEXT:    mov r0, #0
-; CHECK-NEON-NEXT:    mov r4, r1
-; CHECK-NEON-NEXT:    movwlt r0, #1
-; CHECK-NEON-NEXT:    cmp r0, #0
-; CHECK-NEON-NEXT:    moveq r5, r0
-; CHECK-NEON-NEXT:    moveq r4, r0
-; CHECK-NEON-NEXT:    movne r0, r3
-; CHECK-NEON-NEXT:    cmp r0, #0
 ; CHECK-NEON-NEXT:    mov r7, #0
-; CHECK-NEON-NEXT:    movwmi r4, #0
-; CHECK-NEON-NEXT:    movwmi r5, #0
+; CHECK-NEON-NEXT:    mov r5, r3
+; CHECK-NEON-NEXT:    movwlt r7, #1
+; CHECK-NEON-NEXT:    cmp r7, #0
+; CHECK-NEON-NEXT:    moveq r6, r7
+; CHECK-NEON-NEXT:    moveq r5, r7
+; CHECK-NEON-NEXT:    cmp r5, #0
+; CHECK-NEON-NEXT:    mov r8, r1
+; CHECK-NEON-NEXT:    mov r4, #0
+; CHECK-NEON-NEXT:    movwmi r6, #0
 ; CHECK-NEON-NEXT:    bl __fixsfti
 ; CHECK-NEON-NEXT:    subs r2, r2, #1
-; CHECK-NEON-NEXT:    vmov.32 d1[0], r5
+; CHECK-NEON-NEXT:    vmov.32 d0[0], r6
 ; CHECK-NEON-NEXT:    sbcs r2, r3, #0
-; CHECK-NEON-NEXT:    movwlt r7, #1
-; CHECK-NEON-NEXT:    cmp r7, #0
-; CHECK-NEON-NEXT:    moveq r1, r7
-; CHECK-NEON-NEXT:    moveq r0, r7
-; CHECK-NEON-NEXT:    movne r7, r3
-; CHECK-NEON-NEXT:    cmp r7, #0
+; CHECK-NEON-NEXT:    movwlt r4, #1
+; CHECK-NEON-NEXT:    cmp r4, #0
+; CHECK-NEON-NEXT:    moveq r0, r4
+; CHECK-NEON-NEXT:    moveq r3, r4
+; CHECK-NEON-NEXT:    cmp r3, #0
 ; CHECK-NEON-NEXT:    movwmi r0, #0
-; CHECK-NEON-NEXT:    movwmi r1, #0
-; CHECK-NEON-NEXT:    vmov.32 d0[0], r0
+; CHECK-NEON-NEXT:    cmp r4, #0
+; CHECK-NEON-NEXT:    movne r4, r1
+; CHECK-NEON-NEXT:    cmp r3, #0
+; CHECK-NEON-NEXT:    vmov.32 d1[0], r0
+; CHECK-NEON-NEXT:    movwmi r4, #0
+; CHECK-NEON-NEXT:    cmp r7, #0
+; CHECK-NEON-NEXT:    movne r7, r8
+; CHECK-NEON-NEXT:    cmp r5, #0
 ; CHECK-NEON-NEXT:    vmov.32 d1[1], r4
-; CHECK-NEON-NEXT:    vmov.32 d0[1], r1
+; CHECK-NEON-NEXT:    movwmi r7, #0
+; CHECK-NEON-NEXT:    vmov.32 d0[1], r7
 ; CHECK-NEON-NEXT:    vpop {d8}
-; CHECK-NEON-NEXT:    pop {r4, r5, r6, r7, r11, pc}
+; CHECK-NEON-NEXT:    pop {r4, r5, r6, r7, r8, pc}
 ;
 ; CHECK-FP16-LABEL: ustest_f16i64_mm:
 ; CHECK-FP16:       @ %bb.0: @ %entry
-; CHECK-FP16-NEXT:    .save {r4, r5, r6, r7, r11, lr}
-; CHECK-FP16-NEXT:    push {r4, r5, r6, r7, r11, lr}
-; CHECK-FP16-NEXT:    vmov.u16 r0, d0[1]
-; CHECK-FP16-NEXT:    vmov.u16 r7, d0[0]
+; CHECK-FP16-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; CHECK-FP16-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; CHECK-FP16-NEXT:    vmov.u16 r0, d0[0]
+; CHECK-FP16-NEXT:    vmov.u16 r7, d0[1]
 ; CHECK-FP16-NEXT:    vmov s0, r0
 ; CHECK-FP16-NEXT:    bl __fixhfti
-; CHECK-FP16-NEXT:    mov r5, r0
+; CHECK-FP16-NEXT:    mov r6, r0
 ; CHECK-FP16-NEXT:    subs r0, r2, #1
-; CHECK-FP16-NEXT:    sbcs r0, r3, #0
 ; CHECK-FP16-NEXT:    vmov s0, r7
-; CHECK-FP16-NEXT:    mov r0, #0
-; CHECK-FP16-NEXT:    mov r4, r1
-; CHECK-FP16-NEXT:    movwlt r0, #1
-; CHECK-FP16-NEXT:    cmp r0, #0
-; CHECK-FP16-NEXT:    moveq r5, r0
-; CHECK-FP16-NEXT:    moveq r4, r0
-; CHECK-FP16-NEXT:    movne r0, r3
-; CHECK-FP16-NEXT:    cmp r0, #0
-; CHECK-FP16-NEXT:    mov r6, #0
-; CHECK-FP16-NEXT:    movwmi r4, #0
-; CHECK-FP16-NEXT:    movwmi r5, #0
+; CHECK-FP16-NEXT:    sbcs r0, r3, #0
+; CHECK-FP16-NEXT:    mov r7, #0
+; CHECK-FP16-NEXT:    mov r5, r3
+; CHECK-FP16-NEXT:    movwlt r7, #1
+; CHECK-FP16-NEXT:    cmp r7, #0
+; CHECK-FP16-NEXT:    moveq r6, r7
+; CHECK-FP16-NEXT:    moveq r5, r7
+; CHECK-FP16-NEXT:    cmp r5, #0
+; CHECK-FP16-NEXT:    mov r8, r1
+; CHECK-FP16-NEXT:    mov r4, #0
+; CHECK-FP16-NEXT:    movwmi r6, #0
 ; CHECK-FP16-NEXT:    bl __fixhfti
 ; CHECK-FP16-NEXT:    subs r2, r2, #1
-; CHECK-FP16-NEXT:    vmov.32 d1[0], r5
+; CHECK-FP16-NEXT:    vmov.32 d0[0], r6
 ; CHECK-FP16-NEXT:    sbcs r2, r3, #0
-; CHECK-FP16-NEXT:    movwlt r6, #1
-; CHECK-FP16-NEXT:    cmp r6, #0
-; CHECK-FP16-NEXT:    moveq r1, r6
-; CHECK-FP16-NEXT:    moveq r0, r6
-; CHECK-FP16-NEXT:    movne r6, r3
-; CHECK-FP16-NEXT:    cmp r6, #0
+; CHECK-FP16-NEXT:    movwlt r4, #1
+; CHECK-FP16-NEXT:    cmp r4, #0
+; CHECK-FP16-NEXT:    moveq r0, r4
+; CHECK-FP16-NEXT:    moveq r3, r4
+; CHECK-FP16-NEXT:    cmp r3, #0
 ; CHECK-FP16-NEXT:    movwmi r0, #0
-; CHECK-FP16-NEXT:    movwmi r1, #0
-; CHECK-FP16-NEXT:    vmov.32 d0[0], r0
+; CHECK-FP16-NEXT:    cmp r4, #0
+; CHECK-FP16-NEXT:    movne r4, r1
+; CHECK-FP16-NEXT:    cmp r3, #0
+; CHECK-FP16-NEXT:    vmov.32 d1[0], r0
+; CHECK-FP16-NEXT:    movwmi r4, #0
+; CHECK-FP16-NEXT:    cmp r7, #0
+; CHECK-FP16-NEXT:    movne r7, r8
+; CHECK-FP16-NEXT:    cmp r5, #0
 ; CHECK-FP16-NEXT:    vmov.32 d1[1], r4
-; CHECK-FP16-NEXT:    vmov.32 d0[1], r1
-; CHECK-FP16-NEXT:    pop {r4, r5, r6, r7, r11, pc}
+; CHECK-FP16-NEXT:    movwmi r7, #0
+; CHECK-FP16-NEXT:    vmov.32 d0[1], r7
+; CHECK-FP16-NEXT:    pop {r4, r5, r6, r7, r8, pc}
 entry:
   %conv = fptosi <2 x half> %x to <2 x i128>
   %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)

diff  --git a/llvm/test/CodeGen/ARM/fpscr-multi-use.ll b/llvm/test/CodeGen/ARM/fpscr-multi-use.ll
index c59c9824a902cf..3e77ad65df9927 100644
--- a/llvm/test/CodeGen/ARM/fpscr-multi-use.ll
+++ b/llvm/test/CodeGen/ARM/fpscr-multi-use.ll
@@ -1,51 +1,31 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=armv7 %s -o - | FileCheck %s
-; RUN: llc -mtriple=thumbv7 %s -o - | FileCheck --check-prefix=THUMB %s
 
 declare double @fn()
 
 define void @test(ptr %p, ptr %res) nounwind {
 ; CHECK-LABEL: test:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    push {r4, r5, r11, lr}
-; CHECK-NEXT:    vldr d16, [r0]
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    vpush {d8}
+; CHECK-NEXT:    vldr d8, [r0]
 ; CHECK-NEXT:    mov r4, r1
-; CHECK-NEXT:    vcmp.f64 d16, #0
+; CHECK-NEXT:    vcmp.f64 d8, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    mrs r5, apsr
-; CHECK-NEXT:    vneg.f64 d17, d16
-; CHECK-NEXT:    vmovne.f64 d16, d17
-; CHECK-NEXT:    vstr d16, [r1]
+; CHECK-NEXT:    vneg.f64 d16, d8
+; CHECK-NEXT:    vmov.f64 d17, d8
+; CHECK-NEXT:    vmovne.f64 d17, d16
+; CHECK-NEXT:    vstr d17, [r1]
 ; CHECK-NEXT:    bl fn
+; CHECK-NEXT:    vcmp.f64 d8, #0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vmov d16, r0, r1
 ; CHECK-NEXT:    eor r1, r1, #-2147483648
-; CHECK-NEXT:    msr APSR_nzcvq, r5
 ; CHECK-NEXT:    vmov d17, r0, r1
 ; CHECK-NEXT:    vmovne.f64 d16, d17
 ; CHECK-NEXT:    vstr d16, [r4]
-; CHECK-NEXT:    pop {r4, r5, r11, pc}
-;
-; THUMB-LABEL: test:
-; THUMB:       @ %bb.0: @ %entry
-; THUMB-NEXT:    push {r4, r5, r7, lr}
-; THUMB-NEXT:    vldr d16, [r0]
-; THUMB-NEXT:    mov r4, r1
-; THUMB-NEXT:    vcmp.f64 d16, #0
-; THUMB-NEXT:    vmrs APSR_nzcv, fpscr
-; THUMB-NEXT:    mrs r5, apsr
-; THUMB-NEXT:    vneg.f64 d17, d16
-; THUMB-NEXT:    it ne
-; THUMB-NEXT:    vmovne.f64 d16, d17
-; THUMB-NEXT:    vstr d16, [r1]
-; THUMB-NEXT:    bl fn
-; THUMB-NEXT:    vmov d16, r0, r1
-; THUMB-NEXT:    eor r1, r1, #-2147483648
-; THUMB-NEXT:    msr APSR_nzcvq, r5
-; THUMB-NEXT:    vmov d17, r0, r1
-; THUMB-NEXT:    it ne
-; THUMB-NEXT:    vmovne.f64 d16, d17
-; THUMB-NEXT:    vstr d16, [r4]
-; THUMB-NEXT:    pop {r4, r5, r7, pc}
+; CHECK-NEXT:    vpop {d8}
+; CHECK-NEXT:    pop {r4, pc}
 entry:
   %x = load double, ptr %p
   %cmp = fcmp une double %x, 0.000000e+00

diff  --git a/llvm/test/CodeGen/ARM/fptoi-sat-store.ll b/llvm/test/CodeGen/ARM/fptoi-sat-store.ll
index d895fe89a2cdc3..67edf9855f372f 100644
--- a/llvm/test/CodeGen/ARM/fptoi-sat-store.ll
+++ b/llvm/test/CodeGen/ARM/fptoi-sat-store.ll
@@ -75,62 +75,64 @@ define void @test_signed_i32_f64(ptr %d, double %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #4
-; SOFT-NEXT:    sub sp, #4
-; SOFT-NEXT:    mov r6, r3
-; SOFT-NEXT:    mov r7, r2
-; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
-; SOFT-NEXT:    movs r5, #0
-; SOFT-NEXT:    ldr r3, .LCPI1_0
-; SOFT-NEXT:    mov r0, r2
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    mov r2, r5
+; SOFT-NEXT:    .pad #12
+; SOFT-NEXT:    sub sp, #12
+; SOFT-NEXT:    mov r5, r3
+; SOFT-NEXT:    mov r6, r2
+; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    ldr r2, .LCPI1_0
+; SOFT-NEXT:    ldr r3, .LCPI1_1
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    bl __aeabi_dcmpgt
+; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    movs r7, #0
+; SOFT-NEXT:    ldr r3, .LCPI1_2
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    mov r2, r7
 ; SOFT-NEXT:    bl __aeabi_dcmpge
 ; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    mov r1, r6
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r1, r5
 ; SOFT-NEXT:    bl __aeabi_d2iz
 ; SOFT-NEXT:    cmp r4, #0
-; SOFT-NEXT:    beq .LBB1_2
+; SOFT-NEXT:    bne .LBB1_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    b .LBB1_3
-; SOFT-NEXT:  .LBB1_2:
 ; SOFT-NEXT:    movs r0, #1
-; SOFT-NEXT:    lsls r4, r0, #31
-; SOFT-NEXT:  .LBB1_3:
-; SOFT-NEXT:    ldr r2, .LCPI1_1
-; SOFT-NEXT:    ldr r3, .LCPI1_2
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB1_5
-; SOFT-NEXT:  @ %bb.4:
+; SOFT-NEXT:    lsls r0, r0, #31
+; SOFT-NEXT:  .LBB1_2:
+; SOFT-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:    cmp r1, #0
+; SOFT-NEXT:    bne .LBB1_4
+; SOFT-NEXT:  @ %bb.3:
+; SOFT-NEXT:    mov r4, r0
+; SOFT-NEXT:    b .LBB1_5
+; SOFT-NEXT:  .LBB1_4:
 ; SOFT-NEXT:    ldr r4, .LCPI1_3
 ; SOFT-NEXT:  .LBB1_5:
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    mov r2, r7
-; SOFT-NEXT:    mov r3, r6
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    mov r2, r6
+; SOFT-NEXT:    mov r3, r5
 ; SOFT-NEXT:    bl __aeabi_dcmpun
 ; SOFT-NEXT:    cmp r0, #0
 ; SOFT-NEXT:    bne .LBB1_7
 ; SOFT-NEXT:  @ %bb.6:
-; SOFT-NEXT:    mov r5, r4
+; SOFT-NEXT:    mov r7, r4
 ; SOFT-NEXT:  .LBB1_7:
-; SOFT-NEXT:    ldr r0, [sp] @ 4-byte Reload
-; SOFT-NEXT:    str r5, [r0]
-; SOFT-NEXT:    add sp, #4
+; SOFT-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    str r7, [r0]
+; SOFT-NEXT:    add sp, #12
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.8:
 ; SOFT-NEXT:  .LCPI1_0:
-; SOFT-NEXT:    .long 3252682752 @ 0xc1e00000
-; SOFT-NEXT:  .LCPI1_1:
 ; SOFT-NEXT:    .long 4290772992 @ 0xffc00000
-; SOFT-NEXT:  .LCPI1_2:
+; SOFT-NEXT:  .LCPI1_1:
 ; SOFT-NEXT:    .long 1105199103 @ 0x41dfffff
+; SOFT-NEXT:  .LCPI1_2:
+; SOFT-NEXT:    .long 3252682752 @ 0xc1e00000
 ; SOFT-NEXT:  .LCPI1_3:
 ; SOFT-NEXT:    .long 2147483647 @ 0x7fffffff
 ;
@@ -159,31 +161,31 @@ define void @test_unsigned_i32_f32(ptr %d, float %f) nounwind {
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    .pad #4
 ; SOFT-NEXT:    sub sp, #4
-; SOFT-NEXT:    mov r6, r1
+; SOFT-NEXT:    mov r7, r1
 ; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT:    ldr r1, .LCPI2_0
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    bl __aeabi_fcmpgt
+; SOFT-NEXT:    mov r6, r0
 ; SOFT-NEXT:    movs r5, #0
-; SOFT-NEXT:    mov r0, r1
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    mov r1, r5
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    bl __aeabi_f2uiz
 ; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    bl __aeabi_f2uiz
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    bne .LBB2_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r4, r7
+; SOFT-NEXT:    mov r0, r4
 ; SOFT-NEXT:  .LBB2_2:
-; SOFT-NEXT:    ldr r1, .LCPI2_0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r6, #0
 ; SOFT-NEXT:    beq .LBB2_4
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mvns r4, r5
+; SOFT-NEXT:    mvns r0, r5
 ; SOFT-NEXT:  .LBB2_4:
-; SOFT-NEXT:    ldr r0, [sp] @ 4-byte Reload
-; SOFT-NEXT:    str r4, [r0]
+; SOFT-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; SOFT-NEXT:    str r0, [r1]
 ; SOFT-NEXT:    add sp, #4
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
@@ -207,40 +209,41 @@ define void @test_unsigned_i32_f64(ptr %d, double %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #4
-; SOFT-NEXT:    sub sp, #4
-; SOFT-NEXT:    mov r6, r3
-; SOFT-NEXT:    mov r7, r2
-; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
-; SOFT-NEXT:    movs r5, #0
-; SOFT-NEXT:    mov r0, r2
-; SOFT-NEXT:    mov r1, r3
-; SOFT-NEXT:    mov r2, r5
-; SOFT-NEXT:    mov r3, r5
+; SOFT-NEXT:    .pad #12
+; SOFT-NEXT:    sub sp, #12
+; SOFT-NEXT:    mov r5, r3
+; SOFT-NEXT:    mov r4, r2
+; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    ldr r2, .LCPI3_0
+; SOFT-NEXT:    ldr r3, .LCPI3_1
+; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    bl __aeabi_dcmpgt
+; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    movs r6, #0
+; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    mov r2, r6
+; SOFT-NEXT:    mov r3, r6
 ; SOFT-NEXT:    bl __aeabi_dcmpge
-; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    mov r1, r6
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r1, r5
 ; SOFT-NEXT:    bl __aeabi_d2uiz
-; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    cmp r7, #0
 ; SOFT-NEXT:    bne .LBB3_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:  .LBB3_2:
-; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    ldr r2, .LCPI3_0
-; SOFT-NEXT:    ldr r3, .LCPI3_1
 ; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:  .LBB3_2:
+; SOFT-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:    cmp r1, #0
 ; SOFT-NEXT:    beq .LBB3_4
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mvns r4, r5
+; SOFT-NEXT:    mvns r0, r6
 ; SOFT-NEXT:  .LBB3_4:
-; SOFT-NEXT:    ldr r0, [sp] @ 4-byte Reload
-; SOFT-NEXT:    str r4, [r0]
-; SOFT-NEXT:    add sp, #4
+; SOFT-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    str r0, [r1]
+; SOFT-NEXT:    add sp, #12
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.5:

diff  --git a/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll b/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
index 5179f976244892..84f6ee276ba5f1 100644
--- a/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
@@ -468,10 +468,10 @@ define i19 @test_signed_i19_f32(float %f) nounwind {
 ; VFP2-NEXT:    movlt r0, #0
 ; VFP2-NEXT:    movtlt r0, #65532
 ; VFP2-NEXT:    vmrs APSR_nzcv, fpscr
-; VFP2-NEXT:    vcmp.f32 s0, s0
 ; VFP2-NEXT:    itt gt
 ; VFP2-NEXT:    movwgt r0, #65535
 ; VFP2-NEXT:    movtgt r0, #3
+; VFP2-NEXT:    vcmp.f32 s0, s0
 ; VFP2-NEXT:    vmrs APSR_nzcv, fpscr
 ; VFP2-NEXT:    it vs
 ; VFP2-NEXT:    movvs r0, #0
@@ -569,67 +569,71 @@ define i50 @test_signed_i50_f32(float %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #4
-; SOFT-NEXT:    sub sp, #4
-; SOFT-NEXT:    mov r4, r0
+; SOFT-NEXT:    .pad #12
+; SOFT-NEXT:    sub sp, #12
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    ldr r1, .LCPI6_0
+; SOFT-NEXT:    bl __aeabi_fcmpgt
+; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
 ; SOFT-NEXT:    movs r0, #27
 ; SOFT-NEXT:    lsls r1, r0, #27
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    bl __aeabi_f2lz
 ; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    bl __aeabi_f2lz
+; SOFT-NEXT:    mov r4, r0
+; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB6_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r5, r7
+; SOFT-NEXT:    mov r4, r5
 ; SOFT-NEXT:  .LBB6_2:
+; SOFT-NEXT:    movs r6, #0
+; SOFT-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    cmp r0, #0
 ; SOFT-NEXT:    beq .LBB6_4
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    str r1, [sp] @ 4-byte Spill
-; SOFT-NEXT:    b .LBB6_5
+; SOFT-NEXT:    mvns r4, r6
 ; SOFT-NEXT:  .LBB6_4:
-; SOFT-NEXT:    ldr r0, .LCPI6_0
-; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
-; SOFT-NEXT:  .LBB6_5:
-; SOFT-NEXT:    ldr r1, .LCPI6_1
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    movs r7, #0
-; SOFT-NEXT:    mvns r6, r7
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB6_7
-; SOFT-NEXT:  @ %bb.6:
-; SOFT-NEXT:    ldr r0, .LCPI6_2
-; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
-; SOFT-NEXT:  .LBB6_7:
-; SOFT-NEXT:    bne .LBB6_9
-; SOFT-NEXT:  @ %bb.8:
-; SOFT-NEXT:    mov r6, r5
-; SOFT-NEXT:  .LBB6_9:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    mov r1, r7
 ; SOFT-NEXT:    bl __aeabi_fcmpun
+; SOFT-NEXT:    mov r1, r0
 ; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    bne .LBB6_11
-; SOFT-NEXT:  @ %bb.10:
 ; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    bne .LBB6_6
+; SOFT-NEXT:  @ %bb.5:
+; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:  .LBB6_6:
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB6_8
+; SOFT-NEXT:  @ %bb.7:
+; SOFT-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:    b .LBB6_9
+; SOFT-NEXT:  .LBB6_8:
+; SOFT-NEXT:    ldr r3, .LCPI6_1
+; SOFT-NEXT:  .LBB6_9:
+; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    beq .LBB6_11
+; SOFT-NEXT:  @ %bb.10:
+; SOFT-NEXT:    ldr r3, .LCPI6_2
 ; SOFT-NEXT:  .LBB6_11:
+; SOFT-NEXT:    cmp r1, #0
 ; SOFT-NEXT:    bne .LBB6_13
 ; SOFT-NEXT:  @ %bb.12:
-; SOFT-NEXT:    ldr r7, [sp] @ 4-byte Reload
+; SOFT-NEXT:    mov r6, r3
 ; SOFT-NEXT:  .LBB6_13:
-; SOFT-NEXT:    mov r1, r7
-; SOFT-NEXT:    add sp, #4
+; SOFT-NEXT:    mov r1, r6
+; SOFT-NEXT:    add sp, #12
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.14:
 ; SOFT-NEXT:  .LCPI6_0:
-; SOFT-NEXT:    .long 4294836224 @ 0xfffe0000
-; SOFT-NEXT:  .LCPI6_1:
 ; SOFT-NEXT:    .long 1476395007 @ 0x57ffffff
+; SOFT-NEXT:  .LCPI6_1:
+; SOFT-NEXT:    .long 4294836224 @ 0xfffe0000
 ; SOFT-NEXT:  .LCPI6_2:
 ; SOFT-NEXT:    .long 131071 @ 0x1ffff
 ;
@@ -650,12 +654,11 @@ define i50 @test_signed_i50_f32(float %f) nounwind {
 ; VFP-NEXT:    movlt r0, #0
 ; VFP-NEXT:    vcmp.f32 s2, s4
 ; VFP-NEXT:    vmrs APSR_nzcv, fpscr
-; VFP-NEXT:    it gt
-; VFP-NEXT:    movgt.w r0, #-1
-; VFP-NEXT:    vcmp.f32 s2, s2
-; VFP-NEXT:    itt gt
+; VFP-NEXT:    ittt gt
 ; VFP-NEXT:    movwgt r1, #65535
 ; VFP-NEXT:    movtgt r1, #1
+; VFP-NEXT:    movgt.w r0, #-1
+; VFP-NEXT:    vcmp.f32 s2, s2
 ; VFP-NEXT:    vmrs APSR_nzcv, fpscr
 ; VFP-NEXT:    itt vs
 ; VFP-NEXT:    movvs r0, #0
@@ -676,60 +679,68 @@ define i64 @test_signed_i64_f32(float %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #4
-; SOFT-NEXT:    sub sp, #4
-; SOFT-NEXT:    mov r4, r0
+; SOFT-NEXT:    .pad #12
+; SOFT-NEXT:    sub sp, #12
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    ldr r1, .LCPI7_0
+; SOFT-NEXT:    bl __aeabi_fcmpgt
+; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
 ; SOFT-NEXT:    movs r0, #223
 ; SOFT-NEXT:    lsls r1, r0, #24
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    bl __aeabi_f2lz
-; SOFT-NEXT:    movs r2, #1
-; SOFT-NEXT:    lsls r7, r2, #31
-; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    mov r4, r0
+; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB7_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r4, r5
 ; SOFT-NEXT:  .LBB7_2:
-; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT:    movs r6, #0
+; SOFT-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    cmp r0, #0
 ; SOFT-NEXT:    beq .LBB7_4
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r7, r1
+; SOFT-NEXT:    mvns r4, r6
 ; SOFT-NEXT:  .LBB7_4:
-; SOFT-NEXT:    ldr r1, .LCPI7_0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    movs r6, #0
-; SOFT-NEXT:    mvns r5, r6
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    bl __aeabi_fcmpun
+; SOFT-NEXT:    mov r1, r0
 ; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB7_6
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    bne .LBB7_6
 ; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    ldr r7, .LCPI7_1
+; SOFT-NEXT:    mov r0, r4
 ; SOFT-NEXT:  .LBB7_6:
-; SOFT-NEXT:    bne .LBB7_8
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB7_8
 ; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    ldr r5, [sp] @ 4-byte Reload
+; SOFT-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:    b .LBB7_9
 ; SOFT-NEXT:  .LBB7_8:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    bl __aeabi_fcmpun
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    bne .LBB7_10
-; SOFT-NEXT:  @ %bb.9:
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:  .LBB7_10:
-; SOFT-NEXT:    bne .LBB7_12
-; SOFT-NEXT:  @ %bb.11:
-; SOFT-NEXT:    mov r6, r7
-; SOFT-NEXT:  .LBB7_12:
+; SOFT-NEXT:    movs r2, #1
+; SOFT-NEXT:    lsls r3, r2, #31
+; SOFT-NEXT:  .LBB7_9:
+; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    beq .LBB7_11
+; SOFT-NEXT:  @ %bb.10:
+; SOFT-NEXT:    ldr r3, .LCPI7_1
+; SOFT-NEXT:  .LBB7_11:
+; SOFT-NEXT:    cmp r1, #0
+; SOFT-NEXT:    bne .LBB7_13
+; SOFT-NEXT:  @ %bb.12:
+; SOFT-NEXT:    mov r6, r3
+; SOFT-NEXT:  .LBB7_13:
 ; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    add sp, #4
+; SOFT-NEXT:    add sp, #12
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
-; SOFT-NEXT:  @ %bb.13:
+; SOFT-NEXT:  @ %bb.14:
 ; SOFT-NEXT:  .LCPI7_0:
 ; SOFT-NEXT:    .long 1593835519 @ 0x5effffff
 ; SOFT-NEXT:  .LCPI7_1:
@@ -777,106 +788,110 @@ define i100 @test_signed_i100_f32(float %f) nounwind {
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    .pad #20
 ; SOFT-NEXT:    sub sp, #20
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    ldr r1, .LCPI8_0
+; SOFT-NEXT:    bl __aeabi_fcmpgt
 ; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    movs r0, #241
 ; SOFT-NEXT:    lsls r1, r0, #24
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __fixsfti
-; SOFT-NEXT:    movs r5, #7
-; SOFT-NEXT:    str r5, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT:    mvns r7, r5
-; SOFT-NEXT:    cmp r6, #0
-; SOFT-NEXT:    beq .LBB8_17
+; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    str r2, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:    str r3, [sp] @ 4-byte Spill
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB8_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    beq .LBB8_18
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:  .LBB8_2:
+; SOFT-NEXT:    movs r5, #0
+; SOFT-NEXT:    mvns r1, r5
+; SOFT-NEXT:    str r4, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    mov r4, r1
 ; SOFT-NEXT:    bne .LBB8_4
-; SOFT-NEXT:  .LBB8_3:
-; SOFT-NEXT:    mov r2, r6
+; SOFT-NEXT:  @ %bb.3:
+; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:  .LBB8_4:
-; SOFT-NEXT:    str r2, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    str r1, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    str r0, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT:    beq .LBB8_6
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r1, r6
+; SOFT-NEXT:    bl __aeabi_fcmpun
+; SOFT-NEXT:    mov r3, r0
+; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    bne .LBB8_6
 ; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    mov r7, r3
-; SOFT-NEXT:  .LBB8_6:
-; SOFT-NEXT:    ldr r1, .LCPI8_0
 ; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    movs r6, #0
-; SOFT-NEXT:    mvns r5, r6
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB8_19
+; SOFT-NEXT:  .LBB8_6:
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    ldr r6, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    bne .LBB8_8
 ; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    beq .LBB8_20
+; SOFT-NEXT:    str r7, [sp, #4] @ 4-byte Spill
 ; SOFT-NEXT:  .LBB8_8:
-; SOFT-NEXT:    mov r7, r5
-; SOFT-NEXT:    beq .LBB8_21
-; SOFT-NEXT:  .LBB8_9:
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    bne .LBB8_11
+; SOFT-NEXT:    ldr r4, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r2, r6
+; SOFT-NEXT:    bne .LBB8_10
+; SOFT-NEXT:  @ %bb.9:
+; SOFT-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB8_10:
-; SOFT-NEXT:    ldr r5, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT:  .LBB8_11:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    bl __aeabi_fcmpun
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    beq .LBB8_22
-; SOFT-NEXT:  @ %bb.12:
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    beq .LBB8_23
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    beq .LBB8_18
+; SOFT-NEXT:  @ %bb.11:
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB8_19
+; SOFT-NEXT:  .LBB8_12:
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB8_14
 ; SOFT-NEXT:  .LBB8_13:
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    beq .LBB8_24
+; SOFT-NEXT:    ldr r6, [sp, #12] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB8_14:
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    mov r2, r5
 ; SOFT-NEXT:    bne .LBB8_16
-; SOFT-NEXT:  .LBB8_15:
-; SOFT-NEXT:    ldr r6, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:  @ %bb.15:
+; SOFT-NEXT:    mov r2, r6
 ; SOFT-NEXT:  .LBB8_16:
-; SOFT-NEXT:    mov r3, r6
-; SOFT-NEXT:    add sp, #20
-; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
-; SOFT-NEXT:  .LBB8_17:
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    bne .LBB8_2
+; SOFT-NEXT:    movs r4, #7
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB8_20
+; SOFT-NEXT:  @ %bb.17:
+; SOFT-NEXT:    ldr r7, [sp] @ 4-byte Reload
+; SOFT-NEXT:    b .LBB8_21
 ; SOFT-NEXT:  .LBB8_18:
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    beq .LBB8_3
-; SOFT-NEXT:    b .LBB8_4
+; SOFT-NEXT:    mov r1, r2
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB8_12
 ; SOFT-NEXT:  .LBB8_19:
-; SOFT-NEXT:    str r7, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    bne .LBB8_8
+; SOFT-NEXT:    str r7, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB8_13
+; SOFT-NEXT:    b .LBB8_14
 ; SOFT-NEXT:  .LBB8_20:
-; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    mov r7, r5
-; SOFT-NEXT:    bne .LBB8_9
+; SOFT-NEXT:    mvns r7, r4
 ; SOFT-NEXT:  .LBB8_21:
-; SOFT-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    beq .LBB8_10
-; SOFT-NEXT:    b .LBB8_11
-; SOFT-NEXT:  .LBB8_22:
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    bne .LBB8_13
+; SOFT-NEXT:    ldr r6, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB8_23
+; SOFT-NEXT:  @ %bb.22:
+; SOFT-NEXT:    mov r4, r7
 ; SOFT-NEXT:  .LBB8_23:
-; SOFT-NEXT:    mov r1, r7
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    bne .LBB8_14
-; SOFT-NEXT:  .LBB8_24:
-; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    beq .LBB8_15
-; SOFT-NEXT:    b .LBB8_16
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    bne .LBB8_25
+; SOFT-NEXT:  @ %bb.24:
+; SOFT-NEXT:    mov r5, r4
+; SOFT-NEXT:  .LBB8_25:
+; SOFT-NEXT:    mov r3, r5
+; SOFT-NEXT:    add sp, #20
+; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
-; SOFT-NEXT:  @ %bb.25:
+; SOFT-NEXT:  @ %bb.26:
 ; SOFT-NEXT:  .LCPI8_0:
 ; SOFT-NEXT:    .long 1895825407 @ 0x70ffffff
 ;
@@ -928,104 +943,109 @@ define i128 @test_signed_i128_f32(float %f) nounwind {
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    .pad #20
 ; SOFT-NEXT:    sub sp, #20
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    ldr r1, .LCPI9_0
+; SOFT-NEXT:    bl __aeabi_fcmpgt
 ; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    movs r0, #255
 ; SOFT-NEXT:    lsls r1, r0, #24
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __fixsfti
-; SOFT-NEXT:    movs r5, #1
-; SOFT-NEXT:    lsls r7, r5, #31
-; SOFT-NEXT:    cmp r6, #0
-; SOFT-NEXT:    beq .LBB9_18
+; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    str r2, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:    str r3, [sp] @ 4-byte Spill
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB9_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    beq .LBB9_19
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:  .LBB9_2:
+; SOFT-NEXT:    movs r5, #0
+; SOFT-NEXT:    mvns r1, r5
+; SOFT-NEXT:    str r4, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    mov r4, r1
 ; SOFT-NEXT:    bne .LBB9_4
-; SOFT-NEXT:  .LBB9_3:
-; SOFT-NEXT:    mov r2, r6
+; SOFT-NEXT:  @ %bb.3:
+; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:  .LBB9_4:
-; SOFT-NEXT:    str r2, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    str r1, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    str r0, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT:    beq .LBB9_6
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r1, r6
+; SOFT-NEXT:    bl __aeabi_fcmpun
+; SOFT-NEXT:    mov r3, r0
+; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    bne .LBB9_6
 ; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    mov r7, r3
-; SOFT-NEXT:  .LBB9_6:
-; SOFT-NEXT:    ldr r1, .LCPI9_0
 ; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    movs r6, #0
-; SOFT-NEXT:    mvns r5, r6
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB9_8
+; SOFT-NEXT:  .LBB9_6:
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    ldr r6, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    bne .LBB9_8
 ; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    ldr r7, .LCPI9_1
+; SOFT-NEXT:    str r7, [sp, #4] @ 4-byte Spill
 ; SOFT-NEXT:  .LBB9_8:
-; SOFT-NEXT:    str r7, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    beq .LBB9_20
+; SOFT-NEXT:    ldr r4, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r2, r6
+; SOFT-NEXT:    bne .LBB9_10
 ; SOFT-NEXT:  @ %bb.9:
-; SOFT-NEXT:    mov r7, r5
-; SOFT-NEXT:    beq .LBB9_21
+; SOFT-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB9_10:
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    bne .LBB9_12
-; SOFT-NEXT:  .LBB9_11:
-; SOFT-NEXT:    ldr r5, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    beq .LBB9_18
+; SOFT-NEXT:  @ %bb.11:
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB9_19
 ; SOFT-NEXT:  .LBB9_12:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    bl __aeabi_fcmpun
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    beq .LBB9_22
-; SOFT-NEXT:  @ %bb.13:
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    beq .LBB9_23
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB9_14
+; SOFT-NEXT:  .LBB9_13:
+; SOFT-NEXT:    ldr r6, [sp, #12] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB9_14:
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    mov r2, r5
+; SOFT-NEXT:    bne .LBB9_16
+; SOFT-NEXT:  @ %bb.15:
 ; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    beq .LBB9_24
-; SOFT-NEXT:  .LBB9_15:
-; SOFT-NEXT:    ldr r3, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT:    bne .LBB9_17
 ; SOFT-NEXT:  .LBB9_16:
-; SOFT-NEXT:    mov r6, r3
-; SOFT-NEXT:  .LBB9_17:
-; SOFT-NEXT:    mov r3, r6
-; SOFT-NEXT:    add sp, #20
-; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB9_20
+; SOFT-NEXT:  @ %bb.17:
+; SOFT-NEXT:    ldr r6, [sp] @ 4-byte Reload
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB9_21
+; SOFT-NEXT:    b .LBB9_22
 ; SOFT-NEXT:  .LBB9_18:
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    bne .LBB9_2
+; SOFT-NEXT:    mov r1, r2
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB9_12
 ; SOFT-NEXT:  .LBB9_19:
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    beq .LBB9_3
-; SOFT-NEXT:    b .LBB9_4
+; SOFT-NEXT:    str r7, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB9_13
+; SOFT-NEXT:    b .LBB9_14
 ; SOFT-NEXT:  .LBB9_20:
-; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    mov r7, r5
-; SOFT-NEXT:    bne .LBB9_10
+; SOFT-NEXT:    movs r4, #1
+; SOFT-NEXT:    lsls r6, r4, #31
+; SOFT-NEXT:    ldr r4, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB9_22
 ; SOFT-NEXT:  .LBB9_21:
-; SOFT-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    beq .LBB9_11
-; SOFT-NEXT:    b .LBB9_12
+; SOFT-NEXT:    ldr r6, .LCPI9_1
 ; SOFT-NEXT:  .LBB9_22:
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    bne .LBB9_14
-; SOFT-NEXT:  .LBB9_23:
-; SOFT-NEXT:    mov r1, r7
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    bne .LBB9_15
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    bne .LBB9_24
+; SOFT-NEXT:  @ %bb.23:
+; SOFT-NEXT:    mov r5, r6
 ; SOFT-NEXT:  .LBB9_24:
-; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    ldr r3, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT:    beq .LBB9_16
-; SOFT-NEXT:    b .LBB9_17
+; SOFT-NEXT:    mov r3, r5
+; SOFT-NEXT:    add sp, #20
+; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.25:
 ; SOFT-NEXT:  .LCPI9_0:
@@ -1196,6 +1216,12 @@ define i8 @test_signed_i8_f64(double %f) nounwind {
 ; SOFT-NEXT:    movs r4, #0
 ; SOFT-NEXT:    ldr r3, .LCPI11_0
 ; SOFT-NEXT:    mov r2, r4
+; SOFT-NEXT:    bl __aeabi_dcmpgt
+; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT:    ldr r3, .LCPI11_1
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    mov r2, r4
 ; SOFT-NEXT:    bl __aeabi_dcmpge
 ; SOFT-NEXT:    mov r7, r0
 ; SOFT-NEXT:    mov r0, r6
@@ -1203,45 +1229,36 @@ define i8 @test_signed_i8_f64(double %f) nounwind {
 ; SOFT-NEXT:    bl __aeabi_d2iz
 ; SOFT-NEXT:    movs r1, #127
 ; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    beq .LBB11_2
+; SOFT-NEXT:    bne .LBB11_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
-; SOFT-NEXT:    mov r7, r1
-; SOFT-NEXT:    b .LBB11_3
+; SOFT-NEXT:    mvns r0, r1
 ; SOFT-NEXT:  .LBB11_2:
+; SOFT-NEXT:    ldr r2, [sp] @ 4-byte Reload
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    bne .LBB11_4
+; SOFT-NEXT:  @ %bb.3:
+; SOFT-NEXT:    mov r1, r0
+; SOFT-NEXT:  .LBB11_4:
 ; SOFT-NEXT:    mov r7, r1
-; SOFT-NEXT:    mvns r0, r1
-; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
-; SOFT-NEXT:  .LBB11_3:
-; SOFT-NEXT:    ldr r3, .LCPI11_1
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r4
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    bne .LBB11_5
-; SOFT-NEXT:  @ %bb.4:
-; SOFT-NEXT:    ldr r7, [sp] @ 4-byte Reload
-; SOFT-NEXT:  .LBB11_5:
 ; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    mov r1, r5
 ; SOFT-NEXT:    mov r2, r6
 ; SOFT-NEXT:    mov r3, r5
 ; SOFT-NEXT:    bl __aeabi_dcmpun
 ; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    bne .LBB11_7
-; SOFT-NEXT:  @ %bb.6:
+; SOFT-NEXT:    bne .LBB11_6
+; SOFT-NEXT:  @ %bb.5:
 ; SOFT-NEXT:    mov r4, r7
-; SOFT-NEXT:  .LBB11_7:
+; SOFT-NEXT:  .LBB11_6:
 ; SOFT-NEXT:    mov r0, r4
 ; SOFT-NEXT:    add sp, #4
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
-; SOFT-NEXT:  @ %bb.8:
+; SOFT-NEXT:  @ %bb.7:
 ; SOFT-NEXT:  .LCPI11_0:
-; SOFT-NEXT:    .long 3227516928 @ 0xc0600000
-; SOFT-NEXT:  .LCPI11_1:
 ; SOFT-NEXT:    .long 1080016896 @ 0x405fc000
+; SOFT-NEXT:  .LCPI11_1:
+; SOFT-NEXT:    .long 3227516928 @ 0xc0600000
 ;
 ; VFP2-LABEL: test_signed_i8_f64:
 ; VFP2:       @ %bb.0:
@@ -1310,27 +1327,29 @@ define i13 @test_signed_i13_f64(double %f) nounwind {
 ; SOFT-NEXT:    movs r4, #0
 ; SOFT-NEXT:    ldr r3, .LCPI12_0
 ; SOFT-NEXT:    mov r2, r4
+; SOFT-NEXT:    bl __aeabi_dcmpgt
+; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT:    ldr r3, .LCPI12_1
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    mov r2, r4
 ; SOFT-NEXT:    bl __aeabi_dcmpge
 ; SOFT-NEXT:    mov r7, r0
 ; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    mov r1, r5
 ; SOFT-NEXT:    bl __aeabi_d2iz
 ; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    beq .LBB12_2
+; SOFT-NEXT:    bne .LBB12_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    b .LBB12_3
+; SOFT-NEXT:    ldr r0, .LCPI12_2
 ; SOFT-NEXT:  .LBB12_2:
-; SOFT-NEXT:    ldr r7, .LCPI12_1
-; SOFT-NEXT:  .LBB12_3:
-; SOFT-NEXT:    ldr r3, .LCPI12_2
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r4
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB12_5
-; SOFT-NEXT:  @ %bb.4:
+; SOFT-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; SOFT-NEXT:    cmp r1, #0
+; SOFT-NEXT:    bne .LBB12_4
+; SOFT-NEXT:  @ %bb.3:
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    b .LBB12_5
+; SOFT-NEXT:  .LBB12_4:
 ; SOFT-NEXT:    ldr r7, .LCPI12_3
 ; SOFT-NEXT:  .LBB12_5:
 ; SOFT-NEXT:    mov r0, r6
@@ -1349,11 +1368,11 @@ define i13 @test_signed_i13_f64(double %f) nounwind {
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.8:
 ; SOFT-NEXT:  .LCPI12_0:
-; SOFT-NEXT:    .long 3232759808 @ 0xc0b00000
+; SOFT-NEXT:    .long 1085275648 @ 0x40affe00
 ; SOFT-NEXT:  .LCPI12_1:
-; SOFT-NEXT:    .long 4294963200 @ 0xfffff000
+; SOFT-NEXT:    .long 3232759808 @ 0xc0b00000
 ; SOFT-NEXT:  .LCPI12_2:
-; SOFT-NEXT:    .long 1085275648 @ 0x40affe00
+; SOFT-NEXT:    .long 4294963200 @ 0xfffff000
 ; SOFT-NEXT:  .LCPI12_3:
 ; SOFT-NEXT:    .long 4095 @ 0xfff
 ;
@@ -1425,27 +1444,29 @@ define i16 @test_signed_i16_f64(double %f) nounwind {
 ; SOFT-NEXT:    movs r4, #0
 ; SOFT-NEXT:    ldr r3, .LCPI13_0
 ; SOFT-NEXT:    mov r2, r4
+; SOFT-NEXT:    bl __aeabi_dcmpgt
+; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT:    ldr r3, .LCPI13_1
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    mov r2, r4
 ; SOFT-NEXT:    bl __aeabi_dcmpge
 ; SOFT-NEXT:    mov r7, r0
 ; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    mov r1, r5
 ; SOFT-NEXT:    bl __aeabi_d2iz
 ; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    beq .LBB13_2
+; SOFT-NEXT:    bne .LBB13_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    b .LBB13_3
+; SOFT-NEXT:    ldr r0, .LCPI13_2
 ; SOFT-NEXT:  .LBB13_2:
-; SOFT-NEXT:    ldr r7, .LCPI13_1
-; SOFT-NEXT:  .LBB13_3:
-; SOFT-NEXT:    ldr r3, .LCPI13_2
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r4
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB13_5
-; SOFT-NEXT:  @ %bb.4:
+; SOFT-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; SOFT-NEXT:    cmp r1, #0
+; SOFT-NEXT:    bne .LBB13_4
+; SOFT-NEXT:  @ %bb.3:
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    b .LBB13_5
+; SOFT-NEXT:  .LBB13_4:
 ; SOFT-NEXT:    ldr r7, .LCPI13_3
 ; SOFT-NEXT:  .LBB13_5:
 ; SOFT-NEXT:    mov r0, r6
@@ -1464,11 +1485,11 @@ define i16 @test_signed_i16_f64(double %f) nounwind {
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.8:
 ; SOFT-NEXT:  .LCPI13_0:
-; SOFT-NEXT:    .long 3235905536 @ 0xc0e00000
+; SOFT-NEXT:    .long 1088421824 @ 0x40dfffc0
 ; SOFT-NEXT:  .LCPI13_1:
-; SOFT-NEXT:    .long 4294934528 @ 0xffff8000
+; SOFT-NEXT:    .long 3235905536 @ 0xc0e00000
 ; SOFT-NEXT:  .LCPI13_2:
-; SOFT-NEXT:    .long 1088421824 @ 0x40dfffc0
+; SOFT-NEXT:    .long 4294934528 @ 0xffff8000
 ; SOFT-NEXT:  .LCPI13_3:
 ; SOFT-NEXT:    .long 32767 @ 0x7fff
 ;
@@ -1540,27 +1561,29 @@ define i19 @test_signed_i19_f64(double %f) nounwind {
 ; SOFT-NEXT:    movs r4, #0
 ; SOFT-NEXT:    ldr r3, .LCPI14_0
 ; SOFT-NEXT:    mov r2, r4
+; SOFT-NEXT:    bl __aeabi_dcmpgt
+; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT:    ldr r3, .LCPI14_1
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    mov r2, r4
 ; SOFT-NEXT:    bl __aeabi_dcmpge
 ; SOFT-NEXT:    mov r7, r0
 ; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    mov r1, r5
 ; SOFT-NEXT:    bl __aeabi_d2iz
 ; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    beq .LBB14_2
+; SOFT-NEXT:    bne .LBB14_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    b .LBB14_3
+; SOFT-NEXT:    ldr r0, .LCPI14_2
 ; SOFT-NEXT:  .LBB14_2:
-; SOFT-NEXT:    ldr r7, .LCPI14_1
-; SOFT-NEXT:  .LBB14_3:
-; SOFT-NEXT:    ldr r3, .LCPI14_2
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r4
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB14_5
-; SOFT-NEXT:  @ %bb.4:
+; SOFT-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; SOFT-NEXT:    cmp r1, #0
+; SOFT-NEXT:    bne .LBB14_4
+; SOFT-NEXT:  @ %bb.3:
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    b .LBB14_5
+; SOFT-NEXT:  .LBB14_4:
 ; SOFT-NEXT:    ldr r7, .LCPI14_3
 ; SOFT-NEXT:  .LBB14_5:
 ; SOFT-NEXT:    mov r0, r6
@@ -1579,11 +1602,11 @@ define i19 @test_signed_i19_f64(double %f) nounwind {
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.8:
 ; SOFT-NEXT:  .LCPI14_0:
-; SOFT-NEXT:    .long 3239051264 @ 0xc1100000
+; SOFT-NEXT:    .long 1091567608 @ 0x410ffff8
 ; SOFT-NEXT:  .LCPI14_1:
-; SOFT-NEXT:    .long 4294705152 @ 0xfffc0000
+; SOFT-NEXT:    .long 3239051264 @ 0xc1100000
 ; SOFT-NEXT:  .LCPI14_2:
-; SOFT-NEXT:    .long 1091567608 @ 0x410ffff8
+; SOFT-NEXT:    .long 4294705152 @ 0xfffc0000
 ; SOFT-NEXT:  .LCPI14_3:
 ; SOFT-NEXT:    .long 262143 @ 0x3ffff
 ;
@@ -1651,56 +1674,58 @@ define i32 @test_signed_i32_f64(double %f) nounwind {
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    .pad #4
 ; SOFT-NEXT:    sub sp, #4
-; SOFT-NEXT:    mov r5, r1
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    movs r4, #0
-; SOFT-NEXT:    ldr r3, .LCPI15_0
-; SOFT-NEXT:    mov r2, r4
+; SOFT-NEXT:    mov r4, r1
+; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    ldr r2, .LCPI15_0
+; SOFT-NEXT:    ldr r3, .LCPI15_1
+; SOFT-NEXT:    bl __aeabi_dcmpgt
+; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT:    movs r6, #0
+; SOFT-NEXT:    ldr r3, .LCPI15_2
+; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    mov r2, r6
 ; SOFT-NEXT:    bl __aeabi_dcmpge
 ; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r1, r4
 ; SOFT-NEXT:    bl __aeabi_d2iz
 ; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    beq .LBB15_2
+; SOFT-NEXT:    bne .LBB15_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    b .LBB15_3
-; SOFT-NEXT:  .LBB15_2:
 ; SOFT-NEXT:    movs r0, #1
-; SOFT-NEXT:    lsls r7, r0, #31
-; SOFT-NEXT:  .LBB15_3:
-; SOFT-NEXT:    ldr r2, .LCPI15_1
-; SOFT-NEXT:    ldr r3, .LCPI15_2
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB15_5
-; SOFT-NEXT:  @ %bb.4:
+; SOFT-NEXT:    lsls r0, r0, #31
+; SOFT-NEXT:  .LBB15_2:
+; SOFT-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; SOFT-NEXT:    cmp r1, #0
+; SOFT-NEXT:    bne .LBB15_4
+; SOFT-NEXT:  @ %bb.3:
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    b .LBB15_5
+; SOFT-NEXT:  .LBB15_4:
 ; SOFT-NEXT:    ldr r7, .LCPI15_3
 ; SOFT-NEXT:  .LBB15_5:
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    mov r3, r5
+; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    mov r2, r5
+; SOFT-NEXT:    mov r3, r4
 ; SOFT-NEXT:    bl __aeabi_dcmpun
 ; SOFT-NEXT:    cmp r0, #0
 ; SOFT-NEXT:    bne .LBB15_7
 ; SOFT-NEXT:  @ %bb.6:
-; SOFT-NEXT:    mov r4, r7
+; SOFT-NEXT:    mov r6, r7
 ; SOFT-NEXT:  .LBB15_7:
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    add sp, #4
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.8:
 ; SOFT-NEXT:  .LCPI15_0:
-; SOFT-NEXT:    .long 3252682752 @ 0xc1e00000
-; SOFT-NEXT:  .LCPI15_1:
 ; SOFT-NEXT:    .long 4290772992 @ 0xffc00000
-; SOFT-NEXT:  .LCPI15_2:
+; SOFT-NEXT:  .LCPI15_1:
 ; SOFT-NEXT:    .long 1105199103 @ 0x41dfffff
+; SOFT-NEXT:  .LCPI15_2:
+; SOFT-NEXT:    .long 3252682752 @ 0xc1e00000
 ; SOFT-NEXT:  .LCPI15_3:
 ; SOFT-NEXT:    .long 2147483647 @ 0x7fffffff
 ;
@@ -1728,73 +1753,82 @@ define i50 @test_signed_i50_f64(double %f) nounwind {
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    .pad #12
 ; SOFT-NEXT:    sub sp, #12
-; SOFT-NEXT:    mov r5, r1
+; SOFT-NEXT:    mov r7, r1
 ; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    movs r0, #15
+; SOFT-NEXT:    mvns r2, r0
+; SOFT-NEXT:    ldr r3, .LCPI16_0
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    bl __aeabi_dcmpgt
+; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
 ; SOFT-NEXT:    movs r0, #195
 ; SOFT-NEXT:    lsls r3, r0, #24
 ; SOFT-NEXT:    movs r4, #0
 ; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r1, r7
 ; SOFT-NEXT:    mov r2, r4
 ; SOFT-NEXT:    bl __aeabi_dcmpge
-; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    mov r5, r0
 ; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    mov r1, r7
 ; SOFT-NEXT:    bl __aeabi_d2lz
-; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    mov r2, r0
+; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB16_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    str r7, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    mov r2, r5
 ; SOFT-NEXT:  .LBB16_2:
-; SOFT-NEXT:    beq .LBB16_4
+; SOFT-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    bne .LBB16_4
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    str r2, [sp] @ 4-byte Spill
 ; SOFT-NEXT:    b .LBB16_5
 ; SOFT-NEXT:  .LBB16_4:
-; SOFT-NEXT:    ldr r0, .LCPI16_0
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    mvns r0, r4
+; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
 ; SOFT-NEXT:  .LBB16_5:
-; SOFT-NEXT:    movs r0, #15
-; SOFT-NEXT:    mvns r2, r0
-; SOFT-NEXT:    ldr r3, .LCPI16_1
 ; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    mvns r7, r4
+; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    mov r2, r6
+; SOFT-NEXT:    mov r3, r7
+; SOFT-NEXT:    bl __aeabi_dcmpun
+; SOFT-NEXT:    mov r1, r0
 ; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB16_7
+; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    bne .LBB16_7
 ; SOFT-NEXT:  @ %bb.6:
-; SOFT-NEXT:    ldr r0, .LCPI16_2
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    ldr r0, [sp] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB16_7:
-; SOFT-NEXT:    bne .LBB16_9
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB16_9
 ; SOFT-NEXT:  @ %bb.8:
-; SOFT-NEXT:    ldr r7, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:    b .LBB16_10
 ; SOFT-NEXT:  .LBB16_9:
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    mov r3, r5
-; SOFT-NEXT:    bl __aeabi_dcmpun
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    bne .LBB16_11
-; SOFT-NEXT:  @ %bb.10:
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:  .LBB16_11:
-; SOFT-NEXT:    bne .LBB16_13
-; SOFT-NEXT:  @ %bb.12:
-; SOFT-NEXT:    ldr r4, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:  .LBB16_13:
+; SOFT-NEXT:    ldr r3, .LCPI16_1
+; SOFT-NEXT:  .LBB16_10:
+; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    beq .LBB16_12
+; SOFT-NEXT:  @ %bb.11:
+; SOFT-NEXT:    ldr r3, .LCPI16_2
+; SOFT-NEXT:  .LBB16_12:
+; SOFT-NEXT:    cmp r1, #0
+; SOFT-NEXT:    bne .LBB16_14
+; SOFT-NEXT:  @ %bb.13:
+; SOFT-NEXT:    mov r4, r3
+; SOFT-NEXT:  .LBB16_14:
 ; SOFT-NEXT:    mov r1, r4
 ; SOFT-NEXT:    add sp, #12
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
-; SOFT-NEXT:  @ %bb.14:
+; SOFT-NEXT:  @ %bb.15:
 ; SOFT-NEXT:  .LCPI16_0:
-; SOFT-NEXT:    .long 4294836224 @ 0xfffe0000
-; SOFT-NEXT:  .LCPI16_1:
 ; SOFT-NEXT:    .long 1124073471 @ 0x42ffffff
+; SOFT-NEXT:  .LCPI16_1:
+; SOFT-NEXT:    .long 4294836224 @ 0xfffe0000
 ; SOFT-NEXT:  .LCPI16_2:
 ; SOFT-NEXT:    .long 131071 @ 0x1ffff
 ;
@@ -1816,12 +1850,11 @@ define i50 @test_signed_i50_f64(double %f) nounwind {
 ; VFP2-NEXT:    movlt r0, #0
 ; VFP2-NEXT:    vcmp.f64 d17, d18
 ; VFP2-NEXT:    vmrs APSR_nzcv, fpscr
-; VFP2-NEXT:    it gt
-; VFP2-NEXT:    movgt.w r0, #-1
-; VFP2-NEXT:    vcmp.f64 d17, d17
-; VFP2-NEXT:    itt gt
+; VFP2-NEXT:    ittt gt
 ; VFP2-NEXT:    movwgt r1, #65535
 ; VFP2-NEXT:    movtgt r1, #1
+; VFP2-NEXT:    movgt.w r0, #-1
+; VFP2-NEXT:    vcmp.f64 d17, d17
 ; VFP2-NEXT:    vmrs APSR_nzcv, fpscr
 ; VFP2-NEXT:    itt vs
 ; VFP2-NEXT:    movvs r0, #0
@@ -1874,70 +1907,77 @@ define i64 @test_signed_i64_f64(double %f) nounwind {
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    .pad #12
-; SOFT-NEXT:    sub sp, #12
-; SOFT-NEXT:    mov r5, r1
-; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    sub sp, #12
+; SOFT-NEXT:    mov r6, r1
+; SOFT-NEXT:    mov r5, r0
 ; SOFT-NEXT:    movs r4, #0
+; SOFT-NEXT:    mvns r2, r4
 ; SOFT-NEXT:    ldr r3, .LCPI17_0
+; SOFT-NEXT:    str r2, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    bl __aeabi_dcmpgt
+; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    ldr r3, .LCPI17_1
+; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:    mov r2, r4
 ; SOFT-NEXT:    bl __aeabi_dcmpge
 ; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:    bl __aeabi_d2lz
-; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    movs r0, #1
-; SOFT-NEXT:    lsls r0, r0, #31
+; SOFT-NEXT:    str r1, [sp] @ 4-byte Spill
 ; SOFT-NEXT:    cmp r7, #0
 ; SOFT-NEXT:    bne .LBB17_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    str r7, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:  .LBB17_2:
-; SOFT-NEXT:    beq .LBB17_4
+; SOFT-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    cmp r1, #0
+; SOFT-NEXT:    bne .LBB17_4
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r0, r1
+; SOFT-NEXT:    str r0, [sp, #4] @ 4-byte Spill
 ; SOFT-NEXT:  .LBB17_4:
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    mvns r7, r4
-; SOFT-NEXT:    ldr r3, .LCPI17_1
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r7
-; SOFT-NEXT:    bl __aeabi_dcmpgt
+; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r1, r6
+; SOFT-NEXT:    mov r2, r5
+; SOFT-NEXT:    mov r3, r6
+; SOFT-NEXT:    bl __aeabi_dcmpun
+; SOFT-NEXT:    mov r1, r0
 ; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB17_6
+; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    bne .LBB17_6
 ; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    ldr r0, .LCPI17_2
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB17_6:
-; SOFT-NEXT:    bne .LBB17_8
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB17_8
 ; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    ldr r7, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:    ldr r3, [sp] @ 4-byte Reload
+; SOFT-NEXT:    b .LBB17_9
 ; SOFT-NEXT:  .LBB17_8:
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    mov r3, r5
-; SOFT-NEXT:    bl __aeabi_dcmpun
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    bne .LBB17_10
-; SOFT-NEXT:  @ %bb.9:
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:  .LBB17_10:
-; SOFT-NEXT:    bne .LBB17_12
-; SOFT-NEXT:  @ %bb.11:
-; SOFT-NEXT:    ldr r4, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:  .LBB17_12:
+; SOFT-NEXT:    movs r2, #1
+; SOFT-NEXT:    lsls r3, r2, #31
+; SOFT-NEXT:  .LBB17_9:
+; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    beq .LBB17_11
+; SOFT-NEXT:  @ %bb.10:
+; SOFT-NEXT:    ldr r3, .LCPI17_2
+; SOFT-NEXT:  .LBB17_11:
+; SOFT-NEXT:    cmp r1, #0
+; SOFT-NEXT:    bne .LBB17_13
+; SOFT-NEXT:  @ %bb.12:
+; SOFT-NEXT:    mov r4, r3
+; SOFT-NEXT:  .LBB17_13:
 ; SOFT-NEXT:    mov r1, r4
 ; SOFT-NEXT:    add sp, #12
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
-; SOFT-NEXT:  @ %bb.13:
+; SOFT-NEXT:  @ %bb.14:
 ; SOFT-NEXT:  .LCPI17_0:
-; SOFT-NEXT:    .long 3286237184 @ 0xc3e00000
-; SOFT-NEXT:  .LCPI17_1:
 ; SOFT-NEXT:    .long 1138753535 @ 0x43dfffff
+; SOFT-NEXT:  .LCPI17_1:
+; SOFT-NEXT:    .long 3286237184 @ 0xc3e00000
 ; SOFT-NEXT:  .LCPI17_2:
 ; SOFT-NEXT:    .long 2147483647 @ 0x7fffffff
 ;
@@ -2019,119 +2059,122 @@ define i100 @test_signed_i100_f64(double %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #20
-; SOFT-NEXT:    sub sp, #20
+; SOFT-NEXT:    .pad #28
+; SOFT-NEXT:    sub sp, #28
 ; SOFT-NEXT:    mov r5, r1
 ; SOFT-NEXT:    mov r6, r0
 ; SOFT-NEXT:    movs r4, #0
+; SOFT-NEXT:    mvns r2, r4
 ; SOFT-NEXT:    ldr r3, .LCPI18_0
+; SOFT-NEXT:    str r2, [sp, #24] @ 4-byte Spill
+; SOFT-NEXT:    bl __aeabi_dcmpgt
+; SOFT-NEXT:    str r0, [sp, #20] @ 4-byte Spill
+; SOFT-NEXT:    ldr r3, .LCPI18_1
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r1, r5
 ; SOFT-NEXT:    mov r2, r4
 ; SOFT-NEXT:    bl __aeabi_dcmpge
 ; SOFT-NEXT:    mov r7, r0
 ; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    mov r1, r5
 ; SOFT-NEXT:    bl __fixdfti
-; SOFT-NEXT:    str r0, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT:    str r1, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    str r2, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    movs r0, #7
-; SOFT-NEXT:    str r0, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT:    mvns r0, r0
+; SOFT-NEXT:    str r1, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:    str r2, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT:    str r3, [sp, #8] @ 4-byte Spill
 ; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    beq .LBB18_17
+; SOFT-NEXT:    bne .LBB18_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    beq .LBB18_18
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:  .LBB18_2:
-; SOFT-NEXT:    beq .LBB18_19
-; SOFT-NEXT:  .LBB18_3:
-; SOFT-NEXT:    beq .LBB18_5
+; SOFT-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
+; SOFT-NEXT:    cmp r1, #0
+; SOFT-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
+; SOFT-NEXT:    bne .LBB18_4
+; SOFT-NEXT:  @ %bb.3:
+; SOFT-NEXT:    mov r1, r0
 ; SOFT-NEXT:  .LBB18_4:
-; SOFT-NEXT:    mov r0, r3
-; SOFT-NEXT:  .LBB18_5:
-; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
-; SOFT-NEXT:    mvns r7, r4
-; SOFT-NEXT:    ldr r3, .LCPI18_1
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r7
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB18_20
-; SOFT-NEXT:  @ %bb.6:
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    beq .LBB18_21
-; SOFT-NEXT:  .LBB18_7:
-; SOFT-NEXT:    mov r1, r7
-; SOFT-NEXT:    bne .LBB18_9
-; SOFT-NEXT:  .LBB18_8:
-; SOFT-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:  .LBB18_9:
 ; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    bne .LBB18_11
-; SOFT-NEXT:  @ %bb.10:
-; SOFT-NEXT:    ldr r7, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT:  .LBB18_11:
 ; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    mov r1, r5
 ; SOFT-NEXT:    mov r2, r6
 ; SOFT-NEXT:    mov r3, r5
 ; SOFT-NEXT:    bl __aeabi_dcmpun
+; SOFT-NEXT:    mov r3, r0
 ; SOFT-NEXT:    cmp r0, #0
 ; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    beq .LBB18_22
-; SOFT-NEXT:  @ %bb.12:
+; SOFT-NEXT:    bne .LBB18_6
+; SOFT-NEXT:  @ %bb.5:
+; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:  .LBB18_6:
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    ldr r6, [sp, #20] @ 4-byte Reload
+; SOFT-NEXT:    bne .LBB18_8
+; SOFT-NEXT:  @ %bb.7:
+; SOFT-NEXT:    str r7, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:  .LBB18_8:
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    ldr r2, [sp, #24] @ 4-byte Reload
+; SOFT-NEXT:    bne .LBB18_10
+; SOFT-NEXT:  @ %bb.9:
+; SOFT-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT:  .LBB18_10:
+; SOFT-NEXT:    cmp r3, #0
 ; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    beq .LBB18_23
+; SOFT-NEXT:    beq .LBB18_18
+; SOFT-NEXT:  @ %bb.11:
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB18_19
+; SOFT-NEXT:  .LBB18_12:
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB18_14
 ; SOFT-NEXT:  .LBB18_13:
-; SOFT-NEXT:    mov r2, r4
-; SOFT-NEXT:    beq .LBB18_24
+; SOFT-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    str r2, [sp, #24] @ 4-byte Spill
 ; SOFT-NEXT:  .LBB18_14:
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    mov r2, r4
 ; SOFT-NEXT:    bne .LBB18_16
-; SOFT-NEXT:  .LBB18_15:
-; SOFT-NEXT:    ldr r4, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:  @ %bb.15:
+; SOFT-NEXT:    ldr r2, [sp, #24] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB18_16:
-; SOFT-NEXT:    mov r3, r4
-; SOFT-NEXT:    add sp, #20
-; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
-; SOFT-NEXT:  .LBB18_17:
-; SOFT-NEXT:    str r7, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT:    bne .LBB18_2
+; SOFT-NEXT:    movs r5, #7
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB18_20
+; SOFT-NEXT:  @ %bb.17:
+; SOFT-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    beq .LBB18_21
+; SOFT-NEXT:    b .LBB18_22
 ; SOFT-NEXT:  .LBB18_18:
-; SOFT-NEXT:    str r7, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    bne .LBB18_3
+; SOFT-NEXT:    mov r1, r2
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB18_12
 ; SOFT-NEXT:  .LBB18_19:
-; SOFT-NEXT:    str r7, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    bne .LBB18_4
-; SOFT-NEXT:    b .LBB18_5
+; SOFT-NEXT:    str r7, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    beq .LBB18_13
+; SOFT-NEXT:    b .LBB18_14
 ; SOFT-NEXT:  .LBB18_20:
-; SOFT-NEXT:    ldr r0, [sp] @ 4-byte Reload
-; SOFT-NEXT:    str r0, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    bne .LBB18_7
+; SOFT-NEXT:    mvns r7, r5
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB18_22
 ; SOFT-NEXT:  .LBB18_21:
-; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    mov r1, r7
-; SOFT-NEXT:    beq .LBB18_8
-; SOFT-NEXT:    b .LBB18_9
+; SOFT-NEXT:    mov r5, r7
 ; SOFT-NEXT:  .LBB18_22:
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    bne .LBB18_13
-; SOFT-NEXT:  .LBB18_23:
-; SOFT-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    mov r2, r4
-; SOFT-NEXT:    bne .LBB18_14
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    bne .LBB18_24
+; SOFT-NEXT:  @ %bb.23:
+; SOFT-NEXT:    mov r4, r5
 ; SOFT-NEXT:  .LBB18_24:
-; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    beq .LBB18_15
-; SOFT-NEXT:    b .LBB18_16
+; SOFT-NEXT:    mov r3, r4
+; SOFT-NEXT:    add sp, #28
+; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.25:
 ; SOFT-NEXT:  .LCPI18_0:
-; SOFT-NEXT:    .long 3323985920 @ 0xc6200000
-; SOFT-NEXT:  .LCPI18_1:
 ; SOFT-NEXT:    .long 1176502271 @ 0x461fffff
+; SOFT-NEXT:  .LCPI18_1:
+; SOFT-NEXT:    .long 3323985920 @ 0xc6200000
 ;
 ; VFP2-LABEL: test_signed_i100_f64:
 ; VFP2:       @ %bb.0:
@@ -2223,118 +2266,122 @@ define i128 @test_signed_i128_f64(double %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #20
-; SOFT-NEXT:    sub sp, #20
+; SOFT-NEXT:    .pad #28
+; SOFT-NEXT:    sub sp, #28
 ; SOFT-NEXT:    mov r5, r1
 ; SOFT-NEXT:    mov r6, r0
 ; SOFT-NEXT:    movs r4, #0
+; SOFT-NEXT:    mvns r2, r4
 ; SOFT-NEXT:    ldr r3, .LCPI19_0
+; SOFT-NEXT:    str r2, [sp, #24] @ 4-byte Spill
+; SOFT-NEXT:    bl __aeabi_dcmpgt
+; SOFT-NEXT:    str r0, [sp, #20] @ 4-byte Spill
+; SOFT-NEXT:    ldr r3, .LCPI19_1
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r1, r5
 ; SOFT-NEXT:    mov r2, r4
 ; SOFT-NEXT:    bl __aeabi_dcmpge
 ; SOFT-NEXT:    mov r7, r0
 ; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    mov r1, r5
 ; SOFT-NEXT:    bl __fixdfti
-; SOFT-NEXT:    str r0, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT:    str r1, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    str r2, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    movs r0, #1
-; SOFT-NEXT:    lsls r0, r0, #31
-; SOFT-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT:    str r1, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:    str r2, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT:    str r3, [sp, #8] @ 4-byte Spill
 ; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    beq .LBB19_17
+; SOFT-NEXT:    bne .LBB19_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    beq .LBB19_18
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:  .LBB19_2:
-; SOFT-NEXT:    beq .LBB19_19
-; SOFT-NEXT:  .LBB19_3:
-; SOFT-NEXT:    beq .LBB19_5
+; SOFT-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
+; SOFT-NEXT:    cmp r1, #0
+; SOFT-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
+; SOFT-NEXT:    bne .LBB19_4
+; SOFT-NEXT:  @ %bb.3:
+; SOFT-NEXT:    mov r1, r0
 ; SOFT-NEXT:  .LBB19_4:
-; SOFT-NEXT:    str r3, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT:  .LBB19_5:
-; SOFT-NEXT:    mvns r7, r4
-; SOFT-NEXT:    ldr r3, .LCPI19_1
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    mov r2, r7
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    bne .LBB19_20
-; SOFT-NEXT:  @ %bb.6:
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    beq .LBB19_21
-; SOFT-NEXT:  .LBB19_7:
-; SOFT-NEXT:    mov r1, r7
-; SOFT-NEXT:    bne .LBB19_9
-; SOFT-NEXT:  .LBB19_8:
-; SOFT-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:  .LBB19_9:
 ; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    bne .LBB19_11
-; SOFT-NEXT:  @ %bb.10:
-; SOFT-NEXT:    ldr r7, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT:  .LBB19_11:
 ; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    mov r1, r5
 ; SOFT-NEXT:    mov r2, r6
 ; SOFT-NEXT:    mov r3, r5
 ; SOFT-NEXT:    bl __aeabi_dcmpun
+; SOFT-NEXT:    mov r3, r0
 ; SOFT-NEXT:    cmp r0, #0
 ; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    beq .LBB19_22
-; SOFT-NEXT:  @ %bb.12:
+; SOFT-NEXT:    bne .LBB19_6
+; SOFT-NEXT:  @ %bb.5:
+; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:  .LBB19_6:
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    ldr r6, [sp, #20] @ 4-byte Reload
+; SOFT-NEXT:    bne .LBB19_8
+; SOFT-NEXT:  @ %bb.7:
+; SOFT-NEXT:    str r7, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:  .LBB19_8:
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    ldr r2, [sp, #24] @ 4-byte Reload
+; SOFT-NEXT:    bne .LBB19_10
+; SOFT-NEXT:  @ %bb.9:
+; SOFT-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT:  .LBB19_10:
+; SOFT-NEXT:    cmp r3, #0
 ; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    beq .LBB19_23
+; SOFT-NEXT:    beq .LBB19_18
+; SOFT-NEXT:  @ %bb.11:
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB19_19
+; SOFT-NEXT:  .LBB19_12:
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB19_14
 ; SOFT-NEXT:  .LBB19_13:
-; SOFT-NEXT:    mov r2, r4
-; SOFT-NEXT:    beq .LBB19_24
+; SOFT-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    str r2, [sp, #24] @ 4-byte Spill
 ; SOFT-NEXT:  .LBB19_14:
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    mov r2, r4
 ; SOFT-NEXT:    bne .LBB19_16
-; SOFT-NEXT:  .LBB19_15:
-; SOFT-NEXT:    ldr r4, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:  @ %bb.15:
+; SOFT-NEXT:    ldr r2, [sp, #24] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB19_16:
-; SOFT-NEXT:    mov r3, r4
-; SOFT-NEXT:    add sp, #20
-; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
-; SOFT-NEXT:  .LBB19_17:
-; SOFT-NEXT:    str r7, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT:    bne .LBB19_2
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB19_20
+; SOFT-NEXT:  @ %bb.17:
+; SOFT-NEXT:    ldr r5, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB19_21
+; SOFT-NEXT:    b .LBB19_22
 ; SOFT-NEXT:  .LBB19_18:
-; SOFT-NEXT:    str r7, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    bne .LBB19_3
+; SOFT-NEXT:    mov r1, r2
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB19_12
 ; SOFT-NEXT:  .LBB19_19:
-; SOFT-NEXT:    str r7, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    bne .LBB19_4
-; SOFT-NEXT:    b .LBB19_5
+; SOFT-NEXT:    str r7, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    beq .LBB19_13
+; SOFT-NEXT:    b .LBB19_14
 ; SOFT-NEXT:  .LBB19_20:
-; SOFT-NEXT:    ldr r0, .LCPI19_2
-; SOFT-NEXT:    str r0, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    bne .LBB19_7
+; SOFT-NEXT:    movs r5, #1
+; SOFT-NEXT:    lsls r5, r5, #31
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    beq .LBB19_22
 ; SOFT-NEXT:  .LBB19_21:
-; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    mov r1, r7
-; SOFT-NEXT:    beq .LBB19_8
-; SOFT-NEXT:    b .LBB19_9
+; SOFT-NEXT:    ldr r5, .LCPI19_2
 ; SOFT-NEXT:  .LBB19_22:
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    bne .LBB19_13
-; SOFT-NEXT:  .LBB19_23:
-; SOFT-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    mov r2, r4
-; SOFT-NEXT:    bne .LBB19_14
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    bne .LBB19_24
+; SOFT-NEXT:  @ %bb.23:
+; SOFT-NEXT:    mov r4, r5
 ; SOFT-NEXT:  .LBB19_24:
-; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    beq .LBB19_15
-; SOFT-NEXT:    b .LBB19_16
+; SOFT-NEXT:    mov r3, r4
+; SOFT-NEXT:    add sp, #28
+; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.25:
 ; SOFT-NEXT:  .LCPI19_0:
-; SOFT-NEXT:    .long 3353346048 @ 0xc7e00000
-; SOFT-NEXT:  .LCPI19_1:
 ; SOFT-NEXT:    .long 1205862399 @ 0x47dfffff
+; SOFT-NEXT:  .LCPI19_1:
+; SOFT-NEXT:    .long 3353346048 @ 0xc7e00000
 ; SOFT-NEXT:  .LCPI19_2:
 ; SOFT-NEXT:    .long 2147483647 @ 0x7fffffff
 ;
@@ -2917,10 +2964,10 @@ define i19 @test_signed_i19_f16(half %f) nounwind {
 ; VFP2-NEXT:    movlt r0, #0
 ; VFP2-NEXT:    movtlt r0, #65532
 ; VFP2-NEXT:    vmrs APSR_nzcv, fpscr
-; VFP2-NEXT:    vcmp.f32 s0, s0
 ; VFP2-NEXT:    itt gt
 ; VFP2-NEXT:    movwgt r0, #65535
 ; VFP2-NEXT:    movtgt r0, #3
+; VFP2-NEXT:    vcmp.f32 s0, s0
 ; VFP2-NEXT:    vmrs APSR_nzcv, fpscr
 ; VFP2-NEXT:    it vs
 ; VFP2-NEXT:    movvs r0, #0
@@ -3031,69 +3078,73 @@ define i50 @test_signed_i50_f16(half %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #4
-; SOFT-NEXT:    sub sp, #4
+; SOFT-NEXT:    .pad #12
+; SOFT-NEXT:    sub sp, #12
 ; SOFT-NEXT:    uxth r0, r0
 ; SOFT-NEXT:    bl __aeabi_h2f
-; SOFT-NEXT:    mov r4, r0
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    ldr r1, .LCPI26_0
+; SOFT-NEXT:    bl __aeabi_fcmpgt
+; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
 ; SOFT-NEXT:    movs r0, #27
 ; SOFT-NEXT:    lsls r1, r0, #27
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    bl __aeabi_f2lz
 ; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    bl __aeabi_f2lz
+; SOFT-NEXT:    mov r4, r0
+; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB26_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r5, r7
+; SOFT-NEXT:    mov r4, r5
 ; SOFT-NEXT:  .LBB26_2:
+; SOFT-NEXT:    movs r6, #0
+; SOFT-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    cmp r0, #0
 ; SOFT-NEXT:    beq .LBB26_4
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    str r1, [sp] @ 4-byte Spill
-; SOFT-NEXT:    b .LBB26_5
+; SOFT-NEXT:    mvns r4, r6
 ; SOFT-NEXT:  .LBB26_4:
-; SOFT-NEXT:    ldr r0, .LCPI26_0
-; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
-; SOFT-NEXT:  .LBB26_5:
-; SOFT-NEXT:    ldr r1, .LCPI26_1
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    movs r7, #0
-; SOFT-NEXT:    mvns r6, r7
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB26_7
-; SOFT-NEXT:  @ %bb.6:
-; SOFT-NEXT:    ldr r0, .LCPI26_2
-; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
-; SOFT-NEXT:  .LBB26_7:
-; SOFT-NEXT:    bne .LBB26_9
-; SOFT-NEXT:  @ %bb.8:
-; SOFT-NEXT:    mov r6, r5
-; SOFT-NEXT:  .LBB26_9:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    mov r1, r7
 ; SOFT-NEXT:    bl __aeabi_fcmpun
+; SOFT-NEXT:    mov r1, r0
 ; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    bne .LBB26_11
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    bne .LBB26_6
+; SOFT-NEXT:  @ %bb.5:
+; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:  .LBB26_6:
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB26_8
+; SOFT-NEXT:  @ %bb.7:
+; SOFT-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:    b .LBB26_9
+; SOFT-NEXT:  .LBB26_8:
+; SOFT-NEXT:    ldr r3, .LCPI26_1
+; SOFT-NEXT:  .LBB26_9:
+; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    beq .LBB26_11
 ; SOFT-NEXT:  @ %bb.10:
-; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    ldr r3, .LCPI26_2
 ; SOFT-NEXT:  .LBB26_11:
+; SOFT-NEXT:    cmp r1, #0
 ; SOFT-NEXT:    bne .LBB26_13
 ; SOFT-NEXT:  @ %bb.12:
-; SOFT-NEXT:    ldr r7, [sp] @ 4-byte Reload
+; SOFT-NEXT:    mov r6, r3
 ; SOFT-NEXT:  .LBB26_13:
-; SOFT-NEXT:    mov r1, r7
-; SOFT-NEXT:    add sp, #4
+; SOFT-NEXT:    mov r1, r6
+; SOFT-NEXT:    add sp, #12
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.14:
 ; SOFT-NEXT:  .LCPI26_0:
-; SOFT-NEXT:    .long 4294836224 @ 0xfffe0000
-; SOFT-NEXT:  .LCPI26_1:
 ; SOFT-NEXT:    .long 1476395007 @ 0x57ffffff
+; SOFT-NEXT:  .LCPI26_1:
+; SOFT-NEXT:    .long 4294836224 @ 0xfffe0000
 ; SOFT-NEXT:  .LCPI26_2:
 ; SOFT-NEXT:    .long 131071 @ 0x1ffff
 ;
@@ -3115,12 +3166,11 @@ define i50 @test_signed_i50_f16(half %f) nounwind {
 ; VFP2-NEXT:    movlt r0, #0
 ; VFP2-NEXT:    vcmp.f32 s2, s4
 ; VFP2-NEXT:    vmrs APSR_nzcv, fpscr
-; VFP2-NEXT:    it gt
-; VFP2-NEXT:    movgt.w r0, #-1
-; VFP2-NEXT:    vcmp.f32 s2, s2
-; VFP2-NEXT:    itt gt
+; VFP2-NEXT:    ittt gt
 ; VFP2-NEXT:    movwgt r1, #65535
 ; VFP2-NEXT:    movtgt r1, #1
+; VFP2-NEXT:    movgt.w r0, #-1
+; VFP2-NEXT:    vcmp.f32 s2, s2
 ; VFP2-NEXT:    vmrs APSR_nzcv, fpscr
 ; VFP2-NEXT:    itt vs
 ; VFP2-NEXT:    movvs r0, #0
@@ -3153,12 +3203,11 @@ define i50 @test_signed_i50_f16(half %f) nounwind {
 ; FP16-NEXT:    movlt r0, #0
 ; FP16-NEXT:    vcmp.f32 s16, s2
 ; FP16-NEXT:    vmrs APSR_nzcv, fpscr
-; FP16-NEXT:    it gt
-; FP16-NEXT:    movgt.w r0, #-1
-; FP16-NEXT:    vcmp.f32 s16, s16
-; FP16-NEXT:    itt gt
+; FP16-NEXT:    ittt gt
 ; FP16-NEXT:    movwgt r1, #65535
 ; FP16-NEXT:    movtgt r1, #1
+; FP16-NEXT:    movgt.w r0, #-1
+; FP16-NEXT:    vcmp.f32 s16, s16
 ; FP16-NEXT:    vmrs APSR_nzcv, fpscr
 ; FP16-NEXT:    itt vs
 ; FP16-NEXT:    movvs r0, #0
@@ -3180,62 +3229,70 @@ define i64 @test_signed_i64_f16(half %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #4
-; SOFT-NEXT:    sub sp, #4
+; SOFT-NEXT:    .pad #12
+; SOFT-NEXT:    sub sp, #12
 ; SOFT-NEXT:    uxth r0, r0
 ; SOFT-NEXT:    bl __aeabi_h2f
-; SOFT-NEXT:    mov r4, r0
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    ldr r1, .LCPI27_0
+; SOFT-NEXT:    bl __aeabi_fcmpgt
+; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
 ; SOFT-NEXT:    movs r0, #223
 ; SOFT-NEXT:    lsls r1, r0, #24
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    bl __aeabi_f2lz
-; SOFT-NEXT:    movs r2, #1
-; SOFT-NEXT:    lsls r7, r2, #31
-; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    mov r4, r0
+; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB27_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r4, r5
 ; SOFT-NEXT:  .LBB27_2:
-; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT:    movs r6, #0
+; SOFT-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    cmp r0, #0
 ; SOFT-NEXT:    beq .LBB27_4
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r7, r1
+; SOFT-NEXT:    mvns r4, r6
 ; SOFT-NEXT:  .LBB27_4:
-; SOFT-NEXT:    ldr r1, .LCPI27_0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    movs r6, #0
-; SOFT-NEXT:    mvns r5, r6
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    bl __aeabi_fcmpun
+; SOFT-NEXT:    mov r1, r0
 ; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB27_6
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    bne .LBB27_6
 ; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    ldr r7, .LCPI27_1
+; SOFT-NEXT:    mov r0, r4
 ; SOFT-NEXT:  .LBB27_6:
-; SOFT-NEXT:    bne .LBB27_8
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB27_8
 ; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    ldr r5, [sp] @ 4-byte Reload
+; SOFT-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:    b .LBB27_9
 ; SOFT-NEXT:  .LBB27_8:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    bl __aeabi_fcmpun
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    bne .LBB27_10
-; SOFT-NEXT:  @ %bb.9:
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:  .LBB27_10:
-; SOFT-NEXT:    bne .LBB27_12
-; SOFT-NEXT:  @ %bb.11:
-; SOFT-NEXT:    mov r6, r7
-; SOFT-NEXT:  .LBB27_12:
+; SOFT-NEXT:    movs r2, #1
+; SOFT-NEXT:    lsls r3, r2, #31
+; SOFT-NEXT:  .LBB27_9:
+; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    beq .LBB27_11
+; SOFT-NEXT:  @ %bb.10:
+; SOFT-NEXT:    ldr r3, .LCPI27_1
+; SOFT-NEXT:  .LBB27_11:
+; SOFT-NEXT:    cmp r1, #0
+; SOFT-NEXT:    bne .LBB27_13
+; SOFT-NEXT:  @ %bb.12:
+; SOFT-NEXT:    mov r6, r3
+; SOFT-NEXT:  .LBB27_13:
 ; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    add sp, #4
+; SOFT-NEXT:    add sp, #12
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
-; SOFT-NEXT:  @ %bb.13:
+; SOFT-NEXT:  @ %bb.14:
 ; SOFT-NEXT:  .LCPI27_0:
 ; SOFT-NEXT:    .long 1593835519 @ 0x5effffff
 ; SOFT-NEXT:  .LCPI27_1:
@@ -3322,106 +3379,110 @@ define i100 @test_signed_i100_f16(half %f) nounwind {
 ; SOFT-NEXT:    sub sp, #20
 ; SOFT-NEXT:    uxth r0, r0
 ; SOFT-NEXT:    bl __aeabi_h2f
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    ldr r1, .LCPI28_0
+; SOFT-NEXT:    bl __aeabi_fcmpgt
 ; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    movs r0, #241
 ; SOFT-NEXT:    lsls r1, r0, #24
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __fixsfti
-; SOFT-NEXT:    movs r5, #7
-; SOFT-NEXT:    str r5, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT:    mvns r7, r5
-; SOFT-NEXT:    cmp r6, #0
-; SOFT-NEXT:    beq .LBB28_17
+; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    str r2, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:    str r3, [sp] @ 4-byte Spill
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB28_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    beq .LBB28_18
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:  .LBB28_2:
+; SOFT-NEXT:    movs r5, #0
+; SOFT-NEXT:    mvns r1, r5
+; SOFT-NEXT:    str r4, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    mov r4, r1
 ; SOFT-NEXT:    bne .LBB28_4
-; SOFT-NEXT:  .LBB28_3:
-; SOFT-NEXT:    mov r2, r6
+; SOFT-NEXT:  @ %bb.3:
+; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:  .LBB28_4:
-; SOFT-NEXT:    str r2, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    str r1, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    str r0, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT:    beq .LBB28_6
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r1, r6
+; SOFT-NEXT:    bl __aeabi_fcmpun
+; SOFT-NEXT:    mov r3, r0
+; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    bne .LBB28_6
 ; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    mov r7, r3
-; SOFT-NEXT:  .LBB28_6:
-; SOFT-NEXT:    ldr r1, .LCPI28_0
 ; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    movs r6, #0
-; SOFT-NEXT:    mvns r5, r6
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB28_19
+; SOFT-NEXT:  .LBB28_6:
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    ldr r6, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    bne .LBB28_8
 ; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    beq .LBB28_20
+; SOFT-NEXT:    str r7, [sp, #4] @ 4-byte Spill
 ; SOFT-NEXT:  .LBB28_8:
-; SOFT-NEXT:    mov r7, r5
-; SOFT-NEXT:    beq .LBB28_21
-; SOFT-NEXT:  .LBB28_9:
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    bne .LBB28_11
+; SOFT-NEXT:    ldr r4, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r2, r6
+; SOFT-NEXT:    bne .LBB28_10
+; SOFT-NEXT:  @ %bb.9:
+; SOFT-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB28_10:
-; SOFT-NEXT:    ldr r5, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT:  .LBB28_11:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    bl __aeabi_fcmpun
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    beq .LBB28_22
-; SOFT-NEXT:  @ %bb.12:
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    beq .LBB28_23
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    beq .LBB28_18
+; SOFT-NEXT:  @ %bb.11:
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB28_19
+; SOFT-NEXT:  .LBB28_12:
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB28_14
 ; SOFT-NEXT:  .LBB28_13:
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    beq .LBB28_24
+; SOFT-NEXT:    ldr r6, [sp, #12] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB28_14:
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    mov r2, r5
 ; SOFT-NEXT:    bne .LBB28_16
-; SOFT-NEXT:  .LBB28_15:
-; SOFT-NEXT:    ldr r6, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:  @ %bb.15:
+; SOFT-NEXT:    mov r2, r6
 ; SOFT-NEXT:  .LBB28_16:
-; SOFT-NEXT:    mov r3, r6
-; SOFT-NEXT:    add sp, #20
-; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
-; SOFT-NEXT:  .LBB28_17:
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    bne .LBB28_2
+; SOFT-NEXT:    movs r4, #7
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB28_20
+; SOFT-NEXT:  @ %bb.17:
+; SOFT-NEXT:    ldr r7, [sp] @ 4-byte Reload
+; SOFT-NEXT:    b .LBB28_21
 ; SOFT-NEXT:  .LBB28_18:
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    beq .LBB28_3
-; SOFT-NEXT:    b .LBB28_4
+; SOFT-NEXT:    mov r1, r2
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB28_12
 ; SOFT-NEXT:  .LBB28_19:
-; SOFT-NEXT:    str r7, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    bne .LBB28_8
+; SOFT-NEXT:    str r7, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB28_13
+; SOFT-NEXT:    b .LBB28_14
 ; SOFT-NEXT:  .LBB28_20:
-; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    mov r7, r5
-; SOFT-NEXT:    bne .LBB28_9
+; SOFT-NEXT:    mvns r7, r4
 ; SOFT-NEXT:  .LBB28_21:
-; SOFT-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    beq .LBB28_10
-; SOFT-NEXT:    b .LBB28_11
-; SOFT-NEXT:  .LBB28_22:
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    bne .LBB28_13
+; SOFT-NEXT:    ldr r6, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB28_23
+; SOFT-NEXT:  @ %bb.22:
+; SOFT-NEXT:    mov r4, r7
 ; SOFT-NEXT:  .LBB28_23:
-; SOFT-NEXT:    mov r1, r7
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    bne .LBB28_14
-; SOFT-NEXT:  .LBB28_24:
-; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    beq .LBB28_15
-; SOFT-NEXT:    b .LBB28_16
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    bne .LBB28_25
+; SOFT-NEXT:  @ %bb.24:
+; SOFT-NEXT:    mov r5, r4
+; SOFT-NEXT:  .LBB28_25:
+; SOFT-NEXT:    mov r3, r5
+; SOFT-NEXT:    add sp, #20
+; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
-; SOFT-NEXT:  @ %bb.25:
+; SOFT-NEXT:  @ %bb.26:
 ; SOFT-NEXT:  .LCPI28_0:
 ; SOFT-NEXT:    .long 1895825407 @ 0x70ffffff
 ;
@@ -3518,104 +3579,109 @@ define i128 @test_signed_i128_f16(half %f) nounwind {
 ; SOFT-NEXT:    sub sp, #20
 ; SOFT-NEXT:    uxth r0, r0
 ; SOFT-NEXT:    bl __aeabi_h2f
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    ldr r1, .LCPI29_0
+; SOFT-NEXT:    bl __aeabi_fcmpgt
 ; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    movs r0, #255
 ; SOFT-NEXT:    lsls r1, r0, #24
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __fixsfti
-; SOFT-NEXT:    movs r5, #1
-; SOFT-NEXT:    lsls r7, r5, #31
-; SOFT-NEXT:    cmp r6, #0
-; SOFT-NEXT:    beq .LBB29_18
+; SOFT-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT:    str r2, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:    str r3, [sp] @ 4-byte Spill
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB29_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    beq .LBB29_19
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:  .LBB29_2:
+; SOFT-NEXT:    movs r5, #0
+; SOFT-NEXT:    mvns r1, r5
+; SOFT-NEXT:    str r4, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    mov r4, r1
 ; SOFT-NEXT:    bne .LBB29_4
-; SOFT-NEXT:  .LBB29_3:
-; SOFT-NEXT:    mov r2, r6
+; SOFT-NEXT:  @ %bb.3:
+; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:  .LBB29_4:
-; SOFT-NEXT:    str r2, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    str r1, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    str r0, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT:    beq .LBB29_6
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r1, r6
+; SOFT-NEXT:    bl __aeabi_fcmpun
+; SOFT-NEXT:    mov r3, r0
+; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    bne .LBB29_6
 ; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    mov r7, r3
-; SOFT-NEXT:  .LBB29_6:
-; SOFT-NEXT:    ldr r1, .LCPI29_0
 ; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    movs r6, #0
-; SOFT-NEXT:    mvns r5, r6
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB29_8
+; SOFT-NEXT:  .LBB29_6:
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    ldr r6, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    bne .LBB29_8
 ; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    ldr r7, .LCPI29_1
+; SOFT-NEXT:    str r7, [sp, #4] @ 4-byte Spill
 ; SOFT-NEXT:  .LBB29_8:
-; SOFT-NEXT:    str r7, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    beq .LBB29_20
+; SOFT-NEXT:    ldr r4, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r2, r6
+; SOFT-NEXT:    bne .LBB29_10
 ; SOFT-NEXT:  @ %bb.9:
-; SOFT-NEXT:    mov r7, r5
-; SOFT-NEXT:    beq .LBB29_21
+; SOFT-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB29_10:
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    bne .LBB29_12
-; SOFT-NEXT:  .LBB29_11:
-; SOFT-NEXT:    ldr r5, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    beq .LBB29_18
+; SOFT-NEXT:  @ %bb.11:
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB29_19
 ; SOFT-NEXT:  .LBB29_12:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    bl __aeabi_fcmpun
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    beq .LBB29_22
-; SOFT-NEXT:  @ %bb.13:
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    beq .LBB29_23
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB29_14
+; SOFT-NEXT:  .LBB29_13:
+; SOFT-NEXT:    ldr r6, [sp, #12] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB29_14:
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    mov r2, r5
+; SOFT-NEXT:    bne .LBB29_16
+; SOFT-NEXT:  @ %bb.15:
 ; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    beq .LBB29_24
-; SOFT-NEXT:  .LBB29_15:
-; SOFT-NEXT:    ldr r3, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT:    bne .LBB29_17
 ; SOFT-NEXT:  .LBB29_16:
-; SOFT-NEXT:    mov r6, r3
-; SOFT-NEXT:  .LBB29_17:
-; SOFT-NEXT:    mov r3, r6
-; SOFT-NEXT:    add sp, #20
-; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    beq .LBB29_20
+; SOFT-NEXT:  @ %bb.17:
+; SOFT-NEXT:    ldr r6, [sp] @ 4-byte Reload
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB29_21
+; SOFT-NEXT:    b .LBB29_22
 ; SOFT-NEXT:  .LBB29_18:
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    bne .LBB29_2
+; SOFT-NEXT:    mov r1, r2
+; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    bne .LBB29_12
 ; SOFT-NEXT:  .LBB29_19:
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    beq .LBB29_3
-; SOFT-NEXT:    b .LBB29_4
+; SOFT-NEXT:    str r7, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB29_13
+; SOFT-NEXT:    b .LBB29_14
 ; SOFT-NEXT:  .LBB29_20:
-; SOFT-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    mov r7, r5
-; SOFT-NEXT:    bne .LBB29_10
+; SOFT-NEXT:    movs r4, #1
+; SOFT-NEXT:    lsls r6, r4, #31
+; SOFT-NEXT:    ldr r4, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB29_22
 ; SOFT-NEXT:  .LBB29_21:
-; SOFT-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT:    beq .LBB29_11
-; SOFT-NEXT:    b .LBB29_12
+; SOFT-NEXT:    ldr r6, .LCPI29_1
 ; SOFT-NEXT:  .LBB29_22:
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    bne .LBB29_14
-; SOFT-NEXT:  .LBB29_23:
-; SOFT-NEXT:    mov r1, r7
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    bne .LBB29_15
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    bne .LBB29_24
+; SOFT-NEXT:  @ %bb.23:
+; SOFT-NEXT:    mov r5, r6
 ; SOFT-NEXT:  .LBB29_24:
-; SOFT-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    ldr r3, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT:    beq .LBB29_16
-; SOFT-NEXT:    b .LBB29_17
+; SOFT-NEXT:    mov r3, r5
+; SOFT-NEXT:    add sp, #20
+; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.25:
 ; SOFT-NEXT:  .LCPI29_0:

diff  --git a/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll b/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll
index 4cc5f943dadff1..14eb67104eddac 100644
--- a/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll
@@ -24,38 +24,41 @@ define i1 @test_signed_i1_f32(float %f) nounwind {
 ; SOFT-NEXT:    .save {r4, r5, r6, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, lr}
 ; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    movs r0, #127
+; SOFT-NEXT:    lsls r1, r0, #23
+; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    bl __aeabi_fcmpgt
+; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    movs r1, #0
+; SOFT-NEXT:    mov r0, r5
 ; SOFT-NEXT:    bl __aeabi_fcmpge
 ; SOFT-NEXT:    mov r6, r0
 ; SOFT-NEXT:    mov r0, r5
 ; SOFT-NEXT:    bl __aeabi_f2uiz
-; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    cmp r6, #0
-; SOFT-NEXT:    bne .LBB0_2
+; SOFT-NEXT:    beq .LBB0_3
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r4, r6
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB0_4
 ; SOFT-NEXT:  .LBB0_2:
-; SOFT-NEXT:    movs r0, #127
-; SOFT-NEXT:    lsls r1, r0, #23
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB0_4
-; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    movs r4, #1
+; SOFT-NEXT:    pop {r4, r5, r6, pc}
+; SOFT-NEXT:  .LBB0_3:
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB0_2
 ; SOFT-NEXT:  .LBB0_4:
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    movs r0, #1
 ; SOFT-NEXT:    pop {r4, r5, r6, pc}
 ;
 ; VFP2-LABEL: test_signed_i1_f32:
 ; VFP2:       @ %bb.0:
-; VFP2-NEXT:    vmov s0, r0
-; VFP2-NEXT:    vmov.f32 s4, #1.000000e+00
-; VFP2-NEXT:    vcvt.u32.f32 s2, s0
-; VFP2-NEXT:    vcmp.f32 s0, #0
+; VFP2-NEXT:    vmov s2, r0
+; VFP2-NEXT:    vmov.f32 s0, #1.000000e+00
+; VFP2-NEXT:    vcvt.u32.f32 s4, s2
+; VFP2-NEXT:    vcmp.f32 s2, #0
 ; VFP2-NEXT:    vmrs APSR_nzcv, fpscr
-; VFP2-NEXT:    vcmp.f32 s0, s4
-; VFP2-NEXT:    vmov r0, s2
+; VFP2-NEXT:    vcmp.f32 s2, s0
+; VFP2-NEXT:    vmov r0, s4
 ; VFP2-NEXT:    it lt
 ; VFP2-NEXT:    movlt r0, #0
 ; VFP2-NEXT:    vmrs APSR_nzcv, fpscr
@@ -86,27 +89,29 @@ define i8 @test_signed_i8_f32(float %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, lr}
-; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    ldr r1, .LCPI1_0
+; SOFT-NEXT:    bl __aeabi_fcmpgt
+; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    movs r1, #0
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __aeabi_f2uiz
-; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    cmp r6, #0
-; SOFT-NEXT:    bne .LBB1_2
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB1_3
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r4, r6
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB1_4
 ; SOFT-NEXT:  .LBB1_2:
-; SOFT-NEXT:    ldr r1, .LCPI1_0
+; SOFT-NEXT:    pop {r4, r5, r6, pc}
+; SOFT-NEXT:  .LBB1_3:
 ; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB1_4
-; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    movs r4, #255
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB1_2
 ; SOFT-NEXT:  .LBB1_4:
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    movs r0, #255
 ; SOFT-NEXT:    pop {r4, r5, r6, pc}
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.5:
@@ -158,27 +163,29 @@ define i13 @test_signed_i13_f32(float %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, lr}
-; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    ldr r1, .LCPI2_0
+; SOFT-NEXT:    bl __aeabi_fcmpgt
+; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    movs r1, #0
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __aeabi_f2uiz
-; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    cmp r6, #0
-; SOFT-NEXT:    bne .LBB2_2
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB2_3
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r4, r6
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB2_4
 ; SOFT-NEXT:  .LBB2_2:
-; SOFT-NEXT:    ldr r1, .LCPI2_0
+; SOFT-NEXT:    pop {r4, r5, r6, pc}
+; SOFT-NEXT:  .LBB2_3:
 ; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB2_4
-; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    ldr r4, .LCPI2_1
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB2_2
 ; SOFT-NEXT:  .LBB2_4:
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    ldr r0, .LCPI2_1
 ; SOFT-NEXT:    pop {r4, r5, r6, pc}
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.5:
@@ -232,27 +239,29 @@ define i16 @test_signed_i16_f32(float %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, lr}
-; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    ldr r1, .LCPI3_0
+; SOFT-NEXT:    bl __aeabi_fcmpgt
+; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    movs r1, #0
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __aeabi_f2uiz
-; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    cmp r6, #0
-; SOFT-NEXT:    bne .LBB3_2
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB3_3
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r4, r6
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB3_4
 ; SOFT-NEXT:  .LBB3_2:
-; SOFT-NEXT:    ldr r1, .LCPI3_0
+; SOFT-NEXT:    pop {r4, r5, r6, pc}
+; SOFT-NEXT:  .LBB3_3:
 ; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB3_4
-; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    ldr r4, .LCPI3_1
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB3_2
 ; SOFT-NEXT:  .LBB3_4:
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    ldr r0, .LCPI3_1
 ; SOFT-NEXT:    pop {r4, r5, r6, pc}
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.5:
@@ -306,27 +315,29 @@ define i19 @test_signed_i19_f32(float %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, lr}
-; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    ldr r1, .LCPI4_0
+; SOFT-NEXT:    bl __aeabi_fcmpgt
+; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    movs r1, #0
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __aeabi_f2uiz
-; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    cmp r6, #0
-; SOFT-NEXT:    bne .LBB4_2
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB4_3
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r4, r6
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB4_4
 ; SOFT-NEXT:  .LBB4_2:
-; SOFT-NEXT:    ldr r1, .LCPI4_0
+; SOFT-NEXT:    pop {r4, r5, r6, pc}
+; SOFT-NEXT:  .LBB4_3:
 ; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB4_4
-; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    ldr r4, .LCPI4_1
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB4_2
 ; SOFT-NEXT:  .LBB4_4:
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    ldr r0, .LCPI4_1
 ; SOFT-NEXT:    pop {r4, r5, r6, pc}
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.5:
@@ -383,28 +394,31 @@ define i32 @test_signed_i32_f32(float %f) nounwind {
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    .pad #4
 ; SOFT-NEXT:    sub sp, #4
-; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    ldr r1, .LCPI5_0
+; SOFT-NEXT:    bl __aeabi_fcmpgt
+; SOFT-NEXT:    mov r5, r0
 ; SOFT-NEXT:    movs r4, #0
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    mov r1, r4
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    bl __aeabi_f2uiz
-; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    bne .LBB5_2
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    beq .LBB5_3
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r5, r7
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB5_4
 ; SOFT-NEXT:  .LBB5_2:
-; SOFT-NEXT:    ldr r1, .LCPI5_0
+; SOFT-NEXT:    add sp, #4
+; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:  .LBB5_3:
 ; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB5_4
-; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mvns r5, r4
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB5_2
 ; SOFT-NEXT:  .LBB5_4:
-; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mvns r0, r4
 ; SOFT-NEXT:    add sp, #4
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
@@ -429,39 +443,45 @@ define i50 @test_signed_i50_f32(float %f) nounwind {
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    .pad #4
 ; SOFT-NEXT:    sub sp, #4
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    movs r5, #0
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    bl __aeabi_fcmpge
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    ldr r1, .LCPI6_0
+; SOFT-NEXT:    bl __aeabi_fcmpgt
 ; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    movs r6, #0
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    mov r1, r6
+; SOFT-NEXT:    bl __aeabi_fcmpge
+; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    bl __aeabi_f2ulz
-; SOFT-NEXT:    mov r7, r1
-; SOFT-NEXT:    cmp r4, #0
-; SOFT-NEXT:    bne .LBB6_2
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB6_5
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r7, r4
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB6_6
 ; SOFT-NEXT:  .LBB6_2:
-; SOFT-NEXT:    bne .LBB6_4
-; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB6_7
+; SOFT-NEXT:  .LBB6_3:
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB6_8
 ; SOFT-NEXT:  .LBB6_4:
-; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    ldr r1, .LCPI6_0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r1, r0
-; SOFT-NEXT:    mvns r0, r5
-; SOFT-NEXT:    cmp r1, #0
-; SOFT-NEXT:    bne .LBB6_6
-; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    add sp, #4
+; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:  .LBB6_5:
+; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB6_2
 ; SOFT-NEXT:  .LBB6_6:
-; SOFT-NEXT:    beq .LBB6_8
-; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    ldr r7, .LCPI6_1
+; SOFT-NEXT:    mvns r0, r6
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB6_3
+; SOFT-NEXT:  .LBB6_7:
+; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB6_4
 ; SOFT-NEXT:  .LBB6_8:
-; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    ldr r1, .LCPI6_1
 ; SOFT-NEXT:    add sp, #4
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
@@ -508,43 +528,48 @@ define i64 @test_signed_i64_f32(float %f) nounwind {
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    .pad #4
 ; SOFT-NEXT:    sub sp, #4
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    movs r4, #0
-; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    ldr r1, .LCPI7_0
+; SOFT-NEXT:    bl __aeabi_fcmpgt
+; SOFT-NEXT:    mov r4, r0
+; SOFT-NEXT:    movs r6, #0
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpge
 ; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    bl __aeabi_f2ulz
-; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    mov r3, r0
 ; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB7_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    mov r3, r5
 ; SOFT-NEXT:  .LBB7_2:
-; SOFT-NEXT:    str r1, [sp] @ 4-byte Spill
-; SOFT-NEXT:    bne .LBB7_4
-; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r7, r5
-; SOFT-NEXT:  .LBB7_4:
-; SOFT-NEXT:    ldr r1, .LCPI7_0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mvns r1, r4
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r1
+; SOFT-NEXT:    mvns r2, r6
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r0, r2
 ; SOFT-NEXT:    beq .LBB7_7
-; SOFT-NEXT:  @ %bb.5:
+; SOFT-NEXT:  @ %bb.3:
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    beq .LBB7_8
+; SOFT-NEXT:  .LBB7_4:
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB7_6
+; SOFT-NEXT:  .LBB7_5:
+; SOFT-NEXT:    mov r2, r1
 ; SOFT-NEXT:  .LBB7_6:
+; SOFT-NEXT:    mov r1, r2
 ; SOFT-NEXT:    add sp, #4
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:  .LBB7_7:
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    bne .LBB7_6
+; SOFT-NEXT:    mov r0, r3
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB7_4
 ; SOFT-NEXT:  .LBB7_8:
-; SOFT-NEXT:    ldr r1, [sp] @ 4-byte Reload
-; SOFT-NEXT:    add sp, #4
-; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB7_5
+; SOFT-NEXT:    b .LBB7_6
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.9:
 ; SOFT-NEXT:  .LCPI7_0:
@@ -584,71 +609,76 @@ define i100 @test_signed_i100_f32(float %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #12
-; SOFT-NEXT:    sub sp, #12
+; SOFT-NEXT:    .pad #4
+; SOFT-NEXT:    sub sp, #4
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    ldr r1, .LCPI8_0
+; SOFT-NEXT:    bl __aeabi_fcmpgt
 ; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    movs r6, #0
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    bl __fixunssfti
 ; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    beq .LBB8_11
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    bl __fixunssfti
+; SOFT-NEXT:    mov r7, r1
+; SOFT-NEXT:    str r2, [sp] @ 4-byte Spill
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB8_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    beq .LBB8_12
+; SOFT-NEXT:    mov r0, r5
 ; SOFT-NEXT:  .LBB8_2:
+; SOFT-NEXT:    mvns r2, r6
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r6, r2
 ; SOFT-NEXT:    bne .LBB8_4
-; SOFT-NEXT:  .LBB8_3:
-; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:  @ %bb.3:
+; SOFT-NEXT:    mov r6, r0
 ; SOFT-NEXT:  .LBB8_4:
-; SOFT-NEXT:    str r1, [sp] @ 4-byte Spill
-; SOFT-NEXT:    str r2, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    str r3, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB8_6
 ; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    mov r5, r7
+; SOFT-NEXT:    mov r7, r5
 ; SOFT-NEXT:  .LBB8_6:
-; SOFT-NEXT:    ldr r1, .LCPI8_0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mvns r2, r6
-; SOFT-NEXT:    movs r3, #15
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r2
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r1, r2
 ; SOFT-NEXT:    beq .LBB8_13
 ; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    mov r1, r2
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    beq .LBB8_14
 ; SOFT-NEXT:  .LBB8_8:
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    beq .LBB8_15
 ; SOFT-NEXT:  .LBB8_9:
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    beq .LBB8_16
 ; SOFT-NEXT:  .LBB8_10:
-; SOFT-NEXT:    add sp, #12
-; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB8_12
 ; SOFT-NEXT:  .LBB8_11:
-; SOFT-NEXT:    mov r3, r7
-; SOFT-NEXT:    bne .LBB8_2
+; SOFT-NEXT:    movs r3, #15
 ; SOFT-NEXT:  .LBB8_12:
-; SOFT-NEXT:    mov r2, r7
-; SOFT-NEXT:    beq .LBB8_3
-; SOFT-NEXT:    b .LBB8_4
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    add sp, #4
+; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:  .LBB8_13:
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r2
+; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB8_8
 ; SOFT-NEXT:  .LBB8_14:
-; SOFT-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; SOFT-NEXT:    str r5, [sp] @ 4-byte Spill
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    bne .LBB8_9
 ; SOFT-NEXT:  .LBB8_15:
-; SOFT-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:    ldr r2, [sp] @ 4-byte Reload
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB8_10
 ; SOFT-NEXT:  .LBB8_16:
-; SOFT-NEXT:    ldr r3, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    add sp, #12
-; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:    mov r3, r5
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB8_11
+; SOFT-NEXT:    b .LBB8_12
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.17:
 ; SOFT-NEXT:  .LCPI8_0:
@@ -692,72 +722,75 @@ define i128 @test_signed_i128_f32(float %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #12
-; SOFT-NEXT:    sub sp, #12
+; SOFT-NEXT:    .pad #4
+; SOFT-NEXT:    sub sp, #4
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    ldr r1, .LCPI9_0
+; SOFT-NEXT:    bl __aeabi_fcmpgt
 ; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    movs r6, #0
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    bl __fixunssfti
 ; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    beq .LBB9_11
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    bl __fixunssfti
+; SOFT-NEXT:    mov r7, r1
+; SOFT-NEXT:    str r3, [sp] @ 4-byte Spill
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB9_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    beq .LBB9_12
+; SOFT-NEXT:    mov r0, r5
 ; SOFT-NEXT:  .LBB9_2:
+; SOFT-NEXT:    mvns r6, r6
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r3, r6
 ; SOFT-NEXT:    bne .LBB9_4
-; SOFT-NEXT:  .LBB9_3:
-; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:  @ %bb.3:
+; SOFT-NEXT:    mov r3, r0
 ; SOFT-NEXT:  .LBB9_4:
-; SOFT-NEXT:    str r1, [sp] @ 4-byte Spill
-; SOFT-NEXT:    str r2, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    str r3, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB9_6
 ; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    mov r5, r7
+; SOFT-NEXT:    mov r7, r5
 ; SOFT-NEXT:  .LBB9_6:
-; SOFT-NEXT:    ldr r1, .LCPI9_0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mvns r3, r6
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r3
-; SOFT-NEXT:    beq .LBB9_13
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r1, r6
+; SOFT-NEXT:    bne .LBB9_8
 ; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    mov r1, r3
-; SOFT-NEXT:    beq .LBB9_14
+; SOFT-NEXT:    mov r1, r7
 ; SOFT-NEXT:  .LBB9_8:
-; SOFT-NEXT:    mov r2, r3
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB9_10
+; SOFT-NEXT:  @ %bb.9:
+; SOFT-NEXT:    mov r2, r5
+; SOFT-NEXT:  .LBB9_10:
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r7, r6
 ; SOFT-NEXT:    beq .LBB9_15
-; SOFT-NEXT:  .LBB9_9:
+; SOFT-NEXT:  @ %bb.11:
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    beq .LBB9_16
-; SOFT-NEXT:  .LBB9_10:
-; SOFT-NEXT:    add sp, #12
-; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
-; SOFT-NEXT:  .LBB9_11:
-; SOFT-NEXT:    mov r3, r7
-; SOFT-NEXT:    bne .LBB9_2
 ; SOFT-NEXT:  .LBB9_12:
-; SOFT-NEXT:    mov r2, r7
-; SOFT-NEXT:    beq .LBB9_3
-; SOFT-NEXT:    b .LBB9_4
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB9_14
 ; SOFT-NEXT:  .LBB9_13:
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r3
-; SOFT-NEXT:    bne .LBB9_8
+; SOFT-NEXT:    ldr r6, [sp] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB9_14:
-; SOFT-NEXT:    ldr r1, [sp] @ 4-byte Reload
-; SOFT-NEXT:    mov r2, r3
-; SOFT-NEXT:    bne .LBB9_9
+; SOFT-NEXT:    mov r0, r3
+; SOFT-NEXT:    mov r2, r7
+; SOFT-NEXT:    mov r3, r6
+; SOFT-NEXT:    add sp, #4
+; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:  .LBB9_15:
-; SOFT-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    bne .LBB9_10
+; SOFT-NEXT:    mov r7, r2
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB9_12
 ; SOFT-NEXT:  .LBB9_16:
-; SOFT-NEXT:    ldr r3, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    add sp, #12
-; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:    str r5, [sp] @ 4-byte Spill
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB9_13
+; SOFT-NEXT:    b .LBB9_14
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.17:
 ; SOFT-NEXT:  .LCPI9_0:
@@ -819,32 +852,33 @@ define i1 @test_signed_i1_f64(double %f) nounwind {
 ; SOFT-NEXT:    .pad #4
 ; SOFT-NEXT:    sub sp, #4
 ; SOFT-NEXT:    mov r4, r1
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    movs r7, #0
+; SOFT-NEXT:    ldr r3, .LCPI10_0
+; SOFT-NEXT:    mov r2, r7
+; SOFT-NEXT:    bl __aeabi_dcmpgt
 ; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    movs r6, #0
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    mov r3, r6
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    mov r2, r7
+; SOFT-NEXT:    mov r3, r7
 ; SOFT-NEXT:    bl __aeabi_dcmpge
 ; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    mov r1, r4
 ; SOFT-NEXT:    bl __aeabi_d2uiz
 ; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    bne .LBB10_2
+; SOFT-NEXT:    beq .LBB10_3
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:  .LBB10_2:
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    ldr r3, .LCPI10_0
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB10_4
-; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:  .LBB10_2:
 ; SOFT-NEXT:    add sp, #4
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:  .LBB10_3:
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB10_2
 ; SOFT-NEXT:  .LBB10_4:
 ; SOFT-NEXT:    movs r0, #1
 ; SOFT-NEXT:    add sp, #4
@@ -897,32 +931,33 @@ define i8 @test_signed_i8_f64(double %f) nounwind {
 ; SOFT-NEXT:    .pad #4
 ; SOFT-NEXT:    sub sp, #4
 ; SOFT-NEXT:    mov r4, r1
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    movs r7, #0
+; SOFT-NEXT:    ldr r3, .LCPI11_0
+; SOFT-NEXT:    mov r2, r7
+; SOFT-NEXT:    bl __aeabi_dcmpgt
 ; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    movs r6, #0
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    mov r3, r6
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    mov r2, r7
+; SOFT-NEXT:    mov r3, r7
 ; SOFT-NEXT:    bl __aeabi_dcmpge
 ; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    mov r1, r4
 ; SOFT-NEXT:    bl __aeabi_d2uiz
 ; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    bne .LBB11_2
+; SOFT-NEXT:    beq .LBB11_3
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:  .LBB11_2:
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    ldr r3, .LCPI11_0
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB11_4
-; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:  .LBB11_2:
 ; SOFT-NEXT:    add sp, #4
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:  .LBB11_3:
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB11_2
 ; SOFT-NEXT:  .LBB11_4:
 ; SOFT-NEXT:    movs r0, #255
 ; SOFT-NEXT:    add sp, #4
@@ -983,32 +1018,33 @@ define i13 @test_signed_i13_f64(double %f) nounwind {
 ; SOFT-NEXT:    .pad #4
 ; SOFT-NEXT:    sub sp, #4
 ; SOFT-NEXT:    mov r4, r1
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    movs r7, #0
+; SOFT-NEXT:    ldr r3, .LCPI12_0
+; SOFT-NEXT:    mov r2, r7
+; SOFT-NEXT:    bl __aeabi_dcmpgt
 ; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    movs r6, #0
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    mov r3, r6
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    mov r2, r7
+; SOFT-NEXT:    mov r3, r7
 ; SOFT-NEXT:    bl __aeabi_dcmpge
 ; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    mov r1, r4
 ; SOFT-NEXT:    bl __aeabi_d2uiz
 ; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    bne .LBB12_2
+; SOFT-NEXT:    beq .LBB12_3
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:  .LBB12_2:
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    ldr r3, .LCPI12_0
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB12_4
-; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:  .LBB12_2:
 ; SOFT-NEXT:    add sp, #4
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:  .LBB12_3:
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB12_2
 ; SOFT-NEXT:  .LBB12_4:
 ; SOFT-NEXT:    ldr r0, .LCPI12_1
 ; SOFT-NEXT:    add sp, #4
@@ -1071,32 +1107,33 @@ define i16 @test_signed_i16_f64(double %f) nounwind {
 ; SOFT-NEXT:    .pad #4
 ; SOFT-NEXT:    sub sp, #4
 ; SOFT-NEXT:    mov r4, r1
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    movs r7, #0
+; SOFT-NEXT:    ldr r3, .LCPI13_0
+; SOFT-NEXT:    mov r2, r7
+; SOFT-NEXT:    bl __aeabi_dcmpgt
 ; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    movs r6, #0
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    mov r3, r6
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    mov r2, r7
+; SOFT-NEXT:    mov r3, r7
 ; SOFT-NEXT:    bl __aeabi_dcmpge
 ; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    mov r1, r4
 ; SOFT-NEXT:    bl __aeabi_d2uiz
 ; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    bne .LBB13_2
+; SOFT-NEXT:    beq .LBB13_3
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:  .LBB13_2:
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    ldr r3, .LCPI13_0
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB13_4
-; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:  .LBB13_2:
 ; SOFT-NEXT:    add sp, #4
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:  .LBB13_3:
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB13_2
 ; SOFT-NEXT:  .LBB13_4:
 ; SOFT-NEXT:    ldr r0, .LCPI13_1
 ; SOFT-NEXT:    add sp, #4
@@ -1159,32 +1196,33 @@ define i19 @test_signed_i19_f64(double %f) nounwind {
 ; SOFT-NEXT:    .pad #4
 ; SOFT-NEXT:    sub sp, #4
 ; SOFT-NEXT:    mov r4, r1
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    movs r7, #0
+; SOFT-NEXT:    ldr r3, .LCPI14_0
+; SOFT-NEXT:    mov r2, r7
+; SOFT-NEXT:    bl __aeabi_dcmpgt
 ; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    movs r6, #0
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    mov r3, r6
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    mov r2, r7
+; SOFT-NEXT:    mov r3, r7
 ; SOFT-NEXT:    bl __aeabi_dcmpge
 ; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    mov r1, r4
 ; SOFT-NEXT:    bl __aeabi_d2uiz
 ; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    bne .LBB14_2
+; SOFT-NEXT:    beq .LBB14_3
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:  .LBB14_2:
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    ldr r3, .LCPI14_0
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    mov r2, r6
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB14_4
-; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:  .LBB14_2:
 ; SOFT-NEXT:    add sp, #4
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:  .LBB14_3:
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB14_2
 ; SOFT-NEXT:  .LBB14_4:
 ; SOFT-NEXT:    ldr r0, .LCPI14_1
 ; SOFT-NEXT:    add sp, #4
@@ -1248,34 +1286,32 @@ define i32 @test_signed_i32_f64(double %f) nounwind {
 ; SOFT-NEXT:    .pad #4
 ; SOFT-NEXT:    sub sp, #4
 ; SOFT-NEXT:    mov r5, r1
-; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    ldr r2, .LCPI15_0
+; SOFT-NEXT:    ldr r3, .LCPI15_1
+; SOFT-NEXT:    bl __aeabi_dcmpgt
+; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
 ; SOFT-NEXT:    movs r4, #0
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    mov r1, r5
 ; SOFT-NEXT:    mov r2, r4
 ; SOFT-NEXT:    mov r3, r4
 ; SOFT-NEXT:    bl __aeabi_dcmpge
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    mov r1, r5
 ; SOFT-NEXT:    bl __aeabi_d2uiz
-; SOFT-NEXT:    cmp r7, #0
+; SOFT-NEXT:    cmp r6, #0
 ; SOFT-NEXT:    bne .LBB15_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:  .LBB15_2:
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    ldr r2, .LCPI15_0
-; SOFT-NEXT:    ldr r3, .LCPI15_1
 ; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    bne .LBB15_4
+; SOFT-NEXT:  .LBB15_2:
+; SOFT-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; SOFT-NEXT:    cmp r1, #0
+; SOFT-NEXT:    beq .LBB15_4
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    add sp, #4
-; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
-; SOFT-NEXT:  .LBB15_4:
 ; SOFT-NEXT:    mvns r0, r4
+; SOFT-NEXT:  .LBB15_4:
 ; SOFT-NEXT:    add sp, #4
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
@@ -1309,47 +1345,49 @@ define i50 @test_signed_i50_f64(double %f) nounwind {
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    .pad #4
 ; SOFT-NEXT:    sub sp, #4
-; SOFT-NEXT:    mov r6, r1
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    movs r5, #0
-; SOFT-NEXT:    mov r2, r5
-; SOFT-NEXT:    mov r3, r5
-; SOFT-NEXT:    bl __aeabi_dcmpge
+; SOFT-NEXT:    mov r7, r1
 ; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    mov r1, r6
+; SOFT-NEXT:    movs r0, #7
+; SOFT-NEXT:    mvns r2, r0
+; SOFT-NEXT:    ldr r3, .LCPI16_0
+; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    bl __aeabi_dcmpgt
+; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT:    movs r6, #0
+; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    mov r2, r6
+; SOFT-NEXT:    mov r3, r6
+; SOFT-NEXT:    bl __aeabi_dcmpge
+; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r1, r7
 ; SOFT-NEXT:    bl __aeabi_d2ulz
-; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB16_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    mov r0, r5
 ; SOFT-NEXT:  .LBB16_2:
-; SOFT-NEXT:    str r1, [sp] @ 4-byte Spill
-; SOFT-NEXT:    bne .LBB16_4
+; SOFT-NEXT:    ldr r2, [sp] @ 4-byte Reload
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    bne .LBB16_6
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:  .LBB16_4:
-; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    movs r0, #7
-; SOFT-NEXT:    mvns r2, r0
-; SOFT-NEXT:    ldr r3, .LCPI16_0
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    mov r1, r0
-; SOFT-NEXT:    mvns r0, r5
-; SOFT-NEXT:    cmp r1, #0
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    beq .LBB16_7
-; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; SOFT-NEXT:  .LBB16_4:
+; SOFT-NEXT:    cmp r2, #0
 ; SOFT-NEXT:    bne .LBB16_8
-; SOFT-NEXT:  .LBB16_6:
+; SOFT-NEXT:  .LBB16_5:
 ; SOFT-NEXT:    add sp, #4
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:  .LBB16_6:
+; SOFT-NEXT:    mvns r0, r6
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB16_4
 ; SOFT-NEXT:  .LBB16_7:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    ldr r1, [sp] @ 4-byte Reload
-; SOFT-NEXT:    beq .LBB16_6
+; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    beq .LBB16_5
 ; SOFT-NEXT:  .LBB16_8:
 ; SOFT-NEXT:    ldr r1, .LCPI16_1
 ; SOFT-NEXT:    add sp, #4
@@ -1420,46 +1458,54 @@ define i64 @test_signed_i64_f64(double %f) nounwind {
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    .pad #4
 ; SOFT-NEXT:    sub sp, #4
-; SOFT-NEXT:    mov r4, r1
+; SOFT-NEXT:    mov r7, r1
 ; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    movs r7, #0
-; SOFT-NEXT:    mov r2, r7
-; SOFT-NEXT:    mov r3, r7
+; SOFT-NEXT:    movs r6, #0
+; SOFT-NEXT:    mvns r4, r6
+; SOFT-NEXT:    ldr r3, .LCPI17_0
+; SOFT-NEXT:    mov r2, r4
+; SOFT-NEXT:    bl __aeabi_dcmpgt
+; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    mov r2, r6
+; SOFT-NEXT:    mov r3, r6
 ; SOFT-NEXT:    bl __aeabi_dcmpge
 ; SOFT-NEXT:    mov r6, r0
 ; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    mov r1, r7
 ; SOFT-NEXT:    bl __aeabi_d2ulz
+; SOFT-NEXT:    mov r2, r0
 ; SOFT-NEXT:    cmp r6, #0
 ; SOFT-NEXT:    bne .LBB17_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r1, r6
+; SOFT-NEXT:    mov r2, r6
 ; SOFT-NEXT:  .LBB17_2:
-; SOFT-NEXT:    str r1, [sp] @ 4-byte Spill
-; SOFT-NEXT:    bne .LBB17_4
+; SOFT-NEXT:    ldr r3, [sp] @ 4-byte Reload
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    beq .LBB17_7
 ; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    beq .LBB17_8
 ; SOFT-NEXT:  .LBB17_4:
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    mvns r7, r7
-; SOFT-NEXT:    ldr r3, .LCPI17_0
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r4
-; SOFT-NEXT:    mov r2, r7
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    cmp r3, #0
 ; SOFT-NEXT:    bne .LBB17_6
-; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:  .LBB17_5:
+; SOFT-NEXT:    mov r4, r1
 ; SOFT-NEXT:  .LBB17_6:
-; SOFT-NEXT:    bne .LBB17_8
-; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    ldr r7, [sp] @ 4-byte Reload
-; SOFT-NEXT:  .LBB17_8:
-; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    mov r1, r4
 ; SOFT-NEXT:    add sp, #4
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:  .LBB17_7:
+; SOFT-NEXT:    mov r0, r2
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB17_4
+; SOFT-NEXT:  .LBB17_8:
+; SOFT-NEXT:    mov r1, r6
+; SOFT-NEXT:    cmp r3, #0
+; SOFT-NEXT:    beq .LBB17_5
+; SOFT-NEXT:    b .LBB17_6
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.9:
 ; SOFT-NEXT:  .LCPI17_0:
@@ -1527,76 +1573,82 @@ define i100 @test_signed_i100_f64(double %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #12
-; SOFT-NEXT:    sub sp, #12
-; SOFT-NEXT:    mov r6, r1
+; SOFT-NEXT:    .pad #4
+; SOFT-NEXT:    sub sp, #4
+; SOFT-NEXT:    mov r7, r1
 ; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    movs r5, #0
-; SOFT-NEXT:    mov r2, r5
-; SOFT-NEXT:    mov r3, r5
+; SOFT-NEXT:    movs r6, #0
+; SOFT-NEXT:    mvns r2, r6
+; SOFT-NEXT:    ldr r3, .LCPI18_0
+; SOFT-NEXT:    str r2, [sp] @ 4-byte Spill
+; SOFT-NEXT:    bl __aeabi_dcmpgt
+; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    mov r2, r6
+; SOFT-NEXT:    mov r3, r6
 ; SOFT-NEXT:    bl __aeabi_dcmpge
-; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    mov r6, r0
 ; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r6
+; SOFT-NEXT:    mov r1, r7
 ; SOFT-NEXT:    bl __fixunsdfti
-; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    beq .LBB18_12
+; SOFT-NEXT:    mov r4, r0
+; SOFT-NEXT:    mov r7, r1
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB18_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    beq .LBB18_13
+; SOFT-NEXT:    mov r4, r6
 ; SOFT-NEXT:  .LBB18_2:
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    ldr r0, [sp] @ 4-byte Reload
 ; SOFT-NEXT:    bne .LBB18_4
-; SOFT-NEXT:  .LBB18_3:
-; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:  @ %bb.3:
+; SOFT-NEXT:    mov r0, r4
 ; SOFT-NEXT:  .LBB18_4:
-; SOFT-NEXT:    str r1, [sp] @ 4-byte Spill
-; SOFT-NEXT:    str r2, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    str r3, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    cmp r6, #0
 ; SOFT-NEXT:    bne .LBB18_6
 ; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    mov r7, r6
 ; SOFT-NEXT:  .LBB18_6:
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mvns r5, r5
-; SOFT-NEXT:    ldr r3, .LCPI18_0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    mov r2, r5
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    movs r3, #15
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    beq .LBB18_14
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    ldr r4, [sp] @ 4-byte Reload
+; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    beq .LBB18_13
 ; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    beq .LBB18_15
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    beq .LBB18_14
 ; SOFT-NEXT:  .LBB18_8:
-; SOFT-NEXT:    beq .LBB18_16
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB18_15
 ; SOFT-NEXT:  .LBB18_9:
-; SOFT-NEXT:    bne .LBB18_11
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    beq .LBB18_16
 ; SOFT-NEXT:  .LBB18_10:
-; SOFT-NEXT:    ldr r3, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB18_12
 ; SOFT-NEXT:  .LBB18_11:
-; SOFT-NEXT:    mov r2, r5
-; SOFT-NEXT:    add sp, #12
-; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:    movs r3, #15
 ; SOFT-NEXT:  .LBB18_12:
-; SOFT-NEXT:    mov r3, r7
-; SOFT-NEXT:    bne .LBB18_2
+; SOFT-NEXT:    mov r2, r4
+; SOFT-NEXT:    add sp, #4
+; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:  .LBB18_13:
-; SOFT-NEXT:    mov r2, r7
-; SOFT-NEXT:    beq .LBB18_3
-; SOFT-NEXT:    b .LBB18_4
-; SOFT-NEXT:  .LBB18_14:
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    cmp r6, #0
 ; SOFT-NEXT:    bne .LBB18_8
-; SOFT-NEXT:  .LBB18_15:
-; SOFT-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; SOFT-NEXT:  .LBB18_14:
+; SOFT-NEXT:    mov r2, r6
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB18_9
+; SOFT-NEXT:  .LBB18_15:
+; SOFT-NEXT:    mov r4, r2
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB18_10
 ; SOFT-NEXT:  .LBB18_16:
-; SOFT-NEXT:    ldr r5, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    beq .LBB18_10
-; SOFT-NEXT:    b .LBB18_11
+; SOFT-NEXT:    mov r3, r6
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB18_11
+; SOFT-NEXT:    b .LBB18_12
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.17:
 ; SOFT-NEXT:  .LCPI18_0:
@@ -1672,77 +1724,78 @@ define i128 @test_signed_i128_f64(double %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #12
-; SOFT-NEXT:    sub sp, #12
-; SOFT-NEXT:    mov r6, r1
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    movs r5, #0
-; SOFT-NEXT:    mov r2, r5
-; SOFT-NEXT:    mov r3, r5
+; SOFT-NEXT:    .pad #4
+; SOFT-NEXT:    sub sp, #4
+; SOFT-NEXT:    mov r7, r1
+; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    movs r6, #0
+; SOFT-NEXT:    mvns r4, r6
+; SOFT-NEXT:    ldr r3, .LCPI19_0
+; SOFT-NEXT:    mov r2, r4
+; SOFT-NEXT:    bl __aeabi_dcmpgt
+; SOFT-NEXT:    str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    mov r2, r6
+; SOFT-NEXT:    mov r3, r6
 ; SOFT-NEXT:    bl __aeabi_dcmpge
-; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    mov r1, r6
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r1, r7
 ; SOFT-NEXT:    bl __fixunsdfti
-; SOFT-NEXT:    cmp r4, #0
-; SOFT-NEXT:    beq .LBB19_12
+; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r7, r1
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB19_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    beq .LBB19_13
+; SOFT-NEXT:    mov r5, r6
 ; SOFT-NEXT:  .LBB19_2:
+; SOFT-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; SOFT-NEXT:    cmp r0, #0
+; SOFT-NEXT:    mov r0, r4
 ; SOFT-NEXT:    bne .LBB19_4
-; SOFT-NEXT:  .LBB19_3:
-; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:  @ %bb.3:
+; SOFT-NEXT:    mov r0, r5
 ; SOFT-NEXT:  .LBB19_4:
-; SOFT-NEXT:    str r1, [sp] @ 4-byte Spill
-; SOFT-NEXT:    str r2, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    str r3, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    cmp r6, #0
 ; SOFT-NEXT:    bne .LBB19_6
 ; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    mov r7, r6
 ; SOFT-NEXT:  .LBB19_6:
-; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    mvns r5, r5
-; SOFT-NEXT:    ldr r3, .LCPI19_0
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    mov r1, r6
-; SOFT-NEXT:    mov r2, r5
-; SOFT-NEXT:    bl __aeabi_dcmpgt
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    beq .LBB19_14
+; SOFT-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; SOFT-NEXT:    cmp r1, #0
+; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    bne .LBB19_8
 ; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    beq .LBB19_15
+; SOFT-NEXT:    mov r1, r7
 ; SOFT-NEXT:  .LBB19_8:
-; SOFT-NEXT:    mov r2, r5
-; SOFT-NEXT:    beq .LBB19_16
-; SOFT-NEXT:  .LBB19_9:
-; SOFT-NEXT:    bne .LBB19_11
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB19_10
+; SOFT-NEXT:  @ %bb.9:
+; SOFT-NEXT:    mov r2, r6
 ; SOFT-NEXT:  .LBB19_10:
-; SOFT-NEXT:    ldr r5, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:  .LBB19_11:
-; SOFT-NEXT:    mov r3, r5
-; SOFT-NEXT:    add sp, #12
-; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:    ldr r5, [sp] @ 4-byte Reload
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    mov r5, r4
+; SOFT-NEXT:    bne .LBB19_12
+; SOFT-NEXT:  @ %bb.11:
+; SOFT-NEXT:    mov r5, r2
 ; SOFT-NEXT:  .LBB19_12:
-; SOFT-NEXT:    mov r3, r4
-; SOFT-NEXT:    bne .LBB19_2
-; SOFT-NEXT:  .LBB19_13:
-; SOFT-NEXT:    mov r2, r4
-; SOFT-NEXT:    beq .LBB19_3
-; SOFT-NEXT:    b .LBB19_4
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    bne .LBB19_14
+; SOFT-NEXT:  @ %bb.13:
+; SOFT-NEXT:    mov r3, r6
 ; SOFT-NEXT:  .LBB19_14:
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    bne .LBB19_8
-; SOFT-NEXT:  .LBB19_15:
-; SOFT-NEXT:    ldr r1, [sp] @ 4-byte Reload
-; SOFT-NEXT:    mov r2, r5
-; SOFT-NEXT:    bne .LBB19_9
+; SOFT-NEXT:    ldr r2, [sp] @ 4-byte Reload
+; SOFT-NEXT:    cmp r2, #0
+; SOFT-NEXT:    bne .LBB19_16
+; SOFT-NEXT:  @ %bb.15:
+; SOFT-NEXT:    mov r4, r3
 ; SOFT-NEXT:  .LBB19_16:
-; SOFT-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    beq .LBB19_10
-; SOFT-NEXT:    b .LBB19_11
+; SOFT-NEXT:    mov r2, r5
+; SOFT-NEXT:    mov r3, r4
+; SOFT-NEXT:    add sp, #4
+; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.17:
 ; SOFT-NEXT:  .LCPI19_0:
@@ -1836,27 +1889,30 @@ define i1 @test_signed_i1_f16(half %f) nounwind {
 ; SOFT-NEXT:    uxth r0, r0
 ; SOFT-NEXT:    bl __aeabi_h2f
 ; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    movs r0, #127
+; SOFT-NEXT:    lsls r1, r0, #23
+; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    bl __aeabi_fcmpgt
+; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    movs r1, #0
+; SOFT-NEXT:    mov r0, r5
 ; SOFT-NEXT:    bl __aeabi_fcmpge
 ; SOFT-NEXT:    mov r6, r0
 ; SOFT-NEXT:    mov r0, r5
 ; SOFT-NEXT:    bl __aeabi_f2uiz
-; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    cmp r6, #0
-; SOFT-NEXT:    bne .LBB20_2
+; SOFT-NEXT:    beq .LBB20_3
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r4, r6
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB20_4
 ; SOFT-NEXT:  .LBB20_2:
-; SOFT-NEXT:    movs r0, #127
-; SOFT-NEXT:    lsls r1, r0, #23
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB20_4
-; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    movs r4, #1
+; SOFT-NEXT:    pop {r4, r5, r6, pc}
+; SOFT-NEXT:  .LBB20_3:
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB20_2
 ; SOFT-NEXT:  .LBB20_4:
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    movs r0, #1
 ; SOFT-NEXT:    pop {r4, r5, r6, pc}
 ;
 ; VFP2-LABEL: test_signed_i1_f16:
@@ -1864,13 +1920,13 @@ define i1 @test_signed_i1_f16(half %f) nounwind {
 ; VFP2-NEXT:    .save {r7, lr}
 ; VFP2-NEXT:    push {r7, lr}
 ; VFP2-NEXT:    bl __aeabi_h2f
-; VFP2-NEXT:    vmov s0, r0
-; VFP2-NEXT:    vmov.f32 s4, #1.000000e+00
-; VFP2-NEXT:    vcvt.u32.f32 s2, s0
-; VFP2-NEXT:    vcmp.f32 s0, #0
+; VFP2-NEXT:    vmov s2, r0
+; VFP2-NEXT:    vmov.f32 s0, #1.000000e+00
+; VFP2-NEXT:    vcvt.u32.f32 s4, s2
+; VFP2-NEXT:    vcmp.f32 s2, #0
 ; VFP2-NEXT:    vmrs APSR_nzcv, fpscr
-; VFP2-NEXT:    vcmp.f32 s0, s4
-; VFP2-NEXT:    vmov r0, s2
+; VFP2-NEXT:    vcmp.f32 s2, s0
+; VFP2-NEXT:    vmov r0, s4
 ; VFP2-NEXT:    it lt
 ; VFP2-NEXT:    movlt r0, #0
 ; VFP2-NEXT:    vmrs APSR_nzcv, fpscr
@@ -1904,27 +1960,29 @@ define i8 @test_signed_i8_f16(half %f) nounwind {
 ; SOFT-NEXT:    push {r4, r5, r6, lr}
 ; SOFT-NEXT:    uxth r0, r0
 ; SOFT-NEXT:    bl __aeabi_h2f
-; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    ldr r1, .LCPI21_0
+; SOFT-NEXT:    bl __aeabi_fcmpgt
+; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    movs r1, #0
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __aeabi_f2uiz
-; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    cmp r6, #0
-; SOFT-NEXT:    bne .LBB21_2
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB21_3
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r4, r6
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB21_4
 ; SOFT-NEXT:  .LBB21_2:
-; SOFT-NEXT:    ldr r1, .LCPI21_0
+; SOFT-NEXT:    pop {r4, r5, r6, pc}
+; SOFT-NEXT:  .LBB21_3:
 ; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB21_4
-; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    movs r4, #255
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB21_2
 ; SOFT-NEXT:  .LBB21_4:
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    movs r0, #255
 ; SOFT-NEXT:    pop {r4, r5, r6, pc}
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.5:
@@ -1982,27 +2040,29 @@ define i13 @test_signed_i13_f16(half %f) nounwind {
 ; SOFT-NEXT:    push {r4, r5, r6, lr}
 ; SOFT-NEXT:    uxth r0, r0
 ; SOFT-NEXT:    bl __aeabi_h2f
-; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    ldr r1, .LCPI22_0
+; SOFT-NEXT:    bl __aeabi_fcmpgt
+; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    movs r1, #0
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __aeabi_f2uiz
-; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    cmp r6, #0
-; SOFT-NEXT:    bne .LBB22_2
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB22_3
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r4, r6
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB22_4
 ; SOFT-NEXT:  .LBB22_2:
-; SOFT-NEXT:    ldr r1, .LCPI22_0
+; SOFT-NEXT:    pop {r4, r5, r6, pc}
+; SOFT-NEXT:  .LBB22_3:
 ; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB22_4
-; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    ldr r4, .LCPI22_1
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB22_2
 ; SOFT-NEXT:  .LBB22_4:
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    ldr r0, .LCPI22_1
 ; SOFT-NEXT:    pop {r4, r5, r6, pc}
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.5:
@@ -2062,27 +2122,29 @@ define i16 @test_signed_i16_f16(half %f) nounwind {
 ; SOFT-NEXT:    push {r4, r5, r6, lr}
 ; SOFT-NEXT:    uxth r0, r0
 ; SOFT-NEXT:    bl __aeabi_h2f
-; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    ldr r1, .LCPI23_0
+; SOFT-NEXT:    bl __aeabi_fcmpgt
+; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    movs r1, #0
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __aeabi_f2uiz
-; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    cmp r6, #0
-; SOFT-NEXT:    bne .LBB23_2
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB23_3
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r4, r6
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB23_4
 ; SOFT-NEXT:  .LBB23_2:
-; SOFT-NEXT:    ldr r1, .LCPI23_0
+; SOFT-NEXT:    pop {r4, r5, r6, pc}
+; SOFT-NEXT:  .LBB23_3:
 ; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB23_4
-; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    ldr r4, .LCPI23_1
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB23_2
 ; SOFT-NEXT:  .LBB23_4:
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    ldr r0, .LCPI23_1
 ; SOFT-NEXT:    pop {r4, r5, r6, pc}
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.5:
@@ -2142,27 +2204,29 @@ define i19 @test_signed_i19_f16(half %f) nounwind {
 ; SOFT-NEXT:    push {r4, r5, r6, lr}
 ; SOFT-NEXT:    uxth r0, r0
 ; SOFT-NEXT:    bl __aeabi_h2f
-; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    ldr r1, .LCPI24_0
+; SOFT-NEXT:    bl __aeabi_fcmpgt
+; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    movs r1, #0
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r0, r6
 ; SOFT-NEXT:    bl __aeabi_f2uiz
-; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    cmp r6, #0
-; SOFT-NEXT:    bne .LBB24_2
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB24_3
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r4, r6
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB24_4
 ; SOFT-NEXT:  .LBB24_2:
-; SOFT-NEXT:    ldr r1, .LCPI24_0
+; SOFT-NEXT:    pop {r4, r5, r6, pc}
+; SOFT-NEXT:  .LBB24_3:
 ; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB24_4
-; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    ldr r4, .LCPI24_1
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB24_2
 ; SOFT-NEXT:  .LBB24_4:
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    ldr r0, .LCPI24_1
 ; SOFT-NEXT:    pop {r4, r5, r6, pc}
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.5:
@@ -2225,28 +2289,31 @@ define i32 @test_signed_i32_f16(half %f) nounwind {
 ; SOFT-NEXT:    sub sp, #4
 ; SOFT-NEXT:    uxth r0, r0
 ; SOFT-NEXT:    bl __aeabi_h2f
-; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    ldr r1, .LCPI25_0
+; SOFT-NEXT:    bl __aeabi_fcmpgt
+; SOFT-NEXT:    mov r5, r0
 ; SOFT-NEXT:    movs r4, #0
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    mov r1, r4
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r6, r0
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    bl __aeabi_f2uiz
-; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    bne .LBB25_2
+; SOFT-NEXT:    cmp r6, #0
+; SOFT-NEXT:    beq .LBB25_3
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r5, r7
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB25_4
 ; SOFT-NEXT:  .LBB25_2:
-; SOFT-NEXT:    ldr r1, .LCPI25_0
+; SOFT-NEXT:    add sp, #4
+; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:  .LBB25_3:
 ; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    beq .LBB25_4
-; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mvns r5, r4
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB25_2
 ; SOFT-NEXT:  .LBB25_4:
-; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    mvns r0, r4
 ; SOFT-NEXT:    add sp, #4
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
@@ -2283,39 +2350,45 @@ define i50 @test_signed_i50_f16(half %f) nounwind {
 ; SOFT-NEXT:    sub sp, #4
 ; SOFT-NEXT:    uxth r0, r0
 ; SOFT-NEXT:    bl __aeabi_h2f
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    movs r5, #0
-; SOFT-NEXT:    mov r1, r5
-; SOFT-NEXT:    bl __aeabi_fcmpge
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    ldr r1, .LCPI26_0
+; SOFT-NEXT:    bl __aeabi_fcmpgt
 ; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    movs r6, #0
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    mov r1, r6
+; SOFT-NEXT:    bl __aeabi_fcmpge
+; SOFT-NEXT:    mov r5, r0
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    bl __aeabi_f2ulz
-; SOFT-NEXT:    mov r7, r1
-; SOFT-NEXT:    cmp r4, #0
-; SOFT-NEXT:    bne .LBB26_2
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB26_5
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r7, r4
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB26_6
 ; SOFT-NEXT:  .LBB26_2:
-; SOFT-NEXT:    bne .LBB26_4
-; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    beq .LBB26_7
+; SOFT-NEXT:  .LBB26_3:
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB26_8
 ; SOFT-NEXT:  .LBB26_4:
-; SOFT-NEXT:    mov r4, r0
-; SOFT-NEXT:    ldr r1, .LCPI26_0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mov r1, r0
-; SOFT-NEXT:    mvns r0, r5
-; SOFT-NEXT:    cmp r1, #0
-; SOFT-NEXT:    bne .LBB26_6
-; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    mov r0, r4
+; SOFT-NEXT:    add sp, #4
+; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:  .LBB26_5:
+; SOFT-NEXT:    mov r0, r5
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB26_2
 ; SOFT-NEXT:  .LBB26_6:
-; SOFT-NEXT:    beq .LBB26_8
-; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    ldr r7, .LCPI26_1
+; SOFT-NEXT:    mvns r0, r6
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB26_3
+; SOFT-NEXT:  .LBB26_7:
+; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB26_4
 ; SOFT-NEXT:  .LBB26_8:
-; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    ldr r1, .LCPI26_1
 ; SOFT-NEXT:    add sp, #4
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:    .p2align 2
@@ -2392,43 +2465,48 @@ define i64 @test_signed_i64_f16(half %f) nounwind {
 ; SOFT-NEXT:    sub sp, #4
 ; SOFT-NEXT:    uxth r0, r0
 ; SOFT-NEXT:    bl __aeabi_h2f
-; SOFT-NEXT:    mov r6, r0
-; SOFT-NEXT:    movs r4, #0
-; SOFT-NEXT:    mov r1, r4
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    ldr r1, .LCPI27_0
+; SOFT-NEXT:    bl __aeabi_fcmpgt
+; SOFT-NEXT:    mov r4, r0
+; SOFT-NEXT:    movs r6, #0
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpge
 ; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    bl __aeabi_f2ulz
-; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    mov r3, r0
 ; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB27_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    mov r3, r5
 ; SOFT-NEXT:  .LBB27_2:
-; SOFT-NEXT:    str r1, [sp] @ 4-byte Spill
-; SOFT-NEXT:    bne .LBB27_4
-; SOFT-NEXT:  @ %bb.3:
-; SOFT-NEXT:    mov r7, r5
-; SOFT-NEXT:  .LBB27_4:
-; SOFT-NEXT:    ldr r1, .LCPI27_0
-; SOFT-NEXT:    mov r0, r6
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mvns r1, r4
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r1
+; SOFT-NEXT:    mvns r2, r6
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r0, r2
 ; SOFT-NEXT:    beq .LBB27_7
-; SOFT-NEXT:  @ %bb.5:
+; SOFT-NEXT:  @ %bb.3:
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    beq .LBB27_8
+; SOFT-NEXT:  .LBB27_4:
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB27_6
+; SOFT-NEXT:  .LBB27_5:
+; SOFT-NEXT:    mov r2, r1
 ; SOFT-NEXT:  .LBB27_6:
+; SOFT-NEXT:    mov r1, r2
 ; SOFT-NEXT:    add sp, #4
 ; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:  .LBB27_7:
-; SOFT-NEXT:    mov r0, r7
-; SOFT-NEXT:    bne .LBB27_6
+; SOFT-NEXT:    mov r0, r3
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB27_4
 ; SOFT-NEXT:  .LBB27_8:
-; SOFT-NEXT:    ldr r1, [sp] @ 4-byte Reload
-; SOFT-NEXT:    add sp, #4
-; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:    mov r1, r5
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB27_5
+; SOFT-NEXT:    b .LBB27_6
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.9:
 ; SOFT-NEXT:  .LCPI27_0:
@@ -2495,73 +2573,78 @@ define i100 @test_signed_i100_f16(half %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #12
-; SOFT-NEXT:    sub sp, #12
+; SOFT-NEXT:    .pad #4
+; SOFT-NEXT:    sub sp, #4
 ; SOFT-NEXT:    uxth r0, r0
 ; SOFT-NEXT:    bl __aeabi_h2f
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    ldr r1, .LCPI28_0
+; SOFT-NEXT:    bl __aeabi_fcmpgt
 ; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    movs r6, #0
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    bl __fixunssfti
 ; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    beq .LBB28_11
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    bl __fixunssfti
+; SOFT-NEXT:    mov r7, r1
+; SOFT-NEXT:    str r2, [sp] @ 4-byte Spill
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB28_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    beq .LBB28_12
+; SOFT-NEXT:    mov r0, r5
 ; SOFT-NEXT:  .LBB28_2:
+; SOFT-NEXT:    mvns r2, r6
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r6, r2
 ; SOFT-NEXT:    bne .LBB28_4
-; SOFT-NEXT:  .LBB28_3:
-; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:  @ %bb.3:
+; SOFT-NEXT:    mov r6, r0
 ; SOFT-NEXT:  .LBB28_4:
-; SOFT-NEXT:    str r1, [sp] @ 4-byte Spill
-; SOFT-NEXT:    str r2, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    str r3, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB28_6
 ; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    mov r5, r7
+; SOFT-NEXT:    mov r7, r5
 ; SOFT-NEXT:  .LBB28_6:
-; SOFT-NEXT:    ldr r1, .LCPI28_0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mvns r2, r6
-; SOFT-NEXT:    movs r3, #15
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r2
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r1, r2
 ; SOFT-NEXT:    beq .LBB28_13
 ; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    mov r1, r2
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    beq .LBB28_14
 ; SOFT-NEXT:  .LBB28_8:
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    beq .LBB28_15
 ; SOFT-NEXT:  .LBB28_9:
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    beq .LBB28_16
 ; SOFT-NEXT:  .LBB28_10:
-; SOFT-NEXT:    add sp, #12
-; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB28_12
 ; SOFT-NEXT:  .LBB28_11:
-; SOFT-NEXT:    mov r3, r7
-; SOFT-NEXT:    bne .LBB28_2
+; SOFT-NEXT:    movs r3, #15
 ; SOFT-NEXT:  .LBB28_12:
-; SOFT-NEXT:    mov r2, r7
-; SOFT-NEXT:    beq .LBB28_3
-; SOFT-NEXT:    b .LBB28_4
+; SOFT-NEXT:    mov r0, r6
+; SOFT-NEXT:    add sp, #4
+; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:  .LBB28_13:
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r2
+; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB28_8
 ; SOFT-NEXT:  .LBB28_14:
-; SOFT-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; SOFT-NEXT:    str r5, [sp] @ 4-byte Spill
+; SOFT-NEXT:    cmp r4, #0
 ; SOFT-NEXT:    bne .LBB28_9
 ; SOFT-NEXT:  .LBB28_15:
-; SOFT-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT:    ldr r2, [sp] @ 4-byte Reload
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB28_10
 ; SOFT-NEXT:  .LBB28_16:
-; SOFT-NEXT:    ldr r3, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    add sp, #12
-; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:    mov r3, r5
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB28_11
+; SOFT-NEXT:    b .LBB28_12
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.17:
 ; SOFT-NEXT:  .LCPI28_0:
@@ -2636,74 +2719,77 @@ define i128 @test_signed_i128_f16(half %f) nounwind {
 ; SOFT:       @ %bb.0:
 ; SOFT-NEXT:    .save {r4, r5, r6, r7, lr}
 ; SOFT-NEXT:    push {r4, r5, r6, r7, lr}
-; SOFT-NEXT:    .pad #12
-; SOFT-NEXT:    sub sp, #12
+; SOFT-NEXT:    .pad #4
+; SOFT-NEXT:    sub sp, #4
 ; SOFT-NEXT:    uxth r0, r0
 ; SOFT-NEXT:    bl __aeabi_h2f
+; SOFT-NEXT:    mov r7, r0
+; SOFT-NEXT:    ldr r1, .LCPI29_0
+; SOFT-NEXT:    bl __aeabi_fcmpgt
 ; SOFT-NEXT:    mov r4, r0
 ; SOFT-NEXT:    movs r6, #0
+; SOFT-NEXT:    mov r0, r7
 ; SOFT-NEXT:    mov r1, r6
 ; SOFT-NEXT:    bl __aeabi_fcmpge
-; SOFT-NEXT:    mov r7, r0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    bl __fixunssfti
 ; SOFT-NEXT:    mov r5, r0
-; SOFT-NEXT:    cmp r7, #0
-; SOFT-NEXT:    beq .LBB29_11
+; SOFT-NEXT:    mov r0, r7
+; SOFT-NEXT:    bl __fixunssfti
+; SOFT-NEXT:    mov r7, r1
+; SOFT-NEXT:    str r3, [sp] @ 4-byte Spill
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB29_2
 ; SOFT-NEXT:  @ %bb.1:
-; SOFT-NEXT:    beq .LBB29_12
+; SOFT-NEXT:    mov r0, r5
 ; SOFT-NEXT:  .LBB29_2:
+; SOFT-NEXT:    mvns r6, r6
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r3, r6
 ; SOFT-NEXT:    bne .LBB29_4
-; SOFT-NEXT:  .LBB29_3:
-; SOFT-NEXT:    mov r1, r7
+; SOFT-NEXT:  @ %bb.3:
+; SOFT-NEXT:    mov r3, r0
 ; SOFT-NEXT:  .LBB29_4:
-; SOFT-NEXT:    str r1, [sp] @ 4-byte Spill
-; SOFT-NEXT:    str r2, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT:    str r3, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    bne .LBB29_6
 ; SOFT-NEXT:  @ %bb.5:
-; SOFT-NEXT:    mov r5, r7
+; SOFT-NEXT:    mov r7, r5
 ; SOFT-NEXT:  .LBB29_6:
-; SOFT-NEXT:    ldr r1, .LCPI29_0
-; SOFT-NEXT:    mov r0, r4
-; SOFT-NEXT:    bl __aeabi_fcmpgt
-; SOFT-NEXT:    mvns r3, r6
-; SOFT-NEXT:    cmp r0, #0
-; SOFT-NEXT:    mov r0, r3
-; SOFT-NEXT:    beq .LBB29_13
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r1, r6
+; SOFT-NEXT:    bne .LBB29_8
 ; SOFT-NEXT:  @ %bb.7:
-; SOFT-NEXT:    mov r1, r3
-; SOFT-NEXT:    beq .LBB29_14
+; SOFT-NEXT:    mov r1, r7
 ; SOFT-NEXT:  .LBB29_8:
-; SOFT-NEXT:    mov r2, r3
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB29_10
+; SOFT-NEXT:  @ %bb.9:
+; SOFT-NEXT:    mov r2, r5
+; SOFT-NEXT:  .LBB29_10:
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    mov r7, r6
 ; SOFT-NEXT:    beq .LBB29_15
-; SOFT-NEXT:  .LBB29_9:
+; SOFT-NEXT:  @ %bb.11:
+; SOFT-NEXT:    cmp r5, #0
 ; SOFT-NEXT:    beq .LBB29_16
-; SOFT-NEXT:  .LBB29_10:
-; SOFT-NEXT:    add sp, #12
-; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
-; SOFT-NEXT:  .LBB29_11:
-; SOFT-NEXT:    mov r3, r7
-; SOFT-NEXT:    bne .LBB29_2
 ; SOFT-NEXT:  .LBB29_12:
-; SOFT-NEXT:    mov r2, r7
-; SOFT-NEXT:    beq .LBB29_3
-; SOFT-NEXT:    b .LBB29_4
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    bne .LBB29_14
 ; SOFT-NEXT:  .LBB29_13:
-; SOFT-NEXT:    mov r0, r5
-; SOFT-NEXT:    mov r1, r3
-; SOFT-NEXT:    bne .LBB29_8
+; SOFT-NEXT:    ldr r6, [sp] @ 4-byte Reload
 ; SOFT-NEXT:  .LBB29_14:
-; SOFT-NEXT:    ldr r1, [sp] @ 4-byte Reload
-; SOFT-NEXT:    mov r2, r3
-; SOFT-NEXT:    bne .LBB29_9
+; SOFT-NEXT:    mov r0, r3
+; SOFT-NEXT:    mov r2, r7
+; SOFT-NEXT:    mov r3, r6
+; SOFT-NEXT:    add sp, #4
+; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
 ; SOFT-NEXT:  .LBB29_15:
-; SOFT-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT:    bne .LBB29_10
+; SOFT-NEXT:    mov r7, r2
+; SOFT-NEXT:    cmp r5, #0
+; SOFT-NEXT:    bne .LBB29_12
 ; SOFT-NEXT:  .LBB29_16:
-; SOFT-NEXT:    ldr r3, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT:    add sp, #12
-; SOFT-NEXT:    pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT:    str r5, [sp] @ 4-byte Spill
+; SOFT-NEXT:    cmp r4, #0
+; SOFT-NEXT:    beq .LBB29_13
+; SOFT-NEXT:    b .LBB29_14
 ; SOFT-NEXT:    .p2align 2
 ; SOFT-NEXT:  @ %bb.17:
 ; SOFT-NEXT:  .LCPI29_0:

diff  --git a/llvm/test/CodeGen/ARM/funnel-shift-rot.ll b/llvm/test/CodeGen/ARM/funnel-shift-rot.ll
index a1b6847d623d00..de5bd2a7040b99 100644
--- a/llvm/test/CodeGen/ARM/funnel-shift-rot.ll
+++ b/llvm/test/CodeGen/ARM/funnel-shift-rot.ll
@@ -71,12 +71,12 @@ define i64 @rotl_i64(i64 %x, i64 %z) {
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    .save {r4, lr}
 ; CHECK-NEXT:    push {r4, lr}
-; CHECK-NEXT:    tst r2, #32
-; CHECK-NEXT:    mov r3, r0
+; CHECK-NEXT:    ands r3, r2, #32
 ; CHECK-NEXT:    and r12, r2, #31
+; CHECK-NEXT:    mov r3, r0
+; CHECK-NEXT:    mov r4, #31
 ; CHECK-NEXT:    movne r3, r1
 ; CHECK-NEXT:    movne r1, r0
-; CHECK-NEXT:    mov r4, #31
 ; CHECK-NEXT:    bic r2, r4, r2
 ; CHECK-NEXT:    lsl lr, r3, r12
 ; CHECK-NEXT:    lsr r0, r1, #1
@@ -206,7 +206,7 @@ define i32 @rotr_i32(i32 %x, i32 %z) {
 define i64 @rotr_i64(i64 %x, i64 %z) {
 ; CHECK-LABEL: rotr_i64:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    tst r2, #32
+; CHECK-NEXT:    ands r3, r2, #32
 ; CHECK-NEXT:    mov r3, r1
 ; CHECK-NEXT:    moveq r3, r0
 ; CHECK-NEXT:    moveq r0, r1

diff  --git a/llvm/test/CodeGen/ARM/funnel-shift.ll b/llvm/test/CodeGen/ARM/funnel-shift.ll
index 191155ae30f3e3..5a7c4384428e1a 100644
--- a/llvm/test/CodeGen/ARM/funnel-shift.ll
+++ b/llvm/test/CodeGen/ARM/funnel-shift.ll
@@ -47,67 +47,69 @@ declare i37 @llvm.fshl.i37(i37, i37, i37)
 define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
 ; SCALAR-LABEL: fshl_i37:
 ; SCALAR:       @ %bb.0:
-; SCALAR-NEXT:    .save {r4, r5, r6, r7, r8, lr}
-; SCALAR-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; SCALAR-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; SCALAR-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
 ; SCALAR-NEXT:    mov r8, r0
-; SCALAR-NEXT:    ldr r0, [sp, #28]
+; SCALAR-NEXT:    ldr r0, [sp, #36]
 ; SCALAR-NEXT:    mov r4, r1
-; SCALAR-NEXT:    mov r5, r3
+; SCALAR-NEXT:    mov r6, r3
 ; SCALAR-NEXT:    and r1, r0, #31
-; SCALAR-NEXT:    ldr r0, [sp, #24]
-; SCALAR-NEXT:    mov r6, r2
+; SCALAR-NEXT:    ldr r0, [sp, #32]
+; SCALAR-NEXT:    mov r9, r2
 ; SCALAR-NEXT:    mov r2, #37
 ; SCALAR-NEXT:    mov r3, #0
 ; SCALAR-NEXT:    bl __aeabi_uldivmod
-; SCALAR-NEXT:    lsl r0, r5, #27
-; SCALAR-NEXT:    tst r2, #32
-; SCALAR-NEXT:    orr r0, r0, r6, lsr #5
-; SCALAR-NEXT:    mov r1, r8
-; SCALAR-NEXT:    and r3, r2, #31
+; SCALAR-NEXT:    lsl r1, r6, #27
+; SCALAR-NEXT:    ands r0, r2, #32
+; SCALAR-NEXT:    orr r1, r1, r9, lsr #5
+; SCALAR-NEXT:    mov r3, r8
+; SCALAR-NEXT:    and r6, r2, #31
 ; SCALAR-NEXT:    mov r7, #31
-; SCALAR-NEXT:    movne r1, r0
-; SCALAR-NEXT:    lslne r0, r6, #27
+; SCALAR-NEXT:    movne r3, r1
+; SCALAR-NEXT:    cmp r0, #0
+; SCALAR-NEXT:    lslne r1, r9, #27
 ; SCALAR-NEXT:    bic r2, r7, r2
-; SCALAR-NEXT:    lsl r5, r1, r3
-; SCALAR-NEXT:    lsr r0, r0, #1
 ; SCALAR-NEXT:    movne r4, r8
-; SCALAR-NEXT:    lsr r1, r1, #1
-; SCALAR-NEXT:    lsl r3, r4, r3
+; SCALAR-NEXT:    lsl r5, r3, r6
+; SCALAR-NEXT:    lsr r0, r1, #1
+; SCALAR-NEXT:    lsl r1, r4, r6
+; SCALAR-NEXT:    lsr r3, r3, #1
 ; SCALAR-NEXT:    orr r0, r5, r0, lsr r2
-; SCALAR-NEXT:    orr r1, r3, r1, lsr r2
-; SCALAR-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+; SCALAR-NEXT:    orr r1, r1, r3, lsr r2
+; SCALAR-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
 ;
 ; NEON-LABEL: fshl_i37:
 ; NEON:       @ %bb.0:
-; NEON-NEXT:    .save {r4, r5, r6, r7, r8, lr}
-; NEON-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; NEON-NEXT:    .save {r4, r5, r6, r7, r11, lr}
+; NEON-NEXT:    push {r4, r5, r6, r7, r11, lr}
 ; NEON-NEXT:    mov r4, r1
 ; NEON-NEXT:    ldr r1, [sp, #28]
-; NEON-NEXT:    mov r8, r0
+; NEON-NEXT:    mov r6, r0
 ; NEON-NEXT:    ldr r0, [sp, #24]
 ; NEON-NEXT:    and r1, r1, #31
 ; NEON-NEXT:    mov r5, r3
-; NEON-NEXT:    mov r6, r2
+; NEON-NEXT:    mov r7, r2
 ; NEON-NEXT:    mov r2, #37
 ; NEON-NEXT:    mov r3, #0
 ; NEON-NEXT:    bl __aeabi_uldivmod
+; NEON-NEXT:    mov r0, #31
+; NEON-NEXT:    bic r1, r0, r2
 ; NEON-NEXT:    lsl r0, r5, #27
-; NEON-NEXT:    tst r2, #32
-; NEON-NEXT:    orr r0, r0, r6, lsr #5
-; NEON-NEXT:    mov r1, r8
-; NEON-NEXT:    and r3, r2, #31
-; NEON-NEXT:    mov r7, #31
-; NEON-NEXT:    movne r1, r0
-; NEON-NEXT:    lslne r0, r6, #27
-; NEON-NEXT:    bic r2, r7, r2
-; NEON-NEXT:    lsl r5, r1, r3
+; NEON-NEXT:    ands r12, r2, #32
+; NEON-NEXT:    orr r0, r0, r7, lsr #5
+; NEON-NEXT:    mov r5, r6
+; NEON-NEXT:    and r2, r2, #31
+; NEON-NEXT:    movne r5, r0
+; NEON-NEXT:    lslne r0, r7, #27
+; NEON-NEXT:    cmp r12, #0
+; NEON-NEXT:    lsl r3, r5, r2
 ; NEON-NEXT:    lsr r0, r0, #1
-; NEON-NEXT:    movne r4, r8
-; NEON-NEXT:    lsr r1, r1, #1
-; NEON-NEXT:    lsl r3, r4, r3
-; NEON-NEXT:    orr r0, r5, r0, lsr r2
-; NEON-NEXT:    orr r1, r3, r1, lsr r2
-; NEON-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+; NEON-NEXT:    movne r4, r6
+; NEON-NEXT:    orr r0, r3, r0, lsr r1
+; NEON-NEXT:    lsr r3, r5, #1
+; NEON-NEXT:    lsl r2, r4, r2
+; NEON-NEXT:    orr r1, r2, r3, lsr r1
+; NEON-NEXT:    pop {r4, r5, r6, r7, r11, pc}
   %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
   ret i37 %f
 }
@@ -235,69 +237,71 @@ declare i37 @llvm.fshr.i37(i37, i37, i37)
 define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
 ; SCALAR-LABEL: fshr_i37:
 ; SCALAR:       @ %bb.0:
-; SCALAR-NEXT:    .save {r4, r5, r6, r7, r11, lr}
-; SCALAR-NEXT:    push {r4, r5, r6, r7, r11, lr}
-; SCALAR-NEXT:    mov r5, r0
+; SCALAR-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; SCALAR-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; SCALAR-NEXT:    mov r8, r0
 ; SCALAR-NEXT:    ldr r0, [sp, #28]
 ; SCALAR-NEXT:    mov r4, r1
-; SCALAR-NEXT:    mov r6, r3
+; SCALAR-NEXT:    mov r5, r3
 ; SCALAR-NEXT:    and r1, r0, #31
 ; SCALAR-NEXT:    ldr r0, [sp, #24]
 ; SCALAR-NEXT:    mov r7, r2
 ; SCALAR-NEXT:    mov r2, #37
 ; SCALAR-NEXT:    mov r3, #0
 ; SCALAR-NEXT:    bl __aeabi_uldivmod
+; SCALAR-NEXT:    lsl r3, r5, #27
 ; SCALAR-NEXT:    add r0, r2, #27
-; SCALAR-NEXT:    lsl r2, r6, #27
-; SCALAR-NEXT:    orr r2, r2, r7, lsr #5
+; SCALAR-NEXT:    orr r3, r3, r7, lsr #5
+; SCALAR-NEXT:    ands r2, r0, #32
+; SCALAR-NEXT:    mov r5, r8
 ; SCALAR-NEXT:    mov r1, #31
-; SCALAR-NEXT:    tst r0, #32
-; SCALAR-NEXT:    mov r3, r5
-; SCALAR-NEXT:    moveq r3, r2
-; SCALAR-NEXT:    lsleq r2, r7, #27
+; SCALAR-NEXT:    moveq r5, r3
+; SCALAR-NEXT:    lsleq r3, r7, #27
+; SCALAR-NEXT:    cmp r2, #0
 ; SCALAR-NEXT:    bic r1, r1, r0
+; SCALAR-NEXT:    moveq r4, r8
+; SCALAR-NEXT:    lsl r6, r5, #1
 ; SCALAR-NEXT:    and r7, r0, #31
-; SCALAR-NEXT:    lsl r6, r3, #1
-; SCALAR-NEXT:    moveq r4, r5
-; SCALAR-NEXT:    lsl r6, r6, r1
-; SCALAR-NEXT:    orr r0, r6, r2, lsr r7
 ; SCALAR-NEXT:    lsl r2, r4, #1
+; SCALAR-NEXT:    lsl r6, r6, r1
 ; SCALAR-NEXT:    lsl r1, r2, r1
-; SCALAR-NEXT:    orr r1, r1, r3, lsr r7
-; SCALAR-NEXT:    pop {r4, r5, r6, r7, r11, pc}
+; SCALAR-NEXT:    orr r0, r6, r3, lsr r7
+; SCALAR-NEXT:    orr r1, r1, r5, lsr r7
+; SCALAR-NEXT:    pop {r4, r5, r6, r7, r8, pc}
 ;
 ; NEON-LABEL: fshr_i37:
 ; NEON:       @ %bb.0:
-; NEON-NEXT:    .save {r4, r5, r6, r7, r11, lr}
-; NEON-NEXT:    push {r4, r5, r6, r7, r11, lr}
+; NEON-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; NEON-NEXT:    push {r4, r5, r6, r7, r8, lr}
 ; NEON-NEXT:    mov r4, r1
 ; NEON-NEXT:    ldr r1, [sp, #28]
-; NEON-NEXT:    mov r5, r0
+; NEON-NEXT:    mov r8, r0
 ; NEON-NEXT:    ldr r0, [sp, #24]
 ; NEON-NEXT:    and r1, r1, #31
-; NEON-NEXT:    mov r6, r3
+; NEON-NEXT:    mov r5, r3
 ; NEON-NEXT:    mov r7, r2
 ; NEON-NEXT:    mov r2, #37
 ; NEON-NEXT:    mov r3, #0
 ; NEON-NEXT:    bl __aeabi_uldivmod
+; NEON-NEXT:    lsl r3, r5, #27
 ; NEON-NEXT:    add r0, r2, #27
-; NEON-NEXT:    lsl r2, r6, #27
-; NEON-NEXT:    orr r2, r2, r7, lsr #5
+; NEON-NEXT:    orr r3, r3, r7, lsr #5
+; NEON-NEXT:    ands r2, r0, #32
+; NEON-NEXT:    mov r5, r8
 ; NEON-NEXT:    mov r1, #31
-; NEON-NEXT:    tst r0, #32
-; NEON-NEXT:    mov r3, r5
-; NEON-NEXT:    moveq r3, r2
-; NEON-NEXT:    lsleq r2, r7, #27
+; NEON-NEXT:    moveq r5, r3
+; NEON-NEXT:    lsleq r3, r7, #27
+; NEON-NEXT:    cmp r2, #0
 ; NEON-NEXT:    bic r1, r1, r0
+; NEON-NEXT:    moveq r4, r8
+; NEON-NEXT:    lsl r6, r5, #1
 ; NEON-NEXT:    and r7, r0, #31
-; NEON-NEXT:    lsl r6, r3, #1
-; NEON-NEXT:    moveq r4, r5
-; NEON-NEXT:    lsl r6, r6, r1
-; NEON-NEXT:    orr r0, r6, r2, lsr r7
 ; NEON-NEXT:    lsl r2, r4, #1
+; NEON-NEXT:    lsl r6, r6, r1
 ; NEON-NEXT:    lsl r1, r2, r1
-; NEON-NEXT:    orr r1, r1, r3, lsr r7
-; NEON-NEXT:    pop {r4, r5, r6, r7, r11, pc}
+; NEON-NEXT:    orr r0, r6, r3, lsr r7
+; NEON-NEXT:    orr r1, r1, r5, lsr r7
+; NEON-NEXT:    pop {r4, r5, r6, r7, r8, pc}
   %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
   ret i37 %f
 }

diff  --git a/llvm/test/CodeGen/ARM/ifcvt1.ll b/llvm/test/CodeGen/ARM/ifcvt1.ll
index 6d59869bc102ee..d419cbc48fc488 100644
--- a/llvm/test/CodeGen/ARM/ifcvt1.ll
+++ b/llvm/test/CodeGen/ARM/ifcvt1.ll
@@ -13,10 +13,10 @@ define i32 @t1(i32 %a, i32 %b) {
 ;
 ; SWIFT-LABEL: t1:
 ; SWIFT:       @ %bb.0: @ %common.ret
+; SWIFT-NEXT:    mov r2, #1
 ; SWIFT-NEXT:    cmp r0, #0
-; SWIFT-NEXT:    mov r0, #1
-; SWIFT-NEXT:    mvneq r0, #0
-; SWIFT-NEXT:    add r0, r1, r0
+; SWIFT-NEXT:    mvneq r2, #0
+; SWIFT-NEXT:    add r0, r1, r2
 ; SWIFT-NEXT:    bx lr
 	%tmp2 = icmp eq i32 %a, 0
 	br i1 %tmp2, label %cond_false, label %cond_true

diff  --git a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
index dd33b09fe83004..4003af5d44be81 100644
--- a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
+++ b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
@@ -494,14 +494,14 @@ define <4 x float> @fminnumv432_intrinsic(<4 x float> %x, <4 x float> %y) {
 ; ARMV7-NEXT:    vld1.64 {d0, d1}, [r12]
 ; ARMV7-NEXT:    vmov d3, r2, r3
 ; ARMV7-NEXT:    vmov d2, r0, r1
-; ARMV7-NEXT:    vcmp.f32 s6, s2
-; ARMV7-NEXT:    vmrs APSR_nzcv, fpscr
 ; ARMV7-NEXT:    vcmp.f32 s7, s3
-; ARMV7-NEXT:    vmovlt.f32 s2, s6
 ; ARMV7-NEXT:    vmrs APSR_nzcv, fpscr
-; ARMV7-NEXT:    vcmp.f32 s5, s1
+; ARMV7-NEXT:    vcmp.f32 s6, s2
 ; ARMV7-NEXT:    vmovlt.f32 s3, s7
 ; ARMV7-NEXT:    vmrs APSR_nzcv, fpscr
+; ARMV7-NEXT:    vcmp.f32 s5, s1
+; ARMV7-NEXT:    vmovlt.f32 s2, s6
+; ARMV7-NEXT:    vmrs APSR_nzcv, fpscr
 ; ARMV7-NEXT:    vcmp.f32 s4, s0
 ; ARMV7-NEXT:    vmovlt.f32 s1, s5
 ; ARMV7-NEXT:    vmrs APSR_nzcv, fpscr
@@ -676,14 +676,14 @@ define <4 x float> @fmaxnumv432_intrinsic(<4 x float> %x, <4 x float> %y) {
 ; ARMV7-NEXT:    vld1.64 {d0, d1}, [r12]
 ; ARMV7-NEXT:    vmov d3, r2, r3
 ; ARMV7-NEXT:    vmov d2, r0, r1
-; ARMV7-NEXT:    vcmp.f32 s6, s2
-; ARMV7-NEXT:    vmrs APSR_nzcv, fpscr
 ; ARMV7-NEXT:    vcmp.f32 s7, s3
-; ARMV7-NEXT:    vmovgt.f32 s2, s6
 ; ARMV7-NEXT:    vmrs APSR_nzcv, fpscr
-; ARMV7-NEXT:    vcmp.f32 s5, s1
+; ARMV7-NEXT:    vcmp.f32 s6, s2
 ; ARMV7-NEXT:    vmovgt.f32 s3, s7
 ; ARMV7-NEXT:    vmrs APSR_nzcv, fpscr
+; ARMV7-NEXT:    vcmp.f32 s5, s1
+; ARMV7-NEXT:    vmovgt.f32 s2, s6
+; ARMV7-NEXT:    vmrs APSR_nzcv, fpscr
 ; ARMV7-NEXT:    vcmp.f32 s4, s0
 ; ARMV7-NEXT:    vmovgt.f32 s1, s5
 ; ARMV7-NEXT:    vmrs APSR_nzcv, fpscr
@@ -760,16 +760,16 @@ define <4 x float> @fmaxnumv432_zero_intrinsic(<4 x float> %x) {
 ; ARMV7-NEXT:    vmov d3, r2, r3
 ; ARMV7-NEXT:    vldr s0, .LCPI21_0
 ; ARMV7-NEXT:    vmov d2, r0, r1
-; ARMV7-NEXT:    vcmp.f32 s6, #0
-; ARMV7-NEXT:    vmrs APSR_nzcv, fpscr
-; ARMV7-NEXT:    vmov.f32 s2, s0
 ; ARMV7-NEXT:    vcmp.f32 s7, #0
-; ARMV7-NEXT:    vmovgt.f32 s2, s6
 ; ARMV7-NEXT:    vmrs APSR_nzcv, fpscr
 ; ARMV7-NEXT:    vmov.f32 s3, s0
-; ARMV7-NEXT:    vcmp.f32 s5, #0
+; ARMV7-NEXT:    vcmp.f32 s6, #0
 ; ARMV7-NEXT:    vmovgt.f32 s3, s7
 ; ARMV7-NEXT:    vmrs APSR_nzcv, fpscr
+; ARMV7-NEXT:    vmov.f32 s2, s0
+; ARMV7-NEXT:    vcmp.f32 s5, #0
+; ARMV7-NEXT:    vmovgt.f32 s2, s6
+; ARMV7-NEXT:    vmrs APSR_nzcv, fpscr
 ; ARMV7-NEXT:    vmov.f32 s1, s0
 ; ARMV7-NEXT:    vcmp.f32 s4, #0
 ; ARMV7-NEXT:    vmovgt.f32 s1, s5
@@ -812,18 +812,18 @@ define <4 x float> @fmaxnumv432_minus_zero_intrinsic(<4 x float> %x) {
 ; ARMV7-NEXT:    vldr s0, .LCPI22_0
 ; ARMV7-NEXT:    vmov d3, r2, r3
 ; ARMV7-NEXT:    vmov d2, r0, r1
-; ARMV7-NEXT:    vcmp.f32 s6, s0
-; ARMV7-NEXT:    vmrs APSR_nzcv, fpscr
 ; ARMV7-NEXT:    vcmp.f32 s7, s0
-; ARMV7-NEXT:    vmov.f32 s2, s0
-; ARMV7-NEXT:    vmovgt.f32 s2, s6
 ; ARMV7-NEXT:    vmrs APSR_nzcv, fpscr
-; ARMV7-NEXT:    vcmp.f32 s5, s0
 ; ARMV7-NEXT:    vmov.f32 s3, s0
+; ARMV7-NEXT:    vcmp.f32 s6, s0
 ; ARMV7-NEXT:    vmovgt.f32 s3, s7
 ; ARMV7-NEXT:    vmrs APSR_nzcv, fpscr
-; ARMV7-NEXT:    vcmp.f32 s4, s0
+; ARMV7-NEXT:    vmov.f32 s2, s0
+; ARMV7-NEXT:    vcmp.f32 s5, s0
+; ARMV7-NEXT:    vmovgt.f32 s2, s6
+; ARMV7-NEXT:    vmrs APSR_nzcv, fpscr
 ; ARMV7-NEXT:    vmov.f32 s1, s0
+; ARMV7-NEXT:    vcmp.f32 s4, s0
 ; ARMV7-NEXT:    vmovgt.f32 s1, s5
 ; ARMV7-NEXT:    vmrs APSR_nzcv, fpscr
 ; ARMV7-NEXT:    vmovgt.f32 s0, s4
@@ -933,8 +933,8 @@ define <2 x double> @fminnumv264_intrinsic(<2 x double> %x, <2 x double> %y) {
 ; ARMV8M-NEXT:    vselgt.f64 d0, d0, d2
 ; ARMV8M-NEXT:    vmrs APSR_nzcv, fpscr
 ; ARMV8M-NEXT:    vmov r0, r1, d0
-; ARMV8M-NEXT:    vselgt.f64 d0, d1, d3
-; ARMV8M-NEXT:    vmov r2, r3, d0
+; ARMV8M-NEXT:    vselgt.f64 d1, d1, d3
+; ARMV8M-NEXT:    vmov r2, r3, d1
 ; ARMV8M-NEXT:    bx lr
   %a = call nnan <2 x double> @llvm.minnum.v2f64(<2 x double> %x, <2 x double> %y)
   ret <2 x double> %a
@@ -981,8 +981,8 @@ define <2 x double> @fminnumv264_nsz_intrinsic(<2 x double> %x, <2 x double> %y)
 ; ARMV8M-NEXT:    vselgt.f64 d0, d0, d2
 ; ARMV8M-NEXT:    vmrs APSR_nzcv, fpscr
 ; ARMV8M-NEXT:    vmov r0, r1, d0
-; ARMV8M-NEXT:    vselgt.f64 d0, d1, d3
-; ARMV8M-NEXT:    vmov r2, r3, d0
+; ARMV8M-NEXT:    vselgt.f64 d1, d1, d3
+; ARMV8M-NEXT:    vmov r2, r3, d1
 ; ARMV8M-NEXT:    bx lr
   %a = call nnan nsz <2 x double> @llvm.minnum.v2f64(<2 x double> %x, <2 x double> %y)
   ret <2 x double> %a
@@ -1225,26 +1225,26 @@ define <2 x double> @fmaxnumv264_zero_intrinsic(<2 x double> %x) {
 ; ARMV8M-LABEL: fmaxnumv264_zero_intrinsic:
 ; ARMV8M:       @ %bb.0:
 ; ARMV8M-NEXT:    vmov d2, r0, r1
-; ARMV8M-NEXT:    vldr d0, .LCPI30_0
+; ARMV8M-NEXT:    vldr d1, .LCPI30_1
 ; ARMV8M-NEXT:    vcmp.f64 d2, #0
 ; ARMV8M-NEXT:    vmrs APSR_nzcv, fpscr
-; ARMV8M-NEXT:    vmov d1, r2, r3
-; ARMV8M-NEXT:    vcmp.f64 d1, d0
-; ARMV8M-NEXT:    vldr d3, .LCPI30_1
-; ARMV8M-NEXT:    vselgt.f64 d2, d2, d3
+; ARMV8M-NEXT:    vmov d3, r2, r3
+; ARMV8M-NEXT:    vcmp.f64 d3, d1
+; ARMV8M-NEXT:    vldr d0, .LCPI30_0
+; ARMV8M-NEXT:    vselgt.f64 d0, d2, d0
 ; ARMV8M-NEXT:    vmrs APSR_nzcv, fpscr
-; ARMV8M-NEXT:    vmov r0, r1, d2
-; ARMV8M-NEXT:    vselgt.f64 d0, d1, d0
-; ARMV8M-NEXT:    vmov r2, r3, d0
+; ARMV8M-NEXT:    vmov r0, r1, d0
+; ARMV8M-NEXT:    vselgt.f64 d1, d3, d1
+; ARMV8M-NEXT:    vmov r2, r3, d1
 ; ARMV8M-NEXT:    bx lr
 ; ARMV8M-NEXT:    .p2align 3
 ; ARMV8M-NEXT:  @ %bb.1:
 ; ARMV8M-NEXT:  .LCPI30_0:
-; ARMV8M-NEXT:    .long 0 @ double -0
-; ARMV8M-NEXT:    .long 2147483648
-; ARMV8M-NEXT:  .LCPI30_1:
 ; ARMV8M-NEXT:    .long 0 @ double 0
 ; ARMV8M-NEXT:    .long 0
+; ARMV8M-NEXT:  .LCPI30_1:
+; ARMV8M-NEXT:    .long 0 @ double -0
+; ARMV8M-NEXT:    .long 2147483648
   %a = call nnan <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double><double 0.0, double -0.0>)
   ret <2 x double> %a
 }

diff  --git a/llvm/test/CodeGen/ARM/neon_vabd.ll b/llvm/test/CodeGen/ARM/neon_vabd.ll
index ffc72b242f829f..8a268d46304cf9 100644
--- a/llvm/test/CodeGen/ARM/neon_vabd.ll
+++ b/llvm/test/CodeGen/ARM/neon_vabd.ll
@@ -144,25 +144,25 @@ define <2 x i64> @sabd_2d(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    .save {r4, r5, r6, lr}
 ; CHECK-NEXT:    push {r4, r5, r6, lr}
-; CHECK-NEXT:    vmov r0, r1, d1
+; CHECK-NEXT:    vmov r0, r12, d0
 ; CHECK-NEXT:    mov r6, #0
-; CHECK-NEXT:    vmov r2, r3, d3
-; CHECK-NEXT:    vmov r12, lr, d0
-; CHECK-NEXT:    vmov r4, r5, d2
+; CHECK-NEXT:    vmov r2, r3, d2
+; CHECK-NEXT:    vmov r1, lr, d1
+; CHECK-NEXT:    vmov r4, r5, d3
 ; CHECK-NEXT:    vsub.i64 q8, q0, q1
 ; CHECK-NEXT:    subs r0, r2, r0
-; CHECK-NEXT:    sbcs r0, r3, r1
+; CHECK-NEXT:    sbcs r0, r3, r12
 ; CHECK-NEXT:    mov r0, #0
 ; CHECK-NEXT:    movwlt r0, #1
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mvnne r0, #0
-; CHECK-NEXT:    subs r1, r4, r12
+; CHECK-NEXT:    subs r1, r4, r1
 ; CHECK-NEXT:    sbcs r1, r5, lr
-; CHECK-NEXT:    vdup.32 d19, r0
 ; CHECK-NEXT:    movwlt r6, #1
 ; CHECK-NEXT:    cmp r6, #0
 ; CHECK-NEXT:    mvnne r6, #0
-; CHECK-NEXT:    vdup.32 d18, r6
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    vdup.32 d19, r6
+; CHECK-NEXT:    mvnne r0, #0
+; CHECK-NEXT:    vdup.32 d18, r0
 ; CHECK-NEXT:    veor q8, q8, q9
 ; CHECK-NEXT:    vsub.i64 q0, q9, q8
 ; CHECK-NEXT:    pop {r4, r5, r6, pc}
@@ -475,25 +475,25 @@ define <2 x i64> @smaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    .save {r4, r5, r6, lr}
 ; CHECK-NEXT:    push {r4, r5, r6, lr}
-; CHECK-NEXT:    vmov r0, r1, d1
+; CHECK-NEXT:    vmov r0, r12, d0
 ; CHECK-NEXT:    mov r6, #0
-; CHECK-NEXT:    vmov r2, r3, d3
-; CHECK-NEXT:    vmov r12, lr, d0
-; CHECK-NEXT:    vmov r4, r5, d2
+; CHECK-NEXT:    vmov r2, r3, d2
+; CHECK-NEXT:    vmov r1, lr, d1
+; CHECK-NEXT:    vmov r4, r5, d3
 ; CHECK-NEXT:    vsub.i64 q8, q0, q1
 ; CHECK-NEXT:    subs r0, r2, r0
-; CHECK-NEXT:    sbcs r0, r3, r1
+; CHECK-NEXT:    sbcs r0, r3, r12
 ; CHECK-NEXT:    mov r0, #0
 ; CHECK-NEXT:    movwlt r0, #1
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mvnne r0, #0
-; CHECK-NEXT:    subs r1, r4, r12
+; CHECK-NEXT:    subs r1, r4, r1
 ; CHECK-NEXT:    sbcs r1, r5, lr
-; CHECK-NEXT:    vdup.32 d19, r0
 ; CHECK-NEXT:    movwlt r6, #1
 ; CHECK-NEXT:    cmp r6, #0
 ; CHECK-NEXT:    mvnne r6, #0
-; CHECK-NEXT:    vdup.32 d18, r6
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    vdup.32 d19, r6
+; CHECK-NEXT:    mvnne r0, #0
+; CHECK-NEXT:    vdup.32 d18, r0
 ; CHECK-NEXT:    veor q8, q8, q9
 ; CHECK-NEXT:    vsub.i64 q0, q9, q8
 ; CHECK-NEXT:    pop {r4, r5, r6, pc}

diff  --git a/llvm/test/CodeGen/ARM/overflow-intrinsic-optimizations.ll b/llvm/test/CodeGen/ARM/overflow-intrinsic-optimizations.ll
index 2631189979e37e..198927d1da3a45 100644
--- a/llvm/test/CodeGen/ARM/overflow-intrinsic-optimizations.ll
+++ b/llvm/test/CodeGen/ARM/overflow-intrinsic-optimizations.ll
@@ -164,9 +164,9 @@ cont2:
 
 define void @extern_loop(i32 %n) local_unnamed_addr #0 {
 ; Do not replace the compare around the clobbering call.
-; CHECK: bl external_fn
-; CHECK-NEXT: adds
-; CHECK-NEXT: bvs
+; CHECK: add {{r[0-9]+}}, {{r[0-9]+}}, #1
+; CHECK-NEXT: bl external_fn
+; CHECK: cmp
 entry:
   %0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %n, i32 1)
   %1 = extractvalue { i32, i1 } %0, 1

diff  --git a/llvm/test/CodeGen/ARM/sadd_sat.ll b/llvm/test/CodeGen/ARM/sadd_sat.ll
index b8f7a2daaeabab..0060b4458081bc 100644
--- a/llvm/test/CodeGen/ARM/sadd_sat.ll
+++ b/llvm/test/CodeGen/ARM/sadd_sat.ll
@@ -72,21 +72,22 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
 ; CHECK-T16-NEXT:    adcs r3, r4
 ; CHECK-T16-NEXT:    eors r4, r3
 ; CHECK-T16-NEXT:    bics r4, r1
-; CHECK-T16-NEXT:    asrs r0, r3, #31
-; CHECK-T16-NEXT:    movs r1, #1
-; CHECK-T16-NEXT:    lsls r1, r1, #31
-; CHECK-T16-NEXT:    eors r1, r0
+; CHECK-T16-NEXT:    asrs r1, r3, #31
 ; CHECK-T16-NEXT:    cmp r4, #0
-; CHECK-T16-NEXT:    bpl .LBB1_3
+; CHECK-T16-NEXT:    mov r0, r1
+; CHECK-T16-NEXT:    bmi .LBB1_2
 ; CHECK-T16-NEXT:  @ %bb.1:
-; CHECK-T16-NEXT:    bpl .LBB1_4
+; CHECK-T16-NEXT:    mov r0, r2
 ; CHECK-T16-NEXT:  .LBB1_2:
+; CHECK-T16-NEXT:    cmp r4, #0
+; CHECK-T16-NEXT:    bmi .LBB1_4
+; CHECK-T16-NEXT:  @ %bb.3:
+; CHECK-T16-NEXT:    mov r1, r3
 ; CHECK-T16-NEXT:    pop {r4, pc}
-; CHECK-T16-NEXT:  .LBB1_3:
-; CHECK-T16-NEXT:    mov r0, r2
-; CHECK-T16-NEXT:    bmi .LBB1_2
 ; CHECK-T16-NEXT:  .LBB1_4:
-; CHECK-T16-NEXT:    mov r1, r3
+; CHECK-T16-NEXT:    movs r2, #1
+; CHECK-T16-NEXT:    lsls r2, r2, #31
+; CHECK-T16-NEXT:    eors r1, r2
 ; CHECK-T16-NEXT:    pop {r4, pc}
 ;
 ; CHECK-T2-LABEL: func2:
@@ -127,22 +128,23 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
 ; CHECK-T15TE-NEXT:    adcs r3, r4
 ; CHECK-T15TE-NEXT:    eors r4, r3
 ; CHECK-T15TE-NEXT:    bics r4, r1
-; CHECK-T15TE-NEXT:    asrs r0, r3, #31
-; CHECK-T15TE-NEXT:    movs r1, #1
-; CHECK-T15TE-NEXT:    lsls r1, r1, #31
-; CHECK-T15TE-NEXT:    eors r1, r0
+; CHECK-T15TE-NEXT:    asrs r1, r3, #31
 ; CHECK-T15TE-NEXT:    cmp r4, #0
-; CHECK-T15TE-NEXT:    bpl .LBB1_3
+; CHECK-T15TE-NEXT:    mov r12, r1
+; CHECK-T15TE-NEXT:    mov r0, r12
+; CHECK-T15TE-NEXT:    bmi .LBB1_2
 ; CHECK-T15TE-NEXT:  @ %bb.1:
-; CHECK-T15TE-NEXT:    bpl .LBB1_4
+; CHECK-T15TE-NEXT:    movs r0, r2
 ; CHECK-T15TE-NEXT:  .LBB1_2:
+; CHECK-T15TE-NEXT:    cmp r4, #0
+; CHECK-T15TE-NEXT:    bmi .LBB1_4
+; CHECK-T15TE-NEXT:  @ %bb.3:
+; CHECK-T15TE-NEXT:    movs r1, r3
 ; CHECK-T15TE-NEXT:    pop {r4, pc}
-; CHECK-T15TE-NEXT:  .LBB1_3:
-; CHECK-T15TE-NEXT:    mov r12, r2
-; CHECK-T15TE-NEXT:    mov r0, r12
-; CHECK-T15TE-NEXT:    bmi .LBB1_2
 ; CHECK-T15TE-NEXT:  .LBB1_4:
-; CHECK-T15TE-NEXT:    movs r1, r3
+; CHECK-T15TE-NEXT:    movs r2, #1
+; CHECK-T15TE-NEXT:    lsls r2, r2, #31
+; CHECK-T15TE-NEXT:    eors r1, r2
 ; CHECK-T15TE-NEXT:    pop {r4, pc}
   %tmp = call i64 @llvm.sadd.sat.i64(i64 %x, i64 %y)
   ret i64 %tmp

diff  --git a/llvm/test/CodeGen/ARM/sadd_sat_plus.ll b/llvm/test/CodeGen/ARM/sadd_sat_plus.ll
index 0ddb64fc3f2d13..859aedc7a3f019 100644
--- a/llvm/test/CodeGen/ARM/sadd_sat_plus.ll
+++ b/llvm/test/CodeGen/ARM/sadd_sat_plus.ll
@@ -63,22 +63,22 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
 ; CHECK-T1-NEXT:    adcs r3, r1
 ; CHECK-T1-NEXT:    eors r1, r3
 ; CHECK-T1-NEXT:    bics r1, r2
-; CHECK-T1-NEXT:    asrs r0, r3, #31
-; CHECK-T1-NEXT:    movs r2, #1
-; CHECK-T1-NEXT:    lsls r2, r2, #31
-; CHECK-T1-NEXT:    eors r2, r0
+; CHECK-T1-NEXT:    asrs r2, r3, #31
 ; CHECK-T1-NEXT:    cmp r1, #0
-; CHECK-T1-NEXT:    bpl .LBB1_3
+; CHECK-T1-NEXT:    mov r0, r2
+; CHECK-T1-NEXT:    bmi .LBB1_2
 ; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:    bpl .LBB1_4
+; CHECK-T1-NEXT:    mov r0, r4
 ; CHECK-T1-NEXT:  .LBB1_2:
-; CHECK-T1-NEXT:    mov r1, r2
+; CHECK-T1-NEXT:    cmp r1, #0
+; CHECK-T1-NEXT:    bmi .LBB1_4
+; CHECK-T1-NEXT:  @ %bb.3:
+; CHECK-T1-NEXT:    mov r1, r3
 ; CHECK-T1-NEXT:    pop {r4, pc}
-; CHECK-T1-NEXT:  .LBB1_3:
-; CHECK-T1-NEXT:    mov r0, r4
-; CHECK-T1-NEXT:    bmi .LBB1_2
 ; CHECK-T1-NEXT:  .LBB1_4:
-; CHECK-T1-NEXT:    mov r2, r3
+; CHECK-T1-NEXT:    movs r1, #1
+; CHECK-T1-NEXT:    lsls r1, r1, #31
+; CHECK-T1-NEXT:    eors r2, r1
 ; CHECK-T1-NEXT:    mov r1, r2
 ; CHECK-T1-NEXT:    pop {r4, pc}
 ;

diff  --git a/llvm/test/CodeGen/ARM/select.ll b/llvm/test/CodeGen/ARM/select.ll
index 48d6ee925d939e..496a6c0f5acbbe 100644
--- a/llvm/test/CodeGen/ARM/select.ll
+++ b/llvm/test/CodeGen/ARM/select.ll
@@ -320,11 +320,11 @@ define float @f10(i32 %a, i32 %b) nounwind uwtable readnone ssp {
 ;
 ; CHECK-VFP-LABEL: f10:
 ; CHECK-VFP:       @ %bb.0:
-; CHECK-VFP-NEXT:    vmov.f32 s0, #1.000000e+00
-; CHECK-VFP-NEXT:    vldr s2, .LCPI9_0
+; CHECK-VFP-NEXT:    vmov.f32 s2, #1.000000e+00
+; CHECK-VFP-NEXT:    vldr s0, .LCPI9_0
 ; CHECK-VFP-NEXT:    cmp r0, r1
-; CHECK-VFP-NEXT:    vmoveq.f32 s2, s0
-; CHECK-VFP-NEXT:    vmov r0, s2
+; CHECK-VFP-NEXT:    vmoveq.f32 s0, s2
+; CHECK-VFP-NEXT:    vmov r0, s0
 ; CHECK-VFP-NEXT:    bx lr
 ; CHECK-VFP-NEXT:    .p2align 2
 ; CHECK-VFP-NEXT:  @ %bb.1:
@@ -333,12 +333,12 @@ define float @f10(i32 %a, i32 %b) nounwind uwtable readnone ssp {
 ;
 ; CHECK-NEON-LABEL: f10:
 ; CHECK-NEON:       @ %bb.0:
-; CHECK-NEON-NEXT:    vldr s2, LCPI9_0
-; CHECK-NEON-NEXT:    vmov.f32 s0, #1.000000e+00
+; CHECK-NEON-NEXT:    vldr s0, LCPI9_0
+; CHECK-NEON-NEXT:    vmov.f32 s2, #1.000000e+00
 ; CHECK-NEON-NEXT:    cmp r0, r1
 ; CHECK-NEON-NEXT:    it eq
-; CHECK-NEON-NEXT:    vmoveq.f32 s2, s0
-; CHECK-NEON-NEXT:    vmov r0, s2
+; CHECK-NEON-NEXT:    vmoveq.f32 s0, s2
+; CHECK-NEON-NEXT:    vmov r0, s0
 ; CHECK-NEON-NEXT:    bx lr
 ; CHECK-NEON-NEXT:    .p2align 2
 ; CHECK-NEON-NEXT:  @ %bb.1:
@@ -364,11 +364,11 @@ define float @f11(i32 %a, i32 %b) nounwind uwtable readnone ssp {
 ;
 ; CHECK-VFP-LABEL: f11:
 ; CHECK-VFP:       @ %bb.0:
-; CHECK-VFP-NEXT:    vmov.f32 s0, #-1.000000e+00
-; CHECK-VFP-NEXT:    vldr s2, .LCPI10_0
+; CHECK-VFP-NEXT:    vmov.f32 s2, #-1.000000e+00
+; CHECK-VFP-NEXT:    vldr s0, .LCPI10_0
 ; CHECK-VFP-NEXT:    cmp r0, r1
-; CHECK-VFP-NEXT:    vmoveq.f32 s2, s0
-; CHECK-VFP-NEXT:    vmov r0, s2
+; CHECK-VFP-NEXT:    vmoveq.f32 s0, s2
+; CHECK-VFP-NEXT:    vmov r0, s0
 ; CHECK-VFP-NEXT:    bx lr
 ; CHECK-VFP-NEXT:    .p2align 2
 ; CHECK-VFP-NEXT:  @ %bb.1:
@@ -377,12 +377,12 @@ define float @f11(i32 %a, i32 %b) nounwind uwtable readnone ssp {
 ;
 ; CHECK-NEON-LABEL: f11:
 ; CHECK-NEON:       @ %bb.0:
-; CHECK-NEON-NEXT:    vldr s2, LCPI10_0
-; CHECK-NEON-NEXT:    vmov.f32 s0, #-1.000000e+00
+; CHECK-NEON-NEXT:    vldr s0, LCPI10_0
+; CHECK-NEON-NEXT:    vmov.f32 s2, #-1.000000e+00
 ; CHECK-NEON-NEXT:    cmp r0, r1
 ; CHECK-NEON-NEXT:    it eq
-; CHECK-NEON-NEXT:    vmoveq.f32 s2, s0
-; CHECK-NEON-NEXT:    vmov r0, s2
+; CHECK-NEON-NEXT:    vmoveq.f32 s0, s2
+; CHECK-NEON-NEXT:    vmov r0, s0
 ; CHECK-NEON-NEXT:    bx lr
 ; CHECK-NEON-NEXT:    .p2align 2
 ; CHECK-NEON-NEXT:  @ %bb.1:
@@ -406,11 +406,11 @@ define float @f12(i32 %a, i32 %b) nounwind uwtable readnone ssp {
 ;
 ; CHECK-VFP-LABEL: f12:
 ; CHECK-VFP:       @ %bb.0:
-; CHECK-VFP-NEXT:    vmov.f32 s0, #1.000000e+00
-; CHECK-VFP-NEXT:    vldr s2, .LCPI11_0
+; CHECK-VFP-NEXT:    vmov.f32 s2, #1.000000e+00
+; CHECK-VFP-NEXT:    vldr s0, .LCPI11_0
 ; CHECK-VFP-NEXT:    cmp r0, r1
-; CHECK-VFP-NEXT:    vmoveq.f32 s2, s0
-; CHECK-VFP-NEXT:    vmov r0, s2
+; CHECK-VFP-NEXT:    vmoveq.f32 s0, s2
+; CHECK-VFP-NEXT:    vmov r0, s0
 ; CHECK-VFP-NEXT:    bx lr
 ; CHECK-VFP-NEXT:    .p2align 2
 ; CHECK-VFP-NEXT:  @ %bb.1:
@@ -419,12 +419,12 @@ define float @f12(i32 %a, i32 %b) nounwind uwtable readnone ssp {
 ;
 ; CHECK-NEON-LABEL: f12:
 ; CHECK-NEON:       @ %bb.0:
-; CHECK-NEON-NEXT:    vldr s2, LCPI11_0
-; CHECK-NEON-NEXT:    vmov.f32 s0, #1.000000e+00
+; CHECK-NEON-NEXT:    vldr s0, LCPI11_0
+; CHECK-NEON-NEXT:    vmov.f32 s2, #1.000000e+00
 ; CHECK-NEON-NEXT:    cmp r0, r1
 ; CHECK-NEON-NEXT:    it eq
-; CHECK-NEON-NEXT:    vmoveq.f32 s2, s0
-; CHECK-NEON-NEXT:    vmov r0, s2
+; CHECK-NEON-NEXT:    vmoveq.f32 s0, s2
+; CHECK-NEON-NEXT:    vmov r0, s0
 ; CHECK-NEON-NEXT:    bx lr
 ; CHECK-NEON-NEXT:    .p2align 2
 ; CHECK-NEON-NEXT:  @ %bb.1:

diff  --git a/llvm/test/CodeGen/ARM/select_const.ll b/llvm/test/CodeGen/ARM/select_const.ll
index 180daa12e7c52d..df95af313eac66 100644
--- a/llvm/test/CodeGen/ARM/select_const.ll
+++ b/llvm/test/CodeGen/ARM/select_const.ll
@@ -645,13 +645,12 @@ define i64 @opaque_constant1(i1 %cond, i64 %x) {
 ; THUMB2-NEXT:    push {r7, lr}
 ; THUMB2-NEXT:    ands r12, r0, #1
 ; THUMB2-NEXT:    mov.w lr, #1
-; THUMB2-NEXT:    it ne
-; THUMB2-NEXT:    movne.w r12, #1
-; THUMB2-NEXT:    it ne
+; THUMB2-NEXT:    itt ne
 ; THUMB2-NEXT:    movne.w lr, #65536
+; THUMB2-NEXT:    movne.w r12, #1
 ; THUMB2-NEXT:    subs.w r0, lr, #1
-; THUMB2-NEXT:    eor r3, r3, #1
 ; THUMB2-NEXT:    sbc r1, r12, #0
+; THUMB2-NEXT:    eor r3, r3, #1
 ; THUMB2-NEXT:    eor r2, r2, #65537
 ; THUMB2-NEXT:    orrs r2, r3
 ; THUMB2-NEXT:    itt ne
@@ -689,12 +688,11 @@ define i64 @opaque_constant1(i1 %cond, i64 %x) {
 ; THUMB-NEXT:    ldr r6, .LCPI24_0
 ; THUMB-NEXT:    eors r2, r6
 ; THUMB-NEXT:    orrs r2, r3
-; THUMB-NEXT:    cmp r2, #0
 ; THUMB-NEXT:    beq .LBB24_5
 ; THUMB-NEXT:  @ %bb.4:
-; THUMB-NEXT:    mov r12, r4
-; THUMB-NEXT:    mov r1, r12
+; THUMB-NEXT:    movs r1, r4
 ; THUMB-NEXT:  .LBB24_5:
+; THUMB-NEXT:    cmp r2, #0
 ; THUMB-NEXT:    beq .LBB24_7
 ; THUMB-NEXT:  @ %bb.6:
 ; THUMB-NEXT:    movs r0, r5

diff  --git a/llvm/test/CodeGen/ARM/shift-i64.ll b/llvm/test/CodeGen/ARM/shift-i64.ll
index c326ac1529b2f5..33e0ba1457e72b 100644
--- a/llvm/test/CodeGen/ARM/shift-i64.ll
+++ b/llvm/test/CodeGen/ARM/shift-i64.ll
@@ -52,14 +52,14 @@ define i64 @test_lshr(i64 %val, i64 %amt) {
 define i64 @test_ashr(i64 %val, i64 %amt) {
 ; CHECK-LABEL: test_ashr:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    rsb r3, r2, #32
+; CHECK-NEXT:    asr r3, r1, r2
+; CHECK-NEXT:    subs r12, r2, #32
 ; CHECK-NEXT:    lsr r0, r0, r2
-; CHECK-NEXT:    orr r0, r0, r1, lsl r3
-; CHECK-NEXT:    subs r3, r2, #32
-; CHECK-NEXT:    asr r2, r1, r2
-; CHECK-NEXT:    asrpl r2, r1, #31
-; CHECK-NEXT:    asrpl r0, r1, r3
-; CHECK-NEXT:    mov r1, r2
+; CHECK-NEXT:    rsb r2, r2, #32
+; CHECK-NEXT:    asrpl r3, r1, #31
+; CHECK-NEXT:    orr r0, r0, r1, lsl r2
+; CHECK-NEXT:    asrpl r0, r1, r12
+; CHECK-NEXT:    mov r1, r3
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; EXPAND-LABEL: test_ashr:

diff  --git a/llvm/test/CodeGen/ARM/ssub_sat.ll b/llvm/test/CodeGen/ARM/ssub_sat.ll
index 0978bfd1f0140b..1bafba3b49ed7d 100644
--- a/llvm/test/CodeGen/ARM/ssub_sat.ll
+++ b/llvm/test/CodeGen/ARM/ssub_sat.ll
@@ -71,21 +71,22 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
 ; CHECK-T1-NEXT:    sbcs r2, r3
 ; CHECK-T1-NEXT:    eors r4, r2
 ; CHECK-T1-NEXT:    ands r4, r1
-; CHECK-T1-NEXT:    asrs r0, r2, #31
-; CHECK-T1-NEXT:    movs r1, #1
-; CHECK-T1-NEXT:    lsls r1, r1, #31
-; CHECK-T1-NEXT:    eors r1, r0
+; CHECK-T1-NEXT:    asrs r1, r2, #31
 ; CHECK-T1-NEXT:    cmp r4, #0
-; CHECK-T1-NEXT:    bpl .LBB1_3
+; CHECK-T1-NEXT:    mov r0, r1
+; CHECK-T1-NEXT:    bmi .LBB1_2
 ; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:    bpl .LBB1_4
+; CHECK-T1-NEXT:    mov r0, r5
 ; CHECK-T1-NEXT:  .LBB1_2:
+; CHECK-T1-NEXT:    cmp r4, #0
+; CHECK-T1-NEXT:    bmi .LBB1_4
+; CHECK-T1-NEXT:  @ %bb.3:
+; CHECK-T1-NEXT:    mov r1, r2
 ; CHECK-T1-NEXT:    pop {r4, r5, r7, pc}
-; CHECK-T1-NEXT:  .LBB1_3:
-; CHECK-T1-NEXT:    mov r0, r5
-; CHECK-T1-NEXT:    bmi .LBB1_2
 ; CHECK-T1-NEXT:  .LBB1_4:
-; CHECK-T1-NEXT:    mov r1, r2
+; CHECK-T1-NEXT:    movs r2, #1
+; CHECK-T1-NEXT:    lsls r2, r2, #31
+; CHECK-T1-NEXT:    eors r1, r2
 ; CHECK-T1-NEXT:    pop {r4, r5, r7, pc}
 ;
 ; CHECK-T2-LABEL: func2:

diff  --git a/llvm/test/CodeGen/ARM/ssub_sat_plus.ll b/llvm/test/CodeGen/ARM/ssub_sat_plus.ll
index adf6cafc6ccb87..0a2d1f0e7a240e 100644
--- a/llvm/test/CodeGen/ARM/ssub_sat_plus.ll
+++ b/llvm/test/CodeGen/ARM/ssub_sat_plus.ll
@@ -65,22 +65,22 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
 ; CHECK-T1-NEXT:    sbcs r3, r2
 ; CHECK-T1-NEXT:    eors r1, r3
 ; CHECK-T1-NEXT:    ands r1, r5
-; CHECK-T1-NEXT:    asrs r0, r3, #31
-; CHECK-T1-NEXT:    movs r2, #1
-; CHECK-T1-NEXT:    lsls r2, r2, #31
-; CHECK-T1-NEXT:    eors r2, r0
+; CHECK-T1-NEXT:    asrs r2, r3, #31
 ; CHECK-T1-NEXT:    cmp r1, #0
-; CHECK-T1-NEXT:    bpl .LBB1_3
+; CHECK-T1-NEXT:    mov r0, r2
+; CHECK-T1-NEXT:    bmi .LBB1_2
 ; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:    bpl .LBB1_4
+; CHECK-T1-NEXT:    mov r0, r4
 ; CHECK-T1-NEXT:  .LBB1_2:
-; CHECK-T1-NEXT:    mov r1, r2
+; CHECK-T1-NEXT:    cmp r1, #0
+; CHECK-T1-NEXT:    bmi .LBB1_4
+; CHECK-T1-NEXT:  @ %bb.3:
+; CHECK-T1-NEXT:    mov r1, r3
 ; CHECK-T1-NEXT:    pop {r4, r5, r7, pc}
-; CHECK-T1-NEXT:  .LBB1_3:
-; CHECK-T1-NEXT:    mov r0, r4
-; CHECK-T1-NEXT:    bmi .LBB1_2
 ; CHECK-T1-NEXT:  .LBB1_4:
-; CHECK-T1-NEXT:    mov r2, r3
+; CHECK-T1-NEXT:    movs r1, #1
+; CHECK-T1-NEXT:    lsls r1, r1, #31
+; CHECK-T1-NEXT:    eors r2, r1
 ; CHECK-T1-NEXT:    mov r1, r2
 ; CHECK-T1-NEXT:    pop {r4, r5, r7, pc}
 ;

diff  --git a/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll b/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll
index fb966c29f39a2e..046bbbde686426 100644
--- a/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll
+++ b/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll
@@ -136,8 +136,8 @@ define float @float_sel(i32 %a, i32 %b, float %x, float %y) {
 ;
 ; CHECK-V8-LABEL: float_sel:
 ; CHECK-V8:       @ %bb.0: @ %entry
-; CHECK-V8-NEXT:    subs r0, r0, r1
 ; CHECK-V8-NEXT:    vmov s0, r3
+; CHECK-V8-NEXT:    subs r0, r0, r1
 ; CHECK-V8-NEXT:    vmov s2, r2
 ; CHECK-V8-NEXT:    vseleq.f32 s0, s2, s0
 ; CHECK-V8-NEXT:    vmov r0, s0

diff  --git a/llvm/test/CodeGen/ARM/uadd_sat.ll b/llvm/test/CodeGen/ARM/uadd_sat.ll
index 5549d9c6c29c03..39c79f4104e6e6 100644
--- a/llvm/test/CodeGen/ARM/uadd_sat.ll
+++ b/llvm/test/CodeGen/ARM/uadd_sat.ll
@@ -45,19 +45,21 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
 ; CHECK-T1-NEXT:    movs r5, #0
 ; CHECK-T1-NEXT:    adds r4, r0, r2
 ; CHECK-T1-NEXT:    adcs r1, r3
-; CHECK-T1-NEXT:    mov r0, r5
-; CHECK-T1-NEXT:    adcs r0, r5
+; CHECK-T1-NEXT:    mov r3, r5
+; CHECK-T1-NEXT:    adcs r3, r5
 ; CHECK-T1-NEXT:    mvns r2, r5
-; CHECK-T1-NEXT:    cmp r0, #0
+; CHECK-T1-NEXT:    cmp r3, #0
 ; CHECK-T1-NEXT:    mov r0, r2
 ; CHECK-T1-NEXT:    beq .LBB1_3
 ; CHECK-T1-NEXT:  @ %bb.1:
+; CHECK-T1-NEXT:    cmp r3, #0
 ; CHECK-T1-NEXT:    beq .LBB1_4
 ; CHECK-T1-NEXT:  .LBB1_2:
 ; CHECK-T1-NEXT:    mov r1, r2
 ; CHECK-T1-NEXT:    pop {r4, r5, r7, pc}
 ; CHECK-T1-NEXT:  .LBB1_3:
 ; CHECK-T1-NEXT:    mov r0, r4
+; CHECK-T1-NEXT:    cmp r3, #0
 ; CHECK-T1-NEXT:    bne .LBB1_2
 ; CHECK-T1-NEXT:  .LBB1_4:
 ; CHECK-T1-NEXT:    mov r2, r1

diff  --git a/llvm/test/CodeGen/ARM/uadd_sat_plus.ll b/llvm/test/CodeGen/ARM/uadd_sat_plus.ll
index ffacba8cf01240..451b32f7304240 100644
--- a/llvm/test/CodeGen/ARM/uadd_sat_plus.ll
+++ b/llvm/test/CodeGen/ARM/uadd_sat_plus.ll
@@ -44,29 +44,31 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
 define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
 ; CHECK-T1-LABEL: func64:
 ; CHECK-T1:       @ %bb.0:
-; CHECK-T1-NEXT:    .save {r4, lr}
-; CHECK-T1-NEXT:    push {r4, lr}
-; CHECK-T1-NEXT:    movs r4, #0
-; CHECK-T1-NEXT:    ldr r2, [sp, #12]
-; CHECK-T1-NEXT:    ldr r3, [sp, #8]
+; CHECK-T1-NEXT:    .save {r4, r5, r7, lr}
+; CHECK-T1-NEXT:    push {r4, r5, r7, lr}
+; CHECK-T1-NEXT:    movs r5, #0
+; CHECK-T1-NEXT:    ldr r2, [sp, #20]
+; CHECK-T1-NEXT:    ldr r3, [sp, #16]
 ; CHECK-T1-NEXT:    adds r3, r0, r3
 ; CHECK-T1-NEXT:    adcs r2, r1
-; CHECK-T1-NEXT:    mov r0, r4
-; CHECK-T1-NEXT:    adcs r0, r4
-; CHECK-T1-NEXT:    mvns r1, r4
-; CHECK-T1-NEXT:    cmp r0, #0
+; CHECK-T1-NEXT:    mov r4, r5
+; CHECK-T1-NEXT:    adcs r4, r5
+; CHECK-T1-NEXT:    mvns r1, r5
+; CHECK-T1-NEXT:    cmp r4, #0
 ; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:    beq .LBB1_3
 ; CHECK-T1-NEXT:  @ %bb.1:
+; CHECK-T1-NEXT:    cmp r4, #0
 ; CHECK-T1-NEXT:    beq .LBB1_4
 ; CHECK-T1-NEXT:  .LBB1_2:
-; CHECK-T1-NEXT:    pop {r4, pc}
+; CHECK-T1-NEXT:    pop {r4, r5, r7, pc}
 ; CHECK-T1-NEXT:  .LBB1_3:
 ; CHECK-T1-NEXT:    mov r0, r3
+; CHECK-T1-NEXT:    cmp r4, #0
 ; CHECK-T1-NEXT:    bne .LBB1_2
 ; CHECK-T1-NEXT:  .LBB1_4:
 ; CHECK-T1-NEXT:    mov r1, r2
-; CHECK-T1-NEXT:    pop {r4, pc}
+; CHECK-T1-NEXT:    pop {r4, r5, r7, pc}
 ;
 ; CHECK-T2-LABEL: func64:
 ; CHECK-T2:       @ %bb.0:

diff  --git a/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll
index 4eb82c80e2bff3..464808ec8861b3 100644
--- a/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll
@@ -7,209 +7,207 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
 ; ARMV6:       @ %bb.0: @ %start
 ; ARMV6-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; ARMV6-NEXT:    sub sp, sp, #28
-; ARMV6-NEXT:    ldr r4, [sp, #72]
-; ARMV6-NEXT:    mov r7, r0
-; ARMV6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; ARMV6-NEXT:    ldr r12, [sp, #64]
-; ARMV6-NEXT:    umull r1, r0, r2, r4
-; ARMV6-NEXT:    ldr r5, [sp, #68]
-; ARMV6-NEXT:    str r1, [r7]
-; ARMV6-NEXT:    ldr r1, [sp, #76]
-; ARMV6-NEXT:    umull r7, r6, r1, r12
-; ARMV6-NEXT:    str r6, [sp, #8] @ 4-byte Spill
-; ARMV6-NEXT:    umull r6, r9, r5, r4
-; ARMV6-NEXT:    add r7, r6, r7
-; ARMV6-NEXT:    umull r4, r6, r12, r4
-; ARMV6-NEXT:    str r4, [sp, #16] @ 4-byte Spill
-; ARMV6-NEXT:    mov r4, #0
-; ARMV6-NEXT:    adds r8, r6, r7
-; ARMV6-NEXT:    ldr r6, [sp, #80]
-; ARMV6-NEXT:    adc r7, r4, #0
+; ARMV6-NEXT:    ldr lr, [sp, #72]
+; ARMV6-NEXT:    mov r6, r0
+; ARMV6-NEXT:    str r0, [sp, #8] @ 4-byte Spill
 ; ARMV6-NEXT:    ldr r4, [sp, #84]
-; ARMV6-NEXT:    str r7, [sp, #24] @ 4-byte Spill
-; ARMV6-NEXT:    umull r12, lr, r3, r6
-; ARMV6-NEXT:    umull r11, r7, r4, r2
+; ARMV6-NEXT:    umull r1, r0, r2, lr
+; ARMV6-NEXT:    umull r5, r10, r4, r2
+; ARMV6-NEXT:    str r1, [r6]
+; ARMV6-NEXT:    ldr r6, [sp, #80]
+; ARMV6-NEXT:    umull r1, r7, r3, r6
+; ARMV6-NEXT:    str r7, [sp, #12] @ 4-byte Spill
+; ARMV6-NEXT:    add r1, r5, r1
+; ARMV6-NEXT:    umull r7, r5, r6, r2
+; ARMV6-NEXT:    mov r6, lr
+; ARMV6-NEXT:    str r7, [sp, #16] @ 4-byte Spill
+; ARMV6-NEXT:    mov r7, #0
+; ARMV6-NEXT:    adds r1, r5, r1
+; ARMV6-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; ARMV6-NEXT:    adc r1, r7, #0
+; ARMV6-NEXT:    str r1, [sp, #24] @ 4-byte Spill
+; ARMV6-NEXT:    ldr r1, [sp, #64]
+; ARMV6-NEXT:    ldr r7, [sp, #76]
+; ARMV6-NEXT:    ldr r5, [sp, #64]
+; ARMV6-NEXT:    umull r12, r9, r7, r1
+; ARMV6-NEXT:    ldr r1, [sp, #68]
+; ARMV6-NEXT:    umull r11, r8, r1, lr
 ; ARMV6-NEXT:    add r12, r11, r12
-; ARMV6-NEXT:    umull r11, r10, r6, r2
-; ARMV6-NEXT:    adds r12, r10, r12
-; ARMV6-NEXT:    mov r10, #0
-; ARMV6-NEXT:    adc r6, r10, #0
+; ARMV6-NEXT:    umull r11, lr, r5, lr
+; ARMV6-NEXT:    mov r5, r6
+; ARMV6-NEXT:    mov r6, #0
+; ARMV6-NEXT:    adds r12, lr, r12
+; ARMV6-NEXT:    umull r2, lr, r2, r7
+; ARMV6-NEXT:    adc r6, r6, #0
 ; ARMV6-NEXT:    str r6, [sp, #20] @ 4-byte Spill
 ; ARMV6-NEXT:    ldr r6, [sp, #16] @ 4-byte Reload
-; ARMV6-NEXT:    adds r6, r6, r11
-; ARMV6-NEXT:    str r6, [sp, #12] @ 4-byte Spill
-; ARMV6-NEXT:    adc r6, r8, r12
-; ARMV6-NEXT:    str r6, [sp, #16] @ 4-byte Spill
-; ARMV6-NEXT:    ldr r6, [sp, #72]
+; ARMV6-NEXT:    adds r11, r11, r6
+; ARMV6-NEXT:    ldr r6, [sp, #4] @ 4-byte Reload
+; ARMV6-NEXT:    adc r6, r12, r6
 ; ARMV6-NEXT:    mov r12, #0
-; ARMV6-NEXT:    umull r2, r8, r2, r1
-; ARMV6-NEXT:    umlal r0, r12, r3, r6
+; ARMV6-NEXT:    umlal r0, r12, r3, r5
+; ARMV6-NEXT:    ldr r5, [sp, #8] @ 4-byte Reload
+; ARMV6-NEXT:    str r6, [sp, #16] @ 4-byte Spill
+; ARMV6-NEXT:    ldr r6, [sp, #64]
 ; ARMV6-NEXT:    adds r0, r2, r0
-; ARMV6-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
-; ARMV6-NEXT:    adcs r8, r12, r8
-; ARMV6-NEXT:    adc r12, r10, #0
-; ARMV6-NEXT:    cmp lr, #0
-; ARMV6-NEXT:    str r0, [r2, #4]
+; ARMV6-NEXT:    str r0, [r5, #4]
+; ARMV6-NEXT:    adcs r0, r12, lr
+; ARMV6-NEXT:    mov r2, #0
+; ARMV6-NEXT:    adc r2, r2, #0
+; ARMV6-NEXT:    orrs lr, r6, r1
+; ARMV6-NEXT:    ldr r6, [sp, #80]
 ; ARMV6-NEXT:    movne lr, #1
-; ARMV6-NEXT:    ldr r11, [sp, #8] @ 4-byte Reload
-; ARMV6-NEXT:    cmp r7, #0
-; ARMV6-NEXT:    movne r7, #1
-; ARMV6-NEXT:    ldr r0, [sp, #64]
-; ARMV6-NEXT:    cmp r11, #0
-; ARMV6-NEXT:    umlal r8, r12, r3, r1
-; ARMV6-NEXT:    movne r11, #1
+; ARMV6-NEXT:    umlal r0, r2, r3, r7
+; ARMV6-NEXT:    orrs r12, r6, r4
+; ARMV6-NEXT:    movne r12, #1
 ; ARMV6-NEXT:    cmp r9, #0
+; ARMV6-NEXT:    ldr r6, [sp, #12] @ 4-byte Reload
 ; ARMV6-NEXT:    movne r9, #1
-; ARMV6-NEXT:    orrs r10, r0, r5
-; ARMV6-NEXT:    ldr r0, [sp, #80]
+; ARMV6-NEXT:    cmp r8, #0
+; ARMV6-NEXT:    movne r8, #1
+; ARMV6-NEXT:    cmp r6, #0
+; ARMV6-NEXT:    movne r6, #1
+; ARMV6-NEXT:    cmp r10, #0
 ; ARMV6-NEXT:    movne r10, #1
-; ARMV6-NEXT:    ldr r6, [sp, #12] @ 4-byte Reload
-; ARMV6-NEXT:    orrs r0, r0, r4
-; ARMV6-NEXT:    movne r0, #1
+; ARMV6-NEXT:    cmp r1, #0
+; ARMV6-NEXT:    movne r1, #1
+; ARMV6-NEXT:    cmp r7, #0
+; ARMV6-NEXT:    movne r7, #1
 ; ARMV6-NEXT:    cmp r4, #0
 ; ARMV6-NEXT:    movne r4, #1
 ; ARMV6-NEXT:    cmp r3, #0
 ; ARMV6-NEXT:    movne r3, #1
-; ARMV6-NEXT:    cmp r5, #0
-; ARMV6-NEXT:    movne r5, #1
-; ARMV6-NEXT:    cmp r1, #0
-; ARMV6-NEXT:    movne r1, #1
-; ARMV6-NEXT:    adds r6, r8, r6
-; ARMV6-NEXT:    str r6, [r2, #8]
-; ARMV6-NEXT:    and r1, r5, r1
-; ARMV6-NEXT:    ldr r6, [sp, #16] @ 4-byte Reload
+; ARMV6-NEXT:    adds r0, r0, r11
+; ARMV6-NEXT:    str r0, [r5, #8]
+; ARMV6-NEXT:    and r1, r1, r7
+; ARMV6-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
+; ARMV6-NEXT:    orr r1, r1, r8
 ; ARMV6-NEXT:    orr r1, r1, r9
-; ARMV6-NEXT:    orr r1, r1, r11
-; ARMV6-NEXT:    and r0, r10, r0
-; ARMV6-NEXT:    adcs r6, r12, r6
-; ARMV6-NEXT:    str r6, [r2, #12]
-; ARMV6-NEXT:    ldr r6, [sp, #24] @ 4-byte Reload
-; ARMV6-NEXT:    orr r1, r1, r6
-; ARMV6-NEXT:    orr r0, r0, r1
-; ARMV6-NEXT:    and r1, r4, r3
-; ARMV6-NEXT:    orr r1, r1, r7
-; ARMV6-NEXT:    ldr r3, [sp, #20] @ 4-byte Reload
-; ARMV6-NEXT:    orr r1, r1, lr
-; ARMV6-NEXT:    orr r1, r1, r3
-; ARMV6-NEXT:    orr r0, r0, r1
+; ARMV6-NEXT:    adcs r0, r2, r0
+; ARMV6-NEXT:    str r0, [r5, #12]
+; ARMV6-NEXT:    and r0, r4, r3
+; ARMV6-NEXT:    ldr r2, [sp, #24] @ 4-byte Reload
+; ARMV6-NEXT:    orr r0, r0, r10
+; ARMV6-NEXT:    orr r0, r0, r6
+; ARMV6-NEXT:    orr r0, r0, r2
+; ARMV6-NEXT:    ldr r2, [sp, #20] @ 4-byte Reload
+; ARMV6-NEXT:    orr r1, r1, r2
+; ARMV6-NEXT:    and r2, lr, r12
+; ARMV6-NEXT:    orr r1, r2, r1
+; ARMV6-NEXT:    orr r0, r1, r0
 ; ARMV6-NEXT:    mov r1, #0
 ; ARMV6-NEXT:    adc r1, r1, #0
 ; ARMV6-NEXT:    orr r0, r0, r1
 ; ARMV6-NEXT:    and r0, r0, #1
-; ARMV6-NEXT:    strb r0, [r2, #16]
+; ARMV6-NEXT:    strb r0, [r5, #16]
 ; ARMV6-NEXT:    add sp, sp, #28
 ; ARMV6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; ARMV7-LABEL: muloti_test:
 ; ARMV7:       @ %bb.0: @ %start
 ; ARMV7-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; ARMV7-NEXT:    sub sp, sp, #44
-; ARMV7-NEXT:    ldr r8, [sp, #88]
-; ARMV7-NEXT:    mov r9, r0
-; ARMV7-NEXT:    ldr r7, [sp, #96]
-; ARMV7-NEXT:    ldr lr, [sp, #100]
-; ARMV7-NEXT:    umull r0, r5, r2, r8
-; ARMV7-NEXT:    ldr r4, [sp, #80]
+; ARMV7-NEXT:    sub sp, sp, #36
+; ARMV7-NEXT:    ldr r5, [sp, #84]
+; ARMV7-NEXT:    mov r8, r0
+; ARMV7-NEXT:    ldr r1, [sp, #72]
+; ARMV7-NEXT:    ldr r10, [sp, #80]
+; ARMV7-NEXT:    ldr r9, [sp, #76]
+; ARMV7-NEXT:    umull r4, lr, r5, r1
+; ARMV7-NEXT:    umull r0, r7, r2, r10
+; ARMV7-NEXT:    str r4, [sp, #24] @ 4-byte Spill
+; ARMV7-NEXT:    ldr r4, [sp, #88]
+; ARMV7-NEXT:    umull r1, r6, r1, r10
 ; ARMV7-NEXT:    str r0, [sp, #32] @ 4-byte Spill
-; ARMV7-NEXT:    umull r1, r0, r3, r7
-; ARMV7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; ARMV7-NEXT:    umull r0, r11, lr, r2
-; ARMV7-NEXT:    str r1, [sp, #20] @ 4-byte Spill
+; ARMV7-NEXT:    umull r11, r0, r2, r5
+; ARMV7-NEXT:    str r6, [sp, #20] @ 4-byte Spill
+; ARMV7-NEXT:    str r1, [sp, #28] @ 4-byte Spill
+; ARMV7-NEXT:    umull r6, r12, r3, r4
 ; ARMV7-NEXT:    ldr r1, [sp, #92]
-; ARMV7-NEXT:    str r0, [sp] @ 4-byte Spill
-; ARMV7-NEXT:    umull r0, r10, r7, r2
-; ARMV7-NEXT:    mov r7, r1
-; ARMV7-NEXT:    umull r6, r12, r1, r4
-; ARMV7-NEXT:    str r0, [sp, #40] @ 4-byte Spill
-; ARMV7-NEXT:    ldr r0, [sp, #84]
-; ARMV7-NEXT:    str r6, [sp, #24] @ 4-byte Spill
-; ARMV7-NEXT:    umull r6, r1, r0, r8
+; ARMV7-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; ARMV7-NEXT:    mov r0, #0
+; ARMV7-NEXT:    umlal r7, r0, r3, r10
 ; ARMV7-NEXT:    str r6, [sp, #16] @ 4-byte Spill
-; ARMV7-NEXT:    umull r6, r2, r2, r7
-; ARMV7-NEXT:    mov r7, r4
-; ARMV7-NEXT:    str r6, [sp, #8] @ 4-byte Spill
+; ARMV7-NEXT:    umull r6, r1, r1, r2
+; ARMV7-NEXT:    umull r2, r4, r4, r2
+; ARMV7-NEXT:    str r6, [sp, #4] @ 4-byte Spill
 ; ARMV7-NEXT:    str r2, [sp, #12] @ 4-byte Spill
-; ARMV7-NEXT:    umull r2, r6, r4, r8
-; ARMV7-NEXT:    str r2, [sp, #36] @ 4-byte Spill
-; ARMV7-NEXT:    ldr r2, [sp, #32] @ 4-byte Reload
-; ARMV7-NEXT:    str r6, [sp, #28] @ 4-byte Spill
-; ARMV7-NEXT:    mov r6, #0
-; ARMV7-NEXT:    str r2, [r9]
-; ARMV7-NEXT:    umlal r5, r6, r3, r8
+; ARMV7-NEXT:    adds r2, r11, r7
+; ARMV7-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
+; ARMV7-NEXT:    mov r11, #0
+; ARMV7-NEXT:    str r4, [sp] @ 4-byte Spill
+; ARMV7-NEXT:    umull r6, r4, r9, r10
+; ARMV7-NEXT:    adcs r9, r0, r7
+; ARMV7-NEXT:    ldr r0, [sp, #32] @ 4-byte Reload
+; ARMV7-NEXT:    adc r10, r11, #0
+; ARMV7-NEXT:    stm r8, {r0, r2}
+; ARMV7-NEXT:    ldr r0, [sp, #24] @ 4-byte Reload
+; ARMV7-NEXT:    umlal r9, r10, r3, r5
 ; ARMV7-NEXT:    ldr r2, [sp, #20] @ 4-byte Reload
-; ARMV7-NEXT:    ldr r4, [sp] @ 4-byte Reload
-; ARMV7-NEXT:    add r4, r4, r2
-; ARMV7-NEXT:    adds r2, r10, r4
-; ARMV7-NEXT:    str r2, [sp, #20] @ 4-byte Spill
-; ARMV7-NEXT:    mov r2, #0
-; ARMV7-NEXT:    adc r2, r2, #0
-; ARMV7-NEXT:    cmp r12, #0
+; ARMV7-NEXT:    add r0, r6, r0
+; ARMV7-NEXT:    adds r0, r2, r0
+; ARMV7-NEXT:    ldr r6, [sp, #4] @ 4-byte Reload
+; ARMV7-NEXT:    adc r2, r11, #0
 ; ARMV7-NEXT:    str r2, [sp, #32] @ 4-byte Spill
-; ARMV7-NEXT:    movwne r12, #1
-; ARMV7-NEXT:    cmp r1, #0
-; ARMV7-NEXT:    ldr r2, [sp, #96]
-; ARMV7-NEXT:    movwne r1, #1
-; ARMV7-NEXT:    orrs r10, r7, r0
-; ARMV7-NEXT:    movwne r10, #1
-; ARMV7-NEXT:    orrs r7, r2, lr
-; ARMV7-NEXT:    ldr r2, [sp, #92]
-; ARMV7-NEXT:    movwne r7, #1
+; ARMV7-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
+; ARMV7-NEXT:    ldr r7, [sp, #28] @ 4-byte Reload
+; ARMV7-NEXT:    add r2, r6, r2
+; ARMV7-NEXT:    ldr r6, [sp] @ 4-byte Reload
+; ARMV7-NEXT:    adds r2, r6, r2
+; ARMV7-NEXT:    ldr r6, [sp, #12] @ 4-byte Reload
+; ARMV7-NEXT:    adc r11, r11, #0
+; ARMV7-NEXT:    adds r7, r7, r6
+; ARMV7-NEXT:    ldr r6, [sp, #92]
+; ARMV7-NEXT:    adc r0, r0, r2
+; ARMV7-NEXT:    str r0, [sp, #28] @ 4-byte Spill
+; ARMV7-NEXT:    ldr r0, [sp, #92]
+; ARMV7-NEXT:    cmp r3, #0
+; ARMV7-NEXT:    movwne r3, #1
+; ARMV7-NEXT:    ldr r2, [sp, #76]
 ; ARMV7-NEXT:    cmp r0, #0
 ; ARMV7-NEXT:    movwne r0, #1
-; ARMV7-NEXT:    cmp r2, #0
-; ARMV7-NEXT:    mov r4, r2
-; ARMV7-NEXT:    mov r8, r2
-; ARMV7-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; ARMV7-NEXT:    movwne r4, #1
-; ARMV7-NEXT:    and r0, r0, r4
-; ARMV7-NEXT:    mov r4, #0
-; ARMV7-NEXT:    adds r5, r2, r5
-; ARMV7-NEXT:    str r5, [r9, #4]
+; ARMV7-NEXT:    cmp r1, #0
+; ARMV7-NEXT:    movwne r1, #1
+; ARMV7-NEXT:    cmp r12, #0
+; ARMV7-NEXT:    and r0, r0, r3
+; ARMV7-NEXT:    movwne r12, #1
+; ARMV7-NEXT:    cmp r5, #0
 ; ARMV7-NEXT:    orr r0, r0, r1
-; ARMV7-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
-; ARMV7-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
-; ARMV7-NEXT:    and r5, r10, r7
-; ARMV7-NEXT:    orr r0, r0, r12
-; ARMV7-NEXT:    mov r12, #0
-; ARMV7-NEXT:    add r1, r2, r1
-; ARMV7-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
-; ARMV7-NEXT:    adcs r2, r6, r2
-; ARMV7-NEXT:    ldr r6, [sp, #28] @ 4-byte Reload
-; ARMV7-NEXT:    adc r7, r4, #0
-; ARMV7-NEXT:    adds r1, r6, r1
-; ARMV7-NEXT:    umlal r2, r7, r3, r8
-; ARMV7-NEXT:    adc r4, r4, #0
-; ARMV7-NEXT:    orr r0, r0, r4
-; ARMV7-NEXT:    orr r0, r5, r0
-; ARMV7-NEXT:    ldr r4, [sp, #40] @ 4-byte Reload
-; ARMV7-NEXT:    ldr r5, [sp, #36] @ 4-byte Reload
-; ARMV7-NEXT:    adds r5, r5, r4
-; ARMV7-NEXT:    ldr r4, [sp, #20] @ 4-byte Reload
-; ARMV7-NEXT:    adc r1, r1, r4
-; ARMV7-NEXT:    ldr r4, [sp, #4] @ 4-byte Reload
+; ARMV7-NEXT:    movwne r5, #1
+; ARMV7-NEXT:    cmp r2, #0
+; ARMV7-NEXT:    mov r1, r2
+; ARMV7-NEXT:    mov r3, r2
+; ARMV7-NEXT:    movwne r1, #1
 ; ARMV7-NEXT:    cmp r4, #0
+; ARMV7-NEXT:    ldr r2, [sp, #72]
 ; ARMV7-NEXT:    movwne r4, #1
-; ARMV7-NEXT:    cmp r3, #0
-; ARMV7-NEXT:    movwne r3, #1
 ; ARMV7-NEXT:    cmp lr, #0
+; ARMV7-NEXT:    and r1, r1, r5
 ; ARMV7-NEXT:    movwne lr, #1
-; ARMV7-NEXT:    cmp r11, #0
-; ARMV7-NEXT:    movwne r11, #1
-; ARMV7-NEXT:    adds r2, r2, r5
-; ARMV7-NEXT:    and r3, lr, r3
-; ARMV7-NEXT:    str r2, [r9, #8]
-; ARMV7-NEXT:    adcs r1, r7, r1
-; ARMV7-NEXT:    str r1, [r9, #12]
-; ARMV7-NEXT:    orr r1, r3, r11
-; ARMV7-NEXT:    ldr r2, [sp, #32] @ 4-byte Reload
+; ARMV7-NEXT:    orrs r2, r2, r3
+; ARMV7-NEXT:    ldr r3, [sp, #88]
+; ARMV7-NEXT:    movwne r2, #1
 ; ARMV7-NEXT:    orr r1, r1, r4
-; ARMV7-NEXT:    orr r1, r1, r2
-; ARMV7-NEXT:    orr r0, r0, r1
-; ARMV7-NEXT:    adc r1, r12, #0
+; ARMV7-NEXT:    orr r0, r0, r12
+; ARMV7-NEXT:    orrs r3, r3, r6
+; ARMV7-NEXT:    orr r1, r1, lr
+; ARMV7-NEXT:    movwne r3, #1
+; ARMV7-NEXT:    adds r7, r9, r7
+; ARMV7-NEXT:    str r7, [r8, #8]
+; ARMV7-NEXT:    and r2, r2, r3
+; ARMV7-NEXT:    ldr r7, [sp, #28] @ 4-byte Reload
+; ARMV7-NEXT:    orr r0, r0, r11
+; ARMV7-NEXT:    adcs r7, r10, r7
+; ARMV7-NEXT:    str r7, [r8, #12]
+; ARMV7-NEXT:    ldr r7, [sp, #32] @ 4-byte Reload
+; ARMV7-NEXT:    orr r1, r1, r7
+; ARMV7-NEXT:    orr r1, r2, r1
+; ARMV7-NEXT:    orr r0, r1, r0
+; ARMV7-NEXT:    mov r1, #0
+; ARMV7-NEXT:    adc r1, r1, #0
 ; ARMV7-NEXT:    orr r0, r0, r1
 ; ARMV7-NEXT:    and r0, r0, #1
-; ARMV7-NEXT:    strb r0, [r9, #16]
-; ARMV7-NEXT:    add sp, sp, #44
+; ARMV7-NEXT:    strb r0, [r8, #16]
+; ARMV7-NEXT:    add sp, sp, #36
 ; ARMV7-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 start:
   %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2

diff  --git a/llvm/test/CodeGen/ARM/umulo-64-legalisation-lowering.ll b/llvm/test/CodeGen/ARM/umulo-64-legalisation-lowering.ll
index 64d98314429707..ddf033b19b949d 100644
--- a/llvm/test/CodeGen/ARM/umulo-64-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/ARM/umulo-64-legalisation-lowering.ll
@@ -5,49 +5,50 @@
 define { i64, i8 } @mulodi_test(i64 %l, i64 %r) unnamed_addr #0 {
 ; ARMV6-LABEL: mulodi_test:
 ; ARMV6:       @ %bb.0: @ %start
-; ARMV6-NEXT:    push {r4, r5, r11, lr}
-; ARMV6-NEXT:    umull r12, lr, r1, r2
-; ARMV6-NEXT:    umull r4, r5, r3, r0
-; ARMV6-NEXT:    cmp lr, #0
-; ARMV6-NEXT:    movne lr, #1
+; ARMV6-NEXT:    push {r4, r5, r6, lr}
+; ARMV6-NEXT:    umull r12, lr, r3, r0
+; ARMV6-NEXT:    mov r6, #0
+; ARMV6-NEXT:    umull r4, r5, r1, r2
+; ARMV6-NEXT:    umull r0, r2, r0, r2
+; ARMV6-NEXT:    add r4, r4, r12
+; ARMV6-NEXT:    adds r12, r2, r4
+; ARMV6-NEXT:    adc r2, r6, #0
 ; ARMV6-NEXT:    cmp r3, #0
 ; ARMV6-NEXT:    movne r3, #1
 ; ARMV6-NEXT:    cmp r1, #0
-; ARMV6-NEXT:    umull r0, r2, r0, r2
 ; ARMV6-NEXT:    movne r1, #1
-; ARMV6-NEXT:    and r1, r1, r3
 ; ARMV6-NEXT:    cmp r5, #0
-; ARMV6-NEXT:    orr r1, r1, lr
+; ARMV6-NEXT:    and r1, r1, r3
 ; ARMV6-NEXT:    movne r5, #1
-; ARMV6-NEXT:    orr r3, r1, r5
-; ARMV6-NEXT:    add r1, r12, r4
-; ARMV6-NEXT:    adds r1, r2, r1
-; ARMV6-NEXT:    mov r5, #0
-; ARMV6-NEXT:    adc r2, r5, #0
-; ARMV6-NEXT:    orr r2, r3, r2
-; ARMV6-NEXT:    pop {r4, r5, r11, pc}
+; ARMV6-NEXT:    cmp lr, #0
+; ARMV6-NEXT:    orr r1, r1, r5
+; ARMV6-NEXT:    movne lr, #1
+; ARMV6-NEXT:    orr r1, r1, lr
+; ARMV6-NEXT:    orr r2, r1, r2
+; ARMV6-NEXT:    mov r1, r12
+; ARMV6-NEXT:    pop {r4, r5, r6, pc}
 ;
 ; ARMV7-LABEL: mulodi_test:
 ; ARMV7:       @ %bb.0: @ %start
 ; ARMV7-NEXT:    push {r4, r5, r11, lr}
-; ARMV7-NEXT:    umull r12, lr, r3, r0
+; ARMV7-NEXT:    umull r12, lr, r1, r2
 ; ARMV7-NEXT:    cmp r3, #0
+; ARMV7-NEXT:    umull r4, r5, r3, r0
 ; ARMV7-NEXT:    movwne r3, #1
 ; ARMV7-NEXT:    cmp r1, #0
-; ARMV7-NEXT:    umull r0, r4, r0, r2
-; ARMV7-NEXT:    umull r2, r5, r1, r2
 ; ARMV7-NEXT:    movwne r1, #1
+; ARMV7-NEXT:    umull r0, r2, r0, r2
+; ARMV7-NEXT:    cmp lr, #0
 ; ARMV7-NEXT:    and r1, r1, r3
+; ARMV7-NEXT:    movwne lr, #1
 ; ARMV7-NEXT:    cmp r5, #0
+; ARMV7-NEXT:    orr r1, r1, lr
 ; ARMV7-NEXT:    movwne r5, #1
-; ARMV7-NEXT:    cmp lr, #0
-; ARMV7-NEXT:    orr r1, r1, r5
-; ARMV7-NEXT:    movwne lr, #1
-; ARMV7-NEXT:    orr r3, r1, lr
-; ARMV7-NEXT:    add r1, r2, r12
-; ARMV7-NEXT:    mov r2, #0
-; ARMV7-NEXT:    adds r1, r4, r1
-; ARMV7-NEXT:    adc r2, r2, #0
+; ARMV7-NEXT:    orr r3, r1, r5
+; ARMV7-NEXT:    add r1, r12, r4
+; ARMV7-NEXT:    mov r5, #0
+; ARMV7-NEXT:    adds r1, r2, r1
+; ARMV7-NEXT:    adc r2, r5, #0
 ; ARMV7-NEXT:    orr r2, r3, r2
 ; ARMV7-NEXT:    pop {r4, r5, r11, pc}
 start:

diff  --git a/llvm/test/CodeGen/ARM/usub_sat.ll b/llvm/test/CodeGen/ARM/usub_sat.ll
index 73e6dafc085903..9c2fd3966ea984 100644
--- a/llvm/test/CodeGen/ARM/usub_sat.ll
+++ b/llvm/test/CodeGen/ARM/usub_sat.ll
@@ -49,15 +49,16 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
 ; CHECK-T1-NEXT:    adcs r0, r1
 ; CHECK-T1-NEXT:    movs r3, #1
 ; CHECK-T1-NEXT:    eors r3, r0
-; CHECK-T1-NEXT:    cmp r3, #0
 ; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:    beq .LBB1_3
 ; CHECK-T1-NEXT:  @ %bb.1:
+; CHECK-T1-NEXT:    cmp r3, #0
 ; CHECK-T1-NEXT:    beq .LBB1_4
 ; CHECK-T1-NEXT:  .LBB1_2:
 ; CHECK-T1-NEXT:    pop {r4, pc}
 ; CHECK-T1-NEXT:  .LBB1_3:
 ; CHECK-T1-NEXT:    mov r0, r2
+; CHECK-T1-NEXT:    cmp r3, #0
 ; CHECK-T1-NEXT:    bne .LBB1_2
 ; CHECK-T1-NEXT:  .LBB1_4:
 ; CHECK-T1-NEXT:    mov r1, r4
@@ -69,7 +70,7 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
 ; CHECK-T2-NEXT:    mov.w r12, #0
 ; CHECK-T2-NEXT:    sbcs r1, r3
 ; CHECK-T2-NEXT:    adc r2, r12, #0
-; CHECK-T2-NEXT:    teq.w r2, #1
+; CHECK-T2-NEXT:    eors r2, r2, #1
 ; CHECK-T2-NEXT:    itt ne
 ; CHECK-T2-NEXT:    movne r0, #0
 ; CHECK-T2-NEXT:    movne r1, #0
@@ -81,7 +82,7 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
 ; CHECK-ARM-NEXT:    mov r12, #0
 ; CHECK-ARM-NEXT:    sbcs r1, r1, r3
 ; CHECK-ARM-NEXT:    adc r2, r12, #0
-; CHECK-ARM-NEXT:    teq r2, #1
+; CHECK-ARM-NEXT:    eors r2, r2, #1
 ; CHECK-ARM-NEXT:    movwne r0, #0
 ; CHECK-ARM-NEXT:    movwne r1, #0
 ; CHECK-ARM-NEXT:    bx lr

diff  --git a/llvm/test/CodeGen/ARM/usub_sat_plus.ll b/llvm/test/CodeGen/ARM/usub_sat_plus.ll
index a465a413c6d0ed..51ec83c707603b 100644
--- a/llvm/test/CodeGen/ARM/usub_sat_plus.ll
+++ b/llvm/test/CodeGen/ARM/usub_sat_plus.ll
@@ -55,15 +55,16 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
 ; CHECK-T1-NEXT:    adcs r0, r1
 ; CHECK-T1-NEXT:    movs r4, #1
 ; CHECK-T1-NEXT:    eors r4, r0
-; CHECK-T1-NEXT:    cmp r4, #0
 ; CHECK-T1-NEXT:    mov r0, r1
 ; CHECK-T1-NEXT:    beq .LBB1_3
 ; CHECK-T1-NEXT:  @ %bb.1:
+; CHECK-T1-NEXT:    cmp r4, #0
 ; CHECK-T1-NEXT:    beq .LBB1_4
 ; CHECK-T1-NEXT:  .LBB1_2:
 ; CHECK-T1-NEXT:    pop {r4, pc}
 ; CHECK-T1-NEXT:  .LBB1_3:
 ; CHECK-T1-NEXT:    mov r0, r3
+; CHECK-T1-NEXT:    cmp r4, #0
 ; CHECK-T1-NEXT:    bne .LBB1_2
 ; CHECK-T1-NEXT:  .LBB1_4:
 ; CHECK-T1-NEXT:    mov r1, r2
@@ -76,7 +77,7 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
 ; CHECK-T2-NEXT:    subs r0, r0, r2
 ; CHECK-T2-NEXT:    sbcs r1, r3
 ; CHECK-T2-NEXT:    adc r2, r12, #0
-; CHECK-T2-NEXT:    teq.w r2, #1
+; CHECK-T2-NEXT:    eors r2, r2, #1
 ; CHECK-T2-NEXT:    itt ne
 ; CHECK-T2-NEXT:    movne r0, #0
 ; CHECK-T2-NEXT:    movne r1, #0
@@ -90,7 +91,7 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
 ; CHECK-ARM-NEXT:    subs r0, r0, r2
 ; CHECK-ARM-NEXT:    sbcs r1, r1, r3
 ; CHECK-ARM-NEXT:    adc r2, r12, #0
-; CHECK-ARM-NEXT:    teq r2, #1
+; CHECK-ARM-NEXT:    eors r2, r2, #1
 ; CHECK-ARM-NEXT:    movwne r0, #0
 ; CHECK-ARM-NEXT:    movwne r1, #0
 ; CHECK-ARM-NEXT:    bx lr

diff  --git a/llvm/test/CodeGen/ARM/vselect_imax.ll b/llvm/test/CodeGen/ARM/vselect_imax.ll
index bd5e3061f0d180..9f0edb7117bd15 100644
--- a/llvm/test/CodeGen/ARM/vselect_imax.ll
+++ b/llvm/test/CodeGen/ARM/vselect_imax.ll
@@ -111,8 +111,8 @@ define void @func_blend15(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea
 define void @func_blend18(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storeaddr) {
 ; CHECK-LABEL: func_blend18:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    .save {r4, r5, r6, lr}
-; CHECK-NEXT:    push {r4, r5, r6, lr}
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, r11, lr}
 ; CHECK-NEXT:    vld1.64 {d16, d17}, [r1:128]!
 ; CHECK-NEXT:    vld1.64 {d22, d23}, [r0:128]!
 ; CHECK-NEXT:    vmov r4, r6, d16
@@ -122,6 +122,7 @@ define void @func_blend18(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea
 ; CHECK-NEXT:    mov r0, #0
 ; CHECK-NEXT:    vmov r2, r1, d20
 ; CHECK-NEXT:    subs r2, r2, lr
+; CHECK-NEXT:    vmov r7, lr, d17
 ; CHECK-NEXT:    vmov r2, r5, d22
 ; CHECK-NEXT:    sbcs r1, r1, r12
 ; CHECK-NEXT:    mov r1, #0
@@ -130,34 +131,33 @@ define void @func_blend18(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea
 ; CHECK-NEXT:    mvnne r1, #0
 ; CHECK-NEXT:    subs r2, r2, r4
 ; CHECK-NEXT:    sbcs r6, r5, r6
-; CHECK-NEXT:    vmov r2, r12, d17
-; CHECK-NEXT:    vmov r5, r4, d23
+; CHECK-NEXT:    vmov r2, r12, d19
+; CHECK-NEXT:    vmov r5, r4, d21
 ; CHECK-NEXT:    mov r6, #0
 ; CHECK-NEXT:    movlt r6, #1
 ; CHECK-NEXT:    cmp r6, #0
 ; CHECK-NEXT:    mvnne r6, #0
 ; CHECK-NEXT:    subs r2, r5, r2
-; CHECK-NEXT:    sbcs r2, r4, r12
-; CHECK-NEXT:    vmov lr, r12, d19
-; CHECK-NEXT:    vmov r4, r5, d21
+; CHECK-NEXT:    sbcs r4, r4, r12
 ; CHECK-NEXT:    mov r2, #0
+; CHECK-NEXT:    vmov r4, r5, d23
 ; CHECK-NEXT:    movlt r2, #1
+; CHECK-NEXT:    subs r7, r4, r7
+; CHECK-NEXT:    sbcs r7, r5, lr
+; CHECK-NEXT:    movlt r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mvnne r0, #0
 ; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    vdup.32 d25, r0
 ; CHECK-NEXT:    mvnne r2, #0
-; CHECK-NEXT:    vdup.32 d25, r2
 ; CHECK-NEXT:    vdup.32 d24, r6
+; CHECK-NEXT:    vdup.32 d27, r2
 ; CHECK-NEXT:    vbit q8, q11, q12
-; CHECK-NEXT:    subs r4, r4, lr
-; CHECK-NEXT:    sbcs r5, r5, r12
-; CHECK-NEXT:    movlt r0, #1
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mvnne r0, #0
-; CHECK-NEXT:    vdup.32 d27, r0
 ; CHECK-NEXT:    vdup.32 d26, r1
 ; CHECK-NEXT:    vbit q9, q10, q13
 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r3:128]!
 ; CHECK-NEXT:    vst1.64 {d18, d19}, [r3:128]
-; CHECK-NEXT:    pop {r4, r5, r6, lr}
+; CHECK-NEXT:    pop {r4, r5, r6, r7, r11, lr}
 ; CHECK-NEXT:    mov pc, lr
 ; COST: func_blend18
 ; COST: cost of 0 {{.*}} icmp
@@ -198,21 +198,12 @@ define void @func_blend19(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea
 ; CHECK-NEXT:    mvnne r12, #0
 ; CHECK-NEXT:    subs r1, r1, r2
 ; CHECK-NEXT:    sbcs r0, r4, r0
-; CHECK-NEXT:    vmov r2, r4, d24
-; CHECK-NEXT:    mov r0, #0
-; CHECK-NEXT:    movlt r0, #1
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mvnne r0, #0
-; CHECK-NEXT:    vdup.32 d1, r0
-; CHECK-NEXT:    vmov r0, r1, d20
-; CHECK-NEXT:    subs r0, r2, r0
-; CHECK-NEXT:    sbcs r0, r4, r1
 ; CHECK-NEXT:    vmov r2, r4, d26
 ; CHECK-NEXT:    mov r0, #0
 ; CHECK-NEXT:    movlt r0, #1
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    mvnne r0, #0
-; CHECK-NEXT:    vdup.32 d0, r0
+; CHECK-NEXT:    vdup.32 d1, r0
 ; CHECK-NEXT:    vmov r0, r1, d22
 ; CHECK-NEXT:    subs r0, r2, r0
 ; CHECK-NEXT:    mov r2, #0
@@ -233,15 +224,14 @@ define void @func_blend19(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea
 ; CHECK-NEXT:    vmov r0, r1, d28
 ; CHECK-NEXT:    subs r0, r4, r0
 ; CHECK-NEXT:    sbcs r0, r5, r1
-; CHECK-NEXT:    vmov r4, r5, d27
+; CHECK-NEXT:    vmov r4, r5, d24
 ; CHECK-NEXT:    mov r0, #0
 ; CHECK-NEXT:    movlt r0, #1
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    mvnne r0, #0
 ; CHECK-NEXT:    vdup.32 d2, r0
-; CHECK-NEXT:    vmov r0, r1, d23
+; CHECK-NEXT:    vmov r0, r1, d20
 ; CHECK-NEXT:    vbit q14, q15, q1
-; CHECK-NEXT:    vbit q10, q12, q0
 ; CHECK-NEXT:    subs r0, r4, r0
 ; CHECK-NEXT:    sbcs r0, r5, r1
 ; CHECK-NEXT:    vmov r1, r4, d17
@@ -250,17 +240,27 @@ define void @func_blend19(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea
 ; CHECK-NEXT:    movlt r0, #1
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    mvnne r0, #0
-; CHECK-NEXT:    vdup.32 d31, r0
-; CHECK-NEXT:    vdup.32 d30, r2
-; CHECK-NEXT:    vbit q11, q13, q15
-; CHECK-NEXT:    vst1.64 {d28, d29}, [r3:128]!
+; CHECK-NEXT:    vdup.32 d0, r0
+; CHECK-NEXT:    vbit q10, q12, q0
 ; CHECK-NEXT:    subs r1, r5, r1
 ; CHECK-NEXT:    sbcs r1, r6, r4
+; CHECK-NEXT:    vmov r4, r5, d27
+; CHECK-NEXT:    vmov r0, r1, d23
+; CHECK-NEXT:    mov r6, #0
+; CHECK-NEXT:    movlt r6, #1
+; CHECK-NEXT:    subs r0, r4, r0
+; CHECK-NEXT:    sbcs r0, r5, r1
 ; CHECK-NEXT:    movlt lr, #1
 ; CHECK-NEXT:    cmp lr, #0
 ; CHECK-NEXT:    mvnne lr, #0
-; CHECK-NEXT:    vdup.32 d3, lr
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    vdup.32 d31, lr
+; CHECK-NEXT:    mvnne r6, #0
+; CHECK-NEXT:    vdup.32 d30, r2
+; CHECK-NEXT:    vdup.32 d3, r6
+; CHECK-NEXT:    vbit q11, q13, q15
 ; CHECK-NEXT:    vdup.32 d2, r12
+; CHECK-NEXT:    vst1.64 {d28, d29}, [r3:128]!
 ; CHECK-NEXT:    vbit q8, q9, q1
 ; CHECK-NEXT:    vst1.64 {d20, d21}, [r3:128]!
 ; CHECK-NEXT:    vst1.64 {d22, d23}, [r3:128]!
@@ -283,194 +283,198 @@ define void @func_blend19(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea
 define void @func_blend20(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storeaddr) {
 ; CHECK-LABEL: func_blend20:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT:    push {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT:    .vsave {d8, d9}
-; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    add r8, r1, #64
-; CHECK-NEXT:    add lr, r0, #64
-; CHECK-NEXT:    vld1.64 {d16, d17}, [r1:128]!
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11}
+; CHECK-NEXT:    mov r8, r1
+; CHECK-NEXT:    mov lr, r0
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r8:128]!
+; CHECK-NEXT:    add r9, r0, #64
+; CHECK-NEXT:    add r10, r1, #64
 ; CHECK-NEXT:    mov r12, #0
-; CHECK-NEXT:    vld1.64 {d24, d25}, [r0:128]!
-; CHECK-NEXT:    vmov r4, r5, d17
-; CHECK-NEXT:    vmov r6, r7, d25
-; CHECK-NEXT:    vld1.64 {d18, d19}, [lr:128]!
-; CHECK-NEXT:    vld1.64 {d20, d21}, [r8:128]!
-; CHECK-NEXT:    vld1.64 {d22, d23}, [r8:128]!
-; CHECK-NEXT:    vld1.64 {d0, d1}, [lr:128]!
-; CHECK-NEXT:    subs r4, r6, r4
-; CHECK-NEXT:    sbcs r4, r7, r5
-; CHECK-NEXT:    vmov r5, r6, d16
-; CHECK-NEXT:    vmov r7, r2, d24
+; CHECK-NEXT:    vld1.64 {d22, d23}, [lr:128]!
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r8:128]!
+; CHECK-NEXT:    vld1.64 {d20, d21}, [lr:128]!
+; CHECK-NEXT:    vmov r6, r4, d19
+; CHECK-NEXT:    vmov r5, r7, d21
+; CHECK-NEXT:    vld1.64 {d4, d5}, [r9:128]!
+; CHECK-NEXT:    vld1.64 {d6, d7}, [r10:128]!
+; CHECK-NEXT:    vld1.64 {d0, d1}, [r10:128]!
+; CHECK-NEXT:    vld1.64 {d2, d3}, [r9:128]!
+; CHECK-NEXT:    subs r6, r5, r6
+; CHECK-NEXT:    sbcs r4, r7, r4
+; CHECK-NEXT:    vmov r5, r6, d18
+; CHECK-NEXT:    vmov r7, r2, d20
 ; CHECK-NEXT:    mov r4, #0
 ; CHECK-NEXT:    movlt r4, #1
 ; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    mvnne r4, #0
-; CHECK-NEXT:    vdup.32 d27, r4
+; CHECK-NEXT:    vdup.32 d31, r4
 ; CHECK-NEXT:    subs r5, r7, r5
 ; CHECK-NEXT:    sbcs r2, r2, r6
-; CHECK-NEXT:    vmov r5, r6, d1
-; CHECK-NEXT:    mov r2, #0
-; CHECK-NEXT:    movlt r2, #1
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    mvnne r2, #0
-; CHECK-NEXT:    vdup.32 d26, r2
-; CHECK-NEXT:    vmov r2, r4, d23
-; CHECK-NEXT:    vbit q8, q12, q13
-; CHECK-NEXT:    vld1.64 {d24, d25}, [r0:128]!
-; CHECK-NEXT:    vld1.64 {d26, d27}, [r1:128]!
-; CHECK-NEXT:    vld1.64 {d28, d29}, [lr:128]!
-; CHECK-NEXT:    subs r2, r5, r2
-; CHECK-NEXT:    sbcs r2, r6, r4
-; CHECK-NEXT:    vmov r4, r5, d22
-; CHECK-NEXT:    vmov r6, r7, d0
-; CHECK-NEXT:    mov r2, #0
-; CHECK-NEXT:    movlt r2, #1
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    mvnne r2, #0
-; CHECK-NEXT:    vdup.32 d3, r2
-; CHECK-NEXT:    subs r4, r6, r4
-; CHECK-NEXT:    sbcs r4, r7, r5
-; CHECK-NEXT:    vmov r2, r5, d27
-; CHECK-NEXT:    vmov r6, r7, d25
-; CHECK-NEXT:    mov r4, #0
-; CHECK-NEXT:    movlt r4, #1
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    mvnne r4, #0
-; CHECK-NEXT:    vdup.32 d2, r4
-; CHECK-NEXT:    subs r2, r6, r2
-; CHECK-NEXT:    sbcs r2, r7, r5
-; CHECK-NEXT:    vmov r6, r7, d24
-; CHECK-NEXT:    mov r2, #0
-; CHECK-NEXT:    movlt r2, #1
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    mvnne r2, #0
-; CHECK-NEXT:    vdup.32 d5, r2
-; CHECK-NEXT:    vmov r2, r5, d26
-; CHECK-NEXT:    subs r2, r6, r2
-; CHECK-NEXT:    sbcs r2, r7, r5
-; CHECK-NEXT:    vmov r6, r7, d19
-; CHECK-NEXT:    mov r2, #0
-; CHECK-NEXT:    movlt r2, #1
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    mvnne r2, #0
-; CHECK-NEXT:    vdup.32 d4, r2
-; CHECK-NEXT:    vmov r2, r5, d21
-; CHECK-NEXT:    subs r2, r6, r2
-; CHECK-NEXT:    sbcs r2, r7, r5
-; CHECK-NEXT:    vmov r6, r7, d18
-; CHECK-NEXT:    mov r2, #0
-; CHECK-NEXT:    movlt r2, #1
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    mvnne r2, #0
-; CHECK-NEXT:    vdup.32 d31, r2
-; CHECK-NEXT:    vmov r2, r5, d20
-; CHECK-NEXT:    subs r2, r6, r2
-; CHECK-NEXT:    sbcs r2, r7, r5
+; CHECK-NEXT:    vmov r4, r5, d3
 ; CHECK-NEXT:    mov r2, #0
 ; CHECK-NEXT:    movlt r2, #1
 ; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    mvnne r2, #0
 ; CHECK-NEXT:    vdup.32 d30, r2
-; CHECK-NEXT:    vbif q9, q10, q15
-; CHECK-NEXT:    vld1.64 {d30, d31}, [r8:128]!
-; CHECK-NEXT:    vld1.64 {d20, d21}, [r8:128]
-; CHECK-NEXT:    vbit q13, q12, q2
-; CHECK-NEXT:    vld1.64 {d24, d25}, [lr:128]
-; CHECK-NEXT:    vmov r2, r7, d21
-; CHECK-NEXT:    vbit q11, q0, q1
+; CHECK-NEXT:    vmov r0, r2, d1
+; CHECK-NEXT:    subs r0, r4, r0
+; CHECK-NEXT:    sbcs r0, r5, r2
+; CHECK-NEXT:    vmov r4, r5, d2
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    movlt r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mvnne r0, #0
+; CHECK-NEXT:    vdup.32 d9, r0
+; CHECK-NEXT:    vmov r0, r2, d0
+; CHECK-NEXT:    subs r0, r4, r0
+; CHECK-NEXT:    sbcs r0, r5, r2
+; CHECK-NEXT:    vmov r4, r5, d5
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    movlt r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mvnne r0, #0
+; CHECK-NEXT:    vdup.32 d8, r0
+; CHECK-NEXT:    vmov r0, r2, d7
+; CHECK-NEXT:    subs r0, r4, r0
+; CHECK-NEXT:    sbcs r0, r5, r2
+; CHECK-NEXT:    vmov r4, r5, d4
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    movlt r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mvnne r0, #0
+; CHECK-NEXT:    vdup.32 d11, r0
+; CHECK-NEXT:    vmov r0, r2, d6
+; CHECK-NEXT:    subs r0, r4, r0
+; CHECK-NEXT:    sbcs r0, r5, r2
+; CHECK-NEXT:    vmov r4, r5, d23
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    movlt r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mvnne r0, #0
+; CHECK-NEXT:    vdup.32 d10, r0
+; CHECK-NEXT:    vmov r0, r2, d17
+; CHECK-NEXT:    subs r0, r4, r0
+; CHECK-NEXT:    sbcs r0, r5, r2
+; CHECK-NEXT:    vmov r4, r5, d22
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    movlt r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mvnne r0, #0
+; CHECK-NEXT:    vdup.32 d25, r0
+; CHECK-NEXT:    vmov r0, r2, d16
+; CHECK-NEXT:    subs r0, r4, r0
+; CHECK-NEXT:    sbcs r0, r5, r2
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    movlt r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mvnne r0, #0
+; CHECK-NEXT:    vdup.32 d24, r0
+; CHECK-NEXT:    vorr q13, q12, q12
+; CHECK-NEXT:    vbsl q13, q11, q8
+; CHECK-NEXT:    vld1.64 {d24, d25}, [r9:128]!
+; CHECK-NEXT:    vorr q8, q5, q5
+; CHECK-NEXT:    vld1.64 {d28, d29}, [r10:128]!
+; CHECK-NEXT:    vbsl q8, q2, q3
+; CHECK-NEXT:    vld1.64 {d6, d7}, [r8:128]!
+; CHECK-NEXT:    vld1.64 {d22, d23}, [r8:128]
+; CHECK-NEXT:    vld1.64 {d4, d5}, [lr:128]!
+; CHECK-NEXT:    vbif q10, q9, q15
+; CHECK-NEXT:    vorr q9, q4, q4
+; CHECK-NEXT:    vmov r0, r2, d22
+; CHECK-NEXT:    vbsl q9, q1, q0
+; CHECK-NEXT:    vld1.64 {d30, d31}, [lr:128]
 ; CHECK-NEXT:    mov lr, #0
-; CHECK-NEXT:    vmov r6, r5, d25
-; CHECK-NEXT:    vld1.64 {d4, d5}, [r1:128]!
-; CHECK-NEXT:    vld1.64 {d6, d7}, [r0:128]!
-; CHECK-NEXT:    vld1.64 {d0, d1}, [r1:128]
-; CHECK-NEXT:    vld1.64 {d2, d3}, [r0:128]
-; CHECK-NEXT:    subs r1, r6, r2
-; CHECK-NEXT:    vmov r0, r6, d2
-; CHECK-NEXT:    sbcs r1, r5, r7
-; CHECK-NEXT:    vmov r2, r7, d0
+; CHECK-NEXT:    vmov r7, r5, d30
+; CHECK-NEXT:    vld1.64 {d0, d1}, [r9:128]
+; CHECK-NEXT:    vld1.64 {d2, d3}, [r10:128]
+; CHECK-NEXT:    subs r0, r7, r0
+; CHECK-NEXT:    sbcs r0, r5, r2
+; CHECK-NEXT:    vmov r5, r4, d24
+; CHECK-NEXT:    vmov r0, r7, d28
 ; CHECK-NEXT:    movlt lr, #1
 ; CHECK-NEXT:    cmp lr, #0
 ; CHECK-NEXT:    mvnne lr, #0
-; CHECK-NEXT:    subs r0, r0, r2
-; CHECK-NEXT:    sbcs r0, r6, r7
-; CHECK-NEXT:    vmov r2, r7, d30
-; CHECK-NEXT:    vmov r6, r5, d28
+; CHECK-NEXT:    subs r0, r5, r0
+; CHECK-NEXT:    sbcs r0, r4, r7
+; CHECK-NEXT:    vmov r7, r5, d29
+; CHECK-NEXT:    vmov r4, r6, d25
 ; CHECK-NEXT:    mov r0, #0
 ; CHECK-NEXT:    movlt r0, #1
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    mvnne r0, #0
-; CHECK-NEXT:    subs r2, r6, r2
-; CHECK-NEXT:    sbcs r2, r5, r7
-; CHECK-NEXT:    vmov r7, r6, d31
-; CHECK-NEXT:    vmov r5, r4, d29
-; CHECK-NEXT:    mov r2, #0
-; CHECK-NEXT:    movlt r2, #1
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    mvnne r2, #0
-; CHECK-NEXT:    subs r7, r5, r7
-; CHECK-NEXT:    vmov r5, r1, d7
-; CHECK-NEXT:    sbcs r7, r4, r6
+; CHECK-NEXT:    subs r7, r4, r7
 ; CHECK-NEXT:    mov r4, #0
-; CHECK-NEXT:    vmov r7, r6, d5
+; CHECK-NEXT:    sbcs r7, r6, r5
+; CHECK-NEXT:    vmov r5, r1, d31
+; CHECK-NEXT:    vmov r7, r6, d23
 ; CHECK-NEXT:    movlt r4, #1
 ; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    mvnne r4, #0
-; CHECK-NEXT:    subs r5, r5, r7
+; CHECK-NEXT:    subs r7, r5, r7
+; CHECK-NEXT:    mov r5, #0
 ; CHECK-NEXT:    sbcs r1, r1, r6
-; CHECK-NEXT:    vmov r6, r7, d6
+; CHECK-NEXT:    vmov r6, r2, d5
+; CHECK-NEXT:    vmov r1, r7, d7
+; CHECK-NEXT:    movlt r5, #1
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    mvnne r5, #0
+; CHECK-NEXT:    subs r1, r6, r1
+; CHECK-NEXT:    sbcs r1, r2, r7
+; CHECK-NEXT:    vmov r6, r7, d4
 ; CHECK-NEXT:    mov r1, #0
 ; CHECK-NEXT:    movlt r1, #1
 ; CHECK-NEXT:    cmp r1, #0
 ; CHECK-NEXT:    mvnne r1, #0
 ; CHECK-NEXT:    vdup.32 d9, r1
-; CHECK-NEXT:    vmov r1, r5, d4
+; CHECK-NEXT:    vmov r1, r2, d6
 ; CHECK-NEXT:    subs r1, r6, r1
-; CHECK-NEXT:    sbcs r1, r7, r5
-; CHECK-NEXT:    vmov r6, r7, d3
+; CHECK-NEXT:    sbcs r1, r7, r2
+; CHECK-NEXT:    vmov r6, r7, d0
 ; CHECK-NEXT:    mov r1, #0
 ; CHECK-NEXT:    movlt r1, #1
 ; CHECK-NEXT:    cmp r1, #0
 ; CHECK-NEXT:    mvnne r1, #0
 ; CHECK-NEXT:    vdup.32 d8, r1
-; CHECK-NEXT:    vmov r1, r5, d1
-; CHECK-NEXT:    vbit q2, q3, q4
+; CHECK-NEXT:    vmov r1, r2, d2
+; CHECK-NEXT:    vbif q2, q3, q4
+; CHECK-NEXT:    vdup.32 d7, r5
 ; CHECK-NEXT:    vdup.32 d9, r4
-; CHECK-NEXT:    vdup.32 d8, r2
+; CHECK-NEXT:    vmov r4, r5, d1
+; CHECK-NEXT:    vdup.32 d8, r0
+; CHECK-NEXT:    mov r0, r3
+; CHECK-NEXT:    vst1.64 {d26, d27}, [r0:128]!
+; CHECK-NEXT:    vbif q12, q14, q4
+; CHECK-NEXT:    vdup.32 d6, lr
+; CHECK-NEXT:    vbit q11, q15, q3
+; CHECK-NEXT:    vst1.64 {d20, d21}, [r0:128]!
 ; CHECK-NEXT:    subs r1, r6, r1
-; CHECK-NEXT:    sbcs r1, r7, r5
-; CHECK-NEXT:    vmov r5, r6, d24
-; CHECK-NEXT:    mov r1, #0
-; CHECK-NEXT:    movlt r1, #1
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    mvnne r1, #0
-; CHECK-NEXT:    vdup.32 d7, r1
-; CHECK-NEXT:    vmov r1, r4, d20
-; CHECK-NEXT:    vdup.32 d6, r0
-; CHECK-NEXT:    subs r1, r5, r1
-; CHECK-NEXT:    mov r1, r3
-; CHECK-NEXT:    sbcs r0, r6, r4
-; CHECK-NEXT:    vst1.64 {d16, d17}, [r1:128]!
-; CHECK-NEXT:    vorr q8, q4, q4
+; CHECK-NEXT:    mov r6, #0
+; CHECK-NEXT:    sbcs r1, r7, r2
+; CHECK-NEXT:    vmov r1, r2, d3
+; CHECK-NEXT:    movlt r6, #1
+; CHECK-NEXT:    subs r1, r4, r1
+; CHECK-NEXT:    sbcs r1, r5, r2
 ; CHECK-NEXT:    movlt r12, #1
 ; CHECK-NEXT:    cmp r12, #0
-; CHECK-NEXT:    vbsl q8, q14, q15
-; CHECK-NEXT:    vdup.32 d29, lr
-; CHECK-NEXT:    vorr q15, q3, q3
 ; CHECK-NEXT:    mvnne r12, #0
-; CHECK-NEXT:    vdup.32 d28, r12
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    vdup.32 d27, r12
+; CHECK-NEXT:    mvnne r6, #0
+; CHECK-NEXT:    vdup.32 d26, r6
+; CHECK-NEXT:    vorr q10, q13, q13
+; CHECK-NEXT:    vbsl q10, q0, q1
+; CHECK-NEXT:    vst1.64 {d4, d5}, [r0:128]!
+; CHECK-NEXT:    vst1.64 {d22, d23}, [r0:128]
 ; CHECK-NEXT:    add r0, r3, #64
-; CHECK-NEXT:    vbsl q15, q1, q0
-; CHECK-NEXT:    vst1.64 {d26, d27}, [r1:128]!
-; CHECK-NEXT:    vbit q10, q12, q14
-; CHECK-NEXT:    vst1.64 {d18, d19}, [r0:128]!
-; CHECK-NEXT:    vst1.64 {d22, d23}, [r0:128]!
-; CHECK-NEXT:    vst1.64 {d4, d5}, [r1:128]!
 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; CHECK-NEXT:    vst1.64 {d30, d31}, [r1:128]
+; CHECK-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; CHECK-NEXT:    vst1.64 {d24, d25}, [r0:128]!
 ; CHECK-NEXT:    vst1.64 {d20, d21}, [r0:128]
-; CHECK-NEXT:    vpop {d8, d9}
-; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT:    vpop {d8, d9, d10, d11}
+; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, lr}
 ; CHECK-NEXT:    mov pc, lr
 ; COST: func_blend20
 ; COST: cost of 0 {{.*}} icmp

diff  --git a/llvm/test/CodeGen/ARM/wide-compares.ll b/llvm/test/CodeGen/ARM/wide-compares.ll
index 9acf8d249ddf18..09e3592b6d420e 100644
--- a/llvm/test/CodeGen/ARM/wide-compares.ll
+++ b/llvm/test/CodeGen/ARM/wide-compares.ll
@@ -129,16 +129,19 @@ declare void @g()
 define i64 @test_slt_select(i64 %c, i64 %d, i64 %a, i64 %b) {
 ; CHECK-ARM-LABEL: test_slt_select:
 ; CHECK-ARM:       @ %bb.0: @ %entry
-; CHECK-ARM-NEXT:    push {r4, r5, r6, lr}
-; CHECK-ARM-NEXT:    ldr r12, [sp, #24]
-; CHECK-ARM-NEXT:    ldr lr, [sp, #16]
-; CHECK-ARM-NEXT:    ldr r6, [sp, #28]
-; CHECK-ARM-NEXT:    ldr r5, [sp, #20]
+; CHECK-ARM-NEXT:    push {r4, r5, r6, r7, r11, lr}
+; CHECK-ARM-NEXT:    ldr r12, [sp, #32]
+; CHECK-ARM-NEXT:    mov r6, #0
+; CHECK-ARM-NEXT:    ldr lr, [sp, #24]
+; CHECK-ARM-NEXT:    ldr r7, [sp, #36]
+; CHECK-ARM-NEXT:    ldr r5, [sp, #28]
 ; CHECK-ARM-NEXT:    subs r4, lr, r12
-; CHECK-ARM-NEXT:    sbcs r6, r5, r6
-; CHECK-ARM-NEXT:    movhs r0, r2
-; CHECK-ARM-NEXT:    movhs r1, r3
-; CHECK-ARM-NEXT:    pop {r4, r5, r6, pc}
+; CHECK-ARM-NEXT:    sbcs r7, r5, r7
+; CHECK-ARM-NEXT:    movwlo r6, #1
+; CHECK-ARM-NEXT:    cmp r6, #0
+; CHECK-ARM-NEXT:    moveq r0, r2
+; CHECK-ARM-NEXT:    moveq r1, r3
+; CHECK-ARM-NEXT:    pop {r4, r5, r6, r7, r11, pc}
 ;
 ; CHECK-THUMB1-NOMOV-LABEL: test_slt_select:
 ; CHECK-THUMB1-NOMOV:       @ %bb.0: @ %entry
@@ -154,13 +157,22 @@ define i64 @test_slt_select(i64 %c, i64 %d, i64 %a, i64 %b) {
 ; CHECK-THUMB1-NOMOV-NEXT:    sbcs r5, r4
 ; CHECK-THUMB1-NOMOV-NEXT:    blo .LBB2_2
 ; CHECK-THUMB1-NOMOV-NEXT:  @ %bb.1: @ %entry
-; CHECK-THUMB1-NOMOV-NEXT:    mov r12, r2
-; CHECK-THUMB1-NOMOV-NEXT:    mov r0, r12
-; CHECK-THUMB1-NOMOV-NEXT:  .LBB2_2: @ %entry
-; CHECK-THUMB1-NOMOV-NEXT:    blo .LBB2_4
-; CHECK-THUMB1-NOMOV-NEXT:  @ %bb.3: @ %entry
-; CHECK-THUMB1-NOMOV-NEXT:    movs r1, r3
+; CHECK-THUMB1-NOMOV-NEXT:    movs r4, #0
+; CHECK-THUMB1-NOMOV-NEXT:    cmp r4, #0
+; CHECK-THUMB1-NOMOV-NEXT:    beq .LBB2_3
+; CHECK-THUMB1-NOMOV-NEXT:    b .LBB2_4
+; CHECK-THUMB1-NOMOV-NEXT:  .LBB2_2:
+; CHECK-THUMB1-NOMOV-NEXT:    movs r4, #1
+; CHECK-THUMB1-NOMOV-NEXT:    cmp r4, #0
+; CHECK-THUMB1-NOMOV-NEXT:    bne .LBB2_4
+; CHECK-THUMB1-NOMOV-NEXT:  .LBB2_3: @ %entry
+; CHECK-THUMB1-NOMOV-NEXT:    movs r0, r2
 ; CHECK-THUMB1-NOMOV-NEXT:  .LBB2_4: @ %entry
+; CHECK-THUMB1-NOMOV-NEXT:    cmp r4, #0
+; CHECK-THUMB1-NOMOV-NEXT:    bne .LBB2_6
+; CHECK-THUMB1-NOMOV-NEXT:  @ %bb.5: @ %entry
+; CHECK-THUMB1-NOMOV-NEXT:    movs r1, r3
+; CHECK-THUMB1-NOMOV-NEXT:  .LBB2_6: @ %entry
 ; CHECK-THUMB1-NOMOV-NEXT:    add sp, #4
 ; CHECK-THUMB1-NOMOV-NEXT:    pop {r4, r5, r6, r7}
 ; CHECK-THUMB1-NOMOV-NEXT:    pop {r2}
@@ -176,31 +188,46 @@ define i64 @test_slt_select(i64 %c, i64 %d, i64 %a, i64 %b) {
 ; CHECK-THUMB1-NEXT:    ldr r7, [sp, #24]
 ; CHECK-THUMB1-NEXT:    subs r6, r7, r6
 ; CHECK-THUMB1-NEXT:    sbcs r5, r4
-; CHECK-THUMB1-NEXT:    bhs .LBB2_3
+; CHECK-THUMB1-NEXT:    blo .LBB2_2
 ; CHECK-THUMB1-NEXT:  @ %bb.1: @ %entry
-; CHECK-THUMB1-NEXT:    bhs .LBB2_4
-; CHECK-THUMB1-NEXT:  .LBB2_2: @ %entry
-; CHECK-THUMB1-NEXT:    add sp, #4
-; CHECK-THUMB1-NEXT:    pop {r4, r5, r6, r7, pc}
+; CHECK-THUMB1-NEXT:    movs r4, #0
+; CHECK-THUMB1-NEXT:    cmp r4, #0
+; CHECK-THUMB1-NEXT:    beq .LBB2_3
+; CHECK-THUMB1-NEXT:    b .LBB2_4
+; CHECK-THUMB1-NEXT:  .LBB2_2:
+; CHECK-THUMB1-NEXT:    movs r4, #1
+; CHECK-THUMB1-NEXT:    cmp r4, #0
+; CHECK-THUMB1-NEXT:    bne .LBB2_4
 ; CHECK-THUMB1-NEXT:  .LBB2_3: @ %entry
 ; CHECK-THUMB1-NEXT:    mov r0, r2
-; CHECK-THUMB1-NEXT:    blo .LBB2_2
 ; CHECK-THUMB1-NEXT:  .LBB2_4: @ %entry
+; CHECK-THUMB1-NEXT:    cmp r4, #0
+; CHECK-THUMB1-NEXT:    beq .LBB2_6
+; CHECK-THUMB1-NEXT:  @ %bb.5: @ %entry
+; CHECK-THUMB1-NEXT:    add sp, #4
+; CHECK-THUMB1-NEXT:    pop {r4, r5, r6, r7, pc}
+; CHECK-THUMB1-NEXT:  .LBB2_6: @ %entry
 ; CHECK-THUMB1-NEXT:    mov r1, r3
 ; CHECK-THUMB1-NEXT:    add sp, #4
 ; CHECK-THUMB1-NEXT:    pop {r4, r5, r6, r7, pc}
 ;
 ; CHECK-THUMB2-LABEL: test_slt_select:
 ; CHECK-THUMB2:       @ %bb.0: @ %entry
-; CHECK-THUMB2-NEXT:    push {r4, r5, r6, lr}
-; CHECK-THUMB2-NEXT:    ldrd r12, r6, [sp, #24]
-; CHECK-THUMB2-NEXT:    ldrd lr, r5, [sp, #16]
+; CHECK-THUMB2-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-THUMB2-NEXT:    sub sp, #4
+; CHECK-THUMB2-NEXT:    ldrd r12, r7, [sp, #32]
+; CHECK-THUMB2-NEXT:    movs r6, #0
+; CHECK-THUMB2-NEXT:    ldrd lr, r5, [sp, #24]
 ; CHECK-THUMB2-NEXT:    subs.w r4, lr, r12
-; CHECK-THUMB2-NEXT:    sbcs.w r6, r5, r6
-; CHECK-THUMB2-NEXT:    itt hs
-; CHECK-THUMB2-NEXT:    movhs r0, r2
-; CHECK-THUMB2-NEXT:    movhs r1, r3
-; CHECK-THUMB2-NEXT:    pop {r4, r5, r6, pc}
+; CHECK-THUMB2-NEXT:    sbcs.w r7, r5, r7
+; CHECK-THUMB2-NEXT:    it lo
+; CHECK-THUMB2-NEXT:    movlo r6, #1
+; CHECK-THUMB2-NEXT:    cmp r6, #0
+; CHECK-THUMB2-NEXT:    itt eq
+; CHECK-THUMB2-NEXT:    moveq r0, r2
+; CHECK-THUMB2-NEXT:    moveq r1, r3
+; CHECK-THUMB2-NEXT:    add sp, #4
+; CHECK-THUMB2-NEXT:    pop {r4, r5, r6, r7, pc}
 entry:
     %cmp = icmp ult i64 %a, %b
     %r1 = select i1 %cmp, i64 %c, i64 %d

diff  --git a/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll b/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll
index 9d69417e8f6b5b..fdfbf3393098e4 100644
--- a/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll
+++ b/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll
@@ -501,8 +501,9 @@ define void @arm_q15_to_q31_altorder(ptr nocapture noundef readonly %pSrc, ptr n
 ; CHECK-NEXT:    lsls r2, r2, #16
 ; CHECK-NEXT:    str r2, [r1, #48]
 ; CHECK-NEXT:    adds r1, #64
-; CHECK-NEXT:    adds r0, #32
 ; CHECK-NEXT:    subs r3, r3, #4
+; CHECK-NEXT:    adds r0, #32
+; CHECK-NEXT:    cmp r3, #0
 ; CHECK-NEXT:    bne .LBB1_5
 ; CHECK-NEXT:  .LBB1_6: @ %while.end
 ; CHECK-NEXT:    movs r2, #3

diff  --git a/llvm/test/CodeGen/Thumb/select.ll b/llvm/test/CodeGen/Thumb/select.ll
index 89cc4c10fe51d6..0065616c4646e9 100644
--- a/llvm/test/CodeGen/Thumb/select.ll
+++ b/llvm/test/CodeGen/Thumb/select.ll
@@ -94,8 +94,8 @@ entry:
 ; CHECK-LABEL: f8:
 ; CHECK: cmp r0, r1
 ; CHECK: blt
-; CHECK: mov
-; CHECK: mov
+; CHECK: movs
+; CHECK: cmp r0, r1
 ; CHECK: blt
 ; CHECK: movs
 ; CHECK: movs

diff  --git a/llvm/test/CodeGen/Thumb/smul_fix_sat.ll b/llvm/test/CodeGen/Thumb/smul_fix_sat.ll
index 8bc39ea0370a60..f8557419c41990 100644
--- a/llvm/test/CodeGen/Thumb/smul_fix_sat.ll
+++ b/llvm/test/CodeGen/Thumb/smul_fix_sat.ll
@@ -49,24 +49,25 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
 ; ARM-NEXT:    sub sp, #36
 ; ARM-NEXT:    str r3, [sp, #28] @ 4-byte Spill
 ; ARM-NEXT:    mov r6, r1
-; ARM-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; ARM-NEXT:    str r1, [sp, #8] @ 4-byte Spill
 ; ARM-NEXT:    movs r4, #0
+; ARM-NEXT:    str r4, [sp, #32] @ 4-byte Spill
 ; ARM-NEXT:    mov r5, r0
-; ARM-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; ARM-NEXT:    str r0, [sp, #20] @ 4-byte Spill
 ; ARM-NEXT:    mov r1, r4
 ; ARM-NEXT:    mov r7, r2
-; ARM-NEXT:    str r2, [sp, #12] @ 4-byte Spill
+; ARM-NEXT:    str r2, [sp, #16] @ 4-byte Spill
 ; ARM-NEXT:    mov r3, r4
 ; ARM-NEXT:    bl __aeabi_lmul
-; ARM-NEXT:    str r0, [sp, #24] @ 4-byte Spill
-; ARM-NEXT:    str r1, [sp, #32] @ 4-byte Spill
+; ARM-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; ARM-NEXT:    str r1, [sp, #24] @ 4-byte Spill
 ; ARM-NEXT:    mov r0, r6
 ; ARM-NEXT:    mov r1, r4
 ; ARM-NEXT:    mov r2, r7
 ; ARM-NEXT:    mov r3, r4
 ; ARM-NEXT:    bl __aeabi_lmul
 ; ARM-NEXT:    mov r6, r1
-; ARM-NEXT:    ldr r1, [sp, #32] @ 4-byte Reload
+; ARM-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
 ; ARM-NEXT:    adds r7, r0, r1
 ; ARM-NEXT:    adcs r6, r4
 ; ARM-NEXT:    mov r0, r5
@@ -76,108 +77,118 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
 ; ARM-NEXT:    mov r3, r4
 ; ARM-NEXT:    bl __aeabi_lmul
 ; ARM-NEXT:    adds r0, r0, r7
-; ARM-NEXT:    str r0, [sp, #20] @ 4-byte Spill
+; ARM-NEXT:    str r0, [sp, #24] @ 4-byte Spill
 ; ARM-NEXT:    adcs r1, r4
 ; ARM-NEXT:    adds r0, r6, r1
-; ARM-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; ARM-NEXT:    str r0, [sp, #12] @ 4-byte Spill
 ; ARM-NEXT:    mov r6, r4
 ; ARM-NEXT:    adcs r6, r4
-; ARM-NEXT:    ldr r7, [sp, #4] @ 4-byte Reload
+; ARM-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
 ; ARM-NEXT:    mov r0, r7
 ; ARM-NEXT:    mov r1, r4
 ; ARM-NEXT:    mov r2, r5
-; ARM-NEXT:    str r4, [sp, #32] @ 4-byte Spill
 ; ARM-NEXT:    mov r3, r4
 ; ARM-NEXT:    bl __aeabi_lmul
 ; ARM-NEXT:    mov r5, r1
-; ARM-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
+; ARM-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; ARM-NEXT:    adds r0, r0, r1
-; ARM-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; ARM-NEXT:    str r0, [sp, #12] @ 4-byte Spill
 ; ARM-NEXT:    adcs r5, r6
-; ARM-NEXT:    mov r4, r7
 ; ARM-NEXT:    asrs r2, r7, #31
-; ARM-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
-; ARM-NEXT:    ldr r7, [sp, #28] @ 4-byte Reload
-; ARM-NEXT:    mov r1, r7
+; ARM-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
+; ARM-NEXT:    ldr r4, [sp, #28] @ 4-byte Reload
+; ARM-NEXT:    mov r1, r4
 ; ARM-NEXT:    mov r3, r2
 ; ARM-NEXT:    bl __aeabi_lmul
 ; ARM-NEXT:    mov r6, r0
-; ARM-NEXT:    str r1, [sp, #12] @ 4-byte Spill
-; ARM-NEXT:    asrs r0, r7, #31
+; ARM-NEXT:    str r1, [sp, #16] @ 4-byte Spill
+; ARM-NEXT:    asrs r0, r4, #31
 ; ARM-NEXT:    mov r1, r0
-; ARM-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
-; ARM-NEXT:    mov r3, r4
+; ARM-NEXT:    ldr r2, [sp, #20] @ 4-byte Reload
+; ARM-NEXT:    mov r3, r7
 ; ARM-NEXT:    bl __aeabi_lmul
 ; ARM-NEXT:    adds r0, r0, r6
-; ARM-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; ARM-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
 ; ARM-NEXT:    adcs r1, r2
-; ARM-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; ARM-NEXT:    adds r3, r2, r0
+; ARM-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; ARM-NEXT:    adds r0, r2, r0
 ; ARM-NEXT:    adcs r1, r5
-; ARM-NEXT:    rsbs r2, r1, #0
-; ARM-NEXT:    adcs r2, r1
-; ARM-NEXT:    movs r0, #1
-; ARM-NEXT:    cmp r3, #1
-; ARM-NEXT:    mov r5, r0
+; ARM-NEXT:    rsbs r5, r1, #0
+; ARM-NEXT:    adcs r5, r1
+; ARM-NEXT:    movs r2, #1
+; ARM-NEXT:    str r0, [sp, #28] @ 4-byte Spill
+; ARM-NEXT:    cmp r0, #1
+; ARM-NEXT:    mov r3, r2
 ; ARM-NEXT:    bhi .LBB1_2
 ; ARM-NEXT:  @ %bb.1:
-; ARM-NEXT:    ldr r5, [sp, #32] @ 4-byte Reload
+; ARM-NEXT:    ldr r3, [sp, #32] @ 4-byte Reload
 ; ARM-NEXT:  .LBB1_2:
-; ARM-NEXT:    ands r2, r5
+; ARM-NEXT:    ands r5, r3
 ; ARM-NEXT:    cmp r1, #0
-; ARM-NEXT:    mov r5, r0
+; ARM-NEXT:    mov r3, r2
 ; ARM-NEXT:    bgt .LBB1_4
 ; ARM-NEXT:  @ %bb.3:
-; ARM-NEXT:    ldr r5, [sp, #32] @ 4-byte Reload
+; ARM-NEXT:    ldr r3, [sp, #32] @ 4-byte Reload
 ; ARM-NEXT:  .LBB1_4:
-; ARM-NEXT:    orrs r5, r2
-; ARM-NEXT:    lsls r2, r3, #30
-; ARM-NEXT:    ldr r6, [sp, #20] @ 4-byte Reload
-; ARM-NEXT:    lsrs r4, r6, #2
-; ARM-NEXT:    adds r2, r2, r4
-; ARM-NEXT:    lsls r4, r6, #30
-; ARM-NEXT:    ldr r6, [sp, #24] @ 4-byte Reload
-; ARM-NEXT:    lsrs r6, r6, #2
-; ARM-NEXT:    adds r7, r4, r6
-; ARM-NEXT:    ldr r4, [sp, #32] @ 4-byte Reload
-; ARM-NEXT:    mvns r6, r4
-; ARM-NEXT:    cmp r5, #0
-; ARM-NEXT:    beq .LBB1_6
+; ARM-NEXT:    orrs r3, r5
+; ARM-NEXT:    ldr r0, [sp, #32] @ 4-byte Reload
+; ARM-NEXT:    mvns r6, r0
+; ARM-NEXT:    cmp r3, #0
+; ARM-NEXT:    str r6, [sp, #20] @ 4-byte Spill
+; ARM-NEXT:    bne .LBB1_6
 ; ARM-NEXT:  @ %bb.5:
-; ARM-NEXT:    ldr r2, .LCPI1_0
+; ARM-NEXT:    ldr r0, [sp, #24] @ 4-byte Reload
+; ARM-NEXT:    lsls r0, r0, #30
+; ARM-NEXT:    ldr r4, [sp, #4] @ 4-byte Reload
+; ARM-NEXT:    lsrs r4, r4, #2
+; ARM-NEXT:    adds r0, r0, r4
+; ARM-NEXT:    str r0, [sp, #20] @ 4-byte Spill
 ; ARM-NEXT:  .LBB1_6:
-; ARM-NEXT:    mov r5, r6
-; ARM-NEXT:    bne .LBB1_8
+; ARM-NEXT:    adds r0, r1, #1
+; ARM-NEXT:    rsbs r7, r0, #0
+; ARM-NEXT:    adcs r7, r0
+; ARM-NEXT:    mvns r0, r2
+; ARM-NEXT:    ldr r5, [sp, #28] @ 4-byte Reload
+; ARM-NEXT:    cmp r5, r0
+; ARM-NEXT:    mov r0, r2
+; ARM-NEXT:    blo .LBB1_8
 ; ARM-NEXT:  @ %bb.7:
-; ARM-NEXT:    mov r5, r7
+; ARM-NEXT:    ldr r0, [sp, #32] @ 4-byte Reload
 ; ARM-NEXT:  .LBB1_8:
-; ARM-NEXT:    adds r4, r1, #1
-; ARM-NEXT:    rsbs r7, r4, #0
-; ARM-NEXT:    adcs r7, r4
-; ARM-NEXT:    mvns r4, r0
-; ARM-NEXT:    cmp r3, r4
-; ARM-NEXT:    mov r3, r0
-; ARM-NEXT:    blo .LBB1_10
+; ARM-NEXT:    ands r7, r0
+; ARM-NEXT:    cmp r1, r6
+; ARM-NEXT:    mov r6, r2
+; ARM-NEXT:    bge .LBB1_12
 ; ARM-NEXT:  @ %bb.9:
-; ARM-NEXT:    ldr r3, [sp, #32] @ 4-byte Reload
+; ARM-NEXT:    orrs r6, r7
+; ARM-NEXT:    beq .LBB1_13
 ; ARM-NEXT:  .LBB1_10:
-; ARM-NEXT:    ands r7, r3
-; ARM-NEXT:    cmp r1, r6
-; ARM-NEXT:    mov r3, r0
-; ARM-NEXT:    blt .LBB1_12
-; ARM-NEXT:  @ %bb.11:
-; ARM-NEXT:    ldr r3, [sp, #32] @ 4-byte Reload
-; ARM-NEXT:  .LBB1_12:
-; ARM-NEXT:    orrs r3, r7
-; ARM-NEXT:    lsls r1, r0, #31
 ; ARM-NEXT:    cmp r3, #0
 ; ARM-NEXT:    bne .LBB1_14
-; ARM-NEXT:  @ %bb.13:
-; ARM-NEXT:    str r5, [sp, #32] @ 4-byte Spill
+; ARM-NEXT:  .LBB1_11:
+; ARM-NEXT:    ldr r0, [sp, #28] @ 4-byte Reload
+; ARM-NEXT:    lsls r0, r0, #30
+; ARM-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
+; ARM-NEXT:    lsrs r1, r1, #2
+; ARM-NEXT:    adds r1, r0, r1
+; ARM-NEXT:    cmp r6, #0
+; ARM-NEXT:    bne .LBB1_15
+; ARM-NEXT:    b .LBB1_16
+; ARM-NEXT:  .LBB1_12:
+; ARM-NEXT:    ldr r6, [sp, #32] @ 4-byte Reload
+; ARM-NEXT:    orrs r6, r7
+; ARM-NEXT:    bne .LBB1_10
+; ARM-NEXT:  .LBB1_13:
+; ARM-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
+; ARM-NEXT:    str r0, [sp, #32] @ 4-byte Spill
+; ARM-NEXT:    cmp r3, #0
+; ARM-NEXT:    beq .LBB1_11
 ; ARM-NEXT:  .LBB1_14:
-; ARM-NEXT:    bne .LBB1_16
-; ARM-NEXT:  @ %bb.15:
-; ARM-NEXT:    mov r1, r2
+; ARM-NEXT:    ldr r1, .LCPI1_0
+; ARM-NEXT:    cmp r6, #0
+; ARM-NEXT:    beq .LBB1_16
+; ARM-NEXT:  .LBB1_15:
+; ARM-NEXT:    lsls r1, r2, #31
 ; ARM-NEXT:  .LBB1_16:
 ; ARM-NEXT:    ldr r0, [sp, #32] @ 4-byte Reload
 ; ARM-NEXT:    add sp, #36
@@ -271,44 +282,44 @@ define i64 @func5(i64 %x, i64 %y) {
 ; ARM-NEXT:    push {r4, r5, r6, r7, lr}
 ; ARM-NEXT:    .pad #28
 ; ARM-NEXT:    sub sp, #28
-; ARM-NEXT:    str r3, [sp, #24] @ 4-byte Spill
-; ARM-NEXT:    mov r4, r2
+; ARM-NEXT:    str r3, [sp, #12] @ 4-byte Spill
+; ARM-NEXT:    mov r5, r2
 ; ARM-NEXT:    str r2, [sp, #16] @ 4-byte Spill
-; ARM-NEXT:    mov r5, r1
+; ARM-NEXT:    mov r6, r1
 ; ARM-NEXT:    movs r7, #0
-; ARM-NEXT:    mov r6, r0
+; ARM-NEXT:    mov r4, r0
 ; ARM-NEXT:    str r0, [sp, #20] @ 4-byte Spill
 ; ARM-NEXT:    mov r1, r7
 ; ARM-NEXT:    mov r3, r7
 ; ARM-NEXT:    bl __aeabi_lmul
 ; ARM-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; ARM-NEXT:    str r1, [sp, #12] @ 4-byte Spill
-; ARM-NEXT:    mov r0, r5
+; ARM-NEXT:    str r1, [sp, #24] @ 4-byte Spill
+; ARM-NEXT:    mov r0, r6
 ; ARM-NEXT:    mov r1, r7
-; ARM-NEXT:    mov r2, r4
+; ARM-NEXT:    mov r2, r5
 ; ARM-NEXT:    mov r3, r7
 ; ARM-NEXT:    bl __aeabi_lmul
-; ARM-NEXT:    mov r4, r1
-; ARM-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; ARM-NEXT:    mov r5, r1
+; ARM-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
 ; ARM-NEXT:    adds r0, r0, r1
-; ARM-NEXT:    str r0, [sp, #12] @ 4-byte Spill
-; ARM-NEXT:    adcs r4, r7
-; ARM-NEXT:    mov r0, r6
+; ARM-NEXT:    str r0, [sp, #24] @ 4-byte Spill
+; ARM-NEXT:    adcs r5, r7
+; ARM-NEXT:    mov r0, r4
 ; ARM-NEXT:    mov r1, r7
-; ARM-NEXT:    ldr r2, [sp, #24] @ 4-byte Reload
+; ARM-NEXT:    ldr r4, [sp, #12] @ 4-byte Reload
+; ARM-NEXT:    mov r2, r4
 ; ARM-NEXT:    mov r3, r7
 ; ARM-NEXT:    bl __aeabi_lmul
-; ARM-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; ARM-NEXT:    ldr r2, [sp, #24] @ 4-byte Reload
 ; ARM-NEXT:    adds r0, r0, r2
-; ARM-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; ARM-NEXT:    str r0, [sp, #24] @ 4-byte Spill
 ; ARM-NEXT:    adcs r1, r7
-; ARM-NEXT:    adds r0, r4, r1
+; ARM-NEXT:    adds r0, r5, r1
 ; ARM-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; ARM-NEXT:    mov r6, r7
-; ARM-NEXT:    adcs r6, r7
-; ARM-NEXT:    mov r0, r5
+; ARM-NEXT:    mov r5, r7
+; ARM-NEXT:    adcs r5, r7
+; ARM-NEXT:    mov r0, r6
 ; ARM-NEXT:    mov r1, r7
-; ARM-NEXT:    ldr r4, [sp, #24] @ 4-byte Reload
 ; ARM-NEXT:    mov r2, r4
 ; ARM-NEXT:    mov r3, r7
 ; ARM-NEXT:    bl __aeabi_lmul
@@ -316,48 +327,50 @@ define i64 @func5(i64 %x, i64 %y) {
 ; ARM-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
 ; ARM-NEXT:    adds r0, r0, r1
 ; ARM-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; ARM-NEXT:    adcs r7, r6
-; ARM-NEXT:    asrs r2, r5, #31
+; ARM-NEXT:    adcs r7, r5
+; ARM-NEXT:    asrs r2, r6, #31
 ; ARM-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
-; ARM-NEXT:    mov r6, r4
+; ARM-NEXT:    mov r5, r4
 ; ARM-NEXT:    mov r1, r4
 ; ARM-NEXT:    mov r3, r2
 ; ARM-NEXT:    bl __aeabi_lmul
 ; ARM-NEXT:    mov r4, r0
 ; ARM-NEXT:    str r1, [sp, #16] @ 4-byte Spill
-; ARM-NEXT:    asrs r0, r6, #31
+; ARM-NEXT:    asrs r0, r5, #31
 ; ARM-NEXT:    mov r1, r0
 ; ARM-NEXT:    ldr r2, [sp, #20] @ 4-byte Reload
-; ARM-NEXT:    mov r3, r5
+; ARM-NEXT:    mov r3, r6
 ; ARM-NEXT:    bl __aeabi_lmul
-; ARM-NEXT:    ldr r3, [sp, #12] @ 4-byte Reload
 ; ARM-NEXT:    adds r0, r0, r4
+; ARM-NEXT:    ldr r4, [sp, #24] @ 4-byte Reload
 ; ARM-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
 ; ARM-NEXT:    adcs r1, r2
 ; ARM-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; ARM-NEXT:    adds r2, r2, r0
+; ARM-NEXT:    adds r3, r2, r0
 ; ARM-NEXT:    adcs r1, r7
-; ARM-NEXT:    asrs r0, r3, #31
-; ARM-NEXT:    eors r1, r0
-; ARM-NEXT:    eors r2, r0
-; ARM-NEXT:    orrs r2, r1
-; ARM-NEXT:    eors r5, r6
-; ARM-NEXT:    asrs r0, r5, #31
-; ARM-NEXT:    ldr r1, .LCPI4_0
-; ARM-NEXT:    eors r1, r0
-; ARM-NEXT:    mvns r0, r0
-; ARM-NEXT:    cmp r2, #0
-; ARM-NEXT:    beq .LBB4_3
+; ARM-NEXT:    asrs r2, r4, #31
+; ARM-NEXT:    eors r1, r2
+; ARM-NEXT:    eors r3, r2
+; ARM-NEXT:    orrs r3, r1
+; ARM-NEXT:    eors r6, r5
+; ARM-NEXT:    asrs r1, r6, #31
+; ARM-NEXT:    cmp r3, #0
+; ARM-NEXT:    bne .LBB4_3
 ; ARM-NEXT:  @ %bb.1:
+; ARM-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; ARM-NEXT:    cmp r3, #0
 ; ARM-NEXT:    beq .LBB4_4
 ; ARM-NEXT:  .LBB4_2:
+; ARM-NEXT:    ldr r2, .LCPI4_0
+; ARM-NEXT:    eors r1, r2
 ; ARM-NEXT:    add sp, #28
 ; ARM-NEXT:    pop {r4, r5, r6, r7, pc}
 ; ARM-NEXT:  .LBB4_3:
-; ARM-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; ARM-NEXT:    mvns r0, r1
+; ARM-NEXT:    cmp r3, #0
 ; ARM-NEXT:    bne .LBB4_2
 ; ARM-NEXT:  .LBB4_4:
-; ARM-NEXT:    mov r1, r3
+; ARM-NEXT:    mov r1, r4
 ; ARM-NEXT:    add sp, #28
 ; ARM-NEXT:    pop {r4, r5, r6, r7, pc}
 ; ARM-NEXT:    .p2align 2
@@ -470,67 +483,79 @@ define i64 @func7(i64 %x, i64 %y) nounwind {
 ; ARM-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
 ; ARM-NEXT:    ldr r3, [sp, #20] @ 4-byte Reload
 ; ARM-NEXT:    bl __aeabi_lmul
+; ARM-NEXT:    mov r2, r1
 ; ARM-NEXT:    adds r0, r0, r4
-; ARM-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
-; ARM-NEXT:    adcs r1, r2
-; ARM-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; ARM-NEXT:    adds r0, r2, r0
-; ARM-NEXT:    adcs r1, r7
-; ARM-NEXT:    rsbs r5, r1, #0
-; ARM-NEXT:    adcs r5, r1
-; ARM-NEXT:    movs r2, #1
+; ARM-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; ARM-NEXT:    adcs r2, r1
+; ARM-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
+; ARM-NEXT:    adds r0, r1, r0
+; ARM-NEXT:    adcs r2, r7
+; ARM-NEXT:    rsbs r5, r2, #0
+; ARM-NEXT:    adcs r5, r2
+; ARM-NEXT:    movs r4, #1
 ; ARM-NEXT:    str r0, [sp, #24] @ 4-byte Spill
 ; ARM-NEXT:    cmp r0, #0
-; ARM-NEXT:    mov r3, r2
-; ARM-NEXT:    bge .LBB6_2
+; ARM-NEXT:    mov r3, r4
+; ARM-NEXT:    bmi .LBB6_2
 ; ARM-NEXT:  @ %bb.1:
 ; ARM-NEXT:    mov r3, r6
 ; ARM-NEXT:  .LBB6_2:
-; ARM-NEXT:    mov r4, r2
-; ARM-NEXT:    bmi .LBB6_4
+; ARM-NEXT:    ands r5, r3
+; ARM-NEXT:    cmp r2, #0
+; ARM-NEXT:    mov r1, r4
+; ARM-NEXT:    mov r3, r4
+; ARM-NEXT:    bgt .LBB6_4
 ; ARM-NEXT:  @ %bb.3:
-; ARM-NEXT:    mov r4, r6
+; ARM-NEXT:    mov r3, r6
 ; ARM-NEXT:  .LBB6_4:
-; ARM-NEXT:    ands r5, r4
-; ARM-NEXT:    cmp r1, #0
-; ARM-NEXT:    mov r7, r2
-; ARM-NEXT:    bgt .LBB6_6
+; ARM-NEXT:    orrs r3, r5
+; ARM-NEXT:    mvns r4, r6
+; ARM-NEXT:    cmp r3, #0
+; ARM-NEXT:    mov r5, r4
+; ARM-NEXT:    bne .LBB6_6
 ; ARM-NEXT:  @ %bb.5:
-; ARM-NEXT:    mov r7, r6
+; ARM-NEXT:    ldr r5, [sp, #4] @ 4-byte Reload
 ; ARM-NEXT:  .LBB6_6:
-; ARM-NEXT:    orrs r7, r5
-; ARM-NEXT:    mvns r4, r6
-; ARM-NEXT:    cmp r7, #0
-; ARM-NEXT:    beq .LBB6_8
+; ARM-NEXT:    adds r0, r2, #1
+; ARM-NEXT:    rsbs r7, r0, #0
+; ARM-NEXT:    adcs r7, r0
+; ARM-NEXT:    ldr r0, [sp, #24] @ 4-byte Reload
+; ARM-NEXT:    cmp r0, #0
+; ARM-NEXT:    mov r0, r1
+; ARM-NEXT:    bge .LBB6_8
 ; ARM-NEXT:  @ %bb.7:
-; ARM-NEXT:    ldr r0, .LCPI6_0
-; ARM-NEXT:    str r0, [sp, #24] @ 4-byte Spill
+; ARM-NEXT:    mov r0, r6
 ; ARM-NEXT:  .LBB6_8:
-; ARM-NEXT:    mov r5, r4
-; ARM-NEXT:    bne .LBB6_10
+; ARM-NEXT:    ands r7, r0
+; ARM-NEXT:    cmp r2, r4
+; ARM-NEXT:    mov r0, r1
+; ARM-NEXT:    mov r2, r1
+; ARM-NEXT:    bge .LBB6_12
 ; ARM-NEXT:  @ %bb.9:
-; ARM-NEXT:    ldr r5, [sp, #4] @ 4-byte Reload
+; ARM-NEXT:    orrs r2, r7
+; ARM-NEXT:    beq .LBB6_13
 ; ARM-NEXT:  .LBB6_10:
-; ARM-NEXT:    adds r0, r1, #1
-; ARM-NEXT:    rsbs r7, r0, #0
-; ARM-NEXT:    adcs r7, r0
-; ARM-NEXT:    ands r7, r3
-; ARM-NEXT:    cmp r1, r4
-; ARM-NEXT:    mov r3, r2
-; ARM-NEXT:    blt .LBB6_12
-; ARM-NEXT:  @ %bb.11:
-; ARM-NEXT:    mov r3, r6
-; ARM-NEXT:  .LBB6_12:
-; ARM-NEXT:    orrs r3, r7
-; ARM-NEXT:    lsls r1, r2, #31
 ; ARM-NEXT:    cmp r3, #0
 ; ARM-NEXT:    bne .LBB6_14
-; ARM-NEXT:  @ %bb.13:
+; ARM-NEXT:  .LBB6_11:
+; ARM-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
+; ARM-NEXT:    cmp r2, #0
+; ARM-NEXT:    bne .LBB6_15
+; ARM-NEXT:    b .LBB6_16
+; ARM-NEXT:  .LBB6_12:
+; ARM-NEXT:    mov r2, r6
+; ARM-NEXT:    orrs r2, r7
+; ARM-NEXT:    bne .LBB6_10
+; ARM-NEXT:  .LBB6_13:
 ; ARM-NEXT:    mov r6, r5
+; ARM-NEXT:    cmp r3, #0
+; ARM-NEXT:    beq .LBB6_11
 ; ARM-NEXT:  .LBB6_14:
-; ARM-NEXT:    bne .LBB6_16
-; ARM-NEXT:  @ %bb.15:
-; ARM-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
+; ARM-NEXT:    ldr r1, .LCPI6_0
+; ARM-NEXT:    cmp r2, #0
+; ARM-NEXT:    beq .LBB6_16
+; ARM-NEXT:  .LBB6_15:
+; ARM-NEXT:    lsls r1, r0, #31
 ; ARM-NEXT:  .LBB6_16:
 ; ARM-NEXT:    mov r0, r6
 ; ARM-NEXT:    add sp, #28
@@ -552,23 +577,23 @@ define i64 @func8(i64 %x, i64 %y) nounwind {
 ; ARM-NEXT:    sub sp, #28
 ; ARM-NEXT:    str r3, [sp, #24] @ 4-byte Spill
 ; ARM-NEXT:    mov r5, r2
-; ARM-NEXT:    str r2, [sp, #8] @ 4-byte Spill
+; ARM-NEXT:    str r2, [sp, #12] @ 4-byte Spill
 ; ARM-NEXT:    mov r4, r1
 ; ARM-NEXT:    str r1, [sp, #20] @ 4-byte Spill
 ; ARM-NEXT:    movs r7, #0
 ; ARM-NEXT:    mov r6, r0
-; ARM-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; ARM-NEXT:    str r0, [sp, #16] @ 4-byte Spill
 ; ARM-NEXT:    mov r1, r7
 ; ARM-NEXT:    mov r3, r7
 ; ARM-NEXT:    bl __aeabi_lmul
-; ARM-NEXT:    str r1, [sp, #16] @ 4-byte Spill
+; ARM-NEXT:    str r1, [sp, #8] @ 4-byte Spill
 ; ARM-NEXT:    mov r0, r4
 ; ARM-NEXT:    mov r1, r7
 ; ARM-NEXT:    mov r2, r5
 ; ARM-NEXT:    mov r3, r7
 ; ARM-NEXT:    bl __aeabi_lmul
 ; ARM-NEXT:    mov r5, r1
-; ARM-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
+; ARM-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
 ; ARM-NEXT:    adds r4, r0, r1
 ; ARM-NEXT:    adcs r5, r7
 ; ARM-NEXT:    mov r0, r6
@@ -578,10 +603,10 @@ define i64 @func8(i64 %x, i64 %y) nounwind {
 ; ARM-NEXT:    mov r3, r7
 ; ARM-NEXT:    bl __aeabi_lmul
 ; ARM-NEXT:    adds r0, r0, r4
-; ARM-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; ARM-NEXT:    str r0, [sp, #4] @ 4-byte Spill
 ; ARM-NEXT:    adcs r1, r7
 ; ARM-NEXT:    adds r0, r5, r1
-; ARM-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; ARM-NEXT:    str r0, [sp, #8] @ 4-byte Spill
 ; ARM-NEXT:    mov r4, r7
 ; ARM-NEXT:    adcs r4, r7
 ; ARM-NEXT:    ldr r5, [sp, #20] @ 4-byte Reload
@@ -591,70 +616,75 @@ define i64 @func8(i64 %x, i64 %y) nounwind {
 ; ARM-NEXT:    mov r3, r7
 ; ARM-NEXT:    bl __aeabi_lmul
 ; ARM-NEXT:    mov r6, r1
-; ARM-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; ARM-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
 ; ARM-NEXT:    adds r0, r0, r1
-; ARM-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; ARM-NEXT:    str r0, [sp, #8] @ 4-byte Spill
 ; ARM-NEXT:    adcs r6, r4
 ; ARM-NEXT:    asrs r2, r5, #31
-; ARM-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; ARM-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
 ; ARM-NEXT:    ldr r4, [sp, #24] @ 4-byte Reload
 ; ARM-NEXT:    mov r1, r4
 ; ARM-NEXT:    mov r3, r2
 ; ARM-NEXT:    bl __aeabi_lmul
 ; ARM-NEXT:    mov r5, r0
-; ARM-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; ARM-NEXT:    str r1, [sp, #12] @ 4-byte Spill
 ; ARM-NEXT:    asrs r0, r4, #31
 ; ARM-NEXT:    mov r1, r0
-; ARM-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; ARM-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
 ; ARM-NEXT:    ldr r3, [sp, #20] @ 4-byte Reload
 ; ARM-NEXT:    bl __aeabi_lmul
 ; ARM-NEXT:    adds r0, r0, r5
-; ARM-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; ARM-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
 ; ARM-NEXT:    adcs r1, r2
-; ARM-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
-; ARM-NEXT:    adds r2, r2, r0
+; ARM-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; ARM-NEXT:    adds r0, r2, r0
 ; ARM-NEXT:    adcs r1, r6
-; ARM-NEXT:    lsls r0, r1, #1
-; ARM-NEXT:    lsrs r3, r2, #31
-; ARM-NEXT:    adds r0, r0, r3
-; ARM-NEXT:    lsls r2, r2, #1
-; ARM-NEXT:    ldr r3, [sp, #16] @ 4-byte Reload
-; ARM-NEXT:    lsrs r3, r3, #31
-; ARM-NEXT:    adds r2, r2, r3
-; ARM-NEXT:    mvns r3, r7
-; ARM-NEXT:    ldr r4, .LCPI7_1
-; ARM-NEXT:    cmp r1, r4
-; ARM-NEXT:    ble .LBB7_2
+; ARM-NEXT:    ldr r2, .LCPI7_0
+; ARM-NEXT:    cmp r1, r2
+; ARM-NEXT:    bgt .LBB7_2
 ; ARM-NEXT:  @ %bb.1:
-; ARM-NEXT:    ldr r0, .LCPI7_0
+; ARM-NEXT:    lsls r3, r0, #1
+; ARM-NEXT:    ldr r4, [sp, #4] @ 4-byte Reload
+; ARM-NEXT:    lsrs r4, r4, #31
+; ARM-NEXT:    adds r5, r3, r4
+; ARM-NEXT:    b .LBB7_3
 ; ARM-NEXT:  .LBB7_2:
-; ARM-NEXT:    bgt .LBB7_4
-; ARM-NEXT:  @ %bb.3:
-; ARM-NEXT:    mov r3, r2
-; ARM-NEXT:  .LBB7_4:
-; ARM-NEXT:    movs r2, #1
-; ARM-NEXT:    lsls r2, r2, #31
-; ARM-NEXT:    movs r4, #3
-; ARM-NEXT:    lsls r4, r4, #30
-; ARM-NEXT:    cmp r1, r4
-; ARM-NEXT:    blt .LBB7_6
-; ARM-NEXT:  @ %bb.5:
-; ARM-NEXT:    mov r7, r3
-; ARM-NEXT:  .LBB7_6:
+; ARM-NEXT:    mvns r5, r7
+; ARM-NEXT:  .LBB7_3:
+; ARM-NEXT:    movs r3, #3
+; ARM-NEXT:    lsls r3, r3, #30
+; ARM-NEXT:    cmp r1, r3
+; ARM-NEXT:    blt .LBB7_5
+; ARM-NEXT:  @ %bb.4:
+; ARM-NEXT:    mov r7, r5
+; ARM-NEXT:  .LBB7_5:
+; ARM-NEXT:    cmp r1, r2
+; ARM-NEXT:    bgt .LBB7_7
+; ARM-NEXT:  @ %bb.6:
+; ARM-NEXT:    lsls r2, r1, #1
+; ARM-NEXT:    lsrs r0, r0, #31
+; ARM-NEXT:    adds r2, r2, r0
+; ARM-NEXT:    cmp r1, r3
 ; ARM-NEXT:    blt .LBB7_8
-; ARM-NEXT:  @ %bb.7:
-; ARM-NEXT:    mov r2, r0
+; ARM-NEXT:    b .LBB7_9
+; ARM-NEXT:  .LBB7_7:
+; ARM-NEXT:    ldr r2, .LCPI7_1
+; ARM-NEXT:    cmp r1, r3
+; ARM-NEXT:    bge .LBB7_9
 ; ARM-NEXT:  .LBB7_8:
+; ARM-NEXT:    movs r0, #1
+; ARM-NEXT:    lsls r2, r0, #31
+; ARM-NEXT:  .LBB7_9:
 ; ARM-NEXT:    mov r0, r7
 ; ARM-NEXT:    mov r1, r2
 ; ARM-NEXT:    add sp, #28
 ; ARM-NEXT:    pop {r4, r5, r6, r7, pc}
 ; ARM-NEXT:    .p2align 2
-; ARM-NEXT:  @ %bb.9:
+; ARM-NEXT:  @ %bb.10:
 ; ARM-NEXT:  .LCPI7_0:
-; ARM-NEXT:    .long 2147483647 @ 0x7fffffff
-; ARM-NEXT:  .LCPI7_1:
 ; ARM-NEXT:    .long 1073741823 @ 0x3fffffff
+; ARM-NEXT:  .LCPI7_1:
+; ARM-NEXT:    .long 2147483647 @ 0x7fffffff
   %tmp = call i64 @llvm.smul.fix.sat.i64(i64 %x, i64 %y, i32 63)
   ret i64 %tmp
 }

diff  --git a/llvm/test/CodeGen/Thumb/stack-guard-xo.ll b/llvm/test/CodeGen/Thumb/stack-guard-xo.ll
index 421583d07ac7fe..c344e4f7fc022d 100644
--- a/llvm/test/CodeGen/Thumb/stack-guard-xo.ll
+++ b/llvm/test/CodeGen/Thumb/stack-guard-xo.ll
@@ -33,9 +33,9 @@ define dso_local i32 @main() #0 {
 ; V6M-LABEL: main:
 ; V6M:       @ %bb.0: @ %entry
 ; V6M-NEXT:    push {r7, lr}
-; V6M-NEXT:    sub sp, #24
+; V6M-NEXT:    sub sp, #
 ; V6M-NEXT:    movs r0, #0
-; V6M-NEXT:    str r0, [sp, #4]
+; V6M-NEXT:    str r0, [sp, #
 ; V6M-NEXT:    mrs r12, apsr
 ; V6M-NEXT:    movs r0, :upper8_15:__stack_chk_guard
 ; V6M-NEXT:    lsls r0, r0, #8
@@ -46,10 +46,10 @@ define dso_local i32 @main() #0 {
 ; V6M-NEXT:    adds r0, :lower0_7:__stack_chk_guard
 ; V6M-NEXT:    msr apsr, r12
 ; V6M-NEXT:    ldr r0, [r0]
-; V6M-NEXT:    str r0, [sp, #20]
-; V6M-NEXT:    add r0, sp, #8
+; V6M-NEXT:    str r0, [sp, #
+; V6M-NEXT:    add r0, sp, #
 ; V6M-NEXT:    ldrb r0, [r0]
-; V6M-NEXT:    ldr r1, [sp, #20]
+; V6M-NEXT:    ldr r1, [sp, #
 ; V6M-NEXT:    mrs r12, apsr
 ; V6M-NEXT:    movs r2, :upper8_15:__stack_chk_guard
 ; V6M-NEXT:    lsls r2, r2, #8
@@ -63,7 +63,7 @@ define dso_local i32 @main() #0 {
 ; V6M-NEXT:    cmp r2, r1
 ; V6M-NEXT:    bne .LBB0_2
 ; V6M-NEXT:  @ %bb.1: @ %entry
-; V6M-NEXT:    add sp, #24
+; V6M-NEXT:    add sp, #
 ; V6M-NEXT:    pop {r7, pc}
 ; V6M-NEXT:  .LBB0_2: @ %entry
 ; V6M-NEXT:    bl __stack_chk_fail
@@ -105,177 +105,77 @@ entry:
 @bb = hidden local_unnamed_addr global i64 0, align 8
 
 define dso_local i64 @cc() local_unnamed_addr #1 {
-; CHECK-LABEL: cc:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    push {r4, r5, r7, lr}
-; CHECK-NEXT:    sub sp, #8
-; CHECK-NEXT:    movs r0, #1
-; CHECK-NEXT:    lsls r3, r0, #31
-; CHECK-NEXT:    ldr r0, .LCPI1_0
-; CHECK-NEXT:    ldr r2, [r0]
-; CHECK-NEXT:    asrs r4, r2, #31
-; CHECK-NEXT:    eors r3, r4
-; CHECK-NEXT:    ldr r0, .LCPI1_1
-; CHECK-NEXT:    ldm r0!, {r1, r5}
-; CHECK-NEXT:    subs r0, r2, r1
-; CHECK-NEXT:    sbcs r3, r5
-; CHECK-NEXT:    subs r0, r2, r1
-; CHECK-NEXT:    ldr r1, .LCPI1_2
-; CHECK-NEXT:    ldr r1, [r1]
-; CHECK-NEXT:    str r1, [sp, #4]
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    sbcs r1, r5
-; CHECK-NEXT:    ands r3, r4
-; CHECK-NEXT:    ands r2, r0
-; CHECK-NEXT:    mov r4, r2
-; CHECK-NEXT:    orrs r4, r3
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    beq .LBB1_2
-; CHECK-NEXT:  @ %bb.1: @ %entry
-; CHECK-NEXT:    mov r1, r3
-; CHECK-NEXT:  .LBB1_2: @ %entry
-; CHECK-NEXT:    beq .LBB1_4
-; CHECK-NEXT:  @ %bb.3: @ %entry
-; CHECK-NEXT:    mov r0, r2
-; CHECK-NEXT:  .LBB1_4: @ %entry
-; CHECK-NEXT:    ldr r2, [sp, #4]
-; CHECK-NEXT:    ldr r3, .LCPI1_2
-; CHECK-NEXT:    ldr r3, [r3]
-; CHECK-NEXT:    cmp r3, r2
-; CHECK-NEXT:    bne .LBB1_6
-; CHECK-NEXT:  @ %bb.5: @ %entry
-; CHECK-NEXT:    add sp, #8
-; CHECK-NEXT:    pop {r4, r5, r7, pc}
-; CHECK-NEXT:  .LBB1_6: @ %entry
-; CHECK-NEXT:    bl __stack_chk_fail
-; CHECK-NEXT:    .p2align 2
-; CHECK-NEXT:  @ %bb.7:
-; CHECK-NEXT:  .LCPI1_0:
-; CHECK-NEXT:    .long aa
-; CHECK-NEXT:  .LCPI1_1:
-; CHECK-NEXT:    .long bb
-; CHECK-NEXT:  .LCPI1_2:
-; CHECK-NEXT:    .long __stack_chk_guard
-;
 ; V6M-LABEL: cc:
 ; V6M:       @ %bb.0: @ %entry
-; V6M-NEXT:    push {r4, r5, r7, lr}
-; V6M-NEXT:    sub sp, #8
-; V6M-NEXT:    movs r0, #1
-; V6M-NEXT:    lsls r3, r0, #31
-; V6M-NEXT:    movs r0, :upper8_15:aa
-; V6M-NEXT:    lsls r0, r0, #8
-; V6M-NEXT:    adds r0, :upper0_7:aa
-; V6M-NEXT:    lsls r0, r0, #8
-; V6M-NEXT:    adds r0, :lower8_15:aa
-; V6M-NEXT:    lsls r0, r0, #8
-; V6M-NEXT:    adds r0, :lower0_7:aa
-; V6M-NEXT:    ldr r2, [r0]
-; V6M-NEXT:    asrs r4, r2, #31
-; V6M-NEXT:    eors r3, r4
-; V6M-NEXT:    movs r0, :upper8_15:bb
-; V6M-NEXT:    lsls r0, r0, #8
-; V6M-NEXT:    adds r0, :upper0_7:bb
-; V6M-NEXT:    lsls r0, r0, #8
-; V6M-NEXT:    adds r0, :lower8_15:bb
-; V6M-NEXT:    lsls r0, r0, #8
-; V6M-NEXT:    adds r0, :lower0_7:bb
-; V6M-NEXT:    ldm r0!, {r1, r5}
-; V6M-NEXT:    subs r0, r2, r1
-; V6M-NEXT:    sbcs r3, r5
-; V6M-NEXT:    subs r0, r2, r1
-; V6M-NEXT:    mrs r12, apsr
-; V6M-NEXT:    movs r1, :upper8_15:__stack_chk_guard
-; V6M-NEXT:    lsls r1, r1, #8
-; V6M-NEXT:    adds r1, :upper0_7:__stack_chk_guard
-; V6M-NEXT:    lsls r1, r1, #8
-; V6M-NEXT:    adds r1, :lower8_15:__stack_chk_guard
-; V6M-NEXT:    lsls r1, r1, #8
-; V6M-NEXT:    adds r1, :lower0_7:__stack_chk_guard
-; V6M-NEXT:    msr apsr, r12
-; V6M-NEXT:    ldr r1, [r1]
-; V6M-NEXT:    str r1, [sp, #4]
-; V6M-NEXT:    mov r1, r4
-; V6M-NEXT:    sbcs r1, r5
-; V6M-NEXT:    ands r3, r4
-; V6M-NEXT:    ands r2, r0
-; V6M-NEXT:    mov r4, r2
-; V6M-NEXT:    orrs r4, r3
-; V6M-NEXT:    cmp r4, #0
-; V6M-NEXT:    beq .LBB1_2
-; V6M-NEXT:  @ %bb.1: @ %entry
-; V6M-NEXT:    mov r1, r3
-; V6M-NEXT:  .LBB1_2: @ %entry
-; V6M-NEXT:    beq .LBB1_4
-; V6M-NEXT:  @ %bb.3: @ %entry
-; V6M-NEXT:    mov r0, r2
-; V6M-NEXT:  .LBB1_4: @ %entry
-; V6M-NEXT:    ldr r2, [sp, #4]
-; V6M-NEXT:    mrs r12, apsr
-; V6M-NEXT:    movs r3, :upper8_15:__stack_chk_guard
-; V6M-NEXT:    lsls r3, r3, #8
-; V6M-NEXT:    adds r3, :upper0_7:__stack_chk_guard
-; V6M-NEXT:    lsls r3, r3, #8
-; V6M-NEXT:    adds r3, :lower8_15:__stack_chk_guard
-; V6M-NEXT:    lsls r3, r3, #8
-; V6M-NEXT:    adds r3, :lower0_7:__stack_chk_guard
-; V6M-NEXT:    msr apsr, r12
-; V6M-NEXT:    ldr r3, [r3]
-; V6M-NEXT:    cmp r3, r2
-; V6M-NEXT:    bne .LBB1_6
-; V6M-NEXT:  @ %bb.5: @ %entry
-; V6M-NEXT:    add sp, #8
-; V6M-NEXT:    pop {r4, r5, r7, pc}
-; V6M-NEXT:  .LBB1_6: @ %entry
-; V6M-NEXT:    bl __stack_chk_fail
-;
-; V8MBASE-LABEL: cc:
-; V8MBASE:       @ %bb.0: @ %entry
-; V8MBASE-NEXT:    push {r4, r5, r7, lr}
-; V8MBASE-NEXT:    sub sp, #8
-; V8MBASE-NEXT:    movs r0, #1
-; V8MBASE-NEXT:    lsls r3, r0, #31
-; V8MBASE-NEXT:    movw r0, :lower16:aa
-; V8MBASE-NEXT:    movt r0, :upper16:aa
-; V8MBASE-NEXT:    ldr r2, [r0]
-; V8MBASE-NEXT:    asrs r4, r2, #31
-; V8MBASE-NEXT:    eors r3, r4
-; V8MBASE-NEXT:    movw r0, :lower16:bb
-; V8MBASE-NEXT:    movt r0, :upper16:bb
-; V8MBASE-NEXT:    ldm r0!, {r1, r5}
-; V8MBASE-NEXT:    subs r0, r2, r1
-; V8MBASE-NEXT:    sbcs r3, r5
-; V8MBASE-NEXT:    subs r0, r2, r1
-; V8MBASE-NEXT:    movw r1, :lower16:__stack_chk_guard
-; V8MBASE-NEXT:    movt r1, :upper16:__stack_chk_guard
-; V8MBASE-NEXT:    ldr r1, [r1]
-; V8MBASE-NEXT:    str r1, [sp, #4]
-; V8MBASE-NEXT:    mov r1, r4
-; V8MBASE-NEXT:    sbcs r1, r5
-; V8MBASE-NEXT:    ands r3, r4
-; V8MBASE-NEXT:    ands r2, r0
-; V8MBASE-NEXT:    mov r4, r2
-; V8MBASE-NEXT:    orrs r4, r3
-; V8MBASE-NEXT:    cmp r4, #0
-; V8MBASE-NEXT:    beq .LBB1_2
-; V8MBASE-NEXT:  @ %bb.1: @ %entry
-; V8MBASE-NEXT:    mov r1, r3
-; V8MBASE-NEXT:  .LBB1_2: @ %entry
-; V8MBASE-NEXT:    beq .LBB1_4
-; V8MBASE-NEXT:  @ %bb.3: @ %entry
-; V8MBASE-NEXT:    mov r0, r2
-; V8MBASE-NEXT:  .LBB1_4: @ %entry
-; V8MBASE-NEXT:    ldr r2, [sp, #4]
-; V8MBASE-NEXT:    movw r3, :lower16:__stack_chk_guard
-; V8MBASE-NEXT:    movt r3, :upper16:__stack_chk_guard
-; V8MBASE-NEXT:    ldr r3, [r3]
-; V8MBASE-NEXT:    cmp r3, r2
-; V8MBASE-NEXT:    bne .LBB1_6
-; V8MBASE-NEXT:  @ %bb.5: @ %entry
-; V8MBASE-NEXT:    add sp, #8
-; V8MBASE-NEXT:    pop {r4, r5, r7, pc}
-; V8MBASE-NEXT:  .LBB1_6: @ %entry
-; V8MBASE-NEXT:    bl __stack_chk_fail
+; V6M-NEXT:	push	{r4, r5, r7, lr}
+; V6M-NEXT:	sub	sp, #8
+; V6M-NEXT:	movs	r0, #1
+; V6M-NEXT:	lsls	r3, r0, #31
+; V6M-NEXT:	movs	r0, :upper8_15:aa
+; V6M-NEXT:	lsls	r0, r0, #8
+; V6M-NEXT:	adds	r0, :upper0_7:aa
+; V6M-NEXT:	lsls	r0, r0, #8
+; V6M-NEXT:	adds	r0, :lower8_15:aa
+; V6M-NEXT:	lsls	r0, r0, #8
+; V6M-NEXT:	adds	r0, :lower0_7:aa
+; V6M-NEXT:	ldr	r2, [r0]
+; V6M-NEXT:	asrs	r4, r2, #31
+; V6M-NEXT:	eors	r3, r4
+; V6M-NEXT:	movs	r0, :upper8_15:bb
+; V6M-NEXT:	lsls	r0, r0, #8
+; V6M-NEXT:	adds	r0, :upper0_7:bb
+; V6M-NEXT:	lsls	r0, r0, #8
+; V6M-NEXT:	adds	r0, :lower8_15:bb
+; V6M-NEXT:	lsls	r0, r0, #8
+; V6M-NEXT:	adds	r0, :lower0_7:bb
+; V6M-NEXT:	ldm	r0!, {r1, r5}
+; V6M-NEXT:	subs	r0, r2, r1
+; V6M-NEXT:	sbcs	r3, r5
+; V6M-NEXT:	subs	r0, r2, r1
+; V6M-NEXT:	mrs	r12, apsr
+; V6M-NEXT:	movs	r1, :upper8_15:__stack_chk_guard
+; V6M-NEXT:	lsls	r1, r1, #8
+; V6M-NEXT:	adds	r1, :upper0_7:__stack_chk_guard
+; V6M-NEXT:	lsls	r1, r1, #8
+; V6M-NEXT:	adds	r1, :lower8_15:__stack_chk_guard
+; V6M-NEXT:	lsls	r1, r1, #8
+; V6M-NEXT:	adds	r1, :lower0_7:__stack_chk_guard
+; V6M-NEXT:	msr	apsr, r12
+; V6M-NEXT:	ldr	r1, [r1]
+; V6M-NEXT:	str	r1, [sp, #4]
+; V6M-NEXT:	mov	r1, r4
+; V6M-NEXT:	sbcs	r1, r5
+; V6M-NEXT:	ands	r3, r4
+; V6M-NEXT:	ands	r2, r0
+; V6M-NEXT:	mov	r4, r2
+; V6M-NEXT:	orrs	r4, r3
+; V6M-NEXT:	beq	.LBB1_2
+; V6M-NEXT:   @ %bb.1:                                @ %entry
+; V6M-NEXT:	mov	r1, r3
+; V6M-NEXT:   .LBB1_2:                                @ %entry
+; V6M-NEXT:	cmp	r4, #0
+; V6M-NEXT:	beq	.LBB1_4
+; V6M-NEXT:   @ %bb.3:                                @ %entry
+; V6M-NEXT:	mov	r0, r2
+; V6M-NEXT:   .LBB1_4:                                @ %entry
+; V6M-NEXT:	ldr	r2, [sp, #4]
+; V6M-NEXT:	mrs	r12, apsr
+; V6M-NEXT:	movs	r3, :upper8_15:__stack_chk_guard
+; V6M-NEXT:	lsls	r3, r3, #8
+; V6M-NEXT:	adds	r3, :upper0_7:__stack_chk_guard
+; V6M-NEXT:	lsls	r3, r3, #8
+; V6M-NEXT:	adds	r3, :lower8_15:__stack_chk_guard
+; V6M-NEXT:	lsls	r3, r3, #8
+; V6M-NEXT:	adds	r3, :lower0_7:__stack_chk_guard
+; V6M-NEXT:	msr	apsr, r12
+; V6M-NEXT:	ldr	r3, [r3]
+; V6M-NEXT:	cmp	r3, r2
+; V6M-NEXT:	bne	.LBB1_6
+; V6M-NEXT:   @ %bb.5:                                @ %entry
+; V6M-NEXT:	add	sp, #8
+; V6M-NEXT:	pop	{r4, r5, r7, pc}
+; V6M-NEXT:   .LBB1_6:                                @ %entry
+; V6M-NEXT:	bl	__stack_chk_fail
 
 entry:
   %0 = load i32, ptr @aa, align 4

diff  --git a/llvm/test/CodeGen/Thumb/umul_fix_sat.ll b/llvm/test/CodeGen/Thumb/umul_fix_sat.ll
index a43b22102c24bf..fa88024315211b 100644
--- a/llvm/test/CodeGen/Thumb/umul_fix_sat.ll
+++ b/llvm/test/CodeGen/Thumb/umul_fix_sat.ll
@@ -38,26 +38,26 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
 ; ARM-NEXT:    sub sp, #28
 ; ARM-NEXT:    str r3, [sp, #24] @ 4-byte Spill
 ; ARM-NEXT:    mov r5, r1
-; ARM-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; ARM-NEXT:    str r1, [sp, #8] @ 4-byte Spill
 ; ARM-NEXT:    movs r4, #0
 ; ARM-NEXT:    mov r6, r0
-; ARM-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; ARM-NEXT:    str r0, [sp, #12] @ 4-byte Spill
 ; ARM-NEXT:    mov r1, r4
 ; ARM-NEXT:    mov r7, r2
-; ARM-NEXT:    str r2, [sp, #12] @ 4-byte Spill
+; ARM-NEXT:    str r2, [sp, #16] @ 4-byte Spill
 ; ARM-NEXT:    mov r3, r4
 ; ARM-NEXT:    bl __aeabi_lmul
-; ARM-NEXT:    str r0, [sp, #20] @ 4-byte Spill
-; ARM-NEXT:    str r1, [sp, #16] @ 4-byte Spill
+; ARM-NEXT:    str r0, [sp] @ 4-byte Spill
+; ARM-NEXT:    str r1, [sp, #20] @ 4-byte Spill
 ; ARM-NEXT:    mov r0, r5
 ; ARM-NEXT:    mov r1, r4
 ; ARM-NEXT:    mov r2, r7
 ; ARM-NEXT:    mov r3, r4
 ; ARM-NEXT:    bl __aeabi_lmul
 ; ARM-NEXT:    mov r5, r1
-; ARM-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
+; ARM-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
 ; ARM-NEXT:    adds r0, r0, r1
-; ARM-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; ARM-NEXT:    str r0, [sp, #20] @ 4-byte Spill
 ; ARM-NEXT:    adcs r5, r4
 ; ARM-NEXT:    mov r0, r6
 ; ARM-NEXT:    mov r1, r4
@@ -65,33 +65,33 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
 ; ARM-NEXT:    mov r2, r7
 ; ARM-NEXT:    mov r3, r4
 ; ARM-NEXT:    bl __aeabi_lmul
-; ARM-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
+; ARM-NEXT:    ldr r2, [sp, #20] @ 4-byte Reload
 ; ARM-NEXT:    adds r0, r0, r2
-; ARM-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; ARM-NEXT:    str r0, [sp, #20] @ 4-byte Spill
 ; ARM-NEXT:    adcs r1, r4
 ; ARM-NEXT:    adds r0, r5, r1
-; ARM-NEXT:    str r0, [sp] @ 4-byte Spill
+; ARM-NEXT:    str r0, [sp, #4] @ 4-byte Spill
 ; ARM-NEXT:    mov r6, r4
 ; ARM-NEXT:    adcs r6, r4
-; ARM-NEXT:    ldr r5, [sp, #4] @ 4-byte Reload
+; ARM-NEXT:    ldr r5, [sp, #8] @ 4-byte Reload
 ; ARM-NEXT:    mov r0, r5
 ; ARM-NEXT:    mov r1, r4
 ; ARM-NEXT:    mov r2, r7
 ; ARM-NEXT:    mov r3, r4
 ; ARM-NEXT:    bl __aeabi_lmul
 ; ARM-NEXT:    mov r7, r1
-; ARM-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; ARM-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
 ; ARM-NEXT:    adds r0, r0, r1
-; ARM-NEXT:    str r0, [sp] @ 4-byte Spill
+; ARM-NEXT:    str r0, [sp, #4] @ 4-byte Spill
 ; ARM-NEXT:    adcs r7, r6
-; ARM-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; ARM-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
 ; ARM-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
 ; ARM-NEXT:    mov r2, r4
 ; ARM-NEXT:    mov r3, r4
 ; ARM-NEXT:    bl __aeabi_lmul
 ; ARM-NEXT:    mov r6, r0
 ; ARM-NEXT:    str r1, [sp, #24] @ 4-byte Spill
-; ARM-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; ARM-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
 ; ARM-NEXT:    mov r1, r5
 ; ARM-NEXT:    mov r2, r4
 ; ARM-NEXT:    mov r3, r4
@@ -99,33 +99,34 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
 ; ARM-NEXT:    adds r0, r0, r6
 ; ARM-NEXT:    ldr r2, [sp, #24] @ 4-byte Reload
 ; ARM-NEXT:    adcs r1, r2
-; ARM-NEXT:    ldr r2, [sp] @ 4-byte Reload
-; ARM-NEXT:    adds r0, r2, r0
+; ARM-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
+; ARM-NEXT:    adds r2, r2, r0
 ; ARM-NEXT:    adcs r1, r7
-; ARM-NEXT:    lsrs r5, r0, #2
-; ARM-NEXT:    orrs r5, r1
-; ARM-NEXT:    lsls r0, r0, #30
-; ARM-NEXT:    ldr r3, [sp, #16] @ 4-byte Reload
-; ARM-NEXT:    lsrs r1, r3, #2
-; ARM-NEXT:    adds r2, r0, r1
-; ARM-NEXT:    lsls r0, r3, #30
-; ARM-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
-; ARM-NEXT:    lsrs r1, r1, #2
-; ARM-NEXT:    adds r3, r0, r1
+; ARM-NEXT:    lsrs r3, r2, #2
+; ARM-NEXT:    orrs r3, r1
 ; ARM-NEXT:    mvns r1, r4
-; ARM-NEXT:    cmp r5, #0
+; ARM-NEXT:    cmp r3, #0
 ; ARM-NEXT:    mov r0, r1
 ; ARM-NEXT:    beq .LBB1_3
 ; ARM-NEXT:  @ %bb.1:
+; ARM-NEXT:    cmp r3, #0
 ; ARM-NEXT:    beq .LBB1_4
 ; ARM-NEXT:  .LBB1_2:
 ; ARM-NEXT:    add sp, #28
 ; ARM-NEXT:    pop {r4, r5, r6, r7, pc}
 ; ARM-NEXT:  .LBB1_3:
-; ARM-NEXT:    mov r0, r3
+; ARM-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
+; ARM-NEXT:    lsls r0, r0, #30
+; ARM-NEXT:    ldr r4, [sp] @ 4-byte Reload
+; ARM-NEXT:    lsrs r4, r4, #2
+; ARM-NEXT:    adds r0, r0, r4
+; ARM-NEXT:    cmp r3, #0
 ; ARM-NEXT:    bne .LBB1_2
 ; ARM-NEXT:  .LBB1_4:
-; ARM-NEXT:    mov r1, r2
+; ARM-NEXT:    lsls r1, r2, #30
+; ARM-NEXT:    ldr r2, [sp, #20] @ 4-byte Reload
+; ARM-NEXT:    lsrs r2, r2, #2
+; ARM-NEXT:    adds r1, r1, r2
 ; ARM-NEXT:    add sp, #28
 ; ARM-NEXT:    pop {r4, r5, r6, r7, pc}
   %tmp = call i64 @llvm.umul.fix.sat.i64(i64 %x, i64 %y, i32 2)
@@ -224,16 +225,17 @@ define i64 @func5(i64 %x, i64 %y) {
 ; ARM-NEXT:    mov r3, r5
 ; ARM-NEXT:    bl __aeabi_lmul
 ; ARM-NEXT:    adds r3, r1, r6
-; ARM-NEXT:    mov r2, r5
-; ARM-NEXT:    adcs r2, r5
-; ARM-NEXT:    orrs r2, r4
+; ARM-NEXT:    mov r6, r5
+; ARM-NEXT:    adcs r6, r5
+; ARM-NEXT:    orrs r6, r4
 ; ARM-NEXT:    mvns r1, r5
-; ARM-NEXT:    cmp r2, #0
+; ARM-NEXT:    cmp r6, #0
 ; ARM-NEXT:    mov r2, r1
 ; ARM-NEXT:    bne .LBB4_2
 ; ARM-NEXT:  @ %bb.1:
 ; ARM-NEXT:    mov r2, r0
 ; ARM-NEXT:  .LBB4_2:
+; ARM-NEXT:    cmp r6, #0
 ; ARM-NEXT:    bne .LBB4_4
 ; ARM-NEXT:  @ %bb.3:
 ; ARM-NEXT:    mov r1, r3
@@ -397,27 +399,25 @@ define i64 @func7(i64 %x, i64 %y) nounwind {
 ; ARM-NEXT:    mov r2, r4
 ; ARM-NEXT:    mov r3, r4
 ; ARM-NEXT:    bl __aeabi_lmul
-; ARM-NEXT:    mov r2, r1
 ; ARM-NEXT:    adds r0, r0, r5
-; ARM-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
-; ARM-NEXT:    adcs r2, r1
-; ARM-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
-; ARM-NEXT:    adds r3, r1, r0
-; ARM-NEXT:    adcs r2, r6
-; ARM-NEXT:    mvns r1, r4
-; ARM-NEXT:    cmp r2, #0
-; ARM-NEXT:    mov r0, r1
-; ARM-NEXT:    beq .LBB7_3
+; ARM-NEXT:    ldr r2, [sp, #24] @ 4-byte Reload
+; ARM-NEXT:    adcs r1, r2
+; ARM-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; ARM-NEXT:    adds r3, r2, r0
+; ARM-NEXT:    adcs r1, r6
+; ARM-NEXT:    mvns r2, r4
+; ARM-NEXT:    cmp r1, #0
+; ARM-NEXT:    mov r0, r2
+; ARM-NEXT:    bne .LBB7_2
 ; ARM-NEXT:  @ %bb.1:
-; ARM-NEXT:    beq .LBB7_4
-; ARM-NEXT:  .LBB7_2:
-; ARM-NEXT:    add sp, #28
-; ARM-NEXT:    pop {r4, r5, r6, r7, pc}
-; ARM-NEXT:  .LBB7_3:
 ; ARM-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; ARM-NEXT:    bne .LBB7_2
+; ARM-NEXT:  .LBB7_2:
+; ARM-NEXT:    cmp r1, #0
+; ARM-NEXT:    bne .LBB7_4
+; ARM-NEXT:  @ %bb.3:
+; ARM-NEXT:    mov r2, r3
 ; ARM-NEXT:  .LBB7_4:
-; ARM-NEXT:    mov r1, r3
+; ARM-NEXT:    mov r1, r2
 ; ARM-NEXT:    add sp, #28
 ; ARM-NEXT:    pop {r4, r5, r6, r7, pc}
   %tmp = call i64 @llvm.umul.fix.sat.i64(i64 %x, i64 %y, i32 32)
@@ -433,23 +433,23 @@ define i64 @func8(i64 %x, i64 %y) nounwind {
 ; ARM-NEXT:    sub sp, #28
 ; ARM-NEXT:    str r3, [sp, #24] @ 4-byte Spill
 ; ARM-NEXT:    mov r7, r2
-; ARM-NEXT:    str r2, [sp, #16] @ 4-byte Spill
+; ARM-NEXT:    str r2, [sp, #20] @ 4-byte Spill
 ; ARM-NEXT:    mov r5, r1
-; ARM-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; ARM-NEXT:    str r1, [sp, #12] @ 4-byte Spill
 ; ARM-NEXT:    movs r4, #0
 ; ARM-NEXT:    mov r6, r0
-; ARM-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; ARM-NEXT:    str r0, [sp, #16] @ 4-byte Spill
 ; ARM-NEXT:    mov r1, r4
 ; ARM-NEXT:    mov r3, r4
 ; ARM-NEXT:    bl __aeabi_lmul
-; ARM-NEXT:    str r1, [sp, #20] @ 4-byte Spill
+; ARM-NEXT:    str r1, [sp, #8] @ 4-byte Spill
 ; ARM-NEXT:    mov r0, r5
 ; ARM-NEXT:    mov r1, r4
 ; ARM-NEXT:    mov r2, r7
 ; ARM-NEXT:    mov r3, r4
 ; ARM-NEXT:    bl __aeabi_lmul
 ; ARM-NEXT:    mov r7, r1
-; ARM-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
+; ARM-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
 ; ARM-NEXT:    adds r5, r0, r1
 ; ARM-NEXT:    adcs r7, r4
 ; ARM-NEXT:    mov r0, r6
@@ -459,31 +459,31 @@ define i64 @func8(i64 %x, i64 %y) nounwind {
 ; ARM-NEXT:    mov r3, r4
 ; ARM-NEXT:    bl __aeabi_lmul
 ; ARM-NEXT:    adds r0, r0, r5
-; ARM-NEXT:    str r0, [sp, #20] @ 4-byte Spill
+; ARM-NEXT:    str r0, [sp, #4] @ 4-byte Spill
 ; ARM-NEXT:    adcs r1, r4
 ; ARM-NEXT:    adds r0, r7, r1
-; ARM-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; ARM-NEXT:    str r0, [sp, #8] @ 4-byte Spill
 ; ARM-NEXT:    mov r5, r4
 ; ARM-NEXT:    adcs r5, r4
-; ARM-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
+; ARM-NEXT:    ldr r7, [sp, #12] @ 4-byte Reload
 ; ARM-NEXT:    mov r0, r7
 ; ARM-NEXT:    mov r1, r4
 ; ARM-NEXT:    mov r2, r6
 ; ARM-NEXT:    mov r3, r4
 ; ARM-NEXT:    bl __aeabi_lmul
 ; ARM-NEXT:    mov r6, r1
-; ARM-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; ARM-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
 ; ARM-NEXT:    adds r0, r0, r1
-; ARM-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; ARM-NEXT:    str r0, [sp, #8] @ 4-byte Spill
 ; ARM-NEXT:    adcs r6, r5
-; ARM-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
+; ARM-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
 ; ARM-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
 ; ARM-NEXT:    mov r2, r4
 ; ARM-NEXT:    mov r3, r4
 ; ARM-NEXT:    bl __aeabi_lmul
 ; ARM-NEXT:    mov r5, r0
 ; ARM-NEXT:    str r1, [sp, #24] @ 4-byte Spill
-; ARM-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; ARM-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
 ; ARM-NEXT:    mov r1, r7
 ; ARM-NEXT:    mov r2, r4
 ; ARM-NEXT:    mov r3, r4
@@ -491,28 +491,25 @@ define i64 @func8(i64 %x, i64 %y) nounwind {
 ; ARM-NEXT:    adds r0, r0, r5
 ; ARM-NEXT:    ldr r2, [sp, #24] @ 4-byte Reload
 ; ARM-NEXT:    adcs r1, r2
-; ARM-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
-; ARM-NEXT:    adds r0, r2, r0
+; ARM-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; ARM-NEXT:    adds r5, r2, r0
 ; ARM-NEXT:    adcs r1, r6
-; ARM-NEXT:    lsls r1, r1, #1
-; ARM-NEXT:    lsrs r5, r0, #31
-; ARM-NEXT:    adds r2, r1, r5
-; ARM-NEXT:    lsls r0, r0, #1
-; ARM-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
-; ARM-NEXT:    lsrs r1, r1, #31
-; ARM-NEXT:    adds r3, r0, r1
-; ARM-NEXT:    mvns r1, r4
-; ARM-NEXT:    cmp r5, #0
-; ARM-NEXT:    mov r0, r1
-; ARM-NEXT:    beq .LBB8_3
+; ARM-NEXT:    lsrs r3, r5, #31
+; ARM-NEXT:    mvns r2, r4
+; ARM-NEXT:    cmp r3, #0
+; ARM-NEXT:    mov r0, r2
+; ARM-NEXT:    bne .LBB8_2
 ; ARM-NEXT:  @ %bb.1:
-; ARM-NEXT:    beq .LBB8_4
+; ARM-NEXT:    lsls r0, r5, #1
+; ARM-NEXT:    ldr r4, [sp, #4] @ 4-byte Reload
+; ARM-NEXT:    lsrs r4, r4, #31
+; ARM-NEXT:    adds r0, r0, r4
 ; ARM-NEXT:  .LBB8_2:
-; ARM-NEXT:    add sp, #28
-; ARM-NEXT:    pop {r4, r5, r6, r7, pc}
-; ARM-NEXT:  .LBB8_3:
-; ARM-NEXT:    mov r0, r3
-; ARM-NEXT:    bne .LBB8_2
+; ARM-NEXT:    cmp r3, #0
+; ARM-NEXT:    bne .LBB8_4
+; ARM-NEXT:  @ %bb.3:
+; ARM-NEXT:    lsls r1, r1, #1
+; ARM-NEXT:    adds r2, r1, r3
 ; ARM-NEXT:  .LBB8_4:
 ; ARM-NEXT:    mov r1, r2
 ; ARM-NEXT:    add sp, #28

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/arm_cmplx_dot_prod_f32.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/arm_cmplx_dot_prod_f32.ll
index a87d363fa61ee7..13080fcfa13574 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/arm_cmplx_dot_prod_f32.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/arm_cmplx_dot_prod_f32.ll
@@ -15,13 +15,13 @@ define void @arm_cmplx_dot_prod_f32(ptr %pSrcA, ptr %pSrcB, i32 %numSamples, ptr
 ; CHECK-NEXT:    lsrs r4, r2, #2
 ; CHECK-NEXT:    mov.w lr, #2
 ; CHECK-NEXT:    cmp r4, #2
-; CHECK-NEXT:    vldrw.u32 q2, [r1], #32
-; CHECK-NEXT:    vldrw.u32 q1, [r0], #32
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    lsrlt.w lr, r2, #2
 ; CHECK-NEXT:    rsb r4, lr, r2, lsr #2
-; CHECK-NEXT:    vmov.i32 q0, #0x0
+; CHECK-NEXT:    vldrw.u32 q2, [r1], #32
 ; CHECK-NEXT:    add.w lr, r4, #1
+; CHECK-NEXT:    vldrw.u32 q1, [r0], #32
+; CHECK-NEXT:    vmov.i32 q0, #0x0
 ; CHECK-NEXT:  .LBB0_2: @ %while.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vcmla.f32 q0, q1, q2, #0

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll
index 3c1510623e5c43..b9a80af649f293 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll
@@ -7,26 +7,26 @@ define void @foo(ptr nocapture readonly %st, ptr %x) {
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .save {r4, lr}
 ; CHECK-NEXT:    push {r4, lr}
-; CHECK-NEXT:    ldrd r12, r3, [r0]
-; CHECK-NEXT:    ldrd r4, r2, [r0, #8]
-; CHECK-NEXT:    rsb r12, r12, r3, lsl #1
+; CHECK-NEXT:    ldrd r12, r2, [r0]
+; CHECK-NEXT:    ldrd r4, r3, [r0, #8]
+; CHECK-NEXT:    rsb r12, r12, r2, lsl #1
 ; CHECK-NEXT:    dlstp.16 lr, r12
 ; CHECK-NEXT:  .LBB0_1: @ %do.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vldrh.u16 q0, [r2], #16
+; CHECK-NEXT:    vldrh.u16 q0, [r3], #16
 ; CHECK-NEXT:    vstrh.16 q0, [r4], #16
 ; CHECK-NEXT:    letp lr, .LBB0_1
 ; CHECK-NEXT:  @ %bb.2: @ %do.end
-; CHECK-NEXT:    ldr r3, [r0]
+; CHECK-NEXT:    ldr r2, [r0]
 ; CHECK-NEXT:    ldr r0, [r0, #8]
 ; CHECK-NEXT:    add.w r0, r0, r12, lsl #1
-; CHECK-NEXT:    mov.w r2, #6144
-; CHECK-NEXT:    dlstp.16 lr, r3
+; CHECK-NEXT:    mov.w r3, #6144
+; CHECK-NEXT:    dlstp.16 lr, r2
 ; CHECK-NEXT:  .LBB0_3: @ %do.body6
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vldrh.u16 q0, [r1], #16
 ; CHECK-NEXT:    vcvt.f16.s16 q0, q0
-; CHECK-NEXT:    vmul.f16 q0, q0, r2
+; CHECK-NEXT:    vmul.f16 q0, q0, r3
 ; CHECK-NEXT:    vstrh.16 q0, [r0], #16
 ; CHECK-NEXT:    letp lr, .LBB0_3
 ; CHECK-NEXT:  @ %bb.4: @ %do.end13

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll
index 6cb98557c9bc13..2fdf534d526565 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll
@@ -9,12 +9,12 @@ define arm_aapcs_vfpcc void @fast_float_mul(ptr nocapture %a, ptr nocapture read
 ; CHECK-NEXT:    beq.w .LBB0_11
 ; CHECK-NEXT:  @ %bb.1: @ %vector.memcheck
 ; CHECK-NEXT:    add.w r4, r2, r3, lsl #2
+; CHECK-NEXT:    add.w lr, r0, r3, lsl #2
 ; CHECK-NEXT:    cmp r4, r0
-; CHECK-NEXT:    add.w r4, r0, r3, lsl #2
-; CHECK-NEXT:    cset r12, hi
-; CHECK-NEXT:    cmp r4, r2
-; CHECK-NEXT:    csel r12, zr, r12, ls
-; CHECK-NEXT:    cmp r4, r1
+; CHECK-NEXT:    cset r4, hi
+; CHECK-NEXT:    cmp lr, r2
+; CHECK-NEXT:    csel r12, zr, r4, ls
+; CHECK-NEXT:    cmp lr, r1
 ; CHECK-NEXT:    add.w r4, r1, r3, lsl #2
 ; CHECK-NEXT:    cset lr, hi
 ; CHECK-NEXT:    cmp r4, r0

diff  --git a/llvm/test/CodeGen/Thumb2/float-ops.ll b/llvm/test/CodeGen/Thumb2/float-ops.ll
index 191c775be3420e..d2b1dd6f05a3f9 100644
--- a/llvm/test/CodeGen/Thumb2/float-ops.ll
+++ b/llvm/test/CodeGen/Thumb2/float-ops.ll
@@ -289,15 +289,15 @@ define float @select_f(float %a, float %b, i1 %c) {
 define double @select_d(double %a, double %b, i1 %c) {
 ; CHECK-LABEL: select_d:
 ; NOREGS: ldr{{(.w)?}}     [[REG:r[0-9]+]], [sp]
-; NOREGS: lsls.w    [[REG]], [[REG]], #31
-; ONLYREGS: lsls    r0, r0, #31
+; NOREGS: ands    [[REG]], [[REG]], #1
+; ONLYREGS: ands    r0, r0, #1
 ; NOREGS-DAG: moveq   r0, r2
 ; NOREGS-DAG: moveq   r1, r3
-; ONLYREGS-DAG: csel   r0, r2, r1
-; ONLYREGS-DAG: csel   r1, r12, r3
+; ONLYREGS-DAG: csel   r0, r0, r2
+; ONLYREGS-DAG: csel   r1, r1, r3
+; SP: ands r0, r0, #1
 ; SP-DAG: vmov [[ALO:r[0-9]+]], [[AHI:r[0-9]+]], d0
 ; SP-DAG: vmov [[BLO:r[0-9]+]], [[BHI:r[0-9]+]], d1
-; SP: lsls r0, r0, #31
 ; SP: itt ne
 ; SP-DAG: movne [[BLO]], [[ALO]]
 ; SP-DAG: movne [[BHI]], [[AHI]]

diff  --git a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll
index d076cb00ad7e0e..7087041e8dace6 100644
--- a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll
@@ -489,10 +489,10 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) {
 ; CHECK-NEXT:    @ Parent Loop BB1_10 Depth=2
 ; CHECK-NEXT:    @ => This Inner Loop Header: Depth=3
 ; CHECK-NEXT:    vqadd.u32 q2, q5, r1
-; CHECK-NEXT:    adds r1, #4
+; CHECK-NEXT:    subs r2, #4
 ; CHECK-NEXT:    vcmp.u32 hi, q7, q2
 ; CHECK-NEXT:    vshl.i32 q2, q1, #2
-; CHECK-NEXT:    subs r2, #4
+; CHECK-NEXT:    add.w r1, r1, #4
 ; CHECK-NEXT:    vadd.i32 q2, q2, r8
 ; CHECK-NEXT:    vadd.i32 q1, q1, r9
 ; CHECK-NEXT:    vpst
@@ -508,10 +508,10 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) {
 ; CHECK-NEXT:    @ Parent Loop BB1_10 Depth=2
 ; CHECK-NEXT:    @ => This Inner Loop Header: Depth=3
 ; CHECK-NEXT:    vqadd.u32 q2, q5, r1
-; CHECK-NEXT:    adds r1, #4
+; CHECK-NEXT:    subs r2, #4
 ; CHECK-NEXT:    vcmp.u32 hi, q6, q2
 ; CHECK-NEXT:    vshl.i32 q2, q1, #2
-; CHECK-NEXT:    subs r2, #4
+; CHECK-NEXT:    add.w r1, r1, #4
 ; CHECK-NEXT:    vadd.i32 q2, q2, r8
 ; CHECK-NEXT:    vadd.i32 q1, q1, r9
 ; CHECK-NEXT:    vpst

diff  --git a/llvm/test/CodeGen/Thumb2/mve-doublereduct.ll b/llvm/test/CodeGen/Thumb2/mve-doublereduct.ll
index 9ee6ec345d964b..67723e8aa41ad7 100644
--- a/llvm/test/CodeGen/Thumb2/mve-doublereduct.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-doublereduct.ll
@@ -411,12 +411,12 @@ define i32 @nested_smin_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    mvn r3, #-2147483648
 ; CHECK-NEXT:    mvn r2, #-2147483648
-; CHECK-NEXT:    vminv.s32 r3, q1
-; CHECK-NEXT:    vminv.s32 r2, q0
-; CHECK-NEXT:    cmp r3, r1
-; CHECK-NEXT:    csel r1, r3, r1, lt
-; CHECK-NEXT:    cmp r2, r0
-; CHECK-NEXT:    csel r0, r2, r0, lt
+; CHECK-NEXT:    vminv.s32 r3, q0
+; CHECK-NEXT:    vminv.s32 r2, q1
+; CHECK-NEXT:    cmp r3, r0
+; CHECK-NEXT:    csel r0, r3, r0, lt
+; CHECK-NEXT:    cmp r2, r1
+; CHECK-NEXT:    csel r1, r2, r1, lt
 ; CHECK-NEXT:    cmp r0, r1
 ; CHECK-NEXT:    csel r0, r0, r1, lt
 ; CHECK-NEXT:    bx lr
@@ -433,12 +433,12 @@ define i32 @nested_smax_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    mov.w r3, #-2147483648
 ; CHECK-NEXT:    mov.w r2, #-2147483648
-; CHECK-NEXT:    vmaxv.s32 r3, q1
-; CHECK-NEXT:    vmaxv.s32 r2, q0
-; CHECK-NEXT:    cmp r3, r1
-; CHECK-NEXT:    csel r1, r3, r1, gt
-; CHECK-NEXT:    cmp r2, r0
-; CHECK-NEXT:    csel r0, r2, r0, gt
+; CHECK-NEXT:    vmaxv.s32 r3, q0
+; CHECK-NEXT:    vmaxv.s32 r2, q1
+; CHECK-NEXT:    cmp r3, r0
+; CHECK-NEXT:    csel r0, r3, r0, gt
+; CHECK-NEXT:    cmp r2, r1
+; CHECK-NEXT:    csel r1, r2, r1, gt
 ; CHECK-NEXT:    cmp r0, r1
 ; CHECK-NEXT:    csel r0, r0, r1, gt
 ; CHECK-NEXT:    bx lr
@@ -455,12 +455,12 @@ define i32 @nested_umin_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    mov.w r3, #-1
 ; CHECK-NEXT:    mov.w r2, #-1
-; CHECK-NEXT:    vminv.u32 r3, q1
-; CHECK-NEXT:    vminv.u32 r2, q0
-; CHECK-NEXT:    cmp r3, r1
-; CHECK-NEXT:    csel r1, r3, r1, lo
-; CHECK-NEXT:    cmp r2, r0
-; CHECK-NEXT:    csel r0, r2, r0, lo
+; CHECK-NEXT:    vminv.u32 r3, q0
+; CHECK-NEXT:    vminv.u32 r2, q1
+; CHECK-NEXT:    cmp r3, r0
+; CHECK-NEXT:    csel r0, r3, r0, lo
+; CHECK-NEXT:    cmp r2, r1
+; CHECK-NEXT:    csel r1, r2, r1, lo
 ; CHECK-NEXT:    cmp r0, r1
 ; CHECK-NEXT:    csel r0, r0, r1, lo
 ; CHECK-NEXT:    bx lr
@@ -477,12 +477,12 @@ define i32 @nested_umax_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    movs r3, #0
 ; CHECK-NEXT:    movs r2, #0
-; CHECK-NEXT:    vmaxv.u32 r3, q1
-; CHECK-NEXT:    vmaxv.u32 r2, q0
-; CHECK-NEXT:    cmp r3, r1
-; CHECK-NEXT:    csel r1, r3, r1, hi
-; CHECK-NEXT:    cmp r2, r0
-; CHECK-NEXT:    csel r0, r2, r0, hi
+; CHECK-NEXT:    vmaxv.u32 r3, q0
+; CHECK-NEXT:    vmaxv.u32 r2, q1
+; CHECK-NEXT:    cmp r3, r0
+; CHECK-NEXT:    csel r0, r3, r0, hi
+; CHECK-NEXT:    cmp r2, r1
+; CHECK-NEXT:    csel r1, r2, r1, hi
 ; CHECK-NEXT:    cmp r0, r1
 ; CHECK-NEXT:    csel r0, r0, r1, hi
 ; CHECK-NEXT:    bx lr

diff  --git a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
index c8dd949ca9d882..1c95d28b5eed1b 100644
--- a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
@@ -1365,8 +1365,8 @@ define void @arm_biquad_cascade_df2T_f16(ptr nocapture readonly %S, ptr nocaptur
 ; CHECK-NEXT:    @ in Loop: Header=BB17_3 Depth=1
 ; CHECK-NEXT:    vstr.16 s5, [r6, #2]
 ; CHECK-NEXT:    add.w r12, r12, #10
-; CHECK-NEXT:    adds r6, #4
 ; CHECK-NEXT:    subs.w r9, r9, #1
+; CHECK-NEXT:    add.w r6, r6, #4
 ; CHECK-NEXT:    mov r1, r2
 ; CHECK-NEXT:    beq .LBB17_8
 ; CHECK-NEXT:  .LBB17_3: @ %do.body

diff  --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
index 495ffe809f70fe..808626d9a0aebe 100644
--- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
@@ -1375,8 +1375,8 @@ define arm_aapcs_vfpcc void @arm_biquad_cascade_stereo_df2T_f32(ptr nocapture re
 ; CHECK-NEXT:    le lr, .LBB17_3
 ; CHECK-NEXT:  @ %bb.4: @ %bb75
 ; CHECK-NEXT:    @ in Loop: Header=BB17_2 Depth=1
-; CHECK-NEXT:    adds r3, #20
 ; CHECK-NEXT:    subs.w r12, r12, #1
+; CHECK-NEXT:    add.w r3, r3, #20
 ; CHECK-NEXT:    vstrb.8 q3, [r0], #16
 ; CHECK-NEXT:    mov r1, r2
 ; CHECK-NEXT:    bne .LBB17_2
@@ -1514,8 +1514,8 @@ define arm_aapcs_vfpcc void @fms(ptr nocapture readonly %pSrc1, ptr nocapture re
 ; CHECK-NEXT:    le lr, .LBB18_3
 ; CHECK-NEXT:  @ %bb.4: @ %while.end
 ; CHECK-NEXT:    @ in Loop: Header=BB18_2 Depth=1
-; CHECK-NEXT:    adds r2, #4
 ; CHECK-NEXT:    subs.w r12, r12, #1
+; CHECK-NEXT:    add.w r2, r2, #4
 ; CHECK-NEXT:    bne .LBB18_2
 ; CHECK-NEXT:  .LBB18_5: @ %do.end
 ; CHECK-NEXT:    pop {r4, r5, r7, pc}
@@ -1918,8 +1918,8 @@ define void @arm_biquad_cascade_df2T_f32(ptr nocapture readonly %S, ptr nocaptur
 ; CHECK-NEXT:    @ in Loop: Header=BB20_3 Depth=1
 ; CHECK-NEXT:    vstr s6, [r6, #4]
 ; CHECK-NEXT:    add.w r12, r12, #20
-; CHECK-NEXT:    adds r6, #8
 ; CHECK-NEXT:    subs r0, #1
+; CHECK-NEXT:    add.w r6, r6, #8
 ; CHECK-NEXT:    mov r1, r2
 ; CHECK-NEXT:    beq .LBB20_8
 ; CHECK-NEXT:  .LBB20_3: @ %do.body

diff  --git a/llvm/test/CodeGen/Thumb2/mve-fmas.ll b/llvm/test/CodeGen/Thumb2/mve-fmas.ll
index 94921c78ad912e..377440e1bbc939 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fmas.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fmas.ll
@@ -893,19 +893,19 @@ define arm_aapcs_vfpcc <4 x float> @vfma32_v1_pred(<4 x float> %src1, <4 x float
 ;
 ; CHECK-MVE-LABEL: vfma32_v1_pred:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s4, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s5, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s7, #0
-; CHECK-MVE-NEXT:    vmov.f32 s12, s2
-; CHECK-MVE-NEXT:    vmov.f32 s14, s3
-; CHECK-MVE-NEXT:    vmla.f32 s12, s6, s10
+; CHECK-MVE-NEXT:    vcmp.f32 s4, #0
+; CHECK-MVE-NEXT:    vmov.f32 s14, s2
+; CHECK-MVE-NEXT:    vmov.f32 s12, s3
+; CHECK-MVE-NEXT:    vmla.f32 s14, s6, s10
 ; CHECK-MVE-NEXT:    vmov.f32 s10, s1
-; CHECK-MVE-NEXT:    vmla.f32 s14, s7, s11
+; CHECK-MVE-NEXT:    vmla.f32 s12, s7, s11
 ; CHECK-MVE-NEXT:    vmla.f32 s10, s5, s9
 ; CHECK-MVE-NEXT:    vmov.f32 s9, s0
 ; CHECK-MVE-NEXT:    cset r0, mi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s5, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s7, #0
 ; CHECK-MVE-NEXT:    vmla.f32 s9, s4, s8
 ; CHECK-MVE-NEXT:    cset r1, mi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
@@ -913,13 +913,13 @@ define arm_aapcs_vfpcc <4 x float> @vfma32_v1_pred(<4 x float> %src1, <4 x float
 ; CHECK-MVE-NEXT:    cset r2, mi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, mi
-; CHECK-MVE-NEXT:    cmp r3, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s2, s2, s12
 ; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s3, s12
+; CHECK-MVE-NEXT:    cmp r3, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s2, s2, s14
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s1, s10
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s3, s14
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s0, s9
 ; CHECK-MVE-NEXT:    bx lr
 entry:
@@ -946,19 +946,19 @@ define arm_aapcs_vfpcc <4 x float> @vfma32_v2_pred(<4 x float> %src1, <4 x float
 ;
 ; CHECK-MVE-LABEL: vfma32_v2_pred:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s4, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s5, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s7, #0
-; CHECK-MVE-NEXT:    vmov.f32 s12, s2
-; CHECK-MVE-NEXT:    vmov.f32 s14, s3
-; CHECK-MVE-NEXT:    vmla.f32 s12, s6, s10
+; CHECK-MVE-NEXT:    vcmp.f32 s4, #0
+; CHECK-MVE-NEXT:    vmov.f32 s14, s2
+; CHECK-MVE-NEXT:    vmov.f32 s12, s3
+; CHECK-MVE-NEXT:    vmla.f32 s14, s6, s10
 ; CHECK-MVE-NEXT:    vmov.f32 s10, s1
-; CHECK-MVE-NEXT:    vmla.f32 s14, s7, s11
+; CHECK-MVE-NEXT:    vmla.f32 s12, s7, s11
 ; CHECK-MVE-NEXT:    vmla.f32 s10, s5, s9
 ; CHECK-MVE-NEXT:    vmov.f32 s9, s0
 ; CHECK-MVE-NEXT:    cset r0, mi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s5, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s7, #0
 ; CHECK-MVE-NEXT:    vmla.f32 s9, s4, s8
 ; CHECK-MVE-NEXT:    cset r1, mi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
@@ -966,13 +966,13 @@ define arm_aapcs_vfpcc <4 x float> @vfma32_v2_pred(<4 x float> %src1, <4 x float
 ; CHECK-MVE-NEXT:    cset r2, mi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, mi
-; CHECK-MVE-NEXT:    cmp r3, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s2, s2, s12
 ; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s3, s12
+; CHECK-MVE-NEXT:    cmp r3, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s2, s2, s14
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s1, s10
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s3, s14
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s0, s9
 ; CHECK-MVE-NEXT:    bx lr
 entry:
@@ -999,19 +999,19 @@ define arm_aapcs_vfpcc <4 x float> @vfms32_pred(<4 x float> %src1, <4 x float> %
 ;
 ; CHECK-MVE-LABEL: vfms32_pred:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s4, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s5, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s7, #0
-; CHECK-MVE-NEXT:    vmov.f32 s12, s2
-; CHECK-MVE-NEXT:    vmov.f32 s14, s3
-; CHECK-MVE-NEXT:    vmls.f32 s12, s6, s10
+; CHECK-MVE-NEXT:    vcmp.f32 s4, #0
+; CHECK-MVE-NEXT:    vmov.f32 s14, s2
+; CHECK-MVE-NEXT:    vmov.f32 s12, s3
+; CHECK-MVE-NEXT:    vmls.f32 s14, s6, s10
 ; CHECK-MVE-NEXT:    vmov.f32 s10, s1
-; CHECK-MVE-NEXT:    vmls.f32 s14, s7, s11
+; CHECK-MVE-NEXT:    vmls.f32 s12, s7, s11
 ; CHECK-MVE-NEXT:    vmls.f32 s10, s5, s9
 ; CHECK-MVE-NEXT:    vmov.f32 s9, s0
 ; CHECK-MVE-NEXT:    cset r0, mi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s5, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s7, #0
 ; CHECK-MVE-NEXT:    vmls.f32 s9, s4, s8
 ; CHECK-MVE-NEXT:    cset r1, mi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
@@ -1019,13 +1019,13 @@ define arm_aapcs_vfpcc <4 x float> @vfms32_pred(<4 x float> %src1, <4 x float> %
 ; CHECK-MVE-NEXT:    cset r2, mi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, mi
-; CHECK-MVE-NEXT:    cmp r3, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s2, s2, s12
 ; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s3, s12
+; CHECK-MVE-NEXT:    cmp r3, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s2, s2, s14
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s1, s10
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s3, s14
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s0, s9
 ; CHECK-MVE-NEXT:    bx lr
 entry:
@@ -1055,33 +1055,33 @@ define arm_aapcs_vfpcc <4 x float> @vfmar32_pred(<4 x float> %src1, <4 x float>
 ;
 ; CHECK-MVE-LABEL: vfmar32_pred:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s4, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s5, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s7, #0
-; CHECK-MVE-NEXT:    vmov.f32 s10, s2
-; CHECK-MVE-NEXT:    vmov.f32 s12, s1
-; CHECK-MVE-NEXT:    vmov.f32 s14, s3
+; CHECK-MVE-NEXT:    vcmp.f32 s4, #0
+; CHECK-MVE-NEXT:    vmov.f32 s10, s3
+; CHECK-MVE-NEXT:    vmov.f32 s12, s2
+; CHECK-MVE-NEXT:    vmov.f32 s14, s1
 ; CHECK-MVE-NEXT:    vmov.f32 s9, s0
-; CHECK-MVE-NEXT:    vmla.f32 s10, s6, s8
-; CHECK-MVE-NEXT:    vmla.f32 s12, s5, s8
-; CHECK-MVE-NEXT:    vmla.f32 s14, s7, s8
+; CHECK-MVE-NEXT:    vmla.f32 s10, s7, s8
+; CHECK-MVE-NEXT:    vmla.f32 s12, s6, s8
+; CHECK-MVE-NEXT:    vmla.f32 s14, s5, s8
 ; CHECK-MVE-NEXT:    vmla.f32 s9, s4, s8
 ; CHECK-MVE-NEXT:    cset r0, mi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s5, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s7, #0
 ; CHECK-MVE-NEXT:    cset r1, mi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s6, #0
 ; CHECK-MVE-NEXT:    cset r2, mi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, mi
-; CHECK-MVE-NEXT:    cmp r3, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s2, s2, s10
 ; CHECK-MVE-NEXT:    cmp r2, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s1, s1, s12
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s3, s14
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s3, s10
+; CHECK-MVE-NEXT:    cmp r3, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s2, s2, s12
 ; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s1, s1, s14
+; CHECK-MVE-NEXT:    cmp r1, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s0, s9
 ; CHECK-MVE-NEXT:    bx lr
 entry:
@@ -1112,32 +1112,32 @@ define arm_aapcs_vfpcc <4 x float> @vfmas32_pred(<4 x float> %src1, <4 x float>
 ;
 ; CHECK-MVE-LABEL: vfmas32_pred:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s4, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s5, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s7, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s4, #0
 ; CHECK-MVE-NEXT:    vmov.f32 s10, s8
 ; CHECK-MVE-NEXT:    vmov.f32 s12, s8
 ; CHECK-MVE-NEXT:    vmov.f32 s14, s8
 ; CHECK-MVE-NEXT:    vmla.f32 s8, s0, s4
-; CHECK-MVE-NEXT:    vmla.f32 s10, s2, s6
-; CHECK-MVE-NEXT:    vmla.f32 s12, s1, s5
-; CHECK-MVE-NEXT:    vmla.f32 s14, s3, s7
+; CHECK-MVE-NEXT:    vmla.f32 s10, s3, s7
+; CHECK-MVE-NEXT:    vmla.f32 s12, s2, s6
+; CHECK-MVE-NEXT:    vmla.f32 s14, s1, s5
 ; CHECK-MVE-NEXT:    cset r0, mi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s5, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s7, #0
 ; CHECK-MVE-NEXT:    cset r1, mi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s6, #0
 ; CHECK-MVE-NEXT:    cset r2, mi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, mi
-; CHECK-MVE-NEXT:    cmp r3, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s2, s2, s10
 ; CHECK-MVE-NEXT:    cmp r2, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s1, s1, s12
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s3, s14
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s3, s10
+; CHECK-MVE-NEXT:    cmp r3, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s2, s2, s12
 ; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s1, s1, s14
+; CHECK-MVE-NEXT:    cmp r1, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s0, s8
 ; CHECK-MVE-NEXT:    bx lr
 entry:

diff  --git a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
index 742f2a75a1aa80..f2ac5268921800 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
@@ -668,63 +668,62 @@ entry:
 define arm_aapcs_vfpcc <2 x i64> @stest_f64i64(<2 x double> %x) {
 ; CHECK-LABEL: stest_f64i64:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-NEXT:    .pad #4
-; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, lr}
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
 ; CHECK-NEXT:    vmov q4, q0
-; CHECK-NEXT:    vmov r0, r1, d8
+; CHECK-NEXT:    vmov r0, r1, d9
 ; CHECK-NEXT:    bl __fixdfti
-; CHECK-NEXT:    vmov r12, lr, d9
-; CHECK-NEXT:    subs.w r5, r0, #-1
-; CHECK-NEXT:    mvn r4, #-2147483648
-; CHECK-NEXT:    sbcs.w r5, r1, r4
-; CHECK-NEXT:    sbcs r5, r2, #0
-; CHECK-NEXT:    mov.w r7, #-2147483648
-; CHECK-NEXT:    sbcs r5, r3, #0
+; CHECK-NEXT:    vmov r12, lr, d8
+; CHECK-NEXT:    subs.w r4, r0, #-1
+; CHECK-NEXT:    mvn r9, #-2147483648
+; CHECK-NEXT:    sbcs.w r4, r1, r9
+; CHECK-NEXT:    sbcs r4, r2, #0
+; CHECK-NEXT:    mov.w r7, #-1
+; CHECK-NEXT:    sbcs r4, r3, #0
+; CHECK-NEXT:    mov.w r10, #-2147483648
+; CHECK-NEXT:    cset r4, lt
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csel r3, r3, r4, ne
+; CHECK-NEXT:    csel r2, r2, r4, ne
+; CHECK-NEXT:    csel r4, r0, r7, ne
+; CHECK-NEXT:    csel r1, r1, r9, ne
+; CHECK-NEXT:    rsbs r0, r4, #0
+; CHECK-NEXT:    sbcs.w r0, r10, r1
+; CHECK-NEXT:    sbcs.w r0, r7, r2
+; CHECK-NEXT:    sbcs.w r0, r7, r3
 ; CHECK-NEXT:    cset r5, lt
 ; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    csel r3, r3, r5, ne
-; CHECK-NEXT:    csel r2, r2, r5, ne
-; CHECK-NEXT:    mov.w r5, #-1
-; CHECK-NEXT:    csel r1, r1, r4, ne
-; CHECK-NEXT:    csel r0, r0, r5, ne
-; CHECK-NEXT:    rsbs r6, r0, #0
-; CHECK-NEXT:    sbcs.w r6, r7, r1
-; CHECK-NEXT:    sbcs.w r2, r5, r2
-; CHECK-NEXT:    sbcs.w r2, r5, r3
-; CHECK-NEXT:    csel r8, r1, r7, lt
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csel r9, r0, r1, ne
+; CHECK-NEXT:    csel r8, r1, r10, ne
 ; CHECK-NEXT:    mov r0, r12
 ; CHECK-NEXT:    mov r1, lr
 ; CHECK-NEXT:    bl __fixdfti
 ; CHECK-NEXT:    subs.w r6, r0, #-1
-; CHECK-NEXT:    sbcs.w r6, r1, r4
+; CHECK-NEXT:    sbcs.w r6, r1, r9
 ; CHECK-NEXT:    sbcs r6, r2, #0
 ; CHECK-NEXT:    sbcs r6, r3, #0
 ; CHECK-NEXT:    cset r6, lt
 ; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    csel r0, r0, r5, ne
+; CHECK-NEXT:    csel r0, r0, r7, ne
+; CHECK-NEXT:    csel r1, r1, r9, ne
 ; CHECK-NEXT:    csel r3, r3, r6, ne
 ; CHECK-NEXT:    csel r2, r2, r6, ne
-; CHECK-NEXT:    csel r1, r1, r4, ne
 ; CHECK-NEXT:    rsbs r6, r0, #0
-; CHECK-NEXT:    sbcs.w r6, r7, r1
-; CHECK-NEXT:    sbcs.w r2, r5, r2
-; CHECK-NEXT:    sbcs.w r2, r5, r3
+; CHECK-NEXT:    sbcs.w r6, r10, r1
+; CHECK-NEXT:    sbcs.w r2, r7, r2
+; CHECK-NEXT:    sbcs.w r2, r7, r3
 ; CHECK-NEXT:    cset r2, lt
-; CHECK-NEXT:    csel r1, r1, r7, lt
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csel r1, r1, r10, ne
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    csel r3, r4, r5, ne
 ; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    csel r0, r0, r2, ne
-; CHECK-NEXT:    vmov q0[2], q0[0], r9, r0
-; CHECK-NEXT:    vmov q0[3], q0[1], r8, r1
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r3
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r8
 ; CHECK-NEXT:    vpop {d8, d9}
-; CHECK-NEXT:    add sp, #4
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
 entry:
   %conv = fptosi <2 x double> %x to <2 x i128>
   %0 = icmp slt <2 x i128> %conv, <i128 9223372036854775807, i128 9223372036854775807>
@@ -738,33 +737,38 @@ entry:
 define arm_aapcs_vfpcc <2 x i64> @utest_f64i64(<2 x double> %x) {
 ; CHECK-LABEL: utest_f64i64:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r7, lr}
-; CHECK-NEXT:    push {r4, r5, r7, lr}
+; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vmov r0, r1, d9
 ; CHECK-NEXT:    bl __fixunsdfti
-; CHECK-NEXT:    vmov r12, lr, d8
+; CHECK-NEXT:    mov r5, r1
+; CHECK-NEXT:    vmov r4, r1, d8
 ; CHECK-NEXT:    subs r2, #1
 ; CHECK-NEXT:    sbcs r2, r3, #0
-; CHECK-NEXT:    cset r2, lo
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csel r4, r1, r2, ne
-; CHECK-NEXT:    csel r5, r0, r2, ne
-; CHECK-NEXT:    mov r0, r12
-; CHECK-NEXT:    mov r1, lr
+; CHECK-NEXT:    cset r6, lo
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    csel r7, r0, r6, ne
+; CHECK-NEXT:    mov r0, r4
 ; CHECK-NEXT:    bl __fixunsdfti
 ; CHECK-NEXT:    subs r2, #1
 ; CHECK-NEXT:    sbcs r2, r3, #0
 ; CHECK-NEXT:    cset r2, lo
 ; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    csel r0, r0, r2, ne
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    csel r3, r5, r6, ne
+; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    csel r1, r1, r2, ne
-; CHECK-NEXT:    vmov q0[2], q0[0], r0, r5
-; CHECK-NEXT:    vmov q0[3], q0[1], r1, r4
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r7
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r3
 ; CHECK-NEXT:    vpop {d8, d9}
-; CHECK-NEXT:    pop {r4, r5, r7, pc}
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
 entry:
   %conv = fptoui <2 x double> %x to <2 x i128>
   %0 = icmp ult <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
@@ -776,8 +780,10 @@ entry:
 define arm_aapcs_vfpcc <2 x i64> @ustest_f64i64(<2 x double> %x) {
 ; CHECK-LABEL: ustest_f64i64:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
 ; CHECK-NEXT:    vmov q4, q0
@@ -787,44 +793,47 @@ define arm_aapcs_vfpcc <2 x i64> @ustest_f64i64(<2 x double> %x) {
 ; CHECK-NEXT:    subs r4, r2, #1
 ; CHECK-NEXT:    sbcs r4, r3, #0
 ; CHECK-NEXT:    mov.w r8, #1
-; CHECK-NEXT:    cset r4, lt
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    csel r0, r0, r4, ne
-; CHECK-NEXT:    csel r3, r3, r4, ne
-; CHECK-NEXT:    csel r1, r1, r4, ne
+; CHECK-NEXT:    cset r5, lt
+; CHECK-NEXT:    movs r7, #0
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    csel r0, r0, r5, ne
+; CHECK-NEXT:    csel r3, r3, r5, ne
 ; CHECK-NEXT:    csel r2, r2, r8, ne
-; CHECK-NEXT:    rsbs r5, r0, #0
-; CHECK-NEXT:    mov.w r4, #0
-; CHECK-NEXT:    sbcs.w r5, r4, r1
-; CHECK-NEXT:    sbcs.w r2, r4, r2
-; CHECK-NEXT:    sbcs.w r2, r4, r3
-; CHECK-NEXT:    cset r2, lt
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csel r5, r1, r2, ne
-; CHECK-NEXT:    csel r7, r0, r2, ne
+; CHECK-NEXT:    csel r4, r1, r5, ne
+; CHECK-NEXT:    rsbs r1, r0, #0
+; CHECK-NEXT:    sbcs.w r1, r7, r4
+; CHECK-NEXT:    sbcs.w r1, r7, r2
+; CHECK-NEXT:    sbcs.w r1, r7, r3
+; CHECK-NEXT:    cset r6, lt
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    csel r9, r0, r6, ne
 ; CHECK-NEXT:    mov r0, r12
 ; CHECK-NEXT:    mov r1, lr
 ; CHECK-NEXT:    bl __fixdfti
-; CHECK-NEXT:    subs r6, r2, #1
-; CHECK-NEXT:    sbcs r6, r3, #0
-; CHECK-NEXT:    cset r6, lt
-; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    csel r0, r0, r6, ne
-; CHECK-NEXT:    csel r3, r3, r6, ne
-; CHECK-NEXT:    csel r1, r1, r6, ne
+; CHECK-NEXT:    subs r5, r2, #1
+; CHECK-NEXT:    sbcs r5, r3, #0
+; CHECK-NEXT:    cset r5, lt
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    csel r0, r0, r5, ne
 ; CHECK-NEXT:    csel r2, r2, r8, ne
-; CHECK-NEXT:    rsbs r6, r0, #0
-; CHECK-NEXT:    sbcs.w r6, r4, r1
-; CHECK-NEXT:    sbcs.w r2, r4, r2
-; CHECK-NEXT:    sbcs.w r2, r4, r3
+; CHECK-NEXT:    csel r3, r3, r5, ne
+; CHECK-NEXT:    csel r1, r1, r5, ne
+; CHECK-NEXT:    rsbs r5, r0, #0
+; CHECK-NEXT:    sbcs.w r5, r7, r1
+; CHECK-NEXT:    sbcs.w r2, r7, r2
+; CHECK-NEXT:    sbcs.w r2, r7, r3
 ; CHECK-NEXT:    cset r2, lt
 ; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    csel r0, r0, r2, ne
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    csel r3, r4, r6, ne
+; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    csel r1, r1, r2, ne
-; CHECK-NEXT:    vmov q0[2], q0[0], r0, r7
-; CHECK-NEXT:    vmov q0[3], q0[1], r1, r5
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r9
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r3
 ; CHECK-NEXT:    vpop {d8, d9}
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
 entry:
   %conv = fptosi <2 x double> %x to <2 x i128>
   %0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
@@ -838,54 +847,59 @@ entry:
 define arm_aapcs_vfpcc <2 x i64> @stest_f32i64(<2 x float> %x) {
 ; CHECK-LABEL: stest_f32i64:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-NEXT:    vmov r0, r9, d0
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    vmov r9, r0, d0
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    subs.w r7, r0, #-1
-; CHECK-NEXT:    mvn r5, #-2147483648
-; CHECK-NEXT:    sbcs.w r7, r1, r5
-; CHECK-NEXT:    mov.w r6, #-1
+; CHECK-NEXT:    mvn r10, #-2147483648
+; CHECK-NEXT:    sbcs.w r7, r1, r10
+; CHECK-NEXT:    mov.w r4, #-1
 ; CHECK-NEXT:    sbcs r7, r2, #0
+; CHECK-NEXT:    mov.w r11, #-2147483648
 ; CHECK-NEXT:    sbcs r7, r3, #0
 ; CHECK-NEXT:    cset r7, lt
 ; CHECK-NEXT:    cmp r7, #0
-; CHECK-NEXT:    csel r0, r0, r6, ne
+; CHECK-NEXT:    csel r5, r0, r4, ne
 ; CHECK-NEXT:    csel r3, r3, r7, ne
 ; CHECK-NEXT:    csel r2, r2, r7, ne
-; CHECK-NEXT:    csel r1, r1, r5, ne
-; CHECK-NEXT:    rsbs r4, r0, #0
-; CHECK-NEXT:    mov.w r7, #-2147483648
-; CHECK-NEXT:    sbcs.w r4, r7, r1
-; CHECK-NEXT:    sbcs.w r2, r6, r2
-; CHECK-NEXT:    sbcs.w r2, r6, r3
-; CHECK-NEXT:    csel r8, r1, r7, lt
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csel r10, r0, r1, ne
+; CHECK-NEXT:    csel r1, r1, r10, ne
+; CHECK-NEXT:    rsbs r0, r5, #0
+; CHECK-NEXT:    sbcs.w r0, r11, r1
+; CHECK-NEXT:    sbcs.w r0, r4, r2
+; CHECK-NEXT:    sbcs.w r0, r4, r3
+; CHECK-NEXT:    cset r6, lt
 ; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    csel r8, r1, r11, ne
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    subs.w r4, r0, #-1
-; CHECK-NEXT:    sbcs.w r4, r1, r5
-; CHECK-NEXT:    sbcs r4, r2, #0
-; CHECK-NEXT:    sbcs r4, r3, #0
-; CHECK-NEXT:    cset r4, lt
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    csel r0, r0, r6, ne
-; CHECK-NEXT:    csel r3, r3, r4, ne
-; CHECK-NEXT:    csel r2, r2, r4, ne
-; CHECK-NEXT:    csel r1, r1, r5, ne
-; CHECK-NEXT:    rsbs r5, r0, #0
-; CHECK-NEXT:    sbcs.w r5, r7, r1
-; CHECK-NEXT:    sbcs.w r2, r6, r2
-; CHECK-NEXT:    sbcs.w r2, r6, r3
+; CHECK-NEXT:    subs.w r7, r0, #-1
+; CHECK-NEXT:    sbcs.w r7, r1, r10
+; CHECK-NEXT:    sbcs r7, r2, #0
+; CHECK-NEXT:    sbcs r7, r3, #0
+; CHECK-NEXT:    cset r7, lt
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csel r0, r0, r4, ne
+; CHECK-NEXT:    csel r1, r1, r10, ne
+; CHECK-NEXT:    csel r3, r3, r7, ne
+; CHECK-NEXT:    csel r2, r2, r7, ne
+; CHECK-NEXT:    rsbs r7, r0, #0
+; CHECK-NEXT:    sbcs.w r7, r11, r1
+; CHECK-NEXT:    sbcs.w r2, r4, r2
+; CHECK-NEXT:    sbcs.w r2, r4, r3
 ; CHECK-NEXT:    cset r2, lt
-; CHECK-NEXT:    csel r1, r1, r7, lt
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csel r1, r1, r11, ne
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    csel r3, r5, r6, ne
 ; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    csel r0, r0, r2, ne
-; CHECK-NEXT:    vmov q0[2], q0[0], r10, r0
-; CHECK-NEXT:    vmov q0[3], q0[1], r8, r1
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r3
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r8
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 entry:
   %conv = fptosi <2 x float> %x to <2 x i128>
   %0 = icmp slt <2 x i128> %conv, <i128 9223372036854775807, i128 9223372036854775807>
@@ -899,27 +913,33 @@ entry:
 define arm_aapcs_vfpcc <2 x i64> @utest_f32i64(<2 x float> %x) {
 ; CHECK-LABEL: utest_f32i64:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r6, lr}
-; CHECK-NEXT:    push {r4, r5, r6, lr}
+; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    vmov r4, r0, d0
 ; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    subs r2, #1
-; CHECK-NEXT:    sbcs r2, r3, #0
-; CHECK-NEXT:    cset r2, lo
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csel r6, r0, r2, ne
+; CHECK-NEXT:    mov r5, r1
+; CHECK-NEXT:    subs r1, r2, #1
+; CHECK-NEXT:    sbcs r1, r3, #0
+; CHECK-NEXT:    cset r6, lo
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    csel r7, r0, r6, ne
 ; CHECK-NEXT:    mov r0, r4
-; CHECK-NEXT:    csel r5, r1, r2, ne
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    subs r2, #1
 ; CHECK-NEXT:    sbcs r2, r3, #0
 ; CHECK-NEXT:    cset r2, lo
 ; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    csel r0, r0, r2, ne
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    csel r3, r5, r6, ne
+; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    csel r1, r1, r2, ne
-; CHECK-NEXT:    vmov q0[2], q0[0], r0, r6
-; CHECK-NEXT:    vmov q0[3], q0[1], r1, r5
-; CHECK-NEXT:    pop {r4, r5, r6, pc}
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r7
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r3
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
 entry:
   %conv = fptoui <2 x float> %x to <2 x i128>
   %0 = icmp ult <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
@@ -931,49 +951,54 @@ entry:
 define arm_aapcs_vfpcc <2 x i64> @ustest_f32i64(<2 x float> %x) {
 ; CHECK-LABEL: ustest_f32i64:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT:    vmov r5, r0, d0
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    vmov r6, r0, d0
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    subs r4, r2, #1
+; CHECK-NEXT:    subs r5, r2, #1
 ; CHECK-NEXT:    mov.w r8, #1
-; CHECK-NEXT:    sbcs r4, r3, #0
-; CHECK-NEXT:    mov.w r6, #0
+; CHECK-NEXT:    sbcs r5, r3, #0
 ; CHECK-NEXT:    cset r4, lt
 ; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    csel r0, r0, r4, ne
 ; CHECK-NEXT:    csel r3, r3, r4, ne
-; CHECK-NEXT:    csel r1, r1, r4, ne
+; CHECK-NEXT:    csel r5, r1, r4, ne
 ; CHECK-NEXT:    csel r2, r2, r8, ne
-; CHECK-NEXT:    rsbs r4, r0, #0
-; CHECK-NEXT:    sbcs.w r4, r6, r1
-; CHECK-NEXT:    sbcs.w r2, r6, r2
-; CHECK-NEXT:    sbcs.w r2, r6, r3
-; CHECK-NEXT:    cset r2, lt
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csel r7, r0, r2, ne
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    csel r4, r1, r2, ne
+; CHECK-NEXT:    rsbs r1, r0, #0
+; CHECK-NEXT:    mov.w r4, #0
+; CHECK-NEXT:    sbcs.w r1, r4, r5
+; CHECK-NEXT:    sbcs.w r1, r4, r2
+; CHECK-NEXT:    sbcs.w r1, r4, r3
+; CHECK-NEXT:    cset r7, lt
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csel r9, r0, r7, ne
+; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    subs r5, r2, #1
-; CHECK-NEXT:    sbcs r5, r3, #0
-; CHECK-NEXT:    cset r5, lt
-; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    csel r0, r0, r5, ne
-; CHECK-NEXT:    csel r3, r3, r5, ne
-; CHECK-NEXT:    csel r1, r1, r5, ne
+; CHECK-NEXT:    subs r6, r2, #1
+; CHECK-NEXT:    sbcs r6, r3, #0
+; CHECK-NEXT:    cset r6, lt
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    csel r0, r0, r6, ne
 ; CHECK-NEXT:    csel r2, r2, r8, ne
-; CHECK-NEXT:    rsbs r5, r0, #0
-; CHECK-NEXT:    sbcs.w r5, r6, r1
-; CHECK-NEXT:    sbcs.w r2, r6, r2
-; CHECK-NEXT:    sbcs.w r2, r6, r3
+; CHECK-NEXT:    csel r3, r3, r6, ne
+; CHECK-NEXT:    csel r1, r1, r6, ne
+; CHECK-NEXT:    rsbs r6, r0, #0
+; CHECK-NEXT:    sbcs.w r6, r4, r1
+; CHECK-NEXT:    sbcs.w r2, r4, r2
+; CHECK-NEXT:    sbcs.w r2, r4, r3
 ; CHECK-NEXT:    cset r2, lt
 ; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    csel r0, r0, r2, ne
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csel r3, r5, r7, ne
+; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    csel r1, r1, r2, ne
-; CHECK-NEXT:    vmov q0[2], q0[0], r0, r7
-; CHECK-NEXT:    vmov q0[3], q0[1], r1, r4
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r9
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r3
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
 entry:
   %conv = fptosi <2 x float> %x to <2 x i128>
   %0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
@@ -1041,39 +1066,39 @@ entry:
 define arm_aapcs_vfpcc <2 x i64> @ustest_f16i64(<2 x half> %x) {
 ; CHECK-LABEL: ustest_f16i64:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
-; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
-; CHECK-NEXT:    .pad #4
-; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
 ; CHECK-NEXT:    vmov.u16 r0, q0[1]
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    bl __fixhfti
-; CHECK-NEXT:    rsbs r4, r0, #0
+; CHECK-NEXT:    mov r4, r1
+; CHECK-NEXT:    rsbs r1, r0, #0
 ; CHECK-NEXT:    mov.w r5, #0
-; CHECK-NEXT:    sbcs.w r4, r5, r1
-; CHECK-NEXT:    sbcs.w r2, r5, r2
-; CHECK-NEXT:    sbcs.w r2, r5, r3
-; CHECK-NEXT:    cset r2, lt
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csel r6, r0, r2, ne
+; CHECK-NEXT:    sbcs.w r1, r5, r4
+; CHECK-NEXT:    sbcs.w r1, r5, r2
+; CHECK-NEXT:    sbcs.w r1, r5, r3
+; CHECK-NEXT:    cset r6, lt
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    csel r8, r0, r6, ne
 ; CHECK-NEXT:    vmov.u16 r0, q4[0]
-; CHECK-NEXT:    csel r7, r1, r2, ne
 ; CHECK-NEXT:    bl __fixhfti
-; CHECK-NEXT:    rsbs r4, r0, #0
-; CHECK-NEXT:    sbcs.w r4, r5, r1
+; CHECK-NEXT:    rsbs r7, r0, #0
+; CHECK-NEXT:    sbcs.w r7, r5, r1
 ; CHECK-NEXT:    sbcs.w r2, r5, r2
 ; CHECK-NEXT:    sbcs.w r2, r5, r3
 ; CHECK-NEXT:    cset r2, lt
 ; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    csel r0, r0, r2, ne
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    csel r3, r4, r6, ne
+; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    csel r1, r1, r2, ne
-; CHECK-NEXT:    vmov q0[2], q0[0], r0, r6
-; CHECK-NEXT:    vmov q0[3], q0[1], r1, r7
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r8
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r3
 ; CHECK-NEXT:    vpop {d8, d9}
-; CHECK-NEXT:    add sp, #4
-; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
 entry:
   %conv = fptosi <2 x half> %x to <2 x i128>
   %0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
@@ -1723,63 +1748,62 @@ entry:
 define arm_aapcs_vfpcc <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
 ; CHECK-LABEL: stest_f64i64_mm:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-NEXT:    .pad #4
-; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, lr}
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vmov r0, r1, d9
 ; CHECK-NEXT:    bl __fixdfti
 ; CHECK-NEXT:    vmov r12, lr, d8
-; CHECK-NEXT:    subs.w r5, r0, #-1
-; CHECK-NEXT:    mvn r4, #-2147483648
-; CHECK-NEXT:    sbcs.w r5, r1, r4
-; CHECK-NEXT:    sbcs r5, r2, #0
-; CHECK-NEXT:    mov.w r6, #-1
-; CHECK-NEXT:    sbcs r5, r3, #0
+; CHECK-NEXT:    subs.w r4, r0, #-1
+; CHECK-NEXT:    mvn r9, #-2147483648
+; CHECK-NEXT:    sbcs.w r4, r1, r9
+; CHECK-NEXT:    sbcs r4, r2, #0
+; CHECK-NEXT:    mov.w r7, #-1
+; CHECK-NEXT:    sbcs r4, r3, #0
+; CHECK-NEXT:    mov.w r10, #-2147483648
+; CHECK-NEXT:    cset r4, lt
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csel r3, r3, r4, ne
+; CHECK-NEXT:    csel r2, r2, r4, ne
+; CHECK-NEXT:    csel r4, r0, r7, ne
+; CHECK-NEXT:    csel r1, r1, r9, ne
+; CHECK-NEXT:    rsbs r0, r4, #0
+; CHECK-NEXT:    sbcs.w r0, r10, r1
+; CHECK-NEXT:    sbcs.w r0, r7, r2
+; CHECK-NEXT:    sbcs.w r0, r7, r3
 ; CHECK-NEXT:    cset r5, lt
 ; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    csel r0, r0, r6, ne
-; CHECK-NEXT:    csel r3, r3, r5, ne
-; CHECK-NEXT:    csel r2, r2, r5, ne
-; CHECK-NEXT:    csel r1, r1, r4, ne
-; CHECK-NEXT:    rsbs r7, r0, #0
-; CHECK-NEXT:    mov.w r5, #-2147483648
-; CHECK-NEXT:    sbcs.w r7, r5, r1
-; CHECK-NEXT:    sbcs.w r2, r6, r2
-; CHECK-NEXT:    sbcs.w r2, r6, r3
-; CHECK-NEXT:    cset r2, lt
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csel r9, r0, r2, ne
-; CHECK-NEXT:    csel r8, r1, r5, ne
+; CHECK-NEXT:    csel r8, r1, r10, ne
 ; CHECK-NEXT:    mov r0, r12
 ; CHECK-NEXT:    mov r1, lr
 ; CHECK-NEXT:    bl __fixdfti
-; CHECK-NEXT:    subs.w r7, r0, #-1
-; CHECK-NEXT:    sbcs.w r7, r1, r4
-; CHECK-NEXT:    sbcs r7, r2, #0
-; CHECK-NEXT:    sbcs r7, r3, #0
-; CHECK-NEXT:    cset r7, lt
-; CHECK-NEXT:    cmp r7, #0
-; CHECK-NEXT:    csel r0, r0, r6, ne
-; CHECK-NEXT:    csel r3, r3, r7, ne
-; CHECK-NEXT:    csel r2, r2, r7, ne
-; CHECK-NEXT:    csel r1, r1, r4, ne
-; CHECK-NEXT:    rsbs r7, r0, #0
-; CHECK-NEXT:    sbcs.w r7, r5, r1
-; CHECK-NEXT:    sbcs.w r2, r6, r2
-; CHECK-NEXT:    sbcs.w r2, r6, r3
+; CHECK-NEXT:    subs.w r6, r0, #-1
+; CHECK-NEXT:    sbcs.w r6, r1, r9
+; CHECK-NEXT:    sbcs r6, r2, #0
+; CHECK-NEXT:    sbcs r6, r3, #0
+; CHECK-NEXT:    cset r6, lt
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    csel r0, r0, r7, ne
+; CHECK-NEXT:    csel r1, r1, r9, ne
+; CHECK-NEXT:    csel r3, r3, r6, ne
+; CHECK-NEXT:    csel r2, r2, r6, ne
+; CHECK-NEXT:    rsbs r6, r0, #0
+; CHECK-NEXT:    sbcs.w r6, r10, r1
+; CHECK-NEXT:    sbcs.w r2, r7, r2
+; CHECK-NEXT:    sbcs.w r2, r7, r3
 ; CHECK-NEXT:    cset r2, lt
 ; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csel r1, r1, r10, ne
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    csel r3, r4, r5, ne
+; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    csel r0, r0, r2, ne
-; CHECK-NEXT:    csel r1, r1, r5, ne
-; CHECK-NEXT:    vmov q0[2], q0[0], r0, r9
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r3
 ; CHECK-NEXT:    vmov q0[3], q0[1], r1, r8
 ; CHECK-NEXT:    vpop {d8, d9}
-; CHECK-NEXT:    add sp, #4
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
 entry:
   %conv = fptosi <2 x double> %x to <2 x i128>
   %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>)
@@ -1791,33 +1815,38 @@ entry:
 define arm_aapcs_vfpcc <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
 ; CHECK-LABEL: utest_f64i64_mm:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r7, lr}
-; CHECK-NEXT:    push {r4, r5, r7, lr}
+; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vmov r0, r1, d9
 ; CHECK-NEXT:    bl __fixunsdfti
-; CHECK-NEXT:    vmov r12, lr, d8
+; CHECK-NEXT:    mov r5, r1
+; CHECK-NEXT:    vmov r4, r1, d8
 ; CHECK-NEXT:    subs r2, #1
 ; CHECK-NEXT:    sbcs r2, r3, #0
-; CHECK-NEXT:    cset r2, lo
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csel r4, r1, r2, ne
-; CHECK-NEXT:    csel r5, r0, r2, ne
-; CHECK-NEXT:    mov r0, r12
-; CHECK-NEXT:    mov r1, lr
+; CHECK-NEXT:    cset r6, lo
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    csel r7, r0, r6, ne
+; CHECK-NEXT:    mov r0, r4
 ; CHECK-NEXT:    bl __fixunsdfti
 ; CHECK-NEXT:    subs r2, #1
 ; CHECK-NEXT:    sbcs r2, r3, #0
 ; CHECK-NEXT:    cset r2, lo
 ; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    csel r0, r0, r2, ne
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    csel r3, r5, r6, ne
+; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    csel r1, r1, r2, ne
-; CHECK-NEXT:    vmov q0[2], q0[0], r0, r5
-; CHECK-NEXT:    vmov q0[3], q0[1], r1, r4
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r7
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r3
 ; CHECK-NEXT:    vpop {d8, d9}
-; CHECK-NEXT:    pop {r4, r5, r7, pc}
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
 entry:
   %conv = fptoui <2 x double> %x to <2 x i128>
   %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
@@ -1828,43 +1857,49 @@ entry:
 define arm_aapcs_vfpcc <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
 ; CHECK-LABEL: ustest_f64i64_mm:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r7, lr}
-; CHECK-NEXT:    push {r4, r5, r7, lr}
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vmov r0, r1, d9
 ; CHECK-NEXT:    bl __fixdfti
-; CHECK-NEXT:    vmov r12, lr, d8
+; CHECK-NEXT:    mov r8, r1
+; CHECK-NEXT:    vmov r4, r1, d8
 ; CHECK-NEXT:    subs r2, #1
 ; CHECK-NEXT:    sbcs r2, r3, #0
-; CHECK-NEXT:    cset r2, lt
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csel r5, r0, r2, ne
-; CHECK-NEXT:    csel r0, r3, r2, ne
-; CHECK-NEXT:    csel r4, r1, r2, ne
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    itt mi
-; CHECK-NEXT:    movmi r4, #0
-; CHECK-NEXT:    movmi r5, #0
-; CHECK-NEXT:    mov r0, r12
-; CHECK-NEXT:    mov r1, lr
+; CHECK-NEXT:    cset r7, lt
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csel r6, r0, r7, ne
+; CHECK-NEXT:    csel r5, r3, r7, ne
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    it mi
+; CHECK-NEXT:    movmi r6, #0
+; CHECK-NEXT:    mov r0, r4
 ; CHECK-NEXT:    bl __fixdfti
 ; CHECK-NEXT:    subs r2, #1
 ; CHECK-NEXT:    sbcs r2, r3, #0
 ; CHECK-NEXT:    cset r2, lt
 ; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csel r1, r1, r2, ne
+; CHECK-NEXT:    csel r3, r3, r2, ne
 ; CHECK-NEXT:    csel r0, r0, r2, ne
-; CHECK-NEXT:    csel r2, r3, r2, ne
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    itt mi
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    it mi
 ; CHECK-NEXT:    movmi r0, #0
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r6
+; CHECK-NEXT:    csel r7, r8, r7, ne
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    it mi
+; CHECK-NEXT:    movmi r7, #0
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csel r1, r1, r2, ne
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    it mi
 ; CHECK-NEXT:    movmi r1, #0
-; CHECK-NEXT:    vmov q0[2], q0[0], r0, r5
-; CHECK-NEXT:    vmov q0[3], q0[1], r1, r4
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r7
 ; CHECK-NEXT:    vpop {d8, d9}
-; CHECK-NEXT:    pop {r4, r5, r7, pc}
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
 entry:
   %conv = fptosi <2 x double> %x to <2 x i128>
   %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
@@ -1876,54 +1911,59 @@ entry:
 define arm_aapcs_vfpcc <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
 ; CHECK-LABEL: stest_f32i64_mm:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-NEXT:    vmov r8, r0, d0
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    vmov r9, r0, d0
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    subs.w r7, r0, #-1
-; CHECK-NEXT:    mvn r5, #-2147483648
-; CHECK-NEXT:    sbcs.w r7, r1, r5
-; CHECK-NEXT:    mov.w r6, #-2147483648
+; CHECK-NEXT:    mvn r10, #-2147483648
+; CHECK-NEXT:    sbcs.w r7, r1, r10
+; CHECK-NEXT:    mov.w r4, #-1
 ; CHECK-NEXT:    sbcs r7, r2, #0
+; CHECK-NEXT:    mov.w r11, #-2147483648
 ; CHECK-NEXT:    sbcs r7, r3, #0
 ; CHECK-NEXT:    cset r7, lt
 ; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csel r5, r0, r4, ne
 ; CHECK-NEXT:    csel r3, r3, r7, ne
 ; CHECK-NEXT:    csel r2, r2, r7, ne
-; CHECK-NEXT:    mov.w r7, #-1
-; CHECK-NEXT:    csel r1, r1, r5, ne
-; CHECK-NEXT:    csel r0, r0, r7, ne
-; CHECK-NEXT:    rsbs r4, r0, #0
-; CHECK-NEXT:    sbcs.w r4, r6, r1
-; CHECK-NEXT:    sbcs.w r2, r7, r2
-; CHECK-NEXT:    sbcs.w r2, r7, r3
-; CHECK-NEXT:    cset r2, lt
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csel r10, r0, r2, ne
-; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    csel r9, r1, r6, ne
+; CHECK-NEXT:    csel r1, r1, r10, ne
+; CHECK-NEXT:    rsbs r0, r5, #0
+; CHECK-NEXT:    sbcs.w r0, r11, r1
+; CHECK-NEXT:    sbcs.w r0, r4, r2
+; CHECK-NEXT:    sbcs.w r0, r4, r3
+; CHECK-NEXT:    cset r6, lt
+; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    csel r8, r1, r11, ne
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    subs.w r4, r0, #-1
-; CHECK-NEXT:    sbcs.w r4, r1, r5
-; CHECK-NEXT:    sbcs r4, r2, #0
-; CHECK-NEXT:    sbcs r4, r3, #0
-; CHECK-NEXT:    cset r4, lt
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    csel r0, r0, r7, ne
-; CHECK-NEXT:    csel r3, r3, r4, ne
-; CHECK-NEXT:    csel r2, r2, r4, ne
-; CHECK-NEXT:    csel r1, r1, r5, ne
-; CHECK-NEXT:    rsbs r5, r0, #0
-; CHECK-NEXT:    sbcs.w r5, r6, r1
-; CHECK-NEXT:    sbcs.w r2, r7, r2
-; CHECK-NEXT:    sbcs.w r2, r7, r3
+; CHECK-NEXT:    subs.w r7, r0, #-1
+; CHECK-NEXT:    sbcs.w r7, r1, r10
+; CHECK-NEXT:    sbcs r7, r2, #0
+; CHECK-NEXT:    sbcs r7, r3, #0
+; CHECK-NEXT:    cset r7, lt
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csel r0, r0, r4, ne
+; CHECK-NEXT:    csel r1, r1, r10, ne
+; CHECK-NEXT:    csel r3, r3, r7, ne
+; CHECK-NEXT:    csel r2, r2, r7, ne
+; CHECK-NEXT:    rsbs r7, r0, #0
+; CHECK-NEXT:    sbcs.w r7, r11, r1
+; CHECK-NEXT:    sbcs.w r2, r4, r2
+; CHECK-NEXT:    sbcs.w r2, r4, r3
 ; CHECK-NEXT:    cset r2, lt
 ; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csel r1, r1, r11, ne
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    csel r3, r5, r6, ne
+; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    csel r0, r0, r2, ne
-; CHECK-NEXT:    csel r1, r1, r6, ne
-; CHECK-NEXT:    vmov q0[2], q0[0], r0, r10
-; CHECK-NEXT:    vmov q0[3], q0[1], r1, r9
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r3
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r8
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 entry:
   %conv = fptosi <2 x float> %x to <2 x i128>
   %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>)
@@ -1935,27 +1975,33 @@ entry:
 define arm_aapcs_vfpcc <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
 ; CHECK-LABEL: utest_f32i64_mm:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r6, lr}
-; CHECK-NEXT:    push {r4, r5, r6, lr}
+; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    vmov r4, r0, d0
 ; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    subs r2, #1
-; CHECK-NEXT:    sbcs r2, r3, #0
-; CHECK-NEXT:    cset r2, lo
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csel r6, r0, r2, ne
+; CHECK-NEXT:    mov r5, r1
+; CHECK-NEXT:    subs r1, r2, #1
+; CHECK-NEXT:    sbcs r1, r3, #0
+; CHECK-NEXT:    cset r6, lo
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    csel r7, r0, r6, ne
 ; CHECK-NEXT:    mov r0, r4
-; CHECK-NEXT:    csel r5, r1, r2, ne
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    subs r2, #1
 ; CHECK-NEXT:    sbcs r2, r3, #0
 ; CHECK-NEXT:    cset r2, lo
 ; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    csel r0, r0, r2, ne
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    csel r3, r5, r6, ne
+; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    csel r1, r1, r2, ne
-; CHECK-NEXT:    vmov q0[2], q0[0], r0, r6
-; CHECK-NEXT:    vmov q0[3], q0[1], r1, r5
-; CHECK-NEXT:    pop {r4, r5, r6, pc}
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r7
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r3
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
 entry:
   %conv = fptoui <2 x float> %x to <2 x i128>
   %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
@@ -1966,37 +2012,44 @@ entry:
 define arm_aapcs_vfpcc <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
 ; CHECK-LABEL: ustest_f32i64_mm:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r6, lr}
-; CHECK-NEXT:    push {r4, r5, r6, lr}
-; CHECK-NEXT:    vmov r4, r0, d0
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT:    vmov r5, r0, d0
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    subs r2, #1
-; CHECK-NEXT:    sbcs r2, r3, #0
-; CHECK-NEXT:    cset r2, lt
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csel r6, r0, r2, ne
-; CHECK-NEXT:    csel r0, r3, r2, ne
-; CHECK-NEXT:    csel r5, r1, r2, ne
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r4
-; CHECK-NEXT:    itt mi
-; CHECK-NEXT:    movmi r5, #0
+; CHECK-NEXT:    mov r8, r1
+; CHECK-NEXT:    subs r1, r2, #1
+; CHECK-NEXT:    sbcs r1, r3, #0
+; CHECK-NEXT:    cset r7, lt
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csel r6, r0, r7, ne
+; CHECK-NEXT:    csel r4, r3, r7, ne
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it mi
 ; CHECK-NEXT:    movmi r6, #0
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    subs r2, #1
 ; CHECK-NEXT:    sbcs r2, r3, #0
 ; CHECK-NEXT:    cset r2, lt
 ; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csel r1, r1, r2, ne
+; CHECK-NEXT:    csel r3, r3, r2, ne
 ; CHECK-NEXT:    csel r0, r0, r2, ne
-; CHECK-NEXT:    csel r2, r3, r2, ne
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    itt mi
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    it mi
 ; CHECK-NEXT:    movmi r0, #0
-; CHECK-NEXT:    movmi r1, #0
+; CHECK-NEXT:    cmp r7, #0
 ; CHECK-NEXT:    vmov q0[2], q0[0], r0, r6
-; CHECK-NEXT:    vmov q0[3], q0[1], r1, r5
-; CHECK-NEXT:    pop {r4, r5, r6, pc}
+; CHECK-NEXT:    csel r7, r8, r7, ne
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it mi
+; CHECK-NEXT:    movmi r7, #0
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csel r1, r1, r2, ne
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    it mi
+; CHECK-NEXT:    movmi r1, #0
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r7
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
 entry:
   %conv = fptosi <2 x float> %x to <2 x i128>
   %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
@@ -2059,8 +2112,8 @@ entry:
 define arm_aapcs_vfpcc <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
 ; CHECK-LABEL: ustest_f16i64_mm:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r7, lr}
-; CHECK-NEXT:    push {r4, r5, r7, lr}
+; CHECK-NEXT:    .save {r4, r5, r6, lr}
+; CHECK-NEXT:    push {r4, r5, r6, lr}
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
 ; CHECK-NEXT:    vmov.u16 r0, q0[1]
@@ -2069,19 +2122,24 @@ define arm_aapcs_vfpcc <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEXT:    mov r4, r0
 ; CHECK-NEXT:    vmov.u16 r0, q4[0]
 ; CHECK-NEXT:    mov r5, r1
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    itt mi
-; CHECK-NEXT:    movmi r5, #0
-; CHECK-NEXT:    movmi r4, #0
+; CHECK-NEXT:    mov r6, r3
 ; CHECK-NEXT:    bl __fixhfti
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it mi
+; CHECK-NEXT:    movmi r4, #0
 ; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    itt mi
+; CHECK-NEXT:    it mi
 ; CHECK-NEXT:    movmi r0, #0
-; CHECK-NEXT:    movmi r1, #0
+; CHECK-NEXT:    cmp r6, #0
 ; CHECK-NEXT:    vmov q0[2], q0[0], r0, r4
+; CHECK-NEXT:    it mi
+; CHECK-NEXT:    movmi r5, #0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    it mi
+; CHECK-NEXT:    movmi r1, #0
 ; CHECK-NEXT:    vmov q0[3], q0[1], r1, r5
 ; CHECK-NEXT:    vpop {d8, d9}
-; CHECK-NEXT:    pop {r4, r5, r7, pc}
+; CHECK-NEXT:    pop {r4, r5, r6, pc}
 entry:
   %conv = fptosi <2 x half> %x to <2 x i128>
   %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)

diff  --git a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
index 77548b49d77f23..75b6cb3e1272bc 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
@@ -281,51 +281,49 @@ declare <6 x i32> @llvm.fptosi.sat.v6f64.v6i32 (<6 x double>)
 define arm_aapcs_vfpcc <1 x i32> @test_signed_v1f64_v1i32(<1 x double> %f) {
 ; CHECK-LABEL: test_signed_v1f64_v1i32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
-; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
-; CHECK-NEXT:    .pad #4
-; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
 ; CHECK-NEXT:    vldr d1, .LCPI8_0
 ; CHECK-NEXT:    vmov r5, r4, d0
 ; CHECK-NEXT:    vmov r2, r3, d1
 ; CHECK-NEXT:    mov r0, r5
 ; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    vldr d0, .LCPI8_1
+; CHECK-NEXT:    mov r8, r0
 ; CHECK-NEXT:    mov r0, r5
 ; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    bl __aeabi_d2iz
-; CHECK-NEXT:    vldr d0, .LCPI8_1
+; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    mov r0, r5
 ; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    vmov r2, r3, d0
-; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    bl __aeabi_d2iz
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    cmp r7, #0
 ; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r7, #-2147483648
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    moveq.w r6, #-2147483648
 ; CHECK-NEXT:    mov r0, r5
 ; CHECK-NEXT:    mov r1, r4
 ; CHECK-NEXT:    mov r2, r5
 ; CHECK-NEXT:    mov r3, r4
+; CHECK-NEXT:    cmp.w r8, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    mvnne r7, #-2147483648
+; CHECK-NEXT:    mvnne r6, #-2147483648
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r7, #0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    add sp, #4
-; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT:    movne r6, #0
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI8_0:
-; CHECK-NEXT:    .long 0 @ double -2147483648
-; CHECK-NEXT:    .long 3252682752
-; CHECK-NEXT:  .LCPI8_1:
 ; CHECK-NEXT:    .long 4290772992 @ double 2147483647
 ; CHECK-NEXT:    .long 1105199103
+; CHECK-NEXT:  .LCPI8_1:
+; CHECK-NEXT:    .long 0 @ double -2147483648
+; CHECK-NEXT:    .long 3252682752
     %x = call <1 x i32> @llvm.fptosi.sat.v1f64.v1i32(<1 x double> %f)
     ret <1 x i32> %x
 }
@@ -339,82 +337,115 @@ define arm_aapcs_vfpcc <2 x i32> @test_signed_v2f64_v2i32(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #16
-; CHECK-NEXT:    sub sp, #16
+; CHECK-NEXT:    .pad #24
+; CHECK-NEXT:    sub sp, #24
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI9_0
-; CHECK-NEXT:    vmov r9, r8, d9
-; CHECK-NEXT:    vmov r7, r3, d0
-; CHECK-NEXT:    str r3, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r2, r7
+; CHECK-NEXT:    vmov r8, r7, d9
+; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    strd r2, r3, [sp, #8] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    vldr d0, .LCPI9_1
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    lsrs r4, r0, #5
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    vmov r6, r5, d0
+; CHECK-NEXT:    str r4, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    bl __aeabi_d2lz
+; CHECK-NEXT:    mov r11, r0
+; CHECK-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r11, #-2147483648
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #1
+; CHECK-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    mvnne r11, #-2147483648
+; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    vmov r10, r7, d8
+; CHECK-NEXT:    mov r8, r0
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r8, #1
+; CHECK-NEXT:    cmp.w r8, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r11, #0
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    mov r6, r0
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r6, #1
+; CHECK-NEXT:    ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    lsr.w r9, r0, #5
+; CHECK-NEXT:    mov r0, r10
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    vldr d0, .LCPI9_1
 ; CHECK-NEXT:    mov r5, r0
 ; CHECK-NEXT:    mov r4, r1
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    vmov r2, r3, d0
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    vmov r11, r10, d8
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r5, #-2147483648
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r3, r7
 ; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    strd r2, r3, [sp, #4] @ 8-byte Folded Spill
-; CHECK-NEXT:    itt eq
-; CHECK-NEXT:    moveq.w r4, #-1
-; CHECK-NEXT:    moveq.w r5, #-2147483648
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    mvnne r5, #-2147483648
-; CHECK-NEXT:    movne r4, #0
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r5, #0
-; CHECK-NEXT:    ldr r3, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    mov r7, r0
-; CHECK-NEXT:    mov r6, r1
+; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r5, r11
+; CHECK-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r2, #-1
+; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r2, #0
 ; CHECK-NEXT:    cmp.w r8, #0
-; CHECK-NEXT:    itt eq
-; CHECK-NEXT:    moveq.w r7, #-2147483648
-; CHECK-NEXT:    moveq.w r6, #-1
-; CHECK-NEXT:    ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movne r6, #0
-; CHECK-NEXT:    mvnne r7, #-2147483648
-; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r2, #0
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r4, #-1
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r4, #0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movne r7, #0
-; CHECK-NEXT:    movne r6, #0
-; CHECK-NEXT:    vmov q0[2], q0[0], r7, r5
-; CHECK-NEXT:    vmov q0[3], q0[1], r6, r4
-; CHECK-NEXT:    add sp, #16
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    vmov q0[3], q0[1], r4, r2
+; CHECK-NEXT:    add sp, #24
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -441,53 +472,90 @@ define arm_aapcs_vfpcc <3 x i32> @test_signed_v3f64_v3i32(<3 x double> %f) {
 ; CHECK-NEXT:    vpush {d8, d9}
 ; CHECK-NEXT:    .pad #24
 ; CHECK-NEXT:    sub sp, #24
-; CHECK-NEXT:    vmov.f32 s18, s0
-; CHECK-NEXT:    vmov.f32 s19, s1
+; CHECK-NEXT:    vmov.f32 s16, s0
+; CHECK-NEXT:    vmov.f32 s17, s1
 ; CHECK-NEXT:    vldr d0, .LCPI10_0
-; CHECK-NEXT:    vmov r10, r7, d1
-; CHECK-NEXT:    vmov r6, r3, d0
-; CHECK-NEXT:    vmov.f32 s16, s4
-; CHECK-NEXT:    vmov.f32 s17, s5
-; CHECK-NEXT:    str r3, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r10
-; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    vmov r4, r6, d1
+; CHECK-NEXT:    vmov r2, r11, d0
+; CHECK-NEXT:    vmov.f32 s18, s4
+; CHECK-NEXT:    vmov.f32 s19, s5
+; CHECK-NEXT:    str r2, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    str.w r11, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    vldr d0, .LCPI10_1
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    vmov r2, r8, d0
+; CHECK-NEXT:    str r2, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    str.w r8, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    mov r3, r8
 ; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    mov r9, r0
+; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    bl __aeabi_d2lz
+; CHECK-NEXT:    mov r10, r0
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it eq
+; CHECK-NEXT:    moveq.w r10, #-2147483648
+; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r2, r4
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    vmov r5, r7, d9
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    mvnne r10, #-2147483648
+; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r10, #0
+; CHECK-NEXT:    ldr r2, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
 ; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r0, r5
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    vldr d0, .LCPI10_1
+; CHECK-NEXT:    mov r3, r8
+; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    mov r11, r0
-; CHECK-NEXT:    vmov r1, r0, d9
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    vmov r2, r3, d0
-; CHECK-NEXT:    vmov r9, r8, d8
-; CHECK-NEXT:    str r2, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    strd r1, r0, [sp, #12] @ 8-byte Folded Spill
-; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r0, r5
 ; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    bl __aeabi_d2lz
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    cmp.w r11, #0
 ; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r11, #-2147483648
-; CHECK-NEXT:    mov r4, r3
-; CHECK-NEXT:    str r3, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    moveq.w r6, #-2147483648
+; CHECK-NEXT:    mov r0, r5
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r2, r5
 ; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    vmov r9, r8, d8
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    mvnne r11, #-2147483648
+; CHECK-NEXT:    mvnne r6, #-2147483648
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r11, #0
-; CHECK-NEXT:    ldr.w r10, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    movne r6, #0
+; CHECK-NEXT:    ldr r2, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    ldr r3, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    ldr r3, [sp, #8] @ 4-byte Reload
 ; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r2, r6
-; CHECK-NEXT:    mov r3, r10
 ; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    mov r5, r0
 ; CHECK-NEXT:    mov r0, r9
@@ -497,55 +565,19 @@ define arm_aapcs_vfpcc <3 x i32> @test_signed_v3f64_v3i32(<3 x double> %f) {
 ; CHECK-NEXT:    cmp r5, #0
 ; CHECK-NEXT:    it eq
 ; CHECK-NEXT:    moveq.w r7, #-2147483648
-; CHECK-NEXT:    ldr r2, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r3, r4
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    mov r1, r8
 ; CHECK-NEXT:    mov r2, r9
 ; CHECK-NEXT:    mov r3, r8
+; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    mvnne r7, #-2147483648
 ; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    vmov.32 q0[1], r10
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r7, #0
-; CHECK-NEXT:    ldr r5, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    mov r2, r6
-; CHECK-NEXT:    ldr r4, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    mov r6, r0
-; CHECK-NEXT:    cmp.w r8, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r6, #-2147483648
-; CHECK-NEXT:    ldr r2, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r5
-; CHECK-NEXT:    mov r3, r4
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    mvnne r6, #-2147483648
-; CHECK-NEXT:    bl __aeabi_dcmpun
-; CHECK-NEXT:    vmov.32 q0[1], r11
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r6, #0
-; CHECK-NEXT:    vmov q0[2], q0[0], r6, r7
+; CHECK-NEXT:    vmov q0[2], q0[0], r7, r6
 ; CHECK-NEXT:    add sp, #24
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
@@ -553,11 +585,11 @@ define arm_aapcs_vfpcc <3 x i32> @test_signed_v3f64_v3i32(<3 x double> %f) {
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI10_0:
-; CHECK-NEXT:    .long 0 @ double -2147483648
-; CHECK-NEXT:    .long 3252682752
-; CHECK-NEXT:  .LCPI10_1:
 ; CHECK-NEXT:    .long 4290772992 @ double 2147483647
 ; CHECK-NEXT:    .long 1105199103
+; CHECK-NEXT:  .LCPI10_1:
+; CHECK-NEXT:    .long 0 @ double -2147483648
+; CHECK-NEXT:    .long 3252682752
     %x = call <3 x i32> @llvm.fptosi.sat.v3f64.v3i32(<3 x double> %f)
     ret <3 x i32> %x
 }
@@ -571,86 +603,95 @@ define arm_aapcs_vfpcc <4 x i32> @test_signed_v4f64_v4i32(<4 x double> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11}
-; CHECK-NEXT:    .pad #24
-; CHECK-NEXT:    sub sp, #24
-; CHECK-NEXT:    vmov q5, q1
+; CHECK-NEXT:    .pad #32
+; CHECK-NEXT:    sub sp, #32
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI11_0
+; CHECK-NEXT:    vmov q5, q1
 ; CHECK-NEXT:    vmov r5, r6, d10
-; CHECK-NEXT:    vmov r2, r3, d0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    str r2, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    mov r9, r3
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r8, r0
+; CHECK-NEXT:    vmov r9, r3, d0
+; CHECK-NEXT:    str r3, [sp, #24] @ 4-byte Spill
 ; CHECK-NEXT:    mov r0, r5
 ; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_d2lz
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    vldr d0, .LCPI11_1
 ; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    vmov r11, r0, d11
+; CHECK-NEXT:    mov r0, r5
 ; CHECK-NEXT:    mov r1, r6
 ; CHECK-NEXT:    vmov r2, r3, d0
-; CHECK-NEXT:    cmp.w r8, #0
-; CHECK-NEXT:    vmov r7, r10, d8
-; CHECK-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    str r2, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r4, #-2147483648
 ; CHECK-NEXT:    str r3, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    mov r8, r3
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    str r2, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    mov r8, r0
 ; CHECK-NEXT:    mov r0, r5
 ; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    bl __aeabi_d2lz
+; CHECK-NEXT:    vmov r11, r1, d11
+; CHECK-NEXT:    cmp.w r8, #0
 ; CHECK-NEXT:    mov r2, r5
 ; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    vmov r7, r10, d8
+; CHECK-NEXT:    str r1, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    it eq
+; CHECK-NEXT:    moveq.w r0, #-2147483648
+; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    mvnne r4, #-2147483648
+; CHECK-NEXT:    mvnne r0, #-2147483648
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r6
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    ldr r6, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    ldr.w r8, [sp, #24] @ 4-byte Reload
 ; CHECK-NEXT:    mov r0, r7
 ; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    str r4, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    mov r4, r9
-; CHECK-NEXT:    mov r2, r6
-; CHECK-NEXT:    str.w r9, [sp] @ 4-byte Spill
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    str r4, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    mov r3, r8
+; CHECK-NEXT:    str.w r9, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    ldr r4, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    mov r1, r10
+; CHECK-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    mov r3, r4
 ; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    mov r5, r0
 ; CHECK-NEXT:    mov r0, r7
 ; CHECK-NEXT:    mov r1, r10
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    mov r9, r0
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    cmp r5, #0
 ; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r9, #-2147483648
-; CHECK-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    moveq.w r6, #-2147483648
+; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r2, r7
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    mov r2, r7
 ; CHECK-NEXT:    mov r3, r10
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    mvnne r9, #-2147483648
+; CHECK-NEXT:    mvnne r6, #-2147483648
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r9, #0
-; CHECK-NEXT:    ldr r5, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    movne r6, #0
+; CHECK-NEXT:    ldr r5, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r8
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    ldr.w r9, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    mov r10, r0
 ; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r2, r6
-; CHECK-NEXT:    mov r3, r4
 ; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r3, r4
+; CHECK-NEXT:    mov r2, r9
 ; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    mov r4, r0
 ; CHECK-NEXT:    mov r0, r11
@@ -660,17 +701,11 @@ define arm_aapcs_vfpcc <4 x i32> @test_signed_v4f64_v4i32(<4 x double> %f) {
 ; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it eq
 ; CHECK-NEXT:    moveq.w r8, #-2147483648
-; CHECK-NEXT:    ldr.w r10, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    ldr r3, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    mov r0, r11
 ; CHECK-NEXT:    mov r1, r5
 ; CHECK-NEXT:    mov r2, r11
 ; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    cmp.w r10, #0
 ; CHECK-NEXT:    vmov r7, r4, d9
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    mvnne r8, #-2147483648
@@ -678,50 +713,51 @@ define arm_aapcs_vfpcc <4 x i32> @test_signed_v4f64_v4i32(<4 x double> %f) {
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne.w r8, #0
-; CHECK-NEXT:    ldr r3, [sp] @ 4-byte Reload
+; CHECK-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
 ; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    ldr r3, [sp, #24] @ 4-byte Reload
 ; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    ldr r3, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    mov r10, r0
+; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r2, r9
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    mov r11, r0
 ; CHECK-NEXT:    mov r0, r7
 ; CHECK-NEXT:    mov r1, r4
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    mov r6, r0
-; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    cmp.w r11, #0
 ; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r6, #-2147483648
-; CHECK-NEXT:    ldr r3, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    moveq.w r5, #-2147483648
 ; CHECK-NEXT:    mov r0, r7
 ; CHECK-NEXT:    mov r1, r4
 ; CHECK-NEXT:    mov r2, r7
 ; CHECK-NEXT:    mov r3, r4
+; CHECK-NEXT:    cmp.w r10, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    mvnne r6, #-2147483648
+; CHECK-NEXT:    mvnne r5, #-2147483648
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r6, #0
-; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    vmov q0[2], q0[0], r9, r0
-; CHECK-NEXT:    vmov q0[3], q0[1], r6, r8
-; CHECK-NEXT:    add sp, #24
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r6, r0
+; CHECK-NEXT:    vmov q0[3], q0[1], r5, r8
+; CHECK-NEXT:    add sp, #32
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI11_0:
-; CHECK-NEXT:    .long 0 @ double -2147483648
-; CHECK-NEXT:    .long 3252682752
-; CHECK-NEXT:  .LCPI11_1:
 ; CHECK-NEXT:    .long 4290772992 @ double 2147483647
 ; CHECK-NEXT:    .long 1105199103
+; CHECK-NEXT:  .LCPI11_1:
+; CHECK-NEXT:    .long 0 @ double -2147483648
+; CHECK-NEXT:    .long 3252682752
     %x = call <4 x i32> @llvm.fptosi.sat.v4f64.v4i32(<4 x double> %f)
     ret <4 x i32> %x
 }
@@ -738,186 +774,186 @@ define arm_aapcs_vfpcc <5 x i32> @test_signed_v5f64_v5i32(<5 x double> %f) {
 ; CHECK-NEXT:    .pad #32
 ; CHECK-NEXT:    sub sp, #32
 ; CHECK-NEXT:    vmov.f32 s16, s0
-; CHECK-NEXT:    str r0, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    vmov.f32 s17, s1
 ; CHECK-NEXT:    vldr d0, .LCPI12_0
-; CHECK-NEXT:    vmov r7, r5, d4
+; CHECK-NEXT:    vmov r5, r4, d4
+; CHECK-NEXT:    str r0, [sp, #16] @ 4-byte Spill
 ; CHECK-NEXT:    vmov r2, r3, d0
-; CHECK-NEXT:    vmov.f32 s18, s6
-; CHECK-NEXT:    vmov.f32 s20, s4
+; CHECK-NEXT:    vmov.f32 s20, s6
+; CHECK-NEXT:    vmov.f32 s18, s4
 ; CHECK-NEXT:    vmov.f32 s22, s2
-; CHECK-NEXT:    vmov.f32 s19, s7
-; CHECK-NEXT:    vmov.f32 s21, s5
+; CHECK-NEXT:    vmov.f32 s21, s7
+; CHECK-NEXT:    vmov.f32 s19, s5
 ; CHECK-NEXT:    vmov.f32 s23, s3
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    strd r2, r3, [sp, #20] @ 8-byte Folded Spill
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    vldr d0, .LCPI12_1
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    str r3, [sp, #4] @ 4-byte Spill
 ; CHECK-NEXT:    str r2, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    str r3, [sp, #20] @ 4-byte Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r11, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r10, r0
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r4
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    vmov r1, r0, d10
-; CHECK-NEXT:    vldr d0, .LCPI12_1
-; CHECK-NEXT:    vmov r6, r8, d9
-; CHECK-NEXT:    cmp.w r11, #0
-; CHECK-NEXT:    vmov r10, r3, d0
-; CHECK-NEXT:    strd r1, r0, [sp, #8] @ 8-byte Folded Spill
-; CHECK-NEXT:    vmov r9, r0, d11
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r11, r3
-; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    mov r11, r0
+; CHECK-NEXT:    vmov r8, r0, d11
+; CHECK-NEXT:    cmp.w r10, #0
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    mov r3, r4
+; CHECK-NEXT:    vmov r9, r6, d10
+; CHECK-NEXT:    str r0, [sp, #8] @ 4-byte Spill
 ; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r4, #-2147483648
-; CHECK-NEXT:    str.w r10, [sp] @ 4-byte Spill
-; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    moveq.w r11, #-2147483648
+; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    mov r0, r5
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    mvnne r4, #-2147483648
+; CHECK-NEXT:    mvnne r11, #-2147483648
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    ldr r0, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    str r4, [r0, #16]
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    ldr r7, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    ldr r5, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    movne.w r11, #0
+; CHECK-NEXT:    str.w r11, [r7, #16]
+; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    ldr.w r10, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    ldr r7, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    ldr r4, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    ldr.w r11, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    mov r2, r4
+; CHECK-NEXT:    mov r3, r11
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    mov r1, r6
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    cmp r5, #0
 ; CHECK-NEXT:    it eq
 ; CHECK-NEXT:    moveq.w r0, #-2147483648
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    mov r10, r11
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r2, r6
-; CHECK-NEXT:    mov r3, r8
+; CHECK-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    cmp r1, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    mvnne r4, #-2147483648
+; CHECK-NEXT:    mvnne r0, #-2147483648
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    mov r1, r6
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    ldr r6, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    str r4, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    str r5, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    ldr r5, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r9, r0
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r2, r4
+; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    mov r6, r11
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r5
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    mov r10, r0
+; CHECK-NEXT:    cmp r7, #0
 ; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r8, #-2147483648
-; CHECK-NEXT:    ldr.w r11, [sp] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    moveq.w r10, #-2147483648
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    vmov r11, r4, d9
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    mvnne r8, #-2147483648
+; CHECK-NEXT:    mvnne r10, #-2147483648
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r8, #0
-; CHECK-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT:    ldr r5, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    ldr.w r9, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    movne.w r10, #0
+; CHECK-NEXT:    ldrd r2, r3, [sp, #20] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    mov r8, r0
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r3, r6
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r6, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r9, r0
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    mov r1, r4
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    cmp.w r9, #0
 ; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r4, #-2147483648
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    moveq.w r7, #-2147483648
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    mov r3, r4
+; CHECK-NEXT:    cmp.w r8, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    mvnne r4, #-2147483648
+; CHECK-NEXT:    mvnne r7, #-2147483648
 ; CHECK-NEXT:    bl __aeabi_dcmpun
-; CHECK-NEXT:    vmov r7, r6, d8
+; CHECK-NEXT:    vmov r5, r4, d8
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    movne r7, #0
+; CHECK-NEXT:    ldrd r2, r3, [sp, #20] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r8, r0
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r3, r6
 ; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    mov r9, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r4
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    mov r5, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    cmp.w r9, #0
 ; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r5, #-2147483648
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    moveq.w r6, #-2147483648
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    mov r3, r4
+; CHECK-NEXT:    cmp.w r8, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    mvnne r5, #-2147483648
+; CHECK-NEXT:    mvnne r6, #-2147483648
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    movne r6, #0
+; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r6, r7
+; CHECK-NEXT:    vmov q0[3], q0[1], r10, r0
 ; CHECK-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    vmov q0[2], q0[0], r5, r4
-; CHECK-NEXT:    vmov q0[3], q0[1], r8, r0
-; CHECK-NEXT:    ldr r0, [sp, #24] @ 4-byte Reload
 ; CHECK-NEXT:    vstrw.32 q0, [r0]
 ; CHECK-NEXT:    add sp, #32
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11}
@@ -926,11 +962,11 @@ define arm_aapcs_vfpcc <5 x i32> @test_signed_v5f64_v5i32(<5 x double> %f) {
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI12_0:
-; CHECK-NEXT:    .long 0 @ double -2147483648
-; CHECK-NEXT:    .long 3252682752
-; CHECK-NEXT:  .LCPI12_1:
 ; CHECK-NEXT:    .long 4290772992 @ double 2147483647
 ; CHECK-NEXT:    .long 1105199103
+; CHECK-NEXT:  .LCPI12_1:
+; CHECK-NEXT:    .long 0 @ double -2147483648
+; CHECK-NEXT:    .long 3252682752
     %x = call <5 x i32> @llvm.fptosi.sat.v5f64.v5i32(<5 x double> %f)
     ret <5 x i32> %x
 }
@@ -947,180 +983,182 @@ define arm_aapcs_vfpcc <6 x i32> @test_signed_v6f64_v6i32(<6 x double> %f) {
 ; CHECK-NEXT:    .pad #40
 ; CHECK-NEXT:    sub sp, #40
 ; CHECK-NEXT:    vmov.f32 s16, s0
-; CHECK-NEXT:    str r0, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT:    str r0, [sp, #20] @ 4-byte Spill
 ; CHECK-NEXT:    vmov.f32 s17, s1
 ; CHECK-NEXT:    vldr d0, .LCPI13_0
-; CHECK-NEXT:    vmov r6, r4, d5
-; CHECK-NEXT:    vmov r2, r3, d0
-; CHECK-NEXT:    vmov.f32 s20, s8
-; CHECK-NEXT:    vmov.f32 s22, s6
+; CHECK-NEXT:    vmov r9, r4, d5
+; CHECK-NEXT:    vmov r2, r6, d0
+; CHECK-NEXT:    vmov.f32 s22, s8
+; CHECK-NEXT:    vmov.f32 s20, s6
 ; CHECK-NEXT:    vmov.f32 s18, s4
 ; CHECK-NEXT:    vmov.f32 s24, s2
-; CHECK-NEXT:    vmov.f32 s21, s9
-; CHECK-NEXT:    vmov.f32 s23, s7
+; CHECK-NEXT:    vmov.f32 s23, s9
+; CHECK-NEXT:    vmov.f32 s21, s7
 ; CHECK-NEXT:    vmov.f32 s19, s5
 ; CHECK-NEXT:    vmov.f32 s25, s3
-; CHECK-NEXT:    str r2, [sp, #36] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    str r2, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    str r3, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    str r6, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    vldr d0, .LCPI13_1
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    strd r2, r3, [sp, #32] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r9, r0
-; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r11, r0
+; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    mov r1, r4
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    vldr d0, .LCPI13_1
-; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    mov r10, r0
+; CHECK-NEXT:    vmov r8, r0, d10
+; CHECK-NEXT:    cmp.w r11, #0
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r4
+; CHECK-NEXT:    vmov r7, r5, d11
+; CHECK-NEXT:    str r0, [sp, #8] @ 4-byte Spill
 ; CHECK-NEXT:    vmov r1, r0, d12
-; CHECK-NEXT:    cmp.w r9, #0
-; CHECK-NEXT:    vmov r2, r3, d0
-; CHECK-NEXT:    vmov r7, r8, d10
-; CHECK-NEXT:    vmov r11, r10, d11
-; CHECK-NEXT:    str r3, [sp, #32] @ 4-byte Spill
-; CHECK-NEXT:    strd r1, r0, [sp, #8] @ 8-byte Folded Spill
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    strd r1, r0, [sp, #12] @ 8-byte Folded Spill
 ; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r5, #-2147483648
-; CHECK-NEXT:    mov r9, r2
-; CHECK-NEXT:    str r2, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    moveq.w r10, #-2147483648
+; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r6
-; CHECK-NEXT:    mov r3, r4
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    mvnne r5, #-2147483648
+; CHECK-NEXT:    mvnne r10, #-2147483648
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r5, #0
-; CHECK-NEXT:    ldr r6, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    movne.w r10, #0
+; CHECK-NEXT:    ldr.w r11, [sp, #20] @ 4-byte Reload
 ; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    str r5, [r6, #20]
-; CHECK-NEXT:    ldr r2, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT:    ldr r3, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    str.w r10, [r11, #20]
+; CHECK-NEXT:    ldr.w r10, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    ldrd r2, r3, [sp, #32] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r9, r0
+; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    mov r1, r5
 ; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    mov r4, r0
 ; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    mov r1, r5
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r5, #-2147483648
-; CHECK-NEXT:    ldr r3, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    vmov r2, r1, d9
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    moveq.w r6, #-2147483648
 ; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    strd r2, r1, [sp] @ 8-byte Folded Spill
-; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    mov r1, r5
 ; CHECK-NEXT:    mov r2, r7
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    cmp.w r9, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    mvnne r5, #-2147483648
+; CHECK-NEXT:    mvnne r6, #-2147483648
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r5, #0
-; CHECK-NEXT:    str r5, [r6, #16]
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    ldr.w r8, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    ldr.w r9, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT:    mov r2, r8
-; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    movne r6, #0
+; CHECK-NEXT:    str.w r6, [r11, #16]
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    ldr r4, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    ldr.w r11, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    ldr r7, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT:    mov r9, r0
+; CHECK-NEXT:    ldr r5, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r2, r7
+; CHECK-NEXT:    mov r3, r5
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r10
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r4
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    mov r10, r0
+; CHECK-NEXT:    cmp r6, #0
 ; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r0, #-2147483648
-; CHECK-NEXT:    ldr r7, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    ldr r2, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    mov r3, r7
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    mvnne r4, #-2147483648
-; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    moveq.w r10, #-2147483648
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r3, r4
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    mvnne r10, #-2147483648
+; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    ldr r6, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT:    mov r2, r8
-; CHECK-NEXT:    ldr r5, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    str r4, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    movne.w r10, #0
+; CHECK-NEXT:    ldr r4, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    ldr r6, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    ldr r2, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r9, r0
+; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r2, r7
+; CHECK-NEXT:    mov r3, r5
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r11, r0
+; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r5, r6
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    mov r10, r0
-; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    mov r8, r0
+; CHECK-NEXT:    cmp.w r11, #0
 ; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r10, #-2147483648
-; CHECK-NEXT:    ldr.w r11, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r3, r7
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    moveq.w r8, #-2147483648
+; CHECK-NEXT:    mov r0, r4
 ; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r2, r4
 ; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    vmov r7, r6, d9
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    mvnne r10, #-2147483648
+; CHECK-NEXT:    mvnne r8, #-2147483648
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r10, #0
-; CHECK-NEXT:    ldr r5, [sp] @ 4-byte Reload
-; CHECK-NEXT:    mov r2, r8
-; CHECK-NEXT:    ldr r6, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    movne.w r8, #0
+; CHECK-NEXT:    ldr.w r11, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    ldr r3, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    ldrd r2, r3, [sp, #32] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r9, r0
+; CHECK-NEXT:    mov r0, r7
 ; CHECK-NEXT:    mov r1, r6
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r7, r0
-; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    mov r0, r7
 ; CHECK-NEXT:    mov r1, r6
 ; CHECK-NEXT:    bl __aeabi_d2lz
 ; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    cmp r5, #0
 ; CHECK-NEXT:    it eq
 ; CHECK-NEXT:    moveq.w r4, #-2147483648
-; CHECK-NEXT:    ldr.w r8, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r0, r7
 ; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    mov r2, r7
 ; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    cmp.w r9, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    mvnne r4, #-2147483648
 ; CHECK-NEXT:    bl __aeabi_dcmpun
@@ -1128,39 +1166,38 @@ define arm_aapcs_vfpcc <6 x i32> @test_signed_v6f64_v6i32(<6 x double> %f) {
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    ldr r2, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    ldr r3, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    mov r2, r11
 ; CHECK-NEXT:    mov r0, r7
 ; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    ldrd r2, r3, [sp, #32] @ 8-byte Folded Reload
 ; CHECK-NEXT:    mov r9, r0
 ; CHECK-NEXT:    mov r0, r7
 ; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    mov r11, r0
 ; CHECK-NEXT:    mov r0, r7
 ; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    bl __aeabi_d2lz
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    cmp.w r11, #0
 ; CHECK-NEXT:    it eq
 ; CHECK-NEXT:    moveq.w r5, #-2147483648
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    mov r0, r7
 ; CHECK-NEXT:    mov r1, r6
 ; CHECK-NEXT:    mov r2, r7
 ; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    cmp.w r9, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    mvnne r5, #-2147483648
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r5, #0
-; CHECK-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
 ; CHECK-NEXT:    vmov q0[2], q0[0], r5, r4
-; CHECK-NEXT:    vmov q0[3], q0[1], r10, r0
-; CHECK-NEXT:    ldr r0, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[3], q0[1], r8, r10
 ; CHECK-NEXT:    vstrw.32 q0, [r0]
 ; CHECK-NEXT:    add sp, #40
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12}
@@ -1169,11 +1206,11 @@ define arm_aapcs_vfpcc <6 x i32> @test_signed_v6f64_v6i32(<6 x double> %f) {
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI13_0:
-; CHECK-NEXT:    .long 0 @ double -2147483648
-; CHECK-NEXT:    .long 3252682752
-; CHECK-NEXT:  .LCPI13_1:
 ; CHECK-NEXT:    .long 4290772992 @ double 2147483647
 ; CHECK-NEXT:    .long 1105199103
+; CHECK-NEXT:  .LCPI13_1:
+; CHECK-NEXT:    .long 0 @ double -2147483648
+; CHECK-NEXT:    .long 3252682752
     %x = call <6 x i32> @llvm.fptosi.sat.v6f64.v6i32(<6 x double> %f)
     ret <6 x i32> %x
 }
@@ -1754,10 +1791,18 @@ define arm_aapcs_vfpcc <4 x i50> @test_signed_v4f32_v4i50(<4 x float> %f) {
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11}
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    mov r8, r0
+; CHECK-NEXT:    vmov r0, s19
+; CHECK-NEXT:    bl __aeabi_f2lz
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vmov r0, s18
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    bl __aeabi_f2lz
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    vmov r0, s17
+; CHECK-NEXT:    mov r5, r1
 ; CHECK-NEXT:    bl __aeabi_f2lz
 ; CHECK-NEXT:    mov r9, r0
-; CHECK-NEXT:    vmov r0, s18
+; CHECK-NEXT:    vmov r0, s16
 ; CHECK-NEXT:    vldr s22, .LCPI28_0
 ; CHECK-NEXT:    mov r11, r1
 ; CHECK-NEXT:    vldr s20, .LCPI28_1
@@ -1766,60 +1811,48 @@ define arm_aapcs_vfpcc <4 x i50> @test_signed_v4f32_v4i50(<4 x float> %f) {
 ; CHECK-NEXT:    itt lt
 ; CHECK-NEXT:    movwlt r11, #0
 ; CHECK-NEXT:    movtlt r11, #65534
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s17, s20
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt.w r9, #0
-; CHECK-NEXT:    vcmp.f32 s17, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r9, #-1
 ; CHECK-NEXT:    vcmp.f32 s17, s17
-; CHECK-NEXT:    itt gt
+; CHECK-NEXT:    ittt gt
 ; CHECK-NEXT:    movwgt r11, #65535
 ; CHECK-NEXT:    movtgt r11, #1
+; CHECK-NEXT:    movgt.w r9, #-1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s18, s22
 ; CHECK-NEXT:    itt vs
 ; CHECK-NEXT:    movvs.w r9, #0
 ; CHECK-NEXT:    movvs.w r11, #0
-; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    mov r6, r0
-; CHECK-NEXT:    vmov r0, s19
-; CHECK-NEXT:    vcmp.f32 s18, s22
-; CHECK-NEXT:    mov r5, r1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itt lt
+; CHECK-NEXT:    vcmp.f32 s18, s20
+; CHECK-NEXT:    ittt lt
 ; CHECK-NEXT:    movlt r5, #0
 ; CHECK-NEXT:    movtlt r5, #65534
-; CHECK-NEXT:    vcmp.f32 s18, s20
-; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r6, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itt gt
+; CHECK-NEXT:    vcmp.f32 s18, s18
+; CHECK-NEXT:    ittt gt
 ; CHECK-NEXT:    movwgt r5, #65535
 ; CHECK-NEXT:    movtgt r5, #1
-; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r6, #-1
-; CHECK-NEXT:    vcmp.f32 s18, s18
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s19, s22
 ; CHECK-NEXT:    itt vs
 ; CHECK-NEXT:    movvs r6, #0
 ; CHECK-NEXT:    movvs r5, #0
-; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    vmov r0, s16
-; CHECK-NEXT:    vcmp.f32 s19, s22
-; CHECK-NEXT:    mov r7, r1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itt lt
+; CHECK-NEXT:    ittt lt
 ; CHECK-NEXT:    movlt r7, #0
 ; CHECK-NEXT:    movtlt r7, #65534
-; CHECK-NEXT:    vcmp.f32 s19, s20
-; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r4, #0
+; CHECK-NEXT:    vcmp.f32 s19, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itt gt
+; CHECK-NEXT:    ittt gt
 ; CHECK-NEXT:    movwgt r7, #65535
 ; CHECK-NEXT:    movtgt r7, #1
-; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r4, #-1
 ; CHECK-NEXT:    vcmp.f32 s19, s19
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
@@ -1831,9 +1864,9 @@ define arm_aapcs_vfpcc <4 x i50> @test_signed_v4f32_v4i50(<4 x float> %f) {
 ; CHECK-NEXT:    bfc r5, #18, #14
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    ittt lt
-; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movtlt r1, #65534
+; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    vcmp.f32 s16, s20
 ; CHECK-NEXT:    mov r2, r6
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
@@ -1847,10 +1880,11 @@ define arm_aapcs_vfpcc <4 x i50> @test_signed_v4f32_v4i50(<4 x float> %f) {
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
 ; CHECK-NEXT:    str.w r0, [r8]
-; CHECK-NEXT:    lsr.w r0, r7, #10
+; CHECK-NEXT:    lsrs r0, r7, #10
 ; CHECK-NEXT:    bfc r7, #18, #14
 ; CHECK-NEXT:    bfc r11, #18, #14
 ; CHECK-NEXT:    lsll r4, r7, #22
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    orr.w r3, r5, r7
 ; CHECK-NEXT:    str.w r3, [r8, #20]
 ; CHECK-NEXT:    orr.w r2, r2, r4
@@ -1995,23 +2029,52 @@ define arm_aapcs_vfpcc <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-NEXT:    sub sp, #8
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    vmov r0, s18
+; CHECK-NEXT:    vmov r0, s17
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    vmov r6, s17
+; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    vmov r0, s18
 ; CHECK-NEXT:    vldr s22, .LCPI30_0
+; CHECK-NEXT:    mov r11, r1
 ; CHECK-NEXT:    vldr s20, .LCPI30_1
-; CHECK-NEXT:    vmov r7, s19
+; CHECK-NEXT:    mov r10, r2
+; CHECK-NEXT:    vcmp.f32 s17, s22
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    itttt lt
+; CHECK-NEXT:    mvnlt r3, #7
+; CHECK-NEXT:    movlt.w r10, #0
+; CHECK-NEXT:    movlt r7, #0
+; CHECK-NEXT:    movlt.w r11, #0
+; CHECK-NEXT:    vcmp.f32 s17, s20
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    itttt gt
+; CHECK-NEXT:    movgt.w r11, #-1
+; CHECK-NEXT:    movgt.w r7, #-1
+; CHECK-NEXT:    movgt.w r10, #-1
+; CHECK-NEXT:    movgt r3, #7
+; CHECK-NEXT:    vcmp.f32 s17, s17
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    it vs
+; CHECK-NEXT:    movvs r3, #0
+; CHECK-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    itt vs
+; CHECK-NEXT:    movvs.w r10, #0
+; CHECK-NEXT:    movvs r7, #0
+; CHECK-NEXT:    str r7, [sp] @ 4-byte Spill
+; CHECK-NEXT:    it vs
+; CHECK-NEXT:    movvs.w r11, #0
+; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    vcmp.f32 s18, s22
+; CHECK-NEXT:    mov r5, r3
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itttt lt
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    mvnlt r3, #7
+; CHECK-NEXT:    mvnlt r5, #7
 ; CHECK-NEXT:    vcmp.f32 s18, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itttt gt
-; CHECK-NEXT:    movgt r3, #7
+; CHECK-NEXT:    movgt r5, #7
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    movgt.w r2, #-1
@@ -2023,67 +2086,40 @@ define arm_aapcs_vfpcc <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r1, #0
 ; CHECK-NEXT:    str.w r1, [r4, #29]
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
 ; CHECK-NEXT:    str.w r0, [r4, #25]
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r3, #0
-; CHECK-NEXT:    str r3, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    mov r10, r0
-; CHECK-NEXT:    vcmp.f32 s17, s22
-; CHECK-NEXT:    mov r5, r1
-; CHECK-NEXT:    mov r6, r2
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itttt lt
-; CHECK-NEXT:    mvnlt r3, #7
-; CHECK-NEXT:    movlt r6, #0
-; CHECK-NEXT:    movlt.w r10, #0
-; CHECK-NEXT:    movlt r5, #0
-; CHECK-NEXT:    vcmp.f32 s17, s20
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itttt gt
-; CHECK-NEXT:    movgt.w r5, #-1
-; CHECK-NEXT:    movgt.w r10, #-1
-; CHECK-NEXT:    movgt.w r6, #-1
-; CHECK-NEXT:    movgt r3, #7
-; CHECK-NEXT:    vcmp.f32 s17, s17
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r3, #0
-; CHECK-NEXT:    str r3, [sp] @ 4-byte Spill
-; CHECK-NEXT:    ittt vs
-; CHECK-NEXT:    movvs r6, #0
-; CHECK-NEXT:    movvs.w r10, #0
 ; CHECK-NEXT:    movvs r5, #0
+; CHECK-NEXT:    mov r0, r1
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    vmov r0, s16
 ; CHECK-NEXT:    vcmp.f32 s19, s22
-; CHECK-NEXT:    mov r11, r1
-; CHECK-NEXT:    mov r8, r2
-; CHECK-NEXT:    mov r9, r3
+; CHECK-NEXT:    mov r9, r1
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r8, r3
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itttt lt
-; CHECK-NEXT:    mvnlt r9, #7
-; CHECK-NEXT:    movlt.w r8, #0
-; CHECK-NEXT:    movlt.w r11, #0
+; CHECK-NEXT:    mvnlt r8, #7
+; CHECK-NEXT:    movlt r6, #0
+; CHECK-NEXT:    movlt.w r9, #0
 ; CHECK-NEXT:    movlt r7, #0
 ; CHECK-NEXT:    vcmp.f32 s19, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itttt gt
 ; CHECK-NEXT:    movgt.w r7, #-1
-; CHECK-NEXT:    movgt.w r11, #-1
-; CHECK-NEXT:    movgt.w r8, #-1
-; CHECK-NEXT:    movgt.w r9, #7
+; CHECK-NEXT:    movgt.w r9, #-1
+; CHECK-NEXT:    movgt.w r6, #-1
+; CHECK-NEXT:    movgt.w r8, #7
 ; CHECK-NEXT:    vcmp.f32 s19, s19
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itttt vs
-; CHECK-NEXT:    movvs.w r9, #0
 ; CHECK-NEXT:    movvs.w r8, #0
-; CHECK-NEXT:    movvs.w r11, #0
+; CHECK-NEXT:    movvs r6, #0
+; CHECK-NEXT:    movvs.w r9, #0
 ; CHECK-NEXT:    movvs r7, #0
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    vcmp.f32 s16, s22
@@ -2112,30 +2148,30 @@ define arm_aapcs_vfpcc <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-NEXT:    movvs r0, #0
 ; CHECK-NEXT:    str r0, [r4]
 ; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    lsrl r0, r11, #28
-; CHECK-NEXT:    and r1, r9, #15
+; CHECK-NEXT:    lsrl r0, r9, #28
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    orr.w r1, r9, r6, lsl #4
+; CHECK-NEXT:    str.w r1, [r4, #45]
+; CHECK-NEXT:    and r1, r8, #15
 ; CHECK-NEXT:    str.w r0, [r4, #41]
-; CHECK-NEXT:    mov r0, r10
-; CHECK-NEXT:    lsrl r0, r5, #28
-; CHECK-NEXT:    str r0, [r4, #16]
-; CHECK-NEXT:    orr.w r0, r11, r8, lsl #4
-; CHECK-NEXT:    lsrl r8, r1, #28
-; CHECK-NEXT:    str.w r0, [r4, #45]
-; CHECK-NEXT:    strb.w r8, [r4, #49]
-; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    and r0, r0, #15
+; CHECK-NEXT:    and r0, r5, #15
+; CHECK-NEXT:    lsrl r6, r1, #28
+; CHECK-NEXT:    strb.w r6, [r4, #49]
 ; CHECK-NEXT:    orr.w r0, r0, r7, lsl #4
 ; CHECK-NEXT:    str.w r0, [r4, #37]
-; CHECK-NEXT:    orr.w r0, r5, r6, lsl #4
-; CHECK-NEXT:    str r0, [r4, #20]
-; CHECK-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-NEXT:    ldr r2, [sp] @ 4-byte Reload
+; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    lsrl r0, r11, #28
+; CHECK-NEXT:    orr.w r1, r11, r10, lsl #4
+; CHECK-NEXT:    strd r0, r1, [r4, #16]
+; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-NEXT:    and r1, r0, #15
-; CHECK-NEXT:    lsrl r6, r1, #28
-; CHECK-NEXT:    strb r6, [r4, #24]
+; CHECK-NEXT:    lsrl r10, r1, #28
+; CHECK-NEXT:    strb.w r10, [r4, #24]
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r3, #0
 ; CHECK-NEXT:    and r0, r3, #15
-; CHECK-NEXT:    orr.w r0, r0, r10, lsl #4
+; CHECK-NEXT:    orr.w r0, r0, r2, lsl #4
 ; CHECK-NEXT:    str r0, [r4, #12]
 ; CHECK-NEXT:    add sp, #8
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11}
@@ -2164,58 +2200,61 @@ define arm_aapcs_vfpcc <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) {
 ; CHECK-NEXT:    mov r4, r0
 ; CHECK-NEXT:    vmov r0, s19
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    vmov r5, s18
-; CHECK-NEXT:    vldr s22, .LCPI31_0
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    vmov r0, s18
+; CHECK-NEXT:    vldr s22, .LCPI31_0
+; CHECK-NEXT:    vmov r7, s16
 ; CHECK-NEXT:    vldr s20, .LCPI31_1
-; CHECK-NEXT:    add.w r12, r4, #48
+; CHECK-NEXT:    vmov r6, s17
 ; CHECK-NEXT:    vcmp.f32 s19, s22
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s19, s20
 ; CHECK-NEXT:    itttt lt
 ; CHECK-NEXT:    movlt.w r3, #-2147483648
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    movlt r1, #0
-; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    vcmp.f32 s19, s20
+; CHECK-NEXT:    movlt r5, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s19, s19
 ; CHECK-NEXT:    itttt gt
-; CHECK-NEXT:    movgt.w r0, #-1
+; CHECK-NEXT:    movgt.w r5, #-1
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    mvngt r3, #-2147483648
-; CHECK-NEXT:    vcmp.f32 s19, s19
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r3, #0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    ittt vs
 ; CHECK-NEXT:    movvs r2, #0
 ; CHECK-NEXT:    movvs r1, #0
-; CHECK-NEXT:    movvs r0, #0
-; CHECK-NEXT:    stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT:    vmov r7, s16
-; CHECK-NEXT:    vmov r6, s17
-; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    movvs r5, #0
+; CHECK-NEXT:    strd r5, r1, [r4, #48]
+; CHECK-NEXT:    strd r2, r3, [r4, #56]
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    vcmp.f32 s18, s22
 ; CHECK-NEXT:    add.w r12, r4, #32
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s18, s20
 ; CHECK-NEXT:    itttt lt
 ; CHECK-NEXT:    movlt.w r3, #-2147483648
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    vcmp.f32 s18, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s18, s18
 ; CHECK-NEXT:    itttt gt
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    mvngt r3, #-2147483648
-; CHECK-NEXT:    vcmp.f32 s18, s18
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itttt vs
+; CHECK-NEXT:    ittt vs
 ; CHECK-NEXT:    movvs r3, #0
 ; CHECK-NEXT:    movvs r2, #0
 ; CHECK-NEXT:    movvs r1, #0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
 ; CHECK-NEXT:    stm.w r12, {r0, r1, r2, r3}
 ; CHECK-NEXT:    mov r0, r6
@@ -2223,48 +2262,52 @@ define arm_aapcs_vfpcc <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) {
 ; CHECK-NEXT:    vcmp.f32 s17, s22
 ; CHECK-NEXT:    add.w r12, r4, #16
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s17, s20
 ; CHECK-NEXT:    itttt lt
 ; CHECK-NEXT:    movlt.w r3, #-2147483648
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    vcmp.f32 s17, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s17, s17
 ; CHECK-NEXT:    itttt gt
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    mvngt r3, #-2147483648
-; CHECK-NEXT:    vcmp.f32 s17, s17
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itttt vs
+; CHECK-NEXT:    ittt vs
 ; CHECK-NEXT:    movvs r3, #0
 ; CHECK-NEXT:    movvs r2, #0
 ; CHECK-NEXT:    movvs r1, #0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
 ; CHECK-NEXT:    stm.w r12, {r0, r1, r2, r3}
 ; CHECK-NEXT:    mov r0, r7
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    vcmp.f32 s16, s22
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s16, s20
 ; CHECK-NEXT:    itttt lt
 ; CHECK-NEXT:    movlt.w r3, #-2147483648
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    vcmp.f32 s16, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s16, s16
 ; CHECK-NEXT:    itttt gt
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    mvngt r3, #-2147483648
-; CHECK-NEXT:    vcmp.f32 s16, s16
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itttt vs
+; CHECK-NEXT:    ittt vs
 ; CHECK-NEXT:    movvs r3, #0
 ; CHECK-NEXT:    movvs r2, #0
 ; CHECK-NEXT:    movvs r1, #0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
 ; CHECK-NEXT:    stm r4!, {r0, r1, r2, r3}
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11}
@@ -2303,72 +2346,70 @@ define arm_aapcs_vfpcc <2 x i1> @test_signed_v2f64_v2i1(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #16
-; CHECK-NEXT:    sub sp, #16
+; CHECK-NEXT:    .pad #24
+; CHECK-NEXT:    sub sp, #24
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI32_0
 ; CHECK-NEXT:    vmov r8, r7, d8
-; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    vmov r11, r3, d0
-; CHECK-NEXT:    str r3, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r10, r0
+; CHECK-NEXT:    str r0, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    vmov r2, r3, d0
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    bl __aeabi_d2iz
+; CHECK-NEXT:    strd r2, r3, [sp, #12] @ 8-byte Folded Spill
+; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    vldr d0, .LCPI32_1
 ; CHECK-NEXT:    mov r9, r0
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    strd r2, r3, [sp, #4] @ 8-byte Folded Spill
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    mov r10, r0
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    bl __aeabi_d2iz
+; CHECK-NEXT:    mov r11, r0
 ; CHECK-NEXT:    cmp.w r10, #0
-; CHECK-NEXT:    vmov r6, r5, d9
-; CHECK-NEXT:    str r2, [sp, #4] @ 4-byte Spill
 ; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r9, #-1
-; CHECK-NEXT:    mov r10, r3
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    moveq.w r11, #-1
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    mov r2, r8
 ; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    vmov r6, r5, d9
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r9, #0
+; CHECK-NEXT:    movne.w r11, #0
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r9, #0
-; CHECK-NEXT:    and r0, r9, #1
-; CHECK-NEXT:    ldr r3, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    movne.w r11, #0
+; CHECK-NEXT:    and r0, r11, #1
+; CHECK-NEXT:    ldrd r2, r3, [sp, #12] @ 8-byte Folded Reload
 ; CHECK-NEXT:    rsbs r0, r0, #0
 ; CHECK-NEXT:    movs r4, #0
 ; CHECK-NEXT:    bfi r4, r0, #0, #1
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload
 ; CHECK-NEXT:    mov r8, r0
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    mov r9, r0
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r5
 ; CHECK-NEXT:    bl __aeabi_d2iz
 ; CHECK-NEXT:    mov r7, r0
-; CHECK-NEXT:    cmp.w r8, #0
+; CHECK-NEXT:    cmp.w r9, #0
 ; CHECK-NEXT:    it eq
 ; CHECK-NEXT:    moveq.w r7, #-1
-; CHECK-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r5
 ; CHECK-NEXT:    mov r2, r6
 ; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    cmp.w r8, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r7, #0
 ; CHECK-NEXT:    bl __aeabi_dcmpun
@@ -2378,20 +2419,20 @@ define arm_aapcs_vfpcc <2 x i1> @test_signed_v2f64_v2i1(<2 x double> %f) {
 ; CHECK-NEXT:    and r0, r7, #1
 ; CHECK-NEXT:    rsbs r0, r0, #0
 ; CHECK-NEXT:    bfi r4, r0, #1, #1
-; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
 ; CHECK-NEXT:    strb r4, [r0]
-; CHECK-NEXT:    add sp, #16
+; CHECK-NEXT:    add sp, #24
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI32_0:
-; CHECK-NEXT:    .long 0 @ double -1
-; CHECK-NEXT:    .long 3220176896
-; CHECK-NEXT:  .LCPI32_1:
 ; CHECK-NEXT:    .long 0 @ double 0
 ; CHECK-NEXT:    .long 0
+; CHECK-NEXT:  .LCPI32_1:
+; CHECK-NEXT:    .long 0 @ double -1
+; CHECK-NEXT:    .long 3220176896
     %x = call <2 x i1> @llvm.fptosi.sat.v2f64.v2i1(<2 x double> %f)
     ret <2 x i1> %x
 }
@@ -2405,82 +2446,115 @@ define arm_aapcs_vfpcc <2 x i8> @test_signed_v2f64_v2i8(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #16
-; CHECK-NEXT:    sub sp, #16
+; CHECK-NEXT:    .pad #24
+; CHECK-NEXT:    sub sp, #24
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI33_0
-; CHECK-NEXT:    vmov r9, r8, d9
-; CHECK-NEXT:    vmov r7, r3, d0
-; CHECK-NEXT:    str r3, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r2, r7
+; CHECK-NEXT:    vmov r8, r7, d9
+; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    strd r2, r3, [sp, #8] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    vldr d0, .LCPI33_1
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    lsrs r4, r0, #5
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    vmov r6, r5, d0
+; CHECK-NEXT:    str r4, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    bl __aeabi_d2lz
+; CHECK-NEXT:    mov r11, r0
+; CHECK-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    mvnne r11, #127
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #1
+; CHECK-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r11, #127
+; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    vmov r10, r7, d8
+; CHECK-NEXT:    mov r8, r0
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r8, #1
+; CHECK-NEXT:    cmp.w r8, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r11, #0
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    mov r6, r0
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r6, #1
+; CHECK-NEXT:    ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    lsr.w r9, r0, #5
+; CHECK-NEXT:    mov r0, r10
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    vldr d0, .LCPI33_1
 ; CHECK-NEXT:    mov r5, r0
 ; CHECK-NEXT:    mov r4, r1
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    vmov r2, r3, d0
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    vmov r11, r10, d8
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    mvnne r5, #127
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r3, r7
 ; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    strd r2, r3, [sp, #4] @ 8-byte Folded Spill
-; CHECK-NEXT:    itt eq
-; CHECK-NEXT:    moveq.w r4, #-1
-; CHECK-NEXT:    mvneq r5, #127
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r5, #127
-; CHECK-NEXT:    movne r4, #0
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r5, #0
-; CHECK-NEXT:    ldr r3, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    mov r7, r0
-; CHECK-NEXT:    mov r6, r1
+; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r5, r11
+; CHECK-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r2, #-1
+; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r2, #0
 ; CHECK-NEXT:    cmp.w r8, #0
-; CHECK-NEXT:    itt eq
-; CHECK-NEXT:    mvneq r7, #127
-; CHECK-NEXT:    moveq.w r6, #-1
-; CHECK-NEXT:    ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movne r6, #0
-; CHECK-NEXT:    movne r7, #127
-; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r2, #0
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r4, #-1
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r4, #0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movne r7, #0
-; CHECK-NEXT:    movne r6, #0
-; CHECK-NEXT:    vmov q0[2], q0[0], r7, r5
-; CHECK-NEXT:    vmov q0[3], q0[1], r6, r4
-; CHECK-NEXT:    add sp, #16
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    vmov q0[3], q0[1], r4, r2
+; CHECK-NEXT:    add sp, #24
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -2505,84 +2579,118 @@ define arm_aapcs_vfpcc <2 x i13> @test_signed_v2f64_v2i13(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #16
-; CHECK-NEXT:    sub sp, #16
+; CHECK-NEXT:    .pad #24
+; CHECK-NEXT:    sub sp, #24
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI34_0
-; CHECK-NEXT:    vmov r9, r8, d9
-; CHECK-NEXT:    vmov r7, r3, d0
-; CHECK-NEXT:    str r3, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r2, r7
+; CHECK-NEXT:    vmov r8, r7, d9
+; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    strd r2, r3, [sp, #8] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r6, r0
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    bl __aeabi_d2lz
+; CHECK-NEXT:    clz r0, r0
 ; CHECK-NEXT:    vldr d0, .LCPI34_1
-; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    lsrs r4, r0, #5
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    vmov r6, r5, d0
+; CHECK-NEXT:    str r4, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    bl __aeabi_d2lz
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r1, #-1
+; CHECK-NEXT:    mov r11, r0
 ; CHECK-NEXT:    mov r4, r1
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    vmov r2, r3, d0
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    vmov r11, r10, d8
-; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    strd r2, r3, [sp, #4] @ 8-byte Folded Spill
-; CHECK-NEXT:    ittt eq
-; CHECK-NEXT:    movweq r5, #61440
-; CHECK-NEXT:    movteq r5, #65535
-; CHECK-NEXT:    moveq.w r4, #-1
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movwne r5, #4095
-; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #1
+; CHECK-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    movne r5, #0
-; CHECK-NEXT:    ldr r3, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    vmov r10, r7, d8
 ; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    mov r7, r0
-; CHECK-NEXT:    mov r6, r1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r8, #1
 ; CHECK-NEXT:    cmp.w r8, #0
-; CHECK-NEXT:    ittt eq
-; CHECK-NEXT:    movweq r7, #61440
-; CHECK-NEXT:    movteq r7, #65535
-; CHECK-NEXT:    moveq.w r6, #-1
-; CHECK-NEXT:    ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r10
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    str r4, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movne r6, #0
-; CHECK-NEXT:    movwne r7, #4095
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r6, #1
+; CHECK-NEXT:    ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    lsr.w r9, r0, #5
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    bl __aeabi_d2lz
+; CHECK-NEXT:    mov r4, r1
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r4, #-1
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r4, #0
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    cmp.w r9, #0
 ; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movne r6, #0
-; CHECK-NEXT:    movne r7, #0
-; CHECK-NEXT:    vmov q0[2], q0[0], r7, r5
-; CHECK-NEXT:    vmov q0[3], q0[1], r6, r4
-; CHECK-NEXT:    add sp, #16
+; CHECK-NEXT:    movwne r5, #61440
+; CHECK-NEXT:    movtne r5, #65535
+; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    movwne r11, #61440
+; CHECK-NEXT:    movtne r11, #65535
+; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movwne r11, #4095
+; CHECK-NEXT:    cmp.w r8, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r11, #0
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movwne r5, #4095
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r5, r11
+; CHECK-NEXT:    vmov q0[3], q0[1], r4, r0
+; CHECK-NEXT:    add sp, #24
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -2607,84 +2715,118 @@ define arm_aapcs_vfpcc <2 x i16> @test_signed_v2f64_v2i16(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #16
-; CHECK-NEXT:    sub sp, #16
+; CHECK-NEXT:    .pad #24
+; CHECK-NEXT:    sub sp, #24
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI35_0
-; CHECK-NEXT:    vmov r9, r8, d9
-; CHECK-NEXT:    vmov r7, r3, d0
-; CHECK-NEXT:    str r3, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r2, r7
+; CHECK-NEXT:    vmov r8, r7, d9
+; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    strd r2, r3, [sp, #8] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r6, r0
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    bl __aeabi_d2lz
+; CHECK-NEXT:    clz r0, r0
 ; CHECK-NEXT:    vldr d0, .LCPI35_1
-; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    lsrs r4, r0, #5
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    vmov r6, r5, d0
+; CHECK-NEXT:    str r4, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    bl __aeabi_d2lz
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r1, #-1
+; CHECK-NEXT:    mov r11, r0
 ; CHECK-NEXT:    mov r4, r1
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    vmov r2, r3, d0
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    vmov r11, r10, d8
-; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    strd r2, r3, [sp, #4] @ 8-byte Folded Spill
-; CHECK-NEXT:    ittt eq
-; CHECK-NEXT:    movweq r5, #32768
-; CHECK-NEXT:    movteq r5, #65535
-; CHECK-NEXT:    moveq.w r4, #-1
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r5
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movwne r5, #32767
-; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #1
+; CHECK-NEXT:    str r0, [sp, #16] @ 4-byte Spill
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    movne r5, #0
-; CHECK-NEXT:    ldr r3, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    vmov r10, r7, d8
 ; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    mov r7, r0
-; CHECK-NEXT:    mov r6, r1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r8, #1
 ; CHECK-NEXT:    cmp.w r8, #0
-; CHECK-NEXT:    ittt eq
-; CHECK-NEXT:    movweq r7, #32768
-; CHECK-NEXT:    movteq r7, #65535
-; CHECK-NEXT:    moveq.w r6, #-1
-; CHECK-NEXT:    ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r10
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    str r4, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movne r6, #0
-; CHECK-NEXT:    movwne r7, #32767
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r6, #1
+; CHECK-NEXT:    ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    lsr.w r9, r0, #5
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    bl __aeabi_d2lz
+; CHECK-NEXT:    mov r4, r1
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r4, #-1
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r4, #0
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    cmp.w r9, #0
 ; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movne r6, #0
-; CHECK-NEXT:    movne r7, #0
-; CHECK-NEXT:    vmov q0[2], q0[0], r7, r5
-; CHECK-NEXT:    vmov q0[3], q0[1], r6, r4
-; CHECK-NEXT:    add sp, #16
+; CHECK-NEXT:    movwne r5, #32768
+; CHECK-NEXT:    movtne r5, #65535
+; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    movwne r11, #32768
+; CHECK-NEXT:    movtne r11, #65535
+; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movwne r11, #32767
+; CHECK-NEXT:    cmp.w r8, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r11, #0
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movwne r5, #32767
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r5, r11
+; CHECK-NEXT:    vmov q0[3], q0[1], r4, r0
+; CHECK-NEXT:    add sp, #24
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -2709,94 +2851,118 @@ define arm_aapcs_vfpcc <2 x i19> @test_signed_v2f64_v2i19(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #24
-; CHECK-NEXT:    sub sp, #24
+; CHECK-NEXT:    .pad #16
+; CHECK-NEXT:    sub sp, #16
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI36_0
-; CHECK-NEXT:    vmov r5, r4, d9
-; CHECK-NEXT:    vmov r7, r6, d0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r6
-; CHECK-NEXT:    strd r5, r4, [sp, #12] @ 8-byte Folded Spill
+; CHECK-NEXT:    vmov r7, r6, d9
+; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    strd r2, r3, [sp, #8] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    mov r1, r6
 ; CHECK-NEXT:    bl __aeabi_d2lz
 ; CHECK-NEXT:    vldr d0, .LCPI36_1
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    vmov r11, r5, d8
-; CHECK-NEXT:    mov r9, r1
-; CHECK-NEXT:    vmov r10, r0, d0
+; CHECK-NEXT:    mov r9, r0
+; CHECK-NEXT:    vmov r8, r0, d8
+; CHECK-NEXT:    vmov r11, r10, d0
+; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-NEXT:    clz r0, r4
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    ittt ne
+; CHECK-NEXT:    movwne r9, #0
+; CHECK-NEXT:    movtne r9, #65532
+; CHECK-NEXT:    movne.w r1, #-1
+; CHECK-NEXT:    mov r5, r1
+; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    mov r1, r6
 ; CHECK-NEXT:    mov r2, r7
 ; CHECK-NEXT:    mov r3, r6
-; CHECK-NEXT:    str r0, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    ittt eq
-; CHECK-NEXT:    movweq r8, #0
-; CHECK-NEXT:    movteq r8, #65532
-; CHECK-NEXT:    moveq.w r9, #-1
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    str r5, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    mov r7, r0
-; CHECK-NEXT:    mov r6, r1
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r4, #1
 ; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    ittt eq
-; CHECK-NEXT:    moveq r7, #0
-; CHECK-NEXT:    movteq r7, #65532
-; CHECK-NEXT:    moveq.w r6, #-1
-; CHECK-NEXT:    ldr r3, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    ittt ne
-; CHECK-NEXT:    movne r6, #0
-; CHECK-NEXT:    movwne r7, #65535
-; CHECK-NEXT:    movtne r7, #3
-; CHECK-NEXT:    ldr r5, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    ldr r4, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    ldr r3, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r6, #1
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    movwne r9, #65535
+; CHECK-NEXT:    movtne r9, #3
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    ldr r4, [sp] @ 4-byte Reload
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    str r5, [sp, #4] @ 4-byte Spill
 ; CHECK-NEXT:    mov r1, r4
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r5, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r5, #1
+; CHECK-NEXT:    ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r5
-; CHECK-NEXT:    mov r3, r4
-; CHECK-NEXT:    ittt ne
-; CHECK-NEXT:    movne.w r9, #0
-; CHECK-NEXT:    movwne r8, #65535
-; CHECK-NEXT:    movtne r8, #3
-; CHECK-NEXT:    bl __aeabi_dcmpun
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r11, r4
+; CHECK-NEXT:    lsr.w r10, r0, #5
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    bl __aeabi_d2lz
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    cmp.w r10, #0
+; CHECK-NEXT:    mov r7, r1
 ; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movne.w r9, #0
-; CHECK-NEXT:    movne.w r8, #0
-; CHECK-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r1
+; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    movtne r4, #65532
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    movwne r4, #65535
+; CHECK-NEXT:    movtne r4, #3
+; CHECK-NEXT:    cmp.w r10, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r7, #-1
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r11
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r7, #0
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movne r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r7, #0
-; CHECK-NEXT:    vmov q0[2], q0[0], r7, r8
-; CHECK-NEXT:    vmov q0[3], q0[1], r6, r9
-; CHECK-NEXT:    add sp, #24
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r9, #0
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r4, r9
+; CHECK-NEXT:    vmov q0[3], q0[1], r7, r0
+; CHECK-NEXT:    add sp, #16
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -2821,82 +2987,115 @@ define arm_aapcs_vfpcc <2 x i32> @test_signed_v2f64_v2i32_duplicate(<2 x double>
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #16
-; CHECK-NEXT:    sub sp, #16
+; CHECK-NEXT:    .pad #24
+; CHECK-NEXT:    sub sp, #24
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI37_0
-; CHECK-NEXT:    vmov r9, r8, d9
-; CHECK-NEXT:    vmov r7, r3, d0
-; CHECK-NEXT:    str r3, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r2, r7
+; CHECK-NEXT:    vmov r8, r7, d9
+; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    strd r2, r3, [sp, #8] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    vldr d0, .LCPI37_1
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    lsrs r4, r0, #5
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    vmov r6, r5, d0
+; CHECK-NEXT:    str r4, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    bl __aeabi_d2lz
+; CHECK-NEXT:    mov r11, r0
+; CHECK-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r11, #-2147483648
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #1
+; CHECK-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    mvnne r11, #-2147483648
+; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    vmov r10, r7, d8
+; CHECK-NEXT:    mov r8, r0
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r8, #1
+; CHECK-NEXT:    cmp.w r8, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r11, #0
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    mov r6, r0
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r6, #1
+; CHECK-NEXT:    ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    lsr.w r9, r0, #5
+; CHECK-NEXT:    mov r0, r10
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    vldr d0, .LCPI37_1
 ; CHECK-NEXT:    mov r5, r0
 ; CHECK-NEXT:    mov r4, r1
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    vmov r2, r3, d0
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    vmov r11, r10, d8
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r5, #-2147483648
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r3, r7
 ; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    strd r2, r3, [sp, #4] @ 8-byte Folded Spill
-; CHECK-NEXT:    itt eq
-; CHECK-NEXT:    moveq.w r4, #-1
-; CHECK-NEXT:    moveq.w r5, #-2147483648
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    mvnne r5, #-2147483648
-; CHECK-NEXT:    movne r4, #0
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r5, #0
-; CHECK-NEXT:    ldr r3, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    mov r7, r0
-; CHECK-NEXT:    mov r6, r1
+; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r5, r11
+; CHECK-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r2, #-1
+; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r2, #0
 ; CHECK-NEXT:    cmp.w r8, #0
-; CHECK-NEXT:    itt eq
-; CHECK-NEXT:    moveq.w r7, #-2147483648
-; CHECK-NEXT:    moveq.w r6, #-1
-; CHECK-NEXT:    ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movne r6, #0
-; CHECK-NEXT:    mvnne r7, #-2147483648
-; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r2, #0
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r4, #-1
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r4, #0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movne r7, #0
-; CHECK-NEXT:    movne r6, #0
-; CHECK-NEXT:    vmov q0[2], q0[0], r7, r5
-; CHECK-NEXT:    vmov q0[3], q0[1], r6, r4
-; CHECK-NEXT:    add sp, #16
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    vmov q0[3], q0[1], r4, r2
+; CHECK-NEXT:    add sp, #24
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -2925,84 +3124,114 @@ define arm_aapcs_vfpcc <2 x i50> @test_signed_v2f64_v2i50(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #16
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI38_0
-; CHECK-NEXT:    vmov r5, r4, d9
-; CHECK-NEXT:    vmov r6, r7, d0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r6
-; CHECK-NEXT:    mov r3, r7
-; CHECK-NEXT:    strd r4, r5, [sp, #8] @ 8-byte Folded Spill
+; CHECK-NEXT:    vmov r7, r6, d9
+; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    strd r2, r3, [sp, #8] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r11, r0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    mov r1, r6
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    vmov r5, r10, d8
 ; CHECK-NEXT:    vldr d0, .LCPI38_1
-; CHECK-NEXT:    cmp.w r11, #0
-; CHECK-NEXT:    mov r4, r1
-; CHECK-NEXT:    vmov r9, r8, d0
-; CHECK-NEXT:    csel r11, r0, r11, ne
-; CHECK-NEXT:    mov r2, r6
-; CHECK-NEXT:    mov r3, r7
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    str.w r9, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    itt eq
-; CHECK-NEXT:    moveq r4, #0
-; CHECK-NEXT:    movteq r4, #65534
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r7, r0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    cmp r7, #0
-; CHECK-NEXT:    mov r6, r1
-; CHECK-NEXT:    csel r7, r0, r7, ne
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    itt eq
-; CHECK-NEXT:    moveq r6, #0
-; CHECK-NEXT:    movteq r6, #65534
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    ittt ne
-; CHECK-NEXT:    movne.w r7, #-1
-; CHECK-NEXT:    movwne r6, #65535
-; CHECK-NEXT:    movtne r6, #1
-; CHECK-NEXT:    ldrd r9, r0, [sp, #8] @ 8-byte Folded Reload
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r9
+; CHECK-NEXT:    mov r9, r0
+; CHECK-NEXT:    vmov r8, r0, d8
+; CHECK-NEXT:    vmov r11, r10, d0
+; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-NEXT:    clz r0, r4
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    movtne r1, #65534
+; CHECK-NEXT:    mov r5, r1
+; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r9, #0
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r4, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    ittt ne
-; CHECK-NEXT:    movne.w r11, #-1
-; CHECK-NEXT:    movwne r4, #65535
-; CHECK-NEXT:    movtne r4, #1
-; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r9
-; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    mov r2, r0
+; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r2, r7
+; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r4, #1
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r9, #-1
 ; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r6, #1
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    movwne r5, #65535
+; CHECK-NEXT:    movtne r5, #1
+; CHECK-NEXT:    str r5, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r9, #0
+; CHECK-NEXT:    ldr r4, [sp] @ 4-byte Reload
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r2, r11
 ; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r5, #1
+; CHECK-NEXT:    ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r11, r4
+; CHECK-NEXT:    lsr.w r10, r0, #5
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    bl __aeabi_d2lz
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    cmp.w r10, #0
+; CHECK-NEXT:    mov r4, r0
 ; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movne.w r11, #0
+; CHECK-NEXT:    movne r7, #0
+; CHECK-NEXT:    movtne r7, #65534
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    movwne r7, #65535
+; CHECK-NEXT:    movtne r7, #1
+; CHECK-NEXT:    cmp.w r10, #0
+; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r11
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r4, #-1
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r7, #0
-; CHECK-NEXT:    vmov q0[2], q0[0], r7, r11
+; CHECK-NEXT:    movne r0, #1
+; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r6, #0
-; CHECK-NEXT:    vmov q0[3], q0[1], r6, r4
+; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r4, r9
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r7, #0
+; CHECK-NEXT:    vmov q0[3], q0[1], r7, r1
 ; CHECK-NEXT:    add sp, #16
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
@@ -3028,80 +3257,115 @@ define arm_aapcs_vfpcc <2 x i64> @test_signed_v2f64_v2i64(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #8
-; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    .pad #24
+; CHECK-NEXT:    sub sp, #24
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI39_0
 ; CHECK-NEXT:    vmov r8, r7, d9
-; CHECK-NEXT:    vmov r11, r5, d0
+; CHECK-NEXT:    vmov r2, r3, d0
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    strd r2, r3, [sp, #8] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r9, r0
-; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    vldr d0, .LCPI39_1
 ; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    lsrs r4, r0, #5
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    vmov r6, r5, d0
+; CHECK-NEXT:    str r4, [sp, #20] @ 4-byte Spill
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    vldr d0, .LCPI39_1
-; CHECK-NEXT:    cmp.w r9, #0
-; CHECK-NEXT:    mov r4, r1
-; CHECK-NEXT:    csel r9, r0, r9, ne
-; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    mov r11, r0
+; CHECK-NEXT:    str r1, [sp, #4] @ 4-byte Spill
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    vmov r6, r10, d8
-; CHECK-NEXT:    strd r2, r3, [sp] @ 8-byte Folded Spill
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r4, #-2147483648
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r11, #0
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #1
+; CHECK-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    mov r2, r8
 ; CHECK-NEXT:    mov r3, r7
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movne.w r9, #-1
-; CHECK-NEXT:    mvnne r4, #-2147483648
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r11, #-1
 ; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    vmov r10, r7, d8
+; CHECK-NEXT:    mov r8, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    mov r2, r6
 ; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    movne.w r9, #0
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r5, r0
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    mov r7, r1
-; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r7, #-2147483648
-; CHECK-NEXT:    ldrd r2, r3, [sp] @ 8-byte Folded Reload
-; CHECK-NEXT:    csel r5, r0, r5, ne
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r10
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r8, #1
+; CHECK-NEXT:    cmp.w r8, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r11, #0
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r10
-; CHECK-NEXT:    mov r2, r6
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    mvnne r7, #-2147483648
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r6, #1
+; CHECK-NEXT:    ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    lsr.w r9, r0, #5
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    bl __aeabi_d2lz
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    mov r4, r1
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne.w r5, #-1
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r5, #0
-; CHECK-NEXT:    movne r7, #0
-; CHECK-NEXT:    vmov q0[2], q0[0], r5, r9
-; CHECK-NEXT:    vmov q0[3], q0[1], r7, r4
-; CHECK-NEXT:    add sp, #8
+; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r5, r11
+; CHECK-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r2, #-2147483648
+; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    mvnne r2, #-2147483648
+; CHECK-NEXT:    cmp.w r8, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r2, #0
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r4, #-2147483648
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    mvnne r4, #-2147483648
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    vmov q0[3], q0[1], r4, r2
+; CHECK-NEXT:    add sp, #24
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -3130,252 +3394,258 @@ define arm_aapcs_vfpcc <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #48
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI40_0
-; CHECK-NEXT:    vmov r7, r6, d8
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    vmov r10, r9, d0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vmov r10, r9, d8
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    vmov r7, r3, d0
+; CHECK-NEXT:    str r3, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r9
+; CHECK-NEXT:    mov r2, r7
+; CHECK-NEXT:    str r7, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    vldr d0, .LCPI40_1
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r9
+; CHECK-NEXT:    vmov r8, r3, d0
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r11, r3
 ; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r9
 ; CHECK-NEXT:    bl __fixdfti
-; CHECK-NEXT:    vldr d0, .LCPI40_1
 ; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    strd r1, r0, [sp, #16] @ 8-byte Folded Spill
+; CHECK-NEXT:    strd r1, r0, [sp, #8] @ 8-byte Folded Spill
 ; CHECK-NEXT:    csel r4, r2, r4, ne
-; CHECK-NEXT:    vmov r5, r11, d0
-; CHECK-NEXT:    str r3, [sp, #32] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r5
-; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    str r3, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r9
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    cmp r5, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne.w r4, #-1
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r9
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    str.w r8, [sp, #44] @ 4-byte Spill
-; CHECK-NEXT:    str.w r4, [r8, #8]
-; CHECK-NEXT:    str.w r9, [sp, #36] @ 4-byte Spill
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r2, r5
-; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    csel r4, r1, r0, ne
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    str.w r11, [sp, #40] @ 4-byte Spill
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    str r4, [r6, #8]
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    ldr r5, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT:    mov r1, r9
 ; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r6
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r4, #-1
-; CHECK-NEXT:    bl __aeabi_dcmpun
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r9
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    str.w r8, [sp, #32] @ 4-byte Spill
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
 ; CHECK-NEXT:    mov r2, r10
 ; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    csel r7, r1, r0, ne
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r9
+; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    str.w r4, [r8, #4]
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    movne.w r7, #-1
+; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r2, r5
-; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    csel r4, r1, r0, ne
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r7, #0
+; CHECK-NEXT:    str r7, [r6, #4]
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    ldr r2, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    mov r1, r9
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    str r6, [sp, #16] @ 4-byte Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r9
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    str.w r11, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    csel r7, r1, r0, ne
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r9
+; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r4, #-1
-; CHECK-NEXT:    str r7, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    str r6, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT:    movne.w r7, #-1
+; CHECK-NEXT:    str.w r10, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    str.w r9, [sp, #24] @ 4-byte Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    vmov r9, r8, d9
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    str r4, [r0]
-; CHECK-NEXT:    ldr.w r11, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    movne r7, #0
+; CHECK-NEXT:    str r7, [r6]
+; CHECK-NEXT:    ldr r6, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    mov r2, r6
 ; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    ldr.w r10, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    bl __fixdfti
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    strd r2, r3, [sp, #12] @ 8-byte Folded Spill
-; CHECK-NEXT:    csel r7, r1, r4, ne
-; CHECK-NEXT:    mov r4, r5
-; CHECK-NEXT:    str r5, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    mov r2, r5
-; CHECK-NEXT:    ldr r5, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    mov r11, r0
 ; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    bl __fixdfti
+; CHECK-NEXT:    cmp.w r11, #0
+; CHECK-NEXT:    strd r2, r3, [sp, #4] @ 8-byte Folded Spill
+; CHECK-NEXT:    csel r11, r1, r11, ne
+; CHECK-NEXT:    mov r4, r0
 ; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    mov r1, r8
 ; CHECK-NEXT:    mov r2, r9
 ; CHECK-NEXT:    mov r3, r8
+; CHECK-NEXT:    cmp r7, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r7, #-1
+; CHECK-NEXT:    movne.w r11, #-1
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r5
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r7, #0
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    csel r6, r6, r0, ne
+; CHECK-NEXT:    movne.w r11, #0
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    ldr r5, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r2, r4
+; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r6, r10
 ; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    csel r10, r4, r0, ne
+; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    mov r2, r9
 ; CHECK-NEXT:    mov r3, r8
+; CHECK-NEXT:    cmp r7, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r6, #-1
+; CHECK-NEXT:    movne.w r10, #-1
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r0, #0
-; CHECK-NEXT:    ldr r1, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT:    str r0, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    lsrl r0, r7, #28
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    str r0, [r1, #16]
+; CHECK-NEXT:    movne.w r10, #0
+; CHECK-NEXT:    ldr r7, [sp, #44] @ 4-byte Reload
 ; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    ldr r3, [sp, #40] @ 4-byte Reload
 ; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    str.w r10, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT:    mov r6, r11
+; CHECK-NEXT:    str.w r10, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    lsrl r10, r11, #28
+; CHECK-NEXT:    mov r2, r7
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r5
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r5, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    ldr.w r11, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r8
 ; CHECK-NEXT:    csel r4, r1, r0, ne
-; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    ldr r0, [sp] @ 4-byte Reload
 ; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    mov r3, r8
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne.w r4, #-1
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    orr.w r0, r7, r4, lsl #4
-; CHECK-NEXT:    ldr r7, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r6
-; CHECK-NEXT:    str r0, [r7, #20]
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    ldr r6, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    orr.w r0, r11, r4, lsl #4
 ; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    mov r2, r7
+; CHECK-NEXT:    strd r10, r0, [r6, #16]
+; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    ldr.w r11, [sp, #40] @ 4-byte Reload
 ; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    mvneq r6, #7
-; CHECK-NEXT:    mov r10, r5
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    ldr r7, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT:    mov r10, r0
+; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    mov r2, r7
+; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    it eq
+; CHECK-NEXT:    mvneq r0, #7
+; CHECK-NEXT:    cmp.w r10, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #7
+; CHECK-NEXT:    mov r5, r0
 ; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    mov r1, r8
 ; CHECK-NEXT:    mov r2, r9
 ; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r6, #7
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r0, #0
-; CHECK-NEXT:    and r1, r0, #15
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    and r1, r5, #15
+; CHECK-NEXT:    mov r8, r6
 ; CHECK-NEXT:    lsrl r4, r1, #28
-; CHECK-NEXT:    strb r4, [r7, #24]
+; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    strb r4, [r6, #24]
+; CHECK-NEXT:    ldr r6, [sp, #20] @ 4-byte Reload
 ; CHECK-NEXT:    ldr r5, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT:    ldr r6, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    ldr r2, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    ldr r3, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r2, r7
 ; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    ldr r4, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    ldr r0, [sp, #28] @ 4-byte Reload
 ; CHECK-NEXT:    it eq
-; CHECK-NEXT:    mvneq r4, #7
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r5
-; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    mvneq r0, #7
+; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r4, #7
+; CHECK-NEXT:    movne r0, #7
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r5
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r4
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r0, #0
-; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    and r0, r0, #15
+; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    and r0, r4, #15
 ; CHECK-NEXT:    orr.w r0, r0, r1, lsl #4
-; CHECK-NEXT:    str r0, [r7, #12]
+; CHECK-NEXT:    str.w r0, [r8, #12]
 ; CHECK-NEXT:    add sp, #48
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
@@ -3383,11 +3653,11 @@ define arm_aapcs_vfpcc <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) {
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI40_0:
-; CHECK-NEXT:    .long 0 @ double -6.338253001141147E+29
-; CHECK-NEXT:    .long 3323985920
-; CHECK-NEXT:  .LCPI40_1:
 ; CHECK-NEXT:    .long 4294967295 @ double 6.3382530011411463E+29
 ; CHECK-NEXT:    .long 1176502271
+; CHECK-NEXT:  .LCPI40_1:
+; CHECK-NEXT:    .long 0 @ double -6.338253001141147E+29
+; CHECK-NEXT:    .long 3323985920
     %x = call <2 x i100> @llvm.fptosi.sat.v2f64.v2i100(<2 x double> %f)
     ret <2 x i100> %x
 }
@@ -3406,237 +3676,247 @@ define arm_aapcs_vfpcc <2 x i128> @test_signed_v2f64_v2i128(<2 x double> %f) {
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI41_0
 ; CHECK-NEXT:    vmov r8, r7, d9
-; CHECK-NEXT:    mov r9, r0
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    vmov r2, r3, d0
-; CHECK-NEXT:    str r3, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    str r2, [sp, #28] @ 4-byte Spill
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    mov r4, r2
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    mov r11, r3
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    vldr d0, .LCPI41_1
+; CHECK-NEXT:    mov r5, r0
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    bl __fixdfti
-; CHECK-NEXT:    vldr d0, .LCPI41_1
-; CHECK-NEXT:    mov r5, r3
-; CHECK-NEXT:    strd r1, r0, [sp, #8] @ 8-byte Folded Spill
+; CHECK-NEXT:    vmov r4, r3, d0
+; CHECK-NEXT:    str r3, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT:    mov r2, r4
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    mov r9, r0
 ; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    vmov r10, r11, d0
-; CHECK-NEXT:    str r2, [sp] @ 4-byte Spill
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    bl __fixdfti
+; CHECK-NEXT:    mov r10, r3
+; CHECK-NEXT:    strd r2, r1, [sp] @ 8-byte Folded Spill
+; CHECK-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    cmp.w r9, #0
 ; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r5, #-2147483648
-; CHECK-NEXT:    str.w r11, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    moveq.w r10, #-2147483648
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    mov r2, r8
 ; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    cmp r5, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    mvnne r5, #-2147483648
+; CHECK-NEXT:    mvnne r10, #-2147483648
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r5, #0
-; CHECK-NEXT:    str.w r5, [r9, #28]
+; CHECK-NEXT:    movne.w r10, #0
+; CHECK-NEXT:    str.w r10, [r6, #28]
 ; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    ldr r6, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    ldr.w r9, [sp, #28] @ 4-byte Reload
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    mov r2, r4
-; CHECK-NEXT:    str r4, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    mov r3, r6
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp] @ 4-byte Reload
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r2, r10
 ; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    csel r5, r1, r0, ne
-; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    str.w r10, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT:    mov r5, r11
+; CHECK-NEXT:    str.w r11, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    mov r2, r9
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    ldr.w r10, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r4
+; CHECK-NEXT:    mov r11, r4
+; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    str r4, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    mov r2, r8
 ; CHECK-NEXT:    mov r3, r7
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r5, #-1
-; CHECK-NEXT:    bl __aeabi_dcmpun
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    csel r4, r1, r0, ne
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    mov r2, r4
-; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    cmp r6, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r5, #0
-; CHECK-NEXT:    str.w r5, [r9, #24]
-; CHECK-NEXT:    mov r11, r6
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    movne.w r4, #-1
+; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    ldr r3, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    csel r4, r1, r0, ne
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    ldr r6, [sp, #12] @ 4-byte Reload
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    str r4, [r6, #24]
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r5, r0
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    mov r2, r8
 ; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    csel r4, r1, r0, ne
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    cmp r5, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne.w r4, #-1
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    str.w r4, [r9, #20]
-; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    ldr r2, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    vmov r6, r5, d8
-; CHECK-NEXT:    mov r10, r9
-; CHECK-NEXT:    str.w r9, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    mov r9, r11
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr.w r11, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    csel r4, r1, r0, ne
+; CHECK-NEXT:    str r4, [r6, #20]
 ; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    ldr.w r10, [sp, #16] @ 4-byte Reload
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r11, r6
+; CHECK-NEXT:    mov r3, r10
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    ldrd r2, r3, [sp, #20] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r9, r0
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    mov r2, r8
 ; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    csel r4, r1, r0, ne
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    vmov r6, r5, d8
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne.w r4, #-1
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    str.w r4, [r10, #16]
+; CHECK-NEXT:    str.w r4, [r11, #16]
 ; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    ldr r7, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    mov r2, r7
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    ldr.w r9, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    mov r10, r0
 ; CHECK-NEXT:    ldr.w r8, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r8
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    mov r11, r0
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r5
 ; CHECK-NEXT:    bl __fixdfti
-; CHECK-NEXT:    mov r9, r3
-; CHECK-NEXT:    strd r1, r0, [sp, #8] @ 8-byte Folded Spill
-; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    mov r4, r3
+; CHECK-NEXT:    strd r2, r1, [sp] @ 8-byte Folded Spill
+; CHECK-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    cmp.w r11, #0
 ; CHECK-NEXT:    it eq
-; CHECK-NEXT:    moveq.w r9, #-2147483648
-; CHECK-NEXT:    ldr.w r10, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r4, r2
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    moveq.w r4, #-2147483648
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r5
 ; CHECK-NEXT:    mov r2, r6
 ; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    cmp.w r10, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    mvnne r9, #-2147483648
+; CHECK-NEXT:    mvnne r4, #-2147483648
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r9, #0
-; CHECK-NEXT:    ldr r7, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    movne r4, #0
+; CHECK-NEXT:    ldr.w r10, [sp, #12] @ 4-byte Reload
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r8
-; CHECK-NEXT:    str.w r9, [r7, #12]
-; CHECK-NEXT:    ldr.w r9, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    csel r4, r4, r0, ne
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r2, r7
+; CHECK-NEXT:    str.w r4, [r10, #12]
+; CHECK-NEXT:    ldr.w r11, [sp, #16] @ 4-byte Reload
 ; CHECK-NEXT:    mov r3, r11
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r4, r0
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r8
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    mov r2, r6
 ; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r4, #-1
-; CHECK-NEXT:    bl __aeabi_dcmpun
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    csel r7, r1, r0, ne
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r8
-; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    str r4, [r7, #8]
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    movne.w r7, #-1
+; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    csel r4, r1, r0, ne
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r7, #0
+; CHECK-NEXT:    str.w r7, [r10, #8]
 ; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
 ; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r3, r11
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r4, r0
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r8
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    mov r2, r6
 ; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r4, #-1
-; CHECK-NEXT:    bl __aeabi_dcmpun
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    csel r7, r1, r0, ne
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r8
-; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    str r4, [r7, #4]
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    movne.w r7, #-1
+; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    csel r4, r1, r0, ne
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r7, #0
+; CHECK-NEXT:    str.w r7, [r10, #4]
 ; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
 ; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r3, r11
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r4, r0
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r8
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    mov r2, r6
 ; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    csel r7, r1, r0, ne
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r4, #-1
+; CHECK-NEXT:    movne.w r7, #-1
 ; CHECK-NEXT:    bl __aeabi_dcmpun
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    str r4, [r7]
+; CHECK-NEXT:    movne r7, #0
+; CHECK-NEXT:    str.w r7, [r10]
 ; CHECK-NEXT:    add sp, #32
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
@@ -3644,11 +3924,11 @@ define arm_aapcs_vfpcc <2 x i128> @test_signed_v2f64_v2i128(<2 x double> %f) {
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI41_0:
-; CHECK-NEXT:    .long 0 @ double -1.7014118346046923E+38
-; CHECK-NEXT:    .long 3353346048
-; CHECK-NEXT:  .LCPI41_1:
 ; CHECK-NEXT:    .long 4294967295 @ double 1.7014118346046921E+38
 ; CHECK-NEXT:    .long 1205862399
+; CHECK-NEXT:  .LCPI41_1:
+; CHECK-NEXT:    .long 0 @ double -1.7014118346046923E+38
+; CHECK-NEXT:    .long 3353346048
     %x = call <2 x i128> @llvm.fptosi.sat.v2f64.v2i128(<2 x double> %f)
     ret <2 x i128> %x
 }
@@ -4279,103 +4559,101 @@ define arm_aapcs_vfpcc <8 x i50> @test_signed_v8f16_v8i50(<8 x half> %f) {
 ; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-NEXT:    .pad #4
 ; CHECK-NEXT:    sub sp, #4
-; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    .pad #24
 ; CHECK-NEXT:    sub sp, #24
 ; CHECK-NEXT:    vmov q4, q0
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    vcvtt.f32.f16 s24, s16
+; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    vcvtt.f32.f16 s24, s17
 ; CHECK-NEXT:    vmov r0, s24
 ; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    vcvtb.f32.f16 s26, s17
-; CHECK-NEXT:    mov r2, r0
-; CHECK-NEXT:    vmov r0, s26
-; CHECK-NEXT:    vldr s22, .LCPI48_0
-; CHECK-NEXT:    vldr s20, .LCPI48_1
-; CHECK-NEXT:    vcmp.f32 s24, s22
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itt lt
-; CHECK-NEXT:    movlt r1, #0
-; CHECK-NEXT:    movtlt r1, #65534
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r2, #0
-; CHECK-NEXT:    vcmp.f32 s24, s20
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r2, #-1
-; CHECK-NEXT:    vcmp.f32 s24, s24
-; CHECK-NEXT:    itt gt
-; CHECK-NEXT:    movwgt r1, #65535
-; CHECK-NEXT:    movtgt r1, #1
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r2, #0
-; CHECK-NEXT:    str r2, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r1, #0
-; CHECK-NEXT:    str r1, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    vcvtb.f32.f16 s28, s17
+; CHECK-NEXT:    mov r8, r0
+; CHECK-NEXT:    vmov r0, s28
+; CHECK-NEXT:    mov r6, r1
 ; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    vcvtt.f32.f16 s24, s17
+; CHECK-NEXT:    vcvtt.f32.f16 s30, s16
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vmov r0, s30
+; CHECK-NEXT:    mov r5, r1
+; CHECK-NEXT:    bl __aeabi_f2lz
+; CHECK-NEXT:    vcvtb.f32.f16 s26, s18
 ; CHECK-NEXT:    mov r2, r0
-; CHECK-NEXT:    vmov r0, s24
-; CHECK-NEXT:    vcmp.f32 s26, s22
+; CHECK-NEXT:    vmov r0, s26
+; CHECK-NEXT:    vldr s20, .LCPI48_0
+; CHECK-NEXT:    vldr s22, .LCPI48_1
+; CHECK-NEXT:    vcmp.f32 s30, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itt lt
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movtlt r1, #65534
-; CHECK-NEXT:    vcmp.f32 s26, s20
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s30, s22
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itt gt
+; CHECK-NEXT:    vcmp.f32 s30, s30
+; CHECK-NEXT:    ittt gt
 ; CHECK-NEXT:    movwgt r1, #65535
 ; CHECK-NEXT:    movtgt r1, #1
-; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r2, #-1
-; CHECK-NEXT:    vcmp.f32 s26, s26
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s28, s20
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r2, #0
-; CHECK-NEXT:    str r2, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    str r2, [sp, #20] @ 4-byte Spill
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r1, #0
-; CHECK-NEXT:    str r1, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    vcvtb.f32.f16 s26, s18
-; CHECK-NEXT:    mov r2, r0
-; CHECK-NEXT:    vmov r0, s26
-; CHECK-NEXT:    vcmp.f32 s24, s22
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itt lt
-; CHECK-NEXT:    movlt r1, #0
-; CHECK-NEXT:    movtlt r1, #65534
+; CHECK-NEXT:    vcmp.f32 s28, s22
+; CHECK-NEXT:    str r1, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    ittt lt
+; CHECK-NEXT:    movlt r5, #0
+; CHECK-NEXT:    movtlt r5, #65534
+; CHECK-NEXT:    movlt r4, #0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s28, s28
+; CHECK-NEXT:    ittt gt
+; CHECK-NEXT:    movwgt r5, #65535
+; CHECK-NEXT:    movtgt r5, #1
+; CHECK-NEXT:    movgt.w r4, #-1
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s24, s20
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r2, #0
+; CHECK-NEXT:    it vs
+; CHECK-NEXT:    movvs r4, #0
+; CHECK-NEXT:    str r4, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    it vs
+; CHECK-NEXT:    movvs r5, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itt gt
-; CHECK-NEXT:    movwgt r1, #65535
-; CHECK-NEXT:    movtgt r1, #1
-; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r2, #-1
+; CHECK-NEXT:    str r5, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    ittt lt
+; CHECK-NEXT:    movlt r6, #0
+; CHECK-NEXT:    movtlt r6, #65534
+; CHECK-NEXT:    movlt.w r8, #0
+; CHECK-NEXT:    vcmp.f32 s24, s22
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    ittt gt
+; CHECK-NEXT:    movwgt r6, #65535
+; CHECK-NEXT:    movtgt r6, #1
+; CHECK-NEXT:    movgt.w r8, #-1
 ; CHECK-NEXT:    vcmp.f32 s24, s24
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r2, #0
-; CHECK-NEXT:    str r2, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    movvs.w r8, #0
+; CHECK-NEXT:    str.w r8, [sp, #4] @ 4-byte Spill
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r1, #0
-; CHECK-NEXT:    str r1, [sp] @ 4-byte Spill
+; CHECK-NEXT:    movvs r6, #0
+; CHECK-NEXT:    str r6, [sp] @ 4-byte Spill
 ; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    vcmp.f32 s26, s22
+; CHECK-NEXT:    vcmp.f32 s26, s20
 ; CHECK-NEXT:    mov r6, r1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    ittt lt
-; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    movlt r6, #0
 ; CHECK-NEXT:    movtlt r6, #65534
-; CHECK-NEXT:    vcmp.f32 s26, s20
+; CHECK-NEXT:    movlt r0, #0
+; CHECK-NEXT:    vcmp.f32 s26, s22
 ; CHECK-NEXT:    vcvtt.f32.f16 s18, s18
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    ittt gt
@@ -4386,144 +4664,144 @@ define arm_aapcs_vfpcc <8 x i50> @test_signed_v8f16_v8i50(<8 x half> %f) {
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
-; CHECK-NEXT:    str.w r0, [r4, #25]
+; CHECK-NEXT:    str.w r0, [r7, #25]
 ; CHECK-NEXT:    vmov r0, s18
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r6, #0
 ; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    vcmp.f32 s18, s22
+; CHECK-NEXT:    vcmp.f32 s18, s20
 ; CHECK-NEXT:    mov r8, r0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    mov r9, r1
-; CHECK-NEXT:    vcmp.f32 s18, s20
+; CHECK-NEXT:    vcmp.f32 s18, s22
 ; CHECK-NEXT:    ittt lt
+; CHECK-NEXT:    movlt.w r8, #0
 ; CHECK-NEXT:    movwlt r9, #0
 ; CHECK-NEXT:    movtlt r9, #65534
-; CHECK-NEXT:    movlt.w r8, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s18, s18
 ; CHECK-NEXT:    vcvtb.f32.f16 s18, s19
 ; CHECK-NEXT:    ittt gt
-; CHECK-NEXT:    movgt.w r8, #-1
 ; CHECK-NEXT:    movwgt r9, #65535
 ; CHECK-NEXT:    movtgt r9, #1
+; CHECK-NEXT:    movgt.w r8, #-1
 ; CHECK-NEXT:    vmov r0, s18
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itt vs
 ; CHECK-NEXT:    movvs.w r8, #0
 ; CHECK-NEXT:    movvs.w r9, #0
 ; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    vcmp.f32 s18, s22
-; CHECK-NEXT:    mov r11, r0
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    mov r5, r1
 ; CHECK-NEXT:    vcmp.f32 s18, s20
+; CHECK-NEXT:    mov r10, r0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    mov r11, r1
+; CHECK-NEXT:    vcmp.f32 s18, s22
 ; CHECK-NEXT:    ittt lt
-; CHECK-NEXT:    movlt r5, #0
-; CHECK-NEXT:    movtlt r5, #65534
-; CHECK-NEXT:    movlt.w r11, #0
+; CHECK-NEXT:    movlt.w r10, #0
+; CHECK-NEXT:    movwlt r11, #0
+; CHECK-NEXT:    movtlt r11, #65534
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s18, s18
 ; CHECK-NEXT:    vcvtt.f32.f16 s18, s19
 ; CHECK-NEXT:    ittt gt
-; CHECK-NEXT:    movgt.w r11, #-1
-; CHECK-NEXT:    movwgt r5, #65535
-; CHECK-NEXT:    movtgt r5, #1
+; CHECK-NEXT:    movwgt r11, #65535
+; CHECK-NEXT:    movtgt r11, #1
+; CHECK-NEXT:    movgt.w r10, #-1
 ; CHECK-NEXT:    vmov r0, s18
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itt vs
+; CHECK-NEXT:    movvs.w r10, #0
 ; CHECK-NEXT:    movvs.w r11, #0
-; CHECK-NEXT:    movvs r5, #0
 ; CHECK-NEXT:    bl __aeabi_f2lz
 ; CHECK-NEXT:    vcvtb.f32.f16 s16, s16
-; CHECK-NEXT:    mov r10, r0
+; CHECK-NEXT:    mov r4, r0
 ; CHECK-NEXT:    vmov r0, s16
-; CHECK-NEXT:    mov r7, r1
-; CHECK-NEXT:    vcmp.f32 s18, s22
+; CHECK-NEXT:    mov r5, r1
+; CHECK-NEXT:    vcmp.f32 s18, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    ittt lt
-; CHECK-NEXT:    movlt r7, #0
-; CHECK-NEXT:    movtlt r7, #65534
-; CHECK-NEXT:    movlt.w r10, #0
-; CHECK-NEXT:    vcmp.f32 s18, s20
+; CHECK-NEXT:    movlt r4, #0
+; CHECK-NEXT:    movlt r5, #0
+; CHECK-NEXT:    movtlt r5, #65534
+; CHECK-NEXT:    vcmp.f32 s18, s22
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    ittt gt
-; CHECK-NEXT:    movgt.w r10, #-1
-; CHECK-NEXT:    movwgt r7, #65535
-; CHECK-NEXT:    movtgt r7, #1
+; CHECK-NEXT:    movwgt r5, #65535
+; CHECK-NEXT:    movtgt r5, #1
+; CHECK-NEXT:    movgt.w r4, #-1
 ; CHECK-NEXT:    vcmp.f32 s18, s18
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itt vs
-; CHECK-NEXT:    movvs.w r10, #0
-; CHECK-NEXT:    movvs r7, #0
+; CHECK-NEXT:    movvs r4, #0
+; CHECK-NEXT:    movvs r5, #0
 ; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    vcmp.f32 s16, s22
-; CHECK-NEXT:    bfc r5, #18, #14
+; CHECK-NEXT:    vcmp.f32 s16, s20
+; CHECK-NEXT:    bfc r11, #18, #14
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    ittt lt
-; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movtlt r1, #65534
-; CHECK-NEXT:    vcmp.f32 s16, s20
-; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    movlt r0, #0
+; CHECK-NEXT:    vcmp.f32 s16, s22
+; CHECK-NEXT:    mov r2, r10
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    ittt gt
 ; CHECK-NEXT:    movwgt r1, #65535
 ; CHECK-NEXT:    movtgt r1, #1
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    vcmp.f32 s16, s16
-; CHECK-NEXT:    lsrl r2, r5, #28
+; CHECK-NEXT:    lsrl r2, r11, #28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
-; CHECK-NEXT:    str r0, [r4]
-; CHECK-NEXT:    lsr.w r0, r7, #10
-; CHECK-NEXT:    bfc r7, #18, #14
+; CHECK-NEXT:    str r0, [r7]
+; CHECK-NEXT:    lsrs r0, r5, #10
+; CHECK-NEXT:    bfc r5, #18, #14
 ; CHECK-NEXT:    bfc r9, #18, #14
-; CHECK-NEXT:    lsll r10, r7, #22
+; CHECK-NEXT:    lsll r4, r5, #22
 ; CHECK-NEXT:    bfc r6, #18, #14
-; CHECK-NEXT:    orr.w r3, r5, r7
-; CHECK-NEXT:    str.w r3, [r4, #45]
-; CHECK-NEXT:    orr.w r2, r2, r10
-; CHECK-NEXT:    str.w r2, [r4, #41]
-; CHECK-NEXT:    strb.w r0, [r4, #49]
+; CHECK-NEXT:    orr.w r3, r11, r5
+; CHECK-NEXT:    str.w r3, [r7, #45]
+; CHECK-NEXT:    orrs r2, r4
+; CHECK-NEXT:    str.w r2, [r7, #41]
+; CHECK-NEXT:    strb.w r0, [r7, #49]
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    lsrl r0, r9, #14
-; CHECK-NEXT:    orr.w r2, r9, r11, lsl #4
-; CHECK-NEXT:    str.w r2, [r4, #37]
-; CHECK-NEXT:    str.w r0, [r4, #33]
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    orr.w r2, r9, r10, lsl #4
+; CHECK-NEXT:    str.w r2, [r7, #37]
+; CHECK-NEXT:    str.w r0, [r7, #33]
 ; CHECK-NEXT:    orr.w r0, r6, r8, lsl #18
-; CHECK-NEXT:    str.w r0, [r4, #29]
+; CHECK-NEXT:    str.w r0, [r7, #29]
+; CHECK-NEXT:    ldr r5, [sp, #8] @ 4-byte Reload
 ; CHECK-NEXT:    ldr r3, [sp] @ 4-byte Reload
-; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    ldr r6, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    lsr.w r5, r3, #10
-; CHECK-NEXT:    bfc r3, #18, #14
-; CHECK-NEXT:    lsll r0, r3, #22
-; CHECK-NEXT:    mov r7, r3
-; CHECK-NEXT:    ldr r3, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    ldr r4, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    ldr r6, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    bfc r5, #18, #14
+; CHECK-NEXT:    lsr.w r0, r3, #10
 ; CHECK-NEXT:    bfc r3, #18, #14
-; CHECK-NEXT:    lsrl r2, r3, #28
-; CHECK-NEXT:    orr.w r3, r3, r7
-; CHECK-NEXT:    str r3, [r4, #20]
-; CHECK-NEXT:    orr.w r2, r2, r0
-; CHECK-NEXT:    str r2, [r4, #16]
-; CHECK-NEXT:    strb r5, [r4, #24]
+; CHECK-NEXT:    mov r2, r4
+; CHECK-NEXT:    lsll r6, r3, #22
+; CHECK-NEXT:    lsrl r2, r5, #28
+; CHECK-NEXT:    orr.w r3, r3, r5
+; CHECK-NEXT:    str r3, [r7, #20]
+; CHECK-NEXT:    orr.w r2, r2, r6
+; CHECK-NEXT:    str r2, [r7, #16]
+; CHECK-NEXT:    strb r0, [r7, #24]
 ; CHECK-NEXT:    ldr r3, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    ldr r7, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    ldr r6, [sp, #20] @ 4-byte Reload
 ; CHECK-NEXT:    bfc r3, #18, #14
-; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    lsrl r0, r3, #14
-; CHECK-NEXT:    orr.w r2, r3, r6, lsl #4
-; CHECK-NEXT:    strd r0, r2, [r4, #8]
+; CHECK-NEXT:    orr.w r2, r3, r4, lsl #4
+; CHECK-NEXT:    strd r0, r2, [r7, #8]
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r1, #0
 ; CHECK-NEXT:    bfc r1, #18, #14
-; CHECK-NEXT:    orr.w r0, r1, r7, lsl #18
-; CHECK-NEXT:    str r0, [r4, #4]
+; CHECK-NEXT:    orr.w r0, r1, r6, lsl #18
+; CHECK-NEXT:    str r0, [r7, #4]
 ; CHECK-NEXT:    add sp, #24
-; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEXT:    .p2align 2
@@ -4552,37 +4830,37 @@ define arm_aapcs_vfpcc <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) {
 ; CHECK-NEXT:    vcvtb.f32.f16 s26, s19
 ; CHECK-NEXT:    mov r9, r0
 ; CHECK-NEXT:    vmov r0, s26
-; CHECK-NEXT:    vldr s28, .LCPI49_0
-; CHECK-NEXT:    vldr s30, .LCPI49_1
+; CHECK-NEXT:    vldr s30, .LCPI49_0
+; CHECK-NEXT:    vldr s28, .LCPI49_1
 ; CHECK-NEXT:    mov r8, r1
-; CHECK-NEXT:    vcmp.f32 s24, s28
-; CHECK-NEXT:    vcvtt.f32.f16 s20, s16
+; CHECK-NEXT:    vcmp.f32 s24, s30
+; CHECK-NEXT:    vcvtt.f32.f16 s22, s18
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itt lt
 ; CHECK-NEXT:    movlt.w r8, #-2147483648
 ; CHECK-NEXT:    movlt.w r9, #0
-; CHECK-NEXT:    vcmp.f32 s24, s30
-; CHECK-NEXT:    vcvtt.f32.f16 s22, s18
+; CHECK-NEXT:    vcmp.f32 s24, s28
+; CHECK-NEXT:    vcvtt.f32.f16 s20, s16
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itt gt
 ; CHECK-NEXT:    movgt.w r9, #-1
 ; CHECK-NEXT:    mvngt r8, #-2147483648
 ; CHECK-NEXT:    vcmp.f32 s24, s24
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vmov r6, s20
 ; CHECK-NEXT:    vmov r4, s22
+; CHECK-NEXT:    vmov r6, s20
 ; CHECK-NEXT:    itt vs
 ; CHECK-NEXT:    movvs.w r8, #0
 ; CHECK-NEXT:    movvs.w r9, #0
 ; CHECK-NEXT:    bl __aeabi_f2lz
 ; CHECK-NEXT:    mov r10, r0
-; CHECK-NEXT:    vcmp.f32 s26, s28
+; CHECK-NEXT:    vcmp.f32 s26, s30
 ; CHECK-NEXT:    mov r11, r1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itt lt
 ; CHECK-NEXT:    movlt.w r10, #0
 ; CHECK-NEXT:    movlt.w r11, #-2147483648
-; CHECK-NEXT:    vcmp.f32 s26, s30
+; CHECK-NEXT:    vcmp.f32 s26, s28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    mov r0, r4
 ; CHECK-NEXT:    itt gt
@@ -4595,13 +4873,13 @@ define arm_aapcs_vfpcc <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) {
 ; CHECK-NEXT:    movvs.w r11, #0
 ; CHECK-NEXT:    bl __aeabi_f2lz
 ; CHECK-NEXT:    mov r5, r0
-; CHECK-NEXT:    vcmp.f32 s22, s28
+; CHECK-NEXT:    vcmp.f32 s22, s30
 ; CHECK-NEXT:    mov r4, r1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itt lt
 ; CHECK-NEXT:    movlt.w r4, #-2147483648
 ; CHECK-NEXT:    movlt r5, #0
-; CHECK-NEXT:    vcmp.f32 s22, s30
+; CHECK-NEXT:    vcmp.f32 s22, s28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    itt gt
@@ -4617,12 +4895,12 @@ define arm_aapcs_vfpcc <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) {
 ; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    vmov r0, s16
 ; CHECK-NEXT:    mov r6, r1
-; CHECK-NEXT:    vcmp.f32 s20, s28
+; CHECK-NEXT:    vcmp.f32 s20, s30
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itt lt
 ; CHECK-NEXT:    movlt.w r6, #-2147483648
 ; CHECK-NEXT:    movlt r7, #0
-; CHECK-NEXT:    vcmp.f32 s20, s30
+; CHECK-NEXT:    vcmp.f32 s20, s28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itt gt
 ; CHECK-NEXT:    movgt.w r7, #-1
@@ -4633,12 +4911,12 @@ define arm_aapcs_vfpcc <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) {
 ; CHECK-NEXT:    movvs r6, #0
 ; CHECK-NEXT:    movvs r7, #0
 ; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    vcmp.f32 s16, s28
+; CHECK-NEXT:    vcmp.f32 s16, s30
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itt lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    movlt.w r1, #-2147483648
-; CHECK-NEXT:    vcmp.f32 s16, s30
+; CHECK-NEXT:    vcmp.f32 s16, s28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s16, s16
 ; CHECK-NEXT:    itt gt
@@ -4654,11 +4932,11 @@ define arm_aapcs_vfpcc <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) {
 ; CHECK-NEXT:    movvs r1, #0
 ; CHECK-NEXT:    vmov q5[3], q5[1], r1, r6
 ; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    vcmp.f32 s16, s28
+; CHECK-NEXT:    vcmp.f32 s16, s30
 ; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    mov r6, r1
-; CHECK-NEXT:    vcmp.f32 s16, s30
+; CHECK-NEXT:    vcmp.f32 s16, s28
 ; CHECK-NEXT:    itt lt
 ; CHECK-NEXT:    movlt.w r6, #-2147483648
 ; CHECK-NEXT:    movlt r7, #0
@@ -4674,12 +4952,12 @@ define arm_aapcs_vfpcc <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) {
 ; CHECK-NEXT:    movvs r6, #0
 ; CHECK-NEXT:    movvs r7, #0
 ; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    vcmp.f32 s16, s28
+; CHECK-NEXT:    vcmp.f32 s16, s30
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itt lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    movlt.w r1, #-2147483648
-; CHECK-NEXT:    vcmp.f32 s16, s30
+; CHECK-NEXT:    vcmp.f32 s16, s28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s16, s16
 ; CHECK-NEXT:    itt gt
@@ -4695,13 +4973,13 @@ define arm_aapcs_vfpcc <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) {
 ; CHECK-NEXT:    movvs r1, #0
 ; CHECK-NEXT:    vmov q6[3], q6[1], r1, r6
 ; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    vcmp.f32 s16, s28
+; CHECK-NEXT:    vcmp.f32 s16, s30
 ; CHECK-NEXT:    vmov q3[2], q3[0], r10, r9
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itt lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    movlt.w r1, #-2147483648
-; CHECK-NEXT:    vcmp.f32 s16, s30
+; CHECK-NEXT:    vcmp.f32 s16, s28
 ; CHECK-NEXT:    vmov q3[3], q3[1], r11, r8
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itt gt
@@ -4738,109 +5016,77 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT:    .pad #48
-; CHECK-NEXT:    sub sp, #48
+; CHECK-NEXT:    .pad #56
+; CHECK-NEXT:    sub sp, #56
 ; CHECK-NEXT:    vmov q4, q0
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    vcvtb.f32.f16 s24, s17
+; CHECK-NEXT:    mov r10, r0
+; CHECK-NEXT:    vcvtt.f32.f16 s24, s16
 ; CHECK-NEXT:    vmov r0, s24
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    vcvtb.f32.f16 s26, s18
-; CHECK-NEXT:    mov r8, r0
+; CHECK-NEXT:    vcvtt.f32.f16 s26, s17
+; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    vmov r0, s26
 ; CHECK-NEXT:    vldr s22, .LCPI50_0
 ; CHECK-NEXT:    vldr s20, .LCPI50_1
-; CHECK-NEXT:    mov r9, r1
 ; CHECK-NEXT:    vcmp.f32 s24, s22
-; CHECK-NEXT:    mov r10, r2
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itttt lt
 ; CHECK-NEXT:    mvnlt r3, #7
-; CHECK-NEXT:    movlt.w r8, #0
-; CHECK-NEXT:    movlt.w r9, #0
-; CHECK-NEXT:    movlt.w r10, #0
+; CHECK-NEXT:    movlt r2, #0
+; CHECK-NEXT:    movlt r7, #0
+; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    vcmp.f32 s24, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itttt gt
-; CHECK-NEXT:    movgt.w r10, #-1
-; CHECK-NEXT:    movgt.w r9, #-1
-; CHECK-NEXT:    movgt.w r8, #-1
+; CHECK-NEXT:    movgt.w r1, #-1
+; CHECK-NEXT:    movgt.w r7, #-1
+; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    movgt r3, #7
 ; CHECK-NEXT:    vcmp.f32 s24, s24
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r3, #0
-; CHECK-NEXT:    str r3, [sp, #36] @ 4-byte Spill
-; CHECK-NEXT:    ittt vs
-; CHECK-NEXT:    movvs.w r8, #0
-; CHECK-NEXT:    movvs.w r9, #0
-; CHECK-NEXT:    movvs.w r10, #0
-; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    vcvtb.f32.f16 s24, s19
-; CHECK-NEXT:    mov r5, r0
-; CHECK-NEXT:    vmov r0, s24
-; CHECK-NEXT:    mov r6, r1
-; CHECK-NEXT:    vcmp.f32 s26, s22
-; CHECK-NEXT:    mov r7, r2
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itttt lt
-; CHECK-NEXT:    mvnlt r3, #7
-; CHECK-NEXT:    movlt r5, #0
-; CHECK-NEXT:    movlt r6, #0
-; CHECK-NEXT:    movlt r7, #0
-; CHECK-NEXT:    vcmp.f32 s26, s20
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itttt gt
-; CHECK-NEXT:    movgt.w r7, #-1
-; CHECK-NEXT:    movgt.w r6, #-1
-; CHECK-NEXT:    movgt.w r5, #-1
-; CHECK-NEXT:    movgt r3, #7
-; CHECK-NEXT:    vcmp.f32 s26, s26
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    str r3, [sp, #52] @ 4-byte Spill
+; CHECK-NEXT:    it vs
+; CHECK-NEXT:    movvs r2, #0
+; CHECK-NEXT:    str r2, [sp, #48] @ 4-byte Spill
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r3, #0
-; CHECK-NEXT:    str r3, [sp, #32] @ 4-byte Spill
-; CHECK-NEXT:    ittt vs
-; CHECK-NEXT:    movvs r5, #0
-; CHECK-NEXT:    movvs r6, #0
 ; CHECK-NEXT:    movvs r7, #0
+; CHECK-NEXT:    str r7, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT:    it vs
+; CHECK-NEXT:    movvs r1, #0
+; CHECK-NEXT:    str r1, [sp, #44] @ 4-byte Spill
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    vcmp.f32 s24, s22
+; CHECK-NEXT:    vcmp.f32 s26, s22
+; CHECK-NEXT:    vcvtt.f32.f16 s24, s18
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itttt lt
+; CHECK-NEXT:    mvnlt r3, #7
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    mvnlt r3, #7
-; CHECK-NEXT:    vcmp.f32 s24, s20
+; CHECK-NEXT:    vcmp.f32 s26, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s24, s24
 ; CHECK-NEXT:    itttt gt
-; CHECK-NEXT:    movgt r3, #7
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    movgt.w r2, #-1
+; CHECK-NEXT:    movgt r3, #7
+; CHECK-NEXT:    vcmp.f32 s26, s26
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
+; CHECK-NEXT:    movvs r3, #0
+; CHECK-NEXT:    str r3, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r2, #0
-; CHECK-NEXT:    str.w r2, [r4, #83]
+; CHECK-NEXT:    str r2, [sp, #32] @ 4-byte Spill
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r1, #0
-; CHECK-NEXT:    str.w r1, [r4, #79]
+; CHECK-NEXT:    str r1, [sp, #28] @ 4-byte Spill
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
-; CHECK-NEXT:    vcvtt.f32.f16 s24, s16
-; CHECK-NEXT:    str.w r0, [r4, #75]
+; CHECK-NEXT:    str r0, [sp, #24] @ 4-byte Spill
 ; CHECK-NEXT:    vmov r0, s24
-; CHECK-NEXT:    str.w r7, [r4, #58]
-; CHECK-NEXT:    str.w r6, [r4, #54]
-; CHECK-NEXT:    str.w r5, [r4, #50]
-; CHECK-NEXT:    str.w r10, [r4, #33]
-; CHECK-NEXT:    str.w r9, [r4, #29]
-; CHECK-NEXT:    str.w r8, [r4, #25]
-; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r3, #0
-; CHECK-NEXT:    str r3, [sp, #28] @ 4-byte Spill
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    vcmp.f32 s24, s22
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
@@ -4860,119 +5106,155 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r3, #0
-; CHECK-NEXT:    str r3, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT:    str r3, [sp, #20] @ 4-byte Spill
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r2, #0
-; CHECK-NEXT:    str r2, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    str r2, [sp, #16] @ 4-byte Spill
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
-; CHECK-NEXT:    vcvtt.f32.f16 s24, s17
-; CHECK-NEXT:    str r0, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT:    vcvtb.f32.f16 s24, s17
+; CHECK-NEXT:    str r0, [sp, #8] @ 4-byte Spill
 ; CHECK-NEXT:    vmov r0, s24
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r1, #0
-; CHECK-NEXT:    str r1, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT:    str r1, [sp, #12] @ 4-byte Spill
 ; CHECK-NEXT:    bl __fixsfti
+; CHECK-NEXT:    vcvtb.f32.f16 s18, s18
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    vmov r0, s18
+; CHECK-NEXT:    mov r8, r1
 ; CHECK-NEXT:    vcmp.f32 s24, s22
-; CHECK-NEXT:    vcvtt.f32.f16 s18, s18
+; CHECK-NEXT:    mov r6, r2
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itttt lt
 ; CHECK-NEXT:    mvnlt r3, #7
-; CHECK-NEXT:    movlt r2, #0
-; CHECK-NEXT:    movlt r1, #0
-; CHECK-NEXT:    movlt r0, #0
+; CHECK-NEXT:    movlt r5, #0
+; CHECK-NEXT:    movlt.w r8, #0
+; CHECK-NEXT:    movlt r6, #0
 ; CHECK-NEXT:    vcmp.f32 s24, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itttt gt
-; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    movgt.w r1, #-1
-; CHECK-NEXT:    movgt.w r2, #-1
+; CHECK-NEXT:    movgt.w r6, #-1
+; CHECK-NEXT:    movgt.w r8, #-1
+; CHECK-NEXT:    movgt.w r5, #-1
 ; CHECK-NEXT:    movgt r3, #7
 ; CHECK-NEXT:    vcmp.f32 s24, s24
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r3, #0
-; CHECK-NEXT:    str r3, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r2, #0
-; CHECK-NEXT:    str r2, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r1, #0
-; CHECK-NEXT:    str r1, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs r0, #0
-; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    vmov r0, s18
+; CHECK-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    ittt vs
+; CHECK-NEXT:    movvs r5, #0
+; CHECK-NEXT:    movvs.w r8, #0
+; CHECK-NEXT:    movvs r6, #0
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    vcmp.f32 s18, s22
-; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    mov r7, r1
-; CHECK-NEXT:    mov r8, r2
+; CHECK-NEXT:    mov r11, r1
+; CHECK-NEXT:    mov r4, r2
 ; CHECK-NEXT:    vcmp.f32 s18, s20
 ; CHECK-NEXT:    itttt lt
 ; CHECK-NEXT:    mvnlt r3, #7
-; CHECK-NEXT:    movlt.w r8, #0
-; CHECK-NEXT:    movlt r5, #0
 ; CHECK-NEXT:    movlt r7, #0
+; CHECK-NEXT:    movlt.w r11, #0
+; CHECK-NEXT:    movlt r4, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s18, s18
-; CHECK-NEXT:    vcvtt.f32.f16 s18, s19
+; CHECK-NEXT:    vcvtb.f32.f16 s18, s19
 ; CHECK-NEXT:    vmov r0, s18
 ; CHECK-NEXT:    itttt gt
+; CHECK-NEXT:    movgt.w r4, #-1
+; CHECK-NEXT:    movgt.w r11, #-1
 ; CHECK-NEXT:    movgt.w r7, #-1
-; CHECK-NEXT:    movgt.w r5, #-1
-; CHECK-NEXT:    movgt.w r8, #-1
 ; CHECK-NEXT:    movgt r3, #7
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r3, #0
 ; CHECK-NEXT:    str r3, [sp] @ 4-byte Spill
 ; CHECK-NEXT:    ittt vs
-; CHECK-NEXT:    movvs.w r8, #0
-; CHECK-NEXT:    movvs r5, #0
 ; CHECK-NEXT:    movvs r7, #0
+; CHECK-NEXT:    movvs.w r11, #0
+; CHECK-NEXT:    movvs r4, #0
+; CHECK-NEXT:    bl __fixsfti
+; CHECK-NEXT:    vcmp.f32 s18, s22
+; CHECK-NEXT:    mov r9, r3
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s18, s20
+; CHECK-NEXT:    itttt lt
+; CHECK-NEXT:    movlt r2, #0
+; CHECK-NEXT:    movlt r1, #0
+; CHECK-NEXT:    movlt r0, #0
+; CHECK-NEXT:    mvnlt r9, #7
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s18, s18
+; CHECK-NEXT:    itttt gt
+; CHECK-NEXT:    movgt.w r9, #7
+; CHECK-NEXT:    movgt.w r0, #-1
+; CHECK-NEXT:    movgt.w r1, #-1
+; CHECK-NEXT:    movgt.w r2, #-1
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    it vs
+; CHECK-NEXT:    movvs r2, #0
+; CHECK-NEXT:    str.w r2, [r10, #83]
+; CHECK-NEXT:    it vs
+; CHECK-NEXT:    movvs r1, #0
+; CHECK-NEXT:    str.w r1, [r10, #79]
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    it vs
+; CHECK-NEXT:    movvs r0, #0
+; CHECK-NEXT:    vcvtt.f32.f16 s18, s19
+; CHECK-NEXT:    str.w r0, [r10, #75]
+; CHECK-NEXT:    vmov r0, s18
+; CHECK-NEXT:    str.w r4, [r10, #58]
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    str.w r11, [r10, #54]
+; CHECK-NEXT:    str.w r7, [r10, #50]
+; CHECK-NEXT:    str.w r6, [r10, #33]
+; CHECK-NEXT:    str.w r8, [r10, #29]
+; CHECK-NEXT:    str.w r5, [r10, #25]
+; CHECK-NEXT:    it vs
+; CHECK-NEXT:    movvs.w r9, #0
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    vcvtb.f32.f16 s16, s16
-; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    mov r5, r0
 ; CHECK-NEXT:    vmov r0, s16
-; CHECK-NEXT:    mov r9, r1
+; CHECK-NEXT:    mov r7, r1
 ; CHECK-NEXT:    vcmp.f32 s18, s22
-; CHECK-NEXT:    mov r10, r2
-; CHECK-NEXT:    mov r11, r3
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r4, r3
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itttt lt
-; CHECK-NEXT:    mvnlt r11, #7
-; CHECK-NEXT:    movlt.w r10, #0
-; CHECK-NEXT:    movlt.w r9, #0
+; CHECK-NEXT:    mvnlt r4, #7
 ; CHECK-NEXT:    movlt r6, #0
+; CHECK-NEXT:    movlt r7, #0
+; CHECK-NEXT:    movlt r5, #0
 ; CHECK-NEXT:    vcmp.f32 s18, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itttt gt
+; CHECK-NEXT:    movgt.w r5, #-1
+; CHECK-NEXT:    movgt.w r7, #-1
 ; CHECK-NEXT:    movgt.w r6, #-1
-; CHECK-NEXT:    movgt.w r9, #-1
-; CHECK-NEXT:    movgt.w r10, #-1
-; CHECK-NEXT:    movgt.w r11, #7
+; CHECK-NEXT:    movgt r4, #7
 ; CHECK-NEXT:    vcmp.f32 s18, s18
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itttt vs
-; CHECK-NEXT:    movvs.w r11, #0
-; CHECK-NEXT:    movvs.w r10, #0
-; CHECK-NEXT:    movvs.w r9, #0
+; CHECK-NEXT:    movvs r4, #0
 ; CHECK-NEXT:    movvs r6, #0
+; CHECK-NEXT:    movvs r7, #0
+; CHECK-NEXT:    movvs r5, #0
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    vcmp.f32 s16, s22
-; CHECK-NEXT:    mov r12, r3
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itttt lt
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    mvnlt r12, #7
+; CHECK-NEXT:    mvnlt r3, #7
 ; CHECK-NEXT:    vcmp.f32 s16, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itttt gt
-; CHECK-NEXT:    movgt.w r12, #7
+; CHECK-NEXT:    movgt r3, #7
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    movgt.w r2, #-1
@@ -4980,73 +5262,74 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r2, #0
-; CHECK-NEXT:    str r2, [r4, #8]
+; CHECK-NEXT:    str.w r2, [r10, #8]
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r1, #0
-; CHECK-NEXT:    str r1, [r4, #4]
+; CHECK-NEXT:    str.w r1, [r10, #4]
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
-; CHECK-NEXT:    str r0, [r4]
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    lsrl r0, r9, #28
-; CHECK-NEXT:    str.w r0, [r4, #91]
+; CHECK-NEXT:    str.w r0, [r10]
 ; CHECK-NEXT:    mov r0, r5
 ; CHECK-NEXT:    lsrl r0, r7, #28
-; CHECK-NEXT:    str.w r0, [r4, #66]
-; CHECK-NEXT:    ldr.w lr, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    ldr r3, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, lr
-; CHECK-NEXT:    lsrl r0, r3, #28
-; CHECK-NEXT:    str.w r0, [r4, #41]
-; CHECK-NEXT:    ldrd r0, r1, [sp, #40] @ 8-byte Folded Reload
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    orr.w r1, r7, r6, lsl #4
+; CHECK-NEXT:    str.w r1, [r10, #95]
+; CHECK-NEXT:    and r1, r4, #15
+; CHECK-NEXT:    str.w r0, [r10, #91]
+; CHECK-NEXT:    and r0, r9, #15
+; CHECK-NEXT:    lsrl r6, r1, #28
+; CHECK-NEXT:    strb.w r6, [r10, #99]
+; CHECK-NEXT:    orr.w r0, r0, r5, lsl #4
+; CHECK-NEXT:    str.w r0, [r10, #87]
+; CHECK-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    mov r0, r7
 ; CHECK-NEXT:    lsrl r0, r1, #28
-; CHECK-NEXT:    str r1, [sp, #44] @ 4-byte Spill
-; CHECK-NEXT:    and r1, r11, #15
-; CHECK-NEXT:    str r0, [r4, #16]
-; CHECK-NEXT:    orr.w r0, r9, r10, lsl #4
-; CHECK-NEXT:    lsrl r10, r1, #28
-; CHECK-NEXT:    str.w r0, [r4, #95]
-; CHECK-NEXT:    strb.w r10, [r4, #99]
-; CHECK-NEXT:    ldr r0, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    and r0, r0, #15
-; CHECK-NEXT:    orr.w r0, r0, r6, lsl #4
-; CHECK-NEXT:    str.w r0, [r4, #87]
-; CHECK-NEXT:    orr.w r0, r7, r8, lsl #4
-; CHECK-NEXT:    str.w r0, [r4, #70]
-; CHECK-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-NEXT:    orr.w r1, r1, r2, lsl #4
+; CHECK-NEXT:    str.w r1, [r10, #70]
+; CHECK-NEXT:    str.w r0, [r10, #66]
+; CHECK-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
 ; CHECK-NEXT:    and r1, r0, #15
-; CHECK-NEXT:    lsrl r8, r1, #28
-; CHECK-NEXT:    strb.w r8, [r4, #74]
-; CHECK-NEXT:    ldr r0, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT:    lsrl r2, r1, #28
+; CHECK-NEXT:    strb.w r2, [r10, #74]
+; CHECK-NEXT:    ldr r0, [sp] @ 4-byte Reload
 ; CHECK-NEXT:    and r0, r0, #15
-; CHECK-NEXT:    orr.w r0, r0, r5, lsl #4
-; CHECK-NEXT:    str.w r0, [r4, #62]
-; CHECK-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    orr.w r0, r3, r2, lsl #4
-; CHECK-NEXT:    str.w r0, [r4, #45]
-; CHECK-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    orr.w r0, r0, r7, lsl #4
+; CHECK-NEXT:    str.w r0, [r10, #62]
+; CHECK-NEXT:    ldr r7, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    ldr r1, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    ldr r2, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    lsrl r0, r1, #28
+; CHECK-NEXT:    orr.w r1, r1, r2, lsl #4
+; CHECK-NEXT:    str.w r1, [r10, #45]
+; CHECK-NEXT:    str.w r0, [r10, #41]
+; CHECK-NEXT:    ldr r0, [sp, #36] @ 4-byte Reload
 ; CHECK-NEXT:    and r1, r0, #15
 ; CHECK-NEXT:    lsrl r2, r1, #28
-; CHECK-NEXT:    strb.w r2, [r4, #49]
-; CHECK-NEXT:    ldr r0, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    strb.w r2, [r10, #49]
+; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-NEXT:    and r0, r0, #15
-; CHECK-NEXT:    orr.w r0, r0, lr, lsl #4
-; CHECK-NEXT:    str.w r0, [r4, #37]
-; CHECK-NEXT:    ldr r2, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT:    orr.w r0, r0, r2, lsl #4
-; CHECK-NEXT:    str r0, [r4, #20]
-; CHECK-NEXT:    ldr r0, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    orr.w r0, r0, r7, lsl #4
+; CHECK-NEXT:    str.w r0, [r10, #37]
+; CHECK-NEXT:    ldr r7, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT:    ldr r1, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    ldr r2, [sp, #48] @ 4-byte Reload
+; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    lsrl r0, r1, #28
+; CHECK-NEXT:    orr.w r1, r1, r2, lsl #4
+; CHECK-NEXT:    strd r0, r1, [r10, #16]
+; CHECK-NEXT:    ldr r0, [sp, #52] @ 4-byte Reload
 ; CHECK-NEXT:    and r1, r0, #15
 ; CHECK-NEXT:    lsrl r2, r1, #28
-; CHECK-NEXT:    strb r2, [r4, #24]
+; CHECK-NEXT:    strb.w r2, [r10, #24]
 ; CHECK-NEXT:    it vs
-; CHECK-NEXT:    movvs.w r12, #0
-; CHECK-NEXT:    ldr r1, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT:    and r0, r12, #15
-; CHECK-NEXT:    orr.w r0, r0, r1, lsl #4
-; CHECK-NEXT:    str r0, [r4, #12]
-; CHECK-NEXT:    add sp, #48
+; CHECK-NEXT:    movvs r3, #0
+; CHECK-NEXT:    and r0, r3, #15
+; CHECK-NEXT:    orr.w r0, r0, r7, lsl #4
+; CHECK-NEXT:    str.w r0, [r10, #12]
+; CHECK-NEXT:    add sp, #56
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -5063,63 +5346,62 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
 define arm_aapcs_vfpcc <8 x i128> @test_signed_v8f16_v8i128(<8 x half> %f) {
 ; CHECK-LABEL: test_signed_v8f16_v8i128:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
-; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
-; CHECK-NEXT:    .pad #4
-; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
 ; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    vcvtt.f32.f16 s30, s19
-; CHECK-NEXT:    vcvtb.f32.f16 s20, s16
-; CHECK-NEXT:    vmov r0, s30
-; CHECK-NEXT:    vcvtb.f32.f16 s26, s19
-; CHECK-NEXT:    vldr s22, .LCPI51_0
-; CHECK-NEXT:    vmov r5, s20
-; CHECK-NEXT:    vmov r7, s26
-; CHECK-NEXT:    vcvtt.f32.f16 s28, s18
+; CHECK-NEXT:    vcvtt.f32.f16 s26, s19
+; CHECK-NEXT:    vcvtb.f32.f16 s28, s19
+; CHECK-NEXT:    vmov r0, s26
+; CHECK-NEXT:    vcvtb.f32.f16 s24, s17
+; CHECK-NEXT:    vldr s20, .LCPI51_0
+; CHECK-NEXT:    vmov r5, s28
+; CHECK-NEXT:    vmov r8, s24
+; CHECK-NEXT:    vcvtt.f32.f16 s30, s18
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    vldr s24, .LCPI51_1
+; CHECK-NEXT:    vldr s22, .LCPI51_1
 ; CHECK-NEXT:    add.w r12, r4, #112
-; CHECK-NEXT:    vmov r6, s28
-; CHECK-NEXT:    vcvtb.f32.f16 s18, s18
-; CHECK-NEXT:    vcmp.f32 s30, s24
+; CHECK-NEXT:    vmov r6, s30
+; CHECK-NEXT:    vcmp.f32 s26, s22
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s26, s20
 ; CHECK-NEXT:    itttt lt
 ; CHECK-NEXT:    movlt.w r3, #-2147483648
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    vcmp.f32 s30, s22
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s26, s26
 ; CHECK-NEXT:    itttt gt
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    mvngt r3, #-2147483648
-; CHECK-NEXT:    vcmp.f32 s30, s30
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r3, #0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    ittt vs
 ; CHECK-NEXT:    movvs r2, #0
 ; CHECK-NEXT:    movvs r1, #0
 ; CHECK-NEXT:    movvs r0, #0
 ; CHECK-NEXT:    stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    vcvtb.f32.f16 s26, s18
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    vcmp.f32 s26, s24
+; CHECK-NEXT:    vcmp.f32 s28, s22
 ; CHECK-NEXT:    add.w r12, r4, #96
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s28, s20
 ; CHECK-NEXT:    itttt lt
 ; CHECK-NEXT:    movlt.w r3, #-2147483648
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    vcmp.f32 s26, s22
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s26, s26
+; CHECK-NEXT:    vcmp.f32 s28, s28
 ; CHECK-NEXT:    itttt gt
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    movgt.w r1, #-1
@@ -5128,26 +5410,27 @@ define arm_aapcs_vfpcc <8 x i128> @test_signed_v8f16_v8i128(<8 x half> %f) {
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r3, #0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    ittt vs
 ; CHECK-NEXT:    movvs r2, #0
 ; CHECK-NEXT:    movvs r1, #0
 ; CHECK-NEXT:    movvs r0, #0
 ; CHECK-NEXT:    stm.w r12, {r0, r1, r2, r3}
 ; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    vmov r7, s18
-; CHECK-NEXT:    vcvtt.f32.f16 s26, s17
+; CHECK-NEXT:    vmov r7, s26
+; CHECK-NEXT:    vcvtt.f32.f16 s28, s17
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    vcmp.f32 s28, s24
+; CHECK-NEXT:    vcmp.f32 s30, s22
 ; CHECK-NEXT:    add.w r12, r4, #80
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s30, s20
 ; CHECK-NEXT:    itttt lt
 ; CHECK-NEXT:    movlt.w r3, #-2147483648
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    vcmp.f32 s28, s22
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s28, s28
+; CHECK-NEXT:    vcmp.f32 s30, s30
 ; CHECK-NEXT:    itttt gt
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    movgt.w r1, #-1
@@ -5156,145 +5439,155 @@ define arm_aapcs_vfpcc <8 x i128> @test_signed_v8f16_v8i128(<8 x half> %f) {
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r3, #0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    ittt vs
 ; CHECK-NEXT:    movvs r2, #0
 ; CHECK-NEXT:    movvs r1, #0
 ; CHECK-NEXT:    movvs r0, #0
 ; CHECK-NEXT:    stm.w r12, {r0, r1, r2, r3}
 ; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    vmov r6, s26
-; CHECK-NEXT:    vcvtb.f32.f16 s28, s17
+; CHECK-NEXT:    vmov r5, s28
+; CHECK-NEXT:    vcvtt.f32.f16 s18, s16
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    vcmp.f32 s18, s24
+; CHECK-NEXT:    vcmp.f32 s26, s22
 ; CHECK-NEXT:    add.w r12, r4, #64
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s26, s20
 ; CHECK-NEXT:    itttt lt
 ; CHECK-NEXT:    movlt.w r3, #-2147483648
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    vcmp.f32 s18, s22
-; CHECK-NEXT:    vcvtt.f32.f16 s16, s16
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s26, s26
 ; CHECK-NEXT:    itttt gt
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    mvngt r3, #-2147483648
-; CHECK-NEXT:    vcmp.f32 s18, s18
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r3, #0
-; CHECK-NEXT:    ittt vs
+; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r2, #0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    itt vs
 ; CHECK-NEXT:    movvs r1, #0
 ; CHECK-NEXT:    movvs r0, #0
 ; CHECK-NEXT:    stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    vmov r7, s28
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    vcvtb.f32.f16 s16, s16
+; CHECK-NEXT:    vmov r6, s18
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    vcmp.f32 s26, s24
+; CHECK-NEXT:    vcmp.f32 s28, s22
 ; CHECK-NEXT:    add.w r12, r4, #48
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s28, s20
 ; CHECK-NEXT:    itttt lt
 ; CHECK-NEXT:    movlt.w r3, #-2147483648
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    vcmp.f32 s26, s22
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s28, s28
 ; CHECK-NEXT:    itttt gt
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    mvngt r3, #-2147483648
-; CHECK-NEXT:    vcmp.f32 s26, s26
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r3, #0
-; CHECK-NEXT:    ittt vs
+; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r2, #0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    itt vs
 ; CHECK-NEXT:    movvs r1, #0
 ; CHECK-NEXT:    movvs r0, #0
 ; CHECK-NEXT:    stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    vmov r6, s16
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    vmov r7, s16
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    vcmp.f32 s28, s24
+; CHECK-NEXT:    vcmp.f32 s24, s22
 ; CHECK-NEXT:    add.w r12, r4, #32
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s24, s20
 ; CHECK-NEXT:    itttt lt
 ; CHECK-NEXT:    movlt.w r3, #-2147483648
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    vcmp.f32 s28, s22
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s24, s24
 ; CHECK-NEXT:    itttt gt
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    mvngt r3, #-2147483648
-; CHECK-NEXT:    vcmp.f32 s28, s28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itttt vs
+; CHECK-NEXT:    ittt vs
 ; CHECK-NEXT:    movvs r3, #0
 ; CHECK-NEXT:    movvs r2, #0
 ; CHECK-NEXT:    movvs r1, #0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
 ; CHECK-NEXT:    stm.w r12, {r0, r1, r2, r3}
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    vcmp.f32 s16, s24
+; CHECK-NEXT:    vcmp.f32 s18, s22
 ; CHECK-NEXT:    add.w r12, r4, #16
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s18, s20
 ; CHECK-NEXT:    itttt lt
 ; CHECK-NEXT:    movlt.w r3, #-2147483648
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    vcmp.f32 s16, s22
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s18, s18
 ; CHECK-NEXT:    itttt gt
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    mvngt r3, #-2147483648
-; CHECK-NEXT:    vcmp.f32 s16, s16
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itttt vs
+; CHECK-NEXT:    ittt vs
 ; CHECK-NEXT:    movvs r3, #0
 ; CHECK-NEXT:    movvs r2, #0
 ; CHECK-NEXT:    movvs r1, #0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
 ; CHECK-NEXT:    stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r0, r7
 ; CHECK-NEXT:    bl __fixsfti
-; CHECK-NEXT:    vcmp.f32 s20, s24
+; CHECK-NEXT:    vcmp.f32 s16, s22
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s16, s20
 ; CHECK-NEXT:    itttt lt
 ; CHECK-NEXT:    movlt.w r3, #-2147483648
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    vcmp.f32 s20, s22
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s16, s16
 ; CHECK-NEXT:    itttt gt
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    mvngt r3, #-2147483648
-; CHECK-NEXT:    vcmp.f32 s20, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itttt vs
+; CHECK-NEXT:    ittt vs
 ; CHECK-NEXT:    movvs r3, #0
 ; CHECK-NEXT:    movvs r2, #0
 ; CHECK-NEXT:    movvs r1, #0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    it vs
 ; CHECK-NEXT:    movvs r0, #0
 ; CHECK-NEXT:    stm r4!, {r0, r1, r2, r3}
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT:    add sp, #4
-; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
 ; CHECK-NEXT:    .p2align 2
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI51_0:

diff  --git a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
index ee040feca4240f..13609bd1903f2d 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
@@ -268,38 +268,41 @@ declare <6 x i32> @llvm.fptoui.sat.v6f64.v6i32 (<6 x double>)
 define arm_aapcs_vfpcc <1 x i32> @test_unsigned_v1f64_v1i32(<1 x double> %f) {
 ; CHECK-LABEL: test_unsigned_v1f64_v1i32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    .save {r4, r5, r6, lr}
-; CHECK-NEXT:    push {r4, r5, r6, lr}
+; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    vldr d1, .LCPI8_0
 ; CHECK-NEXT:    vmov r4, r5, d0
 ; CHECK-NEXT:    vmov r2, r3, d1
 ; CHECK-NEXT:    mov r0, r4
 ; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    vldr d0, .LCPI8_1
 ; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    mov r0, r4
 ; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    bl __aeabi_d2uiz
-; CHECK-NEXT:    vldr d0, .LCPI8_1
-; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    csel r6, r0, r6, ne
-; CHECK-NEXT:    mov r0, r4
 ; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    mov r0, r4
 ; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    bl __aeabi_d2uiz
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csel r0, r0, r7, ne
+; CHECK-NEXT:    cmp r6, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r6, #-1
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    pop {r4, r5, r6, pc}
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI8_0:
-; CHECK-NEXT:    .long 0 @ double 0
-; CHECK-NEXT:    .long 0
-; CHECK-NEXT:  .LCPI8_1:
 ; CHECK-NEXT:    .long 4292870144 @ double 4294967295
 ; CHECK-NEXT:    .long 1106247679
+; CHECK-NEXT:  .LCPI8_1:
+; CHECK-NEXT:    .long 0 @ double 0
+; CHECK-NEXT:    .long 0
     %x = call <1 x i32> @llvm.fptoui.sat.v1f64.v1i32(<1 x double> %f)
     ret <1 x i32> %x
 }
@@ -313,60 +316,84 @@ define arm_aapcs_vfpcc <2 x i32> @test_unsigned_v2f64_v2i32(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #8
-; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    .pad #16
+; CHECK-NEXT:    sub sp, #16
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI9_0
 ; CHECK-NEXT:    vmov r6, r7, d9
-; CHECK-NEXT:    vmov r11, r3, d0
-; CHECK-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    vmov r2, r3, d0
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    strd r3, r2, [sp, #4] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r8, r0
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    bl __aeabi_d2ulz
 ; CHECK-NEXT:    vldr d0, .LCPI9_1
-; CHECK-NEXT:    cmp.w r8, #0
-; CHECK-NEXT:    csel r9, r0, r8, ne
-; CHECK-NEXT:    csel r8, r1, r8, ne
-; CHECK-NEXT:    vmov r10, r3, d0
-; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    mov r10, r1
+; CHECK-NEXT:    vmov r9, r8, d0
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    vmov r5, r4, d8
-; CHECK-NEXT:    str r3, [sp] @ 4-byte Spill
-; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    vmov r11, r4, d8
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r8
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movne.w r8, #0
-; CHECK-NEXT:    movne.w r9, #-1
-; CHECK-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    mov r6, r0
-; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r0, r11
 ; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    ldr r3, [sp] @ 4-byte Reload
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r8
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r6, #1
 ; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    csel r7, r1, r6, ne
-; CHECK-NEXT:    csel r6, r0, r6, ne
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r5, #-1
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movne.w r6, #-1
-; CHECK-NEXT:    movne r7, #0
-; CHECK-NEXT:    vmov q0[2], q0[0], r6, r9
-; CHECK-NEXT:    vmov q0[3], q0[1], r7, r8
-; CHECK-NEXT:    add sp, #8
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r7, #1
+; CHECK-NEXT:    ldrd r3, r2, [sp, #4] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r8, r4
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    lsrs r4, r0, #5
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    bl __aeabi_d2ulz
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #0
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r5
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r10, #0
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r10, #0
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r10
+; CHECK-NEXT:    add sp, #16
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -391,93 +418,99 @@ define arm_aapcs_vfpcc <3 x i32> @test_unsigned_v3f64_v3i32(<3 x double> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #16
-; CHECK-NEXT:    sub sp, #16
+; CHECK-NEXT:    .pad #24
+; CHECK-NEXT:    sub sp, #24
 ; CHECK-NEXT:    vmov.f32 s18, s0
 ; CHECK-NEXT:    vmov.f32 s19, s1
 ; CHECK-NEXT:    vldr d0, .LCPI10_0
-; CHECK-NEXT:    vmov r8, r9, d1
-; CHECK-NEXT:    vmov r5, r4, d0
+; CHECK-NEXT:    vmov r4, r5, d1
+; CHECK-NEXT:    vmov r9, r7, d0
 ; CHECK-NEXT:    vmov.f32 s16, s4
 ; CHECK-NEXT:    vmov.f32 s17, s5
-; CHECK-NEXT:    str r5, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r1, r9
-; CHECK-NEXT:    mov r2, r5
-; CHECK-NEXT:    mov r3, r4
-; CHECK-NEXT:    str r4, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r10, r0
-; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r1, r9
-; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    vldr d0, .LCPI10_1
-; CHECK-NEXT:    vmov r11, r1, d9
-; CHECK-NEXT:    cmp.w r10, #0
-; CHECK-NEXT:    vmov r7, r6, d8
-; CHECK-NEXT:    vmov r2, r3, d0
-; CHECK-NEXT:    csel r10, r0, r10, ne
-; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    str r1, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    mov r1, r9
-; CHECK-NEXT:    mov r9, r2
-; CHECK-NEXT:    mov r8, r3
+; CHECK-NEXT:    str.w r9, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    str r7, [sp, #12] @ 4-byte Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r5
-; CHECK-NEXT:    mov r3, r4
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r10, #-1
+; CHECK-NEXT:    vldr d0, .LCPI10_1
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    str r0, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    vmov r11, r3, d0
+; CHECK-NEXT:    str r3, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    mov r2, r11
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    mov r1, r5
 ; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    csel r5, r0, r4, ne
-; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    vmov r10, r8, d8
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    csel r0, r0, r6, ne
 ; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    mov r3, r8
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    str r0, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    vmov r5, r4, d9
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r8
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    ldr r7, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    mov r9, r0
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    bl __aeabi_d2ulz
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    csel r6, r0, r9, ne
+; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r5, #-1
-; CHECK-NEXT:    ldr r6, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload
-; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    movne.w r6, #-1
+; CHECK-NEXT:    ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r8, r0
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    mov r3, r7
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r4
 ; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    csel r4, r0, r4, ne
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    vmov.32 q0[1], r10
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csel r0, r0, r7, ne
+; CHECK-NEXT:    cmp.w r8, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r4, #-1
-; CHECK-NEXT:    vmov q0[2], q0[0], r4, r5
-; CHECK-NEXT:    add sp, #16
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    vmov.32 q0[1], r1
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r6
+; CHECK-NEXT:    add sp, #24
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI10_0:
-; CHECK-NEXT:    .long 0 @ double 0
-; CHECK-NEXT:    .long 0
-; CHECK-NEXT:  .LCPI10_1:
 ; CHECK-NEXT:    .long 4292870144 @ double 4294967295
 ; CHECK-NEXT:    .long 1106247679
+; CHECK-NEXT:  .LCPI10_1:
+; CHECK-NEXT:    .long 0 @ double 0
+; CHECK-NEXT:    .long 0
     %x = call <3 x i32> @llvm.fptoui.sat.v3f64.v3i32(<3 x double> %f)
     ret <3 x i32> %x
 }
@@ -496,103 +529,103 @@ define arm_aapcs_vfpcc <4 x i32> @test_unsigned_v4f64_v4i32(<4 x double> %f) {
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI11_0
 ; CHECK-NEXT:    vmov q5, q1
-; CHECK-NEXT:    vmov r8, r9, d10
-; CHECK-NEXT:    vmov r2, r11, d0
-; CHECK-NEXT:    str r2, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r1, r9
-; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r10, r0
-; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r1, r9
-; CHECK-NEXT:    bl __aeabi_d2ulz
+; CHECK-NEXT:    vmov r7, r9, d0
+; CHECK-NEXT:    vmov r4, r5, d10
+; CHECK-NEXT:    str.w r9, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r2, r7
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    vldr d0, .LCPI11_1
-; CHECK-NEXT:    vmov r5, r1, d11
-; CHECK-NEXT:    cmp.w r10, #0
-; CHECK-NEXT:    vmov r6, r7, d8
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r4
 ; CHECK-NEXT:    vmov r2, r3, d0
-; CHECK-NEXT:    csel r4, r0, r10, ne
-; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    strd r5, r1, [sp, #4] @ 8-byte Folded Spill
-; CHECK-NEXT:    mov r1, r9
-; CHECK-NEXT:    str r2, [sp] @ 4-byte Spill
-; CHECK-NEXT:    mov r8, r2
-; CHECK-NEXT:    mov r9, r3
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r4, #-1
-; CHECK-NEXT:    mov r5, r7
-; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    ldr r7, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    str r4, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT:    mov r10, r11
-; CHECK-NEXT:    mov r2, r7
+; CHECK-NEXT:    strd r2, r3, [sp, #16] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    mov r0, r4
 ; CHECK-NEXT:    mov r1, r5
 ; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    mov r2, r8
-; CHECK-NEXT:    csel r4, r0, r4, ne
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    vmov r10, r8, d8
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    csel r0, r0, r6, ne
+; CHECK-NEXT:    mov r2, r7
 ; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    mov r8, r9
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    cmp r1, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r4, #-1
-; CHECK-NEXT:    ldr.w r11, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    ldr.w r9, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    str r4, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r9
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r9
-; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    mov r1, r9
-; CHECK-NEXT:    ldr.w r9, [sp] @ 4-byte Reload
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    csel r5, r0, r4, ne
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    vmov r7, r6, d9
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    vmov r11, r5, d11
+; CHECK-NEXT:    mov r4, r7
+; CHECK-NEXT:    str r7, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r8
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    ldr r6, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    ldr r7, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    mov r9, r0
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    bl __aeabi_d2ulz
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    csel r8, r0, r9, ne
+; CHECK-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-NEXT:    mov r2, r4
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r5, #-1
-; CHECK-NEXT:    ldr r2, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    movne.w r8, #-1
+; CHECK-NEXT:    ldr.w r10, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    mov r0, r11
 ; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r9, r0
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r7
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    mov r1, r5
 ; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    csel r4, r0, r4, ne
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    mov r3, r8
+; CHECK-NEXT:    vmov r4, r5, d9
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csel r6, r0, r7, ne
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r6, #-1
+; CHECK-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    mov r1, r5
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    ldrd r2, r3, [sp, #16] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r9, r0
+; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    bl __aeabi_d2ulz
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csel r0, r0, r7, ne
+; CHECK-NEXT:    cmp.w r9, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r4, #-1
-; CHECK-NEXT:    ldrd r1, r0, [sp, #12] @ 8-byte Folded Reload
-; CHECK-NEXT:    vmov q0[2], q0[0], r1, r0
-; CHECK-NEXT:    vmov q0[3], q0[1], r4, r5
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r8, r1
+; CHECK-NEXT:    vmov q0[3], q0[1], r0, r6
 ; CHECK-NEXT:    add sp, #24
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11}
 ; CHECK-NEXT:    add sp, #4
@@ -600,11 +633,11 @@ define arm_aapcs_vfpcc <4 x i32> @test_unsigned_v4f64_v4i32(<4 x double> %f) {
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI11_0:
-; CHECK-NEXT:    .long 0 @ double 0
-; CHECK-NEXT:    .long 0
-; CHECK-NEXT:  .LCPI11_1:
 ; CHECK-NEXT:    .long 4292870144 @ double 4294967295
 ; CHECK-NEXT:    .long 1106247679
+; CHECK-NEXT:  .LCPI11_1:
+; CHECK-NEXT:    .long 0 @ double 0
+; CHECK-NEXT:    .long 0
     %x = call <4 x i32> @llvm.fptoui.sat.v4f64.v4i32(<4 x double> %f)
     ret <4 x i32> %x
 }
@@ -618,151 +651,162 @@ define arm_aapcs_vfpcc <5 x i32> @test_unsigned_v5f64_v5i32(<5 x double> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11}
-; CHECK-NEXT:    .pad #32
-; CHECK-NEXT:    sub sp, #32
+; CHECK-NEXT:    .pad #40
+; CHECK-NEXT:    sub sp, #40
 ; CHECK-NEXT:    vmov.f32 s16, s0
-; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    mov r4, r0
 ; CHECK-NEXT:    vmov.f32 s17, s1
 ; CHECK-NEXT:    vldr d0, .LCPI12_0
-; CHECK-NEXT:    vmov r6, r11, d4
-; CHECK-NEXT:    str r0, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT:    vmov r5, r6, d4
+; CHECK-NEXT:    str r0, [sp, #28] @ 4-byte Spill
 ; CHECK-NEXT:    vmov r2, r3, d0
-; CHECK-NEXT:    vmov.f32 s18, s6
-; CHECK-NEXT:    vmov.f32 s20, s4
+; CHECK-NEXT:    vmov.f32 s20, s6
+; CHECK-NEXT:    vmov.f32 s18, s4
 ; CHECK-NEXT:    vmov.f32 s22, s2
-; CHECK-NEXT:    vmov.f32 s19, s7
-; CHECK-NEXT:    vmov.f32 s21, s5
+; CHECK-NEXT:    vmov.f32 s21, s7
+; CHECK-NEXT:    vmov.f32 s19, s5
 ; CHECK-NEXT:    vmov.f32 s23, s3
-; CHECK-NEXT:    str r2, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    strd r2, r3, [sp, #32] @ 8-byte Folded Spill
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    vldr d0, .LCPI12_1
+; CHECK-NEXT:    mov r10, r0
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    vmov r7, r3, d0
+; CHECK-NEXT:    str r3, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    str r7, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r2, r7
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r11
+; CHECK-NEXT:    mov r11, r0
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r6
 ; CHECK-NEXT:    bl __aeabi_d2ulz
 ; CHECK-NEXT:    vmov r8, r1, d11
-; CHECK-NEXT:    vldr d0, .LCPI12_1
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    vmov r10, r9, d9
-; CHECK-NEXT:    vmov r2, r3, d0
-; CHECK-NEXT:    csel r4, r0, r4, ne
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    str r1, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT:    vmov r5, r1, d10
-; CHECK-NEXT:    strd r5, r1, [sp, #12] @ 8-byte Folded Spill
-; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    mov r11, r2
-; CHECK-NEXT:    mov r5, r3
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    cmp.w r11, #0
+; CHECK-NEXT:    vmov r6, r9, d10
+; CHECK-NEXT:    csel r0, r0, r11, ne
+; CHECK-NEXT:    cmp.w r10, #0
+; CHECK-NEXT:    str r1, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    vmov r2, r1, d9
+; CHECK-NEXT:    strd r2, r1, [sp, #16] @ 8-byte Folded Spill
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r4, #-1
-; CHECK-NEXT:    str r4, [r7, #16]
-; CHECK-NEXT:    mov r0, r10
-; CHECK-NEXT:    ldr r7, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    str r0, [r4, #16]
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    ldr r5, [sp, #32] @ 4-byte Reload
 ; CHECK-NEXT:    mov r1, r9
-; CHECK-NEXT:    ldr r6, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    ldr.w r10, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    mov r11, r0
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r9
+; CHECK-NEXT:    mov r3, r7
 ; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r9
 ; CHECK-NEXT:    bl __aeabi_d2ulz
 ; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    mov r1, r9
-; CHECK-NEXT:    csel r4, r0, r4, ne
-; CHECK-NEXT:    mov r0, r10
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    mov r10, r11
-; CHECK-NEXT:    mov r11, r5
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    csel r0, r0, r4, ne
+; CHECK-NEXT:    cmp.w r11, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r4, #-1
-; CHECK-NEXT:    ldr r5, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    ldr r6, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    str r0, [sp, #24] @ 4-byte Spill
 ; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r6
-; CHECK-NEXT:    str r4, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    mov r11, r10
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    ldr.w r10, [sp, #4] @ 4-byte Reload
 ; CHECK-NEXT:    mov r4, r0
 ; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    csel r9, r0, r4, ne
-; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    mov r5, r6
 ; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    mov r8, r11
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r9, #-1
-; CHECK-NEXT:    ldr.w r11, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    ldr r5, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    mov r3, r6
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r9, r7
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r5
 ; CHECK-NEXT:    bl __aeabi_d2ulz
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    csel r0, r0, r6, ne
 ; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    csel r7, r0, r4, ne
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    vmov r4, r5, d8
-; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r7, #-1
-; CHECK-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r3, r6
-; CHECK-NEXT:    mov r0, r4
-; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    ldr r4, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    ldr.w r8, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT:    ldr r6, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    mov r11, r10
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r6, r0
-; CHECK-NEXT:    mov r0, r4
-; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r4
 ; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    csel r6, r0, r6, ne
-; CHECK-NEXT:    mov r0, r4
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r8
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    csel r4, r0, r5, ne
+; CHECK-NEXT:    vmov r5, r6, d8
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r4, #-1
+; CHECK-NEXT:    ldr r3, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r6
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r10, r0
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    bl __aeabi_d2ulz
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csel r0, r0, r7, ne
+; CHECK-NEXT:    cmp.w r10, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r6, #-1
-; CHECK-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    vmov q0[2], q0[0], r6, r7
-; CHECK-NEXT:    vmov q0[3], q0[1], r9, r0
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r4
 ; CHECK-NEXT:    ldr r0, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r0
+; CHECK-NEXT:    ldr r0, [sp, #28] @ 4-byte Reload
 ; CHECK-NEXT:    vstrw.32 q0, [r0]
-; CHECK-NEXT:    add sp, #32
+; CHECK-NEXT:    add sp, #40
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI12_0:
-; CHECK-NEXT:    .long 0 @ double 0
-; CHECK-NEXT:    .long 0
-; CHECK-NEXT:  .LCPI12_1:
 ; CHECK-NEXT:    .long 4292870144 @ double 4294967295
 ; CHECK-NEXT:    .long 1106247679
+; CHECK-NEXT:  .LCPI12_1:
+; CHECK-NEXT:    .long 0 @ double 0
+; CHECK-NEXT:    .long 0
     %x = call <5 x i32> @llvm.fptoui.sat.v5f64.v5i32(<5 x double> %f)
     ret <5 x i32> %x
 }
@@ -779,161 +823,172 @@ define arm_aapcs_vfpcc <6 x i32> @test_unsigned_v6f64_v6i32(<6 x double> %f) {
 ; CHECK-NEXT:    .pad #40
 ; CHECK-NEXT:    sub sp, #40
 ; CHECK-NEXT:    vmov.f32 s16, s0
-; CHECK-NEXT:    str r0, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT:    str r0, [sp, #32] @ 4-byte Spill
 ; CHECK-NEXT:    vmov.f32 s17, s1
 ; CHECK-NEXT:    vldr d0, .LCPI13_0
 ; CHECK-NEXT:    vmov r5, r6, d5
-; CHECK-NEXT:    vmov r10, r3, d0
-; CHECK-NEXT:    vmov.f32 s20, s8
-; CHECK-NEXT:    vmov.f32 s22, s6
+; CHECK-NEXT:    vmov r11, r3, d0
+; CHECK-NEXT:    vmov.f32 s22, s8
+; CHECK-NEXT:    vmov.f32 s20, s6
 ; CHECK-NEXT:    vmov.f32 s18, s4
 ; CHECK-NEXT:    vmov.f32 s24, s2
-; CHECK-NEXT:    vmov.f32 s21, s9
-; CHECK-NEXT:    vmov.f32 s23, s7
+; CHECK-NEXT:    vmov.f32 s23, s9
+; CHECK-NEXT:    vmov.f32 s21, s7
 ; CHECK-NEXT:    vmov.f32 s19, s5
 ; CHECK-NEXT:    vmov.f32 s25, s3
-; CHECK-NEXT:    str r3, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    str r3, [sp, #36] @ 4-byte Spill
 ; CHECK-NEXT:    mov r0, r5
 ; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    str.w r11, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    vldr d0, .LCPI13_1
+; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    vmov r4, r9, d0
+; CHECK-NEXT:    str r4, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT:    mov r2, r4
+; CHECK-NEXT:    mov r3, r9
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r8, r0
 ; CHECK-NEXT:    mov r0, r5
 ; CHECK-NEXT:    mov r1, r6
 ; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    vmov r9, r1, d11
-; CHECK-NEXT:    vldr d0, .LCPI13_1
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    vmov r8, r11, d10
-; CHECK-NEXT:    vmov r2, r3, d0
-; CHECK-NEXT:    csel r4, r0, r4, ne
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    str r1, [sp, #32] @ 4-byte Spill
-; CHECK-NEXT:    vmov r7, r1, d12
-; CHECK-NEXT:    str r3, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT:    mov r5, r2
-; CHECK-NEXT:    strd r7, r1, [sp, #24] @ 8-byte Folded Spill
+; CHECK-NEXT:    vmov r10, r1, d10
+; CHECK-NEXT:    cmp.w r8, #0
+; CHECK-NEXT:    vmov r5, r6, d11
+; CHECK-NEXT:    csel r0, r0, r8, ne
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    str r1, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    vmov r2, r1, d12
+; CHECK-NEXT:    strd r2, r1, [sp, #12] @ 8-byte Folded Spill
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    ldr r7, [sp, #32] @ 4-byte Reload
 ; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    str r0, [r7, #20]
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    ldr.w r8, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    mov r3, r8
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r4, #-1
-; CHECK-NEXT:    ldr r7, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    str r4, [r7, #20]
-; CHECK-NEXT:    ldr r6, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    mov r11, r0
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r2, r4
+; CHECK-NEXT:    mov r3, r9
 ; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r1, r11
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r6
 ; CHECK-NEXT:    bl __aeabi_d2ulz
 ; CHECK-NEXT:    vmov r2, r1, d9
 ; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    csel r4, r0, r4, ne
-; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r8, r5
-; CHECK-NEXT:    strd r2, r1, [sp, #16] @ 8-byte Folded Spill
-; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    ldr.w r11, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT:    mov r2, r5
-; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    csel r0, r0, r4, ne
+; CHECK-NEXT:    cmp.w r11, #0
+; CHECK-NEXT:    mov r3, r8
+; CHECK-NEXT:    strd r2, r1, [sp, #4] @ 8-byte Folded Spill
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r4, #-1
-; CHECK-NEXT:    str r4, [r7, #16]
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    ldr r5, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r6
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    csel r4, r0, r4, ne
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r2, r8
-; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    str.w r8, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    str r0, [r7, #16]
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    ldr r6, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    ldr.w r11, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r2, r11
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r4, #-1
 ; CHECK-NEXT:    ldr r5, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    ldr r7, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    mov r3, r6
-; CHECK-NEXT:    str r4, [sp, #32] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    mov r8, r9
+; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    mov r1, r6
 ; CHECK-NEXT:    bl __aeabi_d2ulz
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    csel r0, r0, r7, ne
 ; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    ldr r7, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    ldr r4, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    ldr.w r9, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    str r0, [sp, #20] @ 4-byte Spill
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    csel r9, r0, r4, ne
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r2, r8
-; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    mov r3, r4
+; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    mov r3, r8
+; CHECK-NEXT:    mov r6, r7
+; CHECK-NEXT:    mov r10, r5
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    mov r0, r9
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    bl __aeabi_d2ulz
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    csel r9, r0, r7, ne
+; CHECK-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-NEXT:    mov r3, r4
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it ne
 ; CHECK-NEXT:    movne.w r9, #-1
-; CHECK-NEXT:    ldr r7, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    ldr r5, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    mov r3, r6
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    ldr r6, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    ldr r5, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    csel r8, r0, r4, ne
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    ldr r7, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    mov r2, r7
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    vmov r4, r5, d8
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r6
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r8, #-1
-; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    mov r11, r0
+; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r3, r8
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r6, r0
-; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r5
 ; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    csel r6, r0, r6, ne
-; CHECK-NEXT:    mov r0, r4
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    vmov r5, r6, d8
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csel r4, r0, r7, ne
+; CHECK-NEXT:    cmp.w r11, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r4, #-1
+; CHECK-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    ldr r3, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r6
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    ldr r2, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    mov r10, r0
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r3, r8
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    bl __aeabi_d2ulz
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csel r0, r0, r7, ne
+; CHECK-NEXT:    cmp.w r10, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r6, #-1
-; CHECK-NEXT:    ldr r0, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT:    vmov q0[2], q0[0], r6, r8
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r4
+; CHECK-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
 ; CHECK-NEXT:    vmov q0[3], q0[1], r9, r0
-; CHECK-NEXT:    ldr r0, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    ldr r0, [sp, #32] @ 4-byte Reload
 ; CHECK-NEXT:    vstrw.32 q0, [r0]
 ; CHECK-NEXT:    add sp, #40
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12}
@@ -942,11 +997,11 @@ define arm_aapcs_vfpcc <6 x i32> @test_unsigned_v6f64_v6i32(<6 x double> %f) {
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI13_0:
-; CHECK-NEXT:    .long 0 @ double 0
-; CHECK-NEXT:    .long 0
-; CHECK-NEXT:  .LCPI13_1:
 ; CHECK-NEXT:    .long 4292870144 @ double 4294967295
 ; CHECK-NEXT:    .long 1106247679
+; CHECK-NEXT:  .LCPI13_1:
+; CHECK-NEXT:    .long 0 @ double 0
+; CHECK-NEXT:    .long 0
     %x = call <6 x i32> @llvm.fptoui.sat.v6f64.v6i32(<6 x double> %f)
     ret <6 x i32> %x
 }
@@ -1425,66 +1480,65 @@ define arm_aapcs_vfpcc <4 x i50> @test_unsigned_v4f32_v4i50(<4 x float> %f) {
 ; CHECK-NEXT:    .vsave {d8, d9, d10}
 ; CHECK-NEXT:    vpush {d8, d9, d10}
 ; CHECK-NEXT:    vmov q4, q0
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    vmov r0, s16
+; CHECK-NEXT:    mov r9, r0
+; CHECK-NEXT:    vmov r0, s18
 ; CHECK-NEXT:    bl __aeabi_f2ulz
-; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    mov r8, r0
 ; CHECK-NEXT:    vmov r0, s19
-; CHECK-NEXT:    vcmp.f32 s16, #0
-; CHECK-NEXT:    mov r9, r1
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vmov r7, s18
 ; CHECK-NEXT:    vldr s20, .LCPI28_0
+; CHECK-NEXT:    vcmp.f32 s18, #0
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itt lt
-; CHECK-NEXT:    movlt.w r9, #0
-; CHECK-NEXT:    movlt r5, #0
+; CHECK-NEXT:    movlt.w r8, #0
+; CHECK-NEXT:    movlt r7, #0
+; CHECK-NEXT:    vcmp.f32 s18, s20
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vmov r5, s16
+; CHECK-NEXT:    ittt gt
+; CHECK-NEXT:    movwgt r7, #65535
+; CHECK-NEXT:    movtgt r7, #3
+; CHECK-NEXT:    movgt.w r8, #-1
 ; CHECK-NEXT:    bl __aeabi_f2ulz
 ; CHECK-NEXT:    mov r4, r0
 ; CHECK-NEXT:    vcmp.f32 s19, #0
-; CHECK-NEXT:    mov r0, r7
 ; CHECK-NEXT:    mov r10, r1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    mov r0, r5
 ; CHECK-NEXT:    itt lt
 ; CHECK-NEXT:    movlt.w r10, #0
 ; CHECK-NEXT:    movlt r4, #0
-; CHECK-NEXT:    bl __aeabi_f2ulz
-; CHECK-NEXT:    mov r7, r0
-; CHECK-NEXT:    vmov r0, s17
-; CHECK-NEXT:    vcmp.f32 s18, #0
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s18, s20
-; CHECK-NEXT:    itt lt
-; CHECK-NEXT:    movlt r1, #0
-; CHECK-NEXT:    movlt r7, #0
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s19, s20
-; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r7, #-1
-; CHECK-NEXT:    itt gt
-; CHECK-NEXT:    movwgt r1, #65535
-; CHECK-NEXT:    movtgt r1, #3
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r4, #-1
-; CHECK-NEXT:    itt gt
+; CHECK-NEXT:    ittt gt
 ; CHECK-NEXT:    movwgt r10, #65535
 ; CHECK-NEXT:    movtgt r10, #3
-; CHECK-NEXT:    mov r3, r10
-; CHECK-NEXT:    bfc r1, #18, #14
-; CHECK-NEXT:    bfc r3, #18, #14
-; CHECK-NEXT:    mov r6, r7
+; CHECK-NEXT:    movgt.w r4, #-1
+; CHECK-NEXT:    bl __aeabi_f2ulz
+; CHECK-NEXT:    mov r5, r1
+; CHECK-NEXT:    vcmp.f32 s16, #0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    itt lt
+; CHECK-NEXT:    movlt r0, #0
+; CHECK-NEXT:    movlt r5, #0
 ; CHECK-NEXT:    vcmp.f32 s16, s20
-; CHECK-NEXT:    lsll r4, r3, #22
-; CHECK-NEXT:    lsrl r6, r1, #28
+; CHECK-NEXT:    mov r1, r10
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r5, #-1
-; CHECK-NEXT:    str.w r5, [r8]
+; CHECK-NEXT:    movgt.w r0, #-1
+; CHECK-NEXT:    str.w r0, [r9]
+; CHECK-NEXT:    vmov r0, s17
+; CHECK-NEXT:    bfc r1, #18, #14
+; CHECK-NEXT:    bfc r7, #18, #14
+; CHECK-NEXT:    mov r6, r8
+; CHECK-NEXT:    lsll r4, r1, #22
+; CHECK-NEXT:    lsrl r6, r7, #28
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itt gt
-; CHECK-NEXT:    movwgt r9, #65535
-; CHECK-NEXT:    movtgt r9, #3
-; CHECK-NEXT:    orrs r1, r3
-; CHECK-NEXT:    str.w r1, [r8, #20]
+; CHECK-NEXT:    movwgt r5, #65535
+; CHECK-NEXT:    movtgt r5, #3
+; CHECK-NEXT:    orrs r1, r7
+; CHECK-NEXT:    str.w r1, [r9, #20]
 ; CHECK-NEXT:    bl __aeabi_f2ulz
 ; CHECK-NEXT:    vcmp.f32 s17, #0
 ; CHECK-NEXT:    orr.w r2, r6, r4
@@ -1493,23 +1547,24 @@ define arm_aapcs_vfpcc <4 x i50> @test_unsigned_v4f32_v4i50(<4 x float> %f) {
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    vcmp.f32 s17, s20
-; CHECK-NEXT:    bfc r9, #18, #14
+; CHECK-NEXT:    bfc r5, #18, #14
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itt gt
 ; CHECK-NEXT:    movwgt r1, #65535
 ; CHECK-NEXT:    movtgt r1, #3
-; CHECK-NEXT:    str.w r2, [r8, #16]
+; CHECK-NEXT:    str.w r2, [r9, #16]
 ; CHECK-NEXT:    lsr.w r2, r10, #10
-; CHECK-NEXT:    strb.w r2, [r8, #24]
+; CHECK-NEXT:    strb.w r2, [r9, #24]
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    mov r2, r0
 ; CHECK-NEXT:    bfc r1, #18, #14
+; CHECK-NEXT:    orr.w r0, r5, r0, lsl #18
 ; CHECK-NEXT:    lsrl r2, r1, #14
-; CHECK-NEXT:    orr.w r0, r9, r0, lsl #18
-; CHECK-NEXT:    orr.w r1, r1, r7, lsl #4
-; CHECK-NEXT:    strd r2, r1, [r8, #8]
-; CHECK-NEXT:    str.w r0, [r8, #4]
+; CHECK-NEXT:    orr.w r1, r1, r8, lsl #4
+; CHECK-NEXT:    strd r2, r1, [r9, #8]
+; CHECK-NEXT:    str.w r0, [r9, #4]
 ; CHECK-NEXT:    vpop {d8, d9, d10}
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
 ; CHECK-NEXT:    .p2align 2
@@ -1615,14 +1670,36 @@ define arm_aapcs_vfpcc <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-NEXT:    sub sp, #8
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    vmov r0, s18
+; CHECK-NEXT:    vmov r0, s17
 ; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    vmov r6, s17
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    vmov r0, s18
 ; CHECK-NEXT:    vldr s20, .LCPI30_0
-; CHECK-NEXT:    vcmp.f32 s18, #0
+; CHECK-NEXT:    vcmp.f32 s17, #0
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    mov r6, r2
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itttt lt
+; CHECK-NEXT:    movlt r5, #0
+; CHECK-NEXT:    movlt r7, #0
+; CHECK-NEXT:    movlt r6, #0
 ; CHECK-NEXT:    movlt r3, #0
+; CHECK-NEXT:    vcmp.f32 s17, s20
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    it gt
+; CHECK-NEXT:    movgt r3, #15
+; CHECK-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    ittt gt
+; CHECK-NEXT:    movgt.w r6, #-1
+; CHECK-NEXT:    movgt.w r7, #-1
+; CHECK-NEXT:    movgt.w r5, #-1
+; CHECK-NEXT:    str r5, [sp] @ 4-byte Spill
+; CHECK-NEXT:    bl __fixunssfti
+; CHECK-NEXT:    vcmp.f32 s18, #0
+; CHECK-NEXT:    mov r10, r3
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    itttt lt
+; CHECK-NEXT:    movlt.w r10, #0
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movlt r2, #0
@@ -1634,37 +1711,16 @@ define arm_aapcs_vfpcc <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    str.w r1, [r4, #29]
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    str.w r0, [r4, #25]
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r3, #15
-; CHECK-NEXT:    vmov r7, s19
-; CHECK-NEXT:    str r3, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    mov r10, r0
-; CHECK-NEXT:    vcmp.f32 s17, #0
-; CHECK-NEXT:    mov r5, r1
-; CHECK-NEXT:    mov r6, r2
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    itttt lt
-; CHECK-NEXT:    movlt.w r10, #0
-; CHECK-NEXT:    movlt r5, #0
-; CHECK-NEXT:    movlt r6, #0
-; CHECK-NEXT:    movlt r3, #0
-; CHECK-NEXT:    vcmp.f32 s17, s20
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r3, #15
-; CHECK-NEXT:    str r3, [sp] @ 4-byte Spill
-; CHECK-NEXT:    ittt gt
-; CHECK-NEXT:    movgt.w r6, #-1
-; CHECK-NEXT:    movgt.w r5, #-1
-; CHECK-NEXT:    movgt.w r10, #-1
+; CHECK-NEXT:    movgt.w r10, #15
+; CHECK-NEXT:    mov r0, r1
 ; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    mov r5, r0
 ; CHECK-NEXT:    vmov r0, s16
 ; CHECK-NEXT:    vcmp.f32 s19, #0
 ; CHECK-NEXT:    mov r9, r1
@@ -1673,7 +1729,7 @@ define arm_aapcs_vfpcc <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itttt lt
 ; CHECK-NEXT:    movlt.w r9, #0
-; CHECK-NEXT:    movlt r7, #0
+; CHECK-NEXT:    movlt r5, #0
 ; CHECK-NEXT:    movlt.w r8, #0
 ; CHECK-NEXT:    movlt.w r11, #0
 ; CHECK-NEXT:    vcmp.f32 s19, s20
@@ -1681,7 +1737,7 @@ define arm_aapcs_vfpcc <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-NEXT:    itttt gt
 ; CHECK-NEXT:    movgt.w r11, #15
 ; CHECK-NEXT:    movgt.w r8, #-1
-; CHECK-NEXT:    movgt.w r7, #-1
+; CHECK-NEXT:    movgt.w r5, #-1
 ; CHECK-NEXT:    movgt.w r9, #-1
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    vcmp.f32 s16, #0
@@ -1702,31 +1758,31 @@ define arm_aapcs_vfpcc <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    str r0, [r4]
-; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    mov r0, r5
 ; CHECK-NEXT:    lsrl r0, r9, #28
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    orr.w r1, r9, r8, lsl #4
+; CHECK-NEXT:    str.w r1, [r4, #45]
 ; CHECK-NEXT:    and r1, r11, #15
 ; CHECK-NEXT:    str.w r0, [r4, #41]
-; CHECK-NEXT:    mov r0, r10
-; CHECK-NEXT:    lsrl r0, r5, #28
-; CHECK-NEXT:    str r0, [r4, #16]
-; CHECK-NEXT:    orr.w r0, r9, r8, lsl #4
+; CHECK-NEXT:    and r0, r10, #15
 ; CHECK-NEXT:    lsrl r8, r1, #28
-; CHECK-NEXT:    str.w r0, [r4, #45]
 ; CHECK-NEXT:    strb.w r8, [r4, #49]
-; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    and r0, r0, #15
-; CHECK-NEXT:    orr.w r0, r0, r7, lsl #4
+; CHECK-NEXT:    orr.w r0, r0, r5, lsl #4
 ; CHECK-NEXT:    str.w r0, [r4, #37]
-; CHECK-NEXT:    orr.w r0, r5, r6, lsl #4
-; CHECK-NEXT:    str r0, [r4, #20]
-; CHECK-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-NEXT:    ldr r2, [sp] @ 4-byte Reload
+; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    lsrl r0, r7, #28
+; CHECK-NEXT:    orr.w r1, r7, r6, lsl #4
+; CHECK-NEXT:    strd r0, r1, [r4, #16]
+; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-NEXT:    and r1, r0, #15
 ; CHECK-NEXT:    lsrl r6, r1, #28
 ; CHECK-NEXT:    strb r6, [r4, #24]
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt r3, #15
 ; CHECK-NEXT:    and r0, r3, #15
-; CHECK-NEXT:    orr.w r0, r0, r10, lsl #4
+; CHECK-NEXT:    orr.w r0, r0, r2, lsl #4
 ; CHECK-NEXT:    str r0, [r4, #12]
 ; CHECK-NEXT:    add sp, #8
 ; CHECK-NEXT:    vpop {d8, d9, d10}
@@ -1753,13 +1809,13 @@ define arm_aapcs_vfpcc <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) {
 ; CHECK-NEXT:    mov r4, r0
 ; CHECK-NEXT:    vmov r0, s19
 ; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    vmov r5, s18
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    vmov r0, s18
 ; CHECK-NEXT:    vldr s20, .LCPI31_0
 ; CHECK-NEXT:    vcmp.f32 s19, #0
-; CHECK-NEXT:    add.w r12, r4, #48
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itttt lt
-; CHECK-NEXT:    movlt r0, #0
+; CHECK-NEXT:    movlt r5, #0
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    movlt r3, #0
@@ -1767,29 +1823,32 @@ define arm_aapcs_vfpcc <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) {
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r3, #-1
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    ittt gt
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    movgt.w r1, #-1
-; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT:    vmov r7, s16
+; CHECK-NEXT:    movgt.w r5, #-1
+; CHECK-NEXT:    strd r5, r1, [r4, #48]
 ; CHECK-NEXT:    vmov r6, s17
-; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    vmov r7, s16
+; CHECK-NEXT:    strd r2, r3, [r4, #56]
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    vcmp.f32 s18, #0
 ; CHECK-NEXT:    add.w r12, r4, #32
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s18, s20
 ; CHECK-NEXT:    itttt lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    movlt r3, #0
-; CHECK-NEXT:    vcmp.f32 s18, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itttt gt
+; CHECK-NEXT:    ittt gt
 ; CHECK-NEXT:    movgt.w r3, #-1
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    movgt.w r1, #-1
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    stm.w r12, {r0, r1, r2, r3}
 ; CHECK-NEXT:    mov r0, r6
@@ -1797,34 +1856,38 @@ define arm_aapcs_vfpcc <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) {
 ; CHECK-NEXT:    vcmp.f32 s17, #0
 ; CHECK-NEXT:    add.w r12, r4, #16
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s17, s20
 ; CHECK-NEXT:    itttt lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    movlt r3, #0
-; CHECK-NEXT:    vcmp.f32 s17, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itttt gt
+; CHECK-NEXT:    ittt gt
 ; CHECK-NEXT:    movgt.w r3, #-1
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    movgt.w r1, #-1
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    stm.w r12, {r0, r1, r2, r3}
 ; CHECK-NEXT:    mov r0, r7
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    vcmp.f32 s16, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s16, s20
 ; CHECK-NEXT:    itttt lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    movlt r3, #0
-; CHECK-NEXT:    vcmp.f32 s16, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itttt gt
+; CHECK-NEXT:    ittt gt
 ; CHECK-NEXT:    movgt.w r3, #-1
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    movgt.w r1, #-1
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    stm r4!, {r0, r1, r2, r3}
 ; CHECK-NEXT:    vpop {d8, d9, d10}
@@ -1865,55 +1928,57 @@ define arm_aapcs_vfpcc <2 x i1> @test_unsigned_v2f64_v2i1(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #8
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI32_0
-; CHECK-NEXT:    vmov r4, r8, d8
+; CHECK-NEXT:    vmov r5, r6, d8
 ; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    vmov r10, r3, d0
-; CHECK-NEXT:    str r3, [sp] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r4
-; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    vmov r10, r9, d0
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r6
 ; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    vldr d0, .LCPI32_1
+; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    vmov r4, r11, d0
+; CHECK-NEXT:    mov r2, r4
+; CHECK-NEXT:    mov r3, r11
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r9, r0
-; CHECK-NEXT:    mov r0, r4
-; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    mov r8, r0
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r6
 ; CHECK-NEXT:    bl __aeabi_d2uiz
-; CHECK-NEXT:    vldr d0, .LCPI32_1
-; CHECK-NEXT:    cmp.w r9, #0
-; CHECK-NEXT:    csel r7, r0, r9, ne
-; CHECK-NEXT:    mov r0, r4
-; CHECK-NEXT:    vmov r11, r3, d0
-; CHECK-NEXT:    mov r1, r8
 ; CHECK-NEXT:    vmov r6, r5, d9
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r8, r3
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    cmp.w r8, #0
+; CHECK-NEXT:    csel r0, r0, r8, ne
+; CHECK-NEXT:    cmp r7, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r7, #1
-; CHECK-NEXT:    and r0, r7, #1
-; CHECK-NEXT:    ldr r3, [sp] @ 4-byte Reload
-; CHECK-NEXT:    rsbs r0, r0, #0
+; CHECK-NEXT:    movne r0, #1
 ; CHECK-NEXT:    movs r7, #0
+; CHECK-NEXT:    and r0, r0, #1
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    rsbs r0, r0, #0
+; CHECK-NEXT:    mov r3, r9
 ; CHECK-NEXT:    bfi r7, r0, #0, #1
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r8, r0
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r2, r4
+; CHECK-NEXT:    mov r3, r11
 ; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    mov r4, r0
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r5
 ; CHECK-NEXT:    bl __aeabi_d2uiz
 ; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    csel r4, r0, r4, ne
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    csel r0, r0, r4, ne
+; CHECK-NEXT:    cmp.w r8, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r4, #1
-; CHECK-NEXT:    and r0, r4, #1
+; CHECK-NEXT:    movne r0, #1
+; CHECK-NEXT:    and r0, r0, #1
 ; CHECK-NEXT:    rsbs r0, r0, #0
 ; CHECK-NEXT:    bfi r7, r0, #1, #1
 ; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
@@ -1925,11 +1990,11 @@ define arm_aapcs_vfpcc <2 x i1> @test_unsigned_v2f64_v2i1(<2 x double> %f) {
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI32_0:
-; CHECK-NEXT:    .long 0 @ double 0
-; CHECK-NEXT:    .long 0
-; CHECK-NEXT:  .LCPI32_1:
 ; CHECK-NEXT:    .long 0 @ double 1
 ; CHECK-NEXT:    .long 1072693248
+; CHECK-NEXT:  .LCPI32_1:
+; CHECK-NEXT:    .long 0 @ double 0
+; CHECK-NEXT:    .long 0
     %x = call <2 x i1> @llvm.fptoui.sat.v2f64.v2i1(<2 x double> %f)
     ret <2 x i1> %x
 }
@@ -1943,60 +2008,84 @@ define arm_aapcs_vfpcc <2 x i8> @test_unsigned_v2f64_v2i8(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #8
-; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    .pad #16
+; CHECK-NEXT:    sub sp, #16
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI33_0
 ; CHECK-NEXT:    vmov r6, r7, d9
-; CHECK-NEXT:    vmov r11, r3, d0
-; CHECK-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    vmov r2, r3, d0
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    strd r3, r2, [sp, #4] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r8, r0
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    bl __aeabi_d2ulz
 ; CHECK-NEXT:    vldr d0, .LCPI33_1
-; CHECK-NEXT:    cmp.w r8, #0
-; CHECK-NEXT:    csel r9, r0, r8, ne
-; CHECK-NEXT:    csel r8, r1, r8, ne
-; CHECK-NEXT:    vmov r10, r3, d0
-; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    mov r10, r1
+; CHECK-NEXT:    vmov r9, r8, d0
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    vmov r5, r4, d8
-; CHECK-NEXT:    str r3, [sp] @ 4-byte Spill
-; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    vmov r11, r4, d8
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r8
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movne.w r8, #0
-; CHECK-NEXT:    movne.w r9, #255
-; CHECK-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    mov r6, r0
-; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r0, r11
 ; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    ldr r3, [sp] @ 4-byte Reload
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r8
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r6, #1
 ; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    csel r7, r1, r6, ne
-; CHECK-NEXT:    csel r6, r0, r6, ne
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r5, #255
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movne r6, #255
-; CHECK-NEXT:    movne r7, #0
-; CHECK-NEXT:    vmov q0[2], q0[0], r6, r9
-; CHECK-NEXT:    vmov q0[3], q0[1], r7, r8
-; CHECK-NEXT:    add sp, #8
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r7, #1
+; CHECK-NEXT:    ldrd r3, r2, [sp, #4] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r8, r4
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    lsrs r4, r0, #5
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    bl __aeabi_d2ulz
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #0
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #255
+; CHECK-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r5
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r10, #0
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r10, #0
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r10
+; CHECK-NEXT:    add sp, #16
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -2021,60 +2110,84 @@ define arm_aapcs_vfpcc <2 x i13> @test_unsigned_v2f64_v2i13(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #8
-; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    .pad #16
+; CHECK-NEXT:    sub sp, #16
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI34_0
 ; CHECK-NEXT:    vmov r6, r7, d9
-; CHECK-NEXT:    vmov r11, r3, d0
-; CHECK-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    vmov r2, r3, d0
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    strd r3, r2, [sp, #4] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r8, r0
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    bl __aeabi_d2ulz
 ; CHECK-NEXT:    vldr d0, .LCPI34_1
-; CHECK-NEXT:    cmp.w r8, #0
-; CHECK-NEXT:    csel r9, r0, r8, ne
-; CHECK-NEXT:    csel r8, r1, r8, ne
-; CHECK-NEXT:    vmov r10, r3, d0
-; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    mov r10, r1
+; CHECK-NEXT:    vmov r9, r8, d0
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    vmov r5, r4, d8
-; CHECK-NEXT:    str r3, [sp] @ 4-byte Spill
-; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    vmov r11, r4, d8
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r8
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movne.w r8, #0
-; CHECK-NEXT:    movwne r9, #8191
-; CHECK-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    mov r6, r0
-; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r0, r11
 ; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    ldr r3, [sp] @ 4-byte Reload
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r8
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r6, #1
 ; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    csel r7, r1, r6, ne
-; CHECK-NEXT:    csel r6, r0, r6, ne
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movwne r5, #8191
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movwne r6, #8191
-; CHECK-NEXT:    movne r7, #0
-; CHECK-NEXT:    vmov q0[2], q0[0], r6, r9
-; CHECK-NEXT:    vmov q0[3], q0[1], r7, r8
-; CHECK-NEXT:    add sp, #8
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r7, #1
+; CHECK-NEXT:    ldrd r3, r2, [sp, #4] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r8, r4
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    lsrs r4, r0, #5
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    bl __aeabi_d2ulz
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #0
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movwne r0, #8191
+; CHECK-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r5
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r10, #0
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r10, #0
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r10
+; CHECK-NEXT:    add sp, #16
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -2099,60 +2212,84 @@ define arm_aapcs_vfpcc <2 x i16> @test_unsigned_v2f64_v2i16(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #8
-; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    .pad #16
+; CHECK-NEXT:    sub sp, #16
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI35_0
 ; CHECK-NEXT:    vmov r6, r7, d9
-; CHECK-NEXT:    vmov r11, r3, d0
-; CHECK-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    vmov r2, r3, d0
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    strd r3, r2, [sp, #4] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r8, r0
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    bl __aeabi_d2ulz
 ; CHECK-NEXT:    vldr d0, .LCPI35_1
-; CHECK-NEXT:    cmp.w r8, #0
-; CHECK-NEXT:    csel r9, r0, r8, ne
-; CHECK-NEXT:    csel r8, r1, r8, ne
-; CHECK-NEXT:    vmov r10, r3, d0
-; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    mov r10, r1
+; CHECK-NEXT:    vmov r9, r8, d0
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    vmov r5, r4, d8
-; CHECK-NEXT:    str r3, [sp] @ 4-byte Spill
-; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    vmov r11, r4, d8
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r8
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movne.w r8, #0
-; CHECK-NEXT:    movwne r9, #65535
-; CHECK-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    mov r6, r0
-; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r0, r11
 ; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    ldr r3, [sp] @ 4-byte Reload
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r8
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r6, #1
 ; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    csel r7, r1, r6, ne
-; CHECK-NEXT:    csel r6, r0, r6, ne
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movwne r5, #65535
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movwne r6, #65535
-; CHECK-NEXT:    movne r7, #0
-; CHECK-NEXT:    vmov q0[2], q0[0], r6, r9
-; CHECK-NEXT:    vmov q0[3], q0[1], r7, r8
-; CHECK-NEXT:    add sp, #8
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r7, #1
+; CHECK-NEXT:    ldrd r3, r2, [sp, #4] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r8, r4
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    lsrs r4, r0, #5
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    bl __aeabi_d2ulz
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #0
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movwne r0, #65535
+; CHECK-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r5
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r10, #0
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r10, #0
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r10
+; CHECK-NEXT:    add sp, #16
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -2181,61 +2318,79 @@ define arm_aapcs_vfpcc <2 x i19> @test_unsigned_v2f64_v2i19(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #16
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI36_0
-; CHECK-NEXT:    vmov r11, r5, d8
-; CHECK-NEXT:    vmov r6, r7, d0
-; CHECK-NEXT:    str r5, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    vmov r11, r10, d8
+; CHECK-NEXT:    vmov r2, r3, d0
 ; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r6
-; CHECK-NEXT:    mov r3, r7
-; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    mov r1, r10
+; CHECK-NEXT:    strd r2, r3, [sp, #8] @ 8-byte Folded Spill
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    vldr d0, .LCPI36_1
 ; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    vldr d0, .LCPI36_1
-; CHECK-NEXT:    vmov r5, r8, d9
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    vmov r3, r2, d0
-; CHECK-NEXT:    csel r9, r1, r4, ne
-; CHECK-NEXT:    csel r10, r0, r4, ne
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    strd r3, r2, [sp, #4] @ 8-byte Folded Spill
-; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    vmov r5, r7, d0
+; CHECK-NEXT:    mov r1, r10
+; CHECK-NEXT:    vmov r8, r6, d9
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r4, #1
+; CHECK-NEXT:    mov r2, r5
 ; CHECK-NEXT:    mov r3, r7
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    lsr.w r9, r0, #5
+; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    ldr r7, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    csel r6, r0, r4, ne
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    ldr r5, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT:    csel r4, r1, r4, ne
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r6
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    ittt ne
-; CHECK-NEXT:    movne r4, #0
-; CHECK-NEXT:    movwne r6, #65535
-; CHECK-NEXT:    movtne r6, #7
-; CHECK-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    ittt ne
-; CHECK-NEXT:    movwne r10, #65535
-; CHECK-NEXT:    movtne r10, #7
-; CHECK-NEXT:    movne.w r9, #0
-; CHECK-NEXT:    vmov q0[2], q0[0], r10, r6
-; CHECK-NEXT:    vmov q0[3], q0[1], r9, r4
+; CHECK-NEXT:    mov r1, r10
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r6, #1
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    movwne r5, #65535
+; CHECK-NEXT:    movtne r5, #7
+; CHECK-NEXT:    bl __aeabi_d2ulz
+; CHECK-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #0
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    movwne r0, #65535
+; CHECK-NEXT:    movtne r0, #7
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r7, #0
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r7, #0
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r5
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r7
 ; CHECK-NEXT:    add sp, #16
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
@@ -2243,11 +2398,11 @@ define arm_aapcs_vfpcc <2 x i19> @test_unsigned_v2f64_v2i19(<2 x double> %f) {
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI36_0:
-; CHECK-NEXT:    .long 0 @ double 0
-; CHECK-NEXT:    .long 0
-; CHECK-NEXT:  .LCPI36_1:
 ; CHECK-NEXT:    .long 0 @ double 524287
 ; CHECK-NEXT:    .long 1092616188
+; CHECK-NEXT:  .LCPI36_1:
+; CHECK-NEXT:    .long 0 @ double 0
+; CHECK-NEXT:    .long 0
     %x = call <2 x i19> @llvm.fptoui.sat.v2f64.v2i19(<2 x double> %f)
     ret <2 x i19> %x
 }
@@ -2261,60 +2416,84 @@ define arm_aapcs_vfpcc <2 x i32> @test_unsigned_v2f64_v2i32_duplicate(<2 x doubl
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #8
-; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    .pad #16
+; CHECK-NEXT:    sub sp, #16
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI37_0
 ; CHECK-NEXT:    vmov r6, r7, d9
-; CHECK-NEXT:    vmov r11, r3, d0
-; CHECK-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    vmov r2, r3, d0
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    strd r3, r2, [sp, #4] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r8, r0
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    bl __aeabi_d2ulz
 ; CHECK-NEXT:    vldr d0, .LCPI37_1
-; CHECK-NEXT:    cmp.w r8, #0
-; CHECK-NEXT:    csel r9, r0, r8, ne
-; CHECK-NEXT:    csel r8, r1, r8, ne
-; CHECK-NEXT:    vmov r10, r3, d0
-; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    mov r10, r1
+; CHECK-NEXT:    vmov r9, r8, d0
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    vmov r5, r4, d8
-; CHECK-NEXT:    str r3, [sp] @ 4-byte Spill
-; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    vmov r11, r4, d8
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r8
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movne.w r8, #0
-; CHECK-NEXT:    movne.w r9, #-1
-; CHECK-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    mov r6, r0
-; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r0, r11
 ; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    ldr r3, [sp] @ 4-byte Reload
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r8
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r6, #1
 ; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    csel r7, r1, r6, ne
-; CHECK-NEXT:    csel r6, r0, r6, ne
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r5, #-1
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movne.w r6, #-1
-; CHECK-NEXT:    movne r7, #0
-; CHECK-NEXT:    vmov q0[2], q0[0], r6, r9
-; CHECK-NEXT:    vmov q0[3], q0[1], r7, r8
-; CHECK-NEXT:    add sp, #8
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r7, #1
+; CHECK-NEXT:    ldrd r3, r2, [sp, #4] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r8, r4
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    lsrs r4, r0, #5
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    bl __aeabi_d2ulz
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #0
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r5
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r10, #0
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r10, #0
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r10
+; CHECK-NEXT:    add sp, #16
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -2343,61 +2522,79 @@ define arm_aapcs_vfpcc <2 x i50> @test_unsigned_v2f64_v2i50(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #16
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI38_0
-; CHECK-NEXT:    vmov r11, r5, d8
-; CHECK-NEXT:    vmov r6, r7, d0
-; CHECK-NEXT:    str r5, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    vmov r11, r10, d8
+; CHECK-NEXT:    vmov r2, r3, d0
 ; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    mov r2, r6
-; CHECK-NEXT:    mov r3, r7
-; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    mov r1, r10
+; CHECK-NEXT:    strd r2, r3, [sp, #8] @ 8-byte Folded Spill
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    vldr d0, .LCPI38_1
 ; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    vldr d0, .LCPI38_1
-; CHECK-NEXT:    vmov r5, r8, d9
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    vmov r3, r2, d0
-; CHECK-NEXT:    csel r10, r0, r4, ne
-; CHECK-NEXT:    csel r9, r1, r4, ne
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    strd r3, r2, [sp, #4] @ 8-byte Folded Spill
-; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    vmov r5, r7, d0
+; CHECK-NEXT:    mov r1, r10
+; CHECK-NEXT:    vmov r8, r6, d9
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r4, #1
+; CHECK-NEXT:    mov r2, r5
 ; CHECK-NEXT:    mov r3, r7
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    lsr.w r9, r0, #5
+; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    ldr r7, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    csel r6, r1, r4, ne
-; CHECK-NEXT:    csel r4, r0, r4, ne
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    ldr r5, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r8
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r7, #0
+; CHECK-NEXT:    ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r6
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    ittt ne
-; CHECK-NEXT:    movne.w r4, #-1
-; CHECK-NEXT:    movwne r6, #65535
-; CHECK-NEXT:    movtne r6, #3
-; CHECK-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r2, r7
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    ittt ne
-; CHECK-NEXT:    movwne r9, #65535
-; CHECK-NEXT:    movtne r9, #3
-; CHECK-NEXT:    movne.w r10, #-1
-; CHECK-NEXT:    vmov q0[2], q0[0], r10, r4
-; CHECK-NEXT:    vmov q0[3], q0[1], r9, r6
+; CHECK-NEXT:    mov r1, r10
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r6, #1
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    movwne r7, #65535
+; CHECK-NEXT:    movtne r7, #3
+; CHECK-NEXT:    bl __aeabi_d2ulz
+; CHECK-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    movwne r1, #65535
+; CHECK-NEXT:    movtne r1, #3
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r5, #-1
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #0
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r5
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r7
 ; CHECK-NEXT:    add sp, #16
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
@@ -2405,11 +2602,11 @@ define arm_aapcs_vfpcc <2 x i50> @test_unsigned_v2f64_v2i50(<2 x double> %f) {
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI38_0:
-; CHECK-NEXT:    .long 0 @ double 0
-; CHECK-NEXT:    .long 0
-; CHECK-NEXT:  .LCPI38_1:
 ; CHECK-NEXT:    .long 4294967288 @ double 1125899906842623
 ; CHECK-NEXT:    .long 1125122047
+; CHECK-NEXT:  .LCPI38_1:
+; CHECK-NEXT:    .long 0 @ double 0
+; CHECK-NEXT:    .long 0
     %x = call <2 x i50> @llvm.fptoui.sat.v2f64.v2i50(<2 x double> %f)
     ret <2 x i50> %x
 }
@@ -2423,60 +2620,84 @@ define arm_aapcs_vfpcc <2 x i64> @test_unsigned_v2f64_v2i64(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #8
-; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    .pad #16
+; CHECK-NEXT:    sub sp, #16
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI39_0
 ; CHECK-NEXT:    vmov r6, r7, d9
-; CHECK-NEXT:    vmov r11, r3, d0
-; CHECK-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    vmov r2, r3, d0
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    strd r3, r2, [sp, #4] @ 8-byte Folded Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r8, r0
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r7
 ; CHECK-NEXT:    bl __aeabi_d2ulz
 ; CHECK-NEXT:    vldr d0, .LCPI39_1
-; CHECK-NEXT:    cmp.w r8, #0
-; CHECK-NEXT:    csel r9, r0, r8, ne
-; CHECK-NEXT:    csel r8, r1, r8, ne
-; CHECK-NEXT:    vmov r10, r3, d0
-; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    mov r10, r1
+; CHECK-NEXT:    vmov r9, r8, d0
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    vmov r5, r4, d8
-; CHECK-NEXT:    str r3, [sp] @ 4-byte Spill
-; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    vmov r11, r4, d8
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r5, #0
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r8
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movne.w r8, #-1
-; CHECK-NEXT:    movne.w r9, #-1
-; CHECK-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    mov r6, r0
-; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r0, r11
 ; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    bl __aeabi_d2ulz
-; CHECK-NEXT:    ldr r3, [sp] @ 4-byte Reload
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r8
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r6, #1
 ; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    csel r7, r1, r6, ne
-; CHECK-NEXT:    csel r6, r0, r6, ne
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r5, #-1
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    itt ne
-; CHECK-NEXT:    movne.w r6, #-1
-; CHECK-NEXT:    movne.w r7, #-1
-; CHECK-NEXT:    vmov q0[2], q0[0], r6, r9
-; CHECK-NEXT:    vmov q0[3], q0[1], r7, r8
-; CHECK-NEXT:    add sp, #8
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r7, #1
+; CHECK-NEXT:    ldrd r3, r2, [sp, #4] @ 8-byte Folded Reload
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r8, r4
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    mov r1, r8
+; CHECK-NEXT:    lsrs r4, r0, #5
+; CHECK-NEXT:    mov r0, r11
+; CHECK-NEXT:    bl __aeabi_d2ulz
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #0
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r5
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r10, #0
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r10, #-1
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #0
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r1, #-1
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r10
+; CHECK-NEXT:    add sp, #16
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -2503,188 +2724,194 @@ define arm_aapcs_vfpcc <2 x i100> @test_unsigned_v2f64_v2i100(<2 x double> %f) {
 ; CHECK-NEXT:    vpush {d8, d9}
 ; CHECK-NEXT:    .pad #48
 ; CHECK-NEXT:    sub sp, #48
-; CHECK-NEXT:    vmov q4, q0
-; CHECK-NEXT:    vldr d0, .LCPI40_0
-; CHECK-NEXT:    vmov r11, r4, d8
-; CHECK-NEXT:    mov r6, r0
-; CHECK-NEXT:    vmov r2, r3, d0
-; CHECK-NEXT:    str r3, [sp, #36] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    vmov q4, q0
+; CHECK-NEXT:    vldr d0, .LCPI40_0
+; CHECK-NEXT:    vmov r6, r5, d8
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vmov r2, r7, d0
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r3, r7
 ; CHECK-NEXT:    mov r9, r2
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    vldr d0, .LCPI40_1
+; CHECK-NEXT:    mov r11, r0
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    str r2, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT:    mov r10, r3
 ; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    mov r5, r4
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r5
 ; CHECK-NEXT:    bl __fixunsdfti
-; CHECK-NEXT:    vldr d0, .LCPI40_1
 ; CHECK-NEXT:    cmp.w r8, #0
-; CHECK-NEXT:    str r3, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT:    csel r4, r2, r8, ne
-; CHECK-NEXT:    vmov r10, r3, d0
-; CHECK-NEXT:    strd r1, r0, [sp, #16] @ 8-byte Folded Spill
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    str r5, [sp, #40] @ 4-byte Spill
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r8, r3
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    strd r1, r0, [sp, #20] @ 8-byte Folded Spill
+; CHECK-NEXT:    csel r0, r2, r8, ne
+; CHECK-NEXT:    str r3, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT:    cmp.w r11, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r4, #-1
-; CHECK-NEXT:    str r4, [r6, #8]
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    ldr r7, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    str r0, [r4, #8]
+; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r5
 ; CHECK-NEXT:    mov r2, r9
 ; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    str r7, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r8, r0
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r11, r6
+; CHECK-NEXT:    ldr r6, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    mov r2, r6
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    csel r4, r1, r0, ne
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    csel r0, r1, r0, ne
+; CHECK-NEXT:    cmp.w r8, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    str r0, [r4, #4]
 ; CHECK-NEXT:    mov r0, r11
 ; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    str.w r10, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT:    strd r4, r11, [sp, #28] @ 8-byte Folded Spill
+; CHECK-NEXT:    str r5, [sp, #36] @ 4-byte Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r4, #-1
-; CHECK-NEXT:    str r4, [r6, #4]
-; CHECK-NEXT:    mov r5, r6
-; CHECK-NEXT:    str r6, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    ldr r6, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    mov r3, r7
-; CHECK-NEXT:    str.w r11, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r10
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    csel r4, r1, r0, ne
-; CHECK-NEXT:    mov r1, r6
-; CHECK-NEXT:    mov r0, r11
-; CHECK-NEXT:    mov r6, r8
-; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    vmov r8, r11, d9
+; CHECK-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    csel r0, r1, r0, ne
+; CHECK-NEXT:    cmp r7, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r4, #-1
-; CHECK-NEXT:    str r4, [r5]
-; CHECK-NEXT:    mov r10, r9
-; CHECK-NEXT:    str.w r9, [sp, #32] @ 4-byte Spill
-; CHECK-NEXT:    mov r5, r7
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    str r0, [r4]
+; CHECK-NEXT:    ldr r4, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    mov r5, r9
+; CHECK-NEXT:    mov r3, r4
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r11
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    mov r2, r6
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r11
+; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    mov r6, r10
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r9, r0
+; CHECK-NEXT:    mov r10, r0
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r11
 ; CHECK-NEXT:    bl __fixunsdfti
-; CHECK-NEXT:    ldr r4, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT:    cmp.w r9, #0
-; CHECK-NEXT:    strd r3, r0, [sp, #16] @ 8-byte Folded Spill
-; CHECK-NEXT:    csel r7, r1, r9, ne
-; CHECK-NEXT:    str r2, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    add.w r12, sp, #16
+; CHECK-NEXT:    cmp.w r10, #0
+; CHECK-NEXT:    stm.w r12, {r0, r2, r3} @ 12-byte Folded Spill
+; CHECK-NEXT:    csel r9, r1, r10, ne
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    mov r2, r4
-; CHECK-NEXT:    mov r3, r6
-; CHECK-NEXT:    str r6, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    mov r3, r4
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r9, #-1
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    str r5, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r10, r4
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    ldr r5, [sp, #44] @ 4-byte Reload
 ; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r7, #-1
+; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    str r6, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    mov r2, r5
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r2, r4
-; CHECK-NEXT:    mov r3, r6
-; CHECK-NEXT:    csel r9, r1, r0, ne
-; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r2, r7
+; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    csel r4, r1, r0, ne
+; CHECK-NEXT:    ldr r0, [sp] @ 4-byte Reload
 ; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r0, r9
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r0, #-1
-; CHECK-NEXT:    ldr.w r9, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    str r0, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    lsrl r0, r7, #28
-; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    str.w r0, [r9, #16]
+; CHECK-NEXT:    movne.w r4, #-1
 ; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    ldr r4, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT:    mov r2, r4
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r6, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT:    ldr r2, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT:    csel r10, r1, r0, ne
+; CHECK-NEXT:    str r4, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    lsrl r4, r9, #28
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r11
+; CHECK-NEXT:    mov r2, r5
 ; CHECK-NEXT:    mov r3, r6
-; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    csel r6, r1, r0, ne
+; CHECK-NEXT:    cmp r7, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r10, #-1
-; CHECK-NEXT:    orr.w r0, r7, r10, lsl #4
-; CHECK-NEXT:    str.w r0, [r9, #20]
-; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    movne.w r6, #-1
+; CHECK-NEXT:    ldr r5, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    orr.w r0, r9, r6, lsl #4
 ; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    mov r2, r4
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r5, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    mov r3, r6
-; CHECK-NEXT:    csel r7, r1, r0, ne
+; CHECK-NEXT:    strd r4, r0, [r5, #16]
 ; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    mov r2, r5
-; CHECK-NEXT:    mov r8, r6
+; CHECK-NEXT:    ldr.w r9, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    mov r2, r9
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r1, r11
+; CHECK-NEXT:    ldr.w r11, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    ldr.w r8, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    mov r3, r8
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r2, r9
+; CHECK-NEXT:    mov r3, r10
+; CHECK-NEXT:    csel r0, r1, r0, ne
+; CHECK-NEXT:    cmp r4, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r7, #15
-; CHECK-NEXT:    and r1, r7, #15
-; CHECK-NEXT:    lsrl r10, r1, #28
-; CHECK-NEXT:    strb.w r10, [r9, #24]
-; CHECK-NEXT:    ldr r6, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT:    ldr r4, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT:    ldrd r2, r3, [sp, #32] @ 8-byte Folded Reload
+; CHECK-NEXT:    movne r0, #15
+; CHECK-NEXT:    and r1, r0, #15
+; CHECK-NEXT:    lsrl r6, r1, #28
+; CHECK-NEXT:    strb r6, [r5, #24]
+; CHECK-NEXT:    ldr r6, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT:    ldr r4, [sp, #36] @ 4-byte Reload
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r2, r5
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    csel r7, r1, r0, ne
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    mov r1, r4
-; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r2, r11
+; CHECK-NEXT:    mov r3, r8
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    ldr r1, [sp, #40] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    csel r0, r1, r0, ne
+; CHECK-NEXT:    cmp r7, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r7, #15
-; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    and r0, r7, #15
+; CHECK-NEXT:    movne r0, #15
+; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    and r0, r0, #15
 ; CHECK-NEXT:    orr.w r0, r0, r1, lsl #4
-; CHECK-NEXT:    str.w r0, [r9, #12]
+; CHECK-NEXT:    str r0, [r5, #12]
 ; CHECK-NEXT:    add sp, #48
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
@@ -2692,11 +2919,11 @@ define arm_aapcs_vfpcc <2 x i100> @test_unsigned_v2f64_v2i100(<2 x double> %f) {
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI40_0:
-; CHECK-NEXT:    .long 0 @ double 0
-; CHECK-NEXT:    .long 0
-; CHECK-NEXT:  .LCPI40_1:
 ; CHECK-NEXT:    .long 4294967295 @ double 1.2676506002282293E+30
 ; CHECK-NEXT:    .long 1177550847
+; CHECK-NEXT:  .LCPI40_1:
+; CHECK-NEXT:    .long 0 @ double 0
+; CHECK-NEXT:    .long 0
     %x = call <2 x i100> @llvm.fptoui.sat.v2f64.v2i100(<2 x double> %f)
     ret <2 x i100> %x
 }
@@ -2710,185 +2937,196 @@ define arm_aapcs_vfpcc <2 x i128> @test_unsigned_v2f64_v2i128(<2 x double> %f) {
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    .pad #24
-; CHECK-NEXT:    sub sp, #24
+; CHECK-NEXT:    .pad #32
+; CHECK-NEXT:    sub sp, #32
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI41_0
 ; CHECK-NEXT:    vmov r8, r7, d9
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    vmov r2, r9, d0
-; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    mov r11, r2
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    str r0, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT:    vmov r6, r4, d0
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    bl __fixunsdfti
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r4
+; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    vldr d0, .LCPI41_1
-; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    strd r1, r0, [sp, #8] @ 8-byte Folded Spill
-; CHECK-NEXT:    csel r6, r3, r6, ne
-; CHECK-NEXT:    vmov r10, r5, d0
-; CHECK-NEXT:    str r2, [sp] @ 4-byte Spill
+; CHECK-NEXT:    mov r9, r0
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    str r5, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    vmov r10, r11, d0
 ; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    mov r5, r0
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    bl __fixunsdfti
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    strd r1, r0, [sp, #16] @ 8-byte Folded Spill
+; CHECK-NEXT:    csel r0, r3, r5, ne
+; CHECK-NEXT:    str r2, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    cmp.w r9, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r6, #-1
-; CHECK-NEXT:    str r6, [r4, #28]
-; CHECK-NEXT:    str.w r11, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r6, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    ldr r1, [sp] @ 4-byte Reload
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    csel r5, r1, r0, ne
-; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    ldr r5, [sp, #24] @ 4-byte Reload
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r4
+; CHECK-NEXT:    str r0, [r5, #28]
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    str r6, [sp, #28] @ 4-byte Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r5, #-1
+; CHECK-NEXT:    mov r9, r0
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    mov r2, r11
-; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    str r5, [r4, #24]
-; CHECK-NEXT:    mov r5, r4
-; CHECK-NEXT:    mov r4, r9
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    str.w r10, [sp, #4] @ 4-byte Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r6
-; CHECK-NEXT:    csel r9, r1, r0, ne
+; CHECK-NEXT:    mov r2, r6
+; CHECK-NEXT:    mov r3, r4
+; CHECK-NEXT:    csel r0, r1, r0, ne
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    str r0, [r5, #24]
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    str r4, [sp] @ 4-byte Spill
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r9, #-1
-; CHECK-NEXT:    str r5, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r9, r0
 ; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    str.w r9, [r5, #20]
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    ldr r2, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    mov r3, r4
-; CHECK-NEXT:    vmov r6, r11, d8
-; CHECK-NEXT:    mov r9, r4
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r3, r11
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    csel r4, r1, r0, ne
+; CHECK-NEXT:    mov r3, r4
+; CHECK-NEXT:    vmov r6, r5, d8
+; CHECK-NEXT:    csel r0, r1, r0, ne
+; CHECK-NEXT:    cmp.w r9, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    ldr.w r9, [sp, #24] @ 4-byte Reload
 ; CHECK-NEXT:    mov r1, r7
-; CHECK-NEXT:    ldr r7, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    str.w r0, [r9, #20]
 ; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    ldr r4, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    mov r10, r0
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    mov r0, r8
+; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    mov r7, r11
+; CHECK-NEXT:    mov r2, r4
+; CHECK-NEXT:    str.w r11, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r11, r9
+; CHECK-NEXT:    csel r0, r1, r0, ne
+; CHECK-NEXT:    cmp.w r10, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r4, #-1
-; CHECK-NEXT:    str r4, [r5, #16]
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    str.w r0, [r9, #16]
+; CHECK-NEXT:    ldr.w r8, [sp, #28] @ 4-byte Reload
 ; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    ldr r5, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r11
+; CHECK-NEXT:    ldr.w r9, [sp] @ 4-byte Reload
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r2, r8
 ; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r10, r0
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r2, r4
+; CHECK-NEXT:    mov r3, r7
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    mov r8, r0
+; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r11
+; CHECK-NEXT:    mov r1, r5
 ; CHECK-NEXT:    bl __fixunsdfti
-; CHECK-NEXT:    cmp.w r8, #0
-; CHECK-NEXT:    strd r1, r0, [sp, #8] @ 8-byte Folded Spill
-; CHECK-NEXT:    csel r4, r3, r8, ne
-; CHECK-NEXT:    str r2, [sp] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r7
-; CHECK-NEXT:    mov r8, r7
-; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    strd r1, r0, [sp, #16] @ 8-byte Folded Spill
+; CHECK-NEXT:    csel r0, r3, r7, ne
+; CHECK-NEXT:    str r2, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    cmp.w r10, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r4, #-1
-; CHECK-NEXT:    ldr r7, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    str.w r0, [r11, #12]
 ; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r2, r8
 ; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    str r4, [r7, #12]
-; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp] @ 4-byte Reload
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    csel r4, r1, r0, ne
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r11
+; CHECK-NEXT:    mov r7, r11
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r2, r4
+; CHECK-NEXT:    mov r10, r4
+; CHECK-NEXT:    ldr r4, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    mov r11, r0
 ; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    mov r2, r5
-; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r4, #-1
-; CHECK-NEXT:    str r4, [r7, #8]
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r3, r4
 ; CHECK-NEXT:    bl __aeabi_dcmpge
 ; CHECK-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    csel r4, r1, r0, ne
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    csel r0, r1, r0, ne
+; CHECK-NEXT:    cmp.w r11, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    str r0, [r7, #8]
 ; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r11
+; CHECK-NEXT:    mov r1, r5
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mov r11, r0
 ; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r11
-; CHECK-NEXT:    mov r2, r5
-; CHECK-NEXT:    mov r3, r9
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r4, #-1
-; CHECK-NEXT:    str r4, [r7, #4]
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r3, r4
 ; CHECK-NEXT:    bl __aeabi_dcmpge
-; CHECK-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r2, r10
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    csel r4, r1, r0, ne
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    csel r0, r1, r0, ne
+; CHECK-NEXT:    cmp.w r11, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    str r0, [r7, #4]
 ; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    mov r1, r11
+; CHECK-NEXT:    mov r1, r5
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r8, r0
+; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    mov r2, r10
+; CHECK-NEXT:    mov r3, r4
+; CHECK-NEXT:    bl __aeabi_dcmpge
+; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    csel r0, r1, r0, ne
+; CHECK-NEXT:    cmp.w r8, #0
 ; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r4, #-1
-; CHECK-NEXT:    str r4, [r7]
-; CHECK-NEXT:    add sp, #24
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    str r0, [r7]
+; CHECK-NEXT:    add sp, #32
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI41_0:
-; CHECK-NEXT:    .long 0 @ double 0
-; CHECK-NEXT:    .long 0
-; CHECK-NEXT:  .LCPI41_1:
 ; CHECK-NEXT:    .long 4294967295 @ double 3.4028236692093843E+38
 ; CHECK-NEXT:    .long 1206910975
+; CHECK-NEXT:  .LCPI41_1:
+; CHECK-NEXT:    .long 0 @ double 0
+; CHECK-NEXT:    .long 0
     %x = call <2 x i128> @llvm.fptoui.sat.v2f64.v2i128(<2 x double> %f)
     ret <2 x i128> %x
 }
@@ -3333,86 +3571,81 @@ define arm_aapcs_vfpcc <8 x i50> @test_unsigned_v8f16_v8i50(<8 x half> %f) {
 ; CHECK-NEXT:    .pad #24
 ; CHECK-NEXT:    sub sp, #24
 ; CHECK-NEXT:    vmov q4, q0
-; CHECK-NEXT:    mov r10, r0
-; CHECK-NEXT:    vcvtb.f32.f16 s22, s18
-; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vcvtb.f32.f16 s22, s17
 ; CHECK-NEXT:    vmov r0, s22
 ; CHECK-NEXT:    bl __aeabi_f2ulz
-; CHECK-NEXT:    vcvtt.f32.f16 s26, s17
-; CHECK-NEXT:    mov r6, r0
-; CHECK-NEXT:    vmov r0, s26
+; CHECK-NEXT:    vcvtb.f32.f16 s24, s18
+; CHECK-NEXT:    mov r2, r0
+; CHECK-NEXT:    vmov r0, s24
 ; CHECK-NEXT:    vcvtt.f32.f16 s20, s18
-; CHECK-NEXT:    vcvtb.f32.f16 s24, s17
+; CHECK-NEXT:    vldr s18, .LCPI48_0
 ; CHECK-NEXT:    vcmp.f32 s22, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    it lt
+; CHECK-NEXT:    vcvtt.f32.f16 s26, s17
+; CHECK-NEXT:    itt lt
+; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    movlt r1, #0
-; CHECK-NEXT:    vmov r8, s20
-; CHECK-NEXT:    vldr s18, .LCPI48_0
-; CHECK-NEXT:    vmov r9, s24
-; CHECK-NEXT:    mov r4, r1
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r6, #0
-; CHECK-NEXT:    bl __aeabi_f2ulz
-; CHECK-NEXT:    vcmp.f32 s26, #0
+; CHECK-NEXT:    vcmp.f32 s22, s18
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r1, #0
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    mov r7, r0
-; CHECK-NEXT:    mov r0, r9
-; CHECK-NEXT:    mov r5, r1
+; CHECK-NEXT:    itt gt
+; CHECK-NEXT:    movwgt r1, #65535
+; CHECK-NEXT:    movtgt r1, #3
+; CHECK-NEXT:    str r1, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    it gt
+; CHECK-NEXT:    movgt.w r2, #-1
+; CHECK-NEXT:    vmov r5, s26
+; CHECK-NEXT:    str r2, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    vmov r6, s20
 ; CHECK-NEXT:    bl __aeabi_f2ulz
+; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    vcmp.f32 s24, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    mov r0, r5
 ; CHECK-NEXT:    itt lt
 ; CHECK-NEXT:    movlt r1, #0
-; CHECK-NEXT:    movlt r0, #0
+; CHECK-NEXT:    movlt r7, #0
 ; CHECK-NEXT:    vcmp.f32 s24, s18
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    vcmp.f32 s26, s18
-; CHECK-NEXT:    str r0, [sp, #20] @ 4-byte Spill
 ; CHECK-NEXT:    itt gt
 ; CHECK-NEXT:    movwgt r1, #65535
 ; CHECK-NEXT:    movtgt r1, #3
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    vcmp.f32 s22, s18
-; CHECK-NEXT:    str r1, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    str r1, [sp, #12] @ 4-byte Spill
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r7, #-1
-; CHECK-NEXT:    str r7, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    itt gt
-; CHECK-NEXT:    movwgt r5, #65535
-; CHECK-NEXT:    movtgt r5, #3
-; CHECK-NEXT:    str r5, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    bl __aeabi_f2ulz
+; CHECK-NEXT:    vcmp.f32 s26, #0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    itt lt
+; CHECK-NEXT:    movlt r1, #0
+; CHECK-NEXT:    movlt r0, #0
+; CHECK-NEXT:    vcmp.f32 s26, s18
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r6, #-1
-; CHECK-NEXT:    str.w r6, [r10, #25]
 ; CHECK-NEXT:    itt gt
-; CHECK-NEXT:    movwgt r4, #65535
-; CHECK-NEXT:    movtgt r4, #3
-; CHECK-NEXT:    str r4, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    movwgt r1, #65535
+; CHECK-NEXT:    movtgt r1, #3
+; CHECK-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    str.w r7, [r4, #25]
+; CHECK-NEXT:    it gt
+; CHECK-NEXT:    movgt.w r0, #-1
+; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    bl __aeabi_f2ulz
 ; CHECK-NEXT:    vcmp.f32 s20, #0
 ; CHECK-NEXT:    mov r6, r0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s20, s18
 ; CHECK-NEXT:    vcvtb.f32.f16 s20, s19
-; CHECK-NEXT:    mov r5, r1
+; CHECK-NEXT:    mov r7, r1
 ; CHECK-NEXT:    vmov r0, s20
 ; CHECK-NEXT:    itt lt
-; CHECK-NEXT:    movlt r5, #0
+; CHECK-NEXT:    movlt r7, #0
 ; CHECK-NEXT:    movlt r6, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    ittt gt
 ; CHECK-NEXT:    movgt.w r6, #-1
-; CHECK-NEXT:    movwgt r5, #65535
-; CHECK-NEXT:    movtgt r5, #3
+; CHECK-NEXT:    movwgt r7, #65535
+; CHECK-NEXT:    movtgt r7, #3
 ; CHECK-NEXT:    bl __aeabi_f2ulz
 ; CHECK-NEXT:    vcmp.f32 s20, #0
 ; CHECK-NEXT:    mov r9, r0
@@ -3435,16 +3668,16 @@ define arm_aapcs_vfpcc <8 x i50> @test_unsigned_v8f16_v8i50(<8 x half> %f) {
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s20, s18
 ; CHECK-NEXT:    vcvtb.f32.f16 s20, s16
-; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    mov r5, r1
 ; CHECK-NEXT:    vmov r0, s20
 ; CHECK-NEXT:    itt lt
-; CHECK-NEXT:    movlt r7, #0
+; CHECK-NEXT:    movlt r5, #0
 ; CHECK-NEXT:    movlt.w r10, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    ittt gt
 ; CHECK-NEXT:    movgt.w r10, #-1
-; CHECK-NEXT:    movwgt r7, #65535
-; CHECK-NEXT:    movtgt r7, #3
+; CHECK-NEXT:    movwgt r5, #65535
+; CHECK-NEXT:    movtgt r5, #3
 ; CHECK-NEXT:    bl __aeabi_f2ulz
 ; CHECK-NEXT:    vcmp.f32 s20, #0
 ; CHECK-NEXT:    mov r8, r1
@@ -3457,36 +3690,36 @@ define arm_aapcs_vfpcc <8 x i50> @test_unsigned_v8f16_v8i50(<8 x half> %f) {
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    ldr r4, [sp] @ 4-byte Reload
 ; CHECK-NEXT:    mov r2, r9
-; CHECK-NEXT:    lsrl r2, r11, #28
-; CHECK-NEXT:    bfc r5, #18, #14
-; CHECK-NEXT:    vcvtt.f32.f16 s16, s16
 ; CHECK-NEXT:    str r0, [r4]
-; CHECK-NEXT:    lsr.w r0, r7, #10
-; CHECK-NEXT:    bfc r7, #18, #14
-; CHECK-NEXT:    lsll r10, r7, #22
-; CHECK-NEXT:    orr.w r1, r11, r7
+; CHECK-NEXT:    lsrs r0, r5, #10
+; CHECK-NEXT:    bfc r5, #18, #14
+; CHECK-NEXT:    lsll r10, r5, #22
+; CHECK-NEXT:    lsrl r2, r11, #28
+; CHECK-NEXT:    orr.w r1, r11, r5
 ; CHECK-NEXT:    str.w r1, [r4, #45]
 ; CHECK-NEXT:    orr.w r1, r2, r10
 ; CHECK-NEXT:    str.w r1, [r4, #41]
 ; CHECK-NEXT:    strb.w r0, [r4, #49]
+; CHECK-NEXT:    bfc r7, #18, #14
 ; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    lsrl r0, r5, #14
-; CHECK-NEXT:    mov r7, r4
-; CHECK-NEXT:    orr.w r1, r5, r9, lsl #4
+; CHECK-NEXT:    vcvtt.f32.f16 s16, s16
+; CHECK-NEXT:    lsrl r0, r7, #14
+; CHECK-NEXT:    mov r5, r4
+; CHECK-NEXT:    orr.w r1, r7, r9, lsl #4
 ; CHECK-NEXT:    str.w r1, [r4, #37]
 ; CHECK-NEXT:    str.w r0, [r4, #33]
-; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
 ; CHECK-NEXT:    bfc r0, #18, #14
 ; CHECK-NEXT:    orr.w r0, r0, r6, lsl #18
 ; CHECK-NEXT:    str.w r0, [r4, #29]
 ; CHECK-NEXT:    vmov r0, s16
-; CHECK-NEXT:    ldr r5, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
 ; CHECK-NEXT:    ldr r3, [sp, #16] @ 4-byte Reload
 ; CHECK-NEXT:    ldr.w r9, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    ldr r4, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    mov r1, r7
+; CHECK-NEXT:    ldr r4, [sp, #4] @ 4-byte Reload
 ; CHECK-NEXT:    bfc r1, #18, #14
 ; CHECK-NEXT:    bfc r3, #18, #14
 ; CHECK-NEXT:    mov r6, r9
@@ -3496,7 +3729,7 @@ define arm_aapcs_vfpcc <8 x i50> @test_unsigned_v8f16_v8i50(<8 x half> %f) {
 ; CHECK-NEXT:    movwgt r8, #65535
 ; CHECK-NEXT:    movtgt r8, #3
 ; CHECK-NEXT:    orrs r1, r3
-; CHECK-NEXT:    str r1, [r7, #20]
+; CHECK-NEXT:    str r1, [r5, #20]
 ; CHECK-NEXT:    bl __aeabi_f2ulz
 ; CHECK-NEXT:    vcmp.f32 s16, #0
 ; CHECK-NEXT:    orr.w r2, r6, r4
@@ -3510,18 +3743,19 @@ define arm_aapcs_vfpcc <8 x i50> @test_unsigned_v8f16_v8i50(<8 x half> %f) {
 ; CHECK-NEXT:    itt gt
 ; CHECK-NEXT:    movwgt r1, #65535
 ; CHECK-NEXT:    movtgt r1, #3
-; CHECK-NEXT:    str r2, [r7, #16]
-; CHECK-NEXT:    lsr.w r2, r5, #10
-; CHECK-NEXT:    strb r2, [r7, #24]
+; CHECK-NEXT:    str r2, [r5, #16]
+; CHECK-NEXT:    lsrs r2, r7, #10
+; CHECK-NEXT:    strb r2, [r5, #24]
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    mov r2, r0
 ; CHECK-NEXT:    bfc r1, #18, #14
-; CHECK-NEXT:    lsrl r2, r1, #14
 ; CHECK-NEXT:    orr.w r0, r8, r0, lsl #18
+; CHECK-NEXT:    lsrl r2, r1, #14
 ; CHECK-NEXT:    orr.w r1, r1, r9, lsl #4
-; CHECK-NEXT:    strd r2, r1, [r7, #8]
-; CHECK-NEXT:    str r0, [r7, #4]
+; CHECK-NEXT:    strd r2, r1, [r5, #8]
+; CHECK-NEXT:    str r0, [r5, #4]
 ; CHECK-NEXT:    add sp, #24
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
 ; CHECK-NEXT:    add sp, #4
@@ -3544,38 +3778,38 @@ define arm_aapcs_vfpcc <8 x i64> @test_unsigned_v8f16_v8i64(<8 x half> %f) {
 ; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14}
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14}
 ; CHECK-NEXT:    vmov q4, q0
-; CHECK-NEXT:    vcvtt.f32.f16 s22, s19
-; CHECK-NEXT:    vmov r0, s22
+; CHECK-NEXT:    vcvtt.f32.f16 s20, s19
+; CHECK-NEXT:    vmov r0, s20
 ; CHECK-NEXT:    bl __aeabi_f2ulz
-; CHECK-NEXT:    vcvtb.f32.f16 s26, s19
+; CHECK-NEXT:    vcvtb.f32.f16 s22, s19
 ; CHECK-NEXT:    mov r9, r0
-; CHECK-NEXT:    vmov r0, s26
+; CHECK-NEXT:    vmov r0, s22
 ; CHECK-NEXT:    vldr s28, .LCPI49_0
-; CHECK-NEXT:    vcmp.f32 s22, #0
+; CHECK-NEXT:    vcmp.f32 s20, #0
 ; CHECK-NEXT:    mov r8, r1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcvtt.f32.f16 s20, s16
 ; CHECK-NEXT:    vcvtt.f32.f16 s24, s18
+; CHECK-NEXT:    vcvtt.f32.f16 s26, s16
 ; CHECK-NEXT:    itt lt
 ; CHECK-NEXT:    movlt.w r9, #0
 ; CHECK-NEXT:    movlt.w r8, #0
-; CHECK-NEXT:    vcmp.f32 s22, s28
+; CHECK-NEXT:    vcmp.f32 s20, s28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vmov r6, s20
 ; CHECK-NEXT:    vmov r4, s24
+; CHECK-NEXT:    vmov r6, s26
 ; CHECK-NEXT:    itt gt
 ; CHECK-NEXT:    movgt.w r8, #-1
 ; CHECK-NEXT:    movgt.w r9, #-1
 ; CHECK-NEXT:    bl __aeabi_f2ulz
 ; CHECK-NEXT:    mov r10, r0
-; CHECK-NEXT:    vcmp.f32 s26, #0
+; CHECK-NEXT:    vcmp.f32 s22, #0
 ; CHECK-NEXT:    mov r11, r1
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    mov r0, r4
 ; CHECK-NEXT:    itt lt
 ; CHECK-NEXT:    movlt.w r11, #0
 ; CHECK-NEXT:    movlt.w r10, #0
-; CHECK-NEXT:    vcmp.f32 s26, s28
+; CHECK-NEXT:    vcmp.f32 s22, s28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itt gt
 ; CHECK-NEXT:    movgt.w r10, #-1
@@ -3599,12 +3833,12 @@ define arm_aapcs_vfpcc <8 x i64> @test_unsigned_v8f16_v8i64(<8 x half> %f) {
 ; CHECK-NEXT:    mov r7, r0
 ; CHECK-NEXT:    vmov r0, s16
 ; CHECK-NEXT:    mov r6, r1
-; CHECK-NEXT:    vcmp.f32 s20, #0
+; CHECK-NEXT:    vcmp.f32 s26, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itt lt
 ; CHECK-NEXT:    movlt r7, #0
 ; CHECK-NEXT:    movlt r6, #0
-; CHECK-NEXT:    vcmp.f32 s20, s28
+; CHECK-NEXT:    vcmp.f32 s26, s28
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itt gt
 ; CHECK-NEXT:    movgt.w r6, #-1
@@ -3694,86 +3928,61 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12}
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12}
 ; CHECK-NEXT:    .pad #56
-; CHECK-NEXT:    sub sp, #56
-; CHECK-NEXT:    vmov q4, q0
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    vcvtb.f32.f16 s22, s17
-; CHECK-NEXT:    vmov r0, s22
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    vcvtb.f32.f16 s24, s18
-; CHECK-NEXT:    mov r8, r0
-; CHECK-NEXT:    vmov r0, s24
-; CHECK-NEXT:    vldr s20, .LCPI50_0
-; CHECK-NEXT:    vcmp.f32 s22, #0
-; CHECK-NEXT:    mov r9, r1
-; CHECK-NEXT:    mov r10, r2
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itttt lt
-; CHECK-NEXT:    movlt.w r10, #0
-; CHECK-NEXT:    movlt.w r9, #0
-; CHECK-NEXT:    movlt.w r8, #0
-; CHECK-NEXT:    movlt r3, #0
-; CHECK-NEXT:    vcmp.f32 s22, s20
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r3, #15
-; CHECK-NEXT:    str r3, [sp, #52] @ 4-byte Spill
-; CHECK-NEXT:    ittt gt
-; CHECK-NEXT:    movgt.w r8, #-1
-; CHECK-NEXT:    movgt.w r9, #-1
-; CHECK-NEXT:    movgt.w r10, #-1
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    vcvtb.f32.f16 s22, s19
-; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    sub sp, #56
+; CHECK-NEXT:    vmov q4, q0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vcvtt.f32.f16 s22, s16
 ; CHECK-NEXT:    vmov r0, s22
-; CHECK-NEXT:    mov r6, r1
-; CHECK-NEXT:    vcmp.f32 s24, #0
-; CHECK-NEXT:    mov r7, r2
+; CHECK-NEXT:    bl __fixunssfti
+; CHECK-NEXT:    vcvtt.f32.f16 s24, s17
+; CHECK-NEXT:    mov r7, r0
+; CHECK-NEXT:    vmov r0, s24
+; CHECK-NEXT:    vldr s20, .LCPI50_0
+; CHECK-NEXT:    vcmp.f32 s22, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itttt lt
 ; CHECK-NEXT:    movlt r7, #0
-; CHECK-NEXT:    movlt r6, #0
-; CHECK-NEXT:    movlt r5, #0
+; CHECK-NEXT:    movlt r1, #0
+; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    movlt r3, #0
-; CHECK-NEXT:    vcmp.f32 s24, s20
+; CHECK-NEXT:    vcmp.f32 s22, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt r3, #15
-; CHECK-NEXT:    str r3, [sp, #48] @ 4-byte Spill
-; CHECK-NEXT:    ittt gt
-; CHECK-NEXT:    movgt.w r5, #-1
-; CHECK-NEXT:    movgt.w r6, #-1
+; CHECK-NEXT:    str r3, [sp, #52] @ 4-byte Spill
+; CHECK-NEXT:    it gt
+; CHECK-NEXT:    movgt.w r2, #-1
+; CHECK-NEXT:    str r2, [sp, #48] @ 4-byte Spill
+; CHECK-NEXT:    it gt
+; CHECK-NEXT:    movgt.w r1, #-1
+; CHECK-NEXT:    str r1, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r7, #-1
+; CHECK-NEXT:    str r7, [sp, #40] @ 4-byte Spill
 ; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    vcmp.f32 s22, #0
+; CHECK-NEXT:    vcmp.f32 s24, #0
+; CHECK-NEXT:    vcvtt.f32.f16 s22, s18
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s22, s20
 ; CHECK-NEXT:    itttt lt
-; CHECK-NEXT:    movlt r3, #0
-; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    movlt r1, #0
+; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    movlt r2, #0
+; CHECK-NEXT:    movlt r3, #0
+; CHECK-NEXT:    vcmp.f32 s24, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r2, #-1
-; CHECK-NEXT:    str.w r2, [r4, #83]
+; CHECK-NEXT:    movgt r3, #15
+; CHECK-NEXT:    str r3, [sp, #36] @ 4-byte Spill
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r1, #-1
-; CHECK-NEXT:    str.w r1, [r4, #79]
+; CHECK-NEXT:    movgt.w r2, #-1
+; CHECK-NEXT:    str r2, [sp, #32] @ 4-byte Spill
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    vcvtt.f32.f16 s22, s16
-; CHECK-NEXT:    str.w r0, [r4, #75]
+; CHECK-NEXT:    str r0, [sp, #24] @ 4-byte Spill
 ; CHECK-NEXT:    vmov r0, s22
-; CHECK-NEXT:    str.w r7, [r4, #58]
-; CHECK-NEXT:    str.w r6, [r4, #54]
-; CHECK-NEXT:    str.w r5, [r4, #50]
-; CHECK-NEXT:    str.w r10, [r4, #33]
-; CHECK-NEXT:    str.w r9, [r4, #29]
-; CHECK-NEXT:    str.w r8, [r4, #25]
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r3, #15
-; CHECK-NEXT:    str r3, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT:    movgt.w r1, #-1
+; CHECK-NEXT:    str r1, [sp, #28] @ 4-byte Spill
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    vcmp.f32 s22, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
@@ -3786,85 +3995,115 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt r3, #15
-; CHECK-NEXT:    str r3, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT:    str r3, [sp, #20] @ 4-byte Spill
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r2, #-1
-; CHECK-NEXT:    str r2, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT:    str r2, [sp, #16] @ 4-byte Spill
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r1, #-1
-; CHECK-NEXT:    str r1, [sp, #32] @ 4-byte Spill
+; CHECK-NEXT:    str r1, [sp, #12] @ 4-byte Spill
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    vcvtt.f32.f16 s22, s17
-; CHECK-NEXT:    str r0, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT:    vcvtb.f32.f16 s22, s17
+; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
 ; CHECK-NEXT:    vmov r0, s22
 ; CHECK-NEXT:    bl __fixunssfti
+; CHECK-NEXT:    vcvtb.f32.f16 s18, s18
+; CHECK-NEXT:    mov r9, r0
+; CHECK-NEXT:    vmov r0, s18
+; CHECK-NEXT:    mov r8, r1
 ; CHECK-NEXT:    vcmp.f32 s22, #0
-; CHECK-NEXT:    mov r5, r1
+; CHECK-NEXT:    mov r6, r2
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itttt lt
-; CHECK-NEXT:    movlt r5, #0
-; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    movlt r2, #0
+; CHECK-NEXT:    movlt r6, #0
+; CHECK-NEXT:    movlt.w r8, #0
+; CHECK-NEXT:    movlt.w r9, #0
 ; CHECK-NEXT:    movlt r3, #0
 ; CHECK-NEXT:    vcmp.f32 s22, s20
-; CHECK-NEXT:    vcvtt.f32.f16 s18, s18
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt r3, #15
-; CHECK-NEXT:    str r3, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r2, #-1
-; CHECK-NEXT:    str r2, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    str r3, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    ittt gt
+; CHECK-NEXT:    movgt.w r9, #-1
+; CHECK-NEXT:    movgt.w r8, #-1
+; CHECK-NEXT:    movgt.w r6, #-1
+; CHECK-NEXT:    bl __fixunssfti
+; CHECK-NEXT:    vcmp.f32 s18, #0
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s18, s20
+; CHECK-NEXT:    vcvtb.f32.f16 s18, s19
+; CHECK-NEXT:    mov r11, r1
 ; CHECK-NEXT:    vmov r0, s18
+; CHECK-NEXT:    mov r7, r2
+; CHECK-NEXT:    itttt lt
+; CHECK-NEXT:    movlt r7, #0
+; CHECK-NEXT:    movlt.w r11, #0
+; CHECK-NEXT:    movlt r5, #0
+; CHECK-NEXT:    movlt r3, #0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
+; CHECK-NEXT:    movgt r3, #15
+; CHECK-NEXT:    str r3, [sp] @ 4-byte Spill
+; CHECK-NEXT:    ittt gt
 ; CHECK-NEXT:    movgt.w r5, #-1
+; CHECK-NEXT:    movgt.w r11, #-1
+; CHECK-NEXT:    movgt.w r7, #-1
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    vcmp.f32 s18, #0
-; CHECK-NEXT:    mov r9, r1
-; CHECK-NEXT:    mov r8, r2
+; CHECK-NEXT:    mov r10, r3
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    vcmp.f32 s18, s20
 ; CHECK-NEXT:    itttt lt
+; CHECK-NEXT:    movlt.w r10, #0
 ; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    movlt.w r9, #0
-; CHECK-NEXT:    movlt.w r8, #0
-; CHECK-NEXT:    movlt r3, #0
+; CHECK-NEXT:    movlt r1, #0
+; CHECK-NEXT:    movlt r2, #0
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    it gt
+; CHECK-NEXT:    movgt.w r2, #-1
+; CHECK-NEXT:    str.w r2, [r4, #83]
+; CHECK-NEXT:    it gt
+; CHECK-NEXT:    movgt.w r1, #-1
+; CHECK-NEXT:    str.w r1, [r4, #79]
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    movgt r3, #15
-; CHECK-NEXT:    str r3, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    ittt gt
-; CHECK-NEXT:    movgt.w r8, #-1
-; CHECK-NEXT:    movgt.w r9, #-1
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    vcvtt.f32.f16 s18, s19
-; CHECK-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    str.w r0, [r4, #75]
 ; CHECK-NEXT:    vmov r0, s18
+; CHECK-NEXT:    str.w r7, [r4, #58]
+; CHECK-NEXT:    str.w r11, [r4, #54]
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    str.w r5, [r4, #50]
+; CHECK-NEXT:    str.w r6, [r4, #33]
+; CHECK-NEXT:    str.w r8, [r4, #29]
+; CHECK-NEXT:    str.w r9, [r4, #25]
+; CHECK-NEXT:    it gt
+; CHECK-NEXT:    movgt.w r10, #15
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    vcvtb.f32.f16 s16, s16
-; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    mov r5, r0
 ; CHECK-NEXT:    vmov r0, s16
-; CHECK-NEXT:    mov r11, r1
+; CHECK-NEXT:    mov r7, r1
 ; CHECK-NEXT:    vcmp.f32 s18, #0
-; CHECK-NEXT:    mov r10, r2
-; CHECK-NEXT:    mov r7, r3
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r8, r3
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itttt lt
-; CHECK-NEXT:    movlt.w r11, #0
-; CHECK-NEXT:    movlt r6, #0
-; CHECK-NEXT:    movlt.w r10, #0
 ; CHECK-NEXT:    movlt r7, #0
+; CHECK-NEXT:    movlt r5, #0
+; CHECK-NEXT:    movlt r6, #0
+; CHECK-NEXT:    movlt.w r8, #0
 ; CHECK-NEXT:    vcmp.f32 s18, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    itttt gt
-; CHECK-NEXT:    movgt r7, #15
-; CHECK-NEXT:    movgt.w r10, #-1
+; CHECK-NEXT:    movgt.w r8, #15
 ; CHECK-NEXT:    movgt.w r6, #-1
-; CHECK-NEXT:    movgt.w r11, #-1
+; CHECK-NEXT:    movgt.w r5, #-1
+; CHECK-NEXT:    movgt.w r7, #-1
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    vcmp.f32 s16, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
@@ -3884,65 +4123,65 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    str r0, [r4]
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    lsrl r0, r11, #28
-; CHECK-NEXT:    and r1, r7, #15
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    lsrl r0, r7, #28
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    orr.w r1, r7, r6, lsl #4
+; CHECK-NEXT:    str.w r1, [r4, #95]
+; CHECK-NEXT:    and r1, r8, #15
 ; CHECK-NEXT:    str.w r0, [r4, #91]
-; CHECK-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r2
-; CHECK-NEXT:    lsrl r0, r9, #28
-; CHECK-NEXT:    str.w r0, [r4, #66]
-; CHECK-NEXT:    ldr.w lr, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, lr
-; CHECK-NEXT:    lsrl r0, r5, #28
-; CHECK-NEXT:    str r5, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    str.w r0, [r4, #41]
-; CHECK-NEXT:    ldr.w r12, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT:    ldr r5, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT:    mov r0, r12
-; CHECK-NEXT:    lsrl r0, r5, #28
-; CHECK-NEXT:    str r0, [r4, #16]
-; CHECK-NEXT:    orr.w r0, r11, r10, lsl #4
-; CHECK-NEXT:    lsrl r10, r1, #28
-; CHECK-NEXT:    str.w r0, [r4, #95]
-; CHECK-NEXT:    strb.w r10, [r4, #99]
-; CHECK-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT:    and r0, r0, #15
-; CHECK-NEXT:    orr.w r0, r0, r6, lsl #4
+; CHECK-NEXT:    and r0, r10, #15
+; CHECK-NEXT:    lsrl r6, r1, #28
+; CHECK-NEXT:    strb.w r6, [r4, #99]
+; CHECK-NEXT:    orr.w r0, r0, r5, lsl #4
 ; CHECK-NEXT:    str.w r0, [r4, #87]
-; CHECK-NEXT:    orr.w r0, r9, r8, lsl #4
-; CHECK-NEXT:    str.w r0, [r4, #70]
-; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    ldr r7, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    lsrl r0, r1, #28
+; CHECK-NEXT:    orr.w r1, r1, r2, lsl #4
+; CHECK-NEXT:    str.w r1, [r4, #70]
+; CHECK-NEXT:    str.w r0, [r4, #66]
+; CHECK-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
 ; CHECK-NEXT:    and r1, r0, #15
-; CHECK-NEXT:    lsrl r8, r1, #28
-; CHECK-NEXT:    strb.w r8, [r4, #74]
-; CHECK-NEXT:    ldr r0, [sp, #48] @ 4-byte Reload
+; CHECK-NEXT:    lsrl r2, r1, #28
+; CHECK-NEXT:    strb.w r2, [r4, #74]
+; CHECK-NEXT:    ldr r0, [sp] @ 4-byte Reload
 ; CHECK-NEXT:    and r0, r0, #15
-; CHECK-NEXT:    orr.w r0, r0, r2, lsl #4
+; CHECK-NEXT:    orr.w r0, r0, r7, lsl #4
 ; CHECK-NEXT:    str.w r0, [r4, #62]
-; CHECK-NEXT:    ldr r2, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    orr.w r0, r0, r2, lsl #4
-; CHECK-NEXT:    str.w r0, [r4, #45]
-; CHECK-NEXT:    ldr r0, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    ldr r7, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    ldr r1, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    ldr r2, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    lsrl r0, r1, #28
+; CHECK-NEXT:    orr.w r1, r1, r2, lsl #4
+; CHECK-NEXT:    str.w r1, [r4, #45]
+; CHECK-NEXT:    str.w r0, [r4, #41]
+; CHECK-NEXT:    ldr r0, [sp, #36] @ 4-byte Reload
 ; CHECK-NEXT:    and r1, r0, #15
 ; CHECK-NEXT:    lsrl r2, r1, #28
 ; CHECK-NEXT:    strb.w r2, [r4, #49]
-; CHECK-NEXT:    ldr r0, [sp, #52] @ 4-byte Reload
+; CHECK-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
 ; CHECK-NEXT:    and r0, r0, #15
-; CHECK-NEXT:    orr.w r0, r0, lr, lsl #4
+; CHECK-NEXT:    orr.w r0, r0, r7, lsl #4
 ; CHECK-NEXT:    str.w r0, [r4, #37]
-; CHECK-NEXT:    ldr r2, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT:    orr.w r0, r5, r2, lsl #4
-; CHECK-NEXT:    str r0, [r4, #20]
-; CHECK-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    ldr r7, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT:    ldr r1, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    ldr r2, [sp, #48] @ 4-byte Reload
+; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    lsrl r0, r1, #28
+; CHECK-NEXT:    orr.w r1, r1, r2, lsl #4
+; CHECK-NEXT:    strd r0, r1, [r4, #16]
+; CHECK-NEXT:    ldr r0, [sp, #52] @ 4-byte Reload
 ; CHECK-NEXT:    and r1, r0, #15
 ; CHECK-NEXT:    lsrl r2, r1, #28
 ; CHECK-NEXT:    strb r2, [r4, #24]
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt r3, #15
 ; CHECK-NEXT:    and r0, r3, #15
-; CHECK-NEXT:    orr.w r0, r0, r12, lsl #4
+; CHECK-NEXT:    orr.w r0, r0, r7, lsl #4
 ; CHECK-NEXT:    str r0, [r4, #12]
 ; CHECK-NEXT:    add sp, #56
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12}
@@ -3967,61 +4206,64 @@ define arm_aapcs_vfpcc <8 x i128> @test_unsigned_v8f16_v8i128(<8 x half> %f) {
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14}
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    vcvtt.f32.f16 s24, s19
-; CHECK-NEXT:    vcvtb.f32.f16 s22, s16
-; CHECK-NEXT:    vmov r0, s24
+; CHECK-NEXT:    vcvtt.f32.f16 s22, s19
+; CHECK-NEXT:    vmov r0, s22
+; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    vcvtb.f32.f16 s28, s19
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    vmov r0, s28
 ; CHECK-NEXT:    vldr s20, .LCPI51_0
-; CHECK-NEXT:    vmov r5, s22
-; CHECK-NEXT:    vmov r7, s28
-; CHECK-NEXT:    vcvtt.f32.f16 s26, s18
-; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    vcmp.f32 s24, #0
-; CHECK-NEXT:    add.w r12, r4, #112
+; CHECK-NEXT:    vcmp.f32 s22, #0
+; CHECK-NEXT:    vcvtt.f32.f16 s24, s18
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s22, s20
+; CHECK-NEXT:    vcvtb.f32.f16 s26, s18
 ; CHECK-NEXT:    itttt lt
-; CHECK-NEXT:    movlt r0, #0
+; CHECK-NEXT:    movlt r6, #0
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    movlt r3, #0
-; CHECK-NEXT:    vcmp.f32 s24, s20
-; CHECK-NEXT:    vcvtb.f32.f16 s18, s18
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r3, #-1
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    ittt gt
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    movgt.w r1, #-1
-; CHECK-NEXT:    movgt.w r0, #-1
-; CHECK-NEXT:    stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    vmov r6, s26
+; CHECK-NEXT:    movgt.w r6, #-1
+; CHECK-NEXT:    strd r6, r1, [r4, #112]
+; CHECK-NEXT:    vmov r7, s24
+; CHECK-NEXT:    vmov r5, s26
+; CHECK-NEXT:    vcvtt.f32.f16 s18, s17
+; CHECK-NEXT:    strd r2, r3, [r4, #120]
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    vcmp.f32 s28, #0
 ; CHECK-NEXT:    add.w r12, r4, #96
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s28, s20
 ; CHECK-NEXT:    itttt lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    movlt r3, #0
-; CHECK-NEXT:    vcmp.f32 s28, s20
-; CHECK-NEXT:    vcvtt.f32.f16 s24, s17
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r3, #-1
-; CHECK-NEXT:    ittt gt
+; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r2, #-1
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    itt gt
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    vmov r7, s18
+; CHECK-NEXT:    mov r0, r7
+; CHECK-NEXT:    vmov r6, s18
+; CHECK-NEXT:    vcvtb.f32.f16 s22, s17
 ; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    vcmp.f32 s26, #0
+; CHECK-NEXT:    vcmp.f32 s24, #0
 ; CHECK-NEXT:    add.w r12, r4, #80
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vcmp.f32 s26, s20
+; CHECK-NEXT:    vcmp.f32 s24, s20
 ; CHECK-NEXT:    itttt lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    movlt r1, #0
@@ -4030,105 +4272,116 @@ define arm_aapcs_vfpcc <8 x i128> @test_unsigned_v8f16_v8i128(<8 x half> %f) {
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r3, #-1
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    ittt gt
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT:    mov r0, r7
-; CHECK-NEXT:    vmov r6, s24
-; CHECK-NEXT:    vcvtb.f32.f16 s26, s17
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    vcvtt.f32.f16 s24, s16
+; CHECK-NEXT:    vmov r7, s22
 ; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    vcmp.f32 s18, #0
+; CHECK-NEXT:    vcmp.f32 s26, #0
 ; CHECK-NEXT:    add.w r12, r4, #64
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s26, s20
 ; CHECK-NEXT:    itttt lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    movlt r3, #0
-; CHECK-NEXT:    vcmp.f32 s18, s20
-; CHECK-NEXT:    vcvtt.f32.f16 s16, s16
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r3, #-1
-; CHECK-NEXT:    ittt gt
+; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r2, #-1
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    itt gt
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    stm.w r12, {r0, r1, r2, r3}
 ; CHECK-NEXT:    mov r0, r6
-; CHECK-NEXT:    vmov r7, s26
+; CHECK-NEXT:    vmov r5, s24
+; CHECK-NEXT:    vcvtb.f32.f16 s16, s16
 ; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    vcmp.f32 s24, #0
+; CHECK-NEXT:    vcmp.f32 s18, #0
 ; CHECK-NEXT:    add.w r12, r4, #48
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s18, s20
 ; CHECK-NEXT:    itttt lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    movlt r3, #0
-; CHECK-NEXT:    vcmp.f32 s24, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r3, #-1
-; CHECK-NEXT:    ittt gt
+; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r2, #-1
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    itt gt
 ; CHECK-NEXT:    movgt.w r1, #-1
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    stm.w r12, {r0, r1, r2, r3}
 ; CHECK-NEXT:    mov r0, r7
 ; CHECK-NEXT:    vmov r6, s16
 ; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    vcmp.f32 s26, #0
+; CHECK-NEXT:    vcmp.f32 s22, #0
 ; CHECK-NEXT:    add.w r12, r4, #32
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s22, s20
 ; CHECK-NEXT:    itttt lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    movlt r3, #0
-; CHECK-NEXT:    vcmp.f32 s26, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itttt gt
+; CHECK-NEXT:    ittt gt
 ; CHECK-NEXT:    movgt.w r3, #-1
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    movgt.w r1, #-1
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT:    mov r0, r6
+; CHECK-NEXT:    mov r0, r5
 ; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    vcmp.f32 s16, #0
+; CHECK-NEXT:    vcmp.f32 s24, #0
 ; CHECK-NEXT:    add.w r12, r4, #16
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s24, s20
 ; CHECK-NEXT:    itttt lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    movlt r3, #0
-; CHECK-NEXT:    vcmp.f32 s16, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itttt gt
+; CHECK-NEXT:    ittt gt
 ; CHECK-NEXT:    movgt.w r3, #-1
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    movgt.w r1, #-1
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r0, r6
 ; CHECK-NEXT:    bl __fixunssfti
-; CHECK-NEXT:    vcmp.f32 s22, #0
+; CHECK-NEXT:    vcmp.f32 s16, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vcmp.f32 s16, s20
 ; CHECK-NEXT:    itttt lt
 ; CHECK-NEXT:    movlt r0, #0
 ; CHECK-NEXT:    movlt r1, #0
 ; CHECK-NEXT:    movlt r2, #0
 ; CHECK-NEXT:    movlt r3, #0
-; CHECK-NEXT:    vcmp.f32 s22, s20
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    itttt gt
+; CHECK-NEXT:    ittt gt
 ; CHECK-NEXT:    movgt.w r3, #-1
 ; CHECK-NEXT:    movgt.w r2, #-1
 ; CHECK-NEXT:    movgt.w r1, #-1
+; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    it gt
 ; CHECK-NEXT:    movgt.w r0, #-1
 ; CHECK-NEXT:    stm r4!, {r0, r1, r2, r3}
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14}

diff  --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll
index 96aff0233e4d9a..9738f7ade6fe9d 100644
--- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll
@@ -623,9 +623,9 @@ define arm_aapcs_vfpcc void @three_pointer_iv_v4i32(ptr nocapture readonly %x, p
 ; CHECK-NEXT:    vldrw.u32 q3, [r0, q0]
 ; CHECK-NEXT:    vldrw.u32 q4, [r0, q1, uxtw #2]
 ; CHECK-NEXT:    vldrw.u32 q5, [r0, q2]
-; CHECK-NEXT:    adds r0, #48
-; CHECK-NEXT:    vmul.i32 q3, q4, q3
 ; CHECK-NEXT:    subs r2, #4
+; CHECK-NEXT:    vmul.i32 q3, q4, q3
+; CHECK-NEXT:    add.w r0, r0, #48
 ; CHECK-NEXT:    vmul.i32 q5, q4, q5
 ; CHECK-NEXT:    vmul.i32 q4, q4, r3
 ; CHECK-NEXT:    vstrw.32 q4, [r1, q1, uxtw #2]
@@ -705,9 +705,9 @@ define arm_aapcs_vfpcc void @three_pointer_iv_v4i8(ptr nocapture readonly %x, pt
 ; CHECK-NEXT:    vldrb.u32 q3, [r0, q0]
 ; CHECK-NEXT:    vldrb.u32 q4, [r0, q1]
 ; CHECK-NEXT:    vldrb.u32 q5, [r0, q2]
-; CHECK-NEXT:    adds r0, #12
-; CHECK-NEXT:    vmul.i32 q3, q4, q3
 ; CHECK-NEXT:    subs r2, #4
+; CHECK-NEXT:    vmul.i32 q3, q4, q3
+; CHECK-NEXT:    add.w r0, r0, #12
 ; CHECK-NEXT:    vmul.i32 q5, q4, q5
 ; CHECK-NEXT:    vmul.i32 q4, q4, r3
 ; CHECK-NEXT:    vstrb.32 q4, [r1, q1]
@@ -793,9 +793,9 @@ define arm_aapcs_vfpcc void @three_pointer_iv_v8i16(ptr nocapture readonly %x, p
 ; CHECK-NEXT:    vldrh.u16 q3, [r0, q0]
 ; CHECK-NEXT:    vldrh.u16 q4, [r0, q1, uxtw #1]
 ; CHECK-NEXT:    vldrh.u16 q5, [r0, q2]
-; CHECK-NEXT:    adds r0, #48
-; CHECK-NEXT:    vmul.i16 q3, q4, q3
 ; CHECK-NEXT:    subs r2, #4
+; CHECK-NEXT:    vmul.i16 q3, q4, q3
+; CHECK-NEXT:    add.w r0, r0, #48
 ; CHECK-NEXT:    vmul.i16 q5, q4, q5
 ; CHECK-NEXT:    vmul.i16 q4, q4, r3
 ; CHECK-NEXT:    vstrh.16 q4, [r1, q1, uxtw #1]
@@ -887,9 +887,9 @@ define arm_aapcs_vfpcc void @three_pointer_iv_v16i8(ptr nocapture readonly %x, p
 ; CHECK-NEXT:    vldrb.u8 q3, [r0, q0]
 ; CHECK-NEXT:    vldrb.u8 q4, [r0, q1]
 ; CHECK-NEXT:    vldrb.u8 q5, [r0, q2]
-; CHECK-NEXT:    adds r0, #48
-; CHECK-NEXT:    vmul.i8 q3, q4, q3
 ; CHECK-NEXT:    subs r2, #4
+; CHECK-NEXT:    vmul.i8 q3, q4, q3
+; CHECK-NEXT:    add.w r0, r0, #48
 ; CHECK-NEXT:    vmul.i8 q5, q4, q5
 ; CHECK-NEXT:    vmul.i8 q4, q4, r3
 ; CHECK-NEXT:    vstrb.8 q4, [r1, q1]

diff  --git a/llvm/test/CodeGen/Thumb2/mve-laneinterleaving.ll b/llvm/test/CodeGen/Thumb2/mve-laneinterleaving.ll
index acbe48f9e59271..fe28f785623ed5 100644
--- a/llvm/test/CodeGen/Thumb2/mve-laneinterleaving.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-laneinterleaving.ll
@@ -307,83 +307,82 @@ define arm_aapcs_vfpcc <4 x i32> @ext_ops_trunc_i32(<4 x i32> %a, <4 x i32> %b)
 ; CHECK-NEXT:    vmov.f32 s10, s7
 ; CHECK-NEXT:    vmov r10, s8
 ; CHECK-NEXT:    vmov.f32 s8, s6
+; CHECK-NEXT:    vmov r7, s2
+; CHECK-NEXT:    vmov.f32 s2, s1
 ; CHECK-NEXT:    vmov.f32 s6, s5
 ; CHECK-NEXT:    vmov r2, s8
 ; CHECK-NEXT:    asr.w r0, r10, #31
-; CHECK-NEXT:    adds.w r6, r10, r2
-; CHECK-NEXT:    eor.w r7, r10, r2
+; CHECK-NEXT:    asrs r5, r7, #31
+; CHECK-NEXT:    adds.w r4, r10, r2
+; CHECK-NEXT:    eor.w r6, r10, r2
 ; CHECK-NEXT:    adc r3, r0, #0
-; CHECK-NEXT:    asrl r6, r3, r2
-; CHECK-NEXT:    subs r0, r6, r2
-; CHECK-NEXT:    vmov r6, s2
+; CHECK-NEXT:    asrl r4, r3, r2
+; CHECK-NEXT:    subs r0, r4, r2
 ; CHECK-NEXT:    sbc lr, r3, #0
 ; CHECK-NEXT:    vmov r3, s10
-; CHECK-NEXT:    vmov.f32 s2, s1
 ; CHECK-NEXT:    umull r0, r8, r0, r2
-; CHECK-NEXT:    asrs r5, r6, #31
-; CHECK-NEXT:    adds r4, r6, r3
+; CHECK-NEXT:    adds r4, r7, r3
+; CHECK-NEXT:    eor.w r1, r7, r3
 ; CHECK-NEXT:    adc r5, r5, #0
-; CHECK-NEXT:    eor.w r1, r6, r3
 ; CHECK-NEXT:    asrl r4, r5, r3
 ; CHECK-NEXT:    subs r4, r4, r3
 ; CHECK-NEXT:    sbc r5, r5, #0
-; CHECK-NEXT:    orrs.w r7, r7, r10, asr #31
+; CHECK-NEXT:    orrs.w r6, r6, r10, asr #31
 ; CHECK-NEXT:    umull r4, r12, r4, r3
 ; CHECK-NEXT:    csetm r9, eq
-; CHECK-NEXT:    orrs.w r1, r1, r6, asr #31
-; CHECK-NEXT:    mov.w r7, #0
+; CHECK-NEXT:    orrs.w r1, r1, r7, asr #31
+; CHECK-NEXT:    mov.w r6, #0
 ; CHECK-NEXT:    csetm r1, eq
-; CHECK-NEXT:    bfi r7, r9, #0, #8
+; CHECK-NEXT:    bfi r6, r9, #0, #8
 ; CHECK-NEXT:    mla r5, r5, r3, r12
-; CHECK-NEXT:    bfi r7, r1, #8, #8
-; CHECK-NEXT:    rsbs r1, r6, #0
-; CHECK-NEXT:    vmsr p0, r7
+; CHECK-NEXT:    bfi r6, r1, #8, #8
+; CHECK-NEXT:    rsbs r1, r7, #0
 ; CHECK-NEXT:    mla r7, lr, r2, r8
 ; CHECK-NEXT:    lsll r4, r5, r1
 ; CHECK-NEXT:    rsb.w r1, r10, #0
-; CHECK-NEXT:    lsll r4, r5, r3
 ; CHECK-NEXT:    lsll r0, r7, r1
-; CHECK-NEXT:    vmov r3, s2
+; CHECK-NEXT:    vmov lr, s2
 ; CHECK-NEXT:    vmov r1, s6
 ; CHECK-NEXT:    lsll r0, r7, r2
+; CHECK-NEXT:    lsll r4, r5, r3
+; CHECK-NEXT:    vmsr p0, r6
 ; CHECK-NEXT:    vmov q3[2], q3[0], r0, r4
 ; CHECK-NEXT:    mov.w r12, #0
 ; CHECK-NEXT:    vpsel q2, q3, q2
-; CHECK-NEXT:    adds r2, r3, r1
-; CHECK-NEXT:    asr.w r0, r3, #31
-; CHECK-NEXT:    adc r5, r0, #0
-; CHECK-NEXT:    asrl r2, r5, r1
+; CHECK-NEXT:    adds.w r2, lr, r1
+; CHECK-NEXT:    asr.w r0, lr, #31
+; CHECK-NEXT:    adc r3, r0, #0
+; CHECK-NEXT:    asrl r2, r3, r1
 ; CHECK-NEXT:    subs r0, r2, r1
 ; CHECK-NEXT:    vmov r2, s0
-; CHECK-NEXT:    sbc r8, r5, #0
-; CHECK-NEXT:    umull r4, lr, r0, r1
-; CHECK-NEXT:    vmov r0, s4
+; CHECK-NEXT:    sbc r7, r3, #0
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    umull r0, r6, r0, r1
 ; CHECK-NEXT:    asrs r5, r2, #31
-; CHECK-NEXT:    adds r6, r2, r0
-; CHECK-NEXT:    adc r7, r5, #0
-; CHECK-NEXT:    mla r5, r8, r1, lr
-; CHECK-NEXT:    asrl r6, r7, r0
-; CHECK-NEXT:    subs.w r8, r6, r0
-; CHECK-NEXT:    eor.w r6, r2, r0
-; CHECK-NEXT:    sbc lr, r7, #0
-; CHECK-NEXT:    eor.w r7, r3, r1
-; CHECK-NEXT:    orrs.w r6, r6, r2, asr #31
-; CHECK-NEXT:    orr.w r7, r7, r3, asr #31
-; CHECK-NEXT:    csetm r6, eq
-; CHECK-NEXT:    cmp r7, #0
-; CHECK-NEXT:    bfi r12, r6, #0, #8
+; CHECK-NEXT:    adds r4, r2, r3
+; CHECK-NEXT:    adc r5, r5, #0
+; CHECK-NEXT:    asrl r4, r5, r3
+; CHECK-NEXT:    subs r4, r4, r3
+; CHECK-NEXT:    sbc r8, r5, #0
+; CHECK-NEXT:    mla r5, r7, r1, r6
+; CHECK-NEXT:    eor.w r6, lr, r1
+; CHECK-NEXT:    orrs.w r6, r6, lr, asr #31
+; CHECK-NEXT:    eor.w r7, r2, r3
 ; CHECK-NEXT:    csetm r6, eq
+; CHECK-NEXT:    orrs.w r7, r7, r2, asr #31
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    rsb.w lr, lr, #0
+; CHECK-NEXT:    bfi r12, r7, #0, #8
+; CHECK-NEXT:    lsll r0, r5, lr
 ; CHECK-NEXT:    bfi r12, r6, #8, #8
-; CHECK-NEXT:    umull r6, r7, r8, r0
-; CHECK-NEXT:    rsb.w r8, r3, #0
-; CHECK-NEXT:    lsll r4, r5, r8
-; CHECK-NEXT:    vmsr p0, r12
-; CHECK-NEXT:    mla r3, lr, r0, r7
-; CHECK-NEXT:    lsll r4, r5, r1
+; CHECK-NEXT:    umull r4, r6, r4, r3
+; CHECK-NEXT:    lsll r0, r5, r1
 ; CHECK-NEXT:    rsbs r1, r2, #0
-; CHECK-NEXT:    lsll r6, r3, r1
-; CHECK-NEXT:    lsll r6, r3, r0
-; CHECK-NEXT:    vmov q0[2], q0[0], r6, r4
+; CHECK-NEXT:    vmsr p0, r12
+; CHECK-NEXT:    mla r7, r8, r3, r6
+; CHECK-NEXT:    lsll r4, r7, r1
+; CHECK-NEXT:    lsll r4, r7, r3
+; CHECK-NEXT:    vmov q0[2], q0[0], r4, r0
 ; CHECK-NEXT:    vpsel q0, q0, q1
 ; CHECK-NEXT:    vmov.f32 s1, s2
 ; CHECK-NEXT:    vmov.f32 s2, s8

diff  --git a/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll b/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll
index d9ef1030ee9222..55a621eaf4c9cc 100644
--- a/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll
@@ -6,8 +6,8 @@ declare i8 @llvm.smax.i8(i8 %a, i8 %b) readnone
 define arm_aapcs_vfpcc i8 @smaxi8(i8 %a, i8 %b) {
 ; CHECK-LABEL: smaxi8:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    sxtb r1, r1
 ; CHECK-NEXT:    sxtb r0, r0
+; CHECK-NEXT:    sxtb r1, r1
 ; CHECK-NEXT:    cmp r0, r1
 ; CHECK-NEXT:    csel r0, r0, r1, gt
 ; CHECK-NEXT:    bx lr
@@ -20,8 +20,8 @@ declare i16 @llvm.smax.i16(i16 %a, i16 %b) readnone
 define arm_aapcs_vfpcc i16 @smaxi16(i16 %a, i16 %b) {
 ; CHECK-LABEL: smaxi16:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    sxth r1, r1
 ; CHECK-NEXT:    sxth r0, r0
+; CHECK-NEXT:    sxth r1, r1
 ; CHECK-NEXT:    cmp r0, r1
 ; CHECK-NEXT:    csel r0, r0, r1, gt
 ; CHECK-NEXT:    bx lr
@@ -48,8 +48,10 @@ define arm_aapcs_vfpcc i64 @smaxi64(i64 %a, i64 %b) {
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    subs.w r12, r2, r0
 ; CHECK-NEXT:    sbcs.w r12, r3, r1
-; CHECK-NEXT:    csel r0, r0, r2, lt
-; CHECK-NEXT:    csel r1, r1, r3, lt
+; CHECK-NEXT:    cset r12, lt
+; CHECK-NEXT:    cmp.w r12, #0
+; CHECK-NEXT:    csel r0, r0, r2, ne
+; CHECK-NEXT:    csel r1, r1, r3, ne
 ; CHECK-NEXT:    bx lr
   %c = call i64 @llvm.smax.i64(i64 %a, i64 %b)
   ret i64 %c
@@ -203,8 +205,10 @@ define arm_aapcs_vfpcc <1 x i64> @smax1i64(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-NEXT:    sub sp, #8
 ; CHECK-NEXT:    subs.w r12, r2, r0
 ; CHECK-NEXT:    sbcs.w r12, r3, r1
-; CHECK-NEXT:    csel r0, r0, r2, lt
-; CHECK-NEXT:    csel r1, r1, r3, lt
+; CHECK-NEXT:    cset r12, lt
+; CHECK-NEXT:    cmp.w r12, #0
+; CHECK-NEXT:    csel r0, r0, r2, ne
+; CHECK-NEXT:    csel r1, r1, r3, ne
 ; CHECK-NEXT:    add sp, #8
 ; CHECK-NEXT:    bx lr
   %c = call <1 x i64> @llvm.smax.v1i64(<1 x i64> %a, <1 x i64> %b)
@@ -286,8 +290,8 @@ declare i8 @llvm.umax.i8(i8 %a, i8 %b) readnone
 define arm_aapcs_vfpcc i8 @umaxi8(i8 %a, i8 %b) {
 ; CHECK-LABEL: umaxi8:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    uxtb r1, r1
 ; CHECK-NEXT:    uxtb r0, r0
+; CHECK-NEXT:    uxtb r1, r1
 ; CHECK-NEXT:    cmp r0, r1
 ; CHECK-NEXT:    csel r0, r0, r1, hi
 ; CHECK-NEXT:    bx lr
@@ -300,8 +304,8 @@ declare i16 @llvm.umax.i16(i16 %a, i16 %b) readnone
 define arm_aapcs_vfpcc i16 @umaxi16(i16 %a, i16 %b) {
 ; CHECK-LABEL: umaxi16:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    uxth r1, r1
 ; CHECK-NEXT:    uxth r0, r0
+; CHECK-NEXT:    uxth r1, r1
 ; CHECK-NEXT:    cmp r0, r1
 ; CHECK-NEXT:    csel r0, r0, r1, hi
 ; CHECK-NEXT:    bx lr
@@ -328,8 +332,10 @@ define arm_aapcs_vfpcc i64 @umaxi64(i64 %a, i64 %b) {
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    subs.w r12, r2, r0
 ; CHECK-NEXT:    sbcs.w r12, r3, r1
-; CHECK-NEXT:    csel r0, r0, r2, lo
-; CHECK-NEXT:    csel r1, r1, r3, lo
+; CHECK-NEXT:    cset r12, lo
+; CHECK-NEXT:    cmp.w r12, #0
+; CHECK-NEXT:    csel r0, r0, r2, ne
+; CHECK-NEXT:    csel r1, r1, r3, ne
 ; CHECK-NEXT:    bx lr
   %c = call i64 @llvm.umax.i64(i64 %a, i64 %b)
   ret i64 %c
@@ -476,8 +482,10 @@ define arm_aapcs_vfpcc <1 x i64> @umax1i64(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-NEXT:    sub sp, #8
 ; CHECK-NEXT:    subs.w r12, r2, r0
 ; CHECK-NEXT:    sbcs.w r12, r3, r1
-; CHECK-NEXT:    csel r0, r0, r2, lo
-; CHECK-NEXT:    csel r1, r1, r3, lo
+; CHECK-NEXT:    cset r12, lo
+; CHECK-NEXT:    cmp.w r12, #0
+; CHECK-NEXT:    csel r0, r0, r2, ne
+; CHECK-NEXT:    csel r1, r1, r3, ne
 ; CHECK-NEXT:    add sp, #8
 ; CHECK-NEXT:    bx lr
   %c = call <1 x i64> @llvm.umax.v1i64(<1 x i64> %a, <1 x i64> %b)
@@ -559,8 +567,8 @@ declare i8 @llvm.smin.i8(i8 %a, i8 %b) readnone
 define arm_aapcs_vfpcc i8 @smini8(i8 %a, i8 %b) {
 ; CHECK-LABEL: smini8:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    sxtb r1, r1
 ; CHECK-NEXT:    sxtb r0, r0
+; CHECK-NEXT:    sxtb r1, r1
 ; CHECK-NEXT:    cmp r0, r1
 ; CHECK-NEXT:    csel r0, r0, r1, lt
 ; CHECK-NEXT:    bx lr
@@ -573,8 +581,8 @@ declare i16 @llvm.smin.i16(i16 %a, i16 %b) readnone
 define arm_aapcs_vfpcc i16 @smini16(i16 %a, i16 %b) {
 ; CHECK-LABEL: smini16:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    sxth r1, r1
 ; CHECK-NEXT:    sxth r0, r0
+; CHECK-NEXT:    sxth r1, r1
 ; CHECK-NEXT:    cmp r0, r1
 ; CHECK-NEXT:    csel r0, r0, r1, lt
 ; CHECK-NEXT:    bx lr
@@ -601,8 +609,10 @@ define arm_aapcs_vfpcc i64 @smini64(i64 %a, i64 %b) {
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    subs.w r12, r0, r2
 ; CHECK-NEXT:    sbcs.w r12, r1, r3
-; CHECK-NEXT:    csel r0, r0, r2, lt
-; CHECK-NEXT:    csel r1, r1, r3, lt
+; CHECK-NEXT:    cset r12, lt
+; CHECK-NEXT:    cmp.w r12, #0
+; CHECK-NEXT:    csel r0, r0, r2, ne
+; CHECK-NEXT:    csel r1, r1, r3, ne
 ; CHECK-NEXT:    bx lr
   %c = call i64 @llvm.smin.i64(i64 %a, i64 %b)
   ret i64 %c
@@ -756,8 +766,10 @@ define arm_aapcs_vfpcc <1 x i64> @smin1i64(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-NEXT:    sub sp, #8
 ; CHECK-NEXT:    subs.w r12, r0, r2
 ; CHECK-NEXT:    sbcs.w r12, r1, r3
-; CHECK-NEXT:    csel r0, r0, r2, lt
-; CHECK-NEXT:    csel r1, r1, r3, lt
+; CHECK-NEXT:    cset r12, lt
+; CHECK-NEXT:    cmp.w r12, #0
+; CHECK-NEXT:    csel r0, r0, r2, ne
+; CHECK-NEXT:    csel r1, r1, r3, ne
 ; CHECK-NEXT:    add sp, #8
 ; CHECK-NEXT:    bx lr
   %c = call <1 x i64> @llvm.smin.v1i64(<1 x i64> %a, <1 x i64> %b)
@@ -839,8 +851,8 @@ declare i8 @llvm.umin.i8(i8 %a, i8 %b) readnone
 define arm_aapcs_vfpcc i8 @umini8(i8 %a, i8 %b) {
 ; CHECK-LABEL: umini8:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    uxtb r1, r1
 ; CHECK-NEXT:    uxtb r0, r0
+; CHECK-NEXT:    uxtb r1, r1
 ; CHECK-NEXT:    cmp r0, r1
 ; CHECK-NEXT:    csel r0, r0, r1, lo
 ; CHECK-NEXT:    bx lr
@@ -853,8 +865,8 @@ declare i16 @llvm.umin.i16(i16 %a, i16 %b) readnone
 define arm_aapcs_vfpcc i16 @umini16(i16 %a, i16 %b) {
 ; CHECK-LABEL: umini16:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    uxth r1, r1
 ; CHECK-NEXT:    uxth r0, r0
+; CHECK-NEXT:    uxth r1, r1
 ; CHECK-NEXT:    cmp r0, r1
 ; CHECK-NEXT:    csel r0, r0, r1, lo
 ; CHECK-NEXT:    bx lr
@@ -881,8 +893,10 @@ define arm_aapcs_vfpcc i64 @umini64(i64 %a, i64 %b) {
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    subs.w r12, r0, r2
 ; CHECK-NEXT:    sbcs.w r12, r1, r3
-; CHECK-NEXT:    csel r0, r0, r2, lo
-; CHECK-NEXT:    csel r1, r1, r3, lo
+; CHECK-NEXT:    cset r12, lo
+; CHECK-NEXT:    cmp.w r12, #0
+; CHECK-NEXT:    csel r0, r0, r2, ne
+; CHECK-NEXT:    csel r1, r1, r3, ne
 ; CHECK-NEXT:    bx lr
   %c = call i64 @llvm.umin.i64(i64 %a, i64 %b)
   ret i64 %c
@@ -1029,8 +1043,10 @@ define arm_aapcs_vfpcc <1 x i64> @umin1i64(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-NEXT:    sub sp, #8
 ; CHECK-NEXT:    subs.w r12, r0, r2
 ; CHECK-NEXT:    sbcs.w r12, r1, r3
-; CHECK-NEXT:    csel r0, r0, r2, lo
-; CHECK-NEXT:    csel r1, r1, r3, lo
+; CHECK-NEXT:    cset r12, lo
+; CHECK-NEXT:    cmp.w r12, #0
+; CHECK-NEXT:    csel r0, r0, r2, ne
+; CHECK-NEXT:    csel r1, r1, r3, ne
 ; CHECK-NEXT:    add sp, #8
 ; CHECK-NEXT:    bx lr
   %c = call <1 x i64> @llvm.umin.v1i64(<1 x i64> %a, <1 x i64> %b)

diff  --git a/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll b/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll
index 43ed5eefbf4c77..70957ca950d71f 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll
@@ -13,8 +13,8 @@ define void @arm_cmplx_dot_prod_q15(ptr noundef %pSrcA, ptr noundef %pSrcB, i32
 ; CHECK-NEXT:    movs r6, #2
 ; CHECK-NEXT:    lsrs r7, r2, #3
 ; CHECK-NEXT:    rsb r6, r6, r2, lsr #3
+; CHECK-NEXT:    movs r5, #0
 ; CHECK-NEXT:    cmp r7, #2
-; CHECK-NEXT:    mov.w r5, #0
 ; CHECK-NEXT:    csel r7, r6, r5, hs
 ; CHECK-NEXT:    add.w lr, r7, #1
 ; CHECK-NEXT:    mov r4, r5

diff  --git a/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll b/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll
index 0bec2b100911cf..101b49fea488a8 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll
@@ -28,12 +28,11 @@ define arm_aapcs_vfpcc <4 x i32> @sext_v4i1_v4f32(<4 x float> %src1, <4 x float>
 ; CHECK-MVE-NEXT:    csetm r1, ne
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
-; CHECK-MVE-NEXT:    vmov q2[2], q2[0], r1, r0
-; CHECK-MVE-NEXT:    csetm r0, ne
+; CHECK-MVE-NEXT:    vmov q0[2], q0[0], r1, r0
+; CHECK-MVE-NEXT:    csetm r2, ne
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    csetm r1, ne
-; CHECK-MVE-NEXT:    vmov q2[3], q2[1], r1, r0
-; CHECK-MVE-NEXT:    vmov q0, q2
+; CHECK-MVE-NEXT:    csetm r3, ne
+; CHECK-MVE-NEXT:    vmov q0[3], q0[1], r3, r2
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: sext_v4i1_v4f32:
@@ -66,49 +65,49 @@ entry:
 define arm_aapcs_vfpcc <8 x i16> @sext_v8i1_v8f32(<8 x half> %src1, <8 x half> %src2) {
 ; CHECK-MVE-LABEL: sext_v8i1_v8f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .save {r4, lr}
-; CHECK-MVE-NEXT:    push {r4, lr}
+; CHECK-MVE-NEXT:    .save {r4, r5, r7, lr}
+; CHECK-MVE-NEXT:    push {r4, r5, r7, lr}
 ; CHECK-MVE-NEXT:    vcmp.f16 s3, s7
-; CHECK-MVE-NEXT:    vmovx.f16 s8, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s8, s7
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmovx.f16 s10, s2
+; CHECK-MVE-NEXT:    vmovx.f16 s10, s3
 ; CHECK-MVE-NEXT:    vcmp.f16 s10, s8
-; CHECK-MVE-NEXT:    vmovx.f16 s8, s5
-; CHECK-MVE-NEXT:    vmovx.f16 s10, s1
 ; CHECK-MVE-NEXT:    csetm r12, ne
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f16 s10, s8
+; CHECK-MVE-NEXT:    vcmp.f16 s2, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s2
 ; CHECK-MVE-NEXT:    csetm lr, ne
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f16 s2, s6
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s4
-; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s5
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
 ; CHECK-MVE-NEXT:    csetm r2, ne
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f16 s6, s2
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s3
+; CHECK-MVE-NEXT:    vcmp.f16 s1, s5
 ; CHECK-MVE-NEXT:    csetm r3, ne
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f16 s1, s5
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s2
+; CHECK-MVE-NEXT:    vmovx.f16 s2, s4
 ; CHECK-MVE-NEXT:    csetm r0, ne
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s0
 ; CHECK-MVE-NEXT:    csetm r1, ne
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f16 s2, s0
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s2
 ; CHECK-MVE-NEXT:    csetm r4, ne
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vmov.16 q0[0], r4
-; CHECK-MVE-NEXT:    vmov.16 q0[1], r0
-; CHECK-MVE-NEXT:    vmov.16 q0[2], r1
-; CHECK-MVE-NEXT:    vmov.16 q0[3], r2
-; CHECK-MVE-NEXT:    vmov.16 q0[4], r3
-; CHECK-MVE-NEXT:    vmov.16 q0[5], lr
+; CHECK-MVE-NEXT:    csetm r5, ne
+; CHECK-MVE-NEXT:    vmov.16 q0[1], r5
+; CHECK-MVE-NEXT:    vmov.16 q0[2], r0
+; CHECK-MVE-NEXT:    vmov.16 q0[3], r1
+; CHECK-MVE-NEXT:    vmov.16 q0[4], r2
+; CHECK-MVE-NEXT:    vmov.16 q0[5], r3
 ; CHECK-MVE-NEXT:    vmov.16 q0[6], r12
-; CHECK-MVE-NEXT:    csetm r0, ne
-; CHECK-MVE-NEXT:    vmov.16 q0[7], r0
-; CHECK-MVE-NEXT:    pop {r4, pc}
+; CHECK-MVE-NEXT:    vmov.16 q0[7], lr
+; CHECK-MVE-NEXT:    pop {r4, r5, r7, pc}
 ;
 ; CHECK-MVEFP-LABEL: sext_v8i1_v8f32:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
@@ -199,22 +198,22 @@ define arm_aapcs_vfpcc <2 x i64> @sext_v2i1_v2f64(<2 x double> %src) {
 ; CHECK-MVEFP-NEXT:    vpush {d8, d9}
 ; CHECK-MVEFP-NEXT:    vmov q4, q0
 ; CHECK-MVEFP-NEXT:    vldr d0, .LCPI6_0
-; CHECK-MVEFP-NEXT:    vmov r0, r1, d8
+; CHECK-MVEFP-NEXT:    vmov r0, r1, d9
 ; CHECK-MVEFP-NEXT:    vmov r4, r5, d0
 ; CHECK-MVEFP-NEXT:    mov r2, r4
 ; CHECK-MVEFP-NEXT:    mov r3, r5
 ; CHECK-MVEFP-NEXT:    bl __aeabi_dcmpeq
 ; CHECK-MVEFP-NEXT:    mov r6, r0
-; CHECK-MVEFP-NEXT:    vmov r0, r1, d9
+; CHECK-MVEFP-NEXT:    vmov r0, r1, d8
 ; CHECK-MVEFP-NEXT:    mov r2, r4
 ; CHECK-MVEFP-NEXT:    mov r3, r5
 ; CHECK-MVEFP-NEXT:    bl __aeabi_dcmpeq
-; CHECK-MVEFP-NEXT:    cmp r0, #0
-; CHECK-MVEFP-NEXT:    csetm r0, eq
 ; CHECK-MVEFP-NEXT:    cmp r6, #0
 ; CHECK-MVEFP-NEXT:    csetm r1, eq
-; CHECK-MVEFP-NEXT:    vmov q0[2], q0[0], r1, r0
-; CHECK-MVEFP-NEXT:    vmov q0[3], q0[1], r1, r0
+; CHECK-MVEFP-NEXT:    cmp r0, #0
+; CHECK-MVEFP-NEXT:    csetm r0, eq
+; CHECK-MVEFP-NEXT:    vmov q0[2], q0[0], r0, r1
+; CHECK-MVEFP-NEXT:    vmov q0[3], q0[1], r0, r1
 ; CHECK-MVEFP-NEXT:    vpop {d8, d9}
 ; CHECK-MVEFP-NEXT:    pop {r4, r5, r6, pc}
 ; CHECK-MVEFP-NEXT:    .p2align 3
@@ -246,22 +245,22 @@ entry:
 define arm_aapcs_vfpcc <4 x i32> @zext_v4i1_v4f32(<4 x float> %src1, <4 x float> %src2) {
 ; CHECK-MVE-LABEL: zext_v4i1_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s2, s6
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s7
 ; CHECK-MVE-NEXT:    vmov.i32 q2, #0x1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
 ; CHECK-MVE-NEXT:    csetm r0, ne
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s7
+; CHECK-MVE-NEXT:    vcmp.f32 s2, s6
 ; CHECK-MVE-NEXT:    csetm r1, ne
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
-; CHECK-MVE-NEXT:    vmov q3[2], q3[0], r1, r0
-; CHECK-MVE-NEXT:    csetm r0, ne
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
+; CHECK-MVE-NEXT:    csetm r2, ne
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    csetm r1, ne
-; CHECK-MVE-NEXT:    vmov q3[3], q3[1], r1, r0
-; CHECK-MVE-NEXT:    vand q0, q3, q2
+; CHECK-MVE-NEXT:    csetm r3, ne
+; CHECK-MVE-NEXT:    vmov q0[2], q0[0], r3, r2
+; CHECK-MVE-NEXT:    vmov q0[3], q0[1], r1, r0
+; CHECK-MVE-NEXT:    vand q0, q0, q2
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: zext_v4i1_v4f32:
@@ -294,51 +293,51 @@ entry:
 define arm_aapcs_vfpcc <8 x i16> @zext_v8i1_v8f32(<8 x half> %src1, <8 x half> %src2) {
 ; CHECK-MVE-LABEL: zext_v8i1_v8f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .save {r4, lr}
-; CHECK-MVE-NEXT:    push {r4, lr}
-; CHECK-MVE-NEXT:    vcmp.f16 s3, s7
+; CHECK-MVE-NEXT:    .save {r4, r5, r7, lr}
+; CHECK-MVE-NEXT:    push {r4, r5, r7, lr}
+; CHECK-MVE-NEXT:    vmovx.f16 s8, s7
+; CHECK-MVE-NEXT:    vmovx.f16 s10, s3
+; CHECK-MVE-NEXT:    vcmp.f16 s10, s8
 ; CHECK-MVE-NEXT:    vmovx.f16 s8, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vcmp.f16 s3, s7
 ; CHECK-MVE-NEXT:    vmovx.f16 s10, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s10, s8
 ; CHECK-MVE-NEXT:    csetm r12, ne
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vcmp.f16 s10, s8
+; CHECK-MVE-NEXT:    csetm lr, ne
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f16 s2, s6
 ; CHECK-MVE-NEXT:    vmovx.f16 s2, s5
 ; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
-; CHECK-MVE-NEXT:    csetm lr, ne
+; CHECK-MVE-NEXT:    csetm r2, ne
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f16 s6, s2
 ; CHECK-MVE-NEXT:    vmovx.f16 s2, s4
 ; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
-; CHECK-MVE-NEXT:    csetm r2, ne
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f16 s1, s5
 ; CHECK-MVE-NEXT:    csetm r3, ne
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f16 s6, s2
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s3
+; CHECK-MVE-NEXT:    vcmp.f16 s1, s5
 ; CHECK-MVE-NEXT:    csetm r0, ne
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
+; CHECK-MVE-NEXT:    vcmp.f16 s6, s2
 ; CHECK-MVE-NEXT:    csetm r1, ne
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f16 s2, s0
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
 ; CHECK-MVE-NEXT:    vmov.i16 q0, #0x1
 ; CHECK-MVE-NEXT:    csetm r4, ne
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov.16 q1[0], r4
-; CHECK-MVE-NEXT:    vmov.16 q1[1], r1
-; CHECK-MVE-NEXT:    vmov.16 q1[2], r0
-; CHECK-MVE-NEXT:    vmov.16 q1[3], r3
-; CHECK-MVE-NEXT:    vmov.16 q1[4], r2
-; CHECK-MVE-NEXT:    vmov.16 q1[5], lr
-; CHECK-MVE-NEXT:    vmov.16 q1[6], r12
-; CHECK-MVE-NEXT:    csetm r0, ne
-; CHECK-MVE-NEXT:    vmov.16 q1[7], r0
+; CHECK-MVE-NEXT:    csetm r5, ne
+; CHECK-MVE-NEXT:    vmov.16 q1[0], r5
+; CHECK-MVE-NEXT:    vmov.16 q1[1], r4
+; CHECK-MVE-NEXT:    vmov.16 q1[2], r1
+; CHECK-MVE-NEXT:    vmov.16 q1[3], r0
+; CHECK-MVE-NEXT:    vmov.16 q1[4], r3
+; CHECK-MVE-NEXT:    vmov.16 q1[5], r2
+; CHECK-MVE-NEXT:    vmov.16 q1[6], lr
+; CHECK-MVE-NEXT:    vmov.16 q1[7], r12
 ; CHECK-MVE-NEXT:    vand q0, q1, q0
-; CHECK-MVE-NEXT:    pop {r4, pc}
+; CHECK-MVE-NEXT:    pop {r4, r5, r7, pc}
 ;
 ; CHECK-MVEFP-LABEL: zext_v8i1_v8f32:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
@@ -615,24 +614,24 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @fptoui_v4i1_v4f32(<4 x float> %src) {
 ; CHECK-MVE-LABEL: fptoui_v4i1_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
+; CHECK-MVE-NEXT:    vcvt.s32.f32 s6, s3
+; CHECK-MVE-NEXT:    vldr s8, .LCPI20_0
 ; CHECK-MVE-NEXT:    vcvt.s32.f32 s2, s2
-; CHECK-MVE-NEXT:    vldr s10, .LCPI20_0
-; CHECK-MVE-NEXT:    vcvt.s32.f32 s6, s1
-; CHECK-MVE-NEXT:    vcvt.s32.f32 s4, s3
+; CHECK-MVE-NEXT:    vcvt.s32.f32 s10, s1
+; CHECK-MVE-NEXT:    vmov.f32 s4, #1.000000e+00
 ; CHECK-MVE-NEXT:    vcvt.s32.f32 s0, s0
-; CHECK-MVE-NEXT:    vmov.f32 s8, #1.000000e+00
-; CHECK-MVE-NEXT:    vmov r3, s2
-; CHECK-MVE-NEXT:    vmov r2, s6
-; CHECK-MVE-NEXT:    vmov r1, s4
+; CHECK-MVE-NEXT:    vmov r0, s6
+; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    vmov r0, s2
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s8, s4
+; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    vmov r0, s10
+; CHECK-MVE-NEXT:    vseleq.f32 s2, s8, s4
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    cmp r3, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s2, s10, s8
-; CHECK-MVE-NEXT:    cmp r2, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s1, s10, s8
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s10, s8
+; CHECK-MVE-NEXT:    vseleq.f32 s1, s8, s4
 ; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s0, s10, s8
+; CHECK-MVE-NEXT:    vseleq.f32 s0, s8, s4
 ; CHECK-MVE-NEXT:    bx lr
 ; CHECK-MVE-NEXT:    .p2align 2
 ; CHECK-MVE-NEXT:  @ %bb.1:
@@ -655,27 +654,24 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @fptosi_v4i1_v4f32(<4 x float> %src) {
 ; CHECK-MVE-LABEL: fptosi_v4i1_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
+; CHECK-MVE-NEXT:    vcvt.s32.f32 s8, s3
+; CHECK-MVE-NEXT:    vldr s10, .LCPI21_0
 ; CHECK-MVE-NEXT:    vcvt.s32.f32 s2, s2
-; CHECK-MVE-NEXT:    vldr s8, .LCPI21_0
-; CHECK-MVE-NEXT:    vcvt.s32.f32 s4, s1
-; CHECK-MVE-NEXT:    vcvt.s32.f32 s10, s3
+; CHECK-MVE-NEXT:    vcvt.s32.f32 s6, s1
+; CHECK-MVE-NEXT:    vmov.f32 s4, #1.000000e+00
 ; CHECK-MVE-NEXT:    vcvt.s32.f32 s0, s0
-; CHECK-MVE-NEXT:    vmov.f32 s6, #1.000000e+00
-; CHECK-MVE-NEXT:    vmov r3, s2
-; CHECK-MVE-NEXT:    vmov r2, s4
-; CHECK-MVE-NEXT:    vmov r1, s10
+; CHECK-MVE-NEXT:    vmov r0, s8
+; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vmov r0, s2
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s10, s4
+; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vmov r0, s6
+; CHECK-MVE-NEXT:    vseleq.f32 s2, s10, s4
+; CHECK-MVE-NEXT:    lsls r0, r0, #31
 ; CHECK-MVE-NEXT:    vmov r0, s0
-; CHECK-MVE-NEXT:    lsls r3, r3, #31
-; CHECK-MVE-NEXT:    lsl.w r2, r2, #31
-; CHECK-MVE-NEXT:    vseleq.f32 s2, s8, s6
-; CHECK-MVE-NEXT:    cmp r2, #0
-; CHECK-MVE-NEXT:    lsl.w r1, r1, #31
-; CHECK-MVE-NEXT:    vseleq.f32 s1, s8, s6
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    lsl.w r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s8, s6
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s0, s8, s6
+; CHECK-MVE-NEXT:    vseleq.f32 s1, s10, s4
+; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vseleq.f32 s0, s10, s4
 ; CHECK-MVE-NEXT:    bx lr
 ; CHECK-MVE-NEXT:    .p2align 2
 ; CHECK-MVE-NEXT:  @ %bb.1:
@@ -805,45 +801,45 @@ define arm_aapcs_vfpcc <8 x half> @fptoui_v8i1_v8f16(<8 x half> %src) {
 ; CHECK-MVE-NEXT:    vcvt.s32.f16 s4, s0
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s0
 ; CHECK-MVE-NEXT:    vcvt.s32.f16 s0, s0
-; CHECK-MVE-NEXT:    vmov r0, s4
-; CHECK-MVE-NEXT:    vmov r1, s0
 ; CHECK-MVE-NEXT:    vldr.16 s8, .LCPI24_0
+; CHECK-MVE-NEXT:    vmov r0, s0
 ; CHECK-MVE-NEXT:    vmov.f16 s6, #1.000000e+00
-; CHECK-MVE-NEXT:    vmovx.f16 s10, s1
-; CHECK-MVE-NEXT:    vcvt.s32.f16 s10, s10
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov r1, s10
-; CHECK-MVE-NEXT:    vseleq.f16 s4, s8, s6
 ; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s0, s8, s6
-; CHECK-MVE-NEXT:    vmovx.f16 s10, s3
-; CHECK-MVE-NEXT:    vins.f16 s0, s4
-; CHECK-MVE-NEXT:    vcvt.s32.f16 s4, s1
 ; CHECK-MVE-NEXT:    vmov r0, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s10, s8, s6
+; CHECK-MVE-NEXT:    vcvt.s32.f16 s4, s1
+; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s0, s8, s6
+; CHECK-MVE-NEXT:    vins.f16 s0, s10
+; CHECK-MVE-NEXT:    vmovx.f16 s10, s1
 ; CHECK-MVE-NEXT:    vcvt.s32.f16 s10, s10
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s4, s8, s6
+; CHECK-MVE-NEXT:    vmov r0, s10
 ; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s1, s8, s6
-; CHECK-MVE-NEXT:    vins.f16 s1, s4
+; CHECK-MVE-NEXT:    vmov r0, s4
 ; CHECK-MVE-NEXT:    vcvt.s32.f16 s4, s2
 ; CHECK-MVE-NEXT:    vmovx.f16 s2, s2
-; CHECK-MVE-NEXT:    vmov r0, s4
 ; CHECK-MVE-NEXT:    vcvt.s32.f16 s2, s2
-; CHECK-MVE-NEXT:    vmov r1, s2
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov r1, s10
-; CHECK-MVE-NEXT:    vseleq.f16 s4, s8, s6
+; CHECK-MVE-NEXT:    vseleq.f16 s10, s8, s6
 ; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s2, s8, s6
-; CHECK-MVE-NEXT:    vins.f16 s2, s4
+; CHECK-MVE-NEXT:    vmov r0, s2
+; CHECK-MVE-NEXT:    vseleq.f16 s1, s8, s6
+; CHECK-MVE-NEXT:    vins.f16 s1, s10
+; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    vmov r0, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s10, s8, s6
 ; CHECK-MVE-NEXT:    vcvt.s32.f16 s4, s3
+; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    vseleq.f16 s2, s8, s6
+; CHECK-MVE-NEXT:    vins.f16 s2, s10
+; CHECK-MVE-NEXT:    vmovx.f16 s10, s3
+; CHECK-MVE-NEXT:    vcvt.s32.f16 s10, s10
+; CHECK-MVE-NEXT:    vmov r0, s10
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vmov r0, s4
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s4, s8, s6
+; CHECK-MVE-NEXT:    vseleq.f16 s10, s8, s6
 ; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f16 s3, s8, s6
-; CHECK-MVE-NEXT:    vins.f16 s3, s4
+; CHECK-MVE-NEXT:    vins.f16 s3, s10
 ; CHECK-MVE-NEXT:    bx lr
 ; CHECK-MVE-NEXT:    .p2align 1
 ; CHECK-MVE-NEXT:  @ %bb.1:
@@ -869,49 +865,45 @@ define arm_aapcs_vfpcc <8 x half> @fptosi_v8i1_v8f16(<8 x half> %src) {
 ; CHECK-MVE-NEXT:    vcvt.s32.f16 s4, s0
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s0
 ; CHECK-MVE-NEXT:    vcvt.s32.f16 s0, s0
-; CHECK-MVE-NEXT:    vmov r0, s4
-; CHECK-MVE-NEXT:    vmov r1, s0
 ; CHECK-MVE-NEXT:    vldr.16 s8, .LCPI25_0
+; CHECK-MVE-NEXT:    vmov r0, s0
 ; CHECK-MVE-NEXT:    vmov.f16 s6, #1.000000e+00
-; CHECK-MVE-NEXT:    vmovx.f16 s10, s1
-; CHECK-MVE-NEXT:    vcvt.s32.f16 s10, s10
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    lsls r1, r1, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s4, s8, s6
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s0, s8, s6
-; CHECK-MVE-NEXT:    vmov r1, s10
-; CHECK-MVE-NEXT:    vins.f16 s0, s4
-; CHECK-MVE-NEXT:    vcvt.s32.f16 s4, s1
 ; CHECK-MVE-NEXT:    vmov r0, s4
-; CHECK-MVE-NEXT:    vmovx.f16 s10, s3
+; CHECK-MVE-NEXT:    vseleq.f16 s10, s8, s6
+; CHECK-MVE-NEXT:    vcvt.s32.f16 s4, s1
+; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s0, s8, s6
+; CHECK-MVE-NEXT:    vins.f16 s0, s10
+; CHECK-MVE-NEXT:    vmovx.f16 s10, s1
 ; CHECK-MVE-NEXT:    vcvt.s32.f16 s10, s10
-; CHECK-MVE-NEXT:    lsls r1, r1, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s4, s8, s6
+; CHECK-MVE-NEXT:    vmov r0, s10
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s1, s8, s6
-; CHECK-MVE-NEXT:    vins.f16 s1, s4
+; CHECK-MVE-NEXT:    vmov r0, s4
 ; CHECK-MVE-NEXT:    vcvt.s32.f16 s4, s2
 ; CHECK-MVE-NEXT:    vmovx.f16 s2, s2
-; CHECK-MVE-NEXT:    vmov r0, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s10, s8, s6
 ; CHECK-MVE-NEXT:    vcvt.s32.f16 s2, s2
-; CHECK-MVE-NEXT:    vmov r1, s2
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    lsls r1, r1, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s4, s8, s6
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s2, s8, s6
-; CHECK-MVE-NEXT:    vmov r1, s10
-; CHECK-MVE-NEXT:    vins.f16 s2, s4
+; CHECK-MVE-NEXT:    vmov r0, s2
+; CHECK-MVE-NEXT:    vseleq.f16 s1, s8, s6
+; CHECK-MVE-NEXT:    vins.f16 s1, s10
+; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vmov r0, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s10, s8, s6
 ; CHECK-MVE-NEXT:    vcvt.s32.f16 s4, s3
+; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s2, s8, s6
+; CHECK-MVE-NEXT:    vins.f16 s2, s10
+; CHECK-MVE-NEXT:    vmovx.f16 s10, s3
+; CHECK-MVE-NEXT:    vcvt.s32.f16 s10, s10
+; CHECK-MVE-NEXT:    vmov r0, s10
+; CHECK-MVE-NEXT:    lsls r0, r0, #31
 ; CHECK-MVE-NEXT:    vmov r0, s4
-; CHECK-MVE-NEXT:    lsls r1, r1, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s4, s8, s6
+; CHECK-MVE-NEXT:    vseleq.f16 s10, s8, s6
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f16 s3, s8, s6
-; CHECK-MVE-NEXT:    vins.f16 s3, s4
+; CHECK-MVE-NEXT:    vins.f16 s3, s10
 ; CHECK-MVE-NEXT:    bx lr
 ; CHECK-MVE-NEXT:    .p2align 1
 ; CHECK-MVE-NEXT:  @ %bb.1:

diff  --git a/llvm/test/CodeGen/Thumb2/mve-pred-or.ll b/llvm/test/CodeGen/Thumb2/mve-pred-or.ll
index ff5ee8929aae8f..46406aeebfa4ee 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-or.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-or.ll
@@ -378,21 +378,23 @@ entry:
 define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: cmpeqz_v2i1:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov r0, r1, d2
+; CHECK-NEXT:    vmov r0, r1, d0
 ; CHECK-NEXT:    orrs r0, r1
-; CHECK-NEXT:    vmov r1, r2, d0
-; CHECK-NEXT:    cset r0, eq
+; CHECK-NEXT:    vmov r1, r2, d2
 ; CHECK-NEXT:    orrs r1, r2
-; CHECK-NEXT:    csinc r0, r0, zr, ne
+; CHECK-NEXT:    cset r1, eq
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    csinc r0, r1, zr, ne
 ; CHECK-NEXT:    movs r1, #0
 ; CHECK-NEXT:    rsbs r0, r0, #0
 ; CHECK-NEXT:    bfi r1, r0, #0, #8
-; CHECK-NEXT:    vmov r0, r2, d3
+; CHECK-NEXT:    vmov r0, r2, d1
 ; CHECK-NEXT:    orrs r0, r2
-; CHECK-NEXT:    vmov r2, r3, d1
-; CHECK-NEXT:    cset r0, eq
+; CHECK-NEXT:    vmov r2, r3, d3
 ; CHECK-NEXT:    orrs r2, r3
-; CHECK-NEXT:    csinc r0, r0, zr, ne
+; CHECK-NEXT:    cset r2, eq
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    csinc r0, r2, zr, ne
 ; CHECK-NEXT:    rsbs r0, r0, #0
 ; CHECK-NEXT:    bfi r1, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r1

diff  --git a/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll b/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll
index 9400f24e7192c8..bf6468baac22bb 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll
@@ -4,15 +4,15 @@
 define void @arm_min_helium_f32(ptr %pSrc, i32 %blockSize, ptr nocapture %pResult, ptr nocapture %pIndex) {
 ; CHECK-LABEL: arm_min_helium_f32:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r7, lr}
-; CHECK-NEXT:    push {r4, r5, r7, lr}
-; CHECK-NEXT:    movs r4, #0
+; CHECK-NEXT:    .save {r4, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r6, r7, lr}
+; CHECK-NEXT:    movs r6, #0
 ; CHECK-NEXT:    mov.w r12, #4
-; CHECK-NEXT:    vidup.u32 q2, r4, #1
-; CHECK-NEXT:    movw r5, #54437
-; CHECK-NEXT:    movt r5, #21352
-; CHECK-NEXT:    vdup.32 q1, r5
+; CHECK-NEXT:    vidup.u32 q2, r6, #1
+; CHECK-NEXT:    movw r4, #54437
+; CHECK-NEXT:    movt r4, #21352
 ; CHECK-NEXT:    vmov.i32 q0, #0x0
+; CHECK-NEXT:    vdup.32 q1, r4
 ; CHECK-NEXT:    dlstp.32 lr, r1
 ; CHECK-NEXT:  .LBB0_1: @ %do.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
@@ -33,7 +33,7 @@ define void @arm_min_helium_f32(ptr %pSrc, i32 %blockSize, ptr nocapture %pResul
 ; CHECK-NEXT:    vminv.u32 r1, q0
 ; CHECK-NEXT:    str r1, [r3]
 ; CHECK-NEXT:    vstr s8, [r2]
-; CHECK-NEXT:    pop {r4, r5, r7, pc}
+; CHECK-NEXT:    pop {r4, r6, r7, pc}
 ; CHECK-NEXT:    .p2align 2
 ; CHECK-NEXT:  @ %bb.3:
 ; CHECK-NEXT:  .LCPI0_0:

diff  --git a/llvm/test/CodeGen/Thumb2/mve-pred-vselect.ll b/llvm/test/CodeGen/Thumb2/mve-pred-vselect.ll
index a990cee1f5fb31..f70af5661f4c90 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-vselect.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-vselect.ll
@@ -70,27 +70,29 @@ entry:
 define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
 ; CHECK-LABEL: cmpeqz_v2i1:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r0, r1, d4
 ; CHECK-NEXT:    orrs r0, r1
-; CHECK-NEXT:    vmov r1, r2, d2
-; CHECK-NEXT:    cset r0, eq
+; CHECK-NEXT:    vmov r1, r2, d0
 ; CHECK-NEXT:    orrs r1, r2
-; CHECK-NEXT:    vmov r2, r3, d4
+; CHECK-NEXT:    vmov r2, r3, d2
 ; CHECK-NEXT:    cset r1, eq
 ; CHECK-NEXT:    orrs r2, r3
-; CHECK-NEXT:    csel r0, r0, r1, eq
+; CHECK-NEXT:    cset r2, eq
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    csel r0, r1, r2, eq
 ; CHECK-NEXT:    movs r1, #0
 ; CHECK-NEXT:    rsbs r0, r0, #0
 ; CHECK-NEXT:    bfi r1, r0, #0, #8
-; CHECK-NEXT:    vmov r0, r2, d1
-; CHECK-NEXT:    orrs r0, r2
-; CHECK-NEXT:    vmov r2, r3, d3
-; CHECK-NEXT:    cset r12, eq
+; CHECK-NEXT:    vmov r0, r2, d5
+; CHECK-NEXT:    orr.w r12, r0, r2
+; CHECK-NEXT:    vmov r2, r3, d1
 ; CHECK-NEXT:    orrs r2, r3
-; CHECK-NEXT:    vmov r3, r0, d5
+; CHECK-NEXT:    vmov r3, r0, d3
 ; CHECK-NEXT:    cset r2, eq
 ; CHECK-NEXT:    orrs r0, r3
-; CHECK-NEXT:    csel r0, r12, r2, eq
+; CHECK-NEXT:    cset r0, eq
+; CHECK-NEXT:    cmp.w r12, #0
+; CHECK-NEXT:    csel r0, r2, r0, eq
 ; CHECK-NEXT:    rsbs r0, r0, #0
 ; CHECK-NEXT:    bfi r1, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r1
@@ -174,27 +176,29 @@ entry:
 define arm_aapcs_vfpcc <2 x i64> @cmpnez_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
 ; CHECK-LABEL: cmpnez_v2i1:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r0, r1, d4
 ; CHECK-NEXT:    orrs r0, r1
-; CHECK-NEXT:    vmov r1, r2, d2
-; CHECK-NEXT:    cset r0, ne
+; CHECK-NEXT:    vmov r1, r2, d0
 ; CHECK-NEXT:    orrs r1, r2
-; CHECK-NEXT:    vmov r2, r3, d4
+; CHECK-NEXT:    vmov r2, r3, d2
 ; CHECK-NEXT:    cset r1, ne
 ; CHECK-NEXT:    orrs r2, r3
-; CHECK-NEXT:    csel r0, r0, r1, ne
+; CHECK-NEXT:    cset r2, ne
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    csel r0, r1, r2, ne
 ; CHECK-NEXT:    movs r1, #0
 ; CHECK-NEXT:    rsbs r0, r0, #0
 ; CHECK-NEXT:    bfi r1, r0, #0, #8
-; CHECK-NEXT:    vmov r0, r2, d1
-; CHECK-NEXT:    orrs r0, r2
-; CHECK-NEXT:    vmov r2, r3, d3
-; CHECK-NEXT:    cset r12, ne
+; CHECK-NEXT:    vmov r0, r2, d5
+; CHECK-NEXT:    orr.w r12, r0, r2
+; CHECK-NEXT:    vmov r2, r3, d1
 ; CHECK-NEXT:    orrs r2, r3
-; CHECK-NEXT:    vmov r3, r0, d5
+; CHECK-NEXT:    vmov r3, r0, d3
 ; CHECK-NEXT:    cset r2, ne
 ; CHECK-NEXT:    orrs r0, r3
-; CHECK-NEXT:    csel r0, r12, r2, ne
+; CHECK-NEXT:    cset r0, ne
+; CHECK-NEXT:    cmp.w r12, #0
+; CHECK-NEXT:    csel r0, r2, r0, ne
 ; CHECK-NEXT:    rsbs r0, r0, #0
 ; CHECK-NEXT:    bfi r1, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r1
@@ -280,19 +284,19 @@ entry:
 define arm_aapcs_vfpcc <2 x i64> @cmpsltz_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
 ; CHECK-LABEL: cmpsltz_v2i1:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov r2, s9
+; CHECK-NEXT:    vmov r0, s9
 ; CHECK-NEXT:    movs r3, #0
-; CHECK-NEXT:    vmov r0, s1
-; CHECK-NEXT:    vmov r1, s5
-; CHECK-NEXT:    cmp.w r3, r2, lsr #31
-; CHECK-NEXT:    vmov r2, s7
-; CHECK-NEXT:    csel r0, r0, r1, ne
-; CHECK-NEXT:    vmov r1, s3
-; CHECK-NEXT:    asr.w r12, r0, #31
-; CHECK-NEXT:    vmov r0, s11
+; CHECK-NEXT:    vmov r1, s1
+; CHECK-NEXT:    vmov r2, s5
 ; CHECK-NEXT:    cmp.w r3, r0, lsr #31
-; CHECK-NEXT:    bfi r3, r12, #0, #8
 ; CHECK-NEXT:    csel r0, r1, r2, ne
+; CHECK-NEXT:    vmov r1, s11
+; CHECK-NEXT:    asr.w r12, r0, #31
+; CHECK-NEXT:    vmov r2, s3
+; CHECK-NEXT:    vmov r0, s7
+; CHECK-NEXT:    cmp.w r3, r1, lsr #31
+; CHECK-NEXT:    bfi r3, r12, #0, #8
+; CHECK-NEXT:    csel r0, r2, r0, ne
 ; CHECK-NEXT:    asrs r0, r0, #31
 ; CHECK-NEXT:    bfi r3, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r3
@@ -377,34 +381,34 @@ entry:
 define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1_i1(<2 x i64> %a, <2 x i64> %b, i64 %c) {
 ; CHECK-LABEL: cmpeqz_v2i1_i1:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r7, lr}
-; CHECK-NEXT:    push {r4, r5, r7, lr}
-; CHECK-NEXT:    vmov r2, r3, d2
-; CHECK-NEXT:    orrs r2, r3
-; CHECK-NEXT:    vmov r3, r4, d3
+; CHECK-NEXT:    .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    orr.w r3, r0, r1
+; CHECK-NEXT:    vmov r0, r1, d2
+; CHECK-NEXT:    orrs r0, r1
+; CHECK-NEXT:    vmov r1, r2, d3
 ; CHECK-NEXT:    csetm r12, eq
-; CHECK-NEXT:    movs r2, #0
-; CHECK-NEXT:    orrs r3, r4
-; CHECK-NEXT:    vmov r4, r3, d0
-; CHECK-NEXT:    csetm r5, eq
-; CHECK-NEXT:    orrs r3, r4
-; CHECK-NEXT:    vmov r3, r4, d1
-; CHECK-NEXT:    csetm lr, eq
-; CHECK-NEXT:    orrs r3, r4
+; CHECK-NEXT:    movs r0, #0
+; CHECK-NEXT:    orrs r1, r2
+; CHECK-NEXT:    vmov r1, r2, d0
 ; CHECK-NEXT:    csetm r4, eq
-; CHECK-NEXT:    orrs r0, r1
-; CHECK-NEXT:    beq .LBB15_2
+; CHECK-NEXT:    orrs r1, r2
+; CHECK-NEXT:    vmov r1, r2, d1
+; CHECK-NEXT:    csetm lr, eq
+; CHECK-NEXT:    orrs r1, r2
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    cbz r3, .LBB15_2
 ; CHECK-NEXT:  @ %bb.1: @ %select.false
-; CHECK-NEXT:    bfi r2, r12, #0, #8
-; CHECK-NEXT:    bfi r2, r5, #8, #8
+; CHECK-NEXT:    bfi r0, r12, #0, #8
+; CHECK-NEXT:    bfi r0, r4, #8, #8
 ; CHECK-NEXT:    b .LBB15_3
 ; CHECK-NEXT:  .LBB15_2:
-; CHECK-NEXT:    bfi r2, lr, #0, #8
-; CHECK-NEXT:    bfi r2, r4, #8, #8
+; CHECK-NEXT:    bfi r0, lr, #0, #8
+; CHECK-NEXT:    bfi r0, r1, #8, #8
 ; CHECK-NEXT:  .LBB15_3: @ %select.end
-; CHECK-NEXT:    vmsr p0, r2
+; CHECK-NEXT:    vmsr p0, r0
 ; CHECK-NEXT:    vpsel q0, q0, q1
-; CHECK-NEXT:    pop {r4, r5, r7, pc}
+; CHECK-NEXT:    pop {r4, pc}
 entry:
   %c1 = icmp eq <2 x i64> %a, zeroinitializer
   %c2 = icmp eq <2 x i64> %b, zeroinitializer

diff  --git a/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll b/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll
index f4a0d5120305a1..0ff262e6b53ab5 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll
@@ -458,24 +458,26 @@ entry:
 define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: cmpeqz_v2i1:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov r0, r1, d2
+; CHECK-NEXT:    vmov r0, r1, d0
 ; CHECK-NEXT:    orrs r0, r1
-; CHECK-NEXT:    vmov r1, r2, d0
-; CHECK-NEXT:    cset r0, eq
+; CHECK-NEXT:    vmov r1, r2, d2
 ; CHECK-NEXT:    orrs r1, r2
+; CHECK-NEXT:    cset r1, eq
+; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it eq
-; CHECK-NEXT:    eoreq r0, r0, #1
-; CHECK-NEXT:    rsbs r0, r0, #0
+; CHECK-NEXT:    eoreq r1, r1, #1
+; CHECK-NEXT:    rsbs r0, r1, #0
 ; CHECK-NEXT:    movs r1, #0
 ; CHECK-NEXT:    bfi r1, r0, #0, #8
-; CHECK-NEXT:    vmov r0, r2, d3
+; CHECK-NEXT:    vmov r0, r2, d1
 ; CHECK-NEXT:    orrs r0, r2
-; CHECK-NEXT:    vmov r2, r3, d1
-; CHECK-NEXT:    cset r0, eq
+; CHECK-NEXT:    vmov r2, r3, d3
 ; CHECK-NEXT:    orrs r2, r3
+; CHECK-NEXT:    cset r2, eq
+; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    it eq
-; CHECK-NEXT:    eoreq r0, r0, #1
-; CHECK-NEXT:    rsbs r0, r0, #0
+; CHECK-NEXT:    eoreq r2, r2, #1
+; CHECK-NEXT:    rsbs r0, r2, #0
 ; CHECK-NEXT:    bfi r1, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r1
 ; CHECK-NEXT:    vpsel q0, q0, q1

diff  --git a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
index 29b56639bd7698..8eb941371f9937 100644
--- a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
@@ -6,99 +6,102 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(ptr nocapture readonly %pSrcA, ptr no
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-NEXT:    .pad #8
-; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    .pad #12
+; CHECK-NEXT:    sub sp, #12
 ; CHECK-NEXT:    cmp r3, #0
 ; CHECK-NEXT:    beq.w .LBB0_8
 ; CHECK-NEXT:  @ %bb.1: @ %entry
-; CHECK-NEXT:    mov r11, r2
 ; CHECK-NEXT:    cmp r3, #1
 ; CHECK-NEXT:    bne .LBB0_3
 ; CHECK-NEXT:  @ %bb.2:
-; CHECK-NEXT:    movs r2, #0
+; CHECK-NEXT:    movs r7, #0
 ; CHECK-NEXT:    mov r12, r0
 ; CHECK-NEXT:    mov r8, r1
-; CHECK-NEXT:    mov r10, r11
+; CHECK-NEXT:    mov r10, r2
 ; CHECK-NEXT:    b .LBB0_6
 ; CHECK-NEXT:  .LBB0_3: @ %vector.ph
-; CHECK-NEXT:    bic r2, r3, #1
-; CHECK-NEXT:    adr r4, .LCPI0_0
-; CHECK-NEXT:    subs r7, r2, #2
-; CHECK-NEXT:    movs r6, #1
 ; CHECK-NEXT:    str r3, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    add.w r10, r11, r2, lsl #2
+; CHECK-NEXT:    bic r3, r3, #1
+; CHECK-NEXT:    subs r7, r3, #2
+; CHECK-NEXT:    movs r6, #1
+; CHECK-NEXT:    adr r4, .LCPI0_0
+; CHECK-NEXT:    str r3, [sp] @ 4-byte Spill
 ; CHECK-NEXT:    add.w lr, r6, r7, lsr #1
-; CHECK-NEXT:    str r2, [sp] @ 4-byte Spill
-; CHECK-NEXT:    add.w r8, r1, r2, lsl #2
-; CHECK-NEXT:    add.w r12, r0, r2, lsl #2
+; CHECK-NEXT:    add.w r10, r2, r3, lsl #2
+; CHECK-NEXT:    add.w r8, r1, r3, lsl #2
+; CHECK-NEXT:    add.w r12, r0, r3, lsl #2
 ; CHECK-NEXT:    vldrw.u32 q0, [r4]
 ; CHECK-NEXT:    vmvn.i32 q1, #0x80000000
 ; CHECK-NEXT:  .LBB0_4: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldrd r4, r2, [r0], #8
+; CHECK-NEXT:    ldrd r4, r3, [r0], #8
 ; CHECK-NEXT:    movs r5, #0
 ; CHECK-NEXT:    ldrd r7, r6, [r1], #8
-; CHECK-NEXT:    smull r4, r7, r7, r4
-; CHECK-NEXT:    asrl r4, r7, #31
+; CHECK-NEXT:    str r3, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    smull r4, r11, r7, r4
+; CHECK-NEXT:    asrl r4, r11, #31
 ; CHECK-NEXT:    rsbs.w r9, r4, #-2147483648
 ; CHECK-NEXT:    mov.w r9, #-1
-; CHECK-NEXT:    sbcs.w r3, r9, r7
+; CHECK-NEXT:    sbcs.w r3, r9, r11
 ; CHECK-NEXT:    csetm r3, lt
 ; CHECK-NEXT:    bfi r5, r3, #0, #8
-; CHECK-NEXT:    smull r2, r3, r6, r2
-; CHECK-NEXT:    asrl r2, r3, #31
-; CHECK-NEXT:    rsbs.w r6, r2, #-2147483648
-; CHECK-NEXT:    vmov q2[2], q2[0], r4, r2
-; CHECK-NEXT:    sbcs.w r6, r9, r3
-; CHECK-NEXT:    vmov q2[3], q2[1], r7, r3
-; CHECK-NEXT:    csetm r6, lt
-; CHECK-NEXT:    bfi r5, r6, #8, #8
+; CHECK-NEXT:    ldr r3, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    smull r6, r3, r6, r3
+; CHECK-NEXT:    asrl r6, r3, #31
+; CHECK-NEXT:    rsbs.w r7, r6, #-2147483648
+; CHECK-NEXT:    vmov q2[2], q2[0], r4, r6
+; CHECK-NEXT:    sbcs.w r7, r9, r3
+; CHECK-NEXT:    vmov q2[3], q2[1], r11, r3
+; CHECK-NEXT:    csetm r7, lt
+; CHECK-NEXT:    mvn r6, #-2147483648
+; CHECK-NEXT:    bfi r5, r7, #8, #8
 ; CHECK-NEXT:    vmsr p0, r5
-; CHECK-NEXT:    mvn r5, #-2147483648
 ; CHECK-NEXT:    vpsel q2, q2, q0
-; CHECK-NEXT:    vmov r2, r3, d4
-; CHECK-NEXT:    subs r2, r2, r5
-; CHECK-NEXT:    sbcs r2, r3, #0
-; CHECK-NEXT:    mov.w r3, #0
-; CHECK-NEXT:    csetm r2, lt
-; CHECK-NEXT:    bfi r3, r2, #0, #8
-; CHECK-NEXT:    vmov r2, r4, d5
-; CHECK-NEXT:    subs r2, r2, r5
-; CHECK-NEXT:    sbcs r2, r4, #0
-; CHECK-NEXT:    csetm r2, lt
-; CHECK-NEXT:    bfi r3, r2, #8, #8
-; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov r3, r4, d4
+; CHECK-NEXT:    subs r3, r3, r6
+; CHECK-NEXT:    sbcs r3, r4, #0
+; CHECK-NEXT:    mov.w r4, #0
+; CHECK-NEXT:    csetm r3, lt
+; CHECK-NEXT:    bfi r4, r3, #0, #8
+; CHECK-NEXT:    vmov r3, r5, d5
+; CHECK-NEXT:    subs r3, r3, r6
+; CHECK-NEXT:    sbcs r3, r5, #0
+; CHECK-NEXT:    csetm r3, lt
+; CHECK-NEXT:    bfi r4, r3, #8, #8
+; CHECK-NEXT:    vmsr p0, r4
 ; CHECK-NEXT:    vpsel q2, q2, q1
-; CHECK-NEXT:    vmov r2, s10
-; CHECK-NEXT:    vmov r3, s8
-; CHECK-NEXT:    strd r3, r2, [r11], #8
+; CHECK-NEXT:    vmov r3, s10
+; CHECK-NEXT:    vmov r4, s8
+; CHECK-NEXT:    strd r4, r3, [r2], #8
 ; CHECK-NEXT:    le lr, .LBB0_4
 ; CHECK-NEXT:  @ %bb.5: @ %middle.block
-; CHECK-NEXT:    ldrd r2, r3, [sp] @ 8-byte Folded Reload
-; CHECK-NEXT:    cmp r2, r3
+; CHECK-NEXT:    ldrd r7, r3, [sp] @ 8-byte Folded Reload
+; CHECK-NEXT:    cmp r7, r3
 ; CHECK-NEXT:    beq .LBB0_8
 ; CHECK-NEXT:  .LBB0_6: @ %for.body.preheader
-; CHECK-NEXT:    sub.w lr, r3, r2
+; CHECK-NEXT:    sub.w lr, r3, r7
 ; CHECK-NEXT:    mov.w r0, #-1
 ; CHECK-NEXT:    mov.w r1, #-2147483648
-; CHECK-NEXT:    mvn r3, #-2147483648
+; CHECK-NEXT:    mvn r2, #-2147483648
 ; CHECK-NEXT:  .LBB0_7: @ %for.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldr r2, [r12], #4
+; CHECK-NEXT:    ldr r3, [r12], #4
 ; CHECK-NEXT:    ldr r4, [r8], #4
-; CHECK-NEXT:    smull r2, r5, r4, r2
-; CHECK-NEXT:    asrl r2, r5, #31
-; CHECK-NEXT:    subs r4, r1, r2
-; CHECK-NEXT:    sbcs.w r4, r0, r5
-; CHECK-NEXT:    csel r2, r2, r1, lt
-; CHECK-NEXT:    csel r4, r5, r0, lt
-; CHECK-NEXT:    subs r5, r2, r3
-; CHECK-NEXT:    sbcs r4, r4, #0
-; CHECK-NEXT:    csel r2, r2, r3, lt
-; CHECK-NEXT:    str r2, [r10], #4
+; CHECK-NEXT:    smull r4, r3, r4, r3
+; CHECK-NEXT:    asrl r4, r3, #31
+; CHECK-NEXT:    subs r5, r1, r4
+; CHECK-NEXT:    sbcs.w r5, r0, r3
+; CHECK-NEXT:    cset r5, lt
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    csel r4, r4, r1, ne
+; CHECK-NEXT:    csel r3, r3, r0, ne
+; CHECK-NEXT:    subs r5, r4, r2
+; CHECK-NEXT:    sbcs r3, r3, #0
+; CHECK-NEXT:    csel r3, r4, r2, lt
+; CHECK-NEXT:    str r3, [r10], #4
 ; CHECK-NEXT:    le lr, .LBB0_7
 ; CHECK-NEXT:  .LBB0_8: @ %for.cond.cleanup
-; CHECK-NEXT:    add sp, #8
+; CHECK-NEXT:    add sp, #12
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.9:
@@ -319,8 +322,10 @@ define arm_aapcs_vfpcc void @ssatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no
 ; CHECK-NEXT:    asrl r4, r1, #31
 ; CHECK-NEXT:    subs r5, r3, r4
 ; CHECK-NEXT:    sbcs.w r5, r0, r1
-; CHECK-NEXT:    csel r4, r4, r3, lt
-; CHECK-NEXT:    csel r1, r1, r0, lt
+; CHECK-NEXT:    cset r5, lt
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    csel r4, r4, r3, ne
+; CHECK-NEXT:    csel r1, r1, r0, ne
 ; CHECK-NEXT:    subs r5, r4, r2
 ; CHECK-NEXT:    sbcs r1, r1, #0
 ; CHECK-NEXT:    csel r1, r4, r2, lt

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll
index cc856730d90cf1..6f2539e3cad9aa 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll
@@ -5,25 +5,25 @@
 define arm_aapcs_vfpcc <4 x float> @vcmp_oeq_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_oeq_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s7
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
 ; CHECK-MVE-NEXT:    cset r0, eq
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s7
 ; CHECK-MVE-NEXT:    cset r1, eq
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, s6
 ; CHECK-MVE-NEXT:    cset r2, eq
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, eq
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -41,13 +41,13 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_one_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s7
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
 ; CHECK-MVE-NEXT:    cset r0, mi
 ; CHECK-MVE-NEXT:    csinc r0, r0, zr, le
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s7
 ; CHECK-MVE-NEXT:    cset r1, mi
 ; CHECK-MVE-NEXT:    csinc r1, r1, zr, le
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
@@ -57,13 +57,13 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, <4 x float>
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, mi
 ; CHECK-MVE-NEXT:    csinc r3, r3, zr, le
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -82,25 +82,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_ogt_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_ogt_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s7
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
 ; CHECK-MVE-NEXT:    cset r0, gt
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s7
 ; CHECK-MVE-NEXT:    cset r1, gt
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, s6
 ; CHECK-MVE-NEXT:    cset r2, gt
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, gt
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -118,25 +118,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_oge_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_oge_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s7
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
 ; CHECK-MVE-NEXT:    cset r0, ge
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s7
 ; CHECK-MVE-NEXT:    cset r1, ge
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, s6
 ; CHECK-MVE-NEXT:    cset r2, ge
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, ge
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -154,25 +154,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_olt_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_olt_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s7
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
 ; CHECK-MVE-NEXT:    cset r0, mi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s7
 ; CHECK-MVE-NEXT:    cset r1, mi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, s6
 ; CHECK-MVE-NEXT:    cset r2, mi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, mi
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -190,25 +190,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_ole_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_ole_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s7
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
 ; CHECK-MVE-NEXT:    cset r0, ls
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s7
 ; CHECK-MVE-NEXT:    cset r1, ls
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, s6
 ; CHECK-MVE-NEXT:    cset r2, ls
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, ls
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -226,13 +226,13 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_ueq_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_ueq_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s7
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
 ; CHECK-MVE-NEXT:    cset r0, eq
 ; CHECK-MVE-NEXT:    csinc r0, r0, zr, vc
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s7
 ; CHECK-MVE-NEXT:    cset r1, eq
 ; CHECK-MVE-NEXT:    csinc r1, r1, zr, vc
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
@@ -242,13 +242,13 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ueq_v4f32(<4 x float> %src, <4 x float>
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, eq
 ; CHECK-MVE-NEXT:    csinc r3, r3, zr, vc
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -295,25 +295,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_ugt_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_ugt_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s7
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
 ; CHECK-MVE-NEXT:    cset r0, hi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s7
 ; CHECK-MVE-NEXT:    cset r1, hi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, s6
 ; CHECK-MVE-NEXT:    cset r2, hi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, hi
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -331,25 +331,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_uge_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_uge_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s7
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
 ; CHECK-MVE-NEXT:    cset r0, pl
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s7
 ; CHECK-MVE-NEXT:    cset r1, pl
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, s6
 ; CHECK-MVE-NEXT:    cset r2, pl
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, pl
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -367,25 +367,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_ult_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_ult_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s7
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
 ; CHECK-MVE-NEXT:    cset r0, lt
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s7
 ; CHECK-MVE-NEXT:    cset r1, lt
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, s6
 ; CHECK-MVE-NEXT:    cset r2, lt
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, lt
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -403,25 +403,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_ule_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_ule_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s7
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
 ; CHECK-MVE-NEXT:    cset r0, le
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s7
 ; CHECK-MVE-NEXT:    cset r1, le
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, s6
 ; CHECK-MVE-NEXT:    cset r2, le
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, le
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -439,25 +439,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_ord_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_ord_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s7
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
 ; CHECK-MVE-NEXT:    cset r0, vc
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s7
 ; CHECK-MVE-NEXT:    cset r1, vc
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, s6
 ; CHECK-MVE-NEXT:    cset r2, vc
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, vc
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -476,25 +476,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_uno_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_uno_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s7
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
 ; CHECK-MVE-NEXT:    cset r0, vs
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s7
 ; CHECK-MVE-NEXT:    cset r1, vs
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, s6
 ; CHECK-MVE-NEXT:    cset r2, vs
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, vs
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
index 586b731c934be3..d42c393743f4f3 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
@@ -5,25 +5,25 @@
 define arm_aapcs_vfpcc <4 x float> @vcmp_oeq_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_oeq_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s4
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
 ; CHECK-MVE-NEXT:    cset r0, eq
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s4
 ; CHECK-MVE-NEXT:    cset r1, eq
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, s4
 ; CHECK-MVE-NEXT:    cset r2, eq
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, eq
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -44,13 +44,13 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_one_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s4
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
 ; CHECK-MVE-NEXT:    cset r0, mi
 ; CHECK-MVE-NEXT:    csinc r0, r0, zr, le
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s4
 ; CHECK-MVE-NEXT:    cset r1, mi
 ; CHECK-MVE-NEXT:    csinc r1, r1, zr, le
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
@@ -60,13 +60,13 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, float %src2
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, mi
 ; CHECK-MVE-NEXT:    csinc r3, r3, zr, le
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -88,25 +88,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_ogt_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_ogt_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s4
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
 ; CHECK-MVE-NEXT:    cset r0, gt
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s4
 ; CHECK-MVE-NEXT:    cset r1, gt
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, s4
 ; CHECK-MVE-NEXT:    cset r2, gt
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, gt
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -127,25 +127,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_oge_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_oge_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s4
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
 ; CHECK-MVE-NEXT:    cset r0, ge
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s4
 ; CHECK-MVE-NEXT:    cset r1, ge
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, s4
 ; CHECK-MVE-NEXT:    cset r2, ge
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, ge
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -166,25 +166,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_olt_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_olt_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s4
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
 ; CHECK-MVE-NEXT:    cset r0, mi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s4
 ; CHECK-MVE-NEXT:    cset r1, mi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, s4
 ; CHECK-MVE-NEXT:    cset r2, mi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, mi
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -205,25 +205,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_ole_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_ole_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s4
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
 ; CHECK-MVE-NEXT:    cset r0, ls
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s4
 ; CHECK-MVE-NEXT:    cset r1, ls
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, s4
 ; CHECK-MVE-NEXT:    cset r2, ls
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, ls
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -244,13 +244,13 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_ueq_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_ueq_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s4
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
 ; CHECK-MVE-NEXT:    cset r0, eq
 ; CHECK-MVE-NEXT:    csinc r0, r0, zr, vc
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s4
 ; CHECK-MVE-NEXT:    cset r1, eq
 ; CHECK-MVE-NEXT:    csinc r1, r1, zr, vc
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
@@ -260,13 +260,13 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ueq_v4f32(<4 x float> %src, float %src2
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, eq
 ; CHECK-MVE-NEXT:    csinc r3, r3, zr, vc
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -319,25 +319,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_ugt_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_ugt_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s4
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
 ; CHECK-MVE-NEXT:    cset r0, hi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s4
 ; CHECK-MVE-NEXT:    cset r1, hi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, s4
 ; CHECK-MVE-NEXT:    cset r2, hi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, hi
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -358,25 +358,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_uge_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_uge_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s4
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
 ; CHECK-MVE-NEXT:    cset r0, pl
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s4
 ; CHECK-MVE-NEXT:    cset r1, pl
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, s4
 ; CHECK-MVE-NEXT:    cset r2, pl
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, pl
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -397,25 +397,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_ult_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_ult_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s4
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
 ; CHECK-MVE-NEXT:    cset r0, lt
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s4
 ; CHECK-MVE-NEXT:    cset r1, lt
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, s4
 ; CHECK-MVE-NEXT:    cset r2, lt
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, lt
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -436,25 +436,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_ule_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_ule_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s4
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
 ; CHECK-MVE-NEXT:    cset r0, le
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s4
 ; CHECK-MVE-NEXT:    cset r1, le
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, s4
 ; CHECK-MVE-NEXT:    cset r2, le
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, le
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -475,25 +475,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_ord_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_ord_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s4
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
 ; CHECK-MVE-NEXT:    cset r0, vc
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s4
 ; CHECK-MVE-NEXT:    cset r1, vc
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, s4
 ; CHECK-MVE-NEXT:    cset r2, vc
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, vc
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -515,25 +515,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_uno_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_uno_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s4
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
 ; CHECK-MVE-NEXT:    cset r0, vs
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s4
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s4
 ; CHECK-MVE-NEXT:    cset r1, vs
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, s4
 ; CHECK-MVE-NEXT:    cset r2, vs
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, vs
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -1614,25 +1614,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_r_oeq_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_oeq_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
 ; CHECK-MVE-NEXT:    cset r0, eq
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
 ; CHECK-MVE-NEXT:    cset r1, eq
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s4, s2
 ; CHECK-MVE-NEXT:    cset r2, eq
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, eq
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -1653,13 +1653,13 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_r_one_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_one_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
 ; CHECK-MVE-NEXT:    cset r0, mi
 ; CHECK-MVE-NEXT:    csinc r0, r0, zr, le
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
 ; CHECK-MVE-NEXT:    cset r1, mi
 ; CHECK-MVE-NEXT:    csinc r1, r1, zr, le
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
@@ -1669,13 +1669,13 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_one_v4f32(<4 x float> %src, float %sr
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, mi
 ; CHECK-MVE-NEXT:    csinc r3, r3, zr, le
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -1697,25 +1697,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_r_ogt_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_ogt_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
 ; CHECK-MVE-NEXT:    cset r0, gt
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
 ; CHECK-MVE-NEXT:    cset r1, gt
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s4, s2
 ; CHECK-MVE-NEXT:    cset r2, gt
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, gt
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -1736,25 +1736,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_r_oge_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_oge_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
 ; CHECK-MVE-NEXT:    cset r0, ge
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
 ; CHECK-MVE-NEXT:    cset r1, ge
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s4, s2
 ; CHECK-MVE-NEXT:    cset r2, ge
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, ge
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -1775,25 +1775,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_r_olt_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_olt_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
 ; CHECK-MVE-NEXT:    cset r0, mi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
 ; CHECK-MVE-NEXT:    cset r1, mi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s4, s2
 ; CHECK-MVE-NEXT:    cset r2, mi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, mi
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -1814,25 +1814,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_r_ole_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_ole_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
 ; CHECK-MVE-NEXT:    cset r0, ls
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
 ; CHECK-MVE-NEXT:    cset r1, ls
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s4, s2
 ; CHECK-MVE-NEXT:    cset r2, ls
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, ls
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -1853,13 +1853,13 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_r_ueq_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_ueq_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
 ; CHECK-MVE-NEXT:    cset r0, eq
 ; CHECK-MVE-NEXT:    csinc r0, r0, zr, vc
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
 ; CHECK-MVE-NEXT:    cset r1, eq
 ; CHECK-MVE-NEXT:    csinc r1, r1, zr, vc
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
@@ -1869,13 +1869,13 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_ueq_v4f32(<4 x float> %src, float %sr
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, eq
 ; CHECK-MVE-NEXT:    csinc r3, r3, zr, vc
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -1928,25 +1928,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_r_ugt_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_ugt_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
 ; CHECK-MVE-NEXT:    cset r0, hi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
 ; CHECK-MVE-NEXT:    cset r1, hi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s4, s2
 ; CHECK-MVE-NEXT:    cset r2, hi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, hi
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -1967,25 +1967,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_r_uge_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_uge_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
 ; CHECK-MVE-NEXT:    cset r0, pl
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
 ; CHECK-MVE-NEXT:    cset r1, pl
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s4, s2
 ; CHECK-MVE-NEXT:    cset r2, pl
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, pl
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -2006,25 +2006,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_r_ult_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_ult_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
 ; CHECK-MVE-NEXT:    cset r0, lt
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
 ; CHECK-MVE-NEXT:    cset r1, lt
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s4, s2
 ; CHECK-MVE-NEXT:    cset r2, lt
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, lt
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -2045,25 +2045,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_r_ule_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_ule_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
 ; CHECK-MVE-NEXT:    cset r0, le
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
 ; CHECK-MVE-NEXT:    cset r1, le
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s4, s2
 ; CHECK-MVE-NEXT:    cset r2, le
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, le
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -2084,25 +2084,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_r_ord_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_ord_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
 ; CHECK-MVE-NEXT:    cset r0, vc
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
 ; CHECK-MVE-NEXT:    cset r1, vc
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s4, s2
 ; CHECK-MVE-NEXT:    cset r2, vc
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, vc
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -2124,25 +2124,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_r_uno_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_uno_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
 ; CHECK-MVE-NEXT:    cset r0, vs
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
 ; CHECK-MVE-NEXT:    cset r1, vs
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s4, s2
 ; CHECK-MVE-NEXT:    cset r2, vs
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, vs
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll
index de8b413bf24e55..718657839d38db 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll
@@ -5,25 +5,25 @@
 define arm_aapcs_vfpcc <4 x float> @vcmp_oeq_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_oeq_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
 ; CHECK-MVE-NEXT:    cset r0, eq
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
 ; CHECK-MVE-NEXT:    cset r1, eq
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, #0
 ; CHECK-MVE-NEXT:    cset r2, eq
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, eq
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s9, s5
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s8, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -41,13 +41,13 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_one_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
 ; CHECK-MVE-NEXT:    cset r0, mi
 ; CHECK-MVE-NEXT:    csinc r0, r0, zr, le
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
 ; CHECK-MVE-NEXT:    cset r1, mi
 ; CHECK-MVE-NEXT:    csinc r1, r1, zr, le
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
@@ -57,13 +57,13 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, <4 x float>
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, mi
 ; CHECK-MVE-NEXT:    csinc r3, r3, zr, le
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s9, s5
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s8, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -82,25 +82,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_ogt_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_ogt_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
 ; CHECK-MVE-NEXT:    cset r0, gt
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
 ; CHECK-MVE-NEXT:    cset r1, gt
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, #0
 ; CHECK-MVE-NEXT:    cset r2, gt
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, gt
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s9, s5
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s8, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -118,25 +118,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_oge_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_oge_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
 ; CHECK-MVE-NEXT:    cset r0, ge
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
 ; CHECK-MVE-NEXT:    cset r1, ge
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, #0
 ; CHECK-MVE-NEXT:    cset r2, ge
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, ge
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s9, s5
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s8, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -154,25 +154,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_olt_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_olt_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
 ; CHECK-MVE-NEXT:    cset r0, mi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
 ; CHECK-MVE-NEXT:    cset r1, mi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, #0
 ; CHECK-MVE-NEXT:    cset r2, mi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, mi
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s9, s5
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s8, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -190,25 +190,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_ole_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_ole_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
 ; CHECK-MVE-NEXT:    cset r0, ls
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
 ; CHECK-MVE-NEXT:    cset r1, ls
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, #0
 ; CHECK-MVE-NEXT:    cset r2, ls
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, ls
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s9, s5
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s8, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -226,13 +226,13 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_ueq_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_ueq_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
 ; CHECK-MVE-NEXT:    cset r0, eq
 ; CHECK-MVE-NEXT:    csinc r0, r0, zr, vc
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
 ; CHECK-MVE-NEXT:    cset r1, eq
 ; CHECK-MVE-NEXT:    csinc r1, r1, zr, vc
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
@@ -242,13 +242,13 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ueq_v4f32(<4 x float> %src, <4 x float>
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, eq
 ; CHECK-MVE-NEXT:    csinc r3, r3, zr, vc
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s9, s5
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s8, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -295,25 +295,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_ugt_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_ugt_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
 ; CHECK-MVE-NEXT:    cset r0, hi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
 ; CHECK-MVE-NEXT:    cset r1, hi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, #0
 ; CHECK-MVE-NEXT:    cset r2, hi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, hi
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s9, s5
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s8, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -331,25 +331,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_uge_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_uge_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
 ; CHECK-MVE-NEXT:    cset r0, pl
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
 ; CHECK-MVE-NEXT:    cset r1, pl
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, #0
 ; CHECK-MVE-NEXT:    cset r2, pl
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, pl
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s9, s5
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s8, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -367,25 +367,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_ult_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_ult_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
 ; CHECK-MVE-NEXT:    cset r0, lt
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
 ; CHECK-MVE-NEXT:    cset r1, lt
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, #0
 ; CHECK-MVE-NEXT:    cset r2, lt
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, lt
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s9, s5
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s8, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -403,25 +403,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_ule_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_ule_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
 ; CHECK-MVE-NEXT:    cset r0, le
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
 ; CHECK-MVE-NEXT:    cset r1, le
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, #0
 ; CHECK-MVE-NEXT:    cset r2, le
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, le
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s9, s5
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s8, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -439,25 +439,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_ord_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_ord_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s0
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s3
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s0
 ; CHECK-MVE-NEXT:    cset r0, vc
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s1
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s3
 ; CHECK-MVE-NEXT:    cset r1, vc
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, s2
 ; CHECK-MVE-NEXT:    cset r2, vc
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, vc
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s9, s5
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s8, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -476,25 +476,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_uno_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_uno_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s0
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s3
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s0
 ; CHECK-MVE-NEXT:    cset r0, vs
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s1
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s3
 ; CHECK-MVE-NEXT:    cset r1, vs
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, s2
 ; CHECK-MVE-NEXT:    cset r2, vs
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, vs
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s9, s5
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s8, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -1530,25 +1530,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_r_oeq_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_oeq_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
 ; CHECK-MVE-NEXT:    cset r0, eq
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
 ; CHECK-MVE-NEXT:    cset r1, eq
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, #0
 ; CHECK-MVE-NEXT:    cset r2, eq
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, eq
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s9, s5
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s8, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -1566,13 +1566,13 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_r_one_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_one_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
 ; CHECK-MVE-NEXT:    cset r0, mi
 ; CHECK-MVE-NEXT:    csinc r0, r0, zr, le
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
 ; CHECK-MVE-NEXT:    cset r1, mi
 ; CHECK-MVE-NEXT:    csinc r1, r1, zr, le
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
@@ -1582,13 +1582,13 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_one_v4f32(<4 x float> %src, <4 x floa
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, mi
 ; CHECK-MVE-NEXT:    csinc r3, r3, zr, le
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s9, s5
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s8, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -1607,25 +1607,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_r_ogt_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_ogt_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
 ; CHECK-MVE-NEXT:    cset r0, mi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
 ; CHECK-MVE-NEXT:    cset r1, mi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, #0
 ; CHECK-MVE-NEXT:    cset r2, mi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, mi
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s9, s5
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s8, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -1643,25 +1643,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_r_oge_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_oge_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
 ; CHECK-MVE-NEXT:    cset r0, ls
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
 ; CHECK-MVE-NEXT:    cset r1, ls
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, #0
 ; CHECK-MVE-NEXT:    cset r2, ls
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, ls
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s9, s5
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s8, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -1679,25 +1679,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_r_olt_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_olt_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
 ; CHECK-MVE-NEXT:    cset r0, gt
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
 ; CHECK-MVE-NEXT:    cset r1, gt
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, #0
 ; CHECK-MVE-NEXT:    cset r2, gt
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, gt
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s9, s5
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s8, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -1715,25 +1715,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_r_ole_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_ole_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
 ; CHECK-MVE-NEXT:    cset r0, ge
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
 ; CHECK-MVE-NEXT:    cset r1, ge
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, #0
 ; CHECK-MVE-NEXT:    cset r2, ge
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, ge
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s9, s5
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s8, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -1751,13 +1751,13 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_r_ueq_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_ueq_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
 ; CHECK-MVE-NEXT:    cset r0, eq
 ; CHECK-MVE-NEXT:    csinc r0, r0, zr, vc
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
 ; CHECK-MVE-NEXT:    cset r1, eq
 ; CHECK-MVE-NEXT:    csinc r1, r1, zr, vc
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
@@ -1767,13 +1767,13 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_ueq_v4f32(<4 x float> %src, <4 x floa
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, eq
 ; CHECK-MVE-NEXT:    csinc r3, r3, zr, vc
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s9, s5
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s8, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -1820,25 +1820,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_r_ugt_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_ugt_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
 ; CHECK-MVE-NEXT:    cset r0, lt
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
 ; CHECK-MVE-NEXT:    cset r1, lt
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, #0
 ; CHECK-MVE-NEXT:    cset r2, lt
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, lt
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s9, s5
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s8, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -1856,25 +1856,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_r_uge_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_uge_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
 ; CHECK-MVE-NEXT:    cset r0, le
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
 ; CHECK-MVE-NEXT:    cset r1, le
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, #0
 ; CHECK-MVE-NEXT:    cset r2, le
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, le
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s9, s5
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s8, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -1892,25 +1892,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_r_ult_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_ult_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
 ; CHECK-MVE-NEXT:    cset r0, hi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
 ; CHECK-MVE-NEXT:    cset r1, hi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, #0
 ; CHECK-MVE-NEXT:    cset r2, hi
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, hi
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s9, s5
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s8, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -1928,25 +1928,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_r_ule_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_ule_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
 ; CHECK-MVE-NEXT:    cset r0, pl
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
 ; CHECK-MVE-NEXT:    cset r1, pl
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, #0
 ; CHECK-MVE-NEXT:    cset r2, pl
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, pl
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s9, s5
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s8, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -1964,25 +1964,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_r_ord_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_ord_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s0
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s3
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s0
 ; CHECK-MVE-NEXT:    cset r0, vc
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s1
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s3
 ; CHECK-MVE-NEXT:    cset r1, vc
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, s2
 ; CHECK-MVE-NEXT:    cset r2, vc
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, vc
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s9, s5
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s8, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -2001,25 +2001,25 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_r_uno_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_uno_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s0
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s3, s3
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s0
 ; CHECK-MVE-NEXT:    cset r0, vs
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s1
+; CHECK-MVE-NEXT:    vcmp.f32 s3, s3
 ; CHECK-MVE-NEXT:    cset r1, vs
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, s2
 ; CHECK-MVE-NEXT:    cset r2, vs
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    cset r3, vs
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
 ; CHECK-MVE-NEXT:    cmp r3, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s9, s5
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s8, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll b/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll
index 35e578e425e746..898380760bd4d2 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll
@@ -499,12 +499,16 @@ define arm_aapcs_vfpcc i64 @uminv2i64(<2 x i64> %vec, i64 %min) {
 ; CHECK-NEXT:    vmov r2, r3, d0
 ; CHECK-NEXT:    subs.w r4, r2, r12
 ; CHECK-NEXT:    sbcs.w r4, r3, lr
-; CHECK-NEXT:    csel r2, r2, r12, lo
-; CHECK-NEXT:    csel r3, r3, lr, lo
+; CHECK-NEXT:    cset r4, lo
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csel r2, r2, r12, ne
+; CHECK-NEXT:    csel r3, r3, lr, ne
 ; CHECK-NEXT:    subs r4, r2, r0
 ; CHECK-NEXT:    sbcs.w r4, r3, r1
-; CHECK-NEXT:    csel r0, r2, r0, lo
-; CHECK-NEXT:    csel r1, r3, r1, lo
+; CHECK-NEXT:    cset r4, lo
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csel r0, r2, r0, ne
+; CHECK-NEXT:    csel r1, r3, r1, ne
 ; CHECK-NEXT:    pop {r4, pc}
   %x = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %vec)
   %cmp = icmp ult i64 %x, %min
@@ -521,12 +525,16 @@ define arm_aapcs_vfpcc i64 @sminv2i64(<2 x i64> %vec, i64 %min) {
 ; CHECK-NEXT:    vmov r2, r3, d0
 ; CHECK-NEXT:    subs.w r4, r2, r12
 ; CHECK-NEXT:    sbcs.w r4, r3, lr
-; CHECK-NEXT:    csel r2, r2, r12, lt
-; CHECK-NEXT:    csel r3, r3, lr, lt
+; CHECK-NEXT:    cset r4, lt
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csel r2, r2, r12, ne
+; CHECK-NEXT:    csel r3, r3, lr, ne
 ; CHECK-NEXT:    subs r4, r2, r0
 ; CHECK-NEXT:    sbcs.w r4, r3, r1
-; CHECK-NEXT:    csel r0, r2, r0, lt
-; CHECK-NEXT:    csel r1, r3, r1, lt
+; CHECK-NEXT:    cset r4, lt
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csel r0, r2, r0, ne
+; CHECK-NEXT:    csel r1, r3, r1, ne
 ; CHECK-NEXT:    pop {r4, pc}
   %x = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %vec)
   %cmp = icmp slt i64 %x, %min
@@ -543,12 +551,16 @@ define arm_aapcs_vfpcc i64 @umaxv2i64(<2 x i64> %vec, i64 %max) {
 ; CHECK-NEXT:    vmov r2, r3, d1
 ; CHECK-NEXT:    subs.w r4, r2, r12
 ; CHECK-NEXT:    sbcs.w r4, r3, lr
-; CHECK-NEXT:    csel r2, r12, r2, lo
-; CHECK-NEXT:    csel r3, lr, r3, lo
+; CHECK-NEXT:    cset r4, lo
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csel r2, r12, r2, ne
+; CHECK-NEXT:    csel r3, lr, r3, ne
 ; CHECK-NEXT:    subs r4, r0, r2
 ; CHECK-NEXT:    sbcs.w r4, r1, r3
-; CHECK-NEXT:    csel r0, r2, r0, lo
-; CHECK-NEXT:    csel r1, r3, r1, lo
+; CHECK-NEXT:    cset r4, lo
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csel r0, r2, r0, ne
+; CHECK-NEXT:    csel r1, r3, r1, ne
 ; CHECK-NEXT:    pop {r4, pc}
   %x = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %vec)
   %cmp = icmp ugt i64 %x, %max
@@ -565,12 +577,16 @@ define arm_aapcs_vfpcc i64 @smaxv2i64(<2 x i64> %vec, i64 %max) {
 ; CHECK-NEXT:    vmov r2, r3, d1
 ; CHECK-NEXT:    subs.w r4, r2, r12
 ; CHECK-NEXT:    sbcs.w r4, r3, lr
-; CHECK-NEXT:    csel r2, r12, r2, lt
-; CHECK-NEXT:    csel r3, lr, r3, lt
+; CHECK-NEXT:    cset r4, lt
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csel r2, r12, r2, ne
+; CHECK-NEXT:    csel r3, lr, r3, ne
 ; CHECK-NEXT:    subs r4, r0, r2
 ; CHECK-NEXT:    sbcs.w r4, r1, r3
-; CHECK-NEXT:    csel r0, r2, r0, lt
-; CHECK-NEXT:    csel r1, r3, r1, lt
+; CHECK-NEXT:    cset r4, lt
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csel r0, r2, r0, ne
+; CHECK-NEXT:    csel r1, r3, r1, ne
 ; CHECK-NEXT:    pop {r4, pc}
   %x = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %vec)
   %cmp = icmp sgt i64 %x, %max

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll b/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll
index bf0d92b5e0303e..642ff69ded33f5 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll
@@ -502,8 +502,8 @@ define <2 x i64> @large_i128(<2 x double> %x) {
 ; CHECK-NEXT:    cmp r7, #0
 ; CHECK-NEXT:    csel r0, r0, r7, ne
 ; CHECK-NEXT:    csel r3, r3, r7, ne
-; CHECK-NEXT:    csel r1, r1, r7, ne
 ; CHECK-NEXT:    csel r2, r2, r9, ne
+; CHECK-NEXT:    csel r1, r1, r7, ne
 ; CHECK-NEXT:    rsbs r7, r0, #0
 ; CHECK-NEXT:    sbcs.w r7, r4, r1
 ; CHECK-NEXT:    sbcs.w r2, r4, r2
@@ -521,8 +521,8 @@ define <2 x i64> @large_i128(<2 x double> %x) {
 ; CHECK-NEXT:    cmp r5, #0
 ; CHECK-NEXT:    csel r0, r0, r5, ne
 ; CHECK-NEXT:    csel r3, r3, r5, ne
-; CHECK-NEXT:    csel r1, r1, r5, ne
 ; CHECK-NEXT:    csel r2, r2, r9, ne
+; CHECK-NEXT:    csel r1, r1, r5, ne
 ; CHECK-NEXT:    rsbs r5, r0, #0
 ; CHECK-NEXT:    sbcs.w r5, r4, r1
 ; CHECK-NEXT:    sbcs.w r2, r4, r2

diff  --git a/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
index fe1d06cb39e16d..cff16c300e7036 100644
--- a/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
@@ -8,119 +8,121 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
 ; THUMBV7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; THUMBV7-NEXT:    .pad #44
 ; THUMBV7-NEXT:    sub sp, #44
-; THUMBV7-NEXT:    ldr.w r8, [sp, #88]
-; THUMBV7-NEXT:    mov r9, r0
-; THUMBV7-NEXT:    ldr r7, [sp, #96]
-; THUMBV7-NEXT:    ldr.w lr, [sp, #100]
-; THUMBV7-NEXT:    umull r0, r5, r2, r8
-; THUMBV7-NEXT:    ldr r4, [sp, #80]
-; THUMBV7-NEXT:    str r0, [sp, #32] @ 4-byte Spill
-; THUMBV7-NEXT:    umull r1, r0, r3, r7
-; THUMBV7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; THUMBV7-NEXT:    umull r0, r11, lr, r2
-; THUMBV7-NEXT:    str r1, [sp, #20] @ 4-byte Spill
-; THUMBV7-NEXT:    ldr r1, [sp, #92]
-; THUMBV7-NEXT:    str r0, [sp] @ 4-byte Spill
-; THUMBV7-NEXT:    umull r0, r10, r7, r2
-; THUMBV7-NEXT:    mov r7, r1
-; THUMBV7-NEXT:    umull r6, r12, r1, r4
-; THUMBV7-NEXT:    str r0, [sp, #40] @ 4-byte Spill
+; THUMBV7-NEXT:    ldr.w lr, [sp, #88]
+; THUMBV7-NEXT:    mov r11, r0
+; THUMBV7-NEXT:    ldr r4, [sp, #96]
+; THUMBV7-NEXT:    ldr.w r12, [sp, #80]
+; THUMBV7-NEXT:    umull r1, r5, r2, lr
+; THUMBV7-NEXT:    umull r7, r6, r3, r4
+; THUMBV7-NEXT:    str r1, [sp, #40] @ 4-byte Spill
+; THUMBV7-NEXT:    ldr r1, [sp, #100]
+; THUMBV7-NEXT:    umull r4, r0, r4, r2
+; THUMBV7-NEXT:    str r7, [sp, #32] @ 4-byte Spill
+; THUMBV7-NEXT:    umull r7, r1, r1, r2
+; THUMBV7-NEXT:    str r4, [sp, #24] @ 4-byte Spill
+; THUMBV7-NEXT:    str r0, [sp, #12] @ 4-byte Spill
 ; THUMBV7-NEXT:    ldr r0, [sp, #84]
-; THUMBV7-NEXT:    str r6, [sp, #24] @ 4-byte Spill
-; THUMBV7-NEXT:    umull r6, r1, r0, r8
-; THUMBV7-NEXT:    str r6, [sp, #16] @ 4-byte Spill
-; THUMBV7-NEXT:    umull r6, r2, r2, r7
-; THUMBV7-NEXT:    mov r7, r4
-; THUMBV7-NEXT:    strd r6, r2, [sp, #8] @ 8-byte Folded Spill
-; THUMBV7-NEXT:    umull r2, r6, r4, r8
+; THUMBV7-NEXT:    str r7, [sp, #20] @ 4-byte Spill
+; THUMBV7-NEXT:    ldr r7, [sp, #92]
+; THUMBV7-NEXT:    umull r10, r8, r0, lr
+; THUMBV7-NEXT:    umull r4, r9, r7, r12
+; THUMBV7-NEXT:    str r4, [sp, #8] @ 4-byte Spill
+; THUMBV7-NEXT:    umull r4, r0, r12, lr
+; THUMBV7-NEXT:    mov.w r12, #0
+; THUMBV7-NEXT:    umlal r5, r12, r3, lr
+; THUMBV7-NEXT:    str r4, [sp, #16] @ 4-byte Spill
+; THUMBV7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; THUMBV7-NEXT:    umull r4, r2, r2, r7
+; THUMBV7-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
+; THUMBV7-NEXT:    str r4, [sp, #28] @ 4-byte Spill
 ; THUMBV7-NEXT:    str r2, [sp, #36] @ 4-byte Spill
-; THUMBV7-NEXT:    ldr r2, [sp, #32] @ 4-byte Reload
-; THUMBV7-NEXT:    str r6, [sp, #28] @ 4-byte Spill
-; THUMBV7-NEXT:    movs r6, #0
-; THUMBV7-NEXT:    str.w r2, [r9]
-; THUMBV7-NEXT:    umlal r5, r6, r3, r8
+; THUMBV7-NEXT:    str.w r0, [r11]
+; THUMBV7-NEXT:    ldr r0, [sp, #32] @ 4-byte Reload
 ; THUMBV7-NEXT:    ldr r2, [sp, #20] @ 4-byte Reload
-; THUMBV7-NEXT:    ldr r4, [sp] @ 4-byte Reload
-; THUMBV7-NEXT:    add r4, r2
-; THUMBV7-NEXT:    adds.w r2, r10, r4
-; THUMBV7-NEXT:    str r2, [sp, #20] @ 4-byte Spill
+; THUMBV7-NEXT:    add r2, r0
+; THUMBV7-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; THUMBV7-NEXT:    adds.w lr, r0, r2
 ; THUMBV7-NEXT:    mov.w r2, #0
-; THUMBV7-NEXT:    adc r2, r2, #0
-; THUMBV7-NEXT:    cmp.w r12, #0
-; THUMBV7-NEXT:    str r2, [sp, #32] @ 4-byte Spill
-; THUMBV7-NEXT:    it ne
-; THUMBV7-NEXT:    movne.w r12, #1
+; THUMBV7-NEXT:    adc r0, r2, #0
+; THUMBV7-NEXT:    str r0, [sp, #32] @ 4-byte Spill
+; THUMBV7-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; THUMBV7-NEXT:    add.w r4, r10, r0
+; THUMBV7-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; THUMBV7-NEXT:    adds r4, r4, r0
+; THUMBV7-NEXT:    adc r0, r2, #0
+; THUMBV7-NEXT:    str r0, [sp, #40] @ 4-byte Spill
+; THUMBV7-NEXT:    ldr r0, [sp, #24] @ 4-byte Reload
+; THUMBV7-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
+; THUMBV7-NEXT:    adds.w r10, r2, r0
+; THUMBV7-NEXT:    mov r2, r3
+; THUMBV7-NEXT:    adc.w r0, r4, lr
+; THUMBV7-NEXT:    ldr.w lr, [sp, #100]
 ; THUMBV7-NEXT:    cmp r1, #0
-; THUMBV7-NEXT:    ldr r2, [sp, #96]
+; THUMBV7-NEXT:    str r0, [sp, #24] @ 4-byte Spill
 ; THUMBV7-NEXT:    it ne
 ; THUMBV7-NEXT:    movne r1, #1
-; THUMBV7-NEXT:    orrs.w r10, r7, r0
-; THUMBV7-NEXT:    it ne
-; THUMBV7-NEXT:    movne.w r10, #1
-; THUMBV7-NEXT:    orrs.w r7, r2, lr
-; THUMBV7-NEXT:    ldr r2, [sp, #92]
+; THUMBV7-NEXT:    cmp r3, #0
+; THUMBV7-NEXT:    mov r0, lr
 ; THUMBV7-NEXT:    it ne
-; THUMBV7-NEXT:    movne r7, #1
-; THUMBV7-NEXT:    cmp r0, #0
+; THUMBV7-NEXT:    movne r2, #1
+; THUMBV7-NEXT:    cmp.w lr, #0
 ; THUMBV7-NEXT:    it ne
 ; THUMBV7-NEXT:    movne r0, #1
-; THUMBV7-NEXT:    cmp r2, #0
-; THUMBV7-NEXT:    mov r4, r2
-; THUMBV7-NEXT:    mov r8, r2
-; THUMBV7-NEXT:    it ne
-; THUMBV7-NEXT:    movne r4, #1
-; THUMBV7-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; THUMBV7-NEXT:    ands r0, r4
-; THUMBV7-NEXT:    movs r4, #0
-; THUMBV7-NEXT:    adds r5, r5, r2
-; THUMBV7-NEXT:    str.w r5, [r9, #4]
-; THUMBV7-NEXT:    orr.w r0, r0, r1
-; THUMBV7-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
-; THUMBV7-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
-; THUMBV7-NEXT:    and.w r5, r10, r7
-; THUMBV7-NEXT:    orr.w r0, r0, r12
-; THUMBV7-NEXT:    mov.w r12, #0
-; THUMBV7-NEXT:    add r1, r2
-; THUMBV7-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
-; THUMBV7-NEXT:    adcs r2, r6
-; THUMBV7-NEXT:    ldr r6, [sp, #28] @ 4-byte Reload
-; THUMBV7-NEXT:    adc r7, r4, #0
-; THUMBV7-NEXT:    adds r1, r1, r6
-; THUMBV7-NEXT:    umlal r2, r7, r3, r8
-; THUMBV7-NEXT:    adc r4, r4, #0
-; THUMBV7-NEXT:    orrs r0, r4
-; THUMBV7-NEXT:    orrs r0, r5
-; THUMBV7-NEXT:    ldrd r5, r4, [sp, #36] @ 8-byte Folded Reload
+; THUMBV7-NEXT:    ldr r4, [sp, #28] @ 4-byte Reload
+; THUMBV7-NEXT:    ands r0, r2
+; THUMBV7-NEXT:    orrs r1, r0
 ; THUMBV7-NEXT:    adds r5, r5, r4
-; THUMBV7-NEXT:    ldr r4, [sp, #20] @ 4-byte Reload
-; THUMBV7-NEXT:    adcs r1, r4
-; THUMBV7-NEXT:    ldr r4, [sp, #4] @ 4-byte Reload
-; THUMBV7-NEXT:    cmp r4, #0
+; THUMBV7-NEXT:    str.w r5, [r11, #4]
+; THUMBV7-NEXT:    ldr r0, [sp, #36] @ 4-byte Reload
+; THUMBV7-NEXT:    mov.w r5, #0
+; THUMBV7-NEXT:    adcs.w r0, r0, r12
+; THUMBV7-NEXT:    adc r2, r5, #0
+; THUMBV7-NEXT:    cmp r6, #0
 ; THUMBV7-NEXT:    it ne
-; THUMBV7-NEXT:    movne r4, #1
-; THUMBV7-NEXT:    cmp r3, #0
+; THUMBV7-NEXT:    movne r6, #1
+; THUMBV7-NEXT:    orrs r1, r6
+; THUMBV7-NEXT:    ldr r6, [sp, #84]
+; THUMBV7-NEXT:    umlal r0, r2, r3, r7
+; THUMBV7-NEXT:    ldr r3, [sp, #32] @ 4-byte Reload
+; THUMBV7-NEXT:    cmp r7, #0
+; THUMBV7-NEXT:    it ne
+; THUMBV7-NEXT:    movne r7, #1
+; THUMBV7-NEXT:    orrs r1, r3
+; THUMBV7-NEXT:    mov r3, r6
+; THUMBV7-NEXT:    cmp r6, #0
 ; THUMBV7-NEXT:    it ne
 ; THUMBV7-NEXT:    movne r3, #1
-; THUMBV7-NEXT:    cmp.w lr, #0
+; THUMBV7-NEXT:    cmp.w r8, #0
+; THUMBV7-NEXT:    and.w r3, r3, r7
+; THUMBV7-NEXT:    ldr r7, [sp, #80]
 ; THUMBV7-NEXT:    it ne
-; THUMBV7-NEXT:    movne.w lr, #1
-; THUMBV7-NEXT:    cmp.w r11, #0
+; THUMBV7-NEXT:    movne.w r8, #1
+; THUMBV7-NEXT:    cmp.w r9, #0
+; THUMBV7-NEXT:    it ne
+; THUMBV7-NEXT:    movne.w r9, #1
+; THUMBV7-NEXT:    orrs r7, r6
+; THUMBV7-NEXT:    ldr r6, [sp, #96]
+; THUMBV7-NEXT:    it ne
+; THUMBV7-NEXT:    movne r7, #1
+; THUMBV7-NEXT:    orr.w r3, r3, r8
+; THUMBV7-NEXT:    orrs.w r6, r6, lr
+; THUMBV7-NEXT:    orr.w r3, r3, r9
 ; THUMBV7-NEXT:    it ne
-; THUMBV7-NEXT:    movne.w r11, #1
-; THUMBV7-NEXT:    adds r2, r2, r5
-; THUMBV7-NEXT:    and.w r3, r3, lr
-; THUMBV7-NEXT:    str.w r2, [r9, #8]
-; THUMBV7-NEXT:    adcs r1, r7
-; THUMBV7-NEXT:    str.w r1, [r9, #12]
-; THUMBV7-NEXT:    orr.w r1, r3, r11
-; THUMBV7-NEXT:    ldr r2, [sp, #32] @ 4-byte Reload
-; THUMBV7-NEXT:    orr.w r1, r1, r4
-; THUMBV7-NEXT:    orr.w r1, r1, r2
+; THUMBV7-NEXT:    movne r6, #1
+; THUMBV7-NEXT:    adds.w r0, r0, r10
+; THUMBV7-NEXT:    str.w r0, [r11, #8]
+; THUMBV7-NEXT:    ldr r0, [sp, #24] @ 4-byte Reload
+; THUMBV7-NEXT:    adcs r0, r2
+; THUMBV7-NEXT:    str.w r0, [r11, #12]
+; THUMBV7-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
+; THUMBV7-NEXT:    and.w r2, r7, r6
+; THUMBV7-NEXT:    orr.w r0, r0, r3
+; THUMBV7-NEXT:    orr.w r0, r0, r2
 ; THUMBV7-NEXT:    orr.w r0, r0, r1
-; THUMBV7-NEXT:    adc r1, r12, #0
+; THUMBV7-NEXT:    adc r1, r5, #0
 ; THUMBV7-NEXT:    orrs r0, r1
 ; THUMBV7-NEXT:    and r0, r0, #1
-; THUMBV7-NEXT:    strb.w r0, [r9, #16]
+; THUMBV7-NEXT:    strb.w r0, [r11, #16]
 ; THUMBV7-NEXT:    add sp, #44
 ; THUMBV7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 start:

diff  --git a/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll b/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll
index 55e917159fce9b..161adf7e7d7639 100644
--- a/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll
@@ -4,32 +4,33 @@
 define { i64, i8 } @mulodi_test(i64 %l, i64 %r) unnamed_addr #0 {
 ; THUMBV7-LABEL: mulodi_test:
 ; THUMBV7:       @ %bb.0: @ %start
-; THUMBV7-NEXT:    .save {r4, r5, r7, lr}
-; THUMBV7-NEXT:    push {r4, r5, r7, lr}
+; THUMBV7-NEXT:    .save {r4, r5, r6, lr}
+; THUMBV7-NEXT:    push {r4, r5, r6, lr}
 ; THUMBV7-NEXT:    umull r12, lr, r3, r0
+; THUMBV7-NEXT:    movs r6, #0
+; THUMBV7-NEXT:    umull r4, r5, r1, r2
+; THUMBV7-NEXT:    umull r0, r2, r0, r2
+; THUMBV7-NEXT:    add r4, r12
+; THUMBV7-NEXT:    adds.w r12, r2, r4
+; THUMBV7-NEXT:    adc r2, r6, #0
 ; THUMBV7-NEXT:    cmp r3, #0
 ; THUMBV7-NEXT:    it ne
 ; THUMBV7-NEXT:    movne r3, #1
 ; THUMBV7-NEXT:    cmp r1, #0
-; THUMBV7-NEXT:    umull r0, r4, r0, r2
-; THUMBV7-NEXT:    umull r2, r5, r1, r2
 ; THUMBV7-NEXT:    it ne
 ; THUMBV7-NEXT:    movne r1, #1
-; THUMBV7-NEXT:    ands r1, r3
 ; THUMBV7-NEXT:    cmp r5, #0
+; THUMBV7-NEXT:    and.w r1, r1, r3
 ; THUMBV7-NEXT:    it ne
 ; THUMBV7-NEXT:    movne r5, #1
 ; THUMBV7-NEXT:    orrs r1, r5
 ; THUMBV7-NEXT:    cmp.w lr, #0
 ; THUMBV7-NEXT:    it ne
 ; THUMBV7-NEXT:    movne.w lr, #1
-; THUMBV7-NEXT:    orr.w r3, r1, lr
-; THUMBV7-NEXT:    add.w r1, r2, r12
-; THUMBV7-NEXT:    movs r2, #0
-; THUMBV7-NEXT:    adds r1, r1, r4
-; THUMBV7-NEXT:    adc r2, r2, #0
-; THUMBV7-NEXT:    orrs r2, r3
-; THUMBV7-NEXT:    pop {r4, r5, r7, pc}
+; THUMBV7-NEXT:    orr.w r1, r1, lr
+; THUMBV7-NEXT:    orrs r2, r1
+; THUMBV7-NEXT:    mov r1, r12
+; THUMBV7-NEXT:    pop {r4, r5, r6, pc}
 start:
   %0 = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %l, i64 %r) #2
   %1 = extractvalue { i64, i1 } %0, 0


        


More information about the llvm-commits mailing list