[llvm] 2a5e1da - Revert "[ARM] Stop gluing ALU nodes to branches / selects" (#118232)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Dec 1 14:02:31 PST 2024
Author: Martin Storsjö
Date: 2024-12-02T00:02:25+02:00
New Revision: 2a5e1da57a42fa2fc081bbc11970871a1eecb3b3
URL: https://github.com/llvm/llvm-project/commit/2a5e1da57a42fa2fc081bbc11970871a1eecb3b3
DIFF: https://github.com/llvm/llvm-project/commit/2a5e1da57a42fa2fc081bbc11970871a1eecb3b3.diff
LOG: Revert "[ARM] Stop gluing ALU nodes to branches / selects" (#118232)
Reverts llvm/llvm-project#116970.
This change broke Wine compiled for armv7, causing segfaults when
starting Wine. See llvm/llvm-project#116970 for more detailed discussion
about the issue.
Added:
Modified:
llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/lib/Target/ARM/ARMISelLowering.h
llvm/lib/Target/ARM/ARMInstrFormats.td
llvm/lib/Target/ARM/ARMInstrInfo.td
llvm/lib/Target/ARM/ARMInstrThumb.td
llvm/lib/Target/ARM/ARMInstrThumb2.td
llvm/lib/Target/ARM/ARMInstrVFP.td
llvm/test/CodeGen/ARM/add-like-or.ll
llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll
llvm/test/CodeGen/ARM/atomic-64bit.ll
llvm/test/CodeGen/ARM/atomic-ops-v8.ll
llvm/test/CodeGen/ARM/atomicrmw-cond-sub-clamp.ll
llvm/test/CodeGen/ARM/atomicrmw-uinc-udec-wrap.ll
llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
llvm/test/CodeGen/ARM/bfi.ll
llvm/test/CodeGen/ARM/cmov_fp16.ll
llvm/test/CodeGen/ARM/cse-call.ll
llvm/test/CodeGen/ARM/cttz.ll
llvm/test/CodeGen/ARM/fadd-select-fneg-combine.ll
llvm/test/CodeGen/ARM/fcmp-xo.ll
llvm/test/CodeGen/ARM/fpclamptosat.ll
llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
llvm/test/CodeGen/ARM/fpscr-multi-use.ll
llvm/test/CodeGen/ARM/fptoi-sat-store.ll
llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll
llvm/test/CodeGen/ARM/funnel-shift-rot.ll
llvm/test/CodeGen/ARM/funnel-shift.ll
llvm/test/CodeGen/ARM/ifcvt1.ll
llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
llvm/test/CodeGen/ARM/neon_vabd.ll
llvm/test/CodeGen/ARM/overflow-intrinsic-optimizations.ll
llvm/test/CodeGen/ARM/sadd_sat.ll
llvm/test/CodeGen/ARM/sadd_sat_plus.ll
llvm/test/CodeGen/ARM/select.ll
llvm/test/CodeGen/ARM/select_const.ll
llvm/test/CodeGen/ARM/shift-i64.ll
llvm/test/CodeGen/ARM/ssub_sat.ll
llvm/test/CodeGen/ARM/ssub_sat_plus.ll
llvm/test/CodeGen/ARM/sub-cmp-peephole.ll
llvm/test/CodeGen/ARM/uadd_sat.ll
llvm/test/CodeGen/ARM/uadd_sat_plus.ll
llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll
llvm/test/CodeGen/ARM/umulo-64-legalisation-lowering.ll
llvm/test/CodeGen/ARM/usub_sat.ll
llvm/test/CodeGen/ARM/usub_sat_plus.ll
llvm/test/CodeGen/ARM/vselect_imax.ll
llvm/test/CodeGen/ARM/wide-compares.ll
llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll
llvm/test/CodeGen/Thumb/select.ll
llvm/test/CodeGen/Thumb/smul_fix_sat.ll
llvm/test/CodeGen/Thumb/stack-guard-xo.ll
llvm/test/CodeGen/Thumb/umul_fix_sat.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/arm_cmplx_dot_prod_f32.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll
llvm/test/CodeGen/Thumb2/float-ops.ll
llvm/test/CodeGen/Thumb2/mve-blockplacement.ll
llvm/test/CodeGen/Thumb2/mve-doublereduct.ll
llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
llvm/test/CodeGen/Thumb2/mve-fmas.ll
llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll
llvm/test/CodeGen/Thumb2/mve-laneinterleaving.ll
llvm/test/CodeGen/Thumb2/mve-minmaxi.ll
llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll
llvm/test/CodeGen/Thumb2/mve-pred-ext.ll
llvm/test/CodeGen/Thumb2/mve-pred-or.ll
llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll
llvm/test/CodeGen/Thumb2/mve-pred-vselect.ll
llvm/test/CodeGen/Thumb2/mve-pred-xor.ll
llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
llvm/test/CodeGen/Thumb2/mve-vcmpf.ll
llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll
llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll
llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll
llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index fd024ffdeefde4..73ee8cf81adcd6 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -111,6 +111,13 @@ class ARMDAGToDAGISel : public SelectionDAGISel {
bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
+ bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
+ const ConstantSDNode *CN = cast<ConstantSDNode>(N);
+ Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
+ Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
+ return true;
+ }
+
bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
SDValue &Offset, SDValue &Opc);
bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
@@ -4116,15 +4123,17 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Chain = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
- SDValue Flags = N->getOperand(3);
+ SDValue N3 = N->getOperand(3);
+ SDValue InGlue = N->getOperand(4);
assert(N1.getOpcode() == ISD::BasicBlock);
assert(N2.getOpcode() == ISD::Constant);
+ assert(N3.getOpcode() == ISD::Register);
unsigned CC = (unsigned)N2->getAsZExtVal();
- if (Flags.getOpcode() == ARMISD::CMPZ) {
- if (Flags.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
- SDValue Int = Flags.getOperand(0);
+ if (InGlue.getOpcode() == ARMISD::CMPZ) {
+ if (InGlue.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
+ SDValue Int = InGlue.getOperand(0);
uint64_t ID = Int->getConstantOperandVal(1);
// Handle low-overhead loops.
@@ -4146,15 +4155,15 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
ReplaceUses(N, LoopEnd);
CurDAG->RemoveDeadNode(N);
- CurDAG->RemoveDeadNode(Flags.getNode());
+ CurDAG->RemoveDeadNode(InGlue.getNode());
CurDAG->RemoveDeadNode(Int.getNode());
return;
}
}
bool SwitchEQNEToPLMI;
- SelectCMPZ(Flags.getNode(), SwitchEQNEToPLMI);
- Flags = N->getOperand(3);
+ SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI);
+ InGlue = N->getOperand(4);
if (SwitchEQNEToPLMI) {
switch ((ARMCC::CondCodes)CC) {
@@ -4170,18 +4179,25 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
}
SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
- Chain = CurDAG->getCopyToReg(Chain, dl, ARM::CPSR, Flags, SDValue());
- SDValue Ops[] = {N1, Tmp2, CurDAG->getRegister(ARM::CPSR, MVT::i32), Chain,
- Chain.getValue(1)};
- CurDAG->SelectNodeTo(N, Opc, MVT::Other, Ops);
+ SDValue Ops[] = { N1, Tmp2, N3, Chain, InGlue };
+ SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
+ MVT::Glue, Ops);
+ Chain = SDValue(ResNode, 0);
+ if (N->getNumValues() == 2) {
+ InGlue = SDValue(ResNode, 1);
+ ReplaceUses(SDValue(N, 1), InGlue);
+ }
+ ReplaceUses(SDValue(N, 0),
+ SDValue(Chain.getNode(), Chain.getResNo()));
+ CurDAG->RemoveDeadNode(N);
return;
}
case ARMISD::CMPZ: {
// select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
// This allows us to avoid materializing the expensive negative constant.
- // The CMPZ #0 is useless and will be peepholed away but we need to keep
- // it for its flags output.
+ // The CMPZ #0 is useless and will be peepholed away but we need to keep it
+ // for its glue output.
SDValue X = N->getOperand(0);
auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
@@ -4208,7 +4224,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
}
if (Add) {
SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
- CurDAG->MorphNodeTo(N, ARMISD::CMPZ, N->getVTList(), Ops2);
+ CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
}
}
// Other cases are autogenerated.
@@ -4216,11 +4232,11 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::CMOV: {
- SDValue Flags = N->getOperand(3);
+ SDValue InGlue = N->getOperand(4);
- if (Flags.getOpcode() == ARMISD::CMPZ) {
+ if (InGlue.getOpcode() == ARMISD::CMPZ) {
bool SwitchEQNEToPLMI;
- SelectCMPZ(Flags.getNode(), SwitchEQNEToPLMI);
+ SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI);
if (SwitchEQNEToPLMI) {
SDValue ARMcc = N->getOperand(2);
@@ -4237,9 +4253,10 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
}
SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
- N->getOperand(3)};
+ N->getOperand(3), N->getOperand(4)};
CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
}
+
}
// Other cases are autogenerated.
break;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index c9250e4ed3422c..6b290135c5bcba 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -4924,11 +4924,14 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) &&
LHS.getConstantOperandVal(1) < 31) {
unsigned ShiftAmt = LHS.getConstantOperandVal(1) + 1;
- SDValue Shift =
- DAG.getNode(ARMISD::LSLS, dl, DAG.getVTList(MVT::i32, FlagsVT),
- LHS.getOperand(0), DAG.getConstant(ShiftAmt, dl, MVT::i32));
+ SDValue Shift = DAG.getNode(ARMISD::LSLS, dl,
+ DAG.getVTList(MVT::i32, MVT::i32),
+ LHS.getOperand(0),
+ DAG.getConstant(ShiftAmt, dl, MVT::i32));
+ SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
+ Shift.getValue(1), SDValue());
ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32);
- return Shift.getValue(1);
+ return Chain.getValue(1);
}
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
@@ -4960,7 +4963,7 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
break;
}
ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
- return DAG.getNode(CompareType, dl, FlagsVT, LHS, RHS);
+ return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
}
/// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
@@ -4975,7 +4978,24 @@ SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
else
Flags = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0, dl,
FlagsVT, LHS);
- return DAG.getNode(ARMISD::FMSTAT, dl, FlagsVT, Flags);
+ return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Flags);
+}
+
+/// duplicateCmp - Glue values can have only one use, so this function
+/// duplicates a comparison node.
+SDValue
+ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
+ unsigned Opc = Cmp.getOpcode();
+ SDLoc DL(Cmp);
+ if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
+ return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
+
+ assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
+ SDValue Flags = Cmp.getOperand(0);
+ assert((Flags.getOpcode() == ARMISD::CMPFP ||
+ Flags.getOpcode() == ARMISD::CMPFPw0) &&
+ "unexpected operand of FMSTAT");
+ return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Flags);
}
// This function returns three things: the arithmetic computation itself
@@ -5003,7 +5023,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
case ISD::SADDO:
ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
- OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value, LHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
break;
case ISD::UADDO:
ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
@@ -5012,17 +5032,17 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
Value = DAG.getNode(ARMISD::ADDC, dl,
DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
.getValue(0);
- OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value, LHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
break;
case ISD::SSUBO:
ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
- OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, LHS, RHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
break;
case ISD::USUBO:
ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
- OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, LHS, RHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
break;
case ISD::UMULO:
// We generate a UMUL_LOHI and then check if the high word is 0.
@@ -5030,7 +5050,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
Value = DAG.getNode(ISD::UMUL_LOHI, dl,
DAG.getVTList(Op.getValueType(), Op.getValueType()),
LHS, RHS);
- OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
+ OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
DAG.getConstant(0, dl, MVT::i32));
Value = Value.getValue(0); // We only want the low 32 bits for the result.
break;
@@ -5041,7 +5061,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
Value = DAG.getNode(ISD::SMUL_LOHI, dl,
DAG.getVTList(Op.getValueType(), Op.getValueType()),
LHS, RHS);
- OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
+ OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
DAG.getNode(ISD::SRA, dl, Op.getValueType(),
Value.getValue(0),
DAG.getConstant(31, dl, MVT::i32)));
@@ -5061,14 +5081,15 @@ ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
SDValue Value, OverflowCmp;
SDValue ARMcc;
std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDLoc dl(Op);
// We use 0 and 1 as false and true values.
SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
EVT VT = Op.getValueType();
- SDValue Overflow =
- DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, ARMcc, OverflowCmp);
+ SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
+ ARMcc, CCR, OverflowCmp);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
@@ -5205,9 +5226,11 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Value, OverflowCmp;
SDValue ARMcc;
std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
EVT VT = Op.getValueType();
- return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, OverflowCmp, DAG);
+ return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
+ OverflowCmp, DAG);
}
// Convert:
@@ -5235,9 +5258,14 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
False = SelectTrue;
}
- if (True.getNode() && False.getNode())
- return getCMOV(dl, Op.getValueType(), True, False, Cond.getOperand(2),
- Cond.getOperand(3), DAG);
+ if (True.getNode() && False.getNode()) {
+ EVT VT = Op.getValueType();
+ SDValue ARMcc = Cond.getOperand(2);
+ SDValue CCR = Cond.getOperand(3);
+ SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
+ assert(True.getValueType() == VT);
+ return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
+ }
}
}
@@ -5302,8 +5330,8 @@ static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
}
SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
- SDValue TrueVal, SDValue ARMcc,
- SDValue Flags, SelectionDAG &DAG) const {
+ SDValue TrueVal, SDValue ARMcc, SDValue CCR,
+ SDValue Cmp, SelectionDAG &DAG) const {
if (!Subtarget->hasFP64() && VT == MVT::f64) {
FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
@@ -5316,13 +5344,15 @@ SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
SDValue FalseHigh = FalseVal.getValue(1);
SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
- ARMcc, Flags);
+ ARMcc, CCR, Cmp);
SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
- ARMcc, Flags);
+ ARMcc, CCR, duplicateCmp(Cmp, DAG));
return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
+ } else {
+ return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
+ Cmp);
}
- return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, Flags);
}
static bool isGTorGE(ISD::CondCode CC) {
@@ -5595,11 +5625,12 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
}
SDValue ARMcc;
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
// Choose GE over PL, which vsel does now support
if (ARMcc->getAsZExtVal() == ARMCC::PL)
ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32);
- return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, Cmp, DAG);
+ return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
}
ARMCC::CondCodes CondCode, CondCode2;
@@ -5629,10 +5660,13 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
- SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, Cmp, DAG);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
if (CondCode2 != ARMCC::AL) {
SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
- Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, Cmp, DAG);
+ // FIXME: Needs another CMP because flag can have but one use.
+ SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
+ Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
}
return Result;
}
@@ -5733,8 +5767,9 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
bitcastf32Toi32(RHS, DAG), Mask);
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
- return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
- Cmp);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
+ Chain, Dest, ARMcc, CCR, Cmp);
}
SDValue LHS1, LHS2;
@@ -5745,8 +5780,9 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
+ SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
- return DAG.getNode(ARMISD::BCC_i64, dl, MVT::Other, Ops);
+ return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
}
return SDValue();
@@ -5780,8 +5816,9 @@ SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
(ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
CondCode = ARMCC::getOppositeCondition(CondCode);
ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
OverflowCmp);
}
@@ -5833,15 +5870,18 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
CondCode = ARMCC::getOppositeCondition(CondCode);
ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
}
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
OverflowCmp);
}
if (LHS.getValueType() == MVT::i32) {
SDValue ARMcc;
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
- return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, Cmp);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
+ Chain, Dest, ARMcc, CCR, Cmp);
}
if (getTargetMachine().Options.UnsafeFPMath &&
@@ -5856,12 +5896,14 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
- SDValue Ops[] = {Chain, Dest, ARMcc, Cmp};
- SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Ops);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
+ SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
if (CondCode2 != ARMCC::AL) {
ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
- SDValue Ops[] = {Res, Dest, ARMcc, Cmp};
- Res = DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Ops);
+ SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
+ Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
}
return Res;
}
@@ -6366,6 +6408,7 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
SDValue ARMcc;
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
@@ -6380,8 +6423,8 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
ISD::SETGE, ARMcc, DAG, dl);
- SDValue Lo =
- DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift, ARMcc, CmpLo);
+ SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift,
+ ARMcc, CCR, CmpLo);
SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
SDValue HiBigShift = Opc == ISD::SRA
@@ -6390,8 +6433,8 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
: DAG.getConstant(0, dl, VT);
SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
ISD::SETGE, ARMcc, DAG, dl);
- SDValue Hi =
- DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CmpHi);
+ SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
+ ARMcc, CCR, CmpHi);
SDValue Ops[2] = { Lo, Hi };
return DAG.getMergeValues(Ops, dl);
@@ -6409,6 +6452,7 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
SDValue ARMcc;
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
assert(Op.getOpcode() == ISD::SHL_PARTS);
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
@@ -6422,14 +6466,14 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
ISD::SETGE, ARMcc, DAG, dl);
- SDValue Hi =
- DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CmpHi);
+ SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
+ ARMcc, CCR, CmpHi);
SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
ISD::SETGE, ARMcc, DAG, dl);
SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
- DAG.getConstant(0, dl, VT), ARMcc, CmpLo);
+ DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo);
SDValue Ops[2] = { Lo, Hi };
return DAG.getMergeValues(Ops, dl);
@@ -7016,8 +7060,11 @@ static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) {
SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
SDValue ARMcc = DAG.getConstant(
IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR,
+ Cmp.getValue(1), SDValue());
return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
- Cmp.getValue(1));
+ CCR, Chain.getValue(1));
}
/// isVMOVModifiedImm - Check if the specified splat value corresponds to a
@@ -10566,14 +10613,21 @@ SDValue ARMTargetLowering::LowerFSETCC(SDValue Op, SelectionDAG &DAG) const {
ARMCC::CondCodes CondCode, CondCode2;
FPCCToARMCC(CC, CondCode, CondCode2);
+ // FIXME: Chain is not handled correctly here. Currently the FPSCR is implicit
+ // in CMPFP and CMPFPE, but instead it should be made explicit by these
+ // instructions using a chain instead of glue. This would also fix the problem
+ // here (and also in LowerSELECT_CC) where we generate two comparisons when
+ // CondCode2 != AL.
SDValue True = DAG.getConstant(1, dl, VT);
SDValue False = DAG.getConstant(0, dl, VT);
SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
- SDValue Result = getCMOV(dl, VT, False, True, ARMcc, Cmp, DAG);
+ SDValue Result = getCMOV(dl, VT, False, True, ARMcc, CCR, Cmp, DAG);
if (CondCode2 != ARMCC::AL) {
ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
- Result = getCMOV(dl, VT, Result, True, ARMcc, Cmp, DAG);
+ Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
+ Result = getCMOV(dl, VT, Result, True, ARMcc, CCR, Cmp, DAG);
}
return DAG.getMergeValues({Result, Chain}, dl);
}
@@ -15003,7 +15057,7 @@ static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG) {
}
// Check that N is CMPZ(CSINC(0, 0, CC, X)),
-// or CMPZ(CMOV(1, 0, CC, X))
+// or CMPZ(CMOV(1, 0, CC, $cpsr, X))
// return X if valid.
static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) {
if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1)))
@@ -15027,22 +15081,22 @@ static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) {
if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(0)) &&
isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);
- return CSInc.getOperand(3);
+ return CSInc.getOperand(4);
}
if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(1)) &&
isNullConstant(CSInc.getOperand(0)) && CSInc->hasOneUse()) {
CC = ARMCC::getOppositeCondition(
(ARMCC::CondCodes)CSInc.getConstantOperandVal(2));
- return CSInc.getOperand(3);
+ return CSInc.getOperand(4);
}
return SDValue();
}
static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG) {
// Given CMPZ(CSINC(C, 0, 0, EQ), 0), we can just use C directly. As in
- // t92: flags = ARMISD::CMPZ t74, 0
+ // t92: glue = ARMISD::CMPZ t74, 0
// t93: i32 = ARMISD::CSINC 0, 0, 1, t92
- // t96: flags = ARMISD::CMPZ t93, 0
+ // t96: glue = ARMISD::CMPZ t93, 0
// t114: i32 = ARMISD::CSINV 0, 0, 0, t96
ARMCC::CondCodes Cond;
if (SDValue C = IsCMPZCSINC(N, Cond))
@@ -18133,7 +18187,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D
SDValue Op0 = CMOV->getOperand(0);
SDValue Op1 = CMOV->getOperand(1);
auto CC = CMOV->getConstantOperandAPInt(2).getLimitedValue();
- SDValue CmpZ = CMOV->getOperand(3);
+ SDValue CmpZ = CMOV->getOperand(4);
// The compare must be against zero.
if (!isNullConstant(CmpZ->getOperand(1)))
@@ -18377,11 +18431,12 @@ static SDValue PerformHWLoopCombine(SDNode *N,
/// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
SDValue
ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
- SDValue Cmp = N->getOperand(3);
+ SDValue Cmp = N->getOperand(4);
if (Cmp.getOpcode() != ARMISD::CMPZ)
// Only looking at NE cases.
return SDValue();
+ EVT VT = N->getValueType(0);
SDLoc dl(N);
SDValue LHS = Cmp.getOperand(0);
SDValue RHS = Cmp.getOperand(1);
@@ -18390,17 +18445,17 @@ ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
SDValue ARMcc = N->getOperand(2);
ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();
- // (brcond Chain BB ne (cmpz (and (cmov 0 1 CC Flags) 1) 0))
- // -> (brcond Chain BB CC Flags)
+ // (brcond Chain BB ne CPSR (cmpz (and (cmov 0 1 CC CPSR Cmp) 1) 0))
+ // -> (brcond Chain BB CC CPSR Cmp)
if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
LHS->getOperand(0)->hasOneUse() &&
isNullConstant(LHS->getOperand(0)->getOperand(0)) &&
isOneConstant(LHS->getOperand(0)->getOperand(1)) &&
isOneConstant(LHS->getOperand(1)) && isNullConstant(RHS)) {
- return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, BB,
- LHS->getOperand(0)->getOperand(2),
- LHS->getOperand(0)->getOperand(3));
+ return DAG.getNode(
+ ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2),
+ LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4));
}
return SDValue();
@@ -18409,7 +18464,7 @@ ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
SDValue
ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
- SDValue Cmp = N->getOperand(3);
+ SDValue Cmp = N->getOperand(4);
if (Cmp.getOpcode() != ARMISD::CMPZ)
// Only looking at EQ and NE cases.
return SDValue();
@@ -18449,38 +18504,42 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
/// FIXME: Turn this into a target neutral optimization?
SDValue Res;
if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
- Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc, Cmp);
+ Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
+ N->getOperand(3), Cmp);
} else if (CC == ARMCC::EQ && TrueVal == RHS) {
SDValue ARMcc;
SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
- Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc, NewCmp);
+ Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
+ N->getOperand(3), NewCmp);
}
- // (cmov F T ne (cmpz (cmov 0 1 CC Flags) 0))
- // -> (cmov F T CC Flags)
+ // (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0))
+ // -> (cmov F T CC CPSR Cmp)
if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse() &&
isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
isNullConstant(RHS)) {
return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
- LHS->getOperand(2), LHS->getOperand(3));
+ LHS->getOperand(2), LHS->getOperand(3),
+ LHS->getOperand(4));
}
if (!VT.isInteger())
return SDValue();
// Fold away an unneccessary CMPZ/CMOV
- // CMOV A, B, C1, (CMPZ (CMOV 1, 0, C2, D), 0) ->
- // if C1==EQ -> CMOV A, B, C2, D
- // if C1==NE -> CMOV A, B, NOT(C2), D
+ // CMOV A, B, C1, $cpsr, (CMPZ (CMOV 1, 0, C2, D), 0) ->
+ // if C1==EQ -> CMOV A, B, C2, $cpsr, D
+ // if C1==NE -> CMOV A, B, NOT(C2), $cpsr, D
if (N->getConstantOperandVal(2) == ARMCC::EQ ||
N->getConstantOperandVal(2) == ARMCC::NE) {
ARMCC::CondCodes Cond;
- if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) {
+ if (SDValue C = IsCMPZCSINC(N->getOperand(4).getNode(), Cond)) {
if (N->getConstantOperandVal(2) == ARMCC::NE)
Cond = ARMCC::getOppositeCondition(Cond);
return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
N->getOperand(1),
- DAG.getConstant(Cond, SDLoc(N), MVT::i32), C);
+ DAG.getTargetConstant(Cond, SDLoc(N), MVT::i32),
+ N->getOperand(3), C);
}
}
@@ -18520,8 +18579,10 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
// CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1
SDValue Sub =
DAG.getNode(ARMISD::SUBC, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
+ SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
+ Sub.getValue(1), SDValue());
Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc,
- Sub.getValue(1));
+ N->getOperand(3), CPSRGlue.getValue(1));
FalseVal = Sub;
}
} else if (isNullConstant(TrueVal)) {
@@ -18532,9 +18593,11 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
// CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1
SDValue Sub =
DAG.getNode(ARMISD::SUBC, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
+ SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
+ Sub.getValue(1), SDValue());
Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal,
DAG.getConstant(ARMCC::NE, dl, MVT::i32),
- Sub.getValue(1));
+ N->getOperand(3), CPSRGlue.getValue(1));
FalseVal = Sub;
}
}
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 4fa600e0cfcc40..344a0ad91e5178 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -978,11 +978,13 @@ class VectorType;
bool isUnsupportedFloatingType(EVT VT) const;
SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
- SDValue ARMcc, SDValue Flags, SelectionDAG &DAG) const;
+ SDValue ARMcc, SDValue CCR, SDValue Cmp,
+ SelectionDAG &DAG) const;
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &ARMcc, SelectionDAG &DAG, const SDLoc &dl) const;
SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
const SDLoc &dl, bool Signaling = false) const;
+ SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const;
SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/ARM/ARMInstrFormats.td b/llvm/lib/Target/ARM/ARMInstrFormats.td
index 041601748b1f73..d0678f378da1ea 100644
--- a/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -167,6 +167,16 @@ def pred : PredicateOperand<OtherVT, (ops i32imm, i32imm),
let DecoderMethod = "DecodePredicateOperand";
}
+// Selectable predicate operand for CMOV instructions. We can't use a normal
+// predicate because the default values interfere with instruction selection. In
+// all other respects it is identical though: pseudo-instruction expansion
+// relies on the MachineOperands being compatible.
+def cmovpred : Operand<i32>, PredicateOp,
+ ComplexPattern<i32, 2, "SelectCMOVPred"> {
+ let MIOperandInfo = (ops i32imm, i32imm);
+ let PrintMethod = "printPredicateOperand";
+}
+
// Conditional code result for instructions whose 's' bit is set, e.g. subs.
def CCOutOperand : AsmOperandClass {
let Name = "CCOut";
@@ -1124,9 +1134,6 @@ class ARMV5MOPat<dag pattern, dag result> : Pat<pattern, result> {
class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> {
list<Predicate> Predicates = [IsARM, HasV6];
}
-class ARMV6T2Pat<dag pattern, dag result> : Pat<pattern, result> {
- list<Predicate> Predicates = [IsARM, HasV6T2];
-}
class VFPPat<dag pattern, dag result> : Pat<pattern, result> {
list<Predicate> Predicates = [HasVFP2];
}
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 718cb964ab7c3b..db38b43279b866 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -14,9 +14,6 @@
// ARM specific DAG Nodes.
//
-/// Value type used for "condition code" operands.
-defvar CondCodeVT = i32;
-
/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
defvar FlagsVT = i32;
@@ -32,19 +29,12 @@ def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>;
def SDT_ARMcall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
-def SDT_ARMCMov : SDTypeProfile<1, 4, [
- /* any */ // result
- SDTCisSameAs<1, 0>, // value on false
- SDTCisSameAs<2, 0>, // value on true
- SDTCisVT<3, CondCodeVT>, // condition code
- SDTCisVT<4, FlagsVT>, // in flags
-]>;
+def SDT_ARMCMov : SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisVT<3, i32>]>;
-def SDT_ARMBrcond : SDTypeProfile<0, 2, [
- SDTCisVT<0, OtherVT>, // target basic block
- SDTCisVT<1, CondCodeVT>, // condition code
- SDTCisVT<2, FlagsVT>, // in flags
-]>;
+def SDT_ARMBrcond : SDTypeProfile<0, 2,
+ [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>;
def SDT_ARMBrJT : SDTypeProfile<0, 2,
[SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
@@ -63,11 +53,7 @@ def SDT_ARMAnd : SDTypeProfile<1, 2,
[SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<2, i32>]>;
-def SDT_ARMCmp : SDTypeProfile<1, 2, [
- SDTCisVT<0, FlagsVT>, // out flags
- SDTCisInt<1>, // lhs
- SDTCisSameAs<2, 1> // rhs
-]>;
+def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
SDTCisPtrTy<1>, SDTCisVT<2, i32>]>;
@@ -138,17 +124,15 @@ def ARMSmlaldx : SDNode<"ARMISD::SMLALDX", SDT_LongMac>;
def ARMSmlsld : SDNode<"ARMISD::SMLSLD", SDT_LongMac>;
def ARMSmlsldx : SDNode<"ARMISD::SMLSLDX", SDT_LongMac>;
-def SDT_ARMCSel : SDTypeProfile<1, 4, [
- /* any */ // result
- SDTCisSameAs<1, 0>, // lhs
- SDTCisSameAs<2, 0>, // rhs
- SDTCisVT<3, CondCodeVT>, // condition code
- SDTCisVT<3, FlagsVT> // in flags
-]>;
+def SDT_ARMCSel : SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisInt<3>,
+ SDTCisVT<3, i32>]>;
-def ARMcsinv : SDNode<"ARMISD::CSINV", SDT_ARMCSel>;
-def ARMcsneg : SDNode<"ARMISD::CSNEG", SDT_ARMCSel>;
-def ARMcsinc : SDNode<"ARMISD::CSINC", SDT_ARMCSel>;
+def ARMcsinv : SDNode<"ARMISD::CSINV", SDT_ARMCSel, [SDNPOptInGlue]>;
+def ARMcsneg : SDNode<"ARMISD::CSNEG", SDT_ARMCSel, [SDNPOptInGlue]>;
+def ARMcsinc : SDNode<"ARMISD::CSINC", SDT_ARMCSel, [SDNPOptInGlue]>;
def SDT_MulHSR : SDTypeProfile<1, 3, [SDTCisVT<0,i32>,
SDTCisSameAs<0, 1>,
@@ -189,13 +173,15 @@ def ARMseretglue : SDNode<"ARMISD::SERET_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def ARMintretglue : SDNode<"ARMISD::INTRET_GLUE", SDT_ARMcall,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov>;
+def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
+ [SDNPInGlue]>;
def ARMssat : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>;
def ARMusat : SDNode<"ARMISD::USAT", SDTIntSatNoShOp, []>;
-def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond, [SDNPHasChain]>;
+def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT,
[SDNPHasChain]>;
@@ -205,11 +191,14 @@ def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT,
def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64,
[SDNPHasChain]>;
-def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp>;
+def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp,
+ [SDNPOutGlue]>;
-def ARMcmn : SDNode<"ARMISD::CMN", SDT_ARMCmp>;
+def ARMcmn : SDNode<"ARMISD::CMN", SDT_ARMCmp,
+ [SDNPOutGlue]>;
-def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp, [SDNPCommutative]>;
+def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp,
+ [SDNPOutGlue, SDNPCommutative]>;
def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>;
@@ -1787,7 +1776,7 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc,
string rrDecoderMethod = ""> {
def ri : AI1<opcod, (outs), (ins GPR:$Rn, mod_imm:$imm), DPFrm, iii,
opc, "\t$Rn, $imm",
- [(set CPSR, (opnode GPR:$Rn, mod_imm:$imm))]>,
+ [(opnode GPR:$Rn, mod_imm:$imm)]>,
Sched<[WriteCMP, ReadALU]> {
bits<4> Rn;
bits<12> imm;
@@ -1801,7 +1790,7 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc,
}
def rr : AI1<opcod, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, iir,
opc, "\t$Rn, $Rm",
- [(set CPSR, (opnode GPR:$Rn, GPR:$Rm))]>,
+ [(opnode GPR:$Rn, GPR:$Rm)]>,
Sched<[WriteCMP, ReadALU, ReadALU]> {
bits<4> Rn;
bits<4> Rm;
@@ -1819,7 +1808,7 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc,
def rsi : AI1<opcod, (outs),
(ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, iis,
opc, "\t$Rn, $shift",
- [(set CPSR, (opnode GPR:$Rn, so_reg_imm:$shift))]>,
+ [(opnode GPR:$Rn, so_reg_imm:$shift)]>,
Sched<[WriteCMPsi, ReadALU]> {
bits<4> Rn;
bits<12> shift;
@@ -1836,7 +1825,7 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc,
def rsr : AI1<opcod, (outs),
(ins GPRnopc:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, iis,
opc, "\t$Rn, $shift",
- [(set CPSR, (opnode GPRnopc:$Rn, so_reg_reg:$shift))]>,
+ [(opnode GPRnopc:$Rn, so_reg_reg:$shift)]>,
Sched<[WriteCMPsr, ReadALU]> {
bits<4> Rn;
bits<12> shift;
@@ -4954,7 +4943,7 @@ def : ARMPat<(ARMcmpZ so_reg_reg:$rhs, 0),
let isCompare = 1, Defs = [CPSR] in {
def CMNri : AI1<0b1011, (outs), (ins GPR:$Rn, mod_imm:$imm), DPFrm, IIC_iCMPi,
"cmn", "\t$Rn, $imm",
- [(set CPSR, (ARMcmn GPR:$Rn, mod_imm:$imm))]>,
+ [(ARMcmn GPR:$Rn, mod_imm:$imm)]>,
Sched<[WriteCMP, ReadALU]> {
bits<4> Rn;
bits<12> imm;
@@ -4970,8 +4959,8 @@ def CMNri : AI1<0b1011, (outs), (ins GPR:$Rn, mod_imm:$imm), DPFrm, IIC_iCMPi,
// CMN register-register/shift
def CMNzrr : AI1<0b1011, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, IIC_iCMPr,
"cmn", "\t$Rn, $Rm",
- [(set CPSR, (BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
- GPR:$Rn, GPR:$Rm))]>, Sched<[WriteCMP, ReadALU, ReadALU]> {
+ [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
+ GPR:$Rn, GPR:$Rm)]>, Sched<[WriteCMP, ReadALU, ReadALU]> {
bits<4> Rn;
bits<4> Rm;
let isCommutable = 1;
@@ -4988,8 +4977,8 @@ def CMNzrr : AI1<0b1011, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, IIC_iCMPr,
def CMNzrsi : AI1<0b1011, (outs),
(ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, IIC_iCMPsr,
"cmn", "\t$Rn, $shift",
- [(set CPSR, (BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
- GPR:$Rn, so_reg_imm:$shift))]>,
+ [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
+ GPR:$Rn, so_reg_imm:$shift)]>,
Sched<[WriteCMPsi, ReadALU]> {
bits<4> Rn;
bits<12> shift;
@@ -5007,8 +4996,8 @@ def CMNzrsi : AI1<0b1011, (outs),
def CMNzrsr : AI1<0b1011, (outs),
(ins GPRnopc:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, IIC_iCMPsr,
"cmn", "\t$Rn, $shift",
- [(set CPSR, (BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
- GPRnopc:$Rn, so_reg_reg:$shift))]>,
+ [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
+ GPRnopc:$Rn, so_reg_reg:$shift)]>,
Sched<[WriteCMPsr, ReadALU]> {
bits<4> Rn;
bits<12> shift;
@@ -5063,74 +5052,65 @@ let hasSideEffects = 0 in {
let isCommutable = 1, isSelect = 1 in
def MOVCCr : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, GPR:$Rm, pred:$p),
- 4, IIC_iCMOVr, []>,
+ (ins GPR:$false, GPR:$Rm, cmovpred:$p),
+ 4, IIC_iCMOVr,
+ [(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm,
+ cmovpred:$p))]>,
RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
def MOVCCsi : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, so_reg_imm:$shift, pred:$p),
- 4, IIC_iCMOVsr, []>,
+ (ins GPR:$false, so_reg_imm:$shift, cmovpred:$p),
+ 4, IIC_iCMOVsr,
+ [(set GPR:$Rd,
+ (ARMcmov GPR:$false, so_reg_imm:$shift,
+ cmovpred:$p))]>,
RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
def MOVCCsr : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, so_reg_reg:$shift, pred:$p),
- 4, IIC_iCMOVsr, []>,
+ (ins GPR:$false, so_reg_reg:$shift, cmovpred:$p),
+ 4, IIC_iCMOVsr,
+ [(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_reg:$shift,
+ cmovpred:$p))]>,
RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
let isMoveImm = 1 in
def MOVCCi16
: ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, imm0_65535_expr:$imm, pred:$p),
- 4, IIC_iMOVi, []>,
+ (ins GPR:$false, imm0_65535_expr:$imm, cmovpred:$p),
+ 4, IIC_iMOVi,
+ [(set GPR:$Rd, (ARMcmov GPR:$false, imm0_65535:$imm,
+ cmovpred:$p))]>,
RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>,
Sched<[WriteALU]>;
let isMoveImm = 1 in
def MOVCCi : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, mod_imm:$imm, pred:$p),
- 4, IIC_iCMOVi, []>,
+ (ins GPR:$false, mod_imm:$imm, cmovpred:$p),
+ 4, IIC_iCMOVi,
+ [(set GPR:$Rd, (ARMcmov GPR:$false, mod_imm:$imm,
+ cmovpred:$p))]>,
RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
// Two instruction predicate mov immediate.
let isMoveImm = 1 in
def MOVCCi32imm
: ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, i32imm:$src, pred:$p),
- 8, IIC_iCMOVix2, []>,
+ (ins GPR:$false, i32imm:$src, cmovpred:$p),
+ 8, IIC_iCMOVix2,
+ [(set GPR:$Rd, (ARMcmov GPR:$false, imm:$src,
+ cmovpred:$p))]>,
RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>;
let isMoveImm = 1 in
def MVNCCi : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, mod_imm:$imm, pred:$p),
- 4, IIC_iCMOVi, []>,
+ (ins GPR:$false, mod_imm:$imm, cmovpred:$p),
+ 4, IIC_iCMOVi,
+ [(set GPR:$Rd, (ARMcmov GPR:$false, mod_imm_not:$imm,
+ cmovpred:$p))]>,
RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
} // hasSideEffects
-// The following patterns have to be defined out-of-line because the number
-// of instruction operands does not match the number of SDNode operands
-// (`pred` counts as one operand).
-
-def : ARMPat<(ARMcmov i32:$false, i32:$Rm, imm:$cc, CPSR),
- (MOVCCr $false, $Rm, imm:$cc, CPSR)>;
-
-def : ARMPat<(ARMcmov i32:$false, so_reg_imm:$shift, imm:$cc, CPSR),
- (MOVCCsi $false, so_reg_imm:$shift, imm:$cc, CPSR)>;
-
-def : ARMPat<(ARMcmov i32:$false, so_reg_reg:$shift, imm:$cc, CPSR),
- (MOVCCsr $false, so_reg_reg:$shift, imm:$cc, CPSR)>;
-
-def : ARMV6T2Pat<(ARMcmov i32:$false, imm0_65535:$imm, imm:$cc, CPSR),
- (MOVCCi16 $false, imm0_65535:$imm, imm:$cc, CPSR)>;
-
-def : ARMPat<(ARMcmov i32:$false, mod_imm:$imm, imm:$cc, CPSR),
- (MOVCCi $false, mod_imm:$imm, imm:$cc, CPSR)>;
-
-def : ARMPat<(ARMcmov i32:$false, mod_imm_not:$imm, imm:$cc, CPSR),
- (MVNCCi $false, mod_imm_not:$imm, imm:$cc, CPSR)>;
-
-def : ARMV6T2Pat<(ARMcmov i32:$false, imm:$src, imm:$cc, CPSR),
- (MOVCCi32imm $false, imm:$src, imm:$cc, CPSR)>;
//===----------------------------------------------------------------------===//
// Atomic operations intrinsics
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td
index cc7fc743fe4f92..b92f42874bbddb 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -23,7 +23,8 @@ def imm_sr_XFORM: SDNodeXForm<imm, [{
return CurDAG->getTargetConstant((Imm == 32 ? 0 : Imm), SDLoc(N), MVT::i32);
}]>;
def ThumbSRImmAsmOperand: ImmAsmOperand<1,32> { let Name = "ImmThumbSR"; }
-def imm_sr : Operand<i32>, ImmLeaf<i32, [{
+def imm_sr : Operand<i32>, PatLeaf<(imm), [{
+ uint64_t Imm = N->getZExtValue();
return Imm > 0 && Imm <= 32;
}], imm_sr_XFORM> {
let PrintMethod = "printThumbSRImm";
@@ -1107,14 +1108,13 @@ let isCompare = 1, Defs = [CPSR] in {
// T1pIDPEncode<0b1011, (outs), (ins tGPR:$lhs, tGPR:$rhs),
// IIC_iCMPr,
// "cmn", "\t$lhs, $rhs",
-// [(set CPSR, (ARMcmp tGPR:$lhs, (ineg tGPR:$rhs)))]>;
+// [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>;
def tCMNz : // A8.6.33
T1pIDPEncode<0b1011, (outs), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iCMPr,
"cmn", "\t$Rn, $Rm",
- [(set CPSR, (ARMcmpZ tGPR:$Rn, (ineg tGPR:$Rm)))]>,
- Sched<[WriteCMP]>;
+ [(ARMcmpZ tGPR:$Rn, (ineg tGPR:$Rm))]>, Sched<[WriteCMP]>;
} // isCompare = 1, Defs = [CPSR]
@@ -1122,7 +1122,7 @@ def tCMNz : // A8.6.33
let isCompare = 1, Defs = [CPSR] in {
def tCMPi8 : T1pI<(outs), (ins tGPR:$Rn, imm0_255:$imm8), IIC_iCMPi,
"cmp", "\t$Rn, $imm8",
- [(set CPSR, (ARMcmp tGPR:$Rn, imm0_255:$imm8))]>,
+ [(ARMcmp tGPR:$Rn, imm0_255:$imm8)]>,
T1General<{1,0,1,?,?}>, Sched<[WriteCMP]> {
// A8.6.35
bits<3> Rn;
@@ -1136,7 +1136,7 @@ def tCMPr : // A8.6.36 T1
T1pIDPEncode<0b1010, (outs), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iCMPr,
"cmp", "\t$Rn, $Rm",
- [(set CPSR, (ARMcmp tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteCMP]>;
+ [(ARMcmp tGPR:$Rn, tGPR:$Rm)]>, Sched<[WriteCMP]>;
def tCMPhir : T1pI<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_iCMPr,
"cmp", "\t$Rn, $Rm", []>,
@@ -1423,7 +1423,7 @@ let isCompare = 1, isCommutable = 1, Defs = [CPSR] in
def tTST : // A8.6.230
T1pIDPEncode<0b1000, (outs), (ins tGPR:$Rn, tGPR:$Rm), IIC_iTSTr,
"tst", "\t$Rn, $Rm",
- [(set CPSR, (ARMcmpZ (and_su tGPR:$Rn, tGPR:$Rm), 0))]>,
+ [(ARMcmpZ (and_su tGPR:$Rn, tGPR:$Rm), 0)]>,
Sched<[WriteALU]>;
// A8.8.247 UDF - Undefined (Encoding T1)
@@ -1466,11 +1466,9 @@ def tUXTH : // A8.6.264
// Expanded after instruction selection into a branch sequence.
let usesCustomInserter = 1 in // Expanded after instruction selection.
def tMOVCCr_pseudo :
- PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$p),
- NoItinerary, []>;
-
-def : Pat<(ARMcmov tGPR:$false, tGPR:$true, imm:$cc, CPSR),
- (tMOVCCr_pseudo $false, $true, imm:$cc, CPSR)>;
+ PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, cmovpred:$p),
+ NoItinerary,
+ [(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, cmovpred:$p))]>;
// tLEApcrel - Load a pc-relative address into a register without offending the
// assembler.
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 99617e53d657a9..aa5c0a58897688 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -1144,8 +1144,7 @@ let isCompare = 1, Defs = [CPSR] in {
def ri : T2OneRegCmpImm<
(outs), (ins LHSGPR:$Rn, t2_so_imm:$imm), iii,
opc, ".w\t$Rn, $imm",
- [(set CPSR, (opnode LHSGPR:$Rn, t2_so_imm:$imm))]>,
- Sched<[WriteCMP]> {
+ [(opnode LHSGPR:$Rn, t2_so_imm:$imm)]>, Sched<[WriteCMP]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -1157,8 +1156,7 @@ let isCompare = 1, Defs = [CPSR] in {
def rr : T2TwoRegCmp<
(outs), (ins LHSGPR:$Rn, rGPR:$Rm), iir,
opc, ".w\t$Rn, $Rm",
- [(set CPSR, (opnode LHSGPR:$Rn, rGPR:$Rm))]>,
- Sched<[WriteCMP]> {
+ [(opnode LHSGPR:$Rn, rGPR:$Rm)]>, Sched<[WriteCMP]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -1172,7 +1170,7 @@ let isCompare = 1, Defs = [CPSR] in {
def rs : T2OneRegCmpShiftedReg<
(outs), (ins LHSGPR:$Rn, t2_so_reg:$ShiftedRm), iis,
opc, ".w\t$Rn, $ShiftedRm",
- [(set CPSR, (opnode LHSGPR:$Rn, t2_so_reg:$ShiftedRm))]>,
+ [(opnode LHSGPR:$Rn, t2_so_reg:$ShiftedRm)]>,
Sched<[WriteCMPsi]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -3479,7 +3477,7 @@ let isCompare = 1, Defs = [CPSR] in {
def t2CMNri : T2OneRegCmpImm<
(outs), (ins GPRnopc:$Rn, t2_so_imm:$imm), IIC_iCMPi,
"cmn", ".w\t$Rn, $imm",
- [(set CPSR, (ARMcmn GPRnopc:$Rn, (ineg t2_so_imm:$imm)))]>,
+ [(ARMcmn GPRnopc:$Rn, (ineg t2_so_imm:$imm))]>,
Sched<[WriteCMP, ReadALU]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
@@ -3492,9 +3490,8 @@ let isCompare = 1, Defs = [CPSR] in {
def t2CMNzrr : T2TwoRegCmp<
(outs), (ins GPRnopc:$Rn, rGPR:$Rm), IIC_iCMPr,
"cmn", ".w\t$Rn, $Rm",
- [(set CPSR, (BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
- GPRnopc:$Rn, rGPR:$Rm))]>,
- Sched<[WriteCMP, ReadALU, ReadALU]> {
+ [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
+ GPRnopc:$Rn, rGPR:$Rm)]>, Sched<[WriteCMP, ReadALU, ReadALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b1000;
@@ -3508,8 +3505,8 @@ let isCompare = 1, Defs = [CPSR] in {
def t2CMNzrs : T2OneRegCmpShiftedReg<
(outs), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm), IIC_iCMPsi,
"cmn", ".w\t$Rn, $ShiftedRm",
- [(set CPSR, (BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
- GPRnopc:$Rn, t2_so_reg:$ShiftedRm))]>,
+ [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
+ GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]>,
Sched<[WriteCMPsi, ReadALU, ReadALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -3545,84 +3542,67 @@ let hasSideEffects = 0 in {
let isCommutable = 1, isSelect = 1 in
def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd),
- (ins rGPR:$false, rGPR:$Rm, pred:$p),
- 4, IIC_iCMOVr, []>,
+ (ins rGPR:$false, rGPR:$Rm, cmovpred:$p),
+ 4, IIC_iCMOVr,
+ [(set rGPR:$Rd, (ARMcmov rGPR:$false, rGPR:$Rm,
+ cmovpred:$p))]>,
RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
let isMoveImm = 1 in
def t2MOVCCi
: t2PseudoInst<(outs rGPR:$Rd),
- (ins rGPR:$false, t2_so_imm:$imm, pred:$p),
- 4, IIC_iCMOVi, []>,
+ (ins rGPR:$false, t2_so_imm:$imm, cmovpred:$p),
+ 4, IIC_iCMOVi,
+ [(set rGPR:$Rd, (ARMcmov rGPR:$false,t2_so_imm:$imm,
+ cmovpred:$p))]>,
RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
let isCodeGenOnly = 1 in {
let isMoveImm = 1 in
def t2MOVCCi16
: t2PseudoInst<(outs rGPR:$Rd),
- (ins rGPR:$false, imm0_65535_expr:$imm, pred:$p),
- 4, IIC_iCMOVi, []>,
+ (ins rGPR:$false, imm0_65535_expr:$imm, cmovpred:$p),
+ 4, IIC_iCMOVi,
+ [(set rGPR:$Rd, (ARMcmov rGPR:$false, imm0_65535:$imm,
+ cmovpred:$p))]>,
RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
let isMoveImm = 1 in
def t2MVNCCi
: t2PseudoInst<(outs rGPR:$Rd),
- (ins rGPR:$false, t2_so_imm:$imm, pred:$p),
- 4, IIC_iCMOVi, []>,
+ (ins rGPR:$false, t2_so_imm:$imm, cmovpred:$p),
+ 4, IIC_iCMOVi,
+ [(set rGPR:$Rd,
+ (ARMcmov rGPR:$false, t2_so_imm_not:$imm,
+ cmovpred:$p))]>,
RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
-class MOVCCShPseudo
+class MOVCCShPseudo<SDPatternOperator opnode, Operand ty>
: t2PseudoInst<(outs rGPR:$Rd),
- (ins rGPR:$false, rGPR:$Rm, i32imm:$imm, pred:$p),
- 4, IIC_iCMOVsi, []>,
+ (ins rGPR:$false, rGPR:$Rm, i32imm:$imm, cmovpred:$p),
+ 4, IIC_iCMOVsi,
+ [(set rGPR:$Rd, (ARMcmov rGPR:$false,
+ (opnode rGPR:$Rm, (i32 ty:$imm)),
+ cmovpred:$p))]>,
RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
-def t2MOVCClsl : MOVCCShPseudo;
-def t2MOVCClsr : MOVCCShPseudo;
-def t2MOVCCasr : MOVCCShPseudo;
-def t2MOVCCror : MOVCCShPseudo;
+def t2MOVCClsl : MOVCCShPseudo<shl, imm0_31>;
+def t2MOVCClsr : MOVCCShPseudo<srl, imm_sr>;
+def t2MOVCCasr : MOVCCShPseudo<sra, imm_sr>;
+def t2MOVCCror : MOVCCShPseudo<rotr, imm0_31>;
let isMoveImm = 1 in
def t2MOVCCi32imm
: t2PseudoInst<(outs rGPR:$dst),
- (ins rGPR:$false, i32imm:$src, pred:$p),
- 8, IIC_iCMOVix2, []>,
+ (ins rGPR:$false, i32imm:$src, cmovpred:$p),
+ 8, IIC_iCMOVix2,
+ [(set rGPR:$dst, (ARMcmov rGPR:$false, imm:$src,
+ cmovpred:$p))]>,
RegConstraint<"$false = $dst">;
} // isCodeGenOnly = 1
} // hasSideEffects
-// The following patterns have to be defined out-of-line because the number
-// of instruction operands does not match the number of SDNode operands
-// (`pred` counts as one operand).
-
-def : T2Pat<(ARMcmov i32:$false, i32:$Rm, imm:$cc, CPSR),
- (t2MOVCCr $false, $Rm, imm:$cc, CPSR)>;
-
-def : T2Pat<(ARMcmov i32:$false, t2_so_imm:$imm, imm:$cc, CPSR),
- (t2MOVCCi $false, t2_so_imm:$imm, imm:$cc, CPSR)>;
-
-def : T2Pat<(ARMcmov i32:$false, imm0_65535:$imm, imm:$cc, CPSR),
- (t2MOVCCi16 $false, imm0_65535:$imm, imm:$cc, CPSR)>;
-
-def : T2Pat<(ARMcmov i32:$false, t2_so_imm_not:$imm, imm:$cc, CPSR),
- (t2MVNCCi $false, t2_so_imm_not:$imm, imm:$cc, CPSR)>;
-
-def : T2Pat<(ARMcmov i32:$false, (shl i32:$Rm, imm0_31:$imm), imm:$cc, CPSR),
- (t2MOVCClsl $false, $Rm, imm0_31:$imm, imm:$cc, CPSR)>;
-
-def : T2Pat<(ARMcmov i32:$false, (srl i32:$Rm, imm_sr:$imm), imm:$cc, CPSR),
- (t2MOVCClsr $false, $Rm, imm_sr:$imm, imm:$cc, CPSR)>;
-
-def : T2Pat<(ARMcmov i32:$false, (sra i32:$Rm, imm_sr:$imm), imm:$cc, CPSR),
- (t2MOVCCasr $false, $Rm, imm_sr:$imm, imm:$cc, CPSR)>;
-
-def : T2Pat<(ARMcmov i32:$false, (rotr i32:$Rm, imm0_31:$imm), imm:$cc, CPSR),
- (t2MOVCCror $false, $Rm, imm0_31:$imm, imm:$cc, CPSR)>;
-
-def : T2Pat<(ARMcmov i32:$false, imm:$src, imm:$cc, CPSR),
- (t2MOVCCi32imm $false, imm:$src, imm:$cc, CPSR)>;
-
//===----------------------------------------------------------------------===//
// Atomic operations intrinsics
//
@@ -5726,53 +5706,51 @@ def t2CSINC : CS<"csinc", 0b1001>;
def t2CSINV : CS<"csinv", 0b1010>;
def t2CSNEG : CS<"csneg", 0b1011>;
-def ARMcsinc_su
- : PatFrag<(ops node:$lhs, node:$rhs, node:$cc, node:$flags),
- (ARMcsinc node:$lhs, node:$rhs, node:$cc, node:$flags), [{
+def ARMcsinc_su : PatFrag<(ops node:$lhs, node:$rhs, node:$cond),
+ (ARMcsinc node:$lhs, node:$rhs, node:$cond), [{
return N->hasOneUse();
}]>;
let Predicates = [HasV8_1MMainline] in {
multiclass CSPats<SDNode Node, Instruction Insn> {
- def : T2Pat<(Node GPRwithZR:$tval, GPRwithZR:$fval, imm:$cc, CPSR),
- (Insn GPRwithZR:$tval, GPRwithZR:$fval, imm:$cc)>;
- def : T2Pat<(Node (i32 0), GPRwithZR:$fval, imm:$cc, CPSR),
- (Insn ZR, GPRwithZR:$fval, imm:$cc)>;
- def : T2Pat<(Node GPRwithZR:$tval, (i32 0), imm:$cc, CPSR),
- (Insn GPRwithZR:$tval, ZR, imm:$cc)>;
- def : T2Pat<(Node (i32 0), (i32 0), imm:$cc, CPSR),
- (Insn ZR, ZR, imm:$cc)>;
+ def : T2Pat<(Node GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm),
+ (Insn GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm)>;
+ def : T2Pat<(Node (i32 0), GPRwithZR:$fval, imm0_31:$imm),
+ (Insn ZR, GPRwithZR:$fval, imm0_31:$imm)>;
+ def : T2Pat<(Node GPRwithZR:$tval, (i32 0), imm0_31:$imm),
+ (Insn GPRwithZR:$tval, ZR, imm0_31:$imm)>;
+ def : T2Pat<(Node (i32 0), (i32 0), imm0_31:$imm),
+ (Insn ZR, ZR, imm0_31:$imm)>;
}
defm : CSPats<ARMcsinc, t2CSINC>;
defm : CSPats<ARMcsinv, t2CSINV>;
defm : CSPats<ARMcsneg, t2CSNEG>;
- def : T2Pat<(ARMcmov (i32 1), (i32 0), imm:$cc, CPSR),
- (t2CSINC ZR, ZR, imm:$cc)>;
- def : T2Pat<(ARMcmov (i32 -1), (i32 0), imm:$cc, CPSR),
- (t2CSINV ZR, ZR, imm:$cc)>;
- def : T2Pat<(ARMcmov (i32 0), (i32 1), imm:$cc, CPSR),
- (t2CSINC ZR, ZR, (inv_cond_XFORM imm:$cc))>;
- def : T2Pat<(ARMcmov (i32 0), (i32 -1), imm:$cc, CPSR),
- (t2CSINV ZR, ZR, (inv_cond_XFORM imm:$cc))>;
+ def : T2Pat<(ARMcmov (i32 1), (i32 0), cmovpred:$imm),
+ (t2CSINC ZR, ZR, imm0_31:$imm)>;
+ def : T2Pat<(ARMcmov (i32 -1), (i32 0), cmovpred:$imm),
+ (t2CSINV ZR, ZR, imm0_31:$imm)>;
+ def : T2Pat<(ARMcmov (i32 0), (i32 1), cmovpred:$imm),
+ (t2CSINC ZR, ZR, (inv_cond_XFORM imm:$imm))>;
+ def : T2Pat<(ARMcmov (i32 0), (i32 -1), cmovpred:$imm),
+ (t2CSINV ZR, ZR, (inv_cond_XFORM imm:$imm))>;
multiclass ModifiedV8_1CSEL<Instruction Insn, dag modvalue> {
- def : T2Pat<(ARMcmov modvalue, GPRwithZR:$tval, imm:$cc, CPSR),
- (Insn GPRwithZR:$tval, GPRwithZR:$fval, imm:$cc)>;
- def : T2Pat<(ARMcmov GPRwithZR:$tval, modvalue, imm:$cc, CPSR),
+ def : T2Pat<(ARMcmov modvalue, GPRwithZR:$tval, cmovpred:$imm),
+ (Insn GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm)>;
+ def : T2Pat<(ARMcmov GPRwithZR:$tval, modvalue, cmovpred:$imm),
(Insn GPRwithZR:$tval, GPRwithZR:$fval,
- (i32 (inv_cond_XFORM imm:$cc)))>;
+ (i32 (inv_cond_XFORM imm:$imm)))>;
}
defm : ModifiedV8_1CSEL<t2CSINC, (add rGPR:$fval, 1)>;
defm : ModifiedV8_1CSEL<t2CSINV, (xor rGPR:$fval, -1)>;
defm : ModifiedV8_1CSEL<t2CSNEG, (sub 0, rGPR:$fval)>;
- def : T2Pat<(ARMcmov (topbitsallzero32:$Rn), (i32 1), imm:$cc, CPSR),
- (t2CSINC $Rn, ZR, (inv_cond_XFORM imm:$cc))>;
- def : T2Pat<(and (topbitsallzero32:$Rn),
- (ARMcsinc_su (i32 0), (i32 0), imm:$cc, CPSR)),
- (t2CSEL ZR, $Rn, imm:$cc)>;
+ def : T2Pat<(ARMcmov (topbitsallzero32:$Rn), (i32 1), cmovpred:$imm),
+ (t2CSINC $Rn, ZR, (inv_cond_XFORM imm:$imm))>;
+ def : T2Pat<(and (topbitsallzero32:$Rn), (ARMcsinc_su (i32 0), (i32 0), cmovpred:$imm)),
+ (t2CSEL ZR, $Rn, $imm)>;
}
// CS aliases.
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index 754517f3bc4d5b..a29753909ea992 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -34,10 +34,10 @@ def arm_cmpfpe : SDNode<"ARMISD::CMPFPE", SDT_CMPFP>;
def arm_cmpfpe0 : SDNode<"ARMISD::CMPFPEw0", SDT_CMPFP0>;
def arm_fmstat : SDNode<"ARMISD::FMSTAT",
- SDTypeProfile<1, 1, [
- SDTCisVT<0, FlagsVT>, // out flags
- SDTCisVT<1, FlagsVT> // in flags
- ]>
+ SDTypeProfile<0, 1, [
+ SDTCisVT<0, FlagsVT> // in flags
+ ]>,
+ [SDNPOutGlue] // TODO: Change Glue to a normal result.
>;
def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
@@ -562,21 +562,19 @@ multiclass vsel_inst<string op, bits<2> opc, int CC> {
def H : AHbInp<0b11100, opc, 0,
(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
NoItinerary, !strconcat("vsel", op, ".f16\t$Sd, $Sn, $Sm"),
- [(set (f16 HPR:$Sd),
- (ARMcmov (f16 HPR:$Sm), (f16 HPR:$Sn), CC, CPSR))]>,
+ [(set (f16 HPR:$Sd), (ARMcmov (f16 HPR:$Sm), (f16 HPR:$Sn), CC))]>,
Requires<[HasFullFP16]>;
def S : ASbInp<0b11100, opc, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
NoItinerary, !strconcat("vsel", op, ".f32\t$Sd, $Sn, $Sm"),
- [(set SPR:$Sd, (ARMcmov SPR:$Sm, SPR:$Sn, CC, CPSR))]>,
+ [(set SPR:$Sd, (ARMcmov SPR:$Sm, SPR:$Sn, CC))]>,
Requires<[HasFPARMv8]>;
def D : ADbInp<0b11100, opc, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
NoItinerary, !strconcat("vsel", op, ".f64\t$Dd, $Dn, $Dm"),
- [(set DPR:$Dd,
- (ARMcmov (f64 DPR:$Dm), (f64 DPR:$Dn), CC, CPSR))]>,
+ [(set DPR:$Dd, (ARMcmov (f64 DPR:$Dm), (f64 DPR:$Dn), CC))]>,
Requires<[HasFPARMv8, HasDPVFP]>;
}
}
@@ -2463,35 +2461,25 @@ def : Pat<(fneg (f16 (fma (fneg (f16 HPR:$Sn)), (f16 HPR:$Sm), (f16 HPR:$Sdin)))
//
let hasSideEffects = 0 in {
-def VMOVDcc : PseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, pred:$p),
- IIC_fpUNA64, []>,
+def VMOVDcc : PseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, cmovpred:$p),
+ IIC_fpUNA64,
+ [(set (f64 DPR:$Dd),
+ (ARMcmov DPR:$Dn, DPR:$Dm, cmovpred:$p))]>,
RegConstraint<"$Dn = $Dd">, Requires<[HasFPRegs64]>;
-def VMOVScc : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, pred:$p),
- IIC_fpUNA32, []>,
+def VMOVScc : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, cmovpred:$p),
+ IIC_fpUNA32,
+ [(set (f32 SPR:$Sd),
+ (ARMcmov SPR:$Sn, SPR:$Sm, cmovpred:$p))]>,
RegConstraint<"$Sn = $Sd">, Requires<[HasFPRegs]>;
-def VMOVHcc : PseudoInst<(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm, pred:$p),
- IIC_fpUNA16, []>,
+def VMOVHcc : PseudoInst<(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm, cmovpred:$p),
+ IIC_fpUNA16,
+ [(set (f16 HPR:$Sd),
+ (ARMcmov (f16 HPR:$Sn), (f16 HPR:$Sm), cmovpred:$p))]>,
RegConstraint<"$Sd = $Sn">, Requires<[HasFPRegs]>;
} // hasSideEffects
-// The following patterns have to be defined out-of-line because the number
-// of instruction operands does not match the number of SDNode operands
-// (`pred` counts as one operand).
-
-def : Pat<(ARMcmov f64:$Dn, f64:$Dm, imm:$cc, CPSR),
- (VMOVDcc $Dn, $Dm, imm:$cc, CPSR)>,
- Requires<[HasFPRegs64]>;
-
-def : Pat<(ARMcmov f32:$Sn, f32:$Sm, imm:$cc, CPSR),
- (VMOVScc $Sn, $Sm, imm:$cc, CPSR)>,
- Requires<[HasFPRegs]>;
-
-def : Pat<(ARMcmov f16:$Sn, f16:$Sm, imm:$cc, CPSR),
- (VMOVHcc $Sn, $Sm, imm:$cc, CPSR)>,
- Requires<[HasFPRegs]>; // FIXME: Shouldn't this be HasFPRegs16?
-
//===----------------------------------------------------------------------===//
// Move from VFP System Register to ARM core register.
//
@@ -2522,7 +2510,7 @@ let DecoderMethod = "DecodeForVMRSandVMSR" in {
Rt = 0b1111 /* apsr_nzcv */ in
def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
"vmrs", "\tAPSR_nzcv, fpscr",
- [(set CPSR, (arm_fmstat FPSCR_NZCV))]>;
+ [(arm_fmstat FPSCR_NZCV)]>;
// Application level FPSCR -> GPR
let hasSideEffects = 1, Uses = [FPSCR], Predicates = [HasFPRegs] in
diff --git a/llvm/test/CodeGen/ARM/add-like-or.ll b/llvm/test/CodeGen/ARM/add-like-or.ll
index c0ddee83880410..5de03a92afeb42 100644
--- a/llvm/test/CodeGen/ARM/add-like-or.ll
+++ b/llvm/test/CodeGen/ARM/add-like-or.ll
@@ -29,8 +29,8 @@ define i32 @test_add_i3(i1 %tst, i32 %a, i32 %b) {
; CHECK-T2: @ %bb.0:
; CHECK-T2-NEXT: .save {r4, lr}
; CHECK-T2-NEXT: push {r4, lr}
-; CHECK-T2-NEXT: bic r4, r2, #3
; CHECK-T2-NEXT: lsls r0, r0, #31
+; CHECK-T2-NEXT: bic r4, r2, #3
; CHECK-T2-NEXT: it ne
; CHECK-T2-NEXT: bicne r4, r1, #6
; CHECK-T2-NEXT: mov r0, r4
@@ -144,12 +144,12 @@ define i32 @test_add_i12(i32 %a, i32 %b, i1 %tst) {
;
; CHECK-A-LABEL: test_add_i12:
; CHECK-A: @ %bb.0:
-; CHECK-A-NEXT: bfc r0, #0, #12
; CHECK-A-NEXT: bfc r1, #0, #13
+; CHECK-A-NEXT: bfc r0, #0, #12
; CHECK-A-NEXT: tst r2, #1
-; CHECK-A-NEXT: movne r1, r0
-; CHECK-A-NEXT: movw r0, #854
-; CHECK-A-NEXT: orr r0, r1, r0
+; CHECK-A-NEXT: moveq r0, r1
+; CHECK-A-NEXT: movw r1, #854
+; CHECK-A-NEXT: orr r0, r0, r1
; CHECK-A-NEXT: bx lr
%tmp = and i32 %a, -4096
%tmp1 = and i32 %b, -8192
diff --git a/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll b/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll
index 75416475289f31..b6adc995091cea 100644
--- a/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll
@@ -1965,34 +1965,32 @@ define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %t
; ARM-ENABLE-NEXT: @ %bb.1: @ %bb3
; ARM-ENABLE-NEXT: push {r4, r7, lr}
; ARM-ENABLE-NEXT: add r7, sp, #4
-; ARM-ENABLE-NEXT: sub r4, sp, #24
+; ARM-ENABLE-NEXT: sub r4, sp, #16
; ARM-ENABLE-NEXT: bfc r4, #0, #4
; ARM-ENABLE-NEXT: mov sp, r4
; ARM-ENABLE-NEXT: ldr r1, [r7, #8]
-; ARM-ENABLE-NEXT: vmov.f64 d16, #1.000000e+00
; ARM-ENABLE-NEXT: mov r2, r3
; ARM-ENABLE-NEXT: vst1.64 {d8, d9}, [r4:128]
-; ARM-ENABLE-NEXT: vmov d9, r3, r1
; ARM-ENABLE-NEXT: vmov s16, r0
; ARM-ENABLE-NEXT: mov r0, r3
+; ARM-ENABLE-NEXT: vmov d9, r3, r1
; ARM-ENABLE-NEXT: mov r3, r1
-; ARM-ENABLE-NEXT: vstr d10, [r4, #16]
-; ARM-ENABLE-NEXT: vadd.f64 d10, d9, d16
; ARM-ENABLE-NEXT: bl _pow
; ARM-ENABLE-NEXT: vmov.f32 s0, #1.000000e+00
; ARM-ENABLE-NEXT: mov r4, sp
-; ARM-ENABLE-NEXT: vmov.f64 d17, d9
-; ARM-ENABLE-NEXT: vmov d16, r0, r1
+; ARM-ENABLE-NEXT: vmov.f64 d16, #1.000000e+00
+; ARM-ENABLE-NEXT: vadd.f64 d16, d9, d16
; ARM-ENABLE-NEXT: vcmp.f32 s16, s0
; ARM-ENABLE-NEXT: vmrs APSR_nzcv, fpscr
-; ARM-ENABLE-NEXT: vadd.f64 d16, d16, d16
-; ARM-ENABLE-NEXT: vmovgt.f64 d17, d10
-; ARM-ENABLE-NEXT: vcmp.f64 d17, d9
+; ARM-ENABLE-NEXT: vmov d17, r0, r1
+; ARM-ENABLE-NEXT: vmov.f64 d18, d9
+; ARM-ENABLE-NEXT: vadd.f64 d17, d17, d17
+; ARM-ENABLE-NEXT: vmovgt.f64 d18, d16
+; ARM-ENABLE-NEXT: vcmp.f64 d18, d9
; ARM-ENABLE-NEXT: vmrs APSR_nzcv, fpscr
-; ARM-ENABLE-NEXT: vmovne.f64 d9, d16
+; ARM-ENABLE-NEXT: vmovne.f64 d9, d17
; ARM-ENABLE-NEXT: vcvt.f32.f64 s0, d9
; ARM-ENABLE-NEXT: vld1.64 {d8, d9}, [r4:128]
-; ARM-ENABLE-NEXT: vldr d10, [r4, #16]
; ARM-ENABLE-NEXT: sub sp, r7, #4
; ARM-ENABLE-NEXT: pop {r4, r7, lr}
; ARM-ENABLE-NEXT: vmov r0, s0
@@ -2014,33 +2012,32 @@ define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %t
; ARM-DISABLE-NEXT: @ %bb.0: @ %bb
; ARM-DISABLE-NEXT: push {r4, r7, lr}
; ARM-DISABLE-NEXT: add r7, sp, #4
-; ARM-DISABLE-NEXT: sub r4, sp, #24
+; ARM-DISABLE-NEXT: sub r4, sp, #16
; ARM-DISABLE-NEXT: bfc r4, #0, #4
; ARM-DISABLE-NEXT: mov sp, r4
; ARM-DISABLE-NEXT: tst r2, #1
; ARM-DISABLE-NEXT: vst1.64 {d8, d9}, [r4:128]
-; ARM-DISABLE-NEXT: vstr d10, [r4, #16]
; ARM-DISABLE-NEXT: beq LBB12_2
; ARM-DISABLE-NEXT: @ %bb.1: @ %bb3
; ARM-DISABLE-NEXT: ldr r1, [r7, #8]
-; ARM-DISABLE-NEXT: vmov.f64 d16, #1.000000e+00
-; ARM-DISABLE-NEXT: mov r2, r3
-; ARM-DISABLE-NEXT: vmov d9, r3, r1
; ARM-DISABLE-NEXT: vmov s16, r0
; ARM-DISABLE-NEXT: mov r0, r3
+; ARM-DISABLE-NEXT: mov r2, r3
+; ARM-DISABLE-NEXT: vmov d9, r3, r1
; ARM-DISABLE-NEXT: mov r3, r1
-; ARM-DISABLE-NEXT: vadd.f64 d10, d9, d16
; ARM-DISABLE-NEXT: bl _pow
; ARM-DISABLE-NEXT: vmov.f32 s0, #1.000000e+00
-; ARM-DISABLE-NEXT: vmov.f64 d17, d9
-; ARM-DISABLE-NEXT: vmov d16, r0, r1
+; ARM-DISABLE-NEXT: vmov.f64 d16, #1.000000e+00
+; ARM-DISABLE-NEXT: vadd.f64 d16, d9, d16
; ARM-DISABLE-NEXT: vcmp.f32 s16, s0
; ARM-DISABLE-NEXT: vmrs APSR_nzcv, fpscr
-; ARM-DISABLE-NEXT: vadd.f64 d16, d16, d16
-; ARM-DISABLE-NEXT: vmovgt.f64 d17, d10
-; ARM-DISABLE-NEXT: vcmp.f64 d17, d9
+; ARM-DISABLE-NEXT: vmov d17, r0, r1
+; ARM-DISABLE-NEXT: vmov.f64 d18, d9
+; ARM-DISABLE-NEXT: vadd.f64 d17, d17, d17
+; ARM-DISABLE-NEXT: vmovgt.f64 d18, d16
+; ARM-DISABLE-NEXT: vcmp.f64 d18, d9
; ARM-DISABLE-NEXT: vmrs APSR_nzcv, fpscr
-; ARM-DISABLE-NEXT: vmovne.f64 d9, d16
+; ARM-DISABLE-NEXT: vmovne.f64 d9, d17
; ARM-DISABLE-NEXT: vcvt.f32.f64 s0, d9
; ARM-DISABLE-NEXT: b LBB12_3
; ARM-DISABLE-NEXT: LBB12_2:
@@ -2049,7 +2046,6 @@ define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %t
; ARM-DISABLE-NEXT: mov r4, sp
; ARM-DISABLE-NEXT: vld1.64 {d8, d9}, [r4:128]
; ARM-DISABLE-NEXT: vmov r0, s0
-; ARM-DISABLE-NEXT: vldr d10, [r4, #16]
; ARM-DISABLE-NEXT: sub sp, r7, #4
; ARM-DISABLE-NEXT: pop {r4, r7, pc}
; ARM-DISABLE-NEXT: .p2align 2
@@ -2068,36 +2064,34 @@ define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %t
; THUMB-ENABLE-NEXT: @ %bb.1: @ %bb3
; THUMB-ENABLE-NEXT: push {r4, r7, lr}
; THUMB-ENABLE-NEXT: add r7, sp, #4
-; THUMB-ENABLE-NEXT: sub.w r4, sp, #24
+; THUMB-ENABLE-NEXT: sub.w r4, sp, #16
; THUMB-ENABLE-NEXT: bfc r4, #0, #4
; THUMB-ENABLE-NEXT: mov sp, r4
; THUMB-ENABLE-NEXT: ldr r1, [r7, #8]
-; THUMB-ENABLE-NEXT: vmov.f64 d16, #1.000000e+00
; THUMB-ENABLE-NEXT: mov r2, r3
; THUMB-ENABLE-NEXT: vst1.64 {d8, d9}, [r4:128]
-; THUMB-ENABLE-NEXT: vmov d9, r3, r1
; THUMB-ENABLE-NEXT: vmov s16, r0
; THUMB-ENABLE-NEXT: mov r0, r3
+; THUMB-ENABLE-NEXT: vmov d9, r3, r1
; THUMB-ENABLE-NEXT: mov r3, r1
-; THUMB-ENABLE-NEXT: vstr d10, [r4, #16]
-; THUMB-ENABLE-NEXT: vadd.f64 d10, d9, d16
; THUMB-ENABLE-NEXT: bl _pow
; THUMB-ENABLE-NEXT: vmov.f32 s0, #1.000000e+00
; THUMB-ENABLE-NEXT: mov r4, sp
-; THUMB-ENABLE-NEXT: vmov.f64 d17, d9
-; THUMB-ENABLE-NEXT: vmov d16, r0, r1
+; THUMB-ENABLE-NEXT: vmov.f64 d16, #1.000000e+00
+; THUMB-ENABLE-NEXT: vmov.f64 d18, d9
; THUMB-ENABLE-NEXT: vcmp.f32 s16, s0
+; THUMB-ENABLE-NEXT: vadd.f64 d16, d9, d16
; THUMB-ENABLE-NEXT: vmrs APSR_nzcv, fpscr
; THUMB-ENABLE-NEXT: it gt
-; THUMB-ENABLE-NEXT: vmovgt.f64 d17, d10
-; THUMB-ENABLE-NEXT: vcmp.f64 d17, d9
-; THUMB-ENABLE-NEXT: vadd.f64 d16, d16, d16
+; THUMB-ENABLE-NEXT: vmovgt.f64 d18, d16
+; THUMB-ENABLE-NEXT: vcmp.f64 d18, d9
+; THUMB-ENABLE-NEXT: vmov d17, r0, r1
; THUMB-ENABLE-NEXT: vmrs APSR_nzcv, fpscr
+; THUMB-ENABLE-NEXT: vadd.f64 d17, d17, d17
; THUMB-ENABLE-NEXT: it ne
-; THUMB-ENABLE-NEXT: vmovne.f64 d9, d16
+; THUMB-ENABLE-NEXT: vmovne.f64 d9, d17
; THUMB-ENABLE-NEXT: vcvt.f32.f64 s0, d9
; THUMB-ENABLE-NEXT: vld1.64 {d8, d9}, [r4:128]
-; THUMB-ENABLE-NEXT: vldr d10, [r4, #16]
; THUMB-ENABLE-NEXT: subs r4, r7, #4
; THUMB-ENABLE-NEXT: mov sp, r4
; THUMB-ENABLE-NEXT: pop.w {r4, r7, lr}
@@ -2120,35 +2114,34 @@ define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %t
; THUMB-DISABLE-NEXT: @ %bb.0: @ %bb
; THUMB-DISABLE-NEXT: push {r4, r7, lr}
; THUMB-DISABLE-NEXT: add r7, sp, #4
-; THUMB-DISABLE-NEXT: sub.w r4, sp, #24
+; THUMB-DISABLE-NEXT: sub.w r4, sp, #16
; THUMB-DISABLE-NEXT: bfc r4, #0, #4
; THUMB-DISABLE-NEXT: mov sp, r4
; THUMB-DISABLE-NEXT: lsls r1, r2, #31
; THUMB-DISABLE-NEXT: vst1.64 {d8, d9}, [r4:128]
-; THUMB-DISABLE-NEXT: vstr d10, [r4, #16]
; THUMB-DISABLE-NEXT: beq LBB12_2
; THUMB-DISABLE-NEXT: @ %bb.1: @ %bb3
; THUMB-DISABLE-NEXT: ldr r1, [r7, #8]
-; THUMB-DISABLE-NEXT: vmov.f64 d16, #1.000000e+00
-; THUMB-DISABLE-NEXT: mov r2, r3
-; THUMB-DISABLE-NEXT: vmov d9, r3, r1
; THUMB-DISABLE-NEXT: vmov s16, r0
; THUMB-DISABLE-NEXT: mov r0, r3
+; THUMB-DISABLE-NEXT: mov r2, r3
+; THUMB-DISABLE-NEXT: vmov d9, r3, r1
; THUMB-DISABLE-NEXT: mov r3, r1
-; THUMB-DISABLE-NEXT: vadd.f64 d10, d9, d16
; THUMB-DISABLE-NEXT: bl _pow
; THUMB-DISABLE-NEXT: vmov.f32 s0, #1.000000e+00
-; THUMB-DISABLE-NEXT: vmov.f64 d17, d9
-; THUMB-DISABLE-NEXT: vmov d16, r0, r1
+; THUMB-DISABLE-NEXT: vmov.f64 d16, #1.000000e+00
+; THUMB-DISABLE-NEXT: vmov.f64 d18, d9
; THUMB-DISABLE-NEXT: vcmp.f32 s16, s0
+; THUMB-DISABLE-NEXT: vadd.f64 d16, d9, d16
; THUMB-DISABLE-NEXT: vmrs APSR_nzcv, fpscr
; THUMB-DISABLE-NEXT: it gt
-; THUMB-DISABLE-NEXT: vmovgt.f64 d17, d10
-; THUMB-DISABLE-NEXT: vcmp.f64 d17, d9
-; THUMB-DISABLE-NEXT: vadd.f64 d16, d16, d16
+; THUMB-DISABLE-NEXT: vmovgt.f64 d18, d16
+; THUMB-DISABLE-NEXT: vcmp.f64 d18, d9
+; THUMB-DISABLE-NEXT: vmov d17, r0, r1
; THUMB-DISABLE-NEXT: vmrs APSR_nzcv, fpscr
+; THUMB-DISABLE-NEXT: vadd.f64 d17, d17, d17
; THUMB-DISABLE-NEXT: it ne
-; THUMB-DISABLE-NEXT: vmovne.f64 d9, d16
+; THUMB-DISABLE-NEXT: vmovne.f64 d9, d17
; THUMB-DISABLE-NEXT: vcvt.f32.f64 s0, d9
; THUMB-DISABLE-NEXT: b LBB12_3
; THUMB-DISABLE-NEXT: LBB12_2:
@@ -2156,9 +2149,8 @@ define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %t
; THUMB-DISABLE-NEXT: LBB12_3: @ %bb13
; THUMB-DISABLE-NEXT: mov r4, sp
; THUMB-DISABLE-NEXT: vld1.64 {d8, d9}, [r4:128]
-; THUMB-DISABLE-NEXT: vmov r0, s0
-; THUMB-DISABLE-NEXT: vldr d10, [r4, #16]
; THUMB-DISABLE-NEXT: subs r4, r7, #4
+; THUMB-DISABLE-NEXT: vmov r0, s0
; THUMB-DISABLE-NEXT: mov sp, r4
; THUMB-DISABLE-NEXT: pop {r4, r7, pc}
; THUMB-DISABLE-NEXT: .p2align 2
diff --git a/llvm/test/CodeGen/ARM/atomic-64bit.ll b/llvm/test/CodeGen/ARM/atomic-64bit.ll
index ca9939c0f8c552..ab9e1dfd1cfb19 100644
--- a/llvm/test/CodeGen/ARM/atomic-64bit.ll
+++ b/llvm/test/CodeGen/ARM/atomic-64bit.ll
@@ -278,9 +278,12 @@ define i64 @test10(ptr %ptr, i64 %val) {
; CHECK-BE: subs {{[^,]+}}, r2, [[REG2]]
; CHECK-LE: sbcs {{[^,]+}}, r2, [[REG2]]
; CHECK-BE: sbcs {{[^,]+}}, r1, [[REG1]]
-; CHECK: movge [[OUT_HI]], [[REG2]]
+; CHECK: mov [[CMP:[a-z0-9]+]], #0
+; CHECK: movwge [[CMP]], #1
+; CHECK: cmp [[CMP]], #0
+; CHECK: movne [[OUT_HI]], [[REG2]]
; CHECK: mov [[OUT_LO:[a-z0-9]+]], r1
-; CHECK: movge [[OUT_LO]], [[REG1]]
+; CHECK: movne [[OUT_LO]], [[REG1]]
; CHECK: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
; CHECK: cmp
; CHECK: bne
@@ -294,10 +297,12 @@ define i64 @test10(ptr %ptr, i64 %val) {
; CHECK-THUMB-BE: subs.w {{[^,]+}}, r3, [[REG2]]
; CHECK-THUMB-LE: sbcs.w {{[^,]+}}, r3, [[REG2]]
; CHECK-THUMB-BE: sbcs.w {{[^,]+}}, r2, [[REG1]]
+; CHECK-THUMB: mov.w [[CMP:[a-z0-9]+]], #0
+; CHECK-THUMB: movge.w [[CMP]], #1
+; CHECK-THUMB: cmp.w [[CMP]], #0
; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3
-; CHECK-THUMB: itt ge
-; CHECK-THUMB: movge [[OUT_HI]], [[REG2]]
-; CHECK-THUMB: movge [[OUT_LO]], [[REG1]]
+; CHECK-THUMB: movne [[OUT_HI]], [[REG2]]
+; CHECK-THUMB: movne [[OUT_LO]], [[REG1]]
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
@@ -318,9 +323,12 @@ define i64 @test11(ptr %ptr, i64 %val) {
; CHECK-BE: subs {{[^,]+}}, r2, [[REG2]]
; CHECK-LE: sbcs {{[^,]+}}, r2, [[REG2]]
; CHECK-BE: sbcs {{[^,]+}}, r1, [[REG1]]
-; CHECK: movhs [[OUT_HI]], [[REG2]]
+; CHECK: mov [[CMP:[a-z0-9]+]], #0
+; CHECK: movwhs [[CMP]], #1
+; CHECK: cmp [[CMP]], #0
+; CHECK: movne [[OUT_HI]], [[REG2]]
; CHECK: mov [[OUT_LO:[a-z0-9]+]], r1
-; CHECK: movhs [[OUT_LO]], [[REG1]]
+; CHECK: movne [[OUT_LO]], [[REG1]]
; CHECK: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
; CHECK: cmp
; CHECK: bne
@@ -334,10 +342,12 @@ define i64 @test11(ptr %ptr, i64 %val) {
; CHECK-THUMB-BE: subs.w {{[^,]+}}, r3, [[REG2]]
; CHECK-THUMB-LE: sbcs.w {{[^,]+}}, r3, [[REG2]]
; CHECK-THUMB-BE: sbcs.w {{[^,]+}}, r2, [[REG1]]
+; CHECK-THUMB: mov.w [[CMP:[a-z0-9]+]], #0
+; CHECK-THUMB: movhs.w [[CMP]], #1
+; CHECK-THUMB: cmp.w [[CMP]], #0
; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3
-; CHECK-THUMB: itt hs
-; CHECK-THUMB: movhs [[OUT_HI]], [[REG2]]
-; CHECK-THUMB: movhs [[OUT_LO]], [[REG1]]
+; CHECK-THUMB: movne [[OUT_HI]], [[REG2]]
+; CHECK-THUMB: movne [[OUT_LO]], [[REG1]]
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
@@ -358,9 +368,12 @@ define i64 @test12(ptr %ptr, i64 %val) {
; CHECK-BE: subs {{[^,]+}}, r2, [[REG2]]
; CHECK-LE: sbcs {{[^,]+}}, r2, [[REG2]]
; CHECK-BE: sbcs {{[^,]+}}, r1, [[REG1]]
-; CHECK: movlt [[OUT_HI]], [[REG2]]
+; CHECK: mov [[CMP:[a-z0-9]+]], #0
+; CHECK: movwlt [[CMP]], #1
+; CHECK: cmp [[CMP]], #0
+; CHECK: movne [[OUT_HI]], [[REG2]]
; CHECK: mov [[OUT_LO:[a-z0-9]+]], r1
-; CHECK: movlt [[OUT_LO]], [[REG1]]
+; CHECK: movne [[OUT_LO]], [[REG1]]
; CHECK: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
; CHECK: cmp
; CHECK: bne
@@ -374,10 +387,12 @@ define i64 @test12(ptr %ptr, i64 %val) {
; CHECK-THUMB-BE: subs.w {{[^,]+}}, r3, [[REG2]]
; CHECK-THUMB-LE: sbcs.w {{[^,]+}}, r3, [[REG2]]
; CHECK-THUMB-BE: sbcs.w {{[^,]+}}, r2, [[REG1]]
+; CHECK-THUMB: mov.w [[CMP:[a-z0-9]+]], #0
+; CHECK-THUMB: movlt.w [[CMP]], #1
+; CHECK-THUMB: cmp.w [[CMP]], #0
; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3
-; CHECK-THUMB: itt lt
-; CHECK-THUMB: movlt [[OUT_HI]], [[REG2]]
-; CHECK-THUMB: movlt [[OUT_LO]], [[REG1]]
+; CHECK-THUMB: movne [[OUT_HI]], [[REG2]]
+; CHECK-THUMB: movne [[OUT_LO]], [[REG1]]
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
@@ -398,9 +413,12 @@ define i64 @test13(ptr %ptr, i64 %val) {
; CHECK-BE: subs {{[^,]+}}, r2, [[REG2]]
; CHECK-LE: sbcs {{[^,]+}}, r2, [[REG2]]
; CHECK-BE: sbcs {{[^,]+}}, r1, [[REG1]]
-; CHECK: movlo [[OUT_HI]], [[REG2]]
+; CHECK: mov [[CMP:[a-z0-9]+]], #0
+; CHECK: movwlo [[CMP]], #1
+; CHECK: cmp [[CMP]], #0
+; CHECK: movne [[OUT_HI]], [[REG2]]
; CHECK: mov [[OUT_LO:[a-z0-9]+]], r1
-; CHECK: movlo [[OUT_LO]], [[REG1]]
+; CHECK: movne [[OUT_LO]], [[REG1]]
; CHECK: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
; CHECK: cmp
; CHECK: bne
@@ -414,10 +432,12 @@ define i64 @test13(ptr %ptr, i64 %val) {
; CHECK-THUMB-BE: subs.w {{[^,]+}}, r3, [[REG2]]
; CHECK-THUMB-LE: sbcs.w {{[^,]+}}, r3, [[REG2]]
; CHECK-THUMB-BE: sbcs.w {{[^,]+}}, r2, [[REG1]]
+; CHECK-THUMB: mov.w [[CMP:[a-z0-9]+]], #0
+; CHECK-THUMB: movlo.w [[CMP]], #1
+; CHECK-THUMB: cmp.w [[CMP]], #0
; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3
-; CHECK-THUMB: itt lo
-; CHECK-THUMB: movlo [[OUT_HI]], [[REG2]]
-; CHECK-THUMB: movlo [[OUT_LO]], [[REG1]]
+; CHECK-THUMB: movne [[OUT_HI]], [[REG2]]
+; CHECK-THUMB: movne [[OUT_LO]], [[REG1]]
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
diff --git a/llvm/test/CodeGen/ARM/atomic-ops-v8.ll b/llvm/test/CodeGen/ARM/atomic-ops-v8.ll
index d48b070aa862e3..0a467c2b70acf2 100644
--- a/llvm/test/CodeGen/ARM/atomic-ops-v8.ll
+++ b/llvm/test/CodeGen/ARM/atomic-ops-v8.ll
@@ -672,9 +672,12 @@ define void @test_atomic_load_min_i64(i64 %offset) nounwind {
; CHECK-ARM-LE: sbcs {{[^,]+}}, r1, [[OLD2]]
; CHECK-ARM-BE: subs {{[^,]+}}, r1, [[OLD2]]
; CHECK-ARM-BE: sbcs {{[^,]+}}, r0, [[OLD1]]
-; CHECK-ARM: movge [[MINHI]], [[OLD2]]
+; CHECK-ARM: mov [[CMP:r[0-9]+|lr]], #0
+; CHECK-ARM: movwge [[CMP:r[0-9]+|lr]], #1
+; CHECK-ARM: cmp [[CMP:r[0-9]+|lr]], #0
+; CHECK-ARM: movne [[MINHI]], [[OLD2]]
; CHECK-ARM: mov [[MINLO:r[0-9]+]], r0
-; CHECK-ARM: movge [[MINLO]], [[OLD1]]
+; CHECK-ARM: movne [[MINLO]], [[OLD1]]
; CHECK-ARM: stlexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]]
; CHECK-THUMB: stlexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
@@ -782,9 +785,12 @@ define void @test_atomic_load_max_i64(i64 %offset) nounwind {
; CHECK-ARM-LE: sbcs {{[^,]+}}, r1, [[OLD2]]
; CHECK-ARM-BE: subs {{[^,]+}}, r1, [[OLD2]]
; CHECK-ARM-BE: sbcs {{[^,]+}}, r0, [[OLD1]]
-; CHECK-ARM: movlt [[MINHI]], [[OLD2]]
+; CHECK-ARM: mov [[CMP:r[0-9]+|lr]], #0
+; CHECK-ARM: movwlt [[CMP:r[0-9]+|lr]], #1
+; CHECK-ARM: cmp [[CMP:r[0-9]+|lr]], #0
+; CHECK-ARM: movne [[MINHI]], [[OLD2]]
; CHECK-ARM: mov [[MINLO:r[0-9]+]], r0
-; CHECK-ARM: movlt [[MINLO]], [[OLD1]]
+; CHECK-ARM: movne [[MINLO]], [[OLD1]]
; CHECK-ARM: strexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]]
; CHECK-THUMB: strexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
@@ -892,9 +898,12 @@ define void @test_atomic_load_umin_i64(i64 %offset) nounwind {
; CHECK-ARM-LE: sbcs {{[^,]+}}, r1, [[OLD2]]
; CHECK-ARM-BE: subs {{[^,]+}}, r1, [[OLD2]]
; CHECK-ARM-BE: sbcs {{[^,]+}}, r0, [[OLD1]]
-; CHECK-ARM: movhs [[MINHI]], [[OLD2]]
+; CHECK-ARM: mov [[CMP:r[0-9]+|lr]], #0
+; CHECK-ARM: movwhs [[CMP:r[0-9]+|lr]], #1
+; CHECK-ARM: cmp [[CMP:r[0-9]+|lr]], #0
+; CHECK-ARM: movne [[MINHI]], [[OLD2]]
; CHECK-ARM: mov [[MINLO:r[0-9]+]], r0
-; CHECK-ARM: movhs [[MINLO]], [[OLD1]]
+; CHECK-ARM: movne [[MINLO]], [[OLD1]]
; CHECK-ARM: stlexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]]
; CHECK-THUMB: stlexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
@@ -1002,9 +1011,12 @@ define void @test_atomic_load_umax_i64(i64 %offset) nounwind {
; CHECK-ARM-LE: sbcs {{[^,]+}}, r1, [[OLD2]]
; CHECK-ARM-BE: subs {{[^,]+}}, r1, [[OLD2]]
; CHECK-ARM-BE: sbcs {{[^,]+}}, r0, [[OLD1]]
-; CHECK-ARM: movlo [[MINHI]], [[OLD2]]
+; CHECK-ARM: mov [[CMP:r[0-9]+|lr]], #0
+; CHECK-ARM: movwlo [[CMP:r[0-9]+|lr]], #1
+; CHECK-ARM: cmp [[CMP:r[0-9]+|lr]], #0
+; CHECK-ARM: movne [[MINHI]], [[OLD2]]
; CHECK-ARM: mov [[MINLO:r[0-9]+]], r0
-; CHECK-ARM: movlo [[MINLO]], [[OLD1]]
+; CHECK-ARM: movne [[MINLO]], [[OLD1]]
; CHECK-ARM: stlexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]]
; CHECK-THUMB: stlexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
diff --git a/llvm/test/CodeGen/ARM/atomicrmw-cond-sub-clamp.ll b/llvm/test/CodeGen/ARM/atomicrmw-cond-sub-clamp.ll
index 8706728c4b8416..62711ee6834898 100644
--- a/llvm/test/CodeGen/ARM/atomicrmw-cond-sub-clamp.ll
+++ b/llvm/test/CodeGen/ARM/atomicrmw-cond-sub-clamp.ll
@@ -68,23 +68,27 @@ define i32 @atomicrmw_usub_cond_i32(ptr %ptr, i32 %val) {
define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) {
; CHECK-LABEL: atomicrmw_usub_cond_i64:
; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r4, r5, r11, lr}
-; CHECK-NEXT: push {r4, r5, r11, lr}
-; CHECK-NEXT: mov r12, r0
+; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: dmb ish
; CHECK-NEXT: .LBB3_1: @ %atomicrmw.start
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldrexd r0, r1, [r12]
-; CHECK-NEXT: subs r4, r0, r2
-; CHECK-NEXT: sbcs r5, r1, r3
-; CHECK-NEXT: movlo r5, r1
-; CHECK-NEXT: movlo r4, r0
-; CHECK-NEXT: strexd lr, r4, r5, [r12]
-; CHECK-NEXT: cmp lr, #0
+; CHECK-NEXT: ldrexd r4, r5, [r0]
+; CHECK-NEXT: mov r1, #0
+; CHECK-NEXT: subs r6, r4, r2
+; CHECK-NEXT: sbcs r7, r5, r3
+; CHECK-NEXT: movwhs r1, #1
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: moveq r7, r5
+; CHECK-NEXT: moveq r6, r4
+; CHECK-NEXT: strexd r1, r6, r7, [r0]
+; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: bne .LBB3_1
; CHECK-NEXT: @ %bb.2: @ %atomicrmw.end
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: dmb ish
-; CHECK-NEXT: pop {r4, r5, r11, pc}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
%result = atomicrmw usub_cond ptr %ptr, i64 %val seq_cst
ret i64 %result
}
@@ -160,7 +164,7 @@ define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) {
; CHECK-NEXT: subs r6, r4, r2
; CHECK-NEXT: sbcs r7, r5, r3
; CHECK-NEXT: adc r1, r12, #0
-; CHECK-NEXT: teq r1, #1
+; CHECK-NEXT: eors r1, r1, #1
; CHECK-NEXT: movwne r7, #0
; CHECK-NEXT: movwne r6, #0
; CHECK-NEXT: strexd r1, r6, r7, [r0]
diff --git a/llvm/test/CodeGen/ARM/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/ARM/atomicrmw-uinc-udec-wrap.ll
index 433fb325a7349f..243ec4deecdb84 100644
--- a/llvm/test/CodeGen/ARM/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/ARM/atomicrmw-uinc-udec-wrap.ll
@@ -69,25 +69,29 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; CHECK-LABEL: atomicrmw_uinc_wrap_i64:
; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r4, r6, r7, lr}
-; CHECK-NEXT: push {r4, r6, r7, lr}
-; CHECK-NEXT: mov r12, r0
+; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: dmb ish
; CHECK-NEXT: .LBB3_1: @ %atomicrmw.start
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldrexd r0, r1, [r12]
-; CHECK-NEXT: adds r6, r0, #1
-; CHECK-NEXT: adc r7, r1, #0
-; CHECK-NEXT: subs r4, r0, r2
-; CHECK-NEXT: sbcs r4, r1, r3
-; CHECK-NEXT: movwhs r7, #0
-; CHECK-NEXT: movwhs r6, #0
-; CHECK-NEXT: strexd r4, r6, r7, [r12]
-; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: ldrexd r4, r5, [r0]
+; CHECK-NEXT: adds r6, r4, #1
+; CHECK-NEXT: adc r7, r5, #0
+; CHECK-NEXT: subs r1, r4, r2
+; CHECK-NEXT: sbcs r1, r5, r3
+; CHECK-NEXT: mov r1, #0
+; CHECK-NEXT: movwhs r1, #1
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: movwne r7, #0
+; CHECK-NEXT: movwne r6, #0
+; CHECK-NEXT: strexd r1, r6, r7, [r0]
+; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: bne .LBB3_1
; CHECK-NEXT: @ %bb.2: @ %atomicrmw.end
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: dmb ish
-; CHECK-NEXT: pop {r4, r6, r7, pc}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
%result = atomicrmw uinc_wrap ptr %ptr, i64 %val seq_cst
ret i64 %result
}
@@ -98,8 +102,8 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
; CHECK-NEXT: dmb ish
; CHECK-NEXT: .LBB4_1: @ %atomicrmw.start
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldrexb r12, [r0]
; CHECK-NEXT: uxtb r3, r1
+; CHECK-NEXT: ldrexb r12, [r0]
; CHECK-NEXT: cmp r12, r3
; CHECK-NEXT: mov r3, r1
; CHECK-NEXT: subls r3, r12, #1
@@ -122,8 +126,8 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
; CHECK-NEXT: dmb ish
; CHECK-NEXT: .LBB5_1: @ %atomicrmw.start
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldrexh r12, [r0]
; CHECK-NEXT: uxth r3, r1
+; CHECK-NEXT: ldrexh r12, [r0]
; CHECK-NEXT: cmp r12, r3
; CHECK-NEXT: mov r3, r1
; CHECK-NEXT: subls r3, r12, #1
diff --git a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
index f633315822cc3d..a38ade7cdbf06b 100644
--- a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
+++ b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
@@ -1422,8 +1422,8 @@ define i8 @test_max_i8() {
; CHECK-ARM8-NEXT: @ Child Loop BB7_2 Depth 2
; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
; CHECK-ARM8-NEXT: sxtb r0, r1
-; CHECK-ARM8-NEXT: cmp r0, #1
; CHECK-ARM8-NEXT: mov r12, #1
+; CHECK-ARM8-NEXT: cmp r0, #1
; CHECK-ARM8-NEXT: movgt r12, r1
; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i8
; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i8
@@ -1468,8 +1468,8 @@ define i8 @test_max_i8() {
; CHECK-ARM6-NEXT: @ Child Loop BB7_2 Depth 2
; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
; CHECK-ARM6-NEXT: sxtb r0, r1
-; CHECK-ARM6-NEXT: cmp r0, #1
; CHECK-ARM6-NEXT: mov r12, #1
+; CHECK-ARM6-NEXT: cmp r0, #1
; CHECK-ARM6-NEXT: movgt r12, r1
; CHECK-ARM6-NEXT: ldr r3, .LCPI7_0
; CHECK-ARM6-NEXT: uxtb r1, r1
@@ -1518,8 +1518,8 @@ define i8 @test_max_i8() {
; CHECK-THUMB7-NEXT: @ Child Loop BB7_2 Depth 2
; CHECK-THUMB7-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
; CHECK-THUMB7-NEXT: sxtb r0, r1
-; CHECK-THUMB7-NEXT: cmp r0, #1
; CHECK-THUMB7-NEXT: mov.w r12, #1
+; CHECK-THUMB7-NEXT: cmp r0, #1
; CHECK-THUMB7-NEXT: it gt
; CHECK-THUMB7-NEXT: movgt r12, r1
; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i8
@@ -1643,8 +1643,8 @@ define i8 @test_min_i8() {
; CHECK-ARM8-NEXT: @ Child Loop BB8_2 Depth 2
; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
; CHECK-ARM8-NEXT: sxtb r0, r1
-; CHECK-ARM8-NEXT: cmp r0, #2
; CHECK-ARM8-NEXT: mov r12, #1
+; CHECK-ARM8-NEXT: cmp r0, #2
; CHECK-ARM8-NEXT: movlt r12, r1
; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i8
; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i8
@@ -1689,8 +1689,8 @@ define i8 @test_min_i8() {
; CHECK-ARM6-NEXT: @ Child Loop BB8_2 Depth 2
; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
; CHECK-ARM6-NEXT: sxtb r0, r1
-; CHECK-ARM6-NEXT: cmp r0, #2
; CHECK-ARM6-NEXT: mov r12, #1
+; CHECK-ARM6-NEXT: cmp r0, #2
; CHECK-ARM6-NEXT: movlt r12, r1
; CHECK-ARM6-NEXT: ldr r3, .LCPI8_0
; CHECK-ARM6-NEXT: uxtb r1, r1
@@ -1739,8 +1739,8 @@ define i8 @test_min_i8() {
; CHECK-THUMB7-NEXT: @ Child Loop BB8_2 Depth 2
; CHECK-THUMB7-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
; CHECK-THUMB7-NEXT: sxtb r0, r1
-; CHECK-THUMB7-NEXT: cmp r0, #2
; CHECK-THUMB7-NEXT: mov.w r12, #1
+; CHECK-THUMB7-NEXT: cmp r0, #2
; CHECK-THUMB7-NEXT: it lt
; CHECK-THUMB7-NEXT: movlt r12, r1
; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i8
@@ -1866,8 +1866,8 @@ define i8 @test_umax_i8() {
; CHECK-ARM8-NEXT: @ Child Loop BB9_2 Depth 2
; CHECK-ARM8-NEXT: ldr r12, [sp, #4] @ 4-byte Reload
; CHECK-ARM8-NEXT: uxtb r1, r12
-; CHECK-ARM8-NEXT: cmp r1, #1
; CHECK-ARM8-NEXT: mov lr, #1
+; CHECK-ARM8-NEXT: cmp r1, #1
; CHECK-ARM8-NEXT: movhi lr, r12
; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i8
; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i8
@@ -1913,8 +1913,8 @@ define i8 @test_umax_i8() {
; CHECK-ARM6-NEXT: @ Child Loop BB9_2 Depth 2
; CHECK-ARM6-NEXT: ldr r12, [sp, #4] @ 4-byte Reload
; CHECK-ARM6-NEXT: uxtb r1, r12
-; CHECK-ARM6-NEXT: cmp r1, #1
; CHECK-ARM6-NEXT: mov lr, #1
+; CHECK-ARM6-NEXT: cmp r1, #1
; CHECK-ARM6-NEXT: movhi lr, r12
; CHECK-ARM6-NEXT: ldr r3, .LCPI9_0
; CHECK-ARM6-NEXT: uxtb r12, r12
@@ -1964,8 +1964,8 @@ define i8 @test_umax_i8() {
; CHECK-THUMB7-NEXT: @ Child Loop BB9_2 Depth 2
; CHECK-THUMB7-NEXT: ldr r4, [sp, #4] @ 4-byte Reload
; CHECK-THUMB7-NEXT: uxtb r1, r4
-; CHECK-THUMB7-NEXT: cmp r1, #1
; CHECK-THUMB7-NEXT: mov.w r12, #1
+; CHECK-THUMB7-NEXT: cmp r1, #1
; CHECK-THUMB7-NEXT: it hi
; CHECK-THUMB7-NEXT: movhi r12, r4
; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i8
@@ -2091,8 +2091,8 @@ define i8 @test_umin_i8() {
; CHECK-ARM8-NEXT: @ Child Loop BB10_2 Depth 2
; CHECK-ARM8-NEXT: ldr r12, [sp, #4] @ 4-byte Reload
; CHECK-ARM8-NEXT: uxtb r1, r12
-; CHECK-ARM8-NEXT: cmp r1, #2
; CHECK-ARM8-NEXT: mov lr, #1
+; CHECK-ARM8-NEXT: cmp r1, #2
; CHECK-ARM8-NEXT: movlo lr, r12
; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i8
; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i8
@@ -2138,8 +2138,8 @@ define i8 @test_umin_i8() {
; CHECK-ARM6-NEXT: @ Child Loop BB10_2 Depth 2
; CHECK-ARM6-NEXT: ldr r12, [sp, #4] @ 4-byte Reload
; CHECK-ARM6-NEXT: uxtb r1, r12
-; CHECK-ARM6-NEXT: cmp r1, #2
; CHECK-ARM6-NEXT: mov lr, #1
+; CHECK-ARM6-NEXT: cmp r1, #2
; CHECK-ARM6-NEXT: movlo lr, r12
; CHECK-ARM6-NEXT: ldr r3, .LCPI10_0
; CHECK-ARM6-NEXT: uxtb r12, r12
@@ -2189,8 +2189,8 @@ define i8 @test_umin_i8() {
; CHECK-THUMB7-NEXT: @ Child Loop BB10_2 Depth 2
; CHECK-THUMB7-NEXT: ldr r4, [sp, #4] @ 4-byte Reload
; CHECK-THUMB7-NEXT: uxtb r1, r4
-; CHECK-THUMB7-NEXT: cmp r1, #2
; CHECK-THUMB7-NEXT: mov.w r12, #1
+; CHECK-THUMB7-NEXT: cmp r1, #2
; CHECK-THUMB7-NEXT: it lo
; CHECK-THUMB7-NEXT: movlo r12, r4
; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i8
@@ -3709,8 +3709,8 @@ define i16 @test_max_i16() {
; CHECK-ARM8-NEXT: @ Child Loop BB18_2 Depth 2
; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
; CHECK-ARM8-NEXT: sxth r0, r1
-; CHECK-ARM8-NEXT: cmp r0, #1
; CHECK-ARM8-NEXT: mov r12, #1
+; CHECK-ARM8-NEXT: cmp r0, #1
; CHECK-ARM8-NEXT: movgt r12, r1
; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i16
; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i16
@@ -3755,8 +3755,8 @@ define i16 @test_max_i16() {
; CHECK-ARM6-NEXT: @ Child Loop BB18_2 Depth 2
; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
; CHECK-ARM6-NEXT: sxth r0, r1
-; CHECK-ARM6-NEXT: cmp r0, #1
; CHECK-ARM6-NEXT: mov r12, #1
+; CHECK-ARM6-NEXT: cmp r0, #1
; CHECK-ARM6-NEXT: movgt r12, r1
; CHECK-ARM6-NEXT: ldr r3, .LCPI18_0
; CHECK-ARM6-NEXT: uxth r1, r1
@@ -3805,8 +3805,8 @@ define i16 @test_max_i16() {
; CHECK-THUMB7-NEXT: @ Child Loop BB18_2 Depth 2
; CHECK-THUMB7-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
; CHECK-THUMB7-NEXT: sxth r0, r1
-; CHECK-THUMB7-NEXT: cmp r0, #1
; CHECK-THUMB7-NEXT: mov.w r12, #1
+; CHECK-THUMB7-NEXT: cmp r0, #1
; CHECK-THUMB7-NEXT: it gt
; CHECK-THUMB7-NEXT: movgt r12, r1
; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i16
@@ -3930,8 +3930,8 @@ define i16 @test_min_i16() {
; CHECK-ARM8-NEXT: @ Child Loop BB19_2 Depth 2
; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
; CHECK-ARM8-NEXT: sxth r0, r1
-; CHECK-ARM8-NEXT: cmp r0, #2
; CHECK-ARM8-NEXT: mov r12, #1
+; CHECK-ARM8-NEXT: cmp r0, #2
; CHECK-ARM8-NEXT: movlt r12, r1
; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i16
; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i16
@@ -3976,8 +3976,8 @@ define i16 @test_min_i16() {
; CHECK-ARM6-NEXT: @ Child Loop BB19_2 Depth 2
; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
; CHECK-ARM6-NEXT: sxth r0, r1
-; CHECK-ARM6-NEXT: cmp r0, #2
; CHECK-ARM6-NEXT: mov r12, #1
+; CHECK-ARM6-NEXT: cmp r0, #2
; CHECK-ARM6-NEXT: movlt r12, r1
; CHECK-ARM6-NEXT: ldr r3, .LCPI19_0
; CHECK-ARM6-NEXT: uxth r1, r1
@@ -4026,8 +4026,8 @@ define i16 @test_min_i16() {
; CHECK-THUMB7-NEXT: @ Child Loop BB19_2 Depth 2
; CHECK-THUMB7-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
; CHECK-THUMB7-NEXT: sxth r0, r1
-; CHECK-THUMB7-NEXT: cmp r0, #2
; CHECK-THUMB7-NEXT: mov.w r12, #1
+; CHECK-THUMB7-NEXT: cmp r0, #2
; CHECK-THUMB7-NEXT: it lt
; CHECK-THUMB7-NEXT: movlt r12, r1
; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i16
@@ -4153,8 +4153,8 @@ define i16 @test_umax_i16() {
; CHECK-ARM8-NEXT: @ Child Loop BB20_2 Depth 2
; CHECK-ARM8-NEXT: ldr r12, [sp, #4] @ 4-byte Reload
; CHECK-ARM8-NEXT: uxth r1, r12
-; CHECK-ARM8-NEXT: cmp r1, #1
; CHECK-ARM8-NEXT: mov lr, #1
+; CHECK-ARM8-NEXT: cmp r1, #1
; CHECK-ARM8-NEXT: movhi lr, r12
; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i16
; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i16
@@ -4200,8 +4200,8 @@ define i16 @test_umax_i16() {
; CHECK-ARM6-NEXT: @ Child Loop BB20_2 Depth 2
; CHECK-ARM6-NEXT: ldr r12, [sp, #4] @ 4-byte Reload
; CHECK-ARM6-NEXT: uxth r1, r12
-; CHECK-ARM6-NEXT: cmp r1, #1
; CHECK-ARM6-NEXT: mov lr, #1
+; CHECK-ARM6-NEXT: cmp r1, #1
; CHECK-ARM6-NEXT: movhi lr, r12
; CHECK-ARM6-NEXT: ldr r3, .LCPI20_0
; CHECK-ARM6-NEXT: uxth r12, r12
@@ -4251,8 +4251,8 @@ define i16 @test_umax_i16() {
; CHECK-THUMB7-NEXT: @ Child Loop BB20_2 Depth 2
; CHECK-THUMB7-NEXT: ldr r4, [sp, #4] @ 4-byte Reload
; CHECK-THUMB7-NEXT: uxth r1, r4
-; CHECK-THUMB7-NEXT: cmp r1, #1
; CHECK-THUMB7-NEXT: mov.w r12, #1
+; CHECK-THUMB7-NEXT: cmp r1, #1
; CHECK-THUMB7-NEXT: it hi
; CHECK-THUMB7-NEXT: movhi r12, r4
; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i16
@@ -4378,8 +4378,8 @@ define i16 @test_umin_i16() {
; CHECK-ARM8-NEXT: @ Child Loop BB21_2 Depth 2
; CHECK-ARM8-NEXT: ldr r12, [sp, #4] @ 4-byte Reload
; CHECK-ARM8-NEXT: uxth r1, r12
-; CHECK-ARM8-NEXT: cmp r1, #2
; CHECK-ARM8-NEXT: mov lr, #1
+; CHECK-ARM8-NEXT: cmp r1, #2
; CHECK-ARM8-NEXT: movlo lr, r12
; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i16
; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i16
@@ -4425,8 +4425,8 @@ define i16 @test_umin_i16() {
; CHECK-ARM6-NEXT: @ Child Loop BB21_2 Depth 2
; CHECK-ARM6-NEXT: ldr r12, [sp, #4] @ 4-byte Reload
; CHECK-ARM6-NEXT: uxth r1, r12
-; CHECK-ARM6-NEXT: cmp r1, #2
; CHECK-ARM6-NEXT: mov lr, #1
+; CHECK-ARM6-NEXT: cmp r1, #2
; CHECK-ARM6-NEXT: movlo lr, r12
; CHECK-ARM6-NEXT: ldr r3, .LCPI21_0
; CHECK-ARM6-NEXT: uxth r12, r12
@@ -4476,8 +4476,8 @@ define i16 @test_umin_i16() {
; CHECK-THUMB7-NEXT: @ Child Loop BB21_2 Depth 2
; CHECK-THUMB7-NEXT: ldr r4, [sp, #4] @ 4-byte Reload
; CHECK-THUMB7-NEXT: uxth r1, r4
-; CHECK-THUMB7-NEXT: cmp r1, #2
; CHECK-THUMB7-NEXT: mov.w r12, #1
+; CHECK-THUMB7-NEXT: cmp r1, #2
; CHECK-THUMB7-NEXT: it lo
; CHECK-THUMB7-NEXT: movlo r12, r4
; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i16
@@ -5939,8 +5939,8 @@ define i32 @test_max_i32() {
; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1
; CHECK-ARM8-NEXT: @ Child Loop BB29_2 Depth 2
; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
-; CHECK-ARM8-NEXT: cmp r1, #1
; CHECK-ARM8-NEXT: mov r12, #1
+; CHECK-ARM8-NEXT: cmp r1, #1
; CHECK-ARM8-NEXT: movgt r12, r1
; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i32
; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i32
@@ -5982,8 +5982,8 @@ define i32 @test_max_i32() {
; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1
; CHECK-ARM6-NEXT: @ Child Loop BB29_2 Depth 2
; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
-; CHECK-ARM6-NEXT: cmp r1, #1
; CHECK-ARM6-NEXT: mov r12, #1
+; CHECK-ARM6-NEXT: cmp r1, #1
; CHECK-ARM6-NEXT: movgt r12, r1
; CHECK-ARM6-NEXT: ldr r3, .LCPI29_0
; CHECK-ARM6-NEXT: .LBB29_2: @ %atomicrmw.start
@@ -6029,8 +6029,8 @@ define i32 @test_max_i32() {
; CHECK-THUMB7-NEXT: @ =>This Loop Header: Depth=1
; CHECK-THUMB7-NEXT: @ Child Loop BB29_2 Depth 2
; CHECK-THUMB7-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
-; CHECK-THUMB7-NEXT: cmp r1, #1
; CHECK-THUMB7-NEXT: mov.w r12, #1
+; CHECK-THUMB7-NEXT: cmp r1, #1
; CHECK-THUMB7-NEXT: it gt
; CHECK-THUMB7-NEXT: movgt r12, r1
; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i32
@@ -6148,8 +6148,8 @@ define i32 @test_min_i32() {
; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1
; CHECK-ARM8-NEXT: @ Child Loop BB30_2 Depth 2
; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
-; CHECK-ARM8-NEXT: cmp r1, #2
; CHECK-ARM8-NEXT: mov r12, #1
+; CHECK-ARM8-NEXT: cmp r1, #2
; CHECK-ARM8-NEXT: movlt r12, r1
; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i32
; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i32
@@ -6191,8 +6191,8 @@ define i32 @test_min_i32() {
; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1
; CHECK-ARM6-NEXT: @ Child Loop BB30_2 Depth 2
; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
-; CHECK-ARM6-NEXT: cmp r1, #2
; CHECK-ARM6-NEXT: mov r12, #1
+; CHECK-ARM6-NEXT: cmp r1, #2
; CHECK-ARM6-NEXT: movlt r12, r1
; CHECK-ARM6-NEXT: ldr r3, .LCPI30_0
; CHECK-ARM6-NEXT: .LBB30_2: @ %atomicrmw.start
@@ -6238,8 +6238,8 @@ define i32 @test_min_i32() {
; CHECK-THUMB7-NEXT: @ =>This Loop Header: Depth=1
; CHECK-THUMB7-NEXT: @ Child Loop BB30_2 Depth 2
; CHECK-THUMB7-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
-; CHECK-THUMB7-NEXT: cmp r1, #2
; CHECK-THUMB7-NEXT: mov.w r12, #1
+; CHECK-THUMB7-NEXT: cmp r1, #2
; CHECK-THUMB7-NEXT: it lt
; CHECK-THUMB7-NEXT: movlt r12, r1
; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i32
@@ -6357,8 +6357,8 @@ define i32 @test_umax_i32() {
; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1
; CHECK-ARM8-NEXT: @ Child Loop BB31_2 Depth 2
; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
-; CHECK-ARM8-NEXT: cmp r1, #1
; CHECK-ARM8-NEXT: mov r12, #1
+; CHECK-ARM8-NEXT: cmp r1, #1
; CHECK-ARM8-NEXT: movhi r12, r1
; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i32
; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i32
@@ -6400,8 +6400,8 @@ define i32 @test_umax_i32() {
; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1
; CHECK-ARM6-NEXT: @ Child Loop BB31_2 Depth 2
; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
-; CHECK-ARM6-NEXT: cmp r1, #1
; CHECK-ARM6-NEXT: mov r12, #1
+; CHECK-ARM6-NEXT: cmp r1, #1
; CHECK-ARM6-NEXT: movhi r12, r1
; CHECK-ARM6-NEXT: ldr r3, .LCPI31_0
; CHECK-ARM6-NEXT: .LBB31_2: @ %atomicrmw.start
@@ -6447,8 +6447,8 @@ define i32 @test_umax_i32() {
; CHECK-THUMB7-NEXT: @ =>This Loop Header: Depth=1
; CHECK-THUMB7-NEXT: @ Child Loop BB31_2 Depth 2
; CHECK-THUMB7-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
-; CHECK-THUMB7-NEXT: cmp r1, #1
; CHECK-THUMB7-NEXT: mov.w r12, #1
+; CHECK-THUMB7-NEXT: cmp r1, #1
; CHECK-THUMB7-NEXT: it hi
; CHECK-THUMB7-NEXT: movhi r12, r1
; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i32
@@ -6566,8 +6566,8 @@ define i32 @test_umin_i32() {
; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1
; CHECK-ARM8-NEXT: @ Child Loop BB32_2 Depth 2
; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
-; CHECK-ARM8-NEXT: cmp r1, #2
; CHECK-ARM8-NEXT: mov r12, #1
+; CHECK-ARM8-NEXT: cmp r1, #2
; CHECK-ARM8-NEXT: movlo r12, r1
; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i32
; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i32
@@ -6609,8 +6609,8 @@ define i32 @test_umin_i32() {
; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1
; CHECK-ARM6-NEXT: @ Child Loop BB32_2 Depth 2
; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
-; CHECK-ARM6-NEXT: cmp r1, #2
; CHECK-ARM6-NEXT: mov r12, #1
+; CHECK-ARM6-NEXT: cmp r1, #2
; CHECK-ARM6-NEXT: movlo r12, r1
; CHECK-ARM6-NEXT: ldr r3, .LCPI32_0
; CHECK-ARM6-NEXT: .LBB32_2: @ %atomicrmw.start
@@ -6656,8 +6656,8 @@ define i32 @test_umin_i32() {
; CHECK-THUMB7-NEXT: @ =>This Loop Header: Depth=1
; CHECK-THUMB7-NEXT: @ Child Loop BB32_2 Depth 2
; CHECK-THUMB7-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
-; CHECK-THUMB7-NEXT: cmp r1, #2
; CHECK-THUMB7-NEXT: mov.w r12, #1
+; CHECK-THUMB7-NEXT: cmp r1, #2
; CHECK-THUMB7-NEXT: it lo
; CHECK-THUMB7-NEXT: movlo r12, r1
; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i32
@@ -8342,7 +8342,8 @@ define i64 @test_max_i64() {
; CHECK-ARM8-NEXT: mov r0, #0
; CHECK-ARM8-NEXT: movwlt r0, #1
; CHECK-ARM8-NEXT: mov r10, #1
-; CHECK-ARM8-NEXT: movlt r10, r2
+; CHECK-ARM8-NEXT: cmp r0, #0
+; CHECK-ARM8-NEXT: movne r10, r2
; CHECK-ARM8-NEXT: cmp r0, #0
; CHECK-ARM8-NEXT: movne r0, r1
; CHECK-ARM8-NEXT: @ kill: def $r10 killed $r10 def $r10_r11
@@ -8409,7 +8410,8 @@ define i64 @test_max_i64() {
; CHECK-ARM6-NEXT: mov r0, #0
; CHECK-ARM6-NEXT: movlt r0, #1
; CHECK-ARM6-NEXT: mov r10, #1
-; CHECK-ARM6-NEXT: movlt r10, r2
+; CHECK-ARM6-NEXT: cmp r0, #0
+; CHECK-ARM6-NEXT: movne r10, r2
; CHECK-ARM6-NEXT: cmp r0, #0
; CHECK-ARM6-NEXT: movne r0, r1
; CHECK-ARM6-NEXT: @ kill: def $r10 killed $r10 def $r10_r11
@@ -8481,8 +8483,9 @@ define i64 @test_max_i64() {
; CHECK-THUMB7-NEXT: mov r8, r2
; CHECK-THUMB7-NEXT: mov r9, r1
; CHECK-THUMB7-NEXT: mov.w r10, #1
-; CHECK-THUMB7-NEXT: it lt
-; CHECK-THUMB7-NEXT: movlt r10, r2
+; CHECK-THUMB7-NEXT: cmp r0, #0
+; CHECK-THUMB7-NEXT: it ne
+; CHECK-THUMB7-NEXT: movne r10, r2
; CHECK-THUMB7-NEXT: cmp r0, #0
; CHECK-THUMB7-NEXT: it ne
; CHECK-THUMB7-NEXT: movne r0, r1
@@ -8578,7 +8581,7 @@ define i64 @test_max_i64() {
; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #52] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: str r1, [sp, #28] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #32] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: blt .LBB40_5
+; CHECK-THUMB8BASE-NEXT: cbnz r1, .LBB40_5
; CHECK-THUMB8BASE-NEXT: @ %bb.4: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB40_1 Depth=1
; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #48] @ 4-byte Reload
@@ -8655,7 +8658,8 @@ define i64 @test_min_i64() {
; CHECK-ARM8-NEXT: mov r0, #0
; CHECK-ARM8-NEXT: movwlt r0, #1
; CHECK-ARM8-NEXT: mov r10, #1
-; CHECK-ARM8-NEXT: movlt r10, r2
+; CHECK-ARM8-NEXT: cmp r0, #0
+; CHECK-ARM8-NEXT: movne r10, r2
; CHECK-ARM8-NEXT: cmp r0, #0
; CHECK-ARM8-NEXT: movne r0, r1
; CHECK-ARM8-NEXT: @ kill: def $r10 killed $r10 def $r10_r11
@@ -8722,7 +8726,8 @@ define i64 @test_min_i64() {
; CHECK-ARM6-NEXT: mov r0, #0
; CHECK-ARM6-NEXT: movlt r0, #1
; CHECK-ARM6-NEXT: mov r10, #1
-; CHECK-ARM6-NEXT: movlt r10, r2
+; CHECK-ARM6-NEXT: cmp r0, #0
+; CHECK-ARM6-NEXT: movne r10, r2
; CHECK-ARM6-NEXT: cmp r0, #0
; CHECK-ARM6-NEXT: movne r0, r1
; CHECK-ARM6-NEXT: @ kill: def $r10 killed $r10 def $r10_r11
@@ -8794,8 +8799,9 @@ define i64 @test_min_i64() {
; CHECK-THUMB7-NEXT: it lt
; CHECK-THUMB7-NEXT: movlt r0, #1
; CHECK-THUMB7-NEXT: mov.w r10, #1
-; CHECK-THUMB7-NEXT: it lt
-; CHECK-THUMB7-NEXT: movlt r10, r2
+; CHECK-THUMB7-NEXT: cmp r0, #0
+; CHECK-THUMB7-NEXT: it ne
+; CHECK-THUMB7-NEXT: movne r10, r2
; CHECK-THUMB7-NEXT: cmp r0, #0
; CHECK-THUMB7-NEXT: it ne
; CHECK-THUMB7-NEXT: movne r0, r1
@@ -8891,7 +8897,7 @@ define i64 @test_min_i64() {
; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #52] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: str r1, [sp, #28] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #32] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: blt .LBB41_5
+; CHECK-THUMB8BASE-NEXT: cbnz r1, .LBB41_5
; CHECK-THUMB8BASE-NEXT: @ %bb.4: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB41_1 Depth=1
; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
@@ -8968,7 +8974,8 @@ define i64 @test_umax_i64() {
; CHECK-ARM8-NEXT: mov r0, #0
; CHECK-ARM8-NEXT: movwlo r0, #1
; CHECK-ARM8-NEXT: mov r10, #1
-; CHECK-ARM8-NEXT: movlo r10, r2
+; CHECK-ARM8-NEXT: cmp r0, #0
+; CHECK-ARM8-NEXT: movne r10, r2
; CHECK-ARM8-NEXT: cmp r0, #0
; CHECK-ARM8-NEXT: movne r0, r1
; CHECK-ARM8-NEXT: @ kill: def $r10 killed $r10 def $r10_r11
@@ -9035,7 +9042,8 @@ define i64 @test_umax_i64() {
; CHECK-ARM6-NEXT: mov r0, #0
; CHECK-ARM6-NEXT: movlo r0, #1
; CHECK-ARM6-NEXT: mov r10, #1
-; CHECK-ARM6-NEXT: movlo r10, r2
+; CHECK-ARM6-NEXT: cmp r0, #0
+; CHECK-ARM6-NEXT: movne r10, r2
; CHECK-ARM6-NEXT: cmp r0, #0
; CHECK-ARM6-NEXT: movne r0, r1
; CHECK-ARM6-NEXT: @ kill: def $r10 killed $r10 def $r10_r11
@@ -9107,8 +9115,9 @@ define i64 @test_umax_i64() {
; CHECK-THUMB7-NEXT: mov r8, r2
; CHECK-THUMB7-NEXT: mov r9, r1
; CHECK-THUMB7-NEXT: mov.w r10, #1
-; CHECK-THUMB7-NEXT: it lo
-; CHECK-THUMB7-NEXT: movlo r10, r2
+; CHECK-THUMB7-NEXT: cmp r0, #0
+; CHECK-THUMB7-NEXT: it ne
+; CHECK-THUMB7-NEXT: movne r10, r2
; CHECK-THUMB7-NEXT: cmp r0, #0
; CHECK-THUMB7-NEXT: it ne
; CHECK-THUMB7-NEXT: movne r0, r1
@@ -9204,7 +9213,7 @@ define i64 @test_umax_i64() {
; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #52] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: str r1, [sp, #28] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #32] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: blo .LBB42_5
+; CHECK-THUMB8BASE-NEXT: cbnz r1, .LBB42_5
; CHECK-THUMB8BASE-NEXT: @ %bb.4: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB42_1 Depth=1
; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #48] @ 4-byte Reload
@@ -9281,7 +9290,8 @@ define i64 @test_umin_i64() {
; CHECK-ARM8-NEXT: mov r0, #0
; CHECK-ARM8-NEXT: movwlo r0, #1
; CHECK-ARM8-NEXT: mov r10, #1
-; CHECK-ARM8-NEXT: movlo r10, r2
+; CHECK-ARM8-NEXT: cmp r0, #0
+; CHECK-ARM8-NEXT: movne r10, r2
; CHECK-ARM8-NEXT: cmp r0, #0
; CHECK-ARM8-NEXT: movne r0, r1
; CHECK-ARM8-NEXT: @ kill: def $r10 killed $r10 def $r10_r11
@@ -9348,7 +9358,8 @@ define i64 @test_umin_i64() {
; CHECK-ARM6-NEXT: mov r0, #0
; CHECK-ARM6-NEXT: movlo r0, #1
; CHECK-ARM6-NEXT: mov r10, #1
-; CHECK-ARM6-NEXT: movlo r10, r2
+; CHECK-ARM6-NEXT: cmp r0, #0
+; CHECK-ARM6-NEXT: movne r10, r2
; CHECK-ARM6-NEXT: cmp r0, #0
; CHECK-ARM6-NEXT: movne r0, r1
; CHECK-ARM6-NEXT: @ kill: def $r10 killed $r10 def $r10_r11
@@ -9420,8 +9431,9 @@ define i64 @test_umin_i64() {
; CHECK-THUMB7-NEXT: it lo
; CHECK-THUMB7-NEXT: movlo r0, #1
; CHECK-THUMB7-NEXT: mov.w r10, #1
-; CHECK-THUMB7-NEXT: it lo
-; CHECK-THUMB7-NEXT: movlo r10, r2
+; CHECK-THUMB7-NEXT: cmp r0, #0
+; CHECK-THUMB7-NEXT: it ne
+; CHECK-THUMB7-NEXT: movne r10, r2
; CHECK-THUMB7-NEXT: cmp r0, #0
; CHECK-THUMB7-NEXT: it ne
; CHECK-THUMB7-NEXT: movne r0, r1
@@ -9517,7 +9529,7 @@ define i64 @test_umin_i64() {
; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #52] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: str r1, [sp, #28] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #32] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: blo .LBB43_5
+; CHECK-THUMB8BASE-NEXT: cbnz r1, .LBB43_5
; CHECK-THUMB8BASE-NEXT: @ %bb.4: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB43_1 Depth=1
; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
diff --git a/llvm/test/CodeGen/ARM/bfi.ll b/llvm/test/CodeGen/ARM/bfi.ll
index 5aeb99695a5fe8..91a74e535a2218 100644
--- a/llvm/test/CodeGen/ARM/bfi.ll
+++ b/llvm/test/CodeGen/ARM/bfi.ll
@@ -204,11 +204,10 @@ define i32 @f12(i32 %x, i32 %y) {
define i32 @f13(i32 %x, i32 %y) {
; CHECK-LABEL: f13:
; CHECK: @ %bb.0:
-; CHECK-NEXT: and r0, r0, #4
-; CHECK-NEXT: bic r1, r1, #255
-; CHECK-NEXT: cmp r0, #42
-; CHECK-NEXT: orrne r1, r1, #16
-; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: and r2, r0, #4
+; CHECK-NEXT: bic r0, r1, #255
+; CHECK-NEXT: cmp r2, #42
+; CHECK-NEXT: orrne r0, r0, #16
; CHECK-NEXT: bx lr
%y2 = and i32 %y, 4294967040 ; 0xFFFFFF00
%and = and i32 %x, 4
diff --git a/llvm/test/CodeGen/ARM/cmov_fp16.ll b/llvm/test/CodeGen/ARM/cmov_fp16.ll
index fb8da3724ede8f..805955d3e83069 100644
--- a/llvm/test/CodeGen/ARM/cmov_fp16.ll
+++ b/llvm/test/CodeGen/ARM/cmov_fp16.ll
@@ -5,12 +5,12 @@
define i32 @test_ne(i32 %x, i32 %y, i32 %a, i32 %b) {
; CHECK-LABEL: test_ne:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vmov s0, r1
; CHECK-NEXT: cmp r2, r3
-; CHECK-NEXT: vmov s2, r1
+; CHECK-NEXT: vmov s2, r0
; CHECK-NEXT: vcvt.f16.u32 s0, s0
; CHECK-NEXT: vcvt.f16.u32 s2, s2
-; CHECK-NEXT: vseleq.f16 s0, s2, s0
+; CHECK-NEXT: vseleq.f16 s0, s0, s2
; CHECK-NEXT: vmov.f16 r0, s0
; CHECK-NEXT: bx lr
entry:
@@ -26,12 +26,12 @@ entry:
define i32 @test_eq(i32 %x, i32 %y, i32 %a, i32 %b) {
; CHECK-LABEL: test_eq:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov s0, r1
+; CHECK-NEXT: vmov s0, r0
; CHECK-NEXT: cmp r2, r3
-; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vmov s2, r1
; CHECK-NEXT: vcvt.f16.u32 s0, s0
; CHECK-NEXT: vcvt.f16.u32 s2, s2
-; CHECK-NEXT: vseleq.f16 s0, s2, s0
+; CHECK-NEXT: vseleq.f16 s0, s0, s2
; CHECK-NEXT: vmov.f16 r0, s0
; CHECK-NEXT: bx lr
entry:
@@ -47,12 +47,12 @@ entry:
define i32 @test_gt(i32 %x, i32 %y, i32 %a, i32 %b) {
; CHECK-LABEL: test_gt:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov s0, r1
+; CHECK-NEXT: vmov s0, r0
; CHECK-NEXT: cmp r2, r3
-; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vmov s2, r1
; CHECK-NEXT: vcvt.f16.u32 s0, s0
; CHECK-NEXT: vcvt.f16.u32 s2, s2
-; CHECK-NEXT: vselgt.f16 s0, s2, s0
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vmov.f16 r0, s0
; CHECK-NEXT: bx lr
entry:
@@ -68,12 +68,12 @@ entry:
define i32 @test_ge(i32 %x, i32 %y, i32 %a, i32 %b) {
; CHECK-LABEL: test_ge:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov s0, r1
+; CHECK-NEXT: vmov s0, r0
; CHECK-NEXT: cmp r2, r3
-; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vmov s2, r1
; CHECK-NEXT: vcvt.f16.u32 s0, s0
; CHECK-NEXT: vcvt.f16.u32 s2, s2
-; CHECK-NEXT: vselge.f16 s0, s2, s0
+; CHECK-NEXT: vselge.f16 s0, s0, s2
; CHECK-NEXT: vmov.f16 r0, s0
; CHECK-NEXT: bx lr
entry:
@@ -89,12 +89,12 @@ entry:
define i32 @test_lt(i32 %x, i32 %y, i32 %a, i32 %b) {
; CHECK-LABEL: test_lt:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vmov s0, r1
; CHECK-NEXT: cmp r2, r3
-; CHECK-NEXT: vmov s2, r1
+; CHECK-NEXT: vmov s2, r0
; CHECK-NEXT: vcvt.f16.u32 s0, s0
; CHECK-NEXT: vcvt.f16.u32 s2, s2
-; CHECK-NEXT: vselge.f16 s0, s2, s0
+; CHECK-NEXT: vselge.f16 s0, s0, s2
; CHECK-NEXT: vmov.f16 r0, s0
; CHECK-NEXT: bx lr
entry:
@@ -110,12 +110,12 @@ entry:
define i32 @test_le(i32 %x, i32 %y, i32 %a, i32 %b) {
; CHECK-LABEL: test_le:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vmov s0, r1
; CHECK-NEXT: cmp r2, r3
-; CHECK-NEXT: vmov s2, r1
+; CHECK-NEXT: vmov s2, r0
; CHECK-NEXT: vcvt.f16.u32 s0, s0
; CHECK-NEXT: vcvt.f16.u32 s2, s2
-; CHECK-NEXT: vselgt.f16 s0, s2, s0
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vmov.f16 r0, s0
; CHECK-NEXT: bx lr
entry:
@@ -131,25 +131,25 @@ entry:
define i32 @test_hi(i32 %x, i32 %y, i32 %a, i32 %b) {
; CHECK-THUMB-LABEL: test_hi:
; CHECK-THUMB: @ %bb.0: @ %entry
-; CHECK-THUMB-NEXT: vmov s2, r1
+; CHECK-THUMB-NEXT: vmov s0, r1
; CHECK-THUMB-NEXT: cmp r2, r3
-; CHECK-THUMB-NEXT: vmov s0, r0
-; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-THUMB-NEXT: vmov s2, r0
; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2
; CHECK-THUMB-NEXT: it hi
-; CHECK-THUMB-NEXT: vmovhi.f32 s2, s0
-; CHECK-THUMB-NEXT: vmov.f16 r0, s2
+; CHECK-THUMB-NEXT: vmovhi.f32 s0, s2
+; CHECK-THUMB-NEXT: vmov.f16 r0, s0
; CHECK-THUMB-NEXT: bx lr
;
; CHECK-ARM-LABEL: test_hi:
; CHECK-ARM: @ %bb.0: @ %entry
-; CHECK-ARM-NEXT: vmov s0, r0
+; CHECK-ARM-NEXT: vmov s0, r1
; CHECK-ARM-NEXT: cmp r2, r3
-; CHECK-ARM-NEXT: vmov s2, r1
+; CHECK-ARM-NEXT: vmov s2, r0
; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0
; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2
-; CHECK-ARM-NEXT: vmovhi.f32 s2, s0
-; CHECK-ARM-NEXT: vmov.f16 r0, s2
+; CHECK-ARM-NEXT: vmovhi.f32 s0, s2
+; CHECK-ARM-NEXT: vmov.f16 r0, s0
; CHECK-ARM-NEXT: bx lr
entry:
%x.half = uitofp i32 %x to half
@@ -164,25 +164,25 @@ entry:
define i32 @test_hs(i32 %x, i32 %y, i32 %a, i32 %b) {
; CHECK-THUMB-LABEL: test_hs:
; CHECK-THUMB: @ %bb.0: @ %entry
-; CHECK-THUMB-NEXT: vmov s2, r1
+; CHECK-THUMB-NEXT: vmov s0, r1
; CHECK-THUMB-NEXT: cmp r2, r3
-; CHECK-THUMB-NEXT: vmov s0, r0
-; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-THUMB-NEXT: vmov s2, r0
; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2
; CHECK-THUMB-NEXT: it hs
-; CHECK-THUMB-NEXT: vmovhs.f32 s2, s0
-; CHECK-THUMB-NEXT: vmov.f16 r0, s2
+; CHECK-THUMB-NEXT: vmovhs.f32 s0, s2
+; CHECK-THUMB-NEXT: vmov.f16 r0, s0
; CHECK-THUMB-NEXT: bx lr
;
; CHECK-ARM-LABEL: test_hs:
; CHECK-ARM: @ %bb.0: @ %entry
-; CHECK-ARM-NEXT: vmov s0, r0
+; CHECK-ARM-NEXT: vmov s0, r1
; CHECK-ARM-NEXT: cmp r2, r3
-; CHECK-ARM-NEXT: vmov s2, r1
+; CHECK-ARM-NEXT: vmov s2, r0
; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0
; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2
-; CHECK-ARM-NEXT: vmovhs.f32 s2, s0
-; CHECK-ARM-NEXT: vmov.f16 r0, s2
+; CHECK-ARM-NEXT: vmovhs.f32 s0, s2
+; CHECK-ARM-NEXT: vmov.f16 r0, s0
; CHECK-ARM-NEXT: bx lr
entry:
%x.half = uitofp i32 %x to half
@@ -197,25 +197,25 @@ entry:
define i32 @test_lo(i32 %x, i32 %y, i32 %a, i32 %b) {
; CHECK-THUMB-LABEL: test_lo:
; CHECK-THUMB: @ %bb.0: @ %entry
-; CHECK-THUMB-NEXT: vmov s2, r1
+; CHECK-THUMB-NEXT: vmov s0, r1
; CHECK-THUMB-NEXT: cmp r2, r3
-; CHECK-THUMB-NEXT: vmov s0, r0
-; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-THUMB-NEXT: vmov s2, r0
; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2
; CHECK-THUMB-NEXT: it lo
-; CHECK-THUMB-NEXT: vmovlo.f32 s2, s0
-; CHECK-THUMB-NEXT: vmov.f16 r0, s2
+; CHECK-THUMB-NEXT: vmovlo.f32 s0, s2
+; CHECK-THUMB-NEXT: vmov.f16 r0, s0
; CHECK-THUMB-NEXT: bx lr
;
; CHECK-ARM-LABEL: test_lo:
; CHECK-ARM: @ %bb.0: @ %entry
-; CHECK-ARM-NEXT: vmov s0, r0
+; CHECK-ARM-NEXT: vmov s0, r1
; CHECK-ARM-NEXT: cmp r2, r3
-; CHECK-ARM-NEXT: vmov s2, r1
+; CHECK-ARM-NEXT: vmov s2, r0
; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0
; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2
-; CHECK-ARM-NEXT: vmovlo.f32 s2, s0
-; CHECK-ARM-NEXT: vmov.f16 r0, s2
+; CHECK-ARM-NEXT: vmovlo.f32 s0, s2
+; CHECK-ARM-NEXT: vmov.f16 r0, s0
; CHECK-ARM-NEXT: bx lr
entry:
%x.half = uitofp i32 %x to half
@@ -230,25 +230,25 @@ entry:
define i32 @test_ls(i32 %x, i32 %y, i32 %a, i32 %b) {
; CHECK-THUMB-LABEL: test_ls:
; CHECK-THUMB: @ %bb.0: @ %entry
-; CHECK-THUMB-NEXT: vmov s2, r1
+; CHECK-THUMB-NEXT: vmov s0, r1
; CHECK-THUMB-NEXT: cmp r2, r3
-; CHECK-THUMB-NEXT: vmov s0, r0
-; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-THUMB-NEXT: vmov s2, r0
; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2
; CHECK-THUMB-NEXT: it ls
-; CHECK-THUMB-NEXT: vmovls.f32 s2, s0
-; CHECK-THUMB-NEXT: vmov.f16 r0, s2
+; CHECK-THUMB-NEXT: vmovls.f32 s0, s2
+; CHECK-THUMB-NEXT: vmov.f16 r0, s0
; CHECK-THUMB-NEXT: bx lr
;
; CHECK-ARM-LABEL: test_ls:
; CHECK-ARM: @ %bb.0: @ %entry
-; CHECK-ARM-NEXT: vmov s0, r0
+; CHECK-ARM-NEXT: vmov s0, r1
; CHECK-ARM-NEXT: cmp r2, r3
-; CHECK-ARM-NEXT: vmov s2, r1
+; CHECK-ARM-NEXT: vmov s2, r0
; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0
; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2
-; CHECK-ARM-NEXT: vmovls.f32 s2, s0
-; CHECK-ARM-NEXT: vmov.f16 r0, s2
+; CHECK-ARM-NEXT: vmovls.f32 s0, s2
+; CHECK-ARM-NEXT: vmov.f16 r0, s0
; CHECK-ARM-NEXT: bx lr
entry:
%x.half = uitofp i32 %x to half
diff --git a/llvm/test/CodeGen/ARM/cse-call.ll b/llvm/test/CodeGen/ARM/cse-call.ll
index 25fa477e5c2d38..71cfa3b9da9302 100644
--- a/llvm/test/CodeGen/ARM/cse-call.ll
+++ b/llvm/test/CodeGen/ARM/cse-call.ll
@@ -25,9 +25,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
; CHECK-T1: cmp
; CHECK: S_trimzeros
-; CHECK-T1: S_trimzeros
-; CHECK-NOT: moveq
-; CHECK-T1-NOT: beq
+; CHECK: cmp
; CHECK: strlen
@F_floatmul.man1 = external global [200 x i8], align 1
diff --git a/llvm/test/CodeGen/ARM/cttz.ll b/llvm/test/CodeGen/ARM/cttz.ll
index 76adc61c5971fd..d9663a1c148fc5 100644
--- a/llvm/test/CodeGen/ARM/cttz.ll
+++ b/llvm/test/CodeGen/ARM/cttz.ll
@@ -223,39 +223,42 @@ define i64 @test_i64(i64 %a) {
; CHECK-6M: @ %bb.0:
; CHECK-6M-NEXT: .save {r4, r5, r7, lr}
; CHECK-6M-NEXT: push {r4, r5, r7, lr}
-; CHECK-6M-NEXT: mov r2, r0
; CHECK-6M-NEXT: ldr r5, .LCPI3_0
-; CHECK-6M-NEXT: adr r3, .LCPI3_1
-; CHECK-6M-NEXT: movs r0, #32
-; CHECK-6M-NEXT: cmp r1, #0
-; CHECK-6M-NEXT: mov r4, r0
-; CHECK-6M-NEXT: beq .LBB3_2
+; CHECK-6M-NEXT: adr r4, .LCPI3_1
+; CHECK-6M-NEXT: movs r3, #32
+; CHECK-6M-NEXT: cmp r0, #0
+; CHECK-6M-NEXT: mov r2, r3
+; CHECK-6M-NEXT: bne .LBB3_5
; CHECK-6M-NEXT: @ %bb.1:
-; CHECK-6M-NEXT: rsbs r4, r1, #0
-; CHECK-6M-NEXT: ands r4, r1
-; CHECK-6M-NEXT: muls r4, r5, r4
-; CHECK-6M-NEXT: lsrs r1, r4, #27
-; CHECK-6M-NEXT: ldrb r4, [r3, r1]
+; CHECK-6M-NEXT: cmp r1, #0
+; CHECK-6M-NEXT: bne .LBB3_6
; CHECK-6M-NEXT: .LBB3_2:
-; CHECK-6M-NEXT: adds r4, #32
-; CHECK-6M-NEXT: rsbs r1, r2, #0
-; CHECK-6M-NEXT: ands r1, r2
-; CHECK-6M-NEXT: muls r5, r1, r5
-; CHECK-6M-NEXT: lsrs r1, r5, #27
-; CHECK-6M-NEXT: cmp r2, #0
-; CHECK-6M-NEXT: bne .LBB3_5
-; CHECK-6M-NEXT: @ %bb.3:
-; CHECK-6M-NEXT: beq .LBB3_6
+; CHECK-6M-NEXT: cmp r0, #0
+; CHECK-6M-NEXT: bne .LBB3_4
+; CHECK-6M-NEXT: .LBB3_3:
+; CHECK-6M-NEXT: adds r3, #32
+; CHECK-6M-NEXT: mov r2, r3
; CHECK-6M-NEXT: .LBB3_4:
; CHECK-6M-NEXT: movs r1, #0
+; CHECK-6M-NEXT: mov r0, r2
; CHECK-6M-NEXT: pop {r4, r5, r7, pc}
; CHECK-6M-NEXT: .LBB3_5:
-; CHECK-6M-NEXT: ldrb r0, [r3, r1]
-; CHECK-6M-NEXT: bne .LBB3_4
+; CHECK-6M-NEXT: rsbs r2, r0, #0
+; CHECK-6M-NEXT: ands r2, r0
+; CHECK-6M-NEXT: muls r2, r5, r2
+; CHECK-6M-NEXT: lsrs r2, r2, #27
+; CHECK-6M-NEXT: ldrb r2, [r4, r2]
+; CHECK-6M-NEXT: cmp r1, #0
+; CHECK-6M-NEXT: beq .LBB3_2
; CHECK-6M-NEXT: .LBB3_6:
-; CHECK-6M-NEXT: mov r0, r4
-; CHECK-6M-NEXT: movs r1, #0
-; CHECK-6M-NEXT: pop {r4, r5, r7, pc}
+; CHECK-6M-NEXT: rsbs r3, r1, #0
+; CHECK-6M-NEXT: ands r3, r1
+; CHECK-6M-NEXT: muls r5, r3, r5
+; CHECK-6M-NEXT: lsrs r1, r5, #27
+; CHECK-6M-NEXT: ldrb r3, [r4, r1]
+; CHECK-6M-NEXT: cmp r0, #0
+; CHECK-6M-NEXT: beq .LBB3_3
+; CHECK-6M-NEXT: b .LBB3_4
; CHECK-6M-NEXT: .p2align 2
; CHECK-6M-NEXT: @ %bb.7:
; CHECK-6M-NEXT: .LCPI3_0:
@@ -267,39 +270,40 @@ define i64 @test_i64(i64 %a) {
; CHECK-8MBASE: @ %bb.0:
; CHECK-8MBASE-NEXT: .save {r4, r5, r7, lr}
; CHECK-8MBASE-NEXT: push {r4, r5, r7, lr}
-; CHECK-8MBASE-NEXT: mov r2, r0
; CHECK-8MBASE-NEXT: movw r5, #46385
; CHECK-8MBASE-NEXT: movt r5, #1916
-; CHECK-8MBASE-NEXT: adr r3, .LCPI3_0
-; CHECK-8MBASE-NEXT: movs r0, #32
-; CHECK-8MBASE-NEXT: mov r4, r0
-; CHECK-8MBASE-NEXT: cbz r1, .LBB3_2
+; CHECK-8MBASE-NEXT: adr r4, .LCPI3_0
+; CHECK-8MBASE-NEXT: movs r3, #32
+; CHECK-8MBASE-NEXT: mov r2, r3
+; CHECK-8MBASE-NEXT: cbnz r0, .LBB3_5
; CHECK-8MBASE-NEXT: @ %bb.1:
-; CHECK-8MBASE-NEXT: rsbs r4, r1, #0
-; CHECK-8MBASE-NEXT: ands r4, r1
-; CHECK-8MBASE-NEXT: muls r4, r5, r4
-; CHECK-8MBASE-NEXT: lsrs r1, r4, #27
-; CHECK-8MBASE-NEXT: ldrb r4, [r3, r1]
+; CHECK-8MBASE-NEXT: cbnz r1, .LBB3_6
; CHECK-8MBASE-NEXT: .LBB3_2:
-; CHECK-8MBASE-NEXT: adds r4, #32
-; CHECK-8MBASE-NEXT: rsbs r1, r2, #0
-; CHECK-8MBASE-NEXT: ands r1, r2
-; CHECK-8MBASE-NEXT: muls r5, r1, r5
-; CHECK-8MBASE-NEXT: lsrs r1, r5, #27
-; CHECK-8MBASE-NEXT: cmp r2, #0
-; CHECK-8MBASE-NEXT: bne .LBB3_5
-; CHECK-8MBASE-NEXT: @ %bb.3:
-; CHECK-8MBASE-NEXT: beq .LBB3_6
+; CHECK-8MBASE-NEXT: cbnz r0, .LBB3_4
+; CHECK-8MBASE-NEXT: .LBB3_3:
+; CHECK-8MBASE-NEXT: adds r3, #32
+; CHECK-8MBASE-NEXT: mov r2, r3
; CHECK-8MBASE-NEXT: .LBB3_4:
; CHECK-8MBASE-NEXT: movs r1, #0
+; CHECK-8MBASE-NEXT: mov r0, r2
; CHECK-8MBASE-NEXT: pop {r4, r5, r7, pc}
; CHECK-8MBASE-NEXT: .LBB3_5:
-; CHECK-8MBASE-NEXT: ldrb r0, [r3, r1]
-; CHECK-8MBASE-NEXT: bne .LBB3_4
+; CHECK-8MBASE-NEXT: rsbs r2, r0, #0
+; CHECK-8MBASE-NEXT: ands r2, r0
+; CHECK-8MBASE-NEXT: muls r2, r5, r2
+; CHECK-8MBASE-NEXT: lsrs r2, r2, #27
+; CHECK-8MBASE-NEXT: ldrb r2, [r4, r2]
+; CHECK-8MBASE-NEXT: cmp r1, #0
+; CHECK-8MBASE-NEXT: beq .LBB3_2
; CHECK-8MBASE-NEXT: .LBB3_6:
-; CHECK-8MBASE-NEXT: mov r0, r4
-; CHECK-8MBASE-NEXT: movs r1, #0
-; CHECK-8MBASE-NEXT: pop {r4, r5, r7, pc}
+; CHECK-8MBASE-NEXT: rsbs r3, r1, #0
+; CHECK-8MBASE-NEXT: ands r3, r1
+; CHECK-8MBASE-NEXT: muls r5, r3, r5
+; CHECK-8MBASE-NEXT: lsrs r1, r5, #27
+; CHECK-8MBASE-NEXT: ldrb r3, [r4, r1]
+; CHECK-8MBASE-NEXT: cmp r0, #0
+; CHECK-8MBASE-NEXT: beq .LBB3_3
+; CHECK-8MBASE-NEXT: b .LBB3_4
; CHECK-8MBASE-NEXT: .p2align 2
; CHECK-8MBASE-NEXT: @ %bb.7:
; CHECK-8MBASE-NEXT: .LCPI3_0:
@@ -490,39 +494,42 @@ define i64 @test_i64_zero_undef(i64 %a) {
; CHECK-6M: @ %bb.0:
; CHECK-6M-NEXT: .save {r4, r5, r7, lr}
; CHECK-6M-NEXT: push {r4, r5, r7, lr}
-; CHECK-6M-NEXT: mov r2, r0
; CHECK-6M-NEXT: ldr r5, .LCPI7_0
-; CHECK-6M-NEXT: adr r3, .LCPI7_1
-; CHECK-6M-NEXT: movs r0, #32
-; CHECK-6M-NEXT: cmp r1, #0
-; CHECK-6M-NEXT: mov r4, r0
-; CHECK-6M-NEXT: beq .LBB7_2
+; CHECK-6M-NEXT: adr r4, .LCPI7_1
+; CHECK-6M-NEXT: movs r3, #32
+; CHECK-6M-NEXT: cmp r0, #0
+; CHECK-6M-NEXT: mov r2, r3
+; CHECK-6M-NEXT: bne .LBB7_5
; CHECK-6M-NEXT: @ %bb.1:
-; CHECK-6M-NEXT: rsbs r4, r1, #0
-; CHECK-6M-NEXT: ands r4, r1
-; CHECK-6M-NEXT: muls r4, r5, r4
-; CHECK-6M-NEXT: lsrs r1, r4, #27
-; CHECK-6M-NEXT: ldrb r4, [r3, r1]
+; CHECK-6M-NEXT: cmp r1, #0
+; CHECK-6M-NEXT: bne .LBB7_6
; CHECK-6M-NEXT: .LBB7_2:
-; CHECK-6M-NEXT: adds r4, #32
-; CHECK-6M-NEXT: rsbs r1, r2, #0
-; CHECK-6M-NEXT: ands r1, r2
-; CHECK-6M-NEXT: muls r5, r1, r5
-; CHECK-6M-NEXT: lsrs r1, r5, #27
-; CHECK-6M-NEXT: cmp r2, #0
-; CHECK-6M-NEXT: bne .LBB7_5
-; CHECK-6M-NEXT: @ %bb.3:
-; CHECK-6M-NEXT: beq .LBB7_6
+; CHECK-6M-NEXT: cmp r0, #0
+; CHECK-6M-NEXT: bne .LBB7_4
+; CHECK-6M-NEXT: .LBB7_3:
+; CHECK-6M-NEXT: adds r3, #32
+; CHECK-6M-NEXT: mov r2, r3
; CHECK-6M-NEXT: .LBB7_4:
; CHECK-6M-NEXT: movs r1, #0
+; CHECK-6M-NEXT: mov r0, r2
; CHECK-6M-NEXT: pop {r4, r5, r7, pc}
; CHECK-6M-NEXT: .LBB7_5:
-; CHECK-6M-NEXT: ldrb r0, [r3, r1]
-; CHECK-6M-NEXT: bne .LBB7_4
+; CHECK-6M-NEXT: rsbs r2, r0, #0
+; CHECK-6M-NEXT: ands r2, r0
+; CHECK-6M-NEXT: muls r2, r5, r2
+; CHECK-6M-NEXT: lsrs r2, r2, #27
+; CHECK-6M-NEXT: ldrb r2, [r4, r2]
+; CHECK-6M-NEXT: cmp r1, #0
+; CHECK-6M-NEXT: beq .LBB7_2
; CHECK-6M-NEXT: .LBB7_6:
-; CHECK-6M-NEXT: mov r0, r4
-; CHECK-6M-NEXT: movs r1, #0
-; CHECK-6M-NEXT: pop {r4, r5, r7, pc}
+; CHECK-6M-NEXT: rsbs r3, r1, #0
+; CHECK-6M-NEXT: ands r3, r1
+; CHECK-6M-NEXT: muls r5, r3, r5
+; CHECK-6M-NEXT: lsrs r1, r5, #27
+; CHECK-6M-NEXT: ldrb r3, [r4, r1]
+; CHECK-6M-NEXT: cmp r0, #0
+; CHECK-6M-NEXT: beq .LBB7_3
+; CHECK-6M-NEXT: b .LBB7_4
; CHECK-6M-NEXT: .p2align 2
; CHECK-6M-NEXT: @ %bb.7:
; CHECK-6M-NEXT: .LCPI7_0:
@@ -534,39 +541,40 @@ define i64 @test_i64_zero_undef(i64 %a) {
; CHECK-8MBASE: @ %bb.0:
; CHECK-8MBASE-NEXT: .save {r4, r5, r7, lr}
; CHECK-8MBASE-NEXT: push {r4, r5, r7, lr}
-; CHECK-8MBASE-NEXT: mov r2, r0
; CHECK-8MBASE-NEXT: movw r5, #46385
; CHECK-8MBASE-NEXT: movt r5, #1916
-; CHECK-8MBASE-NEXT: adr r3, .LCPI7_0
-; CHECK-8MBASE-NEXT: movs r0, #32
-; CHECK-8MBASE-NEXT: mov r4, r0
-; CHECK-8MBASE-NEXT: cbz r1, .LBB7_2
+; CHECK-8MBASE-NEXT: adr r4, .LCPI7_0
+; CHECK-8MBASE-NEXT: movs r3, #32
+; CHECK-8MBASE-NEXT: mov r2, r3
+; CHECK-8MBASE-NEXT: cbnz r0, .LBB7_5
; CHECK-8MBASE-NEXT: @ %bb.1:
-; CHECK-8MBASE-NEXT: rsbs r4, r1, #0
-; CHECK-8MBASE-NEXT: ands r4, r1
-; CHECK-8MBASE-NEXT: muls r4, r5, r4
-; CHECK-8MBASE-NEXT: lsrs r1, r4, #27
-; CHECK-8MBASE-NEXT: ldrb r4, [r3, r1]
+; CHECK-8MBASE-NEXT: cbnz r1, .LBB7_6
; CHECK-8MBASE-NEXT: .LBB7_2:
-; CHECK-8MBASE-NEXT: adds r4, #32
-; CHECK-8MBASE-NEXT: rsbs r1, r2, #0
-; CHECK-8MBASE-NEXT: ands r1, r2
-; CHECK-8MBASE-NEXT: muls r5, r1, r5
-; CHECK-8MBASE-NEXT: lsrs r1, r5, #27
-; CHECK-8MBASE-NEXT: cmp r2, #0
-; CHECK-8MBASE-NEXT: bne .LBB7_5
-; CHECK-8MBASE-NEXT: @ %bb.3:
-; CHECK-8MBASE-NEXT: beq .LBB7_6
+; CHECK-8MBASE-NEXT: cbnz r0, .LBB7_4
+; CHECK-8MBASE-NEXT: .LBB7_3:
+; CHECK-8MBASE-NEXT: adds r3, #32
+; CHECK-8MBASE-NEXT: mov r2, r3
; CHECK-8MBASE-NEXT: .LBB7_4:
; CHECK-8MBASE-NEXT: movs r1, #0
+; CHECK-8MBASE-NEXT: mov r0, r2
; CHECK-8MBASE-NEXT: pop {r4, r5, r7, pc}
; CHECK-8MBASE-NEXT: .LBB7_5:
-; CHECK-8MBASE-NEXT: ldrb r0, [r3, r1]
-; CHECK-8MBASE-NEXT: bne .LBB7_4
+; CHECK-8MBASE-NEXT: rsbs r2, r0, #0
+; CHECK-8MBASE-NEXT: ands r2, r0
+; CHECK-8MBASE-NEXT: muls r2, r5, r2
+; CHECK-8MBASE-NEXT: lsrs r2, r2, #27
+; CHECK-8MBASE-NEXT: ldrb r2, [r4, r2]
+; CHECK-8MBASE-NEXT: cmp r1, #0
+; CHECK-8MBASE-NEXT: beq .LBB7_2
; CHECK-8MBASE-NEXT: .LBB7_6:
-; CHECK-8MBASE-NEXT: mov r0, r4
-; CHECK-8MBASE-NEXT: movs r1, #0
-; CHECK-8MBASE-NEXT: pop {r4, r5, r7, pc}
+; CHECK-8MBASE-NEXT: rsbs r3, r1, #0
+; CHECK-8MBASE-NEXT: ands r3, r1
+; CHECK-8MBASE-NEXT: muls r5, r3, r5
+; CHECK-8MBASE-NEXT: lsrs r1, r5, #27
+; CHECK-8MBASE-NEXT: ldrb r3, [r4, r1]
+; CHECK-8MBASE-NEXT: cmp r0, #0
+; CHECK-8MBASE-NEXT: beq .LBB7_3
+; CHECK-8MBASE-NEXT: b .LBB7_4
; CHECK-8MBASE-NEXT: .p2align 2
; CHECK-8MBASE-NEXT: @ %bb.7:
; CHECK-8MBASE-NEXT: .LCPI7_0:
diff --git a/llvm/test/CodeGen/ARM/fadd-select-fneg-combine.ll b/llvm/test/CodeGen/ARM/fadd-select-fneg-combine.ll
index 90bb02fdc0cd4b..3f2b40460917e4 100644
--- a/llvm/test/CodeGen/ARM/fadd-select-fneg-combine.ll
+++ b/llvm/test/CodeGen/ARM/fadd-select-fneg-combine.ll
@@ -4,12 +4,12 @@
define float @fadd_select_fneg_fneg_f32(i32 %arg0, float %x, float %y, float %z) {
; CHECK-LABEL: fadd_select_fneg_fneg_f32:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov s0, r2
+; CHECK-NEXT: vmov s0, r3
; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: vmov s2, r2
; CHECK-NEXT: vmov s4, r1
-; CHECK-NEXT: vmov s2, r3
-; CHECK-NEXT: vseleq.f32 s0, s4, s0
-; CHECK-NEXT: vsub.f32 s0, s2, s0
+; CHECK-NEXT: vseleq.f32 s2, s4, s2
+; CHECK-NEXT: vsub.f32 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
%cmp = icmp eq i32 %arg0, 0
@@ -248,10 +248,10 @@ define half @fadd_select_fsub_select_f16(i32 %arg0, half %x, half %y, half %z) {
define half @fadd_select_fneg_negk_f16(i32 %arg0, half %x, half %y) {
; CHECK-LABEL: fadd_select_fneg_negk_f16:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov.f16 s0, #4.000000e+00
-; CHECK-NEXT: vmov.f16 s2, r1
+; CHECK-NEXT: vmov.f16 s0, r1
+; CHECK-NEXT: vmov.f16 s2, #4.000000e+00
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: vseleq.f16 s0, s2, s0
+; CHECK-NEXT: vseleq.f16 s0, s0, s2
; CHECK-NEXT: vmov.f16 s2, r2
; CHECK-NEXT: vsub.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
@@ -266,10 +266,10 @@ define half @fadd_select_fneg_negk_f16(i32 %arg0, half %x, half %y) {
define half @fadd_select_fneg_posk_f16(i32 %arg0, half %x, half %y) {
; CHECK-LABEL: fadd_select_fneg_posk_f16:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov.f16 s0, #-4.000000e+00
-; CHECK-NEXT: vmov.f16 s2, r1
+; CHECK-NEXT: vmov.f16 s0, r1
+; CHECK-NEXT: vmov.f16 s2, #-4.000000e+00
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: vseleq.f16 s0, s2, s0
+; CHECK-NEXT: vseleq.f16 s0, s0, s2
; CHECK-NEXT: vmov.f16 s2, r2
; CHECK-NEXT: vsub.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
diff --git a/llvm/test/CodeGen/ARM/fcmp-xo.ll b/llvm/test/CodeGen/ARM/fcmp-xo.ll
index ad39cb744620de..908dbd7a11a6b6 100644
--- a/llvm/test/CodeGen/ARM/fcmp-xo.ll
+++ b/llvm/test/CodeGen/ARM/fcmp-xo.ll
@@ -69,12 +69,12 @@ define arm_aapcs_vfpcc float @float128(float %a0) local_unnamed_addr {
define arm_aapcs_vfpcc double @double1(double %a0) local_unnamed_addr {
; CHECK-LABEL: double1:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov.f64 d16, #1.000000e+00
-; CHECK-NEXT: vcmp.f64 d16, d0
+; CHECK-NEXT: vmov.f64 d18, #1.000000e+00
+; CHECK-NEXT: vcmp.f64 d18, d0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vmov.f64 d17, #5.000000e-01
-; CHECK-NEXT: vmov.f64 d18, #-5.000000e-01
-; CHECK-NEXT: vselgt.f64 d0, d18, d17
+; CHECK-NEXT: vmov.f64 d16, #5.000000e-01
+; CHECK-NEXT: vmov.f64 d17, #-5.000000e-01
+; CHECK-NEXT: vselgt.f64 d0, d17, d16
; CHECK-NEXT: bx lr
%1 = fcmp nsz olt double %a0, 1.000000e+00
%2 = select i1 %1, double -5.000000e-01, double 5.000000e-01
@@ -87,12 +87,12 @@ define arm_aapcs_vfpcc double @double128(double %a0) local_unnamed_addr {
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: movs r1, #0
; CHECK-NEXT: movt r0, #16480
-; CHECK-NEXT: vmov.f64 d17, #5.000000e-01
-; CHECK-NEXT: vmov d16, r1, r0
-; CHECK-NEXT: vcmp.f64 d16, d0
+; CHECK-NEXT: vmov.f64 d16, #5.000000e-01
+; CHECK-NEXT: vmov d18, r1, r0
+; CHECK-NEXT: vcmp.f64 d18, d0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vmov.f64 d18, #-5.000000e-01
-; CHECK-NEXT: vselgt.f64 d0, d18, d17
+; CHECK-NEXT: vmov.f64 d17, #-5.000000e-01
+; CHECK-NEXT: vselgt.f64 d0, d17, d16
; CHECK-NEXT: bx lr
%1 = fcmp nsz olt double %a0, 128.000000e+00
%2 = select i1 %1, double -5.000000e-01, double 5.000000e-01
diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll
index 478b98dfac80f3..8bd8aa7b34dec2 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll
@@ -8,24 +8,27 @@
define i32 @stest_f64i32(double %x) {
; SOFT-LABEL: stest_f64i32:
; SOFT: @ %bb.0: @ %entry
-; SOFT-NEXT: .save {r4, lr}
-; SOFT-NEXT: push {r4, lr}
+; SOFT-NEXT: .save {r4, r5, r7, lr}
+; SOFT-NEXT: push {r4, r5, r7, lr}
; SOFT-NEXT: bl __aeabi_d2lz
-; SOFT-NEXT: movs r2, #0
-; SOFT-NEXT: ldr r3, .LCPI0_0
-; SOFT-NEXT: subs r4, r0, r3
-; SOFT-NEXT: mov r4, r1
-; SOFT-NEXT: sbcs r4, r2
-; SOFT-NEXT: blt .LBB0_2
+; SOFT-NEXT: movs r2, #1
+; SOFT-NEXT: movs r3, #0
+; SOFT-NEXT: ldr r4, .LCPI0_0
+; SOFT-NEXT: subs r5, r0, r4
+; SOFT-NEXT: mov r5, r1
+; SOFT-NEXT: sbcs r5, r3
+; SOFT-NEXT: mov r5, r2
+; SOFT-NEXT: bge .LBB0_7
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: mov r1, r2
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB0_8
; SOFT-NEXT: .LBB0_2: @ %entry
-; SOFT-NEXT: blt .LBB0_4
-; SOFT-NEXT: @ %bb.3: @ %entry
-; SOFT-NEXT: mov r0, r3
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB0_4
+; SOFT-NEXT: .LBB0_3: @ %entry
+; SOFT-NEXT: mov r0, r4
; SOFT-NEXT: .LBB0_4: @ %entry
-; SOFT-NEXT: mvns r3, r2
-; SOFT-NEXT: movs r2, #1
+; SOFT-NEXT: mvns r3, r3
; SOFT-NEXT: lsls r2, r2, #31
; SOFT-NEXT: subs r4, r2, r0
; SOFT-NEXT: sbcs r3, r1
@@ -33,9 +36,18 @@ define i32 @stest_f64i32(double %x) {
; SOFT-NEXT: @ %bb.5: @ %entry
; SOFT-NEXT: mov r0, r2
; SOFT-NEXT: .LBB0_6: @ %entry
-; SOFT-NEXT: pop {r4, pc}
+; SOFT-NEXT: pop {r4, r5, r7, pc}
+; SOFT-NEXT: .LBB0_7: @ %entry
+; SOFT-NEXT: mov r5, r3
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB0_2
+; SOFT-NEXT: .LBB0_8: @ %entry
+; SOFT-NEXT: mov r1, r3
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB0_3
+; SOFT-NEXT: b .LBB0_4
; SOFT-NEXT: .p2align 2
-; SOFT-NEXT: @ %bb.7:
+; SOFT-NEXT: @ %bb.9:
; SOFT-NEXT: .LCPI0_0:
; SOFT-NEXT: .long 2147483647 @ 0x7fffffff
;
@@ -123,33 +135,45 @@ define i32 @ustest_f64i32(double %x) {
; SOFT-NEXT: .save {r4, lr}
; SOFT-NEXT: push {r4, lr}
; SOFT-NEXT: bl __aeabi_d2lz
-; SOFT-NEXT: movs r2, #0
-; SOFT-NEXT: mvns r3, r2
+; SOFT-NEXT: movs r2, #1
+; SOFT-NEXT: movs r3, #0
; SOFT-NEXT: adds r4, r0, #1
; SOFT-NEXT: mov r4, r1
-; SOFT-NEXT: sbcs r4, r2
-; SOFT-NEXT: blt .LBB2_2
+; SOFT-NEXT: sbcs r4, r3
+; SOFT-NEXT: mov r4, r2
+; SOFT-NEXT: bge .LBB2_7
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: mov r1, r2
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB2_8
; SOFT-NEXT: .LBB2_2: @ %entry
-; SOFT-NEXT: blt .LBB2_4
-; SOFT-NEXT: @ %bb.3: @ %entry
-; SOFT-NEXT: mov r0, r3
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB2_4
+; SOFT-NEXT: .LBB2_3: @ %entry
+; SOFT-NEXT: mvns r0, r3
; SOFT-NEXT: .LBB2_4: @ %entry
-; SOFT-NEXT: rsbs r3, r0, #0
-; SOFT-NEXT: mov r3, r2
-; SOFT-NEXT: sbcs r3, r1
-; SOFT-NEXT: blt .LBB2_7
+; SOFT-NEXT: rsbs r4, r0, #0
+; SOFT-NEXT: mov r4, r3
+; SOFT-NEXT: sbcs r4, r1
+; SOFT-NEXT: bge .LBB2_9
; SOFT-NEXT: @ %bb.5: @ %entry
; SOFT-NEXT: cmp r2, #0
-; SOFT-NEXT: beq .LBB2_8
+; SOFT-NEXT: beq .LBB2_10
; SOFT-NEXT: .LBB2_6: @ %entry
; SOFT-NEXT: pop {r4, pc}
-; SOFT-NEXT: .LBB2_7:
-; SOFT-NEXT: movs r2, #1
+; SOFT-NEXT: .LBB2_7: @ %entry
+; SOFT-NEXT: mov r4, r3
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB2_2
+; SOFT-NEXT: .LBB2_8: @ %entry
+; SOFT-NEXT: mov r1, r3
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB2_3
+; SOFT-NEXT: b .LBB2_4
+; SOFT-NEXT: .LBB2_9: @ %entry
+; SOFT-NEXT: mov r2, r3
; SOFT-NEXT: cmp r2, #0
; SOFT-NEXT: bne .LBB2_6
-; SOFT-NEXT: .LBB2_8: @ %entry
+; SOFT-NEXT: .LBB2_10: @ %entry
; SOFT-NEXT: mov r0, r2
; SOFT-NEXT: pop {r4, pc}
;
@@ -196,24 +220,27 @@ entry:
define i32 @stest_f32i32(float %x) {
; SOFT-LABEL: stest_f32i32:
; SOFT: @ %bb.0: @ %entry
-; SOFT-NEXT: .save {r4, lr}
-; SOFT-NEXT: push {r4, lr}
+; SOFT-NEXT: .save {r4, r5, r7, lr}
+; SOFT-NEXT: push {r4, r5, r7, lr}
; SOFT-NEXT: bl __aeabi_f2lz
-; SOFT-NEXT: movs r2, #0
-; SOFT-NEXT: ldr r3, .LCPI3_0
-; SOFT-NEXT: subs r4, r0, r3
-; SOFT-NEXT: mov r4, r1
-; SOFT-NEXT: sbcs r4, r2
-; SOFT-NEXT: blt .LBB3_2
+; SOFT-NEXT: movs r2, #1
+; SOFT-NEXT: movs r3, #0
+; SOFT-NEXT: ldr r4, .LCPI3_0
+; SOFT-NEXT: subs r5, r0, r4
+; SOFT-NEXT: mov r5, r1
+; SOFT-NEXT: sbcs r5, r3
+; SOFT-NEXT: mov r5, r2
+; SOFT-NEXT: bge .LBB3_7
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: mov r1, r2
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB3_8
; SOFT-NEXT: .LBB3_2: @ %entry
-; SOFT-NEXT: blt .LBB3_4
-; SOFT-NEXT: @ %bb.3: @ %entry
-; SOFT-NEXT: mov r0, r3
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB3_4
+; SOFT-NEXT: .LBB3_3: @ %entry
+; SOFT-NEXT: mov r0, r4
; SOFT-NEXT: .LBB3_4: @ %entry
-; SOFT-NEXT: mvns r3, r2
-; SOFT-NEXT: movs r2, #1
+; SOFT-NEXT: mvns r3, r3
; SOFT-NEXT: lsls r2, r2, #31
; SOFT-NEXT: subs r4, r2, r0
; SOFT-NEXT: sbcs r3, r1
@@ -221,9 +248,18 @@ define i32 @stest_f32i32(float %x) {
; SOFT-NEXT: @ %bb.5: @ %entry
; SOFT-NEXT: mov r0, r2
; SOFT-NEXT: .LBB3_6: @ %entry
-; SOFT-NEXT: pop {r4, pc}
+; SOFT-NEXT: pop {r4, r5, r7, pc}
+; SOFT-NEXT: .LBB3_7: @ %entry
+; SOFT-NEXT: mov r5, r3
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB3_2
+; SOFT-NEXT: .LBB3_8: @ %entry
+; SOFT-NEXT: mov r1, r3
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB3_3
+; SOFT-NEXT: b .LBB3_4
; SOFT-NEXT: .p2align 2
-; SOFT-NEXT: @ %bb.7:
+; SOFT-NEXT: @ %bb.9:
; SOFT-NEXT: .LCPI3_0:
; SOFT-NEXT: .long 2147483647 @ 0x7fffffff
;
@@ -276,33 +312,45 @@ define i32 @ustest_f32i32(float %x) {
; SOFT-NEXT: .save {r4, lr}
; SOFT-NEXT: push {r4, lr}
; SOFT-NEXT: bl __aeabi_f2lz
-; SOFT-NEXT: movs r2, #0
-; SOFT-NEXT: mvns r3, r2
+; SOFT-NEXT: movs r2, #1
+; SOFT-NEXT: movs r3, #0
; SOFT-NEXT: adds r4, r0, #1
; SOFT-NEXT: mov r4, r1
-; SOFT-NEXT: sbcs r4, r2
-; SOFT-NEXT: blt .LBB5_2
+; SOFT-NEXT: sbcs r4, r3
+; SOFT-NEXT: mov r4, r2
+; SOFT-NEXT: bge .LBB5_7
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: mov r1, r2
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB5_8
; SOFT-NEXT: .LBB5_2: @ %entry
-; SOFT-NEXT: blt .LBB5_4
-; SOFT-NEXT: @ %bb.3: @ %entry
-; SOFT-NEXT: mov r0, r3
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB5_4
+; SOFT-NEXT: .LBB5_3: @ %entry
+; SOFT-NEXT: mvns r0, r3
; SOFT-NEXT: .LBB5_4: @ %entry
-; SOFT-NEXT: rsbs r3, r0, #0
-; SOFT-NEXT: mov r3, r2
-; SOFT-NEXT: sbcs r3, r1
-; SOFT-NEXT: blt .LBB5_7
+; SOFT-NEXT: rsbs r4, r0, #0
+; SOFT-NEXT: mov r4, r3
+; SOFT-NEXT: sbcs r4, r1
+; SOFT-NEXT: bge .LBB5_9
; SOFT-NEXT: @ %bb.5: @ %entry
; SOFT-NEXT: cmp r2, #0
-; SOFT-NEXT: beq .LBB5_8
+; SOFT-NEXT: beq .LBB5_10
; SOFT-NEXT: .LBB5_6: @ %entry
; SOFT-NEXT: pop {r4, pc}
-; SOFT-NEXT: .LBB5_7:
-; SOFT-NEXT: movs r2, #1
+; SOFT-NEXT: .LBB5_7: @ %entry
+; SOFT-NEXT: mov r4, r3
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB5_2
+; SOFT-NEXT: .LBB5_8: @ %entry
+; SOFT-NEXT: mov r1, r3
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB5_3
+; SOFT-NEXT: b .LBB5_4
+; SOFT-NEXT: .LBB5_9: @ %entry
+; SOFT-NEXT: mov r2, r3
; SOFT-NEXT: cmp r2, #0
; SOFT-NEXT: bne .LBB5_6
-; SOFT-NEXT: .LBB5_8: @ %entry
+; SOFT-NEXT: .LBB5_10: @ %entry
; SOFT-NEXT: mov r0, r2
; SOFT-NEXT: pop {r4, pc}
;
@@ -324,26 +372,29 @@ entry:
define i32 @stest_f16i32(half %x) {
; SOFT-LABEL: stest_f16i32:
; SOFT: @ %bb.0: @ %entry
-; SOFT-NEXT: .save {r4, lr}
-; SOFT-NEXT: push {r4, lr}
+; SOFT-NEXT: .save {r4, r5, r7, lr}
+; SOFT-NEXT: push {r4, r5, r7, lr}
; SOFT-NEXT: uxth r0, r0
; SOFT-NEXT: bl __aeabi_h2f
; SOFT-NEXT: bl __aeabi_f2lz
-; SOFT-NEXT: movs r2, #0
-; SOFT-NEXT: ldr r3, .LCPI6_0
-; SOFT-NEXT: subs r4, r0, r3
-; SOFT-NEXT: mov r4, r1
-; SOFT-NEXT: sbcs r4, r2
-; SOFT-NEXT: blt .LBB6_2
+; SOFT-NEXT: movs r2, #1
+; SOFT-NEXT: movs r3, #0
+; SOFT-NEXT: ldr r4, .LCPI6_0
+; SOFT-NEXT: subs r5, r0, r4
+; SOFT-NEXT: mov r5, r1
+; SOFT-NEXT: sbcs r5, r3
+; SOFT-NEXT: mov r5, r2
+; SOFT-NEXT: bge .LBB6_7
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: mov r1, r2
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB6_8
; SOFT-NEXT: .LBB6_2: @ %entry
-; SOFT-NEXT: blt .LBB6_4
-; SOFT-NEXT: @ %bb.3: @ %entry
-; SOFT-NEXT: mov r0, r3
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB6_4
+; SOFT-NEXT: .LBB6_3: @ %entry
+; SOFT-NEXT: mov r0, r4
; SOFT-NEXT: .LBB6_4: @ %entry
-; SOFT-NEXT: mvns r3, r2
-; SOFT-NEXT: movs r2, #1
+; SOFT-NEXT: mvns r3, r3
; SOFT-NEXT: lsls r2, r2, #31
; SOFT-NEXT: subs r4, r2, r0
; SOFT-NEXT: sbcs r3, r1
@@ -351,9 +402,18 @@ define i32 @stest_f16i32(half %x) {
; SOFT-NEXT: @ %bb.5: @ %entry
; SOFT-NEXT: mov r0, r2
; SOFT-NEXT: .LBB6_6: @ %entry
-; SOFT-NEXT: pop {r4, pc}
+; SOFT-NEXT: pop {r4, r5, r7, pc}
+; SOFT-NEXT: .LBB6_7: @ %entry
+; SOFT-NEXT: mov r5, r3
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB6_2
+; SOFT-NEXT: .LBB6_8: @ %entry
+; SOFT-NEXT: mov r1, r3
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB6_3
+; SOFT-NEXT: b .LBB6_4
; SOFT-NEXT: .p2align 2
-; SOFT-NEXT: @ %bb.7:
+; SOFT-NEXT: @ %bb.9:
; SOFT-NEXT: .LCPI6_0:
; SOFT-NEXT: .long 2147483647 @ 0x7fffffff
;
@@ -432,33 +492,45 @@ define i32 @ustest_f16i32(half %x) {
; SOFT-NEXT: uxth r0, r0
; SOFT-NEXT: bl __aeabi_h2f
; SOFT-NEXT: bl __aeabi_f2lz
-; SOFT-NEXT: movs r2, #0
-; SOFT-NEXT: mvns r3, r2
+; SOFT-NEXT: movs r2, #1
+; SOFT-NEXT: movs r3, #0
; SOFT-NEXT: adds r4, r0, #1
; SOFT-NEXT: mov r4, r1
-; SOFT-NEXT: sbcs r4, r2
-; SOFT-NEXT: blt .LBB8_2
+; SOFT-NEXT: sbcs r4, r3
+; SOFT-NEXT: mov r4, r2
+; SOFT-NEXT: bge .LBB8_7
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: mov r1, r2
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB8_8
; SOFT-NEXT: .LBB8_2: @ %entry
-; SOFT-NEXT: blt .LBB8_4
-; SOFT-NEXT: @ %bb.3: @ %entry
-; SOFT-NEXT: mov r0, r3
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB8_4
+; SOFT-NEXT: .LBB8_3: @ %entry
+; SOFT-NEXT: mvns r0, r3
; SOFT-NEXT: .LBB8_4: @ %entry
-; SOFT-NEXT: rsbs r3, r0, #0
-; SOFT-NEXT: mov r3, r2
-; SOFT-NEXT: sbcs r3, r1
-; SOFT-NEXT: blt .LBB8_7
+; SOFT-NEXT: rsbs r4, r0, #0
+; SOFT-NEXT: mov r4, r3
+; SOFT-NEXT: sbcs r4, r1
+; SOFT-NEXT: bge .LBB8_9
; SOFT-NEXT: @ %bb.5: @ %entry
; SOFT-NEXT: cmp r2, #0
-; SOFT-NEXT: beq .LBB8_8
+; SOFT-NEXT: beq .LBB8_10
; SOFT-NEXT: .LBB8_6: @ %entry
; SOFT-NEXT: pop {r4, pc}
-; SOFT-NEXT: .LBB8_7:
-; SOFT-NEXT: movs r2, #1
+; SOFT-NEXT: .LBB8_7: @ %entry
+; SOFT-NEXT: mov r4, r3
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB8_2
+; SOFT-NEXT: .LBB8_8: @ %entry
+; SOFT-NEXT: mov r1, r3
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB8_3
+; SOFT-NEXT: b .LBB8_4
+; SOFT-NEXT: .LBB8_9: @ %entry
+; SOFT-NEXT: mov r2, r3
; SOFT-NEXT: cmp r2, #0
; SOFT-NEXT: bne .LBB8_6
-; SOFT-NEXT: .LBB8_8: @ %entry
+; SOFT-NEXT: .LBB8_10: @ %entry
; SOFT-NEXT: mov r0, r2
; SOFT-NEXT: pop {r4, pc}
;
@@ -918,62 +990,86 @@ define i64 @stest_f64i64(double %x) {
; SOFT: @ %bb.0: @ %entry
; SOFT-NEXT: .save {r4, r5, r6, r7, lr}
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
-; SOFT-NEXT: .pad #4
-; SOFT-NEXT: sub sp, #4
+; SOFT-NEXT: .pad #12
+; SOFT-NEXT: sub sp, #12
; SOFT-NEXT: bl __fixdfti
-; SOFT-NEXT: movs r4, #0
-; SOFT-NEXT: mvns r5, r4
-; SOFT-NEXT: ldr r6, .LCPI18_0
-; SOFT-NEXT: adds r7, r0, #1
+; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: movs r4, #1
+; SOFT-NEXT: movs r5, #0
+; SOFT-NEXT: ldr r0, .LCPI18_0
+; SOFT-NEXT: adds r7, r6, #1
; SOFT-NEXT: mov r7, r1
-; SOFT-NEXT: sbcs r7, r6
+; SOFT-NEXT: sbcs r7, r0
; SOFT-NEXT: mov r7, r2
-; SOFT-NEXT: sbcs r7, r4
+; SOFT-NEXT: sbcs r7, r5
; SOFT-NEXT: mov r7, r3
-; SOFT-NEXT: sbcs r7, r4
-; SOFT-NEXT: bge .LBB18_8
+; SOFT-NEXT: sbcs r7, r5
+; SOFT-NEXT: mov r7, r4
+; SOFT-NEXT: bge .LBB18_13
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: bge .LBB18_9
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: beq .LBB18_14
; SOFT-NEXT: .LBB18_2: @ %entry
-; SOFT-NEXT: bge .LBB18_10
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB18_4
; SOFT-NEXT: .LBB18_3: @ %entry
-; SOFT-NEXT: blt .LBB18_5
+; SOFT-NEXT: mov r2, r5
; SOFT-NEXT: .LBB18_4: @ %entry
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: .LBB18_5: @ %entry
-; SOFT-NEXT: movs r6, #1
-; SOFT-NEXT: lsls r6, r6, #31
-; SOFT-NEXT: rsbs r7, r0, #0
-; SOFT-NEXT: mov r7, r6
+; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB18_6
+; SOFT-NEXT: @ %bb.5: @ %entry
+; SOFT-NEXT: mov r1, r0
+; SOFT-NEXT: .LBB18_6: @ %entry
+; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT: mvns r0, r5
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB18_8
+; SOFT-NEXT: @ %bb.7: @ %entry
+; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: .LBB18_8: @ %entry
+; SOFT-NEXT: lsls r3, r4, #31
+; SOFT-NEXT: rsbs r7, r6, #0
+; SOFT-NEXT: mov r7, r3
; SOFT-NEXT: sbcs r7, r1
-; SOFT-NEXT: mov r7, r5
+; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
; SOFT-NEXT: sbcs r7, r2
-; SOFT-NEXT: sbcs r5, r3
-; SOFT-NEXT: bge .LBB18_11
-; SOFT-NEXT: @ %bb.6: @ %entry
-; SOFT-NEXT: bge .LBB18_12
-; SOFT-NEXT: .LBB18_7: @ %entry
-; SOFT-NEXT: add sp, #4
-; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
-; SOFT-NEXT: .LBB18_8: @ %entry
-; SOFT-NEXT: mov r3, r4
-; SOFT-NEXT: blt .LBB18_2
-; SOFT-NEXT: .LBB18_9: @ %entry
-; SOFT-NEXT: mov r2, r4
-; SOFT-NEXT: blt .LBB18_3
+; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT: sbcs r0, r2
+; SOFT-NEXT: bge .LBB18_15
+; SOFT-NEXT: @ %bb.9: @ %entry
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB18_16
; SOFT-NEXT: .LBB18_10: @ %entry
-; SOFT-NEXT: mov r1, r6
-; SOFT-NEXT: bge .LBB18_4
-; SOFT-NEXT: b .LBB18_5
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB18_12
; SOFT-NEXT: .LBB18_11: @ %entry
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: blt .LBB18_7
+; SOFT-NEXT: mov r1, r3
; SOFT-NEXT: .LBB18_12: @ %entry
-; SOFT-NEXT: mov r1, r6
-; SOFT-NEXT: add sp, #4
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: add sp, #12
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT: .LBB18_13: @ %entry
+; SOFT-NEXT: mov r7, r5
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB18_2
+; SOFT-NEXT: .LBB18_14: @ %entry
+; SOFT-NEXT: mov r3, r5
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: beq .LBB18_3
+; SOFT-NEXT: b .LBB18_4
+; SOFT-NEXT: .LBB18_15: @ %entry
+; SOFT-NEXT: mov r4, r5
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB18_10
+; SOFT-NEXT: .LBB18_16: @ %entry
+; SOFT-NEXT: mov r6, r5
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB18_11
+; SOFT-NEXT: b .LBB18_12
; SOFT-NEXT: .p2align 2
-; SOFT-NEXT: @ %bb.13:
+; SOFT-NEXT: @ %bb.17:
; SOFT-NEXT: .LCPI18_0:
; SOFT-NEXT: .long 2147483647 @ 0x7fffffff
;
@@ -1004,9 +1100,13 @@ define i64 @stest_f64i64(double %x) {
; VFP2-NEXT: sbcs.w r5, lr, r1
; VFP2-NEXT: sbcs.w r4, r2, r4
; VFP2-NEXT: sbcs r2, r3
-; VFP2-NEXT: itt ge
-; VFP2-NEXT: movge r0, r12
-; VFP2-NEXT: movge r1, lr
+; VFP2-NEXT: mov.w r2, #0
+; VFP2-NEXT: it lt
+; VFP2-NEXT: movlt r2, #1
+; VFP2-NEXT: cmp r2, #0
+; VFP2-NEXT: itt eq
+; VFP2-NEXT: moveq r0, r12
+; VFP2-NEXT: moveq r1, lr
; VFP2-NEXT: pop {r4, r5, r7, pc}
;
; FULL-LABEL: stest_f64i64:
@@ -1031,9 +1131,11 @@ define i64 @stest_f64i64(double %x) {
; FULL-NEXT: sbcs.w r4, r12, r1
; FULL-NEXT: sbcs.w r2, r3, r2
; FULL-NEXT: sbcs.w r2, r3, r5
-; FULL-NEXT: it ge
-; FULL-NEXT: movge r0, #0
-; FULL-NEXT: csel r1, r1, r12, lt
+; FULL-NEXT: cset r2, lt
+; FULL-NEXT: cmp r2, #0
+; FULL-NEXT: it eq
+; FULL-NEXT: moveq r0, #0
+; FULL-NEXT: csel r1, r1, r12, ne
; FULL-NEXT: pop {r4, r5, r7, pc}
entry:
%conv = fptosi double %x to i128
@@ -1054,15 +1156,24 @@ define i64 @utest_f64i64(double %x) {
; SOFT-NEXT: movs r4, #0
; SOFT-NEXT: subs r2, r2, #1
; SOFT-NEXT: sbcs r3, r4
-; SOFT-NEXT: bhs .LBB19_3
+; SOFT-NEXT: blo .LBB19_2
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: bhs .LBB19_4
-; SOFT-NEXT: .LBB19_2: @ %entry
-; SOFT-NEXT: pop {r4, pc}
+; SOFT-NEXT: mov r2, r4
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: beq .LBB19_3
+; SOFT-NEXT: b .LBB19_4
+; SOFT-NEXT: .LBB19_2:
+; SOFT-NEXT: movs r2, #1
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: bne .LBB19_4
; SOFT-NEXT: .LBB19_3: @ %entry
; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: blo .LBB19_2
; SOFT-NEXT: .LBB19_4: @ %entry
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: beq .LBB19_6
+; SOFT-NEXT: @ %bb.5: @ %entry
+; SOFT-NEXT: pop {r4, pc}
+; SOFT-NEXT: .LBB19_6: @ %entry
; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: pop {r4, pc}
;
@@ -1074,9 +1185,13 @@ define i64 @utest_f64i64(double %x) {
; VFP2-NEXT: subs r2, #1
; VFP2-NEXT: mov.w r12, #0
; VFP2-NEXT: sbcs r2, r3, #0
-; VFP2-NEXT: itt hs
-; VFP2-NEXT: movhs r0, r12
-; VFP2-NEXT: movhs r1, r12
+; VFP2-NEXT: mov.w r2, #0
+; VFP2-NEXT: it lo
+; VFP2-NEXT: movlo r2, #1
+; VFP2-NEXT: cmp r2, #0
+; VFP2-NEXT: itt eq
+; VFP2-NEXT: moveq r0, r12
+; VFP2-NEXT: moveq r1, r12
; VFP2-NEXT: pop {r7, pc}
;
; FULL-LABEL: utest_f64i64:
@@ -1085,10 +1200,12 @@ define i64 @utest_f64i64(double %x) {
; FULL-NEXT: push {r7, lr}
; FULL-NEXT: bl __fixunsdfti
; FULL-NEXT: subs r2, #1
-; FULL-NEXT: mov.w r12, #0
; FULL-NEXT: sbcs r2, r3, #0
-; FULL-NEXT: csel r0, r0, r12, lo
-; FULL-NEXT: csel r1, r1, r12, lo
+; FULL-NEXT: mov.w r3, #0
+; FULL-NEXT: cset r2, lo
+; FULL-NEXT: cmp r2, #0
+; FULL-NEXT: csel r0, r0, r3, ne
+; FULL-NEXT: csel r1, r1, r3, ne
; FULL-NEXT: pop {r7, pc}
entry:
%conv = fptoui double %x to i128
@@ -1109,16 +1226,23 @@ define i64 @ustest_f64i64(double %x) {
; SOFT-NEXT: subs r6, r2, #1
; SOFT-NEXT: mov r6, r3
; SOFT-NEXT: sbcs r6, r5
-; SOFT-NEXT: bge .LBB20_9
-; SOFT-NEXT: @ %bb.1: @ %entry
+; SOFT-NEXT: mov r6, r4
; SOFT-NEXT: bge .LBB20_10
+; SOFT-NEXT: @ %bb.1: @ %entry
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: beq .LBB20_11
; SOFT-NEXT: .LBB20_2: @ %entry
-; SOFT-NEXT: bge .LBB20_11
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: beq .LBB20_12
; SOFT-NEXT: .LBB20_3: @ %entry
-; SOFT-NEXT: blt .LBB20_5
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: beq .LBB20_13
; SOFT-NEXT: .LBB20_4: @ %entry
-; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: bne .LBB20_6
; SOFT-NEXT: .LBB20_5: @ %entry
+; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: .LBB20_6: @ %entry
; SOFT-NEXT: rsbs r6, r0, #0
; SOFT-NEXT: mov r6, r5
; SOFT-NEXT: sbcs r6, r1
@@ -1126,32 +1250,41 @@ define i64 @ustest_f64i64(double %x) {
; SOFT-NEXT: sbcs r6, r2
; SOFT-NEXT: mov r2, r5
; SOFT-NEXT: sbcs r2, r3
-; SOFT-NEXT: bge .LBB20_12
-; SOFT-NEXT: @ %bb.6: @ %entry
+; SOFT-NEXT: bge .LBB20_14
+; SOFT-NEXT: @ %bb.7: @ %entry
; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: beq .LBB20_13
-; SOFT-NEXT: .LBB20_7: @ %entry
-; SOFT-NEXT: beq .LBB20_14
+; SOFT-NEXT: beq .LBB20_15
; SOFT-NEXT: .LBB20_8: @ %entry
-; SOFT-NEXT: pop {r4, r5, r6, pc}
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB20_16
; SOFT-NEXT: .LBB20_9: @ %entry
-; SOFT-NEXT: mov r3, r5
-; SOFT-NEXT: blt .LBB20_2
+; SOFT-NEXT: pop {r4, r5, r6, pc}
; SOFT-NEXT: .LBB20_10: @ %entry
-; SOFT-NEXT: mov r2, r4
-; SOFT-NEXT: blt .LBB20_3
+; SOFT-NEXT: mov r6, r5
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: bne .LBB20_2
; SOFT-NEXT: .LBB20_11: @ %entry
-; SOFT-NEXT: mov r1, r5
-; SOFT-NEXT: bge .LBB20_4
-; SOFT-NEXT: b .LBB20_5
+; SOFT-NEXT: mov r3, r5
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: bne .LBB20_3
; SOFT-NEXT: .LBB20_12: @ %entry
+; SOFT-NEXT: mov r2, r4
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: bne .LBB20_4
+; SOFT-NEXT: .LBB20_13: @ %entry
+; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: beq .LBB20_5
+; SOFT-NEXT: b .LBB20_6
+; SOFT-NEXT: .LBB20_14: @ %entry
; SOFT-NEXT: mov r4, r5
; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: bne .LBB20_7
-; SOFT-NEXT: .LBB20_13: @ %entry
-; SOFT-NEXT: mov r0, r4
; SOFT-NEXT: bne .LBB20_8
-; SOFT-NEXT: .LBB20_14: @ %entry
+; SOFT-NEXT: .LBB20_15: @ %entry
+; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB20_9
+; SOFT-NEXT: .LBB20_16: @ %entry
; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: pop {r4, r5, r6, pc}
;
@@ -1163,11 +1296,15 @@ define i64 @ustest_f64i64(double %x) {
; VFP2-NEXT: subs.w lr, r2, #1
; VFP2-NEXT: mov.w r12, #0
; VFP2-NEXT: sbcs lr, r3, #0
-; VFP2-NEXT: itttt ge
-; VFP2-NEXT: movge r3, r12
-; VFP2-NEXT: movge r2, #1
-; VFP2-NEXT: movge r1, r12
-; VFP2-NEXT: movge r0, r12
+; VFP2-NEXT: mov.w lr, #0
+; VFP2-NEXT: it lt
+; VFP2-NEXT: movlt.w lr, #1
+; VFP2-NEXT: cmp.w lr, #0
+; VFP2-NEXT: itttt eq
+; VFP2-NEXT: moveq r3, r12
+; VFP2-NEXT: moveq r2, #1
+; VFP2-NEXT: moveq r1, r12
+; VFP2-NEXT: moveq r0, r12
; VFP2-NEXT: rsbs.w lr, r0, #0
; VFP2-NEXT: sbcs.w lr, r12, r1
; VFP2-NEXT: sbcs.w r2, r12, r2
@@ -1185,18 +1322,20 @@ define i64 @ustest_f64i64(double %x) {
; FULL-NEXT: .save {r7, lr}
; FULL-NEXT: push {r7, lr}
; FULL-NEXT: bl __fixdfti
-; FULL-NEXT: subs.w lr, r2, #1
-; FULL-NEXT: mov.w r12, #0
-; FULL-NEXT: sbcs lr, r3, #0
-; FULL-NEXT: it ge
-; FULL-NEXT: movge r2, #1
-; FULL-NEXT: csel r0, r0, r12, lt
-; FULL-NEXT: csel lr, r3, r12, lt
-; FULL-NEXT: csel r1, r1, r12, lt
+; FULL-NEXT: subs.w r12, r2, #1
+; FULL-NEXT: mov.w lr, #0
+; FULL-NEXT: sbcs r12, r3, #0
+; FULL-NEXT: cset r12, lt
+; FULL-NEXT: cmp.w r12, #0
+; FULL-NEXT: it eq
+; FULL-NEXT: moveq r2, #1
+; FULL-NEXT: csel r0, r0, lr, ne
+; FULL-NEXT: csel r12, r3, lr, ne
+; FULL-NEXT: csel r1, r1, lr, ne
; FULL-NEXT: rsbs r3, r0, #0
-; FULL-NEXT: sbcs.w r3, r12, r1
-; FULL-NEXT: sbcs.w r2, r12, r2
-; FULL-NEXT: sbcs.w r2, r12, lr
+; FULL-NEXT: sbcs.w r3, lr, r1
+; FULL-NEXT: sbcs.w r2, lr, r2
+; FULL-NEXT: sbcs.w r2, lr, r12
; FULL-NEXT: cset r2, lt
; FULL-NEXT: cmp r2, #0
; FULL-NEXT: csel r0, r0, r2, ne
@@ -1217,62 +1356,86 @@ define i64 @stest_f32i64(float %x) {
; SOFT: @ %bb.0: @ %entry
; SOFT-NEXT: .save {r4, r5, r6, r7, lr}
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
-; SOFT-NEXT: .pad #4
-; SOFT-NEXT: sub sp, #4
+; SOFT-NEXT: .pad #12
+; SOFT-NEXT: sub sp, #12
; SOFT-NEXT: bl __fixsfti
-; SOFT-NEXT: movs r4, #0
-; SOFT-NEXT: mvns r5, r4
-; SOFT-NEXT: ldr r6, .LCPI21_0
-; SOFT-NEXT: adds r7, r0, #1
+; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: movs r4, #1
+; SOFT-NEXT: movs r5, #0
+; SOFT-NEXT: ldr r0, .LCPI21_0
+; SOFT-NEXT: adds r7, r6, #1
; SOFT-NEXT: mov r7, r1
-; SOFT-NEXT: sbcs r7, r6
+; SOFT-NEXT: sbcs r7, r0
; SOFT-NEXT: mov r7, r2
-; SOFT-NEXT: sbcs r7, r4
+; SOFT-NEXT: sbcs r7, r5
; SOFT-NEXT: mov r7, r3
-; SOFT-NEXT: sbcs r7, r4
-; SOFT-NEXT: bge .LBB21_8
+; SOFT-NEXT: sbcs r7, r5
+; SOFT-NEXT: mov r7, r4
+; SOFT-NEXT: bge .LBB21_13
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: bge .LBB21_9
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: beq .LBB21_14
; SOFT-NEXT: .LBB21_2: @ %entry
-; SOFT-NEXT: bge .LBB21_10
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB21_4
; SOFT-NEXT: .LBB21_3: @ %entry
-; SOFT-NEXT: blt .LBB21_5
+; SOFT-NEXT: mov r2, r5
; SOFT-NEXT: .LBB21_4: @ %entry
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: .LBB21_5: @ %entry
-; SOFT-NEXT: movs r6, #1
-; SOFT-NEXT: lsls r6, r6, #31
-; SOFT-NEXT: rsbs r7, r0, #0
-; SOFT-NEXT: mov r7, r6
+; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB21_6
+; SOFT-NEXT: @ %bb.5: @ %entry
+; SOFT-NEXT: mov r1, r0
+; SOFT-NEXT: .LBB21_6: @ %entry
+; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT: mvns r0, r5
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB21_8
+; SOFT-NEXT: @ %bb.7: @ %entry
+; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: .LBB21_8: @ %entry
+; SOFT-NEXT: lsls r3, r4, #31
+; SOFT-NEXT: rsbs r7, r6, #0
+; SOFT-NEXT: mov r7, r3
; SOFT-NEXT: sbcs r7, r1
-; SOFT-NEXT: mov r7, r5
+; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
; SOFT-NEXT: sbcs r7, r2
-; SOFT-NEXT: sbcs r5, r3
-; SOFT-NEXT: bge .LBB21_11
-; SOFT-NEXT: @ %bb.6: @ %entry
-; SOFT-NEXT: bge .LBB21_12
-; SOFT-NEXT: .LBB21_7: @ %entry
-; SOFT-NEXT: add sp, #4
-; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
-; SOFT-NEXT: .LBB21_8: @ %entry
-; SOFT-NEXT: mov r3, r4
-; SOFT-NEXT: blt .LBB21_2
-; SOFT-NEXT: .LBB21_9: @ %entry
-; SOFT-NEXT: mov r2, r4
-; SOFT-NEXT: blt .LBB21_3
+; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT: sbcs r0, r2
+; SOFT-NEXT: bge .LBB21_15
+; SOFT-NEXT: @ %bb.9: @ %entry
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB21_16
; SOFT-NEXT: .LBB21_10: @ %entry
-; SOFT-NEXT: mov r1, r6
-; SOFT-NEXT: bge .LBB21_4
-; SOFT-NEXT: b .LBB21_5
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB21_12
; SOFT-NEXT: .LBB21_11: @ %entry
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: blt .LBB21_7
+; SOFT-NEXT: mov r1, r3
; SOFT-NEXT: .LBB21_12: @ %entry
-; SOFT-NEXT: mov r1, r6
-; SOFT-NEXT: add sp, #4
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: add sp, #12
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT: .LBB21_13: @ %entry
+; SOFT-NEXT: mov r7, r5
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB21_2
+; SOFT-NEXT: .LBB21_14: @ %entry
+; SOFT-NEXT: mov r3, r5
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: beq .LBB21_3
+; SOFT-NEXT: b .LBB21_4
+; SOFT-NEXT: .LBB21_15: @ %entry
+; SOFT-NEXT: mov r4, r5
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB21_10
+; SOFT-NEXT: .LBB21_16: @ %entry
+; SOFT-NEXT: mov r6, r5
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB21_11
+; SOFT-NEXT: b .LBB21_12
; SOFT-NEXT: .p2align 2
-; SOFT-NEXT: @ %bb.13:
+; SOFT-NEXT: @ %bb.17:
; SOFT-NEXT: .LCPI21_0:
; SOFT-NEXT: .long 2147483647 @ 0x7fffffff
;
@@ -1303,9 +1466,13 @@ define i64 @stest_f32i64(float %x) {
; VFP2-NEXT: sbcs.w r5, lr, r1
; VFP2-NEXT: sbcs.w r4, r2, r4
; VFP2-NEXT: sbcs r2, r3
-; VFP2-NEXT: itt ge
-; VFP2-NEXT: movge r0, r12
-; VFP2-NEXT: movge r1, lr
+; VFP2-NEXT: mov.w r2, #0
+; VFP2-NEXT: it lt
+; VFP2-NEXT: movlt r2, #1
+; VFP2-NEXT: cmp r2, #0
+; VFP2-NEXT: itt eq
+; VFP2-NEXT: moveq r0, r12
+; VFP2-NEXT: moveq r1, lr
; VFP2-NEXT: pop {r4, r5, r7, pc}
;
; FULL-LABEL: stest_f32i64:
@@ -1330,9 +1497,11 @@ define i64 @stest_f32i64(float %x) {
; FULL-NEXT: sbcs.w r4, r12, r1
; FULL-NEXT: sbcs.w r2, r3, r2
; FULL-NEXT: sbcs.w r2, r3, r5
-; FULL-NEXT: it ge
-; FULL-NEXT: movge r0, #0
-; FULL-NEXT: csel r1, r1, r12, lt
+; FULL-NEXT: cset r2, lt
+; FULL-NEXT: cmp r2, #0
+; FULL-NEXT: it eq
+; FULL-NEXT: moveq r0, #0
+; FULL-NEXT: csel r1, r1, r12, ne
; FULL-NEXT: pop {r4, r5, r7, pc}
entry:
%conv = fptosi float %x to i128
@@ -1353,15 +1522,24 @@ define i64 @utest_f32i64(float %x) {
; SOFT-NEXT: movs r4, #0
; SOFT-NEXT: subs r2, r2, #1
; SOFT-NEXT: sbcs r3, r4
-; SOFT-NEXT: bhs .LBB22_3
+; SOFT-NEXT: blo .LBB22_2
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: bhs .LBB22_4
-; SOFT-NEXT: .LBB22_2: @ %entry
-; SOFT-NEXT: pop {r4, pc}
+; SOFT-NEXT: mov r2, r4
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: beq .LBB22_3
+; SOFT-NEXT: b .LBB22_4
+; SOFT-NEXT: .LBB22_2:
+; SOFT-NEXT: movs r2, #1
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: bne .LBB22_4
; SOFT-NEXT: .LBB22_3: @ %entry
; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: blo .LBB22_2
; SOFT-NEXT: .LBB22_4: @ %entry
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: beq .LBB22_6
+; SOFT-NEXT: @ %bb.5: @ %entry
+; SOFT-NEXT: pop {r4, pc}
+; SOFT-NEXT: .LBB22_6: @ %entry
; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: pop {r4, pc}
;
@@ -1373,9 +1551,13 @@ define i64 @utest_f32i64(float %x) {
; VFP2-NEXT: subs r2, #1
; VFP2-NEXT: mov.w r12, #0
; VFP2-NEXT: sbcs r2, r3, #0
-; VFP2-NEXT: itt hs
-; VFP2-NEXT: movhs r0, r12
-; VFP2-NEXT: movhs r1, r12
+; VFP2-NEXT: mov.w r2, #0
+; VFP2-NEXT: it lo
+; VFP2-NEXT: movlo r2, #1
+; VFP2-NEXT: cmp r2, #0
+; VFP2-NEXT: itt eq
+; VFP2-NEXT: moveq r0, r12
+; VFP2-NEXT: moveq r1, r12
; VFP2-NEXT: pop {r7, pc}
;
; FULL-LABEL: utest_f32i64:
@@ -1384,10 +1566,12 @@ define i64 @utest_f32i64(float %x) {
; FULL-NEXT: push {r7, lr}
; FULL-NEXT: bl __fixunssfti
; FULL-NEXT: subs r2, #1
-; FULL-NEXT: mov.w r12, #0
; FULL-NEXT: sbcs r2, r3, #0
-; FULL-NEXT: csel r0, r0, r12, lo
-; FULL-NEXT: csel r1, r1, r12, lo
+; FULL-NEXT: mov.w r3, #0
+; FULL-NEXT: cset r2, lo
+; FULL-NEXT: cmp r2, #0
+; FULL-NEXT: csel r0, r0, r3, ne
+; FULL-NEXT: csel r1, r1, r3, ne
; FULL-NEXT: pop {r7, pc}
entry:
%conv = fptoui float %x to i128
@@ -1408,16 +1592,23 @@ define i64 @ustest_f32i64(float %x) {
; SOFT-NEXT: subs r6, r2, #1
; SOFT-NEXT: mov r6, r3
; SOFT-NEXT: sbcs r6, r5
-; SOFT-NEXT: bge .LBB23_9
-; SOFT-NEXT: @ %bb.1: @ %entry
+; SOFT-NEXT: mov r6, r4
; SOFT-NEXT: bge .LBB23_10
+; SOFT-NEXT: @ %bb.1: @ %entry
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: beq .LBB23_11
; SOFT-NEXT: .LBB23_2: @ %entry
-; SOFT-NEXT: bge .LBB23_11
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: beq .LBB23_12
; SOFT-NEXT: .LBB23_3: @ %entry
-; SOFT-NEXT: blt .LBB23_5
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: beq .LBB23_13
; SOFT-NEXT: .LBB23_4: @ %entry
-; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: bne .LBB23_6
; SOFT-NEXT: .LBB23_5: @ %entry
+; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: .LBB23_6: @ %entry
; SOFT-NEXT: rsbs r6, r0, #0
; SOFT-NEXT: mov r6, r5
; SOFT-NEXT: sbcs r6, r1
@@ -1425,32 +1616,41 @@ define i64 @ustest_f32i64(float %x) {
; SOFT-NEXT: sbcs r6, r2
; SOFT-NEXT: mov r2, r5
; SOFT-NEXT: sbcs r2, r3
-; SOFT-NEXT: bge .LBB23_12
-; SOFT-NEXT: @ %bb.6: @ %entry
+; SOFT-NEXT: bge .LBB23_14
+; SOFT-NEXT: @ %bb.7: @ %entry
; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: beq .LBB23_13
-; SOFT-NEXT: .LBB23_7: @ %entry
-; SOFT-NEXT: beq .LBB23_14
+; SOFT-NEXT: beq .LBB23_15
; SOFT-NEXT: .LBB23_8: @ %entry
-; SOFT-NEXT: pop {r4, r5, r6, pc}
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB23_16
; SOFT-NEXT: .LBB23_9: @ %entry
-; SOFT-NEXT: mov r3, r5
-; SOFT-NEXT: blt .LBB23_2
+; SOFT-NEXT: pop {r4, r5, r6, pc}
; SOFT-NEXT: .LBB23_10: @ %entry
-; SOFT-NEXT: mov r2, r4
-; SOFT-NEXT: blt .LBB23_3
+; SOFT-NEXT: mov r6, r5
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: bne .LBB23_2
; SOFT-NEXT: .LBB23_11: @ %entry
-; SOFT-NEXT: mov r1, r5
-; SOFT-NEXT: bge .LBB23_4
-; SOFT-NEXT: b .LBB23_5
+; SOFT-NEXT: mov r3, r5
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: bne .LBB23_3
; SOFT-NEXT: .LBB23_12: @ %entry
+; SOFT-NEXT: mov r2, r4
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: bne .LBB23_4
+; SOFT-NEXT: .LBB23_13: @ %entry
+; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: beq .LBB23_5
+; SOFT-NEXT: b .LBB23_6
+; SOFT-NEXT: .LBB23_14: @ %entry
; SOFT-NEXT: mov r4, r5
; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: bne .LBB23_7
-; SOFT-NEXT: .LBB23_13: @ %entry
-; SOFT-NEXT: mov r0, r4
; SOFT-NEXT: bne .LBB23_8
-; SOFT-NEXT: .LBB23_14: @ %entry
+; SOFT-NEXT: .LBB23_15: @ %entry
+; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB23_9
+; SOFT-NEXT: .LBB23_16: @ %entry
; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: pop {r4, r5, r6, pc}
;
@@ -1462,11 +1662,15 @@ define i64 @ustest_f32i64(float %x) {
; VFP2-NEXT: subs.w lr, r2, #1
; VFP2-NEXT: mov.w r12, #0
; VFP2-NEXT: sbcs lr, r3, #0
-; VFP2-NEXT: itttt ge
-; VFP2-NEXT: movge r3, r12
-; VFP2-NEXT: movge r2, #1
-; VFP2-NEXT: movge r1, r12
-; VFP2-NEXT: movge r0, r12
+; VFP2-NEXT: mov.w lr, #0
+; VFP2-NEXT: it lt
+; VFP2-NEXT: movlt.w lr, #1
+; VFP2-NEXT: cmp.w lr, #0
+; VFP2-NEXT: itttt eq
+; VFP2-NEXT: moveq r3, r12
+; VFP2-NEXT: moveq r2, #1
+; VFP2-NEXT: moveq r1, r12
+; VFP2-NEXT: moveq r0, r12
; VFP2-NEXT: rsbs.w lr, r0, #0
; VFP2-NEXT: sbcs.w lr, r12, r1
; VFP2-NEXT: sbcs.w r2, r12, r2
@@ -1484,18 +1688,20 @@ define i64 @ustest_f32i64(float %x) {
; FULL-NEXT: .save {r7, lr}
; FULL-NEXT: push {r7, lr}
; FULL-NEXT: bl __fixsfti
-; FULL-NEXT: subs.w lr, r2, #1
-; FULL-NEXT: mov.w r12, #0
-; FULL-NEXT: sbcs lr, r3, #0
-; FULL-NEXT: it ge
-; FULL-NEXT: movge r2, #1
-; FULL-NEXT: csel r0, r0, r12, lt
-; FULL-NEXT: csel lr, r3, r12, lt
-; FULL-NEXT: csel r1, r1, r12, lt
+; FULL-NEXT: subs.w r12, r2, #1
+; FULL-NEXT: mov.w lr, #0
+; FULL-NEXT: sbcs r12, r3, #0
+; FULL-NEXT: cset r12, lt
+; FULL-NEXT: cmp.w r12, #0
+; FULL-NEXT: it eq
+; FULL-NEXT: moveq r2, #1
+; FULL-NEXT: csel r0, r0, lr, ne
+; FULL-NEXT: csel r12, r3, lr, ne
+; FULL-NEXT: csel r1, r1, lr, ne
; FULL-NEXT: rsbs r3, r0, #0
-; FULL-NEXT: sbcs.w r3, r12, r1
-; FULL-NEXT: sbcs.w r2, r12, r2
-; FULL-NEXT: sbcs.w r2, r12, lr
+; FULL-NEXT: sbcs.w r3, lr, r1
+; FULL-NEXT: sbcs.w r2, lr, r2
+; FULL-NEXT: sbcs.w r2, lr, r12
; FULL-NEXT: cset r2, lt
; FULL-NEXT: cmp r2, #0
; FULL-NEXT: csel r0, r0, r2, ne
@@ -1516,64 +1722,88 @@ define i64 @stest_f16i64(half %x) {
; SOFT: @ %bb.0: @ %entry
; SOFT-NEXT: .save {r4, r5, r6, r7, lr}
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
-; SOFT-NEXT: .pad #4
-; SOFT-NEXT: sub sp, #4
+; SOFT-NEXT: .pad #12
+; SOFT-NEXT: sub sp, #12
; SOFT-NEXT: uxth r0, r0
; SOFT-NEXT: bl __aeabi_h2f
; SOFT-NEXT: bl __fixsfti
-; SOFT-NEXT: movs r4, #0
-; SOFT-NEXT: mvns r5, r4
-; SOFT-NEXT: ldr r6, .LCPI24_0
-; SOFT-NEXT: adds r7, r0, #1
+; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: movs r4, #1
+; SOFT-NEXT: movs r5, #0
+; SOFT-NEXT: ldr r0, .LCPI24_0
+; SOFT-NEXT: adds r7, r6, #1
; SOFT-NEXT: mov r7, r1
-; SOFT-NEXT: sbcs r7, r6
+; SOFT-NEXT: sbcs r7, r0
; SOFT-NEXT: mov r7, r2
-; SOFT-NEXT: sbcs r7, r4
+; SOFT-NEXT: sbcs r7, r5
; SOFT-NEXT: mov r7, r3
-; SOFT-NEXT: sbcs r7, r4
-; SOFT-NEXT: bge .LBB24_8
+; SOFT-NEXT: sbcs r7, r5
+; SOFT-NEXT: mov r7, r4
+; SOFT-NEXT: bge .LBB24_13
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: bge .LBB24_9
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: beq .LBB24_14
; SOFT-NEXT: .LBB24_2: @ %entry
-; SOFT-NEXT: bge .LBB24_10
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB24_4
; SOFT-NEXT: .LBB24_3: @ %entry
-; SOFT-NEXT: blt .LBB24_5
+; SOFT-NEXT: mov r2, r5
; SOFT-NEXT: .LBB24_4: @ %entry
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: .LBB24_5: @ %entry
-; SOFT-NEXT: movs r6, #1
-; SOFT-NEXT: lsls r6, r6, #31
-; SOFT-NEXT: rsbs r7, r0, #0
-; SOFT-NEXT: mov r7, r6
+; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB24_6
+; SOFT-NEXT: @ %bb.5: @ %entry
+; SOFT-NEXT: mov r1, r0
+; SOFT-NEXT: .LBB24_6: @ %entry
+; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT: mvns r0, r5
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB24_8
+; SOFT-NEXT: @ %bb.7: @ %entry
+; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: .LBB24_8: @ %entry
+; SOFT-NEXT: lsls r3, r4, #31
+; SOFT-NEXT: rsbs r7, r6, #0
+; SOFT-NEXT: mov r7, r3
; SOFT-NEXT: sbcs r7, r1
-; SOFT-NEXT: mov r7, r5
+; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
; SOFT-NEXT: sbcs r7, r2
-; SOFT-NEXT: sbcs r5, r3
-; SOFT-NEXT: bge .LBB24_11
-; SOFT-NEXT: @ %bb.6: @ %entry
-; SOFT-NEXT: bge .LBB24_12
-; SOFT-NEXT: .LBB24_7: @ %entry
-; SOFT-NEXT: add sp, #4
-; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
-; SOFT-NEXT: .LBB24_8: @ %entry
-; SOFT-NEXT: mov r3, r4
-; SOFT-NEXT: blt .LBB24_2
-; SOFT-NEXT: .LBB24_9: @ %entry
-; SOFT-NEXT: mov r2, r4
-; SOFT-NEXT: blt .LBB24_3
+; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT: sbcs r0, r2
+; SOFT-NEXT: bge .LBB24_15
+; SOFT-NEXT: @ %bb.9: @ %entry
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB24_16
; SOFT-NEXT: .LBB24_10: @ %entry
-; SOFT-NEXT: mov r1, r6
-; SOFT-NEXT: bge .LBB24_4
-; SOFT-NEXT: b .LBB24_5
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB24_12
; SOFT-NEXT: .LBB24_11: @ %entry
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: blt .LBB24_7
+; SOFT-NEXT: mov r1, r3
; SOFT-NEXT: .LBB24_12: @ %entry
-; SOFT-NEXT: mov r1, r6
-; SOFT-NEXT: add sp, #4
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: add sp, #12
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT: .LBB24_13: @ %entry
+; SOFT-NEXT: mov r7, r5
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB24_2
+; SOFT-NEXT: .LBB24_14: @ %entry
+; SOFT-NEXT: mov r3, r5
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: beq .LBB24_3
+; SOFT-NEXT: b .LBB24_4
+; SOFT-NEXT: .LBB24_15: @ %entry
+; SOFT-NEXT: mov r4, r5
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB24_10
+; SOFT-NEXT: .LBB24_16: @ %entry
+; SOFT-NEXT: mov r6, r5
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB24_11
+; SOFT-NEXT: b .LBB24_12
; SOFT-NEXT: .p2align 2
-; SOFT-NEXT: @ %bb.13:
+; SOFT-NEXT: @ %bb.17:
; SOFT-NEXT: .LCPI24_0:
; SOFT-NEXT: .long 2147483647 @ 0x7fffffff
;
@@ -1607,9 +1837,13 @@ define i64 @stest_f16i64(half %x) {
; VFP2-NEXT: sbcs.w r5, lr, r1
; VFP2-NEXT: sbcs.w r4, r2, r4
; VFP2-NEXT: sbcs r2, r3
-; VFP2-NEXT: itt ge
-; VFP2-NEXT: movge r0, r12
-; VFP2-NEXT: movge r1, lr
+; VFP2-NEXT: mov.w r2, #0
+; VFP2-NEXT: it lt
+; VFP2-NEXT: movlt r2, #1
+; VFP2-NEXT: cmp r2, #0
+; VFP2-NEXT: itt eq
+; VFP2-NEXT: moveq r0, r12
+; VFP2-NEXT: moveq r1, lr
; VFP2-NEXT: pop {r4, r5, r7, pc}
;
; FULL-LABEL: stest_f16i64:
@@ -1636,9 +1870,11 @@ define i64 @stest_f16i64(half %x) {
; FULL-NEXT: sbcs.w r4, r12, r1
; FULL-NEXT: sbcs.w r2, r3, r2
; FULL-NEXT: sbcs.w r2, r3, r5
-; FULL-NEXT: it ge
-; FULL-NEXT: movge r0, #0
-; FULL-NEXT: csel r1, r1, r12, lt
+; FULL-NEXT: cset r2, lt
+; FULL-NEXT: cmp r2, #0
+; FULL-NEXT: it eq
+; FULL-NEXT: moveq r0, #0
+; FULL-NEXT: csel r1, r1, r12, ne
; FULL-NEXT: pop {r4, r5, r7, pc}
entry:
%conv = fptosi half %x to i128
@@ -1661,15 +1897,24 @@ define i64 @utesth_f16i64(half %x) {
; SOFT-NEXT: movs r4, #0
; SOFT-NEXT: subs r2, r2, #1
; SOFT-NEXT: sbcs r3, r4
-; SOFT-NEXT: bhs .LBB25_3
+; SOFT-NEXT: blo .LBB25_2
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: bhs .LBB25_4
-; SOFT-NEXT: .LBB25_2: @ %entry
-; SOFT-NEXT: pop {r4, pc}
+; SOFT-NEXT: mov r2, r4
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: beq .LBB25_3
+; SOFT-NEXT: b .LBB25_4
+; SOFT-NEXT: .LBB25_2:
+; SOFT-NEXT: movs r2, #1
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: bne .LBB25_4
; SOFT-NEXT: .LBB25_3: @ %entry
; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: blo .LBB25_2
; SOFT-NEXT: .LBB25_4: @ %entry
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: beq .LBB25_6
+; SOFT-NEXT: @ %bb.5: @ %entry
+; SOFT-NEXT: pop {r4, pc}
+; SOFT-NEXT: .LBB25_6: @ %entry
; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: pop {r4, pc}
;
@@ -1684,9 +1929,13 @@ define i64 @utesth_f16i64(half %x) {
; VFP2-NEXT: subs r2, #1
; VFP2-NEXT: mov.w r12, #0
; VFP2-NEXT: sbcs r2, r3, #0
-; VFP2-NEXT: itt hs
-; VFP2-NEXT: movhs r0, r12
-; VFP2-NEXT: movhs r1, r12
+; VFP2-NEXT: mov.w r2, #0
+; VFP2-NEXT: it lo
+; VFP2-NEXT: movlo r2, #1
+; VFP2-NEXT: cmp r2, #0
+; VFP2-NEXT: itt eq
+; VFP2-NEXT: moveq r0, r12
+; VFP2-NEXT: moveq r1, r12
; VFP2-NEXT: pop {r7, pc}
;
; FULL-LABEL: utesth_f16i64:
@@ -1697,10 +1946,12 @@ define i64 @utesth_f16i64(half %x) {
; FULL-NEXT: vmov s0, r0
; FULL-NEXT: bl __fixunshfti
; FULL-NEXT: subs r2, #1
-; FULL-NEXT: mov.w r12, #0
; FULL-NEXT: sbcs r2, r3, #0
-; FULL-NEXT: csel r0, r0, r12, lo
-; FULL-NEXT: csel r1, r1, r12, lo
+; FULL-NEXT: mov.w r3, #0
+; FULL-NEXT: cset r2, lo
+; FULL-NEXT: cmp r2, #0
+; FULL-NEXT: csel r0, r0, r3, ne
+; FULL-NEXT: csel r1, r1, r3, ne
; FULL-NEXT: pop {r7, pc}
entry:
%conv = fptoui half %x to i128
@@ -1723,16 +1974,23 @@ define i64 @ustest_f16i64(half %x) {
; SOFT-NEXT: subs r6, r2, #1
; SOFT-NEXT: mov r6, r3
; SOFT-NEXT: sbcs r6, r5
-; SOFT-NEXT: bge .LBB26_9
-; SOFT-NEXT: @ %bb.1: @ %entry
+; SOFT-NEXT: mov r6, r4
; SOFT-NEXT: bge .LBB26_10
+; SOFT-NEXT: @ %bb.1: @ %entry
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: beq .LBB26_11
; SOFT-NEXT: .LBB26_2: @ %entry
-; SOFT-NEXT: bge .LBB26_11
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: beq .LBB26_12
; SOFT-NEXT: .LBB26_3: @ %entry
-; SOFT-NEXT: blt .LBB26_5
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: beq .LBB26_13
; SOFT-NEXT: .LBB26_4: @ %entry
-; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: bne .LBB26_6
; SOFT-NEXT: .LBB26_5: @ %entry
+; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: .LBB26_6: @ %entry
; SOFT-NEXT: rsbs r6, r0, #0
; SOFT-NEXT: mov r6, r5
; SOFT-NEXT: sbcs r6, r1
@@ -1740,32 +1998,41 @@ define i64 @ustest_f16i64(half %x) {
; SOFT-NEXT: sbcs r6, r2
; SOFT-NEXT: mov r2, r5
; SOFT-NEXT: sbcs r2, r3
-; SOFT-NEXT: bge .LBB26_12
-; SOFT-NEXT: @ %bb.6: @ %entry
+; SOFT-NEXT: bge .LBB26_14
+; SOFT-NEXT: @ %bb.7: @ %entry
; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: beq .LBB26_13
-; SOFT-NEXT: .LBB26_7: @ %entry
-; SOFT-NEXT: beq .LBB26_14
+; SOFT-NEXT: beq .LBB26_15
; SOFT-NEXT: .LBB26_8: @ %entry
-; SOFT-NEXT: pop {r4, r5, r6, pc}
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB26_16
; SOFT-NEXT: .LBB26_9: @ %entry
-; SOFT-NEXT: mov r3, r5
-; SOFT-NEXT: blt .LBB26_2
+; SOFT-NEXT: pop {r4, r5, r6, pc}
; SOFT-NEXT: .LBB26_10: @ %entry
-; SOFT-NEXT: mov r2, r4
-; SOFT-NEXT: blt .LBB26_3
+; SOFT-NEXT: mov r6, r5
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: bne .LBB26_2
; SOFT-NEXT: .LBB26_11: @ %entry
-; SOFT-NEXT: mov r1, r5
-; SOFT-NEXT: bge .LBB26_4
-; SOFT-NEXT: b .LBB26_5
+; SOFT-NEXT: mov r3, r5
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: bne .LBB26_3
; SOFT-NEXT: .LBB26_12: @ %entry
+; SOFT-NEXT: mov r2, r4
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: bne .LBB26_4
+; SOFT-NEXT: .LBB26_13: @ %entry
+; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: beq .LBB26_5
+; SOFT-NEXT: b .LBB26_6
+; SOFT-NEXT: .LBB26_14: @ %entry
; SOFT-NEXT: mov r4, r5
; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: bne .LBB26_7
-; SOFT-NEXT: .LBB26_13: @ %entry
-; SOFT-NEXT: mov r0, r4
; SOFT-NEXT: bne .LBB26_8
-; SOFT-NEXT: .LBB26_14: @ %entry
+; SOFT-NEXT: .LBB26_15: @ %entry
+; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB26_9
+; SOFT-NEXT: .LBB26_16: @ %entry
; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: pop {r4, r5, r6, pc}
;
@@ -1780,11 +2047,15 @@ define i64 @ustest_f16i64(half %x) {
; VFP2-NEXT: subs.w lr, r2, #1
; VFP2-NEXT: mov.w r12, #0
; VFP2-NEXT: sbcs lr, r3, #0
-; VFP2-NEXT: itttt ge
-; VFP2-NEXT: movge r3, r12
-; VFP2-NEXT: movge r2, #1
-; VFP2-NEXT: movge r1, r12
-; VFP2-NEXT: movge r0, r12
+; VFP2-NEXT: mov.w lr, #0
+; VFP2-NEXT: it lt
+; VFP2-NEXT: movlt.w lr, #1
+; VFP2-NEXT: cmp.w lr, #0
+; VFP2-NEXT: itttt eq
+; VFP2-NEXT: moveq r3, r12
+; VFP2-NEXT: moveq r2, #1
+; VFP2-NEXT: moveq r1, r12
+; VFP2-NEXT: moveq r0, r12
; VFP2-NEXT: rsbs.w lr, r0, #0
; VFP2-NEXT: sbcs.w lr, r12, r1
; VFP2-NEXT: sbcs.w r2, r12, r2
@@ -1804,18 +2075,20 @@ define i64 @ustest_f16i64(half %x) {
; FULL-NEXT: vmov.f16 r0, s0
; FULL-NEXT: vmov s0, r0
; FULL-NEXT: bl __fixhfti
-; FULL-NEXT: subs.w lr, r2, #1
-; FULL-NEXT: mov.w r12, #0
-; FULL-NEXT: sbcs lr, r3, #0
-; FULL-NEXT: it ge
-; FULL-NEXT: movge r2, #1
-; FULL-NEXT: csel r0, r0, r12, lt
-; FULL-NEXT: csel lr, r3, r12, lt
-; FULL-NEXT: csel r1, r1, r12, lt
+; FULL-NEXT: subs.w r12, r2, #1
+; FULL-NEXT: mov.w lr, #0
+; FULL-NEXT: sbcs r12, r3, #0
+; FULL-NEXT: cset r12, lt
+; FULL-NEXT: cmp.w r12, #0
+; FULL-NEXT: it eq
+; FULL-NEXT: moveq r2, #1
+; FULL-NEXT: csel r0, r0, lr, ne
+; FULL-NEXT: csel r12, r3, lr, ne
+; FULL-NEXT: csel r1, r1, lr, ne
; FULL-NEXT: rsbs r3, r0, #0
-; FULL-NEXT: sbcs.w r3, r12, r1
-; FULL-NEXT: sbcs.w r2, r12, r2
-; FULL-NEXT: sbcs.w r2, r12, lr
+; FULL-NEXT: sbcs.w r3, lr, r1
+; FULL-NEXT: sbcs.w r2, lr, r2
+; FULL-NEXT: sbcs.w r2, lr, r12
; FULL-NEXT: cset r2, lt
; FULL-NEXT: cmp r2, #0
; FULL-NEXT: csel r0, r0, r2, ne
@@ -1848,15 +2121,16 @@ define i32 @stest_f64i32_mm(double %x) {
; SOFT-NEXT: subs r5, r0, r4
; SOFT-NEXT: mov r5, r1
; SOFT-NEXT: sbcs r5, r3
+; SOFT-NEXT: mov r5, r2
; SOFT-NEXT: bge .LBB27_7
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: mov r4, r2
-; SOFT-NEXT: bge .LBB27_8
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB27_8
; SOFT-NEXT: .LBB27_2: @ %entry
-; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: bne .LBB27_4
; SOFT-NEXT: .LBB27_3: @ %entry
-; SOFT-NEXT: mov r1, r4
+; SOFT-NEXT: mov r0, r4
; SOFT-NEXT: .LBB27_4: @ %entry
; SOFT-NEXT: mvns r3, r3
; SOFT-NEXT: lsls r2, r2, #31
@@ -1868,12 +2142,12 @@ define i32 @stest_f64i32_mm(double %x) {
; SOFT-NEXT: .LBB27_6: @ %entry
; SOFT-NEXT: pop {r4, r5, r7, pc}
; SOFT-NEXT: .LBB27_7: @ %entry
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: mov r4, r2
-; SOFT-NEXT: blt .LBB27_2
+; SOFT-NEXT: mov r5, r3
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB27_2
; SOFT-NEXT: .LBB27_8: @ %entry
-; SOFT-NEXT: mov r4, r3
-; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: beq .LBB27_3
; SOFT-NEXT: b .LBB27_4
; SOFT-NEXT: .p2align 2
@@ -1887,17 +2161,16 @@ define i32 @stest_f64i32_mm(double %x) {
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: vmov r0, r1, d0
; VFP2-NEXT: bl __aeabi_d2lz
-; VFP2-NEXT: mvn r2, #-2147483648
-; VFP2-NEXT: subs r3, r0, r2
-; VFP2-NEXT: sbcs r3, r1, #0
-; VFP2-NEXT: it ge
-; VFP2-NEXT: movge r0, r2
+; VFP2-NEXT: mvn r12, #-2147483648
+; VFP2-NEXT: subs.w r3, r0, r12
; VFP2-NEXT: mov.w r2, #0
+; VFP2-NEXT: sbcs r3, r1, #0
; VFP2-NEXT: it lt
; VFP2-NEXT: movlt r2, #1
; VFP2-NEXT: cmp r2, #0
-; VFP2-NEXT: it ne
+; VFP2-NEXT: ite ne
; VFP2-NEXT: movne r2, r1
+; VFP2-NEXT: moveq r0, r12
; VFP2-NEXT: mov.w r1, #-1
; VFP2-NEXT: rsbs.w r3, r0, #-2147483648
; VFP2-NEXT: sbcs r1, r2
@@ -1961,19 +2234,24 @@ define i32 @ustest_f64i32_mm(double %x) {
; SOFT-NEXT: .save {r7, lr}
; SOFT-NEXT: push {r7, lr}
; SOFT-NEXT: bl __aeabi_d2lz
-; SOFT-NEXT: mov r2, r0
-; SOFT-NEXT: movs r0, #0
+; SOFT-NEXT: asrs r3, r1, #31
+; SOFT-NEXT: ands r3, r1
+; SOFT-NEXT: movs r2, #0
; SOFT-NEXT: cmp r1, #1
-; SOFT-NEXT: blt .LBB29_2
+; SOFT-NEXT: bge .LBB29_3
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: mvns r2, r0
+; SOFT-NEXT: cmp r3, #0
+; SOFT-NEXT: bpl .LBB29_4
; SOFT-NEXT: .LBB29_2: @ %entry
-; SOFT-NEXT: asrs r3, r1, #31
-; SOFT-NEXT: ands r3, r1
-; SOFT-NEXT: bmi .LBB29_4
-; SOFT-NEXT: @ %bb.3: @ %entry
; SOFT-NEXT: mov r0, r2
+; SOFT-NEXT: pop {r7, pc}
+; SOFT-NEXT: .LBB29_3: @ %entry
+; SOFT-NEXT: mvns r0, r2
+; SOFT-NEXT: cmp r3, #0
+; SOFT-NEXT: bmi .LBB29_2
; SOFT-NEXT: .LBB29_4: @ %entry
+; SOFT-NEXT: mov r2, r0
+; SOFT-NEXT: mov r0, r2
; SOFT-NEXT: pop {r7, pc}
;
; VFP2-LABEL: ustest_f64i32_mm:
@@ -2015,15 +2293,16 @@ define i32 @stest_f32i32_mm(float %x) {
; SOFT-NEXT: subs r5, r0, r4
; SOFT-NEXT: mov r5, r1
; SOFT-NEXT: sbcs r5, r3
+; SOFT-NEXT: mov r5, r2
; SOFT-NEXT: bge .LBB30_7
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: mov r4, r2
-; SOFT-NEXT: bge .LBB30_8
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB30_8
; SOFT-NEXT: .LBB30_2: @ %entry
-; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: bne .LBB30_4
; SOFT-NEXT: .LBB30_3: @ %entry
-; SOFT-NEXT: mov r1, r4
+; SOFT-NEXT: mov r0, r4
; SOFT-NEXT: .LBB30_4: @ %entry
; SOFT-NEXT: mvns r3, r3
; SOFT-NEXT: lsls r2, r2, #31
@@ -2035,12 +2314,12 @@ define i32 @stest_f32i32_mm(float %x) {
; SOFT-NEXT: .LBB30_6: @ %entry
; SOFT-NEXT: pop {r4, r5, r7, pc}
; SOFT-NEXT: .LBB30_7: @ %entry
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: mov r4, r2
-; SOFT-NEXT: blt .LBB30_2
+; SOFT-NEXT: mov r5, r3
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB30_2
; SOFT-NEXT: .LBB30_8: @ %entry
-; SOFT-NEXT: mov r4, r3
-; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: beq .LBB30_3
; SOFT-NEXT: b .LBB30_4
; SOFT-NEXT: .p2align 2
@@ -2135,15 +2414,16 @@ define i32 @stest_f16i32_mm(half %x) {
; SOFT-NEXT: subs r5, r0, r4
; SOFT-NEXT: mov r5, r1
; SOFT-NEXT: sbcs r5, r3
+; SOFT-NEXT: mov r5, r2
; SOFT-NEXT: bge .LBB33_7
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: mov r4, r2
-; SOFT-NEXT: bge .LBB33_8
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB33_8
; SOFT-NEXT: .LBB33_2: @ %entry
-; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: bne .LBB33_4
; SOFT-NEXT: .LBB33_3: @ %entry
-; SOFT-NEXT: mov r1, r4
+; SOFT-NEXT: mov r0, r4
; SOFT-NEXT: .LBB33_4: @ %entry
; SOFT-NEXT: mvns r3, r3
; SOFT-NEXT: lsls r2, r2, #31
@@ -2155,12 +2435,12 @@ define i32 @stest_f16i32_mm(half %x) {
; SOFT-NEXT: .LBB33_6: @ %entry
; SOFT-NEXT: pop {r4, r5, r7, pc}
; SOFT-NEXT: .LBB33_7: @ %entry
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: mov r4, r2
-; SOFT-NEXT: blt .LBB33_2
+; SOFT-NEXT: mov r5, r3
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB33_2
; SOFT-NEXT: .LBB33_8: @ %entry
-; SOFT-NEXT: mov r4, r3
-; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: beq .LBB33_3
; SOFT-NEXT: b .LBB33_4
; SOFT-NEXT: .p2align 2
@@ -2693,78 +2973,81 @@ define i64 @stest_f64i64_mm(double %x) {
; SOFT: @ %bb.0: @ %entry
; SOFT-NEXT: .save {r4, r5, r6, r7, lr}
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
-; SOFT-NEXT: .pad #12
-; SOFT-NEXT: sub sp, #12
+; SOFT-NEXT: .pad #4
+; SOFT-NEXT: sub sp, #4
; SOFT-NEXT: bl __fixdfti
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: movs r0, #1
+; SOFT-NEXT: movs r4, #1
; SOFT-NEXT: movs r5, #0
; SOFT-NEXT: ldr r6, .LCPI45_0
-; SOFT-NEXT: adds r4, r7, #1
-; SOFT-NEXT: mov r4, r1
-; SOFT-NEXT: sbcs r4, r6
-; SOFT-NEXT: mov r4, r2
-; SOFT-NEXT: sbcs r4, r5
-; SOFT-NEXT: mov r4, r3
-; SOFT-NEXT: sbcs r4, r5
-; SOFT-NEXT: mov r4, r0
-; SOFT-NEXT: blt .LBB45_2
+; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT: adds r0, r0, #1
+; SOFT-NEXT: mov r0, r1
+; SOFT-NEXT: sbcs r0, r6
+; SOFT-NEXT: mov r0, r2
+; SOFT-NEXT: sbcs r0, r5
+; SOFT-NEXT: mov r0, r3
+; SOFT-NEXT: sbcs r0, r5
+; SOFT-NEXT: mov r7, r4
+; SOFT-NEXT: bge .LBB45_12
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: mov r4, r5
-; SOFT-NEXT: .LBB45_2: @ %entry
-; SOFT-NEXT: mvns r6, r5
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: beq .LBB45_12
-; SOFT-NEXT: @ %bb.3: @ %entry
+; SOFT-NEXT: cmp r7, #0
; SOFT-NEXT: beq .LBB45_13
-; SOFT-NEXT: .LBB45_4: @ %entry
-; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: .LBB45_2: @ %entry
+; SOFT-NEXT: cmp r7, #0
; SOFT-NEXT: beq .LBB45_14
+; SOFT-NEXT: .LBB45_3: @ %entry
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB45_5
+; SOFT-NEXT: .LBB45_4: @ %entry
+; SOFT-NEXT: mov r1, r6
; SOFT-NEXT: .LBB45_5: @ %entry
-; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT: mvns r0, r5
+; SOFT-NEXT: cmp r7, #0
; SOFT-NEXT: bne .LBB45_7
-; SOFT-NEXT: .LBB45_6: @ %entry
-; SOFT-NEXT: mov r7, r6
+; SOFT-NEXT: @ %bb.6: @ %entry
+; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
; SOFT-NEXT: .LBB45_7: @ %entry
-; SOFT-NEXT: lsls r3, r0, #31
-; SOFT-NEXT: rsbs r4, r7, #0
-; SOFT-NEXT: mov r4, r3
-; SOFT-NEXT: sbcs r4, r1
-; SOFT-NEXT: mov r4, r6
-; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT: sbcs r4, r2
-; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT: sbcs r6, r2
+; SOFT-NEXT: lsls r6, r4, #31
+; SOFT-NEXT: ldr r7, [sp] @ 4-byte Reload
+; SOFT-NEXT: rsbs r7, r7, #0
+; SOFT-NEXT: mov r7, r6
+; SOFT-NEXT: sbcs r7, r1
+; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: sbcs r7, r2
+; SOFT-NEXT: sbcs r0, r3
; SOFT-NEXT: bge .LBB45_15
; SOFT-NEXT: @ %bb.8: @ %entry
-; SOFT-NEXT: cmp r0, #0
+; SOFT-NEXT: cmp r4, #0
; SOFT-NEXT: beq .LBB45_16
; SOFT-NEXT: .LBB45_9: @ %entry
+; SOFT-NEXT: cmp r4, #0
; SOFT-NEXT: bne .LBB45_11
; SOFT-NEXT: .LBB45_10: @ %entry
-; SOFT-NEXT: mov r1, r3
+; SOFT-NEXT: str r4, [sp] @ 4-byte Spill
; SOFT-NEXT: .LBB45_11: @ %entry
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: add sp, #12
+; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload
+; SOFT-NEXT: add sp, #4
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .LBB45_12: @ %entry
-; SOFT-NEXT: mov r3, r4
-; SOFT-NEXT: bne .LBB45_4
+; SOFT-NEXT: mov r7, r5
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB45_2
; SOFT-NEXT: .LBB45_13: @ %entry
-; SOFT-NEXT: mov r2, r4
-; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: bne .LBB45_5
+; SOFT-NEXT: mov r3, r7
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB45_3
; SOFT-NEXT: .LBB45_14: @ %entry
-; SOFT-NEXT: ldr r1, .LCPI45_0
-; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT: beq .LBB45_6
-; SOFT-NEXT: b .LBB45_7
+; SOFT-NEXT: mov r2, r7
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: beq .LBB45_4
+; SOFT-NEXT: b .LBB45_5
; SOFT-NEXT: .LBB45_15: @ %entry
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: cmp r0, #0
+; SOFT-NEXT: mov r4, r5
+; SOFT-NEXT: cmp r4, #0
; SOFT-NEXT: bne .LBB45_9
; SOFT-NEXT: .LBB45_16: @ %entry
-; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: mov r1, r6
+; SOFT-NEXT: cmp r4, #0
; SOFT-NEXT: beq .LBB45_10
; SOFT-NEXT: b .LBB45_11
; SOFT-NEXT: .p2align 2
@@ -2803,8 +3086,8 @@ define i64 @stest_f64i64_mm(double %x) {
; VFP2-NEXT: movlt.w r12, #1
; VFP2-NEXT: cmp.w r12, #0
; VFP2-NEXT: itt eq
-; VFP2-NEXT: moveq r0, r12
; VFP2-NEXT: moveq r1, lr
+; VFP2-NEXT: moveq r0, r12
; VFP2-NEXT: pop {r4, r5, r7, pc}
;
; FULL-LABEL: stest_f64i64_mm:
@@ -2831,8 +3114,8 @@ define i64 @stest_f64i64_mm(double %x) {
; FULL-NEXT: sbcs.w r2, r3, r5
; FULL-NEXT: cset r2, lt
; FULL-NEXT: cmp r2, #0
-; FULL-NEXT: csel r0, r0, r2, ne
; FULL-NEXT: csel r1, r1, r12, ne
+; FULL-NEXT: csel r0, r0, r2, ne
; FULL-NEXT: pop {r4, r5, r7, pc}
entry:
%conv = fptosi double %x to i128
@@ -2856,6 +3139,7 @@ define i64 @utest_f64i64_mm(double %x) {
; SOFT-NEXT: cmp r4, #0
; SOFT-NEXT: beq .LBB46_5
; SOFT-NEXT: .LBB46_2: @ %entry
+; SOFT-NEXT: cmp r4, #0
; SOFT-NEXT: beq .LBB46_6
; SOFT-NEXT: .LBB46_3: @ %entry
; SOFT-NEXT: pop {r4, pc}
@@ -2865,6 +3149,7 @@ define i64 @utest_f64i64_mm(double %x) {
; SOFT-NEXT: bne .LBB46_2
; SOFT-NEXT: .LBB46_5: @ %entry
; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: cmp r4, #0
; SOFT-NEXT: bne .LBB46_3
; SOFT-NEXT: .LBB46_6: @ %entry
; SOFT-NEXT: mov r1, r4
@@ -2908,8 +3193,8 @@ entry:
define i64 @ustest_f64i64_mm(double %x) {
; SOFT-LABEL: ustest_f64i64_mm:
; SOFT: @ %bb.0: @ %entry
-; SOFT-NEXT: .save {r4, lr}
-; SOFT-NEXT: push {r4, lr}
+; SOFT-NEXT: .save {r4, r5, r7, lr}
+; SOFT-NEXT: push {r4, r5, r7, lr}
; SOFT-NEXT: bl __fixdfti
; SOFT-NEXT: mov r4, r1
; SOFT-NEXT: movs r1, #0
@@ -2918,42 +3203,46 @@ define i64 @ustest_f64i64_mm(double %x) {
; SOFT-NEXT: sbcs r2, r1
; SOFT-NEXT: blt .LBB47_2
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: mov r2, r1
-; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: mov r5, r1
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: beq .LBB47_3
; SOFT-NEXT: b .LBB47_4
; SOFT-NEXT: .LBB47_2:
-; SOFT-NEXT: movs r2, #1
-; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: movs r5, #1
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: bne .LBB47_4
; SOFT-NEXT: .LBB47_3: @ %entry
-; SOFT-NEXT: mov r4, r2
+; SOFT-NEXT: mov r3, r5
; SOFT-NEXT: .LBB47_4: @ %entry
-; SOFT-NEXT: beq .LBB47_10
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB47_6
; SOFT-NEXT: @ %bb.5: @ %entry
-; SOFT-NEXT: bne .LBB47_7
+; SOFT-NEXT: mov r0, r5
; SOFT-NEXT: .LBB47_6: @ %entry
-; SOFT-NEXT: mov r3, r2
-; SOFT-NEXT: .LBB47_7: @ %entry
; SOFT-NEXT: cmp r3, #0
; SOFT-NEXT: mov r2, r1
-; SOFT-NEXT: bpl .LBB47_11
-; SOFT-NEXT: @ %bb.8: @ %entry
+; SOFT-NEXT: bpl .LBB47_10
+; SOFT-NEXT: @ %bb.7: @ %entry
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB47_11
+; SOFT-NEXT: .LBB47_8: @ %entry
+; SOFT-NEXT: cmp r3, #0
; SOFT-NEXT: bpl .LBB47_12
; SOFT-NEXT: .LBB47_9: @ %entry
; SOFT-NEXT: mov r0, r2
-; SOFT-NEXT: pop {r4, pc}
+; SOFT-NEXT: pop {r4, r5, r7, pc}
; SOFT-NEXT: .LBB47_10: @ %entry
-; SOFT-NEXT: mov r0, r2
-; SOFT-NEXT: beq .LBB47_6
-; SOFT-NEXT: b .LBB47_7
-; SOFT-NEXT: .LBB47_11: @ %entry
; SOFT-NEXT: mov r2, r0
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB47_8
+; SOFT-NEXT: .LBB47_11: @ %entry
+; SOFT-NEXT: mov r4, r5
+; SOFT-NEXT: cmp r3, #0
; SOFT-NEXT: bmi .LBB47_9
; SOFT-NEXT: .LBB47_12: @ %entry
; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: mov r0, r2
-; SOFT-NEXT: pop {r4, pc}
+; SOFT-NEXT: pop {r4, r5, r7, pc}
;
; VFP2-LABEL: ustest_f64i64_mm:
; VFP2: @ %bb.0: @ %entry
@@ -2966,13 +3255,17 @@ define i64 @ustest_f64i64_mm(double %x) {
; VFP2-NEXT: it lt
; VFP2-NEXT: movlt.w r12, #1
; VFP2-NEXT: cmp.w r12, #0
-; VFP2-NEXT: itte eq
-; VFP2-NEXT: moveq r1, r12
+; VFP2-NEXT: itt eq
; VFP2-NEXT: moveq r0, r12
-; VFP2-NEXT: movne r12, r3
-; VFP2-NEXT: cmp.w r12, #0
-; VFP2-NEXT: itt mi
+; VFP2-NEXT: moveq r3, r12
+; VFP2-NEXT: cmp r3, #0
+; VFP2-NEXT: it mi
; VFP2-NEXT: movmi r0, #0
+; VFP2-NEXT: cmp.w r12, #0
+; VFP2-NEXT: it eq
+; VFP2-NEXT: moveq r1, r12
+; VFP2-NEXT: cmp r3, #0
+; VFP2-NEXT: it mi
; VFP2-NEXT: movmi r1, #0
; VFP2-NEXT: pop {r7, pc}
;
@@ -2985,12 +3278,15 @@ define i64 @ustest_f64i64_mm(double %x) {
; FULL-NEXT: sbcs r2, r3, #0
; FULL-NEXT: cset r2, lt
; FULL-NEXT: cmp r2, #0
-; FULL-NEXT: csel r1, r1, r2, ne
+; FULL-NEXT: csel r3, r3, r2, ne
; FULL-NEXT: csel r0, r0, r2, ne
-; FULL-NEXT: csel r2, r3, r2, ne
-; FULL-NEXT: cmp r2, #0
-; FULL-NEXT: itt mi
+; FULL-NEXT: cmp r3, #0
+; FULL-NEXT: it mi
; FULL-NEXT: movmi r0, #0
+; FULL-NEXT: cmp r2, #0
+; FULL-NEXT: csel r1, r1, r2, ne
+; FULL-NEXT: cmp r3, #0
+; FULL-NEXT: it mi
; FULL-NEXT: movmi r1, #0
; FULL-NEXT: pop {r7, pc}
entry:
@@ -3006,78 +3302,81 @@ define i64 @stest_f32i64_mm(float %x) {
; SOFT: @ %bb.0: @ %entry
; SOFT-NEXT: .save {r4, r5, r6, r7, lr}
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
-; SOFT-NEXT: .pad #12
-; SOFT-NEXT: sub sp, #12
+; SOFT-NEXT: .pad #4
+; SOFT-NEXT: sub sp, #4
; SOFT-NEXT: bl __fixsfti
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: movs r0, #1
+; SOFT-NEXT: movs r4, #1
; SOFT-NEXT: movs r5, #0
; SOFT-NEXT: ldr r6, .LCPI48_0
-; SOFT-NEXT: adds r4, r7, #1
-; SOFT-NEXT: mov r4, r1
-; SOFT-NEXT: sbcs r4, r6
-; SOFT-NEXT: mov r4, r2
-; SOFT-NEXT: sbcs r4, r5
-; SOFT-NEXT: mov r4, r3
-; SOFT-NEXT: sbcs r4, r5
-; SOFT-NEXT: mov r4, r0
-; SOFT-NEXT: blt .LBB48_2
+; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT: adds r0, r0, #1
+; SOFT-NEXT: mov r0, r1
+; SOFT-NEXT: sbcs r0, r6
+; SOFT-NEXT: mov r0, r2
+; SOFT-NEXT: sbcs r0, r5
+; SOFT-NEXT: mov r0, r3
+; SOFT-NEXT: sbcs r0, r5
+; SOFT-NEXT: mov r7, r4
+; SOFT-NEXT: bge .LBB48_12
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: mov r4, r5
-; SOFT-NEXT: .LBB48_2: @ %entry
-; SOFT-NEXT: mvns r6, r5
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: beq .LBB48_12
-; SOFT-NEXT: @ %bb.3: @ %entry
+; SOFT-NEXT: cmp r7, #0
; SOFT-NEXT: beq .LBB48_13
-; SOFT-NEXT: .LBB48_4: @ %entry
-; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: .LBB48_2: @ %entry
+; SOFT-NEXT: cmp r7, #0
; SOFT-NEXT: beq .LBB48_14
+; SOFT-NEXT: .LBB48_3: @ %entry
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB48_5
+; SOFT-NEXT: .LBB48_4: @ %entry
+; SOFT-NEXT: mov r1, r6
; SOFT-NEXT: .LBB48_5: @ %entry
-; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT: mvns r0, r5
+; SOFT-NEXT: cmp r7, #0
; SOFT-NEXT: bne .LBB48_7
-; SOFT-NEXT: .LBB48_6: @ %entry
-; SOFT-NEXT: mov r7, r6
+; SOFT-NEXT: @ %bb.6: @ %entry
+; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
; SOFT-NEXT: .LBB48_7: @ %entry
-; SOFT-NEXT: lsls r3, r0, #31
-; SOFT-NEXT: rsbs r4, r7, #0
-; SOFT-NEXT: mov r4, r3
-; SOFT-NEXT: sbcs r4, r1
-; SOFT-NEXT: mov r4, r6
-; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT: sbcs r4, r2
-; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT: sbcs r6, r2
+; SOFT-NEXT: lsls r6, r4, #31
+; SOFT-NEXT: ldr r7, [sp] @ 4-byte Reload
+; SOFT-NEXT: rsbs r7, r7, #0
+; SOFT-NEXT: mov r7, r6
+; SOFT-NEXT: sbcs r7, r1
+; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: sbcs r7, r2
+; SOFT-NEXT: sbcs r0, r3
; SOFT-NEXT: bge .LBB48_15
; SOFT-NEXT: @ %bb.8: @ %entry
-; SOFT-NEXT: cmp r0, #0
+; SOFT-NEXT: cmp r4, #0
; SOFT-NEXT: beq .LBB48_16
; SOFT-NEXT: .LBB48_9: @ %entry
+; SOFT-NEXT: cmp r4, #0
; SOFT-NEXT: bne .LBB48_11
; SOFT-NEXT: .LBB48_10: @ %entry
-; SOFT-NEXT: mov r1, r3
+; SOFT-NEXT: str r4, [sp] @ 4-byte Spill
; SOFT-NEXT: .LBB48_11: @ %entry
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: add sp, #12
+; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload
+; SOFT-NEXT: add sp, #4
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .LBB48_12: @ %entry
-; SOFT-NEXT: mov r3, r4
-; SOFT-NEXT: bne .LBB48_4
+; SOFT-NEXT: mov r7, r5
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB48_2
; SOFT-NEXT: .LBB48_13: @ %entry
-; SOFT-NEXT: mov r2, r4
-; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: bne .LBB48_5
+; SOFT-NEXT: mov r3, r7
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB48_3
; SOFT-NEXT: .LBB48_14: @ %entry
-; SOFT-NEXT: ldr r1, .LCPI48_0
-; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT: beq .LBB48_6
-; SOFT-NEXT: b .LBB48_7
+; SOFT-NEXT: mov r2, r7
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: beq .LBB48_4
+; SOFT-NEXT: b .LBB48_5
; SOFT-NEXT: .LBB48_15: @ %entry
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: cmp r0, #0
+; SOFT-NEXT: mov r4, r5
+; SOFT-NEXT: cmp r4, #0
; SOFT-NEXT: bne .LBB48_9
; SOFT-NEXT: .LBB48_16: @ %entry
-; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: mov r1, r6
+; SOFT-NEXT: cmp r4, #0
; SOFT-NEXT: beq .LBB48_10
; SOFT-NEXT: b .LBB48_11
; SOFT-NEXT: .p2align 2
@@ -3116,8 +3415,8 @@ define i64 @stest_f32i64_mm(float %x) {
; VFP2-NEXT: movlt.w r12, #1
; VFP2-NEXT: cmp.w r12, #0
; VFP2-NEXT: itt eq
-; VFP2-NEXT: moveq r0, r12
; VFP2-NEXT: moveq r1, lr
+; VFP2-NEXT: moveq r0, r12
; VFP2-NEXT: pop {r4, r5, r7, pc}
;
; FULL-LABEL: stest_f32i64_mm:
@@ -3144,8 +3443,8 @@ define i64 @stest_f32i64_mm(float %x) {
; FULL-NEXT: sbcs.w r2, r3, r5
; FULL-NEXT: cset r2, lt
; FULL-NEXT: cmp r2, #0
-; FULL-NEXT: csel r0, r0, r2, ne
; FULL-NEXT: csel r1, r1, r12, ne
+; FULL-NEXT: csel r0, r0, r2, ne
; FULL-NEXT: pop {r4, r5, r7, pc}
entry:
%conv = fptosi float %x to i128
@@ -3169,6 +3468,7 @@ define i64 @utest_f32i64_mm(float %x) {
; SOFT-NEXT: cmp r4, #0
; SOFT-NEXT: beq .LBB49_5
; SOFT-NEXT: .LBB49_2: @ %entry
+; SOFT-NEXT: cmp r4, #0
; SOFT-NEXT: beq .LBB49_6
; SOFT-NEXT: .LBB49_3: @ %entry
; SOFT-NEXT: pop {r4, pc}
@@ -3178,6 +3478,7 @@ define i64 @utest_f32i64_mm(float %x) {
; SOFT-NEXT: bne .LBB49_2
; SOFT-NEXT: .LBB49_5: @ %entry
; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: cmp r4, #0
; SOFT-NEXT: bne .LBB49_3
; SOFT-NEXT: .LBB49_6: @ %entry
; SOFT-NEXT: mov r1, r4
@@ -3221,8 +3522,8 @@ entry:
define i64 @ustest_f32i64_mm(float %x) {
; SOFT-LABEL: ustest_f32i64_mm:
; SOFT: @ %bb.0: @ %entry
-; SOFT-NEXT: .save {r4, lr}
-; SOFT-NEXT: push {r4, lr}
+; SOFT-NEXT: .save {r4, r5, r7, lr}
+; SOFT-NEXT: push {r4, r5, r7, lr}
; SOFT-NEXT: bl __fixsfti
; SOFT-NEXT: mov r4, r1
; SOFT-NEXT: movs r1, #0
@@ -3231,42 +3532,46 @@ define i64 @ustest_f32i64_mm(float %x) {
; SOFT-NEXT: sbcs r2, r1
; SOFT-NEXT: blt .LBB50_2
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: mov r2, r1
-; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: mov r5, r1
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: beq .LBB50_3
; SOFT-NEXT: b .LBB50_4
; SOFT-NEXT: .LBB50_2:
-; SOFT-NEXT: movs r2, #1
-; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: movs r5, #1
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: bne .LBB50_4
; SOFT-NEXT: .LBB50_3: @ %entry
-; SOFT-NEXT: mov r4, r2
+; SOFT-NEXT: mov r3, r5
; SOFT-NEXT: .LBB50_4: @ %entry
-; SOFT-NEXT: beq .LBB50_10
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB50_6
; SOFT-NEXT: @ %bb.5: @ %entry
-; SOFT-NEXT: bne .LBB50_7
+; SOFT-NEXT: mov r0, r5
; SOFT-NEXT: .LBB50_6: @ %entry
-; SOFT-NEXT: mov r3, r2
-; SOFT-NEXT: .LBB50_7: @ %entry
; SOFT-NEXT: cmp r3, #0
; SOFT-NEXT: mov r2, r1
-; SOFT-NEXT: bpl .LBB50_11
-; SOFT-NEXT: @ %bb.8: @ %entry
+; SOFT-NEXT: bpl .LBB50_10
+; SOFT-NEXT: @ %bb.7: @ %entry
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB50_11
+; SOFT-NEXT: .LBB50_8: @ %entry
+; SOFT-NEXT: cmp r3, #0
; SOFT-NEXT: bpl .LBB50_12
; SOFT-NEXT: .LBB50_9: @ %entry
; SOFT-NEXT: mov r0, r2
-; SOFT-NEXT: pop {r4, pc}
+; SOFT-NEXT: pop {r4, r5, r7, pc}
; SOFT-NEXT: .LBB50_10: @ %entry
-; SOFT-NEXT: mov r0, r2
-; SOFT-NEXT: beq .LBB50_6
-; SOFT-NEXT: b .LBB50_7
-; SOFT-NEXT: .LBB50_11: @ %entry
; SOFT-NEXT: mov r2, r0
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB50_8
+; SOFT-NEXT: .LBB50_11: @ %entry
+; SOFT-NEXT: mov r4, r5
+; SOFT-NEXT: cmp r3, #0
; SOFT-NEXT: bmi .LBB50_9
; SOFT-NEXT: .LBB50_12: @ %entry
; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: mov r0, r2
-; SOFT-NEXT: pop {r4, pc}
+; SOFT-NEXT: pop {r4, r5, r7, pc}
;
; VFP2-LABEL: ustest_f32i64_mm:
; VFP2: @ %bb.0: @ %entry
@@ -3279,13 +3584,17 @@ define i64 @ustest_f32i64_mm(float %x) {
; VFP2-NEXT: it lt
; VFP2-NEXT: movlt.w r12, #1
; VFP2-NEXT: cmp.w r12, #0
-; VFP2-NEXT: itte eq
-; VFP2-NEXT: moveq r1, r12
+; VFP2-NEXT: itt eq
; VFP2-NEXT: moveq r0, r12
-; VFP2-NEXT: movne r12, r3
-; VFP2-NEXT: cmp.w r12, #0
-; VFP2-NEXT: itt mi
+; VFP2-NEXT: moveq r3, r12
+; VFP2-NEXT: cmp r3, #0
+; VFP2-NEXT: it mi
; VFP2-NEXT: movmi r0, #0
+; VFP2-NEXT: cmp.w r12, #0
+; VFP2-NEXT: it eq
+; VFP2-NEXT: moveq r1, r12
+; VFP2-NEXT: cmp r3, #0
+; VFP2-NEXT: it mi
; VFP2-NEXT: movmi r1, #0
; VFP2-NEXT: pop {r7, pc}
;
@@ -3298,12 +3607,15 @@ define i64 @ustest_f32i64_mm(float %x) {
; FULL-NEXT: sbcs r2, r3, #0
; FULL-NEXT: cset r2, lt
; FULL-NEXT: cmp r2, #0
-; FULL-NEXT: csel r1, r1, r2, ne
+; FULL-NEXT: csel r3, r3, r2, ne
; FULL-NEXT: csel r0, r0, r2, ne
-; FULL-NEXT: csel r2, r3, r2, ne
-; FULL-NEXT: cmp r2, #0
-; FULL-NEXT: itt mi
+; FULL-NEXT: cmp r3, #0
+; FULL-NEXT: it mi
; FULL-NEXT: movmi r0, #0
+; FULL-NEXT: cmp r2, #0
+; FULL-NEXT: csel r1, r1, r2, ne
+; FULL-NEXT: cmp r3, #0
+; FULL-NEXT: it mi
; FULL-NEXT: movmi r1, #0
; FULL-NEXT: pop {r7, pc}
entry:
@@ -3319,80 +3631,83 @@ define i64 @stest_f16i64_mm(half %x) {
; SOFT: @ %bb.0: @ %entry
; SOFT-NEXT: .save {r4, r5, r6, r7, lr}
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
-; SOFT-NEXT: .pad #12
-; SOFT-NEXT: sub sp, #12
+; SOFT-NEXT: .pad #4
+; SOFT-NEXT: sub sp, #4
; SOFT-NEXT: uxth r0, r0
; SOFT-NEXT: bl __aeabi_h2f
; SOFT-NEXT: bl __fixsfti
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: movs r0, #1
+; SOFT-NEXT: movs r4, #1
; SOFT-NEXT: movs r5, #0
; SOFT-NEXT: ldr r6, .LCPI51_0
-; SOFT-NEXT: adds r4, r7, #1
-; SOFT-NEXT: mov r4, r1
-; SOFT-NEXT: sbcs r4, r6
-; SOFT-NEXT: mov r4, r2
-; SOFT-NEXT: sbcs r4, r5
-; SOFT-NEXT: mov r4, r3
-; SOFT-NEXT: sbcs r4, r5
-; SOFT-NEXT: mov r4, r0
-; SOFT-NEXT: blt .LBB51_2
+; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT: adds r0, r0, #1
+; SOFT-NEXT: mov r0, r1
+; SOFT-NEXT: sbcs r0, r6
+; SOFT-NEXT: mov r0, r2
+; SOFT-NEXT: sbcs r0, r5
+; SOFT-NEXT: mov r0, r3
+; SOFT-NEXT: sbcs r0, r5
+; SOFT-NEXT: mov r7, r4
+; SOFT-NEXT: bge .LBB51_12
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: mov r4, r5
-; SOFT-NEXT: .LBB51_2: @ %entry
-; SOFT-NEXT: mvns r6, r5
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: beq .LBB51_12
-; SOFT-NEXT: @ %bb.3: @ %entry
+; SOFT-NEXT: cmp r7, #0
; SOFT-NEXT: beq .LBB51_13
-; SOFT-NEXT: .LBB51_4: @ %entry
-; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: .LBB51_2: @ %entry
+; SOFT-NEXT: cmp r7, #0
; SOFT-NEXT: beq .LBB51_14
+; SOFT-NEXT: .LBB51_3: @ %entry
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB51_5
+; SOFT-NEXT: .LBB51_4: @ %entry
+; SOFT-NEXT: mov r1, r6
; SOFT-NEXT: .LBB51_5: @ %entry
-; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT: mvns r0, r5
+; SOFT-NEXT: cmp r7, #0
; SOFT-NEXT: bne .LBB51_7
-; SOFT-NEXT: .LBB51_6: @ %entry
-; SOFT-NEXT: mov r7, r6
+; SOFT-NEXT: @ %bb.6: @ %entry
+; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
; SOFT-NEXT: .LBB51_7: @ %entry
-; SOFT-NEXT: lsls r3, r0, #31
-; SOFT-NEXT: rsbs r4, r7, #0
-; SOFT-NEXT: mov r4, r3
-; SOFT-NEXT: sbcs r4, r1
-; SOFT-NEXT: mov r4, r6
-; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT: sbcs r4, r2
-; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT: sbcs r6, r2
+; SOFT-NEXT: lsls r6, r4, #31
+; SOFT-NEXT: ldr r7, [sp] @ 4-byte Reload
+; SOFT-NEXT: rsbs r7, r7, #0
+; SOFT-NEXT: mov r7, r6
+; SOFT-NEXT: sbcs r7, r1
+; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: sbcs r7, r2
+; SOFT-NEXT: sbcs r0, r3
; SOFT-NEXT: bge .LBB51_15
; SOFT-NEXT: @ %bb.8: @ %entry
-; SOFT-NEXT: cmp r0, #0
+; SOFT-NEXT: cmp r4, #0
; SOFT-NEXT: beq .LBB51_16
; SOFT-NEXT: .LBB51_9: @ %entry
+; SOFT-NEXT: cmp r4, #0
; SOFT-NEXT: bne .LBB51_11
; SOFT-NEXT: .LBB51_10: @ %entry
-; SOFT-NEXT: mov r1, r3
+; SOFT-NEXT: str r4, [sp] @ 4-byte Spill
; SOFT-NEXT: .LBB51_11: @ %entry
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: add sp, #12
+; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload
+; SOFT-NEXT: add sp, #4
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .LBB51_12: @ %entry
-; SOFT-NEXT: mov r3, r4
-; SOFT-NEXT: bne .LBB51_4
+; SOFT-NEXT: mov r7, r5
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB51_2
; SOFT-NEXT: .LBB51_13: @ %entry
-; SOFT-NEXT: mov r2, r4
-; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: bne .LBB51_5
+; SOFT-NEXT: mov r3, r7
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB51_3
; SOFT-NEXT: .LBB51_14: @ %entry
-; SOFT-NEXT: ldr r1, .LCPI51_0
-; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT: beq .LBB51_6
-; SOFT-NEXT: b .LBB51_7
+; SOFT-NEXT: mov r2, r7
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: beq .LBB51_4
+; SOFT-NEXT: b .LBB51_5
; SOFT-NEXT: .LBB51_15: @ %entry
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: cmp r0, #0
+; SOFT-NEXT: mov r4, r5
+; SOFT-NEXT: cmp r4, #0
; SOFT-NEXT: bne .LBB51_9
; SOFT-NEXT: .LBB51_16: @ %entry
-; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: mov r1, r6
+; SOFT-NEXT: cmp r4, #0
; SOFT-NEXT: beq .LBB51_10
; SOFT-NEXT: b .LBB51_11
; SOFT-NEXT: .p2align 2
@@ -3434,8 +3749,8 @@ define i64 @stest_f16i64_mm(half %x) {
; VFP2-NEXT: movlt.w r12, #1
; VFP2-NEXT: cmp.w r12, #0
; VFP2-NEXT: itt eq
-; VFP2-NEXT: moveq r0, r12
; VFP2-NEXT: moveq r1, lr
+; VFP2-NEXT: moveq r0, r12
; VFP2-NEXT: pop {r4, r5, r7, pc}
;
; FULL-LABEL: stest_f16i64_mm:
@@ -3464,8 +3779,8 @@ define i64 @stest_f16i64_mm(half %x) {
; FULL-NEXT: sbcs.w r2, r3, r5
; FULL-NEXT: cset r2, lt
; FULL-NEXT: cmp r2, #0
-; FULL-NEXT: csel r0, r0, r2, ne
; FULL-NEXT: csel r1, r1, r12, ne
+; FULL-NEXT: csel r0, r0, r2, ne
; FULL-NEXT: pop {r4, r5, r7, pc}
entry:
%conv = fptosi half %x to i128
@@ -3491,6 +3806,7 @@ define i64 @utesth_f16i64_mm(half %x) {
; SOFT-NEXT: cmp r4, #0
; SOFT-NEXT: beq .LBB52_5
; SOFT-NEXT: .LBB52_2: @ %entry
+; SOFT-NEXT: cmp r4, #0
; SOFT-NEXT: beq .LBB52_6
; SOFT-NEXT: .LBB52_3: @ %entry
; SOFT-NEXT: pop {r4, pc}
@@ -3500,6 +3816,7 @@ define i64 @utesth_f16i64_mm(half %x) {
; SOFT-NEXT: bne .LBB52_2
; SOFT-NEXT: .LBB52_5: @ %entry
; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: cmp r4, #0
; SOFT-NEXT: bne .LBB52_3
; SOFT-NEXT: .LBB52_6: @ %entry
; SOFT-NEXT: mov r1, r4
@@ -3548,8 +3865,8 @@ entry:
define i64 @ustest_f16i64_mm(half %x) {
; SOFT-LABEL: ustest_f16i64_mm:
; SOFT: @ %bb.0: @ %entry
-; SOFT-NEXT: .save {r4, lr}
-; SOFT-NEXT: push {r4, lr}
+; SOFT-NEXT: .save {r4, r5, r7, lr}
+; SOFT-NEXT: push {r4, r5, r7, lr}
; SOFT-NEXT: uxth r0, r0
; SOFT-NEXT: bl __aeabi_h2f
; SOFT-NEXT: bl __fixsfti
@@ -3560,42 +3877,46 @@ define i64 @ustest_f16i64_mm(half %x) {
; SOFT-NEXT: sbcs r2, r1
; SOFT-NEXT: blt .LBB53_2
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: mov r2, r1
-; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: mov r5, r1
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: beq .LBB53_3
; SOFT-NEXT: b .LBB53_4
; SOFT-NEXT: .LBB53_2:
-; SOFT-NEXT: movs r2, #1
-; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: movs r5, #1
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: bne .LBB53_4
; SOFT-NEXT: .LBB53_3: @ %entry
-; SOFT-NEXT: mov r4, r2
+; SOFT-NEXT: mov r3, r5
; SOFT-NEXT: .LBB53_4: @ %entry
-; SOFT-NEXT: beq .LBB53_10
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB53_6
; SOFT-NEXT: @ %bb.5: @ %entry
-; SOFT-NEXT: bne .LBB53_7
+; SOFT-NEXT: mov r0, r5
; SOFT-NEXT: .LBB53_6: @ %entry
-; SOFT-NEXT: mov r3, r2
-; SOFT-NEXT: .LBB53_7: @ %entry
; SOFT-NEXT: cmp r3, #0
; SOFT-NEXT: mov r2, r1
-; SOFT-NEXT: bpl .LBB53_11
-; SOFT-NEXT: @ %bb.8: @ %entry
+; SOFT-NEXT: bpl .LBB53_10
+; SOFT-NEXT: @ %bb.7: @ %entry
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB53_11
+; SOFT-NEXT: .LBB53_8: @ %entry
+; SOFT-NEXT: cmp r3, #0
; SOFT-NEXT: bpl .LBB53_12
; SOFT-NEXT: .LBB53_9: @ %entry
; SOFT-NEXT: mov r0, r2
-; SOFT-NEXT: pop {r4, pc}
+; SOFT-NEXT: pop {r4, r5, r7, pc}
; SOFT-NEXT: .LBB53_10: @ %entry
-; SOFT-NEXT: mov r0, r2
-; SOFT-NEXT: beq .LBB53_6
-; SOFT-NEXT: b .LBB53_7
-; SOFT-NEXT: .LBB53_11: @ %entry
; SOFT-NEXT: mov r2, r0
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB53_8
+; SOFT-NEXT: .LBB53_11: @ %entry
+; SOFT-NEXT: mov r4, r5
+; SOFT-NEXT: cmp r3, #0
; SOFT-NEXT: bmi .LBB53_9
; SOFT-NEXT: .LBB53_12: @ %entry
; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: mov r0, r2
-; SOFT-NEXT: pop {r4, pc}
+; SOFT-NEXT: pop {r4, r5, r7, pc}
;
; VFP2-LABEL: ustest_f16i64_mm:
; VFP2: @ %bb.0: @ %entry
@@ -3611,13 +3932,17 @@ define i64 @ustest_f16i64_mm(half %x) {
; VFP2-NEXT: it lt
; VFP2-NEXT: movlt.w r12, #1
; VFP2-NEXT: cmp.w r12, #0
-; VFP2-NEXT: itte eq
-; VFP2-NEXT: moveq r1, r12
+; VFP2-NEXT: itt eq
; VFP2-NEXT: moveq r0, r12
-; VFP2-NEXT: movne r12, r3
-; VFP2-NEXT: cmp.w r12, #0
-; VFP2-NEXT: itt mi
+; VFP2-NEXT: moveq r3, r12
+; VFP2-NEXT: cmp r3, #0
+; VFP2-NEXT: it mi
; VFP2-NEXT: movmi r0, #0
+; VFP2-NEXT: cmp.w r12, #0
+; VFP2-NEXT: it eq
+; VFP2-NEXT: moveq r1, r12
+; VFP2-NEXT: cmp r3, #0
+; VFP2-NEXT: it mi
; VFP2-NEXT: movmi r1, #0
; VFP2-NEXT: pop {r7, pc}
;
@@ -3632,12 +3957,15 @@ define i64 @ustest_f16i64_mm(half %x) {
; FULL-NEXT: sbcs r2, r3, #0
; FULL-NEXT: cset r2, lt
; FULL-NEXT: cmp r2, #0
-; FULL-NEXT: csel r1, r1, r2, ne
+; FULL-NEXT: csel r3, r3, r2, ne
; FULL-NEXT: csel r0, r0, r2, ne
-; FULL-NEXT: csel r2, r3, r2, ne
-; FULL-NEXT: cmp r2, #0
-; FULL-NEXT: itt mi
+; FULL-NEXT: cmp r3, #0
+; FULL-NEXT: it mi
; FULL-NEXT: movmi r0, #0
+; FULL-NEXT: cmp r2, #0
+; FULL-NEXT: csel r1, r1, r2, ne
+; FULL-NEXT: cmp r3, #0
+; FULL-NEXT: it mi
; FULL-NEXT: movmi r1, #0
; FULL-NEXT: pop {r7, pc}
entry:
@@ -3686,15 +4014,16 @@ define void @unroll_maxmin(ptr nocapture %0, ptr nocapture readonly %1, i32 %2)
; SOFT-NEXT: mov r2, r1
; SOFT-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
; SOFT-NEXT: sbcs r2, r3
+; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
; SOFT-NEXT: bge .LBB54_14
; SOFT-NEXT: @ %bb.3: @ in Loop: Header=BB54_2 Depth=1
-; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT: bge .LBB54_15
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: beq .LBB54_15
; SOFT-NEXT: .LBB54_4: @ in Loop: Header=BB54_2 Depth=1
; SOFT-NEXT: cmp r2, #0
; SOFT-NEXT: bne .LBB54_6
; SOFT-NEXT: .LBB54_5: @ in Loop: Header=BB54_2 Depth=1
-; SOFT-NEXT: mov r1, r2
+; SOFT-NEXT: ldr r0, .LCPI54_0
; SOFT-NEXT: .LBB54_6: @ in Loop: Header=BB54_2 Depth=1
; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
; SOFT-NEXT: subs r2, r2, r0
@@ -3720,6 +4049,7 @@ define void @unroll_maxmin(ptr nocapture %0, ptr nocapture readonly %1, i32 %2)
; SOFT-NEXT: cmp r2, #0
; SOFT-NEXT: beq .LBB54_17
; SOFT-NEXT: .LBB54_10: @ in Loop: Header=BB54_2 Depth=1
+; SOFT-NEXT: cmp r2, #0
; SOFT-NEXT: bne .LBB54_12
; SOFT-NEXT: .LBB54_11: @ in Loop: Header=BB54_2 Depth=1
; SOFT-NEXT: ldr r0, .LCPI54_0
@@ -3733,12 +4063,12 @@ define void @unroll_maxmin(ptr nocapture %0, ptr nocapture readonly %1, i32 %2)
; SOFT-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; SOFT-NEXT: b .LBB54_1
; SOFT-NEXT: .LBB54_14: @ in Loop: Header=BB54_2 Depth=1
-; SOFT-NEXT: ldr r0, .LCPI54_0
-; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT: blt .LBB54_4
-; SOFT-NEXT: .LBB54_15: @ in Loop: Header=BB54_2 Depth=1
; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: bne .LBB54_4
+; SOFT-NEXT: .LBB54_15: @ in Loop: Header=BB54_2 Depth=1
+; SOFT-NEXT: mov r1, r2
+; SOFT-NEXT: cmp r2, #0
; SOFT-NEXT: beq .LBB54_5
; SOFT-NEXT: b .LBB54_6
; SOFT-NEXT: .LBB54_16: @ in Loop: Header=BB54_2 Depth=1
@@ -3747,6 +4077,7 @@ define void @unroll_maxmin(ptr nocapture %0, ptr nocapture readonly %1, i32 %2)
; SOFT-NEXT: bne .LBB54_10
; SOFT-NEXT: .LBB54_17: @ in Loop: Header=BB54_2 Depth=1
; SOFT-NEXT: mov r1, r2
+; SOFT-NEXT: cmp r2, #0
; SOFT-NEXT: beq .LBB54_11
; SOFT-NEXT: b .LBB54_12
; SOFT-NEXT: .LBB54_18:
@@ -3849,17 +4180,18 @@ define void @unroll_minmax(ptr nocapture %0, ptr nocapture readonly %1, i32 %2)
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r4, r5, r6, r7, lr}
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
-; SOFT-NEXT: .pad #12
-; SOFT-NEXT: sub sp, #12
+; SOFT-NEXT: .pad #20
+; SOFT-NEXT: sub sp, #20
; SOFT-NEXT: mov r4, r1
; SOFT-NEXT: mov r5, r0
; SOFT-NEXT: movs r0, #0
-; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill
; SOFT-NEXT: mvns r0, r0
-; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill
; SOFT-NEXT: movs r0, #1
; SOFT-NEXT: lsls r1, r0, #31
-; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill
; SOFT-NEXT: lsls r7, r0, #10
; SOFT-NEXT: b .LBB55_2
; SOFT-NEXT: .LBB55_1: @ in Loop: Header=BB55_2 Depth=1
@@ -3867,7 +4199,7 @@ define void @unroll_minmax(ptr nocapture %0, ptr nocapture readonly %1, i32 %2)
; SOFT-NEXT: adds r4, #8
; SOFT-NEXT: adds r5, #8
; SOFT-NEXT: subs r7, r7, #2
-; SOFT-NEXT: beq .LBB55_14
+; SOFT-NEXT: beq .LBB55_18
; SOFT-NEXT: .LBB55_2: @ =>This Inner Loop Header: Depth=1
; SOFT-NEXT: ldr r0, [r4]
; SOFT-NEXT: movs r1, #79
@@ -3875,21 +4207,24 @@ define void @unroll_minmax(ptr nocapture %0, ptr nocapture readonly %1, i32 %2)
; SOFT-NEXT: mov r1, r6
; SOFT-NEXT: bl __aeabi_fmul
; SOFT-NEXT: bl __aeabi_f2lz
-; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT: subs r2, r2, r0
; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: subs r2, r2, r0
+; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
; SOFT-NEXT: sbcs r2, r1
-; SOFT-NEXT: blt .LBB55_4
+; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT: bge .LBB55_14
; SOFT-NEXT: @ %bb.3: @ in Loop: Header=BB55_2 Depth=1
-; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: beq .LBB55_15
; SOFT-NEXT: .LBB55_4: @ in Loop: Header=BB55_2 Depth=1
-; SOFT-NEXT: blt .LBB55_6
-; SOFT-NEXT: @ %bb.5: @ in Loop: Header=BB55_2 Depth=1
-; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: bne .LBB55_6
+; SOFT-NEXT: .LBB55_5: @ in Loop: Header=BB55_2 Depth=1
+; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
; SOFT-NEXT: .LBB55_6: @ in Loop: Header=BB55_2 Depth=1
; SOFT-NEXT: ldr r2, .LCPI55_0
; SOFT-NEXT: subs r2, r0, r2
-; SOFT-NEXT: ldr r2, [sp] @ 4-byte Reload
+; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
; SOFT-NEXT: sbcs r1, r2
; SOFT-NEXT: blt .LBB55_8
; SOFT-NEXT: @ %bb.7: @ in Loop: Header=BB55_2 Depth=1
@@ -3900,31 +4235,52 @@ define void @unroll_minmax(ptr nocapture %0, ptr nocapture readonly %1, i32 %2)
; SOFT-NEXT: mov r1, r6
; SOFT-NEXT: bl __aeabi_fmul
; SOFT-NEXT: bl __aeabi_f2lz
-; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT: subs r2, r2, r0
; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: subs r2, r2, r0
+; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
; SOFT-NEXT: sbcs r2, r1
-; SOFT-NEXT: blt .LBB55_10
+; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT: bge .LBB55_16
; SOFT-NEXT: @ %bb.9: @ in Loop: Header=BB55_2 Depth=1
-; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: beq .LBB55_17
; SOFT-NEXT: .LBB55_10: @ in Loop: Header=BB55_2 Depth=1
-; SOFT-NEXT: blt .LBB55_12
-; SOFT-NEXT: @ %bb.11: @ in Loop: Header=BB55_2 Depth=1
-; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: bne .LBB55_12
+; SOFT-NEXT: .LBB55_11: @ in Loop: Header=BB55_2 Depth=1
+; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
; SOFT-NEXT: .LBB55_12: @ in Loop: Header=BB55_2 Depth=1
; SOFT-NEXT: ldr r2, .LCPI55_0
; SOFT-NEXT: subs r2, r0, r2
-; SOFT-NEXT: ldr r2, [sp] @ 4-byte Reload
+; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
; SOFT-NEXT: sbcs r1, r2
; SOFT-NEXT: blt .LBB55_1
; SOFT-NEXT: @ %bb.13: @ in Loop: Header=BB55_2 Depth=1
; SOFT-NEXT: ldr r0, .LCPI55_0
; SOFT-NEXT: b .LBB55_1
-; SOFT-NEXT: .LBB55_14:
-; SOFT-NEXT: add sp, #12
+; SOFT-NEXT: .LBB55_14: @ in Loop: Header=BB55_2 Depth=1
+; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: bne .LBB55_4
+; SOFT-NEXT: .LBB55_15: @ in Loop: Header=BB55_2 Depth=1
+; SOFT-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: beq .LBB55_5
+; SOFT-NEXT: b .LBB55_6
+; SOFT-NEXT: .LBB55_16: @ in Loop: Header=BB55_2 Depth=1
+; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: bne .LBB55_10
+; SOFT-NEXT: .LBB55_17: @ in Loop: Header=BB55_2 Depth=1
+; SOFT-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: beq .LBB55_11
+; SOFT-NEXT: b .LBB55_12
+; SOFT-NEXT: .LBB55_18:
+; SOFT-NEXT: add sp, #20
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .p2align 2
-; SOFT-NEXT: @ %bb.15:
+; SOFT-NEXT: @ %bb.19:
; SOFT-NEXT: .LCPI55_0:
; SOFT-NEXT: .long 2147483647 @ 0x7fffffff
;
diff --git a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
index 4d091c23026581..78090083a00264 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
@@ -9,58 +9,58 @@ define <2 x i32> @stest_f64i32(<2 x double> %x) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r11, lr}
; CHECK-NEXT: push {r4, r5, r11, lr}
-; CHECK-NEXT: .vsave {d8, d9}
-; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vorr q4, q0, q0
-; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: vmov r0, r1, d8
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r5, r1
-; CHECK-NEXT: vmov r0, r1, d8
-; CHECK-NEXT: vmov.32 d9[0], r4
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: adr r2, .LCPI0_0
+; CHECK-NEXT: vld1.64 {d8, d9}, [r2:128]
+; CHECK-NEXT: vmov.32 d10[0], r4
; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: vmov.32 d8[0], r0
; CHECK-NEXT: mvn r3, #-2147483648
; CHECK-NEXT: subs r4, r4, r3
-; CHECK-NEXT: adr r2, .LCPI0_0
-; CHECK-NEXT: vmov.32 d9[1], r5
-; CHECK-NEXT: sbcs r5, r5, #0
-; CHECK-NEXT: mov r5, #0
-; CHECK-NEXT: mvn r4, #0
-; CHECK-NEXT: movwlt r5, #1
-; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: mvnne r5, #0
+; CHECK-NEXT: sbcs r4, r5, #0
+; CHECK-NEXT: vmov.32 d11[0], r0
+; CHECK-NEXT: mov r4, #0
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: movwlt r4, #1
; CHECK-NEXT: subs r0, r0, r3
; CHECK-NEXT: sbcs r0, r1, #0
-; CHECK-NEXT: vmov.32 d8[1], r1
+; CHECK-NEXT: vmov.32 d11[1], r1
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: vmov.i32 q10, #0x80000000
; CHECK-NEXT: movwlt r0, #1
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: vdup.32 d19, r5
; CHECK-NEXT: mvnne r0, #0
-; CHECK-NEXT: vld1.64 {d16, d17}, [r2:128]
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: vdup.32 d18, r0
-; CHECK-NEXT: vbit q8, q4, q9
-; CHECK-NEXT: vmov r0, r1, d17
-; CHECK-NEXT: vmov r3, r5, d16
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: vmov.32 d10[1], r5
+; CHECK-NEXT: mvnne r4, #0
+; CHECK-NEXT: vdup.32 d17, r0
+; CHECK-NEXT: vdup.32 d16, r4
+; CHECK-NEXT: mvn r4, #0
+; CHECK-NEXT: vbsl q8, q5, q4
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r3, r5, d17
; CHECK-NEXT: rsbs r0, r0, #-2147483648
; CHECK-NEXT: sbcs r0, r4, r1
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mvnne r0, #0
; CHECK-NEXT: rsbs r1, r3, #-2147483648
; CHECK-NEXT: sbcs r1, r4, r5
-; CHECK-NEXT: vdup.32 d19, r0
; CHECK-NEXT: movwlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: mvnne r2, #0
-; CHECK-NEXT: vdup.32 d18, r2
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: vdup.32 d19, r2
+; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: vdup.32 d18, r0
; CHECK-NEXT: vbif q8, q10, q9
; CHECK-NEXT: vmovn.i64 d0, q8
-; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: pop {r4, r5, r11, pc}
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
@@ -95,21 +95,21 @@ define <2 x i32> @utest_f64i32(<2 x double> %x) {
; CHECK-NEXT: vmov.32 d9[0], r4
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: mvn r3, #0
-; CHECK-NEXT: subs r4, r4, r3
-; CHECK-NEXT: sbcs r5, r5, #0
; CHECK-NEXT: vmov.32 d8[0], r0
-; CHECK-NEXT: mov r5, #0
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movwlo r5, #1
-; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: mvnne r5, #0
; CHECK-NEXT: subs r0, r0, r3
+; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: sbcs r0, r1, #0
-; CHECK-NEXT: vdup.32 d17, r5
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: movwlo r0, #1
+; CHECK-NEXT: subs r1, r4, r3
+; CHECK-NEXT: sbcs r1, r5, #0
; CHECK-NEXT: movwlo r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: mvnne r2, #0
-; CHECK-NEXT: vdup.32 d16, r2
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: vdup.32 d17, r2
+; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: vdup.32 d16, r0
; CHECK-NEXT: vand q9, q4, q8
; CHECK-NEXT: vorn q8, q9, q8
; CHECK-NEXT: vmovn.i64 d0, q8
@@ -131,49 +131,49 @@ define <2 x i32> @ustest_f64i32(<2 x double> %x) {
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vorr q4, q0, q0
-; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: vmov r0, r1, d8
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r5, r1
-; CHECK-NEXT: vmov r0, r1, d8
-; CHECK-NEXT: vmov.32 d9[0], r4
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: vmov.32 d8[0], r4
; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: vmov.32 d8[0], r0
; CHECK-NEXT: mvn r3, #0
; CHECK-NEXT: subs r4, r4, r3
+; CHECK-NEXT: sbcs r4, r5, #0
+; CHECK-NEXT: vmov.32 d9[0], r0
+; CHECK-NEXT: mov r4, #0
; CHECK-NEXT: vmov.i64 q9, #0xffffffff
-; CHECK-NEXT: vmov.32 d9[1], r5
-; CHECK-NEXT: sbcs r5, r5, #0
-; CHECK-NEXT: mov r5, #0
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movwlt r5, #1
-; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: mvnne r5, #0
+; CHECK-NEXT: movwlt r4, #1
; CHECK-NEXT: subs r0, r0, r3
; CHECK-NEXT: sbcs r0, r1, #0
-; CHECK-NEXT: vmov.32 d8[1], r1
+; CHECK-NEXT: vmov.32 d9[1], r1
; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movwlt r0, #1
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: vdup.32 d17, r5
; CHECK-NEXT: mvnne r0, #0
-; CHECK-NEXT: vdup.32 d16, r0
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: vmov.32 d8[1], r5
+; CHECK-NEXT: mvnne r4, #0
+; CHECK-NEXT: vdup.32 d17, r0
+; CHECK-NEXT: vdup.32 d16, r4
; CHECK-NEXT: vbsl q8, q4, q9
-; CHECK-NEXT: vmov r0, r1, d17
-; CHECK-NEXT: vmov r3, r5, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r3, r5, d17
; CHECK-NEXT: rsbs r0, r0, #0
; CHECK-NEXT: rscs r0, r1, #0
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mvnne r0, #0
; CHECK-NEXT: rsbs r1, r3, #0
; CHECK-NEXT: rscs r1, r5, #0
-; CHECK-NEXT: vmov.32 d19[0], r0
; CHECK-NEXT: movwlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: mvnne r2, #0
-; CHECK-NEXT: vmov.32 d18[0], r2
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: vmov.32 d19[0], r2
+; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: vmov.32 d18[0], r0
; CHECK-NEXT: vand q8, q8, q9
; CHECK-NEXT: vmovn.i64 d0, q8
; CHECK-NEXT: vpop {d8, d9}
@@ -195,103 +195,106 @@ define <4 x i32> @stest_f32i32(<4 x float> %x) {
; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, sp, #4
-; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vorr q4, q0, q0
-; CHECK-NEXT: vmov r0, s19
+; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: vmov r0, s18
-; CHECK-NEXT: mov r7, r1
-; CHECK-NEXT: adr r1, .LCPI3_0
-; CHECK-NEXT: vld1.64 {d10, d11}, [r1:128]
-; CHECK-NEXT: vmov r5, s17
-; CHECK-NEXT: mov r4, #0
-; CHECK-NEXT: mvn r9, #-2147483648
-; CHECK-NEXT: vmov.32 d13[0], r6
+; CHECK-NEXT: mov r8, r1
+; CHECK-NEXT: vmov r6, s17
+; CHECK-NEXT: vmov r10, s19
+; CHECK-NEXT: vmov.32 d8[0], r7
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: subs r2, r6, r9
-; CHECK-NEXT: vmov.32 d12[0], r0
-; CHECK-NEXT: sbcs r2, r7, #0
-; CHECK-NEXT: vmov r8, s16
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: vmov.32 d13[1], r7
-; CHECK-NEXT: movwlt r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: mvnne r2, #0
-; CHECK-NEXT: subs r0, r0, r9
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: vmov.32 d10[0], r0
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: vmov.32 d9[0], r0
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r11, r1
+; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: mvn r6, #-2147483648
+; CHECK-NEXT: subs r3, r7, r6
+; CHECK-NEXT: sbcs r3, r8, #0
+; CHECK-NEXT: vmov.32 d11[0], r0
+; CHECK-NEXT: mov r3, #0
+; CHECK-NEXT: adr r2, .LCPI3_0
+; CHECK-NEXT: movwlt r3, #1
+; CHECK-NEXT: subs r7, r5, r6
+; CHECK-NEXT: sbcs r7, r4, #0
+; CHECK-NEXT: vmov.32 d11[1], r1
+; CHECK-NEXT: mov r7, #0
+; CHECK-NEXT: movwlt r7, #1
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: mvnne r7, #0
+; CHECK-NEXT: subs r0, r0, r6
; CHECK-NEXT: sbcs r0, r1, #0
-; CHECK-NEXT: vdup.32 d17, r2
+; CHECK-NEXT: vld1.64 {d18, d19}, [r2:128]
; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: vmov.32 d12[1], r1
+; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movwlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mvnne r0, #0
-; CHECK-NEXT: vdup.32 d16, r0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: vorr q4, q8, q8
-; CHECK-NEXT: vbsl q4, q6, q5
-; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: vmov.32 d13[0], r0
-; CHECK-NEXT: subs r0, r0, r9
-; CHECK-NEXT: sbcs r0, r1, #0
-; CHECK-NEXT: mov r6, #0
-; CHECK-NEXT: movwlt r6, #1
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: vmov r11, r10, d8
-; CHECK-NEXT: vmov.32 d13[1], r1
-; CHECK-NEXT: mvnne r6, #0
-; CHECK-NEXT: vmov r5, r7, d9
-; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: vmov.32 d12[0], r0
-; CHECK-NEXT: subs r0, r0, r9
-; CHECK-NEXT: sbcs r0, r1, #0
+; CHECK-NEXT: vmov.32 d10[1], r4
+; CHECK-NEXT: vdup.32 d17, r0
+; CHECK-NEXT: subs r0, r9, r6
+; CHECK-NEXT: sbcs r0, r11, #0
+; CHECK-NEXT: vdup.32 d16, r7
; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: vdup.32 d17, r6
+; CHECK-NEXT: vbsl q8, q5, q9
; CHECK-NEXT: movwlt r0, #1
; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: vmov.32 d9[1], r11
; CHECK-NEXT: mvnne r0, #0
-; CHECK-NEXT: vmov.32 d12[1], r1
-; CHECK-NEXT: rsbs r3, r11, #-2147483648
-; CHECK-NEXT: vdup.32 d16, r0
-; CHECK-NEXT: mvn r0, #0
-; CHECK-NEXT: vbsl q8, q6, q5
-; CHECK-NEXT: adr r1, .LCPI3_1
-; CHECK-NEXT: vld1.64 {d18, d19}, [r1:128]
-; CHECK-NEXT: sbcs r3, r0, r10
-; CHECK-NEXT: mov r3, #0
-; CHECK-NEXT: vmov r1, r2, d17
-; CHECK-NEXT: movwlt r3, #1
; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: mvn r6, #0
+; CHECK-NEXT: vdup.32 d21, r0
; CHECK-NEXT: mvnne r3, #0
-; CHECK-NEXT: rsbs r6, r5, #-2147483648
-; CHECK-NEXT: vmov r6, r5, d16
-; CHECK-NEXT: sbcs r7, r0, r7
+; CHECK-NEXT: vmov.32 d8[1], r8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vdup.32 d20, r3
+; CHECK-NEXT: vbit q9, q4, q10
+; CHECK-NEXT: adr r5, .LCPI3_1
+; CHECK-NEXT: vld1.64 {d20, d21}, [r5:128]
+; CHECK-NEXT: vmov r5, r4, d17
+; CHECK-NEXT: vmov r3, r7, d18
+; CHECK-NEXT: rsbs r0, r0, #-2147483648
+; CHECK-NEXT: sbcs r0, r6, r1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: movwlt r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: rsbs r1, r3, #-2147483648
+; CHECK-NEXT: vmov r1, r3, d19
+; CHECK-NEXT: sbcs r7, r6, r7
; CHECK-NEXT: mov r7, #0
; CHECK-NEXT: movwlt r7, #1
+; CHECK-NEXT: rsbs r5, r5, #-2147483648
+; CHECK-NEXT: sbcs r5, r6, r4
+; CHECK-NEXT: mov r5, #0
+; CHECK-NEXT: movwlt r5, #1
+; CHECK-NEXT: rsbs r1, r1, #-2147483648
+; CHECK-NEXT: sbcs r1, r6, r3
+; CHECK-NEXT: movwlt r2, #1
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: mvnne r2, #0
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: mvnne r5, #0
; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: vdup.32 d25, r5
; CHECK-NEXT: mvnne r7, #0
-; CHECK-NEXT: vdup.32 d23, r7
-; CHECK-NEXT: vdup.32 d22, r3
-; CHECK-NEXT: vbsl q11, q4, q9
-; CHECK-NEXT: vmovn.i64 d1, q11
-; CHECK-NEXT: rsbs r1, r1, #-2147483648
-; CHECK-NEXT: sbcs r1, r0, r2
-; CHECK-NEXT: mov r1, #0
-; CHECK-NEXT: movwlt r1, #1
-; CHECK-NEXT: cmp r1, #0
-; CHECK-NEXT: mvnne r1, #0
-; CHECK-NEXT: rsbs r2, r6, #-2147483648
-; CHECK-NEXT: sbcs r0, r0, r5
-; CHECK-NEXT: vdup.32 d21, r1
-; CHECK-NEXT: movwlt r4, #1
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: mvnne r4, #0
-; CHECK-NEXT: vdup.32 d20, r4
-; CHECK-NEXT: vbif q8, q9, q10
-; CHECK-NEXT: vmovn.i64 d0, q8
-; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: vdup.32 d23, r2
+; CHECK-NEXT: vdup.32 d24, r0
+; CHECK-NEXT: vbif q8, q10, q12
+; CHECK-NEXT: vdup.32 d22, r7
+; CHECK-NEXT: vbif q9, q10, q11
+; CHECK-NEXT: vmovn.i64 d1, q8
+; CHECK-NEXT: vmovn.i64 d0, q9
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: add sp, sp, #4
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
; CHECK-NEXT: .p2align 4
@@ -326,52 +329,52 @@ define <4 x i32> @utest_f32i32(<4 x float> %x) {
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: vmov r0, s17
; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: mov r10, r0
+; CHECK-NEXT: mov r9, r0
; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: mov r8, r1
-; CHECK-NEXT: vmov r5, s19
+; CHECK-NEXT: vmov r6, s19
; CHECK-NEXT: vmov r7, s18
-; CHECK-NEXT: vmov.32 d9[0], r10
+; CHECK-NEXT: vmov.32 d9[0], r9
; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: vmov.32 d8[0], r0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r9, r1
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: vmov.32 d11[0], r0
; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: mov r10, r1
; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: mvn r3, #0
+; CHECK-NEXT: mvn r7, #0
+; CHECK-NEXT: subs r2, r5, r7
+; CHECK-NEXT: sbcs r2, r4, #0
; CHECK-NEXT: vmov.32 d10[0], r0
-; CHECK-NEXT: subs r0, r0, r3
; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: mov r3, #0
+; CHECK-NEXT: movwlo r2, #1
+; CHECK-NEXT: subs r0, r0, r7
; CHECK-NEXT: sbcs r0, r1, #0
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movwlo r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mvnne r0, #0
-; CHECK-NEXT: subs r1, r5, r3
-; CHECK-NEXT: sbcs r1, r4, #0
+; CHECK-NEXT: subs r1, r6, r7
+; CHECK-NEXT: sbcs r1, r10, #0
; CHECK-NEXT: mov r1, #0
; CHECK-NEXT: movwlo r1, #1
+; CHECK-NEXT: subs r7, r9, r7
+; CHECK-NEXT: sbcs r7, r8, #0
+; CHECK-NEXT: movwlo r3, #1
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: mvnne r3, #0
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: mvnne r1, #0
-; CHECK-NEXT: subs r7, r10, r3
-; CHECK-NEXT: sbcs r7, r8, #0
-; CHECK-NEXT: vdup.32 d19, r1
-; CHECK-NEXT: mov r7, #0
-; CHECK-NEXT: vdup.32 d18, r0
-; CHECK-NEXT: movwlo r7, #1
-; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: mvnne r7, #0
-; CHECK-NEXT: subs r3, r6, r3
-; CHECK-NEXT: sbcs r3, r9, #0
-; CHECK-NEXT: vdup.32 d17, r7
-; CHECK-NEXT: movwlo r2, #1
; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: vdup.32 d19, r1
; CHECK-NEXT: mvnne r2, #0
+; CHECK-NEXT: vdup.32 d17, r3
+; CHECK-NEXT: vdup.32 d18, r0
; CHECK-NEXT: vand q10, q5, q9
; CHECK-NEXT: vdup.32 d16, r2
; CHECK-NEXT: vand q11, q4, q8
@@ -397,96 +400,97 @@ define <4 x i32> @ustest_f32i32(<4 x float> %x) {
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT: vorr q4, q0, q0
-; CHECK-NEXT: vmov r0, s19
+; CHECK-NEXT: vmov r0, s17
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: vmov r0, s18
-; CHECK-NEXT: mov r7, r1
-; CHECK-NEXT: vmov r5, s17
-; CHECK-NEXT: vmov r8, s16
-; CHECK-NEXT: vmov.32 d9[0], r6
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: vmov r0, s16
+; CHECK-NEXT: mov r6, r1
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: mvn r9, #0
-; CHECK-NEXT: subs r2, r6, r9
-; CHECK-NEXT: sbcs r2, r7, #0
-; CHECK-NEXT: vmov.32 d8[0], r0
+; CHECK-NEXT: mov r2, r0
+; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: vmov.32 d16[0], r2
+; CHECK-NEXT: mvn r4, #0
+; CHECK-NEXT: subs r2, r2, r4
+; CHECK-NEXT: vmov r8, s19
+; CHECK-NEXT: sbcs r2, r1, #0
+; CHECK-NEXT: vmov.32 d17[0], r5
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: vmov.i64 q5, #0xffffffff
; CHECK-NEXT: movwlt r2, #1
+; CHECK-NEXT: subs r3, r5, r4
+; CHECK-NEXT: sbcs r3, r6, #0
+; CHECK-NEXT: vmov.32 d17[1], r6
+; CHECK-NEXT: mov r3, #0
+; CHECK-NEXT: mov r7, #0
+; CHECK-NEXT: movwlt r3, #1
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: mvnne r3, #0
; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: vdup.32 d19, r3
; CHECK-NEXT: mvnne r2, #0
-; CHECK-NEXT: subs r0, r0, r9
-; CHECK-NEXT: sbcs r0, r1, #0
-; CHECK-NEXT: vmov.32 d9[1], r7
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: mov r4, #0
-; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: vmov.32 d8[1], r1
-; CHECK-NEXT: mvnne r0, #0
-; CHECK-NEXT: vdup.32 d17, r2
-; CHECK-NEXT: vdup.32 d16, r0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: vbif q4, q5, q8
+; CHECK-NEXT: vdup.32 d18, r2
+; CHECK-NEXT: vmov.32 d16[1], r1
+; CHECK-NEXT: vorr q4, q9, q9
+; CHECK-NEXT: vbsl q4, q8, q5
+; CHECK-NEXT: vmov r10, r9, d8
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: vmov.32 d13[0], r0
+; CHECK-NEXT: vmov.32 d12[0], r0
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r6, r1
-; CHECK-NEXT: vmov r7, r10, d8
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: subs r2, r5, r9
-; CHECK-NEXT: vmov.32 d12[0], r0
+; CHECK-NEXT: subs r2, r5, r4
+; CHECK-NEXT: vmov.32 d13[0], r0
; CHECK-NEXT: sbcs r2, r6, #0
; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: vmov.32 d13[1], r6
; CHECK-NEXT: movwlt r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: mvnne r2, #0
-; CHECK-NEXT: subs r0, r0, r9
+; CHECK-NEXT: subs r0, r0, r4
; CHECK-NEXT: sbcs r0, r1, #0
-; CHECK-NEXT: vdup.32 d17, r2
+; CHECK-NEXT: vmov.32 d13[1], r1
; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: vmov.32 d12[1], r1
+; CHECK-NEXT: vmov r5, r4, d9
; CHECK-NEXT: movwlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mvnne r0, #0
-; CHECK-NEXT: vmov r2, r3, d9
-; CHECK-NEXT: vdup.32 d16, r0
-; CHECK-NEXT: rsbs r7, r7, #0
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: vmov.32 d12[1], r6
+; CHECK-NEXT: mvnne r2, #0
+; CHECK-NEXT: vdup.32 d17, r0
+; CHECK-NEXT: rsbs r0, r10, #0
+; CHECK-NEXT: vdup.32 d16, r2
+; CHECK-NEXT: rscs r0, r9, #0
; CHECK-NEXT: vbsl q8, q6, q5
-; CHECK-NEXT: rscs r7, r10, #0
-; CHECK-NEXT: mov r7, #0
-; CHECK-NEXT: movwlt r7, #1
-; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: vmov r0, r1, d17
-; CHECK-NEXT: mvnne r7, #0
-; CHECK-NEXT: vmov r6, r5, d16
-; CHECK-NEXT: rsbs r0, r0, #0
-; CHECK-NEXT: rscs r0, r1, #0
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mvnne r0, #0
-; CHECK-NEXT: rsbs r1, r2, #0
-; CHECK-NEXT: rscs r1, r3, #0
-; CHECK-NEXT: vmov.32 d19[0], r0
+; CHECK-NEXT: vmov r1, r2, d16
+; CHECK-NEXT: vmov r3, r6, d17
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: rscs r1, r2, #0
; CHECK-NEXT: mov r1, #0
; CHECK-NEXT: movwlt r1, #1
+; CHECK-NEXT: rsbs r2, r3, #0
+; CHECK-NEXT: rscs r2, r6, #0
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: movwlt r2, #1
+; CHECK-NEXT: rsbs r3, r5, #0
+; CHECK-NEXT: rscs r3, r4, #0
+; CHECK-NEXT: movwlt r7, #1
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: mvnne r7, #0
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: mvnne r2, #0
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: mvnne r1, #0
-; CHECK-NEXT: rsbs r0, r6, #0
-; CHECK-NEXT: rscs r0, r5, #0
-; CHECK-NEXT: vmov.32 d21[0], r1
-; CHECK-NEXT: movwlt r4, #1
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: vmov.32 d20[0], r7
-; CHECK-NEXT: mvnne r4, #0
-; CHECK-NEXT: vmov.32 d18[0], r4
-; CHECK-NEXT: vand q10, q4, q10
-; CHECK-NEXT: vand q8, q8, q9
-; CHECK-NEXT: vmovn.i64 d1, q10
-; CHECK-NEXT: vmovn.i64 d0, q8
+; CHECK-NEXT: vmov.32 d21[0], r2
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: vmov.32 d20[0], r1
+; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: vmov.32 d19[0], r7
+; CHECK-NEXT: vand q8, q8, q10
+; CHECK-NEXT: vmov.32 d18[0], r0
+; CHECK-NEXT: vmovn.i64 d1, q8
+; CHECK-NEXT: vand q9, q4, q9
+; CHECK-NEXT: vmovn.i64 d0, q9
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
entry:
@@ -506,108 +510,112 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEON-NEXT: .pad #4
; CHECK-NEON-NEXT: sub sp, sp, #4
-; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEON-NEXT: vmov r0, s2
+; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmov.f32 s16, s3
-; CHECK-NEON-NEXT: vmov.f32 s18, s1
-; CHECK-NEON-NEXT: vmov.f32 s20, s0
+; CHECK-NEON-NEXT: vmov.f32 s18, s2
+; CHECK-NEON-NEXT: vmov.f32 s20, s1
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
-; CHECK-NEON-NEXT: mov r6, r0
-; CHECK-NEON-NEXT: vmov r0, s16
-; CHECK-NEON-NEXT: mov r5, r1
+; CHECK-NEON-NEXT: mov r9, r0
+; CHECK-NEON-NEXT: vmov r0, s18
+; CHECK-NEON-NEXT: vmov r10, s16
+; CHECK-NEON-NEXT: mov r8, r1
+; CHECK-NEON-NEXT: vmov r6, s20
+; CHECK-NEON-NEXT: vmov.32 d8[0], r9
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
-; CHECK-NEON-NEXT: vmov r2, s18
-; CHECK-NEON-NEXT: adr r3, .LCPI6_0
-; CHECK-NEON-NEXT: vld1.64 {d8, d9}, [r3:128]
-; CHECK-NEON-NEXT: mvn r9, #-2147483648
-; CHECK-NEON-NEXT: subs r3, r6, r9
-; CHECK-NEON-NEXT: mov r4, #0
-; CHECK-NEON-NEXT: sbcs r3, r5, #0
-; CHECK-NEON-NEXT: vmov.32 d15[0], r0
-; CHECK-NEON-NEXT: movwlt r4, #1
-; CHECK-NEON-NEXT: cmp r4, #0
-; CHECK-NEON-NEXT: mvnne r4, #0
-; CHECK-NEON-NEXT: subs r0, r0, r9
-; CHECK-NEON-NEXT: sbcs r0, r1, #0
-; CHECK-NEON-NEXT: vmov.32 d14[0], r6
-; CHECK-NEON-NEXT: mov r0, #0
-; CHECK-NEON-NEXT: vmov r8, s20
-; CHECK-NEON-NEXT: movwlt r0, #1
-; CHECK-NEON-NEXT: cmp r0, #0
-; CHECK-NEON-NEXT: mvnne r0, #0
-; CHECK-NEON-NEXT: vmov.32 d15[1], r1
-; CHECK-NEON-NEXT: mov r7, #0
-; CHECK-NEON-NEXT: vdup.32 d11, r0
-; CHECK-NEON-NEXT: vmov.32 d14[1], r5
-; CHECK-NEON-NEXT: mov r0, r2
+; CHECK-NEON-NEXT: mov r5, r0
+; CHECK-NEON-NEXT: vmov.32 d10[0], r0
+; CHECK-NEON-NEXT: mov r0, r6
+; CHECK-NEON-NEXT: mov r4, r1
; CHECK-NEON-NEXT: bl __aeabi_h2f
-; CHECK-NEON-NEXT: vdup.32 d10, r4
; CHECK-NEON-NEXT: bl __aeabi_f2lz
-; CHECK-NEON-NEXT: vmov.32 d13[0], r0
-; CHECK-NEON-NEXT: subs r0, r0, r9
-; CHECK-NEON-NEXT: vbsl q5, q7, q4
-; CHECK-NEON-NEXT: sbcs r0, r1, #0
-; CHECK-NEON-NEXT: mov r6, #0
-; CHECK-NEON-NEXT: mov r0, r8
-; CHECK-NEON-NEXT: movwlt r6, #1
-; CHECK-NEON-NEXT: cmp r6, #0
-; CHECK-NEON-NEXT: vmov r11, r10, d10
-; CHECK-NEON-NEXT: vmov.32 d13[1], r1
-; CHECK-NEON-NEXT: mvnne r6, #0
-; CHECK-NEON-NEXT: vmov r5, r4, d11
+; CHECK-NEON-NEXT: mov r11, r0
+; CHECK-NEON-NEXT: vmov.32 d9[0], r0
+; CHECK-NEON-NEXT: mov r0, r10
+; CHECK-NEON-NEXT: mov r7, r1
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
-; CHECK-NEON-NEXT: vmov.32 d12[0], r0
-; CHECK-NEON-NEXT: subs r0, r0, r9
+; CHECK-NEON-NEXT: mvn r6, #-2147483648
+; CHECK-NEON-NEXT: subs r3, r9, r6
+; CHECK-NEON-NEXT: sbcs r3, r8, #0
+; CHECK-NEON-NEXT: vmov.32 d11[0], r0
+; CHECK-NEON-NEXT: mov r3, #0
+; CHECK-NEON-NEXT: adr r2, .LCPI6_0
+; CHECK-NEON-NEXT: movwlt r3, #1
+; CHECK-NEON-NEXT: subs r5, r5, r6
+; CHECK-NEON-NEXT: sbcs r5, r4, #0
+; CHECK-NEON-NEXT: vmov.32 d11[1], r1
+; CHECK-NEON-NEXT: mov r5, #0
+; CHECK-NEON-NEXT: movwlt r5, #1
+; CHECK-NEON-NEXT: cmp r5, #0
+; CHECK-NEON-NEXT: mvnne r5, #0
+; CHECK-NEON-NEXT: subs r0, r0, r6
; CHECK-NEON-NEXT: sbcs r0, r1, #0
+; CHECK-NEON-NEXT: vld1.64 {d18, d19}, [r2:128]
; CHECK-NEON-NEXT: mov r0, #0
-; CHECK-NEON-NEXT: vdup.32 d17, r6
+; CHECK-NEON-NEXT: mov r2, #0
; CHECK-NEON-NEXT: movwlt r0, #1
; CHECK-NEON-NEXT: cmp r0, #0
; CHECK-NEON-NEXT: mvnne r0, #0
-; CHECK-NEON-NEXT: vmov.32 d12[1], r1
-; CHECK-NEON-NEXT: rsbs r3, r11, #-2147483648
-; CHECK-NEON-NEXT: vdup.32 d16, r0
-; CHECK-NEON-NEXT: mvn r0, #0
-; CHECK-NEON-NEXT: vbsl q8, q6, q4
-; CHECK-NEON-NEXT: adr r1, .LCPI6_1
-; CHECK-NEON-NEXT: vld1.64 {d18, d19}, [r1:128]
-; CHECK-NEON-NEXT: sbcs r3, r0, r10
-; CHECK-NEON-NEXT: mov r3, #0
-; CHECK-NEON-NEXT: vmov r1, r2, d17
-; CHECK-NEON-NEXT: movwlt r3, #1
+; CHECK-NEON-NEXT: vmov.32 d10[1], r4
+; CHECK-NEON-NEXT: vdup.32 d17, r0
+; CHECK-NEON-NEXT: subs r0, r11, r6
+; CHECK-NEON-NEXT: sbcs r0, r7, #0
+; CHECK-NEON-NEXT: vdup.32 d16, r5
+; CHECK-NEON-NEXT: mov r0, #0
+; CHECK-NEON-NEXT: vbsl q8, q5, q9
+; CHECK-NEON-NEXT: movwlt r0, #1
+; CHECK-NEON-NEXT: cmp r0, #0
+; CHECK-NEON-NEXT: vmov.32 d9[1], r7
+; CHECK-NEON-NEXT: mvnne r0, #0
; CHECK-NEON-NEXT: cmp r3, #0
+; CHECK-NEON-NEXT: mvn r6, #0
+; CHECK-NEON-NEXT: vdup.32 d21, r0
; CHECK-NEON-NEXT: mvnne r3, #0
-; CHECK-NEON-NEXT: rsbs r6, r5, #-2147483648
-; CHECK-NEON-NEXT: sbcs r6, r0, r4
-; CHECK-NEON-NEXT: vmov r5, r4, d16
-; CHECK-NEON-NEXT: mov r6, #0
-; CHECK-NEON-NEXT: movwlt r6, #1
-; CHECK-NEON-NEXT: cmp r6, #0
-; CHECK-NEON-NEXT: mvnne r6, #0
-; CHECK-NEON-NEXT: vdup.32 d23, r6
-; CHECK-NEON-NEXT: vdup.32 d22, r3
-; CHECK-NEON-NEXT: vbsl q11, q5, q9
-; CHECK-NEON-NEXT: vmovn.i64 d1, q11
-; CHECK-NEON-NEXT: rsbs r1, r1, #-2147483648
-; CHECK-NEON-NEXT: sbcs r1, r0, r2
-; CHECK-NEON-NEXT: mov r1, #0
-; CHECK-NEON-NEXT: movwlt r1, #1
-; CHECK-NEON-NEXT: cmp r1, #0
-; CHECK-NEON-NEXT: mvnne r1, #0
-; CHECK-NEON-NEXT: rsbs r2, r5, #-2147483648
-; CHECK-NEON-NEXT: sbcs r0, r0, r4
-; CHECK-NEON-NEXT: vdup.32 d21, r1
+; CHECK-NEON-NEXT: vmov.32 d8[1], r8
+; CHECK-NEON-NEXT: vmov r0, r1, d16
+; CHECK-NEON-NEXT: vdup.32 d20, r3
+; CHECK-NEON-NEXT: vbit q9, q4, q10
+; CHECK-NEON-NEXT: adr r5, .LCPI6_1
+; CHECK-NEON-NEXT: vld1.64 {d20, d21}, [r5:128]
+; CHECK-NEON-NEXT: vmov r5, r4, d17
+; CHECK-NEON-NEXT: vmov r3, r7, d18
+; CHECK-NEON-NEXT: rsbs r0, r0, #-2147483648
+; CHECK-NEON-NEXT: sbcs r0, r6, r1
+; CHECK-NEON-NEXT: mov r0, #0
+; CHECK-NEON-NEXT: movwlt r0, #1
+; CHECK-NEON-NEXT: cmp r0, #0
+; CHECK-NEON-NEXT: mvnne r0, #0
+; CHECK-NEON-NEXT: rsbs r1, r3, #-2147483648
+; CHECK-NEON-NEXT: vmov r1, r3, d19
+; CHECK-NEON-NEXT: sbcs r7, r6, r7
+; CHECK-NEON-NEXT: mov r7, #0
; CHECK-NEON-NEXT: movwlt r7, #1
+; CHECK-NEON-NEXT: rsbs r5, r5, #-2147483648
+; CHECK-NEON-NEXT: sbcs r5, r6, r4
+; CHECK-NEON-NEXT: mov r5, #0
+; CHECK-NEON-NEXT: movwlt r5, #1
+; CHECK-NEON-NEXT: rsbs r1, r1, #-2147483648
+; CHECK-NEON-NEXT: sbcs r1, r6, r3
+; CHECK-NEON-NEXT: movwlt r2, #1
+; CHECK-NEON-NEXT: cmp r2, #0
+; CHECK-NEON-NEXT: mvnne r2, #0
+; CHECK-NEON-NEXT: cmp r5, #0
+; CHECK-NEON-NEXT: mvnne r5, #0
; CHECK-NEON-NEXT: cmp r7, #0
+; CHECK-NEON-NEXT: vdup.32 d25, r5
; CHECK-NEON-NEXT: mvnne r7, #0
-; CHECK-NEON-NEXT: vdup.32 d20, r7
-; CHECK-NEON-NEXT: vbif q8, q9, q10
-; CHECK-NEON-NEXT: vmovn.i64 d0, q8
-; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEON-NEXT: vdup.32 d23, r2
+; CHECK-NEON-NEXT: vdup.32 d24, r0
+; CHECK-NEON-NEXT: vbif q8, q10, q12
+; CHECK-NEON-NEXT: vdup.32 d22, r7
+; CHECK-NEON-NEXT: vbif q9, q10, q11
+; CHECK-NEON-NEXT: vmovn.i64 d1, q8
+; CHECK-NEON-NEXT: vmovn.i64 d0, q9
+; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEON-NEXT: add sp, sp, #4
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
; CHECK-NEON-NEXT: .p2align 4
@@ -627,104 +635,111 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-FP16-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
-; CHECK-FP16-NEXT: vpush {d8, d9, d10, d11, d12, d13}
-; CHECK-FP16-NEXT: vmov.u16 r0, d0[3]
+; CHECK-FP16-NEXT: .vsave {d10, d11, d12, d13}
+; CHECK-FP16-NEXT: vpush {d10, d11, d12, d13}
+; CHECK-FP16-NEXT: .vsave {d8}
+; CHECK-FP16-NEXT: vpush {d8}
+; CHECK-FP16-NEXT: vmov.u16 r0, d0[0]
; CHECK-FP16-NEXT: vorr d8, d0, d0
-; CHECK-FP16-NEXT: vmov.u16 r8, d0[0]
-; CHECK-FP16-NEXT: vmov.u16 r9, d0[1]
+; CHECK-FP16-NEXT: vmov.u16 r6, d0[1]
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: mov r4, r0
; CHECK-FP16-NEXT: vmov.u16 r0, d8[2]
-; CHECK-FP16-NEXT: mov r5, r1
-; CHECK-FP16-NEXT: vmov.32 d9[0], r4
+; CHECK-FP16-NEXT: mov r8, r1
+; CHECK-FP16-NEXT: vmov.32 d10[0], r4
+; CHECK-FP16-NEXT: vmov s0, r0
+; CHECK-FP16-NEXT: bl __fixhfdi
+; CHECK-FP16-NEXT: vmov s0, r6
+; CHECK-FP16-NEXT: mov r5, r0
+; CHECK-FP16-NEXT: mov r7, r1
+; CHECK-FP16-NEXT: vmov.32 d12[0], r0
+; CHECK-FP16-NEXT: bl __fixhfdi
+; CHECK-FP16-NEXT: mov r9, r0
+; CHECK-FP16-NEXT: vmov.u16 r0, d8[3]
+; CHECK-FP16-NEXT: mov r10, r1
+; CHECK-FP16-NEXT: vmov.32 d11[0], r9
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfdi
+; CHECK-FP16-NEXT: mvn r6, #-2147483648
+; CHECK-FP16-NEXT: subs r3, r4, r6
+; CHECK-FP16-NEXT: sbcs r3, r8, #0
+; CHECK-FP16-NEXT: vmov.32 d13[0], r0
+; CHECK-FP16-NEXT: mov r3, #0
; CHECK-FP16-NEXT: adr r2, .LCPI6_0
-; CHECK-FP16-NEXT: mvn r10, #-2147483648
-; CHECK-FP16-NEXT: vld1.64 {d10, d11}, [r2:128]
-; CHECK-FP16-NEXT: subs r2, r4, r10
-; CHECK-FP16-NEXT: sbcs r2, r5, #0
-; CHECK-FP16-NEXT: vmov s0, r9
-; CHECK-FP16-NEXT: mov r2, #0
-; CHECK-FP16-NEXT: vmov.32 d8[0], r0
-; CHECK-FP16-NEXT: movwlt r2, #1
-; CHECK-FP16-NEXT: cmp r2, #0
-; CHECK-FP16-NEXT: mvnne r2, #0
-; CHECK-FP16-NEXT: subs r0, r0, r10
+; CHECK-FP16-NEXT: movwlt r3, #1
+; CHECK-FP16-NEXT: subs r5, r5, r6
+; CHECK-FP16-NEXT: sbcs r5, r7, #0
+; CHECK-FP16-NEXT: vmov.32 d13[1], r1
+; CHECK-FP16-NEXT: mov r5, #0
+; CHECK-FP16-NEXT: movwlt r5, #1
+; CHECK-FP16-NEXT: cmp r5, #0
+; CHECK-FP16-NEXT: mvnne r5, #0
+; CHECK-FP16-NEXT: subs r0, r0, r6
; CHECK-FP16-NEXT: sbcs r0, r1, #0
-; CHECK-FP16-NEXT: vmov.32 d9[1], r5
+; CHECK-FP16-NEXT: vld1.64 {d18, d19}, [r2:128]
; CHECK-FP16-NEXT: mov r0, #0
-; CHECK-FP16-NEXT: mov r6, #0
+; CHECK-FP16-NEXT: mov r2, #0
; CHECK-FP16-NEXT: movwlt r0, #1
; CHECK-FP16-NEXT: cmp r0, #0
-; CHECK-FP16-NEXT: vmov.32 d8[1], r1
; CHECK-FP16-NEXT: mvnne r0, #0
-; CHECK-FP16-NEXT: vdup.32 d17, r2
-; CHECK-FP16-NEXT: vdup.32 d16, r0
-; CHECK-FP16-NEXT: vbif q4, q5, q8
-; CHECK-FP16-NEXT: bl __fixhfdi
-; CHECK-FP16-NEXT: vmov.32 d13[0], r0
-; CHECK-FP16-NEXT: subs r0, r0, r10
-; CHECK-FP16-NEXT: vmov s0, r8
-; CHECK-FP16-NEXT: sbcs r0, r1, #0
-; CHECK-FP16-NEXT: mov r7, #0
-; CHECK-FP16-NEXT: vmov r9, r8, d8
-; CHECK-FP16-NEXT: movwlt r7, #1
-; CHECK-FP16-NEXT: cmp r7, #0
-; CHECK-FP16-NEXT: vmov.32 d13[1], r1
-; CHECK-FP16-NEXT: vmov r5, r4, d9
-; CHECK-FP16-NEXT: mvnne r7, #0
-; CHECK-FP16-NEXT: bl __fixhfdi
-; CHECK-FP16-NEXT: vmov.32 d12[0], r0
-; CHECK-FP16-NEXT: subs r0, r0, r10
-; CHECK-FP16-NEXT: sbcs r0, r1, #0
+; CHECK-FP16-NEXT: vmov.32 d12[1], r7
+; CHECK-FP16-NEXT: vdup.32 d17, r0
+; CHECK-FP16-NEXT: subs r0, r9, r6
+; CHECK-FP16-NEXT: sbcs r0, r10, #0
+; CHECK-FP16-NEXT: vdup.32 d16, r5
; CHECK-FP16-NEXT: mov r0, #0
-; CHECK-FP16-NEXT: vdup.32 d17, r7
+; CHECK-FP16-NEXT: vbsl q8, q6, q9
; CHECK-FP16-NEXT: movwlt r0, #1
; CHECK-FP16-NEXT: cmp r0, #0
+; CHECK-FP16-NEXT: vmov.32 d11[1], r10
; CHECK-FP16-NEXT: mvnne r0, #0
-; CHECK-FP16-NEXT: vmov.32 d12[1], r1
-; CHECK-FP16-NEXT: rsbs r3, r9, #-2147483648
-; CHECK-FP16-NEXT: vdup.32 d16, r0
-; CHECK-FP16-NEXT: mvn r0, #0
-; CHECK-FP16-NEXT: vbsl q8, q6, q5
-; CHECK-FP16-NEXT: adr r1, .LCPI6_1
-; CHECK-FP16-NEXT: vld1.64 {d18, d19}, [r1:128]
-; CHECK-FP16-NEXT: sbcs r3, r0, r8
-; CHECK-FP16-NEXT: mov r3, #0
-; CHECK-FP16-NEXT: vmov r1, r2, d17
-; CHECK-FP16-NEXT: movwlt r3, #1
; CHECK-FP16-NEXT: cmp r3, #0
+; CHECK-FP16-NEXT: mvn r6, #0
+; CHECK-FP16-NEXT: vdup.32 d21, r0
; CHECK-FP16-NEXT: mvnne r3, #0
-; CHECK-FP16-NEXT: rsbs r7, r5, #-2147483648
-; CHECK-FP16-NEXT: sbcs r7, r0, r4
-; CHECK-FP16-NEXT: vmov r5, r4, d16
+; CHECK-FP16-NEXT: vmov.32 d10[1], r8
+; CHECK-FP16-NEXT: vmov r0, r1, d16
+; CHECK-FP16-NEXT: vdup.32 d20, r3
+; CHECK-FP16-NEXT: vbit q9, q5, q10
+; CHECK-FP16-NEXT: adr r5, .LCPI6_1
+; CHECK-FP16-NEXT: vld1.64 {d20, d21}, [r5:128]
+; CHECK-FP16-NEXT: vmov r5, r4, d17
+; CHECK-FP16-NEXT: vmov r3, r7, d18
+; CHECK-FP16-NEXT: rsbs r0, r0, #-2147483648
+; CHECK-FP16-NEXT: sbcs r0, r6, r1
+; CHECK-FP16-NEXT: mov r0, #0
+; CHECK-FP16-NEXT: movwlt r0, #1
+; CHECK-FP16-NEXT: cmp r0, #0
+; CHECK-FP16-NEXT: mvnne r0, #0
+; CHECK-FP16-NEXT: rsbs r1, r3, #-2147483648
+; CHECK-FP16-NEXT: vmov r1, r3, d19
+; CHECK-FP16-NEXT: sbcs r7, r6, r7
; CHECK-FP16-NEXT: mov r7, #0
; CHECK-FP16-NEXT: movwlt r7, #1
+; CHECK-FP16-NEXT: rsbs r5, r5, #-2147483648
+; CHECK-FP16-NEXT: sbcs r5, r6, r4
+; CHECK-FP16-NEXT: mov r5, #0
+; CHECK-FP16-NEXT: movwlt r5, #1
+; CHECK-FP16-NEXT: rsbs r1, r1, #-2147483648
+; CHECK-FP16-NEXT: sbcs r1, r6, r3
+; CHECK-FP16-NEXT: movwlt r2, #1
+; CHECK-FP16-NEXT: cmp r2, #0
+; CHECK-FP16-NEXT: mvnne r2, #0
+; CHECK-FP16-NEXT: cmp r5, #0
+; CHECK-FP16-NEXT: mvnne r5, #0
; CHECK-FP16-NEXT: cmp r7, #0
+; CHECK-FP16-NEXT: vdup.32 d25, r5
; CHECK-FP16-NEXT: mvnne r7, #0
-; CHECK-FP16-NEXT: vdup.32 d23, r7
-; CHECK-FP16-NEXT: vdup.32 d22, r3
-; CHECK-FP16-NEXT: vbsl q11, q4, q9
-; CHECK-FP16-NEXT: vmovn.i64 d1, q11
-; CHECK-FP16-NEXT: rsbs r1, r1, #-2147483648
-; CHECK-FP16-NEXT: sbcs r1, r0, r2
-; CHECK-FP16-NEXT: mov r1, #0
-; CHECK-FP16-NEXT: movwlt r1, #1
-; CHECK-FP16-NEXT: cmp r1, #0
-; CHECK-FP16-NEXT: mvnne r1, #0
-; CHECK-FP16-NEXT: rsbs r2, r5, #-2147483648
-; CHECK-FP16-NEXT: sbcs r0, r0, r4
-; CHECK-FP16-NEXT: vdup.32 d21, r1
-; CHECK-FP16-NEXT: movwlt r6, #1
-; CHECK-FP16-NEXT: cmp r6, #0
-; CHECK-FP16-NEXT: mvnne r6, #0
-; CHECK-FP16-NEXT: vdup.32 d20, r6
-; CHECK-FP16-NEXT: vbif q8, q9, q10
-; CHECK-FP16-NEXT: vmovn.i64 d0, q8
-; CHECK-FP16-NEXT: vpop {d8, d9, d10, d11, d12, d13}
+; CHECK-FP16-NEXT: vdup.32 d23, r2
+; CHECK-FP16-NEXT: vdup.32 d24, r0
+; CHECK-FP16-NEXT: vbif q8, q10, q12
+; CHECK-FP16-NEXT: vdup.32 d22, r7
+; CHECK-FP16-NEXT: vbif q9, q10, q11
+; CHECK-FP16-NEXT: vmovn.i64 d1, q8
+; CHECK-FP16-NEXT: vmovn.i64 d0, q9
+; CHECK-FP16-NEXT: vpop {d8}
+; CHECK-FP16-NEXT: vpop {d10, d11, d12, d13}
; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
; CHECK-FP16-NEXT: .p2align 4
; CHECK-FP16-NEXT: @ %bb.1:
@@ -751,8 +766,8 @@ entry:
define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-NEON-LABEL: utesth_f16i32:
; CHECK-NEON: @ %bb.0: @ %entry
-; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEON-NEXT: .vsave {d12, d13}
; CHECK-NEON-NEXT: vpush {d12, d13}
; CHECK-NEON-NEXT: .vsave {d8, d9, d10}
@@ -763,7 +778,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-NEON-NEXT: vmov.f32 s20, s0
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2ulz
-; CHECK-NEON-NEXT: mov r4, r0
+; CHECK-NEON-NEXT: mov r10, r0
; CHECK-NEON-NEXT: vmov r0, s18
; CHECK-NEON-NEXT: mov r8, r1
; CHECK-NEON-NEXT: bl __aeabi_h2f
@@ -779,37 +794,37 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-NEON-NEXT: vmov r0, s16
; CHECK-NEON-NEXT: mov r7, r1
; CHECK-NEON-NEXT: bl __aeabi_h2f
-; CHECK-NEON-NEXT: vmov.32 d9[0], r4
+; CHECK-NEON-NEXT: vmov.32 d9[0], r10
; CHECK-NEON-NEXT: bl __aeabi_f2ulz
-; CHECK-NEON-NEXT: mvn r3, #0
+; CHECK-NEON-NEXT: mvn r4, #0
+; CHECK-NEON-NEXT: subs r2, r5, r4
+; CHECK-NEON-NEXT: sbcs r2, r7, #0
; CHECK-NEON-NEXT: vmov.32 d8[0], r0
-; CHECK-NEON-NEXT: subs r0, r0, r3
; CHECK-NEON-NEXT: mov r2, #0
+; CHECK-NEON-NEXT: mov r3, #0
+; CHECK-NEON-NEXT: movwlo r2, #1
+; CHECK-NEON-NEXT: subs r0, r0, r4
; CHECK-NEON-NEXT: sbcs r0, r1, #0
; CHECK-NEON-NEXT: mov r0, #0
; CHECK-NEON-NEXT: movwlo r0, #1
; CHECK-NEON-NEXT: cmp r0, #0
; CHECK-NEON-NEXT: mvnne r0, #0
-; CHECK-NEON-NEXT: subs r1, r4, r3
+; CHECK-NEON-NEXT: subs r1, r10, r4
; CHECK-NEON-NEXT: sbcs r1, r8, #0
; CHECK-NEON-NEXT: mov r1, #0
; CHECK-NEON-NEXT: movwlo r1, #1
+; CHECK-NEON-NEXT: subs r7, r6, r4
+; CHECK-NEON-NEXT: sbcs r7, r9, #0
+; CHECK-NEON-NEXT: movwlo r3, #1
+; CHECK-NEON-NEXT: cmp r3, #0
+; CHECK-NEON-NEXT: mvnne r3, #0
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: mvnne r1, #0
-; CHECK-NEON-NEXT: subs r6, r6, r3
-; CHECK-NEON-NEXT: sbcs r6, r9, #0
-; CHECK-NEON-NEXT: vdup.32 d19, r1
-; CHECK-NEON-NEXT: mov r6, #0
-; CHECK-NEON-NEXT: vdup.32 d18, r0
-; CHECK-NEON-NEXT: movwlo r6, #1
-; CHECK-NEON-NEXT: cmp r6, #0
-; CHECK-NEON-NEXT: mvnne r6, #0
-; CHECK-NEON-NEXT: subs r3, r5, r3
-; CHECK-NEON-NEXT: sbcs r3, r7, #0
-; CHECK-NEON-NEXT: vdup.32 d17, r6
-; CHECK-NEON-NEXT: movwlo r2, #1
; CHECK-NEON-NEXT: cmp r2, #0
+; CHECK-NEON-NEXT: vdup.32 d19, r1
; CHECK-NEON-NEXT: mvnne r2, #0
+; CHECK-NEON-NEXT: vdup.32 d17, r3
+; CHECK-NEON-NEXT: vdup.32 d18, r0
; CHECK-NEON-NEXT: vand q10, q4, q9
; CHECK-NEON-NEXT: vdup.32 d16, r2
; CHECK-NEON-NEXT: vand q11, q6, q8
@@ -819,12 +834,12 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-NEON-NEXT: vmovn.i64 d0, q8
; CHECK-NEON-NEXT: vpop {d8, d9, d10}
; CHECK-NEON-NEXT: vpop {d12, d13}
-; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
;
; CHECK-FP16-LABEL: utesth_f16i32:
; CHECK-FP16: @ %bb.0: @ %entry
-; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-FP16-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-FP16-NEXT: vpush {d8, d9, d10, d11}
; CHECK-FP16-NEXT: vmov.u16 r0, d0[1]
@@ -832,52 +847,52 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-FP16-NEXT: vmov.u16 r5, d0[3]
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixunshfdi
-; CHECK-FP16-NEXT: mov r4, r0
+; CHECK-FP16-NEXT: mov r10, r0
; CHECK-FP16-NEXT: vmov.u16 r0, d8[0]
; CHECK-FP16-NEXT: mov r8, r1
-; CHECK-FP16-NEXT: vmov.32 d11[0], r4
+; CHECK-FP16-NEXT: vmov.32 d11[0], r10
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixunshfdi
; CHECK-FP16-NEXT: vmov s0, r5
; CHECK-FP16-NEXT: mov r6, r0
-; CHECK-FP16-NEXT: mov r9, r1
+; CHECK-FP16-NEXT: mov r7, r1
; CHECK-FP16-NEXT: vmov.32 d10[0], r0
; CHECK-FP16-NEXT: bl __fixunshfdi
; CHECK-FP16-NEXT: mov r5, r0
; CHECK-FP16-NEXT: vmov.u16 r0, d8[2]
-; CHECK-FP16-NEXT: mov r7, r1
+; CHECK-FP16-NEXT: mov r9, r1
; CHECK-FP16-NEXT: vmov.32 d9[0], r5
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixunshfdi
-; CHECK-FP16-NEXT: mvn r3, #0
+; CHECK-FP16-NEXT: mvn r4, #0
+; CHECK-FP16-NEXT: subs r2, r6, r4
+; CHECK-FP16-NEXT: sbcs r2, r7, #0
; CHECK-FP16-NEXT: vmov.32 d8[0], r0
-; CHECK-FP16-NEXT: subs r0, r0, r3
; CHECK-FP16-NEXT: mov r2, #0
+; CHECK-FP16-NEXT: mov r3, #0
+; CHECK-FP16-NEXT: movwlo r2, #1
+; CHECK-FP16-NEXT: subs r0, r0, r4
; CHECK-FP16-NEXT: sbcs r0, r1, #0
; CHECK-FP16-NEXT: mov r0, #0
; CHECK-FP16-NEXT: movwlo r0, #1
; CHECK-FP16-NEXT: cmp r0, #0
; CHECK-FP16-NEXT: mvnne r0, #0
-; CHECK-FP16-NEXT: subs r1, r5, r3
-; CHECK-FP16-NEXT: sbcs r1, r7, #0
+; CHECK-FP16-NEXT: subs r1, r5, r4
+; CHECK-FP16-NEXT: sbcs r1, r9, #0
; CHECK-FP16-NEXT: mov r1, #0
; CHECK-FP16-NEXT: movwlo r1, #1
+; CHECK-FP16-NEXT: subs r7, r10, r4
+; CHECK-FP16-NEXT: sbcs r7, r8, #0
+; CHECK-FP16-NEXT: movwlo r3, #1
+; CHECK-FP16-NEXT: cmp r3, #0
+; CHECK-FP16-NEXT: mvnne r3, #0
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: mvnne r1, #0
-; CHECK-FP16-NEXT: subs r7, r4, r3
-; CHECK-FP16-NEXT: sbcs r7, r8, #0
-; CHECK-FP16-NEXT: vdup.32 d19, r1
-; CHECK-FP16-NEXT: mov r7, #0
-; CHECK-FP16-NEXT: vdup.32 d18, r0
-; CHECK-FP16-NEXT: movwlo r7, #1
-; CHECK-FP16-NEXT: cmp r7, #0
-; CHECK-FP16-NEXT: mvnne r7, #0
-; CHECK-FP16-NEXT: subs r3, r6, r3
-; CHECK-FP16-NEXT: sbcs r3, r9, #0
-; CHECK-FP16-NEXT: vdup.32 d17, r7
-; CHECK-FP16-NEXT: movwlo r2, #1
; CHECK-FP16-NEXT: cmp r2, #0
+; CHECK-FP16-NEXT: vdup.32 d19, r1
; CHECK-FP16-NEXT: mvnne r2, #0
+; CHECK-FP16-NEXT: vdup.32 d17, r3
+; CHECK-FP16-NEXT: vdup.32 d18, r0
; CHECK-FP16-NEXT: vand q10, q4, q9
; CHECK-FP16-NEXT: vdup.32 d16, r2
; CHECK-FP16-NEXT: vand q11, q5, q8
@@ -886,7 +901,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-FP16-NEXT: vmovn.i64 d1, q9
; CHECK-FP16-NEXT: vmovn.i64 d0, q8
; CHECK-FP16-NEXT: vpop {d8, d9, d10, d11}
-; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
entry:
%conv = fptoui <4 x half> %x to <4 x i64>
%0 = icmp ult <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
@@ -902,207 +917,211 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13}
-; CHECK-NEON-NEXT: vmov r0, s3
-; CHECK-NEON-NEXT: vmov.f32 s16, s2
-; CHECK-NEON-NEXT: vmov.f32 s18, s1
+; CHECK-NEON-NEXT: vmov r0, s1
+; CHECK-NEON-NEXT: vmov.f32 s16, s3
+; CHECK-NEON-NEXT: vmov.f32 s18, s2
; CHECK-NEON-NEXT: vmov.f32 s20, s0
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
-; CHECK-NEON-NEXT: mov r5, r0
-; CHECK-NEON-NEXT: vmov r0, s16
-; CHECK-NEON-NEXT: mov r6, r1
+; CHECK-NEON-NEXT: mov r6, r0
+; CHECK-NEON-NEXT: vmov r0, s20
+; CHECK-NEON-NEXT: mov r7, r1
+; CHECK-NEON-NEXT: vmov r5, s18
+; CHECK-NEON-NEXT: vmov r8, s16
+; CHECK-NEON-NEXT: vmov.32 d9[0], r6
; CHECK-NEON-NEXT: bl __aeabi_h2f
-; CHECK-NEON-NEXT: vmov r8, s20
-; CHECK-NEON-NEXT: vmov.32 d13[0], r5
; CHECK-NEON-NEXT: bl __aeabi_f2lz
-; CHECK-NEON-NEXT: vmov r2, s18
-; CHECK-NEON-NEXT: vmov.32 d12[0], r0
+; CHECK-NEON-NEXT: vmov.32 d8[0], r0
; CHECK-NEON-NEXT: mvn r9, #0
; CHECK-NEON-NEXT: subs r0, r0, r9
+; CHECK-NEON-NEXT: mov r4, #0
; CHECK-NEON-NEXT: sbcs r0, r1, #0
-; CHECK-NEON-NEXT: vmov.32 d13[1], r6
+; CHECK-NEON-NEXT: vmov.32 d9[1], r7
; CHECK-NEON-NEXT: mov r0, #0
-; CHECK-NEON-NEXT: mov r7, #0
; CHECK-NEON-NEXT: movwlt r0, #1
; CHECK-NEON-NEXT: cmp r0, #0
-; CHECK-NEON-NEXT: vmov.32 d12[1], r1
+; CHECK-NEON-NEXT: vmov.32 d8[1], r1
; CHECK-NEON-NEXT: mvnne r0, #0
-; CHECK-NEON-NEXT: subs r1, r5, r9
-; CHECK-NEON-NEXT: sbcs r1, r6, #0
+; CHECK-NEON-NEXT: subs r1, r6, r9
+; CHECK-NEON-NEXT: sbcs r1, r7, #0
; CHECK-NEON-NEXT: mov r1, #0
; CHECK-NEON-NEXT: movwlt r1, #1
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: mvnne r1, #0
-; CHECK-NEON-NEXT: vdup.32 d9, r1
-; CHECK-NEON-NEXT: vdup.32 d8, r0
-; CHECK-NEON-NEXT: mov r0, r2
+; CHECK-NEON-NEXT: vdup.32 d13, r1
+; CHECK-NEON-NEXT: vdup.32 d12, r0
+; CHECK-NEON-NEXT: mov r0, r5
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov.i64 q5, #0xffffffff
-; CHECK-NEON-NEXT: vbsl q4, q6, q5
+; CHECK-NEON-NEXT: vbif q4, q5, q6
; CHECK-NEON-NEXT: bl __aeabi_f2lz
; CHECK-NEON-NEXT: mov r5, r0
-; CHECK-NEON-NEXT: vmov.32 d13[0], r0
+; CHECK-NEON-NEXT: vmov.32 d12[0], r0
; CHECK-NEON-NEXT: mov r0, r8
; CHECK-NEON-NEXT: mov r6, r1
-; CHECK-NEON-NEXT: vmov r4, r10, d8
+; CHECK-NEON-NEXT: vmov r7, r10, d8
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
; CHECK-NEON-NEXT: subs r2, r5, r9
-; CHECK-NEON-NEXT: vmov.32 d12[0], r0
+; CHECK-NEON-NEXT: vmov.32 d13[0], r0
; CHECK-NEON-NEXT: sbcs r2, r6, #0
; CHECK-NEON-NEXT: mov r2, #0
-; CHECK-NEON-NEXT: vmov.32 d13[1], r6
; CHECK-NEON-NEXT: movwlt r2, #1
-; CHECK-NEON-NEXT: cmp r2, #0
-; CHECK-NEON-NEXT: mvnne r2, #0
; CHECK-NEON-NEXT: subs r0, r0, r9
; CHECK-NEON-NEXT: sbcs r0, r1, #0
-; CHECK-NEON-NEXT: vdup.32 d17, r2
+; CHECK-NEON-NEXT: vmov.32 d13[1], r1
; CHECK-NEON-NEXT: mov r0, #0
-; CHECK-NEON-NEXT: vmov.32 d12[1], r1
; CHECK-NEON-NEXT: movwlt r0, #1
; CHECK-NEON-NEXT: cmp r0, #0
; CHECK-NEON-NEXT: mvnne r0, #0
-; CHECK-NEON-NEXT: vmov r2, r3, d9
-; CHECK-NEON-NEXT: vdup.32 d16, r0
-; CHECK-NEON-NEXT: rsbs r6, r4, #0
+; CHECK-NEON-NEXT: cmp r2, #0
+; CHECK-NEON-NEXT: vmov.32 d12[1], r6
+; CHECK-NEON-NEXT: mvnne r2, #0
+; CHECK-NEON-NEXT: vdup.32 d17, r0
+; CHECK-NEON-NEXT: rsbs r0, r7, #0
+; CHECK-NEON-NEXT: vdup.32 d16, r2
+; CHECK-NEON-NEXT: vmov r7, r5, d9
; CHECK-NEON-NEXT: vbsl q8, q6, q5
-; CHECK-NEON-NEXT: rscs r6, r10, #0
-; CHECK-NEON-NEXT: mov r6, #0
-; CHECK-NEON-NEXT: movwlt r6, #1
-; CHECK-NEON-NEXT: cmp r6, #0
-; CHECK-NEON-NEXT: vmov r0, r1, d17
-; CHECK-NEON-NEXT: mvnne r6, #0
-; CHECK-NEON-NEXT: vmov r5, r4, d16
-; CHECK-NEON-NEXT: rsbs r0, r0, #0
-; CHECK-NEON-NEXT: rscs r0, r1, #0
+; CHECK-NEON-NEXT: rscs r0, r10, #0
; CHECK-NEON-NEXT: mov r0, #0
; CHECK-NEON-NEXT: movwlt r0, #1
-; CHECK-NEON-NEXT: cmp r0, #0
-; CHECK-NEON-NEXT: mvnne r0, #0
-; CHECK-NEON-NEXT: rsbs r1, r2, #0
-; CHECK-NEON-NEXT: rscs r1, r3, #0
-; CHECK-NEON-NEXT: vmov.32 d19[0], r0
+; CHECK-NEON-NEXT: vmov r1, r2, d16
+; CHECK-NEON-NEXT: vmov r3, r6, d17
+; CHECK-NEON-NEXT: rsbs r1, r1, #0
+; CHECK-NEON-NEXT: rscs r1, r2, #0
; CHECK-NEON-NEXT: mov r1, #0
; CHECK-NEON-NEXT: movwlt r1, #1
+; CHECK-NEON-NEXT: rsbs r2, r3, #0
+; CHECK-NEON-NEXT: rscs r2, r6, #0
+; CHECK-NEON-NEXT: mov r2, #0
+; CHECK-NEON-NEXT: movwlt r2, #1
+; CHECK-NEON-NEXT: rsbs r3, r7, #0
+; CHECK-NEON-NEXT: rscs r3, r5, #0
+; CHECK-NEON-NEXT: movwlt r4, #1
+; CHECK-NEON-NEXT: cmp r4, #0
+; CHECK-NEON-NEXT: mvnne r4, #0
+; CHECK-NEON-NEXT: cmp r2, #0
+; CHECK-NEON-NEXT: mvnne r2, #0
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: mvnne r1, #0
-; CHECK-NEON-NEXT: rsbs r0, r5, #0
-; CHECK-NEON-NEXT: rscs r0, r4, #0
-; CHECK-NEON-NEXT: vmov.32 d21[0], r1
-; CHECK-NEON-NEXT: movwlt r7, #1
-; CHECK-NEON-NEXT: cmp r7, #0
-; CHECK-NEON-NEXT: vmov.32 d20[0], r6
-; CHECK-NEON-NEXT: mvnne r7, #0
-; CHECK-NEON-NEXT: vmov.32 d18[0], r7
-; CHECK-NEON-NEXT: vand q10, q4, q10
-; CHECK-NEON-NEXT: vand q8, q8, q9
-; CHECK-NEON-NEXT: vmovn.i64 d1, q10
-; CHECK-NEON-NEXT: vmovn.i64 d0, q8
+; CHECK-NEON-NEXT: vmov.32 d21[0], r2
+; CHECK-NEON-NEXT: cmp r0, #0
+; CHECK-NEON-NEXT: vmov.32 d20[0], r1
+; CHECK-NEON-NEXT: mvnne r0, #0
+; CHECK-NEON-NEXT: vmov.32 d19[0], r4
+; CHECK-NEON-NEXT: vand q8, q8, q10
+; CHECK-NEON-NEXT: vmov.32 d18[0], r0
+; CHECK-NEON-NEXT: vmovn.i64 d1, q8
+; CHECK-NEON-NEXT: vand q9, q4, q9
+; CHECK-NEON-NEXT: vmovn.i64 d0, q9
; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
;
; CHECK-FP16-LABEL: ustest_f16i32:
; CHECK-FP16: @ %bb.0: @ %entry
-; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-FP16-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
-; CHECK-FP16-NEXT: vpush {d8, d9, d10, d11, d12, d13}
-; CHECK-FP16-NEXT: vmov.u16 r0, d0[3]
+; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-FP16-NEXT: .vsave {d10, d11, d12, d13, d14, d15}
+; CHECK-FP16-NEXT: vpush {d10, d11, d12, d13, d14, d15}
+; CHECK-FP16-NEXT: .vsave {d8}
+; CHECK-FP16-NEXT: vpush {d8}
+; CHECK-FP16-NEXT: vmov.u16 r0, d0[1]
; CHECK-FP16-NEXT: vorr d8, d0, d0
-; CHECK-FP16-NEXT: vmov.u16 r8, d0[0]
-; CHECK-FP16-NEXT: vmov.u16 r9, d0[1]
+; CHECK-FP16-NEXT: vmov.u16 r8, d0[2]
+; CHECK-FP16-NEXT: vmov.u16 r9, d0[3]
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: mov r4, r0
-; CHECK-FP16-NEXT: vmov.u16 r0, d8[2]
+; CHECK-FP16-NEXT: vmov.u16 r0, d8[0]
; CHECK-FP16-NEXT: mov r5, r1
-; CHECK-FP16-NEXT: vmov.32 d9[0], r4
+; CHECK-FP16-NEXT: vmov.32 d11[0], r4
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfdi
-; CHECK-FP16-NEXT: mvn r10, #0
-; CHECK-FP16-NEXT: subs r2, r4, r10
-; CHECK-FP16-NEXT: sbcs r2, r5, #0
-; CHECK-FP16-NEXT: vmov.32 d8[0], r0
-; CHECK-FP16-NEXT: mov r2, #0
-; CHECK-FP16-NEXT: vmov s0, r9
-; CHECK-FP16-NEXT: movwlt r2, #1
-; CHECK-FP16-NEXT: cmp r2, #0
-; CHECK-FP16-NEXT: mvnne r2, #0
-; CHECK-FP16-NEXT: subs r0, r0, r10
+; CHECK-FP16-NEXT: vmov.32 d10[0], r0
+; CHECK-FP16-NEXT: mvn r7, #0
+; CHECK-FP16-NEXT: subs r0, r0, r7
+; CHECK-FP16-NEXT: vmov.i64 q6, #0xffffffff
; CHECK-FP16-NEXT: sbcs r0, r1, #0
-; CHECK-FP16-NEXT: vmov.32 d9[1], r5
+; CHECK-FP16-NEXT: vmov.32 d11[1], r5
; CHECK-FP16-NEXT: mov r0, #0
-; CHECK-FP16-NEXT: vmov.i64 q5, #0xffffffff
+; CHECK-FP16-NEXT: vmov s0, r8
; CHECK-FP16-NEXT: movwlt r0, #1
; CHECK-FP16-NEXT: cmp r0, #0
-; CHECK-FP16-NEXT: vmov.32 d8[1], r1
+; CHECK-FP16-NEXT: vmov.32 d10[1], r1
; CHECK-FP16-NEXT: mvnne r0, #0
+; CHECK-FP16-NEXT: subs r1, r4, r7
; CHECK-FP16-NEXT: mov r6, #0
-; CHECK-FP16-NEXT: vdup.32 d17, r2
+; CHECK-FP16-NEXT: sbcs r1, r5, #0
+; CHECK-FP16-NEXT: vmov s16, r9
+; CHECK-FP16-NEXT: mov r1, #0
+; CHECK-FP16-NEXT: movwlt r1, #1
+; CHECK-FP16-NEXT: cmp r1, #0
+; CHECK-FP16-NEXT: mvnne r1, #0
+; CHECK-FP16-NEXT: vdup.32 d17, r1
; CHECK-FP16-NEXT: vdup.32 d16, r0
-; CHECK-FP16-NEXT: vbif q4, q5, q8
+; CHECK-FP16-NEXT: vbif q5, q6, q8
+; CHECK-FP16-NEXT: vmov r9, r8, d10
; CHECK-FP16-NEXT: bl __fixhfdi
-; CHECK-FP16-NEXT: vmov s0, r8
+; CHECK-FP16-NEXT: vmov.f32 s0, s16
; CHECK-FP16-NEXT: mov r4, r0
; CHECK-FP16-NEXT: mov r5, r1
-; CHECK-FP16-NEXT: vmov.32 d13[0], r0
-; CHECK-FP16-NEXT: vmov r7, r8, d8
+; CHECK-FP16-NEXT: vmov.32 d14[0], r0
; CHECK-FP16-NEXT: bl __fixhfdi
-; CHECK-FP16-NEXT: subs r2, r4, r10
-; CHECK-FP16-NEXT: vmov.32 d12[0], r0
+; CHECK-FP16-NEXT: subs r2, r4, r7
+; CHECK-FP16-NEXT: vmov.32 d15[0], r0
; CHECK-FP16-NEXT: sbcs r2, r5, #0
; CHECK-FP16-NEXT: mov r2, #0
-; CHECK-FP16-NEXT: vmov.32 d13[1], r5
; CHECK-FP16-NEXT: movwlt r2, #1
-; CHECK-FP16-NEXT: cmp r2, #0
-; CHECK-FP16-NEXT: mvnne r2, #0
-; CHECK-FP16-NEXT: subs r0, r0, r10
+; CHECK-FP16-NEXT: subs r0, r0, r7
; CHECK-FP16-NEXT: sbcs r0, r1, #0
-; CHECK-FP16-NEXT: vdup.32 d17, r2
+; CHECK-FP16-NEXT: vmov.32 d15[1], r1
; CHECK-FP16-NEXT: mov r0, #0
-; CHECK-FP16-NEXT: vmov.32 d12[1], r1
; CHECK-FP16-NEXT: movwlt r0, #1
; CHECK-FP16-NEXT: cmp r0, #0
; CHECK-FP16-NEXT: mvnne r0, #0
-; CHECK-FP16-NEXT: vmov r2, r3, d9
-; CHECK-FP16-NEXT: vdup.32 d16, r0
-; CHECK-FP16-NEXT: rsbs r7, r7, #0
-; CHECK-FP16-NEXT: vbsl q8, q6, q5
-; CHECK-FP16-NEXT: rscs r7, r8, #0
-; CHECK-FP16-NEXT: mov r7, #0
-; CHECK-FP16-NEXT: movwlt r7, #1
-; CHECK-FP16-NEXT: cmp r7, #0
-; CHECK-FP16-NEXT: vmov r0, r1, d17
-; CHECK-FP16-NEXT: mvnne r7, #0
-; CHECK-FP16-NEXT: vmov r5, r4, d16
-; CHECK-FP16-NEXT: rsbs r0, r0, #0
-; CHECK-FP16-NEXT: rscs r0, r1, #0
+; CHECK-FP16-NEXT: cmp r2, #0
+; CHECK-FP16-NEXT: vmov.32 d14[1], r5
+; CHECK-FP16-NEXT: mvnne r2, #0
+; CHECK-FP16-NEXT: vmov r5, r4, d11
+; CHECK-FP16-NEXT: vdup.32 d17, r0
+; CHECK-FP16-NEXT: rsbs r0, r9, #0
+; CHECK-FP16-NEXT: vdup.32 d16, r2
+; CHECK-FP16-NEXT: rscs r0, r8, #0
+; CHECK-FP16-NEXT: vbsl q8, q7, q6
; CHECK-FP16-NEXT: mov r0, #0
; CHECK-FP16-NEXT: movwlt r0, #1
-; CHECK-FP16-NEXT: cmp r0, #0
-; CHECK-FP16-NEXT: mvnne r0, #0
-; CHECK-FP16-NEXT: rsbs r1, r2, #0
-; CHECK-FP16-NEXT: rscs r1, r3, #0
-; CHECK-FP16-NEXT: vmov.32 d19[0], r0
+; CHECK-FP16-NEXT: vmov r1, r2, d16
+; CHECK-FP16-NEXT: vmov r3, r7, d17
+; CHECK-FP16-NEXT: rsbs r1, r1, #0
+; CHECK-FP16-NEXT: rscs r1, r2, #0
; CHECK-FP16-NEXT: mov r1, #0
; CHECK-FP16-NEXT: movwlt r1, #1
-; CHECK-FP16-NEXT: cmp r1, #0
-; CHECK-FP16-NEXT: mvnne r1, #0
-; CHECK-FP16-NEXT: rsbs r0, r5, #0
-; CHECK-FP16-NEXT: rscs r0, r4, #0
-; CHECK-FP16-NEXT: vmov.32 d21[0], r1
+; CHECK-FP16-NEXT: rsbs r2, r3, #0
+; CHECK-FP16-NEXT: rscs r2, r7, #0
+; CHECK-FP16-NEXT: mov r2, #0
+; CHECK-FP16-NEXT: movwlt r2, #1
+; CHECK-FP16-NEXT: rsbs r3, r5, #0
+; CHECK-FP16-NEXT: rscs r3, r4, #0
; CHECK-FP16-NEXT: movwlt r6, #1
; CHECK-FP16-NEXT: cmp r6, #0
-; CHECK-FP16-NEXT: vmov.32 d20[0], r7
; CHECK-FP16-NEXT: mvnne r6, #0
-; CHECK-FP16-NEXT: vmov.32 d18[0], r6
-; CHECK-FP16-NEXT: vand q10, q4, q10
-; CHECK-FP16-NEXT: vand q8, q8, q9
-; CHECK-FP16-NEXT: vmovn.i64 d1, q10
-; CHECK-FP16-NEXT: vmovn.i64 d0, q8
-; CHECK-FP16-NEXT: vpop {d8, d9, d10, d11, d12, d13}
-; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+; CHECK-FP16-NEXT: cmp r2, #0
+; CHECK-FP16-NEXT: mvnne r2, #0
+; CHECK-FP16-NEXT: cmp r1, #0
+; CHECK-FP16-NEXT: mvnne r1, #0
+; CHECK-FP16-NEXT: vmov.32 d21[0], r2
+; CHECK-FP16-NEXT: cmp r0, #0
+; CHECK-FP16-NEXT: vmov.32 d20[0], r1
+; CHECK-FP16-NEXT: mvnne r0, #0
+; CHECK-FP16-NEXT: vmov.32 d19[0], r6
+; CHECK-FP16-NEXT: vand q8, q8, q10
+; CHECK-FP16-NEXT: vmov.32 d18[0], r0
+; CHECK-FP16-NEXT: vmovn.i64 d1, q8
+; CHECK-FP16-NEXT: vand q9, q5, q9
+; CHECK-FP16-NEXT: vmovn.i64 d0, q9
+; CHECK-FP16-NEXT: vpop {d8}
+; CHECK-FP16-NEXT: vpop {d10, d11, d12, d13, d14, d15}
+; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
entry:
%conv = fptosi <4 x half> %x to <4 x i64>
%0 = icmp slt <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
@@ -1624,56 +1643,59 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) {
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: vorr d0, d9, d9
; CHECK-NEXT: bl __fixdfti
-; CHECK-NEXT: mov r4, r1
-; CHECK-NEXT: mvn r9, #0
-; CHECK-NEXT: subs r1, r0, r9
-; CHECK-NEXT: mvn r5, #-2147483648
-; CHECK-NEXT: sbcs r1, r4, r5
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mvn r8, #0
+; CHECK-NEXT: subs r0, r0, r8
+; CHECK-NEXT: mvn r6, #-2147483648
+; CHECK-NEXT: sbcs r0, r1, r6
+; CHECK-NEXT: mov r10, r1
+; CHECK-NEXT: sbcs r0, r2, #0
; CHECK-NEXT: vorr d0, d8, d8
-; CHECK-NEXT: sbcs r1, r2, #0
+; CHECK-NEXT: sbcs r0, r3, #0
; CHECK-NEXT: mov r7, #0
-; CHECK-NEXT: sbcs r1, r3, #0
-; CHECK-NEXT: mov r8, #-2147483648
-; CHECK-NEXT: mov r1, #0
-; CHECK-NEXT: mov r10, #0
-; CHECK-NEXT: movwlt r1, #1
-; CHECK-NEXT: cmp r1, #0
-; CHECK-NEXT: moveq r3, r1
-; CHECK-NEXT: movne r1, r2
-; CHECK-NEXT: moveq r4, r5
-; CHECK-NEXT: moveq r0, r9
-; CHECK-NEXT: rsbs r2, r0, #0
-; CHECK-NEXT: rscs r2, r4, #-2147483648
-; CHECK-NEXT: sbcs r1, r9, r1
-; CHECK-NEXT: sbcs r1, r9, r3
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: mov r9, #0
+; CHECK-NEXT: movwlt r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: moveq r3, r0
+; CHECK-NEXT: movne r0, r2
+; CHECK-NEXT: moveq r10, r6
+; CHECK-NEXT: moveq r5, r8
+; CHECK-NEXT: rsbs r1, r5, #0
+; CHECK-NEXT: rscs r1, r10, #-2147483648
+; CHECK-NEXT: sbcs r0, r8, r0
+; CHECK-NEXT: sbcs r0, r8, r3
; CHECK-NEXT: movwlt r7, #1
; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: movne r7, r0
-; CHECK-NEXT: moveq r4, r8
+; CHECK-NEXT: moveq r5, r7
; CHECK-NEXT: bl __fixdfti
-; CHECK-NEXT: subs r6, r0, r9
-; CHECK-NEXT: vmov.32 d1[0], r7
-; CHECK-NEXT: sbcs r6, r1, r5
-; CHECK-NEXT: sbcs r6, r2, #0
-; CHECK-NEXT: sbcs r6, r3, #0
-; CHECK-NEXT: mov r6, #0
-; CHECK-NEXT: movwlt r6, #1
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: moveq r3, r6
-; CHECK-NEXT: movne r6, r2
-; CHECK-NEXT: movne r5, r1
-; CHECK-NEXT: moveq r0, r9
+; CHECK-NEXT: subs r4, r0, r8
+; CHECK-NEXT: vmov.32 d1[0], r5
+; CHECK-NEXT: sbcs r4, r1, r6
+; CHECK-NEXT: sbcs r4, r2, #0
+; CHECK-NEXT: sbcs r4, r3, #0
+; CHECK-NEXT: mov r4, #0
+; CHECK-NEXT: movwlt r4, #1
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: moveq r3, r4
+; CHECK-NEXT: movne r6, r1
+; CHECK-NEXT: movne r4, r2
+; CHECK-NEXT: moveq r0, r8
; CHECK-NEXT: rsbs r1, r0, #0
-; CHECK-NEXT: rscs r1, r5, #-2147483648
-; CHECK-NEXT: sbcs r1, r9, r6
-; CHECK-NEXT: sbcs r1, r9, r3
-; CHECK-NEXT: movwlt r10, #1
-; CHECK-NEXT: cmp r10, #0
-; CHECK-NEXT: movne r10, r0
-; CHECK-NEXT: moveq r5, r8
-; CHECK-NEXT: vmov.32 d0[0], r10
-; CHECK-NEXT: vmov.32 d1[1], r4
-; CHECK-NEXT: vmov.32 d0[1], r5
+; CHECK-NEXT: rscs r1, r6, #-2147483648
+; CHECK-NEXT: sbcs r1, r8, r4
+; CHECK-NEXT: sbcs r1, r8, r3
+; CHECK-NEXT: movwlt r9, #1
+; CHECK-NEXT: cmp r9, #0
+; CHECK-NEXT: moveq r0, r9
+; CHECK-NEXT: mov r1, #-2147483648
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: vmov.32 d0[0], r0
+; CHECK-NEXT: moveq r10, r1
+; CHECK-NEXT: cmp r9, #0
+; CHECK-NEXT: vmov.32 d1[1], r10
+; CHECK-NEXT: moveq r6, r1
+; CHECK-NEXT: vmov.32 d0[1], r6
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
entry:
@@ -1689,36 +1711,39 @@ entry:
define <2 x i64> @utest_f64i64(<2 x double> %x) {
; CHECK-LABEL: utest_f64i64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, lr}
-; CHECK-NEXT: push {r4, r5, r6, lr}
+; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: vorr d0, d9, d9
; CHECK-NEXT: bl __fixunsdfti
-; CHECK-NEXT: mov r4, r1
-; CHECK-NEXT: subs r1, r2, #1
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: subs r0, r2, #1
; CHECK-NEXT: vorr d0, d8, d8
-; CHECK-NEXT: sbcs r1, r3, #0
+; CHECK-NEXT: sbcs r0, r3, #0
+; CHECK-NEXT: mov r7, #0
+; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: movwlo r7, #1
+; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: mov r6, #0
-; CHECK-NEXT: mov r5, #0
-; CHECK-NEXT: movwlo r6, #1
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: moveq r4, r6
-; CHECK-NEXT: movne r6, r0
+; CHECK-NEXT: moveq r5, r7
; CHECK-NEXT: bl __fixunsdfti
; CHECK-NEXT: subs r2, r2, #1
-; CHECK-NEXT: vmov.32 d1[0], r6
+; CHECK-NEXT: vmov.32 d1[0], r5
; CHECK-NEXT: sbcs r2, r3, #0
-; CHECK-NEXT: movwlo r5, #1
-; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: moveq r0, r5
-; CHECK-NEXT: movne r5, r1
+; CHECK-NEXT: movwlo r6, #1
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: moveq r0, r6
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: movne r7, r4
; CHECK-NEXT: vmov.32 d0[0], r0
-; CHECK-NEXT: vmov.32 d1[1], r4
-; CHECK-NEXT: vmov.32 d0[1], r5
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: vmov.32 d1[1], r7
+; CHECK-NEXT: movne r6, r1
+; CHECK-NEXT: vmov.32 d0[1], r6
; CHECK-NEXT: vpop {d8, d9}
-; CHECK-NEXT: pop {r4, r5, r6, pc}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
entry:
%conv = fptoui <2 x double> %x to <2 x i128>
%0 = icmp ult <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
@@ -1730,26 +1755,26 @@ entry:
define <2 x i64> @ustest_f64i64(<2 x double> %x) {
; CHECK-LABEL: ustest_f64i64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: vorr d0, d9, d9
; CHECK-NEXT: bl __fixdfti
-; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: mov r8, r1
; CHECK-NEXT: subs r1, r2, #1
; CHECK-NEXT: sbcs r1, r3, #0
-; CHECK-NEXT: mov r8, #1
-; CHECK-NEXT: mov r1, #0
-; CHECK-NEXT: movge r2, r8
-; CHECK-NEXT: movwlt r1, #1
-; CHECK-NEXT: cmp r1, #0
-; CHECK-NEXT: moveq r3, r1
-; CHECK-NEXT: moveq r4, r1
-; CHECK-NEXT: movne r1, r0
-; CHECK-NEXT: rsbs r0, r1, #0
-; CHECK-NEXT: rscs r0, r4, #0
+; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: movwlt r6, #1
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: mov r9, #1
+; CHECK-NEXT: moveq r3, r6
+; CHECK-NEXT: moveq r8, r6
+; CHECK-NEXT: moveq r2, r9
+; CHECK-NEXT: movne r6, r0
+; CHECK-NEXT: rsbs r0, r6, #0
+; CHECK-NEXT: rscs r0, r8, #0
; CHECK-NEXT: vorr d0, d8, d8
; CHECK-NEXT: rscs r0, r2, #0
; CHECK-NEXT: mov r7, #0
@@ -1757,32 +1782,34 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) {
; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: movwlt r7, #1
; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: moveq r4, r7
-; CHECK-NEXT: movne r7, r1
+; CHECK-NEXT: moveq r6, r7
; CHECK-NEXT: bl __fixdfti
-; CHECK-NEXT: subs r6, r2, #1
-; CHECK-NEXT: vmov.32 d1[0], r7
-; CHECK-NEXT: sbcs r6, r3, #0
-; CHECK-NEXT: movlt r8, r2
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movwlt r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: moveq r3, r2
-; CHECK-NEXT: moveq r1, r2
-; CHECK-NEXT: movne r2, r0
-; CHECK-NEXT: rsbs r0, r2, #0
+; CHECK-NEXT: subs r4, r2, #1
+; CHECK-NEXT: vmov.32 d1[0], r6
+; CHECK-NEXT: sbcs r4, r3, #0
+; CHECK-NEXT: mov r4, #0
+; CHECK-NEXT: movwlt r4, #1
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: movne r9, r2
+; CHECK-NEXT: moveq r3, r4
+; CHECK-NEXT: moveq r1, r4
+; CHECK-NEXT: movne r4, r0
+; CHECK-NEXT: rsbs r0, r4, #0
; CHECK-NEXT: rscs r0, r1, #0
-; CHECK-NEXT: rscs r0, r8, #0
+; CHECK-NEXT: rscs r0, r9, #0
; CHECK-NEXT: rscs r0, r3, #0
; CHECK-NEXT: movwlt r5, #1
; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: moveq r2, r5
+; CHECK-NEXT: moveq r4, r5
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: movne r7, r8
+; CHECK-NEXT: vmov.32 d0[0], r4
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: vmov.32 d1[1], r7
; CHECK-NEXT: movne r5, r1
-; CHECK-NEXT: vmov.32 d0[0], r2
-; CHECK-NEXT: vmov.32 d1[1], r4
; CHECK-NEXT: vmov.32 d0[1], r5
; CHECK-NEXT: vpop {d8, d9}
-; CHECK-NEXT: pop {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
entry:
%conv = fptosi <2 x double> %x to <2 x i128>
%0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
@@ -1803,56 +1830,59 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) {
; CHECK-NEXT: vmov.f64 d8, d0
; CHECK-NEXT: vmov.f32 s0, s17
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: mov r4, r1
-; CHECK-NEXT: mvn r9, #0
-; CHECK-NEXT: subs r1, r0, r9
-; CHECK-NEXT: mvn r5, #-2147483648
-; CHECK-NEXT: sbcs r1, r4, r5
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mvn r8, #0
+; CHECK-NEXT: subs r0, r0, r8
+; CHECK-NEXT: mvn r6, #-2147483648
+; CHECK-NEXT: sbcs r0, r1, r6
; CHECK-NEXT: vmov.f32 s0, s16
-; CHECK-NEXT: sbcs r1, r2, #0
+; CHECK-NEXT: sbcs r0, r2, #0
+; CHECK-NEXT: mov r10, r1
+; CHECK-NEXT: sbcs r0, r3, #0
; CHECK-NEXT: mov r7, #0
-; CHECK-NEXT: sbcs r1, r3, #0
-; CHECK-NEXT: mov r8, #-2147483648
-; CHECK-NEXT: mov r1, #0
-; CHECK-NEXT: mov r10, #0
-; CHECK-NEXT: movwlt r1, #1
-; CHECK-NEXT: cmp r1, #0
-; CHECK-NEXT: moveq r3, r1
-; CHECK-NEXT: movne r1, r2
-; CHECK-NEXT: moveq r4, r5
-; CHECK-NEXT: moveq r0, r9
-; CHECK-NEXT: rsbs r2, r0, #0
-; CHECK-NEXT: rscs r2, r4, #-2147483648
-; CHECK-NEXT: sbcs r1, r9, r1
-; CHECK-NEXT: sbcs r1, r9, r3
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: mov r9, #0
+; CHECK-NEXT: movwlt r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: moveq r3, r0
+; CHECK-NEXT: movne r0, r2
+; CHECK-NEXT: moveq r10, r6
+; CHECK-NEXT: moveq r5, r8
+; CHECK-NEXT: rsbs r1, r5, #0
+; CHECK-NEXT: rscs r1, r10, #-2147483648
+; CHECK-NEXT: sbcs r0, r8, r0
+; CHECK-NEXT: sbcs r0, r8, r3
; CHECK-NEXT: movwlt r7, #1
; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: movne r7, r0
-; CHECK-NEXT: moveq r4, r8
+; CHECK-NEXT: moveq r5, r7
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: subs r6, r0, r9
-; CHECK-NEXT: vmov.32 d1[0], r7
-; CHECK-NEXT: sbcs r6, r1, r5
-; CHECK-NEXT: sbcs r6, r2, #0
-; CHECK-NEXT: sbcs r6, r3, #0
-; CHECK-NEXT: mov r6, #0
-; CHECK-NEXT: movwlt r6, #1
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: moveq r3, r6
-; CHECK-NEXT: movne r6, r2
-; CHECK-NEXT: movne r5, r1
-; CHECK-NEXT: moveq r0, r9
+; CHECK-NEXT: subs r4, r0, r8
+; CHECK-NEXT: vmov.32 d1[0], r5
+; CHECK-NEXT: sbcs r4, r1, r6
+; CHECK-NEXT: sbcs r4, r2, #0
+; CHECK-NEXT: sbcs r4, r3, #0
+; CHECK-NEXT: mov r4, #0
+; CHECK-NEXT: movwlt r4, #1
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: moveq r3, r4
+; CHECK-NEXT: movne r6, r1
+; CHECK-NEXT: movne r4, r2
+; CHECK-NEXT: moveq r0, r8
; CHECK-NEXT: rsbs r1, r0, #0
-; CHECK-NEXT: rscs r1, r5, #-2147483648
-; CHECK-NEXT: sbcs r1, r9, r6
-; CHECK-NEXT: sbcs r1, r9, r3
-; CHECK-NEXT: movwlt r10, #1
-; CHECK-NEXT: cmp r10, #0
-; CHECK-NEXT: movne r10, r0
-; CHECK-NEXT: moveq r5, r8
-; CHECK-NEXT: vmov.32 d0[0], r10
-; CHECK-NEXT: vmov.32 d1[1], r4
-; CHECK-NEXT: vmov.32 d0[1], r5
+; CHECK-NEXT: rscs r1, r6, #-2147483648
+; CHECK-NEXT: sbcs r1, r8, r4
+; CHECK-NEXT: sbcs r1, r8, r3
+; CHECK-NEXT: movwlt r9, #1
+; CHECK-NEXT: cmp r9, #0
+; CHECK-NEXT: moveq r0, r9
+; CHECK-NEXT: mov r1, #-2147483648
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: vmov.32 d0[0], r0
+; CHECK-NEXT: moveq r10, r1
+; CHECK-NEXT: cmp r9, #0
+; CHECK-NEXT: vmov.32 d1[1], r10
+; CHECK-NEXT: moveq r6, r1
+; CHECK-NEXT: vmov.32 d0[1], r6
; CHECK-NEXT: vpop {d8}
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
entry:
@@ -1868,36 +1898,39 @@ entry:
define <2 x i64> @utest_f32i64(<2 x float> %x) {
; CHECK-LABEL: utest_f32i64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, lr}
-; CHECK-NEXT: push {r4, r5, r6, lr}
+; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: .vsave {d8}
; CHECK-NEXT: vpush {d8}
; CHECK-NEXT: vmov.f64 d8, d0
; CHECK-NEXT: vmov.f32 s0, s17
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vmov.f32 s0, s16
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: subs r0, r2, #1
+; CHECK-NEXT: mov r7, #0
+; CHECK-NEXT: sbcs r0, r3, #0
; CHECK-NEXT: mov r4, r1
-; CHECK-NEXT: subs r1, r2, #1
+; CHECK-NEXT: movwlo r7, #1
+; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: mov r6, #0
-; CHECK-NEXT: sbcs r1, r3, #0
-; CHECK-NEXT: mov r5, #0
-; CHECK-NEXT: movwlo r6, #1
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: moveq r4, r6
-; CHECK-NEXT: movne r6, r0
+; CHECK-NEXT: moveq r5, r7
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: subs r2, r2, #1
-; CHECK-NEXT: vmov.32 d1[0], r6
+; CHECK-NEXT: vmov.32 d1[0], r5
; CHECK-NEXT: sbcs r2, r3, #0
-; CHECK-NEXT: movwlo r5, #1
-; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: moveq r0, r5
-; CHECK-NEXT: movne r5, r1
+; CHECK-NEXT: movwlo r6, #1
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: moveq r0, r6
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: movne r7, r4
; CHECK-NEXT: vmov.32 d0[0], r0
-; CHECK-NEXT: vmov.32 d1[1], r4
-; CHECK-NEXT: vmov.32 d0[1], r5
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: vmov.32 d1[1], r7
+; CHECK-NEXT: movne r6, r1
+; CHECK-NEXT: vmov.32 d0[1], r6
; CHECK-NEXT: vpop {d8}
-; CHECK-NEXT: pop {r4, r5, r6, pc}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
entry:
%conv = fptoui <2 x float> %x to <2 x i128>
%0 = icmp ult <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
@@ -1909,59 +1942,61 @@ entry:
define <2 x i64> @ustest_f32i64(<2 x float> %x) {
; CHECK-LABEL: ustest_f32i64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEXT: .vsave {d8}
; CHECK-NEXT: vpush {d8}
; CHECK-NEXT: vmov.f64 d8, d0
; CHECK-NEXT: vmov.f32 s0, s17
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: mov r8, r1
; CHECK-NEXT: subs r1, r2, #1
-; CHECK-NEXT: sbcs r1, r3, #0
-; CHECK-NEXT: mov r8, #1
-; CHECK-NEXT: mov r1, #0
; CHECK-NEXT: vmov.f32 s0, s16
-; CHECK-NEXT: movge r2, r8
-; CHECK-NEXT: movwlt r1, #1
-; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: sbcs r1, r3, #0
+; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: mov r9, #1
+; CHECK-NEXT: movwlt r6, #1
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: moveq r3, r6
+; CHECK-NEXT: moveq r8, r6
+; CHECK-NEXT: moveq r2, r9
+; CHECK-NEXT: movne r6, r0
+; CHECK-NEXT: rsbs r0, r6, #0
; CHECK-NEXT: mov r7, #0
-; CHECK-NEXT: moveq r3, r1
-; CHECK-NEXT: moveq r4, r1
-; CHECK-NEXT: movne r1, r0
-; CHECK-NEXT: rsbs r0, r1, #0
-; CHECK-NEXT: rscs r0, r4, #0
+; CHECK-NEXT: rscs r0, r8, #0
; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: rscs r0, r2, #0
; CHECK-NEXT: rscs r0, r3, #0
; CHECK-NEXT: movwlt r7, #1
; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: moveq r4, r7
-; CHECK-NEXT: movne r7, r1
+; CHECK-NEXT: moveq r6, r7
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: subs r6, r2, #1
-; CHECK-NEXT: vmov.32 d1[0], r7
-; CHECK-NEXT: sbcs r6, r3, #0
-; CHECK-NEXT: movlt r8, r2
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movwlt r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: moveq r3, r2
-; CHECK-NEXT: moveq r1, r2
-; CHECK-NEXT: movne r2, r0
-; CHECK-NEXT: rsbs r0, r2, #0
+; CHECK-NEXT: subs r4, r2, #1
+; CHECK-NEXT: vmov.32 d1[0], r6
+; CHECK-NEXT: sbcs r4, r3, #0
+; CHECK-NEXT: mov r4, #0
+; CHECK-NEXT: movwlt r4, #1
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: movne r9, r2
+; CHECK-NEXT: moveq r3, r4
+; CHECK-NEXT: moveq r1, r4
+; CHECK-NEXT: movne r4, r0
+; CHECK-NEXT: rsbs r0, r4, #0
; CHECK-NEXT: rscs r0, r1, #0
-; CHECK-NEXT: rscs r0, r8, #0
+; CHECK-NEXT: rscs r0, r9, #0
; CHECK-NEXT: rscs r0, r3, #0
; CHECK-NEXT: movwlt r5, #1
; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: moveq r2, r5
+; CHECK-NEXT: moveq r4, r5
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: movne r7, r8
+; CHECK-NEXT: vmov.32 d0[0], r4
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: vmov.32 d1[1], r7
; CHECK-NEXT: movne r5, r1
-; CHECK-NEXT: vmov.32 d0[0], r2
-; CHECK-NEXT: vmov.32 d1[1], r4
; CHECK-NEXT: vmov.32 d0[1], r5
; CHECK-NEXT: vpop {d8}
-; CHECK-NEXT: pop {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
entry:
%conv = fptosi <2 x float> %x to <2 x i128>
%0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
@@ -1975,11 +2010,13 @@ entry:
define <2 x i64> @stest_f16i64(<2 x half> %x) {
; CHECK-NEON-LABEL: stest_f16i64:
; CHECK-NEON: @ %bb.0: @ %entry
-; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-NEON-NEXT: .vsave {d8}
-; CHECK-NEON-NEXT: vpush {d8}
-; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEON-NEXT: .pad #4
+; CHECK-NEON-NEXT: sub sp, sp, #4
+; CHECK-NEON-NEXT: .vsave {d8}
+; CHECK-NEON-NEXT: vpush {d8}
+; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmov.f32 s16, s1
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: mov r8, r0
@@ -1987,58 +2024,62 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) {
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: bl __fixsfti
-; CHECK-NEON-NEXT: mov r4, r1
+; CHECK-NEON-NEXT: mov r5, r0
; CHECK-NEON-NEXT: mvn r9, #0
-; CHECK-NEON-NEXT: subs r1, r0, r9
-; CHECK-NEON-NEXT: mvn r6, #-2147483648
-; CHECK-NEON-NEXT: sbcs r1, r4, r6
+; CHECK-NEON-NEXT: subs r0, r0, r9
+; CHECK-NEON-NEXT: mvn r7, #-2147483648
+; CHECK-NEON-NEXT: sbcs r0, r1, r7
+; CHECK-NEON-NEXT: mov r11, r1
+; CHECK-NEON-NEXT: sbcs r0, r2, #0
; CHECK-NEON-NEXT: vmov s0, r8
-; CHECK-NEON-NEXT: sbcs r1, r2, #0
-; CHECK-NEON-NEXT: mov r5, #0
-; CHECK-NEON-NEXT: sbcs r1, r3, #0
-; CHECK-NEON-NEXT: mov r8, #-2147483648
-; CHECK-NEON-NEXT: mov r1, #0
+; CHECK-NEON-NEXT: sbcs r0, r3, #0
+; CHECK-NEON-NEXT: mov r6, #0
+; CHECK-NEON-NEXT: mov r0, #0
; CHECK-NEON-NEXT: mov r10, #0
-; CHECK-NEON-NEXT: movwlt r1, #1
-; CHECK-NEON-NEXT: cmp r1, #0
-; CHECK-NEON-NEXT: moveq r3, r1
-; CHECK-NEON-NEXT: movne r1, r2
-; CHECK-NEON-NEXT: moveq r4, r6
-; CHECK-NEON-NEXT: moveq r0, r9
-; CHECK-NEON-NEXT: rsbs r2, r0, #0
-; CHECK-NEON-NEXT: rscs r2, r4, #-2147483648
-; CHECK-NEON-NEXT: sbcs r1, r9, r1
-; CHECK-NEON-NEXT: sbcs r1, r9, r3
-; CHECK-NEON-NEXT: movwlt r5, #1
-; CHECK-NEON-NEXT: cmp r5, #0
-; CHECK-NEON-NEXT: movne r5, r0
-; CHECK-NEON-NEXT: moveq r4, r8
+; CHECK-NEON-NEXT: movwlt r0, #1
+; CHECK-NEON-NEXT: cmp r0, #0
+; CHECK-NEON-NEXT: moveq r3, r0
+; CHECK-NEON-NEXT: movne r0, r2
+; CHECK-NEON-NEXT: moveq r11, r7
+; CHECK-NEON-NEXT: moveq r5, r9
+; CHECK-NEON-NEXT: rsbs r1, r5, #0
+; CHECK-NEON-NEXT: rscs r1, r11, #-2147483648
+; CHECK-NEON-NEXT: sbcs r0, r9, r0
+; CHECK-NEON-NEXT: sbcs r0, r9, r3
+; CHECK-NEON-NEXT: movwlt r6, #1
+; CHECK-NEON-NEXT: cmp r6, #0
+; CHECK-NEON-NEXT: moveq r5, r6
; CHECK-NEON-NEXT: bl __fixsfti
-; CHECK-NEON-NEXT: subs r7, r0, r9
+; CHECK-NEON-NEXT: subs r4, r0, r9
; CHECK-NEON-NEXT: vmov.32 d1[0], r5
-; CHECK-NEON-NEXT: sbcs r7, r1, r6
-; CHECK-NEON-NEXT: sbcs r7, r2, #0
-; CHECK-NEON-NEXT: sbcs r7, r3, #0
-; CHECK-NEON-NEXT: mov r7, #0
-; CHECK-NEON-NEXT: movwlt r7, #1
-; CHECK-NEON-NEXT: cmp r7, #0
-; CHECK-NEON-NEXT: moveq r3, r7
-; CHECK-NEON-NEXT: movne r7, r2
-; CHECK-NEON-NEXT: movne r6, r1
+; CHECK-NEON-NEXT: sbcs r4, r1, r7
+; CHECK-NEON-NEXT: sbcs r4, r2, #0
+; CHECK-NEON-NEXT: sbcs r4, r3, #0
+; CHECK-NEON-NEXT: mov r4, #0
+; CHECK-NEON-NEXT: movwlt r4, #1
+; CHECK-NEON-NEXT: cmp r4, #0
+; CHECK-NEON-NEXT: moveq r3, r4
+; CHECK-NEON-NEXT: movne r7, r1
+; CHECK-NEON-NEXT: movne r4, r2
; CHECK-NEON-NEXT: moveq r0, r9
; CHECK-NEON-NEXT: rsbs r1, r0, #0
-; CHECK-NEON-NEXT: rscs r1, r6, #-2147483648
-; CHECK-NEON-NEXT: sbcs r1, r9, r7
+; CHECK-NEON-NEXT: rscs r1, r7, #-2147483648
+; CHECK-NEON-NEXT: sbcs r1, r9, r4
; CHECK-NEON-NEXT: sbcs r1, r9, r3
; CHECK-NEON-NEXT: movwlt r10, #1
; CHECK-NEON-NEXT: cmp r10, #0
-; CHECK-NEON-NEXT: movne r10, r0
-; CHECK-NEON-NEXT: moveq r6, r8
-; CHECK-NEON-NEXT: vmov.32 d0[0], r10
-; CHECK-NEON-NEXT: vmov.32 d1[1], r4
-; CHECK-NEON-NEXT: vmov.32 d0[1], r6
+; CHECK-NEON-NEXT: moveq r0, r10
+; CHECK-NEON-NEXT: mov r1, #-2147483648
+; CHECK-NEON-NEXT: cmp r6, #0
+; CHECK-NEON-NEXT: vmov.32 d0[0], r0
+; CHECK-NEON-NEXT: moveq r11, r1
+; CHECK-NEON-NEXT: cmp r10, #0
+; CHECK-NEON-NEXT: vmov.32 d1[1], r11
+; CHECK-NEON-NEXT: moveq r7, r1
+; CHECK-NEON-NEXT: vmov.32 d0[1], r7
; CHECK-NEON-NEXT: vpop {d8}
-; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+; CHECK-NEON-NEXT: add sp, sp, #4
+; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
;
; CHECK-FP16-LABEL: stest_f16i64:
; CHECK-FP16: @ %bb.0: @ %entry
@@ -2048,56 +2089,59 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) {
; CHECK-FP16-NEXT: vmov.u16 r7, d0[0]
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfti
-; CHECK-FP16-NEXT: mov r4, r1
-; CHECK-FP16-NEXT: mvn r9, #0
-; CHECK-FP16-NEXT: subs r1, r0, r9
-; CHECK-FP16-NEXT: mvn r5, #-2147483648
-; CHECK-FP16-NEXT: sbcs r1, r4, r5
+; CHECK-FP16-NEXT: mov r5, r0
+; CHECK-FP16-NEXT: mvn r8, #0
+; CHECK-FP16-NEXT: subs r0, r0, r8
+; CHECK-FP16-NEXT: mvn r6, #-2147483648
+; CHECK-FP16-NEXT: sbcs r0, r1, r6
+; CHECK-FP16-NEXT: mov r10, r1
+; CHECK-FP16-NEXT: sbcs r0, r2, #0
; CHECK-FP16-NEXT: vmov s0, r7
-; CHECK-FP16-NEXT: sbcs r1, r2, #0
+; CHECK-FP16-NEXT: sbcs r0, r3, #0
; CHECK-FP16-NEXT: mov r7, #0
-; CHECK-FP16-NEXT: sbcs r1, r3, #0
-; CHECK-FP16-NEXT: mov r8, #-2147483648
-; CHECK-FP16-NEXT: mov r1, #0
-; CHECK-FP16-NEXT: mov r10, #0
-; CHECK-FP16-NEXT: movwlt r1, #1
-; CHECK-FP16-NEXT: cmp r1, #0
-; CHECK-FP16-NEXT: moveq r3, r1
-; CHECK-FP16-NEXT: movne r1, r2
-; CHECK-FP16-NEXT: moveq r4, r5
-; CHECK-FP16-NEXT: moveq r0, r9
-; CHECK-FP16-NEXT: rsbs r2, r0, #0
-; CHECK-FP16-NEXT: rscs r2, r4, #-2147483648
-; CHECK-FP16-NEXT: sbcs r1, r9, r1
-; CHECK-FP16-NEXT: sbcs r1, r9, r3
+; CHECK-FP16-NEXT: mov r0, #0
+; CHECK-FP16-NEXT: mov r9, #0
+; CHECK-FP16-NEXT: movwlt r0, #1
+; CHECK-FP16-NEXT: cmp r0, #0
+; CHECK-FP16-NEXT: moveq r3, r0
+; CHECK-FP16-NEXT: movne r0, r2
+; CHECK-FP16-NEXT: moveq r10, r6
+; CHECK-FP16-NEXT: moveq r5, r8
+; CHECK-FP16-NEXT: rsbs r1, r5, #0
+; CHECK-FP16-NEXT: rscs r1, r10, #-2147483648
+; CHECK-FP16-NEXT: sbcs r0, r8, r0
+; CHECK-FP16-NEXT: sbcs r0, r8, r3
; CHECK-FP16-NEXT: movwlt r7, #1
; CHECK-FP16-NEXT: cmp r7, #0
-; CHECK-FP16-NEXT: movne r7, r0
-; CHECK-FP16-NEXT: moveq r4, r8
+; CHECK-FP16-NEXT: moveq r5, r7
; CHECK-FP16-NEXT: bl __fixhfti
-; CHECK-FP16-NEXT: subs r6, r0, r9
-; CHECK-FP16-NEXT: vmov.32 d1[0], r7
-; CHECK-FP16-NEXT: sbcs r6, r1, r5
-; CHECK-FP16-NEXT: sbcs r6, r2, #0
-; CHECK-FP16-NEXT: sbcs r6, r3, #0
-; CHECK-FP16-NEXT: mov r6, #0
-; CHECK-FP16-NEXT: movwlt r6, #1
-; CHECK-FP16-NEXT: cmp r6, #0
-; CHECK-FP16-NEXT: moveq r3, r6
-; CHECK-FP16-NEXT: movne r6, r2
-; CHECK-FP16-NEXT: movne r5, r1
-; CHECK-FP16-NEXT: moveq r0, r9
+; CHECK-FP16-NEXT: subs r4, r0, r8
+; CHECK-FP16-NEXT: vmov.32 d1[0], r5
+; CHECK-FP16-NEXT: sbcs r4, r1, r6
+; CHECK-FP16-NEXT: sbcs r4, r2, #0
+; CHECK-FP16-NEXT: sbcs r4, r3, #0
+; CHECK-FP16-NEXT: mov r4, #0
+; CHECK-FP16-NEXT: movwlt r4, #1
+; CHECK-FP16-NEXT: cmp r4, #0
+; CHECK-FP16-NEXT: moveq r3, r4
+; CHECK-FP16-NEXT: movne r6, r1
+; CHECK-FP16-NEXT: movne r4, r2
+; CHECK-FP16-NEXT: moveq r0, r8
; CHECK-FP16-NEXT: rsbs r1, r0, #0
-; CHECK-FP16-NEXT: rscs r1, r5, #-2147483648
-; CHECK-FP16-NEXT: sbcs r1, r9, r6
-; CHECK-FP16-NEXT: sbcs r1, r9, r3
-; CHECK-FP16-NEXT: movwlt r10, #1
-; CHECK-FP16-NEXT: cmp r10, #0
-; CHECK-FP16-NEXT: movne r10, r0
-; CHECK-FP16-NEXT: moveq r5, r8
-; CHECK-FP16-NEXT: vmov.32 d0[0], r10
-; CHECK-FP16-NEXT: vmov.32 d1[1], r4
-; CHECK-FP16-NEXT: vmov.32 d0[1], r5
+; CHECK-FP16-NEXT: rscs r1, r6, #-2147483648
+; CHECK-FP16-NEXT: sbcs r1, r8, r4
+; CHECK-FP16-NEXT: sbcs r1, r8, r3
+; CHECK-FP16-NEXT: movwlt r9, #1
+; CHECK-FP16-NEXT: cmp r9, #0
+; CHECK-FP16-NEXT: moveq r0, r9
+; CHECK-FP16-NEXT: mov r1, #-2147483648
+; CHECK-FP16-NEXT: cmp r7, #0
+; CHECK-FP16-NEXT: vmov.32 d0[0], r0
+; CHECK-FP16-NEXT: moveq r10, r1
+; CHECK-FP16-NEXT: cmp r9, #0
+; CHECK-FP16-NEXT: vmov.32 d1[1], r10
+; CHECK-FP16-NEXT: moveq r6, r1
+; CHECK-FP16-NEXT: vmov.32 d0[1], r6
; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
entry:
%conv = fptosi <2 x half> %x to <2 x i128>
@@ -2112,8 +2156,8 @@ entry:
define <2 x i64> @utesth_f16i64(<2 x half> %x) {
; CHECK-NEON-LABEL: utesth_f16i64:
; CHECK-NEON: @ %bb.0: @ %entry
-; CHECK-NEON-NEXT: .save {r4, r5, r6, lr}
-; CHECK-NEON-NEXT: push {r4, r5, r6, lr}
+; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: .vsave {d8}
; CHECK-NEON-NEXT: vpush {d8}
; CHECK-NEON-NEXT: vmov r0, s0
@@ -2124,60 +2168,66 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: bl __fixunssfti
-; CHECK-NEON-NEXT: mov r4, r1
-; CHECK-NEON-NEXT: subs r1, r2, #1
+; CHECK-NEON-NEXT: mov r6, r0
+; CHECK-NEON-NEXT: subs r0, r2, #1
; CHECK-NEON-NEXT: vmov s0, r5
-; CHECK-NEON-NEXT: sbcs r1, r3, #0
+; CHECK-NEON-NEXT: sbcs r0, r3, #0
; CHECK-NEON-NEXT: mov r5, #0
-; CHECK-NEON-NEXT: mov r6, #0
+; CHECK-NEON-NEXT: mov r4, r1
; CHECK-NEON-NEXT: movwlo r5, #1
; CHECK-NEON-NEXT: cmp r5, #0
-; CHECK-NEON-NEXT: moveq r4, r5
-; CHECK-NEON-NEXT: movne r5, r0
+; CHECK-NEON-NEXT: mov r7, #0
+; CHECK-NEON-NEXT: moveq r6, r5
; CHECK-NEON-NEXT: bl __fixunssfti
; CHECK-NEON-NEXT: subs r2, r2, #1
-; CHECK-NEON-NEXT: vmov.32 d1[0], r5
+; CHECK-NEON-NEXT: vmov.32 d1[0], r6
; CHECK-NEON-NEXT: sbcs r2, r3, #0
-; CHECK-NEON-NEXT: movwlo r6, #1
-; CHECK-NEON-NEXT: cmp r6, #0
-; CHECK-NEON-NEXT: moveq r0, r6
-; CHECK-NEON-NEXT: movne r6, r1
+; CHECK-NEON-NEXT: movwlo r7, #1
+; CHECK-NEON-NEXT: cmp r7, #0
+; CHECK-NEON-NEXT: moveq r0, r7
+; CHECK-NEON-NEXT: cmp r5, #0
+; CHECK-NEON-NEXT: movne r5, r4
; CHECK-NEON-NEXT: vmov.32 d0[0], r0
-; CHECK-NEON-NEXT: vmov.32 d1[1], r4
-; CHECK-NEON-NEXT: vmov.32 d0[1], r6
+; CHECK-NEON-NEXT: cmp r7, #0
+; CHECK-NEON-NEXT: vmov.32 d1[1], r5
+; CHECK-NEON-NEXT: movne r7, r1
+; CHECK-NEON-NEXT: vmov.32 d0[1], r7
; CHECK-NEON-NEXT: vpop {d8}
-; CHECK-NEON-NEXT: pop {r4, r5, r6, pc}
+; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
;
; CHECK-FP16-LABEL: utesth_f16i64:
; CHECK-FP16: @ %bb.0: @ %entry
-; CHECK-FP16-NEXT: .save {r4, r5, r6, lr}
-; CHECK-FP16-NEXT: push {r4, r5, r6, lr}
+; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-FP16-NEXT: vmov.u16 r0, d0[1]
-; CHECK-FP16-NEXT: vmov.u16 r6, d0[0]
+; CHECK-FP16-NEXT: vmov.u16 r7, d0[0]
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixunshfti
+; CHECK-FP16-NEXT: mov r5, r0
+; CHECK-FP16-NEXT: subs r0, r2, #1
+; CHECK-FP16-NEXT: vmov s0, r7
+; CHECK-FP16-NEXT: sbcs r0, r3, #0
+; CHECK-FP16-NEXT: mov r7, #0
; CHECK-FP16-NEXT: mov r4, r1
-; CHECK-FP16-NEXT: subs r1, r2, #1
-; CHECK-FP16-NEXT: vmov s0, r6
-; CHECK-FP16-NEXT: sbcs r1, r3, #0
+; CHECK-FP16-NEXT: movwlo r7, #1
+; CHECK-FP16-NEXT: cmp r7, #0
; CHECK-FP16-NEXT: mov r6, #0
-; CHECK-FP16-NEXT: mov r5, #0
-; CHECK-FP16-NEXT: movwlo r6, #1
-; CHECK-FP16-NEXT: cmp r6, #0
-; CHECK-FP16-NEXT: moveq r4, r6
-; CHECK-FP16-NEXT: movne r6, r0
+; CHECK-FP16-NEXT: moveq r5, r7
; CHECK-FP16-NEXT: bl __fixunshfti
; CHECK-FP16-NEXT: subs r2, r2, #1
-; CHECK-FP16-NEXT: vmov.32 d1[0], r6
+; CHECK-FP16-NEXT: vmov.32 d1[0], r5
; CHECK-FP16-NEXT: sbcs r2, r3, #0
-; CHECK-FP16-NEXT: movwlo r5, #1
-; CHECK-FP16-NEXT: cmp r5, #0
-; CHECK-FP16-NEXT: moveq r0, r5
-; CHECK-FP16-NEXT: movne r5, r1
+; CHECK-FP16-NEXT: movwlo r6, #1
+; CHECK-FP16-NEXT: cmp r6, #0
+; CHECK-FP16-NEXT: moveq r0, r6
+; CHECK-FP16-NEXT: cmp r7, #0
+; CHECK-FP16-NEXT: movne r7, r4
; CHECK-FP16-NEXT: vmov.32 d0[0], r0
-; CHECK-FP16-NEXT: vmov.32 d1[1], r4
-; CHECK-FP16-NEXT: vmov.32 d0[1], r5
-; CHECK-FP16-NEXT: pop {r4, r5, r6, pc}
+; CHECK-FP16-NEXT: cmp r6, #0
+; CHECK-FP16-NEXT: vmov.32 d1[1], r7
+; CHECK-FP16-NEXT: movne r6, r1
+; CHECK-FP16-NEXT: vmov.32 d0[1], r6
+; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r11, pc}
entry:
%conv = fptoui <2 x half> %x to <2 x i128>
%0 = icmp ult <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
@@ -2189,8 +2239,8 @@ entry:
define <2 x i64> @ustest_f16i64(<2 x half> %x) {
; CHECK-NEON-LABEL: ustest_f16i64:
; CHECK-NEON: @ %bb.0: @ %entry
-; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, lr}
-; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, lr}
+; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEON-NEXT: .vsave {d8}
; CHECK-NEON-NEXT: vpush {d8}
; CHECK-NEON-NEXT: vmov r0, s0
@@ -2201,106 +2251,110 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) {
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: bl __fixsfti
-; CHECK-NEON-NEXT: mov r4, r1
+; CHECK-NEON-NEXT: mov r8, r1
; CHECK-NEON-NEXT: subs r1, r2, #1
-; CHECK-NEON-NEXT: sbcs r1, r3, #0
-; CHECK-NEON-NEXT: mov r8, #1
-; CHECK-NEON-NEXT: mov r1, #0
-; CHECK-NEON-NEXT: movge r2, r8
-; CHECK-NEON-NEXT: movwlt r1, #1
-; CHECK-NEON-NEXT: cmp r1, #0
-; CHECK-NEON-NEXT: moveq r3, r1
-; CHECK-NEON-NEXT: moveq r4, r1
-; CHECK-NEON-NEXT: movne r1, r0
-; CHECK-NEON-NEXT: rsbs r0, r1, #0
-; CHECK-NEON-NEXT: rscs r0, r4, #0
; CHECK-NEON-NEXT: vmov s0, r5
-; CHECK-NEON-NEXT: rscs r0, r2, #0
+; CHECK-NEON-NEXT: sbcs r1, r3, #0
+; CHECK-NEON-NEXT: mov r5, #0
+; CHECK-NEON-NEXT: mov r9, #1
+; CHECK-NEON-NEXT: movwlt r5, #1
+; CHECK-NEON-NEXT: cmp r5, #0
+; CHECK-NEON-NEXT: moveq r3, r5
+; CHECK-NEON-NEXT: moveq r8, r5
+; CHECK-NEON-NEXT: moveq r2, r9
+; CHECK-NEON-NEXT: movne r5, r0
+; CHECK-NEON-NEXT: rsbs r0, r5, #0
; CHECK-NEON-NEXT: mov r7, #0
+; CHECK-NEON-NEXT: rscs r0, r8, #0
+; CHECK-NEON-NEXT: mov r6, #0
+; CHECK-NEON-NEXT: rscs r0, r2, #0
; CHECK-NEON-NEXT: rscs r0, r3, #0
-; CHECK-NEON-NEXT: mov r5, #0
; CHECK-NEON-NEXT: movwlt r7, #1
; CHECK-NEON-NEXT: cmp r7, #0
-; CHECK-NEON-NEXT: moveq r4, r7
-; CHECK-NEON-NEXT: movne r7, r1
+; CHECK-NEON-NEXT: moveq r5, r7
; CHECK-NEON-NEXT: bl __fixsfti
-; CHECK-NEON-NEXT: subs r6, r2, #1
-; CHECK-NEON-NEXT: vmov.32 d1[0], r7
-; CHECK-NEON-NEXT: sbcs r6, r3, #0
-; CHECK-NEON-NEXT: movlt r8, r2
-; CHECK-NEON-NEXT: mov r2, #0
-; CHECK-NEON-NEXT: movwlt r2, #1
-; CHECK-NEON-NEXT: cmp r2, #0
-; CHECK-NEON-NEXT: moveq r3, r2
-; CHECK-NEON-NEXT: moveq r1, r2
-; CHECK-NEON-NEXT: movne r2, r0
-; CHECK-NEON-NEXT: rsbs r0, r2, #0
+; CHECK-NEON-NEXT: subs r4, r2, #1
+; CHECK-NEON-NEXT: vmov.32 d1[0], r5
+; CHECK-NEON-NEXT: sbcs r4, r3, #0
+; CHECK-NEON-NEXT: mov r4, #0
+; CHECK-NEON-NEXT: movwlt r4, #1
+; CHECK-NEON-NEXT: cmp r4, #0
+; CHECK-NEON-NEXT: movne r9, r2
+; CHECK-NEON-NEXT: moveq r3, r4
+; CHECK-NEON-NEXT: moveq r1, r4
+; CHECK-NEON-NEXT: movne r4, r0
+; CHECK-NEON-NEXT: rsbs r0, r4, #0
; CHECK-NEON-NEXT: rscs r0, r1, #0
-; CHECK-NEON-NEXT: rscs r0, r8, #0
+; CHECK-NEON-NEXT: rscs r0, r9, #0
; CHECK-NEON-NEXT: rscs r0, r3, #0
-; CHECK-NEON-NEXT: movwlt r5, #1
-; CHECK-NEON-NEXT: cmp r5, #0
-; CHECK-NEON-NEXT: moveq r2, r5
-; CHECK-NEON-NEXT: movne r5, r1
-; CHECK-NEON-NEXT: vmov.32 d0[0], r2
-; CHECK-NEON-NEXT: vmov.32 d1[1], r4
-; CHECK-NEON-NEXT: vmov.32 d0[1], r5
+; CHECK-NEON-NEXT: movwlt r6, #1
+; CHECK-NEON-NEXT: cmp r6, #0
+; CHECK-NEON-NEXT: moveq r4, r6
+; CHECK-NEON-NEXT: cmp r7, #0
+; CHECK-NEON-NEXT: movne r7, r8
+; CHECK-NEON-NEXT: vmov.32 d0[0], r4
+; CHECK-NEON-NEXT: cmp r6, #0
+; CHECK-NEON-NEXT: vmov.32 d1[1], r7
+; CHECK-NEON-NEXT: movne r6, r1
+; CHECK-NEON-NEXT: vmov.32 d0[1], r6
; CHECK-NEON-NEXT: vpop {d8}
-; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, pc}
+; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
;
; CHECK-FP16-LABEL: ustest_f16i64:
; CHECK-FP16: @ %bb.0: @ %entry
-; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, lr}
-; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, lr}
+; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-FP16-NEXT: vmov.u16 r0, d0[1]
-; CHECK-FP16-NEXT: vmov.u16 r5, d0[0]
+; CHECK-FP16-NEXT: vmov.u16 r4, d0[0]
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfti
-; CHECK-FP16-NEXT: mov r4, r1
+; CHECK-FP16-NEXT: mov r8, r1
; CHECK-FP16-NEXT: subs r1, r2, #1
; CHECK-FP16-NEXT: sbcs r1, r3, #0
-; CHECK-FP16-NEXT: mov r8, #1
-; CHECK-FP16-NEXT: mov r1, #0
-; CHECK-FP16-NEXT: movge r2, r8
-; CHECK-FP16-NEXT: movwlt r1, #1
-; CHECK-FP16-NEXT: cmp r1, #0
-; CHECK-FP16-NEXT: moveq r3, r1
-; CHECK-FP16-NEXT: moveq r4, r1
-; CHECK-FP16-NEXT: movne r1, r0
-; CHECK-FP16-NEXT: rsbs r0, r1, #0
-; CHECK-FP16-NEXT: rscs r0, r4, #0
-; CHECK-FP16-NEXT: vmov s0, r5
+; CHECK-FP16-NEXT: mov r6, #0
+; CHECK-FP16-NEXT: movwlt r6, #1
+; CHECK-FP16-NEXT: cmp r6, #0
+; CHECK-FP16-NEXT: mov r9, #1
+; CHECK-FP16-NEXT: moveq r3, r6
+; CHECK-FP16-NEXT: moveq r8, r6
+; CHECK-FP16-NEXT: moveq r2, r9
+; CHECK-FP16-NEXT: movne r6, r0
+; CHECK-FP16-NEXT: rsbs r0, r6, #0
+; CHECK-FP16-NEXT: rscs r0, r8, #0
+; CHECK-FP16-NEXT: vmov s0, r4
; CHECK-FP16-NEXT: rscs r0, r2, #0
; CHECK-FP16-NEXT: mov r7, #0
; CHECK-FP16-NEXT: rscs r0, r3, #0
; CHECK-FP16-NEXT: mov r5, #0
; CHECK-FP16-NEXT: movwlt r7, #1
; CHECK-FP16-NEXT: cmp r7, #0
-; CHECK-FP16-NEXT: moveq r4, r7
-; CHECK-FP16-NEXT: movne r7, r1
+; CHECK-FP16-NEXT: moveq r6, r7
; CHECK-FP16-NEXT: bl __fixhfti
-; CHECK-FP16-NEXT: subs r6, r2, #1
-; CHECK-FP16-NEXT: vmov.32 d1[0], r7
-; CHECK-FP16-NEXT: sbcs r6, r3, #0
-; CHECK-FP16-NEXT: movlt r8, r2
-; CHECK-FP16-NEXT: mov r2, #0
-; CHECK-FP16-NEXT: movwlt r2, #1
-; CHECK-FP16-NEXT: cmp r2, #0
-; CHECK-FP16-NEXT: moveq r3, r2
-; CHECK-FP16-NEXT: moveq r1, r2
-; CHECK-FP16-NEXT: movne r2, r0
-; CHECK-FP16-NEXT: rsbs r0, r2, #0
+; CHECK-FP16-NEXT: subs r4, r2, #1
+; CHECK-FP16-NEXT: vmov.32 d1[0], r6
+; CHECK-FP16-NEXT: sbcs r4, r3, #0
+; CHECK-FP16-NEXT: mov r4, #0
+; CHECK-FP16-NEXT: movwlt r4, #1
+; CHECK-FP16-NEXT: cmp r4, #0
+; CHECK-FP16-NEXT: movne r9, r2
+; CHECK-FP16-NEXT: moveq r3, r4
+; CHECK-FP16-NEXT: moveq r1, r4
+; CHECK-FP16-NEXT: movne r4, r0
+; CHECK-FP16-NEXT: rsbs r0, r4, #0
; CHECK-FP16-NEXT: rscs r0, r1, #0
-; CHECK-FP16-NEXT: rscs r0, r8, #0
+; CHECK-FP16-NEXT: rscs r0, r9, #0
; CHECK-FP16-NEXT: rscs r0, r3, #0
; CHECK-FP16-NEXT: movwlt r5, #1
; CHECK-FP16-NEXT: cmp r5, #0
-; CHECK-FP16-NEXT: moveq r2, r5
+; CHECK-FP16-NEXT: moveq r4, r5
+; CHECK-FP16-NEXT: cmp r7, #0
+; CHECK-FP16-NEXT: movne r7, r8
+; CHECK-FP16-NEXT: vmov.32 d0[0], r4
+; CHECK-FP16-NEXT: cmp r5, #0
+; CHECK-FP16-NEXT: vmov.32 d1[1], r7
; CHECK-FP16-NEXT: movne r5, r1
-; CHECK-FP16-NEXT: vmov.32 d0[0], r2
-; CHECK-FP16-NEXT: vmov.32 d1[1], r4
; CHECK-FP16-NEXT: vmov.32 d0[1], r5
-; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, pc}
+; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
entry:
%conv = fptosi <2 x half> %x to <2 x i128>
%0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
@@ -2326,34 +2380,34 @@ define <2 x i32> @stest_f64i32_mm(<2 x double> %x) {
; CHECK-NEXT: vmov r0, r1, d8
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r8, r1
-; CHECK-NEXT: vmov r0, r1, d9
-; CHECK-NEXT: mvn r6, #-2147483648
-; CHECK-NEXT: subs r2, r4, r6
-; CHECK-NEXT: mov r5, #0
-; CHECK-NEXT: sbcs r2, r8, #0
-; CHECK-NEXT: mov r7, #0
-; CHECK-NEXT: movge r4, r6
-; CHECK-NEXT: movwlt r5, #1
+; CHECK-NEXT: vmov r0, r2, d9
+; CHECK-NEXT: mvn r5, #-2147483648
+; CHECK-NEXT: subs r3, r4, r5
+; CHECK-NEXT: sbcs r3, r1, #0
+; CHECK-NEXT: mvn r7, #0
+; CHECK-NEXT: mov r3, #0
+; CHECK-NEXT: mov r8, #-2147483648
+; CHECK-NEXT: movwlt r3, #1
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: movne r3, r1
+; CHECK-NEXT: moveq r4, r5
+; CHECK-NEXT: rsbs r1, r4, #-2147483648
+; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: sbcs r1, r7, r3
+; CHECK-NEXT: movge r4, r8
+; CHECK-NEXT: mov r1, r2
; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: subs r2, r0, r6
-; CHECK-NEXT: sbcs r2, r1, #0
-; CHECK-NEXT: movlt r6, r0
-; CHECK-NEXT: movwlt r7, #1
-; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: mov r0, #-2147483648
-; CHECK-NEXT: movne r7, r1
-; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: movne r5, r8
-; CHECK-NEXT: rsbs r2, r4, #-2147483648
-; CHECK-NEXT: mvn r1, #0
-; CHECK-NEXT: sbcs r2, r1, r5
-; CHECK-NEXT: movge r4, r0
-; CHECK-NEXT: rsbs r2, r6, #-2147483648
+; CHECK-NEXT: subs r2, r0, r5
; CHECK-NEXT: vmov.32 d0[0], r4
-; CHECK-NEXT: sbcs r1, r1, r7
-; CHECK-NEXT: movge r6, r0
-; CHECK-NEXT: vmov.32 d0[1], r6
+; CHECK-NEXT: sbcs r2, r1, #0
+; CHECK-NEXT: movwlt r6, #1
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: movne r6, r1
+; CHECK-NEXT: movne r5, r0
+; CHECK-NEXT: rsbs r0, r5, #-2147483648
+; CHECK-NEXT: sbcs r0, r7, r6
+; CHECK-NEXT: movge r5, r8
+; CHECK-NEXT: vmov.32 d0[1], r5
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, pc}
entry:
@@ -2406,16 +2460,16 @@ define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
; CHECK-NEXT: vmov r0, r1, d8
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: vmov r2, r12, d9
-; CHECK-NEXT: mvn r4, #0
-; CHECK-NEXT: subs r5, r0, r4
-; CHECK-NEXT: mov r3, #0
-; CHECK-NEXT: sbcs r5, r1, #0
+; CHECK-NEXT: mvn r5, #0
+; CHECK-NEXT: subs r3, r0, r5
; CHECK-NEXT: mov r6, #0
-; CHECK-NEXT: movge r0, r4
+; CHECK-NEXT: sbcs r3, r1, #0
+; CHECK-NEXT: mov r4, #0
+; CHECK-NEXT: mov r3, #0
; CHECK-NEXT: movwlt r3, #1
; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: movne r3, r1
+; CHECK-NEXT: moveq r0, r5
; CHECK-NEXT: rsbs r1, r0, #0
; CHECK-NEXT: rscs r1, r3, #0
; CHECK-NEXT: movwlt r6, #1
@@ -2424,20 +2478,20 @@ define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: mov r1, r12
; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: subs r2, r0, r4
+; CHECK-NEXT: subs r2, r0, r5
; CHECK-NEXT: vmov.32 d0[0], r6
; CHECK-NEXT: sbcs r2, r1, #0
-; CHECK-NEXT: movlt r4, r0
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: movne r0, r1
-; CHECK-NEXT: rsbs r1, r4, #0
-; CHECK-NEXT: rscs r0, r0, #0
-; CHECK-NEXT: movwlt r5, #1
-; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: movne r5, r4
-; CHECK-NEXT: vmov.32 d0[1], r5
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: movwlt r2, #1
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: movne r2, r1
+; CHECK-NEXT: movne r5, r0
+; CHECK-NEXT: rsbs r0, r5, #0
+; CHECK-NEXT: rscs r0, r2, #0
+; CHECK-NEXT: movwlt r4, #1
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: movne r4, r5
+; CHECK-NEXT: vmov.32 d0[1], r4
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, r5, r6, pc}
entry:
@@ -2451,81 +2505,72 @@ entry:
define <4 x i32> @stest_f32i32_mm(<4 x float> %x) {
; CHECK-LABEL: stest_f32i32_mm:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, sp, #4
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #8
-; CHECK-NEXT: sub sp, sp, #8
; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: mov r8, #-2147483648
+; CHECK-NEXT: mvn r9, #0
+; CHECK-NEXT: mov r10, #0
; CHECK-NEXT: vmov r0, s19
+; CHECK-NEXT: vmov r5, s16
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: vmov r2, s18
-; CHECK-NEXT: mov r11, r0
-; CHECK-NEXT: vmov r0, s17
-; CHECK-NEXT: mvn r6, #-2147483648
-; CHECK-NEXT: mov r3, #-2147483648
-; CHECK-NEXT: mvn r10, #0
-; CHECK-NEXT: vmov r7, s16
-; CHECK-NEXT: mov r4, #0
-; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: subs r2, r11, r6
-; CHECK-NEXT: sbcs r2, r1, #0
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mvn r7, #-2147483648
+; CHECK-NEXT: subs r0, r0, r7
+; CHECK-NEXT: sbcs r0, r1, #0
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: movwlt r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: movne r0, r1
+; CHECK-NEXT: moveq r4, r7
+; CHECK-NEXT: rsbs r1, r4, #-2147483648
+; CHECK-NEXT: sbcs r0, r9, r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: movge r4, r8
+; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: subs r0, r0, r7
+; CHECK-NEXT: sbcs r0, r1, #0
; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movge r11, r6
+; CHECK-NEXT: vmov r0, s18
; CHECK-NEXT: movwlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: movne r2, r1
-; CHECK-NEXT: rsbs r1, r11, #-2147483648
-; CHECK-NEXT: sbcs r1, r10, r2
-; CHECK-NEXT: movge r11, r3
-; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: subs r0, r0, r6
-; CHECK-NEXT: sbcs r0, r1, #0
-; CHECK-NEXT: mov r9, #0
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: movge r5, r6
-; CHECK-NEXT: movwlt r9, #1
-; CHECK-NEXT: cmp r9, #0
-; CHECK-NEXT: movne r9, r1
+; CHECK-NEXT: moveq r5, r7
+; CHECK-NEXT: rsbs r1, r5, #-2147483648
+; CHECK-NEXT: sbcs r1, r9, r2
+; CHECK-NEXT: movge r5, r8
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: subs r0, r0, r6
+; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: subs r0, r0, r7
; CHECK-NEXT: sbcs r0, r1, #0
-; CHECK-NEXT: mov r8, #0
-; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: movge r7, r6
-; CHECK-NEXT: movwlt r8, #1
-; CHECK-NEXT: cmp r8, #0
-; CHECK-NEXT: movne r8, r1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: movwlt r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: movne r0, r1
+; CHECK-NEXT: moveq r6, r7
+; CHECK-NEXT: rsbs r1, r6, #-2147483648
+; CHECK-NEXT: sbcs r0, r9, r0
+; CHECK-NEXT: vmov r0, s17
+; CHECK-NEXT: movge r6, r8
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: subs r2, r0, r6
+; CHECK-NEXT: subs r2, r0, r7
+; CHECK-NEXT: vmov.32 d1[0], r6
; CHECK-NEXT: sbcs r2, r1, #0
-; CHECK-NEXT: movlt r6, r0
-; CHECK-NEXT: movwlt r4, #1
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: movne r4, r1
-; CHECK-NEXT: rsbs r0, r6, #-2147483648
-; CHECK-NEXT: sbcs r0, r10, r4
-; CHECK-NEXT: mov r1, #-2147483648
-; CHECK-NEXT: movge r6, r1
+; CHECK-NEXT: movwlt r10, #1
+; CHECK-NEXT: cmp r10, #0
+; CHECK-NEXT: movne r10, r1
+; CHECK-NEXT: movne r7, r0
; CHECK-NEXT: rsbs r0, r7, #-2147483648
-; CHECK-NEXT: sbcs r0, r10, r8
-; CHECK-NEXT: vmov.32 d1[0], r6
-; CHECK-NEXT: movge r7, r1
-; CHECK-NEXT: rsbs r0, r5, #-2147483648
-; CHECK-NEXT: vmov.32 d0[0], r7
-; CHECK-NEXT: sbcs r0, r10, r9
-; CHECK-NEXT: movge r5, r1
-; CHECK-NEXT: vmov.32 d1[1], r11
-; CHECK-NEXT: vmov.32 d0[1], r5
-; CHECK-NEXT: add sp, sp, #8
+; CHECK-NEXT: vmov.32 d0[0], r5
+; CHECK-NEXT: sbcs r0, r9, r10
+; CHECK-NEXT: vmov.32 d1[1], r4
+; CHECK-NEXT: movge r7, r8
+; CHECK-NEXT: vmov.32 d0[1], r7
; CHECK-NEXT: vpop {d8, d9}
-; CHECK-NEXT: add sp, sp, #4
-; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
entry:
%conv = fptosi <4 x float> %x to <4 x i64>
%spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
@@ -2591,33 +2636,33 @@ define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
; CHECK-NEXT: vmov r0, s19
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: vmov r2, s16
-; CHECK-NEXT: mvn r6, #0
-; CHECK-NEXT: subs r3, r0, r6
+; CHECK-NEXT: mvn r7, #0
+; CHECK-NEXT: subs r3, r0, r7
; CHECK-NEXT: mov r4, #0
; CHECK-NEXT: sbcs r3, r1, #0
-; CHECK-NEXT: vmov r8, s17
+; CHECK-NEXT: mov r10, #0
; CHECK-NEXT: mov r3, #0
-; CHECK-NEXT: movge r0, r6
+; CHECK-NEXT: vmov r9, s18
; CHECK-NEXT: movwlt r3, #1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: movne r3, r1
+; CHECK-NEXT: moveq r0, r7
; CHECK-NEXT: rsbs r1, r0, #0
+; CHECK-NEXT: vmov r8, s17
; CHECK-NEXT: rscs r1, r3, #0
-; CHECK-NEXT: vmov r9, s18
; CHECK-NEXT: movwlt r4, #1
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: movne r4, r0
-; CHECK-NEXT: mov r10, #0
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: subs r2, r0, r6
+; CHECK-NEXT: subs r2, r0, r7
; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: sbcs r2, r1, #0
; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movge r0, r6
; CHECK-NEXT: movwlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: movne r2, r1
+; CHECK-NEXT: moveq r0, r7
; CHECK-NEXT: rsbs r1, r0, #0
; CHECK-NEXT: rscs r1, r2, #0
; CHECK-NEXT: movwlt r5, #1
@@ -2625,36 +2670,36 @@ define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
; CHECK-NEXT: movne r5, r0
; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: subs r2, r0, r6
-; CHECK-NEXT: mov r7, #0
+; CHECK-NEXT: subs r2, r0, r7
+; CHECK-NEXT: mov r6, #0
; CHECK-NEXT: sbcs r2, r1, #0
; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movge r0, r6
; CHECK-NEXT: movwlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: movne r2, r1
+; CHECK-NEXT: moveq r0, r7
; CHECK-NEXT: rsbs r1, r0, #0
; CHECK-NEXT: rscs r1, r2, #0
-; CHECK-NEXT: movwlt r7, #1
-; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: movne r7, r0
+; CHECK-NEXT: movwlt r6, #1
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: movne r6, r0
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: subs r2, r0, r6
-; CHECK-NEXT: vmov.32 d1[0], r7
+; CHECK-NEXT: subs r2, r0, r7
+; CHECK-NEXT: vmov.32 d1[0], r6
; CHECK-NEXT: sbcs r2, r1, #0
-; CHECK-NEXT: movlt r6, r0
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: movne r0, r1
-; CHECK-NEXT: rsbs r1, r6, #0
-; CHECK-NEXT: rscs r0, r0, #0
+; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: vmov.32 d0[0], r5
+; CHECK-NEXT: movwlt r2, #1
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: movne r2, r1
+; CHECK-NEXT: movne r7, r0
+; CHECK-NEXT: rsbs r0, r7, #0
+; CHECK-NEXT: vmov.32 d1[1], r4
+; CHECK-NEXT: rscs r0, r2, #0
; CHECK-NEXT: movwlt r10, #1
; CHECK-NEXT: cmp r10, #0
-; CHECK-NEXT: vmov.32 d1[1], r4
-; CHECK-NEXT: movne r10, r6
+; CHECK-NEXT: movne r10, r7
; CHECK-NEXT: vmov.32 d0[1], r10
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
@@ -2669,164 +2714,150 @@ entry:
define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
; CHECK-NEON-LABEL: stest_f16i32_mm:
; CHECK-NEON: @ %bb.0: @ %entry
-; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-NEON-NEXT: .pad #4
-; CHECK-NEON-NEXT: sub sp, sp, #4
+; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEON-NEXT: .vsave {d8, d9, d10}
; CHECK-NEON-NEXT: vpush {d8, d9, d10}
-; CHECK-NEON-NEXT: .pad #8
-; CHECK-NEON-NEXT: sub sp, sp, #8
; CHECK-NEON-NEXT: vmov r0, s3
-; CHECK-NEON-NEXT: vmov.f32 s16, s2
-; CHECK-NEON-NEXT: vmov.f32 s18, s1
+; CHECK-NEON-NEXT: vmov.f32 s18, s2
+; CHECK-NEON-NEXT: vmov.f32 s16, s1
; CHECK-NEON-NEXT: vmov.f32 s20, s0
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
-; CHECK-NEON-NEXT: vmov r2, s16
-; CHECK-NEON-NEXT: mov r11, r0
-; CHECK-NEON-NEXT: vmov r0, s18
-; CHECK-NEON-NEXT: mvn r6, #-2147483648
-; CHECK-NEON-NEXT: mov r3, #-2147483648
-; CHECK-NEON-NEXT: mvn r10, #0
-; CHECK-NEON-NEXT: vmov r7, s20
-; CHECK-NEON-NEXT: mov r4, #0
-; CHECK-NEON-NEXT: str r2, [sp, #4] @ 4-byte Spill
-; CHECK-NEON-NEXT: subs r2, r11, r6
+; CHECK-NEON-NEXT: mov r4, r0
+; CHECK-NEON-NEXT: vmov r0, s20
+; CHECK-NEON-NEXT: mvn r7, #-2147483648
+; CHECK-NEON-NEXT: subs r2, r4, r7
; CHECK-NEON-NEXT: sbcs r2, r1, #0
+; CHECK-NEON-NEXT: mov r8, #-2147483648
; CHECK-NEON-NEXT: mov r2, #0
-; CHECK-NEON-NEXT: movge r11, r6
+; CHECK-NEON-NEXT: mvn r9, #0
; CHECK-NEON-NEXT: movwlt r2, #1
; CHECK-NEON-NEXT: cmp r2, #0
; CHECK-NEON-NEXT: movne r2, r1
-; CHECK-NEON-NEXT: rsbs r1, r11, #-2147483648
-; CHECK-NEON-NEXT: sbcs r1, r10, r2
-; CHECK-NEON-NEXT: movge r11, r3
+; CHECK-NEON-NEXT: moveq r4, r7
+; CHECK-NEON-NEXT: rsbs r1, r4, #-2147483648
+; CHECK-NEON-NEXT: mov r10, #0
+; CHECK-NEON-NEXT: sbcs r1, r9, r2
+; CHECK-NEON-NEXT: movge r4, r8
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
; CHECK-NEON-NEXT: mov r5, r0
-; CHECK-NEON-NEXT: subs r0, r0, r6
+; CHECK-NEON-NEXT: subs r0, r0, r7
; CHECK-NEON-NEXT: sbcs r0, r1, #0
-; CHECK-NEON-NEXT: mov r8, #0
-; CHECK-NEON-NEXT: mov r0, r7
-; CHECK-NEON-NEXT: movge r5, r6
-; CHECK-NEON-NEXT: movwlt r8, #1
-; CHECK-NEON-NEXT: cmp r8, #0
-; CHECK-NEON-NEXT: movne r8, r1
+; CHECK-NEON-NEXT: mov r2, #0
+; CHECK-NEON-NEXT: vmov r0, s18
+; CHECK-NEON-NEXT: movwlt r2, #1
+; CHECK-NEON-NEXT: cmp r2, #0
+; CHECK-NEON-NEXT: movne r2, r1
+; CHECK-NEON-NEXT: moveq r5, r7
+; CHECK-NEON-NEXT: rsbs r1, r5, #-2147483648
+; CHECK-NEON-NEXT: sbcs r1, r9, r2
+; CHECK-NEON-NEXT: movge r5, r8
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
-; CHECK-NEON-NEXT: mov r7, r0
-; CHECK-NEON-NEXT: subs r0, r0, r6
+; CHECK-NEON-NEXT: mov r6, r0
+; CHECK-NEON-NEXT: subs r0, r0, r7
; CHECK-NEON-NEXT: sbcs r0, r1, #0
-; CHECK-NEON-NEXT: mov r9, #0
-; CHECK-NEON-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEON-NEXT: movge r7, r6
-; CHECK-NEON-NEXT: movwlt r9, #1
-; CHECK-NEON-NEXT: cmp r9, #0
-; CHECK-NEON-NEXT: movne r9, r1
+; CHECK-NEON-NEXT: mov r0, #0
+; CHECK-NEON-NEXT: movwlt r0, #1
+; CHECK-NEON-NEXT: cmp r0, #0
+; CHECK-NEON-NEXT: movne r0, r1
+; CHECK-NEON-NEXT: moveq r6, r7
+; CHECK-NEON-NEXT: rsbs r1, r6, #-2147483648
+; CHECK-NEON-NEXT: sbcs r0, r9, r0
+; CHECK-NEON-NEXT: vmov r0, s16
+; CHECK-NEON-NEXT: movge r6, r8
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
-; CHECK-NEON-NEXT: subs r2, r0, r6
+; CHECK-NEON-NEXT: subs r2, r0, r7
+; CHECK-NEON-NEXT: vmov.32 d1[0], r6
; CHECK-NEON-NEXT: sbcs r2, r1, #0
-; CHECK-NEON-NEXT: movlt r6, r0
-; CHECK-NEON-NEXT: movwlt r4, #1
-; CHECK-NEON-NEXT: cmp r4, #0
-; CHECK-NEON-NEXT: movne r4, r1
-; CHECK-NEON-NEXT: rsbs r0, r6, #-2147483648
-; CHECK-NEON-NEXT: sbcs r0, r10, r4
-; CHECK-NEON-NEXT: mov r1, #-2147483648
-; CHECK-NEON-NEXT: movge r6, r1
+; CHECK-NEON-NEXT: movwlt r10, #1
+; CHECK-NEON-NEXT: cmp r10, #0
+; CHECK-NEON-NEXT: movne r10, r1
+; CHECK-NEON-NEXT: movne r7, r0
; CHECK-NEON-NEXT: rsbs r0, r7, #-2147483648
-; CHECK-NEON-NEXT: sbcs r0, r10, r9
-; CHECK-NEON-NEXT: vmov.32 d1[0], r6
-; CHECK-NEON-NEXT: movge r7, r1
-; CHECK-NEON-NEXT: rsbs r0, r5, #-2147483648
-; CHECK-NEON-NEXT: vmov.32 d0[0], r7
-; CHECK-NEON-NEXT: sbcs r0, r10, r8
-; CHECK-NEON-NEXT: movge r5, r1
-; CHECK-NEON-NEXT: vmov.32 d1[1], r11
-; CHECK-NEON-NEXT: vmov.32 d0[1], r5
-; CHECK-NEON-NEXT: add sp, sp, #8
+; CHECK-NEON-NEXT: vmov.32 d0[0], r5
+; CHECK-NEON-NEXT: sbcs r0, r9, r10
+; CHECK-NEON-NEXT: vmov.32 d1[1], r4
+; CHECK-NEON-NEXT: movge r7, r8
+; CHECK-NEON-NEXT: vmov.32 d0[1], r7
; CHECK-NEON-NEXT: vpop {d8, d9, d10}
-; CHECK-NEON-NEXT: add sp, sp, #4
-; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
;
; CHECK-FP16-LABEL: stest_f16i32_mm:
; CHECK-FP16: @ %bb.0: @ %entry
-; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-FP16-NEXT: .pad #4
-; CHECK-FP16-NEXT: sub sp, sp, #4
+; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-FP16-NEXT: .vsave {d8, d9}
; CHECK-FP16-NEXT: vpush {d8, d9}
; CHECK-FP16-NEXT: vmov.u16 r0, d0[3]
-; CHECK-FP16-NEXT: vmov.u16 r4, d0[2]
-; CHECK-FP16-NEXT: vmov.u16 r5, d0[0]
-; CHECK-FP16-NEXT: vmov.u16 r6, d0[1]
+; CHECK-FP16-NEXT: vorr d8, d0, d0
+; CHECK-FP16-NEXT: vmov.u16 r5, d0[2]
+; CHECK-FP16-NEXT: vmov.u16 r6, d0[0]
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfdi
-; CHECK-FP16-NEXT: mov r10, r0
+; CHECK-FP16-NEXT: mov r4, r0
; CHECK-FP16-NEXT: mvn r7, #-2147483648
; CHECK-FP16-NEXT: subs r0, r0, r7
; CHECK-FP16-NEXT: vmov s0, r6
; CHECK-FP16-NEXT: sbcs r0, r1, #0
-; CHECK-FP16-NEXT: mov r2, #-2147483648
+; CHECK-FP16-NEXT: mov r8, #-2147483648
; CHECK-FP16-NEXT: mov r0, #0
-; CHECK-FP16-NEXT: movge r10, r7
+; CHECK-FP16-NEXT: mvn r9, #0
; CHECK-FP16-NEXT: movwlt r0, #1
; CHECK-FP16-NEXT: cmp r0, #0
; CHECK-FP16-NEXT: movne r0, r1
-; CHECK-FP16-NEXT: rsbs r1, r10, #-2147483648
-; CHECK-FP16-NEXT: mvn r9, #0
+; CHECK-FP16-NEXT: moveq r4, r7
+; CHECK-FP16-NEXT: rsbs r1, r4, #-2147483648
+; CHECK-FP16-NEXT: mov r10, #0
; CHECK-FP16-NEXT: sbcs r0, r9, r0
-; CHECK-FP16-NEXT: vmov s16, r4
-; CHECK-FP16-NEXT: mov r11, #0
; CHECK-FP16-NEXT: vmov s18, r5
-; CHECK-FP16-NEXT: movge r10, r2
+; CHECK-FP16-NEXT: movge r4, r8
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: vmov.f32 s0, s18
; CHECK-FP16-NEXT: mov r5, r0
; CHECK-FP16-NEXT: subs r0, r0, r7
-; CHECK-FP16-NEXT: mov r4, #0
; CHECK-FP16-NEXT: sbcs r0, r1, #0
-; CHECK-FP16-NEXT: movge r5, r7
-; CHECK-FP16-NEXT: movwlt r4, #1
-; CHECK-FP16-NEXT: cmp r4, #0
-; CHECK-FP16-NEXT: movne r4, r1
+; CHECK-FP16-NEXT: mov r0, #0
+; CHECK-FP16-NEXT: movwlt r0, #1
+; CHECK-FP16-NEXT: cmp r0, #0
+; CHECK-FP16-NEXT: movne r0, r1
+; CHECK-FP16-NEXT: moveq r5, r7
+; CHECK-FP16-NEXT: rsbs r1, r5, #-2147483648
+; CHECK-FP16-NEXT: sbcs r0, r9, r0
+; CHECK-FP16-NEXT: movge r5, r8
; CHECK-FP16-NEXT: bl __fixhfdi
-; CHECK-FP16-NEXT: vmov.f32 s0, s16
; CHECK-FP16-NEXT: mov r6, r0
; CHECK-FP16-NEXT: subs r0, r0, r7
-; CHECK-FP16-NEXT: mov r8, #0
; CHECK-FP16-NEXT: sbcs r0, r1, #0
-; CHECK-FP16-NEXT: movge r6, r7
-; CHECK-FP16-NEXT: movwlt r8, #1
-; CHECK-FP16-NEXT: cmp r8, #0
-; CHECK-FP16-NEXT: movne r8, r1
+; CHECK-FP16-NEXT: mov r0, #0
+; CHECK-FP16-NEXT: movwlt r0, #1
+; CHECK-FP16-NEXT: cmp r0, #0
+; CHECK-FP16-NEXT: movne r0, r1
+; CHECK-FP16-NEXT: vmov.u16 r1, d8[1]
+; CHECK-FP16-NEXT: moveq r6, r7
+; CHECK-FP16-NEXT: vmov s0, r1
+; CHECK-FP16-NEXT: rsbs r1, r6, #-2147483648
+; CHECK-FP16-NEXT: sbcs r0, r9, r0
+; CHECK-FP16-NEXT: movge r6, r8
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: subs r2, r0, r7
+; CHECK-FP16-NEXT: vmov.32 d1[0], r6
; CHECK-FP16-NEXT: sbcs r2, r1, #0
-; CHECK-FP16-NEXT: movlt r7, r0
-; CHECK-FP16-NEXT: movwlt r11, #1
-; CHECK-FP16-NEXT: cmp r11, #0
-; CHECK-FP16-NEXT: movne r11, r1
+; CHECK-FP16-NEXT: movwlt r10, #1
+; CHECK-FP16-NEXT: cmp r10, #0
+; CHECK-FP16-NEXT: movne r10, r1
+; CHECK-FP16-NEXT: movne r7, r0
; CHECK-FP16-NEXT: rsbs r0, r7, #-2147483648
-; CHECK-FP16-NEXT: sbcs r0, r9, r11
-; CHECK-FP16-NEXT: mov r1, #-2147483648
-; CHECK-FP16-NEXT: movge r7, r1
-; CHECK-FP16-NEXT: rsbs r0, r6, #-2147483648
-; CHECK-FP16-NEXT: sbcs r0, r9, r8
-; CHECK-FP16-NEXT: vmov.32 d1[0], r7
-; CHECK-FP16-NEXT: movge r6, r1
-; CHECK-FP16-NEXT: rsbs r0, r5, #-2147483648
-; CHECK-FP16-NEXT: vmov.32 d0[0], r6
-; CHECK-FP16-NEXT: sbcs r0, r9, r4
-; CHECK-FP16-NEXT: movge r5, r1
-; CHECK-FP16-NEXT: vmov.32 d1[1], r10
-; CHECK-FP16-NEXT: vmov.32 d0[1], r5
+; CHECK-FP16-NEXT: vmov.32 d0[0], r5
+; CHECK-FP16-NEXT: sbcs r0, r9, r10
+; CHECK-FP16-NEXT: vmov.32 d1[1], r4
+; CHECK-FP16-NEXT: movge r7, r8
+; CHECK-FP16-NEXT: vmov.32 d0[1], r7
; CHECK-FP16-NEXT: vpop {d8, d9}
-; CHECK-FP16-NEXT: add sp, sp, #4
-; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
entry:
%conv = fptosi <4 x half> %x to <4 x i64>
%spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
@@ -2944,34 +2975,34 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
; CHECK-NEON-NEXT: vmov r2, s20
-; CHECK-NEON-NEXT: mvn r6, #0
-; CHECK-NEON-NEXT: subs r3, r0, r6
+; CHECK-NEON-NEXT: mvn r7, #0
+; CHECK-NEON-NEXT: subs r3, r0, r7
; CHECK-NEON-NEXT: mov r4, #0
; CHECK-NEON-NEXT: sbcs r3, r1, #0
-; CHECK-NEON-NEXT: vmov r8, s18
+; CHECK-NEON-NEXT: mov r10, #0
; CHECK-NEON-NEXT: mov r3, #0
-; CHECK-NEON-NEXT: movge r0, r6
+; CHECK-NEON-NEXT: vmov r8, s18
; CHECK-NEON-NEXT: movwlt r3, #1
; CHECK-NEON-NEXT: cmp r3, #0
; CHECK-NEON-NEXT: movne r3, r1
+; CHECK-NEON-NEXT: moveq r0, r7
; CHECK-NEON-NEXT: rsbs r1, r0, #0
-; CHECK-NEON-NEXT: rscs r1, r3, #0
; CHECK-NEON-NEXT: vmov r9, s16
+; CHECK-NEON-NEXT: rscs r1, r3, #0
; CHECK-NEON-NEXT: movwlt r4, #1
; CHECK-NEON-NEXT: cmp r4, #0
; CHECK-NEON-NEXT: movne r4, r0
-; CHECK-NEON-NEXT: mov r10, #0
; CHECK-NEON-NEXT: mov r0, r2
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
-; CHECK-NEON-NEXT: subs r2, r0, r6
+; CHECK-NEON-NEXT: subs r2, r0, r7
; CHECK-NEON-NEXT: mov r5, #0
; CHECK-NEON-NEXT: sbcs r2, r1, #0
; CHECK-NEON-NEXT: mov r2, #0
-; CHECK-NEON-NEXT: movge r0, r6
; CHECK-NEON-NEXT: movwlt r2, #1
; CHECK-NEON-NEXT: cmp r2, #0
; CHECK-NEON-NEXT: movne r2, r1
+; CHECK-NEON-NEXT: moveq r0, r7
; CHECK-NEON-NEXT: rsbs r1, r0, #0
; CHECK-NEON-NEXT: rscs r1, r2, #0
; CHECK-NEON-NEXT: movwlt r5, #1
@@ -2980,37 +3011,37 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
; CHECK-NEON-NEXT: mov r0, r9
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
-; CHECK-NEON-NEXT: subs r2, r0, r6
-; CHECK-NEON-NEXT: mov r7, #0
+; CHECK-NEON-NEXT: subs r2, r0, r7
+; CHECK-NEON-NEXT: mov r6, #0
; CHECK-NEON-NEXT: sbcs r2, r1, #0
; CHECK-NEON-NEXT: mov r2, #0
-; CHECK-NEON-NEXT: movge r0, r6
; CHECK-NEON-NEXT: movwlt r2, #1
; CHECK-NEON-NEXT: cmp r2, #0
; CHECK-NEON-NEXT: movne r2, r1
+; CHECK-NEON-NEXT: moveq r0, r7
; CHECK-NEON-NEXT: rsbs r1, r0, #0
; CHECK-NEON-NEXT: rscs r1, r2, #0
-; CHECK-NEON-NEXT: movwlt r7, #1
-; CHECK-NEON-NEXT: cmp r7, #0
-; CHECK-NEON-NEXT: movne r7, r0
+; CHECK-NEON-NEXT: movwlt r6, #1
+; CHECK-NEON-NEXT: cmp r6, #0
+; CHECK-NEON-NEXT: movne r6, r0
; CHECK-NEON-NEXT: mov r0, r8
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
-; CHECK-NEON-NEXT: subs r2, r0, r6
-; CHECK-NEON-NEXT: vmov.32 d1[0], r7
+; CHECK-NEON-NEXT: subs r2, r0, r7
+; CHECK-NEON-NEXT: vmov.32 d1[0], r6
; CHECK-NEON-NEXT: sbcs r2, r1, #0
-; CHECK-NEON-NEXT: movlt r6, r0
-; CHECK-NEON-NEXT: mov r0, #0
-; CHECK-NEON-NEXT: movwlt r0, #1
-; CHECK-NEON-NEXT: cmp r0, #0
-; CHECK-NEON-NEXT: movne r0, r1
-; CHECK-NEON-NEXT: rsbs r1, r6, #0
-; CHECK-NEON-NEXT: rscs r0, r0, #0
+; CHECK-NEON-NEXT: mov r2, #0
; CHECK-NEON-NEXT: vmov.32 d0[0], r5
+; CHECK-NEON-NEXT: movwlt r2, #1
+; CHECK-NEON-NEXT: cmp r2, #0
+; CHECK-NEON-NEXT: movne r2, r1
+; CHECK-NEON-NEXT: movne r7, r0
+; CHECK-NEON-NEXT: rsbs r0, r7, #0
+; CHECK-NEON-NEXT: vmov.32 d1[1], r4
+; CHECK-NEON-NEXT: rscs r0, r2, #0
; CHECK-NEON-NEXT: movwlt r10, #1
; CHECK-NEON-NEXT: cmp r10, #0
-; CHECK-NEON-NEXT: vmov.32 d1[1], r4
-; CHECK-NEON-NEXT: movne r10, r6
+; CHECK-NEON-NEXT: movne r10, r7
; CHECK-NEON-NEXT: vmov.32 d0[1], r10
; CHECK-NEON-NEXT: vpop {d8, d9, d10}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
@@ -3023,75 +3054,75 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
; CHECK-FP16-NEXT: vpush {d8, d9}
; CHECK-FP16-NEXT: vmov.u16 r0, d0[3]
; CHECK-FP16-NEXT: vorr d8, d0, d0
-; CHECK-FP16-NEXT: vmov.u16 r5, d0[0]
+; CHECK-FP16-NEXT: vmov.u16 r6, d0[0]
+; CHECK-FP16-NEXT: vmov.u16 r7, d0[2]
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: vmov.u16 r2, d8[1]
-; CHECK-FP16-NEXT: mvn r4, #0
-; CHECK-FP16-NEXT: vmov.u16 r3, d8[2]
-; CHECK-FP16-NEXT: vmov s0, r5
+; CHECK-FP16-NEXT: mvn r5, #0
+; CHECK-FP16-NEXT: vmov s0, r6
; CHECK-FP16-NEXT: mov r6, #0
; CHECK-FP16-NEXT: mov r8, #0
+; CHECK-FP16-NEXT: vmov s18, r7
; CHECK-FP16-NEXT: vmov s16, r2
-; CHECK-FP16-NEXT: subs r2, r0, r4
+; CHECK-FP16-NEXT: subs r2, r0, r5
; CHECK-FP16-NEXT: sbcs r2, r1, #0
-; CHECK-FP16-NEXT: vmov s18, r3
; CHECK-FP16-NEXT: mov r2, #0
-; CHECK-FP16-NEXT: movge r0, r4
; CHECK-FP16-NEXT: movwlt r2, #1
; CHECK-FP16-NEXT: cmp r2, #0
; CHECK-FP16-NEXT: movne r2, r1
+; CHECK-FP16-NEXT: moveq r0, r5
; CHECK-FP16-NEXT: rsbs r1, r0, #0
; CHECK-FP16-NEXT: rscs r1, r2, #0
; CHECK-FP16-NEXT: movwlt r6, #1
; CHECK-FP16-NEXT: cmp r6, #0
; CHECK-FP16-NEXT: movne r6, r0
; CHECK-FP16-NEXT: bl __fixhfdi
-; CHECK-FP16-NEXT: subs r2, r0, r4
+; CHECK-FP16-NEXT: subs r2, r0, r5
; CHECK-FP16-NEXT: vmov.f32 s0, s18
; CHECK-FP16-NEXT: sbcs r2, r1, #0
; CHECK-FP16-NEXT: mov r7, #0
; CHECK-FP16-NEXT: mov r2, #0
-; CHECK-FP16-NEXT: movge r0, r4
; CHECK-FP16-NEXT: movwlt r2, #1
; CHECK-FP16-NEXT: cmp r2, #0
; CHECK-FP16-NEXT: movne r2, r1
+; CHECK-FP16-NEXT: moveq r0, r5
; CHECK-FP16-NEXT: rsbs r1, r0, #0
; CHECK-FP16-NEXT: rscs r1, r2, #0
; CHECK-FP16-NEXT: movwlt r7, #1
; CHECK-FP16-NEXT: cmp r7, #0
; CHECK-FP16-NEXT: movne r7, r0
; CHECK-FP16-NEXT: bl __fixhfdi
-; CHECK-FP16-NEXT: subs r2, r0, r4
+; CHECK-FP16-NEXT: subs r2, r0, r5
; CHECK-FP16-NEXT: vmov.f32 s0, s16
; CHECK-FP16-NEXT: sbcs r2, r1, #0
-; CHECK-FP16-NEXT: mov r5, #0
+; CHECK-FP16-NEXT: mov r4, #0
; CHECK-FP16-NEXT: mov r2, #0
-; CHECK-FP16-NEXT: movge r0, r4
; CHECK-FP16-NEXT: movwlt r2, #1
; CHECK-FP16-NEXT: cmp r2, #0
; CHECK-FP16-NEXT: movne r2, r1
+; CHECK-FP16-NEXT: moveq r0, r5
; CHECK-FP16-NEXT: rsbs r1, r0, #0
; CHECK-FP16-NEXT: rscs r1, r2, #0
-; CHECK-FP16-NEXT: movwlt r5, #1
-; CHECK-FP16-NEXT: cmp r5, #0
-; CHECK-FP16-NEXT: movne r5, r0
+; CHECK-FP16-NEXT: movwlt r4, #1
+; CHECK-FP16-NEXT: cmp r4, #0
+; CHECK-FP16-NEXT: movne r4, r0
; CHECK-FP16-NEXT: bl __fixhfdi
-; CHECK-FP16-NEXT: subs r2, r0, r4
-; CHECK-FP16-NEXT: vmov.32 d1[0], r5
+; CHECK-FP16-NEXT: subs r2, r0, r5
+; CHECK-FP16-NEXT: vmov.32 d1[0], r4
; CHECK-FP16-NEXT: sbcs r2, r1, #0
-; CHECK-FP16-NEXT: movlt r4, r0
-; CHECK-FP16-NEXT: mov r0, #0
-; CHECK-FP16-NEXT: movwlt r0, #1
-; CHECK-FP16-NEXT: cmp r0, #0
-; CHECK-FP16-NEXT: movne r0, r1
-; CHECK-FP16-NEXT: rsbs r1, r4, #0
-; CHECK-FP16-NEXT: rscs r0, r0, #0
+; CHECK-FP16-NEXT: mov r2, #0
; CHECK-FP16-NEXT: vmov.32 d0[0], r7
+; CHECK-FP16-NEXT: movwlt r2, #1
+; CHECK-FP16-NEXT: cmp r2, #0
+; CHECK-FP16-NEXT: movne r2, r1
+; CHECK-FP16-NEXT: movne r5, r0
+; CHECK-FP16-NEXT: rsbs r0, r5, #0
+; CHECK-FP16-NEXT: vmov.32 d1[1], r6
+; CHECK-FP16-NEXT: rscs r0, r2, #0
; CHECK-FP16-NEXT: movwlt r8, #1
; CHECK-FP16-NEXT: cmp r8, #0
-; CHECK-FP16-NEXT: vmov.32 d1[1], r6
-; CHECK-FP16-NEXT: movne r8, r4
+; CHECK-FP16-NEXT: movne r8, r5
; CHECK-FP16-NEXT: vmov.32 d0[1], r8
; CHECK-FP16-NEXT: vpop {d8, d9}
; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, pc}
@@ -3599,56 +3630,59 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: vorr d0, d9, d9
; CHECK-NEXT: bl __fixdfti
-; CHECK-NEXT: mov r4, r1
-; CHECK-NEXT: mvn r9, #0
-; CHECK-NEXT: subs r1, r0, r9
-; CHECK-NEXT: mvn r5, #-2147483648
-; CHECK-NEXT: sbcs r1, r4, r5
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mvn r8, #0
+; CHECK-NEXT: subs r0, r0, r8
+; CHECK-NEXT: mvn r6, #-2147483648
+; CHECK-NEXT: sbcs r0, r1, r6
+; CHECK-NEXT: mov r10, r1
+; CHECK-NEXT: sbcs r0, r2, #0
; CHECK-NEXT: vorr d0, d8, d8
-; CHECK-NEXT: sbcs r1, r2, #0
+; CHECK-NEXT: sbcs r0, r3, #0
; CHECK-NEXT: mov r7, #0
-; CHECK-NEXT: sbcs r1, r3, #0
-; CHECK-NEXT: mov r8, #-2147483648
-; CHECK-NEXT: mov r1, #0
-; CHECK-NEXT: mov r10, #0
-; CHECK-NEXT: movwlt r1, #1
-; CHECK-NEXT: cmp r1, #0
-; CHECK-NEXT: moveq r3, r1
-; CHECK-NEXT: movne r1, r2
-; CHECK-NEXT: moveq r4, r5
-; CHECK-NEXT: moveq r0, r9
-; CHECK-NEXT: rsbs r2, r0, #0
-; CHECK-NEXT: rscs r2, r4, #-2147483648
-; CHECK-NEXT: sbcs r1, r9, r1
-; CHECK-NEXT: sbcs r1, r9, r3
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: mov r9, #0
+; CHECK-NEXT: movwlt r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: moveq r3, r0
+; CHECK-NEXT: movne r0, r2
+; CHECK-NEXT: moveq r10, r6
+; CHECK-NEXT: moveq r5, r8
+; CHECK-NEXT: rsbs r1, r5, #0
+; CHECK-NEXT: rscs r1, r10, #-2147483648
+; CHECK-NEXT: sbcs r0, r8, r0
+; CHECK-NEXT: sbcs r0, r8, r3
; CHECK-NEXT: movwlt r7, #1
; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: movne r7, r0
-; CHECK-NEXT: moveq r4, r8
+; CHECK-NEXT: moveq r5, r7
; CHECK-NEXT: bl __fixdfti
-; CHECK-NEXT: subs r6, r0, r9
-; CHECK-NEXT: vmov.32 d1[0], r7
-; CHECK-NEXT: sbcs r6, r1, r5
-; CHECK-NEXT: sbcs r6, r2, #0
-; CHECK-NEXT: sbcs r6, r3, #0
-; CHECK-NEXT: mov r6, #0
-; CHECK-NEXT: movwlt r6, #1
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: moveq r3, r6
-; CHECK-NEXT: movne r6, r2
-; CHECK-NEXT: movne r5, r1
-; CHECK-NEXT: moveq r0, r9
+; CHECK-NEXT: subs r4, r0, r8
+; CHECK-NEXT: vmov.32 d1[0], r5
+; CHECK-NEXT: sbcs r4, r1, r6
+; CHECK-NEXT: sbcs r4, r2, #0
+; CHECK-NEXT: sbcs r4, r3, #0
+; CHECK-NEXT: mov r4, #0
+; CHECK-NEXT: movwlt r4, #1
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: moveq r3, r4
+; CHECK-NEXT: movne r6, r1
+; CHECK-NEXT: movne r4, r2
+; CHECK-NEXT: moveq r0, r8
; CHECK-NEXT: rsbs r1, r0, #0
-; CHECK-NEXT: rscs r1, r5, #-2147483648
-; CHECK-NEXT: sbcs r1, r9, r6
-; CHECK-NEXT: sbcs r1, r9, r3
-; CHECK-NEXT: movwlt r10, #1
-; CHECK-NEXT: cmp r10, #0
-; CHECK-NEXT: movne r10, r0
-; CHECK-NEXT: moveq r5, r8
-; CHECK-NEXT: vmov.32 d0[0], r10
-; CHECK-NEXT: vmov.32 d1[1], r4
-; CHECK-NEXT: vmov.32 d0[1], r5
+; CHECK-NEXT: rscs r1, r6, #-2147483648
+; CHECK-NEXT: sbcs r1, r8, r4
+; CHECK-NEXT: sbcs r1, r8, r3
+; CHECK-NEXT: movwlt r9, #1
+; CHECK-NEXT: cmp r9, #0
+; CHECK-NEXT: moveq r0, r9
+; CHECK-NEXT: mov r1, #-2147483648
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: vmov.32 d0[0], r0
+; CHECK-NEXT: moveq r10, r1
+; CHECK-NEXT: cmp r9, #0
+; CHECK-NEXT: vmov.32 d1[1], r10
+; CHECK-NEXT: moveq r6, r1
+; CHECK-NEXT: vmov.32 d0[1], r6
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
entry:
@@ -3662,36 +3696,39 @@ entry:
define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
; CHECK-LABEL: utest_f64i64_mm:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, lr}
-; CHECK-NEXT: push {r4, r5, r6, lr}
+; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: vorr d0, d9, d9
; CHECK-NEXT: bl __fixunsdfti
-; CHECK-NEXT: mov r4, r1
-; CHECK-NEXT: subs r1, r2, #1
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: subs r0, r2, #1
; CHECK-NEXT: vorr d0, d8, d8
-; CHECK-NEXT: sbcs r1, r3, #0
+; CHECK-NEXT: sbcs r0, r3, #0
+; CHECK-NEXT: mov r7, #0
+; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: movwlo r7, #1
+; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: mov r6, #0
-; CHECK-NEXT: mov r5, #0
-; CHECK-NEXT: movwlo r6, #1
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: moveq r4, r6
-; CHECK-NEXT: movne r6, r0
+; CHECK-NEXT: moveq r5, r7
; CHECK-NEXT: bl __fixunsdfti
; CHECK-NEXT: subs r2, r2, #1
-; CHECK-NEXT: vmov.32 d1[0], r6
+; CHECK-NEXT: vmov.32 d1[0], r5
; CHECK-NEXT: sbcs r2, r3, #0
-; CHECK-NEXT: movwlo r5, #1
-; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: moveq r0, r5
-; CHECK-NEXT: movne r5, r1
+; CHECK-NEXT: movwlo r6, #1
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: moveq r0, r6
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: movne r7, r4
; CHECK-NEXT: vmov.32 d0[0], r0
-; CHECK-NEXT: vmov.32 d1[1], r4
-; CHECK-NEXT: vmov.32 d0[1], r5
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: vmov.32 d1[1], r7
+; CHECK-NEXT: movne r6, r1
+; CHECK-NEXT: vmov.32 d0[1], r6
; CHECK-NEXT: vpop {d8, d9}
-; CHECK-NEXT: pop {r4, r5, r6, pc}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
entry:
%conv = fptoui <2 x double> %x to <2 x i128>
%spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
@@ -3702,45 +3739,49 @@ entry:
define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
; CHECK-LABEL: ustest_f64i64_mm:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, lr}
-; CHECK-NEXT: push {r4, r5, r6, lr}
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vorr q4, q0, q0
-; CHECK-NEXT: vorr d0, d9, d9
; CHECK-NEXT: bl __fixdfti
-; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: subs r0, r2, #1
+; CHECK-NEXT: vorr d0, d9, d9
; CHECK-NEXT: sbcs r0, r3, #0
-; CHECK-NEXT: vorr d0, d8, d8
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: mov r4, r1
-; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: moveq r5, r0
-; CHECK-NEXT: moveq r4, r0
-; CHECK-NEXT: movne r0, r3
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r6, #0
-; CHECK-NEXT: movwmi r4, #0
-; CHECK-NEXT: movwmi r5, #0
+; CHECK-NEXT: mov r7, #0
+; CHECK-NEXT: mov r5, r3
+; CHECK-NEXT: movwlt r7, #1
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: moveq r6, r7
+; CHECK-NEXT: moveq r5, r7
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: mov r8, r1
+; CHECK-NEXT: mov r4, #0
+; CHECK-NEXT: movwmi r6, #0
; CHECK-NEXT: bl __fixdfti
; CHECK-NEXT: subs r2, r2, #1
-; CHECK-NEXT: vmov.32 d1[0], r5
+; CHECK-NEXT: vmov.32 d0[0], r6
; CHECK-NEXT: sbcs r2, r3, #0
-; CHECK-NEXT: movwlt r6, #1
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: moveq r1, r6
-; CHECK-NEXT: moveq r0, r6
-; CHECK-NEXT: movne r6, r3
-; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: movwlt r4, #1
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: moveq r0, r4
+; CHECK-NEXT: moveq r3, r4
+; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: movwmi r0, #0
-; CHECK-NEXT: movwmi r1, #0
-; CHECK-NEXT: vmov.32 d0[0], r0
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: movne r4, r1
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: vmov.32 d1[0], r0
+; CHECK-NEXT: movwmi r4, #0
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: movne r7, r8
+; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: vmov.32 d1[1], r4
-; CHECK-NEXT: vmov.32 d0[1], r1
+; CHECK-NEXT: movwmi r7, #0
+; CHECK-NEXT: vmov.32 d0[1], r7
; CHECK-NEXT: vpop {d8, d9}
-; CHECK-NEXT: pop {r4, r5, r6, pc}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, pc}
entry:
%conv = fptosi <2 x double> %x to <2 x i128>
%spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
@@ -3759,56 +3800,59 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
; CHECK-NEXT: vmov.f64 d8, d0
; CHECK-NEXT: vmov.f32 s0, s17
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: mov r4, r1
-; CHECK-NEXT: mvn r9, #0
-; CHECK-NEXT: subs r1, r0, r9
-; CHECK-NEXT: mvn r5, #-2147483648
-; CHECK-NEXT: sbcs r1, r4, r5
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mvn r8, #0
+; CHECK-NEXT: subs r0, r0, r8
+; CHECK-NEXT: mvn r6, #-2147483648
+; CHECK-NEXT: sbcs r0, r1, r6
; CHECK-NEXT: vmov.f32 s0, s16
-; CHECK-NEXT: sbcs r1, r2, #0
+; CHECK-NEXT: sbcs r0, r2, #0
+; CHECK-NEXT: mov r10, r1
+; CHECK-NEXT: sbcs r0, r3, #0
; CHECK-NEXT: mov r7, #0
-; CHECK-NEXT: sbcs r1, r3, #0
-; CHECK-NEXT: mov r8, #-2147483648
-; CHECK-NEXT: mov r1, #0
-; CHECK-NEXT: mov r10, #0
-; CHECK-NEXT: movwlt r1, #1
-; CHECK-NEXT: cmp r1, #0
-; CHECK-NEXT: moveq r3, r1
-; CHECK-NEXT: movne r1, r2
-; CHECK-NEXT: moveq r4, r5
-; CHECK-NEXT: moveq r0, r9
-; CHECK-NEXT: rsbs r2, r0, #0
-; CHECK-NEXT: rscs r2, r4, #-2147483648
-; CHECK-NEXT: sbcs r1, r9, r1
-; CHECK-NEXT: sbcs r1, r9, r3
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: mov r9, #0
+; CHECK-NEXT: movwlt r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: moveq r3, r0
+; CHECK-NEXT: movne r0, r2
+; CHECK-NEXT: moveq r10, r6
+; CHECK-NEXT: moveq r5, r8
+; CHECK-NEXT: rsbs r1, r5, #0
+; CHECK-NEXT: rscs r1, r10, #-2147483648
+; CHECK-NEXT: sbcs r0, r8, r0
+; CHECK-NEXT: sbcs r0, r8, r3
; CHECK-NEXT: movwlt r7, #1
; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: movne r7, r0
-; CHECK-NEXT: moveq r4, r8
+; CHECK-NEXT: moveq r5, r7
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: subs r6, r0, r9
-; CHECK-NEXT: vmov.32 d1[0], r7
-; CHECK-NEXT: sbcs r6, r1, r5
-; CHECK-NEXT: sbcs r6, r2, #0
-; CHECK-NEXT: sbcs r6, r3, #0
-; CHECK-NEXT: mov r6, #0
-; CHECK-NEXT: movwlt r6, #1
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: moveq r3, r6
-; CHECK-NEXT: movne r6, r2
-; CHECK-NEXT: movne r5, r1
-; CHECK-NEXT: moveq r0, r9
+; CHECK-NEXT: subs r4, r0, r8
+; CHECK-NEXT: vmov.32 d1[0], r5
+; CHECK-NEXT: sbcs r4, r1, r6
+; CHECK-NEXT: sbcs r4, r2, #0
+; CHECK-NEXT: sbcs r4, r3, #0
+; CHECK-NEXT: mov r4, #0
+; CHECK-NEXT: movwlt r4, #1
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: moveq r3, r4
+; CHECK-NEXT: movne r6, r1
+; CHECK-NEXT: movne r4, r2
+; CHECK-NEXT: moveq r0, r8
; CHECK-NEXT: rsbs r1, r0, #0
-; CHECK-NEXT: rscs r1, r5, #-2147483648
-; CHECK-NEXT: sbcs r1, r9, r6
-; CHECK-NEXT: sbcs r1, r9, r3
-; CHECK-NEXT: movwlt r10, #1
-; CHECK-NEXT: cmp r10, #0
-; CHECK-NEXT: movne r10, r0
-; CHECK-NEXT: moveq r5, r8
-; CHECK-NEXT: vmov.32 d0[0], r10
-; CHECK-NEXT: vmov.32 d1[1], r4
-; CHECK-NEXT: vmov.32 d0[1], r5
+; CHECK-NEXT: rscs r1, r6, #-2147483648
+; CHECK-NEXT: sbcs r1, r8, r4
+; CHECK-NEXT: sbcs r1, r8, r3
+; CHECK-NEXT: movwlt r9, #1
+; CHECK-NEXT: cmp r9, #0
+; CHECK-NEXT: moveq r0, r9
+; CHECK-NEXT: mov r1, #-2147483648
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: vmov.32 d0[0], r0
+; CHECK-NEXT: moveq r10, r1
+; CHECK-NEXT: cmp r9, #0
+; CHECK-NEXT: vmov.32 d1[1], r10
+; CHECK-NEXT: moveq r6, r1
+; CHECK-NEXT: vmov.32 d0[1], r6
; CHECK-NEXT: vpop {d8}
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
entry:
@@ -3822,36 +3866,39 @@ entry:
define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
; CHECK-LABEL: utest_f32i64_mm:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, lr}
-; CHECK-NEXT: push {r4, r5, r6, lr}
+; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: .vsave {d8}
; CHECK-NEXT: vpush {d8}
; CHECK-NEXT: vmov.f64 d8, d0
; CHECK-NEXT: vmov.f32 s0, s17
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vmov.f32 s0, s16
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: subs r0, r2, #1
+; CHECK-NEXT: mov r7, #0
+; CHECK-NEXT: sbcs r0, r3, #0
; CHECK-NEXT: mov r4, r1
-; CHECK-NEXT: subs r1, r2, #1
+; CHECK-NEXT: movwlo r7, #1
+; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: mov r6, #0
-; CHECK-NEXT: sbcs r1, r3, #0
-; CHECK-NEXT: mov r5, #0
-; CHECK-NEXT: movwlo r6, #1
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: moveq r4, r6
-; CHECK-NEXT: movne r6, r0
+; CHECK-NEXT: moveq r5, r7
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: subs r2, r2, #1
-; CHECK-NEXT: vmov.32 d1[0], r6
+; CHECK-NEXT: vmov.32 d1[0], r5
; CHECK-NEXT: sbcs r2, r3, #0
-; CHECK-NEXT: movwlo r5, #1
-; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: moveq r0, r5
-; CHECK-NEXT: movne r5, r1
+; CHECK-NEXT: movwlo r6, #1
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: moveq r0, r6
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: movne r7, r4
; CHECK-NEXT: vmov.32 d0[0], r0
-; CHECK-NEXT: vmov.32 d1[1], r4
-; CHECK-NEXT: vmov.32 d0[1], r5
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: vmov.32 d1[1], r7
+; CHECK-NEXT: movne r6, r1
+; CHECK-NEXT: vmov.32 d0[1], r6
; CHECK-NEXT: vpop {d8}
-; CHECK-NEXT: pop {r4, r5, r6, pc}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
entry:
%conv = fptoui <2 x float> %x to <2 x i128>
%spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
@@ -3862,45 +3909,49 @@ entry:
define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
; CHECK-LABEL: ustest_f32i64_mm:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, lr}
-; CHECK-NEXT: push {r4, r5, r6, lr}
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: .vsave {d8}
; CHECK-NEXT: vpush {d8}
; CHECK-NEXT: vmov.f64 d8, d0
-; CHECK-NEXT: vmov.f32 s0, s17
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: vmov.f32 s0, s16
-; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: vmov.f32 s0, s17
+; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: subs r0, r2, #1
-; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: mov r7, #0
; CHECK-NEXT: sbcs r0, r3, #0
-; CHECK-NEXT: mov r6, #0
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: moveq r5, r0
-; CHECK-NEXT: moveq r4, r0
-; CHECK-NEXT: movne r0, r3
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: movwmi r4, #0
-; CHECK-NEXT: movwmi r5, #0
+; CHECK-NEXT: mov r5, r3
+; CHECK-NEXT: movwlt r7, #1
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: moveq r6, r7
+; CHECK-NEXT: moveq r5, r7
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: mov r8, r1
+; CHECK-NEXT: mov r4, #0
+; CHECK-NEXT: movwmi r6, #0
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: subs r2, r2, #1
-; CHECK-NEXT: vmov.32 d1[0], r5
+; CHECK-NEXT: vmov.32 d0[0], r6
; CHECK-NEXT: sbcs r2, r3, #0
-; CHECK-NEXT: movwlt r6, #1
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: moveq r1, r6
-; CHECK-NEXT: moveq r0, r6
-; CHECK-NEXT: movne r6, r3
-; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: movwlt r4, #1
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: moveq r0, r4
+; CHECK-NEXT: moveq r3, r4
+; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: movwmi r0, #0
-; CHECK-NEXT: movwmi r1, #0
-; CHECK-NEXT: vmov.32 d0[0], r0
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: movne r4, r1
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: vmov.32 d1[0], r0
+; CHECK-NEXT: movwmi r4, #0
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: movne r7, r8
+; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: vmov.32 d1[1], r4
-; CHECK-NEXT: vmov.32 d0[1], r1
+; CHECK-NEXT: movwmi r7, #0
+; CHECK-NEXT: vmov.32 d0[1], r7
; CHECK-NEXT: vpop {d8}
-; CHECK-NEXT: pop {r4, r5, r6, pc}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, pc}
entry:
%conv = fptosi <2 x float> %x to <2 x i128>
%spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
@@ -3912,8 +3963,10 @@ entry:
define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
; CHECK-NEON-LABEL: stest_f16i64_mm:
; CHECK-NEON: @ %bb.0: @ %entry
-; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEON-NEXT: .pad #4
+; CHECK-NEON-NEXT: sub sp, sp, #4
; CHECK-NEON-NEXT: .vsave {d8}
; CHECK-NEON-NEXT: vpush {d8}
; CHECK-NEON-NEXT: vmov r0, s0
@@ -3924,58 +3977,62 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: bl __fixsfti
-; CHECK-NEON-NEXT: mov r4, r1
+; CHECK-NEON-NEXT: mov r5, r0
; CHECK-NEON-NEXT: mvn r9, #0
-; CHECK-NEON-NEXT: subs r1, r0, r9
-; CHECK-NEON-NEXT: mvn r6, #-2147483648
-; CHECK-NEON-NEXT: sbcs r1, r4, r6
+; CHECK-NEON-NEXT: subs r0, r0, r9
+; CHECK-NEON-NEXT: mvn r7, #-2147483648
+; CHECK-NEON-NEXT: sbcs r0, r1, r7
+; CHECK-NEON-NEXT: mov r11, r1
+; CHECK-NEON-NEXT: sbcs r0, r2, #0
; CHECK-NEON-NEXT: vmov s0, r8
-; CHECK-NEON-NEXT: sbcs r1, r2, #0
-; CHECK-NEON-NEXT: mov r5, #0
-; CHECK-NEON-NEXT: sbcs r1, r3, #0
-; CHECK-NEON-NEXT: mov r8, #-2147483648
-; CHECK-NEON-NEXT: mov r1, #0
+; CHECK-NEON-NEXT: sbcs r0, r3, #0
+; CHECK-NEON-NEXT: mov r6, #0
+; CHECK-NEON-NEXT: mov r0, #0
; CHECK-NEON-NEXT: mov r10, #0
-; CHECK-NEON-NEXT: movwlt r1, #1
-; CHECK-NEON-NEXT: cmp r1, #0
-; CHECK-NEON-NEXT: moveq r3, r1
-; CHECK-NEON-NEXT: movne r1, r2
-; CHECK-NEON-NEXT: moveq r4, r6
-; CHECK-NEON-NEXT: moveq r0, r9
-; CHECK-NEON-NEXT: rsbs r2, r0, #0
-; CHECK-NEON-NEXT: rscs r2, r4, #-2147483648
-; CHECK-NEON-NEXT: sbcs r1, r9, r1
-; CHECK-NEON-NEXT: sbcs r1, r9, r3
-; CHECK-NEON-NEXT: movwlt r5, #1
-; CHECK-NEON-NEXT: cmp r5, #0
-; CHECK-NEON-NEXT: movne r5, r0
-; CHECK-NEON-NEXT: moveq r4, r8
+; CHECK-NEON-NEXT: movwlt r0, #1
+; CHECK-NEON-NEXT: cmp r0, #0
+; CHECK-NEON-NEXT: moveq r3, r0
+; CHECK-NEON-NEXT: movne r0, r2
+; CHECK-NEON-NEXT: moveq r11, r7
+; CHECK-NEON-NEXT: moveq r5, r9
+; CHECK-NEON-NEXT: rsbs r1, r5, #0
+; CHECK-NEON-NEXT: rscs r1, r11, #-2147483648
+; CHECK-NEON-NEXT: sbcs r0, r9, r0
+; CHECK-NEON-NEXT: sbcs r0, r9, r3
+; CHECK-NEON-NEXT: movwlt r6, #1
+; CHECK-NEON-NEXT: cmp r6, #0
+; CHECK-NEON-NEXT: moveq r5, r6
; CHECK-NEON-NEXT: bl __fixsfti
-; CHECK-NEON-NEXT: subs r7, r0, r9
+; CHECK-NEON-NEXT: subs r4, r0, r9
; CHECK-NEON-NEXT: vmov.32 d1[0], r5
-; CHECK-NEON-NEXT: sbcs r7, r1, r6
-; CHECK-NEON-NEXT: sbcs r7, r2, #0
-; CHECK-NEON-NEXT: sbcs r7, r3, #0
-; CHECK-NEON-NEXT: mov r7, #0
-; CHECK-NEON-NEXT: movwlt r7, #1
-; CHECK-NEON-NEXT: cmp r7, #0
-; CHECK-NEON-NEXT: moveq r3, r7
-; CHECK-NEON-NEXT: movne r7, r2
-; CHECK-NEON-NEXT: movne r6, r1
+; CHECK-NEON-NEXT: sbcs r4, r1, r7
+; CHECK-NEON-NEXT: sbcs r4, r2, #0
+; CHECK-NEON-NEXT: sbcs r4, r3, #0
+; CHECK-NEON-NEXT: mov r4, #0
+; CHECK-NEON-NEXT: movwlt r4, #1
+; CHECK-NEON-NEXT: cmp r4, #0
+; CHECK-NEON-NEXT: moveq r3, r4
+; CHECK-NEON-NEXT: movne r7, r1
+; CHECK-NEON-NEXT: movne r4, r2
; CHECK-NEON-NEXT: moveq r0, r9
; CHECK-NEON-NEXT: rsbs r1, r0, #0
-; CHECK-NEON-NEXT: rscs r1, r6, #-2147483648
-; CHECK-NEON-NEXT: sbcs r1, r9, r7
+; CHECK-NEON-NEXT: rscs r1, r7, #-2147483648
+; CHECK-NEON-NEXT: sbcs r1, r9, r4
; CHECK-NEON-NEXT: sbcs r1, r9, r3
; CHECK-NEON-NEXT: movwlt r10, #1
; CHECK-NEON-NEXT: cmp r10, #0
-; CHECK-NEON-NEXT: movne r10, r0
-; CHECK-NEON-NEXT: moveq r6, r8
-; CHECK-NEON-NEXT: vmov.32 d0[0], r10
-; CHECK-NEON-NEXT: vmov.32 d1[1], r4
-; CHECK-NEON-NEXT: vmov.32 d0[1], r6
+; CHECK-NEON-NEXT: moveq r0, r10
+; CHECK-NEON-NEXT: mov r1, #-2147483648
+; CHECK-NEON-NEXT: cmp r6, #0
+; CHECK-NEON-NEXT: vmov.32 d0[0], r0
+; CHECK-NEON-NEXT: moveq r11, r1
+; CHECK-NEON-NEXT: cmp r10, #0
+; CHECK-NEON-NEXT: vmov.32 d1[1], r11
+; CHECK-NEON-NEXT: moveq r7, r1
+; CHECK-NEON-NEXT: vmov.32 d0[1], r7
; CHECK-NEON-NEXT: vpop {d8}
-; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+; CHECK-NEON-NEXT: add sp, sp, #4
+; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
;
; CHECK-FP16-LABEL: stest_f16i64_mm:
; CHECK-FP16: @ %bb.0: @ %entry
@@ -3985,56 +4042,59 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
; CHECK-FP16-NEXT: vmov.u16 r7, d0[0]
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfti
-; CHECK-FP16-NEXT: mov r4, r1
-; CHECK-FP16-NEXT: mvn r9, #0
-; CHECK-FP16-NEXT: subs r1, r0, r9
-; CHECK-FP16-NEXT: mvn r5, #-2147483648
-; CHECK-FP16-NEXT: sbcs r1, r4, r5
+; CHECK-FP16-NEXT: mov r5, r0
+; CHECK-FP16-NEXT: mvn r8, #0
+; CHECK-FP16-NEXT: subs r0, r0, r8
+; CHECK-FP16-NEXT: mvn r6, #-2147483648
+; CHECK-FP16-NEXT: sbcs r0, r1, r6
+; CHECK-FP16-NEXT: mov r10, r1
+; CHECK-FP16-NEXT: sbcs r0, r2, #0
; CHECK-FP16-NEXT: vmov s0, r7
-; CHECK-FP16-NEXT: sbcs r1, r2, #0
+; CHECK-FP16-NEXT: sbcs r0, r3, #0
; CHECK-FP16-NEXT: mov r7, #0
-; CHECK-FP16-NEXT: sbcs r1, r3, #0
-; CHECK-FP16-NEXT: mov r8, #-2147483648
-; CHECK-FP16-NEXT: mov r1, #0
-; CHECK-FP16-NEXT: mov r10, #0
-; CHECK-FP16-NEXT: movwlt r1, #1
-; CHECK-FP16-NEXT: cmp r1, #0
-; CHECK-FP16-NEXT: moveq r3, r1
-; CHECK-FP16-NEXT: movne r1, r2
-; CHECK-FP16-NEXT: moveq r4, r5
-; CHECK-FP16-NEXT: moveq r0, r9
-; CHECK-FP16-NEXT: rsbs r2, r0, #0
-; CHECK-FP16-NEXT: rscs r2, r4, #-2147483648
-; CHECK-FP16-NEXT: sbcs r1, r9, r1
-; CHECK-FP16-NEXT: sbcs r1, r9, r3
+; CHECK-FP16-NEXT: mov r0, #0
+; CHECK-FP16-NEXT: mov r9, #0
+; CHECK-FP16-NEXT: movwlt r0, #1
+; CHECK-FP16-NEXT: cmp r0, #0
+; CHECK-FP16-NEXT: moveq r3, r0
+; CHECK-FP16-NEXT: movne r0, r2
+; CHECK-FP16-NEXT: moveq r10, r6
+; CHECK-FP16-NEXT: moveq r5, r8
+; CHECK-FP16-NEXT: rsbs r1, r5, #0
+; CHECK-FP16-NEXT: rscs r1, r10, #-2147483648
+; CHECK-FP16-NEXT: sbcs r0, r8, r0
+; CHECK-FP16-NEXT: sbcs r0, r8, r3
; CHECK-FP16-NEXT: movwlt r7, #1
; CHECK-FP16-NEXT: cmp r7, #0
-; CHECK-FP16-NEXT: movne r7, r0
-; CHECK-FP16-NEXT: moveq r4, r8
+; CHECK-FP16-NEXT: moveq r5, r7
; CHECK-FP16-NEXT: bl __fixhfti
-; CHECK-FP16-NEXT: subs r6, r0, r9
-; CHECK-FP16-NEXT: vmov.32 d1[0], r7
-; CHECK-FP16-NEXT: sbcs r6, r1, r5
-; CHECK-FP16-NEXT: sbcs r6, r2, #0
-; CHECK-FP16-NEXT: sbcs r6, r3, #0
-; CHECK-FP16-NEXT: mov r6, #0
-; CHECK-FP16-NEXT: movwlt r6, #1
-; CHECK-FP16-NEXT: cmp r6, #0
-; CHECK-FP16-NEXT: moveq r3, r6
-; CHECK-FP16-NEXT: movne r6, r2
-; CHECK-FP16-NEXT: movne r5, r1
-; CHECK-FP16-NEXT: moveq r0, r9
+; CHECK-FP16-NEXT: subs r4, r0, r8
+; CHECK-FP16-NEXT: vmov.32 d1[0], r5
+; CHECK-FP16-NEXT: sbcs r4, r1, r6
+; CHECK-FP16-NEXT: sbcs r4, r2, #0
+; CHECK-FP16-NEXT: sbcs r4, r3, #0
+; CHECK-FP16-NEXT: mov r4, #0
+; CHECK-FP16-NEXT: movwlt r4, #1
+; CHECK-FP16-NEXT: cmp r4, #0
+; CHECK-FP16-NEXT: moveq r3, r4
+; CHECK-FP16-NEXT: movne r6, r1
+; CHECK-FP16-NEXT: movne r4, r2
+; CHECK-FP16-NEXT: moveq r0, r8
; CHECK-FP16-NEXT: rsbs r1, r0, #0
-; CHECK-FP16-NEXT: rscs r1, r5, #-2147483648
-; CHECK-FP16-NEXT: sbcs r1, r9, r6
-; CHECK-FP16-NEXT: sbcs r1, r9, r3
-; CHECK-FP16-NEXT: movwlt r10, #1
-; CHECK-FP16-NEXT: cmp r10, #0
-; CHECK-FP16-NEXT: movne r10, r0
-; CHECK-FP16-NEXT: moveq r5, r8
-; CHECK-FP16-NEXT: vmov.32 d0[0], r10
-; CHECK-FP16-NEXT: vmov.32 d1[1], r4
-; CHECK-FP16-NEXT: vmov.32 d0[1], r5
+; CHECK-FP16-NEXT: rscs r1, r6, #-2147483648
+; CHECK-FP16-NEXT: sbcs r1, r8, r4
+; CHECK-FP16-NEXT: sbcs r1, r8, r3
+; CHECK-FP16-NEXT: movwlt r9, #1
+; CHECK-FP16-NEXT: cmp r9, #0
+; CHECK-FP16-NEXT: moveq r0, r9
+; CHECK-FP16-NEXT: mov r1, #-2147483648
+; CHECK-FP16-NEXT: cmp r7, #0
+; CHECK-FP16-NEXT: vmov.32 d0[0], r0
+; CHECK-FP16-NEXT: moveq r10, r1
+; CHECK-FP16-NEXT: cmp r9, #0
+; CHECK-FP16-NEXT: vmov.32 d1[1], r10
+; CHECK-FP16-NEXT: moveq r6, r1
+; CHECK-FP16-NEXT: vmov.32 d0[1], r6
; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
entry:
%conv = fptosi <2 x half> %x to <2 x i128>
@@ -4047,8 +4107,8 @@ entry:
define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
; CHECK-NEON-LABEL: utesth_f16i64_mm:
; CHECK-NEON: @ %bb.0: @ %entry
-; CHECK-NEON-NEXT: .save {r4, r5, r6, lr}
-; CHECK-NEON-NEXT: push {r4, r5, r6, lr}
+; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: .vsave {d8}
; CHECK-NEON-NEXT: vpush {d8}
; CHECK-NEON-NEXT: vmov r0, s0
@@ -4059,60 +4119,66 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: bl __fixunssfti
-; CHECK-NEON-NEXT: mov r4, r1
-; CHECK-NEON-NEXT: subs r1, r2, #1
+; CHECK-NEON-NEXT: mov r6, r0
+; CHECK-NEON-NEXT: subs r0, r2, #1
; CHECK-NEON-NEXT: vmov s0, r5
-; CHECK-NEON-NEXT: sbcs r1, r3, #0
+; CHECK-NEON-NEXT: sbcs r0, r3, #0
; CHECK-NEON-NEXT: mov r5, #0
-; CHECK-NEON-NEXT: mov r6, #0
+; CHECK-NEON-NEXT: mov r4, r1
; CHECK-NEON-NEXT: movwlo r5, #1
; CHECK-NEON-NEXT: cmp r5, #0
-; CHECK-NEON-NEXT: moveq r4, r5
-; CHECK-NEON-NEXT: movne r5, r0
+; CHECK-NEON-NEXT: mov r7, #0
+; CHECK-NEON-NEXT: moveq r6, r5
; CHECK-NEON-NEXT: bl __fixunssfti
; CHECK-NEON-NEXT: subs r2, r2, #1
-; CHECK-NEON-NEXT: vmov.32 d1[0], r5
+; CHECK-NEON-NEXT: vmov.32 d1[0], r6
; CHECK-NEON-NEXT: sbcs r2, r3, #0
-; CHECK-NEON-NEXT: movwlo r6, #1
-; CHECK-NEON-NEXT: cmp r6, #0
-; CHECK-NEON-NEXT: moveq r0, r6
-; CHECK-NEON-NEXT: movne r6, r1
+; CHECK-NEON-NEXT: movwlo r7, #1
+; CHECK-NEON-NEXT: cmp r7, #0
+; CHECK-NEON-NEXT: moveq r0, r7
+; CHECK-NEON-NEXT: cmp r5, #0
+; CHECK-NEON-NEXT: movne r5, r4
; CHECK-NEON-NEXT: vmov.32 d0[0], r0
-; CHECK-NEON-NEXT: vmov.32 d1[1], r4
-; CHECK-NEON-NEXT: vmov.32 d0[1], r6
+; CHECK-NEON-NEXT: cmp r7, #0
+; CHECK-NEON-NEXT: vmov.32 d1[1], r5
+; CHECK-NEON-NEXT: movne r7, r1
+; CHECK-NEON-NEXT: vmov.32 d0[1], r7
; CHECK-NEON-NEXT: vpop {d8}
-; CHECK-NEON-NEXT: pop {r4, r5, r6, pc}
+; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
;
; CHECK-FP16-LABEL: utesth_f16i64_mm:
; CHECK-FP16: @ %bb.0: @ %entry
-; CHECK-FP16-NEXT: .save {r4, r5, r6, lr}
-; CHECK-FP16-NEXT: push {r4, r5, r6, lr}
+; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-FP16-NEXT: vmov.u16 r0, d0[1]
-; CHECK-FP16-NEXT: vmov.u16 r6, d0[0]
+; CHECK-FP16-NEXT: vmov.u16 r7, d0[0]
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixunshfti
+; CHECK-FP16-NEXT: mov r5, r0
+; CHECK-FP16-NEXT: subs r0, r2, #1
+; CHECK-FP16-NEXT: vmov s0, r7
+; CHECK-FP16-NEXT: sbcs r0, r3, #0
+; CHECK-FP16-NEXT: mov r7, #0
; CHECK-FP16-NEXT: mov r4, r1
-; CHECK-FP16-NEXT: subs r1, r2, #1
-; CHECK-FP16-NEXT: vmov s0, r6
-; CHECK-FP16-NEXT: sbcs r1, r3, #0
+; CHECK-FP16-NEXT: movwlo r7, #1
+; CHECK-FP16-NEXT: cmp r7, #0
; CHECK-FP16-NEXT: mov r6, #0
-; CHECK-FP16-NEXT: mov r5, #0
-; CHECK-FP16-NEXT: movwlo r6, #1
-; CHECK-FP16-NEXT: cmp r6, #0
-; CHECK-FP16-NEXT: moveq r4, r6
-; CHECK-FP16-NEXT: movne r6, r0
+; CHECK-FP16-NEXT: moveq r5, r7
; CHECK-FP16-NEXT: bl __fixunshfti
; CHECK-FP16-NEXT: subs r2, r2, #1
-; CHECK-FP16-NEXT: vmov.32 d1[0], r6
+; CHECK-FP16-NEXT: vmov.32 d1[0], r5
; CHECK-FP16-NEXT: sbcs r2, r3, #0
-; CHECK-FP16-NEXT: movwlo r5, #1
-; CHECK-FP16-NEXT: cmp r5, #0
-; CHECK-FP16-NEXT: moveq r0, r5
-; CHECK-FP16-NEXT: movne r5, r1
+; CHECK-FP16-NEXT: movwlo r6, #1
+; CHECK-FP16-NEXT: cmp r6, #0
+; CHECK-FP16-NEXT: moveq r0, r6
+; CHECK-FP16-NEXT: cmp r7, #0
+; CHECK-FP16-NEXT: movne r7, r4
; CHECK-FP16-NEXT: vmov.32 d0[0], r0
-; CHECK-FP16-NEXT: vmov.32 d1[1], r4
-; CHECK-FP16-NEXT: vmov.32 d0[1], r5
-; CHECK-FP16-NEXT: pop {r4, r5, r6, pc}
+; CHECK-FP16-NEXT: cmp r6, #0
+; CHECK-FP16-NEXT: vmov.32 d1[1], r7
+; CHECK-FP16-NEXT: movne r6, r1
+; CHECK-FP16-NEXT: vmov.32 d0[1], r6
+; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r11, pc}
entry:
%conv = fptoui <2 x half> %x to <2 x i128>
%spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
@@ -4123,90 +4189,100 @@ entry:
define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
; CHECK-NEON-LABEL: ustest_f16i64_mm:
; CHECK-NEON: @ %bb.0: @ %entry
-; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
-; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
+; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, lr}
; CHECK-NEON-NEXT: .vsave {d8}
; CHECK-NEON-NEXT: vpush {d8}
-; CHECK-NEON-NEXT: vmov r0, s0
-; CHECK-NEON-NEXT: vmov.f32 s16, s1
+; CHECK-NEON-NEXT: vmov r0, s1
+; CHECK-NEON-NEXT: vmov.f32 s16, s0
; CHECK-NEON-NEXT: bl __aeabi_h2f
-; CHECK-NEON-NEXT: mov r6, r0
+; CHECK-NEON-NEXT: mov r7, r0
; CHECK-NEON-NEXT: vmov r0, s16
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: bl __fixsfti
-; CHECK-NEON-NEXT: mov r5, r0
+; CHECK-NEON-NEXT: mov r6, r0
; CHECK-NEON-NEXT: subs r0, r2, #1
+; CHECK-NEON-NEXT: vmov s0, r7
; CHECK-NEON-NEXT: sbcs r0, r3, #0
-; CHECK-NEON-NEXT: vmov s0, r6
-; CHECK-NEON-NEXT: mov r0, #0
-; CHECK-NEON-NEXT: mov r4, r1
-; CHECK-NEON-NEXT: movwlt r0, #1
-; CHECK-NEON-NEXT: cmp r0, #0
-; CHECK-NEON-NEXT: moveq r5, r0
-; CHECK-NEON-NEXT: moveq r4, r0
-; CHECK-NEON-NEXT: movne r0, r3
-; CHECK-NEON-NEXT: cmp r0, #0
; CHECK-NEON-NEXT: mov r7, #0
-; CHECK-NEON-NEXT: movwmi r4, #0
-; CHECK-NEON-NEXT: movwmi r5, #0
+; CHECK-NEON-NEXT: mov r5, r3
+; CHECK-NEON-NEXT: movwlt r7, #1
+; CHECK-NEON-NEXT: cmp r7, #0
+; CHECK-NEON-NEXT: moveq r6, r7
+; CHECK-NEON-NEXT: moveq r5, r7
+; CHECK-NEON-NEXT: cmp r5, #0
+; CHECK-NEON-NEXT: mov r8, r1
+; CHECK-NEON-NEXT: mov r4, #0
+; CHECK-NEON-NEXT: movwmi r6, #0
; CHECK-NEON-NEXT: bl __fixsfti
; CHECK-NEON-NEXT: subs r2, r2, #1
-; CHECK-NEON-NEXT: vmov.32 d1[0], r5
+; CHECK-NEON-NEXT: vmov.32 d0[0], r6
; CHECK-NEON-NEXT: sbcs r2, r3, #0
-; CHECK-NEON-NEXT: movwlt r7, #1
-; CHECK-NEON-NEXT: cmp r7, #0
-; CHECK-NEON-NEXT: moveq r1, r7
-; CHECK-NEON-NEXT: moveq r0, r7
-; CHECK-NEON-NEXT: movne r7, r3
-; CHECK-NEON-NEXT: cmp r7, #0
+; CHECK-NEON-NEXT: movwlt r4, #1
+; CHECK-NEON-NEXT: cmp r4, #0
+; CHECK-NEON-NEXT: moveq r0, r4
+; CHECK-NEON-NEXT: moveq r3, r4
+; CHECK-NEON-NEXT: cmp r3, #0
; CHECK-NEON-NEXT: movwmi r0, #0
-; CHECK-NEON-NEXT: movwmi r1, #0
-; CHECK-NEON-NEXT: vmov.32 d0[0], r0
+; CHECK-NEON-NEXT: cmp r4, #0
+; CHECK-NEON-NEXT: movne r4, r1
+; CHECK-NEON-NEXT: cmp r3, #0
+; CHECK-NEON-NEXT: vmov.32 d1[0], r0
+; CHECK-NEON-NEXT: movwmi r4, #0
+; CHECK-NEON-NEXT: cmp r7, #0
+; CHECK-NEON-NEXT: movne r7, r8
+; CHECK-NEON-NEXT: cmp r5, #0
; CHECK-NEON-NEXT: vmov.32 d1[1], r4
-; CHECK-NEON-NEXT: vmov.32 d0[1], r1
+; CHECK-NEON-NEXT: movwmi r7, #0
+; CHECK-NEON-NEXT: vmov.32 d0[1], r7
; CHECK-NEON-NEXT: vpop {d8}
-; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
+; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, pc}
;
; CHECK-FP16-LABEL: ustest_f16i64_mm:
; CHECK-FP16: @ %bb.0: @ %entry
-; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r11, lr}
-; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r11, lr}
-; CHECK-FP16-NEXT: vmov.u16 r0, d0[1]
-; CHECK-FP16-NEXT: vmov.u16 r7, d0[0]
+; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, lr}
+; CHECK-FP16-NEXT: vmov.u16 r0, d0[0]
+; CHECK-FP16-NEXT: vmov.u16 r7, d0[1]
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfti
-; CHECK-FP16-NEXT: mov r5, r0
+; CHECK-FP16-NEXT: mov r6, r0
; CHECK-FP16-NEXT: subs r0, r2, #1
-; CHECK-FP16-NEXT: sbcs r0, r3, #0
; CHECK-FP16-NEXT: vmov s0, r7
-; CHECK-FP16-NEXT: mov r0, #0
-; CHECK-FP16-NEXT: mov r4, r1
-; CHECK-FP16-NEXT: movwlt r0, #1
-; CHECK-FP16-NEXT: cmp r0, #0
-; CHECK-FP16-NEXT: moveq r5, r0
-; CHECK-FP16-NEXT: moveq r4, r0
-; CHECK-FP16-NEXT: movne r0, r3
-; CHECK-FP16-NEXT: cmp r0, #0
-; CHECK-FP16-NEXT: mov r6, #0
-; CHECK-FP16-NEXT: movwmi r4, #0
-; CHECK-FP16-NEXT: movwmi r5, #0
+; CHECK-FP16-NEXT: sbcs r0, r3, #0
+; CHECK-FP16-NEXT: mov r7, #0
+; CHECK-FP16-NEXT: mov r5, r3
+; CHECK-FP16-NEXT: movwlt r7, #1
+; CHECK-FP16-NEXT: cmp r7, #0
+; CHECK-FP16-NEXT: moveq r6, r7
+; CHECK-FP16-NEXT: moveq r5, r7
+; CHECK-FP16-NEXT: cmp r5, #0
+; CHECK-FP16-NEXT: mov r8, r1
+; CHECK-FP16-NEXT: mov r4, #0
+; CHECK-FP16-NEXT: movwmi r6, #0
; CHECK-FP16-NEXT: bl __fixhfti
; CHECK-FP16-NEXT: subs r2, r2, #1
-; CHECK-FP16-NEXT: vmov.32 d1[0], r5
+; CHECK-FP16-NEXT: vmov.32 d0[0], r6
; CHECK-FP16-NEXT: sbcs r2, r3, #0
-; CHECK-FP16-NEXT: movwlt r6, #1
-; CHECK-FP16-NEXT: cmp r6, #0
-; CHECK-FP16-NEXT: moveq r1, r6
-; CHECK-FP16-NEXT: moveq r0, r6
-; CHECK-FP16-NEXT: movne r6, r3
-; CHECK-FP16-NEXT: cmp r6, #0
+; CHECK-FP16-NEXT: movwlt r4, #1
+; CHECK-FP16-NEXT: cmp r4, #0
+; CHECK-FP16-NEXT: moveq r0, r4
+; CHECK-FP16-NEXT: moveq r3, r4
+; CHECK-FP16-NEXT: cmp r3, #0
; CHECK-FP16-NEXT: movwmi r0, #0
-; CHECK-FP16-NEXT: movwmi r1, #0
-; CHECK-FP16-NEXT: vmov.32 d0[0], r0
+; CHECK-FP16-NEXT: cmp r4, #0
+; CHECK-FP16-NEXT: movne r4, r1
+; CHECK-FP16-NEXT: cmp r3, #0
+; CHECK-FP16-NEXT: vmov.32 d1[0], r0
+; CHECK-FP16-NEXT: movwmi r4, #0
+; CHECK-FP16-NEXT: cmp r7, #0
+; CHECK-FP16-NEXT: movne r7, r8
+; CHECK-FP16-NEXT: cmp r5, #0
; CHECK-FP16-NEXT: vmov.32 d1[1], r4
-; CHECK-FP16-NEXT: vmov.32 d0[1], r1
-; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r11, pc}
+; CHECK-FP16-NEXT: movwmi r7, #0
+; CHECK-FP16-NEXT: vmov.32 d0[1], r7
+; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, pc}
entry:
%conv = fptosi <2 x half> %x to <2 x i128>
%spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
diff --git a/llvm/test/CodeGen/ARM/fpscr-multi-use.ll b/llvm/test/CodeGen/ARM/fpscr-multi-use.ll
index c59c9824a902cf..3e77ad65df9927 100644
--- a/llvm/test/CodeGen/ARM/fpscr-multi-use.ll
+++ b/llvm/test/CodeGen/ARM/fpscr-multi-use.ll
@@ -1,51 +1,31 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=armv7 %s -o - | FileCheck %s
-; RUN: llc -mtriple=thumbv7 %s -o - | FileCheck --check-prefix=THUMB %s
declare double @fn()
define void @test(ptr %p, ptr %res) nounwind {
; CHECK-LABEL: test:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: push {r4, r5, r11, lr}
-; CHECK-NEXT: vldr d16, [r0]
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: vpush {d8}
+; CHECK-NEXT: vldr d8, [r0]
; CHECK-NEXT: mov r4, r1
-; CHECK-NEXT: vcmp.f64 d16, #0
+; CHECK-NEXT: vcmp.f64 d8, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: mrs r5, apsr
-; CHECK-NEXT: vneg.f64 d17, d16
-; CHECK-NEXT: vmovne.f64 d16, d17
-; CHECK-NEXT: vstr d16, [r1]
+; CHECK-NEXT: vneg.f64 d16, d8
+; CHECK-NEXT: vmov.f64 d17, d8
+; CHECK-NEXT: vmovne.f64 d17, d16
+; CHECK-NEXT: vstr d17, [r1]
; CHECK-NEXT: bl fn
+; CHECK-NEXT: vcmp.f64 d8, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vmov d16, r0, r1
; CHECK-NEXT: eor r1, r1, #-2147483648
-; CHECK-NEXT: msr APSR_nzcvq, r5
; CHECK-NEXT: vmov d17, r0, r1
; CHECK-NEXT: vmovne.f64 d16, d17
; CHECK-NEXT: vstr d16, [r4]
-; CHECK-NEXT: pop {r4, r5, r11, pc}
-;
-; THUMB-LABEL: test:
-; THUMB: @ %bb.0: @ %entry
-; THUMB-NEXT: push {r4, r5, r7, lr}
-; THUMB-NEXT: vldr d16, [r0]
-; THUMB-NEXT: mov r4, r1
-; THUMB-NEXT: vcmp.f64 d16, #0
-; THUMB-NEXT: vmrs APSR_nzcv, fpscr
-; THUMB-NEXT: mrs r5, apsr
-; THUMB-NEXT: vneg.f64 d17, d16
-; THUMB-NEXT: it ne
-; THUMB-NEXT: vmovne.f64 d16, d17
-; THUMB-NEXT: vstr d16, [r1]
-; THUMB-NEXT: bl fn
-; THUMB-NEXT: vmov d16, r0, r1
-; THUMB-NEXT: eor r1, r1, #-2147483648
-; THUMB-NEXT: msr APSR_nzcvq, r5
-; THUMB-NEXT: vmov d17, r0, r1
-; THUMB-NEXT: it ne
-; THUMB-NEXT: vmovne.f64 d16, d17
-; THUMB-NEXT: vstr d16, [r4]
-; THUMB-NEXT: pop {r4, r5, r7, pc}
+; CHECK-NEXT: vpop {d8}
+; CHECK-NEXT: pop {r4, pc}
entry:
%x = load double, ptr %p
%cmp = fcmp une double %x, 0.000000e+00
diff --git a/llvm/test/CodeGen/ARM/fptoi-sat-store.ll b/llvm/test/CodeGen/ARM/fptoi-sat-store.ll
index d895fe89a2cdc3..67edf9855f372f 100644
--- a/llvm/test/CodeGen/ARM/fptoi-sat-store.ll
+++ b/llvm/test/CodeGen/ARM/fptoi-sat-store.ll
@@ -75,62 +75,64 @@ define void @test_signed_i32_f64(ptr %d, double %f) nounwind {
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r4, r5, r6, r7, lr}
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
-; SOFT-NEXT: .pad #4
-; SOFT-NEXT: sub sp, #4
-; SOFT-NEXT: mov r6, r3
-; SOFT-NEXT: mov r7, r2
-; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
-; SOFT-NEXT: movs r5, #0
-; SOFT-NEXT: ldr r3, .LCPI1_0
-; SOFT-NEXT: mov r0, r2
-; SOFT-NEXT: mov r1, r6
-; SOFT-NEXT: mov r2, r5
+; SOFT-NEXT: .pad #12
+; SOFT-NEXT: sub sp, #12
+; SOFT-NEXT: mov r5, r3
+; SOFT-NEXT: mov r6, r2
+; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: ldr r2, .LCPI1_0
+; SOFT-NEXT: ldr r3, .LCPI1_1
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: bl __aeabi_dcmpgt
+; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT: movs r7, #0
+; SOFT-NEXT: ldr r3, .LCPI1_2
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: mov r2, r7
; SOFT-NEXT: bl __aeabi_dcmpge
; SOFT-NEXT: mov r4, r0
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: mov r1, r6
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: mov r1, r5
; SOFT-NEXT: bl __aeabi_d2iz
; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: beq .LBB1_2
+; SOFT-NEXT: bne .LBB1_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r4, r0
-; SOFT-NEXT: b .LBB1_3
-; SOFT-NEXT: .LBB1_2:
; SOFT-NEXT: movs r0, #1
-; SOFT-NEXT: lsls r4, r0, #31
-; SOFT-NEXT: .LBB1_3:
-; SOFT-NEXT: ldr r2, .LCPI1_1
-; SOFT-NEXT: ldr r3, .LCPI1_2
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: mov r1, r6
-; SOFT-NEXT: bl __aeabi_dcmpgt
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB1_5
-; SOFT-NEXT: @ %bb.4:
+; SOFT-NEXT: lsls r0, r0, #31
+; SOFT-NEXT: .LBB1_2:
+; SOFT-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT: cmp r1, #0
+; SOFT-NEXT: bne .LBB1_4
+; SOFT-NEXT: @ %bb.3:
+; SOFT-NEXT: mov r4, r0
+; SOFT-NEXT: b .LBB1_5
+; SOFT-NEXT: .LBB1_4:
; SOFT-NEXT: ldr r4, .LCPI1_3
; SOFT-NEXT: .LBB1_5:
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: mov r1, r6
-; SOFT-NEXT: mov r2, r7
-; SOFT-NEXT: mov r3, r6
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: mov r2, r6
+; SOFT-NEXT: mov r3, r5
; SOFT-NEXT: bl __aeabi_dcmpun
; SOFT-NEXT: cmp r0, #0
; SOFT-NEXT: bne .LBB1_7
; SOFT-NEXT: @ %bb.6:
-; SOFT-NEXT: mov r5, r4
+; SOFT-NEXT: mov r7, r4
; SOFT-NEXT: .LBB1_7:
-; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload
-; SOFT-NEXT: str r5, [r0]
-; SOFT-NEXT: add sp, #4
+; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: str r7, [r0]
+; SOFT-NEXT: add sp, #12
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.8:
; SOFT-NEXT: .LCPI1_0:
-; SOFT-NEXT: .long 3252682752 @ 0xc1e00000
-; SOFT-NEXT: .LCPI1_1:
; SOFT-NEXT: .long 4290772992 @ 0xffc00000
-; SOFT-NEXT: .LCPI1_2:
+; SOFT-NEXT: .LCPI1_1:
; SOFT-NEXT: .long 1105199103 @ 0x41dfffff
+; SOFT-NEXT: .LCPI1_2:
+; SOFT-NEXT: .long 3252682752 @ 0xc1e00000
; SOFT-NEXT: .LCPI1_3:
; SOFT-NEXT: .long 2147483647 @ 0x7fffffff
;
@@ -159,31 +161,31 @@ define void @test_unsigned_i32_f32(ptr %d, float %f) nounwind {
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
; SOFT-NEXT: .pad #4
; SOFT-NEXT: sub sp, #4
-; SOFT-NEXT: mov r6, r1
+; SOFT-NEXT: mov r7, r1
; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT: ldr r1, .LCPI2_0
+; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: bl __aeabi_fcmpgt
+; SOFT-NEXT: mov r6, r0
; SOFT-NEXT: movs r5, #0
-; SOFT-NEXT: mov r0, r1
+; SOFT-NEXT: mov r0, r7
; SOFT-NEXT: mov r1, r5
; SOFT-NEXT: bl __aeabi_fcmpge
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: bl __aeabi_f2uiz
; SOFT-NEXT: mov r4, r0
-; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: bl __aeabi_f2uiz
+; SOFT-NEXT: cmp r4, #0
; SOFT-NEXT: bne .LBB2_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r4, r7
+; SOFT-NEXT: mov r0, r4
; SOFT-NEXT: .LBB2_2:
-; SOFT-NEXT: ldr r1, .LCPI2_0
-; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: bl __aeabi_fcmpgt
-; SOFT-NEXT: cmp r0, #0
+; SOFT-NEXT: cmp r6, #0
; SOFT-NEXT: beq .LBB2_4
; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: mvns r4, r5
+; SOFT-NEXT: mvns r0, r5
; SOFT-NEXT: .LBB2_4:
-; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload
-; SOFT-NEXT: str r4, [r0]
+; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload
+; SOFT-NEXT: str r0, [r1]
; SOFT-NEXT: add sp, #4
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .p2align 2
@@ -207,40 +209,41 @@ define void @test_unsigned_i32_f64(ptr %d, double %f) nounwind {
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r4, r5, r6, r7, lr}
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
-; SOFT-NEXT: .pad #4
-; SOFT-NEXT: sub sp, #4
-; SOFT-NEXT: mov r6, r3
-; SOFT-NEXT: mov r7, r2
-; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
-; SOFT-NEXT: movs r5, #0
-; SOFT-NEXT: mov r0, r2
-; SOFT-NEXT: mov r1, r3
-; SOFT-NEXT: mov r2, r5
-; SOFT-NEXT: mov r3, r5
+; SOFT-NEXT: .pad #12
+; SOFT-NEXT: sub sp, #12
+; SOFT-NEXT: mov r5, r3
+; SOFT-NEXT: mov r4, r2
+; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: ldr r2, .LCPI3_0
+; SOFT-NEXT: ldr r3, .LCPI3_1
+; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: bl __aeabi_dcmpgt
+; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT: movs r6, #0
+; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: mov r2, r6
+; SOFT-NEXT: mov r3, r6
; SOFT-NEXT: bl __aeabi_dcmpge
-; SOFT-NEXT: mov r4, r0
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: mov r1, r6
+; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: mov r1, r5
; SOFT-NEXT: bl __aeabi_d2uiz
-; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: cmp r7, #0
; SOFT-NEXT: bne .LBB3_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: .LBB3_2:
-; SOFT-NEXT: mov r4, r0
-; SOFT-NEXT: ldr r2, .LCPI3_0
-; SOFT-NEXT: ldr r3, .LCPI3_1
; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: mov r1, r6
-; SOFT-NEXT: bl __aeabi_dcmpgt
-; SOFT-NEXT: cmp r0, #0
+; SOFT-NEXT: .LBB3_2:
+; SOFT-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT: cmp r1, #0
; SOFT-NEXT: beq .LBB3_4
; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: mvns r4, r5
+; SOFT-NEXT: mvns r0, r6
; SOFT-NEXT: .LBB3_4:
-; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload
-; SOFT-NEXT: str r4, [r0]
-; SOFT-NEXT: add sp, #4
+; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: str r0, [r1]
+; SOFT-NEXT: add sp, #12
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.5:
diff --git a/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll b/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
index 5179f976244892..84f6ee276ba5f1 100644
--- a/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
@@ -468,10 +468,10 @@ define i19 @test_signed_i19_f32(float %f) nounwind {
; VFP2-NEXT: movlt r0, #0
; VFP2-NEXT: movtlt r0, #65532
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s0
; VFP2-NEXT: itt gt
; VFP2-NEXT: movwgt r0, #65535
; VFP2-NEXT: movtgt r0, #3
+; VFP2-NEXT: vcmp.f32 s0, s0
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
; VFP2-NEXT: it vs
; VFP2-NEXT: movvs r0, #0
@@ -569,67 +569,71 @@ define i50 @test_signed_i50_f32(float %f) nounwind {
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r4, r5, r6, r7, lr}
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
-; SOFT-NEXT: .pad #4
-; SOFT-NEXT: sub sp, #4
-; SOFT-NEXT: mov r4, r0
+; SOFT-NEXT: .pad #12
+; SOFT-NEXT: sub sp, #12
+; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: ldr r1, .LCPI6_0
+; SOFT-NEXT: bl __aeabi_fcmpgt
+; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
; SOFT-NEXT: movs r0, #27
; SOFT-NEXT: lsls r1, r0, #27
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: mov r0, r7
; SOFT-NEXT: bl __aeabi_fcmpge
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: bl __aeabi_f2lz
; SOFT-NEXT: mov r5, r0
-; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: bl __aeabi_f2lz
+; SOFT-NEXT: mov r4, r0
+; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: bne .LBB6_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r5, r7
+; SOFT-NEXT: mov r4, r5
; SOFT-NEXT: .LBB6_2:
+; SOFT-NEXT: movs r6, #0
+; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: cmp r0, #0
; SOFT-NEXT: beq .LBB6_4
; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: str r1, [sp] @ 4-byte Spill
-; SOFT-NEXT: b .LBB6_5
+; SOFT-NEXT: mvns r4, r6
; SOFT-NEXT: .LBB6_4:
-; SOFT-NEXT: ldr r0, .LCPI6_0
-; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
-; SOFT-NEXT: .LBB6_5:
-; SOFT-NEXT: ldr r1, .LCPI6_1
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: bl __aeabi_fcmpgt
-; SOFT-NEXT: movs r7, #0
-; SOFT-NEXT: mvns r6, r7
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB6_7
-; SOFT-NEXT: @ %bb.6:
-; SOFT-NEXT: ldr r0, .LCPI6_2
-; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
-; SOFT-NEXT: .LBB6_7:
-; SOFT-NEXT: bne .LBB6_9
-; SOFT-NEXT: @ %bb.8:
-; SOFT-NEXT: mov r6, r5
-; SOFT-NEXT: .LBB6_9:
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: mov r1, r4
+; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: mov r1, r7
; SOFT-NEXT: bl __aeabi_fcmpun
+; SOFT-NEXT: mov r1, r0
; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: bne .LBB6_11
-; SOFT-NEXT: @ %bb.10:
; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: bne .LBB6_6
+; SOFT-NEXT: @ %bb.5:
+; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: .LBB6_6:
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB6_8
+; SOFT-NEXT: @ %bb.7:
+; SOFT-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT: b .LBB6_9
+; SOFT-NEXT: .LBB6_8:
+; SOFT-NEXT: ldr r3, .LCPI6_1
+; SOFT-NEXT: .LBB6_9:
+; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: beq .LBB6_11
+; SOFT-NEXT: @ %bb.10:
+; SOFT-NEXT: ldr r3, .LCPI6_2
; SOFT-NEXT: .LBB6_11:
+; SOFT-NEXT: cmp r1, #0
; SOFT-NEXT: bne .LBB6_13
; SOFT-NEXT: @ %bb.12:
-; SOFT-NEXT: ldr r7, [sp] @ 4-byte Reload
+; SOFT-NEXT: mov r6, r3
; SOFT-NEXT: .LBB6_13:
-; SOFT-NEXT: mov r1, r7
-; SOFT-NEXT: add sp, #4
+; SOFT-NEXT: mov r1, r6
+; SOFT-NEXT: add sp, #12
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.14:
; SOFT-NEXT: .LCPI6_0:
-; SOFT-NEXT: .long 4294836224 @ 0xfffe0000
-; SOFT-NEXT: .LCPI6_1:
; SOFT-NEXT: .long 1476395007 @ 0x57ffffff
+; SOFT-NEXT: .LCPI6_1:
+; SOFT-NEXT: .long 4294836224 @ 0xfffe0000
; SOFT-NEXT: .LCPI6_2:
; SOFT-NEXT: .long 131071 @ 0x1ffff
;
@@ -650,12 +654,11 @@ define i50 @test_signed_i50_f32(float %f) nounwind {
; VFP-NEXT: movlt r0, #0
; VFP-NEXT: vcmp.f32 s2, s4
; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: it gt
-; VFP-NEXT: movgt.w r0, #-1
-; VFP-NEXT: vcmp.f32 s2, s2
-; VFP-NEXT: itt gt
+; VFP-NEXT: ittt gt
; VFP-NEXT: movwgt r1, #65535
; VFP-NEXT: movtgt r1, #1
+; VFP-NEXT: movgt.w r0, #-1
+; VFP-NEXT: vcmp.f32 s2, s2
; VFP-NEXT: vmrs APSR_nzcv, fpscr
; VFP-NEXT: itt vs
; VFP-NEXT: movvs r0, #0
@@ -676,60 +679,68 @@ define i64 @test_signed_i64_f32(float %f) nounwind {
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r4, r5, r6, r7, lr}
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
-; SOFT-NEXT: .pad #4
-; SOFT-NEXT: sub sp, #4
-; SOFT-NEXT: mov r4, r0
+; SOFT-NEXT: .pad #12
+; SOFT-NEXT: sub sp, #12
+; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: ldr r1, .LCPI7_0
+; SOFT-NEXT: bl __aeabi_fcmpgt
+; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
; SOFT-NEXT: movs r0, #223
; SOFT-NEXT: lsls r1, r0, #24
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: mov r0, r7
; SOFT-NEXT: bl __aeabi_fcmpge
-; SOFT-NEXT: mov r6, r0
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: mov r5, r0
+; SOFT-NEXT: mov r0, r7
; SOFT-NEXT: bl __aeabi_f2lz
-; SOFT-NEXT: movs r2, #1
-; SOFT-NEXT: lsls r7, r2, #31
-; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: mov r4, r0
+; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: bne .LBB7_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: mov r4, r5
; SOFT-NEXT: .LBB7_2:
-; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT: movs r6, #0
+; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: cmp r0, #0
; SOFT-NEXT: beq .LBB7_4
; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: mov r7, r1
+; SOFT-NEXT: mvns r4, r6
; SOFT-NEXT: .LBB7_4:
-; SOFT-NEXT: ldr r1, .LCPI7_0
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: bl __aeabi_fcmpgt
-; SOFT-NEXT: movs r6, #0
-; SOFT-NEXT: mvns r5, r6
+; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: mov r1, r7
+; SOFT-NEXT: bl __aeabi_fcmpun
+; SOFT-NEXT: mov r1, r0
; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB7_6
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: bne .LBB7_6
; SOFT-NEXT: @ %bb.5:
-; SOFT-NEXT: ldr r7, .LCPI7_1
+; SOFT-NEXT: mov r0, r4
; SOFT-NEXT: .LBB7_6:
-; SOFT-NEXT: bne .LBB7_8
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB7_8
; SOFT-NEXT: @ %bb.7:
-; SOFT-NEXT: ldr r5, [sp] @ 4-byte Reload
+; SOFT-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT: b .LBB7_9
; SOFT-NEXT: .LBB7_8:
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: mov r1, r4
-; SOFT-NEXT: bl __aeabi_fcmpun
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: bne .LBB7_10
-; SOFT-NEXT: @ %bb.9:
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: .LBB7_10:
-; SOFT-NEXT: bne .LBB7_12
-; SOFT-NEXT: @ %bb.11:
-; SOFT-NEXT: mov r6, r7
-; SOFT-NEXT: .LBB7_12:
+; SOFT-NEXT: movs r2, #1
+; SOFT-NEXT: lsls r3, r2, #31
+; SOFT-NEXT: .LBB7_9:
+; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: beq .LBB7_11
+; SOFT-NEXT: @ %bb.10:
+; SOFT-NEXT: ldr r3, .LCPI7_1
+; SOFT-NEXT: .LBB7_11:
+; SOFT-NEXT: cmp r1, #0
+; SOFT-NEXT: bne .LBB7_13
+; SOFT-NEXT: @ %bb.12:
+; SOFT-NEXT: mov r6, r3
+; SOFT-NEXT: .LBB7_13:
; SOFT-NEXT: mov r1, r6
-; SOFT-NEXT: add sp, #4
+; SOFT-NEXT: add sp, #12
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .p2align 2
-; SOFT-NEXT: @ %bb.13:
+; SOFT-NEXT: @ %bb.14:
; SOFT-NEXT: .LCPI7_0:
; SOFT-NEXT: .long 1593835519 @ 0x5effffff
; SOFT-NEXT: .LCPI7_1:
@@ -777,106 +788,110 @@ define i100 @test_signed_i100_f32(float %f) nounwind {
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
; SOFT-NEXT: .pad #20
; SOFT-NEXT: sub sp, #20
+; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: ldr r1, .LCPI8_0
+; SOFT-NEXT: bl __aeabi_fcmpgt
; SOFT-NEXT: mov r4, r0
; SOFT-NEXT: movs r0, #241
; SOFT-NEXT: lsls r1, r0, #24
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: bl __aeabi_fcmpge
-; SOFT-NEXT: mov r6, r0
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: bl __fixsfti
-; SOFT-NEXT: movs r5, #7
-; SOFT-NEXT: str r5, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT: mvns r7, r5
-; SOFT-NEXT: cmp r6, #0
-; SOFT-NEXT: beq .LBB8_17
+; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT: str r2, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT: str r3, [sp] @ 4-byte Spill
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB8_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: beq .LBB8_18
+; SOFT-NEXT: mov r0, r7
; SOFT-NEXT: .LBB8_2:
+; SOFT-NEXT: movs r5, #0
+; SOFT-NEXT: mvns r1, r5
+; SOFT-NEXT: str r4, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: mov r4, r1
; SOFT-NEXT: bne .LBB8_4
-; SOFT-NEXT: .LBB8_3:
-; SOFT-NEXT: mov r2, r6
+; SOFT-NEXT: @ %bb.3:
+; SOFT-NEXT: mov r4, r0
; SOFT-NEXT: .LBB8_4:
-; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT: beq .LBB8_6
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: mov r1, r6
+; SOFT-NEXT: bl __aeabi_fcmpun
+; SOFT-NEXT: mov r3, r0
+; SOFT-NEXT: cmp r0, #0
+; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: bne .LBB8_6
; SOFT-NEXT: @ %bb.5:
-; SOFT-NEXT: mov r7, r3
-; SOFT-NEXT: .LBB8_6:
-; SOFT-NEXT: ldr r1, .LCPI8_0
; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: bl __aeabi_fcmpgt
-; SOFT-NEXT: movs r6, #0
-; SOFT-NEXT: mvns r5, r6
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB8_19
+; SOFT-NEXT: .LBB8_6:
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: ldr r6, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: bne .LBB8_8
; SOFT-NEXT: @ %bb.7:
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: beq .LBB8_20
+; SOFT-NEXT: str r7, [sp, #4] @ 4-byte Spill
; SOFT-NEXT: .LBB8_8:
-; SOFT-NEXT: mov r7, r5
-; SOFT-NEXT: beq .LBB8_21
-; SOFT-NEXT: .LBB8_9:
-; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: bne .LBB8_11
+; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: mov r2, r6
+; SOFT-NEXT: bne .LBB8_10
+; SOFT-NEXT: @ %bb.9:
+; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
; SOFT-NEXT: .LBB8_10:
-; SOFT-NEXT: ldr r5, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT: .LBB8_11:
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: mov r1, r4
-; SOFT-NEXT: bl __aeabi_fcmpun
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: beq .LBB8_22
-; SOFT-NEXT: @ %bb.12:
-; SOFT-NEXT: mov r1, r6
-; SOFT-NEXT: beq .LBB8_23
+; SOFT-NEXT: cmp r3, #0
+; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: beq .LBB8_18
+; SOFT-NEXT: @ %bb.11:
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: beq .LBB8_19
+; SOFT-NEXT: .LBB8_12:
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB8_14
; SOFT-NEXT: .LBB8_13:
-; SOFT-NEXT: mov r2, r6
-; SOFT-NEXT: beq .LBB8_24
+; SOFT-NEXT: ldr r6, [sp, #12] @ 4-byte Reload
; SOFT-NEXT: .LBB8_14:
+; SOFT-NEXT: cmp r3, #0
+; SOFT-NEXT: mov r2, r5
; SOFT-NEXT: bne .LBB8_16
-; SOFT-NEXT: .LBB8_15:
-; SOFT-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT: @ %bb.15:
+; SOFT-NEXT: mov r2, r6
; SOFT-NEXT: .LBB8_16:
-; SOFT-NEXT: mov r3, r6
-; SOFT-NEXT: add sp, #20
-; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
-; SOFT-NEXT: .LBB8_17:
-; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: bne .LBB8_2
+; SOFT-NEXT: movs r4, #7
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: beq .LBB8_20
+; SOFT-NEXT: @ %bb.17:
+; SOFT-NEXT: ldr r7, [sp] @ 4-byte Reload
+; SOFT-NEXT: b .LBB8_21
; SOFT-NEXT: .LBB8_18:
-; SOFT-NEXT: mov r1, r6
-; SOFT-NEXT: beq .LBB8_3
-; SOFT-NEXT: b .LBB8_4
+; SOFT-NEXT: mov r1, r2
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB8_12
; SOFT-NEXT: .LBB8_19:
-; SOFT-NEXT: str r7, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: bne .LBB8_8
+; SOFT-NEXT: str r7, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB8_13
+; SOFT-NEXT: b .LBB8_14
; SOFT-NEXT: .LBB8_20:
-; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT: mov r7, r5
-; SOFT-NEXT: bne .LBB8_9
+; SOFT-NEXT: mvns r7, r4
; SOFT-NEXT: .LBB8_21:
-; SOFT-NEXT: ldr r7, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: beq .LBB8_10
-; SOFT-NEXT: b .LBB8_11
-; SOFT-NEXT: .LBB8_22:
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: mov r1, r6
-; SOFT-NEXT: bne .LBB8_13
+; SOFT-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: bne .LBB8_23
+; SOFT-NEXT: @ %bb.22:
+; SOFT-NEXT: mov r4, r7
; SOFT-NEXT: .LBB8_23:
-; SOFT-NEXT: mov r1, r7
-; SOFT-NEXT: mov r2, r6
-; SOFT-NEXT: bne .LBB8_14
-; SOFT-NEXT: .LBB8_24:
-; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT: beq .LBB8_15
-; SOFT-NEXT: b .LBB8_16
+; SOFT-NEXT: cmp r3, #0
+; SOFT-NEXT: bne .LBB8_25
+; SOFT-NEXT: @ %bb.24:
+; SOFT-NEXT: mov r5, r4
+; SOFT-NEXT: .LBB8_25:
+; SOFT-NEXT: mov r3, r5
+; SOFT-NEXT: add sp, #20
+; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .p2align 2
-; SOFT-NEXT: @ %bb.25:
+; SOFT-NEXT: @ %bb.26:
; SOFT-NEXT: .LCPI8_0:
; SOFT-NEXT: .long 1895825407 @ 0x70ffffff
;
@@ -928,104 +943,109 @@ define i128 @test_signed_i128_f32(float %f) nounwind {
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
; SOFT-NEXT: .pad #20
; SOFT-NEXT: sub sp, #20
+; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: ldr r1, .LCPI9_0
+; SOFT-NEXT: bl __aeabi_fcmpgt
; SOFT-NEXT: mov r4, r0
; SOFT-NEXT: movs r0, #255
; SOFT-NEXT: lsls r1, r0, #24
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: bl __aeabi_fcmpge
-; SOFT-NEXT: mov r6, r0
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: bl __fixsfti
-; SOFT-NEXT: movs r5, #1
-; SOFT-NEXT: lsls r7, r5, #31
-; SOFT-NEXT: cmp r6, #0
-; SOFT-NEXT: beq .LBB9_18
+; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT: str r2, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT: str r3, [sp] @ 4-byte Spill
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB9_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: beq .LBB9_19
+; SOFT-NEXT: mov r0, r7
; SOFT-NEXT: .LBB9_2:
+; SOFT-NEXT: movs r5, #0
+; SOFT-NEXT: mvns r1, r5
+; SOFT-NEXT: str r4, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: mov r4, r1
; SOFT-NEXT: bne .LBB9_4
-; SOFT-NEXT: .LBB9_3:
-; SOFT-NEXT: mov r2, r6
+; SOFT-NEXT: @ %bb.3:
+; SOFT-NEXT: mov r4, r0
; SOFT-NEXT: .LBB9_4:
-; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT: beq .LBB9_6
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: mov r1, r6
+; SOFT-NEXT: bl __aeabi_fcmpun
+; SOFT-NEXT: mov r3, r0
+; SOFT-NEXT: cmp r0, #0
+; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: bne .LBB9_6
; SOFT-NEXT: @ %bb.5:
-; SOFT-NEXT: mov r7, r3
-; SOFT-NEXT: .LBB9_6:
-; SOFT-NEXT: ldr r1, .LCPI9_0
; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: bl __aeabi_fcmpgt
-; SOFT-NEXT: movs r6, #0
-; SOFT-NEXT: mvns r5, r6
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB9_8
+; SOFT-NEXT: .LBB9_6:
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: ldr r6, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: bne .LBB9_8
; SOFT-NEXT: @ %bb.7:
-; SOFT-NEXT: ldr r7, .LCPI9_1
+; SOFT-NEXT: str r7, [sp, #4] @ 4-byte Spill
; SOFT-NEXT: .LBB9_8:
-; SOFT-NEXT: str r7, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: beq .LBB9_20
+; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: mov r2, r6
+; SOFT-NEXT: bne .LBB9_10
; SOFT-NEXT: @ %bb.9:
-; SOFT-NEXT: mov r7, r5
-; SOFT-NEXT: beq .LBB9_21
+; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
; SOFT-NEXT: .LBB9_10:
-; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: bne .LBB9_12
-; SOFT-NEXT: .LBB9_11:
-; SOFT-NEXT: ldr r5, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT: cmp r3, #0
+; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: beq .LBB9_18
+; SOFT-NEXT: @ %bb.11:
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: beq .LBB9_19
; SOFT-NEXT: .LBB9_12:
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: mov r1, r4
-; SOFT-NEXT: bl __aeabi_fcmpun
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: beq .LBB9_22
-; SOFT-NEXT: @ %bb.13:
-; SOFT-NEXT: mov r1, r6
-; SOFT-NEXT: beq .LBB9_23
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB9_14
+; SOFT-NEXT: .LBB9_13:
+; SOFT-NEXT: ldr r6, [sp, #12] @ 4-byte Reload
; SOFT-NEXT: .LBB9_14:
+; SOFT-NEXT: cmp r3, #0
+; SOFT-NEXT: mov r2, r5
+; SOFT-NEXT: bne .LBB9_16
+; SOFT-NEXT: @ %bb.15:
; SOFT-NEXT: mov r2, r6
-; SOFT-NEXT: beq .LBB9_24
-; SOFT-NEXT: .LBB9_15:
-; SOFT-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT: bne .LBB9_17
; SOFT-NEXT: .LBB9_16:
-; SOFT-NEXT: mov r6, r3
-; SOFT-NEXT: .LBB9_17:
-; SOFT-NEXT: mov r3, r6
-; SOFT-NEXT: add sp, #20
-; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: beq .LBB9_20
+; SOFT-NEXT: @ %bb.17:
+; SOFT-NEXT: ldr r6, [sp] @ 4-byte Reload
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB9_21
+; SOFT-NEXT: b .LBB9_22
; SOFT-NEXT: .LBB9_18:
-; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: bne .LBB9_2
+; SOFT-NEXT: mov r1, r2
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB9_12
; SOFT-NEXT: .LBB9_19:
-; SOFT-NEXT: mov r1, r6
-; SOFT-NEXT: beq .LBB9_3
-; SOFT-NEXT: b .LBB9_4
+; SOFT-NEXT: str r7, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB9_13
+; SOFT-NEXT: b .LBB9_14
; SOFT-NEXT: .LBB9_20:
-; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT: mov r7, r5
-; SOFT-NEXT: bne .LBB9_10
+; SOFT-NEXT: movs r4, #1
+; SOFT-NEXT: lsls r6, r4, #31
+; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB9_22
; SOFT-NEXT: .LBB9_21:
-; SOFT-NEXT: ldr r7, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: beq .LBB9_11
-; SOFT-NEXT: b .LBB9_12
+; SOFT-NEXT: ldr r6, .LCPI9_1
; SOFT-NEXT: .LBB9_22:
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: mov r1, r6
-; SOFT-NEXT: bne .LBB9_14
-; SOFT-NEXT: .LBB9_23:
-; SOFT-NEXT: mov r1, r7
-; SOFT-NEXT: mov r2, r6
-; SOFT-NEXT: bne .LBB9_15
+; SOFT-NEXT: cmp r3, #0
+; SOFT-NEXT: bne .LBB9_24
+; SOFT-NEXT: @ %bb.23:
+; SOFT-NEXT: mov r5, r6
; SOFT-NEXT: .LBB9_24:
-; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT: beq .LBB9_16
-; SOFT-NEXT: b .LBB9_17
+; SOFT-NEXT: mov r3, r5
+; SOFT-NEXT: add sp, #20
+; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.25:
; SOFT-NEXT: .LCPI9_0:
@@ -1196,6 +1216,12 @@ define i8 @test_signed_i8_f64(double %f) nounwind {
; SOFT-NEXT: movs r4, #0
; SOFT-NEXT: ldr r3, .LCPI11_0
; SOFT-NEXT: mov r2, r4
+; SOFT-NEXT: bl __aeabi_dcmpgt
+; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT: ldr r3, .LCPI11_1
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: mov r2, r4
; SOFT-NEXT: bl __aeabi_dcmpge
; SOFT-NEXT: mov r7, r0
; SOFT-NEXT: mov r0, r6
@@ -1203,45 +1229,36 @@ define i8 @test_signed_i8_f64(double %f) nounwind {
; SOFT-NEXT: bl __aeabi_d2iz
; SOFT-NEXT: movs r1, #127
; SOFT-NEXT: cmp r7, #0
-; SOFT-NEXT: beq .LBB11_2
+; SOFT-NEXT: bne .LBB11_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
-; SOFT-NEXT: mov r7, r1
-; SOFT-NEXT: b .LBB11_3
+; SOFT-NEXT: mvns r0, r1
; SOFT-NEXT: .LBB11_2:
+; SOFT-NEXT: ldr r2, [sp] @ 4-byte Reload
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: bne .LBB11_4
+; SOFT-NEXT: @ %bb.3:
+; SOFT-NEXT: mov r1, r0
+; SOFT-NEXT: .LBB11_4:
; SOFT-NEXT: mov r7, r1
-; SOFT-NEXT: mvns r0, r1
-; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
-; SOFT-NEXT: .LBB11_3:
-; SOFT-NEXT: ldr r3, .LCPI11_1
-; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: mov r1, r5
-; SOFT-NEXT: mov r2, r4
-; SOFT-NEXT: bl __aeabi_dcmpgt
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: bne .LBB11_5
-; SOFT-NEXT: @ %bb.4:
-; SOFT-NEXT: ldr r7, [sp] @ 4-byte Reload
-; SOFT-NEXT: .LBB11_5:
; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: mov r1, r5
; SOFT-NEXT: mov r2, r6
; SOFT-NEXT: mov r3, r5
; SOFT-NEXT: bl __aeabi_dcmpun
; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: bne .LBB11_7
-; SOFT-NEXT: @ %bb.6:
+; SOFT-NEXT: bne .LBB11_6
+; SOFT-NEXT: @ %bb.5:
; SOFT-NEXT: mov r4, r7
-; SOFT-NEXT: .LBB11_7:
+; SOFT-NEXT: .LBB11_6:
; SOFT-NEXT: mov r0, r4
; SOFT-NEXT: add sp, #4
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .p2align 2
-; SOFT-NEXT: @ %bb.8:
+; SOFT-NEXT: @ %bb.7:
; SOFT-NEXT: .LCPI11_0:
-; SOFT-NEXT: .long 3227516928 @ 0xc0600000
-; SOFT-NEXT: .LCPI11_1:
; SOFT-NEXT: .long 1080016896 @ 0x405fc000
+; SOFT-NEXT: .LCPI11_1:
+; SOFT-NEXT: .long 3227516928 @ 0xc0600000
;
; VFP2-LABEL: test_signed_i8_f64:
; VFP2: @ %bb.0:
@@ -1310,27 +1327,29 @@ define i13 @test_signed_i13_f64(double %f) nounwind {
; SOFT-NEXT: movs r4, #0
; SOFT-NEXT: ldr r3, .LCPI12_0
; SOFT-NEXT: mov r2, r4
+; SOFT-NEXT: bl __aeabi_dcmpgt
+; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT: ldr r3, .LCPI12_1
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: mov r2, r4
; SOFT-NEXT: bl __aeabi_dcmpge
; SOFT-NEXT: mov r7, r0
; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: mov r1, r5
; SOFT-NEXT: bl __aeabi_d2iz
; SOFT-NEXT: cmp r7, #0
-; SOFT-NEXT: beq .LBB12_2
+; SOFT-NEXT: bne .LBB12_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: b .LBB12_3
+; SOFT-NEXT: ldr r0, .LCPI12_2
; SOFT-NEXT: .LBB12_2:
-; SOFT-NEXT: ldr r7, .LCPI12_1
-; SOFT-NEXT: .LBB12_3:
-; SOFT-NEXT: ldr r3, .LCPI12_2
-; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: mov r1, r5
-; SOFT-NEXT: mov r2, r4
-; SOFT-NEXT: bl __aeabi_dcmpgt
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB12_5
-; SOFT-NEXT: @ %bb.4:
+; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload
+; SOFT-NEXT: cmp r1, #0
+; SOFT-NEXT: bne .LBB12_4
+; SOFT-NEXT: @ %bb.3:
+; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: b .LBB12_5
+; SOFT-NEXT: .LBB12_4:
; SOFT-NEXT: ldr r7, .LCPI12_3
; SOFT-NEXT: .LBB12_5:
; SOFT-NEXT: mov r0, r6
@@ -1349,11 +1368,11 @@ define i13 @test_signed_i13_f64(double %f) nounwind {
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.8:
; SOFT-NEXT: .LCPI12_0:
-; SOFT-NEXT: .long 3232759808 @ 0xc0b00000
+; SOFT-NEXT: .long 1085275648 @ 0x40affe00
; SOFT-NEXT: .LCPI12_1:
-; SOFT-NEXT: .long 4294963200 @ 0xfffff000
+; SOFT-NEXT: .long 3232759808 @ 0xc0b00000
; SOFT-NEXT: .LCPI12_2:
-; SOFT-NEXT: .long 1085275648 @ 0x40affe00
+; SOFT-NEXT: .long 4294963200 @ 0xfffff000
; SOFT-NEXT: .LCPI12_3:
; SOFT-NEXT: .long 4095 @ 0xfff
;
@@ -1425,27 +1444,29 @@ define i16 @test_signed_i16_f64(double %f) nounwind {
; SOFT-NEXT: movs r4, #0
; SOFT-NEXT: ldr r3, .LCPI13_0
; SOFT-NEXT: mov r2, r4
+; SOFT-NEXT: bl __aeabi_dcmpgt
+; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT: ldr r3, .LCPI13_1
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: mov r2, r4
; SOFT-NEXT: bl __aeabi_dcmpge
; SOFT-NEXT: mov r7, r0
; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: mov r1, r5
; SOFT-NEXT: bl __aeabi_d2iz
; SOFT-NEXT: cmp r7, #0
-; SOFT-NEXT: beq .LBB13_2
+; SOFT-NEXT: bne .LBB13_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: b .LBB13_3
+; SOFT-NEXT: ldr r0, .LCPI13_2
; SOFT-NEXT: .LBB13_2:
-; SOFT-NEXT: ldr r7, .LCPI13_1
-; SOFT-NEXT: .LBB13_3:
-; SOFT-NEXT: ldr r3, .LCPI13_2
-; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: mov r1, r5
-; SOFT-NEXT: mov r2, r4
-; SOFT-NEXT: bl __aeabi_dcmpgt
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB13_5
-; SOFT-NEXT: @ %bb.4:
+; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload
+; SOFT-NEXT: cmp r1, #0
+; SOFT-NEXT: bne .LBB13_4
+; SOFT-NEXT: @ %bb.3:
+; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: b .LBB13_5
+; SOFT-NEXT: .LBB13_4:
; SOFT-NEXT: ldr r7, .LCPI13_3
; SOFT-NEXT: .LBB13_5:
; SOFT-NEXT: mov r0, r6
@@ -1464,11 +1485,11 @@ define i16 @test_signed_i16_f64(double %f) nounwind {
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.8:
; SOFT-NEXT: .LCPI13_0:
-; SOFT-NEXT: .long 3235905536 @ 0xc0e00000
+; SOFT-NEXT: .long 1088421824 @ 0x40dfffc0
; SOFT-NEXT: .LCPI13_1:
-; SOFT-NEXT: .long 4294934528 @ 0xffff8000
+; SOFT-NEXT: .long 3235905536 @ 0xc0e00000
; SOFT-NEXT: .LCPI13_2:
-; SOFT-NEXT: .long 1088421824 @ 0x40dfffc0
+; SOFT-NEXT: .long 4294934528 @ 0xffff8000
; SOFT-NEXT: .LCPI13_3:
; SOFT-NEXT: .long 32767 @ 0x7fff
;
@@ -1540,27 +1561,29 @@ define i19 @test_signed_i19_f64(double %f) nounwind {
; SOFT-NEXT: movs r4, #0
; SOFT-NEXT: ldr r3, .LCPI14_0
; SOFT-NEXT: mov r2, r4
+; SOFT-NEXT: bl __aeabi_dcmpgt
+; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT: ldr r3, .LCPI14_1
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: mov r2, r4
; SOFT-NEXT: bl __aeabi_dcmpge
; SOFT-NEXT: mov r7, r0
; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: mov r1, r5
; SOFT-NEXT: bl __aeabi_d2iz
; SOFT-NEXT: cmp r7, #0
-; SOFT-NEXT: beq .LBB14_2
+; SOFT-NEXT: bne .LBB14_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: b .LBB14_3
+; SOFT-NEXT: ldr r0, .LCPI14_2
; SOFT-NEXT: .LBB14_2:
-; SOFT-NEXT: ldr r7, .LCPI14_1
-; SOFT-NEXT: .LBB14_3:
-; SOFT-NEXT: ldr r3, .LCPI14_2
-; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: mov r1, r5
-; SOFT-NEXT: mov r2, r4
-; SOFT-NEXT: bl __aeabi_dcmpgt
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB14_5
-; SOFT-NEXT: @ %bb.4:
+; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload
+; SOFT-NEXT: cmp r1, #0
+; SOFT-NEXT: bne .LBB14_4
+; SOFT-NEXT: @ %bb.3:
+; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: b .LBB14_5
+; SOFT-NEXT: .LBB14_4:
; SOFT-NEXT: ldr r7, .LCPI14_3
; SOFT-NEXT: .LBB14_5:
; SOFT-NEXT: mov r0, r6
@@ -1579,11 +1602,11 @@ define i19 @test_signed_i19_f64(double %f) nounwind {
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.8:
; SOFT-NEXT: .LCPI14_0:
-; SOFT-NEXT: .long 3239051264 @ 0xc1100000
+; SOFT-NEXT: .long 1091567608 @ 0x410ffff8
; SOFT-NEXT: .LCPI14_1:
-; SOFT-NEXT: .long 4294705152 @ 0xfffc0000
+; SOFT-NEXT: .long 3239051264 @ 0xc1100000
; SOFT-NEXT: .LCPI14_2:
-; SOFT-NEXT: .long 1091567608 @ 0x410ffff8
+; SOFT-NEXT: .long 4294705152 @ 0xfffc0000
; SOFT-NEXT: .LCPI14_3:
; SOFT-NEXT: .long 262143 @ 0x3ffff
;
@@ -1651,56 +1674,58 @@ define i32 @test_signed_i32_f64(double %f) nounwind {
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
; SOFT-NEXT: .pad #4
; SOFT-NEXT: sub sp, #4
-; SOFT-NEXT: mov r5, r1
-; SOFT-NEXT: mov r6, r0
-; SOFT-NEXT: movs r4, #0
-; SOFT-NEXT: ldr r3, .LCPI15_0
-; SOFT-NEXT: mov r2, r4
+; SOFT-NEXT: mov r4, r1
+; SOFT-NEXT: mov r5, r0
+; SOFT-NEXT: ldr r2, .LCPI15_0
+; SOFT-NEXT: ldr r3, .LCPI15_1
+; SOFT-NEXT: bl __aeabi_dcmpgt
+; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT: movs r6, #0
+; SOFT-NEXT: ldr r3, .LCPI15_2
+; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: mov r1, r4
+; SOFT-NEXT: mov r2, r6
; SOFT-NEXT: bl __aeabi_dcmpge
; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: bl __aeabi_d2iz
; SOFT-NEXT: cmp r7, #0
-; SOFT-NEXT: beq .LBB15_2
+; SOFT-NEXT: bne .LBB15_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: b .LBB15_3
-; SOFT-NEXT: .LBB15_2:
; SOFT-NEXT: movs r0, #1
-; SOFT-NEXT: lsls r7, r0, #31
-; SOFT-NEXT: .LBB15_3:
-; SOFT-NEXT: ldr r2, .LCPI15_1
-; SOFT-NEXT: ldr r3, .LCPI15_2
-; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: mov r1, r5
-; SOFT-NEXT: bl __aeabi_dcmpgt
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB15_5
-; SOFT-NEXT: @ %bb.4:
+; SOFT-NEXT: lsls r0, r0, #31
+; SOFT-NEXT: .LBB15_2:
+; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload
+; SOFT-NEXT: cmp r1, #0
+; SOFT-NEXT: bne .LBB15_4
+; SOFT-NEXT: @ %bb.3:
+; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: b .LBB15_5
+; SOFT-NEXT: .LBB15_4:
; SOFT-NEXT: ldr r7, .LCPI15_3
; SOFT-NEXT: .LBB15_5:
-; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: mov r1, r5
-; SOFT-NEXT: mov r2, r6
-; SOFT-NEXT: mov r3, r5
+; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: mov r1, r4
+; SOFT-NEXT: mov r2, r5
+; SOFT-NEXT: mov r3, r4
; SOFT-NEXT: bl __aeabi_dcmpun
; SOFT-NEXT: cmp r0, #0
; SOFT-NEXT: bne .LBB15_7
; SOFT-NEXT: @ %bb.6:
-; SOFT-NEXT: mov r4, r7
+; SOFT-NEXT: mov r6, r7
; SOFT-NEXT: .LBB15_7:
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: add sp, #4
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.8:
; SOFT-NEXT: .LCPI15_0:
-; SOFT-NEXT: .long 3252682752 @ 0xc1e00000
-; SOFT-NEXT: .LCPI15_1:
; SOFT-NEXT: .long 4290772992 @ 0xffc00000
-; SOFT-NEXT: .LCPI15_2:
+; SOFT-NEXT: .LCPI15_1:
; SOFT-NEXT: .long 1105199103 @ 0x41dfffff
+; SOFT-NEXT: .LCPI15_2:
+; SOFT-NEXT: .long 3252682752 @ 0xc1e00000
; SOFT-NEXT: .LCPI15_3:
; SOFT-NEXT: .long 2147483647 @ 0x7fffffff
;
@@ -1728,73 +1753,82 @@ define i50 @test_signed_i50_f64(double %f) nounwind {
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
; SOFT-NEXT: .pad #12
; SOFT-NEXT: sub sp, #12
-; SOFT-NEXT: mov r5, r1
+; SOFT-NEXT: mov r7, r1
; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: movs r0, #15
+; SOFT-NEXT: mvns r2, r0
+; SOFT-NEXT: ldr r3, .LCPI16_0
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: bl __aeabi_dcmpgt
+; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
; SOFT-NEXT: movs r0, #195
; SOFT-NEXT: lsls r3, r0, #24
; SOFT-NEXT: movs r4, #0
; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: mov r1, r7
; SOFT-NEXT: mov r2, r4
; SOFT-NEXT: bl __aeabi_dcmpge
-; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: mov r5, r0
; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: mov r1, r7
; SOFT-NEXT: bl __aeabi_d2lz
-; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: mov r2, r0
+; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: bne .LBB16_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: str r7, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT: mov r2, r5
; SOFT-NEXT: .LBB16_2:
-; SOFT-NEXT: beq .LBB16_4
+; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: cmp r0, #0
+; SOFT-NEXT: bne .LBB16_4
; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: str r2, [sp] @ 4-byte Spill
; SOFT-NEXT: b .LBB16_5
; SOFT-NEXT: .LBB16_4:
-; SOFT-NEXT: ldr r0, .LCPI16_0
-; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: mvns r0, r4
+; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
; SOFT-NEXT: .LBB16_5:
-; SOFT-NEXT: movs r0, #15
-; SOFT-NEXT: mvns r2, r0
-; SOFT-NEXT: ldr r3, .LCPI16_1
; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: mov r1, r5
-; SOFT-NEXT: bl __aeabi_dcmpgt
-; SOFT-NEXT: mvns r7, r4
+; SOFT-NEXT: mov r1, r7
+; SOFT-NEXT: mov r2, r6
+; SOFT-NEXT: mov r3, r7
+; SOFT-NEXT: bl __aeabi_dcmpun
+; SOFT-NEXT: mov r1, r0
; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB16_7
+; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: bne .LBB16_7
; SOFT-NEXT: @ %bb.6:
-; SOFT-NEXT: ldr r0, .LCPI16_2
-; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload
; SOFT-NEXT: .LBB16_7:
-; SOFT-NEXT: bne .LBB16_9
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB16_9
; SOFT-NEXT: @ %bb.8:
-; SOFT-NEXT: ldr r7, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT: b .LBB16_10
; SOFT-NEXT: .LBB16_9:
-; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: mov r1, r5
-; SOFT-NEXT: mov r2, r6
-; SOFT-NEXT: mov r3, r5
-; SOFT-NEXT: bl __aeabi_dcmpun
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: bne .LBB16_11
-; SOFT-NEXT: @ %bb.10:
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: .LBB16_11:
-; SOFT-NEXT: bne .LBB16_13
-; SOFT-NEXT: @ %bb.12:
-; SOFT-NEXT: ldr r4, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT: .LBB16_13:
+; SOFT-NEXT: ldr r3, .LCPI16_1
+; SOFT-NEXT: .LBB16_10:
+; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: beq .LBB16_12
+; SOFT-NEXT: @ %bb.11:
+; SOFT-NEXT: ldr r3, .LCPI16_2
+; SOFT-NEXT: .LBB16_12:
+; SOFT-NEXT: cmp r1, #0
+; SOFT-NEXT: bne .LBB16_14
+; SOFT-NEXT: @ %bb.13:
+; SOFT-NEXT: mov r4, r3
+; SOFT-NEXT: .LBB16_14:
; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: add sp, #12
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .p2align 2
-; SOFT-NEXT: @ %bb.14:
+; SOFT-NEXT: @ %bb.15:
; SOFT-NEXT: .LCPI16_0:
-; SOFT-NEXT: .long 4294836224 @ 0xfffe0000
-; SOFT-NEXT: .LCPI16_1:
; SOFT-NEXT: .long 1124073471 @ 0x42ffffff
+; SOFT-NEXT: .LCPI16_1:
+; SOFT-NEXT: .long 4294836224 @ 0xfffe0000
; SOFT-NEXT: .LCPI16_2:
; SOFT-NEXT: .long 131071 @ 0x1ffff
;
@@ -1816,12 +1850,11 @@ define i50 @test_signed_i50_f64(double %f) nounwind {
; VFP2-NEXT: movlt r0, #0
; VFP2-NEXT: vcmp.f64 d17, d18
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movgt.w r0, #-1
-; VFP2-NEXT: vcmp.f64 d17, d17
-; VFP2-NEXT: itt gt
+; VFP2-NEXT: ittt gt
; VFP2-NEXT: movwgt r1, #65535
; VFP2-NEXT: movtgt r1, #1
+; VFP2-NEXT: movgt.w r0, #-1
+; VFP2-NEXT: vcmp.f64 d17, d17
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
; VFP2-NEXT: itt vs
; VFP2-NEXT: movvs r0, #0
@@ -1874,70 +1907,77 @@ define i64 @test_signed_i64_f64(double %f) nounwind {
; SOFT-NEXT: .save {r4, r5, r6, r7, lr}
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
; SOFT-NEXT: .pad #12
-; SOFT-NEXT: sub sp, #12
-; SOFT-NEXT: mov r5, r1
-; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: sub sp, #12
+; SOFT-NEXT: mov r6, r1
+; SOFT-NEXT: mov r5, r0
; SOFT-NEXT: movs r4, #0
+; SOFT-NEXT: mvns r2, r4
; SOFT-NEXT: ldr r3, .LCPI17_0
+; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT: bl __aeabi_dcmpgt
+; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: ldr r3, .LCPI17_1
+; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: mov r1, r6
; SOFT-NEXT: mov r2, r4
; SOFT-NEXT: bl __aeabi_dcmpge
; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: mov r1, r6
; SOFT-NEXT: bl __aeabi_d2lz
-; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT: movs r0, #1
-; SOFT-NEXT: lsls r0, r0, #31
+; SOFT-NEXT: str r1, [sp] @ 4-byte Spill
; SOFT-NEXT: cmp r7, #0
; SOFT-NEXT: bne .LBB17_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: str r7, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT: mov r0, r7
; SOFT-NEXT: .LBB17_2:
-; SOFT-NEXT: beq .LBB17_4
+; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: cmp r1, #0
+; SOFT-NEXT: bne .LBB17_4
; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: mov r0, r1
+; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill
; SOFT-NEXT: .LBB17_4:
-; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: mvns r7, r4
-; SOFT-NEXT: ldr r3, .LCPI17_1
-; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: mov r1, r5
-; SOFT-NEXT: mov r2, r7
-; SOFT-NEXT: bl __aeabi_dcmpgt
+; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: mov r1, r6
+; SOFT-NEXT: mov r2, r5
+; SOFT-NEXT: mov r3, r6
+; SOFT-NEXT: bl __aeabi_dcmpun
+; SOFT-NEXT: mov r1, r0
; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB17_6
+; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: bne .LBB17_6
; SOFT-NEXT: @ %bb.5:
-; SOFT-NEXT: ldr r0, .LCPI17_2
-; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
; SOFT-NEXT: .LBB17_6:
-; SOFT-NEXT: bne .LBB17_8
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: beq .LBB17_8
; SOFT-NEXT: @ %bb.7:
-; SOFT-NEXT: ldr r7, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT: ldr r3, [sp] @ 4-byte Reload
+; SOFT-NEXT: b .LBB17_9
; SOFT-NEXT: .LBB17_8:
-; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: mov r1, r5
-; SOFT-NEXT: mov r2, r6
-; SOFT-NEXT: mov r3, r5
-; SOFT-NEXT: bl __aeabi_dcmpun
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: bne .LBB17_10
-; SOFT-NEXT: @ %bb.9:
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: .LBB17_10:
-; SOFT-NEXT: bne .LBB17_12
-; SOFT-NEXT: @ %bb.11:
-; SOFT-NEXT: ldr r4, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT: .LBB17_12:
+; SOFT-NEXT: movs r2, #1
+; SOFT-NEXT: lsls r3, r2, #31
+; SOFT-NEXT: .LBB17_9:
+; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: beq .LBB17_11
+; SOFT-NEXT: @ %bb.10:
+; SOFT-NEXT: ldr r3, .LCPI17_2
+; SOFT-NEXT: .LBB17_11:
+; SOFT-NEXT: cmp r1, #0
+; SOFT-NEXT: bne .LBB17_13
+; SOFT-NEXT: @ %bb.12:
+; SOFT-NEXT: mov r4, r3
+; SOFT-NEXT: .LBB17_13:
; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: add sp, #12
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .p2align 2
-; SOFT-NEXT: @ %bb.13:
+; SOFT-NEXT: @ %bb.14:
; SOFT-NEXT: .LCPI17_0:
-; SOFT-NEXT: .long 3286237184 @ 0xc3e00000
-; SOFT-NEXT: .LCPI17_1:
; SOFT-NEXT: .long 1138753535 @ 0x43dfffff
+; SOFT-NEXT: .LCPI17_1:
+; SOFT-NEXT: .long 3286237184 @ 0xc3e00000
; SOFT-NEXT: .LCPI17_2:
; SOFT-NEXT: .long 2147483647 @ 0x7fffffff
;
@@ -2019,119 +2059,122 @@ define i100 @test_signed_i100_f64(double %f) nounwind {
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r4, r5, r6, r7, lr}
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
-; SOFT-NEXT: .pad #20
-; SOFT-NEXT: sub sp, #20
+; SOFT-NEXT: .pad #28
+; SOFT-NEXT: sub sp, #28
; SOFT-NEXT: mov r5, r1
; SOFT-NEXT: mov r6, r0
; SOFT-NEXT: movs r4, #0
+; SOFT-NEXT: mvns r2, r4
; SOFT-NEXT: ldr r3, .LCPI18_0
+; SOFT-NEXT: str r2, [sp, #24] @ 4-byte Spill
+; SOFT-NEXT: bl __aeabi_dcmpgt
+; SOFT-NEXT: str r0, [sp, #20] @ 4-byte Spill
+; SOFT-NEXT: ldr r3, .LCPI18_1
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: mov r1, r5
; SOFT-NEXT: mov r2, r4
; SOFT-NEXT: bl __aeabi_dcmpge
; SOFT-NEXT: mov r7, r0
; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: mov r1, r5
; SOFT-NEXT: bl __fixdfti
-; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT: movs r0, #7
-; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT: mvns r0, r0
+; SOFT-NEXT: str r1, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT: str r2, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill
; SOFT-NEXT: cmp r7, #0
-; SOFT-NEXT: beq .LBB18_17
+; SOFT-NEXT: bne .LBB18_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: beq .LBB18_18
+; SOFT-NEXT: mov r0, r7
; SOFT-NEXT: .LBB18_2:
-; SOFT-NEXT: beq .LBB18_19
-; SOFT-NEXT: .LBB18_3:
-; SOFT-NEXT: beq .LBB18_5
+; SOFT-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
+; SOFT-NEXT: cmp r1, #0
+; SOFT-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
+; SOFT-NEXT: bne .LBB18_4
+; SOFT-NEXT: @ %bb.3:
+; SOFT-NEXT: mov r1, r0
; SOFT-NEXT: .LBB18_4:
-; SOFT-NEXT: mov r0, r3
-; SOFT-NEXT: .LBB18_5:
-; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
-; SOFT-NEXT: mvns r7, r4
-; SOFT-NEXT: ldr r3, .LCPI18_1
-; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: mov r1, r5
-; SOFT-NEXT: mov r2, r7
-; SOFT-NEXT: bl __aeabi_dcmpgt
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB18_20
-; SOFT-NEXT: @ %bb.6:
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: beq .LBB18_21
-; SOFT-NEXT: .LBB18_7:
-; SOFT-NEXT: mov r1, r7
-; SOFT-NEXT: bne .LBB18_9
-; SOFT-NEXT: .LBB18_8:
-; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT: .LBB18_9:
; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: bne .LBB18_11
-; SOFT-NEXT: @ %bb.10:
-; SOFT-NEXT: ldr r7, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT: .LBB18_11:
; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: mov r1, r5
; SOFT-NEXT: mov r2, r6
; SOFT-NEXT: mov r3, r5
; SOFT-NEXT: bl __aeabi_dcmpun
+; SOFT-NEXT: mov r3, r0
; SOFT-NEXT: cmp r0, #0
; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: beq .LBB18_22
-; SOFT-NEXT: @ %bb.12:
+; SOFT-NEXT: bne .LBB18_6
+; SOFT-NEXT: @ %bb.5:
+; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT: .LBB18_6:
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: ldr r6, [sp, #20] @ 4-byte Reload
+; SOFT-NEXT: bne .LBB18_8
+; SOFT-NEXT: @ %bb.7:
+; SOFT-NEXT: str r7, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT: .LBB18_8:
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
+; SOFT-NEXT: bne .LBB18_10
+; SOFT-NEXT: @ %bb.9:
+; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT: .LBB18_10:
+; SOFT-NEXT: cmp r3, #0
; SOFT-NEXT: mov r1, r4
-; SOFT-NEXT: beq .LBB18_23
+; SOFT-NEXT: beq .LBB18_18
+; SOFT-NEXT: @ %bb.11:
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: beq .LBB18_19
+; SOFT-NEXT: .LBB18_12:
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: bne .LBB18_14
; SOFT-NEXT: .LBB18_13:
-; SOFT-NEXT: mov r2, r4
-; SOFT-NEXT: beq .LBB18_24
+; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT: str r2, [sp, #24] @ 4-byte Spill
; SOFT-NEXT: .LBB18_14:
+; SOFT-NEXT: cmp r3, #0
+; SOFT-NEXT: mov r2, r4
; SOFT-NEXT: bne .LBB18_16
-; SOFT-NEXT: .LBB18_15:
-; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT: @ %bb.15:
+; SOFT-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
; SOFT-NEXT: .LBB18_16:
-; SOFT-NEXT: mov r3, r4
-; SOFT-NEXT: add sp, #20
-; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
-; SOFT-NEXT: .LBB18_17:
-; SOFT-NEXT: str r7, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT: bne .LBB18_2
+; SOFT-NEXT: movs r5, #7
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: beq .LBB18_20
+; SOFT-NEXT: @ %bb.17:
+; SOFT-NEXT: ldr r7, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: beq .LBB18_21
+; SOFT-NEXT: b .LBB18_22
; SOFT-NEXT: .LBB18_18:
-; SOFT-NEXT: str r7, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: bne .LBB18_3
+; SOFT-NEXT: mov r1, r2
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB18_12
; SOFT-NEXT: .LBB18_19:
-; SOFT-NEXT: str r7, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT: bne .LBB18_4
-; SOFT-NEXT: b .LBB18_5
+; SOFT-NEXT: str r7, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: beq .LBB18_13
+; SOFT-NEXT: b .LBB18_14
; SOFT-NEXT: .LBB18_20:
-; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload
-; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: bne .LBB18_7
+; SOFT-NEXT: mvns r7, r5
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: bne .LBB18_22
; SOFT-NEXT: .LBB18_21:
-; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT: mov r1, r7
-; SOFT-NEXT: beq .LBB18_8
-; SOFT-NEXT: b .LBB18_9
+; SOFT-NEXT: mov r5, r7
; SOFT-NEXT: .LBB18_22:
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: mov r1, r4
-; SOFT-NEXT: bne .LBB18_13
-; SOFT-NEXT: .LBB18_23:
-; SOFT-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT: mov r2, r4
-; SOFT-NEXT: bne .LBB18_14
+; SOFT-NEXT: cmp r3, #0
+; SOFT-NEXT: bne .LBB18_24
+; SOFT-NEXT: @ %bb.23:
+; SOFT-NEXT: mov r4, r5
; SOFT-NEXT: .LBB18_24:
-; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT: beq .LBB18_15
-; SOFT-NEXT: b .LBB18_16
+; SOFT-NEXT: mov r3, r4
+; SOFT-NEXT: add sp, #28
+; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.25:
; SOFT-NEXT: .LCPI18_0:
-; SOFT-NEXT: .long 3323985920 @ 0xc6200000
-; SOFT-NEXT: .LCPI18_1:
; SOFT-NEXT: .long 1176502271 @ 0x461fffff
+; SOFT-NEXT: .LCPI18_1:
+; SOFT-NEXT: .long 3323985920 @ 0xc6200000
;
; VFP2-LABEL: test_signed_i100_f64:
; VFP2: @ %bb.0:
@@ -2223,118 +2266,122 @@ define i128 @test_signed_i128_f64(double %f) nounwind {
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r4, r5, r6, r7, lr}
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
-; SOFT-NEXT: .pad #20
-; SOFT-NEXT: sub sp, #20
+; SOFT-NEXT: .pad #28
+; SOFT-NEXT: sub sp, #28
; SOFT-NEXT: mov r5, r1
; SOFT-NEXT: mov r6, r0
; SOFT-NEXT: movs r4, #0
+; SOFT-NEXT: mvns r2, r4
; SOFT-NEXT: ldr r3, .LCPI19_0
+; SOFT-NEXT: str r2, [sp, #24] @ 4-byte Spill
+; SOFT-NEXT: bl __aeabi_dcmpgt
+; SOFT-NEXT: str r0, [sp, #20] @ 4-byte Spill
+; SOFT-NEXT: ldr r3, .LCPI19_1
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: mov r1, r5
; SOFT-NEXT: mov r2, r4
; SOFT-NEXT: bl __aeabi_dcmpge
; SOFT-NEXT: mov r7, r0
; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: mov r1, r5
; SOFT-NEXT: bl __fixdfti
-; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT: movs r0, #1
-; SOFT-NEXT: lsls r0, r0, #31
-; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT: str r1, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT: str r2, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill
; SOFT-NEXT: cmp r7, #0
-; SOFT-NEXT: beq .LBB19_17
+; SOFT-NEXT: bne .LBB19_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: beq .LBB19_18
+; SOFT-NEXT: mov r0, r7
; SOFT-NEXT: .LBB19_2:
-; SOFT-NEXT: beq .LBB19_19
-; SOFT-NEXT: .LBB19_3:
-; SOFT-NEXT: beq .LBB19_5
+; SOFT-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
+; SOFT-NEXT: cmp r1, #0
+; SOFT-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
+; SOFT-NEXT: bne .LBB19_4
+; SOFT-NEXT: @ %bb.3:
+; SOFT-NEXT: mov r1, r0
; SOFT-NEXT: .LBB19_4:
-; SOFT-NEXT: str r3, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT: .LBB19_5:
-; SOFT-NEXT: mvns r7, r4
-; SOFT-NEXT: ldr r3, .LCPI19_1
-; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: mov r1, r5
-; SOFT-NEXT: mov r2, r7
-; SOFT-NEXT: bl __aeabi_dcmpgt
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: bne .LBB19_20
-; SOFT-NEXT: @ %bb.6:
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: beq .LBB19_21
-; SOFT-NEXT: .LBB19_7:
-; SOFT-NEXT: mov r1, r7
-; SOFT-NEXT: bne .LBB19_9
-; SOFT-NEXT: .LBB19_8:
-; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT: .LBB19_9:
; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: bne .LBB19_11
-; SOFT-NEXT: @ %bb.10:
-; SOFT-NEXT: ldr r7, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT: .LBB19_11:
; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: mov r1, r5
; SOFT-NEXT: mov r2, r6
; SOFT-NEXT: mov r3, r5
; SOFT-NEXT: bl __aeabi_dcmpun
+; SOFT-NEXT: mov r3, r0
; SOFT-NEXT: cmp r0, #0
; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: beq .LBB19_22
-; SOFT-NEXT: @ %bb.12:
+; SOFT-NEXT: bne .LBB19_6
+; SOFT-NEXT: @ %bb.5:
+; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT: .LBB19_6:
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: ldr r6, [sp, #20] @ 4-byte Reload
+; SOFT-NEXT: bne .LBB19_8
+; SOFT-NEXT: @ %bb.7:
+; SOFT-NEXT: str r7, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT: .LBB19_8:
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
+; SOFT-NEXT: bne .LBB19_10
+; SOFT-NEXT: @ %bb.9:
+; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT: .LBB19_10:
+; SOFT-NEXT: cmp r3, #0
; SOFT-NEXT: mov r1, r4
-; SOFT-NEXT: beq .LBB19_23
+; SOFT-NEXT: beq .LBB19_18
+; SOFT-NEXT: @ %bb.11:
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: beq .LBB19_19
+; SOFT-NEXT: .LBB19_12:
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: bne .LBB19_14
; SOFT-NEXT: .LBB19_13:
-; SOFT-NEXT: mov r2, r4
-; SOFT-NEXT: beq .LBB19_24
+; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT: str r2, [sp, #24] @ 4-byte Spill
; SOFT-NEXT: .LBB19_14:
+; SOFT-NEXT: cmp r3, #0
+; SOFT-NEXT: mov r2, r4
; SOFT-NEXT: bne .LBB19_16
-; SOFT-NEXT: .LBB19_15:
-; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT: @ %bb.15:
+; SOFT-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
; SOFT-NEXT: .LBB19_16:
-; SOFT-NEXT: mov r3, r4
-; SOFT-NEXT: add sp, #20
-; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
-; SOFT-NEXT: .LBB19_17:
-; SOFT-NEXT: str r7, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT: bne .LBB19_2
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: beq .LBB19_20
+; SOFT-NEXT: @ %bb.17:
+; SOFT-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: bne .LBB19_21
+; SOFT-NEXT: b .LBB19_22
; SOFT-NEXT: .LBB19_18:
-; SOFT-NEXT: str r7, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: bne .LBB19_3
+; SOFT-NEXT: mov r1, r2
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB19_12
; SOFT-NEXT: .LBB19_19:
-; SOFT-NEXT: str r7, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT: bne .LBB19_4
-; SOFT-NEXT: b .LBB19_5
+; SOFT-NEXT: str r7, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: beq .LBB19_13
+; SOFT-NEXT: b .LBB19_14
; SOFT-NEXT: .LBB19_20:
-; SOFT-NEXT: ldr r0, .LCPI19_2
-; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: bne .LBB19_7
+; SOFT-NEXT: movs r5, #1
+; SOFT-NEXT: lsls r5, r5, #31
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: beq .LBB19_22
; SOFT-NEXT: .LBB19_21:
-; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT: mov r1, r7
-; SOFT-NEXT: beq .LBB19_8
-; SOFT-NEXT: b .LBB19_9
+; SOFT-NEXT: ldr r5, .LCPI19_2
; SOFT-NEXT: .LBB19_22:
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: mov r1, r4
-; SOFT-NEXT: bne .LBB19_13
-; SOFT-NEXT: .LBB19_23:
-; SOFT-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT: mov r2, r4
-; SOFT-NEXT: bne .LBB19_14
+; SOFT-NEXT: cmp r3, #0
+; SOFT-NEXT: bne .LBB19_24
+; SOFT-NEXT: @ %bb.23:
+; SOFT-NEXT: mov r4, r5
; SOFT-NEXT: .LBB19_24:
-; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT: beq .LBB19_15
-; SOFT-NEXT: b .LBB19_16
+; SOFT-NEXT: mov r3, r4
+; SOFT-NEXT: add sp, #28
+; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.25:
; SOFT-NEXT: .LCPI19_0:
-; SOFT-NEXT: .long 3353346048 @ 0xc7e00000
-; SOFT-NEXT: .LCPI19_1:
; SOFT-NEXT: .long 1205862399 @ 0x47dfffff
+; SOFT-NEXT: .LCPI19_1:
+; SOFT-NEXT: .long 3353346048 @ 0xc7e00000
; SOFT-NEXT: .LCPI19_2:
; SOFT-NEXT: .long 2147483647 @ 0x7fffffff
;
@@ -2917,10 +2964,10 @@ define i19 @test_signed_i19_f16(half %f) nounwind {
; VFP2-NEXT: movlt r0, #0
; VFP2-NEXT: movtlt r0, #65532
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s0
; VFP2-NEXT: itt gt
; VFP2-NEXT: movwgt r0, #65535
; VFP2-NEXT: movtgt r0, #3
+; VFP2-NEXT: vcmp.f32 s0, s0
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
; VFP2-NEXT: it vs
; VFP2-NEXT: movvs r0, #0
@@ -3031,69 +3078,73 @@ define i50 @test_signed_i50_f16(half %f) nounwind {
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r4, r5, r6, r7, lr}
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
-; SOFT-NEXT: .pad #4
-; SOFT-NEXT: sub sp, #4
+; SOFT-NEXT: .pad #12
+; SOFT-NEXT: sub sp, #12
; SOFT-NEXT: uxth r0, r0
; SOFT-NEXT: bl __aeabi_h2f
-; SOFT-NEXT: mov r4, r0
+; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: ldr r1, .LCPI26_0
+; SOFT-NEXT: bl __aeabi_fcmpgt
+; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
; SOFT-NEXT: movs r0, #27
; SOFT-NEXT: lsls r1, r0, #27
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: mov r0, r7
; SOFT-NEXT: bl __aeabi_fcmpge
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: bl __aeabi_f2lz
; SOFT-NEXT: mov r5, r0
-; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: bl __aeabi_f2lz
+; SOFT-NEXT: mov r4, r0
+; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: bne .LBB26_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r5, r7
+; SOFT-NEXT: mov r4, r5
; SOFT-NEXT: .LBB26_2:
+; SOFT-NEXT: movs r6, #0
+; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: cmp r0, #0
; SOFT-NEXT: beq .LBB26_4
; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: str r1, [sp] @ 4-byte Spill
-; SOFT-NEXT: b .LBB26_5
+; SOFT-NEXT: mvns r4, r6
; SOFT-NEXT: .LBB26_4:
-; SOFT-NEXT: ldr r0, .LCPI26_0
-; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
-; SOFT-NEXT: .LBB26_5:
-; SOFT-NEXT: ldr r1, .LCPI26_1
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: bl __aeabi_fcmpgt
-; SOFT-NEXT: movs r7, #0
-; SOFT-NEXT: mvns r6, r7
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB26_7
-; SOFT-NEXT: @ %bb.6:
-; SOFT-NEXT: ldr r0, .LCPI26_2
-; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
-; SOFT-NEXT: .LBB26_7:
-; SOFT-NEXT: bne .LBB26_9
-; SOFT-NEXT: @ %bb.8:
-; SOFT-NEXT: mov r6, r5
-; SOFT-NEXT: .LBB26_9:
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: mov r1, r4
+; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: mov r1, r7
; SOFT-NEXT: bl __aeabi_fcmpun
+; SOFT-NEXT: mov r1, r0
; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: bne .LBB26_11
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: bne .LBB26_6
+; SOFT-NEXT: @ %bb.5:
+; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: .LBB26_6:
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB26_8
+; SOFT-NEXT: @ %bb.7:
+; SOFT-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT: b .LBB26_9
+; SOFT-NEXT: .LBB26_8:
+; SOFT-NEXT: ldr r3, .LCPI26_1
+; SOFT-NEXT: .LBB26_9:
+; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: beq .LBB26_11
; SOFT-NEXT: @ %bb.10:
-; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: ldr r3, .LCPI26_2
; SOFT-NEXT: .LBB26_11:
+; SOFT-NEXT: cmp r1, #0
; SOFT-NEXT: bne .LBB26_13
; SOFT-NEXT: @ %bb.12:
-; SOFT-NEXT: ldr r7, [sp] @ 4-byte Reload
+; SOFT-NEXT: mov r6, r3
; SOFT-NEXT: .LBB26_13:
-; SOFT-NEXT: mov r1, r7
-; SOFT-NEXT: add sp, #4
+; SOFT-NEXT: mov r1, r6
+; SOFT-NEXT: add sp, #12
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.14:
; SOFT-NEXT: .LCPI26_0:
-; SOFT-NEXT: .long 4294836224 @ 0xfffe0000
-; SOFT-NEXT: .LCPI26_1:
; SOFT-NEXT: .long 1476395007 @ 0x57ffffff
+; SOFT-NEXT: .LCPI26_1:
+; SOFT-NEXT: .long 4294836224 @ 0xfffe0000
; SOFT-NEXT: .LCPI26_2:
; SOFT-NEXT: .long 131071 @ 0x1ffff
;
@@ -3115,12 +3166,11 @@ define i50 @test_signed_i50_f16(half %f) nounwind {
; VFP2-NEXT: movlt r0, #0
; VFP2-NEXT: vcmp.f32 s2, s4
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movgt.w r0, #-1
-; VFP2-NEXT: vcmp.f32 s2, s2
-; VFP2-NEXT: itt gt
+; VFP2-NEXT: ittt gt
; VFP2-NEXT: movwgt r1, #65535
; VFP2-NEXT: movtgt r1, #1
+; VFP2-NEXT: movgt.w r0, #-1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
; VFP2-NEXT: itt vs
; VFP2-NEXT: movvs r0, #0
@@ -3153,12 +3203,11 @@ define i50 @test_signed_i50_f16(half %f) nounwind {
; FP16-NEXT: movlt r0, #0
; FP16-NEXT: vcmp.f32 s16, s2
; FP16-NEXT: vmrs APSR_nzcv, fpscr
-; FP16-NEXT: it gt
-; FP16-NEXT: movgt.w r0, #-1
-; FP16-NEXT: vcmp.f32 s16, s16
-; FP16-NEXT: itt gt
+; FP16-NEXT: ittt gt
; FP16-NEXT: movwgt r1, #65535
; FP16-NEXT: movtgt r1, #1
+; FP16-NEXT: movgt.w r0, #-1
+; FP16-NEXT: vcmp.f32 s16, s16
; FP16-NEXT: vmrs APSR_nzcv, fpscr
; FP16-NEXT: itt vs
; FP16-NEXT: movvs r0, #0
@@ -3180,62 +3229,70 @@ define i64 @test_signed_i64_f16(half %f) nounwind {
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r4, r5, r6, r7, lr}
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
-; SOFT-NEXT: .pad #4
-; SOFT-NEXT: sub sp, #4
+; SOFT-NEXT: .pad #12
+; SOFT-NEXT: sub sp, #12
; SOFT-NEXT: uxth r0, r0
; SOFT-NEXT: bl __aeabi_h2f
-; SOFT-NEXT: mov r4, r0
+; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: ldr r1, .LCPI27_0
+; SOFT-NEXT: bl __aeabi_fcmpgt
+; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
; SOFT-NEXT: movs r0, #223
; SOFT-NEXT: lsls r1, r0, #24
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: mov r0, r7
; SOFT-NEXT: bl __aeabi_fcmpge
-; SOFT-NEXT: mov r6, r0
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: mov r5, r0
+; SOFT-NEXT: mov r0, r7
; SOFT-NEXT: bl __aeabi_f2lz
-; SOFT-NEXT: movs r2, #1
-; SOFT-NEXT: lsls r7, r2, #31
-; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: mov r4, r0
+; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: bne .LBB27_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: mov r4, r5
; SOFT-NEXT: .LBB27_2:
-; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT: movs r6, #0
+; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: cmp r0, #0
; SOFT-NEXT: beq .LBB27_4
; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: mov r7, r1
+; SOFT-NEXT: mvns r4, r6
; SOFT-NEXT: .LBB27_4:
-; SOFT-NEXT: ldr r1, .LCPI27_0
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: bl __aeabi_fcmpgt
-; SOFT-NEXT: movs r6, #0
-; SOFT-NEXT: mvns r5, r6
+; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: mov r1, r7
+; SOFT-NEXT: bl __aeabi_fcmpun
+; SOFT-NEXT: mov r1, r0
; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB27_6
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: bne .LBB27_6
; SOFT-NEXT: @ %bb.5:
-; SOFT-NEXT: ldr r7, .LCPI27_1
+; SOFT-NEXT: mov r0, r4
; SOFT-NEXT: .LBB27_6:
-; SOFT-NEXT: bne .LBB27_8
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB27_8
; SOFT-NEXT: @ %bb.7:
-; SOFT-NEXT: ldr r5, [sp] @ 4-byte Reload
+; SOFT-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT: b .LBB27_9
; SOFT-NEXT: .LBB27_8:
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: mov r1, r4
-; SOFT-NEXT: bl __aeabi_fcmpun
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: bne .LBB27_10
-; SOFT-NEXT: @ %bb.9:
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: .LBB27_10:
-; SOFT-NEXT: bne .LBB27_12
-; SOFT-NEXT: @ %bb.11:
-; SOFT-NEXT: mov r6, r7
-; SOFT-NEXT: .LBB27_12:
+; SOFT-NEXT: movs r2, #1
+; SOFT-NEXT: lsls r3, r2, #31
+; SOFT-NEXT: .LBB27_9:
+; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: beq .LBB27_11
+; SOFT-NEXT: @ %bb.10:
+; SOFT-NEXT: ldr r3, .LCPI27_1
+; SOFT-NEXT: .LBB27_11:
+; SOFT-NEXT: cmp r1, #0
+; SOFT-NEXT: bne .LBB27_13
+; SOFT-NEXT: @ %bb.12:
+; SOFT-NEXT: mov r6, r3
+; SOFT-NEXT: .LBB27_13:
; SOFT-NEXT: mov r1, r6
-; SOFT-NEXT: add sp, #4
+; SOFT-NEXT: add sp, #12
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .p2align 2
-; SOFT-NEXT: @ %bb.13:
+; SOFT-NEXT: @ %bb.14:
; SOFT-NEXT: .LCPI27_0:
; SOFT-NEXT: .long 1593835519 @ 0x5effffff
; SOFT-NEXT: .LCPI27_1:
@@ -3322,106 +3379,110 @@ define i100 @test_signed_i100_f16(half %f) nounwind {
; SOFT-NEXT: sub sp, #20
; SOFT-NEXT: uxth r0, r0
; SOFT-NEXT: bl __aeabi_h2f
+; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: ldr r1, .LCPI28_0
+; SOFT-NEXT: bl __aeabi_fcmpgt
; SOFT-NEXT: mov r4, r0
; SOFT-NEXT: movs r0, #241
; SOFT-NEXT: lsls r1, r0, #24
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: bl __aeabi_fcmpge
-; SOFT-NEXT: mov r6, r0
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: bl __fixsfti
-; SOFT-NEXT: movs r5, #7
-; SOFT-NEXT: str r5, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT: mvns r7, r5
-; SOFT-NEXT: cmp r6, #0
-; SOFT-NEXT: beq .LBB28_17
+; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT: str r2, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT: str r3, [sp] @ 4-byte Spill
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB28_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: beq .LBB28_18
+; SOFT-NEXT: mov r0, r7
; SOFT-NEXT: .LBB28_2:
+; SOFT-NEXT: movs r5, #0
+; SOFT-NEXT: mvns r1, r5
+; SOFT-NEXT: str r4, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: mov r4, r1
; SOFT-NEXT: bne .LBB28_4
-; SOFT-NEXT: .LBB28_3:
-; SOFT-NEXT: mov r2, r6
+; SOFT-NEXT: @ %bb.3:
+; SOFT-NEXT: mov r4, r0
; SOFT-NEXT: .LBB28_4:
-; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT: beq .LBB28_6
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: mov r1, r6
+; SOFT-NEXT: bl __aeabi_fcmpun
+; SOFT-NEXT: mov r3, r0
+; SOFT-NEXT: cmp r0, #0
+; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: bne .LBB28_6
; SOFT-NEXT: @ %bb.5:
-; SOFT-NEXT: mov r7, r3
-; SOFT-NEXT: .LBB28_6:
-; SOFT-NEXT: ldr r1, .LCPI28_0
; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: bl __aeabi_fcmpgt
-; SOFT-NEXT: movs r6, #0
-; SOFT-NEXT: mvns r5, r6
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB28_19
+; SOFT-NEXT: .LBB28_6:
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: ldr r6, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: bne .LBB28_8
; SOFT-NEXT: @ %bb.7:
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: beq .LBB28_20
+; SOFT-NEXT: str r7, [sp, #4] @ 4-byte Spill
; SOFT-NEXT: .LBB28_8:
-; SOFT-NEXT: mov r7, r5
-; SOFT-NEXT: beq .LBB28_21
-; SOFT-NEXT: .LBB28_9:
-; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: bne .LBB28_11
+; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: mov r2, r6
+; SOFT-NEXT: bne .LBB28_10
+; SOFT-NEXT: @ %bb.9:
+; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
; SOFT-NEXT: .LBB28_10:
-; SOFT-NEXT: ldr r5, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT: .LBB28_11:
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: mov r1, r4
-; SOFT-NEXT: bl __aeabi_fcmpun
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: beq .LBB28_22
-; SOFT-NEXT: @ %bb.12:
-; SOFT-NEXT: mov r1, r6
-; SOFT-NEXT: beq .LBB28_23
+; SOFT-NEXT: cmp r3, #0
+; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: beq .LBB28_18
+; SOFT-NEXT: @ %bb.11:
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: beq .LBB28_19
+; SOFT-NEXT: .LBB28_12:
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB28_14
; SOFT-NEXT: .LBB28_13:
-; SOFT-NEXT: mov r2, r6
-; SOFT-NEXT: beq .LBB28_24
+; SOFT-NEXT: ldr r6, [sp, #12] @ 4-byte Reload
; SOFT-NEXT: .LBB28_14:
+; SOFT-NEXT: cmp r3, #0
+; SOFT-NEXT: mov r2, r5
; SOFT-NEXT: bne .LBB28_16
-; SOFT-NEXT: .LBB28_15:
-; SOFT-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT: @ %bb.15:
+; SOFT-NEXT: mov r2, r6
; SOFT-NEXT: .LBB28_16:
-; SOFT-NEXT: mov r3, r6
-; SOFT-NEXT: add sp, #20
-; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
-; SOFT-NEXT: .LBB28_17:
-; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: bne .LBB28_2
+; SOFT-NEXT: movs r4, #7
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: beq .LBB28_20
+; SOFT-NEXT: @ %bb.17:
+; SOFT-NEXT: ldr r7, [sp] @ 4-byte Reload
+; SOFT-NEXT: b .LBB28_21
; SOFT-NEXT: .LBB28_18:
-; SOFT-NEXT: mov r1, r6
-; SOFT-NEXT: beq .LBB28_3
-; SOFT-NEXT: b .LBB28_4
+; SOFT-NEXT: mov r1, r2
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB28_12
; SOFT-NEXT: .LBB28_19:
-; SOFT-NEXT: str r7, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: bne .LBB28_8
+; SOFT-NEXT: str r7, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB28_13
+; SOFT-NEXT: b .LBB28_14
; SOFT-NEXT: .LBB28_20:
-; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT: mov r7, r5
-; SOFT-NEXT: bne .LBB28_9
+; SOFT-NEXT: mvns r7, r4
; SOFT-NEXT: .LBB28_21:
-; SOFT-NEXT: ldr r7, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: beq .LBB28_10
-; SOFT-NEXT: b .LBB28_11
-; SOFT-NEXT: .LBB28_22:
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: mov r1, r6
-; SOFT-NEXT: bne .LBB28_13
+; SOFT-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: bne .LBB28_23
+; SOFT-NEXT: @ %bb.22:
+; SOFT-NEXT: mov r4, r7
; SOFT-NEXT: .LBB28_23:
-; SOFT-NEXT: mov r1, r7
-; SOFT-NEXT: mov r2, r6
-; SOFT-NEXT: bne .LBB28_14
-; SOFT-NEXT: .LBB28_24:
-; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT: beq .LBB28_15
-; SOFT-NEXT: b .LBB28_16
+; SOFT-NEXT: cmp r3, #0
+; SOFT-NEXT: bne .LBB28_25
+; SOFT-NEXT: @ %bb.24:
+; SOFT-NEXT: mov r5, r4
+; SOFT-NEXT: .LBB28_25:
+; SOFT-NEXT: mov r3, r5
+; SOFT-NEXT: add sp, #20
+; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .p2align 2
-; SOFT-NEXT: @ %bb.25:
+; SOFT-NEXT: @ %bb.26:
; SOFT-NEXT: .LCPI28_0:
; SOFT-NEXT: .long 1895825407 @ 0x70ffffff
;
@@ -3518,104 +3579,109 @@ define i128 @test_signed_i128_f16(half %f) nounwind {
; SOFT-NEXT: sub sp, #20
; SOFT-NEXT: uxth r0, r0
; SOFT-NEXT: bl __aeabi_h2f
+; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: ldr r1, .LCPI29_0
+; SOFT-NEXT: bl __aeabi_fcmpgt
; SOFT-NEXT: mov r4, r0
; SOFT-NEXT: movs r0, #255
; SOFT-NEXT: lsls r1, r0, #24
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: bl __aeabi_fcmpge
-; SOFT-NEXT: mov r6, r0
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: bl __fixsfti
-; SOFT-NEXT: movs r5, #1
-; SOFT-NEXT: lsls r7, r5, #31
-; SOFT-NEXT: cmp r6, #0
-; SOFT-NEXT: beq .LBB29_18
+; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT: str r2, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT: str r3, [sp] @ 4-byte Spill
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB29_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: beq .LBB29_19
+; SOFT-NEXT: mov r0, r7
; SOFT-NEXT: .LBB29_2:
+; SOFT-NEXT: movs r5, #0
+; SOFT-NEXT: mvns r1, r5
+; SOFT-NEXT: str r4, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: mov r4, r1
; SOFT-NEXT: bne .LBB29_4
-; SOFT-NEXT: .LBB29_3:
-; SOFT-NEXT: mov r2, r6
+; SOFT-NEXT: @ %bb.3:
+; SOFT-NEXT: mov r4, r0
; SOFT-NEXT: .LBB29_4:
-; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT: beq .LBB29_6
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: mov r1, r6
+; SOFT-NEXT: bl __aeabi_fcmpun
+; SOFT-NEXT: mov r3, r0
+; SOFT-NEXT: cmp r0, #0
+; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: bne .LBB29_6
; SOFT-NEXT: @ %bb.5:
-; SOFT-NEXT: mov r7, r3
-; SOFT-NEXT: .LBB29_6:
-; SOFT-NEXT: ldr r1, .LCPI29_0
; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: bl __aeabi_fcmpgt
-; SOFT-NEXT: movs r6, #0
-; SOFT-NEXT: mvns r5, r6
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB29_8
+; SOFT-NEXT: .LBB29_6:
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: ldr r6, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: bne .LBB29_8
; SOFT-NEXT: @ %bb.7:
-; SOFT-NEXT: ldr r7, .LCPI29_1
+; SOFT-NEXT: str r7, [sp, #4] @ 4-byte Spill
; SOFT-NEXT: .LBB29_8:
-; SOFT-NEXT: str r7, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: beq .LBB29_20
+; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: mov r2, r6
+; SOFT-NEXT: bne .LBB29_10
; SOFT-NEXT: @ %bb.9:
-; SOFT-NEXT: mov r7, r5
-; SOFT-NEXT: beq .LBB29_21
+; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
; SOFT-NEXT: .LBB29_10:
-; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: bne .LBB29_12
-; SOFT-NEXT: .LBB29_11:
-; SOFT-NEXT: ldr r5, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT: cmp r3, #0
+; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: beq .LBB29_18
+; SOFT-NEXT: @ %bb.11:
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: beq .LBB29_19
; SOFT-NEXT: .LBB29_12:
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: mov r1, r4
-; SOFT-NEXT: bl __aeabi_fcmpun
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: beq .LBB29_22
-; SOFT-NEXT: @ %bb.13:
-; SOFT-NEXT: mov r1, r6
-; SOFT-NEXT: beq .LBB29_23
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB29_14
+; SOFT-NEXT: .LBB29_13:
+; SOFT-NEXT: ldr r6, [sp, #12] @ 4-byte Reload
; SOFT-NEXT: .LBB29_14:
+; SOFT-NEXT: cmp r3, #0
+; SOFT-NEXT: mov r2, r5
+; SOFT-NEXT: bne .LBB29_16
+; SOFT-NEXT: @ %bb.15:
; SOFT-NEXT: mov r2, r6
-; SOFT-NEXT: beq .LBB29_24
-; SOFT-NEXT: .LBB29_15:
-; SOFT-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT: bne .LBB29_17
; SOFT-NEXT: .LBB29_16:
-; SOFT-NEXT: mov r6, r3
-; SOFT-NEXT: .LBB29_17:
-; SOFT-NEXT: mov r3, r6
-; SOFT-NEXT: add sp, #20
-; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: beq .LBB29_20
+; SOFT-NEXT: @ %bb.17:
+; SOFT-NEXT: ldr r6, [sp] @ 4-byte Reload
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB29_21
+; SOFT-NEXT: b .LBB29_22
; SOFT-NEXT: .LBB29_18:
-; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: bne .LBB29_2
+; SOFT-NEXT: mov r1, r2
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB29_12
; SOFT-NEXT: .LBB29_19:
-; SOFT-NEXT: mov r1, r6
-; SOFT-NEXT: beq .LBB29_3
-; SOFT-NEXT: b .LBB29_4
+; SOFT-NEXT: str r7, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB29_13
+; SOFT-NEXT: b .LBB29_14
; SOFT-NEXT: .LBB29_20:
-; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT: mov r7, r5
-; SOFT-NEXT: bne .LBB29_10
+; SOFT-NEXT: movs r4, #1
+; SOFT-NEXT: lsls r6, r4, #31
+; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB29_22
; SOFT-NEXT: .LBB29_21:
-; SOFT-NEXT: ldr r7, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: beq .LBB29_11
-; SOFT-NEXT: b .LBB29_12
+; SOFT-NEXT: ldr r6, .LCPI29_1
; SOFT-NEXT: .LBB29_22:
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: mov r1, r6
-; SOFT-NEXT: bne .LBB29_14
-; SOFT-NEXT: .LBB29_23:
-; SOFT-NEXT: mov r1, r7
-; SOFT-NEXT: mov r2, r6
-; SOFT-NEXT: bne .LBB29_15
+; SOFT-NEXT: cmp r3, #0
+; SOFT-NEXT: bne .LBB29_24
+; SOFT-NEXT: @ %bb.23:
+; SOFT-NEXT: mov r5, r6
; SOFT-NEXT: .LBB29_24:
-; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT: beq .LBB29_16
-; SOFT-NEXT: b .LBB29_17
+; SOFT-NEXT: mov r3, r5
+; SOFT-NEXT: add sp, #20
+; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.25:
; SOFT-NEXT: .LCPI29_0:
diff --git a/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll b/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll
index 4cc5f943dadff1..14eb67104eddac 100644
--- a/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll
@@ -24,38 +24,41 @@ define i1 @test_signed_i1_f32(float %f) nounwind {
; SOFT-NEXT: .save {r4, r5, r6, lr}
; SOFT-NEXT: push {r4, r5, r6, lr}
; SOFT-NEXT: mov r5, r0
+; SOFT-NEXT: movs r0, #127
+; SOFT-NEXT: lsls r1, r0, #23
+; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: bl __aeabi_fcmpgt
+; SOFT-NEXT: mov r4, r0
; SOFT-NEXT: movs r1, #0
+; SOFT-NEXT: mov r0, r5
; SOFT-NEXT: bl __aeabi_fcmpge
; SOFT-NEXT: mov r6, r0
; SOFT-NEXT: mov r0, r5
; SOFT-NEXT: bl __aeabi_f2uiz
-; SOFT-NEXT: mov r4, r0
; SOFT-NEXT: cmp r6, #0
-; SOFT-NEXT: bne .LBB0_2
+; SOFT-NEXT: beq .LBB0_3
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r4, r6
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB0_4
; SOFT-NEXT: .LBB0_2:
-; SOFT-NEXT: movs r0, #127
-; SOFT-NEXT: lsls r1, r0, #23
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: bl __aeabi_fcmpgt
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB0_4
-; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: movs r4, #1
+; SOFT-NEXT: pop {r4, r5, r6, pc}
+; SOFT-NEXT: .LBB0_3:
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB0_2
; SOFT-NEXT: .LBB0_4:
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: movs r0, #1
; SOFT-NEXT: pop {r4, r5, r6, pc}
;
; VFP2-LABEL: test_signed_i1_f32:
; VFP2: @ %bb.0:
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vmov.f32 s4, #1.000000e+00
-; VFP2-NEXT: vcvt.u32.f32 s2, s0
-; VFP2-NEXT: vcmp.f32 s0, #0
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vmov.f32 s0, #1.000000e+00
+; VFP2-NEXT: vcvt.u32.f32 s4, s2
+; VFP2-NEXT: vcmp.f32 s2, #0
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s4
-; VFP2-NEXT: vmov r0, s2
+; VFP2-NEXT: vcmp.f32 s2, s0
+; VFP2-NEXT: vmov r0, s4
; VFP2-NEXT: it lt
; VFP2-NEXT: movlt r0, #0
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
@@ -86,27 +89,29 @@ define i8 @test_signed_i8_f32(float %f) nounwind {
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r4, r5, r6, lr}
; SOFT-NEXT: push {r4, r5, r6, lr}
-; SOFT-NEXT: mov r5, r0
+; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: ldr r1, .LCPI1_0
+; SOFT-NEXT: bl __aeabi_fcmpgt
+; SOFT-NEXT: mov r4, r0
; SOFT-NEXT: movs r1, #0
+; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: bl __aeabi_fcmpge
-; SOFT-NEXT: mov r6, r0
-; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: mov r5, r0
+; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: bl __aeabi_f2uiz
-; SOFT-NEXT: mov r4, r0
-; SOFT-NEXT: cmp r6, #0
-; SOFT-NEXT: bne .LBB1_2
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB1_3
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r4, r6
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB1_4
; SOFT-NEXT: .LBB1_2:
-; SOFT-NEXT: ldr r1, .LCPI1_0
+; SOFT-NEXT: pop {r4, r5, r6, pc}
+; SOFT-NEXT: .LBB1_3:
; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: bl __aeabi_fcmpgt
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB1_4
-; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: movs r4, #255
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB1_2
; SOFT-NEXT: .LBB1_4:
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: movs r0, #255
; SOFT-NEXT: pop {r4, r5, r6, pc}
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.5:
@@ -158,27 +163,29 @@ define i13 @test_signed_i13_f32(float %f) nounwind {
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r4, r5, r6, lr}
; SOFT-NEXT: push {r4, r5, r6, lr}
-; SOFT-NEXT: mov r5, r0
+; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: ldr r1, .LCPI2_0
+; SOFT-NEXT: bl __aeabi_fcmpgt
+; SOFT-NEXT: mov r4, r0
; SOFT-NEXT: movs r1, #0
+; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: bl __aeabi_fcmpge
-; SOFT-NEXT: mov r6, r0
-; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: mov r5, r0
+; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: bl __aeabi_f2uiz
-; SOFT-NEXT: mov r4, r0
-; SOFT-NEXT: cmp r6, #0
-; SOFT-NEXT: bne .LBB2_2
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB2_3
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r4, r6
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB2_4
; SOFT-NEXT: .LBB2_2:
-; SOFT-NEXT: ldr r1, .LCPI2_0
+; SOFT-NEXT: pop {r4, r5, r6, pc}
+; SOFT-NEXT: .LBB2_3:
; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: bl __aeabi_fcmpgt
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB2_4
-; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: ldr r4, .LCPI2_1
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB2_2
; SOFT-NEXT: .LBB2_4:
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: ldr r0, .LCPI2_1
; SOFT-NEXT: pop {r4, r5, r6, pc}
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.5:
@@ -232,27 +239,29 @@ define i16 @test_signed_i16_f32(float %f) nounwind {
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r4, r5, r6, lr}
; SOFT-NEXT: push {r4, r5, r6, lr}
-; SOFT-NEXT: mov r5, r0
+; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: ldr r1, .LCPI3_0
+; SOFT-NEXT: bl __aeabi_fcmpgt
+; SOFT-NEXT: mov r4, r0
; SOFT-NEXT: movs r1, #0
+; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: bl __aeabi_fcmpge
-; SOFT-NEXT: mov r6, r0
-; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: mov r5, r0
+; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: bl __aeabi_f2uiz
-; SOFT-NEXT: mov r4, r0
-; SOFT-NEXT: cmp r6, #0
-; SOFT-NEXT: bne .LBB3_2
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB3_3
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r4, r6
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB3_4
; SOFT-NEXT: .LBB3_2:
-; SOFT-NEXT: ldr r1, .LCPI3_0
+; SOFT-NEXT: pop {r4, r5, r6, pc}
+; SOFT-NEXT: .LBB3_3:
; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: bl __aeabi_fcmpgt
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB3_4
-; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: ldr r4, .LCPI3_1
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB3_2
; SOFT-NEXT: .LBB3_4:
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: ldr r0, .LCPI3_1
; SOFT-NEXT: pop {r4, r5, r6, pc}
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.5:
@@ -306,27 +315,29 @@ define i19 @test_signed_i19_f32(float %f) nounwind {
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r4, r5, r6, lr}
; SOFT-NEXT: push {r4, r5, r6, lr}
-; SOFT-NEXT: mov r5, r0
+; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: ldr r1, .LCPI4_0
+; SOFT-NEXT: bl __aeabi_fcmpgt
+; SOFT-NEXT: mov r4, r0
; SOFT-NEXT: movs r1, #0
+; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: bl __aeabi_fcmpge
-; SOFT-NEXT: mov r6, r0
-; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: mov r5, r0
+; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: bl __aeabi_f2uiz
-; SOFT-NEXT: mov r4, r0
-; SOFT-NEXT: cmp r6, #0
-; SOFT-NEXT: bne .LBB4_2
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB4_3
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r4, r6
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB4_4
; SOFT-NEXT: .LBB4_2:
-; SOFT-NEXT: ldr r1, .LCPI4_0
+; SOFT-NEXT: pop {r4, r5, r6, pc}
+; SOFT-NEXT: .LBB4_3:
; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: bl __aeabi_fcmpgt
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB4_4
-; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: ldr r4, .LCPI4_1
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB4_2
; SOFT-NEXT: .LBB4_4:
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: ldr r0, .LCPI4_1
; SOFT-NEXT: pop {r4, r5, r6, pc}
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.5:
@@ -383,28 +394,31 @@ define i32 @test_signed_i32_f32(float %f) nounwind {
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
; SOFT-NEXT: .pad #4
; SOFT-NEXT: sub sp, #4
-; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: ldr r1, .LCPI5_0
+; SOFT-NEXT: bl __aeabi_fcmpgt
+; SOFT-NEXT: mov r5, r0
; SOFT-NEXT: movs r4, #0
+; SOFT-NEXT: mov r0, r7
; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: bl __aeabi_fcmpge
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: mov r0, r7
; SOFT-NEXT: bl __aeabi_f2uiz
-; SOFT-NEXT: mov r5, r0
-; SOFT-NEXT: cmp r7, #0
-; SOFT-NEXT: bne .LBB5_2
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: beq .LBB5_3
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r5, r7
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB5_4
; SOFT-NEXT: .LBB5_2:
-; SOFT-NEXT: ldr r1, .LCPI5_0
+; SOFT-NEXT: add sp, #4
+; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT: .LBB5_3:
; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: bl __aeabi_fcmpgt
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB5_4
-; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: mvns r5, r4
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB5_2
; SOFT-NEXT: .LBB5_4:
-; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: mvns r0, r4
; SOFT-NEXT: add sp, #4
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .p2align 2
@@ -429,39 +443,45 @@ define i50 @test_signed_i50_f32(float %f) nounwind {
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
; SOFT-NEXT: .pad #4
; SOFT-NEXT: sub sp, #4
-; SOFT-NEXT: mov r6, r0
-; SOFT-NEXT: movs r5, #0
-; SOFT-NEXT: mov r1, r5
-; SOFT-NEXT: bl __aeabi_fcmpge
+; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: ldr r1, .LCPI6_0
+; SOFT-NEXT: bl __aeabi_fcmpgt
; SOFT-NEXT: mov r4, r0
-; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: movs r6, #0
+; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: mov r1, r6
+; SOFT-NEXT: bl __aeabi_fcmpge
+; SOFT-NEXT: mov r5, r0
+; SOFT-NEXT: mov r0, r7
; SOFT-NEXT: bl __aeabi_f2ulz
-; SOFT-NEXT: mov r7, r1
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: bne .LBB6_2
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB6_5
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r7, r4
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB6_6
; SOFT-NEXT: .LBB6_2:
-; SOFT-NEXT: bne .LBB6_4
-; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB6_7
+; SOFT-NEXT: .LBB6_3:
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB6_8
; SOFT-NEXT: .LBB6_4:
-; SOFT-NEXT: mov r4, r0
-; SOFT-NEXT: ldr r1, .LCPI6_0
-; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: bl __aeabi_fcmpgt
-; SOFT-NEXT: mov r1, r0
-; SOFT-NEXT: mvns r0, r5
-; SOFT-NEXT: cmp r1, #0
-; SOFT-NEXT: bne .LBB6_6
-; SOFT-NEXT: @ %bb.5:
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: add sp, #4
+; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT: .LBB6_5:
+; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB6_2
; SOFT-NEXT: .LBB6_6:
-; SOFT-NEXT: beq .LBB6_8
-; SOFT-NEXT: @ %bb.7:
-; SOFT-NEXT: ldr r7, .LCPI6_1
+; SOFT-NEXT: mvns r0, r6
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB6_3
+; SOFT-NEXT: .LBB6_7:
+; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB6_4
; SOFT-NEXT: .LBB6_8:
-; SOFT-NEXT: mov r1, r7
+; SOFT-NEXT: ldr r1, .LCPI6_1
; SOFT-NEXT: add sp, #4
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .p2align 2
@@ -508,43 +528,48 @@ define i64 @test_signed_i64_f32(float %f) nounwind {
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
; SOFT-NEXT: .pad #4
; SOFT-NEXT: sub sp, #4
-; SOFT-NEXT: mov r6, r0
-; SOFT-NEXT: movs r4, #0
-; SOFT-NEXT: mov r1, r4
+; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: ldr r1, .LCPI7_0
+; SOFT-NEXT: bl __aeabi_fcmpgt
+; SOFT-NEXT: mov r4, r0
+; SOFT-NEXT: movs r6, #0
+; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: mov r1, r6
; SOFT-NEXT: bl __aeabi_fcmpge
; SOFT-NEXT: mov r5, r0
-; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: mov r0, r7
; SOFT-NEXT: bl __aeabi_f2ulz
-; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: mov r3, r0
; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: bne .LBB7_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: mov r3, r5
; SOFT-NEXT: .LBB7_2:
-; SOFT-NEXT: str r1, [sp] @ 4-byte Spill
-; SOFT-NEXT: bne .LBB7_4
-; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: mov r7, r5
-; SOFT-NEXT: .LBB7_4:
-; SOFT-NEXT: ldr r1, .LCPI7_0
-; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: bl __aeabi_fcmpgt
-; SOFT-NEXT: mvns r1, r4
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: mov r0, r1
+; SOFT-NEXT: mvns r2, r6
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: mov r0, r2
; SOFT-NEXT: beq .LBB7_7
-; SOFT-NEXT: @ %bb.5:
+; SOFT-NEXT: @ %bb.3:
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: beq .LBB7_8
+; SOFT-NEXT: .LBB7_4:
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB7_6
+; SOFT-NEXT: .LBB7_5:
+; SOFT-NEXT: mov r2, r1
; SOFT-NEXT: .LBB7_6:
+; SOFT-NEXT: mov r1, r2
; SOFT-NEXT: add sp, #4
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .LBB7_7:
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: bne .LBB7_6
+; SOFT-NEXT: mov r0, r3
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB7_4
; SOFT-NEXT: .LBB7_8:
-; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload
-; SOFT-NEXT: add sp, #4
-; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB7_5
+; SOFT-NEXT: b .LBB7_6
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.9:
; SOFT-NEXT: .LCPI7_0:
@@ -584,71 +609,76 @@ define i100 @test_signed_i100_f32(float %f) nounwind {
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r4, r5, r6, r7, lr}
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
-; SOFT-NEXT: .pad #12
-; SOFT-NEXT: sub sp, #12
+; SOFT-NEXT: .pad #4
+; SOFT-NEXT: sub sp, #4
+; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: ldr r1, .LCPI8_0
+; SOFT-NEXT: bl __aeabi_fcmpgt
; SOFT-NEXT: mov r4, r0
; SOFT-NEXT: movs r6, #0
+; SOFT-NEXT: mov r0, r7
; SOFT-NEXT: mov r1, r6
; SOFT-NEXT: bl __aeabi_fcmpge
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: bl __fixunssfti
; SOFT-NEXT: mov r5, r0
-; SOFT-NEXT: cmp r7, #0
-; SOFT-NEXT: beq .LBB8_11
+; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: bl __fixunssfti
+; SOFT-NEXT: mov r7, r1
+; SOFT-NEXT: str r2, [sp] @ 4-byte Spill
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB8_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: beq .LBB8_12
+; SOFT-NEXT: mov r0, r5
; SOFT-NEXT: .LBB8_2:
+; SOFT-NEXT: mvns r2, r6
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: mov r6, r2
; SOFT-NEXT: bne .LBB8_4
-; SOFT-NEXT: .LBB8_3:
-; SOFT-NEXT: mov r1, r7
+; SOFT-NEXT: @ %bb.3:
+; SOFT-NEXT: mov r6, r0
; SOFT-NEXT: .LBB8_4:
-; SOFT-NEXT: str r1, [sp] @ 4-byte Spill
-; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: bne .LBB8_6
; SOFT-NEXT: @ %bb.5:
-; SOFT-NEXT: mov r5, r7
+; SOFT-NEXT: mov r7, r5
; SOFT-NEXT: .LBB8_6:
-; SOFT-NEXT: ldr r1, .LCPI8_0
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: bl __aeabi_fcmpgt
-; SOFT-NEXT: mvns r2, r6
-; SOFT-NEXT: movs r3, #15
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: mov r0, r2
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: mov r1, r2
; SOFT-NEXT: beq .LBB8_13
; SOFT-NEXT: @ %bb.7:
-; SOFT-NEXT: mov r1, r2
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: beq .LBB8_14
; SOFT-NEXT: .LBB8_8:
+; SOFT-NEXT: cmp r4, #0
; SOFT-NEXT: beq .LBB8_15
; SOFT-NEXT: .LBB8_9:
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: beq .LBB8_16
; SOFT-NEXT: .LBB8_10:
-; SOFT-NEXT: add sp, #12
-; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB8_12
; SOFT-NEXT: .LBB8_11:
-; SOFT-NEXT: mov r3, r7
-; SOFT-NEXT: bne .LBB8_2
+; SOFT-NEXT: movs r3, #15
; SOFT-NEXT: .LBB8_12:
-; SOFT-NEXT: mov r2, r7
-; SOFT-NEXT: beq .LBB8_3
-; SOFT-NEXT: b .LBB8_4
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: add sp, #4
+; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .LBB8_13:
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: mov r1, r2
+; SOFT-NEXT: mov r1, r7
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: bne .LBB8_8
; SOFT-NEXT: .LBB8_14:
-; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload
+; SOFT-NEXT: str r5, [sp] @ 4-byte Spill
+; SOFT-NEXT: cmp r4, #0
; SOFT-NEXT: bne .LBB8_9
; SOFT-NEXT: .LBB8_15:
-; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT: ldr r2, [sp] @ 4-byte Reload
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: bne .LBB8_10
; SOFT-NEXT: .LBB8_16:
-; SOFT-NEXT: ldr r3, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT: add sp, #12
-; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT: mov r3, r5
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB8_11
+; SOFT-NEXT: b .LBB8_12
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.17:
; SOFT-NEXT: .LCPI8_0:
@@ -692,72 +722,75 @@ define i128 @test_signed_i128_f32(float %f) nounwind {
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r4, r5, r6, r7, lr}
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
-; SOFT-NEXT: .pad #12
-; SOFT-NEXT: sub sp, #12
+; SOFT-NEXT: .pad #4
+; SOFT-NEXT: sub sp, #4
+; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: ldr r1, .LCPI9_0
+; SOFT-NEXT: bl __aeabi_fcmpgt
; SOFT-NEXT: mov r4, r0
; SOFT-NEXT: movs r6, #0
+; SOFT-NEXT: mov r0, r7
; SOFT-NEXT: mov r1, r6
; SOFT-NEXT: bl __aeabi_fcmpge
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: bl __fixunssfti
; SOFT-NEXT: mov r5, r0
-; SOFT-NEXT: cmp r7, #0
-; SOFT-NEXT: beq .LBB9_11
+; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: bl __fixunssfti
+; SOFT-NEXT: mov r7, r1
+; SOFT-NEXT: str r3, [sp] @ 4-byte Spill
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB9_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: beq .LBB9_12
+; SOFT-NEXT: mov r0, r5
; SOFT-NEXT: .LBB9_2:
+; SOFT-NEXT: mvns r6, r6
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: mov r3, r6
; SOFT-NEXT: bne .LBB9_4
-; SOFT-NEXT: .LBB9_3:
-; SOFT-NEXT: mov r1, r7
+; SOFT-NEXT: @ %bb.3:
+; SOFT-NEXT: mov r3, r0
; SOFT-NEXT: .LBB9_4:
-; SOFT-NEXT: str r1, [sp] @ 4-byte Spill
-; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: bne .LBB9_6
; SOFT-NEXT: @ %bb.5:
-; SOFT-NEXT: mov r5, r7
+; SOFT-NEXT: mov r7, r5
; SOFT-NEXT: .LBB9_6:
-; SOFT-NEXT: ldr r1, .LCPI9_0
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: bl __aeabi_fcmpgt
-; SOFT-NEXT: mvns r3, r6
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: mov r0, r3
-; SOFT-NEXT: beq .LBB9_13
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: mov r1, r6
+; SOFT-NEXT: bne .LBB9_8
; SOFT-NEXT: @ %bb.7:
-; SOFT-NEXT: mov r1, r3
-; SOFT-NEXT: beq .LBB9_14
+; SOFT-NEXT: mov r1, r7
; SOFT-NEXT: .LBB9_8:
-; SOFT-NEXT: mov r2, r3
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB9_10
+; SOFT-NEXT: @ %bb.9:
+; SOFT-NEXT: mov r2, r5
+; SOFT-NEXT: .LBB9_10:
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: mov r7, r6
; SOFT-NEXT: beq .LBB9_15
-; SOFT-NEXT: .LBB9_9:
+; SOFT-NEXT: @ %bb.11:
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: beq .LBB9_16
-; SOFT-NEXT: .LBB9_10:
-; SOFT-NEXT: add sp, #12
-; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
-; SOFT-NEXT: .LBB9_11:
-; SOFT-NEXT: mov r3, r7
-; SOFT-NEXT: bne .LBB9_2
; SOFT-NEXT: .LBB9_12:
-; SOFT-NEXT: mov r2, r7
-; SOFT-NEXT: beq .LBB9_3
-; SOFT-NEXT: b .LBB9_4
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB9_14
; SOFT-NEXT: .LBB9_13:
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: mov r1, r3
-; SOFT-NEXT: bne .LBB9_8
+; SOFT-NEXT: ldr r6, [sp] @ 4-byte Reload
; SOFT-NEXT: .LBB9_14:
-; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload
-; SOFT-NEXT: mov r2, r3
-; SOFT-NEXT: bne .LBB9_9
+; SOFT-NEXT: mov r0, r3
+; SOFT-NEXT: mov r2, r7
+; SOFT-NEXT: mov r3, r6
+; SOFT-NEXT: add sp, #4
+; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .LBB9_15:
-; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT: bne .LBB9_10
+; SOFT-NEXT: mov r7, r2
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB9_12
; SOFT-NEXT: .LBB9_16:
-; SOFT-NEXT: ldr r3, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT: add sp, #12
-; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT: str r5, [sp] @ 4-byte Spill
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB9_13
+; SOFT-NEXT: b .LBB9_14
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.17:
; SOFT-NEXT: .LCPI9_0:
@@ -819,32 +852,33 @@ define i1 @test_signed_i1_f64(double %f) nounwind {
; SOFT-NEXT: .pad #4
; SOFT-NEXT: sub sp, #4
; SOFT-NEXT: mov r4, r1
+; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: movs r7, #0
+; SOFT-NEXT: ldr r3, .LCPI10_0
+; SOFT-NEXT: mov r2, r7
+; SOFT-NEXT: bl __aeabi_dcmpgt
; SOFT-NEXT: mov r5, r0
-; SOFT-NEXT: movs r6, #0
-; SOFT-NEXT: mov r2, r6
-; SOFT-NEXT: mov r3, r6
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: mov r1, r4
+; SOFT-NEXT: mov r2, r7
+; SOFT-NEXT: mov r3, r7
; SOFT-NEXT: bl __aeabi_dcmpge
; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: bl __aeabi_d2uiz
; SOFT-NEXT: cmp r7, #0
-; SOFT-NEXT: bne .LBB10_2
+; SOFT-NEXT: beq .LBB10_3
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: .LBB10_2:
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: ldr r3, .LCPI10_0
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: mov r1, r4
-; SOFT-NEXT: mov r2, r6
-; SOFT-NEXT: bl __aeabi_dcmpgt
-; SOFT-NEXT: cmp r0, #0
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: bne .LBB10_4
-; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: .LBB10_2:
; SOFT-NEXT: add sp, #4
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT: .LBB10_3:
+; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB10_2
; SOFT-NEXT: .LBB10_4:
; SOFT-NEXT: movs r0, #1
; SOFT-NEXT: add sp, #4
@@ -897,32 +931,33 @@ define i8 @test_signed_i8_f64(double %f) nounwind {
; SOFT-NEXT: .pad #4
; SOFT-NEXT: sub sp, #4
; SOFT-NEXT: mov r4, r1
+; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: movs r7, #0
+; SOFT-NEXT: ldr r3, .LCPI11_0
+; SOFT-NEXT: mov r2, r7
+; SOFT-NEXT: bl __aeabi_dcmpgt
; SOFT-NEXT: mov r5, r0
-; SOFT-NEXT: movs r6, #0
-; SOFT-NEXT: mov r2, r6
-; SOFT-NEXT: mov r3, r6
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: mov r1, r4
+; SOFT-NEXT: mov r2, r7
+; SOFT-NEXT: mov r3, r7
; SOFT-NEXT: bl __aeabi_dcmpge
; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: bl __aeabi_d2uiz
; SOFT-NEXT: cmp r7, #0
-; SOFT-NEXT: bne .LBB11_2
+; SOFT-NEXT: beq .LBB11_3
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: .LBB11_2:
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: ldr r3, .LCPI11_0
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: mov r1, r4
-; SOFT-NEXT: mov r2, r6
-; SOFT-NEXT: bl __aeabi_dcmpgt
-; SOFT-NEXT: cmp r0, #0
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: bne .LBB11_4
-; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: .LBB11_2:
; SOFT-NEXT: add sp, #4
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT: .LBB11_3:
+; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB11_2
; SOFT-NEXT: .LBB11_4:
; SOFT-NEXT: movs r0, #255
; SOFT-NEXT: add sp, #4
@@ -983,32 +1018,33 @@ define i13 @test_signed_i13_f64(double %f) nounwind {
; SOFT-NEXT: .pad #4
; SOFT-NEXT: sub sp, #4
; SOFT-NEXT: mov r4, r1
+; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: movs r7, #0
+; SOFT-NEXT: ldr r3, .LCPI12_0
+; SOFT-NEXT: mov r2, r7
+; SOFT-NEXT: bl __aeabi_dcmpgt
; SOFT-NEXT: mov r5, r0
-; SOFT-NEXT: movs r6, #0
-; SOFT-NEXT: mov r2, r6
-; SOFT-NEXT: mov r3, r6
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: mov r1, r4
+; SOFT-NEXT: mov r2, r7
+; SOFT-NEXT: mov r3, r7
; SOFT-NEXT: bl __aeabi_dcmpge
; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: bl __aeabi_d2uiz
; SOFT-NEXT: cmp r7, #0
-; SOFT-NEXT: bne .LBB12_2
+; SOFT-NEXT: beq .LBB12_3
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: .LBB12_2:
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: ldr r3, .LCPI12_0
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: mov r1, r4
-; SOFT-NEXT: mov r2, r6
-; SOFT-NEXT: bl __aeabi_dcmpgt
-; SOFT-NEXT: cmp r0, #0
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: bne .LBB12_4
-; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: .LBB12_2:
; SOFT-NEXT: add sp, #4
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT: .LBB12_3:
+; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB12_2
; SOFT-NEXT: .LBB12_4:
; SOFT-NEXT: ldr r0, .LCPI12_1
; SOFT-NEXT: add sp, #4
@@ -1071,32 +1107,33 @@ define i16 @test_signed_i16_f64(double %f) nounwind {
; SOFT-NEXT: .pad #4
; SOFT-NEXT: sub sp, #4
; SOFT-NEXT: mov r4, r1
+; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: movs r7, #0
+; SOFT-NEXT: ldr r3, .LCPI13_0
+; SOFT-NEXT: mov r2, r7
+; SOFT-NEXT: bl __aeabi_dcmpgt
; SOFT-NEXT: mov r5, r0
-; SOFT-NEXT: movs r6, #0
-; SOFT-NEXT: mov r2, r6
-; SOFT-NEXT: mov r3, r6
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: mov r1, r4
+; SOFT-NEXT: mov r2, r7
+; SOFT-NEXT: mov r3, r7
; SOFT-NEXT: bl __aeabi_dcmpge
; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: bl __aeabi_d2uiz
; SOFT-NEXT: cmp r7, #0
-; SOFT-NEXT: bne .LBB13_2
+; SOFT-NEXT: beq .LBB13_3
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: .LBB13_2:
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: ldr r3, .LCPI13_0
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: mov r1, r4
-; SOFT-NEXT: mov r2, r6
-; SOFT-NEXT: bl __aeabi_dcmpgt
-; SOFT-NEXT: cmp r0, #0
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: bne .LBB13_4
-; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: .LBB13_2:
; SOFT-NEXT: add sp, #4
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT: .LBB13_3:
+; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB13_2
; SOFT-NEXT: .LBB13_4:
; SOFT-NEXT: ldr r0, .LCPI13_1
; SOFT-NEXT: add sp, #4
@@ -1159,32 +1196,33 @@ define i19 @test_signed_i19_f64(double %f) nounwind {
; SOFT-NEXT: .pad #4
; SOFT-NEXT: sub sp, #4
; SOFT-NEXT: mov r4, r1
+; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: movs r7, #0
+; SOFT-NEXT: ldr r3, .LCPI14_0
+; SOFT-NEXT: mov r2, r7
+; SOFT-NEXT: bl __aeabi_dcmpgt
; SOFT-NEXT: mov r5, r0
-; SOFT-NEXT: movs r6, #0
-; SOFT-NEXT: mov r2, r6
-; SOFT-NEXT: mov r3, r6
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: mov r1, r4
+; SOFT-NEXT: mov r2, r7
+; SOFT-NEXT: mov r3, r7
; SOFT-NEXT: bl __aeabi_dcmpge
; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: bl __aeabi_d2uiz
; SOFT-NEXT: cmp r7, #0
-; SOFT-NEXT: bne .LBB14_2
+; SOFT-NEXT: beq .LBB14_3
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: .LBB14_2:
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: ldr r3, .LCPI14_0
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: mov r1, r4
-; SOFT-NEXT: mov r2, r6
-; SOFT-NEXT: bl __aeabi_dcmpgt
-; SOFT-NEXT: cmp r0, #0
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: bne .LBB14_4
-; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: .LBB14_2:
; SOFT-NEXT: add sp, #4
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT: .LBB14_3:
+; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB14_2
; SOFT-NEXT: .LBB14_4:
; SOFT-NEXT: ldr r0, .LCPI14_1
; SOFT-NEXT: add sp, #4
@@ -1248,34 +1286,32 @@ define i32 @test_signed_i32_f64(double %f) nounwind {
; SOFT-NEXT: .pad #4
; SOFT-NEXT: sub sp, #4
; SOFT-NEXT: mov r5, r1
-; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: ldr r2, .LCPI15_0
+; SOFT-NEXT: ldr r3, .LCPI15_1
+; SOFT-NEXT: bl __aeabi_dcmpgt
+; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
; SOFT-NEXT: movs r4, #0
+; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: mov r1, r5
; SOFT-NEXT: mov r2, r4
; SOFT-NEXT: mov r3, r4
; SOFT-NEXT: bl __aeabi_dcmpge
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: mov r0, r7
; SOFT-NEXT: mov r1, r5
; SOFT-NEXT: bl __aeabi_d2uiz
-; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: cmp r6, #0
; SOFT-NEXT: bne .LBB15_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: .LBB15_2:
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: ldr r2, .LCPI15_0
-; SOFT-NEXT: ldr r3, .LCPI15_1
; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: mov r1, r5
-; SOFT-NEXT: bl __aeabi_dcmpgt
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: bne .LBB15_4
+; SOFT-NEXT: .LBB15_2:
+; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload
+; SOFT-NEXT: cmp r1, #0
+; SOFT-NEXT: beq .LBB15_4
; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: add sp, #4
-; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
-; SOFT-NEXT: .LBB15_4:
; SOFT-NEXT: mvns r0, r4
+; SOFT-NEXT: .LBB15_4:
; SOFT-NEXT: add sp, #4
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .p2align 2
@@ -1309,47 +1345,49 @@ define i50 @test_signed_i50_f64(double %f) nounwind {
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
; SOFT-NEXT: .pad #4
; SOFT-NEXT: sub sp, #4
-; SOFT-NEXT: mov r6, r1
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: movs r5, #0
-; SOFT-NEXT: mov r2, r5
-; SOFT-NEXT: mov r3, r5
-; SOFT-NEXT: bl __aeabi_dcmpge
+; SOFT-NEXT: mov r7, r1
; SOFT-NEXT: mov r4, r0
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: mov r1, r6
+; SOFT-NEXT: movs r0, #7
+; SOFT-NEXT: mvns r2, r0
+; SOFT-NEXT: ldr r3, .LCPI16_0
+; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: bl __aeabi_dcmpgt
+; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT: movs r6, #0
+; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: mov r1, r7
+; SOFT-NEXT: mov r2, r6
+; SOFT-NEXT: mov r3, r6
+; SOFT-NEXT: bl __aeabi_dcmpge
+; SOFT-NEXT: mov r5, r0
+; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: mov r1, r7
; SOFT-NEXT: bl __aeabi_d2ulz
-; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: bne .LBB16_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r1, r4
+; SOFT-NEXT: mov r0, r5
; SOFT-NEXT: .LBB16_2:
-; SOFT-NEXT: str r1, [sp] @ 4-byte Spill
-; SOFT-NEXT: bne .LBB16_4
+; SOFT-NEXT: ldr r2, [sp] @ 4-byte Reload
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: bne .LBB16_6
; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: .LBB16_4:
-; SOFT-NEXT: mov r4, r0
-; SOFT-NEXT: movs r0, #7
-; SOFT-NEXT: mvns r2, r0
-; SOFT-NEXT: ldr r3, .LCPI16_0
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: mov r1, r6
-; SOFT-NEXT: bl __aeabi_dcmpgt
-; SOFT-NEXT: mov r1, r0
-; SOFT-NEXT: mvns r0, r5
-; SOFT-NEXT: cmp r1, #0
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: beq .LBB16_7
-; SOFT-NEXT: @ %bb.5:
-; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload
+; SOFT-NEXT: .LBB16_4:
+; SOFT-NEXT: cmp r2, #0
; SOFT-NEXT: bne .LBB16_8
-; SOFT-NEXT: .LBB16_6:
+; SOFT-NEXT: .LBB16_5:
; SOFT-NEXT: add sp, #4
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT: .LBB16_6:
+; SOFT-NEXT: mvns r0, r6
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB16_4
; SOFT-NEXT: .LBB16_7:
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload
-; SOFT-NEXT: beq .LBB16_6
+; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: beq .LBB16_5
; SOFT-NEXT: .LBB16_8:
; SOFT-NEXT: ldr r1, .LCPI16_1
; SOFT-NEXT: add sp, #4
@@ -1420,46 +1458,54 @@ define i64 @test_signed_i64_f64(double %f) nounwind {
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
; SOFT-NEXT: .pad #4
; SOFT-NEXT: sub sp, #4
-; SOFT-NEXT: mov r4, r1
+; SOFT-NEXT: mov r7, r1
; SOFT-NEXT: mov r5, r0
-; SOFT-NEXT: movs r7, #0
-; SOFT-NEXT: mov r2, r7
-; SOFT-NEXT: mov r3, r7
+; SOFT-NEXT: movs r6, #0
+; SOFT-NEXT: mvns r4, r6
+; SOFT-NEXT: ldr r3, .LCPI17_0
+; SOFT-NEXT: mov r2, r4
+; SOFT-NEXT: bl __aeabi_dcmpgt
+; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: mov r1, r7
+; SOFT-NEXT: mov r2, r6
+; SOFT-NEXT: mov r3, r6
; SOFT-NEXT: bl __aeabi_dcmpge
; SOFT-NEXT: mov r6, r0
; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: mov r1, r4
+; SOFT-NEXT: mov r1, r7
; SOFT-NEXT: bl __aeabi_d2ulz
+; SOFT-NEXT: mov r2, r0
; SOFT-NEXT: cmp r6, #0
; SOFT-NEXT: bne .LBB17_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r1, r6
+; SOFT-NEXT: mov r2, r6
; SOFT-NEXT: .LBB17_2:
-; SOFT-NEXT: str r1, [sp] @ 4-byte Spill
-; SOFT-NEXT: bne .LBB17_4
+; SOFT-NEXT: ldr r3, [sp] @ 4-byte Reload
+; SOFT-NEXT: cmp r3, #0
+; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: beq .LBB17_7
; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: beq .LBB17_8
; SOFT-NEXT: .LBB17_4:
-; SOFT-NEXT: mov r6, r0
-; SOFT-NEXT: mvns r7, r7
-; SOFT-NEXT: ldr r3, .LCPI17_0
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: mov r1, r4
-; SOFT-NEXT: mov r2, r7
-; SOFT-NEXT: bl __aeabi_dcmpgt
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: cmp r3, #0
; SOFT-NEXT: bne .LBB17_6
-; SOFT-NEXT: @ %bb.5:
-; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: .LBB17_5:
+; SOFT-NEXT: mov r4, r1
; SOFT-NEXT: .LBB17_6:
-; SOFT-NEXT: bne .LBB17_8
-; SOFT-NEXT: @ %bb.7:
-; SOFT-NEXT: ldr r7, [sp] @ 4-byte Reload
-; SOFT-NEXT: .LBB17_8:
-; SOFT-NEXT: mov r1, r7
+; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: add sp, #4
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT: .LBB17_7:
+; SOFT-NEXT: mov r0, r2
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: bne .LBB17_4
+; SOFT-NEXT: .LBB17_8:
+; SOFT-NEXT: mov r1, r6
+; SOFT-NEXT: cmp r3, #0
+; SOFT-NEXT: beq .LBB17_5
+; SOFT-NEXT: b .LBB17_6
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.9:
; SOFT-NEXT: .LCPI17_0:
@@ -1527,76 +1573,82 @@ define i100 @test_signed_i100_f64(double %f) nounwind {
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r4, r5, r6, r7, lr}
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
-; SOFT-NEXT: .pad #12
-; SOFT-NEXT: sub sp, #12
-; SOFT-NEXT: mov r6, r1
+; SOFT-NEXT: .pad #4
+; SOFT-NEXT: sub sp, #4
+; SOFT-NEXT: mov r7, r1
; SOFT-NEXT: mov r4, r0
-; SOFT-NEXT: movs r5, #0
-; SOFT-NEXT: mov r2, r5
-; SOFT-NEXT: mov r3, r5
+; SOFT-NEXT: movs r6, #0
+; SOFT-NEXT: mvns r2, r6
+; SOFT-NEXT: ldr r3, .LCPI18_0
+; SOFT-NEXT: str r2, [sp] @ 4-byte Spill
+; SOFT-NEXT: bl __aeabi_dcmpgt
+; SOFT-NEXT: mov r5, r0
+; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: mov r1, r7
+; SOFT-NEXT: mov r2, r6
+; SOFT-NEXT: mov r3, r6
; SOFT-NEXT: bl __aeabi_dcmpge
-; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: mov r6, r0
; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: mov r1, r6
+; SOFT-NEXT: mov r1, r7
; SOFT-NEXT: bl __fixunsdfti
-; SOFT-NEXT: cmp r7, #0
-; SOFT-NEXT: beq .LBB18_12
+; SOFT-NEXT: mov r4, r0
+; SOFT-NEXT: mov r7, r1
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: bne .LBB18_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: beq .LBB18_13
+; SOFT-NEXT: mov r4, r6
; SOFT-NEXT: .LBB18_2:
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload
; SOFT-NEXT: bne .LBB18_4
-; SOFT-NEXT: .LBB18_3:
-; SOFT-NEXT: mov r1, r7
+; SOFT-NEXT: @ %bb.3:
+; SOFT-NEXT: mov r0, r4
; SOFT-NEXT: .LBB18_4:
-; SOFT-NEXT: str r1, [sp] @ 4-byte Spill
-; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: cmp r6, #0
; SOFT-NEXT: bne .LBB18_6
; SOFT-NEXT: @ %bb.5:
-; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: mov r7, r6
; SOFT-NEXT: .LBB18_6:
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: mvns r5, r5
-; SOFT-NEXT: ldr r3, .LCPI18_0
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: mov r1, r6
-; SOFT-NEXT: mov r2, r5
-; SOFT-NEXT: bl __aeabi_dcmpgt
-; SOFT-NEXT: movs r3, #15
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: beq .LBB18_14
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: ldr r4, [sp] @ 4-byte Reload
+; SOFT-NEXT: mov r1, r4
+; SOFT-NEXT: beq .LBB18_13
; SOFT-NEXT: @ %bb.7:
-; SOFT-NEXT: mov r1, r5
-; SOFT-NEXT: beq .LBB18_15
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: beq .LBB18_14
; SOFT-NEXT: .LBB18_8:
-; SOFT-NEXT: beq .LBB18_16
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB18_15
; SOFT-NEXT: .LBB18_9:
-; SOFT-NEXT: bne .LBB18_11
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: beq .LBB18_16
; SOFT-NEXT: .LBB18_10:
-; SOFT-NEXT: ldr r3, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB18_12
; SOFT-NEXT: .LBB18_11:
-; SOFT-NEXT: mov r2, r5
-; SOFT-NEXT: add sp, #12
-; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT: movs r3, #15
; SOFT-NEXT: .LBB18_12:
-; SOFT-NEXT: mov r3, r7
-; SOFT-NEXT: bne .LBB18_2
+; SOFT-NEXT: mov r2, r4
+; SOFT-NEXT: add sp, #4
+; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .LBB18_13:
-; SOFT-NEXT: mov r2, r7
-; SOFT-NEXT: beq .LBB18_3
-; SOFT-NEXT: b .LBB18_4
-; SOFT-NEXT: .LBB18_14:
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: mov r1, r7
+; SOFT-NEXT: cmp r6, #0
; SOFT-NEXT: bne .LBB18_8
-; SOFT-NEXT: .LBB18_15:
-; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload
+; SOFT-NEXT: .LBB18_14:
+; SOFT-NEXT: mov r2, r6
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: bne .LBB18_9
+; SOFT-NEXT: .LBB18_15:
+; SOFT-NEXT: mov r4, r2
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: bne .LBB18_10
; SOFT-NEXT: .LBB18_16:
-; SOFT-NEXT: ldr r5, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT: beq .LBB18_10
-; SOFT-NEXT: b .LBB18_11
+; SOFT-NEXT: mov r3, r6
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB18_11
+; SOFT-NEXT: b .LBB18_12
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.17:
; SOFT-NEXT: .LCPI18_0:
@@ -1672,77 +1724,78 @@ define i128 @test_signed_i128_f64(double %f) nounwind {
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r4, r5, r6, r7, lr}
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
-; SOFT-NEXT: .pad #12
-; SOFT-NEXT: sub sp, #12
-; SOFT-NEXT: mov r6, r1
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: movs r5, #0
-; SOFT-NEXT: mov r2, r5
-; SOFT-NEXT: mov r3, r5
+; SOFT-NEXT: .pad #4
+; SOFT-NEXT: sub sp, #4
+; SOFT-NEXT: mov r7, r1
+; SOFT-NEXT: mov r5, r0
+; SOFT-NEXT: movs r6, #0
+; SOFT-NEXT: mvns r4, r6
+; SOFT-NEXT: ldr r3, .LCPI19_0
+; SOFT-NEXT: mov r2, r4
+; SOFT-NEXT: bl __aeabi_dcmpgt
+; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: mov r1, r7
+; SOFT-NEXT: mov r2, r6
+; SOFT-NEXT: mov r3, r6
; SOFT-NEXT: bl __aeabi_dcmpge
-; SOFT-NEXT: mov r4, r0
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: mov r1, r6
+; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: mov r1, r7
; SOFT-NEXT: bl __fixunsdfti
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: beq .LBB19_12
+; SOFT-NEXT: mov r5, r0
+; SOFT-NEXT: mov r7, r1
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: bne .LBB19_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: beq .LBB19_13
+; SOFT-NEXT: mov r5, r6
; SOFT-NEXT: .LBB19_2:
+; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload
+; SOFT-NEXT: cmp r0, #0
+; SOFT-NEXT: mov r0, r4
; SOFT-NEXT: bne .LBB19_4
-; SOFT-NEXT: .LBB19_3:
-; SOFT-NEXT: mov r1, r4
+; SOFT-NEXT: @ %bb.3:
+; SOFT-NEXT: mov r0, r5
; SOFT-NEXT: .LBB19_4:
-; SOFT-NEXT: str r1, [sp] @ 4-byte Spill
-; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: cmp r6, #0
; SOFT-NEXT: bne .LBB19_6
; SOFT-NEXT: @ %bb.5:
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: mov r7, r6
; SOFT-NEXT: .LBB19_6:
-; SOFT-NEXT: mov r4, r0
-; SOFT-NEXT: mvns r5, r5
-; SOFT-NEXT: ldr r3, .LCPI19_0
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: mov r1, r6
-; SOFT-NEXT: mov r2, r5
-; SOFT-NEXT: bl __aeabi_dcmpgt
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: beq .LBB19_14
+; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload
+; SOFT-NEXT: cmp r1, #0
+; SOFT-NEXT: mov r1, r4
+; SOFT-NEXT: bne .LBB19_8
; SOFT-NEXT: @ %bb.7:
-; SOFT-NEXT: mov r1, r5
-; SOFT-NEXT: beq .LBB19_15
+; SOFT-NEXT: mov r1, r7
; SOFT-NEXT: .LBB19_8:
-; SOFT-NEXT: mov r2, r5
-; SOFT-NEXT: beq .LBB19_16
-; SOFT-NEXT: .LBB19_9:
-; SOFT-NEXT: bne .LBB19_11
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: bne .LBB19_10
+; SOFT-NEXT: @ %bb.9:
+; SOFT-NEXT: mov r2, r6
; SOFT-NEXT: .LBB19_10:
-; SOFT-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT: .LBB19_11:
-; SOFT-NEXT: mov r3, r5
-; SOFT-NEXT: add sp, #12
-; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT: ldr r5, [sp] @ 4-byte Reload
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: mov r5, r4
+; SOFT-NEXT: bne .LBB19_12
+; SOFT-NEXT: @ %bb.11:
+; SOFT-NEXT: mov r5, r2
; SOFT-NEXT: .LBB19_12:
-; SOFT-NEXT: mov r3, r4
-; SOFT-NEXT: bne .LBB19_2
-; SOFT-NEXT: .LBB19_13:
-; SOFT-NEXT: mov r2, r4
-; SOFT-NEXT: beq .LBB19_3
-; SOFT-NEXT: b .LBB19_4
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: bne .LBB19_14
+; SOFT-NEXT: @ %bb.13:
+; SOFT-NEXT: mov r3, r6
; SOFT-NEXT: .LBB19_14:
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: mov r1, r5
-; SOFT-NEXT: bne .LBB19_8
-; SOFT-NEXT: .LBB19_15:
-; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload
-; SOFT-NEXT: mov r2, r5
-; SOFT-NEXT: bne .LBB19_9
+; SOFT-NEXT: ldr r2, [sp] @ 4-byte Reload
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: bne .LBB19_16
+; SOFT-NEXT: @ %bb.15:
+; SOFT-NEXT: mov r4, r3
; SOFT-NEXT: .LBB19_16:
-; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT: beq .LBB19_10
-; SOFT-NEXT: b .LBB19_11
+; SOFT-NEXT: mov r2, r5
+; SOFT-NEXT: mov r3, r4
+; SOFT-NEXT: add sp, #4
+; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.17:
; SOFT-NEXT: .LCPI19_0:
@@ -1836,27 +1889,30 @@ define i1 @test_signed_i1_f16(half %f) nounwind {
; SOFT-NEXT: uxth r0, r0
; SOFT-NEXT: bl __aeabi_h2f
; SOFT-NEXT: mov r5, r0
+; SOFT-NEXT: movs r0, #127
+; SOFT-NEXT: lsls r1, r0, #23
+; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: bl __aeabi_fcmpgt
+; SOFT-NEXT: mov r4, r0
; SOFT-NEXT: movs r1, #0
+; SOFT-NEXT: mov r0, r5
; SOFT-NEXT: bl __aeabi_fcmpge
; SOFT-NEXT: mov r6, r0
; SOFT-NEXT: mov r0, r5
; SOFT-NEXT: bl __aeabi_f2uiz
-; SOFT-NEXT: mov r4, r0
; SOFT-NEXT: cmp r6, #0
-; SOFT-NEXT: bne .LBB20_2
+; SOFT-NEXT: beq .LBB20_3
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r4, r6
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB20_4
; SOFT-NEXT: .LBB20_2:
-; SOFT-NEXT: movs r0, #127
-; SOFT-NEXT: lsls r1, r0, #23
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: bl __aeabi_fcmpgt
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB20_4
-; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: movs r4, #1
+; SOFT-NEXT: pop {r4, r5, r6, pc}
+; SOFT-NEXT: .LBB20_3:
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB20_2
; SOFT-NEXT: .LBB20_4:
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: movs r0, #1
; SOFT-NEXT: pop {r4, r5, r6, pc}
;
; VFP2-LABEL: test_signed_i1_f16:
@@ -1864,13 +1920,13 @@ define i1 @test_signed_i1_f16(half %f) nounwind {
; VFP2-NEXT: .save {r7, lr}
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: bl __aeabi_h2f
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vmov.f32 s4, #1.000000e+00
-; VFP2-NEXT: vcvt.u32.f32 s2, s0
-; VFP2-NEXT: vcmp.f32 s0, #0
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vmov.f32 s0, #1.000000e+00
+; VFP2-NEXT: vcvt.u32.f32 s4, s2
+; VFP2-NEXT: vcmp.f32 s2, #0
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s4
-; VFP2-NEXT: vmov r0, s2
+; VFP2-NEXT: vcmp.f32 s2, s0
+; VFP2-NEXT: vmov r0, s4
; VFP2-NEXT: it lt
; VFP2-NEXT: movlt r0, #0
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
@@ -1904,27 +1960,29 @@ define i8 @test_signed_i8_f16(half %f) nounwind {
; SOFT-NEXT: push {r4, r5, r6, lr}
; SOFT-NEXT: uxth r0, r0
; SOFT-NEXT: bl __aeabi_h2f
-; SOFT-NEXT: mov r5, r0
+; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: ldr r1, .LCPI21_0
+; SOFT-NEXT: bl __aeabi_fcmpgt
+; SOFT-NEXT: mov r4, r0
; SOFT-NEXT: movs r1, #0
+; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: bl __aeabi_fcmpge
-; SOFT-NEXT: mov r6, r0
-; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: mov r5, r0
+; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: bl __aeabi_f2uiz
-; SOFT-NEXT: mov r4, r0
-; SOFT-NEXT: cmp r6, #0
-; SOFT-NEXT: bne .LBB21_2
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB21_3
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r4, r6
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB21_4
; SOFT-NEXT: .LBB21_2:
-; SOFT-NEXT: ldr r1, .LCPI21_0
+; SOFT-NEXT: pop {r4, r5, r6, pc}
+; SOFT-NEXT: .LBB21_3:
; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: bl __aeabi_fcmpgt
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB21_4
-; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: movs r4, #255
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB21_2
; SOFT-NEXT: .LBB21_4:
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: movs r0, #255
; SOFT-NEXT: pop {r4, r5, r6, pc}
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.5:
@@ -1982,27 +2040,29 @@ define i13 @test_signed_i13_f16(half %f) nounwind {
; SOFT-NEXT: push {r4, r5, r6, lr}
; SOFT-NEXT: uxth r0, r0
; SOFT-NEXT: bl __aeabi_h2f
-; SOFT-NEXT: mov r5, r0
+; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: ldr r1, .LCPI22_0
+; SOFT-NEXT: bl __aeabi_fcmpgt
+; SOFT-NEXT: mov r4, r0
; SOFT-NEXT: movs r1, #0
+; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: bl __aeabi_fcmpge
-; SOFT-NEXT: mov r6, r0
-; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: mov r5, r0
+; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: bl __aeabi_f2uiz
-; SOFT-NEXT: mov r4, r0
-; SOFT-NEXT: cmp r6, #0
-; SOFT-NEXT: bne .LBB22_2
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB22_3
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r4, r6
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB22_4
; SOFT-NEXT: .LBB22_2:
-; SOFT-NEXT: ldr r1, .LCPI22_0
+; SOFT-NEXT: pop {r4, r5, r6, pc}
+; SOFT-NEXT: .LBB22_3:
; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: bl __aeabi_fcmpgt
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB22_4
-; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: ldr r4, .LCPI22_1
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB22_2
; SOFT-NEXT: .LBB22_4:
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: ldr r0, .LCPI22_1
; SOFT-NEXT: pop {r4, r5, r6, pc}
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.5:
@@ -2062,27 +2122,29 @@ define i16 @test_signed_i16_f16(half %f) nounwind {
; SOFT-NEXT: push {r4, r5, r6, lr}
; SOFT-NEXT: uxth r0, r0
; SOFT-NEXT: bl __aeabi_h2f
-; SOFT-NEXT: mov r5, r0
+; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: ldr r1, .LCPI23_0
+; SOFT-NEXT: bl __aeabi_fcmpgt
+; SOFT-NEXT: mov r4, r0
; SOFT-NEXT: movs r1, #0
+; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: bl __aeabi_fcmpge
-; SOFT-NEXT: mov r6, r0
-; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: mov r5, r0
+; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: bl __aeabi_f2uiz
-; SOFT-NEXT: mov r4, r0
-; SOFT-NEXT: cmp r6, #0
-; SOFT-NEXT: bne .LBB23_2
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB23_3
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r4, r6
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB23_4
; SOFT-NEXT: .LBB23_2:
-; SOFT-NEXT: ldr r1, .LCPI23_0
+; SOFT-NEXT: pop {r4, r5, r6, pc}
+; SOFT-NEXT: .LBB23_3:
; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: bl __aeabi_fcmpgt
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB23_4
-; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: ldr r4, .LCPI23_1
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB23_2
; SOFT-NEXT: .LBB23_4:
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: ldr r0, .LCPI23_1
; SOFT-NEXT: pop {r4, r5, r6, pc}
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.5:
@@ -2142,27 +2204,29 @@ define i19 @test_signed_i19_f16(half %f) nounwind {
; SOFT-NEXT: push {r4, r5, r6, lr}
; SOFT-NEXT: uxth r0, r0
; SOFT-NEXT: bl __aeabi_h2f
-; SOFT-NEXT: mov r5, r0
+; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: ldr r1, .LCPI24_0
+; SOFT-NEXT: bl __aeabi_fcmpgt
+; SOFT-NEXT: mov r4, r0
; SOFT-NEXT: movs r1, #0
+; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: bl __aeabi_fcmpge
-; SOFT-NEXT: mov r6, r0
-; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: mov r5, r0
+; SOFT-NEXT: mov r0, r6
; SOFT-NEXT: bl __aeabi_f2uiz
-; SOFT-NEXT: mov r4, r0
-; SOFT-NEXT: cmp r6, #0
-; SOFT-NEXT: bne .LBB24_2
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB24_3
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r4, r6
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB24_4
; SOFT-NEXT: .LBB24_2:
-; SOFT-NEXT: ldr r1, .LCPI24_0
+; SOFT-NEXT: pop {r4, r5, r6, pc}
+; SOFT-NEXT: .LBB24_3:
; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: bl __aeabi_fcmpgt
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB24_4
-; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: ldr r4, .LCPI24_1
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB24_2
; SOFT-NEXT: .LBB24_4:
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: ldr r0, .LCPI24_1
; SOFT-NEXT: pop {r4, r5, r6, pc}
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.5:
@@ -2225,28 +2289,31 @@ define i32 @test_signed_i32_f16(half %f) nounwind {
; SOFT-NEXT: sub sp, #4
; SOFT-NEXT: uxth r0, r0
; SOFT-NEXT: bl __aeabi_h2f
-; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: ldr r1, .LCPI25_0
+; SOFT-NEXT: bl __aeabi_fcmpgt
+; SOFT-NEXT: mov r5, r0
; SOFT-NEXT: movs r4, #0
+; SOFT-NEXT: mov r0, r7
; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: bl __aeabi_fcmpge
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: mov r0, r7
; SOFT-NEXT: bl __aeabi_f2uiz
-; SOFT-NEXT: mov r5, r0
-; SOFT-NEXT: cmp r7, #0
-; SOFT-NEXT: bne .LBB25_2
+; SOFT-NEXT: cmp r6, #0
+; SOFT-NEXT: beq .LBB25_3
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r5, r7
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB25_4
; SOFT-NEXT: .LBB25_2:
-; SOFT-NEXT: ldr r1, .LCPI25_0
+; SOFT-NEXT: add sp, #4
+; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT: .LBB25_3:
; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: bl __aeabi_fcmpgt
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB25_4
-; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: mvns r5, r4
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB25_2
; SOFT-NEXT: .LBB25_4:
-; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: mvns r0, r4
; SOFT-NEXT: add sp, #4
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .p2align 2
@@ -2283,39 +2350,45 @@ define i50 @test_signed_i50_f16(half %f) nounwind {
; SOFT-NEXT: sub sp, #4
; SOFT-NEXT: uxth r0, r0
; SOFT-NEXT: bl __aeabi_h2f
-; SOFT-NEXT: mov r6, r0
-; SOFT-NEXT: movs r5, #0
-; SOFT-NEXT: mov r1, r5
-; SOFT-NEXT: bl __aeabi_fcmpge
+; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: ldr r1, .LCPI26_0
+; SOFT-NEXT: bl __aeabi_fcmpgt
; SOFT-NEXT: mov r4, r0
-; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: movs r6, #0
+; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: mov r1, r6
+; SOFT-NEXT: bl __aeabi_fcmpge
+; SOFT-NEXT: mov r5, r0
+; SOFT-NEXT: mov r0, r7
; SOFT-NEXT: bl __aeabi_f2ulz
-; SOFT-NEXT: mov r7, r1
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: bne .LBB26_2
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB26_5
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r7, r4
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB26_6
; SOFT-NEXT: .LBB26_2:
-; SOFT-NEXT: bne .LBB26_4
-; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: beq .LBB26_7
+; SOFT-NEXT: .LBB26_3:
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB26_8
; SOFT-NEXT: .LBB26_4:
-; SOFT-NEXT: mov r4, r0
-; SOFT-NEXT: ldr r1, .LCPI26_0
-; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: bl __aeabi_fcmpgt
-; SOFT-NEXT: mov r1, r0
-; SOFT-NEXT: mvns r0, r5
-; SOFT-NEXT: cmp r1, #0
-; SOFT-NEXT: bne .LBB26_6
-; SOFT-NEXT: @ %bb.5:
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: add sp, #4
+; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT: .LBB26_5:
+; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB26_2
; SOFT-NEXT: .LBB26_6:
-; SOFT-NEXT: beq .LBB26_8
-; SOFT-NEXT: @ %bb.7:
-; SOFT-NEXT: ldr r7, .LCPI26_1
+; SOFT-NEXT: mvns r0, r6
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB26_3
+; SOFT-NEXT: .LBB26_7:
+; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB26_4
; SOFT-NEXT: .LBB26_8:
-; SOFT-NEXT: mov r1, r7
+; SOFT-NEXT: ldr r1, .LCPI26_1
; SOFT-NEXT: add sp, #4
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .p2align 2
@@ -2392,43 +2465,48 @@ define i64 @test_signed_i64_f16(half %f) nounwind {
; SOFT-NEXT: sub sp, #4
; SOFT-NEXT: uxth r0, r0
; SOFT-NEXT: bl __aeabi_h2f
-; SOFT-NEXT: mov r6, r0
-; SOFT-NEXT: movs r4, #0
-; SOFT-NEXT: mov r1, r4
+; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: ldr r1, .LCPI27_0
+; SOFT-NEXT: bl __aeabi_fcmpgt
+; SOFT-NEXT: mov r4, r0
+; SOFT-NEXT: movs r6, #0
+; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: mov r1, r6
; SOFT-NEXT: bl __aeabi_fcmpge
; SOFT-NEXT: mov r5, r0
-; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: mov r0, r7
; SOFT-NEXT: bl __aeabi_f2ulz
-; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: mov r3, r0
; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: bne .LBB27_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: mov r3, r5
; SOFT-NEXT: .LBB27_2:
-; SOFT-NEXT: str r1, [sp] @ 4-byte Spill
-; SOFT-NEXT: bne .LBB27_4
-; SOFT-NEXT: @ %bb.3:
-; SOFT-NEXT: mov r7, r5
-; SOFT-NEXT: .LBB27_4:
-; SOFT-NEXT: ldr r1, .LCPI27_0
-; SOFT-NEXT: mov r0, r6
-; SOFT-NEXT: bl __aeabi_fcmpgt
-; SOFT-NEXT: mvns r1, r4
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: mov r0, r1
+; SOFT-NEXT: mvns r2, r6
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: mov r0, r2
; SOFT-NEXT: beq .LBB27_7
-; SOFT-NEXT: @ %bb.5:
+; SOFT-NEXT: @ %bb.3:
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: beq .LBB27_8
+; SOFT-NEXT: .LBB27_4:
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB27_6
+; SOFT-NEXT: .LBB27_5:
+; SOFT-NEXT: mov r2, r1
; SOFT-NEXT: .LBB27_6:
+; SOFT-NEXT: mov r1, r2
; SOFT-NEXT: add sp, #4
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .LBB27_7:
-; SOFT-NEXT: mov r0, r7
-; SOFT-NEXT: bne .LBB27_6
+; SOFT-NEXT: mov r0, r3
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB27_4
; SOFT-NEXT: .LBB27_8:
-; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload
-; SOFT-NEXT: add sp, #4
-; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB27_5
+; SOFT-NEXT: b .LBB27_6
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.9:
; SOFT-NEXT: .LCPI27_0:
@@ -2495,73 +2573,78 @@ define i100 @test_signed_i100_f16(half %f) nounwind {
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r4, r5, r6, r7, lr}
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
-; SOFT-NEXT: .pad #12
-; SOFT-NEXT: sub sp, #12
+; SOFT-NEXT: .pad #4
+; SOFT-NEXT: sub sp, #4
; SOFT-NEXT: uxth r0, r0
; SOFT-NEXT: bl __aeabi_h2f
+; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: ldr r1, .LCPI28_0
+; SOFT-NEXT: bl __aeabi_fcmpgt
; SOFT-NEXT: mov r4, r0
; SOFT-NEXT: movs r6, #0
+; SOFT-NEXT: mov r0, r7
; SOFT-NEXT: mov r1, r6
; SOFT-NEXT: bl __aeabi_fcmpge
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: bl __fixunssfti
; SOFT-NEXT: mov r5, r0
-; SOFT-NEXT: cmp r7, #0
-; SOFT-NEXT: beq .LBB28_11
+; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: bl __fixunssfti
+; SOFT-NEXT: mov r7, r1
+; SOFT-NEXT: str r2, [sp] @ 4-byte Spill
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB28_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: beq .LBB28_12
+; SOFT-NEXT: mov r0, r5
; SOFT-NEXT: .LBB28_2:
+; SOFT-NEXT: mvns r2, r6
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: mov r6, r2
; SOFT-NEXT: bne .LBB28_4
-; SOFT-NEXT: .LBB28_3:
-; SOFT-NEXT: mov r1, r7
+; SOFT-NEXT: @ %bb.3:
+; SOFT-NEXT: mov r6, r0
; SOFT-NEXT: .LBB28_4:
-; SOFT-NEXT: str r1, [sp] @ 4-byte Spill
-; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: bne .LBB28_6
; SOFT-NEXT: @ %bb.5:
-; SOFT-NEXT: mov r5, r7
+; SOFT-NEXT: mov r7, r5
; SOFT-NEXT: .LBB28_6:
-; SOFT-NEXT: ldr r1, .LCPI28_0
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: bl __aeabi_fcmpgt
-; SOFT-NEXT: mvns r2, r6
-; SOFT-NEXT: movs r3, #15
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: mov r0, r2
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: mov r1, r2
; SOFT-NEXT: beq .LBB28_13
; SOFT-NEXT: @ %bb.7:
-; SOFT-NEXT: mov r1, r2
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: beq .LBB28_14
; SOFT-NEXT: .LBB28_8:
+; SOFT-NEXT: cmp r4, #0
; SOFT-NEXT: beq .LBB28_15
; SOFT-NEXT: .LBB28_9:
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: beq .LBB28_16
; SOFT-NEXT: .LBB28_10:
-; SOFT-NEXT: add sp, #12
-; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB28_12
; SOFT-NEXT: .LBB28_11:
-; SOFT-NEXT: mov r3, r7
-; SOFT-NEXT: bne .LBB28_2
+; SOFT-NEXT: movs r3, #15
; SOFT-NEXT: .LBB28_12:
-; SOFT-NEXT: mov r2, r7
-; SOFT-NEXT: beq .LBB28_3
-; SOFT-NEXT: b .LBB28_4
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: add sp, #4
+; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .LBB28_13:
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: mov r1, r2
+; SOFT-NEXT: mov r1, r7
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: bne .LBB28_8
; SOFT-NEXT: .LBB28_14:
-; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload
+; SOFT-NEXT: str r5, [sp] @ 4-byte Spill
+; SOFT-NEXT: cmp r4, #0
; SOFT-NEXT: bne .LBB28_9
; SOFT-NEXT: .LBB28_15:
-; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT: ldr r2, [sp] @ 4-byte Reload
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: bne .LBB28_10
; SOFT-NEXT: .LBB28_16:
-; SOFT-NEXT: ldr r3, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT: add sp, #12
-; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT: mov r3, r5
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB28_11
+; SOFT-NEXT: b .LBB28_12
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.17:
; SOFT-NEXT: .LCPI28_0:
@@ -2636,74 +2719,77 @@ define i128 @test_signed_i128_f16(half %f) nounwind {
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r4, r5, r6, r7, lr}
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
-; SOFT-NEXT: .pad #12
-; SOFT-NEXT: sub sp, #12
+; SOFT-NEXT: .pad #4
+; SOFT-NEXT: sub sp, #4
; SOFT-NEXT: uxth r0, r0
; SOFT-NEXT: bl __aeabi_h2f
+; SOFT-NEXT: mov r7, r0
+; SOFT-NEXT: ldr r1, .LCPI29_0
+; SOFT-NEXT: bl __aeabi_fcmpgt
; SOFT-NEXT: mov r4, r0
; SOFT-NEXT: movs r6, #0
+; SOFT-NEXT: mov r0, r7
; SOFT-NEXT: mov r1, r6
; SOFT-NEXT: bl __aeabi_fcmpge
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: bl __fixunssfti
; SOFT-NEXT: mov r5, r0
-; SOFT-NEXT: cmp r7, #0
-; SOFT-NEXT: beq .LBB29_11
+; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: bl __fixunssfti
+; SOFT-NEXT: mov r7, r1
+; SOFT-NEXT: str r3, [sp] @ 4-byte Spill
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB29_2
; SOFT-NEXT: @ %bb.1:
-; SOFT-NEXT: beq .LBB29_12
+; SOFT-NEXT: mov r0, r5
; SOFT-NEXT: .LBB29_2:
+; SOFT-NEXT: mvns r6, r6
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: mov r3, r6
; SOFT-NEXT: bne .LBB29_4
-; SOFT-NEXT: .LBB29_3:
-; SOFT-NEXT: mov r1, r7
+; SOFT-NEXT: @ %bb.3:
+; SOFT-NEXT: mov r3, r0
; SOFT-NEXT: .LBB29_4:
-; SOFT-NEXT: str r1, [sp] @ 4-byte Spill
-; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: bne .LBB29_6
; SOFT-NEXT: @ %bb.5:
-; SOFT-NEXT: mov r5, r7
+; SOFT-NEXT: mov r7, r5
; SOFT-NEXT: .LBB29_6:
-; SOFT-NEXT: ldr r1, .LCPI29_0
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: bl __aeabi_fcmpgt
-; SOFT-NEXT: mvns r3, r6
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: mov r0, r3
-; SOFT-NEXT: beq .LBB29_13
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: mov r1, r6
+; SOFT-NEXT: bne .LBB29_8
; SOFT-NEXT: @ %bb.7:
-; SOFT-NEXT: mov r1, r3
-; SOFT-NEXT: beq .LBB29_14
+; SOFT-NEXT: mov r1, r7
; SOFT-NEXT: .LBB29_8:
-; SOFT-NEXT: mov r2, r3
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB29_10
+; SOFT-NEXT: @ %bb.9:
+; SOFT-NEXT: mov r2, r5
+; SOFT-NEXT: .LBB29_10:
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: mov r7, r6
; SOFT-NEXT: beq .LBB29_15
-; SOFT-NEXT: .LBB29_9:
+; SOFT-NEXT: @ %bb.11:
+; SOFT-NEXT: cmp r5, #0
; SOFT-NEXT: beq .LBB29_16
-; SOFT-NEXT: .LBB29_10:
-; SOFT-NEXT: add sp, #12
-; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
-; SOFT-NEXT: .LBB29_11:
-; SOFT-NEXT: mov r3, r7
-; SOFT-NEXT: bne .LBB29_2
; SOFT-NEXT: .LBB29_12:
-; SOFT-NEXT: mov r2, r7
-; SOFT-NEXT: beq .LBB29_3
-; SOFT-NEXT: b .LBB29_4
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB29_14
; SOFT-NEXT: .LBB29_13:
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: mov r1, r3
-; SOFT-NEXT: bne .LBB29_8
+; SOFT-NEXT: ldr r6, [sp] @ 4-byte Reload
; SOFT-NEXT: .LBB29_14:
-; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload
-; SOFT-NEXT: mov r2, r3
-; SOFT-NEXT: bne .LBB29_9
+; SOFT-NEXT: mov r0, r3
+; SOFT-NEXT: mov r2, r7
+; SOFT-NEXT: mov r3, r6
+; SOFT-NEXT: add sp, #4
+; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .LBB29_15:
-; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT: bne .LBB29_10
+; SOFT-NEXT: mov r7, r2
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: bne .LBB29_12
; SOFT-NEXT: .LBB29_16:
-; SOFT-NEXT: ldr r3, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT: add sp, #12
-; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
+; SOFT-NEXT: str r5, [sp] @ 4-byte Spill
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB29_13
+; SOFT-NEXT: b .LBB29_14
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.17:
; SOFT-NEXT: .LCPI29_0:
diff --git a/llvm/test/CodeGen/ARM/funnel-shift-rot.ll b/llvm/test/CodeGen/ARM/funnel-shift-rot.ll
index a1b6847d623d00..de5bd2a7040b99 100644
--- a/llvm/test/CodeGen/ARM/funnel-shift-rot.ll
+++ b/llvm/test/CodeGen/ARM/funnel-shift-rot.ll
@@ -71,12 +71,12 @@ define i64 @rotl_i64(i64 %x, i64 %z) {
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
-; CHECK-NEXT: tst r2, #32
-; CHECK-NEXT: mov r3, r0
+; CHECK-NEXT: ands r3, r2, #32
; CHECK-NEXT: and r12, r2, #31
+; CHECK-NEXT: mov r3, r0
+; CHECK-NEXT: mov r4, #31
; CHECK-NEXT: movne r3, r1
; CHECK-NEXT: movne r1, r0
-; CHECK-NEXT: mov r4, #31
; CHECK-NEXT: bic r2, r4, r2
; CHECK-NEXT: lsl lr, r3, r12
; CHECK-NEXT: lsr r0, r1, #1
@@ -206,7 +206,7 @@ define i32 @rotr_i32(i32 %x, i32 %z) {
define i64 @rotr_i64(i64 %x, i64 %z) {
; CHECK-LABEL: rotr_i64:
; CHECK: @ %bb.0:
-; CHECK-NEXT: tst r2, #32
+; CHECK-NEXT: ands r3, r2, #32
; CHECK-NEXT: mov r3, r1
; CHECK-NEXT: moveq r3, r0
; CHECK-NEXT: moveq r0, r1
diff --git a/llvm/test/CodeGen/ARM/funnel-shift.ll b/llvm/test/CodeGen/ARM/funnel-shift.ll
index 191155ae30f3e3..5a7c4384428e1a 100644
--- a/llvm/test/CodeGen/ARM/funnel-shift.ll
+++ b/llvm/test/CodeGen/ARM/funnel-shift.ll
@@ -47,67 +47,69 @@ declare i37 @llvm.fshl.i37(i37, i37, i37)
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; SCALAR-LABEL: fshl_i37:
; SCALAR: @ %bb.0:
-; SCALAR-NEXT: .save {r4, r5, r6, r7, r8, lr}
-; SCALAR-NEXT: push {r4, r5, r6, r7, r8, lr}
+; SCALAR-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; SCALAR-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
; SCALAR-NEXT: mov r8, r0
-; SCALAR-NEXT: ldr r0, [sp, #28]
+; SCALAR-NEXT: ldr r0, [sp, #36]
; SCALAR-NEXT: mov r4, r1
-; SCALAR-NEXT: mov r5, r3
+; SCALAR-NEXT: mov r6, r3
; SCALAR-NEXT: and r1, r0, #31
-; SCALAR-NEXT: ldr r0, [sp, #24]
-; SCALAR-NEXT: mov r6, r2
+; SCALAR-NEXT: ldr r0, [sp, #32]
+; SCALAR-NEXT: mov r9, r2
; SCALAR-NEXT: mov r2, #37
; SCALAR-NEXT: mov r3, #0
; SCALAR-NEXT: bl __aeabi_uldivmod
-; SCALAR-NEXT: lsl r0, r5, #27
-; SCALAR-NEXT: tst r2, #32
-; SCALAR-NEXT: orr r0, r0, r6, lsr #5
-; SCALAR-NEXT: mov r1, r8
-; SCALAR-NEXT: and r3, r2, #31
+; SCALAR-NEXT: lsl r1, r6, #27
+; SCALAR-NEXT: ands r0, r2, #32
+; SCALAR-NEXT: orr r1, r1, r9, lsr #5
+; SCALAR-NEXT: mov r3, r8
+; SCALAR-NEXT: and r6, r2, #31
; SCALAR-NEXT: mov r7, #31
-; SCALAR-NEXT: movne r1, r0
-; SCALAR-NEXT: lslne r0, r6, #27
+; SCALAR-NEXT: movne r3, r1
+; SCALAR-NEXT: cmp r0, #0
+; SCALAR-NEXT: lslne r1, r9, #27
; SCALAR-NEXT: bic r2, r7, r2
-; SCALAR-NEXT: lsl r5, r1, r3
-; SCALAR-NEXT: lsr r0, r0, #1
; SCALAR-NEXT: movne r4, r8
-; SCALAR-NEXT: lsr r1, r1, #1
-; SCALAR-NEXT: lsl r3, r4, r3
+; SCALAR-NEXT: lsl r5, r3, r6
+; SCALAR-NEXT: lsr r0, r1, #1
+; SCALAR-NEXT: lsl r1, r4, r6
+; SCALAR-NEXT: lsr r3, r3, #1
; SCALAR-NEXT: orr r0, r5, r0, lsr r2
-; SCALAR-NEXT: orr r1, r3, r1, lsr r2
-; SCALAR-NEXT: pop {r4, r5, r6, r7, r8, pc}
+; SCALAR-NEXT: orr r1, r1, r3, lsr r2
+; SCALAR-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
;
; NEON-LABEL: fshl_i37:
; NEON: @ %bb.0:
-; NEON-NEXT: .save {r4, r5, r6, r7, r8, lr}
-; NEON-NEXT: push {r4, r5, r6, r7, r8, lr}
+; NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
; NEON-NEXT: mov r4, r1
; NEON-NEXT: ldr r1, [sp, #28]
-; NEON-NEXT: mov r8, r0
+; NEON-NEXT: mov r6, r0
; NEON-NEXT: ldr r0, [sp, #24]
; NEON-NEXT: and r1, r1, #31
; NEON-NEXT: mov r5, r3
-; NEON-NEXT: mov r6, r2
+; NEON-NEXT: mov r7, r2
; NEON-NEXT: mov r2, #37
; NEON-NEXT: mov r3, #0
; NEON-NEXT: bl __aeabi_uldivmod
+; NEON-NEXT: mov r0, #31
+; NEON-NEXT: bic r1, r0, r2
; NEON-NEXT: lsl r0, r5, #27
-; NEON-NEXT: tst r2, #32
-; NEON-NEXT: orr r0, r0, r6, lsr #5
-; NEON-NEXT: mov r1, r8
-; NEON-NEXT: and r3, r2, #31
-; NEON-NEXT: mov r7, #31
-; NEON-NEXT: movne r1, r0
-; NEON-NEXT: lslne r0, r6, #27
-; NEON-NEXT: bic r2, r7, r2
-; NEON-NEXT: lsl r5, r1, r3
+; NEON-NEXT: ands r12, r2, #32
+; NEON-NEXT: orr r0, r0, r7, lsr #5
+; NEON-NEXT: mov r5, r6
+; NEON-NEXT: and r2, r2, #31
+; NEON-NEXT: movne r5, r0
+; NEON-NEXT: lslne r0, r7, #27
+; NEON-NEXT: cmp r12, #0
+; NEON-NEXT: lsl r3, r5, r2
; NEON-NEXT: lsr r0, r0, #1
-; NEON-NEXT: movne r4, r8
-; NEON-NEXT: lsr r1, r1, #1
-; NEON-NEXT: lsl r3, r4, r3
-; NEON-NEXT: orr r0, r5, r0, lsr r2
-; NEON-NEXT: orr r1, r3, r1, lsr r2
-; NEON-NEXT: pop {r4, r5, r6, r7, r8, pc}
+; NEON-NEXT: movne r4, r6
+; NEON-NEXT: orr r0, r3, r0, lsr r1
+; NEON-NEXT: lsr r3, r5, #1
+; NEON-NEXT: lsl r2, r4, r2
+; NEON-NEXT: orr r1, r2, r3, lsr r1
+; NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
%f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
ret i37 %f
}
@@ -235,69 +237,71 @@ declare i37 @llvm.fshr.i37(i37, i37, i37)
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; SCALAR-LABEL: fshr_i37:
; SCALAR: @ %bb.0:
-; SCALAR-NEXT: .save {r4, r5, r6, r7, r11, lr}
-; SCALAR-NEXT: push {r4, r5, r6, r7, r11, lr}
-; SCALAR-NEXT: mov r5, r0
+; SCALAR-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; SCALAR-NEXT: push {r4, r5, r6, r7, r8, lr}
+; SCALAR-NEXT: mov r8, r0
; SCALAR-NEXT: ldr r0, [sp, #28]
; SCALAR-NEXT: mov r4, r1
-; SCALAR-NEXT: mov r6, r3
+; SCALAR-NEXT: mov r5, r3
; SCALAR-NEXT: and r1, r0, #31
; SCALAR-NEXT: ldr r0, [sp, #24]
; SCALAR-NEXT: mov r7, r2
; SCALAR-NEXT: mov r2, #37
; SCALAR-NEXT: mov r3, #0
; SCALAR-NEXT: bl __aeabi_uldivmod
+; SCALAR-NEXT: lsl r3, r5, #27
; SCALAR-NEXT: add r0, r2, #27
-; SCALAR-NEXT: lsl r2, r6, #27
-; SCALAR-NEXT: orr r2, r2, r7, lsr #5
+; SCALAR-NEXT: orr r3, r3, r7, lsr #5
+; SCALAR-NEXT: ands r2, r0, #32
+; SCALAR-NEXT: mov r5, r8
; SCALAR-NEXT: mov r1, #31
-; SCALAR-NEXT: tst r0, #32
-; SCALAR-NEXT: mov r3, r5
-; SCALAR-NEXT: moveq r3, r2
-; SCALAR-NEXT: lsleq r2, r7, #27
+; SCALAR-NEXT: moveq r5, r3
+; SCALAR-NEXT: lsleq r3, r7, #27
+; SCALAR-NEXT: cmp r2, #0
; SCALAR-NEXT: bic r1, r1, r0
+; SCALAR-NEXT: moveq r4, r8
+; SCALAR-NEXT: lsl r6, r5, #1
; SCALAR-NEXT: and r7, r0, #31
-; SCALAR-NEXT: lsl r6, r3, #1
-; SCALAR-NEXT: moveq r4, r5
-; SCALAR-NEXT: lsl r6, r6, r1
-; SCALAR-NEXT: orr r0, r6, r2, lsr r7
; SCALAR-NEXT: lsl r2, r4, #1
+; SCALAR-NEXT: lsl r6, r6, r1
; SCALAR-NEXT: lsl r1, r2, r1
-; SCALAR-NEXT: orr r1, r1, r3, lsr r7
-; SCALAR-NEXT: pop {r4, r5, r6, r7, r11, pc}
+; SCALAR-NEXT: orr r0, r6, r3, lsr r7
+; SCALAR-NEXT: orr r1, r1, r5, lsr r7
+; SCALAR-NEXT: pop {r4, r5, r6, r7, r8, pc}
;
; NEON-LABEL: fshr_i37:
; NEON: @ %bb.0:
-; NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
-; NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
+; NEON-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; NEON-NEXT: push {r4, r5, r6, r7, r8, lr}
; NEON-NEXT: mov r4, r1
; NEON-NEXT: ldr r1, [sp, #28]
-; NEON-NEXT: mov r5, r0
+; NEON-NEXT: mov r8, r0
; NEON-NEXT: ldr r0, [sp, #24]
; NEON-NEXT: and r1, r1, #31
-; NEON-NEXT: mov r6, r3
+; NEON-NEXT: mov r5, r3
; NEON-NEXT: mov r7, r2
; NEON-NEXT: mov r2, #37
; NEON-NEXT: mov r3, #0
; NEON-NEXT: bl __aeabi_uldivmod
+; NEON-NEXT: lsl r3, r5, #27
; NEON-NEXT: add r0, r2, #27
-; NEON-NEXT: lsl r2, r6, #27
-; NEON-NEXT: orr r2, r2, r7, lsr #5
+; NEON-NEXT: orr r3, r3, r7, lsr #5
+; NEON-NEXT: ands r2, r0, #32
+; NEON-NEXT: mov r5, r8
; NEON-NEXT: mov r1, #31
-; NEON-NEXT: tst r0, #32
-; NEON-NEXT: mov r3, r5
-; NEON-NEXT: moveq r3, r2
-; NEON-NEXT: lsleq r2, r7, #27
+; NEON-NEXT: moveq r5, r3
+; NEON-NEXT: lsleq r3, r7, #27
+; NEON-NEXT: cmp r2, #0
; NEON-NEXT: bic r1, r1, r0
+; NEON-NEXT: moveq r4, r8
+; NEON-NEXT: lsl r6, r5, #1
; NEON-NEXT: and r7, r0, #31
-; NEON-NEXT: lsl r6, r3, #1
-; NEON-NEXT: moveq r4, r5
-; NEON-NEXT: lsl r6, r6, r1
-; NEON-NEXT: orr r0, r6, r2, lsr r7
; NEON-NEXT: lsl r2, r4, #1
+; NEON-NEXT: lsl r6, r6, r1
; NEON-NEXT: lsl r1, r2, r1
-; NEON-NEXT: orr r1, r1, r3, lsr r7
-; NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
+; NEON-NEXT: orr r0, r6, r3, lsr r7
+; NEON-NEXT: orr r1, r1, r5, lsr r7
+; NEON-NEXT: pop {r4, r5, r6, r7, r8, pc}
%f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
ret i37 %f
}
diff --git a/llvm/test/CodeGen/ARM/ifcvt1.ll b/llvm/test/CodeGen/ARM/ifcvt1.ll
index 6d59869bc102ee..d419cbc48fc488 100644
--- a/llvm/test/CodeGen/ARM/ifcvt1.ll
+++ b/llvm/test/CodeGen/ARM/ifcvt1.ll
@@ -13,10 +13,10 @@ define i32 @t1(i32 %a, i32 %b) {
;
; SWIFT-LABEL: t1:
; SWIFT: @ %bb.0: @ %common.ret
+; SWIFT-NEXT: mov r2, #1
; SWIFT-NEXT: cmp r0, #0
-; SWIFT-NEXT: mov r0, #1
-; SWIFT-NEXT: mvneq r0, #0
-; SWIFT-NEXT: add r0, r1, r0
+; SWIFT-NEXT: mvneq r2, #0
+; SWIFT-NEXT: add r0, r1, r2
; SWIFT-NEXT: bx lr
%tmp2 = icmp eq i32 %a, 0
br i1 %tmp2, label %cond_false, label %cond_true
diff --git a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
index dd33b09fe83004..4003af5d44be81 100644
--- a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
+++ b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
@@ -494,14 +494,14 @@ define <4 x float> @fminnumv432_intrinsic(<4 x float> %x, <4 x float> %y) {
; ARMV7-NEXT: vld1.64 {d0, d1}, [r12]
; ARMV7-NEXT: vmov d3, r2, r3
; ARMV7-NEXT: vmov d2, r0, r1
-; ARMV7-NEXT: vcmp.f32 s6, s2
-; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
; ARMV7-NEXT: vcmp.f32 s7, s3
-; ARMV7-NEXT: vmovlt.f32 s2, s6
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV7-NEXT: vcmp.f32 s5, s1
+; ARMV7-NEXT: vcmp.f32 s6, s2
; ARMV7-NEXT: vmovlt.f32 s3, s7
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
+; ARMV7-NEXT: vcmp.f32 s5, s1
+; ARMV7-NEXT: vmovlt.f32 s2, s6
+; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
; ARMV7-NEXT: vcmp.f32 s4, s0
; ARMV7-NEXT: vmovlt.f32 s1, s5
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
@@ -676,14 +676,14 @@ define <4 x float> @fmaxnumv432_intrinsic(<4 x float> %x, <4 x float> %y) {
; ARMV7-NEXT: vld1.64 {d0, d1}, [r12]
; ARMV7-NEXT: vmov d3, r2, r3
; ARMV7-NEXT: vmov d2, r0, r1
-; ARMV7-NEXT: vcmp.f32 s6, s2
-; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
; ARMV7-NEXT: vcmp.f32 s7, s3
-; ARMV7-NEXT: vmovgt.f32 s2, s6
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV7-NEXT: vcmp.f32 s5, s1
+; ARMV7-NEXT: vcmp.f32 s6, s2
; ARMV7-NEXT: vmovgt.f32 s3, s7
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
+; ARMV7-NEXT: vcmp.f32 s5, s1
+; ARMV7-NEXT: vmovgt.f32 s2, s6
+; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
; ARMV7-NEXT: vcmp.f32 s4, s0
; ARMV7-NEXT: vmovgt.f32 s1, s5
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
@@ -760,16 +760,16 @@ define <4 x float> @fmaxnumv432_zero_intrinsic(<4 x float> %x) {
; ARMV7-NEXT: vmov d3, r2, r3
; ARMV7-NEXT: vldr s0, .LCPI21_0
; ARMV7-NEXT: vmov d2, r0, r1
-; ARMV7-NEXT: vcmp.f32 s6, #0
-; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV7-NEXT: vmov.f32 s2, s0
; ARMV7-NEXT: vcmp.f32 s7, #0
-; ARMV7-NEXT: vmovgt.f32 s2, s6
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
; ARMV7-NEXT: vmov.f32 s3, s0
-; ARMV7-NEXT: vcmp.f32 s5, #0
+; ARMV7-NEXT: vcmp.f32 s6, #0
; ARMV7-NEXT: vmovgt.f32 s3, s7
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
+; ARMV7-NEXT: vmov.f32 s2, s0
+; ARMV7-NEXT: vcmp.f32 s5, #0
+; ARMV7-NEXT: vmovgt.f32 s2, s6
+; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
; ARMV7-NEXT: vmov.f32 s1, s0
; ARMV7-NEXT: vcmp.f32 s4, #0
; ARMV7-NEXT: vmovgt.f32 s1, s5
@@ -812,18 +812,18 @@ define <4 x float> @fmaxnumv432_minus_zero_intrinsic(<4 x float> %x) {
; ARMV7-NEXT: vldr s0, .LCPI22_0
; ARMV7-NEXT: vmov d3, r2, r3
; ARMV7-NEXT: vmov d2, r0, r1
-; ARMV7-NEXT: vcmp.f32 s6, s0
-; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
; ARMV7-NEXT: vcmp.f32 s7, s0
-; ARMV7-NEXT: vmov.f32 s2, s0
-; ARMV7-NEXT: vmovgt.f32 s2, s6
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV7-NEXT: vcmp.f32 s5, s0
; ARMV7-NEXT: vmov.f32 s3, s0
+; ARMV7-NEXT: vcmp.f32 s6, s0
; ARMV7-NEXT: vmovgt.f32 s3, s7
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV7-NEXT: vcmp.f32 s4, s0
+; ARMV7-NEXT: vmov.f32 s2, s0
+; ARMV7-NEXT: vcmp.f32 s5, s0
+; ARMV7-NEXT: vmovgt.f32 s2, s6
+; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
; ARMV7-NEXT: vmov.f32 s1, s0
+; ARMV7-NEXT: vcmp.f32 s4, s0
; ARMV7-NEXT: vmovgt.f32 s1, s5
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
; ARMV7-NEXT: vmovgt.f32 s0, s4
@@ -933,8 +933,8 @@ define <2 x double> @fminnumv264_intrinsic(<2 x double> %x, <2 x double> %y) {
; ARMV8M-NEXT: vselgt.f64 d0, d0, d2
; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr
; ARMV8M-NEXT: vmov r0, r1, d0
-; ARMV8M-NEXT: vselgt.f64 d0, d1, d3
-; ARMV8M-NEXT: vmov r2, r3, d0
+; ARMV8M-NEXT: vselgt.f64 d1, d1, d3
+; ARMV8M-NEXT: vmov r2, r3, d1
; ARMV8M-NEXT: bx lr
%a = call nnan <2 x double> @llvm.minnum.v2f64(<2 x double> %x, <2 x double> %y)
ret <2 x double> %a
@@ -981,8 +981,8 @@ define <2 x double> @fminnumv264_nsz_intrinsic(<2 x double> %x, <2 x double> %y)
; ARMV8M-NEXT: vselgt.f64 d0, d0, d2
; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr
; ARMV8M-NEXT: vmov r0, r1, d0
-; ARMV8M-NEXT: vselgt.f64 d0, d1, d3
-; ARMV8M-NEXT: vmov r2, r3, d0
+; ARMV8M-NEXT: vselgt.f64 d1, d1, d3
+; ARMV8M-NEXT: vmov r2, r3, d1
; ARMV8M-NEXT: bx lr
%a = call nnan nsz <2 x double> @llvm.minnum.v2f64(<2 x double> %x, <2 x double> %y)
ret <2 x double> %a
@@ -1225,26 +1225,26 @@ define <2 x double> @fmaxnumv264_zero_intrinsic(<2 x double> %x) {
; ARMV8M-LABEL: fmaxnumv264_zero_intrinsic:
; ARMV8M: @ %bb.0:
; ARMV8M-NEXT: vmov d2, r0, r1
-; ARMV8M-NEXT: vldr d0, .LCPI30_0
+; ARMV8M-NEXT: vldr d1, .LCPI30_1
; ARMV8M-NEXT: vcmp.f64 d2, #0
; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8M-NEXT: vmov d1, r2, r3
-; ARMV8M-NEXT: vcmp.f64 d1, d0
-; ARMV8M-NEXT: vldr d3, .LCPI30_1
-; ARMV8M-NEXT: vselgt.f64 d2, d2, d3
+; ARMV8M-NEXT: vmov d3, r2, r3
+; ARMV8M-NEXT: vcmp.f64 d3, d1
+; ARMV8M-NEXT: vldr d0, .LCPI30_0
+; ARMV8M-NEXT: vselgt.f64 d0, d2, d0
; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8M-NEXT: vmov r0, r1, d2
-; ARMV8M-NEXT: vselgt.f64 d0, d1, d0
-; ARMV8M-NEXT: vmov r2, r3, d0
+; ARMV8M-NEXT: vmov r0, r1, d0
+; ARMV8M-NEXT: vselgt.f64 d1, d3, d1
+; ARMV8M-NEXT: vmov r2, r3, d1
; ARMV8M-NEXT: bx lr
; ARMV8M-NEXT: .p2align 3
; ARMV8M-NEXT: @ %bb.1:
; ARMV8M-NEXT: .LCPI30_0:
-; ARMV8M-NEXT: .long 0 @ double -0
-; ARMV8M-NEXT: .long 2147483648
-; ARMV8M-NEXT: .LCPI30_1:
; ARMV8M-NEXT: .long 0 @ double 0
; ARMV8M-NEXT: .long 0
+; ARMV8M-NEXT: .LCPI30_1:
+; ARMV8M-NEXT: .long 0 @ double -0
+; ARMV8M-NEXT: .long 2147483648
%a = call nnan <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double><double 0.0, double -0.0>)
ret <2 x double> %a
}
diff --git a/llvm/test/CodeGen/ARM/neon_vabd.ll b/llvm/test/CodeGen/ARM/neon_vabd.ll
index ffc72b242f829f..8a268d46304cf9 100644
--- a/llvm/test/CodeGen/ARM/neon_vabd.ll
+++ b/llvm/test/CodeGen/ARM/neon_vabd.ll
@@ -144,25 +144,25 @@ define <2 x i64> @sabd_2d(<2 x i64> %a, <2 x i64> %b) {
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEXT: push {r4, r5, r6, lr}
-; CHECK-NEXT: vmov r0, r1, d1
+; CHECK-NEXT: vmov r0, r12, d0
; CHECK-NEXT: mov r6, #0
-; CHECK-NEXT: vmov r2, r3, d3
-; CHECK-NEXT: vmov r12, lr, d0
-; CHECK-NEXT: vmov r4, r5, d2
+; CHECK-NEXT: vmov r2, r3, d2
+; CHECK-NEXT: vmov r1, lr, d1
+; CHECK-NEXT: vmov r4, r5, d3
; CHECK-NEXT: vsub.i64 q8, q0, q1
; CHECK-NEXT: subs r0, r2, r0
-; CHECK-NEXT: sbcs r0, r3, r1
+; CHECK-NEXT: sbcs r0, r3, r12
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mvnne r0, #0
-; CHECK-NEXT: subs r1, r4, r12
+; CHECK-NEXT: subs r1, r4, r1
; CHECK-NEXT: sbcs r1, r5, lr
-; CHECK-NEXT: vdup.32 d19, r0
; CHECK-NEXT: movwlt r6, #1
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: mvnne r6, #0
-; CHECK-NEXT: vdup.32 d18, r6
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: vdup.32 d19, r6
+; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: vdup.32 d18, r0
; CHECK-NEXT: veor q8, q8, q9
; CHECK-NEXT: vsub.i64 q0, q9, q8
; CHECK-NEXT: pop {r4, r5, r6, pc}
@@ -475,25 +475,25 @@ define <2 x i64> @smaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEXT: push {r4, r5, r6, lr}
-; CHECK-NEXT: vmov r0, r1, d1
+; CHECK-NEXT: vmov r0, r12, d0
; CHECK-NEXT: mov r6, #0
-; CHECK-NEXT: vmov r2, r3, d3
-; CHECK-NEXT: vmov r12, lr, d0
-; CHECK-NEXT: vmov r4, r5, d2
+; CHECK-NEXT: vmov r2, r3, d2
+; CHECK-NEXT: vmov r1, lr, d1
+; CHECK-NEXT: vmov r4, r5, d3
; CHECK-NEXT: vsub.i64 q8, q0, q1
; CHECK-NEXT: subs r0, r2, r0
-; CHECK-NEXT: sbcs r0, r3, r1
+; CHECK-NEXT: sbcs r0, r3, r12
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mvnne r0, #0
-; CHECK-NEXT: subs r1, r4, r12
+; CHECK-NEXT: subs r1, r4, r1
; CHECK-NEXT: sbcs r1, r5, lr
-; CHECK-NEXT: vdup.32 d19, r0
; CHECK-NEXT: movwlt r6, #1
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: mvnne r6, #0
-; CHECK-NEXT: vdup.32 d18, r6
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: vdup.32 d19, r6
+; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: vdup.32 d18, r0
; CHECK-NEXT: veor q8, q8, q9
; CHECK-NEXT: vsub.i64 q0, q9, q8
; CHECK-NEXT: pop {r4, r5, r6, pc}
diff --git a/llvm/test/CodeGen/ARM/overflow-intrinsic-optimizations.ll b/llvm/test/CodeGen/ARM/overflow-intrinsic-optimizations.ll
index 2631189979e37e..198927d1da3a45 100644
--- a/llvm/test/CodeGen/ARM/overflow-intrinsic-optimizations.ll
+++ b/llvm/test/CodeGen/ARM/overflow-intrinsic-optimizations.ll
@@ -164,9 +164,9 @@ cont2:
define void @extern_loop(i32 %n) local_unnamed_addr #0 {
; Do not replace the compare around the clobbering call.
-; CHECK: bl external_fn
-; CHECK-NEXT: adds
-; CHECK-NEXT: bvs
+; CHECK: add {{r[0-9]+}}, {{r[0-9]+}}, #1
+; CHECK-NEXT: bl external_fn
+; CHECK: cmp
entry:
%0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %n, i32 1)
%1 = extractvalue { i32, i1 } %0, 1
diff --git a/llvm/test/CodeGen/ARM/sadd_sat.ll b/llvm/test/CodeGen/ARM/sadd_sat.ll
index b8f7a2daaeabab..0060b4458081bc 100644
--- a/llvm/test/CodeGen/ARM/sadd_sat.ll
+++ b/llvm/test/CodeGen/ARM/sadd_sat.ll
@@ -72,21 +72,22 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
; CHECK-T16-NEXT: adcs r3, r4
; CHECK-T16-NEXT: eors r4, r3
; CHECK-T16-NEXT: bics r4, r1
-; CHECK-T16-NEXT: asrs r0, r3, #31
-; CHECK-T16-NEXT: movs r1, #1
-; CHECK-T16-NEXT: lsls r1, r1, #31
-; CHECK-T16-NEXT: eors r1, r0
+; CHECK-T16-NEXT: asrs r1, r3, #31
; CHECK-T16-NEXT: cmp r4, #0
-; CHECK-T16-NEXT: bpl .LBB1_3
+; CHECK-T16-NEXT: mov r0, r1
+; CHECK-T16-NEXT: bmi .LBB1_2
; CHECK-T16-NEXT: @ %bb.1:
-; CHECK-T16-NEXT: bpl .LBB1_4
+; CHECK-T16-NEXT: mov r0, r2
; CHECK-T16-NEXT: .LBB1_2:
+; CHECK-T16-NEXT: cmp r4, #0
+; CHECK-T16-NEXT: bmi .LBB1_4
+; CHECK-T16-NEXT: @ %bb.3:
+; CHECK-T16-NEXT: mov r1, r3
; CHECK-T16-NEXT: pop {r4, pc}
-; CHECK-T16-NEXT: .LBB1_3:
-; CHECK-T16-NEXT: mov r0, r2
-; CHECK-T16-NEXT: bmi .LBB1_2
; CHECK-T16-NEXT: .LBB1_4:
-; CHECK-T16-NEXT: mov r1, r3
+; CHECK-T16-NEXT: movs r2, #1
+; CHECK-T16-NEXT: lsls r2, r2, #31
+; CHECK-T16-NEXT: eors r1, r2
; CHECK-T16-NEXT: pop {r4, pc}
;
; CHECK-T2-LABEL: func2:
@@ -127,22 +128,23 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
; CHECK-T15TE-NEXT: adcs r3, r4
; CHECK-T15TE-NEXT: eors r4, r3
; CHECK-T15TE-NEXT: bics r4, r1
-; CHECK-T15TE-NEXT: asrs r0, r3, #31
-; CHECK-T15TE-NEXT: movs r1, #1
-; CHECK-T15TE-NEXT: lsls r1, r1, #31
-; CHECK-T15TE-NEXT: eors r1, r0
+; CHECK-T15TE-NEXT: asrs r1, r3, #31
; CHECK-T15TE-NEXT: cmp r4, #0
-; CHECK-T15TE-NEXT: bpl .LBB1_3
+; CHECK-T15TE-NEXT: mov r12, r1
+; CHECK-T15TE-NEXT: mov r0, r12
+; CHECK-T15TE-NEXT: bmi .LBB1_2
; CHECK-T15TE-NEXT: @ %bb.1:
-; CHECK-T15TE-NEXT: bpl .LBB1_4
+; CHECK-T15TE-NEXT: movs r0, r2
; CHECK-T15TE-NEXT: .LBB1_2:
+; CHECK-T15TE-NEXT: cmp r4, #0
+; CHECK-T15TE-NEXT: bmi .LBB1_4
+; CHECK-T15TE-NEXT: @ %bb.3:
+; CHECK-T15TE-NEXT: movs r1, r3
; CHECK-T15TE-NEXT: pop {r4, pc}
-; CHECK-T15TE-NEXT: .LBB1_3:
-; CHECK-T15TE-NEXT: mov r12, r2
-; CHECK-T15TE-NEXT: mov r0, r12
-; CHECK-T15TE-NEXT: bmi .LBB1_2
; CHECK-T15TE-NEXT: .LBB1_4:
-; CHECK-T15TE-NEXT: movs r1, r3
+; CHECK-T15TE-NEXT: movs r2, #1
+; CHECK-T15TE-NEXT: lsls r2, r2, #31
+; CHECK-T15TE-NEXT: eors r1, r2
; CHECK-T15TE-NEXT: pop {r4, pc}
%tmp = call i64 @llvm.sadd.sat.i64(i64 %x, i64 %y)
ret i64 %tmp
diff --git a/llvm/test/CodeGen/ARM/sadd_sat_plus.ll b/llvm/test/CodeGen/ARM/sadd_sat_plus.ll
index 0ddb64fc3f2d13..859aedc7a3f019 100644
--- a/llvm/test/CodeGen/ARM/sadd_sat_plus.ll
+++ b/llvm/test/CodeGen/ARM/sadd_sat_plus.ll
@@ -63,22 +63,22 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
; CHECK-T1-NEXT: adcs r3, r1
; CHECK-T1-NEXT: eors r1, r3
; CHECK-T1-NEXT: bics r1, r2
-; CHECK-T1-NEXT: asrs r0, r3, #31
-; CHECK-T1-NEXT: movs r2, #1
-; CHECK-T1-NEXT: lsls r2, r2, #31
-; CHECK-T1-NEXT: eors r2, r0
+; CHECK-T1-NEXT: asrs r2, r3, #31
; CHECK-T1-NEXT: cmp r1, #0
-; CHECK-T1-NEXT: bpl .LBB1_3
+; CHECK-T1-NEXT: mov r0, r2
+; CHECK-T1-NEXT: bmi .LBB1_2
; CHECK-T1-NEXT: @ %bb.1:
-; CHECK-T1-NEXT: bpl .LBB1_4
+; CHECK-T1-NEXT: mov r0, r4
; CHECK-T1-NEXT: .LBB1_2:
-; CHECK-T1-NEXT: mov r1, r2
+; CHECK-T1-NEXT: cmp r1, #0
+; CHECK-T1-NEXT: bmi .LBB1_4
+; CHECK-T1-NEXT: @ %bb.3:
+; CHECK-T1-NEXT: mov r1, r3
; CHECK-T1-NEXT: pop {r4, pc}
-; CHECK-T1-NEXT: .LBB1_3:
-; CHECK-T1-NEXT: mov r0, r4
-; CHECK-T1-NEXT: bmi .LBB1_2
; CHECK-T1-NEXT: .LBB1_4:
-; CHECK-T1-NEXT: mov r2, r3
+; CHECK-T1-NEXT: movs r1, #1
+; CHECK-T1-NEXT: lsls r1, r1, #31
+; CHECK-T1-NEXT: eors r2, r1
; CHECK-T1-NEXT: mov r1, r2
; CHECK-T1-NEXT: pop {r4, pc}
;
diff --git a/llvm/test/CodeGen/ARM/select.ll b/llvm/test/CodeGen/ARM/select.ll
index 48d6ee925d939e..496a6c0f5acbbe 100644
--- a/llvm/test/CodeGen/ARM/select.ll
+++ b/llvm/test/CodeGen/ARM/select.ll
@@ -320,11 +320,11 @@ define float @f10(i32 %a, i32 %b) nounwind uwtable readnone ssp {
;
; CHECK-VFP-LABEL: f10:
; CHECK-VFP: @ %bb.0:
-; CHECK-VFP-NEXT: vmov.f32 s0, #1.000000e+00
-; CHECK-VFP-NEXT: vldr s2, .LCPI9_0
+; CHECK-VFP-NEXT: vmov.f32 s2, #1.000000e+00
+; CHECK-VFP-NEXT: vldr s0, .LCPI9_0
; CHECK-VFP-NEXT: cmp r0, r1
-; CHECK-VFP-NEXT: vmoveq.f32 s2, s0
-; CHECK-VFP-NEXT: vmov r0, s2
+; CHECK-VFP-NEXT: vmoveq.f32 s0, s2
+; CHECK-VFP-NEXT: vmov r0, s0
; CHECK-VFP-NEXT: bx lr
; CHECK-VFP-NEXT: .p2align 2
; CHECK-VFP-NEXT: @ %bb.1:
@@ -333,12 +333,12 @@ define float @f10(i32 %a, i32 %b) nounwind uwtable readnone ssp {
;
; CHECK-NEON-LABEL: f10:
; CHECK-NEON: @ %bb.0:
-; CHECK-NEON-NEXT: vldr s2, LCPI9_0
-; CHECK-NEON-NEXT: vmov.f32 s0, #1.000000e+00
+; CHECK-NEON-NEXT: vldr s0, LCPI9_0
+; CHECK-NEON-NEXT: vmov.f32 s2, #1.000000e+00
; CHECK-NEON-NEXT: cmp r0, r1
; CHECK-NEON-NEXT: it eq
-; CHECK-NEON-NEXT: vmoveq.f32 s2, s0
-; CHECK-NEON-NEXT: vmov r0, s2
+; CHECK-NEON-NEXT: vmoveq.f32 s0, s2
+; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: bx lr
; CHECK-NEON-NEXT: .p2align 2
; CHECK-NEON-NEXT: @ %bb.1:
@@ -364,11 +364,11 @@ define float @f11(i32 %a, i32 %b) nounwind uwtable readnone ssp {
;
; CHECK-VFP-LABEL: f11:
; CHECK-VFP: @ %bb.0:
-; CHECK-VFP-NEXT: vmov.f32 s0, #-1.000000e+00
-; CHECK-VFP-NEXT: vldr s2, .LCPI10_0
+; CHECK-VFP-NEXT: vmov.f32 s2, #-1.000000e+00
+; CHECK-VFP-NEXT: vldr s0, .LCPI10_0
; CHECK-VFP-NEXT: cmp r0, r1
-; CHECK-VFP-NEXT: vmoveq.f32 s2, s0
-; CHECK-VFP-NEXT: vmov r0, s2
+; CHECK-VFP-NEXT: vmoveq.f32 s0, s2
+; CHECK-VFP-NEXT: vmov r0, s0
; CHECK-VFP-NEXT: bx lr
; CHECK-VFP-NEXT: .p2align 2
; CHECK-VFP-NEXT: @ %bb.1:
@@ -377,12 +377,12 @@ define float @f11(i32 %a, i32 %b) nounwind uwtable readnone ssp {
;
; CHECK-NEON-LABEL: f11:
; CHECK-NEON: @ %bb.0:
-; CHECK-NEON-NEXT: vldr s2, LCPI10_0
-; CHECK-NEON-NEXT: vmov.f32 s0, #-1.000000e+00
+; CHECK-NEON-NEXT: vldr s0, LCPI10_0
+; CHECK-NEON-NEXT: vmov.f32 s2, #-1.000000e+00
; CHECK-NEON-NEXT: cmp r0, r1
; CHECK-NEON-NEXT: it eq
-; CHECK-NEON-NEXT: vmoveq.f32 s2, s0
-; CHECK-NEON-NEXT: vmov r0, s2
+; CHECK-NEON-NEXT: vmoveq.f32 s0, s2
+; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: bx lr
; CHECK-NEON-NEXT: .p2align 2
; CHECK-NEON-NEXT: @ %bb.1:
@@ -406,11 +406,11 @@ define float @f12(i32 %a, i32 %b) nounwind uwtable readnone ssp {
;
; CHECK-VFP-LABEL: f12:
; CHECK-VFP: @ %bb.0:
-; CHECK-VFP-NEXT: vmov.f32 s0, #1.000000e+00
-; CHECK-VFP-NEXT: vldr s2, .LCPI11_0
+; CHECK-VFP-NEXT: vmov.f32 s2, #1.000000e+00
+; CHECK-VFP-NEXT: vldr s0, .LCPI11_0
; CHECK-VFP-NEXT: cmp r0, r1
-; CHECK-VFP-NEXT: vmoveq.f32 s2, s0
-; CHECK-VFP-NEXT: vmov r0, s2
+; CHECK-VFP-NEXT: vmoveq.f32 s0, s2
+; CHECK-VFP-NEXT: vmov r0, s0
; CHECK-VFP-NEXT: bx lr
; CHECK-VFP-NEXT: .p2align 2
; CHECK-VFP-NEXT: @ %bb.1:
@@ -419,12 +419,12 @@ define float @f12(i32 %a, i32 %b) nounwind uwtable readnone ssp {
;
; CHECK-NEON-LABEL: f12:
; CHECK-NEON: @ %bb.0:
-; CHECK-NEON-NEXT: vldr s2, LCPI11_0
-; CHECK-NEON-NEXT: vmov.f32 s0, #1.000000e+00
+; CHECK-NEON-NEXT: vldr s0, LCPI11_0
+; CHECK-NEON-NEXT: vmov.f32 s2, #1.000000e+00
; CHECK-NEON-NEXT: cmp r0, r1
; CHECK-NEON-NEXT: it eq
-; CHECK-NEON-NEXT: vmoveq.f32 s2, s0
-; CHECK-NEON-NEXT: vmov r0, s2
+; CHECK-NEON-NEXT: vmoveq.f32 s0, s2
+; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: bx lr
; CHECK-NEON-NEXT: .p2align 2
; CHECK-NEON-NEXT: @ %bb.1:
diff --git a/llvm/test/CodeGen/ARM/select_const.ll b/llvm/test/CodeGen/ARM/select_const.ll
index 180daa12e7c52d..df95af313eac66 100644
--- a/llvm/test/CodeGen/ARM/select_const.ll
+++ b/llvm/test/CodeGen/ARM/select_const.ll
@@ -645,13 +645,12 @@ define i64 @opaque_constant1(i1 %cond, i64 %x) {
; THUMB2-NEXT: push {r7, lr}
; THUMB2-NEXT: ands r12, r0, #1
; THUMB2-NEXT: mov.w lr, #1
-; THUMB2-NEXT: it ne
-; THUMB2-NEXT: movne.w r12, #1
-; THUMB2-NEXT: it ne
+; THUMB2-NEXT: itt ne
; THUMB2-NEXT: movne.w lr, #65536
+; THUMB2-NEXT: movne.w r12, #1
; THUMB2-NEXT: subs.w r0, lr, #1
-; THUMB2-NEXT: eor r3, r3, #1
; THUMB2-NEXT: sbc r1, r12, #0
+; THUMB2-NEXT: eor r3, r3, #1
; THUMB2-NEXT: eor r2, r2, #65537
; THUMB2-NEXT: orrs r2, r3
; THUMB2-NEXT: itt ne
@@ -689,12 +688,11 @@ define i64 @opaque_constant1(i1 %cond, i64 %x) {
; THUMB-NEXT: ldr r6, .LCPI24_0
; THUMB-NEXT: eors r2, r6
; THUMB-NEXT: orrs r2, r3
-; THUMB-NEXT: cmp r2, #0
; THUMB-NEXT: beq .LBB24_5
; THUMB-NEXT: @ %bb.4:
-; THUMB-NEXT: mov r12, r4
-; THUMB-NEXT: mov r1, r12
+; THUMB-NEXT: movs r1, r4
; THUMB-NEXT: .LBB24_5:
+; THUMB-NEXT: cmp r2, #0
; THUMB-NEXT: beq .LBB24_7
; THUMB-NEXT: @ %bb.6:
; THUMB-NEXT: movs r0, r5
diff --git a/llvm/test/CodeGen/ARM/shift-i64.ll b/llvm/test/CodeGen/ARM/shift-i64.ll
index c326ac1529b2f5..33e0ba1457e72b 100644
--- a/llvm/test/CodeGen/ARM/shift-i64.ll
+++ b/llvm/test/CodeGen/ARM/shift-i64.ll
@@ -52,14 +52,14 @@ define i64 @test_lshr(i64 %val, i64 %amt) {
define i64 @test_ashr(i64 %val, i64 %amt) {
; CHECK-LABEL: test_ashr:
; CHECK: @ %bb.0:
-; CHECK-NEXT: rsb r3, r2, #32
+; CHECK-NEXT: asr r3, r1, r2
+; CHECK-NEXT: subs r12, r2, #32
; CHECK-NEXT: lsr r0, r0, r2
-; CHECK-NEXT: orr r0, r0, r1, lsl r3
-; CHECK-NEXT: subs r3, r2, #32
-; CHECK-NEXT: asr r2, r1, r2
-; CHECK-NEXT: asrpl r2, r1, #31
-; CHECK-NEXT: asrpl r0, r1, r3
-; CHECK-NEXT: mov r1, r2
+; CHECK-NEXT: rsb r2, r2, #32
+; CHECK-NEXT: asrpl r3, r1, #31
+; CHECK-NEXT: orr r0, r0, r1, lsl r2
+; CHECK-NEXT: asrpl r0, r1, r12
+; CHECK-NEXT: mov r1, r3
; CHECK-NEXT: mov pc, lr
;
; EXPAND-LABEL: test_ashr:
diff --git a/llvm/test/CodeGen/ARM/ssub_sat.ll b/llvm/test/CodeGen/ARM/ssub_sat.ll
index 0978bfd1f0140b..1bafba3b49ed7d 100644
--- a/llvm/test/CodeGen/ARM/ssub_sat.ll
+++ b/llvm/test/CodeGen/ARM/ssub_sat.ll
@@ -71,21 +71,22 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
; CHECK-T1-NEXT: sbcs r2, r3
; CHECK-T1-NEXT: eors r4, r2
; CHECK-T1-NEXT: ands r4, r1
-; CHECK-T1-NEXT: asrs r0, r2, #31
-; CHECK-T1-NEXT: movs r1, #1
-; CHECK-T1-NEXT: lsls r1, r1, #31
-; CHECK-T1-NEXT: eors r1, r0
+; CHECK-T1-NEXT: asrs r1, r2, #31
; CHECK-T1-NEXT: cmp r4, #0
-; CHECK-T1-NEXT: bpl .LBB1_3
+; CHECK-T1-NEXT: mov r0, r1
+; CHECK-T1-NEXT: bmi .LBB1_2
; CHECK-T1-NEXT: @ %bb.1:
-; CHECK-T1-NEXT: bpl .LBB1_4
+; CHECK-T1-NEXT: mov r0, r5
; CHECK-T1-NEXT: .LBB1_2:
+; CHECK-T1-NEXT: cmp r4, #0
+; CHECK-T1-NEXT: bmi .LBB1_4
+; CHECK-T1-NEXT: @ %bb.3:
+; CHECK-T1-NEXT: mov r1, r2
; CHECK-T1-NEXT: pop {r4, r5, r7, pc}
-; CHECK-T1-NEXT: .LBB1_3:
-; CHECK-T1-NEXT: mov r0, r5
-; CHECK-T1-NEXT: bmi .LBB1_2
; CHECK-T1-NEXT: .LBB1_4:
-; CHECK-T1-NEXT: mov r1, r2
+; CHECK-T1-NEXT: movs r2, #1
+; CHECK-T1-NEXT: lsls r2, r2, #31
+; CHECK-T1-NEXT: eors r1, r2
; CHECK-T1-NEXT: pop {r4, r5, r7, pc}
;
; CHECK-T2-LABEL: func2:
diff --git a/llvm/test/CodeGen/ARM/ssub_sat_plus.ll b/llvm/test/CodeGen/ARM/ssub_sat_plus.ll
index adf6cafc6ccb87..0a2d1f0e7a240e 100644
--- a/llvm/test/CodeGen/ARM/ssub_sat_plus.ll
+++ b/llvm/test/CodeGen/ARM/ssub_sat_plus.ll
@@ -65,22 +65,22 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
; CHECK-T1-NEXT: sbcs r3, r2
; CHECK-T1-NEXT: eors r1, r3
; CHECK-T1-NEXT: ands r1, r5
-; CHECK-T1-NEXT: asrs r0, r3, #31
-; CHECK-T1-NEXT: movs r2, #1
-; CHECK-T1-NEXT: lsls r2, r2, #31
-; CHECK-T1-NEXT: eors r2, r0
+; CHECK-T1-NEXT: asrs r2, r3, #31
; CHECK-T1-NEXT: cmp r1, #0
-; CHECK-T1-NEXT: bpl .LBB1_3
+; CHECK-T1-NEXT: mov r0, r2
+; CHECK-T1-NEXT: bmi .LBB1_2
; CHECK-T1-NEXT: @ %bb.1:
-; CHECK-T1-NEXT: bpl .LBB1_4
+; CHECK-T1-NEXT: mov r0, r4
; CHECK-T1-NEXT: .LBB1_2:
-; CHECK-T1-NEXT: mov r1, r2
+; CHECK-T1-NEXT: cmp r1, #0
+; CHECK-T1-NEXT: bmi .LBB1_4
+; CHECK-T1-NEXT: @ %bb.3:
+; CHECK-T1-NEXT: mov r1, r3
; CHECK-T1-NEXT: pop {r4, r5, r7, pc}
-; CHECK-T1-NEXT: .LBB1_3:
-; CHECK-T1-NEXT: mov r0, r4
-; CHECK-T1-NEXT: bmi .LBB1_2
; CHECK-T1-NEXT: .LBB1_4:
-; CHECK-T1-NEXT: mov r2, r3
+; CHECK-T1-NEXT: movs r1, #1
+; CHECK-T1-NEXT: lsls r1, r1, #31
+; CHECK-T1-NEXT: eors r2, r1
; CHECK-T1-NEXT: mov r1, r2
; CHECK-T1-NEXT: pop {r4, r5, r7, pc}
;
diff --git a/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll b/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll
index fb966c29f39a2e..046bbbde686426 100644
--- a/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll
+++ b/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll
@@ -136,8 +136,8 @@ define float @float_sel(i32 %a, i32 %b, float %x, float %y) {
;
; CHECK-V8-LABEL: float_sel:
; CHECK-V8: @ %bb.0: @ %entry
-; CHECK-V8-NEXT: subs r0, r0, r1
; CHECK-V8-NEXT: vmov s0, r3
+; CHECK-V8-NEXT: subs r0, r0, r1
; CHECK-V8-NEXT: vmov s2, r2
; CHECK-V8-NEXT: vseleq.f32 s0, s2, s0
; CHECK-V8-NEXT: vmov r0, s0
diff --git a/llvm/test/CodeGen/ARM/uadd_sat.ll b/llvm/test/CodeGen/ARM/uadd_sat.ll
index 5549d9c6c29c03..39c79f4104e6e6 100644
--- a/llvm/test/CodeGen/ARM/uadd_sat.ll
+++ b/llvm/test/CodeGen/ARM/uadd_sat.ll
@@ -45,19 +45,21 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
; CHECK-T1-NEXT: movs r5, #0
; CHECK-T1-NEXT: adds r4, r0, r2
; CHECK-T1-NEXT: adcs r1, r3
-; CHECK-T1-NEXT: mov r0, r5
-; CHECK-T1-NEXT: adcs r0, r5
+; CHECK-T1-NEXT: mov r3, r5
+; CHECK-T1-NEXT: adcs r3, r5
; CHECK-T1-NEXT: mvns r2, r5
-; CHECK-T1-NEXT: cmp r0, #0
+; CHECK-T1-NEXT: cmp r3, #0
; CHECK-T1-NEXT: mov r0, r2
; CHECK-T1-NEXT: beq .LBB1_3
; CHECK-T1-NEXT: @ %bb.1:
+; CHECK-T1-NEXT: cmp r3, #0
; CHECK-T1-NEXT: beq .LBB1_4
; CHECK-T1-NEXT: .LBB1_2:
; CHECK-T1-NEXT: mov r1, r2
; CHECK-T1-NEXT: pop {r4, r5, r7, pc}
; CHECK-T1-NEXT: .LBB1_3:
; CHECK-T1-NEXT: mov r0, r4
+; CHECK-T1-NEXT: cmp r3, #0
; CHECK-T1-NEXT: bne .LBB1_2
; CHECK-T1-NEXT: .LBB1_4:
; CHECK-T1-NEXT: mov r2, r1
diff --git a/llvm/test/CodeGen/ARM/uadd_sat_plus.ll b/llvm/test/CodeGen/ARM/uadd_sat_plus.ll
index ffacba8cf01240..451b32f7304240 100644
--- a/llvm/test/CodeGen/ARM/uadd_sat_plus.ll
+++ b/llvm/test/CodeGen/ARM/uadd_sat_plus.ll
@@ -44,29 +44,31 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
; CHECK-T1-LABEL: func64:
; CHECK-T1: @ %bb.0:
-; CHECK-T1-NEXT: .save {r4, lr}
-; CHECK-T1-NEXT: push {r4, lr}
-; CHECK-T1-NEXT: movs r4, #0
-; CHECK-T1-NEXT: ldr r2, [sp, #12]
-; CHECK-T1-NEXT: ldr r3, [sp, #8]
+; CHECK-T1-NEXT: .save {r4, r5, r7, lr}
+; CHECK-T1-NEXT: push {r4, r5, r7, lr}
+; CHECK-T1-NEXT: movs r5, #0
+; CHECK-T1-NEXT: ldr r2, [sp, #20]
+; CHECK-T1-NEXT: ldr r3, [sp, #16]
; CHECK-T1-NEXT: adds r3, r0, r3
; CHECK-T1-NEXT: adcs r2, r1
-; CHECK-T1-NEXT: mov r0, r4
-; CHECK-T1-NEXT: adcs r0, r4
-; CHECK-T1-NEXT: mvns r1, r4
-; CHECK-T1-NEXT: cmp r0, #0
+; CHECK-T1-NEXT: mov r4, r5
+; CHECK-T1-NEXT: adcs r4, r5
+; CHECK-T1-NEXT: mvns r1, r5
+; CHECK-T1-NEXT: cmp r4, #0
; CHECK-T1-NEXT: mov r0, r1
; CHECK-T1-NEXT: beq .LBB1_3
; CHECK-T1-NEXT: @ %bb.1:
+; CHECK-T1-NEXT: cmp r4, #0
; CHECK-T1-NEXT: beq .LBB1_4
; CHECK-T1-NEXT: .LBB1_2:
-; CHECK-T1-NEXT: pop {r4, pc}
+; CHECK-T1-NEXT: pop {r4, r5, r7, pc}
; CHECK-T1-NEXT: .LBB1_3:
; CHECK-T1-NEXT: mov r0, r3
+; CHECK-T1-NEXT: cmp r4, #0
; CHECK-T1-NEXT: bne .LBB1_2
; CHECK-T1-NEXT: .LBB1_4:
; CHECK-T1-NEXT: mov r1, r2
-; CHECK-T1-NEXT: pop {r4, pc}
+; CHECK-T1-NEXT: pop {r4, r5, r7, pc}
;
; CHECK-T2-LABEL: func64:
; CHECK-T2: @ %bb.0:
diff --git a/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll
index 4eb82c80e2bff3..464808ec8861b3 100644
--- a/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll
@@ -7,209 +7,207 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; ARMV6: @ %bb.0: @ %start
; ARMV6-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; ARMV6-NEXT: sub sp, sp, #28
-; ARMV6-NEXT: ldr r4, [sp, #72]
-; ARMV6-NEXT: mov r7, r0
-; ARMV6-NEXT: str r0, [sp, #4] @ 4-byte Spill
-; ARMV6-NEXT: ldr r12, [sp, #64]
-; ARMV6-NEXT: umull r1, r0, r2, r4
-; ARMV6-NEXT: ldr r5, [sp, #68]
-; ARMV6-NEXT: str r1, [r7]
-; ARMV6-NEXT: ldr r1, [sp, #76]
-; ARMV6-NEXT: umull r7, r6, r1, r12
-; ARMV6-NEXT: str r6, [sp, #8] @ 4-byte Spill
-; ARMV6-NEXT: umull r6, r9, r5, r4
-; ARMV6-NEXT: add r7, r6, r7
-; ARMV6-NEXT: umull r4, r6, r12, r4
-; ARMV6-NEXT: str r4, [sp, #16] @ 4-byte Spill
-; ARMV6-NEXT: mov r4, #0
-; ARMV6-NEXT: adds r8, r6, r7
-; ARMV6-NEXT: ldr r6, [sp, #80]
-; ARMV6-NEXT: adc r7, r4, #0
+; ARMV6-NEXT: ldr lr, [sp, #72]
+; ARMV6-NEXT: mov r6, r0
+; ARMV6-NEXT: str r0, [sp, #8] @ 4-byte Spill
; ARMV6-NEXT: ldr r4, [sp, #84]
-; ARMV6-NEXT: str r7, [sp, #24] @ 4-byte Spill
-; ARMV6-NEXT: umull r12, lr, r3, r6
-; ARMV6-NEXT: umull r11, r7, r4, r2
+; ARMV6-NEXT: umull r1, r0, r2, lr
+; ARMV6-NEXT: umull r5, r10, r4, r2
+; ARMV6-NEXT: str r1, [r6]
+; ARMV6-NEXT: ldr r6, [sp, #80]
+; ARMV6-NEXT: umull r1, r7, r3, r6
+; ARMV6-NEXT: str r7, [sp, #12] @ 4-byte Spill
+; ARMV6-NEXT: add r1, r5, r1
+; ARMV6-NEXT: umull r7, r5, r6, r2
+; ARMV6-NEXT: mov r6, lr
+; ARMV6-NEXT: str r7, [sp, #16] @ 4-byte Spill
+; ARMV6-NEXT: mov r7, #0
+; ARMV6-NEXT: adds r1, r5, r1
+; ARMV6-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; ARMV6-NEXT: adc r1, r7, #0
+; ARMV6-NEXT: str r1, [sp, #24] @ 4-byte Spill
+; ARMV6-NEXT: ldr r1, [sp, #64]
+; ARMV6-NEXT: ldr r7, [sp, #76]
+; ARMV6-NEXT: ldr r5, [sp, #64]
+; ARMV6-NEXT: umull r12, r9, r7, r1
+; ARMV6-NEXT: ldr r1, [sp, #68]
+; ARMV6-NEXT: umull r11, r8, r1, lr
; ARMV6-NEXT: add r12, r11, r12
-; ARMV6-NEXT: umull r11, r10, r6, r2
-; ARMV6-NEXT: adds r12, r10, r12
-; ARMV6-NEXT: mov r10, #0
-; ARMV6-NEXT: adc r6, r10, #0
+; ARMV6-NEXT: umull r11, lr, r5, lr
+; ARMV6-NEXT: mov r5, r6
+; ARMV6-NEXT: mov r6, #0
+; ARMV6-NEXT: adds r12, lr, r12
+; ARMV6-NEXT: umull r2, lr, r2, r7
+; ARMV6-NEXT: adc r6, r6, #0
; ARMV6-NEXT: str r6, [sp, #20] @ 4-byte Spill
; ARMV6-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
-; ARMV6-NEXT: adds r6, r6, r11
-; ARMV6-NEXT: str r6, [sp, #12] @ 4-byte Spill
-; ARMV6-NEXT: adc r6, r8, r12
-; ARMV6-NEXT: str r6, [sp, #16] @ 4-byte Spill
-; ARMV6-NEXT: ldr r6, [sp, #72]
+; ARMV6-NEXT: adds r11, r11, r6
+; ARMV6-NEXT: ldr r6, [sp, #4] @ 4-byte Reload
+; ARMV6-NEXT: adc r6, r12, r6
; ARMV6-NEXT: mov r12, #0
-; ARMV6-NEXT: umull r2, r8, r2, r1
-; ARMV6-NEXT: umlal r0, r12, r3, r6
+; ARMV6-NEXT: umlal r0, r12, r3, r5
+; ARMV6-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
+; ARMV6-NEXT: str r6, [sp, #16] @ 4-byte Spill
+; ARMV6-NEXT: ldr r6, [sp, #64]
; ARMV6-NEXT: adds r0, r2, r0
-; ARMV6-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; ARMV6-NEXT: adcs r8, r12, r8
-; ARMV6-NEXT: adc r12, r10, #0
-; ARMV6-NEXT: cmp lr, #0
-; ARMV6-NEXT: str r0, [r2, #4]
+; ARMV6-NEXT: str r0, [r5, #4]
+; ARMV6-NEXT: adcs r0, r12, lr
+; ARMV6-NEXT: mov r2, #0
+; ARMV6-NEXT: adc r2, r2, #0
+; ARMV6-NEXT: orrs lr, r6, r1
+; ARMV6-NEXT: ldr r6, [sp, #80]
; ARMV6-NEXT: movne lr, #1
-; ARMV6-NEXT: ldr r11, [sp, #8] @ 4-byte Reload
-; ARMV6-NEXT: cmp r7, #0
-; ARMV6-NEXT: movne r7, #1
-; ARMV6-NEXT: ldr r0, [sp, #64]
-; ARMV6-NEXT: cmp r11, #0
-; ARMV6-NEXT: umlal r8, r12, r3, r1
-; ARMV6-NEXT: movne r11, #1
+; ARMV6-NEXT: umlal r0, r2, r3, r7
+; ARMV6-NEXT: orrs r12, r6, r4
+; ARMV6-NEXT: movne r12, #1
; ARMV6-NEXT: cmp r9, #0
+; ARMV6-NEXT: ldr r6, [sp, #12] @ 4-byte Reload
; ARMV6-NEXT: movne r9, #1
-; ARMV6-NEXT: orrs r10, r0, r5
-; ARMV6-NEXT: ldr r0, [sp, #80]
+; ARMV6-NEXT: cmp r8, #0
+; ARMV6-NEXT: movne r8, #1
+; ARMV6-NEXT: cmp r6, #0
+; ARMV6-NEXT: movne r6, #1
+; ARMV6-NEXT: cmp r10, #0
; ARMV6-NEXT: movne r10, #1
-; ARMV6-NEXT: ldr r6, [sp, #12] @ 4-byte Reload
-; ARMV6-NEXT: orrs r0, r0, r4
-; ARMV6-NEXT: movne r0, #1
+; ARMV6-NEXT: cmp r1, #0
+; ARMV6-NEXT: movne r1, #1
+; ARMV6-NEXT: cmp r7, #0
+; ARMV6-NEXT: movne r7, #1
; ARMV6-NEXT: cmp r4, #0
; ARMV6-NEXT: movne r4, #1
; ARMV6-NEXT: cmp r3, #0
; ARMV6-NEXT: movne r3, #1
-; ARMV6-NEXT: cmp r5, #0
-; ARMV6-NEXT: movne r5, #1
-; ARMV6-NEXT: cmp r1, #0
-; ARMV6-NEXT: movne r1, #1
-; ARMV6-NEXT: adds r6, r8, r6
-; ARMV6-NEXT: str r6, [r2, #8]
-; ARMV6-NEXT: and r1, r5, r1
-; ARMV6-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
+; ARMV6-NEXT: adds r0, r0, r11
+; ARMV6-NEXT: str r0, [r5, #8]
+; ARMV6-NEXT: and r1, r1, r7
+; ARMV6-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
+; ARMV6-NEXT: orr r1, r1, r8
; ARMV6-NEXT: orr r1, r1, r9
-; ARMV6-NEXT: orr r1, r1, r11
-; ARMV6-NEXT: and r0, r10, r0
-; ARMV6-NEXT: adcs r6, r12, r6
-; ARMV6-NEXT: str r6, [r2, #12]
-; ARMV6-NEXT: ldr r6, [sp, #24] @ 4-byte Reload
-; ARMV6-NEXT: orr r1, r1, r6
-; ARMV6-NEXT: orr r0, r0, r1
-; ARMV6-NEXT: and r1, r4, r3
-; ARMV6-NEXT: orr r1, r1, r7
-; ARMV6-NEXT: ldr r3, [sp, #20] @ 4-byte Reload
-; ARMV6-NEXT: orr r1, r1, lr
-; ARMV6-NEXT: orr r1, r1, r3
-; ARMV6-NEXT: orr r0, r0, r1
+; ARMV6-NEXT: adcs r0, r2, r0
+; ARMV6-NEXT: str r0, [r5, #12]
+; ARMV6-NEXT: and r0, r4, r3
+; ARMV6-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
+; ARMV6-NEXT: orr r0, r0, r10
+; ARMV6-NEXT: orr r0, r0, r6
+; ARMV6-NEXT: orr r0, r0, r2
+; ARMV6-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
+; ARMV6-NEXT: orr r1, r1, r2
+; ARMV6-NEXT: and r2, lr, r12
+; ARMV6-NEXT: orr r1, r2, r1
+; ARMV6-NEXT: orr r0, r1, r0
; ARMV6-NEXT: mov r1, #0
; ARMV6-NEXT: adc r1, r1, #0
; ARMV6-NEXT: orr r0, r0, r1
; ARMV6-NEXT: and r0, r0, #1
-; ARMV6-NEXT: strb r0, [r2, #16]
+; ARMV6-NEXT: strb r0, [r5, #16]
; ARMV6-NEXT: add sp, sp, #28
; ARMV6-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
;
; ARMV7-LABEL: muloti_test:
; ARMV7: @ %bb.0: @ %start
; ARMV7-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; ARMV7-NEXT: sub sp, sp, #44
-; ARMV7-NEXT: ldr r8, [sp, #88]
-; ARMV7-NEXT: mov r9, r0
-; ARMV7-NEXT: ldr r7, [sp, #96]
-; ARMV7-NEXT: ldr lr, [sp, #100]
-; ARMV7-NEXT: umull r0, r5, r2, r8
-; ARMV7-NEXT: ldr r4, [sp, #80]
+; ARMV7-NEXT: sub sp, sp, #36
+; ARMV7-NEXT: ldr r5, [sp, #84]
+; ARMV7-NEXT: mov r8, r0
+; ARMV7-NEXT: ldr r1, [sp, #72]
+; ARMV7-NEXT: ldr r10, [sp, #80]
+; ARMV7-NEXT: ldr r9, [sp, #76]
+; ARMV7-NEXT: umull r4, lr, r5, r1
+; ARMV7-NEXT: umull r0, r7, r2, r10
+; ARMV7-NEXT: str r4, [sp, #24] @ 4-byte Spill
+; ARMV7-NEXT: ldr r4, [sp, #88]
+; ARMV7-NEXT: umull r1, r6, r1, r10
; ARMV7-NEXT: str r0, [sp, #32] @ 4-byte Spill
-; ARMV7-NEXT: umull r1, r0, r3, r7
-; ARMV7-NEXT: str r0, [sp, #4] @ 4-byte Spill
-; ARMV7-NEXT: umull r0, r11, lr, r2
-; ARMV7-NEXT: str r1, [sp, #20] @ 4-byte Spill
+; ARMV7-NEXT: umull r11, r0, r2, r5
+; ARMV7-NEXT: str r6, [sp, #20] @ 4-byte Spill
+; ARMV7-NEXT: str r1, [sp, #28] @ 4-byte Spill
+; ARMV7-NEXT: umull r6, r12, r3, r4
; ARMV7-NEXT: ldr r1, [sp, #92]
-; ARMV7-NEXT: str r0, [sp] @ 4-byte Spill
-; ARMV7-NEXT: umull r0, r10, r7, r2
-; ARMV7-NEXT: mov r7, r1
-; ARMV7-NEXT: umull r6, r12, r1, r4
-; ARMV7-NEXT: str r0, [sp, #40] @ 4-byte Spill
-; ARMV7-NEXT: ldr r0, [sp, #84]
-; ARMV7-NEXT: str r6, [sp, #24] @ 4-byte Spill
-; ARMV7-NEXT: umull r6, r1, r0, r8
+; ARMV7-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; ARMV7-NEXT: mov r0, #0
+; ARMV7-NEXT: umlal r7, r0, r3, r10
; ARMV7-NEXT: str r6, [sp, #16] @ 4-byte Spill
-; ARMV7-NEXT: umull r6, r2, r2, r7
-; ARMV7-NEXT: mov r7, r4
-; ARMV7-NEXT: str r6, [sp, #8] @ 4-byte Spill
+; ARMV7-NEXT: umull r6, r1, r1, r2
+; ARMV7-NEXT: umull r2, r4, r4, r2
+; ARMV7-NEXT: str r6, [sp, #4] @ 4-byte Spill
; ARMV7-NEXT: str r2, [sp, #12] @ 4-byte Spill
-; ARMV7-NEXT: umull r2, r6, r4, r8
-; ARMV7-NEXT: str r2, [sp, #36] @ 4-byte Spill
-; ARMV7-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
-; ARMV7-NEXT: str r6, [sp, #28] @ 4-byte Spill
-; ARMV7-NEXT: mov r6, #0
-; ARMV7-NEXT: str r2, [r9]
-; ARMV7-NEXT: umlal r5, r6, r3, r8
+; ARMV7-NEXT: adds r2, r11, r7
+; ARMV7-NEXT: ldr r7, [sp, #8] @ 4-byte Reload
+; ARMV7-NEXT: mov r11, #0
+; ARMV7-NEXT: str r4, [sp] @ 4-byte Spill
+; ARMV7-NEXT: umull r6, r4, r9, r10
+; ARMV7-NEXT: adcs r9, r0, r7
+; ARMV7-NEXT: ldr r0, [sp, #32] @ 4-byte Reload
+; ARMV7-NEXT: adc r10, r11, #0
+; ARMV7-NEXT: stm r8, {r0, r2}
+; ARMV7-NEXT: ldr r0, [sp, #24] @ 4-byte Reload
+; ARMV7-NEXT: umlal r9, r10, r3, r5
; ARMV7-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
-; ARMV7-NEXT: ldr r4, [sp] @ 4-byte Reload
-; ARMV7-NEXT: add r4, r4, r2
-; ARMV7-NEXT: adds r2, r10, r4
-; ARMV7-NEXT: str r2, [sp, #20] @ 4-byte Spill
-; ARMV7-NEXT: mov r2, #0
-; ARMV7-NEXT: adc r2, r2, #0
-; ARMV7-NEXT: cmp r12, #0
+; ARMV7-NEXT: add r0, r6, r0
+; ARMV7-NEXT: adds r0, r2, r0
+; ARMV7-NEXT: ldr r6, [sp, #4] @ 4-byte Reload
+; ARMV7-NEXT: adc r2, r11, #0
; ARMV7-NEXT: str r2, [sp, #32] @ 4-byte Spill
-; ARMV7-NEXT: movwne r12, #1
-; ARMV7-NEXT: cmp r1, #0
-; ARMV7-NEXT: ldr r2, [sp, #96]
-; ARMV7-NEXT: movwne r1, #1
-; ARMV7-NEXT: orrs r10, r7, r0
-; ARMV7-NEXT: movwne r10, #1
-; ARMV7-NEXT: orrs r7, r2, lr
-; ARMV7-NEXT: ldr r2, [sp, #92]
-; ARMV7-NEXT: movwne r7, #1
+; ARMV7-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
+; ARMV7-NEXT: ldr r7, [sp, #28] @ 4-byte Reload
+; ARMV7-NEXT: add r2, r6, r2
+; ARMV7-NEXT: ldr r6, [sp] @ 4-byte Reload
+; ARMV7-NEXT: adds r2, r6, r2
+; ARMV7-NEXT: ldr r6, [sp, #12] @ 4-byte Reload
+; ARMV7-NEXT: adc r11, r11, #0
+; ARMV7-NEXT: adds r7, r7, r6
+; ARMV7-NEXT: ldr r6, [sp, #92]
+; ARMV7-NEXT: adc r0, r0, r2
+; ARMV7-NEXT: str r0, [sp, #28] @ 4-byte Spill
+; ARMV7-NEXT: ldr r0, [sp, #92]
+; ARMV7-NEXT: cmp r3, #0
+; ARMV7-NEXT: movwne r3, #1
+; ARMV7-NEXT: ldr r2, [sp, #76]
; ARMV7-NEXT: cmp r0, #0
; ARMV7-NEXT: movwne r0, #1
-; ARMV7-NEXT: cmp r2, #0
-; ARMV7-NEXT: mov r4, r2
-; ARMV7-NEXT: mov r8, r2
-; ARMV7-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; ARMV7-NEXT: movwne r4, #1
-; ARMV7-NEXT: and r0, r0, r4
-; ARMV7-NEXT: mov r4, #0
-; ARMV7-NEXT: adds r5, r2, r5
-; ARMV7-NEXT: str r5, [r9, #4]
+; ARMV7-NEXT: cmp r1, #0
+; ARMV7-NEXT: movwne r1, #1
+; ARMV7-NEXT: cmp r12, #0
+; ARMV7-NEXT: and r0, r0, r3
+; ARMV7-NEXT: movwne r12, #1
+; ARMV7-NEXT: cmp r5, #0
; ARMV7-NEXT: orr r0, r0, r1
-; ARMV7-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
-; ARMV7-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
-; ARMV7-NEXT: and r5, r10, r7
-; ARMV7-NEXT: orr r0, r0, r12
-; ARMV7-NEXT: mov r12, #0
-; ARMV7-NEXT: add r1, r2, r1
-; ARMV7-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
-; ARMV7-NEXT: adcs r2, r6, r2
-; ARMV7-NEXT: ldr r6, [sp, #28] @ 4-byte Reload
-; ARMV7-NEXT: adc r7, r4, #0
-; ARMV7-NEXT: adds r1, r6, r1
-; ARMV7-NEXT: umlal r2, r7, r3, r8
-; ARMV7-NEXT: adc r4, r4, #0
-; ARMV7-NEXT: orr r0, r0, r4
-; ARMV7-NEXT: orr r0, r5, r0
-; ARMV7-NEXT: ldr r4, [sp, #40] @ 4-byte Reload
-; ARMV7-NEXT: ldr r5, [sp, #36] @ 4-byte Reload
-; ARMV7-NEXT: adds r5, r5, r4
-; ARMV7-NEXT: ldr r4, [sp, #20] @ 4-byte Reload
-; ARMV7-NEXT: adc r1, r1, r4
-; ARMV7-NEXT: ldr r4, [sp, #4] @ 4-byte Reload
+; ARMV7-NEXT: movwne r5, #1
+; ARMV7-NEXT: cmp r2, #0
+; ARMV7-NEXT: mov r1, r2
+; ARMV7-NEXT: mov r3, r2
+; ARMV7-NEXT: movwne r1, #1
; ARMV7-NEXT: cmp r4, #0
+; ARMV7-NEXT: ldr r2, [sp, #72]
; ARMV7-NEXT: movwne r4, #1
-; ARMV7-NEXT: cmp r3, #0
-; ARMV7-NEXT: movwne r3, #1
; ARMV7-NEXT: cmp lr, #0
+; ARMV7-NEXT: and r1, r1, r5
; ARMV7-NEXT: movwne lr, #1
-; ARMV7-NEXT: cmp r11, #0
-; ARMV7-NEXT: movwne r11, #1
-; ARMV7-NEXT: adds r2, r2, r5
-; ARMV7-NEXT: and r3, lr, r3
-; ARMV7-NEXT: str r2, [r9, #8]
-; ARMV7-NEXT: adcs r1, r7, r1
-; ARMV7-NEXT: str r1, [r9, #12]
-; ARMV7-NEXT: orr r1, r3, r11
-; ARMV7-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
+; ARMV7-NEXT: orrs r2, r2, r3
+; ARMV7-NEXT: ldr r3, [sp, #88]
+; ARMV7-NEXT: movwne r2, #1
; ARMV7-NEXT: orr r1, r1, r4
-; ARMV7-NEXT: orr r1, r1, r2
-; ARMV7-NEXT: orr r0, r0, r1
-; ARMV7-NEXT: adc r1, r12, #0
+; ARMV7-NEXT: orr r0, r0, r12
+; ARMV7-NEXT: orrs r3, r3, r6
+; ARMV7-NEXT: orr r1, r1, lr
+; ARMV7-NEXT: movwne r3, #1
+; ARMV7-NEXT: adds r7, r9, r7
+; ARMV7-NEXT: str r7, [r8, #8]
+; ARMV7-NEXT: and r2, r2, r3
+; ARMV7-NEXT: ldr r7, [sp, #28] @ 4-byte Reload
+; ARMV7-NEXT: orr r0, r0, r11
+; ARMV7-NEXT: adcs r7, r10, r7
+; ARMV7-NEXT: str r7, [r8, #12]
+; ARMV7-NEXT: ldr r7, [sp, #32] @ 4-byte Reload
+; ARMV7-NEXT: orr r1, r1, r7
+; ARMV7-NEXT: orr r1, r2, r1
+; ARMV7-NEXT: orr r0, r1, r0
+; ARMV7-NEXT: mov r1, #0
+; ARMV7-NEXT: adc r1, r1, #0
; ARMV7-NEXT: orr r0, r0, r1
; ARMV7-NEXT: and r0, r0, #1
-; ARMV7-NEXT: strb r0, [r9, #16]
-; ARMV7-NEXT: add sp, sp, #44
+; ARMV7-NEXT: strb r0, [r8, #16]
+; ARMV7-NEXT: add sp, sp, #36
; ARMV7-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
start:
%0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2
diff --git a/llvm/test/CodeGen/ARM/umulo-64-legalisation-lowering.ll b/llvm/test/CodeGen/ARM/umulo-64-legalisation-lowering.ll
index 64d98314429707..ddf033b19b949d 100644
--- a/llvm/test/CodeGen/ARM/umulo-64-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/ARM/umulo-64-legalisation-lowering.ll
@@ -5,49 +5,50 @@
define { i64, i8 } @mulodi_test(i64 %l, i64 %r) unnamed_addr #0 {
; ARMV6-LABEL: mulodi_test:
; ARMV6: @ %bb.0: @ %start
-; ARMV6-NEXT: push {r4, r5, r11, lr}
-; ARMV6-NEXT: umull r12, lr, r1, r2
-; ARMV6-NEXT: umull r4, r5, r3, r0
-; ARMV6-NEXT: cmp lr, #0
-; ARMV6-NEXT: movne lr, #1
+; ARMV6-NEXT: push {r4, r5, r6, lr}
+; ARMV6-NEXT: umull r12, lr, r3, r0
+; ARMV6-NEXT: mov r6, #0
+; ARMV6-NEXT: umull r4, r5, r1, r2
+; ARMV6-NEXT: umull r0, r2, r0, r2
+; ARMV6-NEXT: add r4, r4, r12
+; ARMV6-NEXT: adds r12, r2, r4
+; ARMV6-NEXT: adc r2, r6, #0
; ARMV6-NEXT: cmp r3, #0
; ARMV6-NEXT: movne r3, #1
; ARMV6-NEXT: cmp r1, #0
-; ARMV6-NEXT: umull r0, r2, r0, r2
; ARMV6-NEXT: movne r1, #1
-; ARMV6-NEXT: and r1, r1, r3
; ARMV6-NEXT: cmp r5, #0
-; ARMV6-NEXT: orr r1, r1, lr
+; ARMV6-NEXT: and r1, r1, r3
; ARMV6-NEXT: movne r5, #1
-; ARMV6-NEXT: orr r3, r1, r5
-; ARMV6-NEXT: add r1, r12, r4
-; ARMV6-NEXT: adds r1, r2, r1
-; ARMV6-NEXT: mov r5, #0
-; ARMV6-NEXT: adc r2, r5, #0
-; ARMV6-NEXT: orr r2, r3, r2
-; ARMV6-NEXT: pop {r4, r5, r11, pc}
+; ARMV6-NEXT: cmp lr, #0
+; ARMV6-NEXT: orr r1, r1, r5
+; ARMV6-NEXT: movne lr, #1
+; ARMV6-NEXT: orr r1, r1, lr
+; ARMV6-NEXT: orr r2, r1, r2
+; ARMV6-NEXT: mov r1, r12
+; ARMV6-NEXT: pop {r4, r5, r6, pc}
;
; ARMV7-LABEL: mulodi_test:
; ARMV7: @ %bb.0: @ %start
; ARMV7-NEXT: push {r4, r5, r11, lr}
-; ARMV7-NEXT: umull r12, lr, r3, r0
+; ARMV7-NEXT: umull r12, lr, r1, r2
; ARMV7-NEXT: cmp r3, #0
+; ARMV7-NEXT: umull r4, r5, r3, r0
; ARMV7-NEXT: movwne r3, #1
; ARMV7-NEXT: cmp r1, #0
-; ARMV7-NEXT: umull r0, r4, r0, r2
-; ARMV7-NEXT: umull r2, r5, r1, r2
; ARMV7-NEXT: movwne r1, #1
+; ARMV7-NEXT: umull r0, r2, r0, r2
+; ARMV7-NEXT: cmp lr, #0
; ARMV7-NEXT: and r1, r1, r3
+; ARMV7-NEXT: movwne lr, #1
; ARMV7-NEXT: cmp r5, #0
+; ARMV7-NEXT: orr r1, r1, lr
; ARMV7-NEXT: movwne r5, #1
-; ARMV7-NEXT: cmp lr, #0
-; ARMV7-NEXT: orr r1, r1, r5
-; ARMV7-NEXT: movwne lr, #1
-; ARMV7-NEXT: orr r3, r1, lr
-; ARMV7-NEXT: add r1, r2, r12
-; ARMV7-NEXT: mov r2, #0
-; ARMV7-NEXT: adds r1, r4, r1
-; ARMV7-NEXT: adc r2, r2, #0
+; ARMV7-NEXT: orr r3, r1, r5
+; ARMV7-NEXT: add r1, r12, r4
+; ARMV7-NEXT: mov r5, #0
+; ARMV7-NEXT: adds r1, r2, r1
+; ARMV7-NEXT: adc r2, r5, #0
; ARMV7-NEXT: orr r2, r3, r2
; ARMV7-NEXT: pop {r4, r5, r11, pc}
start:
diff --git a/llvm/test/CodeGen/ARM/usub_sat.ll b/llvm/test/CodeGen/ARM/usub_sat.ll
index 73e6dafc085903..9c2fd3966ea984 100644
--- a/llvm/test/CodeGen/ARM/usub_sat.ll
+++ b/llvm/test/CodeGen/ARM/usub_sat.ll
@@ -49,15 +49,16 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
; CHECK-T1-NEXT: adcs r0, r1
; CHECK-T1-NEXT: movs r3, #1
; CHECK-T1-NEXT: eors r3, r0
-; CHECK-T1-NEXT: cmp r3, #0
; CHECK-T1-NEXT: mov r0, r1
; CHECK-T1-NEXT: beq .LBB1_3
; CHECK-T1-NEXT: @ %bb.1:
+; CHECK-T1-NEXT: cmp r3, #0
; CHECK-T1-NEXT: beq .LBB1_4
; CHECK-T1-NEXT: .LBB1_2:
; CHECK-T1-NEXT: pop {r4, pc}
; CHECK-T1-NEXT: .LBB1_3:
; CHECK-T1-NEXT: mov r0, r2
+; CHECK-T1-NEXT: cmp r3, #0
; CHECK-T1-NEXT: bne .LBB1_2
; CHECK-T1-NEXT: .LBB1_4:
; CHECK-T1-NEXT: mov r1, r4
@@ -69,7 +70,7 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
; CHECK-T2-NEXT: mov.w r12, #0
; CHECK-T2-NEXT: sbcs r1, r3
; CHECK-T2-NEXT: adc r2, r12, #0
-; CHECK-T2-NEXT: teq.w r2, #1
+; CHECK-T2-NEXT: eors r2, r2, #1
; CHECK-T2-NEXT: itt ne
; CHECK-T2-NEXT: movne r0, #0
; CHECK-T2-NEXT: movne r1, #0
@@ -81,7 +82,7 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
; CHECK-ARM-NEXT: mov r12, #0
; CHECK-ARM-NEXT: sbcs r1, r1, r3
; CHECK-ARM-NEXT: adc r2, r12, #0
-; CHECK-ARM-NEXT: teq r2, #1
+; CHECK-ARM-NEXT: eors r2, r2, #1
; CHECK-ARM-NEXT: movwne r0, #0
; CHECK-ARM-NEXT: movwne r1, #0
; CHECK-ARM-NEXT: bx lr
diff --git a/llvm/test/CodeGen/ARM/usub_sat_plus.ll b/llvm/test/CodeGen/ARM/usub_sat_plus.ll
index a465a413c6d0ed..51ec83c707603b 100644
--- a/llvm/test/CodeGen/ARM/usub_sat_plus.ll
+++ b/llvm/test/CodeGen/ARM/usub_sat_plus.ll
@@ -55,15 +55,16 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
; CHECK-T1-NEXT: adcs r0, r1
; CHECK-T1-NEXT: movs r4, #1
; CHECK-T1-NEXT: eors r4, r0
-; CHECK-T1-NEXT: cmp r4, #0
; CHECK-T1-NEXT: mov r0, r1
; CHECK-T1-NEXT: beq .LBB1_3
; CHECK-T1-NEXT: @ %bb.1:
+; CHECK-T1-NEXT: cmp r4, #0
; CHECK-T1-NEXT: beq .LBB1_4
; CHECK-T1-NEXT: .LBB1_2:
; CHECK-T1-NEXT: pop {r4, pc}
; CHECK-T1-NEXT: .LBB1_3:
; CHECK-T1-NEXT: mov r0, r3
+; CHECK-T1-NEXT: cmp r4, #0
; CHECK-T1-NEXT: bne .LBB1_2
; CHECK-T1-NEXT: .LBB1_4:
; CHECK-T1-NEXT: mov r1, r2
@@ -76,7 +77,7 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
; CHECK-T2-NEXT: subs r0, r0, r2
; CHECK-T2-NEXT: sbcs r1, r3
; CHECK-T2-NEXT: adc r2, r12, #0
-; CHECK-T2-NEXT: teq.w r2, #1
+; CHECK-T2-NEXT: eors r2, r2, #1
; CHECK-T2-NEXT: itt ne
; CHECK-T2-NEXT: movne r0, #0
; CHECK-T2-NEXT: movne r1, #0
@@ -90,7 +91,7 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
; CHECK-ARM-NEXT: subs r0, r0, r2
; CHECK-ARM-NEXT: sbcs r1, r1, r3
; CHECK-ARM-NEXT: adc r2, r12, #0
-; CHECK-ARM-NEXT: teq r2, #1
+; CHECK-ARM-NEXT: eors r2, r2, #1
; CHECK-ARM-NEXT: movwne r0, #0
; CHECK-ARM-NEXT: movwne r1, #0
; CHECK-ARM-NEXT: bx lr
diff --git a/llvm/test/CodeGen/ARM/vselect_imax.ll b/llvm/test/CodeGen/ARM/vselect_imax.ll
index bd5e3061f0d180..9f0edb7117bd15 100644
--- a/llvm/test/CodeGen/ARM/vselect_imax.ll
+++ b/llvm/test/CodeGen/ARM/vselect_imax.ll
@@ -111,8 +111,8 @@ define void @func_blend15(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea
define void @func_blend18(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storeaddr) {
; CHECK-LABEL: func_blend18:
; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r4, r5, r6, lr}
-; CHECK-NEXT: push {r4, r5, r6, lr}
+; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128]!
; CHECK-NEXT: vld1.64 {d22, d23}, [r0:128]!
; CHECK-NEXT: vmov r4, r6, d16
@@ -122,6 +122,7 @@ define void @func_blend18(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: vmov r2, r1, d20
; CHECK-NEXT: subs r2, r2, lr
+; CHECK-NEXT: vmov r7, lr, d17
; CHECK-NEXT: vmov r2, r5, d22
; CHECK-NEXT: sbcs r1, r1, r12
; CHECK-NEXT: mov r1, #0
@@ -130,34 +131,33 @@ define void @func_blend18(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea
; CHECK-NEXT: mvnne r1, #0
; CHECK-NEXT: subs r2, r2, r4
; CHECK-NEXT: sbcs r6, r5, r6
-; CHECK-NEXT: vmov r2, r12, d17
-; CHECK-NEXT: vmov r5, r4, d23
+; CHECK-NEXT: vmov r2, r12, d19
+; CHECK-NEXT: vmov r5, r4, d21
; CHECK-NEXT: mov r6, #0
; CHECK-NEXT: movlt r6, #1
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: mvnne r6, #0
; CHECK-NEXT: subs r2, r5, r2
-; CHECK-NEXT: sbcs r2, r4, r12
-; CHECK-NEXT: vmov lr, r12, d19
-; CHECK-NEXT: vmov r4, r5, d21
+; CHECK-NEXT: sbcs r4, r4, r12
; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: vmov r4, r5, d23
; CHECK-NEXT: movlt r2, #1
+; CHECK-NEXT: subs r7, r4, r7
+; CHECK-NEXT: sbcs r7, r5, lr
+; CHECK-NEXT: movlt r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mvnne r0, #0
; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: vdup.32 d25, r0
; CHECK-NEXT: mvnne r2, #0
-; CHECK-NEXT: vdup.32 d25, r2
; CHECK-NEXT: vdup.32 d24, r6
+; CHECK-NEXT: vdup.32 d27, r2
; CHECK-NEXT: vbit q8, q11, q12
-; CHECK-NEXT: subs r4, r4, lr
-; CHECK-NEXT: sbcs r5, r5, r12
-; CHECK-NEXT: movlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mvnne r0, #0
-; CHECK-NEXT: vdup.32 d27, r0
; CHECK-NEXT: vdup.32 d26, r1
; CHECK-NEXT: vbit q9, q10, q13
; CHECK-NEXT: vst1.64 {d16, d17}, [r3:128]!
; CHECK-NEXT: vst1.64 {d18, d19}, [r3:128]
-; CHECK-NEXT: pop {r4, r5, r6, lr}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: mov pc, lr
; COST: func_blend18
; COST: cost of 0 {{.*}} icmp
@@ -198,21 +198,12 @@ define void @func_blend19(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea
; CHECK-NEXT: mvnne r12, #0
; CHECK-NEXT: subs r1, r1, r2
; CHECK-NEXT: sbcs r0, r4, r0
-; CHECK-NEXT: vmov r2, r4, d24
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: movlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mvnne r0, #0
-; CHECK-NEXT: vdup.32 d1, r0
-; CHECK-NEXT: vmov r0, r1, d20
-; CHECK-NEXT: subs r0, r2, r0
-; CHECK-NEXT: sbcs r0, r4, r1
; CHECK-NEXT: vmov r2, r4, d26
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mvnne r0, #0
-; CHECK-NEXT: vdup.32 d0, r0
+; CHECK-NEXT: vdup.32 d1, r0
; CHECK-NEXT: vmov r0, r1, d22
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: mov r2, #0
@@ -233,15 +224,14 @@ define void @func_blend19(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea
; CHECK-NEXT: vmov r0, r1, d28
; CHECK-NEXT: subs r0, r4, r0
; CHECK-NEXT: sbcs r0, r5, r1
-; CHECK-NEXT: vmov r4, r5, d27
+; CHECK-NEXT: vmov r4, r5, d24
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mvnne r0, #0
; CHECK-NEXT: vdup.32 d2, r0
-; CHECK-NEXT: vmov r0, r1, d23
+; CHECK-NEXT: vmov r0, r1, d20
; CHECK-NEXT: vbit q14, q15, q1
-; CHECK-NEXT: vbit q10, q12, q0
; CHECK-NEXT: subs r0, r4, r0
; CHECK-NEXT: sbcs r0, r5, r1
; CHECK-NEXT: vmov r1, r4, d17
@@ -250,17 +240,27 @@ define void @func_blend19(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea
; CHECK-NEXT: movlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mvnne r0, #0
-; CHECK-NEXT: vdup.32 d31, r0
-; CHECK-NEXT: vdup.32 d30, r2
-; CHECK-NEXT: vbit q11, q13, q15
-; CHECK-NEXT: vst1.64 {d28, d29}, [r3:128]!
+; CHECK-NEXT: vdup.32 d0, r0
+; CHECK-NEXT: vbit q10, q12, q0
; CHECK-NEXT: subs r1, r5, r1
; CHECK-NEXT: sbcs r1, r6, r4
+; CHECK-NEXT: vmov r4, r5, d27
+; CHECK-NEXT: vmov r0, r1, d23
+; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: movlt r6, #1
+; CHECK-NEXT: subs r0, r4, r0
+; CHECK-NEXT: sbcs r0, r5, r1
; CHECK-NEXT: movlt lr, #1
; CHECK-NEXT: cmp lr, #0
; CHECK-NEXT: mvnne lr, #0
-; CHECK-NEXT: vdup.32 d3, lr
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: vdup.32 d31, lr
+; CHECK-NEXT: mvnne r6, #0
+; CHECK-NEXT: vdup.32 d30, r2
+; CHECK-NEXT: vdup.32 d3, r6
+; CHECK-NEXT: vbit q11, q13, q15
; CHECK-NEXT: vdup.32 d2, r12
+; CHECK-NEXT: vst1.64 {d28, d29}, [r3:128]!
; CHECK-NEXT: vbit q8, q9, q1
; CHECK-NEXT: vst1.64 {d20, d21}, [r3:128]!
; CHECK-NEXT: vst1.64 {d22, d23}, [r3:128]!
@@ -283,194 +283,198 @@ define void @func_blend19(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea
define void @func_blend20(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storeaddr) {
; CHECK-LABEL: func_blend20:
; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT: .vsave {d8, d9}
-; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: add r8, r1, #64
-; CHECK-NEXT: add lr, r0, #64
-; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128]!
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: mov r8, r1
+; CHECK-NEXT: mov lr, r0
+; CHECK-NEXT: vld1.64 {d16, d17}, [r8:128]!
+; CHECK-NEXT: add r9, r0, #64
+; CHECK-NEXT: add r10, r1, #64
; CHECK-NEXT: mov r12, #0
-; CHECK-NEXT: vld1.64 {d24, d25}, [r0:128]!
-; CHECK-NEXT: vmov r4, r5, d17
-; CHECK-NEXT: vmov r6, r7, d25
-; CHECK-NEXT: vld1.64 {d18, d19}, [lr:128]!
-; CHECK-NEXT: vld1.64 {d20, d21}, [r8:128]!
-; CHECK-NEXT: vld1.64 {d22, d23}, [r8:128]!
-; CHECK-NEXT: vld1.64 {d0, d1}, [lr:128]!
-; CHECK-NEXT: subs r4, r6, r4
-; CHECK-NEXT: sbcs r4, r7, r5
-; CHECK-NEXT: vmov r5, r6, d16
-; CHECK-NEXT: vmov r7, r2, d24
+; CHECK-NEXT: vld1.64 {d22, d23}, [lr:128]!
+; CHECK-NEXT: vld1.64 {d18, d19}, [r8:128]!
+; CHECK-NEXT: vld1.64 {d20, d21}, [lr:128]!
+; CHECK-NEXT: vmov r6, r4, d19
+; CHECK-NEXT: vmov r5, r7, d21
+; CHECK-NEXT: vld1.64 {d4, d5}, [r9:128]!
+; CHECK-NEXT: vld1.64 {d6, d7}, [r10:128]!
+; CHECK-NEXT: vld1.64 {d0, d1}, [r10:128]!
+; CHECK-NEXT: vld1.64 {d2, d3}, [r9:128]!
+; CHECK-NEXT: subs r6, r5, r6
+; CHECK-NEXT: sbcs r4, r7, r4
+; CHECK-NEXT: vmov r5, r6, d18
+; CHECK-NEXT: vmov r7, r2, d20
; CHECK-NEXT: mov r4, #0
; CHECK-NEXT: movlt r4, #1
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: mvnne r4, #0
-; CHECK-NEXT: vdup.32 d27, r4
+; CHECK-NEXT: vdup.32 d31, r4
; CHECK-NEXT: subs r5, r7, r5
; CHECK-NEXT: sbcs r2, r2, r6
-; CHECK-NEXT: vmov r5, r6, d1
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movlt r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: mvnne r2, #0
-; CHECK-NEXT: vdup.32 d26, r2
-; CHECK-NEXT: vmov r2, r4, d23
-; CHECK-NEXT: vbit q8, q12, q13
-; CHECK-NEXT: vld1.64 {d24, d25}, [r0:128]!
-; CHECK-NEXT: vld1.64 {d26, d27}, [r1:128]!
-; CHECK-NEXT: vld1.64 {d28, d29}, [lr:128]!
-; CHECK-NEXT: subs r2, r5, r2
-; CHECK-NEXT: sbcs r2, r6, r4
-; CHECK-NEXT: vmov r4, r5, d22
-; CHECK-NEXT: vmov r6, r7, d0
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movlt r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: mvnne r2, #0
-; CHECK-NEXT: vdup.32 d3, r2
-; CHECK-NEXT: subs r4, r6, r4
-; CHECK-NEXT: sbcs r4, r7, r5
-; CHECK-NEXT: vmov r2, r5, d27
-; CHECK-NEXT: vmov r6, r7, d25
-; CHECK-NEXT: mov r4, #0
-; CHECK-NEXT: movlt r4, #1
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: mvnne r4, #0
-; CHECK-NEXT: vdup.32 d2, r4
-; CHECK-NEXT: subs r2, r6, r2
-; CHECK-NEXT: sbcs r2, r7, r5
-; CHECK-NEXT: vmov r6, r7, d24
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movlt r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: mvnne r2, #0
-; CHECK-NEXT: vdup.32 d5, r2
-; CHECK-NEXT: vmov r2, r5, d26
-; CHECK-NEXT: subs r2, r6, r2
-; CHECK-NEXT: sbcs r2, r7, r5
-; CHECK-NEXT: vmov r6, r7, d19
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movlt r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: mvnne r2, #0
-; CHECK-NEXT: vdup.32 d4, r2
-; CHECK-NEXT: vmov r2, r5, d21
-; CHECK-NEXT: subs r2, r6, r2
-; CHECK-NEXT: sbcs r2, r7, r5
-; CHECK-NEXT: vmov r6, r7, d18
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movlt r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: mvnne r2, #0
-; CHECK-NEXT: vdup.32 d31, r2
-; CHECK-NEXT: vmov r2, r5, d20
-; CHECK-NEXT: subs r2, r6, r2
-; CHECK-NEXT: sbcs r2, r7, r5
+; CHECK-NEXT: vmov r4, r5, d3
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: mvnne r2, #0
; CHECK-NEXT: vdup.32 d30, r2
-; CHECK-NEXT: vbif q9, q10, q15
-; CHECK-NEXT: vld1.64 {d30, d31}, [r8:128]!
-; CHECK-NEXT: vld1.64 {d20, d21}, [r8:128]
-; CHECK-NEXT: vbit q13, q12, q2
-; CHECK-NEXT: vld1.64 {d24, d25}, [lr:128]
-; CHECK-NEXT: vmov r2, r7, d21
-; CHECK-NEXT: vbit q11, q0, q1
+; CHECK-NEXT: vmov r0, r2, d1
+; CHECK-NEXT: subs r0, r4, r0
+; CHECK-NEXT: sbcs r0, r5, r2
+; CHECK-NEXT: vmov r4, r5, d2
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: movlt r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: vdup.32 d9, r0
+; CHECK-NEXT: vmov r0, r2, d0
+; CHECK-NEXT: subs r0, r4, r0
+; CHECK-NEXT: sbcs r0, r5, r2
+; CHECK-NEXT: vmov r4, r5, d5
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: movlt r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: vdup.32 d8, r0
+; CHECK-NEXT: vmov r0, r2, d7
+; CHECK-NEXT: subs r0, r4, r0
+; CHECK-NEXT: sbcs r0, r5, r2
+; CHECK-NEXT: vmov r4, r5, d4
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: movlt r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: vdup.32 d11, r0
+; CHECK-NEXT: vmov r0, r2, d6
+; CHECK-NEXT: subs r0, r4, r0
+; CHECK-NEXT: sbcs r0, r5, r2
+; CHECK-NEXT: vmov r4, r5, d23
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: movlt r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: vdup.32 d10, r0
+; CHECK-NEXT: vmov r0, r2, d17
+; CHECK-NEXT: subs r0, r4, r0
+; CHECK-NEXT: sbcs r0, r5, r2
+; CHECK-NEXT: vmov r4, r5, d22
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: movlt r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: vdup.32 d25, r0
+; CHECK-NEXT: vmov r0, r2, d16
+; CHECK-NEXT: subs r0, r4, r0
+; CHECK-NEXT: sbcs r0, r5, r2
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: movlt r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: vdup.32 d24, r0
+; CHECK-NEXT: vorr q13, q12, q12
+; CHECK-NEXT: vbsl q13, q11, q8
+; CHECK-NEXT: vld1.64 {d24, d25}, [r9:128]!
+; CHECK-NEXT: vorr q8, q5, q5
+; CHECK-NEXT: vld1.64 {d28, d29}, [r10:128]!
+; CHECK-NEXT: vbsl q8, q2, q3
+; CHECK-NEXT: vld1.64 {d6, d7}, [r8:128]!
+; CHECK-NEXT: vld1.64 {d22, d23}, [r8:128]
+; CHECK-NEXT: vld1.64 {d4, d5}, [lr:128]!
+; CHECK-NEXT: vbif q10, q9, q15
+; CHECK-NEXT: vorr q9, q4, q4
+; CHECK-NEXT: vmov r0, r2, d22
+; CHECK-NEXT: vbsl q9, q1, q0
+; CHECK-NEXT: vld1.64 {d30, d31}, [lr:128]
; CHECK-NEXT: mov lr, #0
-; CHECK-NEXT: vmov r6, r5, d25
-; CHECK-NEXT: vld1.64 {d4, d5}, [r1:128]!
-; CHECK-NEXT: vld1.64 {d6, d7}, [r0:128]!
-; CHECK-NEXT: vld1.64 {d0, d1}, [r1:128]
-; CHECK-NEXT: vld1.64 {d2, d3}, [r0:128]
-; CHECK-NEXT: subs r1, r6, r2
-; CHECK-NEXT: vmov r0, r6, d2
-; CHECK-NEXT: sbcs r1, r5, r7
-; CHECK-NEXT: vmov r2, r7, d0
+; CHECK-NEXT: vmov r7, r5, d30
+; CHECK-NEXT: vld1.64 {d0, d1}, [r9:128]
+; CHECK-NEXT: vld1.64 {d2, d3}, [r10:128]
+; CHECK-NEXT: subs r0, r7, r0
+; CHECK-NEXT: sbcs r0, r5, r2
+; CHECK-NEXT: vmov r5, r4, d24
+; CHECK-NEXT: vmov r0, r7, d28
; CHECK-NEXT: movlt lr, #1
; CHECK-NEXT: cmp lr, #0
; CHECK-NEXT: mvnne lr, #0
-; CHECK-NEXT: subs r0, r0, r2
-; CHECK-NEXT: sbcs r0, r6, r7
-; CHECK-NEXT: vmov r2, r7, d30
-; CHECK-NEXT: vmov r6, r5, d28
+; CHECK-NEXT: subs r0, r5, r0
+; CHECK-NEXT: sbcs r0, r4, r7
+; CHECK-NEXT: vmov r7, r5, d29
+; CHECK-NEXT: vmov r4, r6, d25
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mvnne r0, #0
-; CHECK-NEXT: subs r2, r6, r2
-; CHECK-NEXT: sbcs r2, r5, r7
-; CHECK-NEXT: vmov r7, r6, d31
-; CHECK-NEXT: vmov r5, r4, d29
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movlt r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: mvnne r2, #0
-; CHECK-NEXT: subs r7, r5, r7
-; CHECK-NEXT: vmov r5, r1, d7
-; CHECK-NEXT: sbcs r7, r4, r6
+; CHECK-NEXT: subs r7, r4, r7
; CHECK-NEXT: mov r4, #0
-; CHECK-NEXT: vmov r7, r6, d5
+; CHECK-NEXT: sbcs r7, r6, r5
+; CHECK-NEXT: vmov r5, r1, d31
+; CHECK-NEXT: vmov r7, r6, d23
; CHECK-NEXT: movlt r4, #1
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: mvnne r4, #0
-; CHECK-NEXT: subs r5, r5, r7
+; CHECK-NEXT: subs r7, r5, r7
+; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: sbcs r1, r1, r6
-; CHECK-NEXT: vmov r6, r7, d6
+; CHECK-NEXT: vmov r6, r2, d5
+; CHECK-NEXT: vmov r1, r7, d7
+; CHECK-NEXT: movlt r5, #1
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: mvnne r5, #0
+; CHECK-NEXT: subs r1, r6, r1
+; CHECK-NEXT: sbcs r1, r2, r7
+; CHECK-NEXT: vmov r6, r7, d4
; CHECK-NEXT: mov r1, #0
; CHECK-NEXT: movlt r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: mvnne r1, #0
; CHECK-NEXT: vdup.32 d9, r1
-; CHECK-NEXT: vmov r1, r5, d4
+; CHECK-NEXT: vmov r1, r2, d6
; CHECK-NEXT: subs r1, r6, r1
-; CHECK-NEXT: sbcs r1, r7, r5
-; CHECK-NEXT: vmov r6, r7, d3
+; CHECK-NEXT: sbcs r1, r7, r2
+; CHECK-NEXT: vmov r6, r7, d0
; CHECK-NEXT: mov r1, #0
; CHECK-NEXT: movlt r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: mvnne r1, #0
; CHECK-NEXT: vdup.32 d8, r1
-; CHECK-NEXT: vmov r1, r5, d1
-; CHECK-NEXT: vbit q2, q3, q4
+; CHECK-NEXT: vmov r1, r2, d2
+; CHECK-NEXT: vbif q2, q3, q4
+; CHECK-NEXT: vdup.32 d7, r5
; CHECK-NEXT: vdup.32 d9, r4
-; CHECK-NEXT: vdup.32 d8, r2
+; CHECK-NEXT: vmov r4, r5, d1
+; CHECK-NEXT: vdup.32 d8, r0
+; CHECK-NEXT: mov r0, r3
+; CHECK-NEXT: vst1.64 {d26, d27}, [r0:128]!
+; CHECK-NEXT: vbif q12, q14, q4
+; CHECK-NEXT: vdup.32 d6, lr
+; CHECK-NEXT: vbit q11, q15, q3
+; CHECK-NEXT: vst1.64 {d20, d21}, [r0:128]!
; CHECK-NEXT: subs r1, r6, r1
-; CHECK-NEXT: sbcs r1, r7, r5
-; CHECK-NEXT: vmov r5, r6, d24
-; CHECK-NEXT: mov r1, #0
-; CHECK-NEXT: movlt r1, #1
-; CHECK-NEXT: cmp r1, #0
-; CHECK-NEXT: mvnne r1, #0
-; CHECK-NEXT: vdup.32 d7, r1
-; CHECK-NEXT: vmov r1, r4, d20
-; CHECK-NEXT: vdup.32 d6, r0
-; CHECK-NEXT: subs r1, r5, r1
-; CHECK-NEXT: mov r1, r3
-; CHECK-NEXT: sbcs r0, r6, r4
-; CHECK-NEXT: vst1.64 {d16, d17}, [r1:128]!
-; CHECK-NEXT: vorr q8, q4, q4
+; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: sbcs r1, r7, r2
+; CHECK-NEXT: vmov r1, r2, d3
+; CHECK-NEXT: movlt r6, #1
+; CHECK-NEXT: subs r1, r4, r1
+; CHECK-NEXT: sbcs r1, r5, r2
; CHECK-NEXT: movlt r12, #1
; CHECK-NEXT: cmp r12, #0
-; CHECK-NEXT: vbsl q8, q14, q15
-; CHECK-NEXT: vdup.32 d29, lr
-; CHECK-NEXT: vorr q15, q3, q3
; CHECK-NEXT: mvnne r12, #0
-; CHECK-NEXT: vdup.32 d28, r12
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: vdup.32 d27, r12
+; CHECK-NEXT: mvnne r6, #0
+; CHECK-NEXT: vdup.32 d26, r6
+; CHECK-NEXT: vorr q10, q13, q13
+; CHECK-NEXT: vbsl q10, q0, q1
+; CHECK-NEXT: vst1.64 {d4, d5}, [r0:128]!
+; CHECK-NEXT: vst1.64 {d22, d23}, [r0:128]
; CHECK-NEXT: add r0, r3, #64
-; CHECK-NEXT: vbsl q15, q1, q0
-; CHECK-NEXT: vst1.64 {d26, d27}, [r1:128]!
-; CHECK-NEXT: vbit q10, q12, q14
-; CHECK-NEXT: vst1.64 {d18, d19}, [r0:128]!
-; CHECK-NEXT: vst1.64 {d22, d23}, [r0:128]!
-; CHECK-NEXT: vst1.64 {d4, d5}, [r1:128]!
; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]!
-; CHECK-NEXT: vst1.64 {d30, d31}, [r1:128]
+; CHECK-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; CHECK-NEXT: vst1.64 {d24, d25}, [r0:128]!
; CHECK-NEXT: vst1.64 {d20, d21}, [r0:128]
-; CHECK-NEXT: vpop {d8, d9}
-; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEXT: mov pc, lr
; COST: func_blend20
; COST: cost of 0 {{.*}} icmp
diff --git a/llvm/test/CodeGen/ARM/wide-compares.ll b/llvm/test/CodeGen/ARM/wide-compares.ll
index 9acf8d249ddf18..09e3592b6d420e 100644
--- a/llvm/test/CodeGen/ARM/wide-compares.ll
+++ b/llvm/test/CodeGen/ARM/wide-compares.ll
@@ -129,16 +129,19 @@ declare void @g()
define i64 @test_slt_select(i64 %c, i64 %d, i64 %a, i64 %b) {
; CHECK-ARM-LABEL: test_slt_select:
; CHECK-ARM: @ %bb.0: @ %entry
-; CHECK-ARM-NEXT: push {r4, r5, r6, lr}
-; CHECK-ARM-NEXT: ldr r12, [sp, #24]
-; CHECK-ARM-NEXT: ldr lr, [sp, #16]
-; CHECK-ARM-NEXT: ldr r6, [sp, #28]
-; CHECK-ARM-NEXT: ldr r5, [sp, #20]
+; CHECK-ARM-NEXT: push {r4, r5, r6, r7, r11, lr}
+; CHECK-ARM-NEXT: ldr r12, [sp, #32]
+; CHECK-ARM-NEXT: mov r6, #0
+; CHECK-ARM-NEXT: ldr lr, [sp, #24]
+; CHECK-ARM-NEXT: ldr r7, [sp, #36]
+; CHECK-ARM-NEXT: ldr r5, [sp, #28]
; CHECK-ARM-NEXT: subs r4, lr, r12
-; CHECK-ARM-NEXT: sbcs r6, r5, r6
-; CHECK-ARM-NEXT: movhs r0, r2
-; CHECK-ARM-NEXT: movhs r1, r3
-; CHECK-ARM-NEXT: pop {r4, r5, r6, pc}
+; CHECK-ARM-NEXT: sbcs r7, r5, r7
+; CHECK-ARM-NEXT: movwlo r6, #1
+; CHECK-ARM-NEXT: cmp r6, #0
+; CHECK-ARM-NEXT: moveq r0, r2
+; CHECK-ARM-NEXT: moveq r1, r3
+; CHECK-ARM-NEXT: pop {r4, r5, r6, r7, r11, pc}
;
; CHECK-THUMB1-NOMOV-LABEL: test_slt_select:
; CHECK-THUMB1-NOMOV: @ %bb.0: @ %entry
@@ -154,13 +157,22 @@ define i64 @test_slt_select(i64 %c, i64 %d, i64 %a, i64 %b) {
; CHECK-THUMB1-NOMOV-NEXT: sbcs r5, r4
; CHECK-THUMB1-NOMOV-NEXT: blo .LBB2_2
; CHECK-THUMB1-NOMOV-NEXT: @ %bb.1: @ %entry
-; CHECK-THUMB1-NOMOV-NEXT: mov r12, r2
-; CHECK-THUMB1-NOMOV-NEXT: mov r0, r12
-; CHECK-THUMB1-NOMOV-NEXT: .LBB2_2: @ %entry
-; CHECK-THUMB1-NOMOV-NEXT: blo .LBB2_4
-; CHECK-THUMB1-NOMOV-NEXT: @ %bb.3: @ %entry
-; CHECK-THUMB1-NOMOV-NEXT: movs r1, r3
+; CHECK-THUMB1-NOMOV-NEXT: movs r4, #0
+; CHECK-THUMB1-NOMOV-NEXT: cmp r4, #0
+; CHECK-THUMB1-NOMOV-NEXT: beq .LBB2_3
+; CHECK-THUMB1-NOMOV-NEXT: b .LBB2_4
+; CHECK-THUMB1-NOMOV-NEXT: .LBB2_2:
+; CHECK-THUMB1-NOMOV-NEXT: movs r4, #1
+; CHECK-THUMB1-NOMOV-NEXT: cmp r4, #0
+; CHECK-THUMB1-NOMOV-NEXT: bne .LBB2_4
+; CHECK-THUMB1-NOMOV-NEXT: .LBB2_3: @ %entry
+; CHECK-THUMB1-NOMOV-NEXT: movs r0, r2
; CHECK-THUMB1-NOMOV-NEXT: .LBB2_4: @ %entry
+; CHECK-THUMB1-NOMOV-NEXT: cmp r4, #0
+; CHECK-THUMB1-NOMOV-NEXT: bne .LBB2_6
+; CHECK-THUMB1-NOMOV-NEXT: @ %bb.5: @ %entry
+; CHECK-THUMB1-NOMOV-NEXT: movs r1, r3
+; CHECK-THUMB1-NOMOV-NEXT: .LBB2_6: @ %entry
; CHECK-THUMB1-NOMOV-NEXT: add sp, #4
; CHECK-THUMB1-NOMOV-NEXT: pop {r4, r5, r6, r7}
; CHECK-THUMB1-NOMOV-NEXT: pop {r2}
@@ -176,31 +188,46 @@ define i64 @test_slt_select(i64 %c, i64 %d, i64 %a, i64 %b) {
; CHECK-THUMB1-NEXT: ldr r7, [sp, #24]
; CHECK-THUMB1-NEXT: subs r6, r7, r6
; CHECK-THUMB1-NEXT: sbcs r5, r4
-; CHECK-THUMB1-NEXT: bhs .LBB2_3
+; CHECK-THUMB1-NEXT: blo .LBB2_2
; CHECK-THUMB1-NEXT: @ %bb.1: @ %entry
-; CHECK-THUMB1-NEXT: bhs .LBB2_4
-; CHECK-THUMB1-NEXT: .LBB2_2: @ %entry
-; CHECK-THUMB1-NEXT: add sp, #4
-; CHECK-THUMB1-NEXT: pop {r4, r5, r6, r7, pc}
+; CHECK-THUMB1-NEXT: movs r4, #0
+; CHECK-THUMB1-NEXT: cmp r4, #0
+; CHECK-THUMB1-NEXT: beq .LBB2_3
+; CHECK-THUMB1-NEXT: b .LBB2_4
+; CHECK-THUMB1-NEXT: .LBB2_2:
+; CHECK-THUMB1-NEXT: movs r4, #1
+; CHECK-THUMB1-NEXT: cmp r4, #0
+; CHECK-THUMB1-NEXT: bne .LBB2_4
; CHECK-THUMB1-NEXT: .LBB2_3: @ %entry
; CHECK-THUMB1-NEXT: mov r0, r2
-; CHECK-THUMB1-NEXT: blo .LBB2_2
; CHECK-THUMB1-NEXT: .LBB2_4: @ %entry
+; CHECK-THUMB1-NEXT: cmp r4, #0
+; CHECK-THUMB1-NEXT: beq .LBB2_6
+; CHECK-THUMB1-NEXT: @ %bb.5: @ %entry
+; CHECK-THUMB1-NEXT: add sp, #4
+; CHECK-THUMB1-NEXT: pop {r4, r5, r6, r7, pc}
+; CHECK-THUMB1-NEXT: .LBB2_6: @ %entry
; CHECK-THUMB1-NEXT: mov r1, r3
; CHECK-THUMB1-NEXT: add sp, #4
; CHECK-THUMB1-NEXT: pop {r4, r5, r6, r7, pc}
;
; CHECK-THUMB2-LABEL: test_slt_select:
; CHECK-THUMB2: @ %bb.0: @ %entry
-; CHECK-THUMB2-NEXT: push {r4, r5, r6, lr}
-; CHECK-THUMB2-NEXT: ldrd r12, r6, [sp, #24]
-; CHECK-THUMB2-NEXT: ldrd lr, r5, [sp, #16]
+; CHECK-THUMB2-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-THUMB2-NEXT: sub sp, #4
+; CHECK-THUMB2-NEXT: ldrd r12, r7, [sp, #32]
+; CHECK-THUMB2-NEXT: movs r6, #0
+; CHECK-THUMB2-NEXT: ldrd lr, r5, [sp, #24]
; CHECK-THUMB2-NEXT: subs.w r4, lr, r12
-; CHECK-THUMB2-NEXT: sbcs.w r6, r5, r6
-; CHECK-THUMB2-NEXT: itt hs
-; CHECK-THUMB2-NEXT: movhs r0, r2
-; CHECK-THUMB2-NEXT: movhs r1, r3
-; CHECK-THUMB2-NEXT: pop {r4, r5, r6, pc}
+; CHECK-THUMB2-NEXT: sbcs.w r7, r5, r7
+; CHECK-THUMB2-NEXT: it lo
+; CHECK-THUMB2-NEXT: movlo r6, #1
+; CHECK-THUMB2-NEXT: cmp r6, #0
+; CHECK-THUMB2-NEXT: itt eq
+; CHECK-THUMB2-NEXT: moveq r0, r2
+; CHECK-THUMB2-NEXT: moveq r1, r3
+; CHECK-THUMB2-NEXT: add sp, #4
+; CHECK-THUMB2-NEXT: pop {r4, r5, r6, r7, pc}
entry:
%cmp = icmp ult i64 %a, %b
%r1 = select i1 %cmp, i64 %c, i64 %d
diff --git a/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll b/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll
index 9d69417e8f6b5b..fdfbf3393098e4 100644
--- a/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll
+++ b/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll
@@ -501,8 +501,9 @@ define void @arm_q15_to_q31_altorder(ptr nocapture noundef readonly %pSrc, ptr n
; CHECK-NEXT: lsls r2, r2, #16
; CHECK-NEXT: str r2, [r1, #48]
; CHECK-NEXT: adds r1, #64
-; CHECK-NEXT: adds r0, #32
; CHECK-NEXT: subs r3, r3, #4
+; CHECK-NEXT: adds r0, #32
+; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: bne .LBB1_5
; CHECK-NEXT: .LBB1_6: @ %while.end
; CHECK-NEXT: movs r2, #3
diff --git a/llvm/test/CodeGen/Thumb/select.ll b/llvm/test/CodeGen/Thumb/select.ll
index 89cc4c10fe51d6..0065616c4646e9 100644
--- a/llvm/test/CodeGen/Thumb/select.ll
+++ b/llvm/test/CodeGen/Thumb/select.ll
@@ -94,8 +94,8 @@ entry:
; CHECK-LABEL: f8:
; CHECK: cmp r0, r1
; CHECK: blt
-; CHECK: mov
-; CHECK: mov
+; CHECK: movs
+; CHECK: cmp r0, r1
; CHECK: blt
; CHECK: movs
; CHECK: movs
diff --git a/llvm/test/CodeGen/Thumb/smul_fix_sat.ll b/llvm/test/CodeGen/Thumb/smul_fix_sat.ll
index 8bc39ea0370a60..f8557419c41990 100644
--- a/llvm/test/CodeGen/Thumb/smul_fix_sat.ll
+++ b/llvm/test/CodeGen/Thumb/smul_fix_sat.ll
@@ -49,24 +49,25 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
; ARM-NEXT: sub sp, #36
; ARM-NEXT: str r3, [sp, #28] @ 4-byte Spill
; ARM-NEXT: mov r6, r1
-; ARM-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; ARM-NEXT: str r1, [sp, #8] @ 4-byte Spill
; ARM-NEXT: movs r4, #0
+; ARM-NEXT: str r4, [sp, #32] @ 4-byte Spill
; ARM-NEXT: mov r5, r0
-; ARM-NEXT: str r0, [sp, #16] @ 4-byte Spill
+; ARM-NEXT: str r0, [sp, #20] @ 4-byte Spill
; ARM-NEXT: mov r1, r4
; ARM-NEXT: mov r7, r2
-; ARM-NEXT: str r2, [sp, #12] @ 4-byte Spill
+; ARM-NEXT: str r2, [sp, #16] @ 4-byte Spill
; ARM-NEXT: mov r3, r4
; ARM-NEXT: bl __aeabi_lmul
-; ARM-NEXT: str r0, [sp, #24] @ 4-byte Spill
-; ARM-NEXT: str r1, [sp, #32] @ 4-byte Spill
+; ARM-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; ARM-NEXT: str r1, [sp, #24] @ 4-byte Spill
; ARM-NEXT: mov r0, r6
; ARM-NEXT: mov r1, r4
; ARM-NEXT: mov r2, r7
; ARM-NEXT: mov r3, r4
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: mov r6, r1
-; ARM-NEXT: ldr r1, [sp, #32] @ 4-byte Reload
+; ARM-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
; ARM-NEXT: adds r7, r0, r1
; ARM-NEXT: adcs r6, r4
; ARM-NEXT: mov r0, r5
@@ -76,108 +77,118 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
; ARM-NEXT: mov r3, r4
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: adds r0, r0, r7
-; ARM-NEXT: str r0, [sp, #20] @ 4-byte Spill
+; ARM-NEXT: str r0, [sp, #24] @ 4-byte Spill
; ARM-NEXT: adcs r1, r4
; ARM-NEXT: adds r0, r6, r1
-; ARM-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; ARM-NEXT: str r0, [sp, #12] @ 4-byte Spill
; ARM-NEXT: mov r6, r4
; ARM-NEXT: adcs r6, r4
-; ARM-NEXT: ldr r7, [sp, #4] @ 4-byte Reload
+; ARM-NEXT: ldr r7, [sp, #8] @ 4-byte Reload
; ARM-NEXT: mov r0, r7
; ARM-NEXT: mov r1, r4
; ARM-NEXT: mov r2, r5
-; ARM-NEXT: str r4, [sp, #32] @ 4-byte Spill
; ARM-NEXT: mov r3, r4
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: mov r5, r1
-; ARM-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
+; ARM-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
; ARM-NEXT: adds r0, r0, r1
-; ARM-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; ARM-NEXT: str r0, [sp, #12] @ 4-byte Spill
; ARM-NEXT: adcs r5, r6
-; ARM-NEXT: mov r4, r7
; ARM-NEXT: asrs r2, r7, #31
-; ARM-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
-; ARM-NEXT: ldr r7, [sp, #28] @ 4-byte Reload
-; ARM-NEXT: mov r1, r7
+; ARM-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
+; ARM-NEXT: ldr r4, [sp, #28] @ 4-byte Reload
+; ARM-NEXT: mov r1, r4
; ARM-NEXT: mov r3, r2
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: mov r6, r0
-; ARM-NEXT: str r1, [sp, #12] @ 4-byte Spill
-; ARM-NEXT: asrs r0, r7, #31
+; ARM-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; ARM-NEXT: asrs r0, r4, #31
; ARM-NEXT: mov r1, r0
-; ARM-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
-; ARM-NEXT: mov r3, r4
+; ARM-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
+; ARM-NEXT: mov r3, r7
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: adds r0, r0, r6
-; ARM-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
+; ARM-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
; ARM-NEXT: adcs r1, r2
-; ARM-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; ARM-NEXT: adds r3, r2, r0
+; ARM-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
+; ARM-NEXT: adds r0, r2, r0
; ARM-NEXT: adcs r1, r5
-; ARM-NEXT: rsbs r2, r1, #0
-; ARM-NEXT: adcs r2, r1
-; ARM-NEXT: movs r0, #1
-; ARM-NEXT: cmp r3, #1
-; ARM-NEXT: mov r5, r0
+; ARM-NEXT: rsbs r5, r1, #0
+; ARM-NEXT: adcs r5, r1
+; ARM-NEXT: movs r2, #1
+; ARM-NEXT: str r0, [sp, #28] @ 4-byte Spill
+; ARM-NEXT: cmp r0, #1
+; ARM-NEXT: mov r3, r2
; ARM-NEXT: bhi .LBB1_2
; ARM-NEXT: @ %bb.1:
-; ARM-NEXT: ldr r5, [sp, #32] @ 4-byte Reload
+; ARM-NEXT: ldr r3, [sp, #32] @ 4-byte Reload
; ARM-NEXT: .LBB1_2:
-; ARM-NEXT: ands r2, r5
+; ARM-NEXT: ands r5, r3
; ARM-NEXT: cmp r1, #0
-; ARM-NEXT: mov r5, r0
+; ARM-NEXT: mov r3, r2
; ARM-NEXT: bgt .LBB1_4
; ARM-NEXT: @ %bb.3:
-; ARM-NEXT: ldr r5, [sp, #32] @ 4-byte Reload
+; ARM-NEXT: ldr r3, [sp, #32] @ 4-byte Reload
; ARM-NEXT: .LBB1_4:
-; ARM-NEXT: orrs r5, r2
-; ARM-NEXT: lsls r2, r3, #30
-; ARM-NEXT: ldr r6, [sp, #20] @ 4-byte Reload
-; ARM-NEXT: lsrs r4, r6, #2
-; ARM-NEXT: adds r2, r2, r4
-; ARM-NEXT: lsls r4, r6, #30
-; ARM-NEXT: ldr r6, [sp, #24] @ 4-byte Reload
-; ARM-NEXT: lsrs r6, r6, #2
-; ARM-NEXT: adds r7, r4, r6
-; ARM-NEXT: ldr r4, [sp, #32] @ 4-byte Reload
-; ARM-NEXT: mvns r6, r4
-; ARM-NEXT: cmp r5, #0
-; ARM-NEXT: beq .LBB1_6
+; ARM-NEXT: orrs r3, r5
+; ARM-NEXT: ldr r0, [sp, #32] @ 4-byte Reload
+; ARM-NEXT: mvns r6, r0
+; ARM-NEXT: cmp r3, #0
+; ARM-NEXT: str r6, [sp, #20] @ 4-byte Spill
+; ARM-NEXT: bne .LBB1_6
; ARM-NEXT: @ %bb.5:
-; ARM-NEXT: ldr r2, .LCPI1_0
+; ARM-NEXT: ldr r0, [sp, #24] @ 4-byte Reload
+; ARM-NEXT: lsls r0, r0, #30
+; ARM-NEXT: ldr r4, [sp, #4] @ 4-byte Reload
+; ARM-NEXT: lsrs r4, r4, #2
+; ARM-NEXT: adds r0, r0, r4
+; ARM-NEXT: str r0, [sp, #20] @ 4-byte Spill
; ARM-NEXT: .LBB1_6:
-; ARM-NEXT: mov r5, r6
-; ARM-NEXT: bne .LBB1_8
+; ARM-NEXT: adds r0, r1, #1
+; ARM-NEXT: rsbs r7, r0, #0
+; ARM-NEXT: adcs r7, r0
+; ARM-NEXT: mvns r0, r2
+; ARM-NEXT: ldr r5, [sp, #28] @ 4-byte Reload
+; ARM-NEXT: cmp r5, r0
+; ARM-NEXT: mov r0, r2
+; ARM-NEXT: blo .LBB1_8
; ARM-NEXT: @ %bb.7:
-; ARM-NEXT: mov r5, r7
+; ARM-NEXT: ldr r0, [sp, #32] @ 4-byte Reload
; ARM-NEXT: .LBB1_8:
-; ARM-NEXT: adds r4, r1, #1
-; ARM-NEXT: rsbs r7, r4, #0
-; ARM-NEXT: adcs r7, r4
-; ARM-NEXT: mvns r4, r0
-; ARM-NEXT: cmp r3, r4
-; ARM-NEXT: mov r3, r0
-; ARM-NEXT: blo .LBB1_10
+; ARM-NEXT: ands r7, r0
+; ARM-NEXT: cmp r1, r6
+; ARM-NEXT: mov r6, r2
+; ARM-NEXT: bge .LBB1_12
; ARM-NEXT: @ %bb.9:
-; ARM-NEXT: ldr r3, [sp, #32] @ 4-byte Reload
+; ARM-NEXT: orrs r6, r7
+; ARM-NEXT: beq .LBB1_13
; ARM-NEXT: .LBB1_10:
-; ARM-NEXT: ands r7, r3
-; ARM-NEXT: cmp r1, r6
-; ARM-NEXT: mov r3, r0
-; ARM-NEXT: blt .LBB1_12
-; ARM-NEXT: @ %bb.11:
-; ARM-NEXT: ldr r3, [sp, #32] @ 4-byte Reload
-; ARM-NEXT: .LBB1_12:
-; ARM-NEXT: orrs r3, r7
-; ARM-NEXT: lsls r1, r0, #31
; ARM-NEXT: cmp r3, #0
; ARM-NEXT: bne .LBB1_14
-; ARM-NEXT: @ %bb.13:
-; ARM-NEXT: str r5, [sp, #32] @ 4-byte Spill
+; ARM-NEXT: .LBB1_11:
+; ARM-NEXT: ldr r0, [sp, #28] @ 4-byte Reload
+; ARM-NEXT: lsls r0, r0, #30
+; ARM-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
+; ARM-NEXT: lsrs r1, r1, #2
+; ARM-NEXT: adds r1, r0, r1
+; ARM-NEXT: cmp r6, #0
+; ARM-NEXT: bne .LBB1_15
+; ARM-NEXT: b .LBB1_16
+; ARM-NEXT: .LBB1_12:
+; ARM-NEXT: ldr r6, [sp, #32] @ 4-byte Reload
+; ARM-NEXT: orrs r6, r7
+; ARM-NEXT: bne .LBB1_10
+; ARM-NEXT: .LBB1_13:
+; ARM-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
+; ARM-NEXT: str r0, [sp, #32] @ 4-byte Spill
+; ARM-NEXT: cmp r3, #0
+; ARM-NEXT: beq .LBB1_11
; ARM-NEXT: .LBB1_14:
-; ARM-NEXT: bne .LBB1_16
-; ARM-NEXT: @ %bb.15:
-; ARM-NEXT: mov r1, r2
+; ARM-NEXT: ldr r1, .LCPI1_0
+; ARM-NEXT: cmp r6, #0
+; ARM-NEXT: beq .LBB1_16
+; ARM-NEXT: .LBB1_15:
+; ARM-NEXT: lsls r1, r2, #31
; ARM-NEXT: .LBB1_16:
; ARM-NEXT: ldr r0, [sp, #32] @ 4-byte Reload
; ARM-NEXT: add sp, #36
@@ -271,44 +282,44 @@ define i64 @func5(i64 %x, i64 %y) {
; ARM-NEXT: push {r4, r5, r6, r7, lr}
; ARM-NEXT: .pad #28
; ARM-NEXT: sub sp, #28
-; ARM-NEXT: str r3, [sp, #24] @ 4-byte Spill
-; ARM-NEXT: mov r4, r2
+; ARM-NEXT: str r3, [sp, #12] @ 4-byte Spill
+; ARM-NEXT: mov r5, r2
; ARM-NEXT: str r2, [sp, #16] @ 4-byte Spill
-; ARM-NEXT: mov r5, r1
+; ARM-NEXT: mov r6, r1
; ARM-NEXT: movs r7, #0
-; ARM-NEXT: mov r6, r0
+; ARM-NEXT: mov r4, r0
; ARM-NEXT: str r0, [sp, #20] @ 4-byte Spill
; ARM-NEXT: mov r1, r7
; ARM-NEXT: mov r3, r7
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: str r0, [sp, #4] @ 4-byte Spill
-; ARM-NEXT: str r1, [sp, #12] @ 4-byte Spill
-; ARM-NEXT: mov r0, r5
+; ARM-NEXT: str r1, [sp, #24] @ 4-byte Spill
+; ARM-NEXT: mov r0, r6
; ARM-NEXT: mov r1, r7
-; ARM-NEXT: mov r2, r4
+; ARM-NEXT: mov r2, r5
; ARM-NEXT: mov r3, r7
; ARM-NEXT: bl __aeabi_lmul
-; ARM-NEXT: mov r4, r1
-; ARM-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
+; ARM-NEXT: mov r5, r1
+; ARM-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
; ARM-NEXT: adds r0, r0, r1
-; ARM-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; ARM-NEXT: adcs r4, r7
-; ARM-NEXT: mov r0, r6
+; ARM-NEXT: str r0, [sp, #24] @ 4-byte Spill
+; ARM-NEXT: adcs r5, r7
+; ARM-NEXT: mov r0, r4
; ARM-NEXT: mov r1, r7
-; ARM-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
+; ARM-NEXT: ldr r4, [sp, #12] @ 4-byte Reload
+; ARM-NEXT: mov r2, r4
; ARM-NEXT: mov r3, r7
; ARM-NEXT: bl __aeabi_lmul
-; ARM-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
+; ARM-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
; ARM-NEXT: adds r0, r0, r2
-; ARM-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; ARM-NEXT: str r0, [sp, #24] @ 4-byte Spill
; ARM-NEXT: adcs r1, r7
-; ARM-NEXT: adds r0, r4, r1
+; ARM-NEXT: adds r0, r5, r1
; ARM-NEXT: str r0, [sp, #8] @ 4-byte Spill
-; ARM-NEXT: mov r6, r7
-; ARM-NEXT: adcs r6, r7
-; ARM-NEXT: mov r0, r5
+; ARM-NEXT: mov r5, r7
+; ARM-NEXT: adcs r5, r7
+; ARM-NEXT: mov r0, r6
; ARM-NEXT: mov r1, r7
-; ARM-NEXT: ldr r4, [sp, #24] @ 4-byte Reload
; ARM-NEXT: mov r2, r4
; ARM-NEXT: mov r3, r7
; ARM-NEXT: bl __aeabi_lmul
@@ -316,48 +327,50 @@ define i64 @func5(i64 %x, i64 %y) {
; ARM-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
; ARM-NEXT: adds r0, r0, r1
; ARM-NEXT: str r0, [sp, #8] @ 4-byte Spill
-; ARM-NEXT: adcs r7, r6
-; ARM-NEXT: asrs r2, r5, #31
+; ARM-NEXT: adcs r7, r5
+; ARM-NEXT: asrs r2, r6, #31
; ARM-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
-; ARM-NEXT: mov r6, r4
+; ARM-NEXT: mov r5, r4
; ARM-NEXT: mov r1, r4
; ARM-NEXT: mov r3, r2
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: mov r4, r0
; ARM-NEXT: str r1, [sp, #16] @ 4-byte Spill
-; ARM-NEXT: asrs r0, r6, #31
+; ARM-NEXT: asrs r0, r5, #31
; ARM-NEXT: mov r1, r0
; ARM-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
-; ARM-NEXT: mov r3, r5
+; ARM-NEXT: mov r3, r6
; ARM-NEXT: bl __aeabi_lmul
-; ARM-NEXT: ldr r3, [sp, #12] @ 4-byte Reload
; ARM-NEXT: adds r0, r0, r4
+; ARM-NEXT: ldr r4, [sp, #24] @ 4-byte Reload
; ARM-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
; ARM-NEXT: adcs r1, r2
; ARM-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; ARM-NEXT: adds r2, r2, r0
+; ARM-NEXT: adds r3, r2, r0
; ARM-NEXT: adcs r1, r7
-; ARM-NEXT: asrs r0, r3, #31
-; ARM-NEXT: eors r1, r0
-; ARM-NEXT: eors r2, r0
-; ARM-NEXT: orrs r2, r1
-; ARM-NEXT: eors r5, r6
-; ARM-NEXT: asrs r0, r5, #31
-; ARM-NEXT: ldr r1, .LCPI4_0
-; ARM-NEXT: eors r1, r0
-; ARM-NEXT: mvns r0, r0
-; ARM-NEXT: cmp r2, #0
-; ARM-NEXT: beq .LBB4_3
+; ARM-NEXT: asrs r2, r4, #31
+; ARM-NEXT: eors r1, r2
+; ARM-NEXT: eors r3, r2
+; ARM-NEXT: orrs r3, r1
+; ARM-NEXT: eors r6, r5
+; ARM-NEXT: asrs r1, r6, #31
+; ARM-NEXT: cmp r3, #0
+; ARM-NEXT: bne .LBB4_3
; ARM-NEXT: @ %bb.1:
+; ARM-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; ARM-NEXT: cmp r3, #0
; ARM-NEXT: beq .LBB4_4
; ARM-NEXT: .LBB4_2:
+; ARM-NEXT: ldr r2, .LCPI4_0
+; ARM-NEXT: eors r1, r2
; ARM-NEXT: add sp, #28
; ARM-NEXT: pop {r4, r5, r6, r7, pc}
; ARM-NEXT: .LBB4_3:
-; ARM-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; ARM-NEXT: mvns r0, r1
+; ARM-NEXT: cmp r3, #0
; ARM-NEXT: bne .LBB4_2
; ARM-NEXT: .LBB4_4:
-; ARM-NEXT: mov r1, r3
+; ARM-NEXT: mov r1, r4
; ARM-NEXT: add sp, #28
; ARM-NEXT: pop {r4, r5, r6, r7, pc}
; ARM-NEXT: .p2align 2
@@ -470,67 +483,79 @@ define i64 @func7(i64 %x, i64 %y) nounwind {
; ARM-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
; ARM-NEXT: ldr r3, [sp, #20] @ 4-byte Reload
; ARM-NEXT: bl __aeabi_lmul
+; ARM-NEXT: mov r2, r1
; ARM-NEXT: adds r0, r0, r4
-; ARM-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
-; ARM-NEXT: adcs r1, r2
-; ARM-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; ARM-NEXT: adds r0, r2, r0
-; ARM-NEXT: adcs r1, r7
-; ARM-NEXT: rsbs r5, r1, #0
-; ARM-NEXT: adcs r5, r1
-; ARM-NEXT: movs r2, #1
+; ARM-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
+; ARM-NEXT: adcs r2, r1
+; ARM-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
+; ARM-NEXT: adds r0, r1, r0
+; ARM-NEXT: adcs r2, r7
+; ARM-NEXT: rsbs r5, r2, #0
+; ARM-NEXT: adcs r5, r2
+; ARM-NEXT: movs r4, #1
; ARM-NEXT: str r0, [sp, #24] @ 4-byte Spill
; ARM-NEXT: cmp r0, #0
-; ARM-NEXT: mov r3, r2
-; ARM-NEXT: bge .LBB6_2
+; ARM-NEXT: mov r3, r4
+; ARM-NEXT: bmi .LBB6_2
; ARM-NEXT: @ %bb.1:
; ARM-NEXT: mov r3, r6
; ARM-NEXT: .LBB6_2:
-; ARM-NEXT: mov r4, r2
-; ARM-NEXT: bmi .LBB6_4
+; ARM-NEXT: ands r5, r3
+; ARM-NEXT: cmp r2, #0
+; ARM-NEXT: mov r1, r4
+; ARM-NEXT: mov r3, r4
+; ARM-NEXT: bgt .LBB6_4
; ARM-NEXT: @ %bb.3:
-; ARM-NEXT: mov r4, r6
+; ARM-NEXT: mov r3, r6
; ARM-NEXT: .LBB6_4:
-; ARM-NEXT: ands r5, r4
-; ARM-NEXT: cmp r1, #0
-; ARM-NEXT: mov r7, r2
-; ARM-NEXT: bgt .LBB6_6
+; ARM-NEXT: orrs r3, r5
+; ARM-NEXT: mvns r4, r6
+; ARM-NEXT: cmp r3, #0
+; ARM-NEXT: mov r5, r4
+; ARM-NEXT: bne .LBB6_6
; ARM-NEXT: @ %bb.5:
-; ARM-NEXT: mov r7, r6
+; ARM-NEXT: ldr r5, [sp, #4] @ 4-byte Reload
; ARM-NEXT: .LBB6_6:
-; ARM-NEXT: orrs r7, r5
-; ARM-NEXT: mvns r4, r6
-; ARM-NEXT: cmp r7, #0
-; ARM-NEXT: beq .LBB6_8
+; ARM-NEXT: adds r0, r2, #1
+; ARM-NEXT: rsbs r7, r0, #0
+; ARM-NEXT: adcs r7, r0
+; ARM-NEXT: ldr r0, [sp, #24] @ 4-byte Reload
+; ARM-NEXT: cmp r0, #0
+; ARM-NEXT: mov r0, r1
+; ARM-NEXT: bge .LBB6_8
; ARM-NEXT: @ %bb.7:
-; ARM-NEXT: ldr r0, .LCPI6_0
-; ARM-NEXT: str r0, [sp, #24] @ 4-byte Spill
+; ARM-NEXT: mov r0, r6
; ARM-NEXT: .LBB6_8:
-; ARM-NEXT: mov r5, r4
-; ARM-NEXT: bne .LBB6_10
+; ARM-NEXT: ands r7, r0
+; ARM-NEXT: cmp r2, r4
+; ARM-NEXT: mov r0, r1
+; ARM-NEXT: mov r2, r1
+; ARM-NEXT: bge .LBB6_12
; ARM-NEXT: @ %bb.9:
-; ARM-NEXT: ldr r5, [sp, #4] @ 4-byte Reload
+; ARM-NEXT: orrs r2, r7
+; ARM-NEXT: beq .LBB6_13
; ARM-NEXT: .LBB6_10:
-; ARM-NEXT: adds r0, r1, #1
-; ARM-NEXT: rsbs r7, r0, #0
-; ARM-NEXT: adcs r7, r0
-; ARM-NEXT: ands r7, r3
-; ARM-NEXT: cmp r1, r4
-; ARM-NEXT: mov r3, r2
-; ARM-NEXT: blt .LBB6_12
-; ARM-NEXT: @ %bb.11:
-; ARM-NEXT: mov r3, r6
-; ARM-NEXT: .LBB6_12:
-; ARM-NEXT: orrs r3, r7
-; ARM-NEXT: lsls r1, r2, #31
; ARM-NEXT: cmp r3, #0
; ARM-NEXT: bne .LBB6_14
-; ARM-NEXT: @ %bb.13:
+; ARM-NEXT: .LBB6_11:
+; ARM-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
+; ARM-NEXT: cmp r2, #0
+; ARM-NEXT: bne .LBB6_15
+; ARM-NEXT: b .LBB6_16
+; ARM-NEXT: .LBB6_12:
+; ARM-NEXT: mov r2, r6
+; ARM-NEXT: orrs r2, r7
+; ARM-NEXT: bne .LBB6_10
+; ARM-NEXT: .LBB6_13:
; ARM-NEXT: mov r6, r5
+; ARM-NEXT: cmp r3, #0
+; ARM-NEXT: beq .LBB6_11
; ARM-NEXT: .LBB6_14:
-; ARM-NEXT: bne .LBB6_16
-; ARM-NEXT: @ %bb.15:
-; ARM-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
+; ARM-NEXT: ldr r1, .LCPI6_0
+; ARM-NEXT: cmp r2, #0
+; ARM-NEXT: beq .LBB6_16
+; ARM-NEXT: .LBB6_15:
+; ARM-NEXT: lsls r1, r0, #31
; ARM-NEXT: .LBB6_16:
; ARM-NEXT: mov r0, r6
; ARM-NEXT: add sp, #28
@@ -552,23 +577,23 @@ define i64 @func8(i64 %x, i64 %y) nounwind {
; ARM-NEXT: sub sp, #28
; ARM-NEXT: str r3, [sp, #24] @ 4-byte Spill
; ARM-NEXT: mov r5, r2
-; ARM-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; ARM-NEXT: str r2, [sp, #12] @ 4-byte Spill
; ARM-NEXT: mov r4, r1
; ARM-NEXT: str r1, [sp, #20] @ 4-byte Spill
; ARM-NEXT: movs r7, #0
; ARM-NEXT: mov r6, r0
-; ARM-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; ARM-NEXT: str r0, [sp, #16] @ 4-byte Spill
; ARM-NEXT: mov r1, r7
; ARM-NEXT: mov r3, r7
; ARM-NEXT: bl __aeabi_lmul
-; ARM-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; ARM-NEXT: str r1, [sp, #8] @ 4-byte Spill
; ARM-NEXT: mov r0, r4
; ARM-NEXT: mov r1, r7
; ARM-NEXT: mov r2, r5
; ARM-NEXT: mov r3, r7
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: mov r5, r1
-; ARM-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
+; ARM-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
; ARM-NEXT: adds r4, r0, r1
; ARM-NEXT: adcs r5, r7
; ARM-NEXT: mov r0, r6
@@ -578,10 +603,10 @@ define i64 @func8(i64 %x, i64 %y) nounwind {
; ARM-NEXT: mov r3, r7
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: adds r0, r0, r4
-; ARM-NEXT: str r0, [sp, #16] @ 4-byte Spill
+; ARM-NEXT: str r0, [sp, #4] @ 4-byte Spill
; ARM-NEXT: adcs r1, r7
; ARM-NEXT: adds r0, r5, r1
-; ARM-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; ARM-NEXT: str r0, [sp, #8] @ 4-byte Spill
; ARM-NEXT: mov r4, r7
; ARM-NEXT: adcs r4, r7
; ARM-NEXT: ldr r5, [sp, #20] @ 4-byte Reload
@@ -591,70 +616,75 @@ define i64 @func8(i64 %x, i64 %y) nounwind {
; ARM-NEXT: mov r3, r7
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: mov r6, r1
-; ARM-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
+; ARM-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
; ARM-NEXT: adds r0, r0, r1
-; ARM-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; ARM-NEXT: str r0, [sp, #8] @ 4-byte Spill
; ARM-NEXT: adcs r6, r4
; ARM-NEXT: asrs r2, r5, #31
-; ARM-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; ARM-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; ARM-NEXT: ldr r4, [sp, #24] @ 4-byte Reload
; ARM-NEXT: mov r1, r4
; ARM-NEXT: mov r3, r2
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: mov r5, r0
-; ARM-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; ARM-NEXT: str r1, [sp, #12] @ 4-byte Spill
; ARM-NEXT: asrs r0, r4, #31
; ARM-NEXT: mov r1, r0
-; ARM-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
+; ARM-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
; ARM-NEXT: ldr r3, [sp, #20] @ 4-byte Reload
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: adds r0, r0, r5
-; ARM-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; ARM-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
; ARM-NEXT: adcs r1, r2
-; ARM-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; ARM-NEXT: adds r2, r2, r0
+; ARM-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; ARM-NEXT: adds r0, r2, r0
; ARM-NEXT: adcs r1, r6
-; ARM-NEXT: lsls r0, r1, #1
-; ARM-NEXT: lsrs r3, r2, #31
-; ARM-NEXT: adds r0, r0, r3
-; ARM-NEXT: lsls r2, r2, #1
-; ARM-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
-; ARM-NEXT: lsrs r3, r3, #31
-; ARM-NEXT: adds r2, r2, r3
-; ARM-NEXT: mvns r3, r7
-; ARM-NEXT: ldr r4, .LCPI7_1
-; ARM-NEXT: cmp r1, r4
-; ARM-NEXT: ble .LBB7_2
+; ARM-NEXT: ldr r2, .LCPI7_0
+; ARM-NEXT: cmp r1, r2
+; ARM-NEXT: bgt .LBB7_2
; ARM-NEXT: @ %bb.1:
-; ARM-NEXT: ldr r0, .LCPI7_0
+; ARM-NEXT: lsls r3, r0, #1
+; ARM-NEXT: ldr r4, [sp, #4] @ 4-byte Reload
+; ARM-NEXT: lsrs r4, r4, #31
+; ARM-NEXT: adds r5, r3, r4
+; ARM-NEXT: b .LBB7_3
; ARM-NEXT: .LBB7_2:
-; ARM-NEXT: bgt .LBB7_4
-; ARM-NEXT: @ %bb.3:
-; ARM-NEXT: mov r3, r2
-; ARM-NEXT: .LBB7_4:
-; ARM-NEXT: movs r2, #1
-; ARM-NEXT: lsls r2, r2, #31
-; ARM-NEXT: movs r4, #3
-; ARM-NEXT: lsls r4, r4, #30
-; ARM-NEXT: cmp r1, r4
-; ARM-NEXT: blt .LBB7_6
-; ARM-NEXT: @ %bb.5:
-; ARM-NEXT: mov r7, r3
-; ARM-NEXT: .LBB7_6:
+; ARM-NEXT: mvns r5, r7
+; ARM-NEXT: .LBB7_3:
+; ARM-NEXT: movs r3, #3
+; ARM-NEXT: lsls r3, r3, #30
+; ARM-NEXT: cmp r1, r3
+; ARM-NEXT: blt .LBB7_5
+; ARM-NEXT: @ %bb.4:
+; ARM-NEXT: mov r7, r5
+; ARM-NEXT: .LBB7_5:
+; ARM-NEXT: cmp r1, r2
+; ARM-NEXT: bgt .LBB7_7
+; ARM-NEXT: @ %bb.6:
+; ARM-NEXT: lsls r2, r1, #1
+; ARM-NEXT: lsrs r0, r0, #31
+; ARM-NEXT: adds r2, r2, r0
+; ARM-NEXT: cmp r1, r3
; ARM-NEXT: blt .LBB7_8
-; ARM-NEXT: @ %bb.7:
-; ARM-NEXT: mov r2, r0
+; ARM-NEXT: b .LBB7_9
+; ARM-NEXT: .LBB7_7:
+; ARM-NEXT: ldr r2, .LCPI7_1
+; ARM-NEXT: cmp r1, r3
+; ARM-NEXT: bge .LBB7_9
; ARM-NEXT: .LBB7_8:
+; ARM-NEXT: movs r0, #1
+; ARM-NEXT: lsls r2, r0, #31
+; ARM-NEXT: .LBB7_9:
; ARM-NEXT: mov r0, r7
; ARM-NEXT: mov r1, r2
; ARM-NEXT: add sp, #28
; ARM-NEXT: pop {r4, r5, r6, r7, pc}
; ARM-NEXT: .p2align 2
-; ARM-NEXT: @ %bb.9:
+; ARM-NEXT: @ %bb.10:
; ARM-NEXT: .LCPI7_0:
-; ARM-NEXT: .long 2147483647 @ 0x7fffffff
-; ARM-NEXT: .LCPI7_1:
; ARM-NEXT: .long 1073741823 @ 0x3fffffff
+; ARM-NEXT: .LCPI7_1:
+; ARM-NEXT: .long 2147483647 @ 0x7fffffff
%tmp = call i64 @llvm.smul.fix.sat.i64(i64 %x, i64 %y, i32 63)
ret i64 %tmp
}
diff --git a/llvm/test/CodeGen/Thumb/stack-guard-xo.ll b/llvm/test/CodeGen/Thumb/stack-guard-xo.ll
index 421583d07ac7fe..c344e4f7fc022d 100644
--- a/llvm/test/CodeGen/Thumb/stack-guard-xo.ll
+++ b/llvm/test/CodeGen/Thumb/stack-guard-xo.ll
@@ -33,9 +33,9 @@ define dso_local i32 @main() #0 {
; V6M-LABEL: main:
; V6M: @ %bb.0: @ %entry
; V6M-NEXT: push {r7, lr}
-; V6M-NEXT: sub sp, #24
+; V6M-NEXT: sub sp, #
; V6M-NEXT: movs r0, #0
-; V6M-NEXT: str r0, [sp, #4]
+; V6M-NEXT: str r0, [sp, #
; V6M-NEXT: mrs r12, apsr
; V6M-NEXT: movs r0, :upper8_15:__stack_chk_guard
; V6M-NEXT: lsls r0, r0, #8
@@ -46,10 +46,10 @@ define dso_local i32 @main() #0 {
; V6M-NEXT: adds r0, :lower0_7:__stack_chk_guard
; V6M-NEXT: msr apsr, r12
; V6M-NEXT: ldr r0, [r0]
-; V6M-NEXT: str r0, [sp, #20]
-; V6M-NEXT: add r0, sp, #8
+; V6M-NEXT: str r0, [sp, #
+; V6M-NEXT: add r0, sp, #
; V6M-NEXT: ldrb r0, [r0]
-; V6M-NEXT: ldr r1, [sp, #20]
+; V6M-NEXT: ldr r1, [sp, #
; V6M-NEXT: mrs r12, apsr
; V6M-NEXT: movs r2, :upper8_15:__stack_chk_guard
; V6M-NEXT: lsls r2, r2, #8
@@ -63,7 +63,7 @@ define dso_local i32 @main() #0 {
; V6M-NEXT: cmp r2, r1
; V6M-NEXT: bne .LBB0_2
; V6M-NEXT: @ %bb.1: @ %entry
-; V6M-NEXT: add sp, #24
+; V6M-NEXT: add sp, #
; V6M-NEXT: pop {r7, pc}
; V6M-NEXT: .LBB0_2: @ %entry
; V6M-NEXT: bl __stack_chk_fail
@@ -105,177 +105,77 @@ entry:
@bb = hidden local_unnamed_addr global i64 0, align 8
define dso_local i64 @cc() local_unnamed_addr #1 {
-; CHECK-LABEL: cc:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: push {r4, r5, r7, lr}
-; CHECK-NEXT: sub sp, #8
-; CHECK-NEXT: movs r0, #1
-; CHECK-NEXT: lsls r3, r0, #31
-; CHECK-NEXT: ldr r0, .LCPI1_0
-; CHECK-NEXT: ldr r2, [r0]
-; CHECK-NEXT: asrs r4, r2, #31
-; CHECK-NEXT: eors r3, r4
-; CHECK-NEXT: ldr r0, .LCPI1_1
-; CHECK-NEXT: ldm r0!, {r1, r5}
-; CHECK-NEXT: subs r0, r2, r1
-; CHECK-NEXT: sbcs r3, r5
-; CHECK-NEXT: subs r0, r2, r1
-; CHECK-NEXT: ldr r1, .LCPI1_2
-; CHECK-NEXT: ldr r1, [r1]
-; CHECK-NEXT: str r1, [sp, #4]
-; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: sbcs r1, r5
-; CHECK-NEXT: ands r3, r4
-; CHECK-NEXT: ands r2, r0
-; CHECK-NEXT: mov r4, r2
-; CHECK-NEXT: orrs r4, r3
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: beq .LBB1_2
-; CHECK-NEXT: @ %bb.1: @ %entry
-; CHECK-NEXT: mov r1, r3
-; CHECK-NEXT: .LBB1_2: @ %entry
-; CHECK-NEXT: beq .LBB1_4
-; CHECK-NEXT: @ %bb.3: @ %entry
-; CHECK-NEXT: mov r0, r2
-; CHECK-NEXT: .LBB1_4: @ %entry
-; CHECK-NEXT: ldr r2, [sp, #4]
-; CHECK-NEXT: ldr r3, .LCPI1_2
-; CHECK-NEXT: ldr r3, [r3]
-; CHECK-NEXT: cmp r3, r2
-; CHECK-NEXT: bne .LBB1_6
-; CHECK-NEXT: @ %bb.5: @ %entry
-; CHECK-NEXT: add sp, #8
-; CHECK-NEXT: pop {r4, r5, r7, pc}
-; CHECK-NEXT: .LBB1_6: @ %entry
-; CHECK-NEXT: bl __stack_chk_fail
-; CHECK-NEXT: .p2align 2
-; CHECK-NEXT: @ %bb.7:
-; CHECK-NEXT: .LCPI1_0:
-; CHECK-NEXT: .long aa
-; CHECK-NEXT: .LCPI1_1:
-; CHECK-NEXT: .long bb
-; CHECK-NEXT: .LCPI1_2:
-; CHECK-NEXT: .long __stack_chk_guard
-;
; V6M-LABEL: cc:
; V6M: @ %bb.0: @ %entry
-; V6M-NEXT: push {r4, r5, r7, lr}
-; V6M-NEXT: sub sp, #8
-; V6M-NEXT: movs r0, #1
-; V6M-NEXT: lsls r3, r0, #31
-; V6M-NEXT: movs r0, :upper8_15:aa
-; V6M-NEXT: lsls r0, r0, #8
-; V6M-NEXT: adds r0, :upper0_7:aa
-; V6M-NEXT: lsls r0, r0, #8
-; V6M-NEXT: adds r0, :lower8_15:aa
-; V6M-NEXT: lsls r0, r0, #8
-; V6M-NEXT: adds r0, :lower0_7:aa
-; V6M-NEXT: ldr r2, [r0]
-; V6M-NEXT: asrs r4, r2, #31
-; V6M-NEXT: eors r3, r4
-; V6M-NEXT: movs r0, :upper8_15:bb
-; V6M-NEXT: lsls r0, r0, #8
-; V6M-NEXT: adds r0, :upper0_7:bb
-; V6M-NEXT: lsls r0, r0, #8
-; V6M-NEXT: adds r0, :lower8_15:bb
-; V6M-NEXT: lsls r0, r0, #8
-; V6M-NEXT: adds r0, :lower0_7:bb
-; V6M-NEXT: ldm r0!, {r1, r5}
-; V6M-NEXT: subs r0, r2, r1
-; V6M-NEXT: sbcs r3, r5
-; V6M-NEXT: subs r0, r2, r1
-; V6M-NEXT: mrs r12, apsr
-; V6M-NEXT: movs r1, :upper8_15:__stack_chk_guard
-; V6M-NEXT: lsls r1, r1, #8
-; V6M-NEXT: adds r1, :upper0_7:__stack_chk_guard
-; V6M-NEXT: lsls r1, r1, #8
-; V6M-NEXT: adds r1, :lower8_15:__stack_chk_guard
-; V6M-NEXT: lsls r1, r1, #8
-; V6M-NEXT: adds r1, :lower0_7:__stack_chk_guard
-; V6M-NEXT: msr apsr, r12
-; V6M-NEXT: ldr r1, [r1]
-; V6M-NEXT: str r1, [sp, #4]
-; V6M-NEXT: mov r1, r4
-; V6M-NEXT: sbcs r1, r5
-; V6M-NEXT: ands r3, r4
-; V6M-NEXT: ands r2, r0
-; V6M-NEXT: mov r4, r2
-; V6M-NEXT: orrs r4, r3
-; V6M-NEXT: cmp r4, #0
-; V6M-NEXT: beq .LBB1_2
-; V6M-NEXT: @ %bb.1: @ %entry
-; V6M-NEXT: mov r1, r3
-; V6M-NEXT: .LBB1_2: @ %entry
-; V6M-NEXT: beq .LBB1_4
-; V6M-NEXT: @ %bb.3: @ %entry
-; V6M-NEXT: mov r0, r2
-; V6M-NEXT: .LBB1_4: @ %entry
-; V6M-NEXT: ldr r2, [sp, #4]
-; V6M-NEXT: mrs r12, apsr
-; V6M-NEXT: movs r3, :upper8_15:__stack_chk_guard
-; V6M-NEXT: lsls r3, r3, #8
-; V6M-NEXT: adds r3, :upper0_7:__stack_chk_guard
-; V6M-NEXT: lsls r3, r3, #8
-; V6M-NEXT: adds r3, :lower8_15:__stack_chk_guard
-; V6M-NEXT: lsls r3, r3, #8
-; V6M-NEXT: adds r3, :lower0_7:__stack_chk_guard
-; V6M-NEXT: msr apsr, r12
-; V6M-NEXT: ldr r3, [r3]
-; V6M-NEXT: cmp r3, r2
-; V6M-NEXT: bne .LBB1_6
-; V6M-NEXT: @ %bb.5: @ %entry
-; V6M-NEXT: add sp, #8
-; V6M-NEXT: pop {r4, r5, r7, pc}
-; V6M-NEXT: .LBB1_6: @ %entry
-; V6M-NEXT: bl __stack_chk_fail
-;
-; V8MBASE-LABEL: cc:
-; V8MBASE: @ %bb.0: @ %entry
-; V8MBASE-NEXT: push {r4, r5, r7, lr}
-; V8MBASE-NEXT: sub sp, #8
-; V8MBASE-NEXT: movs r0, #1
-; V8MBASE-NEXT: lsls r3, r0, #31
-; V8MBASE-NEXT: movw r0, :lower16:aa
-; V8MBASE-NEXT: movt r0, :upper16:aa
-; V8MBASE-NEXT: ldr r2, [r0]
-; V8MBASE-NEXT: asrs r4, r2, #31
-; V8MBASE-NEXT: eors r3, r4
-; V8MBASE-NEXT: movw r0, :lower16:bb
-; V8MBASE-NEXT: movt r0, :upper16:bb
-; V8MBASE-NEXT: ldm r0!, {r1, r5}
-; V8MBASE-NEXT: subs r0, r2, r1
-; V8MBASE-NEXT: sbcs r3, r5
-; V8MBASE-NEXT: subs r0, r2, r1
-; V8MBASE-NEXT: movw r1, :lower16:__stack_chk_guard
-; V8MBASE-NEXT: movt r1, :upper16:__stack_chk_guard
-; V8MBASE-NEXT: ldr r1, [r1]
-; V8MBASE-NEXT: str r1, [sp, #4]
-; V8MBASE-NEXT: mov r1, r4
-; V8MBASE-NEXT: sbcs r1, r5
-; V8MBASE-NEXT: ands r3, r4
-; V8MBASE-NEXT: ands r2, r0
-; V8MBASE-NEXT: mov r4, r2
-; V8MBASE-NEXT: orrs r4, r3
-; V8MBASE-NEXT: cmp r4, #0
-; V8MBASE-NEXT: beq .LBB1_2
-; V8MBASE-NEXT: @ %bb.1: @ %entry
-; V8MBASE-NEXT: mov r1, r3
-; V8MBASE-NEXT: .LBB1_2: @ %entry
-; V8MBASE-NEXT: beq .LBB1_4
-; V8MBASE-NEXT: @ %bb.3: @ %entry
-; V8MBASE-NEXT: mov r0, r2
-; V8MBASE-NEXT: .LBB1_4: @ %entry
-; V8MBASE-NEXT: ldr r2, [sp, #4]
-; V8MBASE-NEXT: movw r3, :lower16:__stack_chk_guard
-; V8MBASE-NEXT: movt r3, :upper16:__stack_chk_guard
-; V8MBASE-NEXT: ldr r3, [r3]
-; V8MBASE-NEXT: cmp r3, r2
-; V8MBASE-NEXT: bne .LBB1_6
-; V8MBASE-NEXT: @ %bb.5: @ %entry
-; V8MBASE-NEXT: add sp, #8
-; V8MBASE-NEXT: pop {r4, r5, r7, pc}
-; V8MBASE-NEXT: .LBB1_6: @ %entry
-; V8MBASE-NEXT: bl __stack_chk_fail
+; V6M-NEXT: push {r4, r5, r7, lr}
+; V6M-NEXT: sub sp, #8
+; V6M-NEXT: movs r0, #1
+; V6M-NEXT: lsls r3, r0, #31
+; V6M-NEXT: movs r0, :upper8_15:aa
+; V6M-NEXT: lsls r0, r0, #8
+; V6M-NEXT: adds r0, :upper0_7:aa
+; V6M-NEXT: lsls r0, r0, #8
+; V6M-NEXT: adds r0, :lower8_15:aa
+; V6M-NEXT: lsls r0, r0, #8
+; V6M-NEXT: adds r0, :lower0_7:aa
+; V6M-NEXT: ldr r2, [r0]
+; V6M-NEXT: asrs r4, r2, #31
+; V6M-NEXT: eors r3, r4
+; V6M-NEXT: movs r0, :upper8_15:bb
+; V6M-NEXT: lsls r0, r0, #8
+; V6M-NEXT: adds r0, :upper0_7:bb
+; V6M-NEXT: lsls r0, r0, #8
+; V6M-NEXT: adds r0, :lower8_15:bb
+; V6M-NEXT: lsls r0, r0, #8
+; V6M-NEXT: adds r0, :lower0_7:bb
+; V6M-NEXT: ldm r0!, {r1, r5}
+; V6M-NEXT: subs r0, r2, r1
+; V6M-NEXT: sbcs r3, r5
+; V6M-NEXT: subs r0, r2, r1
+; V6M-NEXT: mrs r12, apsr
+; V6M-NEXT: movs r1, :upper8_15:__stack_chk_guard
+; V6M-NEXT: lsls r1, r1, #8
+; V6M-NEXT: adds r1, :upper0_7:__stack_chk_guard
+; V6M-NEXT: lsls r1, r1, #8
+; V6M-NEXT: adds r1, :lower8_15:__stack_chk_guard
+; V6M-NEXT: lsls r1, r1, #8
+; V6M-NEXT: adds r1, :lower0_7:__stack_chk_guard
+; V6M-NEXT: msr apsr, r12
+; V6M-NEXT: ldr r1, [r1]
+; V6M-NEXT: str r1, [sp, #4]
+; V6M-NEXT: mov r1, r4
+; V6M-NEXT: sbcs r1, r5
+; V6M-NEXT: ands r3, r4
+; V6M-NEXT: ands r2, r0
+; V6M-NEXT: mov r4, r2
+; V6M-NEXT: orrs r4, r3
+; V6M-NEXT: beq .LBB1_2
+; V6M-NEXT: @ %bb.1: @ %entry
+; V6M-NEXT: mov r1, r3
+; V6M-NEXT: .LBB1_2: @ %entry
+; V6M-NEXT: cmp r4, #0
+; V6M-NEXT: beq .LBB1_4
+; V6M-NEXT: @ %bb.3: @ %entry
+; V6M-NEXT: mov r0, r2
+; V6M-NEXT: .LBB1_4: @ %entry
+; V6M-NEXT: ldr r2, [sp, #4]
+; V6M-NEXT: mrs r12, apsr
+; V6M-NEXT: movs r3, :upper8_15:__stack_chk_guard
+; V6M-NEXT: lsls r3, r3, #8
+; V6M-NEXT: adds r3, :upper0_7:__stack_chk_guard
+; V6M-NEXT: lsls r3, r3, #8
+; V6M-NEXT: adds r3, :lower8_15:__stack_chk_guard
+; V6M-NEXT: lsls r3, r3, #8
+; V6M-NEXT: adds r3, :lower0_7:__stack_chk_guard
+; V6M-NEXT: msr apsr, r12
+; V6M-NEXT: ldr r3, [r3]
+; V6M-NEXT: cmp r3, r2
+; V6M-NEXT: bne .LBB1_6
+; V6M-NEXT: @ %bb.5: @ %entry
+; V6M-NEXT: add sp, #8
+; V6M-NEXT: pop {r4, r5, r7, pc}
+; V6M-NEXT: .LBB1_6: @ %entry
+; V6M-NEXT: bl __stack_chk_fail
entry:
%0 = load i32, ptr @aa, align 4
diff --git a/llvm/test/CodeGen/Thumb/umul_fix_sat.ll b/llvm/test/CodeGen/Thumb/umul_fix_sat.ll
index a43b22102c24bf..fa88024315211b 100644
--- a/llvm/test/CodeGen/Thumb/umul_fix_sat.ll
+++ b/llvm/test/CodeGen/Thumb/umul_fix_sat.ll
@@ -38,26 +38,26 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
; ARM-NEXT: sub sp, #28
; ARM-NEXT: str r3, [sp, #24] @ 4-byte Spill
; ARM-NEXT: mov r5, r1
-; ARM-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; ARM-NEXT: str r1, [sp, #8] @ 4-byte Spill
; ARM-NEXT: movs r4, #0
; ARM-NEXT: mov r6, r0
-; ARM-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; ARM-NEXT: str r0, [sp, #12] @ 4-byte Spill
; ARM-NEXT: mov r1, r4
; ARM-NEXT: mov r7, r2
-; ARM-NEXT: str r2, [sp, #12] @ 4-byte Spill
+; ARM-NEXT: str r2, [sp, #16] @ 4-byte Spill
; ARM-NEXT: mov r3, r4
; ARM-NEXT: bl __aeabi_lmul
-; ARM-NEXT: str r0, [sp, #20] @ 4-byte Spill
-; ARM-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; ARM-NEXT: str r0, [sp] @ 4-byte Spill
+; ARM-NEXT: str r1, [sp, #20] @ 4-byte Spill
; ARM-NEXT: mov r0, r5
; ARM-NEXT: mov r1, r4
; ARM-NEXT: mov r2, r7
; ARM-NEXT: mov r3, r4
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: mov r5, r1
-; ARM-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
+; ARM-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
; ARM-NEXT: adds r0, r0, r1
-; ARM-NEXT: str r0, [sp, #16] @ 4-byte Spill
+; ARM-NEXT: str r0, [sp, #20] @ 4-byte Spill
; ARM-NEXT: adcs r5, r4
; ARM-NEXT: mov r0, r6
; ARM-NEXT: mov r1, r4
@@ -65,33 +65,33 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
; ARM-NEXT: mov r2, r7
; ARM-NEXT: mov r3, r4
; ARM-NEXT: bl __aeabi_lmul
-; ARM-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
+; ARM-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
; ARM-NEXT: adds r0, r0, r2
-; ARM-NEXT: str r0, [sp, #16] @ 4-byte Spill
+; ARM-NEXT: str r0, [sp, #20] @ 4-byte Spill
; ARM-NEXT: adcs r1, r4
; ARM-NEXT: adds r0, r5, r1
-; ARM-NEXT: str r0, [sp] @ 4-byte Spill
+; ARM-NEXT: str r0, [sp, #4] @ 4-byte Spill
; ARM-NEXT: mov r6, r4
; ARM-NEXT: adcs r6, r4
-; ARM-NEXT: ldr r5, [sp, #4] @ 4-byte Reload
+; ARM-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
; ARM-NEXT: mov r0, r5
; ARM-NEXT: mov r1, r4
; ARM-NEXT: mov r2, r7
; ARM-NEXT: mov r3, r4
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: mov r7, r1
-; ARM-NEXT: ldr r1, [sp] @ 4-byte Reload
+; ARM-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
; ARM-NEXT: adds r0, r0, r1
-; ARM-NEXT: str r0, [sp] @ 4-byte Spill
+; ARM-NEXT: str r0, [sp, #4] @ 4-byte Spill
; ARM-NEXT: adcs r7, r6
-; ARM-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; ARM-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
; ARM-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
; ARM-NEXT: mov r2, r4
; ARM-NEXT: mov r3, r4
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: mov r6, r0
; ARM-NEXT: str r1, [sp, #24] @ 4-byte Spill
-; ARM-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; ARM-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; ARM-NEXT: mov r1, r5
; ARM-NEXT: mov r2, r4
; ARM-NEXT: mov r3, r4
@@ -99,33 +99,34 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
; ARM-NEXT: adds r0, r0, r6
; ARM-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
; ARM-NEXT: adcs r1, r2
-; ARM-NEXT: ldr r2, [sp] @ 4-byte Reload
-; ARM-NEXT: adds r0, r2, r0
+; ARM-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
+; ARM-NEXT: adds r2, r2, r0
; ARM-NEXT: adcs r1, r7
-; ARM-NEXT: lsrs r5, r0, #2
-; ARM-NEXT: orrs r5, r1
-; ARM-NEXT: lsls r0, r0, #30
-; ARM-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
-; ARM-NEXT: lsrs r1, r3, #2
-; ARM-NEXT: adds r2, r0, r1
-; ARM-NEXT: lsls r0, r3, #30
-; ARM-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
-; ARM-NEXT: lsrs r1, r1, #2
-; ARM-NEXT: adds r3, r0, r1
+; ARM-NEXT: lsrs r3, r2, #2
+; ARM-NEXT: orrs r3, r1
; ARM-NEXT: mvns r1, r4
-; ARM-NEXT: cmp r5, #0
+; ARM-NEXT: cmp r3, #0
; ARM-NEXT: mov r0, r1
; ARM-NEXT: beq .LBB1_3
; ARM-NEXT: @ %bb.1:
+; ARM-NEXT: cmp r3, #0
; ARM-NEXT: beq .LBB1_4
; ARM-NEXT: .LBB1_2:
; ARM-NEXT: add sp, #28
; ARM-NEXT: pop {r4, r5, r6, r7, pc}
; ARM-NEXT: .LBB1_3:
-; ARM-NEXT: mov r0, r3
+; ARM-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
+; ARM-NEXT: lsls r0, r0, #30
+; ARM-NEXT: ldr r4, [sp] @ 4-byte Reload
+; ARM-NEXT: lsrs r4, r4, #2
+; ARM-NEXT: adds r0, r0, r4
+; ARM-NEXT: cmp r3, #0
; ARM-NEXT: bne .LBB1_2
; ARM-NEXT: .LBB1_4:
-; ARM-NEXT: mov r1, r2
+; ARM-NEXT: lsls r1, r2, #30
+; ARM-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
+; ARM-NEXT: lsrs r2, r2, #2
+; ARM-NEXT: adds r1, r1, r2
; ARM-NEXT: add sp, #28
; ARM-NEXT: pop {r4, r5, r6, r7, pc}
%tmp = call i64 @llvm.umul.fix.sat.i64(i64 %x, i64 %y, i32 2)
@@ -224,16 +225,17 @@ define i64 @func5(i64 %x, i64 %y) {
; ARM-NEXT: mov r3, r5
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: adds r3, r1, r6
-; ARM-NEXT: mov r2, r5
-; ARM-NEXT: adcs r2, r5
-; ARM-NEXT: orrs r2, r4
+; ARM-NEXT: mov r6, r5
+; ARM-NEXT: adcs r6, r5
+; ARM-NEXT: orrs r6, r4
; ARM-NEXT: mvns r1, r5
-; ARM-NEXT: cmp r2, #0
+; ARM-NEXT: cmp r6, #0
; ARM-NEXT: mov r2, r1
; ARM-NEXT: bne .LBB4_2
; ARM-NEXT: @ %bb.1:
; ARM-NEXT: mov r2, r0
; ARM-NEXT: .LBB4_2:
+; ARM-NEXT: cmp r6, #0
; ARM-NEXT: bne .LBB4_4
; ARM-NEXT: @ %bb.3:
; ARM-NEXT: mov r1, r3
@@ -397,27 +399,25 @@ define i64 @func7(i64 %x, i64 %y) nounwind {
; ARM-NEXT: mov r2, r4
; ARM-NEXT: mov r3, r4
; ARM-NEXT: bl __aeabi_lmul
-; ARM-NEXT: mov r2, r1
; ARM-NEXT: adds r0, r0, r5
-; ARM-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
-; ARM-NEXT: adcs r2, r1
-; ARM-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
-; ARM-NEXT: adds r3, r1, r0
-; ARM-NEXT: adcs r2, r6
-; ARM-NEXT: mvns r1, r4
-; ARM-NEXT: cmp r2, #0
-; ARM-NEXT: mov r0, r1
-; ARM-NEXT: beq .LBB7_3
+; ARM-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
+; ARM-NEXT: adcs r1, r2
+; ARM-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; ARM-NEXT: adds r3, r2, r0
+; ARM-NEXT: adcs r1, r6
+; ARM-NEXT: mvns r2, r4
+; ARM-NEXT: cmp r1, #0
+; ARM-NEXT: mov r0, r2
+; ARM-NEXT: bne .LBB7_2
; ARM-NEXT: @ %bb.1:
-; ARM-NEXT: beq .LBB7_4
-; ARM-NEXT: .LBB7_2:
-; ARM-NEXT: add sp, #28
-; ARM-NEXT: pop {r4, r5, r6, r7, pc}
-; ARM-NEXT: .LBB7_3:
; ARM-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
-; ARM-NEXT: bne .LBB7_2
+; ARM-NEXT: .LBB7_2:
+; ARM-NEXT: cmp r1, #0
+; ARM-NEXT: bne .LBB7_4
+; ARM-NEXT: @ %bb.3:
+; ARM-NEXT: mov r2, r3
; ARM-NEXT: .LBB7_4:
-; ARM-NEXT: mov r1, r3
+; ARM-NEXT: mov r1, r2
; ARM-NEXT: add sp, #28
; ARM-NEXT: pop {r4, r5, r6, r7, pc}
%tmp = call i64 @llvm.umul.fix.sat.i64(i64 %x, i64 %y, i32 32)
@@ -433,23 +433,23 @@ define i64 @func8(i64 %x, i64 %y) nounwind {
; ARM-NEXT: sub sp, #28
; ARM-NEXT: str r3, [sp, #24] @ 4-byte Spill
; ARM-NEXT: mov r7, r2
-; ARM-NEXT: str r2, [sp, #16] @ 4-byte Spill
+; ARM-NEXT: str r2, [sp, #20] @ 4-byte Spill
; ARM-NEXT: mov r5, r1
-; ARM-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; ARM-NEXT: str r1, [sp, #12] @ 4-byte Spill
; ARM-NEXT: movs r4, #0
; ARM-NEXT: mov r6, r0
-; ARM-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; ARM-NEXT: str r0, [sp, #16] @ 4-byte Spill
; ARM-NEXT: mov r1, r4
; ARM-NEXT: mov r3, r4
; ARM-NEXT: bl __aeabi_lmul
-; ARM-NEXT: str r1, [sp, #20] @ 4-byte Spill
+; ARM-NEXT: str r1, [sp, #8] @ 4-byte Spill
; ARM-NEXT: mov r0, r5
; ARM-NEXT: mov r1, r4
; ARM-NEXT: mov r2, r7
; ARM-NEXT: mov r3, r4
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: mov r7, r1
-; ARM-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
+; ARM-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
; ARM-NEXT: adds r5, r0, r1
; ARM-NEXT: adcs r7, r4
; ARM-NEXT: mov r0, r6
@@ -459,31 +459,31 @@ define i64 @func8(i64 %x, i64 %y) nounwind {
; ARM-NEXT: mov r3, r4
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: adds r0, r0, r5
-; ARM-NEXT: str r0, [sp, #20] @ 4-byte Spill
+; ARM-NEXT: str r0, [sp, #4] @ 4-byte Spill
; ARM-NEXT: adcs r1, r4
; ARM-NEXT: adds r0, r7, r1
-; ARM-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; ARM-NEXT: str r0, [sp, #8] @ 4-byte Spill
; ARM-NEXT: mov r5, r4
; ARM-NEXT: adcs r5, r4
-; ARM-NEXT: ldr r7, [sp, #8] @ 4-byte Reload
+; ARM-NEXT: ldr r7, [sp, #12] @ 4-byte Reload
; ARM-NEXT: mov r0, r7
; ARM-NEXT: mov r1, r4
; ARM-NEXT: mov r2, r6
; ARM-NEXT: mov r3, r4
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: mov r6, r1
-; ARM-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
+; ARM-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
; ARM-NEXT: adds r0, r0, r1
-; ARM-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; ARM-NEXT: str r0, [sp, #8] @ 4-byte Spill
; ARM-NEXT: adcs r6, r5
-; ARM-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
+; ARM-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
; ARM-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
; ARM-NEXT: mov r2, r4
; ARM-NEXT: mov r3, r4
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: mov r5, r0
; ARM-NEXT: str r1, [sp, #24] @ 4-byte Spill
-; ARM-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; ARM-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
; ARM-NEXT: mov r1, r7
; ARM-NEXT: mov r2, r4
; ARM-NEXT: mov r3, r4
@@ -491,28 +491,25 @@ define i64 @func8(i64 %x, i64 %y) nounwind {
; ARM-NEXT: adds r0, r0, r5
; ARM-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
; ARM-NEXT: adcs r1, r2
-; ARM-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; ARM-NEXT: adds r0, r2, r0
+; ARM-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; ARM-NEXT: adds r5, r2, r0
; ARM-NEXT: adcs r1, r6
-; ARM-NEXT: lsls r1, r1, #1
-; ARM-NEXT: lsrs r5, r0, #31
-; ARM-NEXT: adds r2, r1, r5
-; ARM-NEXT: lsls r0, r0, #1
-; ARM-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
-; ARM-NEXT: lsrs r1, r1, #31
-; ARM-NEXT: adds r3, r0, r1
-; ARM-NEXT: mvns r1, r4
-; ARM-NEXT: cmp r5, #0
-; ARM-NEXT: mov r0, r1
-; ARM-NEXT: beq .LBB8_3
+; ARM-NEXT: lsrs r3, r5, #31
+; ARM-NEXT: mvns r2, r4
+; ARM-NEXT: cmp r3, #0
+; ARM-NEXT: mov r0, r2
+; ARM-NEXT: bne .LBB8_2
; ARM-NEXT: @ %bb.1:
-; ARM-NEXT: beq .LBB8_4
+; ARM-NEXT: lsls r0, r5, #1
+; ARM-NEXT: ldr r4, [sp, #4] @ 4-byte Reload
+; ARM-NEXT: lsrs r4, r4, #31
+; ARM-NEXT: adds r0, r0, r4
; ARM-NEXT: .LBB8_2:
-; ARM-NEXT: add sp, #28
-; ARM-NEXT: pop {r4, r5, r6, r7, pc}
-; ARM-NEXT: .LBB8_3:
-; ARM-NEXT: mov r0, r3
-; ARM-NEXT: bne .LBB8_2
+; ARM-NEXT: cmp r3, #0
+; ARM-NEXT: bne .LBB8_4
+; ARM-NEXT: @ %bb.3:
+; ARM-NEXT: lsls r1, r1, #1
+; ARM-NEXT: adds r2, r1, r3
; ARM-NEXT: .LBB8_4:
; ARM-NEXT: mov r1, r2
; ARM-NEXT: add sp, #28
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/arm_cmplx_dot_prod_f32.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/arm_cmplx_dot_prod_f32.ll
index a87d363fa61ee7..13080fcfa13574 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/arm_cmplx_dot_prod_f32.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/arm_cmplx_dot_prod_f32.ll
@@ -15,13 +15,13 @@ define void @arm_cmplx_dot_prod_f32(ptr %pSrcA, ptr %pSrcB, i32 %numSamples, ptr
; CHECK-NEXT: lsrs r4, r2, #2
; CHECK-NEXT: mov.w lr, #2
; CHECK-NEXT: cmp r4, #2
-; CHECK-NEXT: vldrw.u32 q2, [r1], #32
-; CHECK-NEXT: vldrw.u32 q1, [r0], #32
; CHECK-NEXT: it lt
; CHECK-NEXT: lsrlt.w lr, r2, #2
; CHECK-NEXT: rsb r4, lr, r2, lsr #2
-; CHECK-NEXT: vmov.i32 q0, #0x0
+; CHECK-NEXT: vldrw.u32 q2, [r1], #32
; CHECK-NEXT: add.w lr, r4, #1
+; CHECK-NEXT: vldrw.u32 q1, [r0], #32
+; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: .LBB0_2: @ %while.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vcmla.f32 q0, q1, q2, #0
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll
index 3c1510623e5c43..b9a80af649f293 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll
@@ -7,26 +7,26 @@ define void @foo(ptr nocapture readonly %st, ptr %x) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
-; CHECK-NEXT: ldrd r12, r3, [r0]
-; CHECK-NEXT: ldrd r4, r2, [r0, #8]
-; CHECK-NEXT: rsb r12, r12, r3, lsl #1
+; CHECK-NEXT: ldrd r12, r2, [r0]
+; CHECK-NEXT: ldrd r4, r3, [r0, #8]
+; CHECK-NEXT: rsb r12, r12, r2, lsl #1
; CHECK-NEXT: dlstp.16 lr, r12
; CHECK-NEXT: .LBB0_1: @ %do.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vldrh.u16 q0, [r2], #16
+; CHECK-NEXT: vldrh.u16 q0, [r3], #16
; CHECK-NEXT: vstrh.16 q0, [r4], #16
; CHECK-NEXT: letp lr, .LBB0_1
; CHECK-NEXT: @ %bb.2: @ %do.end
-; CHECK-NEXT: ldr r3, [r0]
+; CHECK-NEXT: ldr r2, [r0]
; CHECK-NEXT: ldr r0, [r0, #8]
; CHECK-NEXT: add.w r0, r0, r12, lsl #1
-; CHECK-NEXT: mov.w r2, #6144
-; CHECK-NEXT: dlstp.16 lr, r3
+; CHECK-NEXT: mov.w r3, #6144
+; CHECK-NEXT: dlstp.16 lr, r2
; CHECK-NEXT: .LBB0_3: @ %do.body6
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrh.u16 q0, [r1], #16
; CHECK-NEXT: vcvt.f16.s16 q0, q0
-; CHECK-NEXT: vmul.f16 q0, q0, r2
+; CHECK-NEXT: vmul.f16 q0, q0, r3
; CHECK-NEXT: vstrh.16 q0, [r0], #16
; CHECK-NEXT: letp lr, .LBB0_3
; CHECK-NEXT: @ %bb.4: @ %do.end13
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll
index 6cb98557c9bc13..2fdf534d526565 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll
@@ -9,12 +9,12 @@ define arm_aapcs_vfpcc void @fast_float_mul(ptr nocapture %a, ptr nocapture read
; CHECK-NEXT: beq.w .LBB0_11
; CHECK-NEXT: @ %bb.1: @ %vector.memcheck
; CHECK-NEXT: add.w r4, r2, r3, lsl #2
+; CHECK-NEXT: add.w lr, r0, r3, lsl #2
; CHECK-NEXT: cmp r4, r0
-; CHECK-NEXT: add.w r4, r0, r3, lsl #2
-; CHECK-NEXT: cset r12, hi
-; CHECK-NEXT: cmp r4, r2
-; CHECK-NEXT: csel r12, zr, r12, ls
-; CHECK-NEXT: cmp r4, r1
+; CHECK-NEXT: cset r4, hi
+; CHECK-NEXT: cmp lr, r2
+; CHECK-NEXT: csel r12, zr, r4, ls
+; CHECK-NEXT: cmp lr, r1
; CHECK-NEXT: add.w r4, r1, r3, lsl #2
; CHECK-NEXT: cset lr, hi
; CHECK-NEXT: cmp r4, r0
diff --git a/llvm/test/CodeGen/Thumb2/float-ops.ll b/llvm/test/CodeGen/Thumb2/float-ops.ll
index 191c775be3420e..d2b1dd6f05a3f9 100644
--- a/llvm/test/CodeGen/Thumb2/float-ops.ll
+++ b/llvm/test/CodeGen/Thumb2/float-ops.ll
@@ -289,15 +289,15 @@ define float @select_f(float %a, float %b, i1 %c) {
define double @select_d(double %a, double %b, i1 %c) {
; CHECK-LABEL: select_d:
; NOREGS: ldr{{(.w)?}} [[REG:r[0-9]+]], [sp]
-; NOREGS: lsls.w [[REG]], [[REG]], #31
-; ONLYREGS: lsls r0, r0, #31
+; NOREGS: ands [[REG]], [[REG]], #1
+; ONLYREGS: ands r0, r0, #1
; NOREGS-DAG: moveq r0, r2
; NOREGS-DAG: moveq r1, r3
-; ONLYREGS-DAG: csel r0, r2, r1
-; ONLYREGS-DAG: csel r1, r12, r3
+; ONLYREGS-DAG: csel r0, r0, r2
+; ONLYREGS-DAG: csel r1, r1, r3
+; SP: ands r0, r0, #1
; SP-DAG: vmov [[ALO:r[0-9]+]], [[AHI:r[0-9]+]], d0
; SP-DAG: vmov [[BLO:r[0-9]+]], [[BHI:r[0-9]+]], d1
-; SP: lsls r0, r0, #31
; SP: itt ne
; SP-DAG: movne [[BLO]], [[ALO]]
; SP-DAG: movne [[BHI]], [[AHI]]
diff --git a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll
index d076cb00ad7e0e..7087041e8dace6 100644
--- a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll
@@ -489,10 +489,10 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) {
; CHECK-NEXT: @ Parent Loop BB1_10 Depth=2
; CHECK-NEXT: @ => This Inner Loop Header: Depth=3
; CHECK-NEXT: vqadd.u32 q2, q5, r1
-; CHECK-NEXT: adds r1, #4
+; CHECK-NEXT: subs r2, #4
; CHECK-NEXT: vcmp.u32 hi, q7, q2
; CHECK-NEXT: vshl.i32 q2, q1, #2
-; CHECK-NEXT: subs r2, #4
+; CHECK-NEXT: add.w r1, r1, #4
; CHECK-NEXT: vadd.i32 q2, q2, r8
; CHECK-NEXT: vadd.i32 q1, q1, r9
; CHECK-NEXT: vpst
@@ -508,10 +508,10 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) {
; CHECK-NEXT: @ Parent Loop BB1_10 Depth=2
; CHECK-NEXT: @ => This Inner Loop Header: Depth=3
; CHECK-NEXT: vqadd.u32 q2, q5, r1
-; CHECK-NEXT: adds r1, #4
+; CHECK-NEXT: subs r2, #4
; CHECK-NEXT: vcmp.u32 hi, q6, q2
; CHECK-NEXT: vshl.i32 q2, q1, #2
-; CHECK-NEXT: subs r2, #4
+; CHECK-NEXT: add.w r1, r1, #4
; CHECK-NEXT: vadd.i32 q2, q2, r8
; CHECK-NEXT: vadd.i32 q1, q1, r9
; CHECK-NEXT: vpst
diff --git a/llvm/test/CodeGen/Thumb2/mve-doublereduct.ll b/llvm/test/CodeGen/Thumb2/mve-doublereduct.ll
index 9ee6ec345d964b..67723e8aa41ad7 100644
--- a/llvm/test/CodeGen/Thumb2/mve-doublereduct.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-doublereduct.ll
@@ -411,12 +411,12 @@ define i32 @nested_smin_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
; CHECK: @ %bb.0:
; CHECK-NEXT: mvn r3, #-2147483648
; CHECK-NEXT: mvn r2, #-2147483648
-; CHECK-NEXT: vminv.s32 r3, q1
-; CHECK-NEXT: vminv.s32 r2, q0
-; CHECK-NEXT: cmp r3, r1
-; CHECK-NEXT: csel r1, r3, r1, lt
-; CHECK-NEXT: cmp r2, r0
-; CHECK-NEXT: csel r0, r2, r0, lt
+; CHECK-NEXT: vminv.s32 r3, q0
+; CHECK-NEXT: vminv.s32 r2, q1
+; CHECK-NEXT: cmp r3, r0
+; CHECK-NEXT: csel r0, r3, r0, lt
+; CHECK-NEXT: cmp r2, r1
+; CHECK-NEXT: csel r1, r2, r1, lt
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: csel r0, r0, r1, lt
; CHECK-NEXT: bx lr
@@ -433,12 +433,12 @@ define i32 @nested_smax_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
; CHECK: @ %bb.0:
; CHECK-NEXT: mov.w r3, #-2147483648
; CHECK-NEXT: mov.w r2, #-2147483648
-; CHECK-NEXT: vmaxv.s32 r3, q1
-; CHECK-NEXT: vmaxv.s32 r2, q0
-; CHECK-NEXT: cmp r3, r1
-; CHECK-NEXT: csel r1, r3, r1, gt
-; CHECK-NEXT: cmp r2, r0
-; CHECK-NEXT: csel r0, r2, r0, gt
+; CHECK-NEXT: vmaxv.s32 r3, q0
+; CHECK-NEXT: vmaxv.s32 r2, q1
+; CHECK-NEXT: cmp r3, r0
+; CHECK-NEXT: csel r0, r3, r0, gt
+; CHECK-NEXT: cmp r2, r1
+; CHECK-NEXT: csel r1, r2, r1, gt
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: csel r0, r0, r1, gt
; CHECK-NEXT: bx lr
@@ -455,12 +455,12 @@ define i32 @nested_umin_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
; CHECK: @ %bb.0:
; CHECK-NEXT: mov.w r3, #-1
; CHECK-NEXT: mov.w r2, #-1
-; CHECK-NEXT: vminv.u32 r3, q1
-; CHECK-NEXT: vminv.u32 r2, q0
-; CHECK-NEXT: cmp r3, r1
-; CHECK-NEXT: csel r1, r3, r1, lo
-; CHECK-NEXT: cmp r2, r0
-; CHECK-NEXT: csel r0, r2, r0, lo
+; CHECK-NEXT: vminv.u32 r3, q0
+; CHECK-NEXT: vminv.u32 r2, q1
+; CHECK-NEXT: cmp r3, r0
+; CHECK-NEXT: csel r0, r3, r0, lo
+; CHECK-NEXT: cmp r2, r1
+; CHECK-NEXT: csel r1, r2, r1, lo
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: csel r0, r0, r1, lo
; CHECK-NEXT: bx lr
@@ -477,12 +477,12 @@ define i32 @nested_umax_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
; CHECK: @ %bb.0:
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: movs r2, #0
-; CHECK-NEXT: vmaxv.u32 r3, q1
-; CHECK-NEXT: vmaxv.u32 r2, q0
-; CHECK-NEXT: cmp r3, r1
-; CHECK-NEXT: csel r1, r3, r1, hi
-; CHECK-NEXT: cmp r2, r0
-; CHECK-NEXT: csel r0, r2, r0, hi
+; CHECK-NEXT: vmaxv.u32 r3, q0
+; CHECK-NEXT: vmaxv.u32 r2, q1
+; CHECK-NEXT: cmp r3, r0
+; CHECK-NEXT: csel r0, r3, r0, hi
+; CHECK-NEXT: cmp r2, r1
+; CHECK-NEXT: csel r1, r2, r1, hi
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: csel r0, r0, r1, hi
; CHECK-NEXT: bx lr
diff --git a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
index c8dd949ca9d882..1c95d28b5eed1b 100644
--- a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
@@ -1365,8 +1365,8 @@ define void @arm_biquad_cascade_df2T_f16(ptr nocapture readonly %S, ptr nocaptur
; CHECK-NEXT: @ in Loop: Header=BB17_3 Depth=1
; CHECK-NEXT: vstr.16 s5, [r6, #2]
; CHECK-NEXT: add.w r12, r12, #10
-; CHECK-NEXT: adds r6, #4
; CHECK-NEXT: subs.w r9, r9, #1
+; CHECK-NEXT: add.w r6, r6, #4
; CHECK-NEXT: mov r1, r2
; CHECK-NEXT: beq .LBB17_8
; CHECK-NEXT: .LBB17_3: @ %do.body
diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
index 495ffe809f70fe..808626d9a0aebe 100644
--- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
@@ -1375,8 +1375,8 @@ define arm_aapcs_vfpcc void @arm_biquad_cascade_stereo_df2T_f32(ptr nocapture re
; CHECK-NEXT: le lr, .LBB17_3
; CHECK-NEXT: @ %bb.4: @ %bb75
; CHECK-NEXT: @ in Loop: Header=BB17_2 Depth=1
-; CHECK-NEXT: adds r3, #20
; CHECK-NEXT: subs.w r12, r12, #1
+; CHECK-NEXT: add.w r3, r3, #20
; CHECK-NEXT: vstrb.8 q3, [r0], #16
; CHECK-NEXT: mov r1, r2
; CHECK-NEXT: bne .LBB17_2
@@ -1514,8 +1514,8 @@ define arm_aapcs_vfpcc void @fms(ptr nocapture readonly %pSrc1, ptr nocapture re
; CHECK-NEXT: le lr, .LBB18_3
; CHECK-NEXT: @ %bb.4: @ %while.end
; CHECK-NEXT: @ in Loop: Header=BB18_2 Depth=1
-; CHECK-NEXT: adds r2, #4
; CHECK-NEXT: subs.w r12, r12, #1
+; CHECK-NEXT: add.w r2, r2, #4
; CHECK-NEXT: bne .LBB18_2
; CHECK-NEXT: .LBB18_5: @ %do.end
; CHECK-NEXT: pop {r4, r5, r7, pc}
@@ -1918,8 +1918,8 @@ define void @arm_biquad_cascade_df2T_f32(ptr nocapture readonly %S, ptr nocaptur
; CHECK-NEXT: @ in Loop: Header=BB20_3 Depth=1
; CHECK-NEXT: vstr s6, [r6, #4]
; CHECK-NEXT: add.w r12, r12, #20
-; CHECK-NEXT: adds r6, #8
; CHECK-NEXT: subs r0, #1
+; CHECK-NEXT: add.w r6, r6, #8
; CHECK-NEXT: mov r1, r2
; CHECK-NEXT: beq .LBB20_8
; CHECK-NEXT: .LBB20_3: @ %do.body
diff --git a/llvm/test/CodeGen/Thumb2/mve-fmas.ll b/llvm/test/CodeGen/Thumb2/mve-fmas.ll
index 94921c78ad912e..377440e1bbc939 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fmas.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fmas.ll
@@ -893,19 +893,19 @@ define arm_aapcs_vfpcc <4 x float> @vfma32_v1_pred(<4 x float> %src1, <4 x float
;
; CHECK-MVE-LABEL: vfma32_v1_pred:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s4, #0
+; CHECK-MVE-NEXT: vcmp.f32 s5, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s7, #0
-; CHECK-MVE-NEXT: vmov.f32 s12, s2
-; CHECK-MVE-NEXT: vmov.f32 s14, s3
-; CHECK-MVE-NEXT: vmla.f32 s12, s6, s10
+; CHECK-MVE-NEXT: vcmp.f32 s4, #0
+; CHECK-MVE-NEXT: vmov.f32 s14, s2
+; CHECK-MVE-NEXT: vmov.f32 s12, s3
+; CHECK-MVE-NEXT: vmla.f32 s14, s6, s10
; CHECK-MVE-NEXT: vmov.f32 s10, s1
-; CHECK-MVE-NEXT: vmla.f32 s14, s7, s11
+; CHECK-MVE-NEXT: vmla.f32 s12, s7, s11
; CHECK-MVE-NEXT: vmla.f32 s10, s5, s9
; CHECK-MVE-NEXT: vmov.f32 s9, s0
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s5, #0
+; CHECK-MVE-NEXT: vcmp.f32 s7, #0
; CHECK-MVE-NEXT: vmla.f32 s9, s4, s8
; CHECK-MVE-NEXT: cset r1, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
@@ -913,13 +913,13 @@ define arm_aapcs_vfpcc <4 x float> @vfma32_v1_pred(<4 x float> %src1, <4 x float
; CHECK-MVE-NEXT: cset r2, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, mi
-; CHECK-MVE-NEXT: cmp r3, #0
-; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s12
; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s12
+; CHECK-MVE-NEXT: cmp r3, #0
+; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s14
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s10
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s14
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s9
; CHECK-MVE-NEXT: bx lr
entry:
@@ -946,19 +946,19 @@ define arm_aapcs_vfpcc <4 x float> @vfma32_v2_pred(<4 x float> %src1, <4 x float
;
; CHECK-MVE-LABEL: vfma32_v2_pred:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s4, #0
+; CHECK-MVE-NEXT: vcmp.f32 s5, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s7, #0
-; CHECK-MVE-NEXT: vmov.f32 s12, s2
-; CHECK-MVE-NEXT: vmov.f32 s14, s3
-; CHECK-MVE-NEXT: vmla.f32 s12, s6, s10
+; CHECK-MVE-NEXT: vcmp.f32 s4, #0
+; CHECK-MVE-NEXT: vmov.f32 s14, s2
+; CHECK-MVE-NEXT: vmov.f32 s12, s3
+; CHECK-MVE-NEXT: vmla.f32 s14, s6, s10
; CHECK-MVE-NEXT: vmov.f32 s10, s1
-; CHECK-MVE-NEXT: vmla.f32 s14, s7, s11
+; CHECK-MVE-NEXT: vmla.f32 s12, s7, s11
; CHECK-MVE-NEXT: vmla.f32 s10, s5, s9
; CHECK-MVE-NEXT: vmov.f32 s9, s0
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s5, #0
+; CHECK-MVE-NEXT: vcmp.f32 s7, #0
; CHECK-MVE-NEXT: vmla.f32 s9, s4, s8
; CHECK-MVE-NEXT: cset r1, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
@@ -966,13 +966,13 @@ define arm_aapcs_vfpcc <4 x float> @vfma32_v2_pred(<4 x float> %src1, <4 x float
; CHECK-MVE-NEXT: cset r2, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, mi
-; CHECK-MVE-NEXT: cmp r3, #0
-; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s12
; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s12
+; CHECK-MVE-NEXT: cmp r3, #0
+; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s14
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s10
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s14
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s9
; CHECK-MVE-NEXT: bx lr
entry:
@@ -999,19 +999,19 @@ define arm_aapcs_vfpcc <4 x float> @vfms32_pred(<4 x float> %src1, <4 x float> %
;
; CHECK-MVE-LABEL: vfms32_pred:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s4, #0
+; CHECK-MVE-NEXT: vcmp.f32 s5, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s7, #0
-; CHECK-MVE-NEXT: vmov.f32 s12, s2
-; CHECK-MVE-NEXT: vmov.f32 s14, s3
-; CHECK-MVE-NEXT: vmls.f32 s12, s6, s10
+; CHECK-MVE-NEXT: vcmp.f32 s4, #0
+; CHECK-MVE-NEXT: vmov.f32 s14, s2
+; CHECK-MVE-NEXT: vmov.f32 s12, s3
+; CHECK-MVE-NEXT: vmls.f32 s14, s6, s10
; CHECK-MVE-NEXT: vmov.f32 s10, s1
-; CHECK-MVE-NEXT: vmls.f32 s14, s7, s11
+; CHECK-MVE-NEXT: vmls.f32 s12, s7, s11
; CHECK-MVE-NEXT: vmls.f32 s10, s5, s9
; CHECK-MVE-NEXT: vmov.f32 s9, s0
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s5, #0
+; CHECK-MVE-NEXT: vcmp.f32 s7, #0
; CHECK-MVE-NEXT: vmls.f32 s9, s4, s8
; CHECK-MVE-NEXT: cset r1, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
@@ -1019,13 +1019,13 @@ define arm_aapcs_vfpcc <4 x float> @vfms32_pred(<4 x float> %src1, <4 x float> %
; CHECK-MVE-NEXT: cset r2, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, mi
-; CHECK-MVE-NEXT: cmp r3, #0
-; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s12
; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s12
+; CHECK-MVE-NEXT: cmp r3, #0
+; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s14
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s10
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s14
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s9
; CHECK-MVE-NEXT: bx lr
entry:
@@ -1055,33 +1055,33 @@ define arm_aapcs_vfpcc <4 x float> @vfmar32_pred(<4 x float> %src1, <4 x float>
;
; CHECK-MVE-LABEL: vfmar32_pred:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s4, #0
+; CHECK-MVE-NEXT: vcmp.f32 s5, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s7, #0
-; CHECK-MVE-NEXT: vmov.f32 s10, s2
-; CHECK-MVE-NEXT: vmov.f32 s12, s1
-; CHECK-MVE-NEXT: vmov.f32 s14, s3
+; CHECK-MVE-NEXT: vcmp.f32 s4, #0
+; CHECK-MVE-NEXT: vmov.f32 s10, s3
+; CHECK-MVE-NEXT: vmov.f32 s12, s2
+; CHECK-MVE-NEXT: vmov.f32 s14, s1
; CHECK-MVE-NEXT: vmov.f32 s9, s0
-; CHECK-MVE-NEXT: vmla.f32 s10, s6, s8
-; CHECK-MVE-NEXT: vmla.f32 s12, s5, s8
-; CHECK-MVE-NEXT: vmla.f32 s14, s7, s8
+; CHECK-MVE-NEXT: vmla.f32 s10, s7, s8
+; CHECK-MVE-NEXT: vmla.f32 s12, s6, s8
+; CHECK-MVE-NEXT: vmla.f32 s14, s5, s8
; CHECK-MVE-NEXT: vmla.f32 s9, s4, s8
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s5, #0
+; CHECK-MVE-NEXT: vcmp.f32 s7, #0
; CHECK-MVE-NEXT: cset r1, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s6, #0
; CHECK-MVE-NEXT: cset r2, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, mi
-; CHECK-MVE-NEXT: cmp r3, #0
-; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s10
; CHECK-MVE-NEXT: cmp r2, #0
-; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s12
-; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s14
+; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s10
+; CHECK-MVE-NEXT: cmp r3, #0
+; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s12
; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s14
+; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s9
; CHECK-MVE-NEXT: bx lr
entry:
@@ -1112,32 +1112,32 @@ define arm_aapcs_vfpcc <4 x float> @vfmas32_pred(<4 x float> %src1, <4 x float>
;
; CHECK-MVE-LABEL: vfmas32_pred:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s4, #0
+; CHECK-MVE-NEXT: vcmp.f32 s5, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s7, #0
+; CHECK-MVE-NEXT: vcmp.f32 s4, #0
; CHECK-MVE-NEXT: vmov.f32 s10, s8
; CHECK-MVE-NEXT: vmov.f32 s12, s8
; CHECK-MVE-NEXT: vmov.f32 s14, s8
; CHECK-MVE-NEXT: vmla.f32 s8, s0, s4
-; CHECK-MVE-NEXT: vmla.f32 s10, s2, s6
-; CHECK-MVE-NEXT: vmla.f32 s12, s1, s5
-; CHECK-MVE-NEXT: vmla.f32 s14, s3, s7
+; CHECK-MVE-NEXT: vmla.f32 s10, s3, s7
+; CHECK-MVE-NEXT: vmla.f32 s12, s2, s6
+; CHECK-MVE-NEXT: vmla.f32 s14, s1, s5
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s5, #0
+; CHECK-MVE-NEXT: vcmp.f32 s7, #0
; CHECK-MVE-NEXT: cset r1, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s6, #0
; CHECK-MVE-NEXT: cset r2, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, mi
-; CHECK-MVE-NEXT: cmp r3, #0
-; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s10
; CHECK-MVE-NEXT: cmp r2, #0
-; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s12
-; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s14
+; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s10
+; CHECK-MVE-NEXT: cmp r3, #0
+; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s12
; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s14
+; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s8
; CHECK-MVE-NEXT: bx lr
entry:
diff --git a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
index 742f2a75a1aa80..f2ac5268921800 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
@@ -668,63 +668,62 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @stest_f64i64(<2 x double> %x) {
; CHECK-LABEL: stest_f64i64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
-; CHECK-NEXT: vmov r0, r1, d8
+; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: bl __fixdfti
-; CHECK-NEXT: vmov r12, lr, d9
-; CHECK-NEXT: subs.w r5, r0, #-1
-; CHECK-NEXT: mvn r4, #-2147483648
-; CHECK-NEXT: sbcs.w r5, r1, r4
-; CHECK-NEXT: sbcs r5, r2, #0
-; CHECK-NEXT: mov.w r7, #-2147483648
-; CHECK-NEXT: sbcs r5, r3, #0
+; CHECK-NEXT: vmov r12, lr, d8
+; CHECK-NEXT: subs.w r4, r0, #-1
+; CHECK-NEXT: mvn r9, #-2147483648
+; CHECK-NEXT: sbcs.w r4, r1, r9
+; CHECK-NEXT: sbcs r4, r2, #0
+; CHECK-NEXT: mov.w r7, #-1
+; CHECK-NEXT: sbcs r4, r3, #0
+; CHECK-NEXT: mov.w r10, #-2147483648
+; CHECK-NEXT: cset r4, lt
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: csel r3, r3, r4, ne
+; CHECK-NEXT: csel r2, r2, r4, ne
+; CHECK-NEXT: csel r4, r0, r7, ne
+; CHECK-NEXT: csel r1, r1, r9, ne
+; CHECK-NEXT: rsbs r0, r4, #0
+; CHECK-NEXT: sbcs.w r0, r10, r1
+; CHECK-NEXT: sbcs.w r0, r7, r2
+; CHECK-NEXT: sbcs.w r0, r7, r3
; CHECK-NEXT: cset r5, lt
; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: csel r3, r3, r5, ne
-; CHECK-NEXT: csel r2, r2, r5, ne
-; CHECK-NEXT: mov.w r5, #-1
-; CHECK-NEXT: csel r1, r1, r4, ne
-; CHECK-NEXT: csel r0, r0, r5, ne
-; CHECK-NEXT: rsbs r6, r0, #0
-; CHECK-NEXT: sbcs.w r6, r7, r1
-; CHECK-NEXT: sbcs.w r2, r5, r2
-; CHECK-NEXT: sbcs.w r2, r5, r3
-; CHECK-NEXT: csel r8, r1, r7, lt
-; CHECK-NEXT: cset r1, lt
-; CHECK-NEXT: cmp r1, #0
-; CHECK-NEXT: csel r9, r0, r1, ne
+; CHECK-NEXT: csel r8, r1, r10, ne
; CHECK-NEXT: mov r0, r12
; CHECK-NEXT: mov r1, lr
; CHECK-NEXT: bl __fixdfti
; CHECK-NEXT: subs.w r6, r0, #-1
-; CHECK-NEXT: sbcs.w r6, r1, r4
+; CHECK-NEXT: sbcs.w r6, r1, r9
; CHECK-NEXT: sbcs r6, r2, #0
; CHECK-NEXT: sbcs r6, r3, #0
; CHECK-NEXT: cset r6, lt
; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: csel r0, r0, r5, ne
+; CHECK-NEXT: csel r0, r0, r7, ne
+; CHECK-NEXT: csel r1, r1, r9, ne
; CHECK-NEXT: csel r3, r3, r6, ne
; CHECK-NEXT: csel r2, r2, r6, ne
-; CHECK-NEXT: csel r1, r1, r4, ne
; CHECK-NEXT: rsbs r6, r0, #0
-; CHECK-NEXT: sbcs.w r6, r7, r1
-; CHECK-NEXT: sbcs.w r2, r5, r2
-; CHECK-NEXT: sbcs.w r2, r5, r3
+; CHECK-NEXT: sbcs.w r6, r10, r1
+; CHECK-NEXT: sbcs.w r2, r7, r2
+; CHECK-NEXT: sbcs.w r2, r7, r3
; CHECK-NEXT: cset r2, lt
-; CHECK-NEXT: csel r1, r1, r7, lt
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: csel r1, r1, r10, ne
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: csel r3, r4, r5, ne
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csel r0, r0, r2, ne
-; CHECK-NEXT: vmov q0[2], q0[0], r9, r0
-; CHECK-NEXT: vmov q0[3], q0[1], r8, r1
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r3
+; CHECK-NEXT: vmov q0[3], q0[1], r1, r8
; CHECK-NEXT: vpop {d8, d9}
-; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
entry:
%conv = fptosi <2 x double> %x to <2 x i128>
%0 = icmp slt <2 x i128> %conv, <i128 9223372036854775807, i128 9223372036854775807>
@@ -738,33 +737,38 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @utest_f64i64(<2 x double> %x) {
; CHECK-LABEL: utest_f64i64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r7, lr}
-; CHECK-NEXT: push {r4, r5, r7, lr}
+; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: bl __fixunsdfti
-; CHECK-NEXT: vmov r12, lr, d8
+; CHECK-NEXT: mov r5, r1
+; CHECK-NEXT: vmov r4, r1, d8
; CHECK-NEXT: subs r2, #1
; CHECK-NEXT: sbcs r2, r3, #0
-; CHECK-NEXT: cset r2, lo
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: csel r4, r1, r2, ne
-; CHECK-NEXT: csel r5, r0, r2, ne
-; CHECK-NEXT: mov r0, r12
-; CHECK-NEXT: mov r1, lr
+; CHECK-NEXT: cset r6, lo
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: csel r7, r0, r6, ne
+; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: bl __fixunsdfti
; CHECK-NEXT: subs r2, #1
; CHECK-NEXT: sbcs r2, r3, #0
; CHECK-NEXT: cset r2, lo
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csel r0, r0, r2, ne
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: csel r3, r5, r6, ne
+; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csel r1, r1, r2, ne
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r5
-; CHECK-NEXT: vmov q0[3], q0[1], r1, r4
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r7
+; CHECK-NEXT: vmov q0[3], q0[1], r1, r3
; CHECK-NEXT: vpop {d8, d9}
-; CHECK-NEXT: pop {r4, r5, r7, pc}
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
entry:
%conv = fptoui <2 x double> %x to <2 x i128>
%0 = icmp ult <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
@@ -776,8 +780,10 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @ustest_f64i64(<2 x double> %x) {
; CHECK-LABEL: ustest_f64i64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
@@ -787,44 +793,47 @@ define arm_aapcs_vfpcc <2 x i64> @ustest_f64i64(<2 x double> %x) {
; CHECK-NEXT: subs r4, r2, #1
; CHECK-NEXT: sbcs r4, r3, #0
; CHECK-NEXT: mov.w r8, #1
-; CHECK-NEXT: cset r4, lt
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: csel r0, r0, r4, ne
-; CHECK-NEXT: csel r3, r3, r4, ne
-; CHECK-NEXT: csel r1, r1, r4, ne
+; CHECK-NEXT: cset r5, lt
+; CHECK-NEXT: movs r7, #0
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: csel r0, r0, r5, ne
+; CHECK-NEXT: csel r3, r3, r5, ne
; CHECK-NEXT: csel r2, r2, r8, ne
-; CHECK-NEXT: rsbs r5, r0, #0
-; CHECK-NEXT: mov.w r4, #0
-; CHECK-NEXT: sbcs.w r5, r4, r1
-; CHECK-NEXT: sbcs.w r2, r4, r2
-; CHECK-NEXT: sbcs.w r2, r4, r3
-; CHECK-NEXT: cset r2, lt
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: csel r5, r1, r2, ne
-; CHECK-NEXT: csel r7, r0, r2, ne
+; CHECK-NEXT: csel r4, r1, r5, ne
+; CHECK-NEXT: rsbs r1, r0, #0
+; CHECK-NEXT: sbcs.w r1, r7, r4
+; CHECK-NEXT: sbcs.w r1, r7, r2
+; CHECK-NEXT: sbcs.w r1, r7, r3
+; CHECK-NEXT: cset r6, lt
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: csel r9, r0, r6, ne
; CHECK-NEXT: mov r0, r12
; CHECK-NEXT: mov r1, lr
; CHECK-NEXT: bl __fixdfti
-; CHECK-NEXT: subs r6, r2, #1
-; CHECK-NEXT: sbcs r6, r3, #0
-; CHECK-NEXT: cset r6, lt
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: csel r0, r0, r6, ne
-; CHECK-NEXT: csel r3, r3, r6, ne
-; CHECK-NEXT: csel r1, r1, r6, ne
+; CHECK-NEXT: subs r5, r2, #1
+; CHECK-NEXT: sbcs r5, r3, #0
+; CHECK-NEXT: cset r5, lt
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: csel r0, r0, r5, ne
; CHECK-NEXT: csel r2, r2, r8, ne
-; CHECK-NEXT: rsbs r6, r0, #0
-; CHECK-NEXT: sbcs.w r6, r4, r1
-; CHECK-NEXT: sbcs.w r2, r4, r2
-; CHECK-NEXT: sbcs.w r2, r4, r3
+; CHECK-NEXT: csel r3, r3, r5, ne
+; CHECK-NEXT: csel r1, r1, r5, ne
+; CHECK-NEXT: rsbs r5, r0, #0
+; CHECK-NEXT: sbcs.w r5, r7, r1
+; CHECK-NEXT: sbcs.w r2, r7, r2
+; CHECK-NEXT: sbcs.w r2, r7, r3
; CHECK-NEXT: cset r2, lt
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csel r0, r0, r2, ne
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: csel r3, r4, r6, ne
+; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csel r1, r1, r2, ne
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r7
-; CHECK-NEXT: vmov q0[3], q0[1], r1, r5
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r9
+; CHECK-NEXT: vmov q0[3], q0[1], r1, r3
; CHECK-NEXT: vpop {d8, d9}
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
entry:
%conv = fptosi <2 x double> %x to <2 x i128>
%0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
@@ -838,54 +847,59 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @stest_f32i64(<2 x float> %x) {
; CHECK-LABEL: stest_f32i64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-NEXT: vmov r0, r9, d0
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: vmov r9, r0, d0
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: subs.w r7, r0, #-1
-; CHECK-NEXT: mvn r5, #-2147483648
-; CHECK-NEXT: sbcs.w r7, r1, r5
-; CHECK-NEXT: mov.w r6, #-1
+; CHECK-NEXT: mvn r10, #-2147483648
+; CHECK-NEXT: sbcs.w r7, r1, r10
+; CHECK-NEXT: mov.w r4, #-1
; CHECK-NEXT: sbcs r7, r2, #0
+; CHECK-NEXT: mov.w r11, #-2147483648
; CHECK-NEXT: sbcs r7, r3, #0
; CHECK-NEXT: cset r7, lt
; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: csel r0, r0, r6, ne
+; CHECK-NEXT: csel r5, r0, r4, ne
; CHECK-NEXT: csel r3, r3, r7, ne
; CHECK-NEXT: csel r2, r2, r7, ne
-; CHECK-NEXT: csel r1, r1, r5, ne
-; CHECK-NEXT: rsbs r4, r0, #0
-; CHECK-NEXT: mov.w r7, #-2147483648
-; CHECK-NEXT: sbcs.w r4, r7, r1
-; CHECK-NEXT: sbcs.w r2, r6, r2
-; CHECK-NEXT: sbcs.w r2, r6, r3
-; CHECK-NEXT: csel r8, r1, r7, lt
-; CHECK-NEXT: cset r1, lt
-; CHECK-NEXT: cmp r1, #0
-; CHECK-NEXT: csel r10, r0, r1, ne
+; CHECK-NEXT: csel r1, r1, r10, ne
+; CHECK-NEXT: rsbs r0, r5, #0
+; CHECK-NEXT: sbcs.w r0, r11, r1
+; CHECK-NEXT: sbcs.w r0, r4, r2
+; CHECK-NEXT: sbcs.w r0, r4, r3
+; CHECK-NEXT: cset r6, lt
; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: csel r8, r1, r11, ne
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: subs.w r4, r0, #-1
-; CHECK-NEXT: sbcs.w r4, r1, r5
-; CHECK-NEXT: sbcs r4, r2, #0
-; CHECK-NEXT: sbcs r4, r3, #0
-; CHECK-NEXT: cset r4, lt
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: csel r0, r0, r6, ne
-; CHECK-NEXT: csel r3, r3, r4, ne
-; CHECK-NEXT: csel r2, r2, r4, ne
-; CHECK-NEXT: csel r1, r1, r5, ne
-; CHECK-NEXT: rsbs r5, r0, #0
-; CHECK-NEXT: sbcs.w r5, r7, r1
-; CHECK-NEXT: sbcs.w r2, r6, r2
-; CHECK-NEXT: sbcs.w r2, r6, r3
+; CHECK-NEXT: subs.w r7, r0, #-1
+; CHECK-NEXT: sbcs.w r7, r1, r10
+; CHECK-NEXT: sbcs r7, r2, #0
+; CHECK-NEXT: sbcs r7, r3, #0
+; CHECK-NEXT: cset r7, lt
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: csel r0, r0, r4, ne
+; CHECK-NEXT: csel r1, r1, r10, ne
+; CHECK-NEXT: csel r3, r3, r7, ne
+; CHECK-NEXT: csel r2, r2, r7, ne
+; CHECK-NEXT: rsbs r7, r0, #0
+; CHECK-NEXT: sbcs.w r7, r11, r1
+; CHECK-NEXT: sbcs.w r2, r4, r2
+; CHECK-NEXT: sbcs.w r2, r4, r3
; CHECK-NEXT: cset r2, lt
-; CHECK-NEXT: csel r1, r1, r7, lt
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: csel r1, r1, r11, ne
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: csel r3, r5, r6, ne
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csel r0, r0, r2, ne
-; CHECK-NEXT: vmov q0[2], q0[0], r10, r0
-; CHECK-NEXT: vmov q0[3], q0[1], r8, r1
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r3
+; CHECK-NEXT: vmov q0[3], q0[1], r1, r8
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
entry:
%conv = fptosi <2 x float> %x to <2 x i128>
%0 = icmp slt <2 x i128> %conv, <i128 9223372036854775807, i128 9223372036854775807>
@@ -899,27 +913,33 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @utest_f32i64(<2 x float> %x) {
; CHECK-LABEL: utest_f32i64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, lr}
-; CHECK-NEXT: push {r4, r5, r6, lr}
+; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: vmov r4, r0, d0
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: subs r2, #1
-; CHECK-NEXT: sbcs r2, r3, #0
-; CHECK-NEXT: cset r2, lo
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: csel r6, r0, r2, ne
+; CHECK-NEXT: mov r5, r1
+; CHECK-NEXT: subs r1, r2, #1
+; CHECK-NEXT: sbcs r1, r3, #0
+; CHECK-NEXT: cset r6, lo
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: csel r7, r0, r6, ne
; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: csel r5, r1, r2, ne
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: subs r2, #1
; CHECK-NEXT: sbcs r2, r3, #0
; CHECK-NEXT: cset r2, lo
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csel r0, r0, r2, ne
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: csel r3, r5, r6, ne
+; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csel r1, r1, r2, ne
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r6
-; CHECK-NEXT: vmov q0[3], q0[1], r1, r5
-; CHECK-NEXT: pop {r4, r5, r6, pc}
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r7
+; CHECK-NEXT: vmov q0[3], q0[1], r1, r3
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
entry:
%conv = fptoui <2 x float> %x to <2 x i128>
%0 = icmp ult <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
@@ -931,49 +951,54 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @ustest_f32i64(<2 x float> %x) {
; CHECK-LABEL: ustest_f32i64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT: vmov r5, r0, d0
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: vmov r6, r0, d0
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: subs r4, r2, #1
+; CHECK-NEXT: subs r5, r2, #1
; CHECK-NEXT: mov.w r8, #1
-; CHECK-NEXT: sbcs r4, r3, #0
-; CHECK-NEXT: mov.w r6, #0
+; CHECK-NEXT: sbcs r5, r3, #0
; CHECK-NEXT: cset r4, lt
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: csel r0, r0, r4, ne
; CHECK-NEXT: csel r3, r3, r4, ne
-; CHECK-NEXT: csel r1, r1, r4, ne
+; CHECK-NEXT: csel r5, r1, r4, ne
; CHECK-NEXT: csel r2, r2, r8, ne
-; CHECK-NEXT: rsbs r4, r0, #0
-; CHECK-NEXT: sbcs.w r4, r6, r1
-; CHECK-NEXT: sbcs.w r2, r6, r2
-; CHECK-NEXT: sbcs.w r2, r6, r3
-; CHECK-NEXT: cset r2, lt
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: csel r7, r0, r2, ne
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: csel r4, r1, r2, ne
+; CHECK-NEXT: rsbs r1, r0, #0
+; CHECK-NEXT: mov.w r4, #0
+; CHECK-NEXT: sbcs.w r1, r4, r5
+; CHECK-NEXT: sbcs.w r1, r4, r2
+; CHECK-NEXT: sbcs.w r1, r4, r3
+; CHECK-NEXT: cset r7, lt
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: csel r9, r0, r7, ne
+; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: subs r5, r2, #1
-; CHECK-NEXT: sbcs r5, r3, #0
-; CHECK-NEXT: cset r5, lt
-; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: csel r0, r0, r5, ne
-; CHECK-NEXT: csel r3, r3, r5, ne
-; CHECK-NEXT: csel r1, r1, r5, ne
+; CHECK-NEXT: subs r6, r2, #1
+; CHECK-NEXT: sbcs r6, r3, #0
+; CHECK-NEXT: cset r6, lt
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: csel r0, r0, r6, ne
; CHECK-NEXT: csel r2, r2, r8, ne
-; CHECK-NEXT: rsbs r5, r0, #0
-; CHECK-NEXT: sbcs.w r5, r6, r1
-; CHECK-NEXT: sbcs.w r2, r6, r2
-; CHECK-NEXT: sbcs.w r2, r6, r3
+; CHECK-NEXT: csel r3, r3, r6, ne
+; CHECK-NEXT: csel r1, r1, r6, ne
+; CHECK-NEXT: rsbs r6, r0, #0
+; CHECK-NEXT: sbcs.w r6, r4, r1
+; CHECK-NEXT: sbcs.w r2, r4, r2
+; CHECK-NEXT: sbcs.w r2, r4, r3
; CHECK-NEXT: cset r2, lt
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csel r0, r0, r2, ne
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: csel r3, r5, r7, ne
+; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csel r1, r1, r2, ne
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r7
-; CHECK-NEXT: vmov q0[3], q0[1], r1, r4
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r9
+; CHECK-NEXT: vmov q0[3], q0[1], r1, r3
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
entry:
%conv = fptosi <2 x float> %x to <2 x i128>
%0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
@@ -1041,39 +1066,39 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @ustest_f16i64(<2 x half> %x) {
; CHECK-LABEL: ustest_f16i64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, lr}
-; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov.u16 r0, q0[1]
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: bl __fixhfti
-; CHECK-NEXT: rsbs r4, r0, #0
+; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: rsbs r1, r0, #0
; CHECK-NEXT: mov.w r5, #0
-; CHECK-NEXT: sbcs.w r4, r5, r1
-; CHECK-NEXT: sbcs.w r2, r5, r2
-; CHECK-NEXT: sbcs.w r2, r5, r3
-; CHECK-NEXT: cset r2, lt
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: csel r6, r0, r2, ne
+; CHECK-NEXT: sbcs.w r1, r5, r4
+; CHECK-NEXT: sbcs.w r1, r5, r2
+; CHECK-NEXT: sbcs.w r1, r5, r3
+; CHECK-NEXT: cset r6, lt
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: csel r8, r0, r6, ne
; CHECK-NEXT: vmov.u16 r0, q4[0]
-; CHECK-NEXT: csel r7, r1, r2, ne
; CHECK-NEXT: bl __fixhfti
-; CHECK-NEXT: rsbs r4, r0, #0
-; CHECK-NEXT: sbcs.w r4, r5, r1
+; CHECK-NEXT: rsbs r7, r0, #0
+; CHECK-NEXT: sbcs.w r7, r5, r1
; CHECK-NEXT: sbcs.w r2, r5, r2
; CHECK-NEXT: sbcs.w r2, r5, r3
; CHECK-NEXT: cset r2, lt
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csel r0, r0, r2, ne
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: csel r3, r4, r6, ne
+; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csel r1, r1, r2, ne
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r6
-; CHECK-NEXT: vmov q0[3], q0[1], r1, r7
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r8
+; CHECK-NEXT: vmov q0[3], q0[1], r1, r3
; CHECK-NEXT: vpop {d8, d9}
-; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
entry:
%conv = fptosi <2 x half> %x to <2 x i128>
%0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
@@ -1723,63 +1748,62 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
; CHECK-LABEL: stest_f64i64_mm:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: bl __fixdfti
; CHECK-NEXT: vmov r12, lr, d8
-; CHECK-NEXT: subs.w r5, r0, #-1
-; CHECK-NEXT: mvn r4, #-2147483648
-; CHECK-NEXT: sbcs.w r5, r1, r4
-; CHECK-NEXT: sbcs r5, r2, #0
-; CHECK-NEXT: mov.w r6, #-1
-; CHECK-NEXT: sbcs r5, r3, #0
+; CHECK-NEXT: subs.w r4, r0, #-1
+; CHECK-NEXT: mvn r9, #-2147483648
+; CHECK-NEXT: sbcs.w r4, r1, r9
+; CHECK-NEXT: sbcs r4, r2, #0
+; CHECK-NEXT: mov.w r7, #-1
+; CHECK-NEXT: sbcs r4, r3, #0
+; CHECK-NEXT: mov.w r10, #-2147483648
+; CHECK-NEXT: cset r4, lt
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: csel r3, r3, r4, ne
+; CHECK-NEXT: csel r2, r2, r4, ne
+; CHECK-NEXT: csel r4, r0, r7, ne
+; CHECK-NEXT: csel r1, r1, r9, ne
+; CHECK-NEXT: rsbs r0, r4, #0
+; CHECK-NEXT: sbcs.w r0, r10, r1
+; CHECK-NEXT: sbcs.w r0, r7, r2
+; CHECK-NEXT: sbcs.w r0, r7, r3
; CHECK-NEXT: cset r5, lt
; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: csel r0, r0, r6, ne
-; CHECK-NEXT: csel r3, r3, r5, ne
-; CHECK-NEXT: csel r2, r2, r5, ne
-; CHECK-NEXT: csel r1, r1, r4, ne
-; CHECK-NEXT: rsbs r7, r0, #0
-; CHECK-NEXT: mov.w r5, #-2147483648
-; CHECK-NEXT: sbcs.w r7, r5, r1
-; CHECK-NEXT: sbcs.w r2, r6, r2
-; CHECK-NEXT: sbcs.w r2, r6, r3
-; CHECK-NEXT: cset r2, lt
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: csel r9, r0, r2, ne
-; CHECK-NEXT: csel r8, r1, r5, ne
+; CHECK-NEXT: csel r8, r1, r10, ne
; CHECK-NEXT: mov r0, r12
; CHECK-NEXT: mov r1, lr
; CHECK-NEXT: bl __fixdfti
-; CHECK-NEXT: subs.w r7, r0, #-1
-; CHECK-NEXT: sbcs.w r7, r1, r4
-; CHECK-NEXT: sbcs r7, r2, #0
-; CHECK-NEXT: sbcs r7, r3, #0
-; CHECK-NEXT: cset r7, lt
-; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: csel r0, r0, r6, ne
-; CHECK-NEXT: csel r3, r3, r7, ne
-; CHECK-NEXT: csel r2, r2, r7, ne
-; CHECK-NEXT: csel r1, r1, r4, ne
-; CHECK-NEXT: rsbs r7, r0, #0
-; CHECK-NEXT: sbcs.w r7, r5, r1
-; CHECK-NEXT: sbcs.w r2, r6, r2
-; CHECK-NEXT: sbcs.w r2, r6, r3
+; CHECK-NEXT: subs.w r6, r0, #-1
+; CHECK-NEXT: sbcs.w r6, r1, r9
+; CHECK-NEXT: sbcs r6, r2, #0
+; CHECK-NEXT: sbcs r6, r3, #0
+; CHECK-NEXT: cset r6, lt
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: csel r0, r0, r7, ne
+; CHECK-NEXT: csel r1, r1, r9, ne
+; CHECK-NEXT: csel r3, r3, r6, ne
+; CHECK-NEXT: csel r2, r2, r6, ne
+; CHECK-NEXT: rsbs r6, r0, #0
+; CHECK-NEXT: sbcs.w r6, r10, r1
+; CHECK-NEXT: sbcs.w r2, r7, r2
+; CHECK-NEXT: sbcs.w r2, r7, r3
; CHECK-NEXT: cset r2, lt
; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: csel r1, r1, r10, ne
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: csel r3, r4, r5, ne
+; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csel r0, r0, r2, ne
-; CHECK-NEXT: csel r1, r1, r5, ne
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r9
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r3
; CHECK-NEXT: vmov q0[3], q0[1], r1, r8
; CHECK-NEXT: vpop {d8, d9}
-; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
entry:
%conv = fptosi <2 x double> %x to <2 x i128>
%spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>)
@@ -1791,33 +1815,38 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
; CHECK-LABEL: utest_f64i64_mm:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r7, lr}
-; CHECK-NEXT: push {r4, r5, r7, lr}
+; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: bl __fixunsdfti
-; CHECK-NEXT: vmov r12, lr, d8
+; CHECK-NEXT: mov r5, r1
+; CHECK-NEXT: vmov r4, r1, d8
; CHECK-NEXT: subs r2, #1
; CHECK-NEXT: sbcs r2, r3, #0
-; CHECK-NEXT: cset r2, lo
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: csel r4, r1, r2, ne
-; CHECK-NEXT: csel r5, r0, r2, ne
-; CHECK-NEXT: mov r0, r12
-; CHECK-NEXT: mov r1, lr
+; CHECK-NEXT: cset r6, lo
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: csel r7, r0, r6, ne
+; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: bl __fixunsdfti
; CHECK-NEXT: subs r2, #1
; CHECK-NEXT: sbcs r2, r3, #0
; CHECK-NEXT: cset r2, lo
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csel r0, r0, r2, ne
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: csel r3, r5, r6, ne
+; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csel r1, r1, r2, ne
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r5
-; CHECK-NEXT: vmov q0[3], q0[1], r1, r4
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r7
+; CHECK-NEXT: vmov q0[3], q0[1], r1, r3
; CHECK-NEXT: vpop {d8, d9}
-; CHECK-NEXT: pop {r4, r5, r7, pc}
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
entry:
%conv = fptoui <2 x double> %x to <2 x i128>
%spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
@@ -1828,43 +1857,49 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
; CHECK-LABEL: ustest_f64i64_mm:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r7, lr}
-; CHECK-NEXT: push {r4, r5, r7, lr}
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: bl __fixdfti
-; CHECK-NEXT: vmov r12, lr, d8
+; CHECK-NEXT: mov r8, r1
+; CHECK-NEXT: vmov r4, r1, d8
; CHECK-NEXT: subs r2, #1
; CHECK-NEXT: sbcs r2, r3, #0
-; CHECK-NEXT: cset r2, lt
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: csel r5, r0, r2, ne
-; CHECK-NEXT: csel r0, r3, r2, ne
-; CHECK-NEXT: csel r4, r1, r2, ne
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: itt mi
-; CHECK-NEXT: movmi r4, #0
-; CHECK-NEXT: movmi r5, #0
-; CHECK-NEXT: mov r0, r12
-; CHECK-NEXT: mov r1, lr
+; CHECK-NEXT: cset r7, lt
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: csel r6, r0, r7, ne
+; CHECK-NEXT: csel r5, r3, r7, ne
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: it mi
+; CHECK-NEXT: movmi r6, #0
+; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: bl __fixdfti
; CHECK-NEXT: subs r2, #1
; CHECK-NEXT: sbcs r2, r3, #0
; CHECK-NEXT: cset r2, lt
; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: csel r1, r1, r2, ne
+; CHECK-NEXT: csel r3, r3, r2, ne
; CHECK-NEXT: csel r0, r0, r2, ne
-; CHECK-NEXT: csel r2, r3, r2, ne
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: itt mi
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: it mi
; CHECK-NEXT: movmi r0, #0
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r6
+; CHECK-NEXT: csel r7, r8, r7, ne
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: it mi
+; CHECK-NEXT: movmi r7, #0
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: csel r1, r1, r2, ne
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: it mi
; CHECK-NEXT: movmi r1, #0
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r5
-; CHECK-NEXT: vmov q0[3], q0[1], r1, r4
+; CHECK-NEXT: vmov q0[3], q0[1], r1, r7
; CHECK-NEXT: vpop {d8, d9}
-; CHECK-NEXT: pop {r4, r5, r7, pc}
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
entry:
%conv = fptosi <2 x double> %x to <2 x i128>
%spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
@@ -1876,54 +1911,59 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
; CHECK-LABEL: stest_f32i64_mm:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-NEXT: vmov r8, r0, d0
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: vmov r9, r0, d0
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: subs.w r7, r0, #-1
-; CHECK-NEXT: mvn r5, #-2147483648
-; CHECK-NEXT: sbcs.w r7, r1, r5
-; CHECK-NEXT: mov.w r6, #-2147483648
+; CHECK-NEXT: mvn r10, #-2147483648
+; CHECK-NEXT: sbcs.w r7, r1, r10
+; CHECK-NEXT: mov.w r4, #-1
; CHECK-NEXT: sbcs r7, r2, #0
+; CHECK-NEXT: mov.w r11, #-2147483648
; CHECK-NEXT: sbcs r7, r3, #0
; CHECK-NEXT: cset r7, lt
; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: csel r5, r0, r4, ne
; CHECK-NEXT: csel r3, r3, r7, ne
; CHECK-NEXT: csel r2, r2, r7, ne
-; CHECK-NEXT: mov.w r7, #-1
-; CHECK-NEXT: csel r1, r1, r5, ne
-; CHECK-NEXT: csel r0, r0, r7, ne
-; CHECK-NEXT: rsbs r4, r0, #0
-; CHECK-NEXT: sbcs.w r4, r6, r1
-; CHECK-NEXT: sbcs.w r2, r7, r2
-; CHECK-NEXT: sbcs.w r2, r7, r3
-; CHECK-NEXT: cset r2, lt
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: csel r10, r0, r2, ne
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: csel r9, r1, r6, ne
+; CHECK-NEXT: csel r1, r1, r10, ne
+; CHECK-NEXT: rsbs r0, r5, #0
+; CHECK-NEXT: sbcs.w r0, r11, r1
+; CHECK-NEXT: sbcs.w r0, r4, r2
+; CHECK-NEXT: sbcs.w r0, r4, r3
+; CHECK-NEXT: cset r6, lt
+; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: csel r8, r1, r11, ne
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: subs.w r4, r0, #-1
-; CHECK-NEXT: sbcs.w r4, r1, r5
-; CHECK-NEXT: sbcs r4, r2, #0
-; CHECK-NEXT: sbcs r4, r3, #0
-; CHECK-NEXT: cset r4, lt
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: csel r0, r0, r7, ne
-; CHECK-NEXT: csel r3, r3, r4, ne
-; CHECK-NEXT: csel r2, r2, r4, ne
-; CHECK-NEXT: csel r1, r1, r5, ne
-; CHECK-NEXT: rsbs r5, r0, #0
-; CHECK-NEXT: sbcs.w r5, r6, r1
-; CHECK-NEXT: sbcs.w r2, r7, r2
-; CHECK-NEXT: sbcs.w r2, r7, r3
+; CHECK-NEXT: subs.w r7, r0, #-1
+; CHECK-NEXT: sbcs.w r7, r1, r10
+; CHECK-NEXT: sbcs r7, r2, #0
+; CHECK-NEXT: sbcs r7, r3, #0
+; CHECK-NEXT: cset r7, lt
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: csel r0, r0, r4, ne
+; CHECK-NEXT: csel r1, r1, r10, ne
+; CHECK-NEXT: csel r3, r3, r7, ne
+; CHECK-NEXT: csel r2, r2, r7, ne
+; CHECK-NEXT: rsbs r7, r0, #0
+; CHECK-NEXT: sbcs.w r7, r11, r1
+; CHECK-NEXT: sbcs.w r2, r4, r2
+; CHECK-NEXT: sbcs.w r2, r4, r3
; CHECK-NEXT: cset r2, lt
; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: csel r1, r1, r11, ne
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: csel r3, r5, r6, ne
+; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csel r0, r0, r2, ne
-; CHECK-NEXT: csel r1, r1, r6, ne
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r10
-; CHECK-NEXT: vmov q0[3], q0[1], r1, r9
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r3
+; CHECK-NEXT: vmov q0[3], q0[1], r1, r8
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
entry:
%conv = fptosi <2 x float> %x to <2 x i128>
%spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>)
@@ -1935,27 +1975,33 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
; CHECK-LABEL: utest_f32i64_mm:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, lr}
-; CHECK-NEXT: push {r4, r5, r6, lr}
+; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: vmov r4, r0, d0
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: subs r2, #1
-; CHECK-NEXT: sbcs r2, r3, #0
-; CHECK-NEXT: cset r2, lo
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: csel r6, r0, r2, ne
+; CHECK-NEXT: mov r5, r1
+; CHECK-NEXT: subs r1, r2, #1
+; CHECK-NEXT: sbcs r1, r3, #0
+; CHECK-NEXT: cset r6, lo
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: csel r7, r0, r6, ne
; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: csel r5, r1, r2, ne
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: subs r2, #1
; CHECK-NEXT: sbcs r2, r3, #0
; CHECK-NEXT: cset r2, lo
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csel r0, r0, r2, ne
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: csel r3, r5, r6, ne
+; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csel r1, r1, r2, ne
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r6
-; CHECK-NEXT: vmov q0[3], q0[1], r1, r5
-; CHECK-NEXT: pop {r4, r5, r6, pc}
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r7
+; CHECK-NEXT: vmov q0[3], q0[1], r1, r3
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
entry:
%conv = fptoui <2 x float> %x to <2 x i128>
%spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
@@ -1966,37 +2012,44 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
; CHECK-LABEL: ustest_f32i64_mm:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, lr}
-; CHECK-NEXT: push {r4, r5, r6, lr}
-; CHECK-NEXT: vmov r4, r0, d0
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: vmov r5, r0, d0
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: subs r2, #1
-; CHECK-NEXT: sbcs r2, r3, #0
-; CHECK-NEXT: cset r2, lt
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: csel r6, r0, r2, ne
-; CHECK-NEXT: csel r0, r3, r2, ne
-; CHECK-NEXT: csel r5, r1, r2, ne
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: itt mi
-; CHECK-NEXT: movmi r5, #0
+; CHECK-NEXT: mov r8, r1
+; CHECK-NEXT: subs r1, r2, #1
+; CHECK-NEXT: sbcs r1, r3, #0
+; CHECK-NEXT: cset r7, lt
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: csel r6, r0, r7, ne
+; CHECK-NEXT: csel r4, r3, r7, ne
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: it mi
; CHECK-NEXT: movmi r6, #0
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: subs r2, #1
; CHECK-NEXT: sbcs r2, r3, #0
; CHECK-NEXT: cset r2, lt
; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: csel r1, r1, r2, ne
+; CHECK-NEXT: csel r3, r3, r2, ne
; CHECK-NEXT: csel r0, r0, r2, ne
-; CHECK-NEXT: csel r2, r3, r2, ne
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: itt mi
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: it mi
; CHECK-NEXT: movmi r0, #0
-; CHECK-NEXT: movmi r1, #0
+; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: vmov q0[2], q0[0], r0, r6
-; CHECK-NEXT: vmov q0[3], q0[1], r1, r5
-; CHECK-NEXT: pop {r4, r5, r6, pc}
+; CHECK-NEXT: csel r7, r8, r7, ne
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: it mi
+; CHECK-NEXT: movmi r7, #0
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: csel r1, r1, r2, ne
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: it mi
+; CHECK-NEXT: movmi r1, #0
+; CHECK-NEXT: vmov q0[3], q0[1], r1, r7
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
entry:
%conv = fptosi <2 x float> %x to <2 x i128>
%spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
@@ -2059,8 +2112,8 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
; CHECK-LABEL: ustest_f16i64_mm:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r7, lr}
-; CHECK-NEXT: push {r4, r5, r7, lr}
+; CHECK-NEXT: .save {r4, r5, r6, lr}
+; CHECK-NEXT: push {r4, r5, r6, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov.u16 r0, q0[1]
@@ -2069,19 +2122,24 @@ define arm_aapcs_vfpcc <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: vmov.u16 r0, q4[0]
; CHECK-NEXT: mov r5, r1
-; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: itt mi
-; CHECK-NEXT: movmi r5, #0
-; CHECK-NEXT: movmi r4, #0
+; CHECK-NEXT: mov r6, r3
; CHECK-NEXT: bl __fixhfti
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: it mi
+; CHECK-NEXT: movmi r4, #0
; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: itt mi
+; CHECK-NEXT: it mi
; CHECK-NEXT: movmi r0, #0
-; CHECK-NEXT: movmi r1, #0
+; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: vmov q0[2], q0[0], r0, r4
+; CHECK-NEXT: it mi
+; CHECK-NEXT: movmi r5, #0
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: it mi
+; CHECK-NEXT: movmi r1, #0
; CHECK-NEXT: vmov q0[3], q0[1], r1, r5
; CHECK-NEXT: vpop {d8, d9}
-; CHECK-NEXT: pop {r4, r5, r7, pc}
+; CHECK-NEXT: pop {r4, r5, r6, pc}
entry:
%conv = fptosi <2 x half> %x to <2 x i128>
%spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
index 77548b49d77f23..75b6cb3e1272bc 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
@@ -281,51 +281,49 @@ declare <6 x i32> @llvm.fptosi.sat.v6f64.v6i32 (<6 x double>)
define arm_aapcs_vfpcc <1 x i32> @test_signed_v1f64_v1i32(<1 x double> %f) {
; CHECK-LABEL: test_signed_v1f64_v1i32:
; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, lr}
-; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: vldr d1, .LCPI8_0
; CHECK-NEXT: vmov r5, r4, d0
; CHECK-NEXT: vmov r2, r3, d1
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: vldr d0, .LCPI8_1
+; CHECK-NEXT: mov r8, r0
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: bl __aeabi_d2iz
-; CHECK-NEXT: vldr d0, .LCPI8_1
+; CHECK-NEXT: vmov r2, r3, d0
+; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: bl __aeabi_d2iz
+; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: it eq
-; CHECK-NEXT: moveq.w r7, #-2147483648
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: moveq.w r6, #-2147483648
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: mov r2, r5
; CHECK-NEXT: mov r3, r4
+; CHECK-NEXT: cmp.w r8, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: mvnne r7, #-2147483648
+; CHECK-NEXT: mvnne r6, #-2147483648
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r7, #0
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT: movne r6, #0
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI8_0:
-; CHECK-NEXT: .long 0 @ double -2147483648
-; CHECK-NEXT: .long 3252682752
-; CHECK-NEXT: .LCPI8_1:
; CHECK-NEXT: .long 4290772992 @ double 2147483647
; CHECK-NEXT: .long 1105199103
+; CHECK-NEXT: .LCPI8_1:
+; CHECK-NEXT: .long 0 @ double -2147483648
+; CHECK-NEXT: .long 3252682752
%x = call <1 x i32> @llvm.fptosi.sat.v1f64.v1i32(<1 x double> %f)
ret <1 x i32> %x
}
@@ -339,82 +337,115 @@ define arm_aapcs_vfpcc <2 x i32> @test_signed_v2f64_v2i32(<2 x double> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #16
-; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: .pad #24
+; CHECK-NEXT: sub sp, #24
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI9_0
-; CHECK-NEXT: vmov r9, r8, d9
-; CHECK-NEXT: vmov r7, r3, d0
-; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: vmov r8, r7, d9
+; CHECK-NEXT: vmov r2, r3, d0
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: strd r2, r3, [sp, #8] @ 8-byte Folded Spill
; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: vldr d0, .LCPI9_1
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: lsrs r4, r0, #5
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: vmov r6, r5, d0
+; CHECK-NEXT: str r4, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: mov r11, r0
+; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r11, #-2147483648
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r0, #1
+; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: it ne
+; CHECK-NEXT: mvnne r11, #-2147483648
+; CHECK-NEXT: bl __aeabi_dcmpun
+; CHECK-NEXT: vmov r10, r7, d8
+; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r8, #1
+; CHECK-NEXT: cmp.w r8, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r11, #0
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r6, #1
+; CHECK-NEXT: ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: lsr.w r9, r0, #5
+; CHECK-NEXT: mov r0, r10
; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: vldr d0, .LCPI9_1
; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: mov r4, r1
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: vmov r11, r10, d8
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r5, #-2147483648
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r3, r7
; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill
-; CHECK-NEXT: itt eq
-; CHECK-NEXT: moveq.w r4, #-1
-; CHECK-NEXT: moveq.w r5, #-2147483648
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: itt ne
+; CHECK-NEXT: it ne
; CHECK-NEXT: mvnne r5, #-2147483648
-; CHECK-NEXT: movne r4, #0
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
; CHECK-NEXT: movne r5, #0
-; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r8, r0
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: mov r6, r1
+; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: vmov q0[2], q0[0], r5, r11
+; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r2, #-1
+; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r2, #0
; CHECK-NEXT: cmp.w r8, #0
-; CHECK-NEXT: itt eq
-; CHECK-NEXT: moveq.w r7, #-2147483648
-; CHECK-NEXT: moveq.w r6, #-1
-; CHECK-NEXT: ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: mov r3, r10
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne r6, #0
-; CHECK-NEXT: mvnne r7, #-2147483648
-; CHECK-NEXT: bl __aeabi_dcmpun
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r2, #0
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r4, #-1
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r4, #0
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne r7, #0
-; CHECK-NEXT: movne r6, #0
-; CHECK-NEXT: vmov q0[2], q0[0], r7, r5
-; CHECK-NEXT: vmov q0[3], q0[1], r6, r4
-; CHECK-NEXT: add sp, #16
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r4, #0
+; CHECK-NEXT: vmov q0[3], q0[1], r4, r2
+; CHECK-NEXT: add sp, #24
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -441,53 +472,90 @@ define arm_aapcs_vfpcc <3 x i32> @test_signed_v3f64_v3i32(<3 x double> %f) {
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: .pad #24
; CHECK-NEXT: sub sp, #24
-; CHECK-NEXT: vmov.f32 s18, s0
-; CHECK-NEXT: vmov.f32 s19, s1
+; CHECK-NEXT: vmov.f32 s16, s0
+; CHECK-NEXT: vmov.f32 s17, s1
; CHECK-NEXT: vldr d0, .LCPI10_0
-; CHECK-NEXT: vmov r10, r7, d1
-; CHECK-NEXT: vmov r6, r3, d0
-; CHECK-NEXT: vmov.f32 s16, s4
-; CHECK-NEXT: vmov.f32 s17, s5
-; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r10
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: vmov r4, r6, d1
+; CHECK-NEXT: vmov r2, r11, d0
+; CHECK-NEXT: vmov.f32 s18, s4
+; CHECK-NEXT: vmov.f32 s19, s5
+; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: str.w r11, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: vldr d0, .LCPI10_1
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: vmov r2, r8, d0
+; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: str.w r8, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: mov r3, r8
; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: mov r10, r0
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: it eq
+; CHECK-NEXT: moveq.w r10, #-2147483648
+; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r2, r4
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: vmov r5, r7, d9
+; CHECK-NEXT: it ne
+; CHECK-NEXT: mvnne r10, #-2147483648
+; CHECK-NEXT: bl __aeabi_dcmpun
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r10, #0
+; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: vldr d0, .LCPI10_1
+; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r11, r0
-; CHECK-NEXT: vmov r1, r0, d9
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: vmov r9, r8, d8
-; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT: strd r1, r0, [sp, #12] @ 8-byte Folded Spill
-; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: cmp.w r11, #0
; CHECK-NEXT: it eq
-; CHECK-NEXT: moveq.w r11, #-2147483648
-; CHECK-NEXT: mov r4, r3
-; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: moveq.w r6, #-2147483648
+; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r2, r5
; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: vmov r9, r8, d8
; CHECK-NEXT: it ne
-; CHECK-NEXT: mvnne r11, #-2147483648
+; CHECK-NEXT: mvnne r6, #-2147483648
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r11, #0
-; CHECK-NEXT: ldr.w r10, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: movne r6, #0
+; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r6
-; CHECK-NEXT: mov r3, r10
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: mov r0, r9
@@ -497,55 +565,19 @@ define arm_aapcs_vfpcc <3 x i32> @test_signed_v3f64_v3i32(<3 x double> %f) {
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: it eq
; CHECK-NEXT: moveq.w r7, #-2147483648
-; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r3, r4
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r8
; CHECK-NEXT: mov r2, r9
; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: mvnne r7, #-2147483648
; CHECK-NEXT: bl __aeabi_dcmpun
+; CHECK-NEXT: vmov.32 q0[1], r10
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r7, #0
-; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r6
-; CHECK-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: mov r3, r10
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r8, r0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: cmp.w r8, #0
-; CHECK-NEXT: it eq
-; CHECK-NEXT: moveq.w r6, #-2147483648
-; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r3, r4
-; CHECK-NEXT: it ne
-; CHECK-NEXT: mvnne r6, #-2147483648
-; CHECK-NEXT: bl __aeabi_dcmpun
-; CHECK-NEXT: vmov.32 q0[1], r11
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r6, #0
-; CHECK-NEXT: vmov q0[2], q0[0], r6, r7
+; CHECK-NEXT: vmov q0[2], q0[0], r7, r6
; CHECK-NEXT: add sp, #24
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
@@ -553,11 +585,11 @@ define arm_aapcs_vfpcc <3 x i32> @test_signed_v3f64_v3i32(<3 x double> %f) {
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI10_0:
-; CHECK-NEXT: .long 0 @ double -2147483648
-; CHECK-NEXT: .long 3252682752
-; CHECK-NEXT: .LCPI10_1:
; CHECK-NEXT: .long 4290772992 @ double 2147483647
; CHECK-NEXT: .long 1105199103
+; CHECK-NEXT: .LCPI10_1:
+; CHECK-NEXT: .long 0 @ double -2147483648
+; CHECK-NEXT: .long 3252682752
%x = call <3 x i32> @llvm.fptosi.sat.v3f64.v3i32(<3 x double> %f)
ret <3 x i32> %x
}
@@ -571,86 +603,95 @@ define arm_aapcs_vfpcc <4 x i32> @test_signed_v4f64_v4i32(<4 x double> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
-; CHECK-NEXT: .pad #24
-; CHECK-NEXT: sub sp, #24
-; CHECK-NEXT: vmov q5, q1
+; CHECK-NEXT: .pad #32
+; CHECK-NEXT: sub sp, #32
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI11_0
+; CHECK-NEXT: vmov q5, q1
; CHECK-NEXT: vmov r5, r6, d10
-; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: mov r9, r3
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: vmov r9, r3, d0
+; CHECK-NEXT: str r3, [sp, #24] @ 4-byte Spill
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: vldr d0, .LCPI11_1
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: vmov r11, r0, d11
+; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: cmp.w r8, #0
-; CHECK-NEXT: vmov r7, r10, d8
-; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT: it eq
-; CHECK-NEXT: moveq.w r4, #-2147483648
; CHECK-NEXT: str r3, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT: mov r8, r3
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: str r2, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: mov r8, r0
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: vmov r11, r1, d11
+; CHECK-NEXT: cmp.w r8, #0
; CHECK-NEXT: mov r2, r5
; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: vmov r7, r10, d8
+; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: it eq
+; CHECK-NEXT: moveq.w r0, #-2147483648
+; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: mvnne r4, #-2147483648
+; CHECK-NEXT: mvnne r0, #-2147483648
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: ldr.w r8, [sp, #24] @ 4-byte Reload
; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: mov r3, r9
-; CHECK-NEXT: str r4, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: mov r4, r9
-; CHECK-NEXT: mov r2, r6
-; CHECK-NEXT: str.w r9, [sp] @ 4-byte Spill
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: str r4, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: str.w r9, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: ldr r4, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: mov r1, r10
+; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: mov r3, r4
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: mov r1, r10
; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: it eq
-; CHECK-NEXT: moveq.w r9, #-2147483648
-; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: moveq.w r6, #-2147483648
+; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r2, r7
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: mov r2, r7
; CHECK-NEXT: mov r3, r10
; CHECK-NEXT: it ne
-; CHECK-NEXT: mvnne r9, #-2147483648
+; CHECK-NEXT: mvnne r6, #-2147483648
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r9, #0
-; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: movne r6, #0
+; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: ldr.w r9, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: mov r10, r0
; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r2, r6
-; CHECK-NEXT: mov r3, r4
; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r3, r4
+; CHECK-NEXT: mov r2, r9
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r0, r11
@@ -660,17 +701,11 @@ define arm_aapcs_vfpcc <4 x i32> @test_signed_v4f64_v4i32(<4 x double> %f) {
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: it eq
; CHECK-NEXT: moveq.w r8, #-2147483648
-; CHECK-NEXT: ldr.w r10, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: ldr r3, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r0, r11
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: mov r2, r11
; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: cmp.w r10, #0
; CHECK-NEXT: vmov r7, r4, d9
; CHECK-NEXT: it ne
; CHECK-NEXT: mvnne r8, #-2147483648
@@ -678,50 +713,51 @@ define arm_aapcs_vfpcc <4 x i32> @test_signed_v4f64_v4i32(<4 x double> %f) {
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r8, #0
-; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload
+; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: ldr r3, [sp, #24] @ 4-byte Reload
; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: ldr r3, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: mov r10, r0
+; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r2, r9
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mov r11, r0
; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: cmp.w r11, #0
; CHECK-NEXT: it eq
-; CHECK-NEXT: moveq.w r6, #-2147483648
-; CHECK-NEXT: ldr r3, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: moveq.w r5, #-2147483648
; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: mov r2, r7
; CHECK-NEXT: mov r3, r4
+; CHECK-NEXT: cmp.w r10, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: mvnne r6, #-2147483648
+; CHECK-NEXT: mvnne r5, #-2147483648
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r6, #0
-; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: vmov q0[2], q0[0], r9, r0
-; CHECK-NEXT: vmov q0[3], q0[1], r6, r8
-; CHECK-NEXT: add sp, #24
+; CHECK-NEXT: movne r5, #0
+; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: vmov q0[2], q0[0], r6, r0
+; CHECK-NEXT: vmov q0[3], q0[1], r5, r8
+; CHECK-NEXT: add sp, #32
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI11_0:
-; CHECK-NEXT: .long 0 @ double -2147483648
-; CHECK-NEXT: .long 3252682752
-; CHECK-NEXT: .LCPI11_1:
; CHECK-NEXT: .long 4290772992 @ double 2147483647
; CHECK-NEXT: .long 1105199103
+; CHECK-NEXT: .LCPI11_1:
+; CHECK-NEXT: .long 0 @ double -2147483648
+; CHECK-NEXT: .long 3252682752
%x = call <4 x i32> @llvm.fptosi.sat.v4f64.v4i32(<4 x double> %f)
ret <4 x i32> %x
}
@@ -738,186 +774,186 @@ define arm_aapcs_vfpcc <5 x i32> @test_signed_v5f64_v5i32(<5 x double> %f) {
; CHECK-NEXT: .pad #32
; CHECK-NEXT: sub sp, #32
; CHECK-NEXT: vmov.f32 s16, s0
-; CHECK-NEXT: str r0, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: vmov.f32 s17, s1
; CHECK-NEXT: vldr d0, .LCPI12_0
-; CHECK-NEXT: vmov r7, r5, d4
+; CHECK-NEXT: vmov r5, r4, d4
+; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill
; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: vmov.f32 s18, s6
-; CHECK-NEXT: vmov.f32 s20, s4
+; CHECK-NEXT: vmov.f32 s20, s6
+; CHECK-NEXT: vmov.f32 s18, s4
; CHECK-NEXT: vmov.f32 s22, s2
-; CHECK-NEXT: vmov.f32 s19, s7
-; CHECK-NEXT: vmov.f32 s21, s5
+; CHECK-NEXT: vmov.f32 s21, s7
+; CHECK-NEXT: vmov.f32 s19, s5
; CHECK-NEXT: vmov.f32 s23, s3
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: strd r2, r3, [sp, #20] @ 8-byte Folded Spill
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: vldr d0, .LCPI12_1
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: vmov r2, r3, d0
+; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: str r2, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: str r3, [sp, #20] @ 4-byte Spill
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r11, r0
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r10, r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: vmov r1, r0, d10
-; CHECK-NEXT: vldr d0, .LCPI12_1
-; CHECK-NEXT: vmov r6, r8, d9
-; CHECK-NEXT: cmp.w r11, #0
-; CHECK-NEXT: vmov r10, r3, d0
-; CHECK-NEXT: strd r1, r0, [sp, #8] @ 8-byte Folded Spill
-; CHECK-NEXT: vmov r9, r0, d11
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r11, r3
-; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: mov r11, r0
+; CHECK-NEXT: vmov r8, r0, d11
+; CHECK-NEXT: cmp.w r10, #0
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: mov r3, r4
+; CHECK-NEXT: vmov r9, r6, d10
+; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill
; CHECK-NEXT: it eq
-; CHECK-NEXT: moveq.w r4, #-2147483648
-; CHECK-NEXT: str.w r10, [sp] @ 4-byte Spill
-; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: moveq.w r11, #-2147483648
+; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: it ne
-; CHECK-NEXT: mvnne r4, #-2147483648
+; CHECK-NEXT: mvnne r11, #-2147483648
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: str r4, [r0, #16]
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: ldr r7, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT: ldr r5, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: movne.w r11, #0
+; CHECK-NEXT: str.w r11, [r7, #16]
+; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: ldr.w r10, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: ldr r7, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: ldr r4, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: ldr.w r11, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: mov r2, r4
+; CHECK-NEXT: mov r3, r11
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: it eq
; CHECK-NEXT: moveq.w r0, #-2147483648
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: mov r10, r11
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r6
-; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: mvnne r4, #-2147483648
+; CHECK-NEXT: mvnne r0, #-2147483648
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: mov r3, r5
-; CHECK-NEXT: str r4, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: movne r5, #0
+; CHECK-NEXT: str r5, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r2, r4
+; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: mov r6, r11
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: mov r8, r0
-; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: mov r10, r0
+; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: it eq
-; CHECK-NEXT: moveq.w r8, #-2147483648
-; CHECK-NEXT: ldr.w r11, [sp] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r3, r10
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: moveq.w r10, #-2147483648
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: vmov r11, r4, d9
; CHECK-NEXT: it ne
-; CHECK-NEXT: mvnne r8, #-2147483648
+; CHECK-NEXT: mvnne r10, #-2147483648
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r8, #0
-; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: ldr.w r9, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: movne.w r10, #0
+; CHECK-NEXT: ldrd r2, r3, [sp, #20] @ 8-byte Folded Reload
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r3, r6
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: mov r3, r10
-; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: cmp.w r9, #0
; CHECK-NEXT: it eq
-; CHECK-NEXT: moveq.w r4, #-2147483648
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: moveq.w r7, #-2147483648
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r4
+; CHECK-NEXT: cmp.w r8, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: mvnne r4, #-2147483648
+; CHECK-NEXT: mvnne r7, #-2147483648
; CHECK-NEXT: bl __aeabi_dcmpun
-; CHECK-NEXT: vmov r7, r6, d8
+; CHECK-NEXT: vmov r5, r4, d8
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r4, #0
+; CHECK-NEXT: movne r7, #0
+; CHECK-NEXT: ldrd r2, r3, [sp, #20] @ 8-byte Folded Reload
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT: mov r3, r9
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r3, r6
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r9, r0
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: cmp.w r9, #0
; CHECK-NEXT: it eq
-; CHECK-NEXT: moveq.w r5, #-2147483648
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: moveq.w r6, #-2147483648
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: mov r3, r4
+; CHECK-NEXT: cmp.w r8, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: mvnne r5, #-2147483648
+; CHECK-NEXT: mvnne r6, #-2147483648
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r5, #0
+; CHECK-NEXT: movne r6, #0
+; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: vmov q0[2], q0[0], r6, r7
+; CHECK-NEXT: vmov q0[3], q0[1], r10, r0
; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: vmov q0[2], q0[0], r5, r4
-; CHECK-NEXT: vmov q0[3], q0[1], r8, r0
-; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload
; CHECK-NEXT: vstrw.32 q0, [r0]
; CHECK-NEXT: add sp, #32
; CHECK-NEXT: vpop {d8, d9, d10, d11}
@@ -926,11 +962,11 @@ define arm_aapcs_vfpcc <5 x i32> @test_signed_v5f64_v5i32(<5 x double> %f) {
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI12_0:
-; CHECK-NEXT: .long 0 @ double -2147483648
-; CHECK-NEXT: .long 3252682752
-; CHECK-NEXT: .LCPI12_1:
; CHECK-NEXT: .long 4290772992 @ double 2147483647
; CHECK-NEXT: .long 1105199103
+; CHECK-NEXT: .LCPI12_1:
+; CHECK-NEXT: .long 0 @ double -2147483648
+; CHECK-NEXT: .long 3252682752
%x = call <5 x i32> @llvm.fptosi.sat.v5f64.v5i32(<5 x double> %f)
ret <5 x i32> %x
}
@@ -947,180 +983,182 @@ define arm_aapcs_vfpcc <6 x i32> @test_signed_v6f64_v6i32(<6 x double> %f) {
; CHECK-NEXT: .pad #40
; CHECK-NEXT: sub sp, #40
; CHECK-NEXT: vmov.f32 s16, s0
-; CHECK-NEXT: str r0, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
; CHECK-NEXT: vmov.f32 s17, s1
; CHECK-NEXT: vldr d0, .LCPI13_0
-; CHECK-NEXT: vmov r6, r4, d5
-; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: vmov.f32 s20, s8
-; CHECK-NEXT: vmov.f32 s22, s6
+; CHECK-NEXT: vmov r9, r4, d5
+; CHECK-NEXT: vmov r2, r6, d0
+; CHECK-NEXT: vmov.f32 s22, s8
+; CHECK-NEXT: vmov.f32 s20, s6
; CHECK-NEXT: vmov.f32 s18, s4
; CHECK-NEXT: vmov.f32 s24, s2
-; CHECK-NEXT: vmov.f32 s21, s9
-; CHECK-NEXT: vmov.f32 s23, s7
+; CHECK-NEXT: vmov.f32 s23, s9
+; CHECK-NEXT: vmov.f32 s21, s7
; CHECK-NEXT: vmov.f32 s19, s5
; CHECK-NEXT: vmov.f32 s25, s3
-; CHECK-NEXT: str r2, [sp, #36] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: str r2, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: str r3, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: str r6, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: vldr d0, .LCPI13_1
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: vmov r2, r3, d0
+; CHECK-NEXT: strd r2, r3, [sp, #32] @ 8-byte Folded Spill
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r9, r0
-; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r11, r0
+; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: vldr d0, .LCPI13_1
-; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mov r10, r0
+; CHECK-NEXT: vmov r8, r0, d10
+; CHECK-NEXT: cmp.w r11, #0
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r4
+; CHECK-NEXT: vmov r7, r5, d11
+; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill
; CHECK-NEXT: vmov r1, r0, d12
-; CHECK-NEXT: cmp.w r9, #0
-; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: vmov r7, r8, d10
-; CHECK-NEXT: vmov r11, r10, d11
-; CHECK-NEXT: str r3, [sp, #32] @ 4-byte Spill
-; CHECK-NEXT: strd r1, r0, [sp, #8] @ 8-byte Folded Spill
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: strd r1, r0, [sp, #12] @ 8-byte Folded Spill
; CHECK-NEXT: it eq
-; CHECK-NEXT: moveq.w r5, #-2147483648
-; CHECK-NEXT: mov r9, r2
-; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: moveq.w r10, #-2147483648
+; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: mov r2, r6
-; CHECK-NEXT: mov r3, r4
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: it ne
-; CHECK-NEXT: mvnne r5, #-2147483648
+; CHECK-NEXT: mvnne r10, #-2147483648
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r5, #0
-; CHECK-NEXT: ldr r6, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: movne.w r10, #0
+; CHECK-NEXT: ldr.w r11, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: str r5, [r6, #20]
-; CHECK-NEXT: ldr r2, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT: ldr r3, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: str.w r10, [r11, #20]
+; CHECK-NEXT: ldr.w r10, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: ldrd r2, r3, [sp, #32] @ 8-byte Folded Reload
+; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: it eq
-; CHECK-NEXT: moveq.w r5, #-2147483648
-; CHECK-NEXT: ldr r3, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: vmov r2, r1, d9
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: moveq.w r6, #-2147483648
; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: strd r2, r1, [sp] @ 8-byte Folded Spill
-; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: cmp.w r9, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: mvnne r5, #-2147483648
+; CHECK-NEXT: mvnne r6, #-2147483648
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r5, #0
-; CHECK-NEXT: str r5, [r6, #16]
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: ldr.w r8, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: ldr.w r9, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r8
-; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: movne r6, #0
+; CHECK-NEXT: str.w r6, [r11, #16]
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: ldr r4, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: ldr.w r11, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: ldr r7, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: ldr r5, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r10
+; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: mov r10, r0
+; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: it eq
-; CHECK-NEXT: moveq.w r0, #-2147483648
-; CHECK-NEXT: ldr r7, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: mov r3, r7
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: mov r3, r10
-; CHECK-NEXT: it ne
-; CHECK-NEXT: mvnne r4, #-2147483648
-; CHECK-NEXT: bl __aeabi_dcmpun
+; CHECK-NEXT: moveq.w r10, #-2147483648
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: mov r3, r4
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: mvnne r10, #-2147483648
+; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r8
-; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: mov r3, r9
-; CHECK-NEXT: str r4, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: movne.w r10, #0
+; CHECK-NEXT: ldr r4, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r11, r0
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r5, r6
; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: mov r10, r0
-; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: cmp.w r11, #0
; CHECK-NEXT: it eq
-; CHECK-NEXT: moveq.w r10, #-2147483648
-; CHECK-NEXT: ldr.w r11, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r3, r7
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: moveq.w r8, #-2147483648
+; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r2, r4
; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: vmov r7, r6, d9
; CHECK-NEXT: it ne
-; CHECK-NEXT: mvnne r10, #-2147483648
+; CHECK-NEXT: mvnne r8, #-2147483648
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r10, #0
-; CHECK-NEXT: ldr r5, [sp] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r8
-; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: mov r3, r9
-; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: movne.w r8, #0
+; CHECK-NEXT: ldr.w r11, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: ldr r3, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: ldrd r2, r3, [sp, #32] @ 8-byte Folded Reload
+; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: it eq
; CHECK-NEXT: moveq.w r4, #-2147483648
-; CHECK-NEXT: ldr.w r8, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: mov r2, r7
; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: cmp.w r9, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: mvnne r4, #-2147483648
; CHECK-NEXT: bl __aeabi_dcmpun
@@ -1128,39 +1166,38 @@ define arm_aapcs_vfpcc <6 x i32> @test_signed_v6f64_v6i32(<6 x double> %f) {
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: ldr r2, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: ldr r3, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: mov r2, r11
; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: ldrd r2, r3, [sp, #32] @ 8-byte Folded Reload
; CHECK-NEXT: mov r9, r0
; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: mov r11, r0
; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: cmp.w r11, #0
; CHECK-NEXT: it eq
; CHECK-NEXT: moveq.w r5, #-2147483648
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: mov r2, r7
; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: cmp.w r9, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: mvnne r5, #-2147483648
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r5, #0
-; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
; CHECK-NEXT: vmov q0[2], q0[0], r5, r4
-; CHECK-NEXT: vmov q0[3], q0[1], r10, r0
-; CHECK-NEXT: ldr r0, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: vmov q0[3], q0[1], r8, r10
; CHECK-NEXT: vstrw.32 q0, [r0]
; CHECK-NEXT: add sp, #40
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12}
@@ -1169,11 +1206,11 @@ define arm_aapcs_vfpcc <6 x i32> @test_signed_v6f64_v6i32(<6 x double> %f) {
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI13_0:
-; CHECK-NEXT: .long 0 @ double -2147483648
-; CHECK-NEXT: .long 3252682752
-; CHECK-NEXT: .LCPI13_1:
; CHECK-NEXT: .long 4290772992 @ double 2147483647
; CHECK-NEXT: .long 1105199103
+; CHECK-NEXT: .LCPI13_1:
+; CHECK-NEXT: .long 0 @ double -2147483648
+; CHECK-NEXT: .long 3252682752
%x = call <6 x i32> @llvm.fptosi.sat.v6f64.v6i32(<6 x double> %f)
ret <6 x i32> %x
}
@@ -1754,10 +1791,18 @@ define arm_aapcs_vfpcc <4 x i50> @test_signed_v4f32_v4i50(<4 x float> %f) {
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: vmov r0, s19
+; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: mov r7, r1
+; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: vmov r0, s17
+; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: mov r9, r0
-; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: vldr s22, .LCPI28_0
; CHECK-NEXT: mov r11, r1
; CHECK-NEXT: vldr s20, .LCPI28_1
@@ -1766,60 +1811,48 @@ define arm_aapcs_vfpcc <4 x i50> @test_signed_v4f32_v4i50(<4 x float> %f) {
; CHECK-NEXT: itt lt
; CHECK-NEXT: movwlt r11, #0
; CHECK-NEXT: movtlt r11, #65534
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s17, s20
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt.w r9, #0
-; CHECK-NEXT: vcmp.f32 s17, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r9, #-1
; CHECK-NEXT: vcmp.f32 s17, s17
-; CHECK-NEXT: itt gt
+; CHECK-NEXT: ittt gt
; CHECK-NEXT: movwgt r11, #65535
; CHECK-NEXT: movtgt r11, #1
+; CHECK-NEXT: movgt.w r9, #-1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s18, s22
; CHECK-NEXT: itt vs
; CHECK-NEXT: movvs.w r9, #0
; CHECK-NEXT: movvs.w r11, #0
-; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: vmov r0, s19
-; CHECK-NEXT: vcmp.f32 s18, s22
-; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt lt
+; CHECK-NEXT: vcmp.f32 s18, s20
+; CHECK-NEXT: ittt lt
; CHECK-NEXT: movlt r5, #0
; CHECK-NEXT: movtlt r5, #65534
-; CHECK-NEXT: vcmp.f32 s18, s20
-; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r6, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt gt
+; CHECK-NEXT: vcmp.f32 s18, s18
+; CHECK-NEXT: ittt gt
; CHECK-NEXT: movwgt r5, #65535
; CHECK-NEXT: movtgt r5, #1
-; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r6, #-1
-; CHECK-NEXT: vcmp.f32 s18, s18
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s19, s22
; CHECK-NEXT: itt vs
; CHECK-NEXT: movvs r6, #0
; CHECK-NEXT: movvs r5, #0
-; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: vmov r0, s16
-; CHECK-NEXT: vcmp.f32 s19, s22
-; CHECK-NEXT: mov r7, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt lt
+; CHECK-NEXT: ittt lt
; CHECK-NEXT: movlt r7, #0
; CHECK-NEXT: movtlt r7, #65534
-; CHECK-NEXT: vcmp.f32 s19, s20
-; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r4, #0
+; CHECK-NEXT: vcmp.f32 s19, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt gt
+; CHECK-NEXT: ittt gt
; CHECK-NEXT: movwgt r7, #65535
; CHECK-NEXT: movtgt r7, #1
-; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r4, #-1
; CHECK-NEXT: vcmp.f32 s19, s19
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -1831,9 +1864,9 @@ define arm_aapcs_vfpcc <4 x i50> @test_signed_v4f32_v4i50(<4 x float> %f) {
; CHECK-NEXT: bfc r5, #18, #14
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: ittt lt
-; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movtlt r1, #65534
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -1847,10 +1880,11 @@ define arm_aapcs_vfpcc <4 x i50> @test_signed_v4f32_v4i50(<4 x float> %f) {
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
; CHECK-NEXT: str.w r0, [r8]
-; CHECK-NEXT: lsr.w r0, r7, #10
+; CHECK-NEXT: lsrs r0, r7, #10
; CHECK-NEXT: bfc r7, #18, #14
; CHECK-NEXT: bfc r11, #18, #14
; CHECK-NEXT: lsll r4, r7, #22
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: orr.w r3, r5, r7
; CHECK-NEXT: str.w r3, [r8, #20]
; CHECK-NEXT: orr.w r2, r2, r4
@@ -1995,23 +2029,52 @@ define arm_aapcs_vfpcc <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: vmov r0, s17
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: vmov r6, s17
+; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: vmov r0, s18
; CHECK-NEXT: vldr s22, .LCPI30_0
+; CHECK-NEXT: mov r11, r1
; CHECK-NEXT: vldr s20, .LCPI30_1
-; CHECK-NEXT: vmov r7, s19
+; CHECK-NEXT: mov r10, r2
+; CHECK-NEXT: vcmp.f32 s17, s22
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: itttt lt
+; CHECK-NEXT: mvnlt r3, #7
+; CHECK-NEXT: movlt.w r10, #0
+; CHECK-NEXT: movlt r7, #0
+; CHECK-NEXT: movlt.w r11, #0
+; CHECK-NEXT: vcmp.f32 s17, s20
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: itttt gt
+; CHECK-NEXT: movgt.w r11, #-1
+; CHECK-NEXT: movgt.w r7, #-1
+; CHECK-NEXT: movgt.w r10, #-1
+; CHECK-NEXT: movgt r3, #7
+; CHECK-NEXT: vcmp.f32 s17, s17
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r3, #0
+; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: itt vs
+; CHECK-NEXT: movvs.w r10, #0
+; CHECK-NEXT: movvs r7, #0
+; CHECK-NEXT: str r7, [sp] @ 4-byte Spill
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs.w r11, #0
+; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: vcmp.f32 s18, s22
+; CHECK-NEXT: mov r5, r3
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: mvnlt r3, #7
+; CHECK-NEXT: mvnlt r5, #7
; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt r3, #7
+; CHECK-NEXT: movgt r5, #7
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: movgt.w r1, #-1
; CHECK-NEXT: movgt.w r2, #-1
@@ -2023,67 +2086,40 @@ define arm_aapcs_vfpcc <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r1, #0
; CHECK-NEXT: str.w r1, [r4, #29]
+; CHECK-NEXT: vmov r1, s19
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
; CHECK-NEXT: str.w r0, [r4, #25]
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r3, #0
-; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: mov r10, r0
-; CHECK-NEXT: vcmp.f32 s17, s22
-; CHECK-NEXT: mov r5, r1
-; CHECK-NEXT: mov r6, r2
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt lt
-; CHECK-NEXT: mvnlt r3, #7
-; CHECK-NEXT: movlt r6, #0
-; CHECK-NEXT: movlt.w r10, #0
-; CHECK-NEXT: movlt r5, #0
-; CHECK-NEXT: vcmp.f32 s17, s20
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r5, #-1
-; CHECK-NEXT: movgt.w r10, #-1
-; CHECK-NEXT: movgt.w r6, #-1
-; CHECK-NEXT: movgt r3, #7
-; CHECK-NEXT: vcmp.f32 s17, s17
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r3, #0
-; CHECK-NEXT: str r3, [sp] @ 4-byte Spill
-; CHECK-NEXT: ittt vs
-; CHECK-NEXT: movvs r6, #0
-; CHECK-NEXT: movvs.w r10, #0
; CHECK-NEXT: movvs r5, #0
+; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: vcmp.f32 s19, s22
-; CHECK-NEXT: mov r11, r1
-; CHECK-NEXT: mov r8, r2
-; CHECK-NEXT: mov r9, r3
+; CHECK-NEXT: mov r9, r1
+; CHECK-NEXT: mov r6, r2
+; CHECK-NEXT: mov r8, r3
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itttt lt
-; CHECK-NEXT: mvnlt r9, #7
-; CHECK-NEXT: movlt.w r8, #0
-; CHECK-NEXT: movlt.w r11, #0
+; CHECK-NEXT: mvnlt r8, #7
+; CHECK-NEXT: movlt r6, #0
+; CHECK-NEXT: movlt.w r9, #0
; CHECK-NEXT: movlt r7, #0
; CHECK-NEXT: vcmp.f32 s19, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itttt gt
; CHECK-NEXT: movgt.w r7, #-1
-; CHECK-NEXT: movgt.w r11, #-1
-; CHECK-NEXT: movgt.w r8, #-1
-; CHECK-NEXT: movgt.w r9, #7
+; CHECK-NEXT: movgt.w r9, #-1
+; CHECK-NEXT: movgt.w r6, #-1
+; CHECK-NEXT: movgt.w r8, #7
; CHECK-NEXT: vcmp.f32 s19, s19
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itttt vs
-; CHECK-NEXT: movvs.w r9, #0
; CHECK-NEXT: movvs.w r8, #0
-; CHECK-NEXT: movvs.w r11, #0
+; CHECK-NEXT: movvs r6, #0
+; CHECK-NEXT: movvs.w r9, #0
; CHECK-NEXT: movvs r7, #0
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: vcmp.f32 s16, s22
@@ -2112,30 +2148,30 @@ define arm_aapcs_vfpcc <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
; CHECK-NEXT: movvs r0, #0
; CHECK-NEXT: str r0, [r4]
; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: lsrl r0, r11, #28
-; CHECK-NEXT: and r1, r9, #15
+; CHECK-NEXT: lsrl r0, r9, #28
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: orr.w r1, r9, r6, lsl #4
+; CHECK-NEXT: str.w r1, [r4, #45]
+; CHECK-NEXT: and r1, r8, #15
; CHECK-NEXT: str.w r0, [r4, #41]
-; CHECK-NEXT: mov r0, r10
-; CHECK-NEXT: lsrl r0, r5, #28
-; CHECK-NEXT: str r0, [r4, #16]
-; CHECK-NEXT: orr.w r0, r11, r8, lsl #4
-; CHECK-NEXT: lsrl r8, r1, #28
-; CHECK-NEXT: str.w r0, [r4, #45]
-; CHECK-NEXT: strb.w r8, [r4, #49]
-; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: and r0, r0, #15
+; CHECK-NEXT: and r0, r5, #15
+; CHECK-NEXT: lsrl r6, r1, #28
+; CHECK-NEXT: strb.w r6, [r4, #49]
; CHECK-NEXT: orr.w r0, r0, r7, lsl #4
; CHECK-NEXT: str.w r0, [r4, #37]
-; CHECK-NEXT: orr.w r0, r5, r6, lsl #4
-; CHECK-NEXT: str r0, [r4, #20]
-; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload
+; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: lsrl r0, r11, #28
+; CHECK-NEXT: orr.w r1, r11, r10, lsl #4
+; CHECK-NEXT: strd r0, r1, [r4, #16]
+; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: and r1, r0, #15
-; CHECK-NEXT: lsrl r6, r1, #28
-; CHECK-NEXT: strb r6, [r4, #24]
+; CHECK-NEXT: lsrl r10, r1, #28
+; CHECK-NEXT: strb.w r10, [r4, #24]
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r3, #0
; CHECK-NEXT: and r0, r3, #15
-; CHECK-NEXT: orr.w r0, r0, r10, lsl #4
+; CHECK-NEXT: orr.w r0, r0, r2, lsl #4
; CHECK-NEXT: str r0, [r4, #12]
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: vpop {d8, d9, d10, d11}
@@ -2164,58 +2200,61 @@ define arm_aapcs_vfpcc <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) {
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: vmov r0, s19
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: vmov r5, s18
-; CHECK-NEXT: vldr s22, .LCPI31_0
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: vldr s22, .LCPI31_0
+; CHECK-NEXT: vmov r7, s16
; CHECK-NEXT: vldr s20, .LCPI31_1
-; CHECK-NEXT: add.w r12, r4, #48
+; CHECK-NEXT: vmov r6, s17
; CHECK-NEXT: vcmp.f32 s19, s22
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s19, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt.w r3, #-2147483648
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r1, #0
-; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s19, s20
+; CHECK-NEXT: movlt r5, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s19, s19
; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r0, #-1
+; CHECK-NEXT: movgt.w r5, #-1
; CHECK-NEXT: movgt.w r1, #-1
; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: mvngt r3, #-2147483648
-; CHECK-NEXT: vcmp.f32 s19, s19
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r3, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: ittt vs
; CHECK-NEXT: movvs r2, #0
; CHECK-NEXT: movvs r1, #0
-; CHECK-NEXT: movvs r0, #0
-; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT: vmov r7, s16
-; CHECK-NEXT: vmov r6, s17
-; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: movvs r5, #0
+; CHECK-NEXT: strd r5, r1, [r4, #48]
+; CHECK-NEXT: strd r2, r3, [r4, #56]
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: vcmp.f32 s18, s22
; CHECK-NEXT: add.w r12, r4, #32
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt.w r3, #-2147483648
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s18, s18
; CHECK-NEXT: itttt gt
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: movgt.w r1, #-1
; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: mvngt r3, #-2147483648
-; CHECK-NEXT: vcmp.f32 s18, s18
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt vs
+; CHECK-NEXT: ittt vs
; CHECK-NEXT: movvs r3, #0
; CHECK-NEXT: movvs r2, #0
; CHECK-NEXT: movvs r1, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
; CHECK-NEXT: mov r0, r6
@@ -2223,48 +2262,52 @@ define arm_aapcs_vfpcc <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) {
; CHECK-NEXT: vcmp.f32 s17, s22
; CHECK-NEXT: add.w r12, r4, #16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s17, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt.w r3, #-2147483648
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s17, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s17, s17
; CHECK-NEXT: itttt gt
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: movgt.w r1, #-1
; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: mvngt r3, #-2147483648
-; CHECK-NEXT: vcmp.f32 s17, s17
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt vs
+; CHECK-NEXT: ittt vs
; CHECK-NEXT: movvs r3, #0
; CHECK-NEXT: movvs r2, #0
; CHECK-NEXT: movvs r1, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: vcmp.f32 s16, s22
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt.w r3, #-2147483648
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s16, s16
; CHECK-NEXT: itttt gt
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: movgt.w r1, #-1
; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: mvngt r3, #-2147483648
-; CHECK-NEXT: vcmp.f32 s16, s16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt vs
+; CHECK-NEXT: ittt vs
; CHECK-NEXT: movvs r3, #0
; CHECK-NEXT: movvs r2, #0
; CHECK-NEXT: movvs r1, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
; CHECK-NEXT: stm r4!, {r0, r1, r2, r3}
; CHECK-NEXT: vpop {d8, d9, d10, d11}
@@ -2303,72 +2346,70 @@ define arm_aapcs_vfpcc <2 x i1> @test_signed_v2f64_v2i1(<2 x double> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #16
-; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: .pad #24
+; CHECK-NEXT: sub sp, #24
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI32_0
; CHECK-NEXT: vmov r8, r7, d8
-; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: vmov r11, r3, d0
-; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r10, r0
+; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: bl __aeabi_d2iz
+; CHECK-NEXT: strd r2, r3, [sp, #12] @ 8-byte Folded Spill
+; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: vldr d0, .LCPI32_1
; CHECK-NEXT: mov r9, r0
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r7
; CHECK-NEXT: vmov r2, r3, d0
+; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: mov r10, r0
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: bl __aeabi_d2iz
+; CHECK-NEXT: mov r11, r0
; CHECK-NEXT: cmp.w r10, #0
-; CHECK-NEXT: vmov r6, r5, d9
-; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: it eq
-; CHECK-NEXT: moveq.w r9, #-1
-; CHECK-NEXT: mov r10, r3
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: moveq.w r11, #-1
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r7
; CHECK-NEXT: mov r2, r8
; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: vmov r6, r5, d9
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r9, #0
+; CHECK-NEXT: movne.w r11, #0
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r9, #0
-; CHECK-NEXT: and r0, r9, #1
-; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: movne.w r11, #0
+; CHECK-NEXT: and r0, r11, #1
+; CHECK-NEXT: ldrd r2, r3, [sp, #12] @ 8-byte Folded Reload
; CHECK-NEXT: rsbs r0, r0, #0
; CHECK-NEXT: movs r4, #0
; CHECK-NEXT: bfi r4, r0, #0, #1
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload
; CHECK-NEXT: mov r8, r0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl __aeabi_d2iz
; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: cmp.w r8, #0
+; CHECK-NEXT: cmp.w r9, #0
; CHECK-NEXT: it eq
; CHECK-NEXT: moveq.w r7, #-1
-; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r3, r10
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: cmp.w r8, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r7, #0
; CHECK-NEXT: bl __aeabi_dcmpun
@@ -2378,20 +2419,20 @@ define arm_aapcs_vfpcc <2 x i1> @test_signed_v2f64_v2i1(<2 x double> %f) {
; CHECK-NEXT: and r0, r7, #1
; CHECK-NEXT: rsbs r0, r0, #0
; CHECK-NEXT: bfi r4, r0, #1, #1
-; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: strb r4, [r0]
-; CHECK-NEXT: add sp, #16
+; CHECK-NEXT: add sp, #24
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI32_0:
-; CHECK-NEXT: .long 0 @ double -1
-; CHECK-NEXT: .long 3220176896
-; CHECK-NEXT: .LCPI32_1:
; CHECK-NEXT: .long 0 @ double 0
; CHECK-NEXT: .long 0
+; CHECK-NEXT: .LCPI32_1:
+; CHECK-NEXT: .long 0 @ double -1
+; CHECK-NEXT: .long 3220176896
%x = call <2 x i1> @llvm.fptosi.sat.v2f64.v2i1(<2 x double> %f)
ret <2 x i1> %x
}
@@ -2405,82 +2446,115 @@ define arm_aapcs_vfpcc <2 x i8> @test_signed_v2f64_v2i8(<2 x double> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #16
-; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: .pad #24
+; CHECK-NEXT: sub sp, #24
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI33_0
-; CHECK-NEXT: vmov r9, r8, d9
-; CHECK-NEXT: vmov r7, r3, d0
-; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: vmov r8, r7, d9
+; CHECK-NEXT: vmov r2, r3, d0
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: strd r2, r3, [sp, #8] @ 8-byte Folded Spill
; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: vldr d0, .LCPI33_1
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: lsrs r4, r0, #5
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: vmov r6, r5, d0
+; CHECK-NEXT: str r4, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: mov r11, r0
+; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: mvnne r11, #127
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r0, #1
+; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r11, #127
+; CHECK-NEXT: bl __aeabi_dcmpun
+; CHECK-NEXT: vmov r10, r7, d8
+; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r8, #1
+; CHECK-NEXT: cmp.w r8, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r11, #0
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r6, #1
+; CHECK-NEXT: ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: lsr.w r9, r0, #5
+; CHECK-NEXT: mov r0, r10
; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: vldr d0, .LCPI33_1
; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: mov r4, r1
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: vmov r11, r10, d8
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: mvnne r5, #127
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r3, r7
; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill
-; CHECK-NEXT: itt eq
-; CHECK-NEXT: moveq.w r4, #-1
-; CHECK-NEXT: mvneq r5, #127
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: itt ne
+; CHECK-NEXT: it ne
; CHECK-NEXT: movne r5, #127
-; CHECK-NEXT: movne r4, #0
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
; CHECK-NEXT: movne r5, #0
-; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r8, r0
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: mov r6, r1
+; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: vmov q0[2], q0[0], r5, r11
+; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r2, #-1
+; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r2, #0
; CHECK-NEXT: cmp.w r8, #0
-; CHECK-NEXT: itt eq
-; CHECK-NEXT: mvneq r7, #127
-; CHECK-NEXT: moveq.w r6, #-1
-; CHECK-NEXT: ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: mov r3, r10
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne r6, #0
-; CHECK-NEXT: movne r7, #127
-; CHECK-NEXT: bl __aeabi_dcmpun
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r2, #0
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r4, #-1
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r4, #0
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne r7, #0
-; CHECK-NEXT: movne r6, #0
-; CHECK-NEXT: vmov q0[2], q0[0], r7, r5
-; CHECK-NEXT: vmov q0[3], q0[1], r6, r4
-; CHECK-NEXT: add sp, #16
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r4, #0
+; CHECK-NEXT: vmov q0[3], q0[1], r4, r2
+; CHECK-NEXT: add sp, #24
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -2505,84 +2579,118 @@ define arm_aapcs_vfpcc <2 x i13> @test_signed_v2f64_v2i13(<2 x double> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #16
-; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: .pad #24
+; CHECK-NEXT: sub sp, #24
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI34_0
-; CHECK-NEXT: vmov r9, r8, d9
-; CHECK-NEXT: vmov r7, r3, d0
-; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: vmov r8, r7, d9
+; CHECK-NEXT: vmov r2, r3, d0
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: strd r2, r3, [sp, #8] @ 8-byte Folded Spill
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: clz r0, r0
; CHECK-NEXT: vldr d0, .LCPI34_1
-; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: lsrs r4, r0, #5
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: vmov r6, r5, d0
+; CHECK-NEXT: str r4, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r1, #-1
+; CHECK-NEXT: mov r11, r0
; CHECK-NEXT: mov r4, r1
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: vmov r11, r10, d8
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill
-; CHECK-NEXT: ittt eq
-; CHECK-NEXT: movweq r5, #61440
-; CHECK-NEXT: movteq r5, #65535
-; CHECK-NEXT: moveq.w r4, #-1
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movwne r5, #4095
-; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: bl __aeabi_dcmpun
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: itt ne
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r0, #1
+; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: it ne
; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: movne r5, #0
-; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: bl __aeabi_dcmpun
+; CHECK-NEXT: vmov r10, r7, d8
; CHECK-NEXT: mov r8, r0
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: mov r6, r1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r8, #1
; CHECK-NEXT: cmp.w r8, #0
-; CHECK-NEXT: ittt eq
-; CHECK-NEXT: movweq r7, #61440
-; CHECK-NEXT: movteq r7, #65535
-; CHECK-NEXT: moveq.w r6, #-1
-; CHECK-NEXT: ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r10
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r4, #0
+; CHECK-NEXT: str r4, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r7
; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: mov r3, r10
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne r6, #0
-; CHECK-NEXT: movwne r7, #4095
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r6, #1
+; CHECK-NEXT: ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: lsr.w r9, r0, #5
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r4, #-1
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r4, #0
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r4, #0
+; CHECK-NEXT: cmp.w r9, #0
; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne r6, #0
-; CHECK-NEXT: movne r7, #0
-; CHECK-NEXT: vmov q0[2], q0[0], r7, r5
-; CHECK-NEXT: vmov q0[3], q0[1], r6, r4
-; CHECK-NEXT: add sp, #16
+; CHECK-NEXT: movwne r5, #61440
+; CHECK-NEXT: movtne r5, #65535
+; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: itt ne
+; CHECK-NEXT: movwne r11, #61440
+; CHECK-NEXT: movtne r11, #65535
+; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movwne r11, #4095
+; CHECK-NEXT: cmp.w r8, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r11, #0
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movwne r5, #4095
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r5, #0
+; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: vmov q0[2], q0[0], r5, r11
+; CHECK-NEXT: vmov q0[3], q0[1], r4, r0
+; CHECK-NEXT: add sp, #24
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -2607,84 +2715,118 @@ define arm_aapcs_vfpcc <2 x i16> @test_signed_v2f64_v2i16(<2 x double> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #16
-; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: .pad #24
+; CHECK-NEXT: sub sp, #24
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI35_0
-; CHECK-NEXT: vmov r9, r8, d9
-; CHECK-NEXT: vmov r7, r3, d0
-; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: vmov r8, r7, d9
+; CHECK-NEXT: vmov r2, r3, d0
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: strd r2, r3, [sp, #8] @ 8-byte Folded Spill
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: clz r0, r0
; CHECK-NEXT: vldr d0, .LCPI35_1
-; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: lsrs r4, r0, #5
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: vmov r6, r5, d0
+; CHECK-NEXT: str r4, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r1, #-1
+; CHECK-NEXT: mov r11, r0
; CHECK-NEXT: mov r4, r1
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: vmov r11, r10, d8
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill
-; CHECK-NEXT: ittt eq
-; CHECK-NEXT: movweq r5, #32768
-; CHECK-NEXT: movteq r5, #65535
-; CHECK-NEXT: moveq.w r4, #-1
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movwne r5, #32767
-; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: bl __aeabi_dcmpun
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r0, #1
+; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: itt ne
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: it ne
; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: movne r5, #0
-; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: bl __aeabi_dcmpun
+; CHECK-NEXT: vmov r10, r7, d8
; CHECK-NEXT: mov r8, r0
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: mov r6, r1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r8, #1
; CHECK-NEXT: cmp.w r8, #0
-; CHECK-NEXT: ittt eq
-; CHECK-NEXT: movweq r7, #32768
-; CHECK-NEXT: movteq r7, #65535
-; CHECK-NEXT: moveq.w r6, #-1
-; CHECK-NEXT: ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r10
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r4, #0
+; CHECK-NEXT: str r4, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r7
; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: mov r3, r10
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne r6, #0
-; CHECK-NEXT: movwne r7, #32767
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r6, #1
+; CHECK-NEXT: ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: lsr.w r9, r0, #5
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r4, #-1
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r4, #0
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r4, #0
+; CHECK-NEXT: cmp.w r9, #0
; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne r6, #0
-; CHECK-NEXT: movne r7, #0
-; CHECK-NEXT: vmov q0[2], q0[0], r7, r5
-; CHECK-NEXT: vmov q0[3], q0[1], r6, r4
-; CHECK-NEXT: add sp, #16
+; CHECK-NEXT: movwne r5, #32768
+; CHECK-NEXT: movtne r5, #65535
+; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: itt ne
+; CHECK-NEXT: movwne r11, #32768
+; CHECK-NEXT: movtne r11, #65535
+; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movwne r11, #32767
+; CHECK-NEXT: cmp.w r8, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r11, #0
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movwne r5, #32767
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r5, #0
+; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: vmov q0[2], q0[0], r5, r11
+; CHECK-NEXT: vmov q0[3], q0[1], r4, r0
+; CHECK-NEXT: add sp, #24
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -2709,94 +2851,118 @@ define arm_aapcs_vfpcc <2 x i19> @test_signed_v2f64_v2i19(<2 x double> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #24
-; CHECK-NEXT: sub sp, #24
+; CHECK-NEXT: .pad #16
+; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI36_0
-; CHECK-NEXT: vmov r5, r4, d9
-; CHECK-NEXT: vmov r7, r6, d0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: strd r5, r4, [sp, #12] @ 8-byte Folded Spill
+; CHECK-NEXT: vmov r7, r6, d9
+; CHECK-NEXT: vmov r2, r3, d0
+; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: strd r2, r3, [sp, #8] @ 8-byte Folded Spill
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: vldr d0, .LCPI36_1
-; CHECK-NEXT: mov r8, r0
-; CHECK-NEXT: vmov r11, r5, d8
-; CHECK-NEXT: mov r9, r1
-; CHECK-NEXT: vmov r10, r0, d0
+; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: vmov r8, r0, d8
+; CHECK-NEXT: vmov r11, r10, d0
+; CHECK-NEXT: str r0, [sp] @ 4-byte Spill
+; CHECK-NEXT: clz r0, r4
+; CHECK-NEXT: lsrs r0, r0, #5
+; CHECK-NEXT: ittt ne
+; CHECK-NEXT: movwne r9, #0
+; CHECK-NEXT: movtne r9, #65532
+; CHECK-NEXT: movne.w r1, #-1
+; CHECK-NEXT: mov r5, r1
+; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: mov r2, r7
; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: ittt eq
-; CHECK-NEXT: movweq r8, #0
-; CHECK-NEXT: movteq r8, #65532
-; CHECK-NEXT: moveq.w r9, #-1
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: str r5, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: mov r6, r1
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r4, #1
; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: ittt eq
-; CHECK-NEXT: moveq r7, #0
-; CHECK-NEXT: movteq r7, #65532
-; CHECK-NEXT: moveq.w r6, #-1
-; CHECK-NEXT: ldr r3, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r5, #0
+; CHECK-NEXT: bl __aeabi_dcmpun
+; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: ittt ne
-; CHECK-NEXT: movne r6, #0
-; CHECK-NEXT: movwne r7, #65535
-; CHECK-NEXT: movtne r7, #3
-; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: ldr r3, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r6, #1
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: itt ne
+; CHECK-NEXT: movwne r9, #65535
+; CHECK-NEXT: movtne r9, #3
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r5, #0
+; CHECK-NEXT: ldr r4, [sp] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r5, #1
+; CHECK-NEXT: ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r3, r4
-; CHECK-NEXT: ittt ne
-; CHECK-NEXT: movne.w r9, #0
-; CHECK-NEXT: movwne r8, #65535
-; CHECK-NEXT: movtne r8, #3
-; CHECK-NEXT: bl __aeabi_dcmpun
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r11, r4
+; CHECK-NEXT: lsr.w r10, r0, #5
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: cmp.w r10, #0
+; CHECK-NEXT: mov r7, r1
; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne.w r9, #0
-; CHECK-NEXT: movne.w r8, #0
-; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: mov r3, r1
+; CHECK-NEXT: movne r4, #0
+; CHECK-NEXT: movtne r4, #65532
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: itt ne
+; CHECK-NEXT: movwne r4, #65535
+; CHECK-NEXT: movtne r4, #3
+; CHECK-NEXT: cmp.w r10, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r7, #-1
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r11
+; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r7, #0
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne r6, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
; CHECK-NEXT: movne r7, #0
-; CHECK-NEXT: vmov q0[2], q0[0], r7, r8
-; CHECK-NEXT: vmov q0[3], q0[1], r6, r9
-; CHECK-NEXT: add sp, #24
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r9, #0
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r4, #0
+; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: vmov q0[2], q0[0], r4, r9
+; CHECK-NEXT: vmov q0[3], q0[1], r7, r0
+; CHECK-NEXT: add sp, #16
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -2821,82 +2987,115 @@ define arm_aapcs_vfpcc <2 x i32> @test_signed_v2f64_v2i32_duplicate(<2 x double>
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #16
-; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: .pad #24
+; CHECK-NEXT: sub sp, #24
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI37_0
-; CHECK-NEXT: vmov r9, r8, d9
-; CHECK-NEXT: vmov r7, r3, d0
-; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: vmov r8, r7, d9
+; CHECK-NEXT: vmov r2, r3, d0
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: strd r2, r3, [sp, #8] @ 8-byte Folded Spill
; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: vldr d0, .LCPI37_1
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: lsrs r4, r0, #5
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: vmov r6, r5, d0
+; CHECK-NEXT: str r4, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: mov r11, r0
+; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r11, #-2147483648
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r0, #1
+; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: it ne
+; CHECK-NEXT: mvnne r11, #-2147483648
+; CHECK-NEXT: bl __aeabi_dcmpun
+; CHECK-NEXT: vmov r10, r7, d8
+; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r8, #1
+; CHECK-NEXT: cmp.w r8, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r11, #0
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r6, #1
+; CHECK-NEXT: ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: lsr.w r9, r0, #5
+; CHECK-NEXT: mov r0, r10
; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: vldr d0, .LCPI37_1
; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: mov r4, r1
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: vmov r11, r10, d8
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r5, #-2147483648
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r3, r7
; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill
-; CHECK-NEXT: itt eq
-; CHECK-NEXT: moveq.w r4, #-1
-; CHECK-NEXT: moveq.w r5, #-2147483648
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: itt ne
+; CHECK-NEXT: it ne
; CHECK-NEXT: mvnne r5, #-2147483648
-; CHECK-NEXT: movne r4, #0
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
; CHECK-NEXT: movne r5, #0
-; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r8, r0
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: mov r6, r1
+; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: vmov q0[2], q0[0], r5, r11
+; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r2, #-1
+; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r2, #0
; CHECK-NEXT: cmp.w r8, #0
-; CHECK-NEXT: itt eq
-; CHECK-NEXT: moveq.w r7, #-2147483648
-; CHECK-NEXT: moveq.w r6, #-1
-; CHECK-NEXT: ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: mov r3, r10
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne r6, #0
-; CHECK-NEXT: mvnne r7, #-2147483648
-; CHECK-NEXT: bl __aeabi_dcmpun
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r2, #0
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r4, #-1
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r4, #0
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne r7, #0
-; CHECK-NEXT: movne r6, #0
-; CHECK-NEXT: vmov q0[2], q0[0], r7, r5
-; CHECK-NEXT: vmov q0[3], q0[1], r6, r4
-; CHECK-NEXT: add sp, #16
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r4, #0
+; CHECK-NEXT: vmov q0[3], q0[1], r4, r2
+; CHECK-NEXT: add sp, #24
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -2925,84 +3124,114 @@ define arm_aapcs_vfpcc <2 x i50> @test_signed_v2f64_v2i50(<2 x double> %f) {
; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI38_0
-; CHECK-NEXT: vmov r5, r4, d9
-; CHECK-NEXT: vmov r6, r7, d0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: mov r2, r6
-; CHECK-NEXT: mov r3, r7
-; CHECK-NEXT: strd r4, r5, [sp, #8] @ 8-byte Folded Spill
+; CHECK-NEXT: vmov r7, r6, d9
+; CHECK-NEXT: vmov r2, r3, d0
+; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: strd r2, r3, [sp, #8] @ 8-byte Folded Spill
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r11, r0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: vmov r5, r10, d8
; CHECK-NEXT: vldr d0, .LCPI38_1
-; CHECK-NEXT: cmp.w r11, #0
-; CHECK-NEXT: mov r4, r1
-; CHECK-NEXT: vmov r9, r8, d0
-; CHECK-NEXT: csel r11, r0, r11, ne
-; CHECK-NEXT: mov r2, r6
-; CHECK-NEXT: mov r3, r7
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: str.w r9, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: itt eq
-; CHECK-NEXT: moveq r4, #0
-; CHECK-NEXT: movteq r4, #65534
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: mov r6, r1
-; CHECK-NEXT: csel r7, r0, r7, ne
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: itt eq
-; CHECK-NEXT: moveq r6, #0
-; CHECK-NEXT: movteq r6, #65534
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: ittt ne
-; CHECK-NEXT: movne.w r7, #-1
-; CHECK-NEXT: movwne r6, #65535
-; CHECK-NEXT: movtne r6, #1
-; CHECK-NEXT: ldrd r9, r0, [sp, #8] @ 8-byte Folded Reload
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: mov r1, r9
+; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: vmov r8, r0, d8
+; CHECK-NEXT: vmov r11, r10, d0
+; CHECK-NEXT: str r0, [sp] @ 4-byte Spill
+; CHECK-NEXT: clz r0, r4
+; CHECK-NEXT: lsrs r0, r0, #5
+; CHECK-NEXT: itt ne
+; CHECK-NEXT: movne r1, #0
+; CHECK-NEXT: movtne r1, #65534
+; CHECK-NEXT: mov r5, r1
+; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r9, #0
; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: ittt ne
-; CHECK-NEXT: movne.w r11, #-1
-; CHECK-NEXT: movwne r4, #65535
-; CHECK-NEXT: movtne r4, #1
-; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: mov r1, r9
-; CHECK-NEXT: mov r3, r9
-; CHECK-NEXT: mov r2, r0
+; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r4, #1
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r9, #-1
; CHECK-NEXT: bl __aeabi_dcmpun
+; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r6, #1
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: itt ne
+; CHECK-NEXT: movwne r5, #65535
+; CHECK-NEXT: movtne r5, #1
+; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r9, #0
+; CHECK-NEXT: ldr r4, [sp] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r2, r11
; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r5, #1
+; CHECK-NEXT: ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r11, r4
+; CHECK-NEXT: lsr.w r10, r0, #5
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: mov r7, r1
+; CHECK-NEXT: cmp.w r10, #0
+; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne.w r11, #0
+; CHECK-NEXT: movne r7, #0
+; CHECK-NEXT: movtne r7, #65534
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: itt ne
+; CHECK-NEXT: movwne r7, #65535
+; CHECK-NEXT: movtne r7, #1
+; CHECK-NEXT: cmp.w r10, #0
+; CHECK-NEXT: it ne
; CHECK-NEXT: movne r4, #0
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r11
+; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r4, #-1
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r7, #0
-; CHECK-NEXT: vmov q0[2], q0[0], r7, r11
+; CHECK-NEXT: movne r0, #1
+; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r6, #0
-; CHECK-NEXT: vmov q0[3], q0[1], r6, r4
+; CHECK-NEXT: movne r4, #0
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: vmov q0[2], q0[0], r4, r9
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r1, #0
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r7, #0
+; CHECK-NEXT: vmov q0[3], q0[1], r7, r1
; CHECK-NEXT: add sp, #16
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
@@ -3028,80 +3257,115 @@ define arm_aapcs_vfpcc <2 x i64> @test_signed_v2f64_v2i64(<2 x double> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #8
-; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: .pad #24
+; CHECK-NEXT: sub sp, #24
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI39_0
; CHECK-NEXT: vmov r8, r7, d9
-; CHECK-NEXT: vmov r11, r5, d0
+; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: strd r2, r3, [sp, #8] @ 8-byte Folded Spill
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r9, r0
-; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: vldr d0, .LCPI39_1
; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: lsrs r4, r0, #5
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: vmov r6, r5, d0
+; CHECK-NEXT: str r4, [sp, #20] @ 4-byte Spill
; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: vldr d0, .LCPI39_1
-; CHECK-NEXT: cmp.w r9, #0
-; CHECK-NEXT: mov r4, r1
-; CHECK-NEXT: csel r9, r0, r9, ne
-; CHECK-NEXT: vmov r2, r3, d0
+; CHECK-NEXT: mov r11, r0
+; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: vmov r6, r10, d8
-; CHECK-NEXT: strd r2, r3, [sp] @ 8-byte Folded Spill
-; CHECK-NEXT: it eq
-; CHECK-NEXT: moveq.w r4, #-2147483648
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r11, #0
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r0, #1
+; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r7
; CHECK-NEXT: mov r2, r8
; CHECK-NEXT: mov r3, r7
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne.w r9, #-1
-; CHECK-NEXT: mvnne r4, #-2147483648
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r11, #-1
; CHECK-NEXT: bl __aeabi_dcmpun
+; CHECK-NEXT: vmov r10, r7, d8
+; CHECK-NEXT: mov r8, r0
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: mov r3, r5
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: movne.w r9, #0
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: mov r7, r1
-; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: it eq
-; CHECK-NEXT: moveq.w r7, #-2147483648
-; CHECK-NEXT: ldrd r2, r3, [sp] @ 8-byte Folded Reload
-; CHECK-NEXT: csel r5, r0, r5, ne
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r10
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r8, #1
+; CHECK-NEXT: cmp.w r8, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r11, #0
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r7
; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r10
-; CHECK-NEXT: mov r2, r6
-; CHECK-NEXT: mov r3, r10
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: mvnne r7, #-2147483648
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r6, #1
+; CHECK-NEXT: ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: lsr.w r9, r0, #5
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r5, #0
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r5, #-1
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: itt ne
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
; CHECK-NEXT: movne r5, #0
-; CHECK-NEXT: movne r7, #0
-; CHECK-NEXT: vmov q0[2], q0[0], r5, r9
-; CHECK-NEXT: vmov q0[3], q0[1], r7, r4
-; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: vmov q0[2], q0[0], r5, r11
+; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r2, #-2147483648
+; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: mvnne r2, #-2147483648
+; CHECK-NEXT: cmp.w r8, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r2, #0
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r4, #-2147483648
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: mvnne r4, #-2147483648
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r4, #0
+; CHECK-NEXT: vmov q0[3], q0[1], r4, r2
+; CHECK-NEXT: add sp, #24
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -3130,252 +3394,258 @@ define arm_aapcs_vfpcc <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) {
; CHECK-NEXT: sub sp, #48
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI40_0
-; CHECK-NEXT: vmov r7, r6, d8
-; CHECK-NEXT: mov r8, r0
-; CHECK-NEXT: vmov r10, r9, d0
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: vmov r10, r9, d8
+; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: vmov r7, r3, d0
+; CHECK-NEXT: str r3, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r9
+; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: str r7, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: vldr d0, .LCPI40_1
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r9
+; CHECK-NEXT: vmov r8, r3, d0
+; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: mov r11, r3
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r9
; CHECK-NEXT: bl __fixdfti
-; CHECK-NEXT: vldr d0, .LCPI40_1
; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: strd r1, r0, [sp, #16] @ 8-byte Folded Spill
+; CHECK-NEXT: strd r1, r0, [sp, #8] @ 8-byte Folded Spill
; CHECK-NEXT: csel r4, r2, r4, ne
-; CHECK-NEXT: vmov r5, r11, d0
-; CHECK-NEXT: str r3, [sp, #32] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: str r3, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r9
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r4, #-1
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r9
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: str.w r8, [sp, #44] @ 4-byte Spill
-; CHECK-NEXT: str.w r4, [r8, #8]
-; CHECK-NEXT: str.w r9, [sp, #36] @ 4-byte Spill
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: csel r4, r1, r0, ne
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: str.w r11, [sp, #40] @ 4-byte Spill
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: str r4, [r6, #8]
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: ldr r5, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT: mov r1, r9
; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: bl __aeabi_dcmpun
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r9
+; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: str.w r8, [sp, #32] @ 4-byte Spill
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: mov r2, r10
; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: csel r7, r1, r0, ne
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r9
+; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: str.w r4, [r8, #4]
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: movne.w r7, #-1
+; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: csel r4, r1, r0, ne
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r7, #0
+; CHECK-NEXT: str r7, [r6, #4]
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: ldr r2, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT: mov r1, r9
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: str r6, [sp, #16] @ 4-byte Spill
; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r9
+; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: str.w r11, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: csel r7, r1, r0, ne
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r9
+; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: str r7, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT: str r6, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT: movne.w r7, #-1
+; CHECK-NEXT: str.w r10, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: str.w r9, [sp, #24] @ 4-byte Spill
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: vmov r9, r8, d9
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: str r4, [r0]
-; CHECK-NEXT: ldr.w r11, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: movne r7, #0
+; CHECK-NEXT: str r7, [r6]
+; CHECK-NEXT: ldr r6, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: ldr.w r10, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: bl __fixdfti
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: strd r2, r3, [sp, #12] @ 8-byte Folded Spill
-; CHECK-NEXT: csel r7, r1, r4, ne
-; CHECK-NEXT: mov r4, r5
-; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: ldr r5, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: mov r11, r0
; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r3, r5
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: bl __fixdfti
+; CHECK-NEXT: cmp.w r11, #0
+; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill
+; CHECK-NEXT: csel r11, r1, r11, ne
+; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r8
; CHECK-NEXT: mov r2, r9
; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r7, #-1
+; CHECK-NEXT: movne.w r11, #-1
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r7, #0
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: csel r6, r6, r0, ne
+; CHECK-NEXT: movne.w r11, #0
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: ldr r5, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r2, r4
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r6, r10
; CHECK-NEXT: mov r3, r5
-; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: csel r10, r4, r0, ne
+; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r2, r9
; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r6, #-1
+; CHECK-NEXT: movne.w r10, #-1
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r0, #0
-; CHECK-NEXT: ldr r1, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT: lsrl r0, r7, #28
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: str r0, [r1, #16]
+; CHECK-NEXT: movne.w r10, #0
+; CHECK-NEXT: ldr r7, [sp, #44] @ 4-byte Reload
; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: ldr r3, [sp, #40] @ 4-byte Reload
; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: str.w r10, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT: mov r6, r11
+; CHECK-NEXT: str.w r10, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: lsrl r10, r11, #28
+; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: str r0, [sp] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r5, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: ldr.w r11, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r8
; CHECK-NEXT: csel r4, r1, r0, ne
-; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload
; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: mov r3, r8
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r4, #-1
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: orr.w r0, r7, r4, lsl #4
-; CHECK-NEXT: ldr r7, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: str r0, [r7, #20]
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: orr.w r0, r11, r4, lsl #4
; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: strd r10, r0, [r6, #16]
+; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: ldr.w r11, [sp, #40] @ 4-byte Reload
; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: it eq
-; CHECK-NEXT: mvneq r6, #7
-; CHECK-NEXT: mov r10, r5
; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: ldr r7, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT: mov r10, r0
+; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: it eq
+; CHECK-NEXT: mvneq r0, #7
+; CHECK-NEXT: cmp.w r10, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r0, #7
+; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r8
; CHECK-NEXT: mov r2, r9
; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r6, #7
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r0, #0
-; CHECK-NEXT: and r1, r0, #15
+; CHECK-NEXT: movne r5, #0
+; CHECK-NEXT: and r1, r5, #15
+; CHECK-NEXT: mov r8, r6
; CHECK-NEXT: lsrl r4, r1, #28
-; CHECK-NEXT: strb r4, [r7, #24]
+; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: strb r4, [r6, #24]
+; CHECK-NEXT: ldr r6, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: ldr r5, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT: ldr r6, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: ldr r2, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: ldr r3, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r2, r7
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: ldr r4, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: ldr r0, [sp, #28] @ 4-byte Reload
; CHECK-NEXT: it eq
-; CHECK-NEXT: mvneq r4, #7
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: mvneq r0, #7
+; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r4, #7
+; CHECK-NEXT: movne r0, #7
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r0, #0
-; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: and r0, r0, #15
+; CHECK-NEXT: movne r4, #0
+; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: and r0, r4, #15
; CHECK-NEXT: orr.w r0, r0, r1, lsl #4
-; CHECK-NEXT: str r0, [r7, #12]
+; CHECK-NEXT: str.w r0, [r8, #12]
; CHECK-NEXT: add sp, #48
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
@@ -3383,11 +3653,11 @@ define arm_aapcs_vfpcc <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) {
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI40_0:
-; CHECK-NEXT: .long 0 @ double -6.338253001141147E+29
-; CHECK-NEXT: .long 3323985920
-; CHECK-NEXT: .LCPI40_1:
; CHECK-NEXT: .long 4294967295 @ double 6.3382530011411463E+29
; CHECK-NEXT: .long 1176502271
+; CHECK-NEXT: .LCPI40_1:
+; CHECK-NEXT: .long 0 @ double -6.338253001141147E+29
+; CHECK-NEXT: .long 3323985920
%x = call <2 x i100> @llvm.fptosi.sat.v2f64.v2i100(<2 x double> %f)
ret <2 x i100> %x
}
@@ -3406,237 +3676,247 @@ define arm_aapcs_vfpcc <2 x i128> @test_signed_v2f64_v2i128(<2 x double> %f) {
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI41_0
; CHECK-NEXT: vmov r8, r7, d9
-; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: str r3, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: str r2, [sp, #28] @ 4-byte Spill
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r4, r2
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: mov r11, r3
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: vldr d0, .LCPI41_1
+; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: bl __fixdfti
-; CHECK-NEXT: vldr d0, .LCPI41_1
-; CHECK-NEXT: mov r5, r3
-; CHECK-NEXT: strd r1, r0, [sp, #8] @ 8-byte Folded Spill
+; CHECK-NEXT: vmov r4, r3, d0
+; CHECK-NEXT: str r3, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT: mov r2, r4
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: mov r9, r0
; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: vmov r10, r11, d0
-; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: bl __fixdfti
+; CHECK-NEXT: mov r10, r3
+; CHECK-NEXT: strd r2, r1, [sp] @ 8-byte Folded Spill
+; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: cmp.w r9, #0
; CHECK-NEXT: it eq
-; CHECK-NEXT: moveq.w r5, #-2147483648
-; CHECK-NEXT: str.w r11, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: moveq.w r10, #-2147483648
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r7
; CHECK-NEXT: mov r2, r8
; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: mvnne r5, #-2147483648
+; CHECK-NEXT: mvnne r10, #-2147483648
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r5, #0
-; CHECK-NEXT: str.w r5, [r9, #28]
+; CHECK-NEXT: movne.w r10, #0
+; CHECK-NEXT: str.w r10, [r6, #28]
; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: ldr.w r9, [sp, #28] @ 4-byte Reload
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r2, r4
-; CHECK-NEXT: str r4, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r10
; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: csel r5, r1, r0, ne
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: str.w r10, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT: mov r5, r11
+; CHECK-NEXT: str.w r11, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: mov r2, r9
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: ldr.w r10, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r4
+; CHECK-NEXT: mov r11, r4
+; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: str r4, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload
+; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r2, r8
; CHECK-NEXT: mov r3, r7
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r5, #-1
-; CHECK-NEXT: bl __aeabi_dcmpun
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: csel r4, r1, r0, ne
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r2, r4
-; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r5, #0
-; CHECK-NEXT: str.w r5, [r9, #24]
-; CHECK-NEXT: mov r11, r6
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: movne.w r4, #-1
+; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: ldr r3, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: csel r4, r1, r0, ne
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r4, #0
+; CHECK-NEXT: ldr r6, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: str r4, [r6, #24]
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r2, r8
; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: csel r4, r1, r0, ne
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r4, #-1
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: str.w r4, [r9, #20]
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: vmov r6, r5, d8
-; CHECK-NEXT: mov r10, r9
-; CHECK-NEXT: str.w r9, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: mov r9, r11
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr.w r11, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT: csel r4, r1, r0, ne
+; CHECK-NEXT: str r4, [r6, #20]
; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: ldr.w r10, [sp, #16] @ 4-byte Reload
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r11, r6
+; CHECK-NEXT: mov r3, r10
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: ldrd r2, r3, [sp, #20] @ 8-byte Folded Reload
+; CHECK-NEXT: mov r9, r0
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r2, r8
; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: csel r4, r1, r0, ne
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: vmov r6, r5, d8
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r4, #-1
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: str.w r4, [r10, #16]
+; CHECK-NEXT: str.w r4, [r11, #16]
; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: ldr r7, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: ldr.w r9, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: mov r10, r0
; CHECK-NEXT: ldr.w r8, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r3, r9
-; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r8
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: mov r11, r0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl __fixdfti
-; CHECK-NEXT: mov r9, r3
-; CHECK-NEXT: strd r1, r0, [sp, #8] @ 8-byte Folded Spill
-; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: mov r4, r3
+; CHECK-NEXT: strd r2, r1, [sp] @ 8-byte Folded Spill
+; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: cmp.w r11, #0
; CHECK-NEXT: it eq
-; CHECK-NEXT: moveq.w r9, #-2147483648
-; CHECK-NEXT: ldr.w r10, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT: mov r4, r2
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: moveq.w r4, #-2147483648
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: cmp.w r10, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: mvnne r9, #-2147483648
+; CHECK-NEXT: mvnne r4, #-2147483648
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r9, #0
-; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: movne r4, #0
+; CHECK-NEXT: ldr.w r10, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r2, r8
-; CHECK-NEXT: str.w r9, [r7, #12]
-; CHECK-NEXT: ldr.w r9, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: mov r3, r9
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: csel r4, r4, r0, ne
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: str.w r4, [r10, #12]
+; CHECK-NEXT: ldr.w r11, [sp, #16] @ 4-byte Reload
; CHECK-NEXT: mov r3, r11
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload
+; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: mov r3, r5
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: bl __aeabi_dcmpun
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: csel r7, r1, r0, ne
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r2, r8
-; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: str r4, [r7, #8]
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: movne.w r7, #-1
+; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: csel r4, r1, r0, ne
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r7, #0
+; CHECK-NEXT: str.w r7, [r10, #8]
; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload
; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r3, r11
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: mov r3, r5
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: bl __aeabi_dcmpun
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: csel r7, r1, r0, ne
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r2, r8
-; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: str r4, [r7, #4]
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: movne.w r7, #-1
+; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: csel r4, r1, r0, ne
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r7, #0
+; CHECK-NEXT: str.w r7, [r10, #4]
; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload
; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r3, r11
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: csel r7, r1, r0, ne
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
+; CHECK-NEXT: movne.w r7, #-1
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: str r4, [r7]
+; CHECK-NEXT: movne r7, #0
+; CHECK-NEXT: str.w r7, [r10]
; CHECK-NEXT: add sp, #32
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
@@ -3644,11 +3924,11 @@ define arm_aapcs_vfpcc <2 x i128> @test_signed_v2f64_v2i128(<2 x double> %f) {
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI41_0:
-; CHECK-NEXT: .long 0 @ double -1.7014118346046923E+38
-; CHECK-NEXT: .long 3353346048
-; CHECK-NEXT: .LCPI41_1:
; CHECK-NEXT: .long 4294967295 @ double 1.7014118346046921E+38
; CHECK-NEXT: .long 1205862399
+; CHECK-NEXT: .LCPI41_1:
+; CHECK-NEXT: .long 0 @ double -1.7014118346046923E+38
+; CHECK-NEXT: .long 3353346048
%x = call <2 x i128> @llvm.fptosi.sat.v2f64.v2i128(<2 x double> %f)
ret <2 x i128> %x
}
@@ -4279,103 +4559,101 @@ define arm_aapcs_vfpcc <8 x i50> @test_signed_v8f16_v8i50(<8 x half> %f) {
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: .pad #24
; CHECK-NEXT: sub sp, #24
; CHECK-NEXT: vmov q4, q0
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: vcvtt.f32.f16 s24, s16
+; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: vcvtt.f32.f16 s24, s17
; CHECK-NEXT: vmov r0, s24
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: vcvtb.f32.f16 s26, s17
-; CHECK-NEXT: mov r2, r0
-; CHECK-NEXT: vmov r0, s26
-; CHECK-NEXT: vldr s22, .LCPI48_0
-; CHECK-NEXT: vldr s20, .LCPI48_1
-; CHECK-NEXT: vcmp.f32 s24, s22
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt r1, #0
-; CHECK-NEXT: movtlt r1, #65534
-; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r2, #0
-; CHECK-NEXT: vcmp.f32 s24, s20
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r2, #-1
-; CHECK-NEXT: vcmp.f32 s24, s24
-; CHECK-NEXT: itt gt
-; CHECK-NEXT: movwgt r1, #65535
-; CHECK-NEXT: movtgt r1, #1
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r2, #0
-; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r1, #0
-; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: vcvtb.f32.f16 s28, s17
+; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: vmov r0, s28
+; CHECK-NEXT: mov r6, r1
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: vcvtt.f32.f16 s24, s17
+; CHECK-NEXT: vcvtt.f32.f16 s30, s16
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: vmov r0, s30
+; CHECK-NEXT: mov r5, r1
+; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: vcvtb.f32.f16 s26, s18
; CHECK-NEXT: mov r2, r0
-; CHECK-NEXT: vmov r0, s24
-; CHECK-NEXT: vcmp.f32 s26, s22
+; CHECK-NEXT: vmov r0, s26
+; CHECK-NEXT: vldr s20, .LCPI48_0
+; CHECK-NEXT: vldr s22, .LCPI48_1
+; CHECK-NEXT: vcmp.f32 s30, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt lt
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movtlt r1, #65534
-; CHECK-NEXT: vcmp.f32 s26, s20
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s30, s22
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt gt
+; CHECK-NEXT: vcmp.f32 s30, s30
+; CHECK-NEXT: ittt gt
; CHECK-NEXT: movwgt r1, #65535
; CHECK-NEXT: movtgt r1, #1
-; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r2, #-1
-; CHECK-NEXT: vcmp.f32 s26, s26
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s28, s20
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r2, #0
-; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r1, #0
-; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: vcvtb.f32.f16 s26, s18
-; CHECK-NEXT: mov r2, r0
-; CHECK-NEXT: vmov r0, s26
-; CHECK-NEXT: vcmp.f32 s24, s22
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt r1, #0
-; CHECK-NEXT: movtlt r1, #65534
+; CHECK-NEXT: vcmp.f32 s28, s22
+; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: ittt lt
+; CHECK-NEXT: movlt r5, #0
+; CHECK-NEXT: movtlt r5, #65534
+; CHECK-NEXT: movlt r4, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s28, s28
+; CHECK-NEXT: ittt gt
+; CHECK-NEXT: movwgt r5, #65535
+; CHECK-NEXT: movtgt r5, #1
+; CHECK-NEXT: movgt.w r4, #-1
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s24, s20
-; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r2, #0
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r4, #0
+; CHECK-NEXT: str r4, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r5, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itt gt
-; CHECK-NEXT: movwgt r1, #65535
-; CHECK-NEXT: movtgt r1, #1
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r2, #-1
+; CHECK-NEXT: str r5, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: ittt lt
+; CHECK-NEXT: movlt r6, #0
+; CHECK-NEXT: movtlt r6, #65534
+; CHECK-NEXT: movlt.w r8, #0
+; CHECK-NEXT: vcmp.f32 s24, s22
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: ittt gt
+; CHECK-NEXT: movwgt r6, #65535
+; CHECK-NEXT: movtgt r6, #1
+; CHECK-NEXT: movgt.w r8, #-1
; CHECK-NEXT: vcmp.f32 s24, s24
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r2, #0
-; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: movvs.w r8, #0
+; CHECK-NEXT: str.w r8, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r1, #0
-; CHECK-NEXT: str r1, [sp] @ 4-byte Spill
+; CHECK-NEXT: movvs r6, #0
+; CHECK-NEXT: str r6, [sp] @ 4-byte Spill
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: vcmp.f32 s26, s22
+; CHECK-NEXT: vcmp.f32 s26, s20
; CHECK-NEXT: mov r6, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: ittt lt
-; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r6, #0
; CHECK-NEXT: movtlt r6, #65534
-; CHECK-NEXT: vcmp.f32 s26, s20
+; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: vcmp.f32 s26, s22
; CHECK-NEXT: vcvtt.f32.f16 s18, s18
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: ittt gt
@@ -4386,144 +4664,144 @@ define arm_aapcs_vfpcc <8 x i50> @test_signed_v8f16_v8i50(<8 x half> %f) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
-; CHECK-NEXT: str.w r0, [r4, #25]
+; CHECK-NEXT: str.w r0, [r7, #25]
; CHECK-NEXT: vmov r0, s18
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r6, #0
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: vcmp.f32 s18, s22
+; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: mov r8, r0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: mov r9, r1
-; CHECK-NEXT: vcmp.f32 s18, s20
+; CHECK-NEXT: vcmp.f32 s18, s22
; CHECK-NEXT: ittt lt
+; CHECK-NEXT: movlt.w r8, #0
; CHECK-NEXT: movwlt r9, #0
; CHECK-NEXT: movtlt r9, #65534
-; CHECK-NEXT: movlt.w r8, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s18, s18
; CHECK-NEXT: vcvtb.f32.f16 s18, s19
; CHECK-NEXT: ittt gt
-; CHECK-NEXT: movgt.w r8, #-1
; CHECK-NEXT: movwgt r9, #65535
; CHECK-NEXT: movtgt r9, #1
+; CHECK-NEXT: movgt.w r8, #-1
; CHECK-NEXT: vmov r0, s18
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt vs
; CHECK-NEXT: movvs.w r8, #0
; CHECK-NEXT: movvs.w r9, #0
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: vcmp.f32 s18, s22
-; CHECK-NEXT: mov r11, r0
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: vcmp.f32 s18, s20
+; CHECK-NEXT: mov r10, r0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: mov r11, r1
+; CHECK-NEXT: vcmp.f32 s18, s22
; CHECK-NEXT: ittt lt
-; CHECK-NEXT: movlt r5, #0
-; CHECK-NEXT: movtlt r5, #65534
-; CHECK-NEXT: movlt.w r11, #0
+; CHECK-NEXT: movlt.w r10, #0
+; CHECK-NEXT: movwlt r11, #0
+; CHECK-NEXT: movtlt r11, #65534
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s18, s18
; CHECK-NEXT: vcvtt.f32.f16 s18, s19
; CHECK-NEXT: ittt gt
-; CHECK-NEXT: movgt.w r11, #-1
-; CHECK-NEXT: movwgt r5, #65535
-; CHECK-NEXT: movtgt r5, #1
+; CHECK-NEXT: movwgt r11, #65535
+; CHECK-NEXT: movtgt r11, #1
+; CHECK-NEXT: movgt.w r10, #-1
; CHECK-NEXT: vmov r0, s18
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt vs
+; CHECK-NEXT: movvs.w r10, #0
; CHECK-NEXT: movvs.w r11, #0
-; CHECK-NEXT: movvs r5, #0
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: vcvtb.f32.f16 s16, s16
-; CHECK-NEXT: mov r10, r0
+; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: vmov r0, s16
-; CHECK-NEXT: mov r7, r1
-; CHECK-NEXT: vcmp.f32 s18, s22
+; CHECK-NEXT: mov r5, r1
+; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: ittt lt
-; CHECK-NEXT: movlt r7, #0
-; CHECK-NEXT: movtlt r7, #65534
-; CHECK-NEXT: movlt.w r10, #0
-; CHECK-NEXT: vcmp.f32 s18, s20
+; CHECK-NEXT: movlt r4, #0
+; CHECK-NEXT: movlt r5, #0
+; CHECK-NEXT: movtlt r5, #65534
+; CHECK-NEXT: vcmp.f32 s18, s22
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: ittt gt
-; CHECK-NEXT: movgt.w r10, #-1
-; CHECK-NEXT: movwgt r7, #65535
-; CHECK-NEXT: movtgt r7, #1
+; CHECK-NEXT: movwgt r5, #65535
+; CHECK-NEXT: movtgt r5, #1
+; CHECK-NEXT: movgt.w r4, #-1
; CHECK-NEXT: vcmp.f32 s18, s18
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt vs
-; CHECK-NEXT: movvs.w r10, #0
-; CHECK-NEXT: movvs r7, #0
+; CHECK-NEXT: movvs r4, #0
+; CHECK-NEXT: movvs r5, #0
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: vcmp.f32 s16, s22
-; CHECK-NEXT: bfc r5, #18, #14
+; CHECK-NEXT: vcmp.f32 s16, s20
+; CHECK-NEXT: bfc r11, #18, #14
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: ittt lt
-; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movtlt r1, #65534
-; CHECK-NEXT: vcmp.f32 s16, s20
-; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: vcmp.f32 s16, s22
+; CHECK-NEXT: mov r2, r10
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: ittt gt
; CHECK-NEXT: movwgt r1, #65535
; CHECK-NEXT: movtgt r1, #1
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: vcmp.f32 s16, s16
-; CHECK-NEXT: lsrl r2, r5, #28
+; CHECK-NEXT: lsrl r2, r11, #28
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
-; CHECK-NEXT: str r0, [r4]
-; CHECK-NEXT: lsr.w r0, r7, #10
-; CHECK-NEXT: bfc r7, #18, #14
+; CHECK-NEXT: str r0, [r7]
+; CHECK-NEXT: lsrs r0, r5, #10
+; CHECK-NEXT: bfc r5, #18, #14
; CHECK-NEXT: bfc r9, #18, #14
-; CHECK-NEXT: lsll r10, r7, #22
+; CHECK-NEXT: lsll r4, r5, #22
; CHECK-NEXT: bfc r6, #18, #14
-; CHECK-NEXT: orr.w r3, r5, r7
-; CHECK-NEXT: str.w r3, [r4, #45]
-; CHECK-NEXT: orr.w r2, r2, r10
-; CHECK-NEXT: str.w r2, [r4, #41]
-; CHECK-NEXT: strb.w r0, [r4, #49]
+; CHECK-NEXT: orr.w r3, r11, r5
+; CHECK-NEXT: str.w r3, [r7, #45]
+; CHECK-NEXT: orrs r2, r4
+; CHECK-NEXT: str.w r2, [r7, #41]
+; CHECK-NEXT: strb.w r0, [r7, #49]
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: lsrl r0, r9, #14
-; CHECK-NEXT: orr.w r2, r9, r11, lsl #4
-; CHECK-NEXT: str.w r2, [r4, #37]
-; CHECK-NEXT: str.w r0, [r4, #33]
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: orr.w r2, r9, r10, lsl #4
+; CHECK-NEXT: str.w r2, [r7, #37]
+; CHECK-NEXT: str.w r0, [r7, #33]
; CHECK-NEXT: orr.w r0, r6, r8, lsl #18
-; CHECK-NEXT: str.w r0, [r4, #29]
+; CHECK-NEXT: str.w r0, [r7, #29]
+; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload
-; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: ldr r6, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: lsr.w r5, r3, #10
-; CHECK-NEXT: bfc r3, #18, #14
-; CHECK-NEXT: lsll r0, r3, #22
-; CHECK-NEXT: mov r7, r3
-; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: ldr r4, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: bfc r5, #18, #14
+; CHECK-NEXT: lsr.w r0, r3, #10
; CHECK-NEXT: bfc r3, #18, #14
-; CHECK-NEXT: lsrl r2, r3, #28
-; CHECK-NEXT: orr.w r3, r3, r7
-; CHECK-NEXT: str r3, [r4, #20]
-; CHECK-NEXT: orr.w r2, r2, r0
-; CHECK-NEXT: str r2, [r4, #16]
-; CHECK-NEXT: strb r5, [r4, #24]
+; CHECK-NEXT: mov r2, r4
+; CHECK-NEXT: lsll r6, r3, #22
+; CHECK-NEXT: lsrl r2, r5, #28
+; CHECK-NEXT: orr.w r3, r3, r5
+; CHECK-NEXT: str r3, [r7, #20]
+; CHECK-NEXT: orr.w r2, r2, r6
+; CHECK-NEXT: str r2, [r7, #16]
+; CHECK-NEXT: strb r0, [r7, #24]
; CHECK-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: ldr r7, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: ldr r6, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: bfc r3, #18, #14
-; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: lsrl r0, r3, #14
-; CHECK-NEXT: orr.w r2, r3, r6, lsl #4
-; CHECK-NEXT: strd r0, r2, [r4, #8]
+; CHECK-NEXT: orr.w r2, r3, r4, lsl #4
+; CHECK-NEXT: strd r0, r2, [r7, #8]
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r1, #0
; CHECK-NEXT: bfc r1, #18, #14
-; CHECK-NEXT: orr.w r0, r1, r7, lsl #18
-; CHECK-NEXT: str r0, [r4, #4]
+; CHECK-NEXT: orr.w r0, r1, r6, lsl #18
+; CHECK-NEXT: str r0, [r7, #4]
; CHECK-NEXT: add sp, #24
-; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
; CHECK-NEXT: .p2align 2
@@ -4552,37 +4830,37 @@ define arm_aapcs_vfpcc <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) {
; CHECK-NEXT: vcvtb.f32.f16 s26, s19
; CHECK-NEXT: mov r9, r0
; CHECK-NEXT: vmov r0, s26
-; CHECK-NEXT: vldr s28, .LCPI49_0
-; CHECK-NEXT: vldr s30, .LCPI49_1
+; CHECK-NEXT: vldr s30, .LCPI49_0
+; CHECK-NEXT: vldr s28, .LCPI49_1
; CHECK-NEXT: mov r8, r1
-; CHECK-NEXT: vcmp.f32 s24, s28
-; CHECK-NEXT: vcvtt.f32.f16 s20, s16
+; CHECK-NEXT: vcmp.f32 s24, s30
+; CHECK-NEXT: vcvtt.f32.f16 s22, s18
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt lt
; CHECK-NEXT: movlt.w r8, #-2147483648
; CHECK-NEXT: movlt.w r9, #0
-; CHECK-NEXT: vcmp.f32 s24, s30
-; CHECK-NEXT: vcvtt.f32.f16 s22, s18
+; CHECK-NEXT: vcmp.f32 s24, s28
+; CHECK-NEXT: vcvtt.f32.f16 s20, s16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt gt
; CHECK-NEXT: movgt.w r9, #-1
; CHECK-NEXT: mvngt r8, #-2147483648
; CHECK-NEXT: vcmp.f32 s24, s24
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vmov r6, s20
; CHECK-NEXT: vmov r4, s22
+; CHECK-NEXT: vmov r6, s20
; CHECK-NEXT: itt vs
; CHECK-NEXT: movvs.w r8, #0
; CHECK-NEXT: movvs.w r9, #0
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: mov r10, r0
-; CHECK-NEXT: vcmp.f32 s26, s28
+; CHECK-NEXT: vcmp.f32 s26, s30
; CHECK-NEXT: mov r11, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt lt
; CHECK-NEXT: movlt.w r10, #0
; CHECK-NEXT: movlt.w r11, #-2147483648
-; CHECK-NEXT: vcmp.f32 s26, s30
+; CHECK-NEXT: vcmp.f32 s26, s28
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: itt gt
@@ -4595,13 +4873,13 @@ define arm_aapcs_vfpcc <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) {
; CHECK-NEXT: movvs.w r11, #0
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: vcmp.f32 s22, s28
+; CHECK-NEXT: vcmp.f32 s22, s30
; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt lt
; CHECK-NEXT: movlt.w r4, #-2147483648
; CHECK-NEXT: movlt r5, #0
-; CHECK-NEXT: vcmp.f32 s22, s30
+; CHECK-NEXT: vcmp.f32 s22, s28
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: itt gt
@@ -4617,12 +4895,12 @@ define arm_aapcs_vfpcc <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) {
; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: mov r6, r1
-; CHECK-NEXT: vcmp.f32 s20, s28
+; CHECK-NEXT: vcmp.f32 s20, s30
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt lt
; CHECK-NEXT: movlt.w r6, #-2147483648
; CHECK-NEXT: movlt r7, #0
-; CHECK-NEXT: vcmp.f32 s20, s30
+; CHECK-NEXT: vcmp.f32 s20, s28
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt gt
; CHECK-NEXT: movgt.w r7, #-1
@@ -4633,12 +4911,12 @@ define arm_aapcs_vfpcc <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) {
; CHECK-NEXT: movvs r6, #0
; CHECK-NEXT: movvs r7, #0
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: vcmp.f32 s16, s28
+; CHECK-NEXT: vcmp.f32 s16, s30
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt.w r1, #-2147483648
-; CHECK-NEXT: vcmp.f32 s16, s30
+; CHECK-NEXT: vcmp.f32 s16, s28
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s16, s16
; CHECK-NEXT: itt gt
@@ -4654,11 +4932,11 @@ define arm_aapcs_vfpcc <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) {
; CHECK-NEXT: movvs r1, #0
; CHECK-NEXT: vmov q5[3], q5[1], r1, r6
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: vcmp.f32 s16, s28
+; CHECK-NEXT: vcmp.f32 s16, s30
; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: mov r6, r1
-; CHECK-NEXT: vcmp.f32 s16, s30
+; CHECK-NEXT: vcmp.f32 s16, s28
; CHECK-NEXT: itt lt
; CHECK-NEXT: movlt.w r6, #-2147483648
; CHECK-NEXT: movlt r7, #0
@@ -4674,12 +4952,12 @@ define arm_aapcs_vfpcc <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) {
; CHECK-NEXT: movvs r6, #0
; CHECK-NEXT: movvs r7, #0
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: vcmp.f32 s16, s28
+; CHECK-NEXT: vcmp.f32 s16, s30
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt.w r1, #-2147483648
-; CHECK-NEXT: vcmp.f32 s16, s30
+; CHECK-NEXT: vcmp.f32 s16, s28
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s16, s16
; CHECK-NEXT: itt gt
@@ -4695,13 +4973,13 @@ define arm_aapcs_vfpcc <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) {
; CHECK-NEXT: movvs r1, #0
; CHECK-NEXT: vmov q6[3], q6[1], r1, r6
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: vcmp.f32 s16, s28
+; CHECK-NEXT: vcmp.f32 s16, s30
; CHECK-NEXT: vmov q3[2], q3[0], r10, r9
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt.w r1, #-2147483648
-; CHECK-NEXT: vcmp.f32 s16, s30
+; CHECK-NEXT: vcmp.f32 s16, s28
; CHECK-NEXT: vmov q3[3], q3[1], r11, r8
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt gt
@@ -4738,109 +5016,77 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT: .pad #48
-; CHECK-NEXT: sub sp, #48
+; CHECK-NEXT: .pad #56
+; CHECK-NEXT: sub sp, #56
; CHECK-NEXT: vmov q4, q0
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: vcvtb.f32.f16 s24, s17
+; CHECK-NEXT: mov r10, r0
+; CHECK-NEXT: vcvtt.f32.f16 s24, s16
; CHECK-NEXT: vmov r0, s24
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: vcvtb.f32.f16 s26, s18
-; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: vcvtt.f32.f16 s26, s17
+; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: vmov r0, s26
; CHECK-NEXT: vldr s22, .LCPI50_0
; CHECK-NEXT: vldr s20, .LCPI50_1
-; CHECK-NEXT: mov r9, r1
; CHECK-NEXT: vcmp.f32 s24, s22
-; CHECK-NEXT: mov r10, r2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itttt lt
; CHECK-NEXT: mvnlt r3, #7
-; CHECK-NEXT: movlt.w r8, #0
-; CHECK-NEXT: movlt.w r9, #0
-; CHECK-NEXT: movlt.w r10, #0
+; CHECK-NEXT: movlt r2, #0
+; CHECK-NEXT: movlt r7, #0
+; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: vcmp.f32 s24, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r10, #-1
-; CHECK-NEXT: movgt.w r9, #-1
-; CHECK-NEXT: movgt.w r8, #-1
+; CHECK-NEXT: movgt.w r1, #-1
+; CHECK-NEXT: movgt.w r7, #-1
+; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: movgt r3, #7
; CHECK-NEXT: vcmp.f32 s24, s24
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r3, #0
-; CHECK-NEXT: str r3, [sp, #36] @ 4-byte Spill
-; CHECK-NEXT: ittt vs
-; CHECK-NEXT: movvs.w r8, #0
-; CHECK-NEXT: movvs.w r9, #0
-; CHECK-NEXT: movvs.w r10, #0
-; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: vcvtb.f32.f16 s24, s19
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: vmov r0, s24
-; CHECK-NEXT: mov r6, r1
-; CHECK-NEXT: vcmp.f32 s26, s22
-; CHECK-NEXT: mov r7, r2
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt lt
-; CHECK-NEXT: mvnlt r3, #7
-; CHECK-NEXT: movlt r5, #0
-; CHECK-NEXT: movlt r6, #0
-; CHECK-NEXT: movlt r7, #0
-; CHECK-NEXT: vcmp.f32 s26, s20
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r7, #-1
-; CHECK-NEXT: movgt.w r6, #-1
-; CHECK-NEXT: movgt.w r5, #-1
-; CHECK-NEXT: movgt r3, #7
-; CHECK-NEXT: vcmp.f32 s26, s26
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: str r3, [sp, #52] @ 4-byte Spill
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r2, #0
+; CHECK-NEXT: str r2, [sp, #48] @ 4-byte Spill
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r3, #0
-; CHECK-NEXT: str r3, [sp, #32] @ 4-byte Spill
-; CHECK-NEXT: ittt vs
-; CHECK-NEXT: movvs r5, #0
-; CHECK-NEXT: movvs r6, #0
; CHECK-NEXT: movvs r7, #0
+; CHECK-NEXT: str r7, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r1, #0
+; CHECK-NEXT: str r1, [sp, #44] @ 4-byte Spill
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: vcmp.f32 s24, s22
+; CHECK-NEXT: vcmp.f32 s26, s22
+; CHECK-NEXT: vcvtt.f32.f16 s24, s18
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itttt lt
+; CHECK-NEXT: mvnlt r3, #7
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: mvnlt r3, #7
-; CHECK-NEXT: vcmp.f32 s24, s20
+; CHECK-NEXT: vcmp.f32 s26, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vcmp.f32 s24, s24
; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt r3, #7
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: movgt.w r1, #-1
; CHECK-NEXT: movgt.w r2, #-1
+; CHECK-NEXT: movgt r3, #7
+; CHECK-NEXT: vcmp.f32 s26, s26
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r3, #0
+; CHECK-NEXT: str r3, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r2, #0
-; CHECK-NEXT: str.w r2, [r4, #83]
+; CHECK-NEXT: str r2, [sp, #32] @ 4-byte Spill
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r1, #0
-; CHECK-NEXT: str.w r1, [r4, #79]
+; CHECK-NEXT: str r1, [sp, #28] @ 4-byte Spill
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
-; CHECK-NEXT: vcvtt.f32.f16 s24, s16
-; CHECK-NEXT: str.w r0, [r4, #75]
+; CHECK-NEXT: str r0, [sp, #24] @ 4-byte Spill
; CHECK-NEXT: vmov r0, s24
-; CHECK-NEXT: str.w r7, [r4, #58]
-; CHECK-NEXT: str.w r6, [r4, #54]
-; CHECK-NEXT: str.w r5, [r4, #50]
-; CHECK-NEXT: str.w r10, [r4, #33]
-; CHECK-NEXT: str.w r9, [r4, #29]
-; CHECK-NEXT: str.w r8, [r4, #25]
-; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r3, #0
-; CHECK-NEXT: str r3, [sp, #28] @ 4-byte Spill
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: vcmp.f32 s24, s22
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -4860,119 +5106,155 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r3, #0
-; CHECK-NEXT: str r3, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT: str r3, [sp, #20] @ 4-byte Spill
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r2, #0
-; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
-; CHECK-NEXT: vcvtt.f32.f16 s24, s17
-; CHECK-NEXT: str r0, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT: vcvtb.f32.f16 s24, s17
+; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill
; CHECK-NEXT: vmov r0, s24
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r1, #0
-; CHECK-NEXT: str r1, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: bl __fixsfti
+; CHECK-NEXT: vcvtb.f32.f16 s18, s18
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: mov r8, r1
; CHECK-NEXT: vcmp.f32 s24, s22
-; CHECK-NEXT: vcvtt.f32.f16 s18, s18
+; CHECK-NEXT: mov r6, r2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itttt lt
; CHECK-NEXT: mvnlt r3, #7
-; CHECK-NEXT: movlt r2, #0
-; CHECK-NEXT: movlt r1, #0
-; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: movlt r5, #0
+; CHECK-NEXT: movlt.w r8, #0
+; CHECK-NEXT: movlt r6, #0
; CHECK-NEXT: vcmp.f32 s24, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r2, #-1
+; CHECK-NEXT: movgt.w r6, #-1
+; CHECK-NEXT: movgt.w r8, #-1
+; CHECK-NEXT: movgt.w r5, #-1
; CHECK-NEXT: movgt r3, #7
; CHECK-NEXT: vcmp.f32 s24, s24
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r3, #0
-; CHECK-NEXT: str r3, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r2, #0
-; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r1, #0
-; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r0, #0
-; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: ittt vs
+; CHECK-NEXT: movvs r5, #0
+; CHECK-NEXT: movvs.w r8, #0
+; CHECK-NEXT: movvs r6, #0
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: vcmp.f32 s18, s22
-; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: mov r7, r1
-; CHECK-NEXT: mov r8, r2
+; CHECK-NEXT: mov r11, r1
+; CHECK-NEXT: mov r4, r2
; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: mvnlt r3, #7
-; CHECK-NEXT: movlt.w r8, #0
-; CHECK-NEXT: movlt r5, #0
; CHECK-NEXT: movlt r7, #0
+; CHECK-NEXT: movlt.w r11, #0
+; CHECK-NEXT: movlt r4, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s18, s18
-; CHECK-NEXT: vcvtt.f32.f16 s18, s19
+; CHECK-NEXT: vcvtb.f32.f16 s18, s19
; CHECK-NEXT: vmov r0, s18
; CHECK-NEXT: itttt gt
+; CHECK-NEXT: movgt.w r4, #-1
+; CHECK-NEXT: movgt.w r11, #-1
; CHECK-NEXT: movgt.w r7, #-1
-; CHECK-NEXT: movgt.w r5, #-1
-; CHECK-NEXT: movgt.w r8, #-1
; CHECK-NEXT: movgt r3, #7
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r3, #0
; CHECK-NEXT: str r3, [sp] @ 4-byte Spill
; CHECK-NEXT: ittt vs
-; CHECK-NEXT: movvs.w r8, #0
-; CHECK-NEXT: movvs r5, #0
; CHECK-NEXT: movvs r7, #0
+; CHECK-NEXT: movvs.w r11, #0
+; CHECK-NEXT: movvs r4, #0
+; CHECK-NEXT: bl __fixsfti
+; CHECK-NEXT: vcmp.f32 s18, s22
+; CHECK-NEXT: mov r9, r3
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s18, s20
+; CHECK-NEXT: itttt lt
+; CHECK-NEXT: movlt r2, #0
+; CHECK-NEXT: movlt r1, #0
+; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: mvnlt r9, #7
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s18, s18
+; CHECK-NEXT: itttt gt
+; CHECK-NEXT: movgt.w r9, #7
+; CHECK-NEXT: movgt.w r0, #-1
+; CHECK-NEXT: movgt.w r1, #-1
+; CHECK-NEXT: movgt.w r2, #-1
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r2, #0
+; CHECK-NEXT: str.w r2, [r10, #83]
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r1, #0
+; CHECK-NEXT: str.w r1, [r10, #79]
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r0, #0
+; CHECK-NEXT: vcvtt.f32.f16 s18, s19
+; CHECK-NEXT: str.w r0, [r10, #75]
+; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: str.w r4, [r10, #58]
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: str.w r11, [r10, #54]
+; CHECK-NEXT: str.w r7, [r10, #50]
+; CHECK-NEXT: str.w r6, [r10, #33]
+; CHECK-NEXT: str.w r8, [r10, #29]
+; CHECK-NEXT: str.w r5, [r10, #25]
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs.w r9, #0
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: vcvtb.f32.f16 s16, s16
-; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: vmov r0, s16
-; CHECK-NEXT: mov r9, r1
+; CHECK-NEXT: mov r7, r1
; CHECK-NEXT: vcmp.f32 s18, s22
-; CHECK-NEXT: mov r10, r2
-; CHECK-NEXT: mov r11, r3
+; CHECK-NEXT: mov r6, r2
+; CHECK-NEXT: mov r4, r3
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itttt lt
-; CHECK-NEXT: mvnlt r11, #7
-; CHECK-NEXT: movlt.w r10, #0
-; CHECK-NEXT: movlt.w r9, #0
+; CHECK-NEXT: mvnlt r4, #7
; CHECK-NEXT: movlt r6, #0
+; CHECK-NEXT: movlt r7, #0
+; CHECK-NEXT: movlt r5, #0
; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itttt gt
+; CHECK-NEXT: movgt.w r5, #-1
+; CHECK-NEXT: movgt.w r7, #-1
; CHECK-NEXT: movgt.w r6, #-1
-; CHECK-NEXT: movgt.w r9, #-1
-; CHECK-NEXT: movgt.w r10, #-1
-; CHECK-NEXT: movgt.w r11, #7
+; CHECK-NEXT: movgt r4, #7
; CHECK-NEXT: vcmp.f32 s18, s18
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itttt vs
-; CHECK-NEXT: movvs.w r11, #0
-; CHECK-NEXT: movvs.w r10, #0
-; CHECK-NEXT: movvs.w r9, #0
+; CHECK-NEXT: movvs r4, #0
; CHECK-NEXT: movvs r6, #0
+; CHECK-NEXT: movvs r7, #0
+; CHECK-NEXT: movvs r5, #0
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: vcmp.f32 s16, s22
-; CHECK-NEXT: mov r12, r3
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: mvnlt r12, #7
+; CHECK-NEXT: mvnlt r3, #7
; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt.w r12, #7
+; CHECK-NEXT: movgt r3, #7
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: movgt.w r1, #-1
; CHECK-NEXT: movgt.w r2, #-1
@@ -4980,73 +5262,74 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r2, #0
-; CHECK-NEXT: str r2, [r4, #8]
+; CHECK-NEXT: str.w r2, [r10, #8]
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r1, #0
-; CHECK-NEXT: str r1, [r4, #4]
+; CHECK-NEXT: str.w r1, [r10, #4]
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
-; CHECK-NEXT: str r0, [r4]
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: lsrl r0, r9, #28
-; CHECK-NEXT: str.w r0, [r4, #91]
+; CHECK-NEXT: str.w r0, [r10]
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: lsrl r0, r7, #28
-; CHECK-NEXT: str.w r0, [r4, #66]
-; CHECK-NEXT: ldr.w lr, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: mov r0, lr
-; CHECK-NEXT: lsrl r0, r3, #28
-; CHECK-NEXT: str.w r0, [r4, #41]
-; CHECK-NEXT: ldrd r0, r1, [sp, #40] @ 8-byte Folded Reload
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: orr.w r1, r7, r6, lsl #4
+; CHECK-NEXT: str.w r1, [r10, #95]
+; CHECK-NEXT: and r1, r4, #15
+; CHECK-NEXT: str.w r0, [r10, #91]
+; CHECK-NEXT: and r0, r9, #15
+; CHECK-NEXT: lsrl r6, r1, #28
+; CHECK-NEXT: strb.w r6, [r10, #99]
+; CHECK-NEXT: orr.w r0, r0, r5, lsl #4
+; CHECK-NEXT: str.w r0, [r10, #87]
+; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: lsrl r0, r1, #28
-; CHECK-NEXT: str r1, [sp, #44] @ 4-byte Spill
-; CHECK-NEXT: and r1, r11, #15
-; CHECK-NEXT: str r0, [r4, #16]
-; CHECK-NEXT: orr.w r0, r9, r10, lsl #4
-; CHECK-NEXT: lsrl r10, r1, #28
-; CHECK-NEXT: str.w r0, [r4, #95]
-; CHECK-NEXT: strb.w r10, [r4, #99]
-; CHECK-NEXT: ldr r0, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT: and r0, r0, #15
-; CHECK-NEXT: orr.w r0, r0, r6, lsl #4
-; CHECK-NEXT: str.w r0, [r4, #87]
-; CHECK-NEXT: orr.w r0, r7, r8, lsl #4
-; CHECK-NEXT: str.w r0, [r4, #70]
-; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload
+; CHECK-NEXT: orr.w r1, r1, r2, lsl #4
+; CHECK-NEXT: str.w r1, [r10, #70]
+; CHECK-NEXT: str.w r0, [r10, #66]
+; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: and r1, r0, #15
-; CHECK-NEXT: lsrl r8, r1, #28
-; CHECK-NEXT: strb.w r8, [r4, #74]
-; CHECK-NEXT: ldr r0, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT: lsrl r2, r1, #28
+; CHECK-NEXT: strb.w r2, [r10, #74]
+; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload
; CHECK-NEXT: and r0, r0, #15
-; CHECK-NEXT: orr.w r0, r0, r5, lsl #4
-; CHECK-NEXT: str.w r0, [r4, #62]
-; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: orr.w r0, r3, r2, lsl #4
-; CHECK-NEXT: str.w r0, [r4, #45]
-; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: orr.w r0, r0, r7, lsl #4
+; CHECK-NEXT: str.w r0, [r10, #62]
+; CHECK-NEXT: ldr r7, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: lsrl r0, r1, #28
+; CHECK-NEXT: orr.w r1, r1, r2, lsl #4
+; CHECK-NEXT: str.w r1, [r10, #45]
+; CHECK-NEXT: str.w r0, [r10, #41]
+; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
; CHECK-NEXT: and r1, r0, #15
; CHECK-NEXT: lsrl r2, r1, #28
-; CHECK-NEXT: strb.w r2, [r4, #49]
-; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT: strb.w r2, [r10, #49]
+; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: and r0, r0, #15
-; CHECK-NEXT: orr.w r0, r0, lr, lsl #4
-; CHECK-NEXT: str.w r0, [r4, #37]
-; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT: orr.w r0, r0, r2, lsl #4
-; CHECK-NEXT: str r0, [r4, #20]
-; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT: orr.w r0, r0, r7, lsl #4
+; CHECK-NEXT: str.w r0, [r10, #37]
+; CHECK-NEXT: ldr r7, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT: ldr r2, [sp, #48] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: lsrl r0, r1, #28
+; CHECK-NEXT: orr.w r1, r1, r2, lsl #4
+; CHECK-NEXT: strd r0, r1, [r10, #16]
+; CHECK-NEXT: ldr r0, [sp, #52] @ 4-byte Reload
; CHECK-NEXT: and r1, r0, #15
; CHECK-NEXT: lsrl r2, r1, #28
-; CHECK-NEXT: strb r2, [r4, #24]
+; CHECK-NEXT: strb.w r2, [r10, #24]
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs.w r12, #0
-; CHECK-NEXT: ldr r1, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT: and r0, r12, #15
-; CHECK-NEXT: orr.w r0, r0, r1, lsl #4
-; CHECK-NEXT: str r0, [r4, #12]
-; CHECK-NEXT: add sp, #48
+; CHECK-NEXT: movvs r3, #0
+; CHECK-NEXT: and r0, r3, #15
+; CHECK-NEXT: orr.w r0, r0, r7, lsl #4
+; CHECK-NEXT: str.w r0, [r10, #12]
+; CHECK-NEXT: add sp, #56
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -5063,63 +5346,62 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
define arm_aapcs_vfpcc <8 x i128> @test_signed_v8f16_v8i128(<8 x half> %f) {
; CHECK-LABEL: test_signed_v8f16_v8i128:
; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, lr}
-; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: vcvtt.f32.f16 s30, s19
-; CHECK-NEXT: vcvtb.f32.f16 s20, s16
-; CHECK-NEXT: vmov r0, s30
-; CHECK-NEXT: vcvtb.f32.f16 s26, s19
-; CHECK-NEXT: vldr s22, .LCPI51_0
-; CHECK-NEXT: vmov r5, s20
-; CHECK-NEXT: vmov r7, s26
-; CHECK-NEXT: vcvtt.f32.f16 s28, s18
+; CHECK-NEXT: vcvtt.f32.f16 s26, s19
+; CHECK-NEXT: vcvtb.f32.f16 s28, s19
+; CHECK-NEXT: vmov r0, s26
+; CHECK-NEXT: vcvtb.f32.f16 s24, s17
+; CHECK-NEXT: vldr s20, .LCPI51_0
+; CHECK-NEXT: vmov r5, s28
+; CHECK-NEXT: vmov r8, s24
+; CHECK-NEXT: vcvtt.f32.f16 s30, s18
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: vldr s24, .LCPI51_1
+; CHECK-NEXT: vldr s22, .LCPI51_1
; CHECK-NEXT: add.w r12, r4, #112
-; CHECK-NEXT: vmov r6, s28
-; CHECK-NEXT: vcvtb.f32.f16 s18, s18
-; CHECK-NEXT: vcmp.f32 s30, s24
+; CHECK-NEXT: vmov r6, s30
+; CHECK-NEXT: vcmp.f32 s26, s22
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s26, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt.w r3, #-2147483648
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s30, s22
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s26, s26
; CHECK-NEXT: itttt gt
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: movgt.w r1, #-1
; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: mvngt r3, #-2147483648
-; CHECK-NEXT: vcmp.f32 s30, s30
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r3, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: ittt vs
; CHECK-NEXT: movvs r2, #0
; CHECK-NEXT: movvs r1, #0
; CHECK-NEXT: movvs r0, #0
; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: vcvtb.f32.f16 s26, s18
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: vcmp.f32 s26, s24
+; CHECK-NEXT: vcmp.f32 s28, s22
; CHECK-NEXT: add.w r12, r4, #96
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s28, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt.w r3, #-2147483648
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s26, s22
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vcmp.f32 s26, s26
+; CHECK-NEXT: vcmp.f32 s28, s28
; CHECK-NEXT: itttt gt
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: movgt.w r1, #-1
@@ -5128,26 +5410,27 @@ define arm_aapcs_vfpcc <8 x i128> @test_signed_v8f16_v8i128(<8 x half> %f) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r3, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: ittt vs
; CHECK-NEXT: movvs r2, #0
; CHECK-NEXT: movvs r1, #0
; CHECK-NEXT: movvs r0, #0
; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: vmov r7, s18
-; CHECK-NEXT: vcvtt.f32.f16 s26, s17
+; CHECK-NEXT: vmov r7, s26
+; CHECK-NEXT: vcvtt.f32.f16 s28, s17
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: vcmp.f32 s28, s24
+; CHECK-NEXT: vcmp.f32 s30, s22
; CHECK-NEXT: add.w r12, r4, #80
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s30, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt.w r3, #-2147483648
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s28, s22
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vcmp.f32 s28, s28
+; CHECK-NEXT: vcmp.f32 s30, s30
; CHECK-NEXT: itttt gt
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: movgt.w r1, #-1
@@ -5156,145 +5439,155 @@ define arm_aapcs_vfpcc <8 x i128> @test_signed_v8f16_v8i128(<8 x half> %f) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r3, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: ittt vs
; CHECK-NEXT: movvs r2, #0
; CHECK-NEXT: movvs r1, #0
; CHECK-NEXT: movvs r0, #0
; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: vmov r6, s26
-; CHECK-NEXT: vcvtb.f32.f16 s28, s17
+; CHECK-NEXT: vmov r5, s28
+; CHECK-NEXT: vcvtt.f32.f16 s18, s16
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: vcmp.f32 s18, s24
+; CHECK-NEXT: vcmp.f32 s26, s22
; CHECK-NEXT: add.w r12, r4, #64
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s26, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt.w r3, #-2147483648
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s18, s22
-; CHECK-NEXT: vcvtt.f32.f16 s16, s16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s26, s26
; CHECK-NEXT: itttt gt
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: movgt.w r1, #-1
; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: mvngt r3, #-2147483648
-; CHECK-NEXT: vcmp.f32 s18, s18
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r3, #0
-; CHECK-NEXT: ittt vs
+; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r2, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: itt vs
; CHECK-NEXT: movvs r1, #0
; CHECK-NEXT: movvs r0, #0
; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: vmov r7, s28
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: vcvtb.f32.f16 s16, s16
+; CHECK-NEXT: vmov r6, s18
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: vcmp.f32 s26, s24
+; CHECK-NEXT: vcmp.f32 s28, s22
; CHECK-NEXT: add.w r12, r4, #48
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s28, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt.w r3, #-2147483648
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s26, s22
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s28, s28
; CHECK-NEXT: itttt gt
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: movgt.w r1, #-1
; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: mvngt r3, #-2147483648
-; CHECK-NEXT: vcmp.f32 s26, s26
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r3, #0
-; CHECK-NEXT: ittt vs
+; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r2, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: itt vs
; CHECK-NEXT: movvs r1, #0
; CHECK-NEXT: movvs r0, #0
; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: vmov r6, s16
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: vmov r7, s16
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: vcmp.f32 s28, s24
+; CHECK-NEXT: vcmp.f32 s24, s22
; CHECK-NEXT: add.w r12, r4, #32
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s24, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt.w r3, #-2147483648
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s28, s22
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s24, s24
; CHECK-NEXT: itttt gt
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: movgt.w r1, #-1
; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: mvngt r3, #-2147483648
-; CHECK-NEXT: vcmp.f32 s28, s28
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt vs
+; CHECK-NEXT: ittt vs
; CHECK-NEXT: movvs r3, #0
; CHECK-NEXT: movvs r2, #0
; CHECK-NEXT: movvs r1, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: vcmp.f32 s16, s24
+; CHECK-NEXT: vcmp.f32 s18, s22
; CHECK-NEXT: add.w r12, r4, #16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt.w r3, #-2147483648
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s16, s22
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s18, s18
; CHECK-NEXT: itttt gt
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: movgt.w r1, #-1
; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: mvngt r3, #-2147483648
-; CHECK-NEXT: vcmp.f32 s16, s16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt vs
+; CHECK-NEXT: ittt vs
; CHECK-NEXT: movvs r3, #0
; CHECK-NEXT: movvs r2, #0
; CHECK-NEXT: movvs r1, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: vcmp.f32 s20, s24
+; CHECK-NEXT: vcmp.f32 s16, s22
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt.w r3, #-2147483648
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: vcmp.f32 s20, s22
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s16, s16
; CHECK-NEXT: itttt gt
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: movgt.w r1, #-1
; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: mvngt r3, #-2147483648
-; CHECK-NEXT: vcmp.f32 s20, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt vs
+; CHECK-NEXT: ittt vs
; CHECK-NEXT: movvs r3, #0
; CHECK-NEXT: movvs r2, #0
; CHECK-NEXT: movvs r1, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
; CHECK-NEXT: stm r4!, {r0, r1, r2, r3}
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI51_0:
diff --git a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
index ee040feca4240f..13609bd1903f2d 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
@@ -268,38 +268,41 @@ declare <6 x i32> @llvm.fptoui.sat.v6f64.v6i32 (<6 x double>)
define arm_aapcs_vfpcc <1 x i32> @test_unsigned_v1f64_v1i32(<1 x double> %f) {
; CHECK-LABEL: test_unsigned_v1f64_v1i32:
; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r4, r5, r6, lr}
-; CHECK-NEXT: push {r4, r5, r6, lr}
+; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: vldr d1, .LCPI8_0
; CHECK-NEXT: vmov r4, r5, d0
; CHECK-NEXT: vmov r2, r3, d1
; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: vldr d0, .LCPI8_1
; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: bl __aeabi_d2uiz
-; CHECK-NEXT: vldr d0, .LCPI8_1
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: csel r6, r0, r6, ne
-; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: vmov r2, r3, d0
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: bl __aeabi_d2uiz
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: csel r0, r0, r7, ne
+; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r6, #-1
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: pop {r4, r5, r6, pc}
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI8_0:
-; CHECK-NEXT: .long 0 @ double 0
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .LCPI8_1:
; CHECK-NEXT: .long 4292870144 @ double 4294967295
; CHECK-NEXT: .long 1106247679
+; CHECK-NEXT: .LCPI8_1:
+; CHECK-NEXT: .long 0 @ double 0
+; CHECK-NEXT: .long 0
%x = call <1 x i32> @llvm.fptoui.sat.v1f64.v1i32(<1 x double> %f)
ret <1 x i32> %x
}
@@ -313,60 +316,84 @@ define arm_aapcs_vfpcc <2 x i32> @test_unsigned_v2f64_v2i32(<2 x double> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #8
-; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: .pad #16
+; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI9_0
; CHECK-NEXT: vmov r6, r7, d9
-; CHECK-NEXT: vmov r11, r3, d0
-; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: strd r3, r2, [sp, #4] @ 8-byte Folded Spill
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r7
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: vldr d0, .LCPI9_1
-; CHECK-NEXT: cmp.w r8, #0
-; CHECK-NEXT: csel r9, r0, r8, ne
-; CHECK-NEXT: csel r8, r1, r8, ne
-; CHECK-NEXT: vmov r10, r3, d0
-; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: mov r10, r1
+; CHECK-NEXT: vmov r9, r8, d0
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: vmov r5, r4, d8
-; CHECK-NEXT: str r3, [sp] @ 4-byte Spill
-; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: vmov r11, r4, d8
+; CHECK-NEXT: lsrs r0, r0, #5
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r5, #0
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r8
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne.w r8, #0
-; CHECK-NEXT: movne.w r9, #-1
-; CHECK-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r0, r11
; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r6, #1
; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: csel r7, r1, r6, ne
-; CHECK-NEXT: csel r6, r0, r6, ne
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r5, #-1
; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne.w r6, #-1
-; CHECK-NEXT: movne r7, #0
-; CHECK-NEXT: vmov q0[2], q0[0], r6, r9
-; CHECK-NEXT: vmov q0[3], q0[1], r7, r8
-; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r7, #1
+; CHECK-NEXT: ldrd r3, r2, [sp, #4] @ 8-byte Folded Reload
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r8, r4
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: lsrs r4, r0, #5
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r0, #0
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r5
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r10, #0
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r10, #0
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r1, #0
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r1, #0
+; CHECK-NEXT: vmov q0[3], q0[1], r1, r10
+; CHECK-NEXT: add sp, #16
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -391,93 +418,99 @@ define arm_aapcs_vfpcc <3 x i32> @test_unsigned_v3f64_v3i32(<3 x double> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #16
-; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: .pad #24
+; CHECK-NEXT: sub sp, #24
; CHECK-NEXT: vmov.f32 s18, s0
; CHECK-NEXT: vmov.f32 s19, s1
; CHECK-NEXT: vldr d0, .LCPI10_0
-; CHECK-NEXT: vmov r8, r9, d1
-; CHECK-NEXT: vmov r5, r4, d0
+; CHECK-NEXT: vmov r4, r5, d1
+; CHECK-NEXT: vmov r9, r7, d0
; CHECK-NEXT: vmov.f32 s16, s4
; CHECK-NEXT: vmov.f32 s17, s5
-; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r9
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r3, r4
-; CHECK-NEXT: str r4, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r10, r0
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r9
-; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: vldr d0, .LCPI10_1
-; CHECK-NEXT: vmov r11, r1, d9
-; CHECK-NEXT: cmp.w r10, #0
-; CHECK-NEXT: vmov r7, r6, d8
-; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: csel r10, r0, r10, ne
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: mov r1, r9
-; CHECK-NEXT: mov r9, r2
-; CHECK-NEXT: mov r8, r3
+; CHECK-NEXT: str.w r9, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: str r7, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r3, r4
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r10, #-1
+; CHECK-NEXT: vldr d0, .LCPI10_1
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: vmov r11, r3, d0
+; CHECK-NEXT: str r3, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: mov r2, r11
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: csel r5, r0, r4, ne
-; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: vmov r10, r8, d8
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: csel r0, r0, r6, ne
; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: vmov r5, r4, d9
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r8
; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: ldr r7, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: csel r6, r0, r9, ne
+; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r5, #-1
-; CHECK-NEXT: ldr r6, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload
-; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: movne.w r6, #-1
+; CHECK-NEXT: ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r7
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: csel r4, r0, r4, ne
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: vmov.32 q0[1], r10
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: csel r0, r0, r7, ne
+; CHECK-NEXT: cmp.w r8, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: vmov q0[2], q0[0], r4, r5
-; CHECK-NEXT: add sp, #16
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: vmov.32 q0[1], r1
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r6
+; CHECK-NEXT: add sp, #24
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI10_0:
-; CHECK-NEXT: .long 0 @ double 0
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .LCPI10_1:
; CHECK-NEXT: .long 4292870144 @ double 4294967295
; CHECK-NEXT: .long 1106247679
+; CHECK-NEXT: .LCPI10_1:
+; CHECK-NEXT: .long 0 @ double 0
+; CHECK-NEXT: .long 0
%x = call <3 x i32> @llvm.fptoui.sat.v3f64.v3i32(<3 x double> %f)
ret <3 x i32> %x
}
@@ -496,103 +529,103 @@ define arm_aapcs_vfpcc <4 x i32> @test_unsigned_v4f64_v4i32(<4 x double> %f) {
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI11_0
; CHECK-NEXT: vmov q5, q1
-; CHECK-NEXT: vmov r8, r9, d10
-; CHECK-NEXT: vmov r2, r11, d0
-; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r9
-; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r10, r0
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r9
-; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: vmov r7, r9, d0
+; CHECK-NEXT: vmov r4, r5, d10
+; CHECK-NEXT: str.w r9, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: vldr d0, .LCPI11_1
-; CHECK-NEXT: vmov r5, r1, d11
-; CHECK-NEXT: cmp.w r10, #0
-; CHECK-NEXT: vmov r6, r7, d8
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: csel r4, r0, r10, ne
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: strd r5, r1, [sp, #4] @ 8-byte Folded Spill
-; CHECK-NEXT: mov r1, r9
-; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
-; CHECK-NEXT: mov r8, r2
-; CHECK-NEXT: mov r9, r3
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: mov r5, r7
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: ldr r7, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: str r4, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT: mov r10, r11
-; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: strd r2, r3, [sp, #16] @ 8-byte Folded Spill
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: mov r2, r8
-; CHECK-NEXT: csel r4, r0, r4, ne
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: vmov r10, r8, d8
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: csel r0, r0, r6, ne
+; CHECK-NEXT: mov r2, r7
; CHECK-NEXT: mov r3, r9
-; CHECK-NEXT: mov r8, r9
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: ldr.w r11, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: ldr.w r9, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: mov r3, r10
-; CHECK-NEXT: str r4, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r9
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r9
-; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: mov r1, r9
-; CHECK-NEXT: ldr.w r9, [sp] @ 4-byte Reload
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: csel r5, r0, r4, ne
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: vmov r7, r6, d9
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: vmov r11, r5, d11
+; CHECK-NEXT: mov r4, r7
+; CHECK-NEXT: str r7, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r8
; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: ldr r7, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: str r0, [sp] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: csel r8, r0, r9, ne
+; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload
+; CHECK-NEXT: mov r2, r4
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r5, #-1
-; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: movne.w r8, #-1
+; CHECK-NEXT: ldr.w r10, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r11
; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r7
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: csel r4, r0, r4, ne
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: vmov r4, r5, d9
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: csel r6, r0, r7, ne
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r6, #-1
+; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: ldrd r2, r3, [sp, #16] @ 8-byte Folded Reload
+; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: csel r0, r0, r7, ne
+; CHECK-NEXT: cmp.w r9, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: ldrd r1, r0, [sp, #12] @ 8-byte Folded Reload
-; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
-; CHECK-NEXT: vmov q0[3], q0[1], r4, r5
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: vmov q0[2], q0[0], r8, r1
+; CHECK-NEXT: vmov q0[3], q0[1], r0, r6
; CHECK-NEXT: add sp, #24
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: add sp, #4
@@ -600,11 +633,11 @@ define arm_aapcs_vfpcc <4 x i32> @test_unsigned_v4f64_v4i32(<4 x double> %f) {
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI11_0:
-; CHECK-NEXT: .long 0 @ double 0
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .LCPI11_1:
; CHECK-NEXT: .long 4292870144 @ double 4294967295
; CHECK-NEXT: .long 1106247679
+; CHECK-NEXT: .LCPI11_1:
+; CHECK-NEXT: .long 0 @ double 0
+; CHECK-NEXT: .long 0
%x = call <4 x i32> @llvm.fptoui.sat.v4f64.v4i32(<4 x double> %f)
ret <4 x i32> %x
}
@@ -618,151 +651,162 @@ define arm_aapcs_vfpcc <5 x i32> @test_unsigned_v5f64_v5i32(<5 x double> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
-; CHECK-NEXT: .pad #32
-; CHECK-NEXT: sub sp, #32
+; CHECK-NEXT: .pad #40
+; CHECK-NEXT: sub sp, #40
; CHECK-NEXT: vmov.f32 s16, s0
-; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: vmov.f32 s17, s1
; CHECK-NEXT: vldr d0, .LCPI12_0
-; CHECK-NEXT: vmov r6, r11, d4
-; CHECK-NEXT: str r0, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT: vmov r5, r6, d4
+; CHECK-NEXT: str r0, [sp, #28] @ 4-byte Spill
; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: vmov.f32 s18, s6
-; CHECK-NEXT: vmov.f32 s20, s4
+; CHECK-NEXT: vmov.f32 s20, s6
+; CHECK-NEXT: vmov.f32 s18, s4
; CHECK-NEXT: vmov.f32 s22, s2
-; CHECK-NEXT: vmov.f32 s19, s7
-; CHECK-NEXT: vmov.f32 s21, s5
+; CHECK-NEXT: vmov.f32 s21, s7
+; CHECK-NEXT: vmov.f32 s19, s5
; CHECK-NEXT: vmov.f32 s23, s3
-; CHECK-NEXT: str r2, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: strd r2, r3, [sp, #32] @ 8-byte Folded Spill
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: vldr d0, .LCPI12_1
+; CHECK-NEXT: mov r10, r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: vmov r7, r3, d0
+; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: str r7, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: mov r2, r7
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r11
+; CHECK-NEXT: mov r11, r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: vmov r8, r1, d11
-; CHECK-NEXT: vldr d0, .LCPI12_1
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: vmov r10, r9, d9
-; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: csel r4, r0, r4, ne
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT: vmov r5, r1, d10
-; CHECK-NEXT: strd r5, r1, [sp, #12] @ 8-byte Folded Spill
-; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: mov r11, r2
-; CHECK-NEXT: mov r5, r3
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: cmp.w r11, #0
+; CHECK-NEXT: vmov r6, r9, d10
+; CHECK-NEXT: csel r0, r0, r11, ne
+; CHECK-NEXT: cmp.w r10, #0
+; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: vmov r2, r1, d9
+; CHECK-NEXT: strd r2, r1, [sp, #16] @ 8-byte Folded Spill
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: str r4, [r7, #16]
-; CHECK-NEXT: mov r0, r10
-; CHECK-NEXT: ldr r7, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: str r0, [r4, #16]
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: ldr r5, [sp, #32] @ 4-byte Reload
; CHECK-NEXT: mov r1, r9
-; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: ldr.w r10, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: mov r11, r0
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r9
+; CHECK-NEXT: mov r3, r7
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r9
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: mov r1, r9
-; CHECK-NEXT: csel r4, r0, r4, ne
-; CHECK-NEXT: mov r0, r10
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: mov r3, r5
-; CHECK-NEXT: mov r10, r11
-; CHECK-NEXT: mov r11, r5
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: csel r0, r0, r4, ne
+; CHECK-NEXT: cmp.w r11, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: ldr r6, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: str r0, [sp, #24] @ 4-byte Spill
; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: str r4, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: mov r11, r10
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: ldr.w r10, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: csel r9, r0, r4, ne
-; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: mov r5, r6
; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: mov r8, r11
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r9, #-1
-; CHECK-NEXT: ldr.w r11, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: ldr r5, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r9, r7
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: csel r0, r0, r6, ne
; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: csel r7, r0, r4, ne
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: vmov r4, r5, d8
-; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r7, #-1
-; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: ldr r4, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: ldr.w r8, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: mov r11, r10
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: csel r6, r0, r6, ne
-; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: csel r4, r0, r5, ne
+; CHECK-NEXT: vmov r5, r6, d8
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r4, #-1
+; CHECK-NEXT: ldr r3, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r10, r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: csel r0, r0, r7, ne
+; CHECK-NEXT: cmp.w r10, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r6, #-1
-; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: vmov q0[2], q0[0], r6, r7
-; CHECK-NEXT: vmov q0[3], q0[1], r9, r0
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r4
; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
+; CHECK-NEXT: ldr r0, [sp, #28] @ 4-byte Reload
; CHECK-NEXT: vstrw.32 q0, [r0]
-; CHECK-NEXT: add sp, #32
+; CHECK-NEXT: add sp, #40
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI12_0:
-; CHECK-NEXT: .long 0 @ double 0
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .LCPI12_1:
; CHECK-NEXT: .long 4292870144 @ double 4294967295
; CHECK-NEXT: .long 1106247679
+; CHECK-NEXT: .LCPI12_1:
+; CHECK-NEXT: .long 0 @ double 0
+; CHECK-NEXT: .long 0
%x = call <5 x i32> @llvm.fptoui.sat.v5f64.v5i32(<5 x double> %f)
ret <5 x i32> %x
}
@@ -779,161 +823,172 @@ define arm_aapcs_vfpcc <6 x i32> @test_unsigned_v6f64_v6i32(<6 x double> %f) {
; CHECK-NEXT: .pad #40
; CHECK-NEXT: sub sp, #40
; CHECK-NEXT: vmov.f32 s16, s0
-; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT: str r0, [sp, #32] @ 4-byte Spill
; CHECK-NEXT: vmov.f32 s17, s1
; CHECK-NEXT: vldr d0, .LCPI13_0
; CHECK-NEXT: vmov r5, r6, d5
-; CHECK-NEXT: vmov r10, r3, d0
-; CHECK-NEXT: vmov.f32 s20, s8
-; CHECK-NEXT: vmov.f32 s22, s6
+; CHECK-NEXT: vmov r11, r3, d0
+; CHECK-NEXT: vmov.f32 s22, s8
+; CHECK-NEXT: vmov.f32 s20, s6
; CHECK-NEXT: vmov.f32 s18, s4
; CHECK-NEXT: vmov.f32 s24, s2
-; CHECK-NEXT: vmov.f32 s21, s9
-; CHECK-NEXT: vmov.f32 s23, s7
+; CHECK-NEXT: vmov.f32 s23, s9
+; CHECK-NEXT: vmov.f32 s21, s7
; CHECK-NEXT: vmov.f32 s19, s5
; CHECK-NEXT: vmov.f32 s25, s3
-; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: str r3, [sp, #36] @ 4-byte Spill
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: str.w r11, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: vldr d0, .LCPI13_1
+; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: vmov r4, r9, d0
+; CHECK-NEXT: str r4, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT: mov r2, r4
+; CHECK-NEXT: mov r3, r9
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r8, r0
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: vmov r9, r1, d11
-; CHECK-NEXT: vldr d0, .LCPI13_1
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: vmov r8, r11, d10
-; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: csel r4, r0, r4, ne
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: str r1, [sp, #32] @ 4-byte Spill
-; CHECK-NEXT: vmov r7, r1, d12
-; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT: mov r5, r2
-; CHECK-NEXT: strd r7, r1, [sp, #24] @ 8-byte Folded Spill
+; CHECK-NEXT: vmov r10, r1, d10
+; CHECK-NEXT: cmp.w r8, #0
+; CHECK-NEXT: vmov r5, r6, d11
+; CHECK-NEXT: csel r0, r0, r8, ne
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: str r1, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: vmov r2, r1, d12
+; CHECK-NEXT: strd r2, r1, [sp, #12] @ 8-byte Folded Spill
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: ldr r7, [sp, #32] @ 4-byte Reload
; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: str r0, [r7, #20]
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: ldr.w r8, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT: mov r3, r8
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: ldr r7, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: str r4, [r7, #20]
-; CHECK-NEXT: ldr r6, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: mov r11, r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r2, r4
+; CHECK-NEXT: mov r3, r9
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r11
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: vmov r2, r1, d9
; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: csel r4, r0, r4, ne
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r8, r5
-; CHECK-NEXT: strd r2, r1, [sp, #16] @ 8-byte Folded Spill
-; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: ldr.w r11, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: csel r0, r0, r4, ne
+; CHECK-NEXT: cmp.w r11, #0
+; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: strd r2, r1, [sp, #4] @ 8-byte Folded Spill
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: str r4, [r7, #16]
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: ldr r5, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: csel r4, r0, r4, ne
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r2, r8
-; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: str.w r8, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: str r0, [r7, #16]
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: ldr r6, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: ldr.w r11, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r2, r11
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
; CHECK-NEXT: ldr r5, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: ldr r7, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: str r4, [sp, #32] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: mov r8, r9
+; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: csel r0, r0, r7, ne
; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: ldr r7, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: ldr r4, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT: ldr.w r9, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: csel r9, r0, r4, ne
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r2, r8
-; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: mov r3, r4
+; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: str r0, [sp] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: mov r6, r7
+; CHECK-NEXT: mov r10, r5
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: csel r9, r0, r7, ne
+; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload
+; CHECK-NEXT: mov r3, r4
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r9, #-1
-; CHECK-NEXT: ldr r7, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: ldr r5, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: csel r8, r0, r4, ne
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: mov r2, r7
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: vmov r4, r5, d8
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r8, #-1
-; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r11, r0
+; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r3, r8
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: csel r6, r0, r6, ne
-; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: vmov r5, r6, d8
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: csel r4, r0, r7, ne
+; CHECK-NEXT: cmp.w r11, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r4, #-1
+; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: ldr r3, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT: mov r10, r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: csel r0, r0, r7, ne
+; CHECK-NEXT: cmp.w r10, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r6, #-1
-; CHECK-NEXT: ldr r0, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT: vmov q0[2], q0[0], r6, r8
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r4
+; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: vmov q0[3], q0[1], r9, r0
-; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT: ldr r0, [sp, #32] @ 4-byte Reload
; CHECK-NEXT: vstrw.32 q0, [r0]
; CHECK-NEXT: add sp, #40
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12}
@@ -942,11 +997,11 @@ define arm_aapcs_vfpcc <6 x i32> @test_unsigned_v6f64_v6i32(<6 x double> %f) {
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI13_0:
-; CHECK-NEXT: .long 0 @ double 0
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .LCPI13_1:
; CHECK-NEXT: .long 4292870144 @ double 4294967295
; CHECK-NEXT: .long 1106247679
+; CHECK-NEXT: .LCPI13_1:
+; CHECK-NEXT: .long 0 @ double 0
+; CHECK-NEXT: .long 0
%x = call <6 x i32> @llvm.fptoui.sat.v6f64.v6i32(<6 x double> %f)
ret <6 x i32> %x
}
@@ -1425,66 +1480,65 @@ define arm_aapcs_vfpcc <4 x i50> @test_unsigned_v4f32_v4i50(<4 x float> %f) {
; CHECK-NEXT: .vsave {d8, d9, d10}
; CHECK-NEXT: vpush {d8, d9, d10}
; CHECK-NEXT: vmov q4, q0
-; CHECK-NEXT: mov r8, r0
-; CHECK-NEXT: vmov r0, s16
+; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: vmov r0, s18
; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mov r8, r0
; CHECK-NEXT: vmov r0, s19
-; CHECK-NEXT: vcmp.f32 s16, #0
-; CHECK-NEXT: mov r9, r1
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vmov r7, s18
; CHECK-NEXT: vldr s20, .LCPI28_0
+; CHECK-NEXT: vcmp.f32 s18, #0
+; CHECK-NEXT: mov r7, r1
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt.w r9, #0
-; CHECK-NEXT: movlt r5, #0
+; CHECK-NEXT: movlt.w r8, #0
+; CHECK-NEXT: movlt r7, #0
+; CHECK-NEXT: vcmp.f32 s18, s20
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vmov r5, s16
+; CHECK-NEXT: ittt gt
+; CHECK-NEXT: movwgt r7, #65535
+; CHECK-NEXT: movtgt r7, #3
+; CHECK-NEXT: movgt.w r8, #-1
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: vcmp.f32 s19, #0
-; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: mov r10, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: itt lt
; CHECK-NEXT: movlt.w r10, #0
; CHECK-NEXT: movlt r4, #0
-; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: vmov r0, s17
-; CHECK-NEXT: vcmp.f32 s18, #0
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vcmp.f32 s18, s20
-; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt r1, #0
-; CHECK-NEXT: movlt r7, #0
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s19, s20
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r7, #-1
-; CHECK-NEXT: itt gt
-; CHECK-NEXT: movwgt r1, #65535
-; CHECK-NEXT: movtgt r1, #3
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r4, #-1
-; CHECK-NEXT: itt gt
+; CHECK-NEXT: ittt gt
; CHECK-NEXT: movwgt r10, #65535
; CHECK-NEXT: movtgt r10, #3
-; CHECK-NEXT: mov r3, r10
-; CHECK-NEXT: bfc r1, #18, #14
-; CHECK-NEXT: bfc r3, #18, #14
-; CHECK-NEXT: mov r6, r7
+; CHECK-NEXT: movgt.w r4, #-1
+; CHECK-NEXT: bl __aeabi_f2ulz
+; CHECK-NEXT: mov r5, r1
+; CHECK-NEXT: vcmp.f32 s16, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: itt lt
+; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: movlt r5, #0
; CHECK-NEXT: vcmp.f32 s16, s20
-; CHECK-NEXT: lsll r4, r3, #22
-; CHECK-NEXT: lsrl r6, r1, #28
+; CHECK-NEXT: mov r1, r10
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r5, #-1
-; CHECK-NEXT: str.w r5, [r8]
+; CHECK-NEXT: movgt.w r0, #-1
+; CHECK-NEXT: str.w r0, [r9]
+; CHECK-NEXT: vmov r0, s17
+; CHECK-NEXT: bfc r1, #18, #14
+; CHECK-NEXT: bfc r7, #18, #14
+; CHECK-NEXT: mov r6, r8
+; CHECK-NEXT: lsll r4, r1, #22
+; CHECK-NEXT: lsrl r6, r7, #28
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt gt
-; CHECK-NEXT: movwgt r9, #65535
-; CHECK-NEXT: movtgt r9, #3
-; CHECK-NEXT: orrs r1, r3
-; CHECK-NEXT: str.w r1, [r8, #20]
+; CHECK-NEXT: movwgt r5, #65535
+; CHECK-NEXT: movtgt r5, #3
+; CHECK-NEXT: orrs r1, r7
+; CHECK-NEXT: str.w r1, [r9, #20]
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: vcmp.f32 s17, #0
; CHECK-NEXT: orr.w r2, r6, r4
@@ -1493,23 +1547,24 @@ define arm_aapcs_vfpcc <4 x i50> @test_unsigned_v4f32_v4i50(<4 x float> %f) {
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: vcmp.f32 s17, s20
-; CHECK-NEXT: bfc r9, #18, #14
+; CHECK-NEXT: bfc r5, #18, #14
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt gt
; CHECK-NEXT: movwgt r1, #65535
; CHECK-NEXT: movtgt r1, #3
-; CHECK-NEXT: str.w r2, [r8, #16]
+; CHECK-NEXT: str.w r2, [r9, #16]
; CHECK-NEXT: lsr.w r2, r10, #10
-; CHECK-NEXT: strb.w r2, [r8, #24]
+; CHECK-NEXT: strb.w r2, [r9, #24]
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: mov r2, r0
; CHECK-NEXT: bfc r1, #18, #14
+; CHECK-NEXT: orr.w r0, r5, r0, lsl #18
; CHECK-NEXT: lsrl r2, r1, #14
-; CHECK-NEXT: orr.w r0, r9, r0, lsl #18
-; CHECK-NEXT: orr.w r1, r1, r7, lsl #4
-; CHECK-NEXT: strd r2, r1, [r8, #8]
-; CHECK-NEXT: str.w r0, [r8, #4]
+; CHECK-NEXT: orr.w r1, r1, r8, lsl #4
+; CHECK-NEXT: strd r2, r1, [r9, #8]
+; CHECK-NEXT: str.w r0, [r9, #4]
; CHECK-NEXT: vpop {d8, d9, d10}
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
; CHECK-NEXT: .p2align 2
@@ -1615,14 +1670,36 @@ define arm_aapcs_vfpcc <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: vmov r0, s17
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: vmov r6, s17
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: vmov r0, s18
; CHECK-NEXT: vldr s20, .LCPI30_0
-; CHECK-NEXT: vcmp.f32 s18, #0
+; CHECK-NEXT: vcmp.f32 s17, #0
+; CHECK-NEXT: mov r7, r1
+; CHECK-NEXT: mov r6, r2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itttt lt
+; CHECK-NEXT: movlt r5, #0
+; CHECK-NEXT: movlt r7, #0
+; CHECK-NEXT: movlt r6, #0
; CHECK-NEXT: movlt r3, #0
+; CHECK-NEXT: vcmp.f32 s17, s20
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: it gt
+; CHECK-NEXT: movgt r3, #15
+; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: ittt gt
+; CHECK-NEXT: movgt.w r6, #-1
+; CHECK-NEXT: movgt.w r7, #-1
+; CHECK-NEXT: movgt.w r5, #-1
+; CHECK-NEXT: str r5, [sp] @ 4-byte Spill
+; CHECK-NEXT: bl __fixunssfti
+; CHECK-NEXT: vcmp.f32 s18, #0
+; CHECK-NEXT: mov r10, r3
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: itttt lt
+; CHECK-NEXT: movlt.w r10, #0
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r2, #0
@@ -1634,37 +1711,16 @@ define arm_aapcs_vfpcc <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r1, #-1
; CHECK-NEXT: str.w r1, [r4, #29]
+; CHECK-NEXT: vmov r1, s19
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: str.w r0, [r4, #25]
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt r3, #15
-; CHECK-NEXT: vmov r7, s19
-; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: mov r10, r0
-; CHECK-NEXT: vcmp.f32 s17, #0
-; CHECK-NEXT: mov r5, r1
-; CHECK-NEXT: mov r6, r2
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: itttt lt
-; CHECK-NEXT: movlt.w r10, #0
-; CHECK-NEXT: movlt r5, #0
-; CHECK-NEXT: movlt r6, #0
-; CHECK-NEXT: movlt r3, #0
-; CHECK-NEXT: vcmp.f32 s17, s20
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt r3, #15
-; CHECK-NEXT: str r3, [sp] @ 4-byte Spill
-; CHECK-NEXT: ittt gt
-; CHECK-NEXT: movgt.w r6, #-1
-; CHECK-NEXT: movgt.w r5, #-1
-; CHECK-NEXT: movgt.w r10, #-1
+; CHECK-NEXT: movgt.w r10, #15
+; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: vcmp.f32 s19, #0
; CHECK-NEXT: mov r9, r1
@@ -1673,7 +1729,7 @@ define arm_aapcs_vfpcc <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt.w r9, #0
-; CHECK-NEXT: movlt r7, #0
+; CHECK-NEXT: movlt r5, #0
; CHECK-NEXT: movlt.w r8, #0
; CHECK-NEXT: movlt.w r11, #0
; CHECK-NEXT: vcmp.f32 s19, s20
@@ -1681,7 +1737,7 @@ define arm_aapcs_vfpcc <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
; CHECK-NEXT: itttt gt
; CHECK-NEXT: movgt.w r11, #15
; CHECK-NEXT: movgt.w r8, #-1
-; CHECK-NEXT: movgt.w r7, #-1
+; CHECK-NEXT: movgt.w r5, #-1
; CHECK-NEXT: movgt.w r9, #-1
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcmp.f32 s16, #0
@@ -1702,31 +1758,31 @@ define arm_aapcs_vfpcc <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: str r0, [r4]
-; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: lsrl r0, r9, #28
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: orr.w r1, r9, r8, lsl #4
+; CHECK-NEXT: str.w r1, [r4, #45]
; CHECK-NEXT: and r1, r11, #15
; CHECK-NEXT: str.w r0, [r4, #41]
-; CHECK-NEXT: mov r0, r10
-; CHECK-NEXT: lsrl r0, r5, #28
-; CHECK-NEXT: str r0, [r4, #16]
-; CHECK-NEXT: orr.w r0, r9, r8, lsl #4
+; CHECK-NEXT: and r0, r10, #15
; CHECK-NEXT: lsrl r8, r1, #28
-; CHECK-NEXT: str.w r0, [r4, #45]
; CHECK-NEXT: strb.w r8, [r4, #49]
-; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: and r0, r0, #15
-; CHECK-NEXT: orr.w r0, r0, r7, lsl #4
+; CHECK-NEXT: orr.w r0, r0, r5, lsl #4
; CHECK-NEXT: str.w r0, [r4, #37]
-; CHECK-NEXT: orr.w r0, r5, r6, lsl #4
-; CHECK-NEXT: str r0, [r4, #20]
-; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload
+; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: lsrl r0, r7, #28
+; CHECK-NEXT: orr.w r1, r7, r6, lsl #4
+; CHECK-NEXT: strd r0, r1, [r4, #16]
+; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: and r1, r0, #15
; CHECK-NEXT: lsrl r6, r1, #28
; CHECK-NEXT: strb r6, [r4, #24]
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt r3, #15
; CHECK-NEXT: and r0, r3, #15
-; CHECK-NEXT: orr.w r0, r0, r10, lsl #4
+; CHECK-NEXT: orr.w r0, r0, r2, lsl #4
; CHECK-NEXT: str r0, [r4, #12]
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: vpop {d8, d9, d10}
@@ -1753,13 +1809,13 @@ define arm_aapcs_vfpcc <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) {
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: vmov r0, s19
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: vmov r5, s18
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: vmov r0, s18
; CHECK-NEXT: vldr s20, .LCPI31_0
; CHECK-NEXT: vcmp.f32 s19, #0
-; CHECK-NEXT: add.w r12, r4, #48
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itttt lt
-; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: movlt r5, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r3, #0
@@ -1767,29 +1823,32 @@ define arm_aapcs_vfpcc <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r3, #-1
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: ittt gt
; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT: vmov r7, s16
+; CHECK-NEXT: movgt.w r5, #-1
+; CHECK-NEXT: strd r5, r1, [r4, #48]
; CHECK-NEXT: vmov r6, s17
-; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: vmov r7, s16
+; CHECK-NEXT: strd r2, r3, [r4, #56]
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcmp.f32 s18, #0
; CHECK-NEXT: add.w r12, r4, #32
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r3, #0
-; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
+; CHECK-NEXT: ittt gt
; CHECK-NEXT: movgt.w r3, #-1
; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: movgt.w r1, #-1
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
; CHECK-NEXT: mov r0, r6
@@ -1797,34 +1856,38 @@ define arm_aapcs_vfpcc <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) {
; CHECK-NEXT: vcmp.f32 s17, #0
; CHECK-NEXT: add.w r12, r4, #16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s17, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r3, #0
-; CHECK-NEXT: vcmp.f32 s17, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
+; CHECK-NEXT: ittt gt
; CHECK-NEXT: movgt.w r3, #-1
; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: movgt.w r1, #-1
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcmp.f32 s16, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r3, #0
-; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
+; CHECK-NEXT: ittt gt
; CHECK-NEXT: movgt.w r3, #-1
; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: movgt.w r1, #-1
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: stm r4!, {r0, r1, r2, r3}
; CHECK-NEXT: vpop {d8, d9, d10}
@@ -1865,55 +1928,57 @@ define arm_aapcs_vfpcc <2 x i1> @test_unsigned_v2f64_v2i1(<2 x double> %f) {
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI32_0
-; CHECK-NEXT: vmov r4, r8, d8
+; CHECK-NEXT: vmov r5, r6, d8
; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: vmov r10, r3, d0
-; CHECK-NEXT: str r3, [sp] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: vmov r10, r9, d0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: vldr d0, .LCPI32_1
+; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: vmov r4, r11, d0
+; CHECK-NEXT: mov r2, r4
+; CHECK-NEXT: mov r3, r11
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r9, r0
-; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: bl __aeabi_d2uiz
-; CHECK-NEXT: vldr d0, .LCPI32_1
-; CHECK-NEXT: cmp.w r9, #0
-; CHECK-NEXT: csel r7, r0, r9, ne
-; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: vmov r11, r3, d0
-; CHECK-NEXT: mov r1, r8
; CHECK-NEXT: vmov r6, r5, d9
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: mov r8, r3
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: cmp.w r8, #0
+; CHECK-NEXT: csel r0, r0, r8, ne
+; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r7, #1
-; CHECK-NEXT: and r0, r7, #1
-; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload
-; CHECK-NEXT: rsbs r0, r0, #0
+; CHECK-NEXT: movne r0, #1
; CHECK-NEXT: movs r7, #0
+; CHECK-NEXT: and r0, r0, #1
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: rsbs r0, r0, #0
+; CHECK-NEXT: mov r3, r9
; CHECK-NEXT: bfi r7, r0, #0, #1
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r2, r4
+; CHECK-NEXT: mov r3, r11
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl __aeabi_d2uiz
; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: csel r4, r0, r4, ne
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: csel r0, r0, r4, ne
+; CHECK-NEXT: cmp.w r8, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r4, #1
-; CHECK-NEXT: and r0, r4, #1
+; CHECK-NEXT: movne r0, #1
+; CHECK-NEXT: and r0, r0, #1
; CHECK-NEXT: rsbs r0, r0, #0
; CHECK-NEXT: bfi r7, r0, #1, #1
; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
@@ -1925,11 +1990,11 @@ define arm_aapcs_vfpcc <2 x i1> @test_unsigned_v2f64_v2i1(<2 x double> %f) {
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI32_0:
-; CHECK-NEXT: .long 0 @ double 0
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .LCPI32_1:
; CHECK-NEXT: .long 0 @ double 1
; CHECK-NEXT: .long 1072693248
+; CHECK-NEXT: .LCPI32_1:
+; CHECK-NEXT: .long 0 @ double 0
+; CHECK-NEXT: .long 0
%x = call <2 x i1> @llvm.fptoui.sat.v2f64.v2i1(<2 x double> %f)
ret <2 x i1> %x
}
@@ -1943,60 +2008,84 @@ define arm_aapcs_vfpcc <2 x i8> @test_unsigned_v2f64_v2i8(<2 x double> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #8
-; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: .pad #16
+; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI33_0
; CHECK-NEXT: vmov r6, r7, d9
-; CHECK-NEXT: vmov r11, r3, d0
-; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: strd r3, r2, [sp, #4] @ 8-byte Folded Spill
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r7
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: vldr d0, .LCPI33_1
-; CHECK-NEXT: cmp.w r8, #0
-; CHECK-NEXT: csel r9, r0, r8, ne
-; CHECK-NEXT: csel r8, r1, r8, ne
-; CHECK-NEXT: vmov r10, r3, d0
-; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: mov r10, r1
+; CHECK-NEXT: vmov r9, r8, d0
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: vmov r5, r4, d8
-; CHECK-NEXT: str r3, [sp] @ 4-byte Spill
-; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: vmov r11, r4, d8
+; CHECK-NEXT: lsrs r0, r0, #5
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r5, #0
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r8
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne.w r8, #0
-; CHECK-NEXT: movne.w r9, #255
-; CHECK-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r0, r11
; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r6, #1
; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: csel r7, r1, r6, ne
-; CHECK-NEXT: csel r6, r0, r6, ne
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r5, #255
; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne r6, #255
-; CHECK-NEXT: movne r7, #0
-; CHECK-NEXT: vmov q0[2], q0[0], r6, r9
-; CHECK-NEXT: vmov q0[3], q0[1], r7, r8
-; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r7, #1
+; CHECK-NEXT: ldrd r3, r2, [sp, #4] @ 8-byte Folded Reload
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r8, r4
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: lsrs r4, r0, #5
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r0, #0
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r0, #255
+; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r5
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r10, #0
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r10, #0
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r1, #0
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r1, #0
+; CHECK-NEXT: vmov q0[3], q0[1], r1, r10
+; CHECK-NEXT: add sp, #16
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -2021,60 +2110,84 @@ define arm_aapcs_vfpcc <2 x i13> @test_unsigned_v2f64_v2i13(<2 x double> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #8
-; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: .pad #16
+; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI34_0
; CHECK-NEXT: vmov r6, r7, d9
-; CHECK-NEXT: vmov r11, r3, d0
-; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: strd r3, r2, [sp, #4] @ 8-byte Folded Spill
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r7
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: vldr d0, .LCPI34_1
-; CHECK-NEXT: cmp.w r8, #0
-; CHECK-NEXT: csel r9, r0, r8, ne
-; CHECK-NEXT: csel r8, r1, r8, ne
-; CHECK-NEXT: vmov r10, r3, d0
-; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: mov r10, r1
+; CHECK-NEXT: vmov r9, r8, d0
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: vmov r5, r4, d8
-; CHECK-NEXT: str r3, [sp] @ 4-byte Spill
-; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: vmov r11, r4, d8
+; CHECK-NEXT: lsrs r0, r0, #5
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r5, #0
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r8
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne.w r8, #0
-; CHECK-NEXT: movwne r9, #8191
-; CHECK-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r0, r11
; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r6, #1
; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: csel r7, r1, r6, ne
-; CHECK-NEXT: csel r6, r0, r6, ne
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movwne r5, #8191
; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movwne r6, #8191
-; CHECK-NEXT: movne r7, #0
-; CHECK-NEXT: vmov q0[2], q0[0], r6, r9
-; CHECK-NEXT: vmov q0[3], q0[1], r7, r8
-; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r7, #1
+; CHECK-NEXT: ldrd r3, r2, [sp, #4] @ 8-byte Folded Reload
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r8, r4
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: lsrs r4, r0, #5
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r0, #0
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movwne r0, #8191
+; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r5
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r10, #0
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r10, #0
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r1, #0
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r1, #0
+; CHECK-NEXT: vmov q0[3], q0[1], r1, r10
+; CHECK-NEXT: add sp, #16
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -2099,60 +2212,84 @@ define arm_aapcs_vfpcc <2 x i16> @test_unsigned_v2f64_v2i16(<2 x double> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #8
-; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: .pad #16
+; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI35_0
; CHECK-NEXT: vmov r6, r7, d9
-; CHECK-NEXT: vmov r11, r3, d0
-; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: strd r3, r2, [sp, #4] @ 8-byte Folded Spill
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r7
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: vldr d0, .LCPI35_1
-; CHECK-NEXT: cmp.w r8, #0
-; CHECK-NEXT: csel r9, r0, r8, ne
-; CHECK-NEXT: csel r8, r1, r8, ne
-; CHECK-NEXT: vmov r10, r3, d0
-; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: mov r10, r1
+; CHECK-NEXT: vmov r9, r8, d0
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: vmov r5, r4, d8
-; CHECK-NEXT: str r3, [sp] @ 4-byte Spill
-; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: vmov r11, r4, d8
+; CHECK-NEXT: lsrs r0, r0, #5
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r5, #0
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r8
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne.w r8, #0
-; CHECK-NEXT: movwne r9, #65535
-; CHECK-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r0, r11
; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r6, #1
; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: csel r7, r1, r6, ne
-; CHECK-NEXT: csel r6, r0, r6, ne
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movwne r5, #65535
; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movwne r6, #65535
-; CHECK-NEXT: movne r7, #0
-; CHECK-NEXT: vmov q0[2], q0[0], r6, r9
-; CHECK-NEXT: vmov q0[3], q0[1], r7, r8
-; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r7, #1
+; CHECK-NEXT: ldrd r3, r2, [sp, #4] @ 8-byte Folded Reload
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r8, r4
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: lsrs r4, r0, #5
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r0, #0
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movwne r0, #65535
+; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r5
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r10, #0
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r10, #0
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r1, #0
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r1, #0
+; CHECK-NEXT: vmov q0[3], q0[1], r1, r10
+; CHECK-NEXT: add sp, #16
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -2181,61 +2318,79 @@ define arm_aapcs_vfpcc <2 x i19> @test_unsigned_v2f64_v2i19(<2 x double> %f) {
; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI36_0
-; CHECK-NEXT: vmov r11, r5, d8
-; CHECK-NEXT: vmov r6, r7, d0
-; CHECK-NEXT: str r5, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: vmov r11, r10, d8
+; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r2, r6
-; CHECK-NEXT: mov r3, r7
-; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: mov r1, r10
+; CHECK-NEXT: strd r2, r3, [sp, #8] @ 8-byte Folded Spill
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: vldr d0, .LCPI36_1
; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: vldr d0, .LCPI36_1
-; CHECK-NEXT: vmov r5, r8, d9
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: vmov r3, r2, d0
-; CHECK-NEXT: csel r9, r1, r4, ne
-; CHECK-NEXT: csel r10, r0, r4, ne
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: strd r3, r2, [sp, #4] @ 8-byte Folded Spill
-; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: vmov r5, r7, d0
+; CHECK-NEXT: mov r1, r10
+; CHECK-NEXT: vmov r8, r6, d9
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r4, #1
+; CHECK-NEXT: mov r2, r5
; CHECK-NEXT: mov r3, r7
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: lsrs r0, r0, #5
+; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: lsr.w r9, r0, #5
+; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: csel r6, r0, r4, ne
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: csel r4, r1, r4, ne
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r5, #0
+; CHECK-NEXT: ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT: mov r7, r1
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: ittt ne
-; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: movwne r6, #65535
-; CHECK-NEXT: movtne r6, #7
-; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: mov r3, r5
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: ittt ne
-; CHECK-NEXT: movwne r10, #65535
-; CHECK-NEXT: movtne r10, #7
-; CHECK-NEXT: movne.w r9, #0
-; CHECK-NEXT: vmov q0[2], q0[0], r10, r6
-; CHECK-NEXT: vmov q0[3], q0[1], r9, r4
+; CHECK-NEXT: mov r1, r10
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r6, #1
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: itt ne
+; CHECK-NEXT: movwne r5, #65535
+; CHECK-NEXT: movtne r5, #7
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r0, #0
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: itt ne
+; CHECK-NEXT: movwne r0, #65535
+; CHECK-NEXT: movtne r0, #7
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r7, #0
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r7, #0
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r1, #0
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r5
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r1, #0
+; CHECK-NEXT: vmov q0[3], q0[1], r1, r7
; CHECK-NEXT: add sp, #16
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
@@ -2243,11 +2398,11 @@ define arm_aapcs_vfpcc <2 x i19> @test_unsigned_v2f64_v2i19(<2 x double> %f) {
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI36_0:
-; CHECK-NEXT: .long 0 @ double 0
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .LCPI36_1:
; CHECK-NEXT: .long 0 @ double 524287
; CHECK-NEXT: .long 1092616188
+; CHECK-NEXT: .LCPI36_1:
+; CHECK-NEXT: .long 0 @ double 0
+; CHECK-NEXT: .long 0
%x = call <2 x i19> @llvm.fptoui.sat.v2f64.v2i19(<2 x double> %f)
ret <2 x i19> %x
}
@@ -2261,60 +2416,84 @@ define arm_aapcs_vfpcc <2 x i32> @test_unsigned_v2f64_v2i32_duplicate(<2 x doubl
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #8
-; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: .pad #16
+; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI37_0
; CHECK-NEXT: vmov r6, r7, d9
-; CHECK-NEXT: vmov r11, r3, d0
-; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: strd r3, r2, [sp, #4] @ 8-byte Folded Spill
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r7
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: vldr d0, .LCPI37_1
-; CHECK-NEXT: cmp.w r8, #0
-; CHECK-NEXT: csel r9, r0, r8, ne
-; CHECK-NEXT: csel r8, r1, r8, ne
-; CHECK-NEXT: vmov r10, r3, d0
-; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: mov r10, r1
+; CHECK-NEXT: vmov r9, r8, d0
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: vmov r5, r4, d8
-; CHECK-NEXT: str r3, [sp] @ 4-byte Spill
-; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: vmov r11, r4, d8
+; CHECK-NEXT: lsrs r0, r0, #5
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r5, #0
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r8
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne.w r8, #0
-; CHECK-NEXT: movne.w r9, #-1
-; CHECK-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r0, r11
; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r6, #1
; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: csel r7, r1, r6, ne
-; CHECK-NEXT: csel r6, r0, r6, ne
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r5, #-1
; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne.w r6, #-1
-; CHECK-NEXT: movne r7, #0
-; CHECK-NEXT: vmov q0[2], q0[0], r6, r9
-; CHECK-NEXT: vmov q0[3], q0[1], r7, r8
-; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r7, #1
+; CHECK-NEXT: ldrd r3, r2, [sp, #4] @ 8-byte Folded Reload
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r8, r4
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: lsrs r4, r0, #5
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r0, #0
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r5
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r10, #0
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r10, #0
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r1, #0
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r1, #0
+; CHECK-NEXT: vmov q0[3], q0[1], r1, r10
+; CHECK-NEXT: add sp, #16
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -2343,61 +2522,79 @@ define arm_aapcs_vfpcc <2 x i50> @test_unsigned_v2f64_v2i50(<2 x double> %f) {
; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI38_0
-; CHECK-NEXT: vmov r11, r5, d8
-; CHECK-NEXT: vmov r6, r7, d0
-; CHECK-NEXT: str r5, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: vmov r11, r10, d8
+; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r2, r6
-; CHECK-NEXT: mov r3, r7
-; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: mov r1, r10
+; CHECK-NEXT: strd r2, r3, [sp, #8] @ 8-byte Folded Spill
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: vldr d0, .LCPI38_1
; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: vldr d0, .LCPI38_1
-; CHECK-NEXT: vmov r5, r8, d9
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: vmov r3, r2, d0
-; CHECK-NEXT: csel r10, r0, r4, ne
-; CHECK-NEXT: csel r9, r1, r4, ne
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: strd r3, r2, [sp, #4] @ 8-byte Folded Spill
-; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: vmov r5, r7, d0
+; CHECK-NEXT: mov r1, r10
+; CHECK-NEXT: vmov r8, r6, d9
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r4, #1
+; CHECK-NEXT: mov r2, r5
; CHECK-NEXT: mov r3, r7
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: lsrs r0, r0, #5
+; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: lsr.w r9, r0, #5
+; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: csel r6, r1, r4, ne
-; CHECK-NEXT: csel r4, r0, r4, ne
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: mov r7, r1
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r7, #0
+; CHECK-NEXT: ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: ittt ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: movwne r6, #65535
-; CHECK-NEXT: movtne r6, #3
-; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: mov r3, r5
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: ittt ne
-; CHECK-NEXT: movwne r9, #65535
-; CHECK-NEXT: movtne r9, #3
-; CHECK-NEXT: movne.w r10, #-1
-; CHECK-NEXT: vmov q0[2], q0[0], r10, r4
-; CHECK-NEXT: vmov q0[3], q0[1], r9, r6
+; CHECK-NEXT: mov r1, r10
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r6, #1
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: itt ne
+; CHECK-NEXT: movwne r7, #65535
+; CHECK-NEXT: movtne r7, #3
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r1, #0
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: itt ne
+; CHECK-NEXT: movwne r1, #65535
+; CHECK-NEXT: movtne r1, #3
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r5, #0
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r5, #-1
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r0, #0
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r5
+; CHECK-NEXT: vmov q0[3], q0[1], r1, r7
; CHECK-NEXT: add sp, #16
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
@@ -2405,11 +2602,11 @@ define arm_aapcs_vfpcc <2 x i50> @test_unsigned_v2f64_v2i50(<2 x double> %f) {
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI38_0:
-; CHECK-NEXT: .long 0 @ double 0
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .LCPI38_1:
; CHECK-NEXT: .long 4294967288 @ double 1125899906842623
; CHECK-NEXT: .long 1125122047
+; CHECK-NEXT: .LCPI38_1:
+; CHECK-NEXT: .long 0 @ double 0
+; CHECK-NEXT: .long 0
%x = call <2 x i50> @llvm.fptoui.sat.v2f64.v2i50(<2 x double> %f)
ret <2 x i50> %x
}
@@ -2423,60 +2620,84 @@ define arm_aapcs_vfpcc <2 x i64> @test_unsigned_v2f64_v2i64(<2 x double> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #8
-; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: .pad #16
+; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI39_0
; CHECK-NEXT: vmov r6, r7, d9
-; CHECK-NEXT: vmov r11, r3, d0
-; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: strd r3, r2, [sp, #4] @ 8-byte Folded Spill
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r7
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: vldr d0, .LCPI39_1
-; CHECK-NEXT: cmp.w r8, #0
-; CHECK-NEXT: csel r9, r0, r8, ne
-; CHECK-NEXT: csel r8, r1, r8, ne
-; CHECK-NEXT: vmov r10, r3, d0
-; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: mov r10, r1
+; CHECK-NEXT: vmov r9, r8, d0
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: vmov r5, r4, d8
-; CHECK-NEXT: str r3, [sp] @ 4-byte Spill
-; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: vmov r11, r4, d8
+; CHECK-NEXT: lsrs r0, r0, #5
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r5, #0
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r8
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne.w r8, #-1
-; CHECK-NEXT: movne.w r9, #-1
-; CHECK-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r0, r11
; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: bl __aeabi_d2ulz
-; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r6, #1
; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: csel r7, r1, r6, ne
-; CHECK-NEXT: csel r6, r0, r6, ne
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r5, #-1
; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: itt ne
-; CHECK-NEXT: movne.w r6, #-1
-; CHECK-NEXT: movne.w r7, #-1
-; CHECK-NEXT: vmov q0[2], q0[0], r6, r9
-; CHECK-NEXT: vmov q0[3], q0[1], r7, r8
-; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r7, #1
+; CHECK-NEXT: ldrd r3, r2, [sp, #4] @ 8-byte Folded Reload
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r8, r4
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: lsrs r4, r0, #5
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r0, #0
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r5
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r10, #0
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r10, #-1
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r1, #0
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r1, #-1
+; CHECK-NEXT: vmov q0[3], q0[1], r1, r10
+; CHECK-NEXT: add sp, #16
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@@ -2503,188 +2724,194 @@ define arm_aapcs_vfpcc <2 x i100> @test_unsigned_v2f64_v2i100(<2 x double> %f) {
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: .pad #48
; CHECK-NEXT: sub sp, #48
-; CHECK-NEXT: vmov q4, q0
-; CHECK-NEXT: vldr d0, .LCPI40_0
-; CHECK-NEXT: vmov r11, r4, d8
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: str r3, [sp, #36] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vldr d0, .LCPI40_0
+; CHECK-NEXT: vmov r6, r5, d8
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: vmov r2, r7, d0
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r3, r7
; CHECK-NEXT: mov r9, r2
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: vldr d0, .LCPI40_1
+; CHECK-NEXT: mov r11, r0
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: vmov r2, r3, d0
+; CHECK-NEXT: str r2, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT: mov r10, r3
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r8, r0
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: mov r5, r4
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl __fixunsdfti
-; CHECK-NEXT: vldr d0, .LCPI40_1
; CHECK-NEXT: cmp.w r8, #0
-; CHECK-NEXT: str r3, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT: csel r4, r2, r8, ne
-; CHECK-NEXT: vmov r10, r3, d0
-; CHECK-NEXT: strd r1, r0, [sp, #16] @ 8-byte Folded Spill
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: str r5, [sp, #40] @ 4-byte Spill
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r8, r3
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: strd r1, r0, [sp, #20] @ 8-byte Folded Spill
+; CHECK-NEXT: csel r0, r2, r8, ne
+; CHECK-NEXT: str r3, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT: cmp.w r11, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: str r4, [r6, #8]
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: ldr r7, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: str r0, [r4, #8]
+; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: mov r2, r9
; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: str r7, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r11, r6
+; CHECK-NEXT: ldr r6, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: csel r4, r1, r0, ne
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: csel r0, r1, r0, ne
+; CHECK-NEXT: cmp.w r8, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: str r0, [r4, #4]
; CHECK-NEXT: mov r0, r11
; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: str.w r10, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT: strd r4, r11, [sp, #28] @ 8-byte Folded Spill
+; CHECK-NEXT: str r5, [sp, #36] @ 4-byte Spill
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: str r4, [r6, #4]
-; CHECK-NEXT: mov r5, r6
-; CHECK-NEXT: str r6, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: ldr r6, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: mov r3, r7
-; CHECK-NEXT: str.w r11, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r10
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: csel r4, r1, r0, ne
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r0, r11
-; CHECK-NEXT: mov r6, r8
-; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: vmov r8, r11, d9
+; CHECK-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: csel r0, r1, r0, ne
+; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: str r4, [r5]
-; CHECK-NEXT: mov r10, r9
-; CHECK-NEXT: str.w r9, [sp, #32] @ 4-byte Spill
-; CHECK-NEXT: mov r5, r7
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: str r0, [r4]
+; CHECK-NEXT: ldr r4, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: mov r5, r9
+; CHECK-NEXT: mov r3, r4
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r11
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r11
+; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: mov r6, r10
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: mov r10, r0
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r11
; CHECK-NEXT: bl __fixunsdfti
-; CHECK-NEXT: ldr r4, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT: cmp.w r9, #0
-; CHECK-NEXT: strd r3, r0, [sp, #16] @ 8-byte Folded Spill
-; CHECK-NEXT: csel r7, r1, r9, ne
-; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: add.w r12, sp, #16
+; CHECK-NEXT: cmp.w r10, #0
+; CHECK-NEXT: stm.w r12, {r0, r2, r3} @ 12-byte Folded Spill
+; CHECK-NEXT: csel r9, r1, r10, ne
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: mov r2, r4
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: str r6, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: mov r3, r4
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r9, #-1
+; CHECK-NEXT: mov r7, r5
+; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: mov r10, r4
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: ldr r5, [sp, #44] @ 4-byte Reload
; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r5
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r7, #-1
+; CHECK-NEXT: str r0, [sp] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: str r6, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: mov r2, r5
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r4
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: csel r9, r1, r0, ne
-; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: csel r4, r1, r0, ne
+; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload
; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r0, #-1
-; CHECK-NEXT: ldr.w r9, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT: lsrl r0, r7, #28
-; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: mov r3, r5
-; CHECK-NEXT: str.w r0, [r9, #16]
+; CHECK-NEXT: movne.w r4, #-1
; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: ldr r4, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r4
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: ldr r2, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT: csel r10, r1, r0, ne
+; CHECK-NEXT: str r4, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: lsrl r4, r9, #28
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r11
+; CHECK-NEXT: mov r2, r5
; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: csel r6, r1, r0, ne
+; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r10, #-1
-; CHECK-NEXT: orr.w r0, r7, r10, lsl #4
-; CHECK-NEXT: str.w r0, [r9, #20]
-; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: movne.w r6, #-1
+; CHECK-NEXT: ldr r5, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: orr.w r0, r9, r6, lsl #4
; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: mov r2, r4
-; CHECK-NEXT: mov r3, r5
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r5, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: csel r7, r1, r0, ne
+; CHECK-NEXT: strd r4, r0, [r5, #16]
; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r8, r6
+; CHECK-NEXT: ldr.w r9, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: mov r2, r9
; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r1, r11
+; CHECK-NEXT: ldr.w r11, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT: ldr.w r8, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: csel r0, r1, r0, ne
+; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r7, #15
-; CHECK-NEXT: and r1, r7, #15
-; CHECK-NEXT: lsrl r10, r1, #28
-; CHECK-NEXT: strb.w r10, [r9, #24]
-; CHECK-NEXT: ldr r6, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT: ldr r4, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT: ldrd r2, r3, [sp, #32] @ 8-byte Folded Reload
+; CHECK-NEXT: movne r0, #15
+; CHECK-NEXT: and r1, r0, #15
+; CHECK-NEXT: lsrl r6, r1, #28
+; CHECK-NEXT: strb r6, [r5, #24]
+; CHECK-NEXT: ldr r6, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT: ldr r4, [sp, #36] @ 4-byte Reload
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: csel r7, r1, r0, ne
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: ldr r1, [sp, #40] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: csel r0, r1, r0, ne
+; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r7, #15
-; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: and r0, r7, #15
+; CHECK-NEXT: movne r0, #15
+; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: and r0, r0, #15
; CHECK-NEXT: orr.w r0, r0, r1, lsl #4
-; CHECK-NEXT: str.w r0, [r9, #12]
+; CHECK-NEXT: str r0, [r5, #12]
; CHECK-NEXT: add sp, #48
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
@@ -2692,11 +2919,11 @@ define arm_aapcs_vfpcc <2 x i100> @test_unsigned_v2f64_v2i100(<2 x double> %f) {
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI40_0:
-; CHECK-NEXT: .long 0 @ double 0
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .LCPI40_1:
; CHECK-NEXT: .long 4294967295 @ double 1.2676506002282293E+30
; CHECK-NEXT: .long 1177550847
+; CHECK-NEXT: .LCPI40_1:
+; CHECK-NEXT: .long 0 @ double 0
+; CHECK-NEXT: .long 0
%x = call <2 x i100> @llvm.fptoui.sat.v2f64.v2i100(<2 x double> %f)
ret <2 x i100> %x
}
@@ -2710,185 +2937,196 @@ define arm_aapcs_vfpcc <2 x i128> @test_unsigned_v2f64_v2i128(<2 x double> %f) {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #24
-; CHECK-NEXT: sub sp, #24
+; CHECK-NEXT: .pad #32
+; CHECK-NEXT: sub sp, #32
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI41_0
; CHECK-NEXT: vmov r8, r7, d9
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: vmov r2, r9, d0
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r3, r9
-; CHECK-NEXT: mov r11, r2
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: str r0, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT: vmov r6, r4, d0
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: bl __fixunsdfti
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r4
+; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: vldr d0, .LCPI41_1
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: strd r1, r0, [sp, #8] @ 8-byte Folded Spill
-; CHECK-NEXT: csel r6, r3, r6, ne
-; CHECK-NEXT: vmov r10, r5, d0
-; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
+; CHECK-NEXT: mov r9, r0
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: str r5, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: vmov r10, r11, d0
; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r5
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: bl __fixunsdfti
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: strd r1, r0, [sp, #16] @ 8-byte Folded Spill
+; CHECK-NEXT: csel r0, r3, r5, ne
+; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: cmp.w r9, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r6, #-1
-; CHECK-NEXT: str r6, [r4, #28]
-; CHECK-NEXT: str.w r11, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: csel r5, r1, r0, ne
-; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: ldr r5, [sp, #24] @ 4-byte Reload
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r4
+; CHECK-NEXT: str r0, [r5, #28]
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: str r6, [sp, #28] @ 4-byte Spill
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r5, #-1
+; CHECK-NEXT: mov r9, r0
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: mov r3, r9
-; CHECK-NEXT: str r5, [r4, #24]
-; CHECK-NEXT: mov r5, r4
-; CHECK-NEXT: mov r4, r9
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: str.w r10, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: csel r9, r1, r0, ne
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r4
+; CHECK-NEXT: csel r0, r1, r0, ne
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: str r0, [r5, #24]
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: str r4, [sp] @ 4-byte Spill
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r9, #-1
-; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: mov r9, r0
; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: str.w r9, [r5, #20]
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: mov r3, r4
-; CHECK-NEXT: vmov r6, r11, d8
-; CHECK-NEXT: mov r9, r4
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r3, r11
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: csel r4, r1, r0, ne
+; CHECK-NEXT: mov r3, r4
+; CHECK-NEXT: vmov r6, r5, d8
+; CHECK-NEXT: csel r0, r1, r0, ne
+; CHECK-NEXT: cmp.w r9, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: ldr.w r9, [sp, #24] @ 4-byte Reload
; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: ldr r7, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: str.w r0, [r9, #20]
; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload
; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: ldr r4, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: mov r10, r0
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: mov r7, r11
+; CHECK-NEXT: mov r2, r4
+; CHECK-NEXT: str.w r11, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r11, r9
+; CHECK-NEXT: csel r0, r1, r0, ne
+; CHECK-NEXT: cmp.w r10, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: str r4, [r5, #16]
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: str.w r0, [r9, #16]
+; CHECK-NEXT: ldr.w r8, [sp, #28] @ 4-byte Reload
; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: ldr r5, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: mov r1, r11
+; CHECK-NEXT: ldr.w r9, [sp] @ 4-byte Reload
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r2, r8
; CHECK-NEXT: mov r3, r9
-; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r10, r0
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r2, r4
+; CHECK-NEXT: mov r3, r7
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r11
+; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl __fixunsdfti
-; CHECK-NEXT: cmp.w r8, #0
-; CHECK-NEXT: strd r1, r0, [sp, #8] @ 8-byte Folded Spill
-; CHECK-NEXT: csel r4, r3, r8, ne
-; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r7
-; CHECK-NEXT: mov r8, r7
-; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: strd r1, r0, [sp, #16] @ 8-byte Folded Spill
+; CHECK-NEXT: csel r0, r3, r7, ne
+; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: cmp.w r10, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: str.w r0, [r11, #12]
; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r2, r8
; CHECK-NEXT: mov r3, r9
-; CHECK-NEXT: str r4, [r7, #12]
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: csel r4, r1, r0, ne
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r11
+; CHECK-NEXT: mov r7, r11
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r2, r4
+; CHECK-NEXT: mov r10, r4
+; CHECK-NEXT: ldr r4, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: mov r11, r0
; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r3, r9
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: str r4, [r7, #8]
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r3, r4
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: csel r4, r1, r0, ne
+; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: csel r0, r1, r0, ne
+; CHECK-NEXT: cmp.w r11, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: str r0, [r7, #8]
; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r11
+; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r11, r0
; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r3, r9
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: str r4, [r7, #4]
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r3, r4
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: csel r4, r1, r0, ne
+; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: csel r0, r1, r0, ne
+; CHECK-NEXT: cmp.w r11, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: str r0, [r7, #4]
; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r11
+; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r3, r4
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: csel r0, r1, r0, ne
+; CHECK-NEXT: cmp.w r8, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: str r4, [r7]
-; CHECK-NEXT: add sp, #24
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: str r0, [r7]
+; CHECK-NEXT: add sp, #32
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI41_0:
-; CHECK-NEXT: .long 0 @ double 0
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .LCPI41_1:
; CHECK-NEXT: .long 4294967295 @ double 3.4028236692093843E+38
; CHECK-NEXT: .long 1206910975
+; CHECK-NEXT: .LCPI41_1:
+; CHECK-NEXT: .long 0 @ double 0
+; CHECK-NEXT: .long 0
%x = call <2 x i128> @llvm.fptoui.sat.v2f64.v2i128(<2 x double> %f)
ret <2 x i128> %x
}
@@ -3333,86 +3571,81 @@ define arm_aapcs_vfpcc <8 x i50> @test_unsigned_v8f16_v8i50(<8 x half> %f) {
; CHECK-NEXT: .pad #24
; CHECK-NEXT: sub sp, #24
; CHECK-NEXT: vmov q4, q0
-; CHECK-NEXT: mov r10, r0
-; CHECK-NEXT: vcvtb.f32.f16 s22, s18
-; CHECK-NEXT: str r0, [sp] @ 4-byte Spill
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: vcvtb.f32.f16 s22, s17
; CHECK-NEXT: vmov r0, s22
; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: vcvtt.f32.f16 s26, s17
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: vmov r0, s26
+; CHECK-NEXT: vcvtb.f32.f16 s24, s18
+; CHECK-NEXT: mov r2, r0
+; CHECK-NEXT: vmov r0, s24
; CHECK-NEXT: vcvtt.f32.f16 s20, s18
-; CHECK-NEXT: vcvtb.f32.f16 s24, s17
+; CHECK-NEXT: vldr s18, .LCPI48_0
; CHECK-NEXT: vcmp.f32 s22, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it lt
+; CHECK-NEXT: vcvtt.f32.f16 s26, s17
+; CHECK-NEXT: itt lt
+; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r1, #0
-; CHECK-NEXT: vmov r8, s20
-; CHECK-NEXT: vldr s18, .LCPI48_0
-; CHECK-NEXT: vmov r9, s24
-; CHECK-NEXT: mov r4, r1
-; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r6, #0
-; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: vcmp.f32 s26, #0
+; CHECK-NEXT: vcmp.f32 s22, s18
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r1, #0
-; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r5, r1
+; CHECK-NEXT: itt gt
+; CHECK-NEXT: movwgt r1, #65535
+; CHECK-NEXT: movtgt r1, #3
+; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: it gt
+; CHECK-NEXT: movgt.w r2, #-1
+; CHECK-NEXT: vmov r5, s26
+; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: vmov r6, s20
; CHECK-NEXT: bl __aeabi_f2ulz
+; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: vcmp.f32 s24, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: itt lt
; CHECK-NEXT: movlt r1, #0
-; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: movlt r7, #0
; CHECK-NEXT: vcmp.f32 s24, s18
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: vcmp.f32 s26, s18
-; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
; CHECK-NEXT: itt gt
; CHECK-NEXT: movwgt r1, #65535
; CHECK-NEXT: movtgt r1, #3
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: vcmp.f32 s22, s18
-; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r7, #-1
-; CHECK-NEXT: str r7, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: itt gt
-; CHECK-NEXT: movwgt r5, #65535
-; CHECK-NEXT: movtgt r5, #3
-; CHECK-NEXT: str r5, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: bl __aeabi_f2ulz
+; CHECK-NEXT: vcmp.f32 s26, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: itt lt
+; CHECK-NEXT: movlt r1, #0
+; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: vcmp.f32 s26, s18
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r6, #-1
-; CHECK-NEXT: str.w r6, [r10, #25]
; CHECK-NEXT: itt gt
-; CHECK-NEXT: movwgt r4, #65535
-; CHECK-NEXT: movtgt r4, #3
-; CHECK-NEXT: str r4, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: movwgt r1, #65535
+; CHECK-NEXT: movtgt r1, #3
+; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: str.w r7, [r4, #25]
+; CHECK-NEXT: it gt
+; CHECK-NEXT: movgt.w r0, #-1
+; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: vcmp.f32 s20, #0
; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s20, s18
; CHECK-NEXT: vcvtb.f32.f16 s20, s19
-; CHECK-NEXT: mov r5, r1
+; CHECK-NEXT: mov r7, r1
; CHECK-NEXT: vmov r0, s20
; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt r5, #0
+; CHECK-NEXT: movlt r7, #0
; CHECK-NEXT: movlt r6, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: ittt gt
; CHECK-NEXT: movgt.w r6, #-1
-; CHECK-NEXT: movwgt r5, #65535
-; CHECK-NEXT: movtgt r5, #3
+; CHECK-NEXT: movwgt r7, #65535
+; CHECK-NEXT: movtgt r7, #3
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: vcmp.f32 s20, #0
; CHECK-NEXT: mov r9, r0
@@ -3435,16 +3668,16 @@ define arm_aapcs_vfpcc <8 x i50> @test_unsigned_v8f16_v8i50(<8 x half> %f) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s20, s18
; CHECK-NEXT: vcvtb.f32.f16 s20, s16
-; CHECK-NEXT: mov r7, r1
+; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: vmov r0, s20
; CHECK-NEXT: itt lt
-; CHECK-NEXT: movlt r7, #0
+; CHECK-NEXT: movlt r5, #0
; CHECK-NEXT: movlt.w r10, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: ittt gt
; CHECK-NEXT: movgt.w r10, #-1
-; CHECK-NEXT: movwgt r7, #65535
-; CHECK-NEXT: movtgt r7, #3
+; CHECK-NEXT: movwgt r5, #65535
+; CHECK-NEXT: movtgt r5, #3
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: vcmp.f32 s20, #0
; CHECK-NEXT: mov r8, r1
@@ -3457,36 +3690,36 @@ define arm_aapcs_vfpcc <8 x i50> @test_unsigned_v8f16_v8i50(<8 x half> %f) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: ldr r4, [sp] @ 4-byte Reload
; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: lsrl r2, r11, #28
-; CHECK-NEXT: bfc r5, #18, #14
-; CHECK-NEXT: vcvtt.f32.f16 s16, s16
; CHECK-NEXT: str r0, [r4]
-; CHECK-NEXT: lsr.w r0, r7, #10
-; CHECK-NEXT: bfc r7, #18, #14
-; CHECK-NEXT: lsll r10, r7, #22
-; CHECK-NEXT: orr.w r1, r11, r7
+; CHECK-NEXT: lsrs r0, r5, #10
+; CHECK-NEXT: bfc r5, #18, #14
+; CHECK-NEXT: lsll r10, r5, #22
+; CHECK-NEXT: lsrl r2, r11, #28
+; CHECK-NEXT: orr.w r1, r11, r5
; CHECK-NEXT: str.w r1, [r4, #45]
; CHECK-NEXT: orr.w r1, r2, r10
; CHECK-NEXT: str.w r1, [r4, #41]
; CHECK-NEXT: strb.w r0, [r4, #49]
+; CHECK-NEXT: bfc r7, #18, #14
; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: lsrl r0, r5, #14
-; CHECK-NEXT: mov r7, r4
-; CHECK-NEXT: orr.w r1, r5, r9, lsl #4
+; CHECK-NEXT: vcvtt.f32.f16 s16, s16
+; CHECK-NEXT: lsrl r0, r7, #14
+; CHECK-NEXT: mov r5, r4
+; CHECK-NEXT: orr.w r1, r7, r9, lsl #4
; CHECK-NEXT: str.w r1, [r4, #37]
; CHECK-NEXT: str.w r0, [r4, #33]
-; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: bfc r0, #18, #14
; CHECK-NEXT: orr.w r0, r0, r6, lsl #18
; CHECK-NEXT: str.w r0, [r4, #29]
; CHECK-NEXT: vmov r0, s16
-; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
; CHECK-NEXT: ldr.w r9, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: ldr r4, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: ldr r4, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: bfc r1, #18, #14
; CHECK-NEXT: bfc r3, #18, #14
; CHECK-NEXT: mov r6, r9
@@ -3496,7 +3729,7 @@ define arm_aapcs_vfpcc <8 x i50> @test_unsigned_v8f16_v8i50(<8 x half> %f) {
; CHECK-NEXT: movwgt r8, #65535
; CHECK-NEXT: movtgt r8, #3
; CHECK-NEXT: orrs r1, r3
-; CHECK-NEXT: str r1, [r7, #20]
+; CHECK-NEXT: str r1, [r5, #20]
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: vcmp.f32 s16, #0
; CHECK-NEXT: orr.w r2, r6, r4
@@ -3510,18 +3743,19 @@ define arm_aapcs_vfpcc <8 x i50> @test_unsigned_v8f16_v8i50(<8 x half> %f) {
; CHECK-NEXT: itt gt
; CHECK-NEXT: movwgt r1, #65535
; CHECK-NEXT: movtgt r1, #3
-; CHECK-NEXT: str r2, [r7, #16]
-; CHECK-NEXT: lsr.w r2, r5, #10
-; CHECK-NEXT: strb r2, [r7, #24]
+; CHECK-NEXT: str r2, [r5, #16]
+; CHECK-NEXT: lsrs r2, r7, #10
+; CHECK-NEXT: strb r2, [r5, #24]
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: mov r2, r0
; CHECK-NEXT: bfc r1, #18, #14
-; CHECK-NEXT: lsrl r2, r1, #14
; CHECK-NEXT: orr.w r0, r8, r0, lsl #18
+; CHECK-NEXT: lsrl r2, r1, #14
; CHECK-NEXT: orr.w r1, r1, r9, lsl #4
-; CHECK-NEXT: strd r2, r1, [r7, #8]
-; CHECK-NEXT: str r0, [r7, #4]
+; CHECK-NEXT: strd r2, r1, [r5, #8]
+; CHECK-NEXT: str r0, [r5, #4]
; CHECK-NEXT: add sp, #24
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT: add sp, #4
@@ -3544,38 +3778,38 @@ define arm_aapcs_vfpcc <8 x i64> @test_unsigned_v8f16_v8i64(<8 x half> %f) {
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
; CHECK-NEXT: vmov q4, q0
-; CHECK-NEXT: vcvtt.f32.f16 s22, s19
-; CHECK-NEXT: vmov r0, s22
+; CHECK-NEXT: vcvtt.f32.f16 s20, s19
+; CHECK-NEXT: vmov r0, s20
; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: vcvtb.f32.f16 s26, s19
+; CHECK-NEXT: vcvtb.f32.f16 s22, s19
; CHECK-NEXT: mov r9, r0
-; CHECK-NEXT: vmov r0, s26
+; CHECK-NEXT: vmov r0, s22
; CHECK-NEXT: vldr s28, .LCPI49_0
-; CHECK-NEXT: vcmp.f32 s22, #0
+; CHECK-NEXT: vcmp.f32 s20, #0
; CHECK-NEXT: mov r8, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vcvtt.f32.f16 s20, s16
; CHECK-NEXT: vcvtt.f32.f16 s24, s18
+; CHECK-NEXT: vcvtt.f32.f16 s26, s16
; CHECK-NEXT: itt lt
; CHECK-NEXT: movlt.w r9, #0
; CHECK-NEXT: movlt.w r8, #0
-; CHECK-NEXT: vcmp.f32 s22, s28
+; CHECK-NEXT: vcmp.f32 s20, s28
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vmov r6, s20
; CHECK-NEXT: vmov r4, s24
+; CHECK-NEXT: vmov r6, s26
; CHECK-NEXT: itt gt
; CHECK-NEXT: movgt.w r8, #-1
; CHECK-NEXT: movgt.w r9, #-1
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: mov r10, r0
-; CHECK-NEXT: vcmp.f32 s26, #0
+; CHECK-NEXT: vcmp.f32 s22, #0
; CHECK-NEXT: mov r11, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: itt lt
; CHECK-NEXT: movlt.w r11, #0
; CHECK-NEXT: movlt.w r10, #0
-; CHECK-NEXT: vcmp.f32 s26, s28
+; CHECK-NEXT: vcmp.f32 s22, s28
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt gt
; CHECK-NEXT: movgt.w r10, #-1
@@ -3599,12 +3833,12 @@ define arm_aapcs_vfpcc <8 x i64> @test_unsigned_v8f16_v8i64(<8 x half> %f) {
; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: mov r6, r1
-; CHECK-NEXT: vcmp.f32 s20, #0
+; CHECK-NEXT: vcmp.f32 s26, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt lt
; CHECK-NEXT: movlt r7, #0
; CHECK-NEXT: movlt r6, #0
-; CHECK-NEXT: vcmp.f32 s20, s28
+; CHECK-NEXT: vcmp.f32 s26, s28
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itt gt
; CHECK-NEXT: movgt.w r6, #-1
@@ -3694,86 +3928,61 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12}
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12}
; CHECK-NEXT: .pad #56
-; CHECK-NEXT: sub sp, #56
-; CHECK-NEXT: vmov q4, q0
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: vcvtb.f32.f16 s22, s17
-; CHECK-NEXT: vmov r0, s22
-; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: vcvtb.f32.f16 s24, s18
-; CHECK-NEXT: mov r8, r0
-; CHECK-NEXT: vmov r0, s24
-; CHECK-NEXT: vldr s20, .LCPI50_0
-; CHECK-NEXT: vcmp.f32 s22, #0
-; CHECK-NEXT: mov r9, r1
-; CHECK-NEXT: mov r10, r2
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt lt
-; CHECK-NEXT: movlt.w r10, #0
-; CHECK-NEXT: movlt.w r9, #0
-; CHECK-NEXT: movlt.w r8, #0
-; CHECK-NEXT: movlt r3, #0
-; CHECK-NEXT: vcmp.f32 s22, s20
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt r3, #15
-; CHECK-NEXT: str r3, [sp, #52] @ 4-byte Spill
-; CHECK-NEXT: ittt gt
-; CHECK-NEXT: movgt.w r8, #-1
-; CHECK-NEXT: movgt.w r9, #-1
-; CHECK-NEXT: movgt.w r10, #-1
-; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: vcvtb.f32.f16 s22, s19
-; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: sub sp, #56
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: vcvtt.f32.f16 s22, s16
; CHECK-NEXT: vmov r0, s22
-; CHECK-NEXT: mov r6, r1
-; CHECK-NEXT: vcmp.f32 s24, #0
-; CHECK-NEXT: mov r7, r2
+; CHECK-NEXT: bl __fixunssfti
+; CHECK-NEXT: vcvtt.f32.f16 s24, s17
+; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: vmov r0, s24
+; CHECK-NEXT: vldr s20, .LCPI50_0
+; CHECK-NEXT: vcmp.f32 s22, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt r7, #0
-; CHECK-NEXT: movlt r6, #0
-; CHECK-NEXT: movlt r5, #0
+; CHECK-NEXT: movlt r1, #0
+; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r3, #0
-; CHECK-NEXT: vcmp.f32 s24, s20
+; CHECK-NEXT: vcmp.f32 s22, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt r3, #15
-; CHECK-NEXT: str r3, [sp, #48] @ 4-byte Spill
-; CHECK-NEXT: ittt gt
-; CHECK-NEXT: movgt.w r5, #-1
-; CHECK-NEXT: movgt.w r6, #-1
+; CHECK-NEXT: str r3, [sp, #52] @ 4-byte Spill
+; CHECK-NEXT: it gt
+; CHECK-NEXT: movgt.w r2, #-1
+; CHECK-NEXT: str r2, [sp, #48] @ 4-byte Spill
+; CHECK-NEXT: it gt
+; CHECK-NEXT: movgt.w r1, #-1
+; CHECK-NEXT: str r1, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r7, #-1
+; CHECK-NEXT: str r7, [sp, #40] @ 4-byte Spill
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: vcmp.f32 s22, #0
+; CHECK-NEXT: vcmp.f32 s24, #0
+; CHECK-NEXT: vcvtt.f32.f16 s22, s18
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vcmp.f32 s22, s20
; CHECK-NEXT: itttt lt
-; CHECK-NEXT: movlt r3, #0
-; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
+; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r2, #0
+; CHECK-NEXT: movlt r3, #0
+; CHECK-NEXT: vcmp.f32 s24, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r2, #-1
-; CHECK-NEXT: str.w r2, [r4, #83]
+; CHECK-NEXT: movgt r3, #15
+; CHECK-NEXT: str r3, [sp, #36] @ 4-byte Spill
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: str.w r1, [r4, #79]
+; CHECK-NEXT: movgt.w r2, #-1
+; CHECK-NEXT: str r2, [sp, #32] @ 4-byte Spill
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: vcvtt.f32.f16 s22, s16
-; CHECK-NEXT: str.w r0, [r4, #75]
+; CHECK-NEXT: str r0, [sp, #24] @ 4-byte Spill
; CHECK-NEXT: vmov r0, s22
-; CHECK-NEXT: str.w r7, [r4, #58]
-; CHECK-NEXT: str.w r6, [r4, #54]
-; CHECK-NEXT: str.w r5, [r4, #50]
-; CHECK-NEXT: str.w r10, [r4, #33]
-; CHECK-NEXT: str.w r9, [r4, #29]
-; CHECK-NEXT: str.w r8, [r4, #25]
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt r3, #15
-; CHECK-NEXT: str r3, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT: movgt.w r1, #-1
+; CHECK-NEXT: str r1, [sp, #28] @ 4-byte Spill
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcmp.f32 s22, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -3786,85 +3995,115 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt r3, #15
-; CHECK-NEXT: str r3, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT: str r3, [sp, #20] @ 4-byte Spill
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r2, #-1
-; CHECK-NEXT: str r2, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: str r1, [sp, #32] @ 4-byte Spill
+; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: vcvtt.f32.f16 s22, s17
-; CHECK-NEXT: str r0, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT: vcvtb.f32.f16 s22, s17
+; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: vmov r0, s22
; CHECK-NEXT: bl __fixunssfti
+; CHECK-NEXT: vcvtb.f32.f16 s18, s18
+; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: mov r8, r1
; CHECK-NEXT: vcmp.f32 s22, #0
-; CHECK-NEXT: mov r5, r1
+; CHECK-NEXT: mov r6, r2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itttt lt
-; CHECK-NEXT: movlt r5, #0
-; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: movlt r2, #0
+; CHECK-NEXT: movlt r6, #0
+; CHECK-NEXT: movlt.w r8, #0
+; CHECK-NEXT: movlt.w r9, #0
; CHECK-NEXT: movlt r3, #0
; CHECK-NEXT: vcmp.f32 s22, s20
-; CHECK-NEXT: vcvtt.f32.f16 s18, s18
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt r3, #15
-; CHECK-NEXT: str r3, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r2, #-1
-; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: ittt gt
+; CHECK-NEXT: movgt.w r9, #-1
+; CHECK-NEXT: movgt.w r8, #-1
+; CHECK-NEXT: movgt.w r6, #-1
+; CHECK-NEXT: bl __fixunssfti
+; CHECK-NEXT: vcmp.f32 s18, #0
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s18, s20
+; CHECK-NEXT: vcvtb.f32.f16 s18, s19
+; CHECK-NEXT: mov r11, r1
; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: mov r7, r2
+; CHECK-NEXT: itttt lt
+; CHECK-NEXT: movlt r7, #0
+; CHECK-NEXT: movlt.w r11, #0
+; CHECK-NEXT: movlt r5, #0
+; CHECK-NEXT: movlt r3, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
+; CHECK-NEXT: movgt r3, #15
+; CHECK-NEXT: str r3, [sp] @ 4-byte Spill
+; CHECK-NEXT: ittt gt
; CHECK-NEXT: movgt.w r5, #-1
+; CHECK-NEXT: movgt.w r11, #-1
+; CHECK-NEXT: movgt.w r7, #-1
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcmp.f32 s18, #0
-; CHECK-NEXT: mov r9, r1
-; CHECK-NEXT: mov r8, r2
+; CHECK-NEXT: mov r10, r3
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: itttt lt
+; CHECK-NEXT: movlt.w r10, #0
; CHECK-NEXT: movlt r0, #0
-; CHECK-NEXT: movlt.w r9, #0
-; CHECK-NEXT: movlt.w r8, #0
-; CHECK-NEXT: movlt r3, #0
+; CHECK-NEXT: movlt r1, #0
+; CHECK-NEXT: movlt r2, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: it gt
+; CHECK-NEXT: movgt.w r2, #-1
+; CHECK-NEXT: str.w r2, [r4, #83]
+; CHECK-NEXT: it gt
+; CHECK-NEXT: movgt.w r1, #-1
+; CHECK-NEXT: str.w r1, [r4, #79]
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt r3, #15
-; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: ittt gt
-; CHECK-NEXT: movgt.w r8, #-1
-; CHECK-NEXT: movgt.w r9, #-1
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: vcvtt.f32.f16 s18, s19
-; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: str.w r0, [r4, #75]
; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: str.w r7, [r4, #58]
+; CHECK-NEXT: str.w r11, [r4, #54]
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: str.w r5, [r4, #50]
+; CHECK-NEXT: str.w r6, [r4, #33]
+; CHECK-NEXT: str.w r8, [r4, #29]
+; CHECK-NEXT: str.w r9, [r4, #25]
+; CHECK-NEXT: it gt
+; CHECK-NEXT: movgt.w r10, #15
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcvtb.f32.f16 s16, s16
-; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: vmov r0, s16
-; CHECK-NEXT: mov r11, r1
+; CHECK-NEXT: mov r7, r1
; CHECK-NEXT: vcmp.f32 s18, #0
-; CHECK-NEXT: mov r10, r2
-; CHECK-NEXT: mov r7, r3
+; CHECK-NEXT: mov r6, r2
+; CHECK-NEXT: mov r8, r3
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itttt lt
-; CHECK-NEXT: movlt.w r11, #0
-; CHECK-NEXT: movlt r6, #0
-; CHECK-NEXT: movlt.w r10, #0
; CHECK-NEXT: movlt r7, #0
+; CHECK-NEXT: movlt r5, #0
+; CHECK-NEXT: movlt r6, #0
+; CHECK-NEXT: movlt.w r8, #0
; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: itttt gt
-; CHECK-NEXT: movgt r7, #15
-; CHECK-NEXT: movgt.w r10, #-1
+; CHECK-NEXT: movgt.w r8, #15
; CHECK-NEXT: movgt.w r6, #-1
-; CHECK-NEXT: movgt.w r11, #-1
+; CHECK-NEXT: movgt.w r5, #-1
+; CHECK-NEXT: movgt.w r7, #-1
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcmp.f32 s16, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -3884,65 +4123,65 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: str r0, [r4]
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: lsrl r0, r11, #28
-; CHECK-NEXT: and r1, r7, #15
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: lsrl r0, r7, #28
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: orr.w r1, r7, r6, lsl #4
+; CHECK-NEXT: str.w r1, [r4, #95]
+; CHECK-NEXT: and r1, r8, #15
; CHECK-NEXT: str.w r0, [r4, #91]
-; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r2
-; CHECK-NEXT: lsrl r0, r9, #28
-; CHECK-NEXT: str.w r0, [r4, #66]
-; CHECK-NEXT: ldr.w lr, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: mov r0, lr
-; CHECK-NEXT: lsrl r0, r5, #28
-; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: str.w r0, [r4, #41]
-; CHECK-NEXT: ldr.w r12, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT: ldr r5, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r12
-; CHECK-NEXT: lsrl r0, r5, #28
-; CHECK-NEXT: str r0, [r4, #16]
-; CHECK-NEXT: orr.w r0, r11, r10, lsl #4
-; CHECK-NEXT: lsrl r10, r1, #28
-; CHECK-NEXT: str.w r0, [r4, #95]
-; CHECK-NEXT: strb.w r10, [r4, #99]
-; CHECK-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT: and r0, r0, #15
-; CHECK-NEXT: orr.w r0, r0, r6, lsl #4
+; CHECK-NEXT: and r0, r10, #15
+; CHECK-NEXT: lsrl r6, r1, #28
+; CHECK-NEXT: strb.w r6, [r4, #99]
+; CHECK-NEXT: orr.w r0, r0, r5, lsl #4
; CHECK-NEXT: str.w r0, [r4, #87]
-; CHECK-NEXT: orr.w r0, r9, r8, lsl #4
-; CHECK-NEXT: str.w r0, [r4, #70]
-; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: lsrl r0, r1, #28
+; CHECK-NEXT: orr.w r1, r1, r2, lsl #4
+; CHECK-NEXT: str.w r1, [r4, #70]
+; CHECK-NEXT: str.w r0, [r4, #66]
+; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: and r1, r0, #15
-; CHECK-NEXT: lsrl r8, r1, #28
-; CHECK-NEXT: strb.w r8, [r4, #74]
-; CHECK-NEXT: ldr r0, [sp, #48] @ 4-byte Reload
+; CHECK-NEXT: lsrl r2, r1, #28
+; CHECK-NEXT: strb.w r2, [r4, #74]
+; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload
; CHECK-NEXT: and r0, r0, #15
-; CHECK-NEXT: orr.w r0, r0, r2, lsl #4
+; CHECK-NEXT: orr.w r0, r0, r7, lsl #4
; CHECK-NEXT: str.w r0, [r4, #62]
-; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: orr.w r0, r0, r2, lsl #4
-; CHECK-NEXT: str.w r0, [r4, #45]
-; CHECK-NEXT: ldr r0, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: ldr r7, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: lsrl r0, r1, #28
+; CHECK-NEXT: orr.w r1, r1, r2, lsl #4
+; CHECK-NEXT: str.w r1, [r4, #45]
+; CHECK-NEXT: str.w r0, [r4, #41]
+; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
; CHECK-NEXT: and r1, r0, #15
; CHECK-NEXT: lsrl r2, r1, #28
; CHECK-NEXT: strb.w r2, [r4, #49]
-; CHECK-NEXT: ldr r0, [sp, #52] @ 4-byte Reload
+; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: and r0, r0, #15
-; CHECK-NEXT: orr.w r0, r0, lr, lsl #4
+; CHECK-NEXT: orr.w r0, r0, r7, lsl #4
; CHECK-NEXT: str.w r0, [r4, #37]
-; CHECK-NEXT: ldr r2, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT: orr.w r0, r5, r2, lsl #4
-; CHECK-NEXT: str r0, [r4, #20]
-; CHECK-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT: ldr r7, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT: ldr r2, [sp, #48] @ 4-byte Reload
+; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: lsrl r0, r1, #28
+; CHECK-NEXT: orr.w r1, r1, r2, lsl #4
+; CHECK-NEXT: strd r0, r1, [r4, #16]
+; CHECK-NEXT: ldr r0, [sp, #52] @ 4-byte Reload
; CHECK-NEXT: and r1, r0, #15
; CHECK-NEXT: lsrl r2, r1, #28
; CHECK-NEXT: strb r2, [r4, #24]
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt r3, #15
; CHECK-NEXT: and r0, r3, #15
-; CHECK-NEXT: orr.w r0, r0, r12, lsl #4
+; CHECK-NEXT: orr.w r0, r0, r7, lsl #4
; CHECK-NEXT: str r0, [r4, #12]
; CHECK-NEXT: add sp, #56
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12}
@@ -3967,61 +4206,64 @@ define arm_aapcs_vfpcc <8 x i128> @test_unsigned_v8f16_v8i128(<8 x half> %f) {
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: vcvtt.f32.f16 s24, s19
-; CHECK-NEXT: vcvtb.f32.f16 s22, s16
-; CHECK-NEXT: vmov r0, s24
+; CHECK-NEXT: vcvtt.f32.f16 s22, s19
+; CHECK-NEXT: vmov r0, s22
+; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcvtb.f32.f16 s28, s19
+; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: vmov r0, s28
; CHECK-NEXT: vldr s20, .LCPI51_0
-; CHECK-NEXT: vmov r5, s22
-; CHECK-NEXT: vmov r7, s28
-; CHECK-NEXT: vcvtt.f32.f16 s26, s18
-; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: vcmp.f32 s24, #0
-; CHECK-NEXT: add.w r12, r4, #112
+; CHECK-NEXT: vcmp.f32 s22, #0
+; CHECK-NEXT: vcvtt.f32.f16 s24, s18
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s22, s20
+; CHECK-NEXT: vcvtb.f32.f16 s26, s18
; CHECK-NEXT: itttt lt
-; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: movlt r6, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r3, #0
-; CHECK-NEXT: vcmp.f32 s24, s20
-; CHECK-NEXT: vcvtb.f32.f16 s18, s18
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r3, #-1
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: ittt gt
; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: vmov r6, s26
+; CHECK-NEXT: movgt.w r6, #-1
+; CHECK-NEXT: strd r6, r1, [r4, #112]
+; CHECK-NEXT: vmov r7, s24
+; CHECK-NEXT: vmov r5, s26
+; CHECK-NEXT: vcvtt.f32.f16 s18, s17
+; CHECK-NEXT: strd r2, r3, [r4, #120]
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcmp.f32 s28, #0
; CHECK-NEXT: add.w r12, r4, #96
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s28, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r3, #0
-; CHECK-NEXT: vcmp.f32 s28, s20
-; CHECK-NEXT: vcvtt.f32.f16 s24, s17
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r3, #-1
-; CHECK-NEXT: ittt gt
+; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r2, #-1
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: itt gt
; CHECK-NEXT: movgt.w r1, #-1
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: vmov r7, s18
+; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: vmov r6, s18
+; CHECK-NEXT: vcvtb.f32.f16 s22, s17
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: vcmp.f32 s26, #0
+; CHECK-NEXT: vcmp.f32 s24, #0
; CHECK-NEXT: add.w r12, r4, #80
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vcmp.f32 s26, s20
+; CHECK-NEXT: vcmp.f32 s24, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
@@ -4030,105 +4272,116 @@ define arm_aapcs_vfpcc <8 x i128> @test_unsigned_v8f16_v8i128(<8 x half> %f) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r3, #-1
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: ittt gt
; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: movgt.w r1, #-1
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: vmov r6, s24
-; CHECK-NEXT: vcvtb.f32.f16 s26, s17
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: vcvtt.f32.f16 s24, s16
+; CHECK-NEXT: vmov r7, s22
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: vcmp.f32 s18, #0
+; CHECK-NEXT: vcmp.f32 s26, #0
; CHECK-NEXT: add.w r12, r4, #64
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s26, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r3, #0
-; CHECK-NEXT: vcmp.f32 s18, s20
-; CHECK-NEXT: vcvtt.f32.f16 s16, s16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r3, #-1
-; CHECK-NEXT: ittt gt
+; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r2, #-1
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: itt gt
; CHECK-NEXT: movgt.w r1, #-1
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: vmov r7, s26
+; CHECK-NEXT: vmov r5, s24
+; CHECK-NEXT: vcvtb.f32.f16 s16, s16
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: vcmp.f32 s24, #0
+; CHECK-NEXT: vcmp.f32 s18, #0
; CHECK-NEXT: add.w r12, r4, #48
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r3, #0
-; CHECK-NEXT: vcmp.f32 s24, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r3, #-1
-; CHECK-NEXT: ittt gt
+; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r2, #-1
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: itt gt
; CHECK-NEXT: movgt.w r1, #-1
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: vmov r6, s16
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: vcmp.f32 s26, #0
+; CHECK-NEXT: vcmp.f32 s22, #0
; CHECK-NEXT: add.w r12, r4, #32
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s22, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r3, #0
-; CHECK-NEXT: vcmp.f32 s26, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
+; CHECK-NEXT: ittt gt
; CHECK-NEXT: movgt.w r3, #-1
; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: movgt.w r1, #-1
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: vcmp.f32 s16, #0
+; CHECK-NEXT: vcmp.f32 s24, #0
; CHECK-NEXT: add.w r12, r4, #16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s24, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r3, #0
-; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
+; CHECK-NEXT: ittt gt
; CHECK-NEXT: movgt.w r3, #-1
; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: movgt.w r1, #-1
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3}
-; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: vcmp.f32 s22, #0
+; CHECK-NEXT: vcmp.f32 s16, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: itttt lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: movlt r3, #0
-; CHECK-NEXT: vcmp.f32 s22, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: itttt gt
+; CHECK-NEXT: ittt gt
; CHECK-NEXT: movgt.w r3, #-1
; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: movgt.w r1, #-1
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: stm r4!, {r0, r1, r2, r3}
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll
index 96aff0233e4d9a..9738f7ade6fe9d 100644
--- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll
@@ -623,9 +623,9 @@ define arm_aapcs_vfpcc void @three_pointer_iv_v4i32(ptr nocapture readonly %x, p
; CHECK-NEXT: vldrw.u32 q3, [r0, q0]
; CHECK-NEXT: vldrw.u32 q4, [r0, q1, uxtw #2]
; CHECK-NEXT: vldrw.u32 q5, [r0, q2]
-; CHECK-NEXT: adds r0, #48
-; CHECK-NEXT: vmul.i32 q3, q4, q3
; CHECK-NEXT: subs r2, #4
+; CHECK-NEXT: vmul.i32 q3, q4, q3
+; CHECK-NEXT: add.w r0, r0, #48
; CHECK-NEXT: vmul.i32 q5, q4, q5
; CHECK-NEXT: vmul.i32 q4, q4, r3
; CHECK-NEXT: vstrw.32 q4, [r1, q1, uxtw #2]
@@ -705,9 +705,9 @@ define arm_aapcs_vfpcc void @three_pointer_iv_v4i8(ptr nocapture readonly %x, pt
; CHECK-NEXT: vldrb.u32 q3, [r0, q0]
; CHECK-NEXT: vldrb.u32 q4, [r0, q1]
; CHECK-NEXT: vldrb.u32 q5, [r0, q2]
-; CHECK-NEXT: adds r0, #12
-; CHECK-NEXT: vmul.i32 q3, q4, q3
; CHECK-NEXT: subs r2, #4
+; CHECK-NEXT: vmul.i32 q3, q4, q3
+; CHECK-NEXT: add.w r0, r0, #12
; CHECK-NEXT: vmul.i32 q5, q4, q5
; CHECK-NEXT: vmul.i32 q4, q4, r3
; CHECK-NEXT: vstrb.32 q4, [r1, q1]
@@ -793,9 +793,9 @@ define arm_aapcs_vfpcc void @three_pointer_iv_v8i16(ptr nocapture readonly %x, p
; CHECK-NEXT: vldrh.u16 q3, [r0, q0]
; CHECK-NEXT: vldrh.u16 q4, [r0, q1, uxtw #1]
; CHECK-NEXT: vldrh.u16 q5, [r0, q2]
-; CHECK-NEXT: adds r0, #48
-; CHECK-NEXT: vmul.i16 q3, q4, q3
; CHECK-NEXT: subs r2, #4
+; CHECK-NEXT: vmul.i16 q3, q4, q3
+; CHECK-NEXT: add.w r0, r0, #48
; CHECK-NEXT: vmul.i16 q5, q4, q5
; CHECK-NEXT: vmul.i16 q4, q4, r3
; CHECK-NEXT: vstrh.16 q4, [r1, q1, uxtw #1]
@@ -887,9 +887,9 @@ define arm_aapcs_vfpcc void @three_pointer_iv_v16i8(ptr nocapture readonly %x, p
; CHECK-NEXT: vldrb.u8 q3, [r0, q0]
; CHECK-NEXT: vldrb.u8 q4, [r0, q1]
; CHECK-NEXT: vldrb.u8 q5, [r0, q2]
-; CHECK-NEXT: adds r0, #48
-; CHECK-NEXT: vmul.i8 q3, q4, q3
; CHECK-NEXT: subs r2, #4
+; CHECK-NEXT: vmul.i8 q3, q4, q3
+; CHECK-NEXT: add.w r0, r0, #48
; CHECK-NEXT: vmul.i8 q5, q4, q5
; CHECK-NEXT: vmul.i8 q4, q4, r3
; CHECK-NEXT: vstrb.8 q4, [r1, q1]
diff --git a/llvm/test/CodeGen/Thumb2/mve-laneinterleaving.ll b/llvm/test/CodeGen/Thumb2/mve-laneinterleaving.ll
index acbe48f9e59271..fe28f785623ed5 100644
--- a/llvm/test/CodeGen/Thumb2/mve-laneinterleaving.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-laneinterleaving.ll
@@ -307,83 +307,82 @@ define arm_aapcs_vfpcc <4 x i32> @ext_ops_trunc_i32(<4 x i32> %a, <4 x i32> %b)
; CHECK-NEXT: vmov.f32 s10, s7
; CHECK-NEXT: vmov r10, s8
; CHECK-NEXT: vmov.f32 s8, s6
+; CHECK-NEXT: vmov r7, s2
+; CHECK-NEXT: vmov.f32 s2, s1
; CHECK-NEXT: vmov.f32 s6, s5
; CHECK-NEXT: vmov r2, s8
; CHECK-NEXT: asr.w r0, r10, #31
-; CHECK-NEXT: adds.w r6, r10, r2
-; CHECK-NEXT: eor.w r7, r10, r2
+; CHECK-NEXT: asrs r5, r7, #31
+; CHECK-NEXT: adds.w r4, r10, r2
+; CHECK-NEXT: eor.w r6, r10, r2
; CHECK-NEXT: adc r3, r0, #0
-; CHECK-NEXT: asrl r6, r3, r2
-; CHECK-NEXT: subs r0, r6, r2
-; CHECK-NEXT: vmov r6, s2
+; CHECK-NEXT: asrl r4, r3, r2
+; CHECK-NEXT: subs r0, r4, r2
; CHECK-NEXT: sbc lr, r3, #0
; CHECK-NEXT: vmov r3, s10
-; CHECK-NEXT: vmov.f32 s2, s1
; CHECK-NEXT: umull r0, r8, r0, r2
-; CHECK-NEXT: asrs r5, r6, #31
-; CHECK-NEXT: adds r4, r6, r3
+; CHECK-NEXT: adds r4, r7, r3
+; CHECK-NEXT: eor.w r1, r7, r3
; CHECK-NEXT: adc r5, r5, #0
-; CHECK-NEXT: eor.w r1, r6, r3
; CHECK-NEXT: asrl r4, r5, r3
; CHECK-NEXT: subs r4, r4, r3
; CHECK-NEXT: sbc r5, r5, #0
-; CHECK-NEXT: orrs.w r7, r7, r10, asr #31
+; CHECK-NEXT: orrs.w r6, r6, r10, asr #31
; CHECK-NEXT: umull r4, r12, r4, r3
; CHECK-NEXT: csetm r9, eq
-; CHECK-NEXT: orrs.w r1, r1, r6, asr #31
-; CHECK-NEXT: mov.w r7, #0
+; CHECK-NEXT: orrs.w r1, r1, r7, asr #31
+; CHECK-NEXT: mov.w r6, #0
; CHECK-NEXT: csetm r1, eq
-; CHECK-NEXT: bfi r7, r9, #0, #8
+; CHECK-NEXT: bfi r6, r9, #0, #8
; CHECK-NEXT: mla r5, r5, r3, r12
-; CHECK-NEXT: bfi r7, r1, #8, #8
-; CHECK-NEXT: rsbs r1, r6, #0
-; CHECK-NEXT: vmsr p0, r7
+; CHECK-NEXT: bfi r6, r1, #8, #8
+; CHECK-NEXT: rsbs r1, r7, #0
; CHECK-NEXT: mla r7, lr, r2, r8
; CHECK-NEXT: lsll r4, r5, r1
; CHECK-NEXT: rsb.w r1, r10, #0
-; CHECK-NEXT: lsll r4, r5, r3
; CHECK-NEXT: lsll r0, r7, r1
-; CHECK-NEXT: vmov r3, s2
+; CHECK-NEXT: vmov lr, s2
; CHECK-NEXT: vmov r1, s6
; CHECK-NEXT: lsll r0, r7, r2
+; CHECK-NEXT: lsll r4, r5, r3
+; CHECK-NEXT: vmsr p0, r6
; CHECK-NEXT: vmov q3[2], q3[0], r0, r4
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: vpsel q2, q3, q2
-; CHECK-NEXT: adds r2, r3, r1
-; CHECK-NEXT: asr.w r0, r3, #31
-; CHECK-NEXT: adc r5, r0, #0
-; CHECK-NEXT: asrl r2, r5, r1
+; CHECK-NEXT: adds.w r2, lr, r1
+; CHECK-NEXT: asr.w r0, lr, #31
+; CHECK-NEXT: adc r3, r0, #0
+; CHECK-NEXT: asrl r2, r3, r1
; CHECK-NEXT: subs r0, r2, r1
; CHECK-NEXT: vmov r2, s0
-; CHECK-NEXT: sbc r8, r5, #0
-; CHECK-NEXT: umull r4, lr, r0, r1
-; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: sbc r7, r3, #0
+; CHECK-NEXT: vmov r3, s4
+; CHECK-NEXT: umull r0, r6, r0, r1
; CHECK-NEXT: asrs r5, r2, #31
-; CHECK-NEXT: adds r6, r2, r0
-; CHECK-NEXT: adc r7, r5, #0
-; CHECK-NEXT: mla r5, r8, r1, lr
-; CHECK-NEXT: asrl r6, r7, r0
-; CHECK-NEXT: subs.w r8, r6, r0
-; CHECK-NEXT: eor.w r6, r2, r0
-; CHECK-NEXT: sbc lr, r7, #0
-; CHECK-NEXT: eor.w r7, r3, r1
-; CHECK-NEXT: orrs.w r6, r6, r2, asr #31
-; CHECK-NEXT: orr.w r7, r7, r3, asr #31
-; CHECK-NEXT: csetm r6, eq
-; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: bfi r12, r6, #0, #8
+; CHECK-NEXT: adds r4, r2, r3
+; CHECK-NEXT: adc r5, r5, #0
+; CHECK-NEXT: asrl r4, r5, r3
+; CHECK-NEXT: subs r4, r4, r3
+; CHECK-NEXT: sbc r8, r5, #0
+; CHECK-NEXT: mla r5, r7, r1, r6
+; CHECK-NEXT: eor.w r6, lr, r1
+; CHECK-NEXT: orrs.w r6, r6, lr, asr #31
+; CHECK-NEXT: eor.w r7, r2, r3
; CHECK-NEXT: csetm r6, eq
+; CHECK-NEXT: orrs.w r7, r7, r2, asr #31
+; CHECK-NEXT: csetm r7, eq
+; CHECK-NEXT: rsb.w lr, lr, #0
+; CHECK-NEXT: bfi r12, r7, #0, #8
+; CHECK-NEXT: lsll r0, r5, lr
; CHECK-NEXT: bfi r12, r6, #8, #8
-; CHECK-NEXT: umull r6, r7, r8, r0
-; CHECK-NEXT: rsb.w r8, r3, #0
-; CHECK-NEXT: lsll r4, r5, r8
-; CHECK-NEXT: vmsr p0, r12
-; CHECK-NEXT: mla r3, lr, r0, r7
-; CHECK-NEXT: lsll r4, r5, r1
+; CHECK-NEXT: umull r4, r6, r4, r3
+; CHECK-NEXT: lsll r0, r5, r1
; CHECK-NEXT: rsbs r1, r2, #0
-; CHECK-NEXT: lsll r6, r3, r1
-; CHECK-NEXT: lsll r6, r3, r0
-; CHECK-NEXT: vmov q0[2], q0[0], r6, r4
+; CHECK-NEXT: vmsr p0, r12
+; CHECK-NEXT: mla r7, r8, r3, r6
+; CHECK-NEXT: lsll r4, r7, r1
+; CHECK-NEXT: lsll r4, r7, r3
+; CHECK-NEXT: vmov q0[2], q0[0], r4, r0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vmov.f32 s1, s2
; CHECK-NEXT: vmov.f32 s2, s8
diff --git a/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll b/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll
index d9ef1030ee9222..55a621eaf4c9cc 100644
--- a/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll
@@ -6,8 +6,8 @@ declare i8 @llvm.smax.i8(i8 %a, i8 %b) readnone
define arm_aapcs_vfpcc i8 @smaxi8(i8 %a, i8 %b) {
; CHECK-LABEL: smaxi8:
; CHECK: @ %bb.0:
-; CHECK-NEXT: sxtb r1, r1
; CHECK-NEXT: sxtb r0, r0
+; CHECK-NEXT: sxtb r1, r1
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: csel r0, r0, r1, gt
; CHECK-NEXT: bx lr
@@ -20,8 +20,8 @@ declare i16 @llvm.smax.i16(i16 %a, i16 %b) readnone
define arm_aapcs_vfpcc i16 @smaxi16(i16 %a, i16 %b) {
; CHECK-LABEL: smaxi16:
; CHECK: @ %bb.0:
-; CHECK-NEXT: sxth r1, r1
; CHECK-NEXT: sxth r0, r0
+; CHECK-NEXT: sxth r1, r1
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: csel r0, r0, r1, gt
; CHECK-NEXT: bx lr
@@ -48,8 +48,10 @@ define arm_aapcs_vfpcc i64 @smaxi64(i64 %a, i64 %b) {
; CHECK: @ %bb.0:
; CHECK-NEXT: subs.w r12, r2, r0
; CHECK-NEXT: sbcs.w r12, r3, r1
-; CHECK-NEXT: csel r0, r0, r2, lt
-; CHECK-NEXT: csel r1, r1, r3, lt
+; CHECK-NEXT: cset r12, lt
+; CHECK-NEXT: cmp.w r12, #0
+; CHECK-NEXT: csel r0, r0, r2, ne
+; CHECK-NEXT: csel r1, r1, r3, ne
; CHECK-NEXT: bx lr
%c = call i64 @llvm.smax.i64(i64 %a, i64 %b)
ret i64 %c
@@ -203,8 +205,10 @@ define arm_aapcs_vfpcc <1 x i64> @smax1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: subs.w r12, r2, r0
; CHECK-NEXT: sbcs.w r12, r3, r1
-; CHECK-NEXT: csel r0, r0, r2, lt
-; CHECK-NEXT: csel r1, r1, r3, lt
+; CHECK-NEXT: cset r12, lt
+; CHECK-NEXT: cmp.w r12, #0
+; CHECK-NEXT: csel r0, r0, r2, ne
+; CHECK-NEXT: csel r1, r1, r3, ne
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: bx lr
%c = call <1 x i64> @llvm.smax.v1i64(<1 x i64> %a, <1 x i64> %b)
@@ -286,8 +290,8 @@ declare i8 @llvm.umax.i8(i8 %a, i8 %b) readnone
define arm_aapcs_vfpcc i8 @umaxi8(i8 %a, i8 %b) {
; CHECK-LABEL: umaxi8:
; CHECK: @ %bb.0:
-; CHECK-NEXT: uxtb r1, r1
; CHECK-NEXT: uxtb r0, r0
+; CHECK-NEXT: uxtb r1, r1
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: csel r0, r0, r1, hi
; CHECK-NEXT: bx lr
@@ -300,8 +304,8 @@ declare i16 @llvm.umax.i16(i16 %a, i16 %b) readnone
define arm_aapcs_vfpcc i16 @umaxi16(i16 %a, i16 %b) {
; CHECK-LABEL: umaxi16:
; CHECK: @ %bb.0:
-; CHECK-NEXT: uxth r1, r1
; CHECK-NEXT: uxth r0, r0
+; CHECK-NEXT: uxth r1, r1
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: csel r0, r0, r1, hi
; CHECK-NEXT: bx lr
@@ -328,8 +332,10 @@ define arm_aapcs_vfpcc i64 @umaxi64(i64 %a, i64 %b) {
; CHECK: @ %bb.0:
; CHECK-NEXT: subs.w r12, r2, r0
; CHECK-NEXT: sbcs.w r12, r3, r1
-; CHECK-NEXT: csel r0, r0, r2, lo
-; CHECK-NEXT: csel r1, r1, r3, lo
+; CHECK-NEXT: cset r12, lo
+; CHECK-NEXT: cmp.w r12, #0
+; CHECK-NEXT: csel r0, r0, r2, ne
+; CHECK-NEXT: csel r1, r1, r3, ne
; CHECK-NEXT: bx lr
%c = call i64 @llvm.umax.i64(i64 %a, i64 %b)
ret i64 %c
@@ -476,8 +482,10 @@ define arm_aapcs_vfpcc <1 x i64> @umax1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: subs.w r12, r2, r0
; CHECK-NEXT: sbcs.w r12, r3, r1
-; CHECK-NEXT: csel r0, r0, r2, lo
-; CHECK-NEXT: csel r1, r1, r3, lo
+; CHECK-NEXT: cset r12, lo
+; CHECK-NEXT: cmp.w r12, #0
+; CHECK-NEXT: csel r0, r0, r2, ne
+; CHECK-NEXT: csel r1, r1, r3, ne
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: bx lr
%c = call <1 x i64> @llvm.umax.v1i64(<1 x i64> %a, <1 x i64> %b)
@@ -559,8 +567,8 @@ declare i8 @llvm.smin.i8(i8 %a, i8 %b) readnone
define arm_aapcs_vfpcc i8 @smini8(i8 %a, i8 %b) {
; CHECK-LABEL: smini8:
; CHECK: @ %bb.0:
-; CHECK-NEXT: sxtb r1, r1
; CHECK-NEXT: sxtb r0, r0
+; CHECK-NEXT: sxtb r1, r1
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: csel r0, r0, r1, lt
; CHECK-NEXT: bx lr
@@ -573,8 +581,8 @@ declare i16 @llvm.smin.i16(i16 %a, i16 %b) readnone
define arm_aapcs_vfpcc i16 @smini16(i16 %a, i16 %b) {
; CHECK-LABEL: smini16:
; CHECK: @ %bb.0:
-; CHECK-NEXT: sxth r1, r1
; CHECK-NEXT: sxth r0, r0
+; CHECK-NEXT: sxth r1, r1
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: csel r0, r0, r1, lt
; CHECK-NEXT: bx lr
@@ -601,8 +609,10 @@ define arm_aapcs_vfpcc i64 @smini64(i64 %a, i64 %b) {
; CHECK: @ %bb.0:
; CHECK-NEXT: subs.w r12, r0, r2
; CHECK-NEXT: sbcs.w r12, r1, r3
-; CHECK-NEXT: csel r0, r0, r2, lt
-; CHECK-NEXT: csel r1, r1, r3, lt
+; CHECK-NEXT: cset r12, lt
+; CHECK-NEXT: cmp.w r12, #0
+; CHECK-NEXT: csel r0, r0, r2, ne
+; CHECK-NEXT: csel r1, r1, r3, ne
; CHECK-NEXT: bx lr
%c = call i64 @llvm.smin.i64(i64 %a, i64 %b)
ret i64 %c
@@ -756,8 +766,10 @@ define arm_aapcs_vfpcc <1 x i64> @smin1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: subs.w r12, r0, r2
; CHECK-NEXT: sbcs.w r12, r1, r3
-; CHECK-NEXT: csel r0, r0, r2, lt
-; CHECK-NEXT: csel r1, r1, r3, lt
+; CHECK-NEXT: cset r12, lt
+; CHECK-NEXT: cmp.w r12, #0
+; CHECK-NEXT: csel r0, r0, r2, ne
+; CHECK-NEXT: csel r1, r1, r3, ne
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: bx lr
%c = call <1 x i64> @llvm.smin.v1i64(<1 x i64> %a, <1 x i64> %b)
@@ -839,8 +851,8 @@ declare i8 @llvm.umin.i8(i8 %a, i8 %b) readnone
define arm_aapcs_vfpcc i8 @umini8(i8 %a, i8 %b) {
; CHECK-LABEL: umini8:
; CHECK: @ %bb.0:
-; CHECK-NEXT: uxtb r1, r1
; CHECK-NEXT: uxtb r0, r0
+; CHECK-NEXT: uxtb r1, r1
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: csel r0, r0, r1, lo
; CHECK-NEXT: bx lr
@@ -853,8 +865,8 @@ declare i16 @llvm.umin.i16(i16 %a, i16 %b) readnone
define arm_aapcs_vfpcc i16 @umini16(i16 %a, i16 %b) {
; CHECK-LABEL: umini16:
; CHECK: @ %bb.0:
-; CHECK-NEXT: uxth r1, r1
; CHECK-NEXT: uxth r0, r0
+; CHECK-NEXT: uxth r1, r1
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: csel r0, r0, r1, lo
; CHECK-NEXT: bx lr
@@ -881,8 +893,10 @@ define arm_aapcs_vfpcc i64 @umini64(i64 %a, i64 %b) {
; CHECK: @ %bb.0:
; CHECK-NEXT: subs.w r12, r0, r2
; CHECK-NEXT: sbcs.w r12, r1, r3
-; CHECK-NEXT: csel r0, r0, r2, lo
-; CHECK-NEXT: csel r1, r1, r3, lo
+; CHECK-NEXT: cset r12, lo
+; CHECK-NEXT: cmp.w r12, #0
+; CHECK-NEXT: csel r0, r0, r2, ne
+; CHECK-NEXT: csel r1, r1, r3, ne
; CHECK-NEXT: bx lr
%c = call i64 @llvm.umin.i64(i64 %a, i64 %b)
ret i64 %c
@@ -1029,8 +1043,10 @@ define arm_aapcs_vfpcc <1 x i64> @umin1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: subs.w r12, r0, r2
; CHECK-NEXT: sbcs.w r12, r1, r3
-; CHECK-NEXT: csel r0, r0, r2, lo
-; CHECK-NEXT: csel r1, r1, r3, lo
+; CHECK-NEXT: cset r12, lo
+; CHECK-NEXT: cmp.w r12, #0
+; CHECK-NEXT: csel r0, r0, r2, ne
+; CHECK-NEXT: csel r1, r1, r3, ne
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: bx lr
%c = call <1 x i64> @llvm.umin.v1i64(<1 x i64> %a, <1 x i64> %b)
diff --git a/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll b/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll
index 43ed5eefbf4c77..70957ca950d71f 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll
@@ -13,8 +13,8 @@ define void @arm_cmplx_dot_prod_q15(ptr noundef %pSrcA, ptr noundef %pSrcB, i32
; CHECK-NEXT: movs r6, #2
; CHECK-NEXT: lsrs r7, r2, #3
; CHECK-NEXT: rsb r6, r6, r2, lsr #3
+; CHECK-NEXT: movs r5, #0
; CHECK-NEXT: cmp r7, #2
-; CHECK-NEXT: mov.w r5, #0
; CHECK-NEXT: csel r7, r6, r5, hs
; CHECK-NEXT: add.w lr, r7, #1
; CHECK-NEXT: mov r4, r5
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll b/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll
index 0bec2b100911cf..101b49fea488a8 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll
@@ -28,12 +28,11 @@ define arm_aapcs_vfpcc <4 x i32> @sext_v4i1_v4f32(<4 x float> %src1, <4 x float>
; CHECK-MVE-NEXT: csetm r1, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s1, s5
-; CHECK-MVE-NEXT: vmov q2[2], q2[0], r1, r0
-; CHECK-MVE-NEXT: csetm r0, ne
+; CHECK-MVE-NEXT: vmov q0[2], q0[0], r1, r0
+; CHECK-MVE-NEXT: csetm r2, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: csetm r1, ne
-; CHECK-MVE-NEXT: vmov q2[3], q2[1], r1, r0
-; CHECK-MVE-NEXT: vmov q0, q2
+; CHECK-MVE-NEXT: csetm r3, ne
+; CHECK-MVE-NEXT: vmov q0[3], q0[1], r3, r2
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: sext_v4i1_v4f32:
@@ -66,49 +65,49 @@ entry:
define arm_aapcs_vfpcc <8 x i16> @sext_v8i1_v8f32(<8 x half> %src1, <8 x half> %src2) {
; CHECK-MVE-LABEL: sext_v8i1_v8f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: .save {r4, lr}
-; CHECK-MVE-NEXT: push {r4, lr}
+; CHECK-MVE-NEXT: .save {r4, r5, r7, lr}
+; CHECK-MVE-NEXT: push {r4, r5, r7, lr}
; CHECK-MVE-NEXT: vcmp.f16 s3, s7
-; CHECK-MVE-NEXT: vmovx.f16 s8, s6
+; CHECK-MVE-NEXT: vmovx.f16 s8, s7
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vmovx.f16 s10, s2
+; CHECK-MVE-NEXT: vmovx.f16 s10, s3
; CHECK-MVE-NEXT: vcmp.f16 s10, s8
-; CHECK-MVE-NEXT: vmovx.f16 s8, s5
-; CHECK-MVE-NEXT: vmovx.f16 s10, s1
; CHECK-MVE-NEXT: csetm r12, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f16 s10, s8
+; CHECK-MVE-NEXT: vcmp.f16 s2, s6
+; CHECK-MVE-NEXT: vmovx.f16 s6, s6
+; CHECK-MVE-NEXT: vmovx.f16 s2, s2
; CHECK-MVE-NEXT: csetm lr, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f16 s2, s6
-; CHECK-MVE-NEXT: vmovx.f16 s2, s4
-; CHECK-MVE-NEXT: vmovx.f16 s6, s0
+; CHECK-MVE-NEXT: vmovx.f16 s2, s5
+; CHECK-MVE-NEXT: vmovx.f16 s6, s1
; CHECK-MVE-NEXT: csetm r2, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f16 s6, s2
-; CHECK-MVE-NEXT: vmovx.f16 s2, s3
+; CHECK-MVE-NEXT: vcmp.f16 s1, s5
; CHECK-MVE-NEXT: csetm r3, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f16 s1, s5
+; CHECK-MVE-NEXT: vcmp.f16 s6, s2
+; CHECK-MVE-NEXT: vmovx.f16 s2, s4
; CHECK-MVE-NEXT: csetm r0, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f16 s0, s4
-; CHECK-MVE-NEXT: vmovx.f16 s0, s7
+; CHECK-MVE-NEXT: vmovx.f16 s0, s0
; CHECK-MVE-NEXT: csetm r1, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f16 s2, s0
+; CHECK-MVE-NEXT: vcmp.f16 s0, s2
; CHECK-MVE-NEXT: csetm r4, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov.16 q0[0], r4
-; CHECK-MVE-NEXT: vmov.16 q0[1], r0
-; CHECK-MVE-NEXT: vmov.16 q0[2], r1
-; CHECK-MVE-NEXT: vmov.16 q0[3], r2
-; CHECK-MVE-NEXT: vmov.16 q0[4], r3
-; CHECK-MVE-NEXT: vmov.16 q0[5], lr
+; CHECK-MVE-NEXT: csetm r5, ne
+; CHECK-MVE-NEXT: vmov.16 q0[1], r5
+; CHECK-MVE-NEXT: vmov.16 q0[2], r0
+; CHECK-MVE-NEXT: vmov.16 q0[3], r1
+; CHECK-MVE-NEXT: vmov.16 q0[4], r2
+; CHECK-MVE-NEXT: vmov.16 q0[5], r3
; CHECK-MVE-NEXT: vmov.16 q0[6], r12
-; CHECK-MVE-NEXT: csetm r0, ne
-; CHECK-MVE-NEXT: vmov.16 q0[7], r0
-; CHECK-MVE-NEXT: pop {r4, pc}
+; CHECK-MVE-NEXT: vmov.16 q0[7], lr
+; CHECK-MVE-NEXT: pop {r4, r5, r7, pc}
;
; CHECK-MVEFP-LABEL: sext_v8i1_v8f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
@@ -199,22 +198,22 @@ define arm_aapcs_vfpcc <2 x i64> @sext_v2i1_v2f64(<2 x double> %src) {
; CHECK-MVEFP-NEXT: vpush {d8, d9}
; CHECK-MVEFP-NEXT: vmov q4, q0
; CHECK-MVEFP-NEXT: vldr d0, .LCPI6_0
-; CHECK-MVEFP-NEXT: vmov r0, r1, d8
+; CHECK-MVEFP-NEXT: vmov r0, r1, d9
; CHECK-MVEFP-NEXT: vmov r4, r5, d0
; CHECK-MVEFP-NEXT: mov r2, r4
; CHECK-MVEFP-NEXT: mov r3, r5
; CHECK-MVEFP-NEXT: bl __aeabi_dcmpeq
; CHECK-MVEFP-NEXT: mov r6, r0
-; CHECK-MVEFP-NEXT: vmov r0, r1, d9
+; CHECK-MVEFP-NEXT: vmov r0, r1, d8
; CHECK-MVEFP-NEXT: mov r2, r4
; CHECK-MVEFP-NEXT: mov r3, r5
; CHECK-MVEFP-NEXT: bl __aeabi_dcmpeq
-; CHECK-MVEFP-NEXT: cmp r0, #0
-; CHECK-MVEFP-NEXT: csetm r0, eq
; CHECK-MVEFP-NEXT: cmp r6, #0
; CHECK-MVEFP-NEXT: csetm r1, eq
-; CHECK-MVEFP-NEXT: vmov q0[2], q0[0], r1, r0
-; CHECK-MVEFP-NEXT: vmov q0[3], q0[1], r1, r0
+; CHECK-MVEFP-NEXT: cmp r0, #0
+; CHECK-MVEFP-NEXT: csetm r0, eq
+; CHECK-MVEFP-NEXT: vmov q0[2], q0[0], r0, r1
+; CHECK-MVEFP-NEXT: vmov q0[3], q0[1], r0, r1
; CHECK-MVEFP-NEXT: vpop {d8, d9}
; CHECK-MVEFP-NEXT: pop {r4, r5, r6, pc}
; CHECK-MVEFP-NEXT: .p2align 3
@@ -246,22 +245,22 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @zext_v4i1_v4f32(<4 x float> %src1, <4 x float> %src2) {
; CHECK-MVE-LABEL: zext_v4i1_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s2, s6
+; CHECK-MVE-NEXT: vcmp.f32 s3, s7
; CHECK-MVE-NEXT: vmov.i32 q2, #0x1
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s0, s4
+; CHECK-MVE-NEXT: vcmp.f32 s1, s5
; CHECK-MVE-NEXT: csetm r0, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s7
+; CHECK-MVE-NEXT: vcmp.f32 s2, s6
; CHECK-MVE-NEXT: csetm r1, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s5
-; CHECK-MVE-NEXT: vmov q3[2], q3[0], r1, r0
-; CHECK-MVE-NEXT: csetm r0, ne
+; CHECK-MVE-NEXT: vcmp.f32 s0, s4
+; CHECK-MVE-NEXT: csetm r2, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: csetm r1, ne
-; CHECK-MVE-NEXT: vmov q3[3], q3[1], r1, r0
-; CHECK-MVE-NEXT: vand q0, q3, q2
+; CHECK-MVE-NEXT: csetm r3, ne
+; CHECK-MVE-NEXT: vmov q0[2], q0[0], r3, r2
+; CHECK-MVE-NEXT: vmov q0[3], q0[1], r1, r0
+; CHECK-MVE-NEXT: vand q0, q0, q2
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: zext_v4i1_v4f32:
@@ -294,51 +293,51 @@ entry:
define arm_aapcs_vfpcc <8 x i16> @zext_v8i1_v8f32(<8 x half> %src1, <8 x half> %src2) {
; CHECK-MVE-LABEL: zext_v8i1_v8f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: .save {r4, lr}
-; CHECK-MVE-NEXT: push {r4, lr}
-; CHECK-MVE-NEXT: vcmp.f16 s3, s7
+; CHECK-MVE-NEXT: .save {r4, r5, r7, lr}
+; CHECK-MVE-NEXT: push {r4, r5, r7, lr}
+; CHECK-MVE-NEXT: vmovx.f16 s8, s7
+; CHECK-MVE-NEXT: vmovx.f16 s10, s3
+; CHECK-MVE-NEXT: vcmp.f16 s10, s8
; CHECK-MVE-NEXT: vmovx.f16 s8, s6
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vcmp.f16 s3, s7
; CHECK-MVE-NEXT: vmovx.f16 s10, s2
-; CHECK-MVE-NEXT: vcmp.f16 s10, s8
; CHECK-MVE-NEXT: csetm r12, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vcmp.f16 s10, s8
+; CHECK-MVE-NEXT: csetm lr, ne
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f16 s2, s6
; CHECK-MVE-NEXT: vmovx.f16 s2, s5
; CHECK-MVE-NEXT: vmovx.f16 s6, s1
-; CHECK-MVE-NEXT: csetm lr, ne
+; CHECK-MVE-NEXT: csetm r2, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f16 s6, s2
; CHECK-MVE-NEXT: vmovx.f16 s2, s4
; CHECK-MVE-NEXT: vmovx.f16 s6, s0
-; CHECK-MVE-NEXT: csetm r2, ne
-; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f16 s1, s5
; CHECK-MVE-NEXT: csetm r3, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f16 s6, s2
-; CHECK-MVE-NEXT: vmovx.f16 s2, s3
+; CHECK-MVE-NEXT: vcmp.f16 s1, s5
; CHECK-MVE-NEXT: csetm r0, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f16 s0, s4
-; CHECK-MVE-NEXT: vmovx.f16 s0, s7
+; CHECK-MVE-NEXT: vcmp.f16 s6, s2
; CHECK-MVE-NEXT: csetm r1, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f16 s2, s0
+; CHECK-MVE-NEXT: vcmp.f16 s0, s4
; CHECK-MVE-NEXT: vmov.i16 q0, #0x1
; CHECK-MVE-NEXT: csetm r4, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vmov.16 q1[0], r4
-; CHECK-MVE-NEXT: vmov.16 q1[1], r1
-; CHECK-MVE-NEXT: vmov.16 q1[2], r0
-; CHECK-MVE-NEXT: vmov.16 q1[3], r3
-; CHECK-MVE-NEXT: vmov.16 q1[4], r2
-; CHECK-MVE-NEXT: vmov.16 q1[5], lr
-; CHECK-MVE-NEXT: vmov.16 q1[6], r12
-; CHECK-MVE-NEXT: csetm r0, ne
-; CHECK-MVE-NEXT: vmov.16 q1[7], r0
+; CHECK-MVE-NEXT: csetm r5, ne
+; CHECK-MVE-NEXT: vmov.16 q1[0], r5
+; CHECK-MVE-NEXT: vmov.16 q1[1], r4
+; CHECK-MVE-NEXT: vmov.16 q1[2], r1
+; CHECK-MVE-NEXT: vmov.16 q1[3], r0
+; CHECK-MVE-NEXT: vmov.16 q1[4], r3
+; CHECK-MVE-NEXT: vmov.16 q1[5], r2
+; CHECK-MVE-NEXT: vmov.16 q1[6], lr
+; CHECK-MVE-NEXT: vmov.16 q1[7], r12
; CHECK-MVE-NEXT: vand q0, q1, q0
-; CHECK-MVE-NEXT: pop {r4, pc}
+; CHECK-MVE-NEXT: pop {r4, r5, r7, pc}
;
; CHECK-MVEFP-LABEL: zext_v8i1_v8f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
@@ -615,24 +614,24 @@ entry:
define arm_aapcs_vfpcc <4 x float> @fptoui_v4i1_v4f32(<4 x float> %src) {
; CHECK-MVE-LABEL: fptoui_v4i1_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: vcvt.s32.f32 s6, s3
+; CHECK-MVE-NEXT: vldr s8, .LCPI20_0
; CHECK-MVE-NEXT: vcvt.s32.f32 s2, s2
-; CHECK-MVE-NEXT: vldr s10, .LCPI20_0
-; CHECK-MVE-NEXT: vcvt.s32.f32 s6, s1
-; CHECK-MVE-NEXT: vcvt.s32.f32 s4, s3
+; CHECK-MVE-NEXT: vcvt.s32.f32 s10, s1
+; CHECK-MVE-NEXT: vmov.f32 s4, #1.000000e+00
; CHECK-MVE-NEXT: vcvt.s32.f32 s0, s0
-; CHECK-MVE-NEXT: vmov.f32 s8, #1.000000e+00
-; CHECK-MVE-NEXT: vmov r3, s2
-; CHECK-MVE-NEXT: vmov r2, s6
-; CHECK-MVE-NEXT: vmov r1, s4
+; CHECK-MVE-NEXT: vmov r0, s6
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vmov r0, s2
+; CHECK-MVE-NEXT: vseleq.f32 s3, s8, s4
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vmov r0, s10
+; CHECK-MVE-NEXT: vseleq.f32 s2, s8, s4
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vmov r0, s0
-; CHECK-MVE-NEXT: cmp r3, #0
-; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s8
-; CHECK-MVE-NEXT: cmp r2, #0
-; CHECK-MVE-NEXT: vseleq.f32 s1, s10, s8
-; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s10, s8
+; CHECK-MVE-NEXT: vseleq.f32 s1, s8, s4
; CHECK-MVE-NEXT: cmp r0, #0
-; CHECK-MVE-NEXT: vseleq.f32 s0, s10, s8
+; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4
; CHECK-MVE-NEXT: bx lr
; CHECK-MVE-NEXT: .p2align 2
; CHECK-MVE-NEXT: @ %bb.1:
@@ -655,27 +654,24 @@ entry:
define arm_aapcs_vfpcc <4 x float> @fptosi_v4i1_v4f32(<4 x float> %src) {
; CHECK-MVE-LABEL: fptosi_v4i1_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: vcvt.s32.f32 s8, s3
+; CHECK-MVE-NEXT: vldr s10, .LCPI21_0
; CHECK-MVE-NEXT: vcvt.s32.f32 s2, s2
-; CHECK-MVE-NEXT: vldr s8, .LCPI21_0
-; CHECK-MVE-NEXT: vcvt.s32.f32 s4, s1
-; CHECK-MVE-NEXT: vcvt.s32.f32 s10, s3
+; CHECK-MVE-NEXT: vcvt.s32.f32 s6, s1
+; CHECK-MVE-NEXT: vmov.f32 s4, #1.000000e+00
; CHECK-MVE-NEXT: vcvt.s32.f32 s0, s0
-; CHECK-MVE-NEXT: vmov.f32 s6, #1.000000e+00
-; CHECK-MVE-NEXT: vmov r3, s2
-; CHECK-MVE-NEXT: vmov r2, s4
-; CHECK-MVE-NEXT: vmov r1, s10
+; CHECK-MVE-NEXT: vmov r0, s8
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vmov r0, s2
+; CHECK-MVE-NEXT: vseleq.f32 s3, s10, s4
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vmov r0, s6
+; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s4
+; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmov r0, s0
-; CHECK-MVE-NEXT: lsls r3, r3, #31
-; CHECK-MVE-NEXT: lsl.w r2, r2, #31
-; CHECK-MVE-NEXT: vseleq.f32 s2, s8, s6
-; CHECK-MVE-NEXT: cmp r2, #0
-; CHECK-MVE-NEXT: lsl.w r1, r1, #31
-; CHECK-MVE-NEXT: vseleq.f32 s1, s8, s6
-; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: lsl.w r0, r0, #31
-; CHECK-MVE-NEXT: vseleq.f32 s3, s8, s6
-; CHECK-MVE-NEXT: cmp r0, #0
-; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s6
+; CHECK-MVE-NEXT: vseleq.f32 s1, s10, s4
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f32 s0, s10, s4
; CHECK-MVE-NEXT: bx lr
; CHECK-MVE-NEXT: .p2align 2
; CHECK-MVE-NEXT: @ %bb.1:
@@ -805,45 +801,45 @@ define arm_aapcs_vfpcc <8 x half> @fptoui_v8i1_v8f16(<8 x half> %src) {
; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s0
; CHECK-MVE-NEXT: vmovx.f16 s0, s0
; CHECK-MVE-NEXT: vcvt.s32.f16 s0, s0
-; CHECK-MVE-NEXT: vmov r0, s4
-; CHECK-MVE-NEXT: vmov r1, s0
; CHECK-MVE-NEXT: vldr.16 s8, .LCPI24_0
+; CHECK-MVE-NEXT: vmov r0, s0
; CHECK-MVE-NEXT: vmov.f16 s6, #1.000000e+00
-; CHECK-MVE-NEXT: vmovx.f16 s10, s1
-; CHECK-MVE-NEXT: vcvt.s32.f16 s10, s10
-; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vmov r1, s10
-; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s6
; CHECK-MVE-NEXT: cmp r0, #0
-; CHECK-MVE-NEXT: vseleq.f16 s0, s8, s6
-; CHECK-MVE-NEXT: vmovx.f16 s10, s3
-; CHECK-MVE-NEXT: vins.f16 s0, s4
-; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s1
; CHECK-MVE-NEXT: vmov r0, s4
+; CHECK-MVE-NEXT: vseleq.f16 s10, s8, s6
+; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vseleq.f16 s0, s8, s6
+; CHECK-MVE-NEXT: vins.f16 s0, s10
+; CHECK-MVE-NEXT: vmovx.f16 s10, s1
; CHECK-MVE-NEXT: vcvt.s32.f16 s10, s10
-; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s6
+; CHECK-MVE-NEXT: vmov r0, s10
; CHECK-MVE-NEXT: cmp r0, #0
-; CHECK-MVE-NEXT: vseleq.f16 s1, s8, s6
-; CHECK-MVE-NEXT: vins.f16 s1, s4
+; CHECK-MVE-NEXT: vmov r0, s4
; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s2
; CHECK-MVE-NEXT: vmovx.f16 s2, s2
-; CHECK-MVE-NEXT: vmov r0, s4
; CHECK-MVE-NEXT: vcvt.s32.f16 s2, s2
-; CHECK-MVE-NEXT: vmov r1, s2
-; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vmov r1, s10
-; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s6
+; CHECK-MVE-NEXT: vseleq.f16 s10, s8, s6
; CHECK-MVE-NEXT: cmp r0, #0
-; CHECK-MVE-NEXT: vseleq.f16 s2, s8, s6
-; CHECK-MVE-NEXT: vins.f16 s2, s4
+; CHECK-MVE-NEXT: vmov r0, s2
+; CHECK-MVE-NEXT: vseleq.f16 s1, s8, s6
+; CHECK-MVE-NEXT: vins.f16 s1, s10
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vmov r0, s4
+; CHECK-MVE-NEXT: vseleq.f16 s10, s8, s6
; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s3
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vseleq.f16 s2, s8, s6
+; CHECK-MVE-NEXT: vins.f16 s2, s10
+; CHECK-MVE-NEXT: vmovx.f16 s10, s3
+; CHECK-MVE-NEXT: vcvt.s32.f16 s10, s10
+; CHECK-MVE-NEXT: vmov r0, s10
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vmov r0, s4
-; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s6
+; CHECK-MVE-NEXT: vseleq.f16 s10, s8, s6
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s3, s8, s6
-; CHECK-MVE-NEXT: vins.f16 s3, s4
+; CHECK-MVE-NEXT: vins.f16 s3, s10
; CHECK-MVE-NEXT: bx lr
; CHECK-MVE-NEXT: .p2align 1
; CHECK-MVE-NEXT: @ %bb.1:
@@ -869,49 +865,45 @@ define arm_aapcs_vfpcc <8 x half> @fptosi_v8i1_v8f16(<8 x half> %src) {
; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s0
; CHECK-MVE-NEXT: vmovx.f16 s0, s0
; CHECK-MVE-NEXT: vcvt.s32.f16 s0, s0
-; CHECK-MVE-NEXT: vmov r0, s4
-; CHECK-MVE-NEXT: vmov r1, s0
; CHECK-MVE-NEXT: vldr.16 s8, .LCPI25_0
+; CHECK-MVE-NEXT: vmov r0, s0
; CHECK-MVE-NEXT: vmov.f16 s6, #1.000000e+00
-; CHECK-MVE-NEXT: vmovx.f16 s10, s1
-; CHECK-MVE-NEXT: vcvt.s32.f16 s10, s10
; CHECK-MVE-NEXT: lsls r0, r0, #31
-; CHECK-MVE-NEXT: lsls r1, r1, #31
-; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s6
-; CHECK-MVE-NEXT: cmp r0, #0
-; CHECK-MVE-NEXT: vseleq.f16 s0, s8, s6
-; CHECK-MVE-NEXT: vmov r1, s10
-; CHECK-MVE-NEXT: vins.f16 s0, s4
-; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s1
; CHECK-MVE-NEXT: vmov r0, s4
-; CHECK-MVE-NEXT: vmovx.f16 s10, s3
+; CHECK-MVE-NEXT: vseleq.f16 s10, s8, s6
+; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s1
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s0, s8, s6
+; CHECK-MVE-NEXT: vins.f16 s0, s10
+; CHECK-MVE-NEXT: vmovx.f16 s10, s1
; CHECK-MVE-NEXT: vcvt.s32.f16 s10, s10
-; CHECK-MVE-NEXT: lsls r1, r1, #31
-; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s6
+; CHECK-MVE-NEXT: vmov r0, s10
; CHECK-MVE-NEXT: lsls r0, r0, #31
-; CHECK-MVE-NEXT: cmp r0, #0
-; CHECK-MVE-NEXT: vseleq.f16 s1, s8, s6
-; CHECK-MVE-NEXT: vins.f16 s1, s4
+; CHECK-MVE-NEXT: vmov r0, s4
; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s2
; CHECK-MVE-NEXT: vmovx.f16 s2, s2
-; CHECK-MVE-NEXT: vmov r0, s4
+; CHECK-MVE-NEXT: vseleq.f16 s10, s8, s6
; CHECK-MVE-NEXT: vcvt.s32.f16 s2, s2
-; CHECK-MVE-NEXT: vmov r1, s2
; CHECK-MVE-NEXT: lsls r0, r0, #31
-; CHECK-MVE-NEXT: lsls r1, r1, #31
-; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s6
-; CHECK-MVE-NEXT: cmp r0, #0
-; CHECK-MVE-NEXT: vseleq.f16 s2, s8, s6
-; CHECK-MVE-NEXT: vmov r1, s10
-; CHECK-MVE-NEXT: vins.f16 s2, s4
+; CHECK-MVE-NEXT: vmov r0, s2
+; CHECK-MVE-NEXT: vseleq.f16 s1, s8, s6
+; CHECK-MVE-NEXT: vins.f16 s1, s10
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vmov r0, s4
+; CHECK-MVE-NEXT: vseleq.f16 s10, s8, s6
; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s3
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s2, s8, s6
+; CHECK-MVE-NEXT: vins.f16 s2, s10
+; CHECK-MVE-NEXT: vmovx.f16 s10, s3
+; CHECK-MVE-NEXT: vcvt.s32.f16 s10, s10
+; CHECK-MVE-NEXT: vmov r0, s10
+; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmov r0, s4
-; CHECK-MVE-NEXT: lsls r1, r1, #31
-; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s6
+; CHECK-MVE-NEXT: vseleq.f16 s10, s8, s6
; CHECK-MVE-NEXT: lsls r0, r0, #31
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s3, s8, s6
-; CHECK-MVE-NEXT: vins.f16 s3, s4
+; CHECK-MVE-NEXT: vins.f16 s3, s10
; CHECK-MVE-NEXT: bx lr
; CHECK-MVE-NEXT: .p2align 1
; CHECK-MVE-NEXT: @ %bb.1:
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-or.ll b/llvm/test/CodeGen/Thumb2/mve-pred-or.ll
index ff5ee8929aae8f..46406aeebfa4ee 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-or.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-or.ll
@@ -378,21 +378,23 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: cmpeqz_v2i1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov r0, r1, d2
+; CHECK-NEXT: vmov r0, r1, d0
; CHECK-NEXT: orrs r0, r1
-; CHECK-NEXT: vmov r1, r2, d0
-; CHECK-NEXT: cset r0, eq
+; CHECK-NEXT: vmov r1, r2, d2
; CHECK-NEXT: orrs r1, r2
-; CHECK-NEXT: csinc r0, r0, zr, ne
+; CHECK-NEXT: cset r1, eq
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: csinc r0, r1, zr, ne
; CHECK-NEXT: movs r1, #0
; CHECK-NEXT: rsbs r0, r0, #0
; CHECK-NEXT: bfi r1, r0, #0, #8
-; CHECK-NEXT: vmov r0, r2, d3
+; CHECK-NEXT: vmov r0, r2, d1
; CHECK-NEXT: orrs r0, r2
-; CHECK-NEXT: vmov r2, r3, d1
-; CHECK-NEXT: cset r0, eq
+; CHECK-NEXT: vmov r2, r3, d3
; CHECK-NEXT: orrs r2, r3
-; CHECK-NEXT: csinc r0, r0, zr, ne
+; CHECK-NEXT: cset r2, eq
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: csinc r0, r2, zr, ne
; CHECK-NEXT: rsbs r0, r0, #0
; CHECK-NEXT: bfi r1, r0, #8, #8
; CHECK-NEXT: vmsr p0, r1
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll b/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll
index 9400f24e7192c8..bf6468baac22bb 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll
@@ -4,15 +4,15 @@
define void @arm_min_helium_f32(ptr %pSrc, i32 %blockSize, ptr nocapture %pResult, ptr nocapture %pIndex) {
; CHECK-LABEL: arm_min_helium_f32:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r7, lr}
-; CHECK-NEXT: push {r4, r5, r7, lr}
-; CHECK-NEXT: movs r4, #0
+; CHECK-NEXT: .save {r4, r6, r7, lr}
+; CHECK-NEXT: push {r4, r6, r7, lr}
+; CHECK-NEXT: movs r6, #0
; CHECK-NEXT: mov.w r12, #4
-; CHECK-NEXT: vidup.u32 q2, r4, #1
-; CHECK-NEXT: movw r5, #54437
-; CHECK-NEXT: movt r5, #21352
-; CHECK-NEXT: vdup.32 q1, r5
+; CHECK-NEXT: vidup.u32 q2, r6, #1
+; CHECK-NEXT: movw r4, #54437
+; CHECK-NEXT: movt r4, #21352
; CHECK-NEXT: vmov.i32 q0, #0x0
+; CHECK-NEXT: vdup.32 q1, r4
; CHECK-NEXT: dlstp.32 lr, r1
; CHECK-NEXT: .LBB0_1: @ %do.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
@@ -33,7 +33,7 @@ define void @arm_min_helium_f32(ptr %pSrc, i32 %blockSize, ptr nocapture %pResul
; CHECK-NEXT: vminv.u32 r1, q0
; CHECK-NEXT: str r1, [r3]
; CHECK-NEXT: vstr s8, [r2]
-; CHECK-NEXT: pop {r4, r5, r7, pc}
+; CHECK-NEXT: pop {r4, r6, r7, pc}
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.3:
; CHECK-NEXT: .LCPI0_0:
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-vselect.ll b/llvm/test/CodeGen/Thumb2/mve-pred-vselect.ll
index a990cee1f5fb31..f70af5661f4c90 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-vselect.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-vselect.ll
@@ -70,27 +70,29 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
; CHECK-LABEL: cmpeqz_v2i1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov r0, r1, d0
+; CHECK-NEXT: vmov r0, r1, d4
; CHECK-NEXT: orrs r0, r1
-; CHECK-NEXT: vmov r1, r2, d2
-; CHECK-NEXT: cset r0, eq
+; CHECK-NEXT: vmov r1, r2, d0
; CHECK-NEXT: orrs r1, r2
-; CHECK-NEXT: vmov r2, r3, d4
+; CHECK-NEXT: vmov r2, r3, d2
; CHECK-NEXT: cset r1, eq
; CHECK-NEXT: orrs r2, r3
-; CHECK-NEXT: csel r0, r0, r1, eq
+; CHECK-NEXT: cset r2, eq
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: csel r0, r1, r2, eq
; CHECK-NEXT: movs r1, #0
; CHECK-NEXT: rsbs r0, r0, #0
; CHECK-NEXT: bfi r1, r0, #0, #8
-; CHECK-NEXT: vmov r0, r2, d1
-; CHECK-NEXT: orrs r0, r2
-; CHECK-NEXT: vmov r2, r3, d3
-; CHECK-NEXT: cset r12, eq
+; CHECK-NEXT: vmov r0, r2, d5
+; CHECK-NEXT: orr.w r12, r0, r2
+; CHECK-NEXT: vmov r2, r3, d1
; CHECK-NEXT: orrs r2, r3
-; CHECK-NEXT: vmov r3, r0, d5
+; CHECK-NEXT: vmov r3, r0, d3
; CHECK-NEXT: cset r2, eq
; CHECK-NEXT: orrs r0, r3
-; CHECK-NEXT: csel r0, r12, r2, eq
+; CHECK-NEXT: cset r0, eq
+; CHECK-NEXT: cmp.w r12, #0
+; CHECK-NEXT: csel r0, r2, r0, eq
; CHECK-NEXT: rsbs r0, r0, #0
; CHECK-NEXT: bfi r1, r0, #8, #8
; CHECK-NEXT: vmsr p0, r1
@@ -174,27 +176,29 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @cmpnez_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
; CHECK-LABEL: cmpnez_v2i1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov r0, r1, d0
+; CHECK-NEXT: vmov r0, r1, d4
; CHECK-NEXT: orrs r0, r1
-; CHECK-NEXT: vmov r1, r2, d2
-; CHECK-NEXT: cset r0, ne
+; CHECK-NEXT: vmov r1, r2, d0
; CHECK-NEXT: orrs r1, r2
-; CHECK-NEXT: vmov r2, r3, d4
+; CHECK-NEXT: vmov r2, r3, d2
; CHECK-NEXT: cset r1, ne
; CHECK-NEXT: orrs r2, r3
-; CHECK-NEXT: csel r0, r0, r1, ne
+; CHECK-NEXT: cset r2, ne
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: csel r0, r1, r2, ne
; CHECK-NEXT: movs r1, #0
; CHECK-NEXT: rsbs r0, r0, #0
; CHECK-NEXT: bfi r1, r0, #0, #8
-; CHECK-NEXT: vmov r0, r2, d1
-; CHECK-NEXT: orrs r0, r2
-; CHECK-NEXT: vmov r2, r3, d3
-; CHECK-NEXT: cset r12, ne
+; CHECK-NEXT: vmov r0, r2, d5
+; CHECK-NEXT: orr.w r12, r0, r2
+; CHECK-NEXT: vmov r2, r3, d1
; CHECK-NEXT: orrs r2, r3
-; CHECK-NEXT: vmov r3, r0, d5
+; CHECK-NEXT: vmov r3, r0, d3
; CHECK-NEXT: cset r2, ne
; CHECK-NEXT: orrs r0, r3
-; CHECK-NEXT: csel r0, r12, r2, ne
+; CHECK-NEXT: cset r0, ne
+; CHECK-NEXT: cmp.w r12, #0
+; CHECK-NEXT: csel r0, r2, r0, ne
; CHECK-NEXT: rsbs r0, r0, #0
; CHECK-NEXT: bfi r1, r0, #8, #8
; CHECK-NEXT: vmsr p0, r1
@@ -280,19 +284,19 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @cmpsltz_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
; CHECK-LABEL: cmpsltz_v2i1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov r2, s9
+; CHECK-NEXT: vmov r0, s9
; CHECK-NEXT: movs r3, #0
-; CHECK-NEXT: vmov r0, s1
-; CHECK-NEXT: vmov r1, s5
-; CHECK-NEXT: cmp.w r3, r2, lsr #31
-; CHECK-NEXT: vmov r2, s7
-; CHECK-NEXT: csel r0, r0, r1, ne
-; CHECK-NEXT: vmov r1, s3
-; CHECK-NEXT: asr.w r12, r0, #31
-; CHECK-NEXT: vmov r0, s11
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r2, s5
; CHECK-NEXT: cmp.w r3, r0, lsr #31
-; CHECK-NEXT: bfi r3, r12, #0, #8
; CHECK-NEXT: csel r0, r1, r2, ne
+; CHECK-NEXT: vmov r1, s11
+; CHECK-NEXT: asr.w r12, r0, #31
+; CHECK-NEXT: vmov r2, s3
+; CHECK-NEXT: vmov r0, s7
+; CHECK-NEXT: cmp.w r3, r1, lsr #31
+; CHECK-NEXT: bfi r3, r12, #0, #8
+; CHECK-NEXT: csel r0, r2, r0, ne
; CHECK-NEXT: asrs r0, r0, #31
; CHECK-NEXT: bfi r3, r0, #8, #8
; CHECK-NEXT: vmsr p0, r3
@@ -377,34 +381,34 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1_i1(<2 x i64> %a, <2 x i64> %b, i64 %c) {
; CHECK-LABEL: cmpeqz_v2i1_i1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r7, lr}
-; CHECK-NEXT: push {r4, r5, r7, lr}
-; CHECK-NEXT: vmov r2, r3, d2
-; CHECK-NEXT: orrs r2, r3
-; CHECK-NEXT: vmov r3, r4, d3
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: orr.w r3, r0, r1
+; CHECK-NEXT: vmov r0, r1, d2
+; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: vmov r1, r2, d3
; CHECK-NEXT: csetm r12, eq
-; CHECK-NEXT: movs r2, #0
-; CHECK-NEXT: orrs r3, r4
-; CHECK-NEXT: vmov r4, r3, d0
-; CHECK-NEXT: csetm r5, eq
-; CHECK-NEXT: orrs r3, r4
-; CHECK-NEXT: vmov r3, r4, d1
-; CHECK-NEXT: csetm lr, eq
-; CHECK-NEXT: orrs r3, r4
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: orrs r1, r2
+; CHECK-NEXT: vmov r1, r2, d0
; CHECK-NEXT: csetm r4, eq
-; CHECK-NEXT: orrs r0, r1
-; CHECK-NEXT: beq .LBB15_2
+; CHECK-NEXT: orrs r1, r2
+; CHECK-NEXT: vmov r1, r2, d1
+; CHECK-NEXT: csetm lr, eq
+; CHECK-NEXT: orrs r1, r2
+; CHECK-NEXT: csetm r1, eq
+; CHECK-NEXT: cbz r3, .LBB15_2
; CHECK-NEXT: @ %bb.1: @ %select.false
-; CHECK-NEXT: bfi r2, r12, #0, #8
-; CHECK-NEXT: bfi r2, r5, #8, #8
+; CHECK-NEXT: bfi r0, r12, #0, #8
+; CHECK-NEXT: bfi r0, r4, #8, #8
; CHECK-NEXT: b .LBB15_3
; CHECK-NEXT: .LBB15_2:
-; CHECK-NEXT: bfi r2, lr, #0, #8
-; CHECK-NEXT: bfi r2, r4, #8, #8
+; CHECK-NEXT: bfi r0, lr, #0, #8
+; CHECK-NEXT: bfi r0, r1, #8, #8
; CHECK-NEXT: .LBB15_3: @ %select.end
-; CHECK-NEXT: vmsr p0, r2
+; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpsel q0, q0, q1
-; CHECK-NEXT: pop {r4, r5, r7, pc}
+; CHECK-NEXT: pop {r4, pc}
entry:
%c1 = icmp eq <2 x i64> %a, zeroinitializer
%c2 = icmp eq <2 x i64> %b, zeroinitializer
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll b/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll
index f4a0d5120305a1..0ff262e6b53ab5 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll
@@ -458,24 +458,26 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: cmpeqz_v2i1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov r0, r1, d2
+; CHECK-NEXT: vmov r0, r1, d0
; CHECK-NEXT: orrs r0, r1
-; CHECK-NEXT: vmov r1, r2, d0
-; CHECK-NEXT: cset r0, eq
+; CHECK-NEXT: vmov r1, r2, d2
; CHECK-NEXT: orrs r1, r2
+; CHECK-NEXT: cset r1, eq
+; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it eq
-; CHECK-NEXT: eoreq r0, r0, #1
-; CHECK-NEXT: rsbs r0, r0, #0
+; CHECK-NEXT: eoreq r1, r1, #1
+; CHECK-NEXT: rsbs r0, r1, #0
; CHECK-NEXT: movs r1, #0
; CHECK-NEXT: bfi r1, r0, #0, #8
-; CHECK-NEXT: vmov r0, r2, d3
+; CHECK-NEXT: vmov r0, r2, d1
; CHECK-NEXT: orrs r0, r2
-; CHECK-NEXT: vmov r2, r3, d1
-; CHECK-NEXT: cset r0, eq
+; CHECK-NEXT: vmov r2, r3, d3
; CHECK-NEXT: orrs r2, r3
+; CHECK-NEXT: cset r2, eq
+; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it eq
-; CHECK-NEXT: eoreq r0, r0, #1
-; CHECK-NEXT: rsbs r0, r0, #0
+; CHECK-NEXT: eoreq r2, r2, #1
+; CHECK-NEXT: rsbs r0, r2, #0
; CHECK-NEXT: bfi r1, r0, #8, #8
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpsel q0, q0, q1
diff --git a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
index 29b56639bd7698..8eb941371f9937 100644
--- a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
@@ -6,99 +6,102 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(ptr nocapture readonly %pSrcA, ptr no
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-NEXT: .pad #8
-; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: .pad #12
+; CHECK-NEXT: sub sp, #12
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: beq.w .LBB0_8
; CHECK-NEXT: @ %bb.1: @ %entry
-; CHECK-NEXT: mov r11, r2
; CHECK-NEXT: cmp r3, #1
; CHECK-NEXT: bne .LBB0_3
; CHECK-NEXT: @ %bb.2:
-; CHECK-NEXT: movs r2, #0
+; CHECK-NEXT: movs r7, #0
; CHECK-NEXT: mov r12, r0
; CHECK-NEXT: mov r8, r1
-; CHECK-NEXT: mov r10, r11
+; CHECK-NEXT: mov r10, r2
; CHECK-NEXT: b .LBB0_6
; CHECK-NEXT: .LBB0_3: @ %vector.ph
-; CHECK-NEXT: bic r2, r3, #1
-; CHECK-NEXT: adr r4, .LCPI0_0
-; CHECK-NEXT: subs r7, r2, #2
-; CHECK-NEXT: movs r6, #1
; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: add.w r10, r11, r2, lsl #2
+; CHECK-NEXT: bic r3, r3, #1
+; CHECK-NEXT: subs r7, r3, #2
+; CHECK-NEXT: movs r6, #1
+; CHECK-NEXT: adr r4, .LCPI0_0
+; CHECK-NEXT: str r3, [sp] @ 4-byte Spill
; CHECK-NEXT: add.w lr, r6, r7, lsr #1
-; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
-; CHECK-NEXT: add.w r8, r1, r2, lsl #2
-; CHECK-NEXT: add.w r12, r0, r2, lsl #2
+; CHECK-NEXT: add.w r10, r2, r3, lsl #2
+; CHECK-NEXT: add.w r8, r1, r3, lsl #2
+; CHECK-NEXT: add.w r12, r0, r3, lsl #2
; CHECK-NEXT: vldrw.u32 q0, [r4]
; CHECK-NEXT: vmvn.i32 q1, #0x80000000
; CHECK-NEXT: .LBB0_4: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldrd r4, r2, [r0], #8
+; CHECK-NEXT: ldrd r4, r3, [r0], #8
; CHECK-NEXT: movs r5, #0
; CHECK-NEXT: ldrd r7, r6, [r1], #8
-; CHECK-NEXT: smull r4, r7, r7, r4
-; CHECK-NEXT: asrl r4, r7, #31
+; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: smull r4, r11, r7, r4
+; CHECK-NEXT: asrl r4, r11, #31
; CHECK-NEXT: rsbs.w r9, r4, #-2147483648
; CHECK-NEXT: mov.w r9, #-1
-; CHECK-NEXT: sbcs.w r3, r9, r7
+; CHECK-NEXT: sbcs.w r3, r9, r11
; CHECK-NEXT: csetm r3, lt
; CHECK-NEXT: bfi r5, r3, #0, #8
-; CHECK-NEXT: smull r2, r3, r6, r2
-; CHECK-NEXT: asrl r2, r3, #31
-; CHECK-NEXT: rsbs.w r6, r2, #-2147483648
-; CHECK-NEXT: vmov q2[2], q2[0], r4, r2
-; CHECK-NEXT: sbcs.w r6, r9, r3
-; CHECK-NEXT: vmov q2[3], q2[1], r7, r3
-; CHECK-NEXT: csetm r6, lt
-; CHECK-NEXT: bfi r5, r6, #8, #8
+; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: smull r6, r3, r6, r3
+; CHECK-NEXT: asrl r6, r3, #31
+; CHECK-NEXT: rsbs.w r7, r6, #-2147483648
+; CHECK-NEXT: vmov q2[2], q2[0], r4, r6
+; CHECK-NEXT: sbcs.w r7, r9, r3
+; CHECK-NEXT: vmov q2[3], q2[1], r11, r3
+; CHECK-NEXT: csetm r7, lt
+; CHECK-NEXT: mvn r6, #-2147483648
+; CHECK-NEXT: bfi r5, r7, #8, #8
; CHECK-NEXT: vmsr p0, r5
-; CHECK-NEXT: mvn r5, #-2147483648
; CHECK-NEXT: vpsel q2, q2, q0
-; CHECK-NEXT: vmov r2, r3, d4
-; CHECK-NEXT: subs r2, r2, r5
-; CHECK-NEXT: sbcs r2, r3, #0
-; CHECK-NEXT: mov.w r3, #0
-; CHECK-NEXT: csetm r2, lt
-; CHECK-NEXT: bfi r3, r2, #0, #8
-; CHECK-NEXT: vmov r2, r4, d5
-; CHECK-NEXT: subs r2, r2, r5
-; CHECK-NEXT: sbcs r2, r4, #0
-; CHECK-NEXT: csetm r2, lt
-; CHECK-NEXT: bfi r3, r2, #8, #8
-; CHECK-NEXT: vmsr p0, r3
+; CHECK-NEXT: vmov r3, r4, d4
+; CHECK-NEXT: subs r3, r3, r6
+; CHECK-NEXT: sbcs r3, r4, #0
+; CHECK-NEXT: mov.w r4, #0
+; CHECK-NEXT: csetm r3, lt
+; CHECK-NEXT: bfi r4, r3, #0, #8
+; CHECK-NEXT: vmov r3, r5, d5
+; CHECK-NEXT: subs r3, r3, r6
+; CHECK-NEXT: sbcs r3, r5, #0
+; CHECK-NEXT: csetm r3, lt
+; CHECK-NEXT: bfi r4, r3, #8, #8
+; CHECK-NEXT: vmsr p0, r4
; CHECK-NEXT: vpsel q2, q2, q1
-; CHECK-NEXT: vmov r2, s10
-; CHECK-NEXT: vmov r3, s8
-; CHECK-NEXT: strd r3, r2, [r11], #8
+; CHECK-NEXT: vmov r3, s10
+; CHECK-NEXT: vmov r4, s8
+; CHECK-NEXT: strd r4, r3, [r2], #8
; CHECK-NEXT: le lr, .LBB0_4
; CHECK-NEXT: @ %bb.5: @ %middle.block
-; CHECK-NEXT: ldrd r2, r3, [sp] @ 8-byte Folded Reload
-; CHECK-NEXT: cmp r2, r3
+; CHECK-NEXT: ldrd r7, r3, [sp] @ 8-byte Folded Reload
+; CHECK-NEXT: cmp r7, r3
; CHECK-NEXT: beq .LBB0_8
; CHECK-NEXT: .LBB0_6: @ %for.body.preheader
-; CHECK-NEXT: sub.w lr, r3, r2
+; CHECK-NEXT: sub.w lr, r3, r7
; CHECK-NEXT: mov.w r0, #-1
; CHECK-NEXT: mov.w r1, #-2147483648
-; CHECK-NEXT: mvn r3, #-2147483648
+; CHECK-NEXT: mvn r2, #-2147483648
; CHECK-NEXT: .LBB0_7: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr r2, [r12], #4
+; CHECK-NEXT: ldr r3, [r12], #4
; CHECK-NEXT: ldr r4, [r8], #4
-; CHECK-NEXT: smull r2, r5, r4, r2
-; CHECK-NEXT: asrl r2, r5, #31
-; CHECK-NEXT: subs r4, r1, r2
-; CHECK-NEXT: sbcs.w r4, r0, r5
-; CHECK-NEXT: csel r2, r2, r1, lt
-; CHECK-NEXT: csel r4, r5, r0, lt
-; CHECK-NEXT: subs r5, r2, r3
-; CHECK-NEXT: sbcs r4, r4, #0
-; CHECK-NEXT: csel r2, r2, r3, lt
-; CHECK-NEXT: str r2, [r10], #4
+; CHECK-NEXT: smull r4, r3, r4, r3
+; CHECK-NEXT: asrl r4, r3, #31
+; CHECK-NEXT: subs r5, r1, r4
+; CHECK-NEXT: sbcs.w r5, r0, r3
+; CHECK-NEXT: cset r5, lt
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: csel r4, r4, r1, ne
+; CHECK-NEXT: csel r3, r3, r0, ne
+; CHECK-NEXT: subs r5, r4, r2
+; CHECK-NEXT: sbcs r3, r3, #0
+; CHECK-NEXT: csel r3, r4, r2, lt
+; CHECK-NEXT: str r3, [r10], #4
; CHECK-NEXT: le lr, .LBB0_7
; CHECK-NEXT: .LBB0_8: @ %for.cond.cleanup
-; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: add sp, #12
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.9:
@@ -319,8 +322,10 @@ define arm_aapcs_vfpcc void @ssatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no
; CHECK-NEXT: asrl r4, r1, #31
; CHECK-NEXT: subs r5, r3, r4
; CHECK-NEXT: sbcs.w r5, r0, r1
-; CHECK-NEXT: csel r4, r4, r3, lt
-; CHECK-NEXT: csel r1, r1, r0, lt
+; CHECK-NEXT: cset r5, lt
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: csel r4, r4, r3, ne
+; CHECK-NEXT: csel r1, r1, r0, ne
; CHECK-NEXT: subs r5, r4, r2
; CHECK-NEXT: sbcs r1, r1, #0
; CHECK-NEXT: csel r1, r4, r2, lt
diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll
index cc856730d90cf1..6f2539e3cad9aa 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll
@@ -5,25 +5,25 @@
define arm_aapcs_vfpcc <4 x float> @vcmp_oeq_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_oeq_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, s4
+; CHECK-MVE-NEXT: vcmp.f32 s1, s5
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s7
+; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: cset r0, eq
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s5
+; CHECK-MVE-NEXT: vcmp.f32 s3, s7
; CHECK-MVE-NEXT: cset r1, eq
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, s6
; CHECK-MVE-NEXT: cset r2, eq
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, eq
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -41,13 +41,13 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_one_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, s4
+; CHECK-MVE-NEXT: vcmp.f32 s1, s5
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s7
+; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: csinc r0, r0, zr, le
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s5
+; CHECK-MVE-NEXT: vcmp.f32 s3, s7
; CHECK-MVE-NEXT: cset r1, mi
; CHECK-MVE-NEXT: csinc r1, r1, zr, le
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
@@ -57,13 +57,13 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, <4 x float>
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, mi
; CHECK-MVE-NEXT: csinc r3, r3, zr, le
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -82,25 +82,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_ogt_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_ogt_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, s4
+; CHECK-MVE-NEXT: vcmp.f32 s1, s5
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s7
+; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: cset r0, gt
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s5
+; CHECK-MVE-NEXT: vcmp.f32 s3, s7
; CHECK-MVE-NEXT: cset r1, gt
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, s6
; CHECK-MVE-NEXT: cset r2, gt
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, gt
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -118,25 +118,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_oge_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_oge_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, s4
+; CHECK-MVE-NEXT: vcmp.f32 s1, s5
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s7
+; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: cset r0, ge
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s5
+; CHECK-MVE-NEXT: vcmp.f32 s3, s7
; CHECK-MVE-NEXT: cset r1, ge
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, s6
; CHECK-MVE-NEXT: cset r2, ge
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, ge
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -154,25 +154,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_olt_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_olt_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, s4
+; CHECK-MVE-NEXT: vcmp.f32 s1, s5
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s7
+; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s5
+; CHECK-MVE-NEXT: vcmp.f32 s3, s7
; CHECK-MVE-NEXT: cset r1, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, s6
; CHECK-MVE-NEXT: cset r2, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, mi
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -190,25 +190,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_ole_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_ole_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, s4
+; CHECK-MVE-NEXT: vcmp.f32 s1, s5
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s7
+; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: cset r0, ls
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s5
+; CHECK-MVE-NEXT: vcmp.f32 s3, s7
; CHECK-MVE-NEXT: cset r1, ls
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, s6
; CHECK-MVE-NEXT: cset r2, ls
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, ls
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -226,13 +226,13 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_ueq_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_ueq_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, s4
+; CHECK-MVE-NEXT: vcmp.f32 s1, s5
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s7
+; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: cset r0, eq
; CHECK-MVE-NEXT: csinc r0, r0, zr, vc
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s5
+; CHECK-MVE-NEXT: vcmp.f32 s3, s7
; CHECK-MVE-NEXT: cset r1, eq
; CHECK-MVE-NEXT: csinc r1, r1, zr, vc
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
@@ -242,13 +242,13 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ueq_v4f32(<4 x float> %src, <4 x float>
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, eq
; CHECK-MVE-NEXT: csinc r3, r3, zr, vc
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -295,25 +295,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_ugt_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_ugt_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, s4
+; CHECK-MVE-NEXT: vcmp.f32 s1, s5
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s7
+; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: cset r0, hi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s5
+; CHECK-MVE-NEXT: vcmp.f32 s3, s7
; CHECK-MVE-NEXT: cset r1, hi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, s6
; CHECK-MVE-NEXT: cset r2, hi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, hi
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -331,25 +331,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_uge_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_uge_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, s4
+; CHECK-MVE-NEXT: vcmp.f32 s1, s5
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s7
+; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: cset r0, pl
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s5
+; CHECK-MVE-NEXT: vcmp.f32 s3, s7
; CHECK-MVE-NEXT: cset r1, pl
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, s6
; CHECK-MVE-NEXT: cset r2, pl
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, pl
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -367,25 +367,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_ult_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_ult_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, s4
+; CHECK-MVE-NEXT: vcmp.f32 s1, s5
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s7
+; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: cset r0, lt
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s5
+; CHECK-MVE-NEXT: vcmp.f32 s3, s7
; CHECK-MVE-NEXT: cset r1, lt
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, s6
; CHECK-MVE-NEXT: cset r2, lt
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, lt
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -403,25 +403,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_ule_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_ule_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, s4
+; CHECK-MVE-NEXT: vcmp.f32 s1, s5
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s7
+; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: cset r0, le
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s5
+; CHECK-MVE-NEXT: vcmp.f32 s3, s7
; CHECK-MVE-NEXT: cset r1, le
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, s6
; CHECK-MVE-NEXT: cset r2, le
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, le
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -439,25 +439,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_ord_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_ord_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, s4
+; CHECK-MVE-NEXT: vcmp.f32 s1, s5
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s7
+; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: cset r0, vc
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s5
+; CHECK-MVE-NEXT: vcmp.f32 s3, s7
; CHECK-MVE-NEXT: cset r1, vc
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, s6
; CHECK-MVE-NEXT: cset r2, vc
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, vc
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -476,25 +476,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_uno_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_uno_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, s4
+; CHECK-MVE-NEXT: vcmp.f32 s1, s5
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s7
+; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: cset r0, vs
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s5
+; CHECK-MVE-NEXT: vcmp.f32 s3, s7
; CHECK-MVE-NEXT: cset r1, vs
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, s6
; CHECK-MVE-NEXT: cset r2, vs
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, vs
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
index 586b731c934be3..d42c393743f4f3 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
@@ -5,25 +5,25 @@
define arm_aapcs_vfpcc <4 x float> @vcmp_oeq_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_oeq_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, s4
+; CHECK-MVE-NEXT: vcmp.f32 s1, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s4
+; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: cset r0, eq
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s4
+; CHECK-MVE-NEXT: vcmp.f32 s3, s4
; CHECK-MVE-NEXT: cset r1, eq
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, s4
; CHECK-MVE-NEXT: cset r2, eq
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, eq
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -44,13 +44,13 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_one_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, s4
+; CHECK-MVE-NEXT: vcmp.f32 s1, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s4
+; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: csinc r0, r0, zr, le
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s4
+; CHECK-MVE-NEXT: vcmp.f32 s3, s4
; CHECK-MVE-NEXT: cset r1, mi
; CHECK-MVE-NEXT: csinc r1, r1, zr, le
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
@@ -60,13 +60,13 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, float %src2
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, mi
; CHECK-MVE-NEXT: csinc r3, r3, zr, le
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -88,25 +88,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_ogt_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_ogt_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, s4
+; CHECK-MVE-NEXT: vcmp.f32 s1, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s4
+; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: cset r0, gt
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s4
+; CHECK-MVE-NEXT: vcmp.f32 s3, s4
; CHECK-MVE-NEXT: cset r1, gt
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, s4
; CHECK-MVE-NEXT: cset r2, gt
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, gt
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -127,25 +127,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_oge_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_oge_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, s4
+; CHECK-MVE-NEXT: vcmp.f32 s1, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s4
+; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: cset r0, ge
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s4
+; CHECK-MVE-NEXT: vcmp.f32 s3, s4
; CHECK-MVE-NEXT: cset r1, ge
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, s4
; CHECK-MVE-NEXT: cset r2, ge
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, ge
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -166,25 +166,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_olt_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_olt_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, s4
+; CHECK-MVE-NEXT: vcmp.f32 s1, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s4
+; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s4
+; CHECK-MVE-NEXT: vcmp.f32 s3, s4
; CHECK-MVE-NEXT: cset r1, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, s4
; CHECK-MVE-NEXT: cset r2, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, mi
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -205,25 +205,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_ole_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_ole_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, s4
+; CHECK-MVE-NEXT: vcmp.f32 s1, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s4
+; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: cset r0, ls
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s4
+; CHECK-MVE-NEXT: vcmp.f32 s3, s4
; CHECK-MVE-NEXT: cset r1, ls
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, s4
; CHECK-MVE-NEXT: cset r2, ls
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, ls
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -244,13 +244,13 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_ueq_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_ueq_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, s4
+; CHECK-MVE-NEXT: vcmp.f32 s1, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s4
+; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: cset r0, eq
; CHECK-MVE-NEXT: csinc r0, r0, zr, vc
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s4
+; CHECK-MVE-NEXT: vcmp.f32 s3, s4
; CHECK-MVE-NEXT: cset r1, eq
; CHECK-MVE-NEXT: csinc r1, r1, zr, vc
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
@@ -260,13 +260,13 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ueq_v4f32(<4 x float> %src, float %src2
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, eq
; CHECK-MVE-NEXT: csinc r3, r3, zr, vc
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -319,25 +319,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_ugt_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_ugt_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, s4
+; CHECK-MVE-NEXT: vcmp.f32 s1, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s4
+; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: cset r0, hi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s4
+; CHECK-MVE-NEXT: vcmp.f32 s3, s4
; CHECK-MVE-NEXT: cset r1, hi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, s4
; CHECK-MVE-NEXT: cset r2, hi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, hi
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -358,25 +358,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_uge_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_uge_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, s4
+; CHECK-MVE-NEXT: vcmp.f32 s1, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s4
+; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: cset r0, pl
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s4
+; CHECK-MVE-NEXT: vcmp.f32 s3, s4
; CHECK-MVE-NEXT: cset r1, pl
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, s4
; CHECK-MVE-NEXT: cset r2, pl
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, pl
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -397,25 +397,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_ult_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_ult_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, s4
+; CHECK-MVE-NEXT: vcmp.f32 s1, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s4
+; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: cset r0, lt
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s4
+; CHECK-MVE-NEXT: vcmp.f32 s3, s4
; CHECK-MVE-NEXT: cset r1, lt
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, s4
; CHECK-MVE-NEXT: cset r2, lt
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, lt
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -436,25 +436,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_ule_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_ule_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, s4
+; CHECK-MVE-NEXT: vcmp.f32 s1, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s4
+; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: cset r0, le
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s4
+; CHECK-MVE-NEXT: vcmp.f32 s3, s4
; CHECK-MVE-NEXT: cset r1, le
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, s4
; CHECK-MVE-NEXT: cset r2, le
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, le
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -475,25 +475,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_ord_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_ord_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, s4
+; CHECK-MVE-NEXT: vcmp.f32 s1, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s4
+; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: cset r0, vc
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s4
+; CHECK-MVE-NEXT: vcmp.f32 s3, s4
; CHECK-MVE-NEXT: cset r1, vc
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, s4
; CHECK-MVE-NEXT: cset r2, vc
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, vc
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -515,25 +515,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_uno_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_uno_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, s4
+; CHECK-MVE-NEXT: vcmp.f32 s1, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s4
+; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: cset r0, vs
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s4
+; CHECK-MVE-NEXT: vcmp.f32 s3, s4
; CHECK-MVE-NEXT: cset r1, vs
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, s4
; CHECK-MVE-NEXT: cset r2, vs
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, vs
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -1614,25 +1614,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_r_oeq_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_r_oeq_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s4, s0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s1
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s4, s3
+; CHECK-MVE-NEXT: vcmp.f32 s4, s0
; CHECK-MVE-NEXT: cset r0, eq
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s4, s1
+; CHECK-MVE-NEXT: vcmp.f32 s4, s3
; CHECK-MVE-NEXT: cset r1, eq
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s4, s2
; CHECK-MVE-NEXT: cset r2, eq
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, eq
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -1653,13 +1653,13 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_r_one_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_r_one_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s4, s0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s1
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s4, s3
+; CHECK-MVE-NEXT: vcmp.f32 s4, s0
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: csinc r0, r0, zr, le
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s4, s1
+; CHECK-MVE-NEXT: vcmp.f32 s4, s3
; CHECK-MVE-NEXT: cset r1, mi
; CHECK-MVE-NEXT: csinc r1, r1, zr, le
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
@@ -1669,13 +1669,13 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_one_v4f32(<4 x float> %src, float %sr
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, mi
; CHECK-MVE-NEXT: csinc r3, r3, zr, le
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -1697,25 +1697,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_r_ogt_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_r_ogt_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s4, s0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s1
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s4, s3
+; CHECK-MVE-NEXT: vcmp.f32 s4, s0
; CHECK-MVE-NEXT: cset r0, gt
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s4, s1
+; CHECK-MVE-NEXT: vcmp.f32 s4, s3
; CHECK-MVE-NEXT: cset r1, gt
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s4, s2
; CHECK-MVE-NEXT: cset r2, gt
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, gt
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -1736,25 +1736,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_r_oge_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_r_oge_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s4, s0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s1
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s4, s3
+; CHECK-MVE-NEXT: vcmp.f32 s4, s0
; CHECK-MVE-NEXT: cset r0, ge
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s4, s1
+; CHECK-MVE-NEXT: vcmp.f32 s4, s3
; CHECK-MVE-NEXT: cset r1, ge
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s4, s2
; CHECK-MVE-NEXT: cset r2, ge
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, ge
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -1775,25 +1775,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_r_olt_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_r_olt_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s4, s0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s1
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s4, s3
+; CHECK-MVE-NEXT: vcmp.f32 s4, s0
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s4, s1
+; CHECK-MVE-NEXT: vcmp.f32 s4, s3
; CHECK-MVE-NEXT: cset r1, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s4, s2
; CHECK-MVE-NEXT: cset r2, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, mi
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -1814,25 +1814,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_r_ole_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_r_ole_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s4, s0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s1
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s4, s3
+; CHECK-MVE-NEXT: vcmp.f32 s4, s0
; CHECK-MVE-NEXT: cset r0, ls
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s4, s1
+; CHECK-MVE-NEXT: vcmp.f32 s4, s3
; CHECK-MVE-NEXT: cset r1, ls
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s4, s2
; CHECK-MVE-NEXT: cset r2, ls
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, ls
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -1853,13 +1853,13 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_r_ueq_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_r_ueq_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s4, s0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s1
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s4, s3
+; CHECK-MVE-NEXT: vcmp.f32 s4, s0
; CHECK-MVE-NEXT: cset r0, eq
; CHECK-MVE-NEXT: csinc r0, r0, zr, vc
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s4, s1
+; CHECK-MVE-NEXT: vcmp.f32 s4, s3
; CHECK-MVE-NEXT: cset r1, eq
; CHECK-MVE-NEXT: csinc r1, r1, zr, vc
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
@@ -1869,13 +1869,13 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_ueq_v4f32(<4 x float> %src, float %sr
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, eq
; CHECK-MVE-NEXT: csinc r3, r3, zr, vc
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -1928,25 +1928,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_r_ugt_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_r_ugt_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s4, s0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s1
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s4, s3
+; CHECK-MVE-NEXT: vcmp.f32 s4, s0
; CHECK-MVE-NEXT: cset r0, hi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s4, s1
+; CHECK-MVE-NEXT: vcmp.f32 s4, s3
; CHECK-MVE-NEXT: cset r1, hi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s4, s2
; CHECK-MVE-NEXT: cset r2, hi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, hi
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -1967,25 +1967,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_r_uge_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_r_uge_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s4, s0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s1
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s4, s3
+; CHECK-MVE-NEXT: vcmp.f32 s4, s0
; CHECK-MVE-NEXT: cset r0, pl
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s4, s1
+; CHECK-MVE-NEXT: vcmp.f32 s4, s3
; CHECK-MVE-NEXT: cset r1, pl
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s4, s2
; CHECK-MVE-NEXT: cset r2, pl
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, pl
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -2006,25 +2006,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_r_ult_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_r_ult_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s4, s0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s1
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s4, s3
+; CHECK-MVE-NEXT: vcmp.f32 s4, s0
; CHECK-MVE-NEXT: cset r0, lt
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s4, s1
+; CHECK-MVE-NEXT: vcmp.f32 s4, s3
; CHECK-MVE-NEXT: cset r1, lt
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s4, s2
; CHECK-MVE-NEXT: cset r2, lt
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, lt
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -2045,25 +2045,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_r_ule_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_r_ule_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s4, s0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s1
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s4, s3
+; CHECK-MVE-NEXT: vcmp.f32 s4, s0
; CHECK-MVE-NEXT: cset r0, le
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s4, s1
+; CHECK-MVE-NEXT: vcmp.f32 s4, s3
; CHECK-MVE-NEXT: cset r1, le
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s4, s2
; CHECK-MVE-NEXT: cset r2, le
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, le
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -2084,25 +2084,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_r_ord_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_r_ord_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s4, s0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s1
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s4, s3
+; CHECK-MVE-NEXT: vcmp.f32 s4, s0
; CHECK-MVE-NEXT: cset r0, vc
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s4, s1
+; CHECK-MVE-NEXT: vcmp.f32 s4, s3
; CHECK-MVE-NEXT: cset r1, vc
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s4, s2
; CHECK-MVE-NEXT: cset r2, vc
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, vc
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
@@ -2124,25 +2124,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_r_uno_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_r_uno_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s4, s0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s1
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s4, s3
+; CHECK-MVE-NEXT: vcmp.f32 s4, s0
; CHECK-MVE-NEXT: cset r0, vs
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s4, s1
+; CHECK-MVE-NEXT: vcmp.f32 s4, s3
; CHECK-MVE-NEXT: cset r1, vs
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s4, s2
; CHECK-MVE-NEXT: cset r2, vs
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, vs
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll
index de8b413bf24e55..718657839d38db 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll
@@ -5,25 +5,25 @@
define arm_aapcs_vfpcc <4 x float> @vcmp_oeq_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_oeq_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, #0
+; CHECK-MVE-NEXT: vcmp.f32 s1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s0, #0
; CHECK-MVE-NEXT: cset r0, eq
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s3, #0
; CHECK-MVE-NEXT: cset r1, eq
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, #0
; CHECK-MVE-NEXT: cset r2, eq
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, eq
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4
; CHECK-MVE-NEXT: bx lr
;
@@ -41,13 +41,13 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_one_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, #0
+; CHECK-MVE-NEXT: vcmp.f32 s1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s0, #0
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: csinc r0, r0, zr, le
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s3, #0
; CHECK-MVE-NEXT: cset r1, mi
; CHECK-MVE-NEXT: csinc r1, r1, zr, le
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
@@ -57,13 +57,13 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, <4 x float>
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, mi
; CHECK-MVE-NEXT: csinc r3, r3, zr, le
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4
; CHECK-MVE-NEXT: bx lr
;
@@ -82,25 +82,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_ogt_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_ogt_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, #0
+; CHECK-MVE-NEXT: vcmp.f32 s1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s0, #0
; CHECK-MVE-NEXT: cset r0, gt
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s3, #0
; CHECK-MVE-NEXT: cset r1, gt
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, #0
; CHECK-MVE-NEXT: cset r2, gt
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, gt
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4
; CHECK-MVE-NEXT: bx lr
;
@@ -118,25 +118,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_oge_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_oge_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, #0
+; CHECK-MVE-NEXT: vcmp.f32 s1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s0, #0
; CHECK-MVE-NEXT: cset r0, ge
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s3, #0
; CHECK-MVE-NEXT: cset r1, ge
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, #0
; CHECK-MVE-NEXT: cset r2, ge
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, ge
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4
; CHECK-MVE-NEXT: bx lr
;
@@ -154,25 +154,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_olt_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_olt_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, #0
+; CHECK-MVE-NEXT: vcmp.f32 s1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s0, #0
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s3, #0
; CHECK-MVE-NEXT: cset r1, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, #0
; CHECK-MVE-NEXT: cset r2, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, mi
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4
; CHECK-MVE-NEXT: bx lr
;
@@ -190,25 +190,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_ole_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_ole_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, #0
+; CHECK-MVE-NEXT: vcmp.f32 s1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s0, #0
; CHECK-MVE-NEXT: cset r0, ls
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s3, #0
; CHECK-MVE-NEXT: cset r1, ls
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, #0
; CHECK-MVE-NEXT: cset r2, ls
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, ls
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4
; CHECK-MVE-NEXT: bx lr
;
@@ -226,13 +226,13 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_ueq_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_ueq_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, #0
+; CHECK-MVE-NEXT: vcmp.f32 s1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s0, #0
; CHECK-MVE-NEXT: cset r0, eq
; CHECK-MVE-NEXT: csinc r0, r0, zr, vc
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s3, #0
; CHECK-MVE-NEXT: cset r1, eq
; CHECK-MVE-NEXT: csinc r1, r1, zr, vc
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
@@ -242,13 +242,13 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ueq_v4f32(<4 x float> %src, <4 x float>
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, eq
; CHECK-MVE-NEXT: csinc r3, r3, zr, vc
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4
; CHECK-MVE-NEXT: bx lr
;
@@ -295,25 +295,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_ugt_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_ugt_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, #0
+; CHECK-MVE-NEXT: vcmp.f32 s1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s0, #0
; CHECK-MVE-NEXT: cset r0, hi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s3, #0
; CHECK-MVE-NEXT: cset r1, hi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, #0
; CHECK-MVE-NEXT: cset r2, hi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, hi
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4
; CHECK-MVE-NEXT: bx lr
;
@@ -331,25 +331,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_uge_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_uge_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, #0
+; CHECK-MVE-NEXT: vcmp.f32 s1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s0, #0
; CHECK-MVE-NEXT: cset r0, pl
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s3, #0
; CHECK-MVE-NEXT: cset r1, pl
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, #0
; CHECK-MVE-NEXT: cset r2, pl
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, pl
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4
; CHECK-MVE-NEXT: bx lr
;
@@ -367,25 +367,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_ult_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_ult_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, #0
+; CHECK-MVE-NEXT: vcmp.f32 s1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s0, #0
; CHECK-MVE-NEXT: cset r0, lt
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s3, #0
; CHECK-MVE-NEXT: cset r1, lt
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, #0
; CHECK-MVE-NEXT: cset r2, lt
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, lt
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4
; CHECK-MVE-NEXT: bx lr
;
@@ -403,25 +403,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_ule_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_ule_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, #0
+; CHECK-MVE-NEXT: vcmp.f32 s1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s0, #0
; CHECK-MVE-NEXT: cset r0, le
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s3, #0
; CHECK-MVE-NEXT: cset r1, le
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, #0
; CHECK-MVE-NEXT: cset r2, le
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, le
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4
; CHECK-MVE-NEXT: bx lr
;
@@ -439,25 +439,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_ord_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_ord_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, s0
+; CHECK-MVE-NEXT: vcmp.f32 s1, s1
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s3
+; CHECK-MVE-NEXT: vcmp.f32 s0, s0
; CHECK-MVE-NEXT: cset r0, vc
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s1
+; CHECK-MVE-NEXT: vcmp.f32 s3, s3
; CHECK-MVE-NEXT: cset r1, vc
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, s2
; CHECK-MVE-NEXT: cset r2, vc
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, vc
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4
; CHECK-MVE-NEXT: bx lr
;
@@ -476,25 +476,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_uno_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_uno_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, s0
+; CHECK-MVE-NEXT: vcmp.f32 s1, s1
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s3
+; CHECK-MVE-NEXT: vcmp.f32 s0, s0
; CHECK-MVE-NEXT: cset r0, vs
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s1
+; CHECK-MVE-NEXT: vcmp.f32 s3, s3
; CHECK-MVE-NEXT: cset r1, vs
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, s2
; CHECK-MVE-NEXT: cset r2, vs
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, vs
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4
; CHECK-MVE-NEXT: bx lr
;
@@ -1530,25 +1530,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_r_oeq_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_r_oeq_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, #0
+; CHECK-MVE-NEXT: vcmp.f32 s1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s0, #0
; CHECK-MVE-NEXT: cset r0, eq
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s3, #0
; CHECK-MVE-NEXT: cset r1, eq
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, #0
; CHECK-MVE-NEXT: cset r2, eq
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, eq
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4
; CHECK-MVE-NEXT: bx lr
;
@@ -1566,13 +1566,13 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_r_one_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_r_one_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, #0
+; CHECK-MVE-NEXT: vcmp.f32 s1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s0, #0
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: csinc r0, r0, zr, le
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s3, #0
; CHECK-MVE-NEXT: cset r1, mi
; CHECK-MVE-NEXT: csinc r1, r1, zr, le
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
@@ -1582,13 +1582,13 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_one_v4f32(<4 x float> %src, <4 x floa
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, mi
; CHECK-MVE-NEXT: csinc r3, r3, zr, le
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4
; CHECK-MVE-NEXT: bx lr
;
@@ -1607,25 +1607,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_r_ogt_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_r_ogt_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, #0
+; CHECK-MVE-NEXT: vcmp.f32 s1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s0, #0
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s3, #0
; CHECK-MVE-NEXT: cset r1, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, #0
; CHECK-MVE-NEXT: cset r2, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, mi
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4
; CHECK-MVE-NEXT: bx lr
;
@@ -1643,25 +1643,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_r_oge_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_r_oge_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, #0
+; CHECK-MVE-NEXT: vcmp.f32 s1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s0, #0
; CHECK-MVE-NEXT: cset r0, ls
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s3, #0
; CHECK-MVE-NEXT: cset r1, ls
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, #0
; CHECK-MVE-NEXT: cset r2, ls
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, ls
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4
; CHECK-MVE-NEXT: bx lr
;
@@ -1679,25 +1679,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_r_olt_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_r_olt_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, #0
+; CHECK-MVE-NEXT: vcmp.f32 s1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s0, #0
; CHECK-MVE-NEXT: cset r0, gt
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s3, #0
; CHECK-MVE-NEXT: cset r1, gt
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, #0
; CHECK-MVE-NEXT: cset r2, gt
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, gt
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4
; CHECK-MVE-NEXT: bx lr
;
@@ -1715,25 +1715,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_r_ole_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_r_ole_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, #0
+; CHECK-MVE-NEXT: vcmp.f32 s1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s0, #0
; CHECK-MVE-NEXT: cset r0, ge
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s3, #0
; CHECK-MVE-NEXT: cset r1, ge
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, #0
; CHECK-MVE-NEXT: cset r2, ge
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, ge
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4
; CHECK-MVE-NEXT: bx lr
;
@@ -1751,13 +1751,13 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_r_ueq_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_r_ueq_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, #0
+; CHECK-MVE-NEXT: vcmp.f32 s1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s0, #0
; CHECK-MVE-NEXT: cset r0, eq
; CHECK-MVE-NEXT: csinc r0, r0, zr, vc
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s3, #0
; CHECK-MVE-NEXT: cset r1, eq
; CHECK-MVE-NEXT: csinc r1, r1, zr, vc
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
@@ -1767,13 +1767,13 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_ueq_v4f32(<4 x float> %src, <4 x floa
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, eq
; CHECK-MVE-NEXT: csinc r3, r3, zr, vc
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4
; CHECK-MVE-NEXT: bx lr
;
@@ -1820,25 +1820,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_r_ugt_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_r_ugt_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, #0
+; CHECK-MVE-NEXT: vcmp.f32 s1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s0, #0
; CHECK-MVE-NEXT: cset r0, lt
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s3, #0
; CHECK-MVE-NEXT: cset r1, lt
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, #0
; CHECK-MVE-NEXT: cset r2, lt
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, lt
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4
; CHECK-MVE-NEXT: bx lr
;
@@ -1856,25 +1856,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_r_uge_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_r_uge_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, #0
+; CHECK-MVE-NEXT: vcmp.f32 s1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s0, #0
; CHECK-MVE-NEXT: cset r0, le
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s3, #0
; CHECK-MVE-NEXT: cset r1, le
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, #0
; CHECK-MVE-NEXT: cset r2, le
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, le
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4
; CHECK-MVE-NEXT: bx lr
;
@@ -1892,25 +1892,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_r_ult_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_r_ult_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, #0
+; CHECK-MVE-NEXT: vcmp.f32 s1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s0, #0
; CHECK-MVE-NEXT: cset r0, hi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s3, #0
; CHECK-MVE-NEXT: cset r1, hi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, #0
; CHECK-MVE-NEXT: cset r2, hi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, hi
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4
; CHECK-MVE-NEXT: bx lr
;
@@ -1928,25 +1928,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_r_ule_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_r_ule_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, #0
+; CHECK-MVE-NEXT: vcmp.f32 s1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s0, #0
; CHECK-MVE-NEXT: cset r0, pl
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s3, #0
; CHECK-MVE-NEXT: cset r1, pl
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, #0
; CHECK-MVE-NEXT: cset r2, pl
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, pl
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4
; CHECK-MVE-NEXT: bx lr
;
@@ -1964,25 +1964,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_r_ord_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_r_ord_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, s0
+; CHECK-MVE-NEXT: vcmp.f32 s1, s1
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s3
+; CHECK-MVE-NEXT: vcmp.f32 s0, s0
; CHECK-MVE-NEXT: cset r0, vc
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s1
+; CHECK-MVE-NEXT: vcmp.f32 s3, s3
; CHECK-MVE-NEXT: cset r1, vc
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, s2
; CHECK-MVE-NEXT: cset r2, vc
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, vc
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4
; CHECK-MVE-NEXT: bx lr
;
@@ -2001,25 +2001,25 @@ entry:
define arm_aapcs_vfpcc <4 x float> @vcmp_r_uno_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_r_uno_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vcmp.f32 s0, s0
+; CHECK-MVE-NEXT: vcmp.f32 s1, s1
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s3, s3
+; CHECK-MVE-NEXT: vcmp.f32 s0, s0
; CHECK-MVE-NEXT: cset r0, vs
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vcmp.f32 s1, s1
+; CHECK-MVE-NEXT: vcmp.f32 s3, s3
; CHECK-MVE-NEXT: cset r1, vs
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s2, s2
; CHECK-MVE-NEXT: cset r2, vs
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, vs
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5
; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4
; CHECK-MVE-NEXT: bx lr
;
diff --git a/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll b/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll
index 35e578e425e746..898380760bd4d2 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll
@@ -499,12 +499,16 @@ define arm_aapcs_vfpcc i64 @uminv2i64(<2 x i64> %vec, i64 %min) {
; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: subs.w r4, r2, r12
; CHECK-NEXT: sbcs.w r4, r3, lr
-; CHECK-NEXT: csel r2, r2, r12, lo
-; CHECK-NEXT: csel r3, r3, lr, lo
+; CHECK-NEXT: cset r4, lo
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: csel r2, r2, r12, ne
+; CHECK-NEXT: csel r3, r3, lr, ne
; CHECK-NEXT: subs r4, r2, r0
; CHECK-NEXT: sbcs.w r4, r3, r1
-; CHECK-NEXT: csel r0, r2, r0, lo
-; CHECK-NEXT: csel r1, r3, r1, lo
+; CHECK-NEXT: cset r4, lo
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: csel r0, r2, r0, ne
+; CHECK-NEXT: csel r1, r3, r1, ne
; CHECK-NEXT: pop {r4, pc}
%x = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %vec)
%cmp = icmp ult i64 %x, %min
@@ -521,12 +525,16 @@ define arm_aapcs_vfpcc i64 @sminv2i64(<2 x i64> %vec, i64 %min) {
; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: subs.w r4, r2, r12
; CHECK-NEXT: sbcs.w r4, r3, lr
-; CHECK-NEXT: csel r2, r2, r12, lt
-; CHECK-NEXT: csel r3, r3, lr, lt
+; CHECK-NEXT: cset r4, lt
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: csel r2, r2, r12, ne
+; CHECK-NEXT: csel r3, r3, lr, ne
; CHECK-NEXT: subs r4, r2, r0
; CHECK-NEXT: sbcs.w r4, r3, r1
-; CHECK-NEXT: csel r0, r2, r0, lt
-; CHECK-NEXT: csel r1, r3, r1, lt
+; CHECK-NEXT: cset r4, lt
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: csel r0, r2, r0, ne
+; CHECK-NEXT: csel r1, r3, r1, ne
; CHECK-NEXT: pop {r4, pc}
%x = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %vec)
%cmp = icmp slt i64 %x, %min
@@ -543,12 +551,16 @@ define arm_aapcs_vfpcc i64 @umaxv2i64(<2 x i64> %vec, i64 %max) {
; CHECK-NEXT: vmov r2, r3, d1
; CHECK-NEXT: subs.w r4, r2, r12
; CHECK-NEXT: sbcs.w r4, r3, lr
-; CHECK-NEXT: csel r2, r12, r2, lo
-; CHECK-NEXT: csel r3, lr, r3, lo
+; CHECK-NEXT: cset r4, lo
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: csel r2, r12, r2, ne
+; CHECK-NEXT: csel r3, lr, r3, ne
; CHECK-NEXT: subs r4, r0, r2
; CHECK-NEXT: sbcs.w r4, r1, r3
-; CHECK-NEXT: csel r0, r2, r0, lo
-; CHECK-NEXT: csel r1, r3, r1, lo
+; CHECK-NEXT: cset r4, lo
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: csel r0, r2, r0, ne
+; CHECK-NEXT: csel r1, r3, r1, ne
; CHECK-NEXT: pop {r4, pc}
%x = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %vec)
%cmp = icmp ugt i64 %x, %max
@@ -565,12 +577,16 @@ define arm_aapcs_vfpcc i64 @smaxv2i64(<2 x i64> %vec, i64 %max) {
; CHECK-NEXT: vmov r2, r3, d1
; CHECK-NEXT: subs.w r4, r2, r12
; CHECK-NEXT: sbcs.w r4, r3, lr
-; CHECK-NEXT: csel r2, r12, r2, lt
-; CHECK-NEXT: csel r3, lr, r3, lt
+; CHECK-NEXT: cset r4, lt
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: csel r2, r12, r2, ne
+; CHECK-NEXT: csel r3, lr, r3, ne
; CHECK-NEXT: subs r4, r0, r2
; CHECK-NEXT: sbcs.w r4, r1, r3
-; CHECK-NEXT: csel r0, r2, r0, lt
-; CHECK-NEXT: csel r1, r3, r1, lt
+; CHECK-NEXT: cset r4, lt
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: csel r0, r2, r0, ne
+; CHECK-NEXT: csel r1, r3, r1, ne
; CHECK-NEXT: pop {r4, pc}
%x = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %vec)
%cmp = icmp sgt i64 %x, %max
diff --git a/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll b/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll
index bf0d92b5e0303e..642ff69ded33f5 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll
@@ -502,8 +502,8 @@ define <2 x i64> @large_i128(<2 x double> %x) {
; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: csel r0, r0, r7, ne
; CHECK-NEXT: csel r3, r3, r7, ne
-; CHECK-NEXT: csel r1, r1, r7, ne
; CHECK-NEXT: csel r2, r2, r9, ne
+; CHECK-NEXT: csel r1, r1, r7, ne
; CHECK-NEXT: rsbs r7, r0, #0
; CHECK-NEXT: sbcs.w r7, r4, r1
; CHECK-NEXT: sbcs.w r2, r4, r2
@@ -521,8 +521,8 @@ define <2 x i64> @large_i128(<2 x double> %x) {
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: csel r0, r0, r5, ne
; CHECK-NEXT: csel r3, r3, r5, ne
-; CHECK-NEXT: csel r1, r1, r5, ne
; CHECK-NEXT: csel r2, r2, r9, ne
+; CHECK-NEXT: csel r1, r1, r5, ne
; CHECK-NEXT: rsbs r5, r0, #0
; CHECK-NEXT: sbcs.w r5, r4, r1
; CHECK-NEXT: sbcs.w r2, r4, r2
diff --git a/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
index fe1d06cb39e16d..cff16c300e7036 100644
--- a/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
@@ -8,119 +8,121 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; THUMBV7-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; THUMBV7-NEXT: .pad #44
; THUMBV7-NEXT: sub sp, #44
-; THUMBV7-NEXT: ldr.w r8, [sp, #88]
-; THUMBV7-NEXT: mov r9, r0
-; THUMBV7-NEXT: ldr r7, [sp, #96]
-; THUMBV7-NEXT: ldr.w lr, [sp, #100]
-; THUMBV7-NEXT: umull r0, r5, r2, r8
-; THUMBV7-NEXT: ldr r4, [sp, #80]
-; THUMBV7-NEXT: str r0, [sp, #32] @ 4-byte Spill
-; THUMBV7-NEXT: umull r1, r0, r3, r7
-; THUMBV7-NEXT: str r0, [sp, #4] @ 4-byte Spill
-; THUMBV7-NEXT: umull r0, r11, lr, r2
-; THUMBV7-NEXT: str r1, [sp, #20] @ 4-byte Spill
-; THUMBV7-NEXT: ldr r1, [sp, #92]
-; THUMBV7-NEXT: str r0, [sp] @ 4-byte Spill
-; THUMBV7-NEXT: umull r0, r10, r7, r2
-; THUMBV7-NEXT: mov r7, r1
-; THUMBV7-NEXT: umull r6, r12, r1, r4
-; THUMBV7-NEXT: str r0, [sp, #40] @ 4-byte Spill
+; THUMBV7-NEXT: ldr.w lr, [sp, #88]
+; THUMBV7-NEXT: mov r11, r0
+; THUMBV7-NEXT: ldr r4, [sp, #96]
+; THUMBV7-NEXT: ldr.w r12, [sp, #80]
+; THUMBV7-NEXT: umull r1, r5, r2, lr
+; THUMBV7-NEXT: umull r7, r6, r3, r4
+; THUMBV7-NEXT: str r1, [sp, #40] @ 4-byte Spill
+; THUMBV7-NEXT: ldr r1, [sp, #100]
+; THUMBV7-NEXT: umull r4, r0, r4, r2
+; THUMBV7-NEXT: str r7, [sp, #32] @ 4-byte Spill
+; THUMBV7-NEXT: umull r7, r1, r1, r2
+; THUMBV7-NEXT: str r4, [sp, #24] @ 4-byte Spill
+; THUMBV7-NEXT: str r0, [sp, #12] @ 4-byte Spill
; THUMBV7-NEXT: ldr r0, [sp, #84]
-; THUMBV7-NEXT: str r6, [sp, #24] @ 4-byte Spill
-; THUMBV7-NEXT: umull r6, r1, r0, r8
-; THUMBV7-NEXT: str r6, [sp, #16] @ 4-byte Spill
-; THUMBV7-NEXT: umull r6, r2, r2, r7
-; THUMBV7-NEXT: mov r7, r4
-; THUMBV7-NEXT: strd r6, r2, [sp, #8] @ 8-byte Folded Spill
-; THUMBV7-NEXT: umull r2, r6, r4, r8
+; THUMBV7-NEXT: str r7, [sp, #20] @ 4-byte Spill
+; THUMBV7-NEXT: ldr r7, [sp, #92]
+; THUMBV7-NEXT: umull r10, r8, r0, lr
+; THUMBV7-NEXT: umull r4, r9, r7, r12
+; THUMBV7-NEXT: str r4, [sp, #8] @ 4-byte Spill
+; THUMBV7-NEXT: umull r4, r0, r12, lr
+; THUMBV7-NEXT: mov.w r12, #0
+; THUMBV7-NEXT: umlal r5, r12, r3, lr
+; THUMBV7-NEXT: str r4, [sp, #16] @ 4-byte Spill
+; THUMBV7-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; THUMBV7-NEXT: umull r4, r2, r2, r7
+; THUMBV7-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
+; THUMBV7-NEXT: str r4, [sp, #28] @ 4-byte Spill
; THUMBV7-NEXT: str r2, [sp, #36] @ 4-byte Spill
-; THUMBV7-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
-; THUMBV7-NEXT: str r6, [sp, #28] @ 4-byte Spill
-; THUMBV7-NEXT: movs r6, #0
-; THUMBV7-NEXT: str.w r2, [r9]
-; THUMBV7-NEXT: umlal r5, r6, r3, r8
+; THUMBV7-NEXT: str.w r0, [r11]
+; THUMBV7-NEXT: ldr r0, [sp, #32] @ 4-byte Reload
; THUMBV7-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
-; THUMBV7-NEXT: ldr r4, [sp] @ 4-byte Reload
-; THUMBV7-NEXT: add r4, r2
-; THUMBV7-NEXT: adds.w r2, r10, r4
-; THUMBV7-NEXT: str r2, [sp, #20] @ 4-byte Spill
+; THUMBV7-NEXT: add r2, r0
+; THUMBV7-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; THUMBV7-NEXT: adds.w lr, r0, r2
; THUMBV7-NEXT: mov.w r2, #0
-; THUMBV7-NEXT: adc r2, r2, #0
-; THUMBV7-NEXT: cmp.w r12, #0
-; THUMBV7-NEXT: str r2, [sp, #32] @ 4-byte Spill
-; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne.w r12, #1
+; THUMBV7-NEXT: adc r0, r2, #0
+; THUMBV7-NEXT: str r0, [sp, #32] @ 4-byte Spill
+; THUMBV7-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; THUMBV7-NEXT: add.w r4, r10, r0
+; THUMBV7-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; THUMBV7-NEXT: adds r4, r4, r0
+; THUMBV7-NEXT: adc r0, r2, #0
+; THUMBV7-NEXT: str r0, [sp, #40] @ 4-byte Spill
+; THUMBV7-NEXT: ldr r0, [sp, #24] @ 4-byte Reload
+; THUMBV7-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
+; THUMBV7-NEXT: adds.w r10, r2, r0
+; THUMBV7-NEXT: mov r2, r3
+; THUMBV7-NEXT: adc.w r0, r4, lr
+; THUMBV7-NEXT: ldr.w lr, [sp, #100]
; THUMBV7-NEXT: cmp r1, #0
-; THUMBV7-NEXT: ldr r2, [sp, #96]
+; THUMBV7-NEXT: str r0, [sp, #24] @ 4-byte Spill
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r1, #1
-; THUMBV7-NEXT: orrs.w r10, r7, r0
-; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne.w r10, #1
-; THUMBV7-NEXT: orrs.w r7, r2, lr
-; THUMBV7-NEXT: ldr r2, [sp, #92]
+; THUMBV7-NEXT: cmp r3, #0
+; THUMBV7-NEXT: mov r0, lr
; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne r7, #1
-; THUMBV7-NEXT: cmp r0, #0
+; THUMBV7-NEXT: movne r2, #1
+; THUMBV7-NEXT: cmp.w lr, #0
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r0, #1
-; THUMBV7-NEXT: cmp r2, #0
-; THUMBV7-NEXT: mov r4, r2
-; THUMBV7-NEXT: mov r8, r2
-; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne r4, #1
-; THUMBV7-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; THUMBV7-NEXT: ands r0, r4
-; THUMBV7-NEXT: movs r4, #0
-; THUMBV7-NEXT: adds r5, r5, r2
-; THUMBV7-NEXT: str.w r5, [r9, #4]
-; THUMBV7-NEXT: orr.w r0, r0, r1
-; THUMBV7-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
-; THUMBV7-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
-; THUMBV7-NEXT: and.w r5, r10, r7
-; THUMBV7-NEXT: orr.w r0, r0, r12
-; THUMBV7-NEXT: mov.w r12, #0
-; THUMBV7-NEXT: add r1, r2
-; THUMBV7-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
-; THUMBV7-NEXT: adcs r2, r6
-; THUMBV7-NEXT: ldr r6, [sp, #28] @ 4-byte Reload
-; THUMBV7-NEXT: adc r7, r4, #0
-; THUMBV7-NEXT: adds r1, r1, r6
-; THUMBV7-NEXT: umlal r2, r7, r3, r8
-; THUMBV7-NEXT: adc r4, r4, #0
-; THUMBV7-NEXT: orrs r0, r4
-; THUMBV7-NEXT: orrs r0, r5
-; THUMBV7-NEXT: ldrd r5, r4, [sp, #36] @ 8-byte Folded Reload
+; THUMBV7-NEXT: ldr r4, [sp, #28] @ 4-byte Reload
+; THUMBV7-NEXT: ands r0, r2
+; THUMBV7-NEXT: orrs r1, r0
; THUMBV7-NEXT: adds r5, r5, r4
-; THUMBV7-NEXT: ldr r4, [sp, #20] @ 4-byte Reload
-; THUMBV7-NEXT: adcs r1, r4
-; THUMBV7-NEXT: ldr r4, [sp, #4] @ 4-byte Reload
-; THUMBV7-NEXT: cmp r4, #0
+; THUMBV7-NEXT: str.w r5, [r11, #4]
+; THUMBV7-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
+; THUMBV7-NEXT: mov.w r5, #0
+; THUMBV7-NEXT: adcs.w r0, r0, r12
+; THUMBV7-NEXT: adc r2, r5, #0
+; THUMBV7-NEXT: cmp r6, #0
; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne r4, #1
-; THUMBV7-NEXT: cmp r3, #0
+; THUMBV7-NEXT: movne r6, #1
+; THUMBV7-NEXT: orrs r1, r6
+; THUMBV7-NEXT: ldr r6, [sp, #84]
+; THUMBV7-NEXT: umlal r0, r2, r3, r7
+; THUMBV7-NEXT: ldr r3, [sp, #32] @ 4-byte Reload
+; THUMBV7-NEXT: cmp r7, #0
+; THUMBV7-NEXT: it ne
+; THUMBV7-NEXT: movne r7, #1
+; THUMBV7-NEXT: orrs r1, r3
+; THUMBV7-NEXT: mov r3, r6
+; THUMBV7-NEXT: cmp r6, #0
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r3, #1
-; THUMBV7-NEXT: cmp.w lr, #0
+; THUMBV7-NEXT: cmp.w r8, #0
+; THUMBV7-NEXT: and.w r3, r3, r7
+; THUMBV7-NEXT: ldr r7, [sp, #80]
; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne.w lr, #1
-; THUMBV7-NEXT: cmp.w r11, #0
+; THUMBV7-NEXT: movne.w r8, #1
+; THUMBV7-NEXT: cmp.w r9, #0
+; THUMBV7-NEXT: it ne
+; THUMBV7-NEXT: movne.w r9, #1
+; THUMBV7-NEXT: orrs r7, r6
+; THUMBV7-NEXT: ldr r6, [sp, #96]
+; THUMBV7-NEXT: it ne
+; THUMBV7-NEXT: movne r7, #1
+; THUMBV7-NEXT: orr.w r3, r3, r8
+; THUMBV7-NEXT: orrs.w r6, r6, lr
+; THUMBV7-NEXT: orr.w r3, r3, r9
; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne.w r11, #1
-; THUMBV7-NEXT: adds r2, r2, r5
-; THUMBV7-NEXT: and.w r3, r3, lr
-; THUMBV7-NEXT: str.w r2, [r9, #8]
-; THUMBV7-NEXT: adcs r1, r7
-; THUMBV7-NEXT: str.w r1, [r9, #12]
-; THUMBV7-NEXT: orr.w r1, r3, r11
-; THUMBV7-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
-; THUMBV7-NEXT: orr.w r1, r1, r4
-; THUMBV7-NEXT: orr.w r1, r1, r2
+; THUMBV7-NEXT: movne r6, #1
+; THUMBV7-NEXT: adds.w r0, r0, r10
+; THUMBV7-NEXT: str.w r0, [r11, #8]
+; THUMBV7-NEXT: ldr r0, [sp, #24] @ 4-byte Reload
+; THUMBV7-NEXT: adcs r0, r2
+; THUMBV7-NEXT: str.w r0, [r11, #12]
+; THUMBV7-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
+; THUMBV7-NEXT: and.w r2, r7, r6
+; THUMBV7-NEXT: orr.w r0, r0, r3
+; THUMBV7-NEXT: orr.w r0, r0, r2
; THUMBV7-NEXT: orr.w r0, r0, r1
-; THUMBV7-NEXT: adc r1, r12, #0
+; THUMBV7-NEXT: adc r1, r5, #0
; THUMBV7-NEXT: orrs r0, r1
; THUMBV7-NEXT: and r0, r0, #1
-; THUMBV7-NEXT: strb.w r0, [r9, #16]
+; THUMBV7-NEXT: strb.w r0, [r11, #16]
; THUMBV7-NEXT: add sp, #44
; THUMBV7-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
start:
diff --git a/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll b/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll
index 55e917159fce9b..161adf7e7d7639 100644
--- a/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll
@@ -4,32 +4,33 @@
define { i64, i8 } @mulodi_test(i64 %l, i64 %r) unnamed_addr #0 {
; THUMBV7-LABEL: mulodi_test:
; THUMBV7: @ %bb.0: @ %start
-; THUMBV7-NEXT: .save {r4, r5, r7, lr}
-; THUMBV7-NEXT: push {r4, r5, r7, lr}
+; THUMBV7-NEXT: .save {r4, r5, r6, lr}
+; THUMBV7-NEXT: push {r4, r5, r6, lr}
; THUMBV7-NEXT: umull r12, lr, r3, r0
+; THUMBV7-NEXT: movs r6, #0
+; THUMBV7-NEXT: umull r4, r5, r1, r2
+; THUMBV7-NEXT: umull r0, r2, r0, r2
+; THUMBV7-NEXT: add r4, r12
+; THUMBV7-NEXT: adds.w r12, r2, r4
+; THUMBV7-NEXT: adc r2, r6, #0
; THUMBV7-NEXT: cmp r3, #0
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r3, #1
; THUMBV7-NEXT: cmp r1, #0
-; THUMBV7-NEXT: umull r0, r4, r0, r2
-; THUMBV7-NEXT: umull r2, r5, r1, r2
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r1, #1
-; THUMBV7-NEXT: ands r1, r3
; THUMBV7-NEXT: cmp r5, #0
+; THUMBV7-NEXT: and.w r1, r1, r3
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r5, #1
; THUMBV7-NEXT: orrs r1, r5
; THUMBV7-NEXT: cmp.w lr, #0
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne.w lr, #1
-; THUMBV7-NEXT: orr.w r3, r1, lr
-; THUMBV7-NEXT: add.w r1, r2, r12
-; THUMBV7-NEXT: movs r2, #0
-; THUMBV7-NEXT: adds r1, r1, r4
-; THUMBV7-NEXT: adc r2, r2, #0
-; THUMBV7-NEXT: orrs r2, r3
-; THUMBV7-NEXT: pop {r4, r5, r7, pc}
+; THUMBV7-NEXT: orr.w r1, r1, lr
+; THUMBV7-NEXT: orrs r2, r1
+; THUMBV7-NEXT: mov r1, r12
+; THUMBV7-NEXT: pop {r4, r5, r6, pc}
start:
%0 = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %l, i64 %r) #2
%1 = extractvalue { i64, i1 } %0, 0
More information about the llvm-commits
mailing list