[llvm] Port foldCSelOfCSel to ARM (PR #160915)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 26 09:05:00 PDT 2025
https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/160915
>From d43030f1919a55c5895e3645c3397bed3920c6dc Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Fri, 26 Sep 2025 10:31:25 -0400
Subject: [PATCH 1/2] [ARM] Have knownbits for CMOV match that of CSEL in
AArch64
---
llvm/lib/Target/ARM/ARMISelLowering.cpp | 13 +++++--------
1 file changed, 5 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 9a247bb5a83d9..d870d4c55714a 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -19951,14 +19951,11 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
}
break;
case ARMISD::CMOV: {
- // Bits are known zero/one if known on the LHS and RHS.
- Known = DAG.computeKnownBits(Op.getOperand(0), Depth+1);
- if (Known.isUnknown())
- return;
-
- KnownBits KnownRHS = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
- Known = Known.intersectWith(KnownRHS);
- return;
+ KnownBits Known2;
+ Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
+ Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
+ Known = Known.intersectWith(Known2);
+ break;
}
case ISD::INTRINSIC_W_CHAIN: {
Intrinsic::ID IntID =
>From 2e50683502a62c680bd981a9d8c025f9cc6b227e Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Fri, 26 Sep 2025 11:35:14 -0400
Subject: [PATCH 2/2] Port foldCSelOfCSel to ARM
There is only one degenerate case, in fpclamptosat, and I don't know how to fix it without another fold, or where such a fix would go.
---
llvm/lib/Target/ARM/ARMISelLowering.cpp | 66 +
llvm/test/CodeGen/ARM/addsubo-legalization.ll | 28 +-
.../ARM/atomicrmw_exclusive_monitor_ints.ll | 450 ++--
llvm/test/CodeGen/ARM/consthoist-icmpimm.ll | 86 +-
llvm/test/CodeGen/ARM/fpclamptosat.ll | 1295 +++++----
llvm/test/CodeGen/ARM/fpclamptosat_vec.ll | 2363 ++++++++---------
llvm/test/CodeGen/ARM/neon_vabd.ll | 42 +-
llvm/test/CodeGen/ARM/smml.ll | 28 +-
llvm/test/CodeGen/ARM/vector-trunc.ll | 6 +-
llvm/test/CodeGen/ARM/vselect_imax.ll | 454 ++--
llvm/test/CodeGen/ARM/wide-compares.ll | 10 +-
11 files changed, 2307 insertions(+), 2521 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index d870d4c55714a..352afe7a0b145 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -18371,9 +18371,75 @@ ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
return SDValue();
}
+static SDValue foldCMOVOfCMOV(SDNode *Op, SelectionDAG &DAG) {
+ // (cmov L, R, EQ|NE, (cmp (cmov X, Y, CC, Flags), X|Y)), X != Y constants
+ // -> (cmov R, L, CC or its opposite, Flags), reusing the inner cmov's Flags.
+ SDValue L = Op->getOperand(0);
+ SDValue R = Op->getOperand(1);
+ ARMCC::CondCodes OpCC =
+ static_cast<ARMCC::CondCodes>(Op->getConstantOperandVal(2));
+
+ SDValue OpCmp = Op->getOperand(3);
+ if (OpCmp.getOpcode() != ARMISD::CMPZ && OpCmp.getOpcode() != ARMISD::CMP)
+ // Only fold when the flags are produced by a CMP or CMPZ node.
+ return SDValue();
+
+ SDValue CmpLHS = OpCmp.getOperand(0);
+ SDValue CmpRHS = OpCmp.getOperand(1);
+
+ if (CmpRHS.getOpcode() == ARMISD::CMOV)
+ std::swap(CmpLHS, CmpRHS);
+ else if (CmpLHS.getOpcode() != ARMISD::CMOV)
+ return SDValue();
+
+ SDValue X = CmpLHS->getOperand(0);
+ SDValue Y = CmpLHS->getOperand(1);
+ if (!isa<ConstantSDNode>(X) || !isa<ConstantSDNode>(Y) || X == Y)
+ return SDValue();
+
+ ConstantSDNode *CX = cast<ConstantSDNode>(X);
+ ConstantSDNode *CY = cast<ConstantSDNode>(Y);
+ if (CX->getAPIntValue() == CY->getAPIntValue())
+ return SDValue();
+
+ ARMCC::CondCodes CC =
+ static_cast<ARMCC::CondCodes>(CmpLHS->getConstantOperandVal(2));
+ SDValue CondFlags = CmpLHS->getOperand(3);
+
+ if (CmpRHS == Y) {
+ // If the compare is against Y (inner operand 1), flip the condition.
+ // VERIFY: getOppositeCondition does the same flip as AArch64's
+ // getInvertedCondCode.
+ CC = ARMCC::getOppositeCondition(CC);
+ } else if (CmpRHS != X) {
+ return SDValue();
+ }
+
+ if (OpCC == ARMCC::NE) {
+ // Outer NE flips the condition once more; any CC but EQ/NE bails out.
+ CC = ARMCC::getOppositeCondition(CC);
+ } else if (OpCC != ARMCC::EQ) {
+ return SDValue();
+ }
+
+ SDLoc DL(Op);
+ EVT VT = Op->getValueType(0);
+ // CMOV takes (falseVal, trueVal, CC, Flags). To match (CSEL L,R,CC), pass
+ // (R,L).
+ SDValue CCValue = DAG.getConstant(CC, DL, FlagsVT);
+ return DAG.getNode(ARMISD::CMOV, DL, VT, R, L, CCValue, CondFlags);
+}
+
/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
SDValue
ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
+ // CMOV x, x, cc -> x
+ if (N->getOperand(0) == N->getOperand(1))
+ return N->getOperand(0);
+
+ if (SDValue R = foldCMOVOfCMOV(N, DAG))
+ return R;
+
SDValue Cmp = N->getOperand(3);
if (Cmp.getOpcode() != ARMISD::CMPZ)
// Only looking at EQ and NE cases.
diff --git a/llvm/test/CodeGen/ARM/addsubo-legalization.ll b/llvm/test/CodeGen/ARM/addsubo-legalization.ll
index 5ebb115791c66..dbda2c78d8111 100644
--- a/llvm/test/CodeGen/ARM/addsubo-legalization.ll
+++ b/llvm/test/CodeGen/ARM/addsubo-legalization.ll
@@ -20,19 +20,17 @@ define <2 x i1> @uaddo(ptr %ptr, ptr %ptr2) {
; CHECK-NEXT: vmov r4, r5, d17
; CHECK-NEXT: subs.w r3, lr, r3
; CHECK-NEXT: sbcs.w r2, r12, r2
+; CHECK-NEXT: mov.w r3, #-1
; CHECK-NEXT: mov.w r2, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r2, #-1
-; CHECK-NEXT: subs r3, r4, r6
-; CHECK-NEXT: sbcs.w r3, r5, r7
; CHECK-NEXT: it lo
+; CHECK-NEXT: movlo r2, r3
+; CHECK-NEXT: subs r6, r4, r6
+; CHECK-NEXT: sbcs.w r7, r5, r7
+; CHECK-NEXT: itt lo
; CHECK-NEXT: movlo r1, #1
-; CHECK-NEXT: cmp r1, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r1, #-1
+; CHECK-NEXT: movlo r1, r3
; CHECK-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
@@ -59,19 +57,17 @@ define <2 x i1> @usubo(ptr %ptr, ptr %ptr2) {
; CHECK-NEXT: vmov r6, r7, d17
; CHECK-NEXT: subs.w r3, lr, r3
; CHECK-NEXT: sbcs.w r2, r12, r2
+; CHECK-NEXT: mov.w r3, #-1
; CHECK-NEXT: mov.w r2, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r2, #-1
-; CHECK-NEXT: subs r3, r4, r6
-; CHECK-NEXT: sbcs.w r3, r5, r7
; CHECK-NEXT: it lo
+; CHECK-NEXT: movlo r2, r3
+; CHECK-NEXT: subs r6, r4, r6
+; CHECK-NEXT: sbcs.w r7, r5, r7
+; CHECK-NEXT: itt lo
; CHECK-NEXT: movlo r1, #1
-; CHECK-NEXT: cmp r1, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r1, #-1
+; CHECK-NEXT: movlo r1, r3
; CHECK-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
diff --git a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
index f633315822cc3..72ea6994be33b 100644
--- a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
+++ b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
@@ -8339,12 +8339,12 @@ define i64 @test_max_i64() {
; CHECK-ARM8-NEXT: mov r9, r1
; CHECK-ARM8-NEXT: rsbs r0, r2, #1
; CHECK-ARM8-NEXT: rscs r0, r1, #0
-; CHECK-ARM8-NEXT: mov r0, #0
-; CHECK-ARM8-NEXT: movwlt r0, #1
+; CHECK-ARM8-NEXT: mov r3, #0
+; CHECK-ARM8-NEXT: movwlt r3, #1
+; CHECK-ARM8-NEXT: mov r0, r1
+; CHECK-ARM8-NEXT: movge r0, r3
; CHECK-ARM8-NEXT: mov r10, #1
; CHECK-ARM8-NEXT: movlt r10, r2
-; CHECK-ARM8-NEXT: cmp r0, #0
-; CHECK-ARM8-NEXT: movne r0, r1
; CHECK-ARM8-NEXT: @ kill: def $r10 killed $r10 def $r10_r11
; CHECK-ARM8-NEXT: mov r11, r0
; CHECK-ARM8-NEXT: movw r6, :lower16:atomic_i64
@@ -8406,12 +8406,12 @@ define i64 @test_max_i64() {
; CHECK-ARM6-NEXT: mov r9, r1
; CHECK-ARM6-NEXT: rsbs r0, r2, #1
; CHECK-ARM6-NEXT: rscs r0, r1, #0
-; CHECK-ARM6-NEXT: mov r0, #0
-; CHECK-ARM6-NEXT: movlt r0, #1
+; CHECK-ARM6-NEXT: mov r3, #0
+; CHECK-ARM6-NEXT: movlt r3, #1
+; CHECK-ARM6-NEXT: mov r0, r1
+; CHECK-ARM6-NEXT: movge r0, r3
; CHECK-ARM6-NEXT: mov r10, #1
; CHECK-ARM6-NEXT: movlt r10, r2
-; CHECK-ARM6-NEXT: cmp r0, #0
-; CHECK-ARM6-NEXT: movne r0, r1
; CHECK-ARM6-NEXT: @ kill: def $r10 killed $r10 def $r10_r11
; CHECK-ARM6-NEXT: mov r11, r0
; CHECK-ARM6-NEXT: ldr r6, .LCPI40_0
@@ -8474,18 +8474,18 @@ define i64 @test_max_i64() {
; CHECK-THUMB7-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
; CHECK-THUMB7-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
; CHECK-THUMB7-NEXT: rsbs.w r0, r2, #1
-; CHECK-THUMB7-NEXT: mov.w r0, #0
-; CHECK-THUMB7-NEXT: sbcs.w r3, r0, r1
+; CHECK-THUMB7-NEXT: mov.w r3, #0
+; CHECK-THUMB7-NEXT: sbcs.w r0, r3, r1
; CHECK-THUMB7-NEXT: it lt
-; CHECK-THUMB7-NEXT: movlt r0, #1
+; CHECK-THUMB7-NEXT: movlt r3, #1
; CHECK-THUMB7-NEXT: mov r8, r2
; CHECK-THUMB7-NEXT: mov r9, r1
+; CHECK-THUMB7-NEXT: mov r0, r1
+; CHECK-THUMB7-NEXT: it ge
+; CHECK-THUMB7-NEXT: movge r0, r3
; CHECK-THUMB7-NEXT: mov.w r10, #1
; CHECK-THUMB7-NEXT: it lt
; CHECK-THUMB7-NEXT: movlt r10, r2
-; CHECK-THUMB7-NEXT: cmp r0, #0
-; CHECK-THUMB7-NEXT: it ne
-; CHECK-THUMB7-NEXT: movne r0, r1
; CHECK-THUMB7-NEXT: @ kill: def $r10 killed $r10 def $r10_r11
; CHECK-THUMB7-NEXT: mov r11, r0
; CHECK-THUMB7-NEXT: movw r6, :lower16:atomic_i64
@@ -8545,85 +8545,82 @@ define i64 @test_max_i64() {
; CHECK-THUMB8BASE: @ %bb.0: @ %entry
; CHECK-THUMB8BASE-NEXT: .save {r4, lr}
; CHECK-THUMB8BASE-NEXT: push {r4, lr}
-; CHECK-THUMB8BASE-NEXT: .pad #72
-; CHECK-THUMB8BASE-NEXT: sub sp, #72
+; CHECK-THUMB8BASE-NEXT: .pad #64
+; CHECK-THUMB8BASE-NEXT: sub sp, #64
; CHECK-THUMB8BASE-NEXT: movw r1, :lower16:atomic_i64
; CHECK-THUMB8BASE-NEXT: movt r1, :upper16:atomic_i64
; CHECK-THUMB8BASE-NEXT: ldr r0, [r1, #4]
; CHECK-THUMB8BASE-NEXT: ldr r1, [r1]
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #56] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #60] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #48] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: b .LBB40_1
; CHECK-THUMB8BASE-NEXT: .LBB40_1: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #56] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #60] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r2, [sp, #36] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: str r3, [sp, #40] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #48] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #52] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r2, [sp, #28] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r3, [sp, #32] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: movs r1, #0
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #44] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #36] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: movs r0, #1
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #48] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #40] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: subs r3, r0, r3
; CHECK-THUMB8BASE-NEXT: sbcs r1, r2
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #44] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: blt .LBB40_3
; CHECK-THUMB8BASE-NEXT: @ %bb.2: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB40_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #44] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: .LBB40_3: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB40_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #52] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #28] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #32] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: blt .LBB40_5
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: bge .LBB40_5
; CHECK-THUMB8BASE-NEXT: @ %bb.4: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB40_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #48] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #32] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #28] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: .LBB40_5: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB40_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #28] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r2, [sp, #20] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: cbnz r1, .LBB40_7
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #32] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #20] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: blt .LBB40_7
; CHECK-THUMB8BASE-NEXT: @ %bb.6: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB40_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #28] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #20] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: .LBB40_7: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB40_1 Depth=1
+; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #28] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r4, [sp, #32] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #36] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r4, [sp, #40] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #24] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r4, [sp, #64]
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #68]
+; CHECK-THUMB8BASE-NEXT: str r4, [sp, #56]
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #60]
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #4]
; CHECK-THUMB8BASE-NEXT: str r0, [sp]
; CHECK-THUMB8BASE-NEXT: movw r0, :lower16:atomic_i64
; CHECK-THUMB8BASE-NEXT: movt r0, :upper16:atomic_i64
-; CHECK-THUMB8BASE-NEXT: add r1, sp, #64
+; CHECK-THUMB8BASE-NEXT: add r1, sp, #56
; CHECK-THUMB8BASE-NEXT: bl __atomic_compare_exchange_8
; CHECK-THUMB8BASE-NEXT: mov r2, r0
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #68]
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #64]
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #60]
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #56]
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #12] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: cmp r2, #0
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #56] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #60] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #48] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: beq .LBB40_1
; CHECK-THUMB8BASE-NEXT: b .LBB40_8
; CHECK-THUMB8BASE-NEXT: .LBB40_8: @ %atomicrmw.end
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: add sp, #72
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: add sp, #64
; CHECK-THUMB8BASE-NEXT: pop {r4, pc}
entry:
%0 = atomicrmw max ptr @atomic_i64, i64 1 monotonic
@@ -8652,12 +8649,12 @@ define i64 @test_min_i64() {
; CHECK-ARM8-NEXT: mov r9, r1
; CHECK-ARM8-NEXT: subs r0, r2, #2
; CHECK-ARM8-NEXT: sbcs r0, r1, #0
-; CHECK-ARM8-NEXT: mov r0, #0
-; CHECK-ARM8-NEXT: movwlt r0, #1
+; CHECK-ARM8-NEXT: mov r3, #0
+; CHECK-ARM8-NEXT: movwlt r3, #1
+; CHECK-ARM8-NEXT: mov r0, r1
+; CHECK-ARM8-NEXT: movge r0, r3
; CHECK-ARM8-NEXT: mov r10, #1
; CHECK-ARM8-NEXT: movlt r10, r2
-; CHECK-ARM8-NEXT: cmp r0, #0
-; CHECK-ARM8-NEXT: movne r0, r1
; CHECK-ARM8-NEXT: @ kill: def $r10 killed $r10 def $r10_r11
; CHECK-ARM8-NEXT: mov r11, r0
; CHECK-ARM8-NEXT: movw r6, :lower16:atomic_i64
@@ -8719,12 +8716,12 @@ define i64 @test_min_i64() {
; CHECK-ARM6-NEXT: mov r9, r1
; CHECK-ARM6-NEXT: subs r0, r2, #2
; CHECK-ARM6-NEXT: sbcs r0, r1, #0
-; CHECK-ARM6-NEXT: mov r0, #0
-; CHECK-ARM6-NEXT: movlt r0, #1
+; CHECK-ARM6-NEXT: mov r3, #0
+; CHECK-ARM6-NEXT: movlt r3, #1
+; CHECK-ARM6-NEXT: mov r0, r1
+; CHECK-ARM6-NEXT: movge r0, r3
; CHECK-ARM6-NEXT: mov r10, #1
; CHECK-ARM6-NEXT: movlt r10, r2
-; CHECK-ARM6-NEXT: cmp r0, #0
-; CHECK-ARM6-NEXT: movne r0, r1
; CHECK-ARM6-NEXT: @ kill: def $r10 killed $r10 def $r10_r11
; CHECK-ARM6-NEXT: mov r11, r0
; CHECK-ARM6-NEXT: ldr r6, .LCPI41_0
@@ -8790,15 +8787,14 @@ define i64 @test_min_i64() {
; CHECK-THUMB7-NEXT: mov r9, r1
; CHECK-THUMB7-NEXT: subs r0, r2, #2
; CHECK-THUMB7-NEXT: sbcs r0, r1, #0
-; CHECK-THUMB7-NEXT: mov.w r0, #0
-; CHECK-THUMB7-NEXT: it lt
-; CHECK-THUMB7-NEXT: movlt r0, #1
+; CHECK-THUMB7-NEXT: mov.w r3, #0
+; CHECK-THUMB7-NEXT: mov r0, r1
+; CHECK-THUMB7-NEXT: ite lt
+; CHECK-THUMB7-NEXT: movlt r3, #1
+; CHECK-THUMB7-NEXT: movge r0, r3
; CHECK-THUMB7-NEXT: mov.w r10, #1
; CHECK-THUMB7-NEXT: it lt
; CHECK-THUMB7-NEXT: movlt r10, r2
-; CHECK-THUMB7-NEXT: cmp r0, #0
-; CHECK-THUMB7-NEXT: it ne
-; CHECK-THUMB7-NEXT: movne r0, r1
; CHECK-THUMB7-NEXT: @ kill: def $r10 killed $r10 def $r10_r11
; CHECK-THUMB7-NEXT: mov r11, r0
; CHECK-THUMB7-NEXT: movw r6, :lower16:atomic_i64
@@ -8858,85 +8854,82 @@ define i64 @test_min_i64() {
; CHECK-THUMB8BASE: @ %bb.0: @ %entry
; CHECK-THUMB8BASE-NEXT: .save {r4, lr}
; CHECK-THUMB8BASE-NEXT: push {r4, lr}
-; CHECK-THUMB8BASE-NEXT: .pad #72
-; CHECK-THUMB8BASE-NEXT: sub sp, #72
+; CHECK-THUMB8BASE-NEXT: .pad #64
+; CHECK-THUMB8BASE-NEXT: sub sp, #64
; CHECK-THUMB8BASE-NEXT: movw r1, :lower16:atomic_i64
; CHECK-THUMB8BASE-NEXT: movt r1, :upper16:atomic_i64
; CHECK-THUMB8BASE-NEXT: ldr r0, [r1, #4]
; CHECK-THUMB8BASE-NEXT: ldr r1, [r1]
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #56] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #60] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #48] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: b .LBB41_1
; CHECK-THUMB8BASE-NEXT: .LBB41_1: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #56] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #60] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #36] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: str r3, [sp, #40] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #48] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #52] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #28] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r3, [sp, #32] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: movs r0, #1
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #44] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #36] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: movs r2, #0
-; CHECK-THUMB8BASE-NEXT: str r2, [sp, #48] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r2, [sp, #40] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: subs r3, r3, #2
; CHECK-THUMB8BASE-NEXT: sbcs r1, r2
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #44] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: blt .LBB41_3
; CHECK-THUMB8BASE-NEXT: @ %bb.2: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB41_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #48] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #44] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: .LBB41_3: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB41_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #52] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #28] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #32] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: blt .LBB41_5
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: bge .LBB41_5
; CHECK-THUMB8BASE-NEXT: @ %bb.4: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB41_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #32] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #28] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: .LBB41_5: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB41_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #28] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r2, [sp, #20] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: cbnz r1, .LBB41_7
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #32] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #20] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: blt .LBB41_7
; CHECK-THUMB8BASE-NEXT: @ %bb.6: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB41_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #28] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #20] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: .LBB41_7: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB41_1 Depth=1
+; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #28] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r4, [sp, #32] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #48] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #36] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r4, [sp, #40] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #24] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r4, [sp, #64]
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #68]
+; CHECK-THUMB8BASE-NEXT: str r4, [sp, #56]
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #60]
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #4]
; CHECK-THUMB8BASE-NEXT: str r0, [sp]
; CHECK-THUMB8BASE-NEXT: movw r0, :lower16:atomic_i64
; CHECK-THUMB8BASE-NEXT: movt r0, :upper16:atomic_i64
-; CHECK-THUMB8BASE-NEXT: add r1, sp, #64
+; CHECK-THUMB8BASE-NEXT: add r1, sp, #56
; CHECK-THUMB8BASE-NEXT: bl __atomic_compare_exchange_8
; CHECK-THUMB8BASE-NEXT: mov r2, r0
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #68]
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #64]
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #60]
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #56]
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #12] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: cmp r2, #0
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #56] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #60] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #48] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: beq .LBB41_1
; CHECK-THUMB8BASE-NEXT: b .LBB41_8
; CHECK-THUMB8BASE-NEXT: .LBB41_8: @ %atomicrmw.end
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: add sp, #72
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: add sp, #64
; CHECK-THUMB8BASE-NEXT: pop {r4, pc}
entry:
%0 = atomicrmw min ptr @atomic_i64, i64 1 monotonic
@@ -8965,12 +8958,12 @@ define i64 @test_umax_i64() {
; CHECK-ARM8-NEXT: mov r9, r1
; CHECK-ARM8-NEXT: rsbs r0, r2, #1
; CHECK-ARM8-NEXT: rscs r0, r1, #0
-; CHECK-ARM8-NEXT: mov r0, #0
-; CHECK-ARM8-NEXT: movwlo r0, #1
+; CHECK-ARM8-NEXT: mov r3, #0
+; CHECK-ARM8-NEXT: movwlo r3, #1
+; CHECK-ARM8-NEXT: mov r0, r1
+; CHECK-ARM8-NEXT: movhs r0, r3
; CHECK-ARM8-NEXT: mov r10, #1
; CHECK-ARM8-NEXT: movlo r10, r2
-; CHECK-ARM8-NEXT: cmp r0, #0
-; CHECK-ARM8-NEXT: movne r0, r1
; CHECK-ARM8-NEXT: @ kill: def $r10 killed $r10 def $r10_r11
; CHECK-ARM8-NEXT: mov r11, r0
; CHECK-ARM8-NEXT: movw r6, :lower16:atomic_i64
@@ -9032,12 +9025,12 @@ define i64 @test_umax_i64() {
; CHECK-ARM6-NEXT: mov r9, r1
; CHECK-ARM6-NEXT: rsbs r0, r2, #1
; CHECK-ARM6-NEXT: rscs r0, r1, #0
-; CHECK-ARM6-NEXT: mov r0, #0
-; CHECK-ARM6-NEXT: movlo r0, #1
+; CHECK-ARM6-NEXT: mov r3, #0
+; CHECK-ARM6-NEXT: movlo r3, #1
+; CHECK-ARM6-NEXT: mov r0, r1
+; CHECK-ARM6-NEXT: movhs r0, r3
; CHECK-ARM6-NEXT: mov r10, #1
; CHECK-ARM6-NEXT: movlo r10, r2
-; CHECK-ARM6-NEXT: cmp r0, #0
-; CHECK-ARM6-NEXT: movne r0, r1
; CHECK-ARM6-NEXT: @ kill: def $r10 killed $r10 def $r10_r11
; CHECK-ARM6-NEXT: mov r11, r0
; CHECK-ARM6-NEXT: ldr r6, .LCPI42_0
@@ -9100,18 +9093,18 @@ define i64 @test_umax_i64() {
; CHECK-THUMB7-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
; CHECK-THUMB7-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
; CHECK-THUMB7-NEXT: rsbs.w r0, r2, #1
-; CHECK-THUMB7-NEXT: mov.w r0, #0
-; CHECK-THUMB7-NEXT: sbcs.w r3, r0, r1
+; CHECK-THUMB7-NEXT: mov.w r3, #0
+; CHECK-THUMB7-NEXT: sbcs.w r0, r3, r1
; CHECK-THUMB7-NEXT: it lo
-; CHECK-THUMB7-NEXT: movlo r0, #1
+; CHECK-THUMB7-NEXT: movlo r3, #1
; CHECK-THUMB7-NEXT: mov r8, r2
; CHECK-THUMB7-NEXT: mov r9, r1
+; CHECK-THUMB7-NEXT: mov r0, r1
+; CHECK-THUMB7-NEXT: it hs
+; CHECK-THUMB7-NEXT: movhs r0, r3
; CHECK-THUMB7-NEXT: mov.w r10, #1
; CHECK-THUMB7-NEXT: it lo
; CHECK-THUMB7-NEXT: movlo r10, r2
-; CHECK-THUMB7-NEXT: cmp r0, #0
-; CHECK-THUMB7-NEXT: it ne
-; CHECK-THUMB7-NEXT: movne r0, r1
; CHECK-THUMB7-NEXT: @ kill: def $r10 killed $r10 def $r10_r11
; CHECK-THUMB7-NEXT: mov r11, r0
; CHECK-THUMB7-NEXT: movw r6, :lower16:atomic_i64
@@ -9171,85 +9164,82 @@ define i64 @test_umax_i64() {
; CHECK-THUMB8BASE: @ %bb.0: @ %entry
; CHECK-THUMB8BASE-NEXT: .save {r4, lr}
; CHECK-THUMB8BASE-NEXT: push {r4, lr}
-; CHECK-THUMB8BASE-NEXT: .pad #72
-; CHECK-THUMB8BASE-NEXT: sub sp, #72
+; CHECK-THUMB8BASE-NEXT: .pad #64
+; CHECK-THUMB8BASE-NEXT: sub sp, #64
; CHECK-THUMB8BASE-NEXT: movw r1, :lower16:atomic_i64
; CHECK-THUMB8BASE-NEXT: movt r1, :upper16:atomic_i64
; CHECK-THUMB8BASE-NEXT: ldr r0, [r1, #4]
; CHECK-THUMB8BASE-NEXT: ldr r1, [r1]
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #56] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #60] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #48] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: b .LBB42_1
; CHECK-THUMB8BASE-NEXT: .LBB42_1: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #56] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #60] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r2, [sp, #36] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: str r3, [sp, #40] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #48] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #52] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r2, [sp, #28] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r3, [sp, #32] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: movs r1, #0
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #44] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #36] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: movs r0, #1
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #48] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #40] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: subs r3, r0, r3
; CHECK-THUMB8BASE-NEXT: sbcs r1, r2
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #44] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: blo .LBB42_3
; CHECK-THUMB8BASE-NEXT: @ %bb.2: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB42_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #44] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: .LBB42_3: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB42_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #52] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #28] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #32] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: blo .LBB42_5
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: bhs .LBB42_5
; CHECK-THUMB8BASE-NEXT: @ %bb.4: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB42_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #48] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #32] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #28] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: .LBB42_5: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB42_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #28] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r2, [sp, #20] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: cbnz r1, .LBB42_7
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #32] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #20] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: blo .LBB42_7
; CHECK-THUMB8BASE-NEXT: @ %bb.6: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB42_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #28] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #20] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: .LBB42_7: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB42_1 Depth=1
+; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #28] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r4, [sp, #32] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #36] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r4, [sp, #40] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #24] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r4, [sp, #64]
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #68]
+; CHECK-THUMB8BASE-NEXT: str r4, [sp, #56]
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #60]
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #4]
; CHECK-THUMB8BASE-NEXT: str r0, [sp]
; CHECK-THUMB8BASE-NEXT: movw r0, :lower16:atomic_i64
; CHECK-THUMB8BASE-NEXT: movt r0, :upper16:atomic_i64
-; CHECK-THUMB8BASE-NEXT: add r1, sp, #64
+; CHECK-THUMB8BASE-NEXT: add r1, sp, #56
; CHECK-THUMB8BASE-NEXT: bl __atomic_compare_exchange_8
; CHECK-THUMB8BASE-NEXT: mov r2, r0
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #68]
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #64]
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #60]
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #56]
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #12] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: cmp r2, #0
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #56] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #60] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #48] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: beq .LBB42_1
; CHECK-THUMB8BASE-NEXT: b .LBB42_8
; CHECK-THUMB8BASE-NEXT: .LBB42_8: @ %atomicrmw.end
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: add sp, #72
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: add sp, #64
; CHECK-THUMB8BASE-NEXT: pop {r4, pc}
entry:
%0 = atomicrmw umax ptr @atomic_i64, i64 1 monotonic
@@ -9278,12 +9268,12 @@ define i64 @test_umin_i64() {
; CHECK-ARM8-NEXT: mov r9, r1
; CHECK-ARM8-NEXT: subs r0, r2, #2
; CHECK-ARM8-NEXT: sbcs r0, r1, #0
-; CHECK-ARM8-NEXT: mov r0, #0
-; CHECK-ARM8-NEXT: movwlo r0, #1
+; CHECK-ARM8-NEXT: mov r3, #0
+; CHECK-ARM8-NEXT: movwlo r3, #1
+; CHECK-ARM8-NEXT: mov r0, r1
+; CHECK-ARM8-NEXT: movhs r0, r3
; CHECK-ARM8-NEXT: mov r10, #1
; CHECK-ARM8-NEXT: movlo r10, r2
-; CHECK-ARM8-NEXT: cmp r0, #0
-; CHECK-ARM8-NEXT: movne r0, r1
; CHECK-ARM8-NEXT: @ kill: def $r10 killed $r10 def $r10_r11
; CHECK-ARM8-NEXT: mov r11, r0
; CHECK-ARM8-NEXT: movw r6, :lower16:atomic_i64
@@ -9345,12 +9335,12 @@ define i64 @test_umin_i64() {
; CHECK-ARM6-NEXT: mov r9, r1
; CHECK-ARM6-NEXT: subs r0, r2, #2
; CHECK-ARM6-NEXT: sbcs r0, r1, #0
-; CHECK-ARM6-NEXT: mov r0, #0
-; CHECK-ARM6-NEXT: movlo r0, #1
+; CHECK-ARM6-NEXT: mov r3, #0
+; CHECK-ARM6-NEXT: movlo r3, #1
+; CHECK-ARM6-NEXT: mov r0, r1
+; CHECK-ARM6-NEXT: movhs r0, r3
; CHECK-ARM6-NEXT: mov r10, #1
; CHECK-ARM6-NEXT: movlo r10, r2
-; CHECK-ARM6-NEXT: cmp r0, #0
-; CHECK-ARM6-NEXT: movne r0, r1
; CHECK-ARM6-NEXT: @ kill: def $r10 killed $r10 def $r10_r11
; CHECK-ARM6-NEXT: mov r11, r0
; CHECK-ARM6-NEXT: ldr r6, .LCPI43_0
@@ -9416,15 +9406,14 @@ define i64 @test_umin_i64() {
; CHECK-THUMB7-NEXT: mov r9, r1
; CHECK-THUMB7-NEXT: subs r0, r2, #2
; CHECK-THUMB7-NEXT: sbcs r0, r1, #0
-; CHECK-THUMB7-NEXT: mov.w r0, #0
-; CHECK-THUMB7-NEXT: it lo
-; CHECK-THUMB7-NEXT: movlo r0, #1
+; CHECK-THUMB7-NEXT: mov.w r3, #0
+; CHECK-THUMB7-NEXT: mov r0, r1
+; CHECK-THUMB7-NEXT: ite lo
+; CHECK-THUMB7-NEXT: movlo r3, #1
+; CHECK-THUMB7-NEXT: movhs r0, r3
; CHECK-THUMB7-NEXT: mov.w r10, #1
; CHECK-THUMB7-NEXT: it lo
; CHECK-THUMB7-NEXT: movlo r10, r2
-; CHECK-THUMB7-NEXT: cmp r0, #0
-; CHECK-THUMB7-NEXT: it ne
-; CHECK-THUMB7-NEXT: movne r0, r1
; CHECK-THUMB7-NEXT: @ kill: def $r10 killed $r10 def $r10_r11
; CHECK-THUMB7-NEXT: mov r11, r0
; CHECK-THUMB7-NEXT: movw r6, :lower16:atomic_i64
@@ -9484,85 +9473,82 @@ define i64 @test_umin_i64() {
; CHECK-THUMB8BASE: @ %bb.0: @ %entry
; CHECK-THUMB8BASE-NEXT: .save {r4, lr}
; CHECK-THUMB8BASE-NEXT: push {r4, lr}
-; CHECK-THUMB8BASE-NEXT: .pad #72
-; CHECK-THUMB8BASE-NEXT: sub sp, #72
+; CHECK-THUMB8BASE-NEXT: .pad #64
+; CHECK-THUMB8BASE-NEXT: sub sp, #64
; CHECK-THUMB8BASE-NEXT: movw r1, :lower16:atomic_i64
; CHECK-THUMB8BASE-NEXT: movt r1, :upper16:atomic_i64
; CHECK-THUMB8BASE-NEXT: ldr r0, [r1, #4]
; CHECK-THUMB8BASE-NEXT: ldr r1, [r1]
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #56] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #60] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #48] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: b .LBB43_1
; CHECK-THUMB8BASE-NEXT: .LBB43_1: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #56] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #60] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #36] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: str r3, [sp, #40] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #48] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #52] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #28] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r3, [sp, #32] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: movs r0, #1
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #44] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #36] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: movs r2, #0
-; CHECK-THUMB8BASE-NEXT: str r2, [sp, #48] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r2, [sp, #40] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: subs r3, r3, #2
; CHECK-THUMB8BASE-NEXT: sbcs r1, r2
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #44] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: blo .LBB43_3
; CHECK-THUMB8BASE-NEXT: @ %bb.2: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB43_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #48] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #44] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: .LBB43_3: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB43_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #52] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #28] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #32] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: blo .LBB43_5
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: bhs .LBB43_5
; CHECK-THUMB8BASE-NEXT: @ %bb.4: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB43_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #32] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #28] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: .LBB43_5: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB43_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #28] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r2, [sp, #20] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: cbnz r1, .LBB43_7
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #32] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #20] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: blo .LBB43_7
; CHECK-THUMB8BASE-NEXT: @ %bb.6: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB43_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #28] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #20] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: .LBB43_7: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB43_1 Depth=1
+; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #28] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r4, [sp, #32] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #48] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #36] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r4, [sp, #40] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #24] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r4, [sp, #64]
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #68]
+; CHECK-THUMB8BASE-NEXT: str r4, [sp, #56]
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #60]
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #4]
; CHECK-THUMB8BASE-NEXT: str r0, [sp]
; CHECK-THUMB8BASE-NEXT: movw r0, :lower16:atomic_i64
; CHECK-THUMB8BASE-NEXT: movt r0, :upper16:atomic_i64
-; CHECK-THUMB8BASE-NEXT: add r1, sp, #64
+; CHECK-THUMB8BASE-NEXT: add r1, sp, #56
; CHECK-THUMB8BASE-NEXT: bl __atomic_compare_exchange_8
; CHECK-THUMB8BASE-NEXT: mov r2, r0
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #68]
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #64]
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #60]
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #56]
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #12] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: cmp r2, #0
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #56] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #60] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #48] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: beq .LBB43_1
; CHECK-THUMB8BASE-NEXT: b .LBB43_8
; CHECK-THUMB8BASE-NEXT: .LBB43_8: @ %atomicrmw.end
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: add sp, #72
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: add sp, #64
; CHECK-THUMB8BASE-NEXT: pop {r4, pc}
entry:
%0 = atomicrmw umin ptr @atomic_i64, i64 1 monotonic
diff --git a/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll b/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll
index 16b7403bdb932..3be25dc2c3e77 100644
--- a/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll
+++ b/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll
@@ -473,21 +473,21 @@ define i32 @icmp64_ule_m1(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) {
; CHECKV6M-NEXT: ldr r0, [sp, #28]
; CHECKV6M-NEXT: lsls r0, r0, #31
; CHECKV6M-NEXT: ldr r6, .LCPI5_0
-; CHECKV6M-NEXT: ldr r5, [sp, #24]
-; CHECKV6M-NEXT: ldr r0, [sp, #20]
+; CHECKV6M-NEXT: ldr r0, [sp, #24]
+; CHECKV6M-NEXT: ldr r5, [sp, #20]
; CHECKV6M-NEXT: beq .LBB5_6
; CHECKV6M-NEXT: @ %bb.1: @ %then
; CHECKV6M-NEXT: movs r7, #0
; CHECKV6M-NEXT: subs r2, r2, r6
; CHECKV6M-NEXT: sbcs r3, r7
; CHECKV6M-NEXT: mov r2, r0
-; CHECKV6M-NEXT: blo .LBB5_3
+; CHECKV6M-NEXT: bhs .LBB5_3
; CHECKV6M-NEXT: @ %bb.2: @ %then
; CHECKV6M-NEXT: mov r2, r5
; CHECKV6M-NEXT: .LBB5_3: @ %then
; CHECKV6M-NEXT: subs r3, r4, r6
; CHECKV6M-NEXT: sbcs r1, r7
-; CHECKV6M-NEXT: blo .LBB5_5
+; CHECKV6M-NEXT: bhs .LBB5_5
; CHECKV6M-NEXT: @ %bb.4: @ %then
; CHECKV6M-NEXT: mov r0, r5
; CHECKV6M-NEXT: .LBB5_5: @ %then
@@ -497,7 +497,7 @@ define i32 @icmp64_ule_m1(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) {
; CHECKV6M-NEXT: movs r1, #0
; CHECKV6M-NEXT: subs r2, r2, r6
; CHECKV6M-NEXT: sbcs r3, r1
-; CHECKV6M-NEXT: blo .LBB5_8
+; CHECKV6M-NEXT: bhs .LBB5_8
; CHECKV6M-NEXT: @ %bb.7: @ %else
; CHECKV6M-NEXT: mov r0, r5
; CHECKV6M-NEXT: .LBB5_8: @ %else
@@ -516,25 +516,25 @@ define i32 @icmp64_ule_m1(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) {
; CHECKV7M-NEXT: movs r4, #1
; CHECKV7M-NEXT: movt r4, #2
; CHECKV7M-NEXT: lsls r0, r0, #31
-; CHECKV7M-NEXT: ldrd lr, r0, [sp, #8]
+; CHECKV7M-NEXT: ldrd r0, lr, [sp, #8]
; CHECKV7M-NEXT: beq .LBB5_2
; CHECKV7M-NEXT: @ %bb.1: @ %then
; CHECKV7M-NEXT: subs r2, r2, r4
; CHECKV7M-NEXT: sbcs r2, r3, #0
; CHECKV7M-NEXT: mov r2, r0
-; CHECKV7M-NEXT: it lo
-; CHECKV7M-NEXT: movlo r2, lr
+; CHECKV7M-NEXT: it hs
+; CHECKV7M-NEXT: movhs r2, lr
; CHECKV7M-NEXT: subs.w r3, r12, r4
; CHECKV7M-NEXT: sbcs r1, r1, #0
-; CHECKV7M-NEXT: it lo
-; CHECKV7M-NEXT: movlo r0, lr
+; CHECKV7M-NEXT: it hs
+; CHECKV7M-NEXT: movhs r0, lr
; CHECKV7M-NEXT: add r0, r2
; CHECKV7M-NEXT: pop {r4, pc}
; CHECKV7M-NEXT: .LBB5_2: @ %else
; CHECKV7M-NEXT: subs r1, r2, r4
; CHECKV7M-NEXT: sbcs r1, r3, #0
-; CHECKV7M-NEXT: it lo
-; CHECKV7M-NEXT: movlo r0, lr
+; CHECKV7M-NEXT: it hs
+; CHECKV7M-NEXT: movhs r0, lr
; CHECKV7M-NEXT: pop {r4, pc}
;
; CHECKV7A-LABEL: icmp64_ule_m1:
@@ -543,7 +543,7 @@ define i32 @icmp64_ule_m1(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) {
; CHECKV7A-NEXT: push {r4, lr}
; CHECKV7A-NEXT: ldr r4, [sp, #16]
; CHECKV7A-NEXT: mov r12, r0
-; CHECKV7A-NEXT: ldrd lr, r0, [sp, #8]
+; CHECKV7A-NEXT: ldrd r0, lr, [sp, #8]
; CHECKV7A-NEXT: lsls r4, r4, #31
; CHECKV7A-NEXT: movw r4, #1
; CHECKV7A-NEXT: movt r4, #2
@@ -552,19 +552,19 @@ define i32 @icmp64_ule_m1(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) {
; CHECKV7A-NEXT: subs r2, r2, r4
; CHECKV7A-NEXT: sbcs r2, r3, #0
; CHECKV7A-NEXT: mov r2, r0
-; CHECKV7A-NEXT: it lo
-; CHECKV7A-NEXT: movlo r2, lr
+; CHECKV7A-NEXT: it hs
+; CHECKV7A-NEXT: movhs r2, lr
; CHECKV7A-NEXT: subs.w r3, r12, r4
; CHECKV7A-NEXT: sbcs r1, r1, #0
-; CHECKV7A-NEXT: it lo
-; CHECKV7A-NEXT: movlo r0, lr
+; CHECKV7A-NEXT: it hs
+; CHECKV7A-NEXT: movhs r0, lr
; CHECKV7A-NEXT: add r0, r2
; CHECKV7A-NEXT: pop {r4, pc}
; CHECKV7A-NEXT: .LBB5_2: @ %else
; CHECKV7A-NEXT: subs r1, r2, r4
; CHECKV7A-NEXT: sbcs r1, r3, #0
-; CHECKV7A-NEXT: it lo
-; CHECKV7A-NEXT: movlo r0, lr
+; CHECKV7A-NEXT: it hs
+; CHECKV7A-NEXT: movhs r0, lr
; CHECKV7A-NEXT: pop {r4, pc}
br i1 %c, label %then, label %else
then:
@@ -590,21 +590,21 @@ define i32 @icmp64_uge_m2(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) {
; CHECKV6M-NEXT: lsls r6, r0, #17
; CHECKV6M-NEXT: ldr r0, [sp, #28]
; CHECKV6M-NEXT: lsls r0, r0, #31
-; CHECKV6M-NEXT: ldr r5, [sp, #24]
-; CHECKV6M-NEXT: ldr r0, [sp, #20]
+; CHECKV6M-NEXT: ldr r0, [sp, #24]
+; CHECKV6M-NEXT: ldr r5, [sp, #20]
; CHECKV6M-NEXT: beq .LBB6_6
; CHECKV6M-NEXT: @ %bb.1: @ %then
; CHECKV6M-NEXT: movs r7, #0
; CHECKV6M-NEXT: subs r2, r2, r6
; CHECKV6M-NEXT: sbcs r3, r7
; CHECKV6M-NEXT: mov r2, r0
-; CHECKV6M-NEXT: bhs .LBB6_3
+; CHECKV6M-NEXT: blo .LBB6_3
; CHECKV6M-NEXT: @ %bb.2: @ %then
; CHECKV6M-NEXT: mov r2, r5
; CHECKV6M-NEXT: .LBB6_3: @ %then
; CHECKV6M-NEXT: subs r3, r4, r6
; CHECKV6M-NEXT: sbcs r1, r7
-; CHECKV6M-NEXT: bhs .LBB6_5
+; CHECKV6M-NEXT: blo .LBB6_5
; CHECKV6M-NEXT: @ %bb.4: @ %then
; CHECKV6M-NEXT: mov r0, r5
; CHECKV6M-NEXT: .LBB6_5: @ %then
@@ -614,7 +614,7 @@ define i32 @icmp64_uge_m2(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) {
; CHECKV6M-NEXT: movs r1, #0
; CHECKV6M-NEXT: subs r2, r2, r6
; CHECKV6M-NEXT: sbcs r3, r1
-; CHECKV6M-NEXT: bhs .LBB6_8
+; CHECKV6M-NEXT: blo .LBB6_8
; CHECKV6M-NEXT: @ %bb.7: @ %else
; CHECKV6M-NEXT: mov r0, r5
; CHECKV6M-NEXT: .LBB6_8: @ %else
@@ -692,8 +692,8 @@ define i32 @icmp64_ugt_m1(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) {
; CHECKV6M-NEXT: ldr r0, [sp, #28]
; CHECKV6M-NEXT: lsls r0, r0, #31
; CHECKV6M-NEXT: ldr r6, .LCPI7_0
-; CHECKV6M-NEXT: ldr r5, [sp, #24]
-; CHECKV6M-NEXT: ldr r0, [sp, #20]
+; CHECKV6M-NEXT: ldr r0, [sp, #24]
+; CHECKV6M-NEXT: ldr r5, [sp, #20]
; CHECKV6M-NEXT: beq .LBB7_6
; CHECKV6M-NEXT: @ %bb.1: @ %then
; CHECKV6M-NEXT: movs r7, #0
@@ -701,13 +701,13 @@ define i32 @icmp64_ugt_m1(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) {
; CHECKV6M-NEXT: mov r2, r7
; CHECKV6M-NEXT: sbcs r2, r3
; CHECKV6M-NEXT: mov r2, r0
-; CHECKV6M-NEXT: blo .LBB7_3
+; CHECKV6M-NEXT: bhs .LBB7_3
; CHECKV6M-NEXT: @ %bb.2: @ %then
; CHECKV6M-NEXT: mov r2, r5
; CHECKV6M-NEXT: .LBB7_3: @ %then
; CHECKV6M-NEXT: subs r3, r6, r4
; CHECKV6M-NEXT: sbcs r7, r1
-; CHECKV6M-NEXT: blo .LBB7_5
+; CHECKV6M-NEXT: bhs .LBB7_5
; CHECKV6M-NEXT: @ %bb.4: @ %then
; CHECKV6M-NEXT: mov r0, r5
; CHECKV6M-NEXT: .LBB7_5: @ %then
@@ -717,7 +717,7 @@ define i32 @icmp64_ugt_m1(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) {
; CHECKV6M-NEXT: movs r1, #0
; CHECKV6M-NEXT: subs r2, r6, r2
; CHECKV6M-NEXT: sbcs r1, r3
-; CHECKV6M-NEXT: blo .LBB7_8
+; CHECKV6M-NEXT: bhs .LBB7_8
; CHECKV6M-NEXT: @ %bb.7: @ %else
; CHECKV6M-NEXT: mov r0, r5
; CHECKV6M-NEXT: .LBB7_8: @ %else
@@ -736,27 +736,27 @@ define i32 @icmp64_ugt_m1(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) {
; CHECKV7M-NEXT: movs r4, #1
; CHECKV7M-NEXT: movt r4, #2
; CHECKV7M-NEXT: lsls r0, r0, #31
-; CHECKV7M-NEXT: ldrd lr, r0, [sp, #16]
+; CHECKV7M-NEXT: ldrd r0, lr, [sp, #16]
; CHECKV7M-NEXT: beq .LBB7_2
; CHECKV7M-NEXT: @ %bb.1: @ %then
; CHECKV7M-NEXT: subs r2, r4, r2
; CHECKV7M-NEXT: mov.w r5, #0
; CHECKV7M-NEXT: sbcs.w r2, r5, r3
; CHECKV7M-NEXT: mov r2, r0
-; CHECKV7M-NEXT: it lo
-; CHECKV7M-NEXT: movlo r2, lr
+; CHECKV7M-NEXT: it hs
+; CHECKV7M-NEXT: movhs r2, lr
; CHECKV7M-NEXT: subs.w r3, r4, r12
; CHECKV7M-NEXT: sbcs.w r1, r5, r1
-; CHECKV7M-NEXT: it lo
-; CHECKV7M-NEXT: movlo r0, lr
+; CHECKV7M-NEXT: it hs
+; CHECKV7M-NEXT: movhs r0, lr
; CHECKV7M-NEXT: add r0, r2
; CHECKV7M-NEXT: pop {r4, r5, r7, pc}
; CHECKV7M-NEXT: .LBB7_2: @ %else
; CHECKV7M-NEXT: movs r1, #0
; CHECKV7M-NEXT: subs r2, r4, r2
; CHECKV7M-NEXT: sbcs r1, r3
-; CHECKV7M-NEXT: it lo
-; CHECKV7M-NEXT: movlo r0, lr
+; CHECKV7M-NEXT: it hs
+; CHECKV7M-NEXT: movhs r0, lr
; CHECKV7M-NEXT: pop {r4, r5, r7, pc}
;
; CHECKV7A-LABEL: icmp64_ugt_m1:
@@ -765,7 +765,7 @@ define i32 @icmp64_ugt_m1(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) {
; CHECKV7A-NEXT: push {r4, r5, r7, lr}
; CHECKV7A-NEXT: ldr r4, [sp, #24]
; CHECKV7A-NEXT: mov r12, r0
-; CHECKV7A-NEXT: ldrd lr, r0, [sp, #16]
+; CHECKV7A-NEXT: ldrd r0, lr, [sp, #16]
; CHECKV7A-NEXT: lsls r4, r4, #31
; CHECKV7A-NEXT: movw r4, #1
; CHECKV7A-NEXT: movt r4, #2
@@ -775,20 +775,20 @@ define i32 @icmp64_ugt_m1(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) {
; CHECKV7A-NEXT: mov.w r5, #0
; CHECKV7A-NEXT: sbcs.w r2, r5, r3
; CHECKV7A-NEXT: mov r2, r0
-; CHECKV7A-NEXT: it lo
-; CHECKV7A-NEXT: movlo r2, lr
+; CHECKV7A-NEXT: it hs
+; CHECKV7A-NEXT: movhs r2, lr
; CHECKV7A-NEXT: subs.w r3, r4, r12
; CHECKV7A-NEXT: sbcs.w r1, r5, r1
-; CHECKV7A-NEXT: it lo
-; CHECKV7A-NEXT: movlo r0, lr
+; CHECKV7A-NEXT: it hs
+; CHECKV7A-NEXT: movhs r0, lr
; CHECKV7A-NEXT: add r0, r2
; CHECKV7A-NEXT: pop {r4, r5, r7, pc}
; CHECKV7A-NEXT: .LBB7_2: @ %else
; CHECKV7A-NEXT: movs r1, #0
; CHECKV7A-NEXT: subs r2, r4, r2
; CHECKV7A-NEXT: sbcs r1, r3
-; CHECKV7A-NEXT: it lo
-; CHECKV7A-NEXT: movlo r0, lr
+; CHECKV7A-NEXT: it hs
+; CHECKV7A-NEXT: movhs r0, lr
; CHECKV7A-NEXT: pop {r4, r5, r7, pc}
br i1 %c, label %then, label %else
then:
diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll
index 8ab56b228d2a7..7c61ab8a7a727 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll
@@ -29,10 +29,11 @@ define i32 @stest_f64i32(double %x) {
; SOFT-NEXT: lsls r2, r2, #31
; SOFT-NEXT: subs r4, r2, r0
; SOFT-NEXT: sbcs r3, r1
-; SOFT-NEXT: blt .LBB0_6
+; SOFT-NEXT: bge .LBB0_6
; SOFT-NEXT: @ %bb.5: @ %entry
-; SOFT-NEXT: mov r0, r2
+; SOFT-NEXT: mov r2, r0
; SOFT-NEXT: .LBB0_6: @ %entry
+; SOFT-NEXT: mov r0, r2
; SOFT-NEXT: pop {r4, pc}
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.7:
@@ -45,16 +46,14 @@ define i32 @stest_f64i32(double %x) {
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: vmov r0, r1, d0
; VFP2-NEXT: bl __aeabi_d2lz
-; VFP2-NEXT: mvn r12, #-2147483648
-; VFP2-NEXT: subs.w r3, r0, r12
-; VFP2-NEXT: mov.w r2, #0
+; VFP2-NEXT: mvn r3, #-2147483648
+; VFP2-NEXT: movs r2, #0
+; VFP2-NEXT: subs r3, r0, r3
; VFP2-NEXT: sbcs r3, r1, #0
-; VFP2-NEXT: it lt
+; VFP2-NEXT: itte lt
; VFP2-NEXT: movlt r2, #1
-; VFP2-NEXT: cmp r2, #0
-; VFP2-NEXT: ite ne
-; VFP2-NEXT: movne r2, r1
-; VFP2-NEXT: moveq r0, r12
+; VFP2-NEXT: movlt r2, r1
+; VFP2-NEXT: mvnge r0, #-2147483648
; VFP2-NEXT: mov.w r1, #-1
; VFP2-NEXT: rsbs.w r3, r0, #-2147483648
; VFP2-NEXT: sbcs r1, r2
@@ -87,7 +86,7 @@ define i32 @utest_f64i32(double %x) {
; SOFT-NEXT: adds r3, r0, #1
; SOFT-NEXT: sbcs r1, r2
; SOFT-NEXT: blo .LBB1_2
-; SOFT-NEXT: @ %bb.1: @ %entry
+; SOFT-NEXT: @ %bb.1:
; SOFT-NEXT: mvns r0, r2
; SOFT-NEXT: .LBB1_2: @ %entry
; SOFT-NEXT: pop {r7, pc}
@@ -123,34 +122,27 @@ define i32 @ustest_f64i32(double %x) {
; SOFT-NEXT: .save {r4, lr}
; SOFT-NEXT: push {r4, lr}
; SOFT-NEXT: bl __aeabi_d2lz
-; SOFT-NEXT: movs r2, #0
-; SOFT-NEXT: mvns r3, r2
-; SOFT-NEXT: adds r4, r0, #1
+; SOFT-NEXT: mov r2, r0
+; SOFT-NEXT: movs r0, #0
+; SOFT-NEXT: mvns r3, r0
+; SOFT-NEXT: adds r4, r2, #1
; SOFT-NEXT: mov r4, r1
-; SOFT-NEXT: sbcs r4, r2
+; SOFT-NEXT: sbcs r4, r0
; SOFT-NEXT: blt .LBB2_2
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: mov r1, r2
+; SOFT-NEXT: mov r1, r0
; SOFT-NEXT: .LBB2_2: @ %entry
; SOFT-NEXT: blt .LBB2_4
; SOFT-NEXT: @ %bb.3: @ %entry
-; SOFT-NEXT: mov r0, r3
+; SOFT-NEXT: mov r2, r3
; SOFT-NEXT: .LBB2_4: @ %entry
-; SOFT-NEXT: rsbs r3, r0, #0
-; SOFT-NEXT: mov r3, r2
+; SOFT-NEXT: rsbs r3, r2, #0
+; SOFT-NEXT: mov r3, r0
; SOFT-NEXT: sbcs r3, r1
-; SOFT-NEXT: blt .LBB2_7
+; SOFT-NEXT: bge .LBB2_6
; SOFT-NEXT: @ %bb.5: @ %entry
-; SOFT-NEXT: cmp r2, #0
-; SOFT-NEXT: beq .LBB2_8
-; SOFT-NEXT: .LBB2_6: @ %entry
-; SOFT-NEXT: pop {r4, pc}
-; SOFT-NEXT: .LBB2_7:
-; SOFT-NEXT: movs r2, #1
-; SOFT-NEXT: cmp r2, #0
-; SOFT-NEXT: bne .LBB2_6
-; SOFT-NEXT: .LBB2_8: @ %entry
; SOFT-NEXT: mov r0, r2
+; SOFT-NEXT: .LBB2_6: @ %entry
; SOFT-NEXT: pop {r4, pc}
;
; VFP2-LABEL: ustest_f64i32:
@@ -163,19 +155,14 @@ define i32 @ustest_f64i32(double %x) {
; VFP2-NEXT: mov.w r2, #0
; VFP2-NEXT: sbcs r3, r1, #0
; VFP2-NEXT: mov.w r3, #0
-; VFP2-NEXT: it lt
+; VFP2-NEXT: itte lt
; VFP2-NEXT: movlt r3, #1
-; VFP2-NEXT: cmp r3, #0
-; VFP2-NEXT: ite ne
-; VFP2-NEXT: movne r3, r1
-; VFP2-NEXT: moveq.w r0, #-1
+; VFP2-NEXT: movlt r3, r1
+; VFP2-NEXT: movge.w r0, #-1
; VFP2-NEXT: rsbs r1, r0, #0
; VFP2-NEXT: sbcs.w r1, r2, r3
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt r2, #1
-; VFP2-NEXT: cmp r2, #0
-; VFP2-NEXT: it eq
-; VFP2-NEXT: moveq r0, r2
+; VFP2-NEXT: it ge
+; VFP2-NEXT: movge r0, #0
; VFP2-NEXT: pop {r7, pc}
;
; FULL-LABEL: ustest_f64i32:
@@ -217,10 +204,11 @@ define i32 @stest_f32i32(float %x) {
; SOFT-NEXT: lsls r2, r2, #31
; SOFT-NEXT: subs r4, r2, r0
; SOFT-NEXT: sbcs r3, r1
-; SOFT-NEXT: blt .LBB3_6
+; SOFT-NEXT: bge .LBB3_6
; SOFT-NEXT: @ %bb.5: @ %entry
-; SOFT-NEXT: mov r0, r2
+; SOFT-NEXT: mov r2, r0
; SOFT-NEXT: .LBB3_6: @ %entry
+; SOFT-NEXT: mov r0, r2
; SOFT-NEXT: pop {r4, pc}
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.7:
@@ -252,7 +240,7 @@ define i32 @utest_f32i32(float %x) {
; SOFT-NEXT: adds r3, r0, #1
; SOFT-NEXT: sbcs r1, r2
; SOFT-NEXT: blo .LBB4_2
-; SOFT-NEXT: @ %bb.1: @ %entry
+; SOFT-NEXT: @ %bb.1:
; SOFT-NEXT: mvns r0, r2
; SOFT-NEXT: .LBB4_2: @ %entry
; SOFT-NEXT: pop {r7, pc}
@@ -276,34 +264,27 @@ define i32 @ustest_f32i32(float %x) {
; SOFT-NEXT: .save {r4, lr}
; SOFT-NEXT: push {r4, lr}
; SOFT-NEXT: bl __aeabi_f2lz
-; SOFT-NEXT: movs r2, #0
-; SOFT-NEXT: mvns r3, r2
-; SOFT-NEXT: adds r4, r0, #1
+; SOFT-NEXT: mov r2, r0
+; SOFT-NEXT: movs r0, #0
+; SOFT-NEXT: mvns r3, r0
+; SOFT-NEXT: adds r4, r2, #1
; SOFT-NEXT: mov r4, r1
-; SOFT-NEXT: sbcs r4, r2
+; SOFT-NEXT: sbcs r4, r0
; SOFT-NEXT: blt .LBB5_2
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: mov r1, r2
+; SOFT-NEXT: mov r1, r0
; SOFT-NEXT: .LBB5_2: @ %entry
; SOFT-NEXT: blt .LBB5_4
; SOFT-NEXT: @ %bb.3: @ %entry
-; SOFT-NEXT: mov r0, r3
+; SOFT-NEXT: mov r2, r3
; SOFT-NEXT: .LBB5_4: @ %entry
-; SOFT-NEXT: rsbs r3, r0, #0
-; SOFT-NEXT: mov r3, r2
+; SOFT-NEXT: rsbs r3, r2, #0
+; SOFT-NEXT: mov r3, r0
; SOFT-NEXT: sbcs r3, r1
-; SOFT-NEXT: blt .LBB5_7
+; SOFT-NEXT: bge .LBB5_6
; SOFT-NEXT: @ %bb.5: @ %entry
-; SOFT-NEXT: cmp r2, #0
-; SOFT-NEXT: beq .LBB5_8
-; SOFT-NEXT: .LBB5_6: @ %entry
-; SOFT-NEXT: pop {r4, pc}
-; SOFT-NEXT: .LBB5_7:
-; SOFT-NEXT: movs r2, #1
-; SOFT-NEXT: cmp r2, #0
-; SOFT-NEXT: bne .LBB5_6
-; SOFT-NEXT: .LBB5_8: @ %entry
; SOFT-NEXT: mov r0, r2
+; SOFT-NEXT: .LBB5_6: @ %entry
; SOFT-NEXT: pop {r4, pc}
;
; VFP-LABEL: ustest_f32i32:
@@ -347,10 +328,11 @@ define i32 @stest_f16i32(half %x) {
; SOFT-NEXT: lsls r2, r2, #31
; SOFT-NEXT: subs r4, r2, r0
; SOFT-NEXT: sbcs r3, r1
-; SOFT-NEXT: blt .LBB6_6
+; SOFT-NEXT: bge .LBB6_6
; SOFT-NEXT: @ %bb.5: @ %entry
-; SOFT-NEXT: mov r0, r2
+; SOFT-NEXT: mov r2, r0
; SOFT-NEXT: .LBB6_6: @ %entry
+; SOFT-NEXT: mov r0, r2
; SOFT-NEXT: pop {r4, pc}
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.7:
@@ -395,7 +377,7 @@ define i32 @utesth_f16i32(half %x) {
; SOFT-NEXT: adds r3, r0, #1
; SOFT-NEXT: sbcs r1, r2
; SOFT-NEXT: blo .LBB7_2
-; SOFT-NEXT: @ %bb.1: @ %entry
+; SOFT-NEXT: @ %bb.1:
; SOFT-NEXT: mvns r0, r2
; SOFT-NEXT: .LBB7_2: @ %entry
; SOFT-NEXT: pop {r7, pc}
@@ -432,34 +414,27 @@ define i32 @ustest_f16i32(half %x) {
; SOFT-NEXT: uxth r0, r0
; SOFT-NEXT: bl __aeabi_h2f
; SOFT-NEXT: bl __aeabi_f2lz
-; SOFT-NEXT: movs r2, #0
-; SOFT-NEXT: mvns r3, r2
-; SOFT-NEXT: adds r4, r0, #1
+; SOFT-NEXT: mov r2, r0
+; SOFT-NEXT: movs r0, #0
+; SOFT-NEXT: mvns r3, r0
+; SOFT-NEXT: adds r4, r2, #1
; SOFT-NEXT: mov r4, r1
-; SOFT-NEXT: sbcs r4, r2
+; SOFT-NEXT: sbcs r4, r0
; SOFT-NEXT: blt .LBB8_2
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: mov r1, r2
+; SOFT-NEXT: mov r1, r0
; SOFT-NEXT: .LBB8_2: @ %entry
; SOFT-NEXT: blt .LBB8_4
; SOFT-NEXT: @ %bb.3: @ %entry
-; SOFT-NEXT: mov r0, r3
+; SOFT-NEXT: mov r2, r3
; SOFT-NEXT: .LBB8_4: @ %entry
-; SOFT-NEXT: rsbs r3, r0, #0
-; SOFT-NEXT: mov r3, r2
+; SOFT-NEXT: rsbs r3, r2, #0
+; SOFT-NEXT: mov r3, r0
; SOFT-NEXT: sbcs r3, r1
-; SOFT-NEXT: blt .LBB8_7
+; SOFT-NEXT: bge .LBB8_6
; SOFT-NEXT: @ %bb.5: @ %entry
-; SOFT-NEXT: cmp r2, #0
-; SOFT-NEXT: beq .LBB8_8
-; SOFT-NEXT: .LBB8_6: @ %entry
-; SOFT-NEXT: pop {r4, pc}
-; SOFT-NEXT: .LBB8_7:
-; SOFT-NEXT: movs r2, #1
-; SOFT-NEXT: cmp r2, #0
-; SOFT-NEXT: bne .LBB8_6
-; SOFT-NEXT: .LBB8_8: @ %entry
; SOFT-NEXT: mov r0, r2
+; SOFT-NEXT: .LBB8_6: @ %entry
; SOFT-NEXT: pop {r4, pc}
;
; VFP2-LABEL: ustest_f16i32:
@@ -1017,21 +992,21 @@ define i64 @stest_f64i64(double %x) {
; VFP2-NEXT: sbcs r4, r2, #0
; VFP2-NEXT: sbcs r4, r3, #0
; VFP2-NEXT: mov.w r4, #0
-; VFP2-NEXT: it lt
+; VFP2-NEXT: itee lt
; VFP2-NEXT: movlt r4, #1
+; VFP2-NEXT: movge r3, r4
+; VFP2-NEXT: movge r2, r4
; VFP2-NEXT: cmp r4, #0
-; VFP2-NEXT: itet eq
-; VFP2-NEXT: moveq r3, r4
-; VFP2-NEXT: movne r4, r2
+; VFP2-NEXT: it eq
; VFP2-NEXT: moveq r1, lr
-; VFP2-NEXT: mov.w r2, #-1
+; VFP2-NEXT: mov.w r4, #-1
; VFP2-NEXT: it eq
-; VFP2-NEXT: moveq r0, r2
+; VFP2-NEXT: moveq r0, r4
; VFP2-NEXT: rsbs r5, r0, #0
; VFP2-NEXT: mov.w lr, #-2147483648
; VFP2-NEXT: sbcs.w r5, lr, r1
-; VFP2-NEXT: sbcs.w r4, r2, r4
-; VFP2-NEXT: sbcs r2, r3
+; VFP2-NEXT: sbcs.w r2, r4, r2
+; VFP2-NEXT: sbcs.w r2, r4, r3
; VFP2-NEXT: itt ge
; VFP2-NEXT: movge r0, r12
; VFP2-NEXT: movge r1, lr
@@ -1048,12 +1023,12 @@ define i64 @stest_f64i64(double %x) {
; FULL-NEXT: sbcs lr, r2, #0
; FULL-NEXT: sbcs lr, r3, #0
; FULL-NEXT: cset lr, lt
+; FULL-NEXT: csel r5, lr, r3, ge
+; FULL-NEXT: csel r2, lr, r2, ge
; FULL-NEXT: cmp.w lr, #0
-; FULL-NEXT: csel r5, r3, lr, ne
; FULL-NEXT: mov.w r3, #-1
; FULL-NEXT: csel r0, r0, r3, ne
; FULL-NEXT: csel r1, r1, r12, ne
-; FULL-NEXT: csel r2, r2, lr, ne
; FULL-NEXT: rsbs r4, r0, #0
; FULL-NEXT: mov.w r12, #-2147483648
; FULL-NEXT: sbcs.w r4, r12, r1
@@ -1273,21 +1248,21 @@ define i64 @stest_f32i64(float %x) {
; VFP2-NEXT: sbcs r4, r2, #0
; VFP2-NEXT: sbcs r4, r3, #0
; VFP2-NEXT: mov.w r4, #0
-; VFP2-NEXT: it lt
+; VFP2-NEXT: itee lt
; VFP2-NEXT: movlt r4, #1
+; VFP2-NEXT: movge r3, r4
+; VFP2-NEXT: movge r2, r4
; VFP2-NEXT: cmp r4, #0
-; VFP2-NEXT: itet eq
-; VFP2-NEXT: moveq r3, r4
-; VFP2-NEXT: movne r4, r2
+; VFP2-NEXT: it eq
; VFP2-NEXT: moveq r1, lr
-; VFP2-NEXT: mov.w r2, #-1
+; VFP2-NEXT: mov.w r4, #-1
; VFP2-NEXT: it eq
-; VFP2-NEXT: moveq r0, r2
+; VFP2-NEXT: moveq r0, r4
; VFP2-NEXT: rsbs r5, r0, #0
; VFP2-NEXT: mov.w lr, #-2147483648
; VFP2-NEXT: sbcs.w r5, lr, r1
-; VFP2-NEXT: sbcs.w r4, r2, r4
-; VFP2-NEXT: sbcs r2, r3
+; VFP2-NEXT: sbcs.w r2, r4, r2
+; VFP2-NEXT: sbcs.w r2, r4, r3
; VFP2-NEXT: itt ge
; VFP2-NEXT: movge r0, r12
; VFP2-NEXT: movge r1, lr
@@ -1304,12 +1279,12 @@ define i64 @stest_f32i64(float %x) {
; FULL-NEXT: sbcs lr, r2, #0
; FULL-NEXT: sbcs lr, r3, #0
; FULL-NEXT: cset lr, lt
+; FULL-NEXT: csel r5, lr, r3, ge
+; FULL-NEXT: csel r2, lr, r2, ge
; FULL-NEXT: cmp.w lr, #0
-; FULL-NEXT: csel r5, r3, lr, ne
; FULL-NEXT: mov.w r3, #-1
; FULL-NEXT: csel r0, r0, r3, ne
; FULL-NEXT: csel r1, r1, r12, ne
-; FULL-NEXT: csel r2, r2, lr, ne
; FULL-NEXT: rsbs r4, r0, #0
; FULL-NEXT: mov.w r12, #-2147483648
; FULL-NEXT: sbcs.w r4, r12, r1
@@ -1534,21 +1509,21 @@ define i64 @stest_f16i64(half %x) {
; VFP2-NEXT: sbcs r4, r2, #0
; VFP2-NEXT: sbcs r4, r3, #0
; VFP2-NEXT: mov.w r4, #0
-; VFP2-NEXT: it lt
+; VFP2-NEXT: itee lt
; VFP2-NEXT: movlt r4, #1
+; VFP2-NEXT: movge r3, r4
+; VFP2-NEXT: movge r2, r4
; VFP2-NEXT: cmp r4, #0
-; VFP2-NEXT: itet eq
-; VFP2-NEXT: moveq r3, r4
-; VFP2-NEXT: movne r4, r2
+; VFP2-NEXT: it eq
; VFP2-NEXT: moveq r1, lr
-; VFP2-NEXT: mov.w r2, #-1
+; VFP2-NEXT: mov.w r4, #-1
; VFP2-NEXT: it eq
-; VFP2-NEXT: moveq r0, r2
+; VFP2-NEXT: moveq r0, r4
; VFP2-NEXT: rsbs r5, r0, #0
; VFP2-NEXT: mov.w lr, #-2147483648
; VFP2-NEXT: sbcs.w r5, lr, r1
-; VFP2-NEXT: sbcs.w r4, r2, r4
-; VFP2-NEXT: sbcs r2, r3
+; VFP2-NEXT: sbcs.w r2, r4, r2
+; VFP2-NEXT: sbcs.w r2, r4, r3
; VFP2-NEXT: itt ge
; VFP2-NEXT: movge r0, r12
; VFP2-NEXT: movge r1, lr
@@ -1567,12 +1542,12 @@ define i64 @stest_f16i64(half %x) {
; FULL-NEXT: sbcs lr, r2, #0
; FULL-NEXT: sbcs lr, r3, #0
; FULL-NEXT: cset lr, lt
+; FULL-NEXT: csel r5, lr, r3, ge
+; FULL-NEXT: csel r2, lr, r2, ge
; FULL-NEXT: cmp.w lr, #0
-; FULL-NEXT: csel r5, r3, lr, ne
; FULL-NEXT: mov.w r3, #-1
; FULL-NEXT: csel r0, r0, r3, ne
; FULL-NEXT: csel r1, r1, r12, ne
-; FULL-NEXT: csel r2, r2, lr, ne
; FULL-NEXT: rsbs r4, r0, #0
; FULL-NEXT: mov.w r12, #-2147483648
; FULL-NEXT: sbcs.w r4, r12, r1
@@ -1743,37 +1718,34 @@ define i32 @stest_f64i32_mm(double %x) {
; SOFT-NEXT: bl __aeabi_d2lz
; SOFT-NEXT: movs r2, #1
; SOFT-NEXT: movs r3, #0
-; SOFT-NEXT: ldr r4, .LCPI27_0
-; SOFT-NEXT: subs r5, r0, r4
-; SOFT-NEXT: mov r5, r1
-; SOFT-NEXT: sbcs r5, r3
+; SOFT-NEXT: ldr r5, .LCPI27_0
+; SOFT-NEXT: subs r4, r0, r5
+; SOFT-NEXT: mov r4, r1
+; SOFT-NEXT: sbcs r4, r3
+; SOFT-NEXT: mov r4, r2
; SOFT-NEXT: bge .LBB27_7
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: mov r4, r2
-; SOFT-NEXT: bge .LBB27_8
+; SOFT-NEXT: blt .LBB27_8
; SOFT-NEXT: .LBB27_2: @ %entry
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: bne .LBB27_4
+; SOFT-NEXT: blt .LBB27_4
; SOFT-NEXT: .LBB27_3: @ %entry
-; SOFT-NEXT: mov r1, r4
+; SOFT-NEXT: mov r0, r5
; SOFT-NEXT: .LBB27_4: @ %entry
; SOFT-NEXT: mvns r3, r3
-; SOFT-NEXT: lsls r2, r2, #31
-; SOFT-NEXT: subs r4, r2, r0
-; SOFT-NEXT: sbcs r3, r1
+; SOFT-NEXT: lsls r1, r2, #31
+; SOFT-NEXT: subs r2, r1, r0
+; SOFT-NEXT: sbcs r3, r4
; SOFT-NEXT: blt .LBB27_6
; SOFT-NEXT: @ %bb.5: @ %entry
-; SOFT-NEXT: mov r0, r2
+; SOFT-NEXT: mov r0, r1
; SOFT-NEXT: .LBB27_6: @ %entry
; SOFT-NEXT: pop {r4, r5, r7, pc}
; SOFT-NEXT: .LBB27_7: @ %entry
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: mov r4, r2
-; SOFT-NEXT: blt .LBB27_2
-; SOFT-NEXT: .LBB27_8: @ %entry
; SOFT-NEXT: mov r4, r3
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: beq .LBB27_3
+; SOFT-NEXT: bge .LBB27_2
+; SOFT-NEXT: .LBB27_8: @ %entry
+; SOFT-NEXT: mov r4, r1
+; SOFT-NEXT: bge .LBB27_3
; SOFT-NEXT: b .LBB27_4
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.9:
@@ -1786,17 +1758,14 @@ define i32 @stest_f64i32_mm(double %x) {
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: vmov r0, r1, d0
; VFP2-NEXT: bl __aeabi_d2lz
-; VFP2-NEXT: mvn r2, #-2147483648
-; VFP2-NEXT: subs r3, r0, r2
-; VFP2-NEXT: sbcs r3, r1, #0
-; VFP2-NEXT: it ge
-; VFP2-NEXT: movge r0, r2
+; VFP2-NEXT: mvn r12, #-2147483648
+; VFP2-NEXT: subs.w r3, r0, r12
; VFP2-NEXT: mov.w r2, #0
-; VFP2-NEXT: it lt
+; VFP2-NEXT: sbcs r3, r1, #0
+; VFP2-NEXT: itte lt
; VFP2-NEXT: movlt r2, #1
-; VFP2-NEXT: cmp r2, #0
-; VFP2-NEXT: it ne
-; VFP2-NEXT: movne r2, r1
+; VFP2-NEXT: movlt r2, r1
+; VFP2-NEXT: movge r0, r12
; VFP2-NEXT: mov.w r1, #-1
; VFP2-NEXT: rsbs.w r3, r0, #-2147483648
; VFP2-NEXT: sbcs r1, r2
@@ -1910,37 +1879,34 @@ define i32 @stest_f32i32_mm(float %x) {
; SOFT-NEXT: bl __aeabi_f2lz
; SOFT-NEXT: movs r2, #1
; SOFT-NEXT: movs r3, #0
-; SOFT-NEXT: ldr r4, .LCPI30_0
-; SOFT-NEXT: subs r5, r0, r4
-; SOFT-NEXT: mov r5, r1
-; SOFT-NEXT: sbcs r5, r3
+; SOFT-NEXT: ldr r5, .LCPI30_0
+; SOFT-NEXT: subs r4, r0, r5
+; SOFT-NEXT: mov r4, r1
+; SOFT-NEXT: sbcs r4, r3
+; SOFT-NEXT: mov r4, r2
; SOFT-NEXT: bge .LBB30_7
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: mov r4, r2
-; SOFT-NEXT: bge .LBB30_8
+; SOFT-NEXT: blt .LBB30_8
; SOFT-NEXT: .LBB30_2: @ %entry
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: bne .LBB30_4
+; SOFT-NEXT: blt .LBB30_4
; SOFT-NEXT: .LBB30_3: @ %entry
-; SOFT-NEXT: mov r1, r4
+; SOFT-NEXT: mov r0, r5
; SOFT-NEXT: .LBB30_4: @ %entry
; SOFT-NEXT: mvns r3, r3
-; SOFT-NEXT: lsls r2, r2, #31
-; SOFT-NEXT: subs r4, r2, r0
-; SOFT-NEXT: sbcs r3, r1
+; SOFT-NEXT: lsls r1, r2, #31
+; SOFT-NEXT: subs r2, r1, r0
+; SOFT-NEXT: sbcs r3, r4
; SOFT-NEXT: blt .LBB30_6
; SOFT-NEXT: @ %bb.5: @ %entry
-; SOFT-NEXT: mov r0, r2
+; SOFT-NEXT: mov r0, r1
; SOFT-NEXT: .LBB30_6: @ %entry
; SOFT-NEXT: pop {r4, r5, r7, pc}
; SOFT-NEXT: .LBB30_7: @ %entry
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: mov r4, r2
-; SOFT-NEXT: blt .LBB30_2
-; SOFT-NEXT: .LBB30_8: @ %entry
; SOFT-NEXT: mov r4, r3
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: beq .LBB30_3
+; SOFT-NEXT: bge .LBB30_2
+; SOFT-NEXT: .LBB30_8: @ %entry
+; SOFT-NEXT: mov r4, r1
+; SOFT-NEXT: bge .LBB30_3
; SOFT-NEXT: b .LBB30_4
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.9:
@@ -2030,37 +1996,34 @@ define i32 @stest_f16i32_mm(half %x) {
; SOFT-NEXT: bl __aeabi_f2lz
; SOFT-NEXT: movs r2, #1
; SOFT-NEXT: movs r3, #0
-; SOFT-NEXT: ldr r4, .LCPI33_0
-; SOFT-NEXT: subs r5, r0, r4
-; SOFT-NEXT: mov r5, r1
-; SOFT-NEXT: sbcs r5, r3
+; SOFT-NEXT: ldr r5, .LCPI33_0
+; SOFT-NEXT: subs r4, r0, r5
+; SOFT-NEXT: mov r4, r1
+; SOFT-NEXT: sbcs r4, r3
+; SOFT-NEXT: mov r4, r2
; SOFT-NEXT: bge .LBB33_7
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: mov r4, r2
-; SOFT-NEXT: bge .LBB33_8
+; SOFT-NEXT: blt .LBB33_8
; SOFT-NEXT: .LBB33_2: @ %entry
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: bne .LBB33_4
+; SOFT-NEXT: blt .LBB33_4
; SOFT-NEXT: .LBB33_3: @ %entry
-; SOFT-NEXT: mov r1, r4
+; SOFT-NEXT: mov r0, r5
; SOFT-NEXT: .LBB33_4: @ %entry
; SOFT-NEXT: mvns r3, r3
-; SOFT-NEXT: lsls r2, r2, #31
-; SOFT-NEXT: subs r4, r2, r0
-; SOFT-NEXT: sbcs r3, r1
+; SOFT-NEXT: lsls r1, r2, #31
+; SOFT-NEXT: subs r2, r1, r0
+; SOFT-NEXT: sbcs r3, r4
; SOFT-NEXT: blt .LBB33_6
; SOFT-NEXT: @ %bb.5: @ %entry
-; SOFT-NEXT: mov r0, r2
+; SOFT-NEXT: mov r0, r1
; SOFT-NEXT: .LBB33_6: @ %entry
; SOFT-NEXT: pop {r4, r5, r7, pc}
; SOFT-NEXT: .LBB33_7: @ %entry
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: mov r4, r2
-; SOFT-NEXT: blt .LBB33_2
-; SOFT-NEXT: .LBB33_8: @ %entry
; SOFT-NEXT: mov r4, r3
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: beq .LBB33_3
+; SOFT-NEXT: bge .LBB33_2
+; SOFT-NEXT: .LBB33_8: @ %entry
+; SOFT-NEXT: mov r4, r1
+; SOFT-NEXT: bge .LBB33_3
; SOFT-NEXT: b .LBB33_4
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.9:
@@ -2595,77 +2558,79 @@ define i64 @stest_f64i64_mm(double %x) {
; SOFT-NEXT: .pad #12
; SOFT-NEXT: sub sp, #12
; SOFT-NEXT: bl __fixdfti
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: movs r0, #1
; SOFT-NEXT: movs r5, #0
-; SOFT-NEXT: ldr r6, .LCPI45_0
-; SOFT-NEXT: adds r4, r7, #1
-; SOFT-NEXT: mov r4, r1
-; SOFT-NEXT: sbcs r4, r6
-; SOFT-NEXT: mov r4, r2
-; SOFT-NEXT: sbcs r4, r5
-; SOFT-NEXT: mov r4, r3
-; SOFT-NEXT: sbcs r4, r5
-; SOFT-NEXT: mov r4, r0
+; SOFT-NEXT: mvns r6, r5
+; SOFT-NEXT: movs r4, #1
+; SOFT-NEXT: ldr r7, .LCPI45_0
+; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT: adds r0, r0, #1
+; SOFT-NEXT: mov r0, r1
+; SOFT-NEXT: sbcs r0, r7
+; SOFT-NEXT: mov r0, r2
+; SOFT-NEXT: sbcs r0, r5
+; SOFT-NEXT: mov r0, r3
+; SOFT-NEXT: sbcs r0, r5
+; SOFT-NEXT: mov r7, r4
; SOFT-NEXT: blt .LBB45_2
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: mov r4, r5
+; SOFT-NEXT: mov r7, r5
; SOFT-NEXT: .LBB45_2: @ %entry
-; SOFT-NEXT: mvns r6, r5
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: beq .LBB45_12
+; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: bge .LBB45_4
; SOFT-NEXT: @ %bb.3: @ %entry
-; SOFT-NEXT: beq .LBB45_13
+; SOFT-NEXT: mov r0, r3
; SOFT-NEXT: .LBB45_4: @ %entry
-; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: beq .LBB45_14
-; SOFT-NEXT: .LBB45_5: @ %entry
-; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT: bne .LBB45_7
+; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: blt .LBB45_12
+; SOFT-NEXT: @ %bb.5: @ %entry
+; SOFT-NEXT: mov r2, r6
+; SOFT-NEXT: blt .LBB45_13
; SOFT-NEXT: .LBB45_6: @ %entry
-; SOFT-NEXT: mov r7, r6
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB45_8
; SOFT-NEXT: .LBB45_7: @ %entry
-; SOFT-NEXT: lsls r3, r0, #31
-; SOFT-NEXT: rsbs r4, r7, #0
-; SOFT-NEXT: mov r4, r3
-; SOFT-NEXT: sbcs r4, r1
-; SOFT-NEXT: mov r4, r6
-; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT: sbcs r4, r2
-; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT: sbcs r6, r2
-; SOFT-NEXT: bge .LBB45_15
-; SOFT-NEXT: @ %bb.8: @ %entry
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB45_16
-; SOFT-NEXT: .LBB45_9: @ %entry
-; SOFT-NEXT: bne .LBB45_11
+; SOFT-NEXT: ldr r1, .LCPI45_0
+; SOFT-NEXT: .LBB45_8: @ %entry
+; SOFT-NEXT: lsls r3, r4, #31
+; SOFT-NEXT: rsbs r7, r2, #0
+; SOFT-NEXT: mov r7, r3
+; SOFT-NEXT: sbcs r7, r1
+; SOFT-NEXT: mov r7, r6
+; SOFT-NEXT: sbcs r7, r0
+; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: sbcs r6, r0
+; SOFT-NEXT: bge .LBB45_14
+; SOFT-NEXT: @ %bb.9: @ %entry
+; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: blt .LBB45_15
; SOFT-NEXT: .LBB45_10: @ %entry
-; SOFT-NEXT: mov r1, r3
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB45_16
; SOFT-NEXT: .LBB45_11: @ %entry
-; SOFT-NEXT: mov r0, r7
; SOFT-NEXT: add sp, #12
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .LBB45_12: @ %entry
-; SOFT-NEXT: mov r3, r4
-; SOFT-NEXT: bne .LBB45_4
+; SOFT-NEXT: mov r0, r2
+; SOFT-NEXT: mov r2, r6
+; SOFT-NEXT: bge .LBB45_6
; SOFT-NEXT: .LBB45_13: @ %entry
-; SOFT-NEXT: mov r2, r4
-; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: bne .LBB45_5
+; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: beq .LBB45_7
+; SOFT-NEXT: b .LBB45_8
; SOFT-NEXT: .LBB45_14: @ %entry
-; SOFT-NEXT: ldr r1, .LCPI45_0
-; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT: beq .LBB45_6
-; SOFT-NEXT: b .LBB45_7
+; SOFT-NEXT: mov r4, r5
+; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: bge .LBB45_10
; SOFT-NEXT: .LBB45_15: @ %entry
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: bne .LBB45_9
+; SOFT-NEXT: mov r0, r2
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB45_11
; SOFT-NEXT: .LBB45_16: @ %entry
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: beq .LBB45_10
-; SOFT-NEXT: b .LBB45_11
+; SOFT-NEXT: mov r1, r3
+; SOFT-NEXT: add sp, #12
+; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.17:
; SOFT-NEXT: .LCPI45_0:
@@ -2683,56 +2648,56 @@ define i64 @stest_f64i64_mm(double %x) {
; VFP2-NEXT: sbcs r4, r2, #0
; VFP2-NEXT: sbcs r4, r3, #0
; VFP2-NEXT: mov.w r4, #0
-; VFP2-NEXT: it lt
+; VFP2-NEXT: iteee lt
; VFP2-NEXT: movlt r4, #1
+; VFP2-NEXT: movge r3, r4
+; VFP2-NEXT: movge r2, r4
+; VFP2-NEXT: movge.w r0, #-1
; VFP2-NEXT: cmp r4, #0
-; VFP2-NEXT: itet eq
-; VFP2-NEXT: moveq r3, r4
-; VFP2-NEXT: movne r4, r2
-; VFP2-NEXT: moveq r1, lr
-; VFP2-NEXT: mov.w r2, #-1
; VFP2-NEXT: it eq
-; VFP2-NEXT: moveq r0, r2
+; VFP2-NEXT: moveq r1, lr
; VFP2-NEXT: rsbs r5, r0, #0
; VFP2-NEXT: mov.w lr, #-2147483648
; VFP2-NEXT: sbcs.w r5, lr, r1
-; VFP2-NEXT: sbcs.w r4, r2, r4
-; VFP2-NEXT: sbcs r2, r3
-; VFP2-NEXT: it lt
+; VFP2-NEXT: mov.w r4, #-1
+; VFP2-NEXT: sbcs.w r2, r4, r2
+; VFP2-NEXT: sbcs.w r2, r4, r3
+; VFP2-NEXT: ite lt
; VFP2-NEXT: movlt.w r12, #1
+; VFP2-NEXT: movge r0, r12
; VFP2-NEXT: cmp.w r12, #0
-; VFP2-NEXT: itt eq
-; VFP2-NEXT: moveq r0, r12
+; VFP2-NEXT: it eq
; VFP2-NEXT: moveq r1, lr
; VFP2-NEXT: pop {r4, r5, r7, pc}
;
; FULL-LABEL: stest_f64i64_mm:
; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r4, r5, r7, lr}
-; FULL-NEXT: push {r4, r5, r7, lr}
+; FULL-NEXT: .save {r4, lr}
+; FULL-NEXT: push {r4, lr}
; FULL-NEXT: bl __fixdfti
; FULL-NEXT: subs.w lr, r0, #-1
; FULL-NEXT: mvn r12, #-2147483648
; FULL-NEXT: sbcs.w lr, r1, r12
; FULL-NEXT: sbcs lr, r2, #0
; FULL-NEXT: sbcs lr, r3, #0
+; FULL-NEXT: it ge
+; FULL-NEXT: movge.w r0, #-1
; FULL-NEXT: cset lr, lt
+; FULL-NEXT: csel r3, lr, r3, ge
+; FULL-NEXT: csel r2, lr, r2, ge
; FULL-NEXT: cmp.w lr, #0
-; FULL-NEXT: csel r5, r3, lr, ne
-; FULL-NEXT: mov.w r3, #-1
-; FULL-NEXT: csel r0, r0, r3, ne
+; FULL-NEXT: mov.w lr, #-2147483648
; FULL-NEXT: csel r1, r1, r12, ne
-; FULL-NEXT: csel r2, r2, lr, ne
; FULL-NEXT: rsbs r4, r0, #0
-; FULL-NEXT: mov.w r12, #-2147483648
-; FULL-NEXT: sbcs.w r4, r12, r1
-; FULL-NEXT: sbcs.w r2, r3, r2
-; FULL-NEXT: sbcs.w r2, r3, r5
+; FULL-NEXT: sbcs.w r4, lr, r1
+; FULL-NEXT: mov.w r12, #-1
+; FULL-NEXT: sbcs.w r2, r12, r2
+; FULL-NEXT: sbcs.w r2, r12, r3
; FULL-NEXT: cset r2, lt
+; FULL-NEXT: csel r0, r2, r0, ge
; FULL-NEXT: cmp r2, #0
-; FULL-NEXT: csel r0, r0, r2, ne
-; FULL-NEXT: csel r1, r1, r12, ne
-; FULL-NEXT: pop {r4, r5, r7, pc}
+; FULL-NEXT: csel r1, r1, lr, ne
+; FULL-NEXT: pop {r4, pc}
entry:
%conv = fptosi double %x to i128
%spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
@@ -2744,30 +2709,33 @@ entry:
define i64 @utest_f64i64_mm(double %x) {
; SOFT-LABEL: utest_f64i64_mm:
; SOFT: @ %bb.0: @ %entry
-; SOFT-NEXT: .save {r4, lr}
-; SOFT-NEXT: push {r4, lr}
+; SOFT-NEXT: .save {r4, r5, r7, lr}
+; SOFT-NEXT: push {r4, r5, r7, lr}
; SOFT-NEXT: bl __fixunsdfti
-; SOFT-NEXT: movs r4, #0
+; SOFT-NEXT: movs r4, #1
+; SOFT-NEXT: movs r5, #0
; SOFT-NEXT: subs r2, r2, #1
-; SOFT-NEXT: sbcs r3, r4
-; SOFT-NEXT: blo .LBB46_4
+; SOFT-NEXT: sbcs r3, r5
+; SOFT-NEXT: bhs .LBB46_5
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: beq .LBB46_5
+; SOFT-NEXT: mov r3, r4
+; SOFT-NEXT: blo .LBB46_6
; SOFT-NEXT: .LBB46_2: @ %entry
-; SOFT-NEXT: beq .LBB46_6
+; SOFT-NEXT: bhs .LBB46_4
; SOFT-NEXT: .LBB46_3: @ %entry
-; SOFT-NEXT: pop {r4, pc}
-; SOFT-NEXT: .LBB46_4:
-; SOFT-NEXT: movs r4, #1
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: bne .LBB46_2
+; SOFT-NEXT: mov r4, r1
+; SOFT-NEXT: .LBB46_4: @ %entry
+; SOFT-NEXT: mov r0, r3
+; SOFT-NEXT: mov r1, r4
+; SOFT-NEXT: pop {r4, r5, r7, pc}
; SOFT-NEXT: .LBB46_5: @ %entry
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: bne .LBB46_3
+; SOFT-NEXT: mov r4, r5
+; SOFT-NEXT: mov r3, r4
+; SOFT-NEXT: bhs .LBB46_2
; SOFT-NEXT: .LBB46_6: @ %entry
-; SOFT-NEXT: mov r1, r4
-; SOFT-NEXT: pop {r4, pc}
+; SOFT-NEXT: mov r3, r0
+; SOFT-NEXT: blo .LBB46_3
+; SOFT-NEXT: b .LBB46_4
;
; VFP2-LABEL: utest_f64i64_mm:
; VFP2: @ %bb.0: @ %entry
@@ -2777,12 +2745,10 @@ define i64 @utest_f64i64_mm(double %x) {
; VFP2-NEXT: subs r2, #1
; VFP2-NEXT: mov.w r12, #0
; VFP2-NEXT: sbcs r2, r3, #0
-; VFP2-NEXT: it lo
+; VFP2-NEXT: itee lo
; VFP2-NEXT: movlo.w r12, #1
-; VFP2-NEXT: cmp.w r12, #0
-; VFP2-NEXT: itt eq
-; VFP2-NEXT: moveq r0, r12
-; VFP2-NEXT: moveq r1, r12
+; VFP2-NEXT: movhs r0, r12
+; VFP2-NEXT: movhs r1, r12
; VFP2-NEXT: pop {r7, pc}
;
; FULL-LABEL: utest_f64i64_mm:
@@ -2793,9 +2759,8 @@ define i64 @utest_f64i64_mm(double %x) {
; FULL-NEXT: subs r2, #1
; FULL-NEXT: sbcs r2, r3, #0
; FULL-NEXT: cset r2, lo
-; FULL-NEXT: cmp r2, #0
-; FULL-NEXT: csel r0, r0, r2, ne
-; FULL-NEXT: csel r1, r1, r2, ne
+; FULL-NEXT: csel r0, r2, r0, hs
+; FULL-NEXT: csel r1, r2, r1, hs
; FULL-NEXT: pop {r7, pc}
entry:
%conv = fptoui double %x to i128
@@ -2807,52 +2772,52 @@ entry:
define i64 @ustest_f64i64_mm(double %x) {
; SOFT-LABEL: ustest_f64i64_mm:
; SOFT: @ %bb.0: @ %entry
-; SOFT-NEXT: .save {r4, lr}
-; SOFT-NEXT: push {r4, lr}
+; SOFT-NEXT: .save {r4, r5, r7, lr}
+; SOFT-NEXT: push {r4, r5, r7, lr}
; SOFT-NEXT: bl __fixdfti
; SOFT-NEXT: mov r4, r1
+; SOFT-NEXT: movs r5, #1
; SOFT-NEXT: movs r1, #0
; SOFT-NEXT: subs r2, r2, #1
; SOFT-NEXT: mov r2, r3
; SOFT-NEXT: sbcs r2, r1
-; SOFT-NEXT: blt .LBB47_2
+; SOFT-NEXT: bge .LBB47_8
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: mov r2, r1
-; SOFT-NEXT: cmp r2, #0
-; SOFT-NEXT: beq .LBB47_3
-; SOFT-NEXT: b .LBB47_4
-; SOFT-NEXT: .LBB47_2:
-; SOFT-NEXT: movs r2, #1
-; SOFT-NEXT: cmp r2, #0
-; SOFT-NEXT: bne .LBB47_4
+; SOFT-NEXT: mov r2, r5
+; SOFT-NEXT: blt .LBB47_9
+; SOFT-NEXT: .LBB47_2: @ %entry
+; SOFT-NEXT: mov r4, r5
+; SOFT-NEXT: blt .LBB47_10
; SOFT-NEXT: .LBB47_3: @ %entry
-; SOFT-NEXT: mov r4, r2
+; SOFT-NEXT: bge .LBB47_5
; SOFT-NEXT: .LBB47_4: @ %entry
-; SOFT-NEXT: beq .LBB47_10
-; SOFT-NEXT: @ %bb.5: @ %entry
-; SOFT-NEXT: bne .LBB47_7
-; SOFT-NEXT: .LBB47_6: @ %entry
-; SOFT-NEXT: mov r3, r2
-; SOFT-NEXT: .LBB47_7: @ %entry
-; SOFT-NEXT: cmp r3, #0
-; SOFT-NEXT: mov r2, r1
+; SOFT-NEXT: mov r5, r3
+; SOFT-NEXT: .LBB47_5: @ %entry
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: mov r0, r1
; SOFT-NEXT: bpl .LBB47_11
-; SOFT-NEXT: @ %bb.8: @ %entry
+; SOFT-NEXT: @ %bb.6: @ %entry
; SOFT-NEXT: bpl .LBB47_12
+; SOFT-NEXT: .LBB47_7: @ %entry
+; SOFT-NEXT: pop {r4, r5, r7, pc}
+; SOFT-NEXT: .LBB47_8: @ %entry
+; SOFT-NEXT: mov r5, r1
+; SOFT-NEXT: mov r2, r5
+; SOFT-NEXT: bge .LBB47_2
; SOFT-NEXT: .LBB47_9: @ %entry
-; SOFT-NEXT: mov r0, r2
-; SOFT-NEXT: pop {r4, pc}
+; SOFT-NEXT: mov r2, r4
+; SOFT-NEXT: mov r4, r5
+; SOFT-NEXT: bge .LBB47_3
; SOFT-NEXT: .LBB47_10: @ %entry
-; SOFT-NEXT: mov r0, r2
-; SOFT-NEXT: beq .LBB47_6
-; SOFT-NEXT: b .LBB47_7
+; SOFT-NEXT: mov r4, r0
+; SOFT-NEXT: blt .LBB47_4
+; SOFT-NEXT: b .LBB47_5
; SOFT-NEXT: .LBB47_11: @ %entry
-; SOFT-NEXT: mov r2, r0
-; SOFT-NEXT: bmi .LBB47_9
+; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: bmi .LBB47_7
; SOFT-NEXT: .LBB47_12: @ %entry
-; SOFT-NEXT: mov r1, r4
-; SOFT-NEXT: mov r0, r2
-; SOFT-NEXT: pop {r4, pc}
+; SOFT-NEXT: mov r1, r2
+; SOFT-NEXT: pop {r4, r5, r7, pc}
;
; VFP2-LABEL: ustest_f64i64_mm:
; VFP2: @ %bb.0: @ %entry
@@ -2862,13 +2827,11 @@ define i64 @ustest_f64i64_mm(double %x) {
; VFP2-NEXT: subs r2, #1
; VFP2-NEXT: mov.w r12, #0
; VFP2-NEXT: sbcs r2, r3, #0
-; VFP2-NEXT: it lt
+; VFP2-NEXT: iteet lt
; VFP2-NEXT: movlt.w r12, #1
-; VFP2-NEXT: cmp.w r12, #0
-; VFP2-NEXT: itte eq
-; VFP2-NEXT: moveq r1, r12
-; VFP2-NEXT: moveq r0, r12
-; VFP2-NEXT: movne r12, r3
+; VFP2-NEXT: movge r1, r12
+; VFP2-NEXT: movge r0, r12
+; VFP2-NEXT: movlt r12, r3
; VFP2-NEXT: cmp.w r12, #0
; VFP2-NEXT: itt mi
; VFP2-NEXT: movmi r0, #0
@@ -2883,10 +2846,9 @@ define i64 @ustest_f64i64_mm(double %x) {
; FULL-NEXT: subs r2, #1
; FULL-NEXT: sbcs r2, r3, #0
; FULL-NEXT: cset r2, lt
-; FULL-NEXT: cmp r2, #0
-; FULL-NEXT: csel r1, r1, r2, ne
-; FULL-NEXT: csel r0, r0, r2, ne
-; FULL-NEXT: csel r2, r3, r2, ne
+; FULL-NEXT: csel r1, r2, r1, ge
+; FULL-NEXT: csel r0, r2, r0, ge
+; FULL-NEXT: csel r2, r2, r3, ge
; FULL-NEXT: cmp r2, #0
; FULL-NEXT: itt mi
; FULL-NEXT: movmi r0, #0
@@ -2908,77 +2870,79 @@ define i64 @stest_f32i64_mm(float %x) {
; SOFT-NEXT: .pad #12
; SOFT-NEXT: sub sp, #12
; SOFT-NEXT: bl __fixsfti
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: movs r0, #1
; SOFT-NEXT: movs r5, #0
-; SOFT-NEXT: ldr r6, .LCPI48_0
-; SOFT-NEXT: adds r4, r7, #1
-; SOFT-NEXT: mov r4, r1
-; SOFT-NEXT: sbcs r4, r6
-; SOFT-NEXT: mov r4, r2
-; SOFT-NEXT: sbcs r4, r5
-; SOFT-NEXT: mov r4, r3
-; SOFT-NEXT: sbcs r4, r5
-; SOFT-NEXT: mov r4, r0
+; SOFT-NEXT: mvns r6, r5
+; SOFT-NEXT: movs r4, #1
+; SOFT-NEXT: ldr r7, .LCPI48_0
+; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT: adds r0, r0, #1
+; SOFT-NEXT: mov r0, r1
+; SOFT-NEXT: sbcs r0, r7
+; SOFT-NEXT: mov r0, r2
+; SOFT-NEXT: sbcs r0, r5
+; SOFT-NEXT: mov r0, r3
+; SOFT-NEXT: sbcs r0, r5
+; SOFT-NEXT: mov r7, r4
; SOFT-NEXT: blt .LBB48_2
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: mov r4, r5
+; SOFT-NEXT: mov r7, r5
; SOFT-NEXT: .LBB48_2: @ %entry
-; SOFT-NEXT: mvns r6, r5
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: beq .LBB48_12
+; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: bge .LBB48_4
; SOFT-NEXT: @ %bb.3: @ %entry
-; SOFT-NEXT: beq .LBB48_13
+; SOFT-NEXT: mov r0, r3
; SOFT-NEXT: .LBB48_4: @ %entry
-; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: beq .LBB48_14
-; SOFT-NEXT: .LBB48_5: @ %entry
-; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT: bne .LBB48_7
+; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: blt .LBB48_12
+; SOFT-NEXT: @ %bb.5: @ %entry
+; SOFT-NEXT: mov r2, r6
+; SOFT-NEXT: blt .LBB48_13
; SOFT-NEXT: .LBB48_6: @ %entry
-; SOFT-NEXT: mov r7, r6
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB48_8
; SOFT-NEXT: .LBB48_7: @ %entry
-; SOFT-NEXT: lsls r3, r0, #31
-; SOFT-NEXT: rsbs r4, r7, #0
-; SOFT-NEXT: mov r4, r3
-; SOFT-NEXT: sbcs r4, r1
-; SOFT-NEXT: mov r4, r6
-; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT: sbcs r4, r2
-; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT: sbcs r6, r2
-; SOFT-NEXT: bge .LBB48_15
-; SOFT-NEXT: @ %bb.8: @ %entry
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB48_16
-; SOFT-NEXT: .LBB48_9: @ %entry
-; SOFT-NEXT: bne .LBB48_11
+; SOFT-NEXT: ldr r1, .LCPI48_0
+; SOFT-NEXT: .LBB48_8: @ %entry
+; SOFT-NEXT: lsls r3, r4, #31
+; SOFT-NEXT: rsbs r7, r2, #0
+; SOFT-NEXT: mov r7, r3
+; SOFT-NEXT: sbcs r7, r1
+; SOFT-NEXT: mov r7, r6
+; SOFT-NEXT: sbcs r7, r0
+; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: sbcs r6, r0
+; SOFT-NEXT: bge .LBB48_14
+; SOFT-NEXT: @ %bb.9: @ %entry
+; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: blt .LBB48_15
; SOFT-NEXT: .LBB48_10: @ %entry
-; SOFT-NEXT: mov r1, r3
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB48_16
; SOFT-NEXT: .LBB48_11: @ %entry
-; SOFT-NEXT: mov r0, r7
; SOFT-NEXT: add sp, #12
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .LBB48_12: @ %entry
-; SOFT-NEXT: mov r3, r4
-; SOFT-NEXT: bne .LBB48_4
+; SOFT-NEXT: mov r0, r2
+; SOFT-NEXT: mov r2, r6
+; SOFT-NEXT: bge .LBB48_6
; SOFT-NEXT: .LBB48_13: @ %entry
-; SOFT-NEXT: mov r2, r4
-; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: bne .LBB48_5
+; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: beq .LBB48_7
+; SOFT-NEXT: b .LBB48_8
; SOFT-NEXT: .LBB48_14: @ %entry
-; SOFT-NEXT: ldr r1, .LCPI48_0
-; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT: beq .LBB48_6
-; SOFT-NEXT: b .LBB48_7
+; SOFT-NEXT: mov r4, r5
+; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: bge .LBB48_10
; SOFT-NEXT: .LBB48_15: @ %entry
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: bne .LBB48_9
+; SOFT-NEXT: mov r0, r2
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB48_11
; SOFT-NEXT: .LBB48_16: @ %entry
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: beq .LBB48_10
-; SOFT-NEXT: b .LBB48_11
+; SOFT-NEXT: mov r1, r3
+; SOFT-NEXT: add sp, #12
+; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.17:
; SOFT-NEXT: .LCPI48_0:
@@ -2996,56 +2960,56 @@ define i64 @stest_f32i64_mm(float %x) {
; VFP2-NEXT: sbcs r4, r2, #0
; VFP2-NEXT: sbcs r4, r3, #0
; VFP2-NEXT: mov.w r4, #0
-; VFP2-NEXT: it lt
+; VFP2-NEXT: iteee lt
; VFP2-NEXT: movlt r4, #1
+; VFP2-NEXT: movge r3, r4
+; VFP2-NEXT: movge r2, r4
+; VFP2-NEXT: movge.w r0, #-1
; VFP2-NEXT: cmp r4, #0
-; VFP2-NEXT: itet eq
-; VFP2-NEXT: moveq r3, r4
-; VFP2-NEXT: movne r4, r2
-; VFP2-NEXT: moveq r1, lr
-; VFP2-NEXT: mov.w r2, #-1
; VFP2-NEXT: it eq
-; VFP2-NEXT: moveq r0, r2
+; VFP2-NEXT: moveq r1, lr
; VFP2-NEXT: rsbs r5, r0, #0
; VFP2-NEXT: mov.w lr, #-2147483648
; VFP2-NEXT: sbcs.w r5, lr, r1
-; VFP2-NEXT: sbcs.w r4, r2, r4
-; VFP2-NEXT: sbcs r2, r3
-; VFP2-NEXT: it lt
+; VFP2-NEXT: mov.w r4, #-1
+; VFP2-NEXT: sbcs.w r2, r4, r2
+; VFP2-NEXT: sbcs.w r2, r4, r3
+; VFP2-NEXT: ite lt
; VFP2-NEXT: movlt.w r12, #1
+; VFP2-NEXT: movge r0, r12
; VFP2-NEXT: cmp.w r12, #0
-; VFP2-NEXT: itt eq
-; VFP2-NEXT: moveq r0, r12
+; VFP2-NEXT: it eq
; VFP2-NEXT: moveq r1, lr
; VFP2-NEXT: pop {r4, r5, r7, pc}
;
; FULL-LABEL: stest_f32i64_mm:
; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r4, r5, r7, lr}
-; FULL-NEXT: push {r4, r5, r7, lr}
+; FULL-NEXT: .save {r4, lr}
+; FULL-NEXT: push {r4, lr}
; FULL-NEXT: bl __fixsfti
; FULL-NEXT: subs.w lr, r0, #-1
; FULL-NEXT: mvn r12, #-2147483648
; FULL-NEXT: sbcs.w lr, r1, r12
; FULL-NEXT: sbcs lr, r2, #0
; FULL-NEXT: sbcs lr, r3, #0
+; FULL-NEXT: it ge
+; FULL-NEXT: movge.w r0, #-1
; FULL-NEXT: cset lr, lt
+; FULL-NEXT: csel r3, lr, r3, ge
+; FULL-NEXT: csel r2, lr, r2, ge
; FULL-NEXT: cmp.w lr, #0
-; FULL-NEXT: csel r5, r3, lr, ne
-; FULL-NEXT: mov.w r3, #-1
-; FULL-NEXT: csel r0, r0, r3, ne
+; FULL-NEXT: mov.w lr, #-2147483648
; FULL-NEXT: csel r1, r1, r12, ne
-; FULL-NEXT: csel r2, r2, lr, ne
; FULL-NEXT: rsbs r4, r0, #0
-; FULL-NEXT: mov.w r12, #-2147483648
-; FULL-NEXT: sbcs.w r4, r12, r1
-; FULL-NEXT: sbcs.w r2, r3, r2
-; FULL-NEXT: sbcs.w r2, r3, r5
+; FULL-NEXT: sbcs.w r4, lr, r1
+; FULL-NEXT: mov.w r12, #-1
+; FULL-NEXT: sbcs.w r2, r12, r2
+; FULL-NEXT: sbcs.w r2, r12, r3
; FULL-NEXT: cset r2, lt
+; FULL-NEXT: csel r0, r2, r0, ge
; FULL-NEXT: cmp r2, #0
-; FULL-NEXT: csel r0, r0, r2, ne
-; FULL-NEXT: csel r1, r1, r12, ne
-; FULL-NEXT: pop {r4, r5, r7, pc}
+; FULL-NEXT: csel r1, r1, lr, ne
+; FULL-NEXT: pop {r4, pc}
entry:
%conv = fptosi float %x to i128
%spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
@@ -3057,30 +3021,33 @@ entry:
define i64 @utest_f32i64_mm(float %x) {
; SOFT-LABEL: utest_f32i64_mm:
; SOFT: @ %bb.0: @ %entry
-; SOFT-NEXT: .save {r4, lr}
-; SOFT-NEXT: push {r4, lr}
+; SOFT-NEXT: .save {r4, r5, r7, lr}
+; SOFT-NEXT: push {r4, r5, r7, lr}
; SOFT-NEXT: bl __fixunssfti
-; SOFT-NEXT: movs r4, #0
+; SOFT-NEXT: movs r4, #1
+; SOFT-NEXT: movs r5, #0
; SOFT-NEXT: subs r2, r2, #1
-; SOFT-NEXT: sbcs r3, r4
-; SOFT-NEXT: blo .LBB49_4
+; SOFT-NEXT: sbcs r3, r5
+; SOFT-NEXT: bhs .LBB49_5
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: beq .LBB49_5
+; SOFT-NEXT: mov r3, r4
+; SOFT-NEXT: blo .LBB49_6
; SOFT-NEXT: .LBB49_2: @ %entry
-; SOFT-NEXT: beq .LBB49_6
+; SOFT-NEXT: bhs .LBB49_4
; SOFT-NEXT: .LBB49_3: @ %entry
-; SOFT-NEXT: pop {r4, pc}
-; SOFT-NEXT: .LBB49_4:
-; SOFT-NEXT: movs r4, #1
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: bne .LBB49_2
+; SOFT-NEXT: mov r4, r1
+; SOFT-NEXT: .LBB49_4: @ %entry
+; SOFT-NEXT: mov r0, r3
+; SOFT-NEXT: mov r1, r4
+; SOFT-NEXT: pop {r4, r5, r7, pc}
; SOFT-NEXT: .LBB49_5: @ %entry
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: bne .LBB49_3
+; SOFT-NEXT: mov r4, r5
+; SOFT-NEXT: mov r3, r4
+; SOFT-NEXT: bhs .LBB49_2
; SOFT-NEXT: .LBB49_6: @ %entry
-; SOFT-NEXT: mov r1, r4
-; SOFT-NEXT: pop {r4, pc}
+; SOFT-NEXT: mov r3, r0
+; SOFT-NEXT: blo .LBB49_3
+; SOFT-NEXT: b .LBB49_4
;
; VFP2-LABEL: utest_f32i64_mm:
; VFP2: @ %bb.0: @ %entry
@@ -3090,12 +3057,10 @@ define i64 @utest_f32i64_mm(float %x) {
; VFP2-NEXT: subs r2, #1
; VFP2-NEXT: mov.w r12, #0
; VFP2-NEXT: sbcs r2, r3, #0
-; VFP2-NEXT: it lo
+; VFP2-NEXT: itee lo
; VFP2-NEXT: movlo.w r12, #1
-; VFP2-NEXT: cmp.w r12, #0
-; VFP2-NEXT: itt eq
-; VFP2-NEXT: moveq r0, r12
-; VFP2-NEXT: moveq r1, r12
+; VFP2-NEXT: movhs r0, r12
+; VFP2-NEXT: movhs r1, r12
; VFP2-NEXT: pop {r7, pc}
;
; FULL-LABEL: utest_f32i64_mm:
@@ -3106,9 +3071,8 @@ define i64 @utest_f32i64_mm(float %x) {
; FULL-NEXT: subs r2, #1
; FULL-NEXT: sbcs r2, r3, #0
; FULL-NEXT: cset r2, lo
-; FULL-NEXT: cmp r2, #0
-; FULL-NEXT: csel r0, r0, r2, ne
-; FULL-NEXT: csel r1, r1, r2, ne
+; FULL-NEXT: csel r0, r2, r0, hs
+; FULL-NEXT: csel r1, r2, r1, hs
; FULL-NEXT: pop {r7, pc}
entry:
%conv = fptoui float %x to i128
@@ -3120,52 +3084,52 @@ entry:
define i64 @ustest_f32i64_mm(float %x) {
; SOFT-LABEL: ustest_f32i64_mm:
; SOFT: @ %bb.0: @ %entry
-; SOFT-NEXT: .save {r4, lr}
-; SOFT-NEXT: push {r4, lr}
+; SOFT-NEXT: .save {r4, r5, r7, lr}
+; SOFT-NEXT: push {r4, r5, r7, lr}
; SOFT-NEXT: bl __fixsfti
; SOFT-NEXT: mov r4, r1
+; SOFT-NEXT: movs r5, #1
; SOFT-NEXT: movs r1, #0
; SOFT-NEXT: subs r2, r2, #1
; SOFT-NEXT: mov r2, r3
; SOFT-NEXT: sbcs r2, r1
-; SOFT-NEXT: blt .LBB50_2
+; SOFT-NEXT: bge .LBB50_8
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: mov r2, r1
-; SOFT-NEXT: cmp r2, #0
-; SOFT-NEXT: beq .LBB50_3
-; SOFT-NEXT: b .LBB50_4
-; SOFT-NEXT: .LBB50_2:
-; SOFT-NEXT: movs r2, #1
-; SOFT-NEXT: cmp r2, #0
-; SOFT-NEXT: bne .LBB50_4
+; SOFT-NEXT: mov r2, r5
+; SOFT-NEXT: blt .LBB50_9
+; SOFT-NEXT: .LBB50_2: @ %entry
+; SOFT-NEXT: mov r4, r5
+; SOFT-NEXT: blt .LBB50_10
; SOFT-NEXT: .LBB50_3: @ %entry
-; SOFT-NEXT: mov r4, r2
+; SOFT-NEXT: bge .LBB50_5
; SOFT-NEXT: .LBB50_4: @ %entry
-; SOFT-NEXT: beq .LBB50_10
-; SOFT-NEXT: @ %bb.5: @ %entry
-; SOFT-NEXT: bne .LBB50_7
-; SOFT-NEXT: .LBB50_6: @ %entry
-; SOFT-NEXT: mov r3, r2
-; SOFT-NEXT: .LBB50_7: @ %entry
-; SOFT-NEXT: cmp r3, #0
-; SOFT-NEXT: mov r2, r1
+; SOFT-NEXT: mov r5, r3
+; SOFT-NEXT: .LBB50_5: @ %entry
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: mov r0, r1
; SOFT-NEXT: bpl .LBB50_11
-; SOFT-NEXT: @ %bb.8: @ %entry
+; SOFT-NEXT: @ %bb.6: @ %entry
; SOFT-NEXT: bpl .LBB50_12
+; SOFT-NEXT: .LBB50_7: @ %entry
+; SOFT-NEXT: pop {r4, r5, r7, pc}
+; SOFT-NEXT: .LBB50_8: @ %entry
+; SOFT-NEXT: mov r5, r1
+; SOFT-NEXT: mov r2, r5
+; SOFT-NEXT: bge .LBB50_2
; SOFT-NEXT: .LBB50_9: @ %entry
-; SOFT-NEXT: mov r0, r2
-; SOFT-NEXT: pop {r4, pc}
+; SOFT-NEXT: mov r2, r4
+; SOFT-NEXT: mov r4, r5
+; SOFT-NEXT: bge .LBB50_3
; SOFT-NEXT: .LBB50_10: @ %entry
-; SOFT-NEXT: mov r0, r2
-; SOFT-NEXT: beq .LBB50_6
-; SOFT-NEXT: b .LBB50_7
+; SOFT-NEXT: mov r4, r0
+; SOFT-NEXT: blt .LBB50_4
+; SOFT-NEXT: b .LBB50_5
; SOFT-NEXT: .LBB50_11: @ %entry
-; SOFT-NEXT: mov r2, r0
-; SOFT-NEXT: bmi .LBB50_9
+; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: bmi .LBB50_7
; SOFT-NEXT: .LBB50_12: @ %entry
-; SOFT-NEXT: mov r1, r4
-; SOFT-NEXT: mov r0, r2
-; SOFT-NEXT: pop {r4, pc}
+; SOFT-NEXT: mov r1, r2
+; SOFT-NEXT: pop {r4, r5, r7, pc}
;
; VFP2-LABEL: ustest_f32i64_mm:
; VFP2: @ %bb.0: @ %entry
@@ -3175,13 +3139,11 @@ define i64 @ustest_f32i64_mm(float %x) {
; VFP2-NEXT: subs r2, #1
; VFP2-NEXT: mov.w r12, #0
; VFP2-NEXT: sbcs r2, r3, #0
-; VFP2-NEXT: it lt
+; VFP2-NEXT: iteet lt
; VFP2-NEXT: movlt.w r12, #1
-; VFP2-NEXT: cmp.w r12, #0
-; VFP2-NEXT: itte eq
-; VFP2-NEXT: moveq r1, r12
-; VFP2-NEXT: moveq r0, r12
-; VFP2-NEXT: movne r12, r3
+; VFP2-NEXT: movge r1, r12
+; VFP2-NEXT: movge r0, r12
+; VFP2-NEXT: movlt r12, r3
; VFP2-NEXT: cmp.w r12, #0
; VFP2-NEXT: itt mi
; VFP2-NEXT: movmi r0, #0
@@ -3196,10 +3158,9 @@ define i64 @ustest_f32i64_mm(float %x) {
; FULL-NEXT: subs r2, #1
; FULL-NEXT: sbcs r2, r3, #0
; FULL-NEXT: cset r2, lt
-; FULL-NEXT: cmp r2, #0
-; FULL-NEXT: csel r1, r1, r2, ne
-; FULL-NEXT: csel r0, r0, r2, ne
-; FULL-NEXT: csel r2, r3, r2, ne
+; FULL-NEXT: csel r1, r2, r1, ge
+; FULL-NEXT: csel r0, r2, r0, ge
+; FULL-NEXT: csel r2, r2, r3, ge
; FULL-NEXT: cmp r2, #0
; FULL-NEXT: itt mi
; FULL-NEXT: movmi r0, #0
@@ -3223,77 +3184,79 @@ define i64 @stest_f16i64_mm(half %x) {
; SOFT-NEXT: uxth r0, r0
; SOFT-NEXT: bl __aeabi_h2f
; SOFT-NEXT: bl __fixsfti
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: movs r0, #1
; SOFT-NEXT: movs r5, #0
-; SOFT-NEXT: ldr r6, .LCPI51_0
-; SOFT-NEXT: adds r4, r7, #1
-; SOFT-NEXT: mov r4, r1
-; SOFT-NEXT: sbcs r4, r6
-; SOFT-NEXT: mov r4, r2
-; SOFT-NEXT: sbcs r4, r5
-; SOFT-NEXT: mov r4, r3
-; SOFT-NEXT: sbcs r4, r5
-; SOFT-NEXT: mov r4, r0
+; SOFT-NEXT: mvns r6, r5
+; SOFT-NEXT: movs r4, #1
+; SOFT-NEXT: ldr r7, .LCPI51_0
+; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT: adds r0, r0, #1
+; SOFT-NEXT: mov r0, r1
+; SOFT-NEXT: sbcs r0, r7
+; SOFT-NEXT: mov r0, r2
+; SOFT-NEXT: sbcs r0, r5
+; SOFT-NEXT: mov r0, r3
+; SOFT-NEXT: sbcs r0, r5
+; SOFT-NEXT: mov r7, r4
; SOFT-NEXT: blt .LBB51_2
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: mov r4, r5
+; SOFT-NEXT: mov r7, r5
; SOFT-NEXT: .LBB51_2: @ %entry
-; SOFT-NEXT: mvns r6, r5
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: beq .LBB51_12
+; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: bge .LBB51_4
; SOFT-NEXT: @ %bb.3: @ %entry
-; SOFT-NEXT: beq .LBB51_13
+; SOFT-NEXT: mov r0, r3
; SOFT-NEXT: .LBB51_4: @ %entry
-; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: beq .LBB51_14
-; SOFT-NEXT: .LBB51_5: @ %entry
-; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT: bne .LBB51_7
+; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: mov r0, r7
+; SOFT-NEXT: blt .LBB51_12
+; SOFT-NEXT: @ %bb.5: @ %entry
+; SOFT-NEXT: mov r2, r6
+; SOFT-NEXT: blt .LBB51_13
; SOFT-NEXT: .LBB51_6: @ %entry
-; SOFT-NEXT: mov r7, r6
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB51_8
; SOFT-NEXT: .LBB51_7: @ %entry
-; SOFT-NEXT: lsls r3, r0, #31
-; SOFT-NEXT: rsbs r4, r7, #0
-; SOFT-NEXT: mov r4, r3
-; SOFT-NEXT: sbcs r4, r1
-; SOFT-NEXT: mov r4, r6
-; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT: sbcs r4, r2
-; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT: sbcs r6, r2
-; SOFT-NEXT: bge .LBB51_15
-; SOFT-NEXT: @ %bb.8: @ %entry
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB51_16
-; SOFT-NEXT: .LBB51_9: @ %entry
-; SOFT-NEXT: bne .LBB51_11
+; SOFT-NEXT: ldr r1, .LCPI51_0
+; SOFT-NEXT: .LBB51_8: @ %entry
+; SOFT-NEXT: lsls r3, r4, #31
+; SOFT-NEXT: rsbs r7, r2, #0
+; SOFT-NEXT: mov r7, r3
+; SOFT-NEXT: sbcs r7, r1
+; SOFT-NEXT: mov r7, r6
+; SOFT-NEXT: sbcs r7, r0
+; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: sbcs r6, r0
+; SOFT-NEXT: bge .LBB51_14
+; SOFT-NEXT: @ %bb.9: @ %entry
+; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: blt .LBB51_15
; SOFT-NEXT: .LBB51_10: @ %entry
-; SOFT-NEXT: mov r1, r3
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: beq .LBB51_16
; SOFT-NEXT: .LBB51_11: @ %entry
-; SOFT-NEXT: mov r0, r7
; SOFT-NEXT: add sp, #12
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .LBB51_12: @ %entry
-; SOFT-NEXT: mov r3, r4
-; SOFT-NEXT: bne .LBB51_4
+; SOFT-NEXT: mov r0, r2
+; SOFT-NEXT: mov r2, r6
+; SOFT-NEXT: bge .LBB51_6
; SOFT-NEXT: .LBB51_13: @ %entry
-; SOFT-NEXT: mov r2, r4
-; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: bne .LBB51_5
+; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: beq .LBB51_7
+; SOFT-NEXT: b .LBB51_8
; SOFT-NEXT: .LBB51_14: @ %entry
-; SOFT-NEXT: ldr r1, .LCPI51_0
-; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT: beq .LBB51_6
-; SOFT-NEXT: b .LBB51_7
+; SOFT-NEXT: mov r4, r5
+; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: bge .LBB51_10
; SOFT-NEXT: .LBB51_15: @ %entry
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: bne .LBB51_9
+; SOFT-NEXT: mov r0, r2
+; SOFT-NEXT: cmp r4, #0
+; SOFT-NEXT: bne .LBB51_11
; SOFT-NEXT: .LBB51_16: @ %entry
-; SOFT-NEXT: mov r7, r0
-; SOFT-NEXT: beq .LBB51_10
-; SOFT-NEXT: b .LBB51_11
+; SOFT-NEXT: mov r1, r3
+; SOFT-NEXT: add sp, #12
+; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .p2align 2
; SOFT-NEXT: @ %bb.17:
; SOFT-NEXT: .LCPI51_0:
@@ -3314,33 +3277,32 @@ define i64 @stest_f16i64_mm(half %x) {
; VFP2-NEXT: sbcs r4, r2, #0
; VFP2-NEXT: sbcs r4, r3, #0
; VFP2-NEXT: mov.w r4, #0
-; VFP2-NEXT: it lt
+; VFP2-NEXT: iteee lt
; VFP2-NEXT: movlt r4, #1
+; VFP2-NEXT: movge r3, r4
+; VFP2-NEXT: movge r2, r4
+; VFP2-NEXT: movge.w r0, #-1
; VFP2-NEXT: cmp r4, #0
-; VFP2-NEXT: itet eq
-; VFP2-NEXT: moveq r3, r4
-; VFP2-NEXT: movne r4, r2
-; VFP2-NEXT: moveq r1, lr
-; VFP2-NEXT: mov.w r2, #-1
; VFP2-NEXT: it eq
-; VFP2-NEXT: moveq r0, r2
+; VFP2-NEXT: moveq r1, lr
; VFP2-NEXT: rsbs r5, r0, #0
; VFP2-NEXT: mov.w lr, #-2147483648
; VFP2-NEXT: sbcs.w r5, lr, r1
-; VFP2-NEXT: sbcs.w r4, r2, r4
-; VFP2-NEXT: sbcs r2, r3
-; VFP2-NEXT: it lt
+; VFP2-NEXT: mov.w r4, #-1
+; VFP2-NEXT: sbcs.w r2, r4, r2
+; VFP2-NEXT: sbcs.w r2, r4, r3
+; VFP2-NEXT: ite lt
; VFP2-NEXT: movlt.w r12, #1
+; VFP2-NEXT: movge r0, r12
; VFP2-NEXT: cmp.w r12, #0
-; VFP2-NEXT: itt eq
-; VFP2-NEXT: moveq r0, r12
+; VFP2-NEXT: it eq
; VFP2-NEXT: moveq r1, lr
; VFP2-NEXT: pop {r4, r5, r7, pc}
;
; FULL-LABEL: stest_f16i64_mm:
; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r4, r5, r7, lr}
-; FULL-NEXT: push {r4, r5, r7, lr}
+; FULL-NEXT: .save {r4, lr}
+; FULL-NEXT: push {r4, lr}
; FULL-NEXT: vmov.f16 r0, s0
; FULL-NEXT: vmov s0, r0
; FULL-NEXT: bl __fixhfti
@@ -3349,23 +3311,24 @@ define i64 @stest_f16i64_mm(half %x) {
; FULL-NEXT: sbcs.w lr, r1, r12
; FULL-NEXT: sbcs lr, r2, #0
; FULL-NEXT: sbcs lr, r3, #0
+; FULL-NEXT: it ge
+; FULL-NEXT: movge.w r0, #-1
; FULL-NEXT: cset lr, lt
+; FULL-NEXT: csel r3, lr, r3, ge
+; FULL-NEXT: csel r2, lr, r2, ge
; FULL-NEXT: cmp.w lr, #0
-; FULL-NEXT: csel r5, r3, lr, ne
-; FULL-NEXT: mov.w r3, #-1
-; FULL-NEXT: csel r0, r0, r3, ne
+; FULL-NEXT: mov.w lr, #-2147483648
; FULL-NEXT: csel r1, r1, r12, ne
-; FULL-NEXT: csel r2, r2, lr, ne
; FULL-NEXT: rsbs r4, r0, #0
-; FULL-NEXT: mov.w r12, #-2147483648
-; FULL-NEXT: sbcs.w r4, r12, r1
-; FULL-NEXT: sbcs.w r2, r3, r2
-; FULL-NEXT: sbcs.w r2, r3, r5
+; FULL-NEXT: sbcs.w r4, lr, r1
+; FULL-NEXT: mov.w r12, #-1
+; FULL-NEXT: sbcs.w r2, r12, r2
+; FULL-NEXT: sbcs.w r2, r12, r3
; FULL-NEXT: cset r2, lt
+; FULL-NEXT: csel r0, r2, r0, ge
; FULL-NEXT: cmp r2, #0
-; FULL-NEXT: csel r0, r0, r2, ne
-; FULL-NEXT: csel r1, r1, r12, ne
-; FULL-NEXT: pop {r4, r5, r7, pc}
+; FULL-NEXT: csel r1, r1, lr, ne
+; FULL-NEXT: pop {r4, pc}
entry:
%conv = fptosi half %x to i128
%spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
@@ -3377,32 +3340,35 @@ entry:
define i64 @utesth_f16i64_mm(half %x) {
; SOFT-LABEL: utesth_f16i64_mm:
; SOFT: @ %bb.0: @ %entry
-; SOFT-NEXT: .save {r4, lr}
-; SOFT-NEXT: push {r4, lr}
+; SOFT-NEXT: .save {r4, r5, r7, lr}
+; SOFT-NEXT: push {r4, r5, r7, lr}
; SOFT-NEXT: uxth r0, r0
; SOFT-NEXT: bl __aeabi_h2f
; SOFT-NEXT: bl __fixunssfti
-; SOFT-NEXT: movs r4, #0
+; SOFT-NEXT: movs r4, #1
+; SOFT-NEXT: movs r5, #0
; SOFT-NEXT: subs r2, r2, #1
-; SOFT-NEXT: sbcs r3, r4
-; SOFT-NEXT: blo .LBB52_4
+; SOFT-NEXT: sbcs r3, r5
+; SOFT-NEXT: bhs .LBB52_5
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: beq .LBB52_5
+; SOFT-NEXT: mov r3, r4
+; SOFT-NEXT: blo .LBB52_6
; SOFT-NEXT: .LBB52_2: @ %entry
-; SOFT-NEXT: beq .LBB52_6
+; SOFT-NEXT: bhs .LBB52_4
; SOFT-NEXT: .LBB52_3: @ %entry
-; SOFT-NEXT: pop {r4, pc}
-; SOFT-NEXT: .LBB52_4:
-; SOFT-NEXT: movs r4, #1
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: bne .LBB52_2
+; SOFT-NEXT: mov r4, r1
+; SOFT-NEXT: .LBB52_4: @ %entry
+; SOFT-NEXT: mov r0, r3
+; SOFT-NEXT: mov r1, r4
+; SOFT-NEXT: pop {r4, r5, r7, pc}
; SOFT-NEXT: .LBB52_5: @ %entry
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: bne .LBB52_3
+; SOFT-NEXT: mov r4, r5
+; SOFT-NEXT: mov r3, r4
+; SOFT-NEXT: bhs .LBB52_2
; SOFT-NEXT: .LBB52_6: @ %entry
-; SOFT-NEXT: mov r1, r4
-; SOFT-NEXT: pop {r4, pc}
+; SOFT-NEXT: mov r3, r0
+; SOFT-NEXT: blo .LBB52_3
+; SOFT-NEXT: b .LBB52_4
;
; VFP2-LABEL: utesth_f16i64_mm:
; VFP2: @ %bb.0: @ %entry
@@ -3415,12 +3381,10 @@ define i64 @utesth_f16i64_mm(half %x) {
; VFP2-NEXT: subs r2, #1
; VFP2-NEXT: mov.w r12, #0
; VFP2-NEXT: sbcs r2, r3, #0
-; VFP2-NEXT: it lo
+; VFP2-NEXT: itee lo
; VFP2-NEXT: movlo.w r12, #1
-; VFP2-NEXT: cmp.w r12, #0
-; VFP2-NEXT: itt eq
-; VFP2-NEXT: moveq r0, r12
-; VFP2-NEXT: moveq r1, r12
+; VFP2-NEXT: movhs r0, r12
+; VFP2-NEXT: movhs r1, r12
; VFP2-NEXT: pop {r7, pc}
;
; FULL-LABEL: utesth_f16i64_mm:
@@ -3433,9 +3397,8 @@ define i64 @utesth_f16i64_mm(half %x) {
; FULL-NEXT: subs r2, #1
; FULL-NEXT: sbcs r2, r3, #0
; FULL-NEXT: cset r2, lo
-; FULL-NEXT: cmp r2, #0
-; FULL-NEXT: csel r0, r0, r2, ne
-; FULL-NEXT: csel r1, r1, r2, ne
+; FULL-NEXT: csel r0, r2, r0, hs
+; FULL-NEXT: csel r1, r2, r1, hs
; FULL-NEXT: pop {r7, pc}
entry:
%conv = fptoui half %x to i128
@@ -3447,54 +3410,54 @@ entry:
define i64 @ustest_f16i64_mm(half %x) {
; SOFT-LABEL: ustest_f16i64_mm:
; SOFT: @ %bb.0: @ %entry
-; SOFT-NEXT: .save {r4, lr}
-; SOFT-NEXT: push {r4, lr}
+; SOFT-NEXT: .save {r4, r5, r7, lr}
+; SOFT-NEXT: push {r4, r5, r7, lr}
; SOFT-NEXT: uxth r0, r0
; SOFT-NEXT: bl __aeabi_h2f
; SOFT-NEXT: bl __fixsfti
; SOFT-NEXT: mov r4, r1
+; SOFT-NEXT: movs r5, #1
; SOFT-NEXT: movs r1, #0
; SOFT-NEXT: subs r2, r2, #1
; SOFT-NEXT: mov r2, r3
; SOFT-NEXT: sbcs r2, r1
-; SOFT-NEXT: blt .LBB53_2
+; SOFT-NEXT: bge .LBB53_8
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: mov r2, r1
-; SOFT-NEXT: cmp r2, #0
-; SOFT-NEXT: beq .LBB53_3
-; SOFT-NEXT: b .LBB53_4
-; SOFT-NEXT: .LBB53_2:
-; SOFT-NEXT: movs r2, #1
-; SOFT-NEXT: cmp r2, #0
-; SOFT-NEXT: bne .LBB53_4
+; SOFT-NEXT: mov r2, r5
+; SOFT-NEXT: blt .LBB53_9
+; SOFT-NEXT: .LBB53_2: @ %entry
+; SOFT-NEXT: mov r4, r5
+; SOFT-NEXT: blt .LBB53_10
; SOFT-NEXT: .LBB53_3: @ %entry
-; SOFT-NEXT: mov r4, r2
+; SOFT-NEXT: bge .LBB53_5
; SOFT-NEXT: .LBB53_4: @ %entry
-; SOFT-NEXT: beq .LBB53_10
-; SOFT-NEXT: @ %bb.5: @ %entry
-; SOFT-NEXT: bne .LBB53_7
-; SOFT-NEXT: .LBB53_6: @ %entry
-; SOFT-NEXT: mov r3, r2
-; SOFT-NEXT: .LBB53_7: @ %entry
-; SOFT-NEXT: cmp r3, #0
-; SOFT-NEXT: mov r2, r1
+; SOFT-NEXT: mov r5, r3
+; SOFT-NEXT: .LBB53_5: @ %entry
+; SOFT-NEXT: cmp r5, #0
+; SOFT-NEXT: mov r0, r1
; SOFT-NEXT: bpl .LBB53_11
-; SOFT-NEXT: @ %bb.8: @ %entry
+; SOFT-NEXT: @ %bb.6: @ %entry
; SOFT-NEXT: bpl .LBB53_12
+; SOFT-NEXT: .LBB53_7: @ %entry
+; SOFT-NEXT: pop {r4, r5, r7, pc}
+; SOFT-NEXT: .LBB53_8: @ %entry
+; SOFT-NEXT: mov r5, r1
+; SOFT-NEXT: mov r2, r5
+; SOFT-NEXT: bge .LBB53_2
; SOFT-NEXT: .LBB53_9: @ %entry
-; SOFT-NEXT: mov r0, r2
-; SOFT-NEXT: pop {r4, pc}
+; SOFT-NEXT: mov r2, r4
+; SOFT-NEXT: mov r4, r5
+; SOFT-NEXT: bge .LBB53_3
; SOFT-NEXT: .LBB53_10: @ %entry
-; SOFT-NEXT: mov r0, r2
-; SOFT-NEXT: beq .LBB53_6
-; SOFT-NEXT: b .LBB53_7
+; SOFT-NEXT: mov r4, r0
+; SOFT-NEXT: blt .LBB53_4
+; SOFT-NEXT: b .LBB53_5
; SOFT-NEXT: .LBB53_11: @ %entry
-; SOFT-NEXT: mov r2, r0
-; SOFT-NEXT: bmi .LBB53_9
+; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: bmi .LBB53_7
; SOFT-NEXT: .LBB53_12: @ %entry
-; SOFT-NEXT: mov r1, r4
-; SOFT-NEXT: mov r0, r2
-; SOFT-NEXT: pop {r4, pc}
+; SOFT-NEXT: mov r1, r2
+; SOFT-NEXT: pop {r4, r5, r7, pc}
;
; VFP2-LABEL: ustest_f16i64_mm:
; VFP2: @ %bb.0: @ %entry
@@ -3507,13 +3470,11 @@ define i64 @ustest_f16i64_mm(half %x) {
; VFP2-NEXT: subs r2, #1
; VFP2-NEXT: mov.w r12, #0
; VFP2-NEXT: sbcs r2, r3, #0
-; VFP2-NEXT: it lt
+; VFP2-NEXT: iteet lt
; VFP2-NEXT: movlt.w r12, #1
-; VFP2-NEXT: cmp.w r12, #0
-; VFP2-NEXT: itte eq
-; VFP2-NEXT: moveq r1, r12
-; VFP2-NEXT: moveq r0, r12
-; VFP2-NEXT: movne r12, r3
+; VFP2-NEXT: movge r1, r12
+; VFP2-NEXT: movge r0, r12
+; VFP2-NEXT: movlt r12, r3
; VFP2-NEXT: cmp.w r12, #0
; VFP2-NEXT: itt mi
; VFP2-NEXT: movmi r0, #0
@@ -3530,10 +3491,9 @@ define i64 @ustest_f16i64_mm(half %x) {
; FULL-NEXT: subs r2, #1
; FULL-NEXT: sbcs r2, r3, #0
; FULL-NEXT: cset r2, lt
-; FULL-NEXT: cmp r2, #0
-; FULL-NEXT: csel r1, r1, r2, ne
-; FULL-NEXT: csel r0, r0, r2, ne
-; FULL-NEXT: csel r2, r3, r2, ne
+; FULL-NEXT: csel r1, r2, r1, ge
+; FULL-NEXT: csel r0, r2, r0, ge
+; FULL-NEXT: csel r2, r2, r3, ge
; FULL-NEXT: cmp r2, #0
; FULL-NEXT: itt mi
; FULL-NEXT: movmi r0, #0
@@ -3560,11 +3520,11 @@ define void @unroll_maxmin(ptr nocapture %0, ptr nocapture readonly %1, i32 %2)
; SOFT-NEXT: movs r0, #0
; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill
; SOFT-NEXT: mvns r0, r0
-; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill
; SOFT-NEXT: movs r0, #1
; SOFT-NEXT: lsls r1, r0, #31
-; SOFT-NEXT: str r1, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
; SOFT-NEXT: lsls r7, r0, #10
; SOFT-NEXT: b .LBB54_2
; SOFT-NEXT: .LBB54_1: @ in Loop: Header=BB54_2 Depth=1
@@ -3585,25 +3545,25 @@ define void @unroll_maxmin(ptr nocapture %0, ptr nocapture readonly %1, i32 %2)
; SOFT-NEXT: mov r2, r1
; SOFT-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
; SOFT-NEXT: sbcs r2, r3
+; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
; SOFT-NEXT: bge .LBB54_14
; SOFT-NEXT: @ %bb.3: @ in Loop: Header=BB54_2 Depth=1
-; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT: bge .LBB54_15
+; SOFT-NEXT: blt .LBB54_15
; SOFT-NEXT: .LBB54_4: @ in Loop: Header=BB54_2 Depth=1
-; SOFT-NEXT: cmp r2, #0
-; SOFT-NEXT: bne .LBB54_6
+; SOFT-NEXT: blt .LBB54_6
; SOFT-NEXT: .LBB54_5: @ in Loop: Header=BB54_2 Depth=1
-; SOFT-NEXT: mov r1, r2
+; SOFT-NEXT: ldr r0, .LCPI54_0
; SOFT-NEXT: .LBB54_6: @ in Loop: Header=BB54_2 Depth=1
-; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT: subs r2, r2, r0
-; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT: sbcs r2, r1
-; SOFT-NEXT: blt .LBB54_8
+; SOFT-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT: subs r1, r3, r0
+; SOFT-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT: sbcs r1, r2
+; SOFT-NEXT: mov r1, r3
+; SOFT-NEXT: bge .LBB54_8
; SOFT-NEXT: @ %bb.7: @ in Loop: Header=BB54_2 Depth=1
-; SOFT-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT: mov r1, r0
; SOFT-NEXT: .LBB54_8: @ in Loop: Header=BB54_2 Depth=1
-; SOFT-NEXT: str r0, [r5]
+; SOFT-NEXT: str r1, [r5]
; SOFT-NEXT: ldr r0, [r4, #4]
; SOFT-NEXT: mov r1, r6
; SOFT-NEXT: bl __aeabi_fmul
@@ -3613,40 +3573,39 @@ define void @unroll_maxmin(ptr nocapture %0, ptr nocapture readonly %1, i32 %2)
; SOFT-NEXT: mov r2, r1
; SOFT-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
; SOFT-NEXT: sbcs r2, r3
-; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
; SOFT-NEXT: bge .LBB54_16
; SOFT-NEXT: @ %bb.9: @ in Loop: Header=BB54_2 Depth=1
-; SOFT-NEXT: cmp r2, #0
-; SOFT-NEXT: beq .LBB54_17
+; SOFT-NEXT: blt .LBB54_17
; SOFT-NEXT: .LBB54_10: @ in Loop: Header=BB54_2 Depth=1
-; SOFT-NEXT: bne .LBB54_12
+; SOFT-NEXT: ldr r1, .LCPI54_0
+; SOFT-NEXT: bge .LBB54_12
; SOFT-NEXT: .LBB54_11: @ in Loop: Header=BB54_2 Depth=1
-; SOFT-NEXT: ldr r0, .LCPI54_0
+; SOFT-NEXT: mov r1, r0
; SOFT-NEXT: .LBB54_12: @ in Loop: Header=BB54_2 Depth=1
-; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT: subs r2, r2, r0
-; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT: sbcs r2, r1
-; SOFT-NEXT: blt .LBB54_1
-; SOFT-NEXT: @ %bb.13: @ in Loop: Header=BB54_2 Depth=1
+; SOFT-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT: subs r0, r3, r1
; SOFT-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT: sbcs r0, r2
+; SOFT-NEXT: mov r0, r3
+; SOFT-NEXT: bge .LBB54_1
+; SOFT-NEXT: @ %bb.13: @ in Loop: Header=BB54_2 Depth=1
+; SOFT-NEXT: mov r0, r1
; SOFT-NEXT: b .LBB54_1
; SOFT-NEXT: .LBB54_14: @ in Loop: Header=BB54_2 Depth=1
-; SOFT-NEXT: ldr r0, .LCPI54_0
-; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT: blt .LBB54_4
-; SOFT-NEXT: .LBB54_15: @ in Loop: Header=BB54_2 Depth=1
; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT: cmp r2, #0
-; SOFT-NEXT: beq .LBB54_5
+; SOFT-NEXT: bge .LBB54_4
+; SOFT-NEXT: .LBB54_15: @ in Loop: Header=BB54_2 Depth=1
+; SOFT-NEXT: mov r2, r1
+; SOFT-NEXT: bge .LBB54_5
; SOFT-NEXT: b .LBB54_6
; SOFT-NEXT: .LBB54_16: @ in Loop: Header=BB54_2 Depth=1
; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT: cmp r2, #0
-; SOFT-NEXT: bne .LBB54_10
+; SOFT-NEXT: bge .LBB54_10
; SOFT-NEXT: .LBB54_17: @ in Loop: Header=BB54_2 Depth=1
-; SOFT-NEXT: mov r1, r2
-; SOFT-NEXT: beq .LBB54_11
+; SOFT-NEXT: mov r2, r1
+; SOFT-NEXT: ldr r1, .LCPI54_0
+; SOFT-NEXT: blt .LBB54_11
; SOFT-NEXT: b .LBB54_12
; SOFT-NEXT: .LBB54_18:
; SOFT-NEXT: add sp, #20
@@ -3762,7 +3721,7 @@ define void @unroll_minmax(ptr nocapture %0, ptr nocapture readonly %1, i32 %2)
; SOFT-NEXT: lsls r7, r0, #10
; SOFT-NEXT: b .LBB55_2
; SOFT-NEXT: .LBB55_1: @ in Loop: Header=BB55_2 Depth=1
-; SOFT-NEXT: str r0, [r5, #4]
+; SOFT-NEXT: str r1, [r5, #4]
; SOFT-NEXT: adds r4, #8
; SOFT-NEXT: adds r5, #8
; SOFT-NEXT: subs r7, r7, #2
@@ -3786,15 +3745,16 @@ define void @unroll_minmax(ptr nocapture %0, ptr nocapture readonly %1, i32 %2)
; SOFT-NEXT: @ %bb.5: @ in Loop: Header=BB55_2 Depth=1
; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
; SOFT-NEXT: .LBB55_6: @ in Loop: Header=BB55_2 Depth=1
-; SOFT-NEXT: ldr r2, .LCPI55_0
-; SOFT-NEXT: subs r2, r0, r2
+; SOFT-NEXT: ldr r3, .LCPI55_0
+; SOFT-NEXT: subs r2, r0, r3
; SOFT-NEXT: ldr r2, [sp] @ 4-byte Reload
; SOFT-NEXT: sbcs r1, r2
-; SOFT-NEXT: blt .LBB55_8
+; SOFT-NEXT: mov r1, r3
+; SOFT-NEXT: bge .LBB55_8
; SOFT-NEXT: @ %bb.7: @ in Loop: Header=BB55_2 Depth=1
-; SOFT-NEXT: ldr r0, .LCPI55_0
+; SOFT-NEXT: mov r1, r0
; SOFT-NEXT: .LBB55_8: @ in Loop: Header=BB55_2 Depth=1
-; SOFT-NEXT: str r0, [r5]
+; SOFT-NEXT: str r1, [r5]
; SOFT-NEXT: ldr r0, [r4, #4]
; SOFT-NEXT: mov r1, r6
; SOFT-NEXT: bl __aeabi_fmul
@@ -3811,13 +3771,14 @@ define void @unroll_minmax(ptr nocapture %0, ptr nocapture readonly %1, i32 %2)
; SOFT-NEXT: @ %bb.11: @ in Loop: Header=BB55_2 Depth=1
; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
; SOFT-NEXT: .LBB55_12: @ in Loop: Header=BB55_2 Depth=1
-; SOFT-NEXT: ldr r2, .LCPI55_0
-; SOFT-NEXT: subs r2, r0, r2
+; SOFT-NEXT: ldr r3, .LCPI55_0
+; SOFT-NEXT: subs r2, r0, r3
; SOFT-NEXT: ldr r2, [sp] @ 4-byte Reload
; SOFT-NEXT: sbcs r1, r2
-; SOFT-NEXT: blt .LBB55_1
+; SOFT-NEXT: mov r1, r3
+; SOFT-NEXT: bge .LBB55_1
; SOFT-NEXT: @ %bb.13: @ in Loop: Header=BB55_2 Depth=1
-; SOFT-NEXT: ldr r0, .LCPI55_0
+; SOFT-NEXT: mov r1, r0
; SOFT-NEXT: b .LBB55_1
; SOFT-NEXT: .LBB55_14:
; SOFT-NEXT: add sp, #12
@@ -3920,11 +3881,12 @@ define i32 @stest_f32i32i64(float %x) {
; SOFT-NEXT: .save {r4, lr}
; SOFT-NEXT: push {r4, lr}
; SOFT-NEXT: bl __aeabi_f2lz
+; SOFT-NEXT: mov r2, r0
; SOFT-NEXT: movs r3, #0
-; SOFT-NEXT: ldr r2, .LCPI56_0
-; SOFT-NEXT: subs r4, r0, r2
+; SOFT-NEXT: ldr r0, .LCPI56_0
+; SOFT-NEXT: subs r4, r2, r0
; SOFT-NEXT: sbcs r1, r3
-; SOFT-NEXT: blt .LBB56_2
+; SOFT-NEXT: bge .LBB56_2
; SOFT-NEXT: @ %bb.1: @ %entry
; SOFT-NEXT: mov r0, r2
; SOFT-NEXT: .LBB56_2: @ %entry
@@ -3949,10 +3911,10 @@ define i32 @stest_f32i32i64(float %x) {
; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: bl __aeabi_f2lz
; VFP2-NEXT: movw r2, #32767
-; VFP2-NEXT: subs r3, r0, r2
+; VFP2-NEXT: subs r2, r0, r2
; VFP2-NEXT: sbcs r1, r1, #0
; VFP2-NEXT: it ge
-; VFP2-NEXT: movge r0, r2
+; VFP2-NEXT: movwge r0, #32767
; VFP2-NEXT: movw r1, #32768
; VFP2-NEXT: cmn.w r0, #32768
; VFP2-NEXT: movt r1, #65535
@@ -3967,12 +3929,13 @@ define i32 @stest_f32i32i64(float %x) {
; FULL-NEXT: vmov r0, s0
; FULL-NEXT: bl __aeabi_f2lz
; FULL-NEXT: movw r2, #32767
-; FULL-NEXT: subs r3, r0, r2
+; FULL-NEXT: subs r2, r0, r2
; FULL-NEXT: sbcs r1, r1, #0
-; FULL-NEXT: csel r0, r0, r2, lt
+; FULL-NEXT: it ge
+; FULL-NEXT: movwge r0, #32767
; FULL-NEXT: movw r1, #32768
-; FULL-NEXT: movt r1, #65535
; FULL-NEXT: cmn.w r0, #32768
+; FULL-NEXT: movt r1, #65535
; FULL-NEXT: csel r0, r0, r1, gt
; FULL-NEXT: pop {r7, pc}
entry:
diff --git a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
index 96f009a4da02d..1d75bef771f1c 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
@@ -20,43 +20,39 @@ define <2 x i32> @stest_f64i32(<2 x double> %x) {
; CHECK-NEXT: vmov.32 d9[0], r4
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: vmov.32 d8[0], r0
-; CHECK-NEXT: mvn r3, #-2147483648
-; CHECK-NEXT: subs r4, r4, r3
+; CHECK-NEXT: mvn r12, #-2147483648
+; CHECK-NEXT: subs r3, r4, r12
; CHECK-NEXT: adr r2, .LCPI0_0
; CHECK-NEXT: vmov.32 d9[1], r5
-; CHECK-NEXT: sbcs r5, r5, #0
+; CHECK-NEXT: sbcs r3, r5, #0
; CHECK-NEXT: mov r5, #0
-; CHECK-NEXT: mvn r4, #0
+; CHECK-NEXT: mvn r3, #0
; CHECK-NEXT: movwlt r5, #1
-; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: mvnne r5, #0
-; CHECK-NEXT: subs r0, r0, r3
-; CHECK-NEXT: sbcs r0, r1, #0
; CHECK-NEXT: vmov.32 d8[1], r1
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: vmov.i32 q10, #0x80000000
-; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: movlt r5, r3
+; CHECK-NEXT: subs r0, r0, r12
+; CHECK-NEXT: sbcs r0, r1, #0
; CHECK-NEXT: vdup.32 d19, r5
-; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: vld1.64 {d16, d17}, [r2:128]
+; CHECK-NEXT: movwlt r0, #1
; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: movlt r0, r3
+; CHECK-NEXT: vmov.i32 q10, #0x80000000
; CHECK-NEXT: vdup.32 d18, r0
; CHECK-NEXT: vbit q8, q4, q9
; CHECK-NEXT: vmov r0, r1, d17
-; CHECK-NEXT: vmov r3, r5, d16
+; CHECK-NEXT: vmov r5, r4, d16
; CHECK-NEXT: rsbs r0, r0, #-2147483648
-; CHECK-NEXT: sbcs r0, r4, r1
+; CHECK-NEXT: sbcs r0, r3, r1
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mvnne r0, #0
-; CHECK-NEXT: rsbs r1, r3, #-2147483648
-; CHECK-NEXT: sbcs r1, r4, r5
+; CHECK-NEXT: movlt r0, r3
+; CHECK-NEXT: rsbs r1, r5, #-2147483648
+; CHECK-NEXT: sbcs r1, r3, r4
; CHECK-NEXT: vdup.32 d19, r0
; CHECK-NEXT: movwlt r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: mvnne r2, #0
+; CHECK-NEXT: movlt r2, r3
; CHECK-NEXT: vdup.32 d18, r2
; CHECK-NEXT: vbif q8, q10, q9
; CHECK-NEXT: vmovn.i64 d0, q8
@@ -101,14 +97,12 @@ define <2 x i32> @utest_f64i32(<2 x double> %x) {
; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movwlo r5, #1
-; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: mvnne r5, #0
+; CHECK-NEXT: movlo r5, r3
; CHECK-NEXT: subs r0, r0, r3
; CHECK-NEXT: sbcs r0, r1, #0
; CHECK-NEXT: vdup.32 d17, r5
; CHECK-NEXT: movwlo r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: mvnne r2, #0
+; CHECK-NEXT: movlo r2, r3
; CHECK-NEXT: vdup.32 d16, r2
; CHECK-NEXT: vand q9, q4, q8
; CHECK-NEXT: vorn q8, q9, q8
@@ -147,32 +141,28 @@ define <2 x i32> @ustest_f64i32(<2 x double> %x) {
; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movwlt r5, #1
-; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: mvnne r5, #0
+; CHECK-NEXT: vmov.32 d8[1], r1
+; CHECK-NEXT: movlt r5, r3
; CHECK-NEXT: subs r0, r0, r3
; CHECK-NEXT: sbcs r0, r1, #0
-; CHECK-NEXT: vmov.32 d8[1], r1
+; CHECK-NEXT: vdup.32 d17, r5
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: vdup.32 d17, r5
-; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: movlt r0, r3
; CHECK-NEXT: vdup.32 d16, r0
; CHECK-NEXT: vbsl q8, q4, q9
; CHECK-NEXT: vmov r0, r1, d17
-; CHECK-NEXT: vmov r3, r5, d16
+; CHECK-NEXT: vmov r5, r4, d16
; CHECK-NEXT: rsbs r0, r0, #0
; CHECK-NEXT: rscs r0, r1, #0
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mvnne r0, #0
-; CHECK-NEXT: rsbs r1, r3, #0
-; CHECK-NEXT: rscs r1, r5, #0
+; CHECK-NEXT: movlt r0, r3
+; CHECK-NEXT: rsbs r1, r5, #0
+; CHECK-NEXT: rscs r1, r4, #0
; CHECK-NEXT: vmov.32 d19[0], r0
; CHECK-NEXT: movwlt r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: mvnne r2, #0
+; CHECK-NEXT: movlt r2, r3
; CHECK-NEXT: vmov.32 d18[0], r2
; CHECK-NEXT: vand q8, q9, q8
; CHECK-NEXT: vmovn.i64 d0, q8
@@ -195,103 +185,95 @@ define <4 x i32> @stest_f32i32(<4 x float> %x) {
; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, sp, #4
-; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: vmov r0, s19
+; CHECK-NEXT: mov r6, r1
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: vmov r0, s18
-; CHECK-NEXT: mov r7, r1
-; CHECK-NEXT: adr r1, .LCPI3_0
-; CHECK-NEXT: vld1.64 {d10, d11}, [r1:128]
-; CHECK-NEXT: vmov r5, s17
-; CHECK-NEXT: mov r4, #0
+; CHECK-NEXT: mov r2, r0
+; CHECK-NEXT: vmov r0, s17
+; CHECK-NEXT: adr r3, .LCPI3_0
; CHECK-NEXT: mvn r9, #-2147483648
-; CHECK-NEXT: vmov.32 d13[0], r6
-; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: subs r2, r6, r9
-; CHECK-NEXT: vmov.32 d12[0], r0
-; CHECK-NEXT: sbcs r2, r7, #0
+; CHECK-NEXT: vld1.64 {d10, d11}, [r3:128]
+; CHECK-NEXT: subs r3, r5, r9
+; CHECK-NEXT: sbcs r3, r6, #0
+; CHECK-NEXT: mvn r4, #0
+; CHECK-NEXT: mov r3, #0
+; CHECK-NEXT: vmov.32 d13[0], r2
+; CHECK-NEXT: movwlt r3, #1
; CHECK-NEXT: vmov r8, s16
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: vmov.32 d13[1], r7
-; CHECK-NEXT: movwlt r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: mvnne r2, #0
-; CHECK-NEXT: subs r0, r0, r9
-; CHECK-NEXT: sbcs r0, r1, #0
-; CHECK-NEXT: vdup.32 d17, r2
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: vmov.32 d12[1], r1
-; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mvnne r0, #0
-; CHECK-NEXT: vdup.32 d16, r0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: vorr q4, q8, q8
-; CHECK-NEXT: vbsl q4, q6, q5
+; CHECK-NEXT: vmov.32 d12[0], r5
+; CHECK-NEXT: movlt r3, r4
+; CHECK-NEXT: subs r2, r2, r9
+; CHECK-NEXT: mov r7, #0
+; CHECK-NEXT: vmov.32 d13[1], r1
+; CHECK-NEXT: sbcs r1, r1, #0
+; CHECK-NEXT: mov r1, #0
+; CHECK-NEXT: movwlt r1, #1
+; CHECK-NEXT: vmov.32 d12[1], r6
+; CHECK-NEXT: movlt r1, r4
+; CHECK-NEXT: vdup.32 d9, r1
+; CHECK-NEXT: vdup.32 d8, r3
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: vmov.32 d13[0], r0
+; CHECK-NEXT: vmov.32 d15[0], r0
; CHECK-NEXT: subs r0, r0, r9
+; CHECK-NEXT: vbsl q4, q6, q5
; CHECK-NEXT: sbcs r0, r1, #0
; CHECK-NEXT: mov r6, #0
-; CHECK-NEXT: movwlt r6, #1
-; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: movwlt r6, #1
+; CHECK-NEXT: mov r5, r1
+; CHECK-NEXT: movlt r6, r4
; CHECK-NEXT: vmov r11, r10, d8
-; CHECK-NEXT: vmov.32 d13[1], r1
-; CHECK-NEXT: mvnne r6, #0
-; CHECK-NEXT: vmov r5, r7, d9
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: vmov.32 d12[0], r0
-; CHECK-NEXT: subs r0, r0, r9
+; CHECK-NEXT: subs r2, r0, r9
+; CHECK-NEXT: vmov.32 d15[1], r5
+; CHECK-NEXT: vmov r2, r3, d9
+; CHECK-NEXT: vmov.32 d14[0], r0
; CHECK-NEXT: sbcs r0, r1, #0
; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: vdup.32 d17, r6
; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mvnne r0, #0
-; CHECK-NEXT: vmov.32 d12[1], r1
-; CHECK-NEXT: rsbs r3, r11, #-2147483648
+; CHECK-NEXT: vdup.32 d17, r6
+; CHECK-NEXT: movlt r0, r4
+; CHECK-NEXT: vmov.32 d14[1], r1
+; CHECK-NEXT: rsbs r6, r11, #-2147483648
; CHECK-NEXT: vdup.32 d16, r0
-; CHECK-NEXT: mvn r0, #0
-; CHECK-NEXT: vbsl q8, q6, q5
-; CHECK-NEXT: adr r1, .LCPI3_1
-; CHECK-NEXT: vld1.64 {d18, d19}, [r1:128]
-; CHECK-NEXT: sbcs r3, r0, r10
-; CHECK-NEXT: mov r3, #0
-; CHECK-NEXT: vmov r1, r2, d17
-; CHECK-NEXT: movwlt r3, #1
-; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: mvnne r3, #0
-; CHECK-NEXT: rsbs r6, r5, #-2147483648
-; CHECK-NEXT: vmov r6, r5, d16
-; CHECK-NEXT: sbcs r7, r0, r7
-; CHECK-NEXT: mov r7, #0
-; CHECK-NEXT: movwlt r7, #1
-; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: mvnne r7, #0
-; CHECK-NEXT: vdup.32 d23, r7
-; CHECK-NEXT: vdup.32 d22, r3
+; CHECK-NEXT: sbcs r6, r4, r10
+; CHECK-NEXT: vbsl q8, q7, q5
+; CHECK-NEXT: adr r0, .LCPI3_1
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0:128]
+; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: movwlt r6, #1
+; CHECK-NEXT: vmov r0, r1, d17
+; CHECK-NEXT: movlt r6, r4
+; CHECK-NEXT: rsbs r2, r2, #-2147483648
+; CHECK-NEXT: sbcs r2, r4, r3
+; CHECK-NEXT: vmov r3, r5, d16
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: movwlt r2, #1
+; CHECK-NEXT: movlt r2, r4
+; CHECK-NEXT: vdup.32 d23, r2
+; CHECK-NEXT: vdup.32 d22, r6
; CHECK-NEXT: vbsl q11, q4, q9
+; CHECK-NEXT: rsbs r0, r0, #-2147483648
; CHECK-NEXT: vmovn.i64 d1, q11
-; CHECK-NEXT: rsbs r1, r1, #-2147483648
-; CHECK-NEXT: sbcs r1, r0, r2
-; CHECK-NEXT: mov r1, #0
-; CHECK-NEXT: movwlt r1, #1
-; CHECK-NEXT: cmp r1, #0
-; CHECK-NEXT: mvnne r1, #0
-; CHECK-NEXT: rsbs r2, r6, #-2147483648
-; CHECK-NEXT: sbcs r0, r0, r5
-; CHECK-NEXT: vdup.32 d21, r1
-; CHECK-NEXT: movwlt r4, #1
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: mvnne r4, #0
-; CHECK-NEXT: vdup.32 d20, r4
+; CHECK-NEXT: sbcs r0, r4, r1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: movwlt r0, #1
+; CHECK-NEXT: movlt r0, r4
+; CHECK-NEXT: vdup.32 d21, r0
+; CHECK-NEXT: rsbs r1, r3, #-2147483648
+; CHECK-NEXT: sbcs r1, r4, r5
+; CHECK-NEXT: movwlt r7, #1
+; CHECK-NEXT: movlt r7, r4
+; CHECK-NEXT: vdup.32 d20, r7
; CHECK-NEXT: vbif q8, q9, q10
; CHECK-NEXT: vmovn.i64 d0, q8
-; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: add sp, sp, #4
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
; CHECK-NEXT: .p2align 4
@@ -350,34 +332,30 @@ define <4 x i32> @utest_f32i32(<4 x float> %x) {
; CHECK-NEXT: sbcs r0, r1, #0
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movwlo r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: movlo r0, r3
; CHECK-NEXT: subs r1, r5, r3
; CHECK-NEXT: sbcs r1, r4, #0
; CHECK-NEXT: mov r1, #0
; CHECK-NEXT: movwlo r1, #1
-; CHECK-NEXT: cmp r1, #0
-; CHECK-NEXT: mvnne r1, #0
+; CHECK-NEXT: movlo r1, r3
; CHECK-NEXT: subs r7, r10, r3
; CHECK-NEXT: sbcs r7, r8, #0
; CHECK-NEXT: vdup.32 d19, r1
; CHECK-NEXT: mov r7, #0
; CHECK-NEXT: vdup.32 d18, r0
; CHECK-NEXT: movwlo r7, #1
-; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: mvnne r7, #0
-; CHECK-NEXT: subs r3, r6, r3
-; CHECK-NEXT: sbcs r3, r9, #0
+; CHECK-NEXT: vand q10, q5, q9
+; CHECK-NEXT: movlo r7, r3
+; CHECK-NEXT: subs r6, r6, r3
+; CHECK-NEXT: sbcs r6, r9, #0
; CHECK-NEXT: vdup.32 d17, r7
; CHECK-NEXT: movwlo r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: mvnne r2, #0
-; CHECK-NEXT: vand q10, q5, q9
+; CHECK-NEXT: vorn q9, q10, q9
+; CHECK-NEXT: movlo r2, r3
; CHECK-NEXT: vdup.32 d16, r2
; CHECK-NEXT: vand q11, q4, q8
-; CHECK-NEXT: vorn q9, q10, q9
-; CHECK-NEXT: vorn q8, q11, q8
; CHECK-NEXT: vmovn.i64 d1, q9
+; CHECK-NEXT: vorn q8, q11, q8
; CHECK-NEXT: vmovn.i64 d0, q8
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
@@ -394,100 +372,94 @@ define <4 x i32> @ustest_f32i32(<4 x float> %x) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: vmov r0, s19
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: vmov r0, s18
-; CHECK-NEXT: mov r7, r1
-; CHECK-NEXT: vmov r5, s17
-; CHECK-NEXT: vmov r8, s16
-; CHECK-NEXT: vmov.32 d9[0], r6
+; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: vmov r0, s16
+; CHECK-NEXT: mov r9, r1
+; CHECK-NEXT: vmov r6, s17
+; CHECK-NEXT: vmov r4, s18
+; CHECK-NEXT: vmov.32 d11[0], r8
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: mvn r9, #0
-; CHECK-NEXT: subs r2, r6, r9
-; CHECK-NEXT: sbcs r2, r7, #0
+; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: vmov.32 d8[0], r0
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: vmov.i64 q5, #0xffffffff
-; CHECK-NEXT: movwlt r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: mvnne r2, #0
-; CHECK-NEXT: subs r0, r0, r9
-; CHECK-NEXT: sbcs r0, r1, #0
-; CHECK-NEXT: vmov.32 d9[1], r7
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: mov r4, #0
-; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: vmov.32 d8[1], r1
-; CHECK-NEXT: mvnne r0, #0
-; CHECK-NEXT: vdup.32 d17, r2
-; CHECK-NEXT: vdup.32 d16, r0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: vbif q4, q5, q8
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r7, r1
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: vmov.32 d13[0], r0
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r6, r1
-; CHECK-NEXT: vmov r7, r10, d8
+; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: vmov.32 d9[0], r0
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r10, r1
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: subs r2, r5, r9
-; CHECK-NEXT: vmov.32 d12[0], r0
-; CHECK-NEXT: sbcs r2, r6, #0
+; CHECK-NEXT: vmov.32 d9[1], r10
+; CHECK-NEXT: mvn r3, #0
+; CHECK-NEXT: subs r5, r5, r3
+; CHECK-NEXT: vmov.i64 q8, #0xffffffff
+; CHECK-NEXT: vmov.32 d8[1], r7
+; CHECK-NEXT: sbcs r7, r7, #0
+; CHECK-NEXT: mov r7, #0
; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: vmov.32 d13[1], r6
-; CHECK-NEXT: movwlt r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: mvnne r2, #0
-; CHECK-NEXT: subs r0, r0, r9
+; CHECK-NEXT: movwlt r7, #1
+; CHECK-NEXT: vmov.32 d10[0], r0
+; CHECK-NEXT: movlt r7, r3
+; CHECK-NEXT: subs r0, r0, r3
; CHECK-NEXT: sbcs r0, r1, #0
-; CHECK-NEXT: vdup.32 d17, r2
+; CHECK-NEXT: vmov.32 d11[1], r9
; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: vmov.32 d12[1], r1
; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mvnne r0, #0
-; CHECK-NEXT: vmov r2, r3, d9
-; CHECK-NEXT: vdup.32 d16, r0
-; CHECK-NEXT: rsbs r7, r7, #0
-; CHECK-NEXT: vbsl q8, q6, q5
-; CHECK-NEXT: rscs r7, r10, #0
-; CHECK-NEXT: mov r7, #0
-; CHECK-NEXT: movwlt r7, #1
-; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: vmov r0, r1, d17
-; CHECK-NEXT: mvnne r7, #0
-; CHECK-NEXT: vmov r6, r5, d16
+; CHECK-NEXT: vmov.32 d10[1], r1
+; CHECK-NEXT: movlt r0, r3
+; CHECK-NEXT: subs r1, r8, r3
+; CHECK-NEXT: sbcs r1, r9, #0
+; CHECK-NEXT: mov r1, #0
+; CHECK-NEXT: movwlt r1, #1
+; CHECK-NEXT: movlt r1, r3
+; CHECK-NEXT: subs r6, r6, r3
+; CHECK-NEXT: vdup.32 d19, r1
+; CHECK-NEXT: sbcs r6, r10, #0
+; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: vdup.32 d18, r0
+; CHECK-NEXT: movwlt r6, #1
+; CHECK-NEXT: vbsl q9, q5, q8
+; CHECK-NEXT: movlt r6, r3
+; CHECK-NEXT: vdup.32 d21, r6
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: vdup.32 d20, r7
+; CHECK-NEXT: vbit q8, q4, q10
+; CHECK-NEXT: vmov r5, r4, d19
+; CHECK-NEXT: vmov r7, r6, d17
; CHECK-NEXT: rsbs r0, r0, #0
; CHECK-NEXT: rscs r0, r1, #0
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mvnne r0, #0
-; CHECK-NEXT: rsbs r1, r2, #0
-; CHECK-NEXT: rscs r1, r3, #0
-; CHECK-NEXT: vmov.32 d19[0], r0
+; CHECK-NEXT: movlt r0, r3
+; CHECK-NEXT: rsbs r1, r7, #0
+; CHECK-NEXT: rscs r1, r6, #0
+; CHECK-NEXT: vmov r7, r6, d16
; CHECK-NEXT: mov r1, #0
; CHECK-NEXT: movwlt r1, #1
-; CHECK-NEXT: cmp r1, #0
-; CHECK-NEXT: mvnne r1, #0
-; CHECK-NEXT: rsbs r0, r6, #0
-; CHECK-NEXT: rscs r0, r5, #0
+; CHECK-NEXT: movlt r1, r3
+; CHECK-NEXT: rsbs r5, r5, #0
+; CHECK-NEXT: rscs r5, r4, #0
; CHECK-NEXT: vmov.32 d21[0], r1
-; CHECK-NEXT: movwlt r4, #1
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: vmov.32 d20[0], r7
-; CHECK-NEXT: mvnne r4, #0
-; CHECK-NEXT: vmov.32 d18[0], r4
-; CHECK-NEXT: vand q10, q10, q4
-; CHECK-NEXT: vand q8, q9, q8
-; CHECK-NEXT: vmovn.i64 d1, q10
+; CHECK-NEXT: mov r5, #0
+; CHECK-NEXT: movwlt r5, #1
+; CHECK-NEXT: movlt r5, r3
+; CHECK-NEXT: vmov.32 d23[0], r5
+; CHECK-NEXT: vmov.32 d22[0], r0
+; CHECK-NEXT: vand q9, q11, q9
+; CHECK-NEXT: vmovn.i64 d1, q9
+; CHECK-NEXT: rsbs r1, r7, #0
+; CHECK-NEXT: rscs r1, r6, #0
+; CHECK-NEXT: movwlt r2, #1
+; CHECK-NEXT: movlt r2, r3
+; CHECK-NEXT: vmov.32 d20[0], r2
+; CHECK-NEXT: vand q8, q10, q8
; CHECK-NEXT: vmovn.i64 d0, q8
-; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
entry:
%conv = fptosi <4 x float> %x to <4 x i64>
@@ -506,108 +478,101 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEON-NEXT: .pad #4
; CHECK-NEON-NEXT: sub sp, sp, #4
-; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEON-NEXT: vmov r0, s2
+; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
+; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13}
+; CHECK-NEON-NEXT: vmov r0, s1
; CHECK-NEON-NEXT: vmov.f32 s16, s3
-; CHECK-NEON-NEXT: vmov.f32 s18, s1
+; CHECK-NEON-NEXT: vmov.f32 s18, s2
; CHECK-NEON-NEXT: vmov.f32 s20, s0
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
-; CHECK-NEON-NEXT: mov r6, r0
+; CHECK-NEON-NEXT: mov r10, r0
; CHECK-NEON-NEXT: vmov r0, s16
-; CHECK-NEON-NEXT: mov r5, r1
+; CHECK-NEON-NEXT: mov r9, r1
; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov.32 d13[0], r10
; CHECK-NEON-NEXT: bl __aeabi_f2lz
-; CHECK-NEON-NEXT: vmov r2, s18
-; CHECK-NEON-NEXT: adr r3, .LCPI6_0
-; CHECK-NEON-NEXT: vld1.64 {d8, d9}, [r3:128]
-; CHECK-NEON-NEXT: mvn r9, #-2147483648
-; CHECK-NEON-NEXT: subs r3, r6, r9
-; CHECK-NEON-NEXT: mov r4, #0
-; CHECK-NEON-NEXT: sbcs r3, r5, #0
-; CHECK-NEON-NEXT: vmov.32 d15[0], r0
-; CHECK-NEON-NEXT: movwlt r4, #1
-; CHECK-NEON-NEXT: cmp r4, #0
-; CHECK-NEON-NEXT: mvnne r4, #0
-; CHECK-NEON-NEXT: subs r0, r0, r9
-; CHECK-NEON-NEXT: sbcs r0, r1, #0
-; CHECK-NEON-NEXT: vmov.32 d14[0], r6
-; CHECK-NEON-NEXT: mov r0, #0
+; CHECK-NEON-NEXT: mov r7, r0
+; CHECK-NEON-NEXT: vmov r0, s18
+; CHECK-NEON-NEXT: mov r6, r1
+; CHECK-NEON-NEXT: adr r1, .LCPI6_0
+; CHECK-NEON-NEXT: vld1.64 {d8, d9}, [r1:128]
; CHECK-NEON-NEXT: vmov r8, s20
-; CHECK-NEON-NEXT: movwlt r0, #1
-; CHECK-NEON-NEXT: cmp r0, #0
-; CHECK-NEON-NEXT: mvnne r0, #0
-; CHECK-NEON-NEXT: vmov.32 d15[1], r1
-; CHECK-NEON-NEXT: mov r7, #0
-; CHECK-NEON-NEXT: vdup.32 d11, r0
-; CHECK-NEON-NEXT: vmov.32 d14[1], r5
-; CHECK-NEON-NEXT: mov r0, r2
+; CHECK-NEON-NEXT: vmov.32 d11[0], r7
; CHECK-NEON-NEXT: bl __aeabi_h2f
-; CHECK-NEON-NEXT: vdup.32 d10, r4
; CHECK-NEON-NEXT: bl __aeabi_f2lz
-; CHECK-NEON-NEXT: vmov.32 d13[0], r0
-; CHECK-NEON-NEXT: subs r0, r0, r9
-; CHECK-NEON-NEXT: vbsl q5, q7, q4
+; CHECK-NEON-NEXT: vmov.32 d10[0], r0
+; CHECK-NEON-NEXT: mvn r11, #-2147483648
+; CHECK-NEON-NEXT: subs r0, r0, r11
+; CHECK-NEON-NEXT: mvn r5, #0
; CHECK-NEON-NEXT: sbcs r0, r1, #0
+; CHECK-NEON-NEXT: vmov.32 d11[1], r6
+; CHECK-NEON-NEXT: mov r0, #0
+; CHECK-NEON-NEXT: mov r4, #0
+; CHECK-NEON-NEXT: movwlt r0, #1
+; CHECK-NEON-NEXT: vmov.32 d10[1], r1
+; CHECK-NEON-NEXT: movlt r0, r5
+; CHECK-NEON-NEXT: subs r1, r7, r11
+; CHECK-NEON-NEXT: sbcs r1, r6, #0
; CHECK-NEON-NEXT: mov r6, #0
-; CHECK-NEON-NEXT: mov r0, r8
+; CHECK-NEON-NEXT: mov r1, #0
+; CHECK-NEON-NEXT: movwlt r1, #1
+; CHECK-NEON-NEXT: movlt r1, r5
+; CHECK-NEON-NEXT: vdup.32 d17, r1
+; CHECK-NEON-NEXT: vdup.32 d16, r0
+; CHECK-NEON-NEXT: subs r0, r10, r11
+; CHECK-NEON-NEXT: vbif q5, q4, q8
+; CHECK-NEON-NEXT: sbcs r0, r9, #0
; CHECK-NEON-NEXT: movwlt r6, #1
-; CHECK-NEON-NEXT: cmp r6, #0
-; CHECK-NEON-NEXT: vmov r11, r10, d10
-; CHECK-NEON-NEXT: vmov.32 d13[1], r1
-; CHECK-NEON-NEXT: mvnne r6, #0
-; CHECK-NEON-NEXT: vmov r5, r4, d11
+; CHECK-NEON-NEXT: mov r0, r8
+; CHECK-NEON-NEXT: movlt r6, r5
+; CHECK-NEON-NEXT: vmov r7, r10, d10
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
+; CHECK-NEON-NEXT: subs r2, r0, r11
+; CHECK-NEON-NEXT: vmov.32 d13[1], r9
+; CHECK-NEON-NEXT: vmov r2, r3, d11
; CHECK-NEON-NEXT: vmov.32 d12[0], r0
-; CHECK-NEON-NEXT: subs r0, r0, r9
; CHECK-NEON-NEXT: sbcs r0, r1, #0
; CHECK-NEON-NEXT: mov r0, #0
-; CHECK-NEON-NEXT: vdup.32 d17, r6
; CHECK-NEON-NEXT: movwlt r0, #1
-; CHECK-NEON-NEXT: cmp r0, #0
-; CHECK-NEON-NEXT: mvnne r0, #0
+; CHECK-NEON-NEXT: vdup.32 d17, r6
+; CHECK-NEON-NEXT: movlt r0, r5
; CHECK-NEON-NEXT: vmov.32 d12[1], r1
-; CHECK-NEON-NEXT: rsbs r3, r11, #-2147483648
+; CHECK-NEON-NEXT: rsbs r7, r7, #-2147483648
; CHECK-NEON-NEXT: vdup.32 d16, r0
-; CHECK-NEON-NEXT: mvn r0, #0
+; CHECK-NEON-NEXT: sbcs r7, r5, r10
; CHECK-NEON-NEXT: vbsl q8, q6, q4
-; CHECK-NEON-NEXT: adr r1, .LCPI6_1
-; CHECK-NEON-NEXT: vld1.64 {d18, d19}, [r1:128]
-; CHECK-NEON-NEXT: sbcs r3, r0, r10
-; CHECK-NEON-NEXT: mov r3, #0
-; CHECK-NEON-NEXT: vmov r1, r2, d17
-; CHECK-NEON-NEXT: movwlt r3, #1
-; CHECK-NEON-NEXT: cmp r3, #0
-; CHECK-NEON-NEXT: mvnne r3, #0
-; CHECK-NEON-NEXT: rsbs r6, r5, #-2147483648
-; CHECK-NEON-NEXT: sbcs r6, r0, r4
-; CHECK-NEON-NEXT: vmov r5, r4, d16
-; CHECK-NEON-NEXT: mov r6, #0
-; CHECK-NEON-NEXT: movwlt r6, #1
-; CHECK-NEON-NEXT: cmp r6, #0
-; CHECK-NEON-NEXT: mvnne r6, #0
-; CHECK-NEON-NEXT: vdup.32 d23, r6
-; CHECK-NEON-NEXT: vdup.32 d22, r3
+; CHECK-NEON-NEXT: adr r0, .LCPI6_1
+; CHECK-NEON-NEXT: vld1.64 {d18, d19}, [r0:128]
+; CHECK-NEON-NEXT: mov r7, #0
+; CHECK-NEON-NEXT: movwlt r7, #1
+; CHECK-NEON-NEXT: vmov r0, r1, d17
+; CHECK-NEON-NEXT: movlt r7, r5
+; CHECK-NEON-NEXT: rsbs r2, r2, #-2147483648
+; CHECK-NEON-NEXT: sbcs r2, r5, r3
+; CHECK-NEON-NEXT: vmov r3, r6, d16
+; CHECK-NEON-NEXT: mov r2, #0
+; CHECK-NEON-NEXT: movwlt r2, #1
+; CHECK-NEON-NEXT: movlt r2, r5
+; CHECK-NEON-NEXT: vdup.32 d23, r2
+; CHECK-NEON-NEXT: vdup.32 d22, r7
; CHECK-NEON-NEXT: vbsl q11, q5, q9
+; CHECK-NEON-NEXT: rsbs r0, r0, #-2147483648
; CHECK-NEON-NEXT: vmovn.i64 d1, q11
-; CHECK-NEON-NEXT: rsbs r1, r1, #-2147483648
-; CHECK-NEON-NEXT: sbcs r1, r0, r2
-; CHECK-NEON-NEXT: mov r1, #0
-; CHECK-NEON-NEXT: movwlt r1, #1
-; CHECK-NEON-NEXT: cmp r1, #0
-; CHECK-NEON-NEXT: mvnne r1, #0
-; CHECK-NEON-NEXT: rsbs r2, r5, #-2147483648
-; CHECK-NEON-NEXT: sbcs r0, r0, r4
-; CHECK-NEON-NEXT: vdup.32 d21, r1
-; CHECK-NEON-NEXT: movwlt r7, #1
-; CHECK-NEON-NEXT: cmp r7, #0
-; CHECK-NEON-NEXT: mvnne r7, #0
-; CHECK-NEON-NEXT: vdup.32 d20, r7
+; CHECK-NEON-NEXT: sbcs r0, r5, r1
+; CHECK-NEON-NEXT: mov r0, #0
+; CHECK-NEON-NEXT: movwlt r0, #1
+; CHECK-NEON-NEXT: movlt r0, r5
+; CHECK-NEON-NEXT: vdup.32 d21, r0
+; CHECK-NEON-NEXT: rsbs r1, r3, #-2147483648
+; CHECK-NEON-NEXT: sbcs r1, r5, r6
+; CHECK-NEON-NEXT: movwlt r4, #1
+; CHECK-NEON-NEXT: movlt r4, r5
+; CHECK-NEON-NEXT: vdup.32 d20, r4
; CHECK-NEON-NEXT: vbif q8, q9, q10
; CHECK-NEON-NEXT: vmovn.i64 d0, q8
-; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13}
; CHECK-NEON-NEXT: add sp, sp, #4
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
; CHECK-NEON-NEXT: .p2align 4
@@ -627,104 +592,102 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-FP16-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
-; CHECK-FP16-NEXT: vpush {d8, d9, d10, d11, d12, d13}
+; CHECK-FP16-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-FP16-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-FP16-NEXT: .pad #8
+; CHECK-FP16-NEXT: sub sp, sp, #8
; CHECK-FP16-NEXT: vmov.u16 r0, d0[3]
; CHECK-FP16-NEXT: vorr d8, d0, d0
-; CHECK-FP16-NEXT: vmov.u16 r8, d0[0]
; CHECK-FP16-NEXT: vmov.u16 r9, d0[1]
+; CHECK-FP16-NEXT: vmov.u16 r7, d0[0]
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: mov r4, r0
; CHECK-FP16-NEXT: vmov.u16 r0, d8[2]
; CHECK-FP16-NEXT: mov r5, r1
-; CHECK-FP16-NEXT: vmov.32 d9[0], r4
+; CHECK-FP16-NEXT: vmov.32 d13[0], r4
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: adr r2, .LCPI6_0
-; CHECK-FP16-NEXT: mvn r10, #-2147483648
+; CHECK-FP16-NEXT: mvn r8, #-2147483648
; CHECK-FP16-NEXT: vld1.64 {d10, d11}, [r2:128]
-; CHECK-FP16-NEXT: subs r2, r4, r10
+; CHECK-FP16-NEXT: subs r2, r4, r8
; CHECK-FP16-NEXT: sbcs r2, r5, #0
-; CHECK-FP16-NEXT: vmov s0, r9
+; CHECK-FP16-NEXT: vmov s0, r7
+; CHECK-FP16-NEXT: vmov.32 d12[0], r0
; CHECK-FP16-NEXT: mov r2, #0
-; CHECK-FP16-NEXT: vmov.32 d8[0], r0
; CHECK-FP16-NEXT: movwlt r2, #1
-; CHECK-FP16-NEXT: cmp r2, #0
-; CHECK-FP16-NEXT: mvnne r2, #0
-; CHECK-FP16-NEXT: subs r0, r0, r10
+; CHECK-FP16-NEXT: mov r6, #0
+; CHECK-FP16-NEXT: vmov.32 d13[1], r5
+; CHECK-FP16-NEXT: mvn r5, #0
+; CHECK-FP16-NEXT: movlt r2, r5
+; CHECK-FP16-NEXT: subs r0, r0, r8
; CHECK-FP16-NEXT: sbcs r0, r1, #0
-; CHECK-FP16-NEXT: vmov.32 d9[1], r5
+; CHECK-FP16-NEXT: vstr s0, [sp, #4] @ 4-byte Spill
; CHECK-FP16-NEXT: mov r0, #0
-; CHECK-FP16-NEXT: mov r6, #0
+; CHECK-FP16-NEXT: vmov s0, r9
; CHECK-FP16-NEXT: movwlt r0, #1
-; CHECK-FP16-NEXT: cmp r0, #0
-; CHECK-FP16-NEXT: vmov.32 d8[1], r1
-; CHECK-FP16-NEXT: mvnne r0, #0
-; CHECK-FP16-NEXT: vdup.32 d17, r2
-; CHECK-FP16-NEXT: vdup.32 d16, r0
-; CHECK-FP16-NEXT: vbif q4, q5, q8
+; CHECK-FP16-NEXT: vdup.32 d9, r2
+; CHECK-FP16-NEXT: movlt r0, r5
+; CHECK-FP16-NEXT: vmov.32 d12[1], r1
+; CHECK-FP16-NEXT: vdup.32 d8, r0
; CHECK-FP16-NEXT: bl __fixhfdi
-; CHECK-FP16-NEXT: vmov.32 d13[0], r0
-; CHECK-FP16-NEXT: subs r0, r0, r10
-; CHECK-FP16-NEXT: vmov s0, r8
-; CHECK-FP16-NEXT: sbcs r0, r1, #0
+; CHECK-FP16-NEXT: vbif q6, q5, q4
+; CHECK-FP16-NEXT: vmov.32 d15[0], r0
+; CHECK-FP16-NEXT: subs r0, r0, r8
; CHECK-FP16-NEXT: mov r7, #0
-; CHECK-FP16-NEXT: vmov r9, r8, d8
+; CHECK-FP16-NEXT: sbcs r0, r1, #0
+; CHECK-FP16-NEXT: vldr s0, [sp, #4] @ 4-byte Reload
; CHECK-FP16-NEXT: movwlt r7, #1
-; CHECK-FP16-NEXT: cmp r7, #0
-; CHECK-FP16-NEXT: vmov.32 d13[1], r1
-; CHECK-FP16-NEXT: vmov r5, r4, d9
-; CHECK-FP16-NEXT: mvnne r7, #0
+; CHECK-FP16-NEXT: mov r4, r1
+; CHECK-FP16-NEXT: movlt r7, r5
+; CHECK-FP16-NEXT: vmov r10, r9, d12
; CHECK-FP16-NEXT: bl __fixhfdi
-; CHECK-FP16-NEXT: vmov.32 d12[0], r0
-; CHECK-FP16-NEXT: subs r0, r0, r10
+; CHECK-FP16-NEXT: subs r2, r0, r8
+; CHECK-FP16-NEXT: vmov.32 d15[1], r4
+; CHECK-FP16-NEXT: vmov r2, r3, d13
+; CHECK-FP16-NEXT: vmov.32 d14[0], r0
; CHECK-FP16-NEXT: sbcs r0, r1, #0
; CHECK-FP16-NEXT: mov r0, #0
-; CHECK-FP16-NEXT: vdup.32 d17, r7
; CHECK-FP16-NEXT: movwlt r0, #1
-; CHECK-FP16-NEXT: cmp r0, #0
-; CHECK-FP16-NEXT: mvnne r0, #0
-; CHECK-FP16-NEXT: vmov.32 d12[1], r1
-; CHECK-FP16-NEXT: rsbs r3, r9, #-2147483648
+; CHECK-FP16-NEXT: vdup.32 d17, r7
+; CHECK-FP16-NEXT: movlt r0, r5
+; CHECK-FP16-NEXT: vmov.32 d14[1], r1
+; CHECK-FP16-NEXT: rsbs r7, r10, #-2147483648
; CHECK-FP16-NEXT: vdup.32 d16, r0
-; CHECK-FP16-NEXT: mvn r0, #0
-; CHECK-FP16-NEXT: vbsl q8, q6, q5
-; CHECK-FP16-NEXT: adr r1, .LCPI6_1
-; CHECK-FP16-NEXT: vld1.64 {d18, d19}, [r1:128]
-; CHECK-FP16-NEXT: sbcs r3, r0, r8
-; CHECK-FP16-NEXT: mov r3, #0
-; CHECK-FP16-NEXT: vmov r1, r2, d17
-; CHECK-FP16-NEXT: movwlt r3, #1
-; CHECK-FP16-NEXT: cmp r3, #0
-; CHECK-FP16-NEXT: mvnne r3, #0
-; CHECK-FP16-NEXT: rsbs r7, r5, #-2147483648
-; CHECK-FP16-NEXT: sbcs r7, r0, r4
-; CHECK-FP16-NEXT: vmov r5, r4, d16
+; CHECK-FP16-NEXT: sbcs r7, r5, r9
+; CHECK-FP16-NEXT: vbsl q8, q7, q5
+; CHECK-FP16-NEXT: adr r0, .LCPI6_1
+; CHECK-FP16-NEXT: vld1.64 {d18, d19}, [r0:128]
; CHECK-FP16-NEXT: mov r7, #0
; CHECK-FP16-NEXT: movwlt r7, #1
-; CHECK-FP16-NEXT: cmp r7, #0
-; CHECK-FP16-NEXT: mvnne r7, #0
-; CHECK-FP16-NEXT: vdup.32 d23, r7
-; CHECK-FP16-NEXT: vdup.32 d22, r3
-; CHECK-FP16-NEXT: vbsl q11, q4, q9
+; CHECK-FP16-NEXT: vmov r0, r1, d17
+; CHECK-FP16-NEXT: movlt r7, r5
+; CHECK-FP16-NEXT: rsbs r2, r2, #-2147483648
+; CHECK-FP16-NEXT: sbcs r2, r5, r3
+; CHECK-FP16-NEXT: vmov r3, r4, d16
+; CHECK-FP16-NEXT: mov r2, #0
+; CHECK-FP16-NEXT: movwlt r2, #1
+; CHECK-FP16-NEXT: movlt r2, r5
+; CHECK-FP16-NEXT: vdup.32 d23, r2
+; CHECK-FP16-NEXT: vdup.32 d22, r7
+; CHECK-FP16-NEXT: vbsl q11, q6, q9
+; CHECK-FP16-NEXT: rsbs r0, r0, #-2147483648
; CHECK-FP16-NEXT: vmovn.i64 d1, q11
-; CHECK-FP16-NEXT: rsbs r1, r1, #-2147483648
-; CHECK-FP16-NEXT: sbcs r1, r0, r2
-; CHECK-FP16-NEXT: mov r1, #0
-; CHECK-FP16-NEXT: movwlt r1, #1
-; CHECK-FP16-NEXT: cmp r1, #0
-; CHECK-FP16-NEXT: mvnne r1, #0
-; CHECK-FP16-NEXT: rsbs r2, r5, #-2147483648
-; CHECK-FP16-NEXT: sbcs r0, r0, r4
-; CHECK-FP16-NEXT: vdup.32 d21, r1
+; CHECK-FP16-NEXT: sbcs r0, r5, r1
+; CHECK-FP16-NEXT: mov r0, #0
+; CHECK-FP16-NEXT: movwlt r0, #1
+; CHECK-FP16-NEXT: movlt r0, r5
+; CHECK-FP16-NEXT: vdup.32 d21, r0
+; CHECK-FP16-NEXT: rsbs r1, r3, #-2147483648
+; CHECK-FP16-NEXT: sbcs r1, r5, r4
; CHECK-FP16-NEXT: movwlt r6, #1
-; CHECK-FP16-NEXT: cmp r6, #0
-; CHECK-FP16-NEXT: mvnne r6, #0
+; CHECK-FP16-NEXT: movlt r6, r5
; CHECK-FP16-NEXT: vdup.32 d20, r6
; CHECK-FP16-NEXT: vbif q8, q9, q10
; CHECK-FP16-NEXT: vmovn.i64 d0, q8
-; CHECK-FP16-NEXT: vpop {d8, d9, d10, d11, d12, d13}
+; CHECK-FP16-NEXT: add sp, sp, #8
+; CHECK-FP16-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
; CHECK-FP16-NEXT: .p2align 4
; CHECK-FP16-NEXT: @ %bb.1:
@@ -788,34 +751,30 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-NEON-NEXT: sbcs r0, r1, #0
; CHECK-NEON-NEXT: mov r0, #0
; CHECK-NEON-NEXT: movwlo r0, #1
-; CHECK-NEON-NEXT: cmp r0, #0
-; CHECK-NEON-NEXT: mvnne r0, #0
+; CHECK-NEON-NEXT: movlo r0, r3
; CHECK-NEON-NEXT: subs r1, r4, r3
; CHECK-NEON-NEXT: sbcs r1, r8, #0
; CHECK-NEON-NEXT: mov r1, #0
; CHECK-NEON-NEXT: movwlo r1, #1
-; CHECK-NEON-NEXT: cmp r1, #0
-; CHECK-NEON-NEXT: mvnne r1, #0
+; CHECK-NEON-NEXT: movlo r1, r3
; CHECK-NEON-NEXT: subs r6, r6, r3
; CHECK-NEON-NEXT: sbcs r6, r9, #0
; CHECK-NEON-NEXT: vdup.32 d19, r1
; CHECK-NEON-NEXT: mov r6, #0
; CHECK-NEON-NEXT: vdup.32 d18, r0
; CHECK-NEON-NEXT: movwlo r6, #1
-; CHECK-NEON-NEXT: cmp r6, #0
-; CHECK-NEON-NEXT: mvnne r6, #0
-; CHECK-NEON-NEXT: subs r3, r5, r3
-; CHECK-NEON-NEXT: sbcs r3, r7, #0
+; CHECK-NEON-NEXT: vand q10, q4, q9
+; CHECK-NEON-NEXT: movlo r6, r3
+; CHECK-NEON-NEXT: subs r5, r5, r3
+; CHECK-NEON-NEXT: sbcs r7, r7, #0
; CHECK-NEON-NEXT: vdup.32 d17, r6
; CHECK-NEON-NEXT: movwlo r2, #1
-; CHECK-NEON-NEXT: cmp r2, #0
-; CHECK-NEON-NEXT: mvnne r2, #0
-; CHECK-NEON-NEXT: vand q10, q4, q9
+; CHECK-NEON-NEXT: vorn q9, q10, q9
+; CHECK-NEON-NEXT: movlo r2, r3
; CHECK-NEON-NEXT: vdup.32 d16, r2
; CHECK-NEON-NEXT: vand q11, q6, q8
-; CHECK-NEON-NEXT: vorn q9, q10, q9
-; CHECK-NEON-NEXT: vorn q8, q11, q8
; CHECK-NEON-NEXT: vmovn.i64 d1, q9
+; CHECK-NEON-NEXT: vorn q8, q11, q8
; CHECK-NEON-NEXT: vmovn.i64 d0, q8
; CHECK-NEON-NEXT: vpop {d8, d9, d10}
; CHECK-NEON-NEXT: vpop {d12, d13}
@@ -856,34 +815,30 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-FP16-NEXT: sbcs r0, r1, #0
; CHECK-FP16-NEXT: mov r0, #0
; CHECK-FP16-NEXT: movwlo r0, #1
-; CHECK-FP16-NEXT: cmp r0, #0
-; CHECK-FP16-NEXT: mvnne r0, #0
+; CHECK-FP16-NEXT: movlo r0, r3
; CHECK-FP16-NEXT: subs r1, r5, r3
; CHECK-FP16-NEXT: sbcs r1, r7, #0
; CHECK-FP16-NEXT: mov r1, #0
; CHECK-FP16-NEXT: movwlo r1, #1
-; CHECK-FP16-NEXT: cmp r1, #0
-; CHECK-FP16-NEXT: mvnne r1, #0
+; CHECK-FP16-NEXT: movlo r1, r3
; CHECK-FP16-NEXT: subs r7, r4, r3
; CHECK-FP16-NEXT: sbcs r7, r8, #0
; CHECK-FP16-NEXT: vdup.32 d19, r1
; CHECK-FP16-NEXT: mov r7, #0
; CHECK-FP16-NEXT: vdup.32 d18, r0
; CHECK-FP16-NEXT: movwlo r7, #1
-; CHECK-FP16-NEXT: cmp r7, #0
-; CHECK-FP16-NEXT: mvnne r7, #0
-; CHECK-FP16-NEXT: subs r3, r6, r3
-; CHECK-FP16-NEXT: sbcs r3, r9, #0
+; CHECK-FP16-NEXT: vand q10, q4, q9
+; CHECK-FP16-NEXT: movlo r7, r3
+; CHECK-FP16-NEXT: subs r6, r6, r3
+; CHECK-FP16-NEXT: sbcs r6, r9, #0
; CHECK-FP16-NEXT: vdup.32 d17, r7
; CHECK-FP16-NEXT: movwlo r2, #1
-; CHECK-FP16-NEXT: cmp r2, #0
-; CHECK-FP16-NEXT: mvnne r2, #0
-; CHECK-FP16-NEXT: vand q10, q4, q9
+; CHECK-FP16-NEXT: vorn q9, q10, q9
+; CHECK-FP16-NEXT: movlo r2, r3
; CHECK-FP16-NEXT: vdup.32 d16, r2
; CHECK-FP16-NEXT: vand q11, q5, q8
-; CHECK-FP16-NEXT: vorn q9, q10, q9
-; CHECK-FP16-NEXT: vorn q8, q11, q8
; CHECK-FP16-NEXT: vmovn.i64 d1, q9
+; CHECK-FP16-NEXT: vorn q8, q11, q8
; CHECK-FP16-NEXT: vmovn.i64 d0, q8
; CHECK-FP16-NEXT: vpop {d8, d9, d10, d11}
; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
@@ -900,209 +855,200 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
-; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13}
+; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEON-NEXT: vmov r0, s3
; CHECK-NEON-NEXT: vmov.f32 s16, s2
; CHECK-NEON-NEXT: vmov.f32 s18, s1
; CHECK-NEON-NEXT: vmov.f32 s20, s0
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
-; CHECK-NEON-NEXT: mov r5, r0
-; CHECK-NEON-NEXT: vmov r0, s16
-; CHECK-NEON-NEXT: mov r6, r1
+; CHECK-NEON-NEXT: mov r8, r0
+; CHECK-NEON-NEXT: vmov r0, s20
+; CHECK-NEON-NEXT: mov r9, r1
+; CHECK-NEON-NEXT: vmov r6, s18
+; CHECK-NEON-NEXT: vmov r10, s16
+; CHECK-NEON-NEXT: vmov.32 d9[0], r8
; CHECK-NEON-NEXT: bl __aeabi_h2f
-; CHECK-NEON-NEXT: vmov r8, s20
-; CHECK-NEON-NEXT: vmov.32 d13[0], r5
; CHECK-NEON-NEXT: bl __aeabi_f2lz
-; CHECK-NEON-NEXT: vmov r2, s18
-; CHECK-NEON-NEXT: vmov.32 d12[0], r0
-; CHECK-NEON-NEXT: mvn r9, #0
-; CHECK-NEON-NEXT: subs r0, r0, r9
-; CHECK-NEON-NEXT: sbcs r0, r1, #0
-; CHECK-NEON-NEXT: vmov.32 d13[1], r6
-; CHECK-NEON-NEXT: mov r0, #0
-; CHECK-NEON-NEXT: mov r7, #0
-; CHECK-NEON-NEXT: movwlt r0, #1
-; CHECK-NEON-NEXT: cmp r0, #0
-; CHECK-NEON-NEXT: vmov.32 d12[1], r1
-; CHECK-NEON-NEXT: mvnne r0, #0
-; CHECK-NEON-NEXT: subs r1, r5, r9
-; CHECK-NEON-NEXT: sbcs r1, r6, #0
-; CHECK-NEON-NEXT: mov r1, #0
-; CHECK-NEON-NEXT: movwlt r1, #1
-; CHECK-NEON-NEXT: cmp r1, #0
-; CHECK-NEON-NEXT: mvnne r1, #0
-; CHECK-NEON-NEXT: vdup.32 d9, r1
-; CHECK-NEON-NEXT: vdup.32 d8, r0
-; CHECK-NEON-NEXT: mov r0, r2
+; CHECK-NEON-NEXT: mov r4, r0
+; CHECK-NEON-NEXT: vmov.32 d10[0], r0
+; CHECK-NEON-NEXT: mov r0, r6
+; CHECK-NEON-NEXT: mov r5, r1
; CHECK-NEON-NEXT: bl __aeabi_h2f
-; CHECK-NEON-NEXT: vmov.i64 q5, #0xffffffff
-; CHECK-NEON-NEXT: vbsl q4, q6, q5
; CHECK-NEON-NEXT: bl __aeabi_f2lz
-; CHECK-NEON-NEXT: mov r5, r0
-; CHECK-NEON-NEXT: vmov.32 d13[0], r0
-; CHECK-NEON-NEXT: mov r0, r8
-; CHECK-NEON-NEXT: mov r6, r1
-; CHECK-NEON-NEXT: vmov r4, r10, d8
+; CHECK-NEON-NEXT: mov r6, r0
+; CHECK-NEON-NEXT: vmov.32 d11[0], r0
+; CHECK-NEON-NEXT: mov r0, r10
+; CHECK-NEON-NEXT: mov r7, r1
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
-; CHECK-NEON-NEXT: subs r2, r5, r9
-; CHECK-NEON-NEXT: vmov.32 d12[0], r0
-; CHECK-NEON-NEXT: sbcs r2, r6, #0
+; CHECK-NEON-NEXT: vmov.32 d11[1], r7
+; CHECK-NEON-NEXT: mvn r3, #0
+; CHECK-NEON-NEXT: subs r4, r4, r3
+; CHECK-NEON-NEXT: vmov.i64 q8, #0xffffffff
+; CHECK-NEON-NEXT: vmov.32 d10[1], r5
+; CHECK-NEON-NEXT: sbcs r5, r5, #0
+; CHECK-NEON-NEXT: mov r5, #0
; CHECK-NEON-NEXT: mov r2, #0
-; CHECK-NEON-NEXT: vmov.32 d13[1], r6
-; CHECK-NEON-NEXT: movwlt r2, #1
-; CHECK-NEON-NEXT: cmp r2, #0
-; CHECK-NEON-NEXT: mvnne r2, #0
-; CHECK-NEON-NEXT: subs r0, r0, r9
+; CHECK-NEON-NEXT: movwlt r5, #1
+; CHECK-NEON-NEXT: vmov.32 d8[0], r0
+; CHECK-NEON-NEXT: movlt r5, r3
+; CHECK-NEON-NEXT: subs r0, r0, r3
; CHECK-NEON-NEXT: sbcs r0, r1, #0
-; CHECK-NEON-NEXT: vdup.32 d17, r2
+; CHECK-NEON-NEXT: vmov.32 d9[1], r9
; CHECK-NEON-NEXT: mov r0, #0
-; CHECK-NEON-NEXT: vmov.32 d12[1], r1
; CHECK-NEON-NEXT: movwlt r0, #1
-; CHECK-NEON-NEXT: cmp r0, #0
-; CHECK-NEON-NEXT: mvnne r0, #0
-; CHECK-NEON-NEXT: vmov r2, r3, d9
-; CHECK-NEON-NEXT: vdup.32 d16, r0
-; CHECK-NEON-NEXT: rsbs r6, r4, #0
-; CHECK-NEON-NEXT: vbsl q8, q6, q5
-; CHECK-NEON-NEXT: rscs r6, r10, #0
-; CHECK-NEON-NEXT: mov r6, #0
-; CHECK-NEON-NEXT: movwlt r6, #1
-; CHECK-NEON-NEXT: cmp r6, #0
-; CHECK-NEON-NEXT: vmov r0, r1, d17
-; CHECK-NEON-NEXT: mvnne r6, #0
-; CHECK-NEON-NEXT: vmov r5, r4, d16
+; CHECK-NEON-NEXT: vmov.32 d8[1], r1
+; CHECK-NEON-NEXT: movlt r0, r3
+; CHECK-NEON-NEXT: subs r1, r8, r3
+; CHECK-NEON-NEXT: sbcs r1, r9, #0
+; CHECK-NEON-NEXT: mov r1, #0
+; CHECK-NEON-NEXT: movwlt r1, #1
+; CHECK-NEON-NEXT: movlt r1, r3
+; CHECK-NEON-NEXT: subs r6, r6, r3
+; CHECK-NEON-NEXT: sbcs r7, r7, #0
+; CHECK-NEON-NEXT: vdup.32 d19, r1
+; CHECK-NEON-NEXT: mov r7, #0
+; CHECK-NEON-NEXT: vdup.32 d18, r0
+; CHECK-NEON-NEXT: movwlt r7, #1
+; CHECK-NEON-NEXT: vbsl q9, q4, q8
+; CHECK-NEON-NEXT: movlt r7, r3
+; CHECK-NEON-NEXT: vdup.32 d21, r7
+; CHECK-NEON-NEXT: vdup.32 d20, r5
+; CHECK-NEON-NEXT: vmov r0, r1, d18
+; CHECK-NEON-NEXT: vbit q8, q5, q10
+; CHECK-NEON-NEXT: vmov r5, r4, d19
+; CHECK-NEON-NEXT: vmov r7, r6, d17
; CHECK-NEON-NEXT: rsbs r0, r0, #0
; CHECK-NEON-NEXT: rscs r0, r1, #0
; CHECK-NEON-NEXT: mov r0, #0
; CHECK-NEON-NEXT: movwlt r0, #1
-; CHECK-NEON-NEXT: cmp r0, #0
-; CHECK-NEON-NEXT: mvnne r0, #0
-; CHECK-NEON-NEXT: rsbs r1, r2, #0
-; CHECK-NEON-NEXT: rscs r1, r3, #0
-; CHECK-NEON-NEXT: vmov.32 d19[0], r0
+; CHECK-NEON-NEXT: movlt r0, r3
+; CHECK-NEON-NEXT: rsbs r1, r7, #0
+; CHECK-NEON-NEXT: rscs r1, r6, #0
+; CHECK-NEON-NEXT: vmov r7, r6, d16
; CHECK-NEON-NEXT: mov r1, #0
; CHECK-NEON-NEXT: movwlt r1, #1
-; CHECK-NEON-NEXT: cmp r1, #0
-; CHECK-NEON-NEXT: mvnne r1, #0
-; CHECK-NEON-NEXT: rsbs r0, r5, #0
-; CHECK-NEON-NEXT: rscs r0, r4, #0
+; CHECK-NEON-NEXT: movlt r1, r3
+; CHECK-NEON-NEXT: rsbs r5, r5, #0
+; CHECK-NEON-NEXT: rscs r5, r4, #0
; CHECK-NEON-NEXT: vmov.32 d21[0], r1
-; CHECK-NEON-NEXT: movwlt r7, #1
-; CHECK-NEON-NEXT: cmp r7, #0
-; CHECK-NEON-NEXT: vmov.32 d20[0], r6
-; CHECK-NEON-NEXT: mvnne r7, #0
-; CHECK-NEON-NEXT: vmov.32 d18[0], r7
-; CHECK-NEON-NEXT: vand q10, q10, q4
-; CHECK-NEON-NEXT: vand q8, q9, q8
-; CHECK-NEON-NEXT: vmovn.i64 d1, q10
+; CHECK-NEON-NEXT: mov r5, #0
+; CHECK-NEON-NEXT: movwlt r5, #1
+; CHECK-NEON-NEXT: movlt r5, r3
+; CHECK-NEON-NEXT: vmov.32 d23[0], r5
+; CHECK-NEON-NEXT: vmov.32 d22[0], r0
+; CHECK-NEON-NEXT: vand q9, q11, q9
+; CHECK-NEON-NEXT: vmovn.i64 d1, q9
+; CHECK-NEON-NEXT: rsbs r1, r7, #0
+; CHECK-NEON-NEXT: rscs r1, r6, #0
+; CHECK-NEON-NEXT: movwlt r2, #1
+; CHECK-NEON-NEXT: movlt r2, r3
+; CHECK-NEON-NEXT: vmov.32 d20[0], r2
+; CHECK-NEON-NEXT: vand q8, q10, q8
; CHECK-NEON-NEXT: vmovn.i64 d0, q8
-; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13}
+; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
;
; CHECK-FP16-LABEL: ustest_f16i32:
-; CHECK-FP16: @ %bb.0: @ %entry
-; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-FP16-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
-; CHECK-FP16-NEXT: vpush {d8, d9, d10, d11, d12, d13}
+; CHECK-FP16: @ %bb.0: @ %entry
+; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-FP16-NEXT: .vsave {d10, d11, d12, d13}
+; CHECK-FP16-NEXT: vpush {d10, d11, d12, d13}
+; CHECK-FP16-NEXT: .vsave {d8}
+; CHECK-FP16-NEXT: vpush {d8}
; CHECK-FP16-NEXT: vmov.u16 r0, d0[3]
; CHECK-FP16-NEXT: vorr d8, d0, d0
-; CHECK-FP16-NEXT: vmov.u16 r8, d0[0]
-; CHECK-FP16-NEXT: vmov.u16 r9, d0[1]
+; CHECK-FP16-NEXT: vmov.u16 r6, d0[1]
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfdi
-; CHECK-FP16-NEXT: mov r4, r0
-; CHECK-FP16-NEXT: vmov.u16 r0, d8[2]
-; CHECK-FP16-NEXT: mov r5, r1
-; CHECK-FP16-NEXT: vmov.32 d9[0], r4
+; CHECK-FP16-NEXT: mov r8, r0
+; CHECK-FP16-NEXT: vmov.u16 r0, d8[0]
+; CHECK-FP16-NEXT: mov r9, r1
+; CHECK-FP16-NEXT: vmov.32 d13[0], r8
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfdi
-; CHECK-FP16-NEXT: mvn r10, #0
-; CHECK-FP16-NEXT: subs r2, r4, r10
-; CHECK-FP16-NEXT: sbcs r2, r5, #0
-; CHECK-FP16-NEXT: vmov.32 d8[0], r0
-; CHECK-FP16-NEXT: mov r2, #0
-; CHECK-FP16-NEXT: vmov s0, r9
-; CHECK-FP16-NEXT: movwlt r2, #1
-; CHECK-FP16-NEXT: cmp r2, #0
-; CHECK-FP16-NEXT: mvnne r2, #0
-; CHECK-FP16-NEXT: subs r0, r0, r10
-; CHECK-FP16-NEXT: sbcs r0, r1, #0
-; CHECK-FP16-NEXT: vmov.32 d9[1], r5
-; CHECK-FP16-NEXT: mov r0, #0
-; CHECK-FP16-NEXT: vmov.i64 q5, #0xffffffff
-; CHECK-FP16-NEXT: movwlt r0, #1
-; CHECK-FP16-NEXT: cmp r0, #0
-; CHECK-FP16-NEXT: vmov.32 d8[1], r1
-; CHECK-FP16-NEXT: mvnne r0, #0
-; CHECK-FP16-NEXT: mov r6, #0
-; CHECK-FP16-NEXT: vdup.32 d17, r2
-; CHECK-FP16-NEXT: vdup.32 d16, r0
-; CHECK-FP16-NEXT: vbif q4, q5, q8
-; CHECK-FP16-NEXT: bl __fixhfdi
-; CHECK-FP16-NEXT: vmov s0, r8
+; CHECK-FP16-NEXT: vmov s0, r6
; CHECK-FP16-NEXT: mov r4, r0
; CHECK-FP16-NEXT: mov r5, r1
-; CHECK-FP16-NEXT: vmov.32 d13[0], r0
-; CHECK-FP16-NEXT: vmov r7, r8, d8
+; CHECK-FP16-NEXT: vmov.32 d10[0], r0
; CHECK-FP16-NEXT: bl __fixhfdi
-; CHECK-FP16-NEXT: subs r2, r4, r10
-; CHECK-FP16-NEXT: vmov.32 d12[0], r0
-; CHECK-FP16-NEXT: sbcs r2, r5, #0
+; CHECK-FP16-NEXT: mov r6, r0
+; CHECK-FP16-NEXT: vmov.u16 r0, d8[2]
+; CHECK-FP16-NEXT: mov r7, r1
+; CHECK-FP16-NEXT: vmov.32 d11[0], r6
+; CHECK-FP16-NEXT: vmov s0, r0
+; CHECK-FP16-NEXT: bl __fixhfdi
+; CHECK-FP16-NEXT: vmov.32 d11[1], r7
+; CHECK-FP16-NEXT: mvn r3, #0
+; CHECK-FP16-NEXT: subs r4, r4, r3
+; CHECK-FP16-NEXT: vmov.i64 q8, #0xffffffff
+; CHECK-FP16-NEXT: vmov.32 d10[1], r5
+; CHECK-FP16-NEXT: sbcs r5, r5, #0
+; CHECK-FP16-NEXT: mov r5, #0
; CHECK-FP16-NEXT: mov r2, #0
-; CHECK-FP16-NEXT: vmov.32 d13[1], r5
-; CHECK-FP16-NEXT: movwlt r2, #1
-; CHECK-FP16-NEXT: cmp r2, #0
-; CHECK-FP16-NEXT: mvnne r2, #0
-; CHECK-FP16-NEXT: subs r0, r0, r10
+; CHECK-FP16-NEXT: movwlt r5, #1
+; CHECK-FP16-NEXT: vmov.32 d12[0], r0
+; CHECK-FP16-NEXT: movlt r5, r3
+; CHECK-FP16-NEXT: subs r0, r0, r3
; CHECK-FP16-NEXT: sbcs r0, r1, #0
-; CHECK-FP16-NEXT: vdup.32 d17, r2
+; CHECK-FP16-NEXT: vmov.32 d13[1], r9
; CHECK-FP16-NEXT: mov r0, #0
-; CHECK-FP16-NEXT: vmov.32 d12[1], r1
; CHECK-FP16-NEXT: movwlt r0, #1
-; CHECK-FP16-NEXT: cmp r0, #0
-; CHECK-FP16-NEXT: mvnne r0, #0
-; CHECK-FP16-NEXT: vmov r2, r3, d9
-; CHECK-FP16-NEXT: vdup.32 d16, r0
-; CHECK-FP16-NEXT: rsbs r7, r7, #0
-; CHECK-FP16-NEXT: vbsl q8, q6, q5
-; CHECK-FP16-NEXT: rscs r7, r8, #0
+; CHECK-FP16-NEXT: vmov.32 d12[1], r1
+; CHECK-FP16-NEXT: movlt r0, r3
+; CHECK-FP16-NEXT: subs r1, r8, r3
+; CHECK-FP16-NEXT: sbcs r1, r9, #0
+; CHECK-FP16-NEXT: mov r1, #0
+; CHECK-FP16-NEXT: movwlt r1, #1
+; CHECK-FP16-NEXT: movlt r1, r3
+; CHECK-FP16-NEXT: subs r6, r6, r3
+; CHECK-FP16-NEXT: sbcs r7, r7, #0
+; CHECK-FP16-NEXT: vdup.32 d19, r1
; CHECK-FP16-NEXT: mov r7, #0
+; CHECK-FP16-NEXT: vdup.32 d18, r0
; CHECK-FP16-NEXT: movwlt r7, #1
-; CHECK-FP16-NEXT: cmp r7, #0
-; CHECK-FP16-NEXT: vmov r0, r1, d17
-; CHECK-FP16-NEXT: mvnne r7, #0
-; CHECK-FP16-NEXT: vmov r5, r4, d16
+; CHECK-FP16-NEXT: vbsl q9, q6, q8
+; CHECK-FP16-NEXT: movlt r7, r3
+; CHECK-FP16-NEXT: vdup.32 d21, r7
+; CHECK-FP16-NEXT: vdup.32 d20, r5
+; CHECK-FP16-NEXT: vmov r0, r1, d18
+; CHECK-FP16-NEXT: vbit q8, q5, q10
+; CHECK-FP16-NEXT: vmov r5, r4, d19
+; CHECK-FP16-NEXT: vmov r7, r6, d17
; CHECK-FP16-NEXT: rsbs r0, r0, #0
; CHECK-FP16-NEXT: rscs r0, r1, #0
; CHECK-FP16-NEXT: mov r0, #0
; CHECK-FP16-NEXT: movwlt r0, #1
-; CHECK-FP16-NEXT: cmp r0, #0
-; CHECK-FP16-NEXT: mvnne r0, #0
-; CHECK-FP16-NEXT: rsbs r1, r2, #0
-; CHECK-FP16-NEXT: rscs r1, r3, #0
-; CHECK-FP16-NEXT: vmov.32 d19[0], r0
+; CHECK-FP16-NEXT: movlt r0, r3
+; CHECK-FP16-NEXT: rsbs r1, r7, #0
+; CHECK-FP16-NEXT: rscs r1, r6, #0
+; CHECK-FP16-NEXT: vmov r7, r6, d16
; CHECK-FP16-NEXT: mov r1, #0
; CHECK-FP16-NEXT: movwlt r1, #1
-; CHECK-FP16-NEXT: cmp r1, #0
-; CHECK-FP16-NEXT: mvnne r1, #0
-; CHECK-FP16-NEXT: rsbs r0, r5, #0
-; CHECK-FP16-NEXT: rscs r0, r4, #0
+; CHECK-FP16-NEXT: movlt r1, r3
+; CHECK-FP16-NEXT: rsbs r5, r5, #0
+; CHECK-FP16-NEXT: rscs r5, r4, #0
; CHECK-FP16-NEXT: vmov.32 d21[0], r1
-; CHECK-FP16-NEXT: movwlt r6, #1
-; CHECK-FP16-NEXT: cmp r6, #0
-; CHECK-FP16-NEXT: vmov.32 d20[0], r7
-; CHECK-FP16-NEXT: mvnne r6, #0
-; CHECK-FP16-NEXT: vmov.32 d18[0], r6
-; CHECK-FP16-NEXT: vand q10, q10, q4
-; CHECK-FP16-NEXT: vand q8, q9, q8
-; CHECK-FP16-NEXT: vmovn.i64 d1, q10
+; CHECK-FP16-NEXT: mov r5, #0
+; CHECK-FP16-NEXT: movwlt r5, #1
+; CHECK-FP16-NEXT: movlt r5, r3
+; CHECK-FP16-NEXT: vmov.32 d23[0], r5
+; CHECK-FP16-NEXT: vmov.32 d22[0], r0
+; CHECK-FP16-NEXT: vand q9, q11, q9
+; CHECK-FP16-NEXT: vmovn.i64 d1, q9
+; CHECK-FP16-NEXT: rsbs r1, r7, #0
+; CHECK-FP16-NEXT: rscs r1, r6, #0
+; CHECK-FP16-NEXT: movwlt r2, #1
+; CHECK-FP16-NEXT: movlt r2, r3
+; CHECK-FP16-NEXT: vmov.32 d20[0], r2
+; CHECK-FP16-NEXT: vand q8, q10, q8
; CHECK-FP16-NEXT: vmovn.i64 d0, q8
-; CHECK-FP16-NEXT: vpop {d8, d9, d10, d11, d12, d13}
-; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+; CHECK-FP16-NEXT: vpop {d8}
+; CHECK-FP16-NEXT: vpop {d10, d11, d12, d13}
+; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
entry:
%conv = fptosi <4 x half> %x to <4 x i64>
%0 = icmp slt <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
@@ -1617,8 +1563,8 @@ entry:
define <2 x i64> @stest_f64i64(<2 x double> %x) {
; CHECK-LABEL: stest_f64i64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vorr q4, q0, q0
@@ -1627,55 +1573,50 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) {
; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: mvn r9, #0
; CHECK-NEXT: subs r1, r0, r9
-; CHECK-NEXT: mvn r5, #-2147483648
-; CHECK-NEXT: sbcs r1, r4, r5
+; CHECK-NEXT: mvn r8, #-2147483648
+; CHECK-NEXT: sbcs r1, r4, r8
; CHECK-NEXT: vorr d0, d8, d8
; CHECK-NEXT: sbcs r1, r2, #0
; CHECK-NEXT: mov r7, #0
; CHECK-NEXT: sbcs r1, r3, #0
-; CHECK-NEXT: mov r8, #-2147483648
+; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: mov r1, #0
-; CHECK-NEXT: mov r10, #0
+; CHECK-NEXT: mvnge r4, #-2147483648
; CHECK-NEXT: movwlt r1, #1
-; CHECK-NEXT: cmp r1, #0
-; CHECK-NEXT: moveq r3, r1
-; CHECK-NEXT: movne r1, r2
-; CHECK-NEXT: moveq r4, r5
-; CHECK-NEXT: moveq r0, r9
+; CHECK-NEXT: mvnge r0, #0
+; CHECK-NEXT: movge r3, r1
+; CHECK-NEXT: movlt r1, r2
; CHECK-NEXT: rsbs r2, r0, #0
; CHECK-NEXT: rscs r2, r4, #-2147483648
; CHECK-NEXT: sbcs r1, r9, r1
; CHECK-NEXT: sbcs r1, r9, r3
; CHECK-NEXT: movwlt r7, #1
-; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: movne r7, r0
-; CHECK-NEXT: moveq r4, r8
+; CHECK-NEXT: movge r4, #-2147483648
+; CHECK-NEXT: movlt r7, r0
; CHECK-NEXT: bl __fixdfti
; CHECK-NEXT: subs r6, r0, r9
; CHECK-NEXT: vmov.32 d1[0], r7
-; CHECK-NEXT: sbcs r6, r1, r5
+; CHECK-NEXT: sbcs r6, r1, r8
; CHECK-NEXT: sbcs r6, r2, #0
; CHECK-NEXT: sbcs r6, r3, #0
; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: mvnge r1, #-2147483648
; CHECK-NEXT: movwlt r6, #1
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: moveq r3, r6
-; CHECK-NEXT: movne r6, r2
-; CHECK-NEXT: movne r5, r1
-; CHECK-NEXT: moveq r0, r9
-; CHECK-NEXT: rsbs r1, r0, #0
-; CHECK-NEXT: rscs r1, r5, #-2147483648
-; CHECK-NEXT: sbcs r1, r9, r6
-; CHECK-NEXT: sbcs r1, r9, r3
-; CHECK-NEXT: movwlt r10, #1
-; CHECK-NEXT: cmp r10, #0
-; CHECK-NEXT: movne r10, r0
-; CHECK-NEXT: moveq r5, r8
-; CHECK-NEXT: vmov.32 d0[0], r10
+; CHECK-NEXT: mvnge r0, #0
+; CHECK-NEXT: movge r3, r6
+; CHECK-NEXT: movlt r6, r2
+; CHECK-NEXT: rsbs r2, r0, #0
+; CHECK-NEXT: rscs r2, r1, #-2147483648
+; CHECK-NEXT: sbcs r2, r9, r6
+; CHECK-NEXT: sbcs r2, r9, r3
+; CHECK-NEXT: movwlt r5, #1
+; CHECK-NEXT: movge r1, #-2147483648
+; CHECK-NEXT: movlt r5, r0
+; CHECK-NEXT: vmov.32 d0[0], r5
; CHECK-NEXT: vmov.32 d1[1], r4
-; CHECK-NEXT: vmov.32 d0[1], r5
+; CHECK-NEXT: vmov.32 d0[1], r1
; CHECK-NEXT: vpop {d8, d9}
-; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
entry:
%conv = fptosi <2 x double> %x to <2 x i128>
%0 = icmp slt <2 x i128> %conv, <i128 9223372036854775807, i128 9223372036854775807>
@@ -1703,17 +1644,15 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) {
; CHECK-NEXT: mov r6, #0
; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: movwlo r6, #1
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: moveq r4, r6
-; CHECK-NEXT: movne r6, r0
+; CHECK-NEXT: movhs r4, r6
+; CHECK-NEXT: movlo r6, r0
; CHECK-NEXT: bl __fixunsdfti
; CHECK-NEXT: subs r2, r2, #1
; CHECK-NEXT: vmov.32 d1[0], r6
; CHECK-NEXT: sbcs r2, r3, #0
; CHECK-NEXT: movwlo r5, #1
-; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: moveq r0, r5
-; CHECK-NEXT: movne r5, r1
+; CHECK-NEXT: movhs r0, r5
+; CHECK-NEXT: movlo r5, r1
; CHECK-NEXT: vmov.32 d0[0], r0
; CHECK-NEXT: vmov.32 d1[1], r4
; CHECK-NEXT: vmov.32 d0[1], r5
@@ -1744,41 +1683,37 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) {
; CHECK-NEXT: mov r1, #0
; CHECK-NEXT: movge r2, r8
; CHECK-NEXT: movwlt r1, #1
-; CHECK-NEXT: cmp r1, #0
-; CHECK-NEXT: moveq r3, r1
-; CHECK-NEXT: moveq r4, r1
-; CHECK-NEXT: movne r1, r0
+; CHECK-NEXT: vorr d0, d8, d8
+; CHECK-NEXT: movge r3, r1
+; CHECK-NEXT: movge r4, r1
+; CHECK-NEXT: movlt r1, r0
; CHECK-NEXT: rsbs r0, r1, #0
; CHECK-NEXT: rscs r0, r4, #0
-; CHECK-NEXT: vorr d0, d8, d8
-; CHECK-NEXT: rscs r0, r2, #0
; CHECK-NEXT: mov r7, #0
-; CHECK-NEXT: rscs r0, r3, #0
+; CHECK-NEXT: rscs r0, r2, #0
; CHECK-NEXT: mov r5, #0
+; CHECK-NEXT: rscs r0, r3, #0
; CHECK-NEXT: movwlt r7, #1
-; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: moveq r4, r7
-; CHECK-NEXT: movne r7, r1
+; CHECK-NEXT: movge r4, r7
+; CHECK-NEXT: movlt r7, r1
; CHECK-NEXT: bl __fixdfti
; CHECK-NEXT: subs r6, r2, #1
; CHECK-NEXT: vmov.32 d1[0], r7
; CHECK-NEXT: sbcs r6, r3, #0
+; CHECK-NEXT: mov r6, #0
; CHECK-NEXT: movlt r8, r2
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movwlt r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: moveq r3, r2
-; CHECK-NEXT: moveq r1, r2
-; CHECK-NEXT: movne r2, r0
-; CHECK-NEXT: rsbs r0, r2, #0
+; CHECK-NEXT: movwlt r6, #1
+; CHECK-NEXT: movge r3, r6
+; CHECK-NEXT: movge r1, r6
+; CHECK-NEXT: movlt r6, r0
+; CHECK-NEXT: rsbs r0, r6, #0
; CHECK-NEXT: rscs r0, r1, #0
; CHECK-NEXT: rscs r0, r8, #0
; CHECK-NEXT: rscs r0, r3, #0
; CHECK-NEXT: movwlt r5, #1
-; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: moveq r2, r5
-; CHECK-NEXT: movne r5, r1
-; CHECK-NEXT: vmov.32 d0[0], r2
+; CHECK-NEXT: movge r6, r5
+; CHECK-NEXT: movlt r5, r1
+; CHECK-NEXT: vmov.32 d0[0], r6
; CHECK-NEXT: vmov.32 d1[1], r4
; CHECK-NEXT: vmov.32 d0[1], r5
; CHECK-NEXT: vpop {d8, d9}
@@ -1796,8 +1731,8 @@ entry:
define <2 x i64> @stest_f32i64(<2 x float> %x) {
; CHECK-LABEL: stest_f32i64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEXT: .vsave {d8}
; CHECK-NEXT: vpush {d8}
; CHECK-NEXT: vmov.f64 d8, d0
@@ -1806,55 +1741,50 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) {
; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: mvn r9, #0
; CHECK-NEXT: subs r1, r0, r9
-; CHECK-NEXT: mvn r5, #-2147483648
-; CHECK-NEXT: sbcs r1, r4, r5
+; CHECK-NEXT: mvn r8, #-2147483648
+; CHECK-NEXT: sbcs r1, r4, r8
; CHECK-NEXT: vmov.f32 s0, s16
; CHECK-NEXT: sbcs r1, r2, #0
; CHECK-NEXT: mov r7, #0
; CHECK-NEXT: sbcs r1, r3, #0
-; CHECK-NEXT: mov r8, #-2147483648
+; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: mov r1, #0
-; CHECK-NEXT: mov r10, #0
+; CHECK-NEXT: mvnge r4, #-2147483648
; CHECK-NEXT: movwlt r1, #1
-; CHECK-NEXT: cmp r1, #0
-; CHECK-NEXT: moveq r3, r1
-; CHECK-NEXT: movne r1, r2
-; CHECK-NEXT: moveq r4, r5
-; CHECK-NEXT: moveq r0, r9
+; CHECK-NEXT: mvnge r0, #0
+; CHECK-NEXT: movge r3, r1
+; CHECK-NEXT: movlt r1, r2
; CHECK-NEXT: rsbs r2, r0, #0
; CHECK-NEXT: rscs r2, r4, #-2147483648
; CHECK-NEXT: sbcs r1, r9, r1
; CHECK-NEXT: sbcs r1, r9, r3
; CHECK-NEXT: movwlt r7, #1
-; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: movne r7, r0
-; CHECK-NEXT: moveq r4, r8
+; CHECK-NEXT: movge r4, #-2147483648
+; CHECK-NEXT: movlt r7, r0
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: subs r6, r0, r9
; CHECK-NEXT: vmov.32 d1[0], r7
-; CHECK-NEXT: sbcs r6, r1, r5
+; CHECK-NEXT: sbcs r6, r1, r8
; CHECK-NEXT: sbcs r6, r2, #0
; CHECK-NEXT: sbcs r6, r3, #0
; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: mvnge r1, #-2147483648
; CHECK-NEXT: movwlt r6, #1
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: moveq r3, r6
-; CHECK-NEXT: movne r6, r2
-; CHECK-NEXT: movne r5, r1
-; CHECK-NEXT: moveq r0, r9
-; CHECK-NEXT: rsbs r1, r0, #0
-; CHECK-NEXT: rscs r1, r5, #-2147483648
-; CHECK-NEXT: sbcs r1, r9, r6
-; CHECK-NEXT: sbcs r1, r9, r3
-; CHECK-NEXT: movwlt r10, #1
-; CHECK-NEXT: cmp r10, #0
-; CHECK-NEXT: movne r10, r0
-; CHECK-NEXT: moveq r5, r8
-; CHECK-NEXT: vmov.32 d0[0], r10
+; CHECK-NEXT: mvnge r0, #0
+; CHECK-NEXT: movge r3, r6
+; CHECK-NEXT: movlt r6, r2
+; CHECK-NEXT: rsbs r2, r0, #0
+; CHECK-NEXT: rscs r2, r1, #-2147483648
+; CHECK-NEXT: sbcs r2, r9, r6
+; CHECK-NEXT: sbcs r2, r9, r3
+; CHECK-NEXT: movwlt r5, #1
+; CHECK-NEXT: movge r1, #-2147483648
+; CHECK-NEXT: movlt r5, r0
+; CHECK-NEXT: vmov.32 d0[0], r5
; CHECK-NEXT: vmov.32 d1[1], r4
-; CHECK-NEXT: vmov.32 d0[1], r5
+; CHECK-NEXT: vmov.32 d0[1], r1
; CHECK-NEXT: vpop {d8}
-; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
entry:
%conv = fptosi <2 x float> %x to <2 x i128>
%0 = icmp slt <2 x i128> %conv, <i128 9223372036854775807, i128 9223372036854775807>
@@ -1882,17 +1812,15 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) {
; CHECK-NEXT: sbcs r1, r3, #0
; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: movwlo r6, #1
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: moveq r4, r6
-; CHECK-NEXT: movne r6, r0
+; CHECK-NEXT: movhs r4, r6
+; CHECK-NEXT: movlo r6, r0
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: subs r2, r2, #1
; CHECK-NEXT: vmov.32 d1[0], r6
; CHECK-NEXT: sbcs r2, r3, #0
; CHECK-NEXT: movwlo r5, #1
-; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: moveq r0, r5
-; CHECK-NEXT: movne r5, r1
+; CHECK-NEXT: movhs r0, r5
+; CHECK-NEXT: movlo r5, r1
; CHECK-NEXT: vmov.32 d0[0], r0
; CHECK-NEXT: vmov.32 d1[1], r4
; CHECK-NEXT: vmov.32 d0[1], r5
@@ -1919,45 +1847,41 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) {
; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: subs r1, r2, #1
; CHECK-NEXT: sbcs r1, r3, #0
-; CHECK-NEXT: mov r8, #1
-; CHECK-NEXT: mov r1, #0
; CHECK-NEXT: vmov.f32 s0, s16
-; CHECK-NEXT: movge r2, r8
+; CHECK-NEXT: mov r1, #0
+; CHECK-NEXT: mov r8, #1
; CHECK-NEXT: movwlt r1, #1
-; CHECK-NEXT: cmp r1, #0
-; CHECK-NEXT: mov r7, #0
-; CHECK-NEXT: moveq r3, r1
-; CHECK-NEXT: moveq r4, r1
-; CHECK-NEXT: movne r1, r0
+; CHECK-NEXT: movge r2, r8
+; CHECK-NEXT: movge r3, r1
+; CHECK-NEXT: movge r4, r1
+; CHECK-NEXT: movlt r1, r0
; CHECK-NEXT: rsbs r0, r1, #0
; CHECK-NEXT: rscs r0, r4, #0
-; CHECK-NEXT: mov r5, #0
+; CHECK-NEXT: mov r7, #0
; CHECK-NEXT: rscs r0, r2, #0
+; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: rscs r0, r3, #0
; CHECK-NEXT: movwlt r7, #1
-; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: moveq r4, r7
-; CHECK-NEXT: movne r7, r1
+; CHECK-NEXT: movge r4, r7
+; CHECK-NEXT: movlt r7, r1
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: subs r6, r2, #1
; CHECK-NEXT: vmov.32 d1[0], r7
; CHECK-NEXT: sbcs r6, r3, #0
+; CHECK-NEXT: mov r6, #0
; CHECK-NEXT: movlt r8, r2
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movwlt r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: moveq r3, r2
-; CHECK-NEXT: moveq r1, r2
-; CHECK-NEXT: movne r2, r0
-; CHECK-NEXT: rsbs r0, r2, #0
+; CHECK-NEXT: movwlt r6, #1
+; CHECK-NEXT: movge r3, r6
+; CHECK-NEXT: movge r1, r6
+; CHECK-NEXT: movlt r6, r0
+; CHECK-NEXT: rsbs r0, r6, #0
; CHECK-NEXT: rscs r0, r1, #0
; CHECK-NEXT: rscs r0, r8, #0
; CHECK-NEXT: rscs r0, r3, #0
; CHECK-NEXT: movwlt r5, #1
-; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: moveq r2, r5
-; CHECK-NEXT: movne r5, r1
-; CHECK-NEXT: vmov.32 d0[0], r2
+; CHECK-NEXT: movge r6, r5
+; CHECK-NEXT: movlt r5, r1
+; CHECK-NEXT: vmov.32 d0[0], r6
; CHECK-NEXT: vmov.32 d1[1], r4
; CHECK-NEXT: vmov.32 d0[1], r5
; CHECK-NEXT: vpop {d8}
@@ -1975,14 +1899,14 @@ entry:
define <2 x i64> @stest_f16i64(<2 x half> %x) {
; CHECK-NEON-LABEL: stest_f16i64:
; CHECK-NEON: @ %bb.0: @ %entry
-; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEON-NEXT: .vsave {d8}
; CHECK-NEON-NEXT: vpush {d8}
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmov.f32 s16, s1
; CHECK-NEON-NEXT: bl __aeabi_h2f
-; CHECK-NEON-NEXT: mov r8, r0
+; CHECK-NEON-NEXT: mov r5, r0
; CHECK-NEON-NEXT: vmov r0, s16
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
@@ -1990,60 +1914,55 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) {
; CHECK-NEON-NEXT: mov r4, r1
; CHECK-NEON-NEXT: mvn r9, #0
; CHECK-NEON-NEXT: subs r1, r0, r9
-; CHECK-NEON-NEXT: mvn r6, #-2147483648
-; CHECK-NEON-NEXT: sbcs r1, r4, r6
-; CHECK-NEON-NEXT: vmov s0, r8
+; CHECK-NEON-NEXT: mvn r8, #-2147483648
+; CHECK-NEON-NEXT: sbcs r1, r4, r8
+; CHECK-NEON-NEXT: vmov s0, r5
; CHECK-NEON-NEXT: sbcs r1, r2, #0
; CHECK-NEON-NEXT: mov r5, #0
; CHECK-NEON-NEXT: sbcs r1, r3, #0
-; CHECK-NEON-NEXT: mov r8, #-2147483648
+; CHECK-NEON-NEXT: mov r6, #0
; CHECK-NEON-NEXT: mov r1, #0
-; CHECK-NEON-NEXT: mov r10, #0
+; CHECK-NEON-NEXT: mvnge r4, #-2147483648
; CHECK-NEON-NEXT: movwlt r1, #1
-; CHECK-NEON-NEXT: cmp r1, #0
-; CHECK-NEON-NEXT: moveq r3, r1
-; CHECK-NEON-NEXT: movne r1, r2
-; CHECK-NEON-NEXT: moveq r4, r6
-; CHECK-NEON-NEXT: moveq r0, r9
+; CHECK-NEON-NEXT: mvnge r0, #0
+; CHECK-NEON-NEXT: movge r3, r1
+; CHECK-NEON-NEXT: movlt r1, r2
; CHECK-NEON-NEXT: rsbs r2, r0, #0
; CHECK-NEON-NEXT: rscs r2, r4, #-2147483648
; CHECK-NEON-NEXT: sbcs r1, r9, r1
; CHECK-NEON-NEXT: sbcs r1, r9, r3
; CHECK-NEON-NEXT: movwlt r5, #1
-; CHECK-NEON-NEXT: cmp r5, #0
-; CHECK-NEON-NEXT: movne r5, r0
-; CHECK-NEON-NEXT: moveq r4, r8
+; CHECK-NEON-NEXT: movge r4, #-2147483648
+; CHECK-NEON-NEXT: movlt r5, r0
; CHECK-NEON-NEXT: bl __fixsfti
; CHECK-NEON-NEXT: subs r7, r0, r9
; CHECK-NEON-NEXT: vmov.32 d1[0], r5
-; CHECK-NEON-NEXT: sbcs r7, r1, r6
+; CHECK-NEON-NEXT: sbcs r7, r1, r8
; CHECK-NEON-NEXT: sbcs r7, r2, #0
; CHECK-NEON-NEXT: sbcs r7, r3, #0
; CHECK-NEON-NEXT: mov r7, #0
+; CHECK-NEON-NEXT: mvnge r1, #-2147483648
; CHECK-NEON-NEXT: movwlt r7, #1
-; CHECK-NEON-NEXT: cmp r7, #0
-; CHECK-NEON-NEXT: moveq r3, r7
-; CHECK-NEON-NEXT: movne r7, r2
-; CHECK-NEON-NEXT: movne r6, r1
-; CHECK-NEON-NEXT: moveq r0, r9
-; CHECK-NEON-NEXT: rsbs r1, r0, #0
-; CHECK-NEON-NEXT: rscs r1, r6, #-2147483648
-; CHECK-NEON-NEXT: sbcs r1, r9, r7
-; CHECK-NEON-NEXT: sbcs r1, r9, r3
-; CHECK-NEON-NEXT: movwlt r10, #1
-; CHECK-NEON-NEXT: cmp r10, #0
-; CHECK-NEON-NEXT: movne r10, r0
-; CHECK-NEON-NEXT: moveq r6, r8
-; CHECK-NEON-NEXT: vmov.32 d0[0], r10
+; CHECK-NEON-NEXT: mvnge r0, #0
+; CHECK-NEON-NEXT: movge r3, r7
+; CHECK-NEON-NEXT: movlt r7, r2
+; CHECK-NEON-NEXT: rsbs r2, r0, #0
+; CHECK-NEON-NEXT: rscs r2, r1, #-2147483648
+; CHECK-NEON-NEXT: sbcs r2, r9, r7
+; CHECK-NEON-NEXT: sbcs r2, r9, r3
+; CHECK-NEON-NEXT: movwlt r6, #1
+; CHECK-NEON-NEXT: movge r1, #-2147483648
+; CHECK-NEON-NEXT: movlt r6, r0
+; CHECK-NEON-NEXT: vmov.32 d0[0], r6
; CHECK-NEON-NEXT: vmov.32 d1[1], r4
-; CHECK-NEON-NEXT: vmov.32 d0[1], r6
+; CHECK-NEON-NEXT: vmov.32 d0[1], r1
; CHECK-NEON-NEXT: vpop {d8}
-; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
;
; CHECK-FP16-LABEL: stest_f16i64:
; CHECK-FP16: @ %bb.0: @ %entry
-; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-FP16-NEXT: vmov.u16 r0, d0[1]
; CHECK-FP16-NEXT: vmov.u16 r7, d0[0]
; CHECK-FP16-NEXT: vmov s0, r0
@@ -2051,54 +1970,49 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) {
; CHECK-FP16-NEXT: mov r4, r1
; CHECK-FP16-NEXT: mvn r9, #0
; CHECK-FP16-NEXT: subs r1, r0, r9
-; CHECK-FP16-NEXT: mvn r5, #-2147483648
-; CHECK-FP16-NEXT: sbcs r1, r4, r5
+; CHECK-FP16-NEXT: mvn r8, #-2147483648
+; CHECK-FP16-NEXT: sbcs r1, r4, r8
; CHECK-FP16-NEXT: vmov s0, r7
; CHECK-FP16-NEXT: sbcs r1, r2, #0
; CHECK-FP16-NEXT: mov r7, #0
; CHECK-FP16-NEXT: sbcs r1, r3, #0
-; CHECK-FP16-NEXT: mov r8, #-2147483648
+; CHECK-FP16-NEXT: mov r5, #0
; CHECK-FP16-NEXT: mov r1, #0
-; CHECK-FP16-NEXT: mov r10, #0
+; CHECK-FP16-NEXT: mvnge r4, #-2147483648
; CHECK-FP16-NEXT: movwlt r1, #1
-; CHECK-FP16-NEXT: cmp r1, #0
-; CHECK-FP16-NEXT: moveq r3, r1
-; CHECK-FP16-NEXT: movne r1, r2
-; CHECK-FP16-NEXT: moveq r4, r5
-; CHECK-FP16-NEXT: moveq r0, r9
+; CHECK-FP16-NEXT: mvnge r0, #0
+; CHECK-FP16-NEXT: movge r3, r1
+; CHECK-FP16-NEXT: movlt r1, r2
; CHECK-FP16-NEXT: rsbs r2, r0, #0
; CHECK-FP16-NEXT: rscs r2, r4, #-2147483648
; CHECK-FP16-NEXT: sbcs r1, r9, r1
; CHECK-FP16-NEXT: sbcs r1, r9, r3
; CHECK-FP16-NEXT: movwlt r7, #1
-; CHECK-FP16-NEXT: cmp r7, #0
-; CHECK-FP16-NEXT: movne r7, r0
-; CHECK-FP16-NEXT: moveq r4, r8
+; CHECK-FP16-NEXT: movge r4, #-2147483648
+; CHECK-FP16-NEXT: movlt r7, r0
; CHECK-FP16-NEXT: bl __fixhfti
; CHECK-FP16-NEXT: subs r6, r0, r9
; CHECK-FP16-NEXT: vmov.32 d1[0], r7
-; CHECK-FP16-NEXT: sbcs r6, r1, r5
+; CHECK-FP16-NEXT: sbcs r6, r1, r8
; CHECK-FP16-NEXT: sbcs r6, r2, #0
; CHECK-FP16-NEXT: sbcs r6, r3, #0
; CHECK-FP16-NEXT: mov r6, #0
+; CHECK-FP16-NEXT: mvnge r1, #-2147483648
; CHECK-FP16-NEXT: movwlt r6, #1
-; CHECK-FP16-NEXT: cmp r6, #0
-; CHECK-FP16-NEXT: moveq r3, r6
-; CHECK-FP16-NEXT: movne r6, r2
-; CHECK-FP16-NEXT: movne r5, r1
-; CHECK-FP16-NEXT: moveq r0, r9
-; CHECK-FP16-NEXT: rsbs r1, r0, #0
-; CHECK-FP16-NEXT: rscs r1, r5, #-2147483648
-; CHECK-FP16-NEXT: sbcs r1, r9, r6
-; CHECK-FP16-NEXT: sbcs r1, r9, r3
-; CHECK-FP16-NEXT: movwlt r10, #1
-; CHECK-FP16-NEXT: cmp r10, #0
-; CHECK-FP16-NEXT: movne r10, r0
-; CHECK-FP16-NEXT: moveq r5, r8
-; CHECK-FP16-NEXT: vmov.32 d0[0], r10
+; CHECK-FP16-NEXT: mvnge r0, #0
+; CHECK-FP16-NEXT: movge r3, r6
+; CHECK-FP16-NEXT: movlt r6, r2
+; CHECK-FP16-NEXT: rsbs r2, r0, #0
+; CHECK-FP16-NEXT: rscs r2, r1, #-2147483648
+; CHECK-FP16-NEXT: sbcs r2, r9, r6
+; CHECK-FP16-NEXT: sbcs r2, r9, r3
+; CHECK-FP16-NEXT: movwlt r5, #1
+; CHECK-FP16-NEXT: movge r1, #-2147483648
+; CHECK-FP16-NEXT: movlt r5, r0
+; CHECK-FP16-NEXT: vmov.32 d0[0], r5
; CHECK-FP16-NEXT: vmov.32 d1[1], r4
-; CHECK-FP16-NEXT: vmov.32 d0[1], r5
-; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+; CHECK-FP16-NEXT: vmov.32 d0[1], r1
+; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
entry:
%conv = fptosi <2 x half> %x to <2 x i128>
%0 = icmp slt <2 x i128> %conv, <i128 9223372036854775807, i128 9223372036854775807>
@@ -2131,17 +2045,15 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
; CHECK-NEON-NEXT: mov r5, #0
; CHECK-NEON-NEXT: mov r6, #0
; CHECK-NEON-NEXT: movwlo r5, #1
-; CHECK-NEON-NEXT: cmp r5, #0
-; CHECK-NEON-NEXT: moveq r4, r5
-; CHECK-NEON-NEXT: movne r5, r0
+; CHECK-NEON-NEXT: movhs r4, r5
+; CHECK-NEON-NEXT: movlo r5, r0
; CHECK-NEON-NEXT: bl __fixunssfti
; CHECK-NEON-NEXT: subs r2, r2, #1
; CHECK-NEON-NEXT: vmov.32 d1[0], r5
; CHECK-NEON-NEXT: sbcs r2, r3, #0
; CHECK-NEON-NEXT: movwlo r6, #1
-; CHECK-NEON-NEXT: cmp r6, #0
-; CHECK-NEON-NEXT: moveq r0, r6
-; CHECK-NEON-NEXT: movne r6, r1
+; CHECK-NEON-NEXT: movhs r0, r6
+; CHECK-NEON-NEXT: movlo r6, r1
; CHECK-NEON-NEXT: vmov.32 d0[0], r0
; CHECK-NEON-NEXT: vmov.32 d1[1], r4
; CHECK-NEON-NEXT: vmov.32 d0[1], r6
@@ -2163,17 +2075,15 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
; CHECK-FP16-NEXT: mov r6, #0
; CHECK-FP16-NEXT: mov r5, #0
; CHECK-FP16-NEXT: movwlo r6, #1
-; CHECK-FP16-NEXT: cmp r6, #0
-; CHECK-FP16-NEXT: moveq r4, r6
-; CHECK-FP16-NEXT: movne r6, r0
+; CHECK-FP16-NEXT: movhs r4, r6
+; CHECK-FP16-NEXT: movlo r6, r0
; CHECK-FP16-NEXT: bl __fixunshfti
; CHECK-FP16-NEXT: subs r2, r2, #1
; CHECK-FP16-NEXT: vmov.32 d1[0], r6
; CHECK-FP16-NEXT: sbcs r2, r3, #0
; CHECK-FP16-NEXT: movwlo r5, #1
-; CHECK-FP16-NEXT: cmp r5, #0
-; CHECK-FP16-NEXT: moveq r0, r5
-; CHECK-FP16-NEXT: movne r5, r1
+; CHECK-FP16-NEXT: movhs r0, r5
+; CHECK-FP16-NEXT: movlo r5, r1
; CHECK-FP16-NEXT: vmov.32 d0[0], r0
; CHECK-FP16-NEXT: vmov.32 d1[1], r4
; CHECK-FP16-NEXT: vmov.32 d0[1], r5
@@ -2204,47 +2114,43 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) {
; CHECK-NEON-NEXT: mov r4, r1
; CHECK-NEON-NEXT: subs r1, r2, #1
; CHECK-NEON-NEXT: sbcs r1, r3, #0
-; CHECK-NEON-NEXT: mov r8, #1
+; CHECK-NEON-NEXT: vmov s0, r5
; CHECK-NEON-NEXT: mov r1, #0
-; CHECK-NEON-NEXT: movge r2, r8
+; CHECK-NEON-NEXT: mov r5, #1
; CHECK-NEON-NEXT: movwlt r1, #1
-; CHECK-NEON-NEXT: cmp r1, #0
-; CHECK-NEON-NEXT: moveq r3, r1
-; CHECK-NEON-NEXT: moveq r4, r1
-; CHECK-NEON-NEXT: movne r1, r0
+; CHECK-NEON-NEXT: movge r2, r5
+; CHECK-NEON-NEXT: movge r3, r1
+; CHECK-NEON-NEXT: movge r4, r1
+; CHECK-NEON-NEXT: movlt r1, r0
; CHECK-NEON-NEXT: rsbs r0, r1, #0
; CHECK-NEON-NEXT: rscs r0, r4, #0
-; CHECK-NEON-NEXT: vmov s0, r5
-; CHECK-NEON-NEXT: rscs r0, r2, #0
; CHECK-NEON-NEXT: mov r7, #0
+; CHECK-NEON-NEXT: rscs r0, r2, #0
+; CHECK-NEON-NEXT: mov r8, #0
; CHECK-NEON-NEXT: rscs r0, r3, #0
-; CHECK-NEON-NEXT: mov r5, #0
; CHECK-NEON-NEXT: movwlt r7, #1
-; CHECK-NEON-NEXT: cmp r7, #0
-; CHECK-NEON-NEXT: moveq r4, r7
-; CHECK-NEON-NEXT: movne r7, r1
+; CHECK-NEON-NEXT: movge r4, r7
+; CHECK-NEON-NEXT: movlt r7, r1
; CHECK-NEON-NEXT: bl __fixsfti
; CHECK-NEON-NEXT: subs r6, r2, #1
; CHECK-NEON-NEXT: vmov.32 d1[0], r7
; CHECK-NEON-NEXT: sbcs r6, r3, #0
-; CHECK-NEON-NEXT: movlt r8, r2
-; CHECK-NEON-NEXT: mov r2, #0
-; CHECK-NEON-NEXT: movwlt r2, #1
-; CHECK-NEON-NEXT: cmp r2, #0
-; CHECK-NEON-NEXT: moveq r3, r2
-; CHECK-NEON-NEXT: moveq r1, r2
-; CHECK-NEON-NEXT: movne r2, r0
-; CHECK-NEON-NEXT: rsbs r0, r2, #0
+; CHECK-NEON-NEXT: mov r6, #0
+; CHECK-NEON-NEXT: movlt r5, r2
+; CHECK-NEON-NEXT: movwlt r6, #1
+; CHECK-NEON-NEXT: movge r3, r6
+; CHECK-NEON-NEXT: movge r1, r6
+; CHECK-NEON-NEXT: movlt r6, r0
+; CHECK-NEON-NEXT: rsbs r0, r6, #0
; CHECK-NEON-NEXT: rscs r0, r1, #0
-; CHECK-NEON-NEXT: rscs r0, r8, #0
+; CHECK-NEON-NEXT: rscs r0, r5, #0
; CHECK-NEON-NEXT: rscs r0, r3, #0
-; CHECK-NEON-NEXT: movwlt r5, #1
-; CHECK-NEON-NEXT: cmp r5, #0
-; CHECK-NEON-NEXT: moveq r2, r5
-; CHECK-NEON-NEXT: movne r5, r1
-; CHECK-NEON-NEXT: vmov.32 d0[0], r2
+; CHECK-NEON-NEXT: movwlt r8, #1
+; CHECK-NEON-NEXT: movge r6, r8
+; CHECK-NEON-NEXT: movlt r8, r1
+; CHECK-NEON-NEXT: vmov.32 d0[0], r6
; CHECK-NEON-NEXT: vmov.32 d1[1], r4
-; CHECK-NEON-NEXT: vmov.32 d0[1], r5
+; CHECK-NEON-NEXT: vmov.32 d0[1], r8
; CHECK-NEON-NEXT: vpop {d8}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, pc}
;
@@ -2253,53 +2159,49 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) {
; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, lr}
; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, lr}
; CHECK-FP16-NEXT: vmov.u16 r0, d0[1]
-; CHECK-FP16-NEXT: vmov.u16 r5, d0[0]
+; CHECK-FP16-NEXT: vmov.u16 r6, d0[0]
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfti
; CHECK-FP16-NEXT: mov r4, r1
; CHECK-FP16-NEXT: subs r1, r2, #1
; CHECK-FP16-NEXT: sbcs r1, r3, #0
-; CHECK-FP16-NEXT: mov r8, #1
+; CHECK-FP16-NEXT: vmov s0, r6
; CHECK-FP16-NEXT: mov r1, #0
-; CHECK-FP16-NEXT: movge r2, r8
+; CHECK-FP16-NEXT: mov r6, #1
; CHECK-FP16-NEXT: movwlt r1, #1
-; CHECK-FP16-NEXT: cmp r1, #0
-; CHECK-FP16-NEXT: moveq r3, r1
-; CHECK-FP16-NEXT: moveq r4, r1
-; CHECK-FP16-NEXT: movne r1, r0
+; CHECK-FP16-NEXT: movge r2, r6
+; CHECK-FP16-NEXT: movge r3, r1
+; CHECK-FP16-NEXT: movge r4, r1
+; CHECK-FP16-NEXT: movlt r1, r0
; CHECK-FP16-NEXT: rsbs r0, r1, #0
; CHECK-FP16-NEXT: rscs r0, r4, #0
-; CHECK-FP16-NEXT: vmov s0, r5
-; CHECK-FP16-NEXT: rscs r0, r2, #0
; CHECK-FP16-NEXT: mov r7, #0
+; CHECK-FP16-NEXT: rscs r0, r2, #0
+; CHECK-FP16-NEXT: mov r8, #0
; CHECK-FP16-NEXT: rscs r0, r3, #0
-; CHECK-FP16-NEXT: mov r5, #0
; CHECK-FP16-NEXT: movwlt r7, #1
-; CHECK-FP16-NEXT: cmp r7, #0
-; CHECK-FP16-NEXT: moveq r4, r7
-; CHECK-FP16-NEXT: movne r7, r1
+; CHECK-FP16-NEXT: movge r4, r7
+; CHECK-FP16-NEXT: movlt r7, r1
; CHECK-FP16-NEXT: bl __fixhfti
-; CHECK-FP16-NEXT: subs r6, r2, #1
+; CHECK-FP16-NEXT: subs r5, r2, #1
; CHECK-FP16-NEXT: vmov.32 d1[0], r7
-; CHECK-FP16-NEXT: sbcs r6, r3, #0
-; CHECK-FP16-NEXT: movlt r8, r2
-; CHECK-FP16-NEXT: mov r2, #0
-; CHECK-FP16-NEXT: movwlt r2, #1
-; CHECK-FP16-NEXT: cmp r2, #0
-; CHECK-FP16-NEXT: moveq r3, r2
-; CHECK-FP16-NEXT: moveq r1, r2
-; CHECK-FP16-NEXT: movne r2, r0
-; CHECK-FP16-NEXT: rsbs r0, r2, #0
+; CHECK-FP16-NEXT: sbcs r5, r3, #0
+; CHECK-FP16-NEXT: mov r5, #0
+; CHECK-FP16-NEXT: movlt r6, r2
+; CHECK-FP16-NEXT: movwlt r5, #1
+; CHECK-FP16-NEXT: movge r3, r5
+; CHECK-FP16-NEXT: movge r1, r5
+; CHECK-FP16-NEXT: movlt r5, r0
+; CHECK-FP16-NEXT: rsbs r0, r5, #0
; CHECK-FP16-NEXT: rscs r0, r1, #0
-; CHECK-FP16-NEXT: rscs r0, r8, #0
+; CHECK-FP16-NEXT: rscs r0, r6, #0
; CHECK-FP16-NEXT: rscs r0, r3, #0
-; CHECK-FP16-NEXT: movwlt r5, #1
-; CHECK-FP16-NEXT: cmp r5, #0
-; CHECK-FP16-NEXT: moveq r2, r5
-; CHECK-FP16-NEXT: movne r5, r1
-; CHECK-FP16-NEXT: vmov.32 d0[0], r2
+; CHECK-FP16-NEXT: movwlt r8, #1
+; CHECK-FP16-NEXT: movge r5, r8
+; CHECK-FP16-NEXT: movlt r8, r1
+; CHECK-FP16-NEXT: vmov.32 d0[0], r5
; CHECK-FP16-NEXT: vmov.32 d1[1], r4
-; CHECK-FP16-NEXT: vmov.32 d0[1], r5
+; CHECK-FP16-NEXT: vmov.32 d0[1], r8
; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, pc}
entry:
%conv = fptosi <2 x half> %x to <2 x i128>
@@ -2326,34 +2228,32 @@ define <2 x i32> @stest_f64i32_mm(<2 x double> %x) {
; CHECK-NEXT: vmov r0, r1, d8
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r8, r1
-; CHECK-NEXT: vmov r0, r1, d9
-; CHECK-NEXT: mvn r6, #-2147483648
-; CHECK-NEXT: subs r2, r4, r6
-; CHECK-NEXT: mov r5, #0
-; CHECK-NEXT: sbcs r2, r8, #0
-; CHECK-NEXT: mov r7, #0
-; CHECK-NEXT: movge r4, r6
-; CHECK-NEXT: movwlt r5, #1
+; CHECK-NEXT: vmov r0, r2, d9
+; CHECK-NEXT: mvn r5, #-2147483648
+; CHECK-NEXT: subs r3, r4, r5
+; CHECK-NEXT: sbcs r3, r1, #0
+; CHECK-NEXT: mvn r7, #0
+; CHECK-NEXT: mov r3, #0
+; CHECK-NEXT: movge r4, r5
+; CHECK-NEXT: movwlt r3, #1
+; CHECK-NEXT: mov r8, #-2147483648
+; CHECK-NEXT: movlt r3, r1
+; CHECK-NEXT: rsbs r1, r4, #-2147483648
+; CHECK-NEXT: sbcs r1, r7, r3
+; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: movge r4, r8
+; CHECK-NEXT: mov r1, r2
; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: subs r2, r0, r6
-; CHECK-NEXT: sbcs r2, r1, #0
-; CHECK-NEXT: movlt r6, r0
-; CHECK-NEXT: movwlt r7, #1
-; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: mov r0, #-2147483648
-; CHECK-NEXT: movne r7, r1
-; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: movne r5, r8
-; CHECK-NEXT: rsbs r2, r4, #-2147483648
-; CHECK-NEXT: mvn r1, #0
-; CHECK-NEXT: sbcs r2, r1, r5
-; CHECK-NEXT: movge r4, r0
-; CHECK-NEXT: rsbs r2, r6, #-2147483648
+; CHECK-NEXT: subs r2, r0, r5
; CHECK-NEXT: vmov.32 d0[0], r4
-; CHECK-NEXT: sbcs r1, r1, r7
-; CHECK-NEXT: movge r6, r0
-; CHECK-NEXT: vmov.32 d0[1], r6
+; CHECK-NEXT: sbcs r2, r1, #0
+; CHECK-NEXT: movwlt r6, #1
+; CHECK-NEXT: movlt r5, r0
+; CHECK-NEXT: movlt r6, r1
+; CHECK-NEXT: rsbs r0, r5, #-2147483648
+; CHECK-NEXT: sbcs r0, r7, r6
+; CHECK-NEXT: movge r5, r8
+; CHECK-NEXT: vmov.32 d0[1], r5
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, pc}
entry:
@@ -2406,38 +2306,34 @@ define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
; CHECK-NEXT: vmov r0, r1, d8
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: vmov r2, r12, d9
-; CHECK-NEXT: mvn r4, #0
-; CHECK-NEXT: subs r5, r0, r4
-; CHECK-NEXT: mov r3, #0
-; CHECK-NEXT: sbcs r5, r1, #0
+; CHECK-NEXT: mvn r5, #0
+; CHECK-NEXT: subs r3, r0, r5
; CHECK-NEXT: mov r6, #0
-; CHECK-NEXT: movge r0, r4
+; CHECK-NEXT: sbcs r3, r1, #0
+; CHECK-NEXT: mov r4, #0
+; CHECK-NEXT: mov r3, #0
+; CHECK-NEXT: movge r0, r5
; CHECK-NEXT: movwlt r3, #1
-; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: mov r5, #0
-; CHECK-NEXT: movne r3, r1
+; CHECK-NEXT: movlt r3, r1
; CHECK-NEXT: rsbs r1, r0, #0
; CHECK-NEXT: rscs r1, r3, #0
; CHECK-NEXT: movwlt r6, #1
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: movne r6, r0
+; CHECK-NEXT: movlt r6, r0
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: mov r1, r12
; CHECK-NEXT: bl __aeabi_d2lz
-; CHECK-NEXT: subs r2, r0, r4
+; CHECK-NEXT: subs r2, r0, r5
; CHECK-NEXT: vmov.32 d0[0], r6
; CHECK-NEXT: sbcs r2, r1, #0
-; CHECK-NEXT: movlt r4, r0
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: movne r0, r1
-; CHECK-NEXT: rsbs r1, r4, #0
-; CHECK-NEXT: rscs r0, r0, #0
-; CHECK-NEXT: movwlt r5, #1
-; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: movne r5, r4
-; CHECK-NEXT: vmov.32 d0[1], r5
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: movlt r5, r0
+; CHECK-NEXT: movwlt r2, #1
+; CHECK-NEXT: movlt r2, r1
+; CHECK-NEXT: rsbs r0, r5, #0
+; CHECK-NEXT: rscs r0, r2, #0
+; CHECK-NEXT: movwlt r4, #1
+; CHECK-NEXT: movlt r4, r5
+; CHECK-NEXT: vmov.32 d0[1], r4
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, r5, r6, pc}
entry:
@@ -2451,81 +2347,68 @@ entry:
define <4 x i32> @stest_f32i32_mm(<4 x float> %x) {
; CHECK-LABEL: stest_f32i32_mm:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, sp, #4
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #8
-; CHECK-NEXT: sub sp, sp, #8
; CHECK-NEXT: vorr q4, q0, q0
-; CHECK-NEXT: vmov r0, s19
-; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: vmov r2, s18
-; CHECK-NEXT: mov r11, r0
-; CHECK-NEXT: vmov r0, s17
-; CHECK-NEXT: mvn r6, #-2147483648
-; CHECK-NEXT: mov r3, #-2147483648
-; CHECK-NEXT: mvn r10, #0
-; CHECK-NEXT: vmov r7, s16
-; CHECK-NEXT: mov r4, #0
-; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: subs r2, r11, r6
-; CHECK-NEXT: sbcs r2, r1, #0
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movge r11, r6
-; CHECK-NEXT: movwlt r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: movne r2, r1
-; CHECK-NEXT: rsbs r1, r11, #-2147483648
-; CHECK-NEXT: sbcs r1, r10, r2
-; CHECK-NEXT: movge r11, r3
+; CHECK-NEXT: mov r8, #-2147483648
+; CHECK-NEXT: mvn r9, #0
+; CHECK-NEXT: mov r10, #0
+; CHECK-NEXT: vmov r0, s19
+; CHECK-NEXT: vmov r5, s16
+; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mvn r7, #-2147483648
+; CHECK-NEXT: subs r0, r0, r7
+; CHECK-NEXT: sbcs r0, r1, #0
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: movge r4, r7
+; CHECK-NEXT: movwlt r0, #1
+; CHECK-NEXT: movlt r0, r1
+; CHECK-NEXT: rsbs r1, r4, #-2147483648
+; CHECK-NEXT: sbcs r0, r9, r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: movge r4, r8
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: subs r0, r0, r6
+; CHECK-NEXT: subs r0, r0, r7
; CHECK-NEXT: sbcs r0, r1, #0
-; CHECK-NEXT: mov r9, #0
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: movge r5, r6
-; CHECK-NEXT: movwlt r9, #1
-; CHECK-NEXT: cmp r9, #0
-; CHECK-NEXT: movne r9, r1
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: movwlt r2, #1
+; CHECK-NEXT: movlt r2, r1
+; CHECK-NEXT: movge r5, r7
+; CHECK-NEXT: rsbs r1, r5, #-2147483648
+; CHECK-NEXT: sbcs r1, r9, r2
+; CHECK-NEXT: movge r5, r8
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: subs r0, r0, r6
+; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: subs r0, r0, r7
; CHECK-NEXT: sbcs r0, r1, #0
-; CHECK-NEXT: mov r8, #0
-; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: movge r7, r6
-; CHECK-NEXT: movwlt r8, #1
-; CHECK-NEXT: cmp r8, #0
-; CHECK-NEXT: movne r8, r1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: movge r6, r7
+; CHECK-NEXT: movwlt r0, #1
+; CHECK-NEXT: movlt r0, r1
+; CHECK-NEXT: rsbs r1, r6, #-2147483648
+; CHECK-NEXT: sbcs r0, r9, r0
+; CHECK-NEXT: vmov r0, s17
+; CHECK-NEXT: movge r6, r8
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: subs r2, r0, r6
+; CHECK-NEXT: subs r2, r0, r7
+; CHECK-NEXT: vmov.32 d1[0], r6
; CHECK-NEXT: sbcs r2, r1, #0
-; CHECK-NEXT: movlt r6, r0
-; CHECK-NEXT: movwlt r4, #1
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: movne r4, r1
-; CHECK-NEXT: rsbs r0, r6, #-2147483648
-; CHECK-NEXT: sbcs r0, r10, r4
-; CHECK-NEXT: mov r1, #-2147483648
-; CHECK-NEXT: movge r6, r1
+; CHECK-NEXT: movwlt r10, #1
+; CHECK-NEXT: movlt r7, r0
+; CHECK-NEXT: movlt r10, r1
; CHECK-NEXT: rsbs r0, r7, #-2147483648
-; CHECK-NEXT: sbcs r0, r10, r8
-; CHECK-NEXT: vmov.32 d1[0], r6
-; CHECK-NEXT: movge r7, r1
-; CHECK-NEXT: rsbs r0, r5, #-2147483648
-; CHECK-NEXT: vmov.32 d0[0], r7
-; CHECK-NEXT: sbcs r0, r10, r9
-; CHECK-NEXT: movge r5, r1
-; CHECK-NEXT: vmov.32 d1[1], r11
-; CHECK-NEXT: vmov.32 d0[1], r5
-; CHECK-NEXT: add sp, sp, #8
+; CHECK-NEXT: vmov.32 d0[0], r5
+; CHECK-NEXT: sbcs r0, r9, r10
+; CHECK-NEXT: movge r7, r8
+; CHECK-NEXT: vmov.32 d1[1], r4
+; CHECK-NEXT: vmov.32 d0[1], r7
; CHECK-NEXT: vpop {d8, d9}
-; CHECK-NEXT: add sp, sp, #4
-; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
entry:
%conv = fptosi <4 x float> %x to <4 x i64>
%spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
@@ -2591,70 +2474,62 @@ define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
; CHECK-NEXT: vmov r0, s19
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: vmov r2, s16
-; CHECK-NEXT: mvn r6, #0
-; CHECK-NEXT: subs r3, r0, r6
+; CHECK-NEXT: mvn r7, #0
+; CHECK-NEXT: subs r3, r0, r7
; CHECK-NEXT: mov r4, #0
; CHECK-NEXT: sbcs r3, r1, #0
-; CHECK-NEXT: vmov r8, s17
+; CHECK-NEXT: mov r10, #0
; CHECK-NEXT: mov r3, #0
-; CHECK-NEXT: movge r0, r6
+; CHECK-NEXT: movge r0, r7
; CHECK-NEXT: movwlt r3, #1
-; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: movne r3, r1
+; CHECK-NEXT: vmov r9, s18
+; CHECK-NEXT: movlt r3, r1
; CHECK-NEXT: rsbs r1, r0, #0
; CHECK-NEXT: rscs r1, r3, #0
-; CHECK-NEXT: vmov r9, s18
+; CHECK-NEXT: vmov r8, s17
; CHECK-NEXT: movwlt r4, #1
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: movne r4, r0
-; CHECK-NEXT: mov r10, #0
+; CHECK-NEXT: movlt r4, r0
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: subs r2, r0, r6
+; CHECK-NEXT: subs r2, r0, r7
; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: sbcs r2, r1, #0
; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movge r0, r6
+; CHECK-NEXT: movge r0, r7
; CHECK-NEXT: movwlt r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: movne r2, r1
+; CHECK-NEXT: movlt r2, r1
; CHECK-NEXT: rsbs r1, r0, #0
; CHECK-NEXT: rscs r1, r2, #0
; CHECK-NEXT: movwlt r5, #1
-; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: movne r5, r0
+; CHECK-NEXT: movlt r5, r0
; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: subs r2, r0, r6
-; CHECK-NEXT: mov r7, #0
+; CHECK-NEXT: subs r2, r0, r7
+; CHECK-NEXT: mov r6, #0
; CHECK-NEXT: sbcs r2, r1, #0
; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movge r0, r6
+; CHECK-NEXT: movge r0, r7
; CHECK-NEXT: movwlt r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: movne r2, r1
+; CHECK-NEXT: movlt r2, r1
; CHECK-NEXT: rsbs r1, r0, #0
; CHECK-NEXT: rscs r1, r2, #0
-; CHECK-NEXT: movwlt r7, #1
-; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: movne r7, r0
+; CHECK-NEXT: movwlt r6, #1
+; CHECK-NEXT: movlt r6, r0
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: subs r2, r0, r6
-; CHECK-NEXT: vmov.32 d1[0], r7
+; CHECK-NEXT: subs r2, r0, r7
+; CHECK-NEXT: vmov.32 d1[0], r6
; CHECK-NEXT: sbcs r2, r1, #0
-; CHECK-NEXT: movlt r6, r0
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: movne r0, r1
-; CHECK-NEXT: rsbs r1, r6, #0
-; CHECK-NEXT: rscs r0, r0, #0
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: movlt r7, r0
+; CHECK-NEXT: movwlt r2, #1
; CHECK-NEXT: vmov.32 d0[0], r5
-; CHECK-NEXT: movwlt r10, #1
-; CHECK-NEXT: cmp r10, #0
+; CHECK-NEXT: movlt r2, r1
+; CHECK-NEXT: rsbs r0, r7, #0
+; CHECK-NEXT: rscs r0, r2, #0
; CHECK-NEXT: vmov.32 d1[1], r4
-; CHECK-NEXT: movne r10, r6
+; CHECK-NEXT: movwlt r10, #1
+; CHECK-NEXT: movlt r10, r7
; CHECK-NEXT: vmov.32 d0[1], r10
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
@@ -2669,164 +2544,142 @@ entry:
define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
; CHECK-NEON-LABEL: stest_f16i32_mm:
; CHECK-NEON: @ %bb.0: @ %entry
-; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-NEON-NEXT: .pad #4
-; CHECK-NEON-NEXT: sub sp, sp, #4
+; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEON-NEXT: .vsave {d8, d9, d10}
; CHECK-NEON-NEXT: vpush {d8, d9, d10}
-; CHECK-NEON-NEXT: .pad #8
-; CHECK-NEON-NEXT: sub sp, sp, #8
; CHECK-NEON-NEXT: vmov r0, s3
-; CHECK-NEON-NEXT: vmov.f32 s16, s2
-; CHECK-NEON-NEXT: vmov.f32 s18, s1
+; CHECK-NEON-NEXT: vmov.f32 s18, s2
+; CHECK-NEON-NEXT: vmov.f32 s16, s1
; CHECK-NEON-NEXT: vmov.f32 s20, s0
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
-; CHECK-NEON-NEXT: vmov r2, s16
-; CHECK-NEON-NEXT: mov r11, r0
-; CHECK-NEON-NEXT: vmov r0, s18
-; CHECK-NEON-NEXT: mvn r6, #-2147483648
-; CHECK-NEON-NEXT: mov r3, #-2147483648
-; CHECK-NEON-NEXT: mvn r10, #0
-; CHECK-NEON-NEXT: vmov r7, s20
-; CHECK-NEON-NEXT: mov r4, #0
-; CHECK-NEON-NEXT: str r2, [sp, #4] @ 4-byte Spill
-; CHECK-NEON-NEXT: subs r2, r11, r6
+; CHECK-NEON-NEXT: mov r4, r0
+; CHECK-NEON-NEXT: vmov r0, s20
+; CHECK-NEON-NEXT: mvn r7, #-2147483648
+; CHECK-NEON-NEXT: subs r2, r4, r7
; CHECK-NEON-NEXT: sbcs r2, r1, #0
+; CHECK-NEON-NEXT: mov r8, #-2147483648
; CHECK-NEON-NEXT: mov r2, #0
-; CHECK-NEON-NEXT: movge r11, r6
+; CHECK-NEON-NEXT: movge r4, r7
; CHECK-NEON-NEXT: movwlt r2, #1
-; CHECK-NEON-NEXT: cmp r2, #0
-; CHECK-NEON-NEXT: movne r2, r1
-; CHECK-NEON-NEXT: rsbs r1, r11, #-2147483648
-; CHECK-NEON-NEXT: sbcs r1, r10, r2
-; CHECK-NEON-NEXT: movge r11, r3
+; CHECK-NEON-NEXT: mvn r9, #0
+; CHECK-NEON-NEXT: movlt r2, r1
+; CHECK-NEON-NEXT: rsbs r1, r4, #-2147483648
+; CHECK-NEON-NEXT: sbcs r1, r9, r2
+; CHECK-NEON-NEXT: mov r10, #0
+; CHECK-NEON-NEXT: movge r4, r8
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
; CHECK-NEON-NEXT: mov r5, r0
-; CHECK-NEON-NEXT: subs r0, r0, r6
+; CHECK-NEON-NEXT: subs r0, r0, r7
; CHECK-NEON-NEXT: sbcs r0, r1, #0
-; CHECK-NEON-NEXT: mov r8, #0
-; CHECK-NEON-NEXT: mov r0, r7
-; CHECK-NEON-NEXT: movge r5, r6
-; CHECK-NEON-NEXT: movwlt r8, #1
-; CHECK-NEON-NEXT: cmp r8, #0
-; CHECK-NEON-NEXT: movne r8, r1
+; CHECK-NEON-NEXT: mov r2, #0
+; CHECK-NEON-NEXT: vmov r0, s18
+; CHECK-NEON-NEXT: movwlt r2, #1
+; CHECK-NEON-NEXT: movlt r2, r1
+; CHECK-NEON-NEXT: movge r5, r7
+; CHECK-NEON-NEXT: rsbs r1, r5, #-2147483648
+; CHECK-NEON-NEXT: sbcs r1, r9, r2
+; CHECK-NEON-NEXT: movge r5, r8
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
-; CHECK-NEON-NEXT: mov r7, r0
-; CHECK-NEON-NEXT: subs r0, r0, r6
+; CHECK-NEON-NEXT: mov r6, r0
+; CHECK-NEON-NEXT: subs r0, r0, r7
; CHECK-NEON-NEXT: sbcs r0, r1, #0
-; CHECK-NEON-NEXT: mov r9, #0
-; CHECK-NEON-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEON-NEXT: movge r7, r6
-; CHECK-NEON-NEXT: movwlt r9, #1
-; CHECK-NEON-NEXT: cmp r9, #0
-; CHECK-NEON-NEXT: movne r9, r1
+; CHECK-NEON-NEXT: mov r0, #0
+; CHECK-NEON-NEXT: movge r6, r7
+; CHECK-NEON-NEXT: movwlt r0, #1
+; CHECK-NEON-NEXT: movlt r0, r1
+; CHECK-NEON-NEXT: rsbs r1, r6, #-2147483648
+; CHECK-NEON-NEXT: sbcs r0, r9, r0
+; CHECK-NEON-NEXT: vmov r0, s16
+; CHECK-NEON-NEXT: movge r6, r8
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
-; CHECK-NEON-NEXT: subs r2, r0, r6
+; CHECK-NEON-NEXT: subs r2, r0, r7
+; CHECK-NEON-NEXT: vmov.32 d1[0], r6
; CHECK-NEON-NEXT: sbcs r2, r1, #0
-; CHECK-NEON-NEXT: movlt r6, r0
-; CHECK-NEON-NEXT: movwlt r4, #1
-; CHECK-NEON-NEXT: cmp r4, #0
-; CHECK-NEON-NEXT: movne r4, r1
-; CHECK-NEON-NEXT: rsbs r0, r6, #-2147483648
-; CHECK-NEON-NEXT: sbcs r0, r10, r4
-; CHECK-NEON-NEXT: mov r1, #-2147483648
-; CHECK-NEON-NEXT: movge r6, r1
+; CHECK-NEON-NEXT: movwlt r10, #1
+; CHECK-NEON-NEXT: movlt r7, r0
+; CHECK-NEON-NEXT: movlt r10, r1
; CHECK-NEON-NEXT: rsbs r0, r7, #-2147483648
-; CHECK-NEON-NEXT: sbcs r0, r10, r9
-; CHECK-NEON-NEXT: vmov.32 d1[0], r6
-; CHECK-NEON-NEXT: movge r7, r1
-; CHECK-NEON-NEXT: rsbs r0, r5, #-2147483648
-; CHECK-NEON-NEXT: vmov.32 d0[0], r7
-; CHECK-NEON-NEXT: sbcs r0, r10, r8
-; CHECK-NEON-NEXT: movge r5, r1
-; CHECK-NEON-NEXT: vmov.32 d1[1], r11
-; CHECK-NEON-NEXT: vmov.32 d0[1], r5
-; CHECK-NEON-NEXT: add sp, sp, #8
+; CHECK-NEON-NEXT: vmov.32 d0[0], r5
+; CHECK-NEON-NEXT: sbcs r0, r9, r10
+; CHECK-NEON-NEXT: movge r7, r8
+; CHECK-NEON-NEXT: vmov.32 d1[1], r4
+; CHECK-NEON-NEXT: vmov.32 d0[1], r7
; CHECK-NEON-NEXT: vpop {d8, d9, d10}
-; CHECK-NEON-NEXT: add sp, sp, #4
-; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
;
; CHECK-FP16-LABEL: stest_f16i32_mm:
; CHECK-FP16: @ %bb.0: @ %entry
-; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-FP16-NEXT: .pad #4
-; CHECK-FP16-NEXT: sub sp, sp, #4
+; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-FP16-NEXT: .vsave {d8, d9}
; CHECK-FP16-NEXT: vpush {d8, d9}
; CHECK-FP16-NEXT: vmov.u16 r0, d0[3]
-; CHECK-FP16-NEXT: vmov.u16 r4, d0[2]
-; CHECK-FP16-NEXT: vmov.u16 r5, d0[0]
-; CHECK-FP16-NEXT: vmov.u16 r6, d0[1]
+; CHECK-FP16-NEXT: vorr d8, d0, d0
+; CHECK-FP16-NEXT: vmov.u16 r5, d0[2]
+; CHECK-FP16-NEXT: vmov.u16 r6, d0[0]
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfdi
-; CHECK-FP16-NEXT: mov r10, r0
+; CHECK-FP16-NEXT: mov r4, r0
; CHECK-FP16-NEXT: mvn r7, #-2147483648
; CHECK-FP16-NEXT: subs r0, r0, r7
; CHECK-FP16-NEXT: vmov s0, r6
; CHECK-FP16-NEXT: sbcs r0, r1, #0
-; CHECK-FP16-NEXT: mov r2, #-2147483648
+; CHECK-FP16-NEXT: mov r8, #-2147483648
; CHECK-FP16-NEXT: mov r0, #0
-; CHECK-FP16-NEXT: movge r10, r7
+; CHECK-FP16-NEXT: movge r4, r7
; CHECK-FP16-NEXT: movwlt r0, #1
-; CHECK-FP16-NEXT: cmp r0, #0
-; CHECK-FP16-NEXT: movne r0, r1
-; CHECK-FP16-NEXT: rsbs r1, r10, #-2147483648
; CHECK-FP16-NEXT: mvn r9, #0
+; CHECK-FP16-NEXT: movlt r0, r1
+; CHECK-FP16-NEXT: rsbs r1, r4, #-2147483648
; CHECK-FP16-NEXT: sbcs r0, r9, r0
-; CHECK-FP16-NEXT: vmov s16, r4
-; CHECK-FP16-NEXT: mov r11, #0
+; CHECK-FP16-NEXT: mov r10, #0
; CHECK-FP16-NEXT: vmov s18, r5
-; CHECK-FP16-NEXT: movge r10, r2
+; CHECK-FP16-NEXT: movge r4, r8
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: vmov.f32 s0, s18
; CHECK-FP16-NEXT: mov r5, r0
; CHECK-FP16-NEXT: subs r0, r0, r7
-; CHECK-FP16-NEXT: mov r4, #0
; CHECK-FP16-NEXT: sbcs r0, r1, #0
+; CHECK-FP16-NEXT: mov r0, #0
; CHECK-FP16-NEXT: movge r5, r7
-; CHECK-FP16-NEXT: movwlt r4, #1
-; CHECK-FP16-NEXT: cmp r4, #0
-; CHECK-FP16-NEXT: movne r4, r1
+; CHECK-FP16-NEXT: movwlt r0, #1
+; CHECK-FP16-NEXT: movlt r0, r1
+; CHECK-FP16-NEXT: rsbs r1, r5, #-2147483648
+; CHECK-FP16-NEXT: sbcs r0, r9, r0
+; CHECK-FP16-NEXT: movge r5, r8
; CHECK-FP16-NEXT: bl __fixhfdi
-; CHECK-FP16-NEXT: vmov.f32 s0, s16
; CHECK-FP16-NEXT: mov r6, r0
; CHECK-FP16-NEXT: subs r0, r0, r7
-; CHECK-FP16-NEXT: mov r8, #0
; CHECK-FP16-NEXT: sbcs r0, r1, #0
+; CHECK-FP16-NEXT: mov r0, #0
; CHECK-FP16-NEXT: movge r6, r7
-; CHECK-FP16-NEXT: movwlt r8, #1
-; CHECK-FP16-NEXT: cmp r8, #0
-; CHECK-FP16-NEXT: movne r8, r1
+; CHECK-FP16-NEXT: movwlt r0, #1
+; CHECK-FP16-NEXT: movlt r0, r1
+; CHECK-FP16-NEXT: vmov.u16 r1, d8[1]
+; CHECK-FP16-NEXT: vmov s0, r1
+; CHECK-FP16-NEXT: rsbs r1, r6, #-2147483648
+; CHECK-FP16-NEXT: sbcs r0, r9, r0
+; CHECK-FP16-NEXT: movge r6, r8
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: subs r2, r0, r7
+; CHECK-FP16-NEXT: vmov.32 d1[0], r6
; CHECK-FP16-NEXT: sbcs r2, r1, #0
+; CHECK-FP16-NEXT: movwlt r10, #1
; CHECK-FP16-NEXT: movlt r7, r0
-; CHECK-FP16-NEXT: movwlt r11, #1
-; CHECK-FP16-NEXT: cmp r11, #0
-; CHECK-FP16-NEXT: movne r11, r1
+; CHECK-FP16-NEXT: movlt r10, r1
; CHECK-FP16-NEXT: rsbs r0, r7, #-2147483648
-; CHECK-FP16-NEXT: sbcs r0, r9, r11
-; CHECK-FP16-NEXT: mov r1, #-2147483648
-; CHECK-FP16-NEXT: movge r7, r1
-; CHECK-FP16-NEXT: rsbs r0, r6, #-2147483648
-; CHECK-FP16-NEXT: sbcs r0, r9, r8
-; CHECK-FP16-NEXT: vmov.32 d1[0], r7
-; CHECK-FP16-NEXT: movge r6, r1
-; CHECK-FP16-NEXT: rsbs r0, r5, #-2147483648
-; CHECK-FP16-NEXT: vmov.32 d0[0], r6
-; CHECK-FP16-NEXT: sbcs r0, r9, r4
-; CHECK-FP16-NEXT: movge r5, r1
-; CHECK-FP16-NEXT: vmov.32 d1[1], r10
-; CHECK-FP16-NEXT: vmov.32 d0[1], r5
+; CHECK-FP16-NEXT: vmov.32 d0[0], r5
+; CHECK-FP16-NEXT: sbcs r0, r9, r10
+; CHECK-FP16-NEXT: movge r7, r8
+; CHECK-FP16-NEXT: vmov.32 d1[1], r4
+; CHECK-FP16-NEXT: vmov.32 d0[1], r7
; CHECK-FP16-NEXT: vpop {d8, d9}
-; CHECK-FP16-NEXT: add sp, sp, #4
-; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
entry:
%conv = fptosi <4 x half> %x to <4 x i64>
%spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
@@ -2944,73 +2797,65 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
; CHECK-NEON-NEXT: vmov r2, s20
-; CHECK-NEON-NEXT: mvn r6, #0
-; CHECK-NEON-NEXT: subs r3, r0, r6
+; CHECK-NEON-NEXT: mvn r7, #0
+; CHECK-NEON-NEXT: subs r3, r0, r7
; CHECK-NEON-NEXT: mov r4, #0
; CHECK-NEON-NEXT: sbcs r3, r1, #0
-; CHECK-NEON-NEXT: vmov r8, s18
+; CHECK-NEON-NEXT: mov r10, #0
; CHECK-NEON-NEXT: mov r3, #0
-; CHECK-NEON-NEXT: movge r0, r6
+; CHECK-NEON-NEXT: movge r0, r7
; CHECK-NEON-NEXT: movwlt r3, #1
-; CHECK-NEON-NEXT: cmp r3, #0
-; CHECK-NEON-NEXT: movne r3, r1
+; CHECK-NEON-NEXT: vmov r8, s18
+; CHECK-NEON-NEXT: movlt r3, r1
; CHECK-NEON-NEXT: rsbs r1, r0, #0
; CHECK-NEON-NEXT: rscs r1, r3, #0
; CHECK-NEON-NEXT: vmov r9, s16
; CHECK-NEON-NEXT: movwlt r4, #1
-; CHECK-NEON-NEXT: cmp r4, #0
-; CHECK-NEON-NEXT: movne r4, r0
-; CHECK-NEON-NEXT: mov r10, #0
+; CHECK-NEON-NEXT: movlt r4, r0
; CHECK-NEON-NEXT: mov r0, r2
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
-; CHECK-NEON-NEXT: subs r2, r0, r6
+; CHECK-NEON-NEXT: subs r2, r0, r7
; CHECK-NEON-NEXT: mov r5, #0
; CHECK-NEON-NEXT: sbcs r2, r1, #0
; CHECK-NEON-NEXT: mov r2, #0
-; CHECK-NEON-NEXT: movge r0, r6
+; CHECK-NEON-NEXT: movge r0, r7
; CHECK-NEON-NEXT: movwlt r2, #1
-; CHECK-NEON-NEXT: cmp r2, #0
-; CHECK-NEON-NEXT: movne r2, r1
+; CHECK-NEON-NEXT: movlt r2, r1
; CHECK-NEON-NEXT: rsbs r1, r0, #0
; CHECK-NEON-NEXT: rscs r1, r2, #0
; CHECK-NEON-NEXT: movwlt r5, #1
-; CHECK-NEON-NEXT: cmp r5, #0
-; CHECK-NEON-NEXT: movne r5, r0
+; CHECK-NEON-NEXT: movlt r5, r0
; CHECK-NEON-NEXT: mov r0, r9
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
-; CHECK-NEON-NEXT: subs r2, r0, r6
-; CHECK-NEON-NEXT: mov r7, #0
+; CHECK-NEON-NEXT: subs r2, r0, r7
+; CHECK-NEON-NEXT: mov r6, #0
; CHECK-NEON-NEXT: sbcs r2, r1, #0
; CHECK-NEON-NEXT: mov r2, #0
-; CHECK-NEON-NEXT: movge r0, r6
+; CHECK-NEON-NEXT: movge r0, r7
; CHECK-NEON-NEXT: movwlt r2, #1
-; CHECK-NEON-NEXT: cmp r2, #0
-; CHECK-NEON-NEXT: movne r2, r1
+; CHECK-NEON-NEXT: movlt r2, r1
; CHECK-NEON-NEXT: rsbs r1, r0, #0
; CHECK-NEON-NEXT: rscs r1, r2, #0
-; CHECK-NEON-NEXT: movwlt r7, #1
-; CHECK-NEON-NEXT: cmp r7, #0
-; CHECK-NEON-NEXT: movne r7, r0
+; CHECK-NEON-NEXT: movwlt r6, #1
+; CHECK-NEON-NEXT: movlt r6, r0
; CHECK-NEON-NEXT: mov r0, r8
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
-; CHECK-NEON-NEXT: subs r2, r0, r6
-; CHECK-NEON-NEXT: vmov.32 d1[0], r7
+; CHECK-NEON-NEXT: subs r2, r0, r7
+; CHECK-NEON-NEXT: vmov.32 d1[0], r6
; CHECK-NEON-NEXT: sbcs r2, r1, #0
-; CHECK-NEON-NEXT: movlt r6, r0
-; CHECK-NEON-NEXT: mov r0, #0
-; CHECK-NEON-NEXT: movwlt r0, #1
-; CHECK-NEON-NEXT: cmp r0, #0
-; CHECK-NEON-NEXT: movne r0, r1
-; CHECK-NEON-NEXT: rsbs r1, r6, #0
-; CHECK-NEON-NEXT: rscs r0, r0, #0
+; CHECK-NEON-NEXT: mov r2, #0
+; CHECK-NEON-NEXT: movlt r7, r0
+; CHECK-NEON-NEXT: movwlt r2, #1
; CHECK-NEON-NEXT: vmov.32 d0[0], r5
-; CHECK-NEON-NEXT: movwlt r10, #1
-; CHECK-NEON-NEXT: cmp r10, #0
+; CHECK-NEON-NEXT: movlt r2, r1
+; CHECK-NEON-NEXT: rsbs r0, r7, #0
+; CHECK-NEON-NEXT: rscs r0, r2, #0
; CHECK-NEON-NEXT: vmov.32 d1[1], r4
-; CHECK-NEON-NEXT: movne r10, r6
+; CHECK-NEON-NEXT: movwlt r10, #1
+; CHECK-NEON-NEXT: movlt r10, r7
; CHECK-NEON-NEXT: vmov.32 d0[1], r10
; CHECK-NEON-NEXT: vpop {d8, d9, d10}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
@@ -3023,75 +2868,67 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
; CHECK-FP16-NEXT: vpush {d8, d9}
; CHECK-FP16-NEXT: vmov.u16 r0, d0[3]
; CHECK-FP16-NEXT: vorr d8, d0, d0
-; CHECK-FP16-NEXT: vmov.u16 r5, d0[0]
+; CHECK-FP16-NEXT: vmov.u16 r6, d0[0]
+; CHECK-FP16-NEXT: vmov.u16 r7, d0[2]
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: vmov.u16 r2, d8[1]
-; CHECK-FP16-NEXT: mvn r4, #0
-; CHECK-FP16-NEXT: vmov.u16 r3, d8[2]
-; CHECK-FP16-NEXT: vmov s0, r5
+; CHECK-FP16-NEXT: mvn r5, #0
+; CHECK-FP16-NEXT: vmov s0, r6
; CHECK-FP16-NEXT: mov r6, #0
; CHECK-FP16-NEXT: mov r8, #0
+; CHECK-FP16-NEXT: vmov s18, r7
; CHECK-FP16-NEXT: vmov s16, r2
-; CHECK-FP16-NEXT: subs r2, r0, r4
+; CHECK-FP16-NEXT: subs r2, r0, r5
; CHECK-FP16-NEXT: sbcs r2, r1, #0
-; CHECK-FP16-NEXT: vmov s18, r3
; CHECK-FP16-NEXT: mov r2, #0
-; CHECK-FP16-NEXT: movge r0, r4
+; CHECK-FP16-NEXT: movge r0, r5
; CHECK-FP16-NEXT: movwlt r2, #1
-; CHECK-FP16-NEXT: cmp r2, #0
-; CHECK-FP16-NEXT: movne r2, r1
+; CHECK-FP16-NEXT: movlt r2, r1
; CHECK-FP16-NEXT: rsbs r1, r0, #0
; CHECK-FP16-NEXT: rscs r1, r2, #0
; CHECK-FP16-NEXT: movwlt r6, #1
-; CHECK-FP16-NEXT: cmp r6, #0
-; CHECK-FP16-NEXT: movne r6, r0
+; CHECK-FP16-NEXT: movlt r6, r0
; CHECK-FP16-NEXT: bl __fixhfdi
-; CHECK-FP16-NEXT: subs r2, r0, r4
+; CHECK-FP16-NEXT: subs r2, r0, r5
; CHECK-FP16-NEXT: vmov.f32 s0, s18
; CHECK-FP16-NEXT: sbcs r2, r1, #0
; CHECK-FP16-NEXT: mov r7, #0
; CHECK-FP16-NEXT: mov r2, #0
-; CHECK-FP16-NEXT: movge r0, r4
+; CHECK-FP16-NEXT: movge r0, r5
; CHECK-FP16-NEXT: movwlt r2, #1
-; CHECK-FP16-NEXT: cmp r2, #0
-; CHECK-FP16-NEXT: movne r2, r1
+; CHECK-FP16-NEXT: movlt r2, r1
; CHECK-FP16-NEXT: rsbs r1, r0, #0
; CHECK-FP16-NEXT: rscs r1, r2, #0
; CHECK-FP16-NEXT: movwlt r7, #1
-; CHECK-FP16-NEXT: cmp r7, #0
-; CHECK-FP16-NEXT: movne r7, r0
+; CHECK-FP16-NEXT: movlt r7, r0
; CHECK-FP16-NEXT: bl __fixhfdi
-; CHECK-FP16-NEXT: subs r2, r0, r4
+; CHECK-FP16-NEXT: subs r2, r0, r5
; CHECK-FP16-NEXT: vmov.f32 s0, s16
; CHECK-FP16-NEXT: sbcs r2, r1, #0
-; CHECK-FP16-NEXT: mov r5, #0
+; CHECK-FP16-NEXT: mov r4, #0
; CHECK-FP16-NEXT: mov r2, #0
-; CHECK-FP16-NEXT: movge r0, r4
+; CHECK-FP16-NEXT: movge r0, r5
; CHECK-FP16-NEXT: movwlt r2, #1
-; CHECK-FP16-NEXT: cmp r2, #0
-; CHECK-FP16-NEXT: movne r2, r1
+; CHECK-FP16-NEXT: movlt r2, r1
; CHECK-FP16-NEXT: rsbs r1, r0, #0
; CHECK-FP16-NEXT: rscs r1, r2, #0
-; CHECK-FP16-NEXT: movwlt r5, #1
-; CHECK-FP16-NEXT: cmp r5, #0
-; CHECK-FP16-NEXT: movne r5, r0
+; CHECK-FP16-NEXT: movwlt r4, #1
+; CHECK-FP16-NEXT: movlt r4, r0
; CHECK-FP16-NEXT: bl __fixhfdi
-; CHECK-FP16-NEXT: subs r2, r0, r4
-; CHECK-FP16-NEXT: vmov.32 d1[0], r5
+; CHECK-FP16-NEXT: subs r2, r0, r5
+; CHECK-FP16-NEXT: vmov.32 d1[0], r4
; CHECK-FP16-NEXT: sbcs r2, r1, #0
-; CHECK-FP16-NEXT: movlt r4, r0
-; CHECK-FP16-NEXT: mov r0, #0
-; CHECK-FP16-NEXT: movwlt r0, #1
-; CHECK-FP16-NEXT: cmp r0, #0
-; CHECK-FP16-NEXT: movne r0, r1
-; CHECK-FP16-NEXT: rsbs r1, r4, #0
-; CHECK-FP16-NEXT: rscs r0, r0, #0
+; CHECK-FP16-NEXT: mov r2, #0
+; CHECK-FP16-NEXT: movlt r5, r0
+; CHECK-FP16-NEXT: movwlt r2, #1
; CHECK-FP16-NEXT: vmov.32 d0[0], r7
-; CHECK-FP16-NEXT: movwlt r8, #1
-; CHECK-FP16-NEXT: cmp r8, #0
+; CHECK-FP16-NEXT: movlt r2, r1
+; CHECK-FP16-NEXT: rsbs r0, r5, #0
+; CHECK-FP16-NEXT: rscs r0, r2, #0
; CHECK-FP16-NEXT: vmov.32 d1[1], r6
-; CHECK-FP16-NEXT: movne r8, r4
+; CHECK-FP16-NEXT: movwlt r8, #1
+; CHECK-FP16-NEXT: movlt r8, r5
; CHECK-FP16-NEXT: vmov.32 d0[1], r8
; CHECK-FP16-NEXT: vpop {d8, d9}
; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, pc}
@@ -3592,8 +3429,8 @@ entry:
define <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
; CHECK-LABEL: stest_f64i64_mm:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vorr q4, q0, q0
@@ -3602,55 +3439,50 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: mvn r9, #0
; CHECK-NEXT: subs r1, r0, r9
-; CHECK-NEXT: mvn r5, #-2147483648
-; CHECK-NEXT: sbcs r1, r4, r5
+; CHECK-NEXT: mvn r8, #-2147483648
+; CHECK-NEXT: sbcs r1, r4, r8
; CHECK-NEXT: vorr d0, d8, d8
; CHECK-NEXT: sbcs r1, r2, #0
; CHECK-NEXT: mov r7, #0
; CHECK-NEXT: sbcs r1, r3, #0
-; CHECK-NEXT: mov r8, #-2147483648
+; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: mov r1, #0
-; CHECK-NEXT: mov r10, #0
+; CHECK-NEXT: mvnge r4, #-2147483648
; CHECK-NEXT: movwlt r1, #1
-; CHECK-NEXT: cmp r1, #0
-; CHECK-NEXT: moveq r3, r1
-; CHECK-NEXT: movne r1, r2
-; CHECK-NEXT: moveq r4, r5
-; CHECK-NEXT: moveq r0, r9
+; CHECK-NEXT: mvnge r0, #0
+; CHECK-NEXT: movge r3, r1
+; CHECK-NEXT: movlt r1, r2
; CHECK-NEXT: rsbs r2, r0, #0
; CHECK-NEXT: rscs r2, r4, #-2147483648
; CHECK-NEXT: sbcs r1, r9, r1
; CHECK-NEXT: sbcs r1, r9, r3
; CHECK-NEXT: movwlt r7, #1
-; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: movne r7, r0
-; CHECK-NEXT: moveq r4, r8
+; CHECK-NEXT: movge r4, #-2147483648
+; CHECK-NEXT: movlt r7, r0
; CHECK-NEXT: bl __fixdfti
; CHECK-NEXT: subs r6, r0, r9
; CHECK-NEXT: vmov.32 d1[0], r7
-; CHECK-NEXT: sbcs r6, r1, r5
+; CHECK-NEXT: sbcs r6, r1, r8
; CHECK-NEXT: sbcs r6, r2, #0
; CHECK-NEXT: sbcs r6, r3, #0
; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: mvnge r1, #-2147483648
; CHECK-NEXT: movwlt r6, #1
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: moveq r3, r6
-; CHECK-NEXT: movne r6, r2
-; CHECK-NEXT: movne r5, r1
-; CHECK-NEXT: moveq r0, r9
-; CHECK-NEXT: rsbs r1, r0, #0
-; CHECK-NEXT: rscs r1, r5, #-2147483648
-; CHECK-NEXT: sbcs r1, r9, r6
-; CHECK-NEXT: sbcs r1, r9, r3
-; CHECK-NEXT: movwlt r10, #1
-; CHECK-NEXT: cmp r10, #0
-; CHECK-NEXT: movne r10, r0
-; CHECK-NEXT: moveq r5, r8
-; CHECK-NEXT: vmov.32 d0[0], r10
+; CHECK-NEXT: mvnge r0, #0
+; CHECK-NEXT: movge r3, r6
+; CHECK-NEXT: movlt r6, r2
+; CHECK-NEXT: rsbs r2, r0, #0
+; CHECK-NEXT: rscs r2, r1, #-2147483648
+; CHECK-NEXT: sbcs r2, r9, r6
+; CHECK-NEXT: sbcs r2, r9, r3
+; CHECK-NEXT: movwlt r5, #1
+; CHECK-NEXT: movge r1, #-2147483648
+; CHECK-NEXT: movlt r5, r0
+; CHECK-NEXT: vmov.32 d0[0], r5
; CHECK-NEXT: vmov.32 d1[1], r4
-; CHECK-NEXT: vmov.32 d0[1], r5
+; CHECK-NEXT: vmov.32 d0[1], r1
; CHECK-NEXT: vpop {d8, d9}
-; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
entry:
%conv = fptosi <2 x double> %x to <2 x i128>
%spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>)
@@ -3676,17 +3508,15 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
; CHECK-NEXT: mov r6, #0
; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: movwlo r6, #1
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: moveq r4, r6
-; CHECK-NEXT: movne r6, r0
+; CHECK-NEXT: movhs r4, r6
+; CHECK-NEXT: movlo r6, r0
; CHECK-NEXT: bl __fixunsdfti
; CHECK-NEXT: subs r2, r2, #1
; CHECK-NEXT: vmov.32 d1[0], r6
; CHECK-NEXT: sbcs r2, r3, #0
; CHECK-NEXT: movwlo r5, #1
-; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: moveq r0, r5
-; CHECK-NEXT: movne r5, r1
+; CHECK-NEXT: movhs r0, r5
+; CHECK-NEXT: movlo r5, r1
; CHECK-NEXT: vmov.32 d0[0], r0
; CHECK-NEXT: vmov.32 d1[1], r4
; CHECK-NEXT: vmov.32 d0[1], r5
@@ -3716,12 +3546,11 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: moveq r5, r0
-; CHECK-NEXT: moveq r4, r0
-; CHECK-NEXT: movne r0, r3
-; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: movge r5, r0
+; CHECK-NEXT: movge r4, r0
+; CHECK-NEXT: movlt r0, r3
+; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: movwmi r4, #0
; CHECK-NEXT: movwmi r5, #0
; CHECK-NEXT: bl __fixdfti
@@ -3729,10 +3558,9 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
; CHECK-NEXT: vmov.32 d1[0], r5
; CHECK-NEXT: sbcs r2, r3, #0
; CHECK-NEXT: movwlt r6, #1
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: moveq r1, r6
-; CHECK-NEXT: moveq r0, r6
-; CHECK-NEXT: movne r6, r3
+; CHECK-NEXT: movge r1, r6
+; CHECK-NEXT: movge r0, r6
+; CHECK-NEXT: movlt r6, r3
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: movwmi r0, #0
; CHECK-NEXT: movwmi r1, #0
@@ -3752,8 +3580,8 @@ entry:
define <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
; CHECK-LABEL: stest_f32i64_mm:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEXT: .vsave {d8}
; CHECK-NEXT: vpush {d8}
; CHECK-NEXT: vmov.f64 d8, d0
@@ -3762,55 +3590,50 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: mvn r9, #0
; CHECK-NEXT: subs r1, r0, r9
-; CHECK-NEXT: mvn r5, #-2147483648
-; CHECK-NEXT: sbcs r1, r4, r5
+; CHECK-NEXT: mvn r8, #-2147483648
+; CHECK-NEXT: sbcs r1, r4, r8
; CHECK-NEXT: vmov.f32 s0, s16
; CHECK-NEXT: sbcs r1, r2, #0
; CHECK-NEXT: mov r7, #0
; CHECK-NEXT: sbcs r1, r3, #0
-; CHECK-NEXT: mov r8, #-2147483648
+; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: mov r1, #0
-; CHECK-NEXT: mov r10, #0
+; CHECK-NEXT: mvnge r4, #-2147483648
; CHECK-NEXT: movwlt r1, #1
-; CHECK-NEXT: cmp r1, #0
-; CHECK-NEXT: moveq r3, r1
-; CHECK-NEXT: movne r1, r2
-; CHECK-NEXT: moveq r4, r5
-; CHECK-NEXT: moveq r0, r9
+; CHECK-NEXT: mvnge r0, #0
+; CHECK-NEXT: movge r3, r1
+; CHECK-NEXT: movlt r1, r2
; CHECK-NEXT: rsbs r2, r0, #0
; CHECK-NEXT: rscs r2, r4, #-2147483648
; CHECK-NEXT: sbcs r1, r9, r1
; CHECK-NEXT: sbcs r1, r9, r3
; CHECK-NEXT: movwlt r7, #1
-; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: movne r7, r0
-; CHECK-NEXT: moveq r4, r8
+; CHECK-NEXT: movge r4, #-2147483648
+; CHECK-NEXT: movlt r7, r0
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: subs r6, r0, r9
; CHECK-NEXT: vmov.32 d1[0], r7
-; CHECK-NEXT: sbcs r6, r1, r5
+; CHECK-NEXT: sbcs r6, r1, r8
; CHECK-NEXT: sbcs r6, r2, #0
; CHECK-NEXT: sbcs r6, r3, #0
; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: mvnge r1, #-2147483648
; CHECK-NEXT: movwlt r6, #1
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: moveq r3, r6
-; CHECK-NEXT: movne r6, r2
-; CHECK-NEXT: movne r5, r1
-; CHECK-NEXT: moveq r0, r9
-; CHECK-NEXT: rsbs r1, r0, #0
-; CHECK-NEXT: rscs r1, r5, #-2147483648
-; CHECK-NEXT: sbcs r1, r9, r6
-; CHECK-NEXT: sbcs r1, r9, r3
-; CHECK-NEXT: movwlt r10, #1
-; CHECK-NEXT: cmp r10, #0
-; CHECK-NEXT: movne r10, r0
-; CHECK-NEXT: moveq r5, r8
-; CHECK-NEXT: vmov.32 d0[0], r10
+; CHECK-NEXT: mvnge r0, #0
+; CHECK-NEXT: movge r3, r6
+; CHECK-NEXT: movlt r6, r2
+; CHECK-NEXT: rsbs r2, r0, #0
+; CHECK-NEXT: rscs r2, r1, #-2147483648
+; CHECK-NEXT: sbcs r2, r9, r6
+; CHECK-NEXT: sbcs r2, r9, r3
+; CHECK-NEXT: movwlt r5, #1
+; CHECK-NEXT: movge r1, #-2147483648
+; CHECK-NEXT: movlt r5, r0
+; CHECK-NEXT: vmov.32 d0[0], r5
; CHECK-NEXT: vmov.32 d1[1], r4
-; CHECK-NEXT: vmov.32 d0[1], r5
+; CHECK-NEXT: vmov.32 d0[1], r1
; CHECK-NEXT: vpop {d8}
-; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
entry:
%conv = fptosi <2 x float> %x to <2 x i128>
%spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>)
@@ -3836,17 +3659,15 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
; CHECK-NEXT: sbcs r1, r3, #0
; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: movwlo r6, #1
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: moveq r4, r6
-; CHECK-NEXT: movne r6, r0
+; CHECK-NEXT: movhs r4, r6
+; CHECK-NEXT: movlo r6, r0
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: subs r2, r2, #1
; CHECK-NEXT: vmov.32 d1[0], r6
; CHECK-NEXT: sbcs r2, r3, #0
; CHECK-NEXT: movwlo r5, #1
-; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: moveq r0, r5
-; CHECK-NEXT: movne r5, r1
+; CHECK-NEXT: movhs r0, r5
+; CHECK-NEXT: movlo r5, r1
; CHECK-NEXT: vmov.32 d0[0], r0
; CHECK-NEXT: vmov.32 d1[1], r4
; CHECK-NEXT: vmov.32 d0[1], r5
@@ -3877,10 +3698,9 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
; CHECK-NEXT: mov r6, #0
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: moveq r5, r0
-; CHECK-NEXT: moveq r4, r0
-; CHECK-NEXT: movne r0, r3
+; CHECK-NEXT: movge r5, r0
+; CHECK-NEXT: movge r4, r0
+; CHECK-NEXT: movlt r0, r3
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: movwmi r4, #0
; CHECK-NEXT: movwmi r5, #0
@@ -3889,10 +3709,9 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
; CHECK-NEXT: vmov.32 d1[0], r5
; CHECK-NEXT: sbcs r2, r3, #0
; CHECK-NEXT: movwlt r6, #1
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: moveq r1, r6
-; CHECK-NEXT: moveq r0, r6
-; CHECK-NEXT: movne r6, r3
+; CHECK-NEXT: movge r1, r6
+; CHECK-NEXT: movge r0, r6
+; CHECK-NEXT: movlt r6, r3
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: movwmi r0, #0
; CHECK-NEXT: movwmi r1, #0
@@ -3912,14 +3731,14 @@ entry:
define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
; CHECK-NEON-LABEL: stest_f16i64_mm:
; CHECK-NEON: @ %bb.0: @ %entry
-; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEON-NEXT: .vsave {d8}
; CHECK-NEON-NEXT: vpush {d8}
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmov.f32 s16, s1
; CHECK-NEON-NEXT: bl __aeabi_h2f
-; CHECK-NEON-NEXT: mov r8, r0
+; CHECK-NEON-NEXT: mov r5, r0
; CHECK-NEON-NEXT: vmov r0, s16
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
@@ -3927,60 +3746,55 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
; CHECK-NEON-NEXT: mov r4, r1
; CHECK-NEON-NEXT: mvn r9, #0
; CHECK-NEON-NEXT: subs r1, r0, r9
-; CHECK-NEON-NEXT: mvn r6, #-2147483648
-; CHECK-NEON-NEXT: sbcs r1, r4, r6
-; CHECK-NEON-NEXT: vmov s0, r8
+; CHECK-NEON-NEXT: mvn r8, #-2147483648
+; CHECK-NEON-NEXT: sbcs r1, r4, r8
+; CHECK-NEON-NEXT: vmov s0, r5
; CHECK-NEON-NEXT: sbcs r1, r2, #0
; CHECK-NEON-NEXT: mov r5, #0
; CHECK-NEON-NEXT: sbcs r1, r3, #0
-; CHECK-NEON-NEXT: mov r8, #-2147483648
+; CHECK-NEON-NEXT: mov r6, #0
; CHECK-NEON-NEXT: mov r1, #0
-; CHECK-NEON-NEXT: mov r10, #0
+; CHECK-NEON-NEXT: mvnge r4, #-2147483648
; CHECK-NEON-NEXT: movwlt r1, #1
-; CHECK-NEON-NEXT: cmp r1, #0
-; CHECK-NEON-NEXT: moveq r3, r1
-; CHECK-NEON-NEXT: movne r1, r2
-; CHECK-NEON-NEXT: moveq r4, r6
-; CHECK-NEON-NEXT: moveq r0, r9
+; CHECK-NEON-NEXT: mvnge r0, #0
+; CHECK-NEON-NEXT: movge r3, r1
+; CHECK-NEON-NEXT: movlt r1, r2
; CHECK-NEON-NEXT: rsbs r2, r0, #0
; CHECK-NEON-NEXT: rscs r2, r4, #-2147483648
; CHECK-NEON-NEXT: sbcs r1, r9, r1
; CHECK-NEON-NEXT: sbcs r1, r9, r3
; CHECK-NEON-NEXT: movwlt r5, #1
-; CHECK-NEON-NEXT: cmp r5, #0
-; CHECK-NEON-NEXT: movne r5, r0
-; CHECK-NEON-NEXT: moveq r4, r8
+; CHECK-NEON-NEXT: movge r4, #-2147483648
+; CHECK-NEON-NEXT: movlt r5, r0
; CHECK-NEON-NEXT: bl __fixsfti
; CHECK-NEON-NEXT: subs r7, r0, r9
; CHECK-NEON-NEXT: vmov.32 d1[0], r5
-; CHECK-NEON-NEXT: sbcs r7, r1, r6
+; CHECK-NEON-NEXT: sbcs r7, r1, r8
; CHECK-NEON-NEXT: sbcs r7, r2, #0
; CHECK-NEON-NEXT: sbcs r7, r3, #0
; CHECK-NEON-NEXT: mov r7, #0
+; CHECK-NEON-NEXT: mvnge r1, #-2147483648
; CHECK-NEON-NEXT: movwlt r7, #1
-; CHECK-NEON-NEXT: cmp r7, #0
-; CHECK-NEON-NEXT: moveq r3, r7
-; CHECK-NEON-NEXT: movne r7, r2
-; CHECK-NEON-NEXT: movne r6, r1
-; CHECK-NEON-NEXT: moveq r0, r9
-; CHECK-NEON-NEXT: rsbs r1, r0, #0
-; CHECK-NEON-NEXT: rscs r1, r6, #-2147483648
-; CHECK-NEON-NEXT: sbcs r1, r9, r7
-; CHECK-NEON-NEXT: sbcs r1, r9, r3
-; CHECK-NEON-NEXT: movwlt r10, #1
-; CHECK-NEON-NEXT: cmp r10, #0
-; CHECK-NEON-NEXT: movne r10, r0
-; CHECK-NEON-NEXT: moveq r6, r8
-; CHECK-NEON-NEXT: vmov.32 d0[0], r10
+; CHECK-NEON-NEXT: mvnge r0, #0
+; CHECK-NEON-NEXT: movge r3, r7
+; CHECK-NEON-NEXT: movlt r7, r2
+; CHECK-NEON-NEXT: rsbs r2, r0, #0
+; CHECK-NEON-NEXT: rscs r2, r1, #-2147483648
+; CHECK-NEON-NEXT: sbcs r2, r9, r7
+; CHECK-NEON-NEXT: sbcs r2, r9, r3
+; CHECK-NEON-NEXT: movwlt r6, #1
+; CHECK-NEON-NEXT: movge r1, #-2147483648
+; CHECK-NEON-NEXT: movlt r6, r0
+; CHECK-NEON-NEXT: vmov.32 d0[0], r6
; CHECK-NEON-NEXT: vmov.32 d1[1], r4
-; CHECK-NEON-NEXT: vmov.32 d0[1], r6
+; CHECK-NEON-NEXT: vmov.32 d0[1], r1
; CHECK-NEON-NEXT: vpop {d8}
-; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
;
; CHECK-FP16-LABEL: stest_f16i64_mm:
; CHECK-FP16: @ %bb.0: @ %entry
-; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-FP16-NEXT: vmov.u16 r0, d0[1]
; CHECK-FP16-NEXT: vmov.u16 r7, d0[0]
; CHECK-FP16-NEXT: vmov s0, r0
@@ -3988,54 +3802,49 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
; CHECK-FP16-NEXT: mov r4, r1
; CHECK-FP16-NEXT: mvn r9, #0
; CHECK-FP16-NEXT: subs r1, r0, r9
-; CHECK-FP16-NEXT: mvn r5, #-2147483648
-; CHECK-FP16-NEXT: sbcs r1, r4, r5
+; CHECK-FP16-NEXT: mvn r8, #-2147483648
+; CHECK-FP16-NEXT: sbcs r1, r4, r8
; CHECK-FP16-NEXT: vmov s0, r7
; CHECK-FP16-NEXT: sbcs r1, r2, #0
; CHECK-FP16-NEXT: mov r7, #0
; CHECK-FP16-NEXT: sbcs r1, r3, #0
-; CHECK-FP16-NEXT: mov r8, #-2147483648
+; CHECK-FP16-NEXT: mov r5, #0
; CHECK-FP16-NEXT: mov r1, #0
-; CHECK-FP16-NEXT: mov r10, #0
+; CHECK-FP16-NEXT: mvnge r4, #-2147483648
; CHECK-FP16-NEXT: movwlt r1, #1
-; CHECK-FP16-NEXT: cmp r1, #0
-; CHECK-FP16-NEXT: moveq r3, r1
-; CHECK-FP16-NEXT: movne r1, r2
-; CHECK-FP16-NEXT: moveq r4, r5
-; CHECK-FP16-NEXT: moveq r0, r9
+; CHECK-FP16-NEXT: mvnge r0, #0
+; CHECK-FP16-NEXT: movge r3, r1
+; CHECK-FP16-NEXT: movlt r1, r2
; CHECK-FP16-NEXT: rsbs r2, r0, #0
; CHECK-FP16-NEXT: rscs r2, r4, #-2147483648
; CHECK-FP16-NEXT: sbcs r1, r9, r1
; CHECK-FP16-NEXT: sbcs r1, r9, r3
; CHECK-FP16-NEXT: movwlt r7, #1
-; CHECK-FP16-NEXT: cmp r7, #0
-; CHECK-FP16-NEXT: movne r7, r0
-; CHECK-FP16-NEXT: moveq r4, r8
+; CHECK-FP16-NEXT: movge r4, #-2147483648
+; CHECK-FP16-NEXT: movlt r7, r0
; CHECK-FP16-NEXT: bl __fixhfti
; CHECK-FP16-NEXT: subs r6, r0, r9
; CHECK-FP16-NEXT: vmov.32 d1[0], r7
-; CHECK-FP16-NEXT: sbcs r6, r1, r5
+; CHECK-FP16-NEXT: sbcs r6, r1, r8
; CHECK-FP16-NEXT: sbcs r6, r2, #0
; CHECK-FP16-NEXT: sbcs r6, r3, #0
; CHECK-FP16-NEXT: mov r6, #0
+; CHECK-FP16-NEXT: mvnge r1, #-2147483648
; CHECK-FP16-NEXT: movwlt r6, #1
-; CHECK-FP16-NEXT: cmp r6, #0
-; CHECK-FP16-NEXT: moveq r3, r6
-; CHECK-FP16-NEXT: movne r6, r2
-; CHECK-FP16-NEXT: movne r5, r1
-; CHECK-FP16-NEXT: moveq r0, r9
-; CHECK-FP16-NEXT: rsbs r1, r0, #0
-; CHECK-FP16-NEXT: rscs r1, r5, #-2147483648
-; CHECK-FP16-NEXT: sbcs r1, r9, r6
-; CHECK-FP16-NEXT: sbcs r1, r9, r3
-; CHECK-FP16-NEXT: movwlt r10, #1
-; CHECK-FP16-NEXT: cmp r10, #0
-; CHECK-FP16-NEXT: movne r10, r0
-; CHECK-FP16-NEXT: moveq r5, r8
-; CHECK-FP16-NEXT: vmov.32 d0[0], r10
+; CHECK-FP16-NEXT: mvnge r0, #0
+; CHECK-FP16-NEXT: movge r3, r6
+; CHECK-FP16-NEXT: movlt r6, r2
+; CHECK-FP16-NEXT: rsbs r2, r0, #0
+; CHECK-FP16-NEXT: rscs r2, r1, #-2147483648
+; CHECK-FP16-NEXT: sbcs r2, r9, r6
+; CHECK-FP16-NEXT: sbcs r2, r9, r3
+; CHECK-FP16-NEXT: movwlt r5, #1
+; CHECK-FP16-NEXT: movge r1, #-2147483648
+; CHECK-FP16-NEXT: movlt r5, r0
+; CHECK-FP16-NEXT: vmov.32 d0[0], r5
; CHECK-FP16-NEXT: vmov.32 d1[1], r4
-; CHECK-FP16-NEXT: vmov.32 d0[1], r5
-; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+; CHECK-FP16-NEXT: vmov.32 d0[1], r1
+; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
entry:
%conv = fptosi <2 x half> %x to <2 x i128>
%spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>)
@@ -4066,17 +3875,15 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
; CHECK-NEON-NEXT: mov r5, #0
; CHECK-NEON-NEXT: mov r6, #0
; CHECK-NEON-NEXT: movwlo r5, #1
-; CHECK-NEON-NEXT: cmp r5, #0
-; CHECK-NEON-NEXT: moveq r4, r5
-; CHECK-NEON-NEXT: movne r5, r0
+; CHECK-NEON-NEXT: movhs r4, r5
+; CHECK-NEON-NEXT: movlo r5, r0
; CHECK-NEON-NEXT: bl __fixunssfti
; CHECK-NEON-NEXT: subs r2, r2, #1
; CHECK-NEON-NEXT: vmov.32 d1[0], r5
; CHECK-NEON-NEXT: sbcs r2, r3, #0
; CHECK-NEON-NEXT: movwlo r6, #1
-; CHECK-NEON-NEXT: cmp r6, #0
-; CHECK-NEON-NEXT: moveq r0, r6
-; CHECK-NEON-NEXT: movne r6, r1
+; CHECK-NEON-NEXT: movhs r0, r6
+; CHECK-NEON-NEXT: movlo r6, r1
; CHECK-NEON-NEXT: vmov.32 d0[0], r0
; CHECK-NEON-NEXT: vmov.32 d1[1], r4
; CHECK-NEON-NEXT: vmov.32 d0[1], r6
@@ -4098,17 +3905,15 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
; CHECK-FP16-NEXT: mov r6, #0
; CHECK-FP16-NEXT: mov r5, #0
; CHECK-FP16-NEXT: movwlo r6, #1
-; CHECK-FP16-NEXT: cmp r6, #0
-; CHECK-FP16-NEXT: moveq r4, r6
-; CHECK-FP16-NEXT: movne r6, r0
+; CHECK-FP16-NEXT: movhs r4, r6
+; CHECK-FP16-NEXT: movlo r6, r0
; CHECK-FP16-NEXT: bl __fixunshfti
; CHECK-FP16-NEXT: subs r2, r2, #1
; CHECK-FP16-NEXT: vmov.32 d1[0], r6
; CHECK-FP16-NEXT: sbcs r2, r3, #0
; CHECK-FP16-NEXT: movwlo r5, #1
-; CHECK-FP16-NEXT: cmp r5, #0
-; CHECK-FP16-NEXT: moveq r0, r5
-; CHECK-FP16-NEXT: movne r5, r1
+; CHECK-FP16-NEXT: movhs r0, r5
+; CHECK-FP16-NEXT: movlo r5, r1
; CHECK-FP16-NEXT: vmov.32 d0[0], r0
; CHECK-FP16-NEXT: vmov.32 d1[1], r4
; CHECK-FP16-NEXT: vmov.32 d0[1], r5
@@ -4142,12 +3947,11 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
; CHECK-NEON-NEXT: mov r0, #0
; CHECK-NEON-NEXT: mov r4, r1
; CHECK-NEON-NEXT: movwlt r0, #1
-; CHECK-NEON-NEXT: cmp r0, #0
-; CHECK-NEON-NEXT: moveq r5, r0
-; CHECK-NEON-NEXT: moveq r4, r0
-; CHECK-NEON-NEXT: movne r0, r3
-; CHECK-NEON-NEXT: cmp r0, #0
; CHECK-NEON-NEXT: mov r7, #0
+; CHECK-NEON-NEXT: movge r5, r0
+; CHECK-NEON-NEXT: movge r4, r0
+; CHECK-NEON-NEXT: movlt r0, r3
+; CHECK-NEON-NEXT: cmp r0, #0
; CHECK-NEON-NEXT: movwmi r4, #0
; CHECK-NEON-NEXT: movwmi r5, #0
; CHECK-NEON-NEXT: bl __fixsfti
@@ -4155,10 +3959,9 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
; CHECK-NEON-NEXT: vmov.32 d1[0], r5
; CHECK-NEON-NEXT: sbcs r2, r3, #0
; CHECK-NEON-NEXT: movwlt r7, #1
-; CHECK-NEON-NEXT: cmp r7, #0
-; CHECK-NEON-NEXT: moveq r1, r7
-; CHECK-NEON-NEXT: moveq r0, r7
-; CHECK-NEON-NEXT: movne r7, r3
+; CHECK-NEON-NEXT: movge r1, r7
+; CHECK-NEON-NEXT: movge r0, r7
+; CHECK-NEON-NEXT: movlt r7, r3
; CHECK-NEON-NEXT: cmp r7, #0
; CHECK-NEON-NEXT: movwmi r0, #0
; CHECK-NEON-NEXT: movwmi r1, #0
@@ -4183,12 +3986,11 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
; CHECK-FP16-NEXT: mov r0, #0
; CHECK-FP16-NEXT: mov r4, r1
; CHECK-FP16-NEXT: movwlt r0, #1
-; CHECK-FP16-NEXT: cmp r0, #0
-; CHECK-FP16-NEXT: moveq r5, r0
-; CHECK-FP16-NEXT: moveq r4, r0
-; CHECK-FP16-NEXT: movne r0, r3
-; CHECK-FP16-NEXT: cmp r0, #0
; CHECK-FP16-NEXT: mov r6, #0
+; CHECK-FP16-NEXT: movge r5, r0
+; CHECK-FP16-NEXT: movge r4, r0
+; CHECK-FP16-NEXT: movlt r0, r3
+; CHECK-FP16-NEXT: cmp r0, #0
; CHECK-FP16-NEXT: movwmi r4, #0
; CHECK-FP16-NEXT: movwmi r5, #0
; CHECK-FP16-NEXT: bl __fixhfti
@@ -4196,10 +3998,9 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
; CHECK-FP16-NEXT: vmov.32 d1[0], r5
; CHECK-FP16-NEXT: sbcs r2, r3, #0
; CHECK-FP16-NEXT: movwlt r6, #1
-; CHECK-FP16-NEXT: cmp r6, #0
-; CHECK-FP16-NEXT: moveq r1, r6
-; CHECK-FP16-NEXT: moveq r0, r6
-; CHECK-FP16-NEXT: movne r6, r3
+; CHECK-FP16-NEXT: movge r1, r6
+; CHECK-FP16-NEXT: movge r0, r6
+; CHECK-FP16-NEXT: movlt r6, r3
; CHECK-FP16-NEXT: cmp r6, #0
; CHECK-FP16-NEXT: movwmi r0, #0
; CHECK-FP16-NEXT: movwmi r1, #0
diff --git a/llvm/test/CodeGen/ARM/neon_vabd.ll b/llvm/test/CodeGen/ARM/neon_vabd.ll
index ffc72b242f829..d8c038d081fd5 100644
--- a/llvm/test/CodeGen/ARM/neon_vabd.ll
+++ b/llvm/test/CodeGen/ARM/neon_vabd.ll
@@ -145,24 +145,23 @@ define <2 x i64> @sabd_2d(<2 x i64> %a, <2 x i64> %b) {
; CHECK-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEXT: push {r4, r5, r6, lr}
; CHECK-NEXT: vmov r0, r1, d1
-; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: mvn r6, #0
; CHECK-NEXT: vmov r2, r3, d3
; CHECK-NEXT: vmov r12, lr, d0
; CHECK-NEXT: vmov r4, r5, d2
; CHECK-NEXT: vsub.i64 q8, q0, q1
; CHECK-NEXT: subs r0, r2, r0
-; CHECK-NEXT: sbcs r0, r3, r1
+; CHECK-NEXT: sbcs r1, r3, r1
; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: mov r1, #0
+; CHECK-NEXT: movwlt r1, #1
+; CHECK-NEXT: movlt r1, r6
+; CHECK-NEXT: subs r2, r4, r12
+; CHECK-NEXT: sbcs r2, r5, lr
+; CHECK-NEXT: vdup.32 d19, r1
; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mvnne r0, #0
-; CHECK-NEXT: subs r1, r4, r12
-; CHECK-NEXT: sbcs r1, r5, lr
-; CHECK-NEXT: vdup.32 d19, r0
-; CHECK-NEXT: movwlt r6, #1
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: mvnne r6, #0
-; CHECK-NEXT: vdup.32 d18, r6
+; CHECK-NEXT: movlt r0, r6
+; CHECK-NEXT: vdup.32 d18, r0
; CHECK-NEXT: veor q8, q8, q9
; CHECK-NEXT: vsub.i64 q0, q9, q8
; CHECK-NEXT: pop {r4, r5, r6, pc}
@@ -476,24 +475,23 @@ define <2 x i64> @smaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
; CHECK-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEXT: push {r4, r5, r6, lr}
; CHECK-NEXT: vmov r0, r1, d1
-; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: mvn r6, #0
; CHECK-NEXT: vmov r2, r3, d3
; CHECK-NEXT: vmov r12, lr, d0
; CHECK-NEXT: vmov r4, r5, d2
; CHECK-NEXT: vsub.i64 q8, q0, q1
; CHECK-NEXT: subs r0, r2, r0
-; CHECK-NEXT: sbcs r0, r3, r1
+; CHECK-NEXT: sbcs r1, r3, r1
; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: mov r1, #0
+; CHECK-NEXT: movwlt r1, #1
+; CHECK-NEXT: movlt r1, r6
+; CHECK-NEXT: subs r2, r4, r12
+; CHECK-NEXT: sbcs r2, r5, lr
+; CHECK-NEXT: vdup.32 d19, r1
; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mvnne r0, #0
-; CHECK-NEXT: subs r1, r4, r12
-; CHECK-NEXT: sbcs r1, r5, lr
-; CHECK-NEXT: vdup.32 d19, r0
-; CHECK-NEXT: movwlt r6, #1
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: mvnne r6, #0
-; CHECK-NEXT: vdup.32 d18, r6
+; CHECK-NEXT: movlt r0, r6
+; CHECK-NEXT: vdup.32 d18, r0
; CHECK-NEXT: veor q8, q8, q9
; CHECK-NEXT: vsub.i64 q0, q9, q8
; CHECK-NEXT: pop {r4, r5, r6, pc}
diff --git a/llvm/test/CodeGen/ARM/smml.ll b/llvm/test/CodeGen/ARM/smml.ll
index a09ec504d8b78..f1f25adcdd726 100644
--- a/llvm/test/CodeGen/ARM/smml.ll
+++ b/llvm/test/CodeGen/ARM/smml.ll
@@ -165,10 +165,10 @@ define void @test_used_flags(i32 %in1, i32 %in2) {
; CHECK-V6-NEXT: .save {r11, lr}
; CHECK-V6-NEXT: push {r11, lr}
; CHECK-V6-NEXT: smull r1, r2, r0, r1
-; CHECK-V6-NEXT: mov r0, #56
+; CHECK-V6-NEXT: mov r0, #42
; CHECK-V6-NEXT: subs r1, r1, #1
; CHECK-V6-NEXT: sbcs r1, r2, #0
-; CHECK-V6-NEXT: movlt r0, #42
+; CHECK-V6-NEXT: movge r0, #56
; CHECK-V6-NEXT: bl opaque
; CHECK-V6-NEXT: pop {r11, pc}
;
@@ -177,10 +177,10 @@ define void @test_used_flags(i32 %in1, i32 %in2) {
; CHECK-V7-NEXT: .save {r11, lr}
; CHECK-V7-NEXT: push {r11, lr}
; CHECK-V7-NEXT: smull r1, r2, r0, r1
-; CHECK-V7-NEXT: mov r0, #56
+; CHECK-V7-NEXT: mov r0, #42
; CHECK-V7-NEXT: subs r1, r1, #1
; CHECK-V7-NEXT: sbcs r1, r2, #0
-; CHECK-V7-NEXT: movwlt r0, #42
+; CHECK-V7-NEXT: movwge r0, #56
; CHECK-V7-NEXT: bl opaque
; CHECK-V7-NEXT: pop {r11, pc}
;
@@ -247,11 +247,11 @@ define void @test_used_flags(i32 %in1, i32 %in2) {
; CHECK-THUMBV7-NEXT: .save {r7, lr}
; CHECK-THUMBV7-NEXT: push {r7, lr}
; CHECK-THUMBV7-NEXT: smull r1, r2, r0, r1
-; CHECK-THUMBV7-NEXT: movs r0, #56
+; CHECK-THUMBV7-NEXT: movs r0, #42
; CHECK-THUMBV7-NEXT: subs r1, #1
; CHECK-THUMBV7-NEXT: sbcs r1, r2, #0
-; CHECK-THUMBV7-NEXT: it lt
-; CHECK-THUMBV7-NEXT: movlt r0, #42
+; CHECK-THUMBV7-NEXT: it ge
+; CHECK-THUMBV7-NEXT: movge r0, #56
; CHECK-THUMBV7-NEXT: bl opaque
; CHECK-THUMBV7-NEXT: pop {r7, pc}
;
@@ -260,11 +260,11 @@ define void @test_used_flags(i32 %in1, i32 %in2) {
; CHECK-THUMBV7M-NEXT: .save {r7, lr}
; CHECK-THUMBV7M-NEXT: push {r7, lr}
; CHECK-THUMBV7M-NEXT: smull r1, r2, r0, r1
-; CHECK-THUMBV7M-NEXT: movs r0, #56
+; CHECK-THUMBV7M-NEXT: movs r0, #42
; CHECK-THUMBV7M-NEXT: subs r1, #1
; CHECK-THUMBV7M-NEXT: sbcs r1, r2, #0
-; CHECK-THUMBV7M-NEXT: it lt
-; CHECK-THUMBV7M-NEXT: movlt r0, #42
+; CHECK-THUMBV7M-NEXT: it ge
+; CHECK-THUMBV7M-NEXT: movge r0, #56
; CHECK-THUMBV7M-NEXT: bl opaque
; CHECK-THUMBV7M-NEXT: pop {r7, pc}
;
@@ -273,11 +273,11 @@ define void @test_used_flags(i32 %in1, i32 %in2) {
; CHECK-THUMBV7EM-NEXT: .save {r7, lr}
; CHECK-THUMBV7EM-NEXT: push {r7, lr}
; CHECK-THUMBV7EM-NEXT: smull r1, r2, r0, r1
-; CHECK-THUMBV7EM-NEXT: movs r0, #56
+; CHECK-THUMBV7EM-NEXT: movs r0, #42
; CHECK-THUMBV7EM-NEXT: subs r1, #1
; CHECK-THUMBV7EM-NEXT: sbcs r1, r2, #0
-; CHECK-THUMBV7EM-NEXT: it lt
-; CHECK-THUMBV7EM-NEXT: movlt r0, #42
+; CHECK-THUMBV7EM-NEXT: it ge
+; CHECK-THUMBV7EM-NEXT: movge r0, #56
; CHECK-THUMBV7EM-NEXT: bl opaque
; CHECK-THUMBV7EM-NEXT: pop {r7, pc}
%in1.64 = sext i32 %in1 to i64
@@ -294,3 +294,5 @@ false:
call void @opaque(i32 56)
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-CALLSITE: {{.*}}
diff --git a/llvm/test/CodeGen/ARM/vector-trunc.ll b/llvm/test/CodeGen/ARM/vector-trunc.ll
index 9acf463c2be93..17bc5059d736d 100644
--- a/llvm/test/CodeGen/ARM/vector-trunc.ll
+++ b/llvm/test/CodeGen/ARM/vector-trunc.ll
@@ -10,8 +10,7 @@ define i32 @test(i64 %arg1) {
; LE-NEXT: sbcs r0, r1, #0
; LE-NEXT: vldr s0, .LCPI0_0
; LE-NEXT: movwhs r2, #1
-; LE-NEXT: cmp r2, #0
-; LE-NEXT: mvnne r2, #0
+; LE-NEXT: mvnhs r2, #0
; LE-NEXT: vmov s1, r2
; LE-NEXT: vmovn.i32 d16, q0
; LE-NEXT: vmovn.i16 d16, q8
@@ -30,8 +29,7 @@ define i32 @test(i64 %arg1) {
; BE-NEXT: sbcs r0, r0, #0
; BE-NEXT: vldr s0, .LCPI0_0
; BE-NEXT: movwhs r2, #1
-; BE-NEXT: cmp r2, #0
-; BE-NEXT: mvnne r2, #0
+; BE-NEXT: mvnhs r2, #0
; BE-NEXT: vmov s1, r2
; BE-NEXT: vmovn.i32 d16, q0
; BE-NEXT: vmovn.i16 d16, q8
diff --git a/llvm/test/CodeGen/ARM/vselect_imax.ll b/llvm/test/CodeGen/ARM/vselect_imax.ll
index bd5e3061f0d18..ec2ce65097430 100644
--- a/llvm/test/CodeGen/ARM/vselect_imax.ll
+++ b/llvm/test/CodeGen/ARM/vselect_imax.ll
@@ -111,53 +111,50 @@ define void @func_blend15(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea
define void @func_blend18(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storeaddr) {
; CHECK-LABEL: func_blend18:
; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r4, r5, r6, lr}
-; CHECK-NEXT: push {r4, r5, r6, lr}
+; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128]!
+; CHECK-NEXT: mvn r12, #0
; CHECK-NEXT: vld1.64 {d22, d23}, [r0:128]!
-; CHECK-NEXT: vmov r4, r6, d16
; CHECK-NEXT: vld1.64 {d18, d19}, [r1:128]
+; CHECK-NEXT: mov r1, #0
; CHECK-NEXT: vld1.64 {d20, d21}, [r0:128]
-; CHECK-NEXT: vmov lr, r12, d18
+; CHECK-NEXT: vmov r2, lr, d18
+; CHECK-NEXT: vmov r0, r4, d20
+; CHECK-NEXT: subs r0, r0, r2
+; CHECK-NEXT: sbcs r2, r4, lr
+; CHECK-NEXT: vmov r0, r6, d16
+; CHECK-NEXT: vmov r4, r5, d22
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: movlt r2, #1
+; CHECK-NEXT: movlt r2, r12
+; CHECK-NEXT: subs r0, r4, r0
+; CHECK-NEXT: sbcs r0, r5, r6
+; CHECK-NEXT: vmov r6, lr, d17
+; CHECK-NEXT: vmov r4, r5, d23
; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: vmov r2, r1, d20
-; CHECK-NEXT: subs r2, r2, lr
-; CHECK-NEXT: vmov r2, r5, d22
-; CHECK-NEXT: sbcs r1, r1, r12
-; CHECK-NEXT: mov r1, #0
-; CHECK-NEXT: movlt r1, #1
-; CHECK-NEXT: cmp r1, #0
-; CHECK-NEXT: mvnne r1, #0
-; CHECK-NEXT: subs r2, r2, r4
-; CHECK-NEXT: sbcs r6, r5, r6
-; CHECK-NEXT: vmov r2, r12, d17
-; CHECK-NEXT: vmov r5, r4, d23
+; CHECK-NEXT: movlt r0, #1
+; CHECK-NEXT: movlt r0, r12
+; CHECK-NEXT: subs r4, r4, r6
+; CHECK-NEXT: sbcs r6, r5, lr
+; CHECK-NEXT: vmov r5, lr, d19
+; CHECK-NEXT: vmov r4, r7, d21
; CHECK-NEXT: mov r6, #0
; CHECK-NEXT: movlt r6, #1
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: mvnne r6, #0
-; CHECK-NEXT: subs r2, r5, r2
-; CHECK-NEXT: sbcs r2, r4, r12
-; CHECK-NEXT: vmov lr, r12, d19
-; CHECK-NEXT: vmov r4, r5, d21
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movlt r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: mvnne r2, #0
-; CHECK-NEXT: vdup.32 d25, r2
-; CHECK-NEXT: vdup.32 d24, r6
+; CHECK-NEXT: movlt r6, r12
+; CHECK-NEXT: vdup.32 d25, r6
+; CHECK-NEXT: vdup.32 d24, r0
; CHECK-NEXT: vbit q8, q11, q12
-; CHECK-NEXT: subs r4, r4, lr
-; CHECK-NEXT: sbcs r5, r5, r12
-; CHECK-NEXT: movlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mvnne r0, #0
-; CHECK-NEXT: vdup.32 d27, r0
-; CHECK-NEXT: vdup.32 d26, r1
+; CHECK-NEXT: subs r4, r4, r5
+; CHECK-NEXT: sbcs r7, r7, lr
+; CHECK-NEXT: movlt r1, #1
+; CHECK-NEXT: movlt r1, r12
+; CHECK-NEXT: vdup.32 d27, r1
+; CHECK-NEXT: vdup.32 d26, r2
; CHECK-NEXT: vbit q9, q10, q13
; CHECK-NEXT: vst1.64 {d16, d17}, [r3:128]!
; CHECK-NEXT: vst1.64 {d18, d19}, [r3:128]
-; CHECK-NEXT: pop {r4, r5, r6, lr}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: mov pc, lr
; COST: func_blend18
; COST: cost of 0 {{.*}} icmp
@@ -180,8 +177,9 @@ define void @func_blend19(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea
; CHECK-NEXT: vld1.64 {d28, d29}, [r1:128]!
; CHECK-NEXT: mov lr, #0
; CHECK-NEXT: vld1.64 {d30, d31}, [r0:128]!
-; CHECK-NEXT: vld1.64 {d20, d21}, [r1:128]!
; CHECK-NEXT: vld1.64 {d24, d25}, [r0:128]!
+; CHECK-NEXT: vld1.64 {d20, d21}, [r1:128]!
+; CHECK-NEXT: vmov r4, r5, d25
; CHECK-NEXT: vld1.64 {d22, d23}, [r1:128]!
; CHECK-NEXT: vld1.64 {d26, d27}, [r0:128]!
; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128]
@@ -189,78 +187,70 @@ define void @func_blend19(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea
; CHECK-NEXT: vmov r0, r12, d16
; CHECK-NEXT: vmov r1, r2, d18
; CHECK-NEXT: subs r0, r1, r0
-; CHECK-NEXT: vmov r1, r4, d25
+; CHECK-NEXT: mov r1, #0
; CHECK-NEXT: sbcs r0, r2, r12
-; CHECK-NEXT: mov r12, #0
+; CHECK-NEXT: mvn r12, #0
; CHECK-NEXT: vmov r2, r0, d21
-; CHECK-NEXT: movlt r12, #1
-; CHECK-NEXT: cmp r12, #0
-; CHECK-NEXT: mvnne r12, #0
-; CHECK-NEXT: subs r1, r1, r2
-; CHECK-NEXT: sbcs r0, r4, r0
-; CHECK-NEXT: vmov r2, r4, d24
+; CHECK-NEXT: movlt lr, #1
+; CHECK-NEXT: movlt lr, r12
+; CHECK-NEXT: subs r2, r4, r2
+; CHECK-NEXT: sbcs r0, r5, r0
+; CHECK-NEXT: vmov r4, r5, d24
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: movlt r0, r12
; CHECK-NEXT: vdup.32 d1, r0
-; CHECK-NEXT: vmov r0, r1, d20
-; CHECK-NEXT: subs r0, r2, r0
-; CHECK-NEXT: sbcs r0, r4, r1
-; CHECK-NEXT: vmov r2, r4, d26
+; CHECK-NEXT: vmov r0, r2, d20
+; CHECK-NEXT: subs r0, r4, r0
+; CHECK-NEXT: sbcs r0, r5, r2
+; CHECK-NEXT: vmov r4, r5, d31
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: movlt r0, r12
; CHECK-NEXT: vdup.32 d0, r0
-; CHECK-NEXT: vmov r0, r1, d22
-; CHECK-NEXT: subs r0, r2, r0
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: sbcs r0, r4, r1
-; CHECK-NEXT: vmov r4, r5, d31
-; CHECK-NEXT: vmov r0, r1, d29
-; CHECK-NEXT: movlt r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: mvnne r2, #0
+; CHECK-NEXT: vmov r0, r2, d29
; CHECK-NEXT: subs r0, r4, r0
-; CHECK-NEXT: sbcs r0, r5, r1
+; CHECK-NEXT: sbcs r0, r5, r2
; CHECK-NEXT: vmov r4, r5, d30
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: movlt r0, r12
; CHECK-NEXT: vdup.32 d3, r0
-; CHECK-NEXT: vmov r0, r1, d28
+; CHECK-NEXT: vmov r0, r2, d28
; CHECK-NEXT: subs r0, r4, r0
-; CHECK-NEXT: sbcs r0, r5, r1
+; CHECK-NEXT: sbcs r0, r5, r2
; CHECK-NEXT: vmov r4, r5, d27
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: movlt r0, r12
; CHECK-NEXT: vdup.32 d2, r0
-; CHECK-NEXT: vmov r0, r1, d23
+; CHECK-NEXT: vmov r0, r2, d23
; CHECK-NEXT: vbit q14, q15, q1
; CHECK-NEXT: vbit q10, q12, q0
; CHECK-NEXT: subs r0, r4, r0
-; CHECK-NEXT: sbcs r0, r5, r1
-; CHECK-NEXT: vmov r1, r4, d17
-; CHECK-NEXT: vmov r5, r6, d19
+; CHECK-NEXT: sbcs r0, r5, r2
+; CHECK-NEXT: vmov r2, r4, d22
+; CHECK-NEXT: vmov r5, r6, d26
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: movlt r0, r12
; CHECK-NEXT: vdup.32 d31, r0
-; CHECK-NEXT: vdup.32 d30, r2
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: subs r2, r5, r2
+; CHECK-NEXT: sbcs r2, r6, r4
+; CHECK-NEXT: vmov r5, r6, d19
+; CHECK-NEXT: vmov r2, r4, d17
+; CHECK-NEXT: movlt r0, #1
+; CHECK-NEXT: movlt r0, r12
+; CHECK-NEXT: vdup.32 d30, r0
; CHECK-NEXT: vbit q11, q13, q15
; CHECK-NEXT: vst1.64 {d28, d29}, [r3:128]!
-; CHECK-NEXT: subs r1, r5, r1
-; CHECK-NEXT: sbcs r1, r6, r4
-; CHECK-NEXT: movlt lr, #1
-; CHECK-NEXT: cmp lr, #0
-; CHECK-NEXT: mvnne lr, #0
-; CHECK-NEXT: vdup.32 d3, lr
-; CHECK-NEXT: vdup.32 d2, r12
+; CHECK-NEXT: subs r2, r5, r2
+; CHECK-NEXT: sbcs r2, r6, r4
+; CHECK-NEXT: movlt r1, #1
+; CHECK-NEXT: movlt r1, r12
+; CHECK-NEXT: vdup.32 d3, r1
+; CHECK-NEXT: vdup.32 d2, lr
; CHECK-NEXT: vbit q8, q9, q1
; CHECK-NEXT: vst1.64 {d20, d21}, [r3:128]!
; CHECK-NEXT: vst1.64 {d22, d23}, [r3:128]!
@@ -283,194 +273,180 @@ define void @func_blend19(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea
define void @func_blend20(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storeaddr) {
; CHECK-LABEL: func_blend20:
; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT: .vsave {d8, d9}
-; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT: add r8, r1, #64
-; CHECK-NEXT: add lr, r0, #64
-; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128]!
+; CHECK-NEXT: add r9, r0, #64
+; CHECK-NEXT: vld1.64 {d18, d19}, [r9:128]!
+; CHECK-NEXT: mvn lr, #0
; CHECK-NEXT: mov r12, #0
-; CHECK-NEXT: vld1.64 {d24, d25}, [r0:128]!
-; CHECK-NEXT: vmov r4, r5, d17
-; CHECK-NEXT: vmov r6, r7, d25
-; CHECK-NEXT: vld1.64 {d18, d19}, [lr:128]!
-; CHECK-NEXT: vld1.64 {d20, d21}, [r8:128]!
-; CHECK-NEXT: vld1.64 {d22, d23}, [r8:128]!
-; CHECK-NEXT: vld1.64 {d0, d1}, [lr:128]!
-; CHECK-NEXT: subs r4, r6, r4
-; CHECK-NEXT: sbcs r4, r7, r5
+; CHECK-NEXT: vld1.64 {d16, d17}, [r8:128]!
+; CHECK-NEXT: vmov r2, r7, d19
+; CHECK-NEXT: vmov r5, r6, d17
+; CHECK-NEXT: vld1.64 {d20, d21}, [r0:128]!
+; CHECK-NEXT: vld1.64 {d24, d25}, [r1:128]!
+; CHECK-NEXT: vld1.64 {d30, d31}, [r1:128]!
+; CHECK-NEXT: vld1.64 {d0, d1}, [r0:128]!
+; CHECK-NEXT: vld1.64 {d22, d23}, [r9:128]!
+; CHECK-NEXT: subs r2, r2, r5
+; CHECK-NEXT: sbcs r2, r7, r6
; CHECK-NEXT: vmov r5, r6, d16
-; CHECK-NEXT: vmov r7, r2, d24
-; CHECK-NEXT: mov r4, #0
-; CHECK-NEXT: movlt r4, #1
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: mvnne r4, #0
-; CHECK-NEXT: vdup.32 d27, r4
+; CHECK-NEXT: vmov r7, r4, d18
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: movlt r2, #1
+; CHECK-NEXT: movlt r2, lr
+; CHECK-NEXT: vdup.32 d29, r2
; CHECK-NEXT: subs r5, r7, r5
-; CHECK-NEXT: sbcs r2, r2, r6
+; CHECK-NEXT: sbcs r4, r4, r6
; CHECK-NEXT: vmov r5, r6, d1
+; CHECK-NEXT: mov r4, #0
+; CHECK-NEXT: movlt r4, #1
+; CHECK-NEXT: movlt r4, lr
+; CHECK-NEXT: vdup.32 d28, r4
+; CHECK-NEXT: vmov r2, r4, d31
+; CHECK-NEXT: subs r2, r5, r2
+; CHECK-NEXT: sbcs r2, r6, r4
+; CHECK-NEXT: vmov r5, r6, d0
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: movlt r2, #1
+; CHECK-NEXT: movlt r2, lr
+; CHECK-NEXT: vdup.32 d3, r2
+; CHECK-NEXT: vmov r2, r4, d30
+; CHECK-NEXT: subs r2, r5, r2
+; CHECK-NEXT: sbcs r2, r6, r4
+; CHECK-NEXT: vmov r5, r6, d21
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: movlt r2, #1
+; CHECK-NEXT: movlt r2, lr
+; CHECK-NEXT: vdup.32 d2, r2
+; CHECK-NEXT: vmov r2, r4, d25
+; CHECK-NEXT: subs r2, r5, r2
+; CHECK-NEXT: sbcs r2, r6, r4
+; CHECK-NEXT: vmov r5, r6, d20
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: movlt r2, #1
+; CHECK-NEXT: movlt r2, lr
+; CHECK-NEXT: vdup.32 d27, r2
+; CHECK-NEXT: vmov r2, r4, d24
+; CHECK-NEXT: subs r2, r5, r2
+; CHECK-NEXT: sbcs r2, r6, r4
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movlt r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: mvnne r2, #0
+; CHECK-NEXT: movlt r2, lr
; CHECK-NEXT: vdup.32 d26, r2
-; CHECK-NEXT: vmov r2, r4, d23
-; CHECK-NEXT: vbit q8, q12, q13
-; CHECK-NEXT: vld1.64 {d24, d25}, [r0:128]!
-; CHECK-NEXT: vld1.64 {d26, d27}, [r1:128]!
-; CHECK-NEXT: vld1.64 {d28, d29}, [lr:128]!
+; CHECK-NEXT: vbif q10, q12, q13
+; CHECK-NEXT: vld1.64 {d26, d27}, [r8:128]!
+; CHECK-NEXT: vld1.64 {d24, d25}, [r8:128]!
+; CHECK-NEXT: vbit q8, q9, q14
+; CHECK-NEXT: vorr q9, q1, q1
+; CHECK-NEXT: vmov r2, r4, d25
+; CHECK-NEXT: vbsl q9, q0, q15
+; CHECK-NEXT: vld1.64 {d28, d29}, [r9:128]!
+; CHECK-NEXT: vmov r5, r6, d29
+; CHECK-NEXT: vld1.64 {d8, d9}, [r0:128]!
+; CHECK-NEXT: vld1.64 {d10, d11}, [r1:128]!
+; CHECK-NEXT: vld1.64 {d2, d3}, [r1:128]
+; CHECK-NEXT: vld1.64 {d4, d5}, [r0:128]
+; CHECK-NEXT: vld1.64 {d0, d1}, [r9:128]
; CHECK-NEXT: subs r2, r5, r2
; CHECK-NEXT: sbcs r2, r6, r4
-; CHECK-NEXT: vmov r4, r5, d22
-; CHECK-NEXT: vmov r6, r7, d0
+; CHECK-NEXT: vmov r5, r6, d28
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movlt r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: mvnne r2, #0
-; CHECK-NEXT: vdup.32 d3, r2
-; CHECK-NEXT: subs r4, r6, r4
-; CHECK-NEXT: sbcs r4, r7, r5
-; CHECK-NEXT: vmov r2, r5, d27
-; CHECK-NEXT: vmov r6, r7, d25
-; CHECK-NEXT: mov r4, #0
-; CHECK-NEXT: movlt r4, #1
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: mvnne r4, #0
-; CHECK-NEXT: vdup.32 d2, r4
+; CHECK-NEXT: movlt r2, lr
+; CHECK-NEXT: vdup.32 d31, r2
+; CHECK-NEXT: vmov r2, r4, d24
+; CHECK-NEXT: subs r2, r5, r2
+; CHECK-NEXT: mov r5, #0
+; CHECK-NEXT: sbcs r2, r6, r4
+; CHECK-NEXT: vmov r6, r7, d23
+; CHECK-NEXT: vmov r2, r4, d27
+; CHECK-NEXT: movlt r5, #1
+; CHECK-NEXT: movlt r5, lr
+; CHECK-NEXT: vdup.32 d30, r5
; CHECK-NEXT: subs r2, r6, r2
-; CHECK-NEXT: sbcs r2, r7, r5
-; CHECK-NEXT: vmov r6, r7, d24
+; CHECK-NEXT: sbcs r2, r7, r4
+; CHECK-NEXT: vmov r6, r7, d22
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movlt r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: mvnne r2, #0
-; CHECK-NEXT: vdup.32 d5, r2
-; CHECK-NEXT: vmov r2, r5, d26
+; CHECK-NEXT: movlt r2, lr
+; CHECK-NEXT: vdup.32 d7, r2
+; CHECK-NEXT: vmov r2, r4, d26
; CHECK-NEXT: subs r2, r6, r2
-; CHECK-NEXT: sbcs r2, r7, r5
-; CHECK-NEXT: vmov r6, r7, d19
+; CHECK-NEXT: sbcs r2, r7, r4
+; CHECK-NEXT: vmov r6, r7, d9
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movlt r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: mvnne r2, #0
-; CHECK-NEXT: vdup.32 d4, r2
-; CHECK-NEXT: vmov r2, r5, d21
+; CHECK-NEXT: movlt r2, lr
+; CHECK-NEXT: vdup.32 d6, r2
+; CHECK-NEXT: vmov r2, r4, d11
+; CHECK-NEXT: vbif q11, q13, q3
; CHECK-NEXT: subs r2, r6, r2
-; CHECK-NEXT: sbcs r2, r7, r5
-; CHECK-NEXT: vmov r6, r7, d18
+; CHECK-NEXT: sbcs r2, r7, r4
+; CHECK-NEXT: vmov r6, r7, d8
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movlt r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: mvnne r2, #0
-; CHECK-NEXT: vdup.32 d31, r2
-; CHECK-NEXT: vmov r2, r5, d20
+; CHECK-NEXT: movlt r2, lr
+; CHECK-NEXT: vdup.32 d13, r2
+; CHECK-NEXT: vmov r2, r4, d10
; CHECK-NEXT: subs r2, r6, r2
-; CHECK-NEXT: sbcs r2, r7, r5
+; CHECK-NEXT: vmov r5, r6, d2
+; CHECK-NEXT: sbcs r2, r7, r4
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movlt r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: mvnne r2, #0
-; CHECK-NEXT: vdup.32 d30, r2
-; CHECK-NEXT: vbif q9, q10, q15
-; CHECK-NEXT: vld1.64 {d30, d31}, [r8:128]!
-; CHECK-NEXT: vld1.64 {d20, d21}, [r8:128]
-; CHECK-NEXT: vbit q13, q12, q2
-; CHECK-NEXT: vld1.64 {d24, d25}, [lr:128]
-; CHECK-NEXT: vmov r2, r7, d21
-; CHECK-NEXT: vbit q11, q0, q1
-; CHECK-NEXT: mov lr, #0
-; CHECK-NEXT: vmov r6, r5, d25
-; CHECK-NEXT: vld1.64 {d4, d5}, [r1:128]!
-; CHECK-NEXT: vld1.64 {d6, d7}, [r0:128]!
-; CHECK-NEXT: vld1.64 {d0, d1}, [r1:128]
-; CHECK-NEXT: vld1.64 {d2, d3}, [r0:128]
-; CHECK-NEXT: subs r1, r6, r2
-; CHECK-NEXT: vmov r0, r6, d2
-; CHECK-NEXT: sbcs r1, r5, r7
-; CHECK-NEXT: vmov r2, r7, d0
-; CHECK-NEXT: movlt lr, #1
-; CHECK-NEXT: cmp lr, #0
-; CHECK-NEXT: mvnne lr, #0
-; CHECK-NEXT: subs r0, r0, r2
-; CHECK-NEXT: sbcs r0, r6, r7
-; CHECK-NEXT: vmov r2, r7, d30
-; CHECK-NEXT: vmov r6, r5, d28
+; CHECK-NEXT: movlt r2, lr
+; CHECK-NEXT: vdup.32 d12, r2
+; CHECK-NEXT: vmov r2, r4, d1
+; CHECK-NEXT: vorr q13, q6, q6
+; CHECK-NEXT: vbsl q13, q4, q5
+; CHECK-NEXT: vbit q12, q14, q15
+; CHECK-NEXT: vld1.64 {d28, d29}, [r8:128]
+; CHECK-NEXT: vmov r0, r1, d29
+; CHECK-NEXT: subs r0, r2, r0
+; CHECK-NEXT: vmov r2, r7, d4
+; CHECK-NEXT: sbcs r0, r4, r1
+; CHECK-NEXT: vmov r1, r4, d3
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movlt r0, #1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mvnne r0, #0
-; CHECK-NEXT: subs r2, r6, r2
-; CHECK-NEXT: sbcs r2, r5, r7
-; CHECK-NEXT: vmov r7, r6, d31
-; CHECK-NEXT: vmov r5, r4, d29
+; CHECK-NEXT: movlt r0, lr
+; CHECK-NEXT: subs r2, r2, r5
+; CHECK-NEXT: sbcs r2, r7, r6
+; CHECK-NEXT: vmov r5, r6, d5
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movlt r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: mvnne r2, #0
-; CHECK-NEXT: subs r7, r5, r7
-; CHECK-NEXT: vmov r5, r1, d7
-; CHECK-NEXT: sbcs r7, r4, r6
+; CHECK-NEXT: movlt r2, lr
+; CHECK-NEXT: subs r1, r5, r1
+; CHECK-NEXT: sbcs r4, r6, r4
+; CHECK-NEXT: vmov r1, r5, d28
; CHECK-NEXT: mov r4, #0
-; CHECK-NEXT: vmov r7, r6, d5
; CHECK-NEXT: movlt r4, #1
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: mvnne r4, #0
-; CHECK-NEXT: subs r5, r5, r7
-; CHECK-NEXT: sbcs r1, r1, r6
-; CHECK-NEXT: vmov r6, r7, d6
-; CHECK-NEXT: mov r1, #0
-; CHECK-NEXT: movlt r1, #1
-; CHECK-NEXT: cmp r1, #0
-; CHECK-NEXT: mvnne r1, #0
-; CHECK-NEXT: vdup.32 d9, r1
-; CHECK-NEXT: vmov r1, r5, d4
-; CHECK-NEXT: subs r1, r6, r1
-; CHECK-NEXT: sbcs r1, r7, r5
-; CHECK-NEXT: vmov r6, r7, d3
-; CHECK-NEXT: mov r1, #0
-; CHECK-NEXT: movlt r1, #1
-; CHECK-NEXT: cmp r1, #0
-; CHECK-NEXT: mvnne r1, #0
-; CHECK-NEXT: vdup.32 d8, r1
-; CHECK-NEXT: vmov r1, r5, d1
-; CHECK-NEXT: vbit q2, q3, q4
-; CHECK-NEXT: vdup.32 d9, r4
-; CHECK-NEXT: vdup.32 d8, r2
-; CHECK-NEXT: subs r1, r6, r1
-; CHECK-NEXT: sbcs r1, r7, r5
-; CHECK-NEXT: vmov r5, r6, d24
-; CHECK-NEXT: mov r1, #0
-; CHECK-NEXT: movlt r1, #1
-; CHECK-NEXT: cmp r1, #0
-; CHECK-NEXT: mvnne r1, #0
-; CHECK-NEXT: vdup.32 d7, r1
-; CHECK-NEXT: vmov r1, r4, d20
-; CHECK-NEXT: vdup.32 d6, r0
-; CHECK-NEXT: subs r1, r5, r1
-; CHECK-NEXT: mov r1, r3
-; CHECK-NEXT: sbcs r0, r6, r4
-; CHECK-NEXT: vst1.64 {d16, d17}, [r1:128]!
-; CHECK-NEXT: vorr q8, q4, q4
-; CHECK-NEXT: movlt r12, #1
-; CHECK-NEXT: cmp r12, #0
-; CHECK-NEXT: vbsl q8, q14, q15
-; CHECK-NEXT: vdup.32 d29, lr
-; CHECK-NEXT: vorr q15, q3, q3
-; CHECK-NEXT: mvnne r12, #0
-; CHECK-NEXT: vdup.32 d28, r12
+; CHECK-NEXT: movlt r4, lr
+; CHECK-NEXT: vdup.32 d31, r4
+; CHECK-NEXT: mov r4, r3
+; CHECK-NEXT: vdup.32 d30, r2
+; CHECK-NEXT: vmov r2, r6, d0
+; CHECK-NEXT: vst1.64 {d20, d21}, [r4:128]!
+; CHECK-NEXT: vbsl q15, q2, q1
+; CHECK-NEXT: vdup.32 d21, r0
; CHECK-NEXT: add r0, r3, #64
-; CHECK-NEXT: vbsl q15, q1, q0
-; CHECK-NEXT: vst1.64 {d26, d27}, [r1:128]!
-; CHECK-NEXT: vbit q10, q12, q14
-; CHECK-NEXT: vst1.64 {d18, d19}, [r0:128]!
-; CHECK-NEXT: vst1.64 {d22, d23}, [r0:128]!
-; CHECK-NEXT: vst1.64 {d4, d5}, [r1:128]!
+; CHECK-NEXT: vst1.64 {d18, d19}, [r4:128]!
+; CHECK-NEXT: subs r1, r2, r1
+; CHECK-NEXT: sbcs r1, r6, r5
+; CHECK-NEXT: movlt r12, #1
+; CHECK-NEXT: movlt r12, lr
+; CHECK-NEXT: vdup.32 d20, r12
+; CHECK-NEXT: vorr q9, q10, q10
+; CHECK-NEXT: vbsl q9, q0, q14
; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]!
-; CHECK-NEXT: vst1.64 {d30, d31}, [r1:128]
-; CHECK-NEXT: vst1.64 {d20, d21}, [r0:128]
-; CHECK-NEXT: vpop {d8, d9}
-; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: vst1.64 {d22, d23}, [r0:128]!
+; CHECK-NEXT: vst1.64 {d26, d27}, [r4:128]!
+; CHECK-NEXT: vst1.64 {d24, d25}, [r0:128]!
+; CHECK-NEXT: vst1.64 {d30, d31}, [r4:128]
+; CHECK-NEXT: vst1.64 {d18, d19}, [r0:128]
+; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEXT: mov pc, lr
; COST: func_blend20
; COST: cost of 0 {{.*}} icmp
diff --git a/llvm/test/CodeGen/ARM/wide-compares.ll b/llvm/test/CodeGen/ARM/wide-compares.ll
index 9acf8d249ddf1..38f31682cc2a7 100644
--- a/llvm/test/CodeGen/ARM/wide-compares.ll
+++ b/llvm/test/CodeGen/ARM/wide-compares.ll
@@ -8,9 +8,9 @@ define i32 @test_slt1(i64 %a, i64 %b) {
; CHECK-ARM-LABEL: test_slt1:
; CHECK-ARM: @ %bb.0: @ %entry
; CHECK-ARM-NEXT: subs r0, r0, r2
-; CHECK-ARM-NEXT: mov r12, #2
+; CHECK-ARM-NEXT: mov r12, #1
; CHECK-ARM-NEXT: sbcs r0, r1, r3
-; CHECK-ARM-NEXT: movwlt r12, #1
+; CHECK-ARM-NEXT: movwge r12, #2
; CHECK-ARM-NEXT: mov r0, r12
; CHECK-ARM-NEXT: bx lr
;
@@ -41,10 +41,10 @@ define i32 @test_slt1(i64 %a, i64 %b) {
; CHECK-THUMB2-LABEL: test_slt1:
; CHECK-THUMB2: @ %bb.0: @ %entry
; CHECK-THUMB2-NEXT: subs r0, r0, r2
-; CHECK-THUMB2-NEXT: mov.w r12, #2
+; CHECK-THUMB2-NEXT: mov.w r12, #1
; CHECK-THUMB2-NEXT: sbcs.w r0, r1, r3
-; CHECK-THUMB2-NEXT: it lt
-; CHECK-THUMB2-NEXT: movlt.w r12, #1
+; CHECK-THUMB2-NEXT: it ge
+; CHECK-THUMB2-NEXT: movge.w r12, #2
; CHECK-THUMB2-NEXT: mov r0, r12
; CHECK-THUMB2-NEXT: bx lr
entry:
More information about the llvm-commits
mailing list