[llvm] 91f8289 - Revert "[DAGCombine] Fold redundant select"
Samuel Parker via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 7 02:37:32 PST 2023
Author: Samuel Parker
Date: 2023-02-07T10:37:20Z
New Revision: 91f8289ff0408e071e2ea32cf6b270778b1acdce
URL: https://github.com/llvm/llvm-project/commit/91f8289ff0408e071e2ea32cf6b270778b1acdce
DIFF: https://github.com/llvm/llvm-project/commit/91f8289ff0408e071e2ea32cf6b270778b1acdce.diff
LOG: Revert "[DAGCombine] Fold redundant select"
This reverts commit bbdf24357932b064f2aa18ea1356b474e0220dde.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/ARM/fpclamptosat.ll
llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
llvm/test/CodeGen/RISCV/fpclamptosat.ll
llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
llvm/test/CodeGen/X86/fpclamptosat.ll
llvm/test/CodeGen/X86/fpclamptosat_vec.ll
llvm/test/CodeGen/X86/sdiv_fix_sat.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index dbbbcd1da9a4e..d7040bf8c2975 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10956,73 +10956,6 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
}
}
- // If we have a chain of two selects, which share a true/false value and
- // both are controlled from the two setcc nodes which cannot produce the
- // same value, we can fold away N.
- // select (setcc X), Y, (select (setcc X), Z, Y) -> select (setcc X), Z, Y
- auto IsSelect = [](SDValue Op) {
- return Op->getOpcode() == ISD::SELECT;
- };
- if ((IsSelect(N1) || IsSelect(N2)) && (N1.getOpcode() != N2.getOpcode())) {
- auto AreSame = [](SDValue Op0, SDValue Op1) {
- if (Op0 == Op1)
- return true;
- auto *C0 = dyn_cast<ConstantSDNode>(Op0);
- auto *C1 = dyn_cast<ConstantSDNode>(Op1);
- return C0 && C1 &&
- APInt::isSameValue(C0->getAPIntValue(), C1->getAPIntValue());
- };
-
- SDValue OtherSelect;
- bool SelectsShareOp = false;
- if (IsSelect(N1)) {
- OtherSelect = N1;
- SelectsShareOp = AreSame(OtherSelect.getOperand(1), N2);
- } else {
- OtherSelect = N2;
- SelectsShareOp = AreSame(OtherSelect.getOperand(2), N1);
- }
-
- auto CanNeverBeEqual = [](SDValue SetCC0, SDValue SetCC1) {
- if (SetCC0->getOpcode() != ISD::SETCC ||
- SetCC1->getOpcode() != ISD::SETCC ||
- SetCC0->getOperand(0) != SetCC1->getOperand(0))
- return false;
-
- ISD::CondCode CC0 = cast<CondCodeSDNode>(SetCC0.getOperand(2))->get();
- ISD::CondCode CC1 = cast<CondCodeSDNode>(SetCC1.getOperand(2))->get();
- auto *C0 = dyn_cast<ConstantSDNode>(SetCC0.getOperand(1));
- auto *C1 = dyn_cast<ConstantSDNode>(SetCC1.getOperand(1));
- if (!C0 || !C1)
- return false;
-
- bool ConstantsAreSame =
- APInt::isSameValue(C0->getAPIntValue(), C1->getAPIntValue());
- auto IsEqual = [](ISD::CondCode CC) {
- return CC == ISD::SETEQ;
- };
- auto IsNotEqual = [](ISD::CondCode CC) {
- return CC == ISD::SETLT || CC == ISD::SETULT ||
- CC == ISD::SETGT || CC == ISD::SETUGT ||
- CC == ISD::SETNE;
- };
-
- if (ConstantsAreSame && IsNotEqual(CC0) && IsEqual(CC1))
- return true;
- if (ConstantsAreSame && IsNotEqual(CC1) && IsEqual(CC0))
- return true;
- if (!ConstantsAreSame && IsEqual(CC0) && IsEqual(CC1))
- return true;
-
- return false;
- };
-
- SDValue SetCC0 = N0;
- SDValue SetCC1 = OtherSelect.getOperand(0);
- if (SelectsShareOp && CanNeverBeEqual(SetCC0, SetCC1))
- return OtherSelect;
- }
-
if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
(!LegalOperations &&
TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
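
For context on the combine being reverted: the removed comment describes the
fold select (setcc X), Y, (select (setcc X), Z, Y) -> select (setcc X), Z, Y,
which is sound whenever the two setcc conditions can never be true at the same
time. A minimal standalone C++ sketch of that reasoning (illustrative names
and constants only; this snippet is not part of the patch):

    // Scalar model of the reverted fold. EqC and NeC compare the same
    // value against the same constant with mutually exclusive predicates,
    // so they can never both hold.
    int foldExample(int X, int Y, int Z) {
      bool NeC = (X != 10);         // inner setcc: X ne 10
      bool EqC = (X == 10);         // outer setcc: X eq 10
      int Inner = NeC ? Z : Y;      // inner select
      int Outer = EqC ? Y : Inner;  // outer select shares false value Y
      // If EqC holds, NeC is false and Inner is already Y, so Outer is
      // always equal to Inner and the outer select folds away.
      return Outer;
    }

The removed DAG code also accepted two eq comparisons against different
constants, which likewise cannot both be true.
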
diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll
index 1f5e3051e92c1..91c1a21964309 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll
@@ -3099,117 +3099,130 @@ define i64 @stest_f64i64_mm(double %x) {
; SOFT: @ %bb.0: @ %entry
; SOFT-NEXT: .save {r4, r5, r6, r7, lr}
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
-; SOFT-NEXT: .pad #20
-; SOFT-NEXT: sub sp, #20
+; SOFT-NEXT: .pad #12
+; SOFT-NEXT: sub sp, #12
; SOFT-NEXT: bl __fixdfti
-; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT: mov r6, r0
; SOFT-NEXT: mov r4, r1
-; SOFT-NEXT: ldr r0, .LCPI45_0
-; SOFT-NEXT: cmp r1, r0
-; SOFT-NEXT: mov r5, r1
+; SOFT-NEXT: ldr r5, .LCPI45_0
+; SOFT-NEXT: cmp r1, r5
; SOFT-NEXT: blo .LBB45_2
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: ldr r5, .LCPI45_0
+; SOFT-NEXT: mov r1, r5
; SOFT-NEXT: .LBB45_2: @ %entry
; SOFT-NEXT: cmp r3, #0
-; SOFT-NEXT: mov r1, r4
+; SOFT-NEXT: mov r0, r4
; SOFT-NEXT: bmi .LBB45_4
; SOFT-NEXT: @ %bb.3: @ %entry
-; SOFT-NEXT: ldr r1, .LCPI45_0
+; SOFT-NEXT: mov r0, r5
; SOFT-NEXT: .LBB45_4: @ %entry
-; SOFT-NEXT: mov r0, r2
-; SOFT-NEXT: orrs r0, r3
-; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT: mov r7, r2
+; SOFT-NEXT: orrs r7, r3
; SOFT-NEXT: beq .LBB45_6
; SOFT-NEXT: @ %bb.5: @ %entry
-; SOFT-NEXT: mov r5, r1
+; SOFT-NEXT: mov r1, r0
; SOFT-NEXT: .LBB45_6: @ %entry
; SOFT-NEXT: movs r0, #0
-; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT: cmp r3, #0
-; SOFT-NEXT: mov r7, r3
-; SOFT-NEXT: bmi .LBB45_8
+; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: mvns r2, r0
+; SOFT-NEXT: cmp r4, r5
+; SOFT-NEXT: mov r0, r6
+; SOFT-NEXT: blo .LBB45_8
; SOFT-NEXT: @ %bb.7: @ %entry
-; SOFT-NEXT: ldr r7, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT: mov r0, r2
; SOFT-NEXT: .LBB45_8: @ %entry
-; SOFT-NEXT: movs r1, #1
-; SOFT-NEXT: lsls r1, r1, #31
-; SOFT-NEXT: cmp r7, #0
-; SOFT-NEXT: mov r6, r5
-; SOFT-NEXT: bge .LBB45_10
+; SOFT-NEXT: cmp r4, r5
+; SOFT-NEXT: mov r4, r6
+; SOFT-NEXT: bne .LBB45_26
; SOFT-NEXT: @ %bb.9: @ %entry
-; SOFT-NEXT: mov r6, r1
+; SOFT-NEXT: cmp r3, #0
+; SOFT-NEXT: bpl .LBB45_27
; SOFT-NEXT: .LBB45_10: @ %entry
-; SOFT-NEXT: cmp r5, r1
-; SOFT-NEXT: mov r0, r5
-; SOFT-NEXT: bhi .LBB45_12
-; SOFT-NEXT: @ %bb.11: @ %entry
-; SOFT-NEXT: mov r0, r1
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: beq .LBB45_12
+; SOFT-NEXT: .LBB45_11: @ %entry
+; SOFT-NEXT: mov r4, r6
; SOFT-NEXT: .LBB45_12: @ %entry
-; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: asrs r0, r3, #31
-; SOFT-NEXT: ands r0, r2
-; SOFT-NEXT: ands r0, r7
-; SOFT-NEXT: adds r0, r0, #1
-; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
-; SOFT-NEXT: beq .LBB45_14
+; SOFT-NEXT: movs r0, #1
+; SOFT-NEXT: lsls r5, r0, #31
+; SOFT-NEXT: cmp r1, r5
+; SOFT-NEXT: mov r2, r4
+; SOFT-NEXT: ldr r6, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: bhi .LBB45_14
; SOFT-NEXT: @ %bb.13: @ %entry
-; SOFT-NEXT: str r6, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: mov r2, r6
; SOFT-NEXT: .LBB45_14: @ %entry
-; SOFT-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT: mvns r0, r0
-; SOFT-NEXT: ldr r2, .LCPI45_0
-; SOFT-NEXT: cmp r4, r2
-; SOFT-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT: blo .LBB45_16
+; SOFT-NEXT: cmp r1, r5
+; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: beq .LBB45_16
; SOFT-NEXT: @ %bb.15: @ %entry
-; SOFT-NEXT: mov r6, r0
+; SOFT-NEXT: mov r0, r2
; SOFT-NEXT: .LBB45_16: @ %entry
-; SOFT-NEXT: cmp r4, r2
-; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT: beq .LBB45_18
+; SOFT-NEXT: cmp r3, #0
+; SOFT-NEXT: mov r7, r3
+; SOFT-NEXT: bpl .LBB45_28
; SOFT-NEXT: @ %bb.17: @ %entry
-; SOFT-NEXT: mov r4, r6
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: blt .LBB45_29
; SOFT-NEXT: .LBB45_18: @ %entry
; SOFT-NEXT: cmp r3, #0
-; SOFT-NEXT: bmi .LBB45_20
-; SOFT-NEXT: @ %bb.19: @ %entry
-; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT: beq .LBB45_20
+; SOFT-NEXT: .LBB45_19:
+; SOFT-NEXT: asrs r3, r3, #31
+; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
+; SOFT-NEXT: ands r3, r2
; SOFT-NEXT: .LBB45_20: @ %entry
-; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
-; SOFT-NEXT: cmp r0, #0
+; SOFT-NEXT: ands r3, r7
+; SOFT-NEXT: adds r2, r3, #1
; SOFT-NEXT: beq .LBB45_22
; SOFT-NEXT: @ %bb.21: @ %entry
-; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT: mov r0, r4
; SOFT-NEXT: .LBB45_22: @ %entry
-; SOFT-NEXT: cmp r5, r1
-; SOFT-NEXT: mov r2, r4
-; SOFT-NEXT: bhi .LBB45_24
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: mov r3, r1
+; SOFT-NEXT: blt .LBB45_30
; SOFT-NEXT: @ %bb.23: @ %entry
-; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT: cmp r1, r5
+; SOFT-NEXT: bls .LBB45_31
; SOFT-NEXT: .LBB45_24: @ %entry
-; SOFT-NEXT: cmp r5, r1
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: beq .LBB45_26
-; SOFT-NEXT: @ %bb.25: @ %entry
-; SOFT-NEXT: mov r0, r2
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: bne .LBB45_32
+; SOFT-NEXT: .LBB45_25: @ %entry
+; SOFT-NEXT: add sp, #12
+; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .LBB45_26: @ %entry
+; SOFT-NEXT: mov r4, r0
+; SOFT-NEXT: cmp r3, #0
+; SOFT-NEXT: bmi .LBB45_10
+; SOFT-NEXT: .LBB45_27: @ %entry
+; SOFT-NEXT: mov r6, r2
; SOFT-NEXT: cmp r7, #0
-; SOFT-NEXT: bge .LBB45_28
-; SOFT-NEXT: @ %bb.27: @ %entry
-; SOFT-NEXT: ldr r4, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT: bne .LBB45_11
+; SOFT-NEXT: b .LBB45_12
; SOFT-NEXT: .LBB45_28: @ %entry
-; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload
-; SOFT-NEXT: cmp r1, #0
-; SOFT-NEXT: beq .LBB45_30
-; SOFT-NEXT: @ %bb.29: @ %entry
-; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: mov r7, r6
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bge .LBB45_18
+; SOFT-NEXT: .LBB45_29: @ %entry
+; SOFT-NEXT: mov r4, r6
+; SOFT-NEXT: cmp r3, #0
+; SOFT-NEXT: bne .LBB45_19
+; SOFT-NEXT: b .LBB45_20
; SOFT-NEXT: .LBB45_30: @ %entry
-; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
-; SOFT-NEXT: add sp, #20
+; SOFT-NEXT: mov r3, r5
+; SOFT-NEXT: cmp r1, r5
+; SOFT-NEXT: bhi .LBB45_24
+; SOFT-NEXT: .LBB45_31: @ %entry
+; SOFT-NEXT: mov r1, r5
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: beq .LBB45_25
+; SOFT-NEXT: .LBB45_32: @ %entry
+; SOFT-NEXT: mov r1, r3
+; SOFT-NEXT: add sp, #12
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .p2align 2
-; SOFT-NEXT: @ %bb.31:
+; SOFT-NEXT: @ %bb.33:
; SOFT-NEXT: .LCPI45_0:
; SOFT-NEXT: .long 2147483647 @ 0x7fffffff
;
@@ -3231,43 +3244,46 @@ define i64 @stest_f64i64_mm(double %x) {
; VFP2-NEXT: mov.w r5, #0
; VFP2-NEXT: it mi
; VFP2-NEXT: movmi r4, r12
-; VFP2-NEXT: orrs.w r7, r2, r3
+; VFP2-NEXT: orrs.w r9, r2, r3
; VFP2-NEXT: it eq
; VFP2-NEXT: moveq r4, r1
; VFP2-NEXT: cmp r3, #0
; VFP2-NEXT: it mi
; VFP2-NEXT: movmi r5, r3
-; VFP2-NEXT: and.w r2, r2, r3, asr #31
-; VFP2-NEXT: mov.w r1, #-2147483648
; VFP2-NEXT: cmp.w r5, #-1
-; VFP2-NEXT: mov.w r6, #-2147483648
-; VFP2-NEXT: and.w r2, r2, r5
+; VFP2-NEXT: mov.w r7, #-2147483648
+; VFP2-NEXT: mov.w r1, #-2147483648
; VFP2-NEXT: it gt
-; VFP2-NEXT: movgt r6, r4
+; VFP2-NEXT: movgt r7, r4
; VFP2-NEXT: cmp.w r4, #-2147483648
+; VFP2-NEXT: mov r6, r3
; VFP2-NEXT: it hi
; VFP2-NEXT: movhi r1, r4
-; VFP2-NEXT: adds r2, #1
+; VFP2-NEXT: cmp r3, #0
; VFP2-NEXT: it ne
-; VFP2-NEXT: movne r1, r6
+; VFP2-NEXT: andne.w r6, r2, r6, asr #31
+; VFP2-NEXT: and.w r2, r6, r5
; VFP2-NEXT: mov.w r6, #-1
+; VFP2-NEXT: adds r2, #1
+; VFP2-NEXT: it ne
+; VFP2-NEXT: movne r1, r7
+; VFP2-NEXT: mov.w r7, #-1
; VFP2-NEXT: cmp r12, r8
; VFP2-NEXT: it lo
-; VFP2-NEXT: movlo r6, r0
+; VFP2-NEXT: movlo r7, r0
+; VFP2-NEXT: mov.w lr, #0
; VFP2-NEXT: it eq
-; VFP2-NEXT: moveq r6, r0
+; VFP2-NEXT: moveq r7, r0
; VFP2-NEXT: cmp r3, #0
-; VFP2-NEXT: mov.w r9, #-1
; VFP2-NEXT: it pl
-; VFP2-NEXT: movpl r0, r9
-; VFP2-NEXT: cmp r7, #0
+; VFP2-NEXT: movpl r0, r6
+; VFP2-NEXT: cmp.w r9, #0
; VFP2-NEXT: mov.w r3, #0
; VFP2-NEXT: it eq
-; VFP2-NEXT: moveq r0, r6
+; VFP2-NEXT: moveq r0, r7
; VFP2-NEXT: cmp.w r4, #-2147483648
; VFP2-NEXT: it hi
; VFP2-NEXT: movhi r3, r0
-; VFP2-NEXT: mov.w lr, #0
; VFP2-NEXT: it eq
; VFP2-NEXT: moveq r3, r0
; VFP2-NEXT: cmp.w r5, #-1
@@ -3281,8 +3297,10 @@ define i64 @stest_f64i64_mm(double %x) {
;
; FULL-LABEL: stest_f64i64_mm:
; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r4, r5, r6, r7, r8, lr}
-; FULL-NEXT: push.w {r4, r5, r6, r7, r8, lr}
+; FULL-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
+; FULL-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
+; FULL-NEXT: .pad #4
+; FULL-NEXT: sub sp, #4
; FULL-NEXT: bl __fixdfti
; FULL-NEXT: mvn r12, #-2147483648
; FULL-NEXT: cmp r1, r12
@@ -3295,14 +3313,17 @@ define i64 @stest_f64i64_mm(double %x) {
; FULL-NEXT: cmp r3, #0
; FULL-NEXT: mov.w r7, #-2147483648
; FULL-NEXT: csel r6, r3, lr, mi
-; FULL-NEXT: and.w r2, r2, r3, asr #31
+; FULL-NEXT: mov r5, r3
; FULL-NEXT: cmp.w r6, #-1
-; FULL-NEXT: and.w r2, r2, r6
-; FULL-NEXT: csel r5, r4, r7, gt
+; FULL-NEXT: csel r9, r4, r7, gt
; FULL-NEXT: cmp.w r4, #-2147483648
; FULL-NEXT: csel r7, r4, r7, hi
-; FULL-NEXT: adds r2, #1
-; FULL-NEXT: csel r5, r7, r5, eq
+; FULL-NEXT: cmp r3, #0
+; FULL-NEXT: it ne
+; FULL-NEXT: andne.w r5, r2, r5, asr #31
+; FULL-NEXT: and.w r2, r5, r6
+; FULL-NEXT: adds r5, r2, #1
+; FULL-NEXT: csel r2, r7, r9, eq
; FULL-NEXT: mov.w r7, #-1
; FULL-NEXT: cmp r1, r12
; FULL-NEXT: csel r1, r0, r7, lo
@@ -3316,10 +3337,11 @@ define i64 @stest_f64i64_mm(double %x) {
; FULL-NEXT: csel r1, r0, r1, eq
; FULL-NEXT: cmp.w r6, #-1
; FULL-NEXT: csel r0, r0, lr, gt
-; FULL-NEXT: cmp r2, #0
+; FULL-NEXT: cmp r5, #0
; FULL-NEXT: csel r0, r1, r0, eq
-; FULL-NEXT: mov r1, r5
-; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
+; FULL-NEXT: mov r1, r2
+; FULL-NEXT: add sp, #4
+; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
entry:
%conv = fptosi double %x to i128
%spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
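
These fpclamptosat tests all share one shape: the input is widened with fptosi
to i128 and then clamped back into the i64 range via llvm.smin.i128 (and,
presumably, a matching llvm.smax.i128 outside the quoted context). A rough C++
model of that saturation, a sketch only and not the test itself:

    #include <cstdint>

    // Scalar model of the stest_*_mm saturation pattern; __int128 is a
    // GCC/Clang extension standing in for the tests' i128.
    int64_t saturateToI64(__int128 Conv) {
      const __int128 Max = INT64_MAX;
      const __int128 Min = INT64_MIN;
      if (Conv > Max) Conv = Max;   // llvm.smin.i128(Conv, INT64_MAX)
      if (Conv < Min) Conv = Min;   // assumed llvm.smax.i128 clamp
      return static_cast<int64_t>(Conv);
    }

The reverted combine fired on the chained selects this min/max clamping lowers
to, which is why the generated check lines in these tests change.
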
@@ -3671,6 +3693,7 @@ define i64 @stest_f32i64_mm(float %x) {
; SOFT-NEXT: bl __fixsfti
; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill
; SOFT-NEXT: mov r4, r1
+; SOFT-NEXT: mov r7, r3
; SOFT-NEXT: ldr r0, .LCPI48_0
; SOFT-NEXT: cmp r1, r0
; SOFT-NEXT: mov r5, r1
@@ -3678,105 +3701,114 @@ define i64 @stest_f32i64_mm(float %x) {
; SOFT-NEXT: @ %bb.1: @ %entry
; SOFT-NEXT: ldr r5, .LCPI48_0
; SOFT-NEXT: .LBB48_2: @ %entry
-; SOFT-NEXT: cmp r3, #0
+; SOFT-NEXT: cmp r7, #0
; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: bmi .LBB48_4
; SOFT-NEXT: @ %bb.3: @ %entry
; SOFT-NEXT: ldr r1, .LCPI48_0
; SOFT-NEXT: .LBB48_4: @ %entry
+; SOFT-NEXT: str r2, [sp] @ 4-byte Spill
; SOFT-NEXT: mov r0, r2
-; SOFT-NEXT: orrs r0, r3
+; SOFT-NEXT: orrs r0, r7
; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill
; SOFT-NEXT: beq .LBB48_6
; SOFT-NEXT: @ %bb.5: @ %entry
; SOFT-NEXT: mov r5, r1
; SOFT-NEXT: .LBB48_6: @ %entry
-; SOFT-NEXT: movs r0, #0
-; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT: cmp r3, #0
-; SOFT-NEXT: mov r7, r3
+; SOFT-NEXT: movs r1, #0
+; SOFT-NEXT: str r1, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: mov r2, r7
; SOFT-NEXT: bmi .LBB48_8
; SOFT-NEXT: @ %bb.7: @ %entry
-; SOFT-NEXT: ldr r7, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
; SOFT-NEXT: .LBB48_8: @ %entry
; SOFT-NEXT: movs r1, #1
; SOFT-NEXT: lsls r1, r1, #31
-; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: cmp r2, #0
; SOFT-NEXT: mov r6, r5
; SOFT-NEXT: bge .LBB48_10
; SOFT-NEXT: @ %bb.9: @ %entry
; SOFT-NEXT: mov r6, r1
; SOFT-NEXT: .LBB48_10: @ %entry
; SOFT-NEXT: cmp r5, r1
-; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: mov r3, r5
; SOFT-NEXT: bhi .LBB48_12
; SOFT-NEXT: @ %bb.11: @ %entry
-; SOFT-NEXT: mov r0, r1
+; SOFT-NEXT: mov r3, r1
; SOFT-NEXT: .LBB48_12: @ %entry
-; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: asrs r0, r3, #31
-; SOFT-NEXT: ands r0, r2
-; SOFT-NEXT: ands r0, r7
-; SOFT-NEXT: adds r0, r0, #1
-; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
-; SOFT-NEXT: beq .LBB48_14
+; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB48_14
; SOFT-NEXT: @ %bb.13: @ %entry
+; SOFT-NEXT: mov r3, r7
+; SOFT-NEXT: b .LBB48_15
+; SOFT-NEXT: .LBB48_14:
+; SOFT-NEXT: asrs r3, r7, #31
+; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload
+; SOFT-NEXT: ands r3, r0
+; SOFT-NEXT: .LBB48_15: @ %entry
+; SOFT-NEXT: ands r3, r2
+; SOFT-NEXT: adds r0, r3, #1
+; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT: beq .LBB48_17
+; SOFT-NEXT: @ %bb.16: @ %entry
; SOFT-NEXT: str r6, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: .LBB48_14: @ %entry
-; SOFT-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT: mvns r0, r0
-; SOFT-NEXT: ldr r2, .LCPI48_0
-; SOFT-NEXT: cmp r4, r2
-; SOFT-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT: blo .LBB48_16
-; SOFT-NEXT: @ %bb.15: @ %entry
-; SOFT-NEXT: mov r6, r0
-; SOFT-NEXT: .LBB48_16: @ %entry
-; SOFT-NEXT: cmp r4, r2
+; SOFT-NEXT: .LBB48_17: @ %entry
+; SOFT-NEXT: ldr r3, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT: mvns r6, r3
+; SOFT-NEXT: ldr r0, .LCPI48_0
+; SOFT-NEXT: cmp r4, r0
+; SOFT-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT: blo .LBB48_19
+; SOFT-NEXT: @ %bb.18: @ %entry
+; SOFT-NEXT: mov r3, r6
+; SOFT-NEXT: .LBB48_19: @ %entry
+; SOFT-NEXT: cmp r4, r0
; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT: beq .LBB48_18
-; SOFT-NEXT: @ %bb.17: @ %entry
-; SOFT-NEXT: mov r4, r6
-; SOFT-NEXT: .LBB48_18: @ %entry
-; SOFT-NEXT: cmp r3, #0
-; SOFT-NEXT: bmi .LBB48_20
-; SOFT-NEXT: @ %bb.19: @ %entry
-; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT: .LBB48_20: @ %entry
+; SOFT-NEXT: beq .LBB48_21
+; SOFT-NEXT: @ %bb.20: @ %entry
+; SOFT-NEXT: mov r4, r3
+; SOFT-NEXT: .LBB48_21: @ %entry
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bmi .LBB48_23
+; SOFT-NEXT: @ %bb.22: @ %entry
+; SOFT-NEXT: str r6, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT: .LBB48_23: @ %entry
; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB48_22
-; SOFT-NEXT: @ %bb.21: @ %entry
+; SOFT-NEXT: beq .LBB48_25
+; SOFT-NEXT: @ %bb.24: @ %entry
; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT: .LBB48_22: @ %entry
+; SOFT-NEXT: .LBB48_25: @ %entry
; SOFT-NEXT: cmp r5, r1
-; SOFT-NEXT: mov r2, r4
-; SOFT-NEXT: bhi .LBB48_24
-; SOFT-NEXT: @ %bb.23: @ %entry
-; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT: .LBB48_24: @ %entry
+; SOFT-NEXT: mov r3, r4
+; SOFT-NEXT: bhi .LBB48_27
+; SOFT-NEXT: @ %bb.26: @ %entry
+; SOFT-NEXT: ldr r3, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT: .LBB48_27: @ %entry
; SOFT-NEXT: cmp r5, r1
; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: beq .LBB48_26
-; SOFT-NEXT: @ %bb.25: @ %entry
-; SOFT-NEXT: mov r0, r2
-; SOFT-NEXT: .LBB48_26: @ %entry
-; SOFT-NEXT: cmp r7, #0
-; SOFT-NEXT: bge .LBB48_28
-; SOFT-NEXT: @ %bb.27: @ %entry
+; SOFT-NEXT: beq .LBB48_29
+; SOFT-NEXT: @ %bb.28: @ %entry
+; SOFT-NEXT: mov r0, r3
+; SOFT-NEXT: .LBB48_29: @ %entry
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: bge .LBB48_31
+; SOFT-NEXT: @ %bb.30: @ %entry
; SOFT-NEXT: ldr r4, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT: .LBB48_28: @ %entry
+; SOFT-NEXT: .LBB48_31: @ %entry
; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload
; SOFT-NEXT: cmp r1, #0
-; SOFT-NEXT: beq .LBB48_30
-; SOFT-NEXT: @ %bb.29: @ %entry
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: .LBB48_30: @ %entry
; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: beq .LBB48_33
+; SOFT-NEXT: @ %bb.32: @ %entry
+; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: .LBB48_33: @ %entry
; SOFT-NEXT: add sp, #20
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .p2align 2
-; SOFT-NEXT: @ %bb.31:
+; SOFT-NEXT: @ %bb.34:
; SOFT-NEXT: .LCPI48_0:
; SOFT-NEXT: .long 2147483647 @ 0x7fffffff
;
@@ -3798,43 +3830,46 @@ define i64 @stest_f32i64_mm(float %x) {
; VFP2-NEXT: mov.w r5, #0
; VFP2-NEXT: it mi
; VFP2-NEXT: movmi r4, r12
-; VFP2-NEXT: orrs.w r7, r2, r3
+; VFP2-NEXT: orrs.w r9, r2, r3
; VFP2-NEXT: it eq
; VFP2-NEXT: moveq r4, r1
; VFP2-NEXT: cmp r3, #0
; VFP2-NEXT: it mi
; VFP2-NEXT: movmi r5, r3
-; VFP2-NEXT: and.w r2, r2, r3, asr #31
-; VFP2-NEXT: mov.w r1, #-2147483648
; VFP2-NEXT: cmp.w r5, #-1
-; VFP2-NEXT: mov.w r6, #-2147483648
-; VFP2-NEXT: and.w r2, r2, r5
+; VFP2-NEXT: mov.w r7, #-2147483648
+; VFP2-NEXT: mov.w r1, #-2147483648
; VFP2-NEXT: it gt
-; VFP2-NEXT: movgt r6, r4
+; VFP2-NEXT: movgt r7, r4
; VFP2-NEXT: cmp.w r4, #-2147483648
+; VFP2-NEXT: mov r6, r3
; VFP2-NEXT: it hi
; VFP2-NEXT: movhi r1, r4
-; VFP2-NEXT: adds r2, #1
+; VFP2-NEXT: cmp r3, #0
; VFP2-NEXT: it ne
-; VFP2-NEXT: movne r1, r6
+; VFP2-NEXT: andne.w r6, r2, r6, asr #31
+; VFP2-NEXT: and.w r2, r6, r5
; VFP2-NEXT: mov.w r6, #-1
+; VFP2-NEXT: adds r2, #1
+; VFP2-NEXT: it ne
+; VFP2-NEXT: movne r1, r7
+; VFP2-NEXT: mov.w r7, #-1
; VFP2-NEXT: cmp r12, r8
; VFP2-NEXT: it lo
-; VFP2-NEXT: movlo r6, r0
+; VFP2-NEXT: movlo r7, r0
+; VFP2-NEXT: mov.w lr, #0
; VFP2-NEXT: it eq
-; VFP2-NEXT: moveq r6, r0
+; VFP2-NEXT: moveq r7, r0
; VFP2-NEXT: cmp r3, #0
-; VFP2-NEXT: mov.w r9, #-1
; VFP2-NEXT: it pl
-; VFP2-NEXT: movpl r0, r9
-; VFP2-NEXT: cmp r7, #0
+; VFP2-NEXT: movpl r0, r6
+; VFP2-NEXT: cmp.w r9, #0
; VFP2-NEXT: mov.w r3, #0
; VFP2-NEXT: it eq
-; VFP2-NEXT: moveq r0, r6
+; VFP2-NEXT: moveq r0, r7
; VFP2-NEXT: cmp.w r4, #-2147483648
; VFP2-NEXT: it hi
; VFP2-NEXT: movhi r3, r0
-; VFP2-NEXT: mov.w lr, #0
; VFP2-NEXT: it eq
; VFP2-NEXT: moveq r3, r0
; VFP2-NEXT: cmp.w r5, #-1
@@ -3848,8 +3883,10 @@ define i64 @stest_f32i64_mm(float %x) {
;
; FULL-LABEL: stest_f32i64_mm:
; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r4, r5, r6, r7, r8, lr}
-; FULL-NEXT: push.w {r4, r5, r6, r7, r8, lr}
+; FULL-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
+; FULL-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
+; FULL-NEXT: .pad #4
+; FULL-NEXT: sub sp, #4
; FULL-NEXT: bl __fixsfti
; FULL-NEXT: mvn r12, #-2147483648
; FULL-NEXT: cmp r1, r12
@@ -3862,14 +3899,17 @@ define i64 @stest_f32i64_mm(float %x) {
; FULL-NEXT: cmp r3, #0
; FULL-NEXT: mov.w r7, #-2147483648
; FULL-NEXT: csel r6, r3, lr, mi
-; FULL-NEXT: and.w r2, r2, r3, asr #31
+; FULL-NEXT: mov r5, r3
; FULL-NEXT: cmp.w r6, #-1
-; FULL-NEXT: and.w r2, r2, r6
-; FULL-NEXT: csel r5, r4, r7, gt
+; FULL-NEXT: csel r9, r4, r7, gt
; FULL-NEXT: cmp.w r4, #-2147483648
; FULL-NEXT: csel r7, r4, r7, hi
-; FULL-NEXT: adds r2, #1
-; FULL-NEXT: csel r5, r7, r5, eq
+; FULL-NEXT: cmp r3, #0
+; FULL-NEXT: it ne
+; FULL-NEXT: andne.w r5, r2, r5, asr #31
+; FULL-NEXT: and.w r2, r5, r6
+; FULL-NEXT: adds r5, r2, #1
+; FULL-NEXT: csel r2, r7, r9, eq
; FULL-NEXT: mov.w r7, #-1
; FULL-NEXT: cmp r1, r12
; FULL-NEXT: csel r1, r0, r7, lo
@@ -3883,10 +3923,11 @@ define i64 @stest_f32i64_mm(float %x) {
; FULL-NEXT: csel r1, r0, r1, eq
; FULL-NEXT: cmp.w r6, #-1
; FULL-NEXT: csel r0, r0, lr, gt
-; FULL-NEXT: cmp r2, #0
+; FULL-NEXT: cmp r5, #0
; FULL-NEXT: csel r0, r1, r0, eq
-; FULL-NEXT: mov r1, r5
-; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
+; FULL-NEXT: mov r1, r2
+; FULL-NEXT: add sp, #4
+; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
entry:
%conv = fptosi float %x to i128
%spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
@@ -4234,6 +4275,7 @@ define i64 @stest_f16i64_mm(half %x) {
; SOFT-NEXT: bl __fixsfti
; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill
; SOFT-NEXT: mov r4, r1
+; SOFT-NEXT: mov r7, r3
; SOFT-NEXT: ldr r0, .LCPI51_0
; SOFT-NEXT: cmp r1, r0
; SOFT-NEXT: mov r5, r1
@@ -4241,105 +4283,114 @@ define i64 @stest_f16i64_mm(half %x) {
; SOFT-NEXT: @ %bb.1: @ %entry
; SOFT-NEXT: ldr r5, .LCPI51_0
; SOFT-NEXT: .LBB51_2: @ %entry
-; SOFT-NEXT: cmp r3, #0
+; SOFT-NEXT: cmp r7, #0
; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: bmi .LBB51_4
; SOFT-NEXT: @ %bb.3: @ %entry
; SOFT-NEXT: ldr r1, .LCPI51_0
; SOFT-NEXT: .LBB51_4: @ %entry
+; SOFT-NEXT: str r2, [sp] @ 4-byte Spill
; SOFT-NEXT: mov r0, r2
-; SOFT-NEXT: orrs r0, r3
+; SOFT-NEXT: orrs r0, r7
; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill
; SOFT-NEXT: beq .LBB51_6
; SOFT-NEXT: @ %bb.5: @ %entry
; SOFT-NEXT: mov r5, r1
; SOFT-NEXT: .LBB51_6: @ %entry
-; SOFT-NEXT: movs r0, #0
-; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT: cmp r3, #0
-; SOFT-NEXT: mov r7, r3
+; SOFT-NEXT: movs r1, #0
+; SOFT-NEXT: str r1, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: mov r2, r7
; SOFT-NEXT: bmi .LBB51_8
; SOFT-NEXT: @ %bb.7: @ %entry
-; SOFT-NEXT: ldr r7, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
; SOFT-NEXT: .LBB51_8: @ %entry
; SOFT-NEXT: movs r1, #1
; SOFT-NEXT: lsls r1, r1, #31
-; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: cmp r2, #0
; SOFT-NEXT: mov r6, r5
; SOFT-NEXT: bge .LBB51_10
; SOFT-NEXT: @ %bb.9: @ %entry
; SOFT-NEXT: mov r6, r1
; SOFT-NEXT: .LBB51_10: @ %entry
; SOFT-NEXT: cmp r5, r1
-; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: mov r3, r5
; SOFT-NEXT: bhi .LBB51_12
; SOFT-NEXT: @ %bb.11: @ %entry
-; SOFT-NEXT: mov r0, r1
+; SOFT-NEXT: mov r3, r1
; SOFT-NEXT: .LBB51_12: @ %entry
-; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: asrs r0, r3, #31
-; SOFT-NEXT: ands r0, r2
-; SOFT-NEXT: ands r0, r7
-; SOFT-NEXT: adds r0, r0, #1
-; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
-; SOFT-NEXT: beq .LBB51_14
+; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bne .LBB51_14
; SOFT-NEXT: @ %bb.13: @ %entry
+; SOFT-NEXT: mov r3, r7
+; SOFT-NEXT: b .LBB51_15
+; SOFT-NEXT: .LBB51_14:
+; SOFT-NEXT: asrs r3, r7, #31
+; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload
+; SOFT-NEXT: ands r3, r0
+; SOFT-NEXT: .LBB51_15: @ %entry
+; SOFT-NEXT: ands r3, r2
+; SOFT-NEXT: adds r0, r3, #1
+; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
+; SOFT-NEXT: beq .LBB51_17
+; SOFT-NEXT: @ %bb.16: @ %entry
; SOFT-NEXT: str r6, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: .LBB51_14: @ %entry
-; SOFT-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT: mvns r0, r0
-; SOFT-NEXT: ldr r2, .LCPI51_0
-; SOFT-NEXT: cmp r4, r2
-; SOFT-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT: blo .LBB51_16
-; SOFT-NEXT: @ %bb.15: @ %entry
-; SOFT-NEXT: mov r6, r0
-; SOFT-NEXT: .LBB51_16: @ %entry
-; SOFT-NEXT: cmp r4, r2
+; SOFT-NEXT: .LBB51_17: @ %entry
+; SOFT-NEXT: ldr r3, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT: mvns r6, r3
+; SOFT-NEXT: ldr r0, .LCPI51_0
+; SOFT-NEXT: cmp r4, r0
+; SOFT-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
+; SOFT-NEXT: blo .LBB51_19
+; SOFT-NEXT: @ %bb.18: @ %entry
+; SOFT-NEXT: mov r3, r6
+; SOFT-NEXT: .LBB51_19: @ %entry
+; SOFT-NEXT: cmp r4, r0
; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT: beq .LBB51_18
-; SOFT-NEXT: @ %bb.17: @ %entry
-; SOFT-NEXT: mov r4, r6
-; SOFT-NEXT: .LBB51_18: @ %entry
-; SOFT-NEXT: cmp r3, #0
-; SOFT-NEXT: bmi .LBB51_20
-; SOFT-NEXT: @ %bb.19: @ %entry
-; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT: .LBB51_20: @ %entry
+; SOFT-NEXT: beq .LBB51_21
+; SOFT-NEXT: @ %bb.20: @ %entry
+; SOFT-NEXT: mov r4, r3
+; SOFT-NEXT: .LBB51_21: @ %entry
+; SOFT-NEXT: cmp r7, #0
+; SOFT-NEXT: bmi .LBB51_23
+; SOFT-NEXT: @ %bb.22: @ %entry
+; SOFT-NEXT: str r6, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT: .LBB51_23: @ %entry
; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
; SOFT-NEXT: cmp r0, #0
-; SOFT-NEXT: beq .LBB51_22
-; SOFT-NEXT: @ %bb.21: @ %entry
+; SOFT-NEXT: beq .LBB51_25
+; SOFT-NEXT: @ %bb.24: @ %entry
; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
-; SOFT-NEXT: .LBB51_22: @ %entry
+; SOFT-NEXT: .LBB51_25: @ %entry
; SOFT-NEXT: cmp r5, r1
-; SOFT-NEXT: mov r2, r4
-; SOFT-NEXT: bhi .LBB51_24
-; SOFT-NEXT: @ %bb.23: @ %entry
-; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT: .LBB51_24: @ %entry
+; SOFT-NEXT: mov r3, r4
+; SOFT-NEXT: bhi .LBB51_27
+; SOFT-NEXT: @ %bb.26: @ %entry
+; SOFT-NEXT: ldr r3, [sp, #12] @ 4-byte Reload
+; SOFT-NEXT: .LBB51_27: @ %entry
; SOFT-NEXT: cmp r5, r1
; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: beq .LBB51_26
-; SOFT-NEXT: @ %bb.25: @ %entry
-; SOFT-NEXT: mov r0, r2
-; SOFT-NEXT: .LBB51_26: @ %entry
-; SOFT-NEXT: cmp r7, #0
-; SOFT-NEXT: bge .LBB51_28
-; SOFT-NEXT: @ %bb.27: @ %entry
+; SOFT-NEXT: beq .LBB51_29
+; SOFT-NEXT: @ %bb.28: @ %entry
+; SOFT-NEXT: mov r0, r3
+; SOFT-NEXT: .LBB51_29: @ %entry
+; SOFT-NEXT: cmp r2, #0
+; SOFT-NEXT: bge .LBB51_31
+; SOFT-NEXT: @ %bb.30: @ %entry
; SOFT-NEXT: ldr r4, [sp, #12] @ 4-byte Reload
-; SOFT-NEXT: .LBB51_28: @ %entry
+; SOFT-NEXT: .LBB51_31: @ %entry
; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload
; SOFT-NEXT: cmp r1, #0
-; SOFT-NEXT: beq .LBB51_30
-; SOFT-NEXT: @ %bb.29: @ %entry
-; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: .LBB51_30: @ %entry
; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
+; SOFT-NEXT: beq .LBB51_33
+; SOFT-NEXT: @ %bb.32: @ %entry
+; SOFT-NEXT: mov r0, r4
+; SOFT-NEXT: .LBB51_33: @ %entry
; SOFT-NEXT: add sp, #20
; SOFT-NEXT: pop {r4, r5, r6, r7, pc}
; SOFT-NEXT: .p2align 2
-; SOFT-NEXT: @ %bb.31:
+; SOFT-NEXT: @ %bb.34:
; SOFT-NEXT: .LCPI51_0:
; SOFT-NEXT: .long 2147483647 @ 0x7fffffff
;
@@ -4364,43 +4415,46 @@ define i64 @stest_f16i64_mm(half %x) {
; VFP2-NEXT: mov.w r5, #0
; VFP2-NEXT: it mi
; VFP2-NEXT: movmi r4, r12
-; VFP2-NEXT: orrs.w r7, r2, r3
+; VFP2-NEXT: orrs.w r9, r2, r3
; VFP2-NEXT: it eq
; VFP2-NEXT: moveq r4, r1
; VFP2-NEXT: cmp r3, #0
; VFP2-NEXT: it mi
; VFP2-NEXT: movmi r5, r3
-; VFP2-NEXT: and.w r2, r2, r3, asr #31
-; VFP2-NEXT: mov.w r1, #-2147483648
; VFP2-NEXT: cmp.w r5, #-1
-; VFP2-NEXT: mov.w r6, #-2147483648
-; VFP2-NEXT: and.w r2, r2, r5
+; VFP2-NEXT: mov.w r7, #-2147483648
+; VFP2-NEXT: mov.w r1, #-2147483648
; VFP2-NEXT: it gt
-; VFP2-NEXT: movgt r6, r4
+; VFP2-NEXT: movgt r7, r4
; VFP2-NEXT: cmp.w r4, #-2147483648
+; VFP2-NEXT: mov r6, r3
; VFP2-NEXT: it hi
; VFP2-NEXT: movhi r1, r4
-; VFP2-NEXT: adds r2, #1
+; VFP2-NEXT: cmp r3, #0
; VFP2-NEXT: it ne
-; VFP2-NEXT: movne r1, r6
+; VFP2-NEXT: andne.w r6, r2, r6, asr #31
+; VFP2-NEXT: and.w r2, r6, r5
; VFP2-NEXT: mov.w r6, #-1
+; VFP2-NEXT: adds r2, #1
+; VFP2-NEXT: it ne
+; VFP2-NEXT: movne r1, r7
+; VFP2-NEXT: mov.w r7, #-1
; VFP2-NEXT: cmp r12, r8
; VFP2-NEXT: it lo
-; VFP2-NEXT: movlo r6, r0
+; VFP2-NEXT: movlo r7, r0
+; VFP2-NEXT: mov.w lr, #0
; VFP2-NEXT: it eq
-; VFP2-NEXT: moveq r6, r0
+; VFP2-NEXT: moveq r7, r0
; VFP2-NEXT: cmp r3, #0
-; VFP2-NEXT: mov.w r9, #-1
; VFP2-NEXT: it pl
-; VFP2-NEXT: movpl r0, r9
-; VFP2-NEXT: cmp r7, #0
+; VFP2-NEXT: movpl r0, r6
+; VFP2-NEXT: cmp.w r9, #0
; VFP2-NEXT: mov.w r3, #0
; VFP2-NEXT: it eq
-; VFP2-NEXT: moveq r0, r6
+; VFP2-NEXT: moveq r0, r7
; VFP2-NEXT: cmp.w r4, #-2147483648
; VFP2-NEXT: it hi
; VFP2-NEXT: movhi r3, r0
-; VFP2-NEXT: mov.w lr, #0
; VFP2-NEXT: it eq
; VFP2-NEXT: moveq r3, r0
; VFP2-NEXT: cmp.w r5, #-1
@@ -4414,8 +4468,10 @@ define i64 @stest_f16i64_mm(half %x) {
;
; FULL-LABEL: stest_f16i64_mm:
; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r4, r5, r6, r7, r8, lr}
-; FULL-NEXT: push.w {r4, r5, r6, r7, r8, lr}
+; FULL-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
+; FULL-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
+; FULL-NEXT: .pad #4
+; FULL-NEXT: sub sp, #4
; FULL-NEXT: vmov.f16 r0, s0
; FULL-NEXT: vmov s0, r0
; FULL-NEXT: bl __fixhfti
@@ -4430,14 +4486,17 @@ define i64 @stest_f16i64_mm(half %x) {
; FULL-NEXT: cmp r3, #0
; FULL-NEXT: mov.w r7, #-2147483648
; FULL-NEXT: csel r6, r3, lr, mi
-; FULL-NEXT: and.w r2, r2, r3, asr #31
+; FULL-NEXT: mov r5, r3
; FULL-NEXT: cmp.w r6, #-1
-; FULL-NEXT: and.w r2, r2, r6
-; FULL-NEXT: csel r5, r4, r7, gt
+; FULL-NEXT: csel r9, r4, r7, gt
; FULL-NEXT: cmp.w r4, #-2147483648
; FULL-NEXT: csel r7, r4, r7, hi
-; FULL-NEXT: adds r2, #1
-; FULL-NEXT: csel r5, r7, r5, eq
+; FULL-NEXT: cmp r3, #0
+; FULL-NEXT: it ne
+; FULL-NEXT: andne.w r5, r2, r5, asr #31
+; FULL-NEXT: and.w r2, r5, r6
+; FULL-NEXT: adds r5, r2, #1
+; FULL-NEXT: csel r2, r7, r9, eq
; FULL-NEXT: mov.w r7, #-1
; FULL-NEXT: cmp r1, r12
; FULL-NEXT: csel r1, r0, r7, lo
@@ -4451,10 +4510,11 @@ define i64 @stest_f16i64_mm(half %x) {
; FULL-NEXT: csel r1, r0, r1, eq
; FULL-NEXT: cmp.w r6, #-1
; FULL-NEXT: csel r0, r0, lr, gt
-; FULL-NEXT: cmp r2, #0
+; FULL-NEXT: cmp r5, #0
; FULL-NEXT: csel r0, r1, r0, eq
-; FULL-NEXT: mov r1, r5
-; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
+; FULL-NEXT: mov r1, r2
+; FULL-NEXT: add sp, #4
+; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
entry:
%conv = fptosi half %x to i128
%spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
diff --git a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
index db6f33128236a..a4d470b72d4ea 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
@@ -3671,93 +3671,95 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
; CHECK-NEXT: vorr d0, d9, d9
; CHECK-NEXT: bl __fixdfti
; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: cmn r1, #-2147483647
-; CHECK-NEXT: mvn r0, #-2147483648
-; CHECK-NEXT: mvn r5, #-2147483648
-; CHECK-NEXT: movlo r0, r1
; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: mov r0, r3
+; CHECK-NEXT: mov r10, #0
+; CHECK-NEXT: andne r0, r2, r0, asr #31
; CHECK-NEXT: mov r11, r1
-; CHECK-NEXT: movmi r5, r1
-; CHECK-NEXT: orrs r1, r2, r3
-; CHECK-NEXT: mov r8, #0
-; CHECK-NEXT: moveq r5, r0
+; CHECK-NEXT: movmi r10, r3
+; CHECK-NEXT: and r1, r0, r10
+; CHECK-NEXT: cmn r11, #-2147483647
+; CHECK-NEXT: mvn r0, #-2147483648
+; CHECK-NEXT: movlo r0, r11
; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: and r0, r2, r3, asr #31
-; CHECK-NEXT: movmi r8, r3
-; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT: and r1, r0, r8
+; CHECK-NEXT: mvn r8, #-2147483648
; CHECK-NEXT: vorr d0, d8, d8
-; CHECK-NEXT: cmn r8, #1
+; CHECK-NEXT: movmi r8, r11
+; CHECK-NEXT: orrs r2, r2, r3
+; CHECK-NEXT: moveq r8, r0
+; CHECK-NEXT: cmn r10, #1
; CHECK-NEXT: mov r0, #-2147483648
-; CHECK-NEXT: mov r10, #-2147483648
-; CHECK-NEXT: movgt r0, r5
-; CHECK-NEXT: cmp r5, #-2147483648
-; CHECK-NEXT: movhi r10, r5
+; CHECK-NEXT: mov r9, #-2147483648
+; CHECK-NEXT: movgt r0, r8
+; CHECK-NEXT: cmp r8, #-2147483648
+; CHECK-NEXT: movhi r9, r8
; CHECK-NEXT: cmn r1, #1
-; CHECK-NEXT: mov r9, r3
+; CHECK-NEXT: mov r6, r3
+; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill
; CHECK-NEXT: mvn r7, #-2147483648
-; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: movne r10, r0
+; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: movne r9, r0
; CHECK-NEXT: bl __fixdfti
; CHECK-NEXT: cmn r1, #-2147483647
-; CHECK-NEXT: mvn r6, #0
-; CHECK-NEXT: movlo r6, r0
+; CHECK-NEXT: mvn r5, #0
+; CHECK-NEXT: movlo r5, r0
; CHECK-NEXT: mvn r4, #0
-; CHECK-NEXT: moveq r6, r0
+; CHECK-NEXT: moveq r5, r0
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: movpl r0, r4
; CHECK-NEXT: orrs r12, r2, r3
-; CHECK-NEXT: moveq r0, r6
+; CHECK-NEXT: moveq r0, r5
; CHECK-NEXT: cmn r1, #-2147483647
-; CHECK-NEXT: mvn r6, #-2147483648
-; CHECK-NEXT: and r2, r2, r3, asr #31
-; CHECK-NEXT: movlo r6, r1
+; CHECK-NEXT: mvn r5, #-2147483648
+; CHECK-NEXT: movlo r5, r1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: movmi r7, r1
; CHECK-NEXT: cmp r12, #0
-; CHECK-NEXT: moveq r7, r6
+; CHECK-NEXT: moveq r7, r5
; CHECK-NEXT: cmp r7, #-2147483648
+; CHECK-NEXT: mov r1, #0
+; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: movhi r1, r0
; CHECK-NEXT: mov r12, #0
-; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: movhi r12, r0
+; CHECK-NEXT: moveq r1, r0
+; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: mvn r6, #0
-; CHECK-NEXT: moveq r12, r0
-; CHECK-NEXT: cmp r9, #0
-; CHECK-NEXT: movmi r6, r1
+; CHECK-NEXT: movmi r6, r5
; CHECK-NEXT: cmn r11, #-2147483647
-; CHECK-NEXT: movlo r4, r1
-; CHECK-NEXT: moveq r4, r1
-; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: cmp r1, #0
-; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: movlo r4, r5
+; CHECK-NEXT: moveq r4, r5
+; CHECK-NEXT: ldr r5, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: movne r4, r6
-; CHECK-NEXT: cmp r5, #-2147483648
+; CHECK-NEXT: cmp r8, #-2147483648
; CHECK-NEXT: mov r6, #0
-; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: movhi r6, r4
; CHECK-NEXT: moveq r6, r4
-; CHECK-NEXT: cmn r8, #1
-; CHECK-NEXT: movle r4, r5
-; CHECK-NEXT: cmn r1, #1
+; CHECK-NEXT: cmn r10, #1
+; CHECK-NEXT: movle r4, r12
+; CHECK-NEXT: cmn r5, #1
; CHECK-NEXT: moveq r4, r6
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: mov r6, #0
; CHECK-NEXT: vmov.32 d1[0], r4
; CHECK-NEXT: movmi r6, r3
; CHECK-NEXT: cmn r6, #1
-; CHECK-NEXT: and r2, r2, r6
-; CHECK-NEXT: movle r0, r5
+; CHECK-NEXT: movle r0, r12
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: andne r3, r2, r3, asr #31
+; CHECK-NEXT: and r2, r3, r6
; CHECK-NEXT: cmn r2, #1
-; CHECK-NEXT: mov r1, #-2147483648
-; CHECK-NEXT: moveq r0, r12
+; CHECK-NEXT: moveq r0, r1
; CHECK-NEXT: cmn r6, #1
+; CHECK-NEXT: mov r1, #-2147483648
; CHECK-NEXT: vmov.32 d0[0], r0
; CHECK-NEXT: movgt r1, r7
; CHECK-NEXT: cmp r7, #-2147483648
; CHECK-NEXT: mov r0, #-2147483648
+; CHECK-NEXT: vmov.32 d1[1], r9
; CHECK-NEXT: movls r7, r0
; CHECK-NEXT: cmn r2, #1
-; CHECK-NEXT: vmov.32 d1[1], r10
; CHECK-NEXT: movne r7, r1
; CHECK-NEXT: vmov.32 d0[1], r7
; CHECK-NEXT: add sp, sp, #16
@@ -3945,93 +3947,95 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
; CHECK-NEXT: vmov.f32 s0, s17
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: vmov.f32 s0, s16
-; CHECK-NEXT: cmn r1, #-2147483647
-; CHECK-NEXT: mvn r0, #-2147483648
-; CHECK-NEXT: movlo r0, r1
; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: mvn r5, #-2147483648
+; CHECK-NEXT: mov r0, r3
+; CHECK-NEXT: mov r10, #0
+; CHECK-NEXT: vmov.f32 s0, s16
+; CHECK-NEXT: andne r0, r2, r0, asr #31
; CHECK-NEXT: mov r11, r1
-; CHECK-NEXT: movmi r5, r1
-; CHECK-NEXT: orrs r1, r2, r3
-; CHECK-NEXT: moveq r5, r0
+; CHECK-NEXT: movmi r10, r3
+; CHECK-NEXT: and r1, r0, r10
+; CHECK-NEXT: cmn r11, #-2147483647
+; CHECK-NEXT: mvn r0, #-2147483648
+; CHECK-NEXT: mvn r8, #-2147483648
+; CHECK-NEXT: movlo r0, r11
; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: mov r8, #0
-; CHECK-NEXT: and r0, r2, r3, asr #31
-; CHECK-NEXT: movmi r8, r3
-; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT: and r1, r0, r8
-; CHECK-NEXT: cmn r8, #1
+; CHECK-NEXT: movmi r8, r11
+; CHECK-NEXT: orrs r2, r2, r3
+; CHECK-NEXT: moveq r8, r0
+; CHECK-NEXT: cmn r10, #1
; CHECK-NEXT: mov r0, #-2147483648
-; CHECK-NEXT: mov r10, #-2147483648
-; CHECK-NEXT: movgt r0, r5
-; CHECK-NEXT: cmp r5, #-2147483648
-; CHECK-NEXT: movhi r10, r5
+; CHECK-NEXT: mov r9, #-2147483648
+; CHECK-NEXT: movgt r0, r8
+; CHECK-NEXT: cmp r8, #-2147483648
+; CHECK-NEXT: movhi r9, r8
; CHECK-NEXT: cmn r1, #1
-; CHECK-NEXT: mov r9, r3
+; CHECK-NEXT: mov r6, r3
+; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill
; CHECK-NEXT: mvn r7, #-2147483648
-; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: movne r10, r0
+; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: movne r9, r0
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: cmn r1, #-2147483647
-; CHECK-NEXT: mvn r6, #0
-; CHECK-NEXT: movlo r6, r0
+; CHECK-NEXT: mvn r5, #0
+; CHECK-NEXT: movlo r5, r0
; CHECK-NEXT: mvn r4, #0
-; CHECK-NEXT: moveq r6, r0
+; CHECK-NEXT: moveq r5, r0
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: movpl r0, r4
; CHECK-NEXT: orrs r12, r2, r3
-; CHECK-NEXT: moveq r0, r6
+; CHECK-NEXT: moveq r0, r5
; CHECK-NEXT: cmn r1, #-2147483647
-; CHECK-NEXT: mvn r6, #-2147483648
-; CHECK-NEXT: and r2, r2, r3, asr #31
-; CHECK-NEXT: movlo r6, r1
+; CHECK-NEXT: mvn r5, #-2147483648
+; CHECK-NEXT: movlo r5, r1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: movmi r7, r1
; CHECK-NEXT: cmp r12, #0
-; CHECK-NEXT: moveq r7, r6
+; CHECK-NEXT: moveq r7, r5
; CHECK-NEXT: cmp r7, #-2147483648
+; CHECK-NEXT: mov r1, #0
+; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: movhi r1, r0
; CHECK-NEXT: mov r12, #0
-; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: movhi r12, r0
+; CHECK-NEXT: moveq r1, r0
+; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: mvn r6, #0
-; CHECK-NEXT: moveq r12, r0
-; CHECK-NEXT: cmp r9, #0
-; CHECK-NEXT: movmi r6, r1
+; CHECK-NEXT: movmi r6, r5
; CHECK-NEXT: cmn r11, #-2147483647
-; CHECK-NEXT: movlo r4, r1
-; CHECK-NEXT: moveq r4, r1
-; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: cmp r1, #0
-; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: movlo r4, r5
+; CHECK-NEXT: moveq r4, r5
+; CHECK-NEXT: ldr r5, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: movne r4, r6
-; CHECK-NEXT: cmp r5, #-2147483648
+; CHECK-NEXT: cmp r8, #-2147483648
; CHECK-NEXT: mov r6, #0
-; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: movhi r6, r4
; CHECK-NEXT: moveq r6, r4
-; CHECK-NEXT: cmn r8, #1
-; CHECK-NEXT: movle r4, r5
-; CHECK-NEXT: cmn r1, #1
+; CHECK-NEXT: cmn r10, #1
+; CHECK-NEXT: movle r4, r12
+; CHECK-NEXT: cmn r5, #1
; CHECK-NEXT: moveq r4, r6
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: mov r6, #0
; CHECK-NEXT: vmov.32 d1[0], r4
; CHECK-NEXT: movmi r6, r3
; CHECK-NEXT: cmn r6, #1
-; CHECK-NEXT: and r2, r2, r6
-; CHECK-NEXT: movle r0, r5
+; CHECK-NEXT: movle r0, r12
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: andne r3, r2, r3, asr #31
+; CHECK-NEXT: and r2, r3, r6
; CHECK-NEXT: cmn r2, #1
-; CHECK-NEXT: mov r1, #-2147483648
-; CHECK-NEXT: moveq r0, r12
+; CHECK-NEXT: moveq r0, r1
; CHECK-NEXT: cmn r6, #1
+; CHECK-NEXT: mov r1, #-2147483648
; CHECK-NEXT: vmov.32 d0[0], r0
; CHECK-NEXT: movgt r1, r7
; CHECK-NEXT: cmp r7, #-2147483648
; CHECK-NEXT: mov r0, #-2147483648
+; CHECK-NEXT: vmov.32 d1[1], r9
; CHECK-NEXT: movls r7, r0
; CHECK-NEXT: cmn r2, #1
-; CHECK-NEXT: vmov.32 d1[1], r10
; CHECK-NEXT: movne r7, r1
; CHECK-NEXT: vmov.32 d0[1], r7
; CHECK-NEXT: add sp, sp, #16
@@ -4220,100 +4224,100 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: bl __fixsfti
-; CHECK-NEON-NEXT: mov r5, r0
-; CHECK-NEON-NEXT: cmn r1, #-2147483647
-; CHECK-NEON-NEXT: mvn r0, #-2147483648
-; CHECK-NEON-NEXT: mvn r11, #-2147483648
-; CHECK-NEON-NEXT: movlo r0, r1
+; CHECK-NEON-NEXT: str r0, [sp, #12] @ 4-byte Spill
; CHECK-NEON-NEXT: cmp r3, #0
-; CHECK-NEON-NEXT: str r1, [sp, #8] @ 4-byte Spill
-; CHECK-NEON-NEXT: movmi r11, r1
-; CHECK-NEON-NEXT: orrs r1, r2, r3
-; CHECK-NEON-NEXT: mov r8, #0
-; CHECK-NEON-NEXT: moveq r11, r0
+; CHECK-NEON-NEXT: mov r0, r3
+; CHECK-NEON-NEXT: mov r10, #0
+; CHECK-NEON-NEXT: andne r0, r2, r0, asr #31
+; CHECK-NEON-NEXT: mov r11, r1
+; CHECK-NEON-NEXT: movmi r10, r3
+; CHECK-NEON-NEXT: and r1, r0, r10
+; CHECK-NEON-NEXT: cmn r11, #-2147483647
+; CHECK-NEON-NEXT: mvn r0, #-2147483648
+; CHECK-NEON-NEXT: movlo r0, r11
; CHECK-NEON-NEXT: cmp r3, #0
-; CHECK-NEON-NEXT: and r0, r2, r3, asr #31
-; CHECK-NEON-NEXT: movmi r8, r3
-; CHECK-NEON-NEXT: str r1, [sp, #12] @ 4-byte Spill
-; CHECK-NEON-NEXT: and r1, r0, r8
-; CHECK-NEON-NEXT: cmn r8, #1
+; CHECK-NEON-NEXT: mvn r8, #-2147483648
+; CHECK-NEON-NEXT: mov r9, #-2147483648
+; CHECK-NEON-NEXT: movmi r8, r11
+; CHECK-NEON-NEXT: orrs r2, r2, r3
+; CHECK-NEON-NEXT: moveq r8, r0
+; CHECK-NEON-NEXT: cmn r10, #1
; CHECK-NEON-NEXT: mov r0, #-2147483648
-; CHECK-NEON-NEXT: movgt r0, r11
-; CHECK-NEON-NEXT: cmp r11, #-2147483648
-; CHECK-NEON-NEXT: mov r2, #-2147483648
-; CHECK-NEON-NEXT: mov r9, r3
-; CHECK-NEON-NEXT: movhi r2, r11
+; CHECK-NEON-NEXT: mov r6, r3
+; CHECK-NEON-NEXT: movgt r0, r8
+; CHECK-NEON-NEXT: cmp r8, #-2147483648
+; CHECK-NEON-NEXT: movhi r9, r8
; CHECK-NEON-NEXT: cmn r1, #1
-; CHECK-NEON-NEXT: movne r2, r0
+; CHECK-NEON-NEXT: movne r9, r0
; CHECK-NEON-NEXT: vmov r0, s16
-; CHECK-NEON-NEXT: mvn r10, #-2147483648
-; CHECK-NEON-NEXT: str r1, [sp] @ 4-byte Spill
+; CHECK-NEON-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; CHECK-NEON-NEXT: mvn r7, #-2147483648
; CHECK-NEON-NEXT: str r2, [sp, #4] @ 4-byte Spill
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: bl __fixsfti
; CHECK-NEON-NEXT: cmn r1, #-2147483647
-; CHECK-NEON-NEXT: mvn r6, #0
-; CHECK-NEON-NEXT: movlo r6, r0
+; CHECK-NEON-NEXT: mvn r5, #0
+; CHECK-NEON-NEXT: movlo r5, r0
; CHECK-NEON-NEXT: mvn r4, #0
-; CHECK-NEON-NEXT: moveq r6, r0
+; CHECK-NEON-NEXT: moveq r5, r0
; CHECK-NEON-NEXT: cmp r3, #0
; CHECK-NEON-NEXT: movpl r0, r4
; CHECK-NEON-NEXT: orrs r12, r2, r3
-; CHECK-NEON-NEXT: moveq r0, r6
+; CHECK-NEON-NEXT: moveq r0, r5
; CHECK-NEON-NEXT: cmn r1, #-2147483647
-; CHECK-NEON-NEXT: mvn r6, #-2147483648
-; CHECK-NEON-NEXT: ldr r7, [sp, #8] @ 4-byte Reload
-; CHECK-NEON-NEXT: movlo r6, r1
+; CHECK-NEON-NEXT: mvn r5, #-2147483648
+; CHECK-NEON-NEXT: movlo r5, r1
; CHECK-NEON-NEXT: cmp r3, #0
-; CHECK-NEON-NEXT: movmi r10, r1
+; CHECK-NEON-NEXT: movmi r7, r1
; CHECK-NEON-NEXT: cmp r12, #0
-; CHECK-NEON-NEXT: moveq r10, r6
-; CHECK-NEON-NEXT: cmp r10, #-2147483648
+; CHECK-NEON-NEXT: moveq r7, r5
+; CHECK-NEON-NEXT: cmp r7, #-2147483648
; CHECK-NEON-NEXT: mov r1, #0
-; CHECK-NEON-NEXT: mvn r6, #0
+; CHECK-NEON-NEXT: ldr r5, [sp, #12] @ 4-byte Reload
; CHECK-NEON-NEXT: movhi r1, r0
-; CHECK-NEON-NEXT: and r2, r2, r3, asr #31
+; CHECK-NEON-NEXT: mov r12, #0
; CHECK-NEON-NEXT: moveq r1, r0
-; CHECK-NEON-NEXT: cmp r9, #0
+; CHECK-NEON-NEXT: cmp r6, #0
+; CHECK-NEON-NEXT: mvn r6, #0
; CHECK-NEON-NEXT: movmi r6, r5
-; CHECK-NEON-NEXT: cmn r7, #-2147483647
+; CHECK-NEON-NEXT: cmn r11, #-2147483647
; CHECK-NEON-NEXT: movlo r4, r5
-; CHECK-NEON-NEXT: ldr r7, [sp] @ 4-byte Reload
; CHECK-NEON-NEXT: moveq r4, r5
-; CHECK-NEON-NEXT: ldr r5, [sp, #12] @ 4-byte Reload
+; CHECK-NEON-NEXT: ldr r5, [sp, #4] @ 4-byte Reload
; CHECK-NEON-NEXT: cmp r5, #0
-; CHECK-NEON-NEXT: mov r5, #0
+; CHECK-NEON-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
; CHECK-NEON-NEXT: movne r4, r6
-; CHECK-NEON-NEXT: cmp r11, #-2147483648
+; CHECK-NEON-NEXT: cmp r8, #-2147483648
; CHECK-NEON-NEXT: mov r6, #0
; CHECK-NEON-NEXT: movhi r6, r4
; CHECK-NEON-NEXT: moveq r6, r4
-; CHECK-NEON-NEXT: cmn r8, #1
-; CHECK-NEON-NEXT: movle r4, r5
-; CHECK-NEON-NEXT: cmn r7, #1
+; CHECK-NEON-NEXT: cmn r10, #1
+; CHECK-NEON-NEXT: movle r4, r12
+; CHECK-NEON-NEXT: cmn r5, #1
; CHECK-NEON-NEXT: moveq r4, r6
; CHECK-NEON-NEXT: cmp r3, #0
; CHECK-NEON-NEXT: mov r6, #0
; CHECK-NEON-NEXT: vmov.32 d1[0], r4
; CHECK-NEON-NEXT: movmi r6, r3
; CHECK-NEON-NEXT: cmn r6, #1
-; CHECK-NEON-NEXT: and r2, r2, r6
-; CHECK-NEON-NEXT: movle r0, r5
+; CHECK-NEON-NEXT: movle r0, r12
+; CHECK-NEON-NEXT: cmp r3, #0
+; CHECK-NEON-NEXT: andne r3, r2, r3, asr #31
+; CHECK-NEON-NEXT: and r2, r3, r6
; CHECK-NEON-NEXT: cmn r2, #1
; CHECK-NEON-NEXT: moveq r0, r1
; CHECK-NEON-NEXT: cmn r6, #1
; CHECK-NEON-NEXT: mov r1, #-2147483648
; CHECK-NEON-NEXT: vmov.32 d0[0], r0
-; CHECK-NEON-NEXT: movgt r1, r10
-; CHECK-NEON-NEXT: cmp r10, #-2147483648
+; CHECK-NEON-NEXT: movgt r1, r7
+; CHECK-NEON-NEXT: cmp r7, #-2147483648
; CHECK-NEON-NEXT: mov r0, #-2147483648
-; CHECK-NEON-NEXT: movls r10, r0
-; CHECK-NEON-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-NEON-NEXT: vmov.32 d1[1], r9
+; CHECK-NEON-NEXT: movls r7, r0
; CHECK-NEON-NEXT: cmn r2, #1
-; CHECK-NEON-NEXT: movne r10, r1
-; CHECK-NEON-NEXT: vmov.32 d1[1], r0
-; CHECK-NEON-NEXT: vmov.32 d0[1], r10
+; CHECK-NEON-NEXT: movne r7, r1
+; CHECK-NEON-NEXT: vmov.32 d0[1], r7
; CHECK-NEON-NEXT: add sp, sp, #16
; CHECK-NEON-NEXT: vpop {d8}
; CHECK-NEON-NEXT: add sp, sp, #4
@@ -4334,94 +4338,96 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfti
; CHECK-FP16-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; CHECK-FP16-NEXT: cmn r1, #-2147483647
-; CHECK-FP16-NEXT: mvn r0, #-2147483648
-; CHECK-FP16-NEXT: mvn r5, #-2147483648
-; CHECK-FP16-NEXT: movlo r0, r1
; CHECK-FP16-NEXT: cmp r3, #0
+; CHECK-FP16-NEXT: mov r0, r3
+; CHECK-FP16-NEXT: mov r10, #0
+; CHECK-FP16-NEXT: andne r0, r2, r0, asr #31
; CHECK-FP16-NEXT: mov r11, r1
-; CHECK-FP16-NEXT: movmi r5, r1
-; CHECK-FP16-NEXT: orrs r1, r2, r3
-; CHECK-FP16-NEXT: mov r8, #0
-; CHECK-FP16-NEXT: moveq r5, r0
+; CHECK-FP16-NEXT: movmi r10, r3
+; CHECK-FP16-NEXT: and r1, r0, r10
+; CHECK-FP16-NEXT: cmn r11, #-2147483647
+; CHECK-FP16-NEXT: mvn r0, #-2147483648
+; CHECK-FP16-NEXT: movlo r0, r11
; CHECK-FP16-NEXT: cmp r3, #0
-; CHECK-FP16-NEXT: and r0, r2, r3, asr #31
-; CHECK-FP16-NEXT: movmi r8, r3
-; CHECK-FP16-NEXT: str r1, [sp, #8] @ 4-byte Spill
-; CHECK-FP16-NEXT: and r1, r0, r8
-; CHECK-FP16-NEXT: cmn r8, #1
+; CHECK-FP16-NEXT: mvn r8, #-2147483648
+; CHECK-FP16-NEXT: mov r9, #-2147483648
+; CHECK-FP16-NEXT: movmi r8, r11
+; CHECK-FP16-NEXT: orrs r2, r2, r3
+; CHECK-FP16-NEXT: moveq r8, r0
+; CHECK-FP16-NEXT: cmn r10, #1
; CHECK-FP16-NEXT: mov r0, #-2147483648
-; CHECK-FP16-NEXT: movgt r0, r5
-; CHECK-FP16-NEXT: cmp r5, #-2147483648
-; CHECK-FP16-NEXT: mov r10, #-2147483648
-; CHECK-FP16-NEXT: mov r9, r3
-; CHECK-FP16-NEXT: movhi r10, r5
+; CHECK-FP16-NEXT: mov r6, r3
+; CHECK-FP16-NEXT: movgt r0, r8
+; CHECK-FP16-NEXT: cmp r8, #-2147483648
+; CHECK-FP16-NEXT: movhi r9, r8
; CHECK-FP16-NEXT: cmn r1, #1
-; CHECK-FP16-NEXT: movne r10, r0
+; CHECK-FP16-NEXT: movne r9, r0
; CHECK-FP16-NEXT: vmov.u16 r0, d8[0]
+; CHECK-FP16-NEXT: str r1, [sp, #8] @ 4-byte Spill
; CHECK-FP16-NEXT: mvn r7, #-2147483648
-; CHECK-FP16-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; CHECK-FP16-NEXT: str r2, [sp, #4] @ 4-byte Spill
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfti
; CHECK-FP16-NEXT: cmn r1, #-2147483647
-; CHECK-FP16-NEXT: mvn r6, #0
-; CHECK-FP16-NEXT: movlo r6, r0
+; CHECK-FP16-NEXT: mvn r5, #0
+; CHECK-FP16-NEXT: movlo r5, r0
; CHECK-FP16-NEXT: mvn r4, #0
-; CHECK-FP16-NEXT: moveq r6, r0
+; CHECK-FP16-NEXT: moveq r5, r0
; CHECK-FP16-NEXT: cmp r3, #0
; CHECK-FP16-NEXT: movpl r0, r4
; CHECK-FP16-NEXT: orrs r12, r2, r3
-; CHECK-FP16-NEXT: moveq r0, r6
+; CHECK-FP16-NEXT: moveq r0, r5
; CHECK-FP16-NEXT: cmn r1, #-2147483647
-; CHECK-FP16-NEXT: mvn r6, #-2147483648
-; CHECK-FP16-NEXT: and r2, r2, r3, asr #31
-; CHECK-FP16-NEXT: movlo r6, r1
+; CHECK-FP16-NEXT: mvn r5, #-2147483648
+; CHECK-FP16-NEXT: movlo r5, r1
; CHECK-FP16-NEXT: cmp r3, #0
; CHECK-FP16-NEXT: movmi r7, r1
; CHECK-FP16-NEXT: cmp r12, #0
-; CHECK-FP16-NEXT: moveq r7, r6
+; CHECK-FP16-NEXT: moveq r7, r5
; CHECK-FP16-NEXT: cmp r7, #-2147483648
+; CHECK-FP16-NEXT: mov r1, #0
+; CHECK-FP16-NEXT: ldr r5, [sp, #12] @ 4-byte Reload
+; CHECK-FP16-NEXT: movhi r1, r0
; CHECK-FP16-NEXT: mov r12, #0
-; CHECK-FP16-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
-; CHECK-FP16-NEXT: movhi r12, r0
+; CHECK-FP16-NEXT: moveq r1, r0
+; CHECK-FP16-NEXT: cmp r6, #0
; CHECK-FP16-NEXT: mvn r6, #0
-; CHECK-FP16-NEXT: moveq r12, r0
-; CHECK-FP16-NEXT: cmp r9, #0
-; CHECK-FP16-NEXT: movmi r6, r1
+; CHECK-FP16-NEXT: movmi r6, r5
; CHECK-FP16-NEXT: cmn r11, #-2147483647
-; CHECK-FP16-NEXT: movlo r4, r1
-; CHECK-FP16-NEXT: moveq r4, r1
-; CHECK-FP16-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
-; CHECK-FP16-NEXT: cmp r1, #0
-; CHECK-FP16-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-FP16-NEXT: movlo r4, r5
+; CHECK-FP16-NEXT: moveq r4, r5
+; CHECK-FP16-NEXT: ldr r5, [sp, #4] @ 4-byte Reload
+; CHECK-FP16-NEXT: cmp r5, #0
+; CHECK-FP16-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
; CHECK-FP16-NEXT: movne r4, r6
-; CHECK-FP16-NEXT: cmp r5, #-2147483648
+; CHECK-FP16-NEXT: cmp r8, #-2147483648
; CHECK-FP16-NEXT: mov r6, #0
-; CHECK-FP16-NEXT: mov r5, #0
; CHECK-FP16-NEXT: movhi r6, r4
; CHECK-FP16-NEXT: moveq r6, r4
-; CHECK-FP16-NEXT: cmn r8, #1
-; CHECK-FP16-NEXT: movle r4, r5
-; CHECK-FP16-NEXT: cmn r1, #1
+; CHECK-FP16-NEXT: cmn r10, #1
+; CHECK-FP16-NEXT: movle r4, r12
+; CHECK-FP16-NEXT: cmn r5, #1
; CHECK-FP16-NEXT: moveq r4, r6
; CHECK-FP16-NEXT: cmp r3, #0
; CHECK-FP16-NEXT: mov r6, #0
; CHECK-FP16-NEXT: vmov.32 d1[0], r4
; CHECK-FP16-NEXT: movmi r6, r3
; CHECK-FP16-NEXT: cmn r6, #1
-; CHECK-FP16-NEXT: and r2, r2, r6
-; CHECK-FP16-NEXT: movle r0, r5
+; CHECK-FP16-NEXT: movle r0, r12
+; CHECK-FP16-NEXT: cmp r3, #0
+; CHECK-FP16-NEXT: andne r3, r2, r3, asr #31
+; CHECK-FP16-NEXT: and r2, r3, r6
; CHECK-FP16-NEXT: cmn r2, #1
-; CHECK-FP16-NEXT: mov r1, #-2147483648
-; CHECK-FP16-NEXT: moveq r0, r12
+; CHECK-FP16-NEXT: moveq r0, r1
; CHECK-FP16-NEXT: cmn r6, #1
+; CHECK-FP16-NEXT: mov r1, #-2147483648
; CHECK-FP16-NEXT: vmov.32 d0[0], r0
; CHECK-FP16-NEXT: movgt r1, r7
; CHECK-FP16-NEXT: cmp r7, #-2147483648
; CHECK-FP16-NEXT: mov r0, #-2147483648
+; CHECK-FP16-NEXT: vmov.32 d1[1], r9
; CHECK-FP16-NEXT: movls r7, r0
; CHECK-FP16-NEXT: cmn r2, #1
-; CHECK-FP16-NEXT: vmov.32 d1[1], r10
; CHECK-FP16-NEXT: movne r7, r1
; CHECK-FP16-NEXT: vmov.32 d0[1], r7
; CHECK-FP16-NEXT: add sp, sp, #16
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
index a7d424190bbcd..7eb7e14353329 100644
--- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
@@ -2972,47 +2972,50 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IF-NEXT: mv a1, a0
; RV32IF-NEXT: addi a0, sp, 8
; RV32IF-NEXT: call __fixdfti at plt
-; RV32IF-NEXT: lw a1, 20(sp)
+; RV32IF-NEXT: lw a0, 20(sp)
; RV32IF-NEXT: lw t0, 8(sp)
; RV32IF-NEXT: lw a4, 12(sp)
-; RV32IF-NEXT: lw a0, 16(sp)
+; RV32IF-NEXT: lw a1, 16(sp)
; RV32IF-NEXT: lui a3, 524288
-; RV32IF-NEXT: addi a5, a3, -1
+; RV32IF-NEXT: addi a6, a3, -1
; RV32IF-NEXT: mv a2, t0
-; RV32IF-NEXT: beq a4, a5, .LBB45_2
+; RV32IF-NEXT: beq a4, a6, .LBB45_2
; RV32IF-NEXT: # %bb.1: # %entry
-; RV32IF-NEXT: sltu a2, a4, a5
+; RV32IF-NEXT: sltu a2, a4, a6
; RV32IF-NEXT: addi a2, a2, -1
; RV32IF-NEXT: or a2, a2, t0
; RV32IF-NEXT: .LBB45_2: # %entry
-; RV32IF-NEXT: or a7, a0, a1
-; RV32IF-NEXT: slti a6, a1, 0
+; RV32IF-NEXT: or a7, a1, a0
+; RV32IF-NEXT: slti a5, a0, 0
; RV32IF-NEXT: bnez a7, .LBB45_16
; RV32IF-NEXT: # %bb.3: # %entry
; RV32IF-NEXT: mv t0, a4
-; RV32IF-NEXT: bgez a1, .LBB45_17
+; RV32IF-NEXT: bgez a0, .LBB45_17
; RV32IF-NEXT: .LBB45_4: # %entry
-; RV32IF-NEXT: bgeu a4, a5, .LBB45_18
+; RV32IF-NEXT: bgeu a4, a6, .LBB45_18
; RV32IF-NEXT: .LBB45_5: # %entry
; RV32IF-NEXT: beqz a7, .LBB45_7
; RV32IF-NEXT: .LBB45_6: # %entry
; RV32IF-NEXT: mv a4, t0
; RV32IF-NEXT: .LBB45_7: # %entry
-; RV32IF-NEXT: neg a5, a6
-; RV32IF-NEXT: and a5, a5, a1
-; RV32IF-NEXT: srai a1, a1, 31
-; RV32IF-NEXT: mv t0, a4
+; RV32IF-NEXT: srai a6, a0, 31
+; RV32IF-NEXT: and a1, a6, a1
+; RV32IF-NEXT: seqz a6, a0
+; RV32IF-NEXT: neg a5, a5
+; RV32IF-NEXT: and a5, a5, a0
+; RV32IF-NEXT: addi a6, a6, -1
+; RV32IF-NEXT: mv a0, a4
; RV32IF-NEXT: bgez a5, .LBB45_9
; RV32IF-NEXT: # %bb.8: # %entry
-; RV32IF-NEXT: lui t0, 524288
+; RV32IF-NEXT: lui a0, 524288
; RV32IF-NEXT: .LBB45_9: # %entry
-; RV32IF-NEXT: and a0, a1, a0
+; RV32IF-NEXT: and a6, a6, a1
; RV32IF-NEXT: mv a1, a4
; RV32IF-NEXT: bltu a3, a4, .LBB45_11
; RV32IF-NEXT: # %bb.10: # %entry
; RV32IF-NEXT: lui a1, 524288
; RV32IF-NEXT: .LBB45_11: # %entry
-; RV32IF-NEXT: and a6, a0, a5
+; RV32IF-NEXT: and a6, a6, a5
; RV32IF-NEXT: li a7, -1
; RV32IF-NEXT: bne a6, a7, .LBB45_19
; RV32IF-NEXT: # %bb.12: # %entry
@@ -3029,19 +3032,19 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IF-NEXT: addi sp, sp, 32
; RV32IF-NEXT: ret
; RV32IF-NEXT: .LBB45_16: # %entry
-; RV32IF-NEXT: addi a2, a6, -1
+; RV32IF-NEXT: addi a2, a5, -1
; RV32IF-NEXT: or a2, a2, t0
; RV32IF-NEXT: mv t0, a4
-; RV32IF-NEXT: bltz a1, .LBB45_4
+; RV32IF-NEXT: bltz a0, .LBB45_4
; RV32IF-NEXT: .LBB45_17: # %entry
-; RV32IF-NEXT: mv t0, a5
-; RV32IF-NEXT: bltu a4, a5, .LBB45_5
+; RV32IF-NEXT: mv t0, a6
+; RV32IF-NEXT: bltu a4, a6, .LBB45_5
; RV32IF-NEXT: .LBB45_18: # %entry
-; RV32IF-NEXT: mv a4, a5
+; RV32IF-NEXT: mv a4, a6
; RV32IF-NEXT: bnez a7, .LBB45_6
; RV32IF-NEXT: j .LBB45_7
; RV32IF-NEXT: .LBB45_19: # %entry
-; RV32IF-NEXT: mv a1, t0
+; RV32IF-NEXT: mv a1, a0
; RV32IF-NEXT: mv a0, a2
; RV32IF-NEXT: beq a4, a3, .LBB45_13
; RV32IF-NEXT: .LBB45_20: # %entry
@@ -3108,47 +3111,50 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IFD-NEXT: .cfi_offset ra, -4
; RV32IFD-NEXT: addi a0, sp, 8
; RV32IFD-NEXT: call __fixdfti at plt
-; RV32IFD-NEXT: lw a1, 20(sp)
+; RV32IFD-NEXT: lw a0, 20(sp)
; RV32IFD-NEXT: lw t0, 8(sp)
; RV32IFD-NEXT: lw a4, 12(sp)
-; RV32IFD-NEXT: lw a0, 16(sp)
+; RV32IFD-NEXT: lw a1, 16(sp)
; RV32IFD-NEXT: lui a3, 524288
-; RV32IFD-NEXT: addi a5, a3, -1
+; RV32IFD-NEXT: addi a6, a3, -1
; RV32IFD-NEXT: mv a2, t0
-; RV32IFD-NEXT: beq a4, a5, .LBB45_2
+; RV32IFD-NEXT: beq a4, a6, .LBB45_2
; RV32IFD-NEXT: # %bb.1: # %entry
-; RV32IFD-NEXT: sltu a2, a4, a5
+; RV32IFD-NEXT: sltu a2, a4, a6
; RV32IFD-NEXT: addi a2, a2, -1
; RV32IFD-NEXT: or a2, a2, t0
; RV32IFD-NEXT: .LBB45_2: # %entry
-; RV32IFD-NEXT: or a7, a0, a1
-; RV32IFD-NEXT: slti a6, a1, 0
+; RV32IFD-NEXT: or a7, a1, a0
+; RV32IFD-NEXT: slti a5, a0, 0
; RV32IFD-NEXT: bnez a7, .LBB45_16
; RV32IFD-NEXT: # %bb.3: # %entry
; RV32IFD-NEXT: mv t0, a4
-; RV32IFD-NEXT: bgez a1, .LBB45_17
+; RV32IFD-NEXT: bgez a0, .LBB45_17
; RV32IFD-NEXT: .LBB45_4: # %entry
-; RV32IFD-NEXT: bgeu a4, a5, .LBB45_18
+; RV32IFD-NEXT: bgeu a4, a6, .LBB45_18
; RV32IFD-NEXT: .LBB45_5: # %entry
; RV32IFD-NEXT: beqz a7, .LBB45_7
; RV32IFD-NEXT: .LBB45_6: # %entry
; RV32IFD-NEXT: mv a4, t0
; RV32IFD-NEXT: .LBB45_7: # %entry
-; RV32IFD-NEXT: neg a5, a6
-; RV32IFD-NEXT: and a5, a5, a1
-; RV32IFD-NEXT: srai a1, a1, 31
-; RV32IFD-NEXT: mv t0, a4
+; RV32IFD-NEXT: srai a6, a0, 31
+; RV32IFD-NEXT: and a1, a6, a1
+; RV32IFD-NEXT: seqz a6, a0
+; RV32IFD-NEXT: neg a5, a5
+; RV32IFD-NEXT: and a5, a5, a0
+; RV32IFD-NEXT: addi a6, a6, -1
+; RV32IFD-NEXT: mv a0, a4
; RV32IFD-NEXT: bgez a5, .LBB45_9
; RV32IFD-NEXT: # %bb.8: # %entry
-; RV32IFD-NEXT: lui t0, 524288
+; RV32IFD-NEXT: lui a0, 524288
; RV32IFD-NEXT: .LBB45_9: # %entry
-; RV32IFD-NEXT: and a0, a1, a0
+; RV32IFD-NEXT: and a6, a6, a1
; RV32IFD-NEXT: mv a1, a4
; RV32IFD-NEXT: bltu a3, a4, .LBB45_11
; RV32IFD-NEXT: # %bb.10: # %entry
; RV32IFD-NEXT: lui a1, 524288
; RV32IFD-NEXT: .LBB45_11: # %entry
-; RV32IFD-NEXT: and a6, a0, a5
+; RV32IFD-NEXT: and a6, a6, a5
; RV32IFD-NEXT: li a7, -1
; RV32IFD-NEXT: bne a6, a7, .LBB45_19
; RV32IFD-NEXT: # %bb.12: # %entry
@@ -3165,19 +3171,19 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IFD-NEXT: addi sp, sp, 32
; RV32IFD-NEXT: ret
; RV32IFD-NEXT: .LBB45_16: # %entry
-; RV32IFD-NEXT: addi a2, a6, -1
+; RV32IFD-NEXT: addi a2, a5, -1
; RV32IFD-NEXT: or a2, a2, t0
; RV32IFD-NEXT: mv t0, a4
-; RV32IFD-NEXT: bltz a1, .LBB45_4
+; RV32IFD-NEXT: bltz a0, .LBB45_4
; RV32IFD-NEXT: .LBB45_17: # %entry
-; RV32IFD-NEXT: mv t0, a5
-; RV32IFD-NEXT: bltu a4, a5, .LBB45_5
+; RV32IFD-NEXT: mv t0, a6
+; RV32IFD-NEXT: bltu a4, a6, .LBB45_5
; RV32IFD-NEXT: .LBB45_18: # %entry
-; RV32IFD-NEXT: mv a4, a5
+; RV32IFD-NEXT: mv a4, a6
; RV32IFD-NEXT: bnez a7, .LBB45_6
; RV32IFD-NEXT: j .LBB45_7
; RV32IFD-NEXT: .LBB45_19: # %entry
-; RV32IFD-NEXT: mv a1, t0
+; RV32IFD-NEXT: mv a1, a0
; RV32IFD-NEXT: mv a0, a2
; RV32IFD-NEXT: beq a4, a3, .LBB45_13
; RV32IFD-NEXT: .LBB45_20: # %entry
@@ -3240,7 +3246,11 @@ define i64 @utest_f64i64_mm(double %x) {
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: call __fixunsdfti@plt
-; RV64-NEXT: snez a1, a1
+; RV64-NEXT: snez a2, a1
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: and a0, a2, a0
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: seqz a1, a1
; RV64-NEXT: addi a1, a1, -1
; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -3364,8 +3374,12 @@ define i64 @ustest_f64i64_mm(double %x) {
; RV64-NEXT: # %bb.1: # %entry
; RV64-NEXT: li a2, 1
; RV64-NEXT: .LBB47_2: # %entry
-; RV64-NEXT: slti a1, a1, 1
-; RV64-NEXT: neg a1, a1
+; RV64-NEXT: slti a3, a1, 1
+; RV64-NEXT: neg a3, a3
+; RV64-NEXT: and a0, a3, a0
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: seqz a1, a1
+; RV64-NEXT: addi a1, a1, -1
; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: beqz a2, .LBB47_4
; RV64-NEXT: # %bb.3: # %entry
@@ -3462,47 +3476,50 @@ define i64 @stest_f32i64_mm(float %x) {
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixsfti@plt
-; RV32-NEXT: lw a1, 20(sp)
+; RV32-NEXT: lw a0, 20(sp)
; RV32-NEXT: lw t0, 8(sp)
; RV32-NEXT: lw a4, 12(sp)
-; RV32-NEXT: lw a0, 16(sp)
+; RV32-NEXT: lw a1, 16(sp)
; RV32-NEXT: lui a3, 524288
-; RV32-NEXT: addi a5, a3, -1
+; RV32-NEXT: addi a6, a3, -1
; RV32-NEXT: mv a2, t0
-; RV32-NEXT: beq a4, a5, .LBB48_2
+; RV32-NEXT: beq a4, a6, .LBB48_2
; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: sltu a2, a4, a5
+; RV32-NEXT: sltu a2, a4, a6
; RV32-NEXT: addi a2, a2, -1
; RV32-NEXT: or a2, a2, t0
; RV32-NEXT: .LBB48_2: # %entry
-; RV32-NEXT: or a7, a0, a1
-; RV32-NEXT: slti a6, a1, 0
+; RV32-NEXT: or a7, a1, a0
+; RV32-NEXT: slti a5, a0, 0
; RV32-NEXT: bnez a7, .LBB48_16
; RV32-NEXT: # %bb.3: # %entry
; RV32-NEXT: mv t0, a4
-; RV32-NEXT: bgez a1, .LBB48_17
+; RV32-NEXT: bgez a0, .LBB48_17
; RV32-NEXT: .LBB48_4: # %entry
-; RV32-NEXT: bgeu a4, a5, .LBB48_18
+; RV32-NEXT: bgeu a4, a6, .LBB48_18
; RV32-NEXT: .LBB48_5: # %entry
; RV32-NEXT: beqz a7, .LBB48_7
; RV32-NEXT: .LBB48_6: # %entry
; RV32-NEXT: mv a4, t0
; RV32-NEXT: .LBB48_7: # %entry
-; RV32-NEXT: neg a5, a6
-; RV32-NEXT: and a5, a5, a1
-; RV32-NEXT: srai a1, a1, 31
-; RV32-NEXT: mv t0, a4
+; RV32-NEXT: srai a6, a0, 31
+; RV32-NEXT: and a1, a6, a1
+; RV32-NEXT: seqz a6, a0
+; RV32-NEXT: neg a5, a5
+; RV32-NEXT: and a5, a5, a0
+; RV32-NEXT: addi a6, a6, -1
+; RV32-NEXT: mv a0, a4
; RV32-NEXT: bgez a5, .LBB48_9
; RV32-NEXT: # %bb.8: # %entry
-; RV32-NEXT: lui t0, 524288
+; RV32-NEXT: lui a0, 524288
; RV32-NEXT: .LBB48_9: # %entry
-; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: and a6, a6, a1
; RV32-NEXT: mv a1, a4
; RV32-NEXT: bltu a3, a4, .LBB48_11
; RV32-NEXT: # %bb.10: # %entry
; RV32-NEXT: lui a1, 524288
; RV32-NEXT: .LBB48_11: # %entry
-; RV32-NEXT: and a6, a0, a5
+; RV32-NEXT: and a6, a6, a5
; RV32-NEXT: li a7, -1
; RV32-NEXT: bne a6, a7, .LBB48_19
; RV32-NEXT: # %bb.12: # %entry
@@ -3519,19 +3536,19 @@ define i64 @stest_f32i64_mm(float %x) {
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
; RV32-NEXT: .LBB48_16: # %entry
-; RV32-NEXT: addi a2, a6, -1
+; RV32-NEXT: addi a2, a5, -1
; RV32-NEXT: or a2, a2, t0
; RV32-NEXT: mv t0, a4
-; RV32-NEXT: bltz a1, .LBB48_4
+; RV32-NEXT: bltz a0, .LBB48_4
; RV32-NEXT: .LBB48_17: # %entry
-; RV32-NEXT: mv t0, a5
-; RV32-NEXT: bltu a4, a5, .LBB48_5
+; RV32-NEXT: mv t0, a6
+; RV32-NEXT: bltu a4, a6, .LBB48_5
; RV32-NEXT: .LBB48_18: # %entry
-; RV32-NEXT: mv a4, a5
+; RV32-NEXT: mv a4, a6
; RV32-NEXT: bnez a7, .LBB48_6
; RV32-NEXT: j .LBB48_7
; RV32-NEXT: .LBB48_19: # %entry
-; RV32-NEXT: mv a1, t0
+; RV32-NEXT: mv a1, a0
; RV32-NEXT: mv a0, a2
; RV32-NEXT: beq a4, a3, .LBB48_13
; RV32-NEXT: .LBB48_20: # %entry
@@ -3592,7 +3609,11 @@ define i64 @utest_f32i64_mm(float %x) {
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: call __fixunssfti@plt
-; RV64-NEXT: snez a1, a1
+; RV64-NEXT: snez a2, a1
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: and a0, a2, a0
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: seqz a1, a1
; RV64-NEXT: addi a1, a1, -1
; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -3687,8 +3708,12 @@ define i64 @ustest_f32i64_mm(float %x) {
; RV64-NEXT: # %bb.1: # %entry
; RV64-NEXT: li a2, 1
; RV64-NEXT: .LBB50_2: # %entry
-; RV64-NEXT: slti a1, a1, 1
-; RV64-NEXT: neg a1, a1
+; RV64-NEXT: slti a3, a1, 1
+; RV64-NEXT: neg a3, a3
+; RV64-NEXT: and a0, a3, a0
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: seqz a1, a1
+; RV64-NEXT: addi a1, a1, -1
; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: beqz a2, .LBB50_4
; RV64-NEXT: # %bb.3: # %entry
@@ -3718,47 +3743,50 @@ define i64 @stest_f16i64_mm(half %x) {
; RV32-NEXT: call __extendhfsf2@plt
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixsfti@plt
-; RV32-NEXT: lw a1, 20(sp)
+; RV32-NEXT: lw a0, 20(sp)
; RV32-NEXT: lw t0, 8(sp)
; RV32-NEXT: lw a4, 12(sp)
-; RV32-NEXT: lw a0, 16(sp)
+; RV32-NEXT: lw a1, 16(sp)
; RV32-NEXT: lui a3, 524288
-; RV32-NEXT: addi a5, a3, -1
+; RV32-NEXT: addi a6, a3, -1
; RV32-NEXT: mv a2, t0
-; RV32-NEXT: beq a4, a5, .LBB51_2
+; RV32-NEXT: beq a4, a6, .LBB51_2
; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: sltu a2, a4, a5
+; RV32-NEXT: sltu a2, a4, a6
; RV32-NEXT: addi a2, a2, -1
; RV32-NEXT: or a2, a2, t0
; RV32-NEXT: .LBB51_2: # %entry
-; RV32-NEXT: or a7, a0, a1
-; RV32-NEXT: slti a6, a1, 0
+; RV32-NEXT: or a7, a1, a0
+; RV32-NEXT: slti a5, a0, 0
; RV32-NEXT: bnez a7, .LBB51_16
; RV32-NEXT: # %bb.3: # %entry
; RV32-NEXT: mv t0, a4
-; RV32-NEXT: bgez a1, .LBB51_17
+; RV32-NEXT: bgez a0, .LBB51_17
; RV32-NEXT: .LBB51_4: # %entry
-; RV32-NEXT: bgeu a4, a5, .LBB51_18
+; RV32-NEXT: bgeu a4, a6, .LBB51_18
; RV32-NEXT: .LBB51_5: # %entry
; RV32-NEXT: beqz a7, .LBB51_7
; RV32-NEXT: .LBB51_6: # %entry
; RV32-NEXT: mv a4, t0
; RV32-NEXT: .LBB51_7: # %entry
-; RV32-NEXT: neg a5, a6
-; RV32-NEXT: and a5, a5, a1
-; RV32-NEXT: srai a1, a1, 31
-; RV32-NEXT: mv t0, a4
+; RV32-NEXT: srai a6, a0, 31
+; RV32-NEXT: and a1, a6, a1
+; RV32-NEXT: seqz a6, a0
+; RV32-NEXT: neg a5, a5
+; RV32-NEXT: and a5, a5, a0
+; RV32-NEXT: addi a6, a6, -1
+; RV32-NEXT: mv a0, a4
; RV32-NEXT: bgez a5, .LBB51_9
; RV32-NEXT: # %bb.8: # %entry
-; RV32-NEXT: lui t0, 524288
+; RV32-NEXT: lui a0, 524288
; RV32-NEXT: .LBB51_9: # %entry
-; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: and a6, a6, a1
; RV32-NEXT: mv a1, a4
; RV32-NEXT: bltu a3, a4, .LBB51_11
; RV32-NEXT: # %bb.10: # %entry
; RV32-NEXT: lui a1, 524288
; RV32-NEXT: .LBB51_11: # %entry
-; RV32-NEXT: and a6, a0, a5
+; RV32-NEXT: and a6, a6, a5
; RV32-NEXT: li a7, -1
; RV32-NEXT: bne a6, a7, .LBB51_19
; RV32-NEXT: # %bb.12: # %entry
@@ -3775,19 +3803,19 @@ define i64 @stest_f16i64_mm(half %x) {
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
; RV32-NEXT: .LBB51_16: # %entry
-; RV32-NEXT: addi a2, a6, -1
+; RV32-NEXT: addi a2, a5, -1
; RV32-NEXT: or a2, a2, t0
; RV32-NEXT: mv t0, a4
-; RV32-NEXT: bltz a1, .LBB51_4
+; RV32-NEXT: bltz a0, .LBB51_4
; RV32-NEXT: .LBB51_17: # %entry
-; RV32-NEXT: mv t0, a5
-; RV32-NEXT: bltu a4, a5, .LBB51_5
+; RV32-NEXT: mv t0, a6
+; RV32-NEXT: bltu a4, a6, .LBB51_5
; RV32-NEXT: .LBB51_18: # %entry
-; RV32-NEXT: mv a4, a5
+; RV32-NEXT: mv a4, a6
; RV32-NEXT: bnez a7, .LBB51_6
; RV32-NEXT: j .LBB51_7
; RV32-NEXT: .LBB51_19: # %entry
-; RV32-NEXT: mv a1, t0
+; RV32-NEXT: mv a1, a0
; RV32-NEXT: mv a0, a2
; RV32-NEXT: beq a4, a3, .LBB51_13
; RV32-NEXT: .LBB51_20: # %entry
@@ -3894,7 +3922,11 @@ define i64 @utesth_f16i64_mm(half %x) {
; RV64-NEXT: fmv.x.w a0, fa0
; RV64-NEXT: call __extendhfsf2@plt
; RV64-NEXT: call __fixunssfti@plt
-; RV64-NEXT: snez a1, a1
+; RV64-NEXT: snez a2, a1
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: and a0, a2, a0
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: seqz a1, a1
; RV64-NEXT: addi a1, a1, -1
; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -3993,8 +4025,12 @@ define i64 @ustest_f16i64_mm(half %x) {
; RV64-NEXT: # %bb.1: # %entry
; RV64-NEXT: li a2, 1
; RV64-NEXT: .LBB53_2: # %entry
-; RV64-NEXT: slti a1, a1, 1
-; RV64-NEXT: neg a1, a1
+; RV64-NEXT: slti a3, a1, 1
+; RV64-NEXT: neg a3, a3
+; RV64-NEXT: and a0, a3, a0
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: seqz a1, a1
+; RV64-NEXT: addi a1, a1, -1
; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: beqz a2, .LBB53_4
; RV64-NEXT: # %bb.3: # %entry
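
For reference, the RV64 hunks above share one shape: where the removed lines built a single branchless mask (snez + addi -1 in the utest functions, slti + neg in the ustest ones), the added lines apply that mask and then a second one (addi -1 + seqz + addi -1), which is all-ones exactly when the original register was not 1. A minimal LLVM IR sketch of the select chain the unsigned (utest) form corresponds to (invented names, not taken from the test file):

define i64 @chained_selects_unsigned(i64 %hi, i64 %lo) {
entry:
  ; inner select: keep %lo only while the high word is zero (snez mask)
  %nonzero = icmp ne i64 %hi, 0
  %inner = select i1 %nonzero, i64 0, i64 %lo
  ; outer select: additionally zero the result when the high word is
  ; exactly 1 (the seqz mask); %hi == 1 already implies %hi != 0, so
  ; this select cannot change %inner
  %isone = icmp eq i64 %hi, 1
  %outer = select i1 %isone, i64 0, i64 %inner
  ret i64 %outer
}
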
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
index de9a54da50821..77faf67002eac 100644
--- a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
@@ -5572,12 +5572,20 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
; CHECK-NOV-NEXT: mv s1, a1
; CHECK-NOV-NEXT: fmv.d fa0, fs0
; CHECK-NOV-NEXT: call __fixunsdfti@plt
-; CHECK-NOV-NEXT: snez a1, a1
+; CHECK-NOV-NEXT: snez a2, a1
+; CHECK-NOV-NEXT: addi a2, a2, -1
+; CHECK-NOV-NEXT: and a0, a2, a0
+; CHECK-NOV-NEXT: addi a1, a1, -1
+; CHECK-NOV-NEXT: seqz a1, a1
; CHECK-NOV-NEXT: addi a1, a1, -1
; CHECK-NOV-NEXT: and a0, a1, a0
; CHECK-NOV-NEXT: snez a1, s1
; CHECK-NOV-NEXT: addi a1, a1, -1
; CHECK-NOV-NEXT: and a1, a1, s0
+; CHECK-NOV-NEXT: addi s1, s1, -1
+; CHECK-NOV-NEXT: seqz a2, s1
+; CHECK-NOV-NEXT: addi a2, a2, -1
+; CHECK-NOV-NEXT: and a1, a2, a1
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
@@ -5615,7 +5623,15 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
; CHECK-V-NEXT: snez a2, s1
; CHECK-V-NEXT: addi a2, a2, -1
; CHECK-V-NEXT: and a2, a2, s0
-; CHECK-V-NEXT: snez a1, a1
+; CHECK-V-NEXT: addi s1, s1, -1
+; CHECK-V-NEXT: seqz a3, s1
+; CHECK-V-NEXT: addi a3, a3, -1
+; CHECK-V-NEXT: and a2, a3, a2
+; CHECK-V-NEXT: snez a3, a1
+; CHECK-V-NEXT: addi a3, a3, -1
+; CHECK-V-NEXT: and a0, a3, a0
+; CHECK-V-NEXT: addi a1, a1, -1
+; CHECK-V-NEXT: seqz a1, a1
; CHECK-V-NEXT: addi a1, a1, -1
; CHECK-V-NEXT: and a0, a1, a0
; CHECK-V-NEXT: sd a0, 24(sp)
@@ -5661,35 +5677,42 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
; CHECK-NOV-NEXT: mv s1, a1
; CHECK-NOV-NEXT: fmv.d fa0, fs0
; CHECK-NOV-NEXT: call __fixdfti@plt
-; CHECK-NOV-NEXT: mv a3, a1
+; CHECK-NOV-NEXT: mv a2, a1
; CHECK-NOV-NEXT: blez a1, .LBB47_2
; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: li a3, 1
+; CHECK-NOV-NEXT: li a2, 1
; CHECK-NOV-NEXT: .LBB47_2: # %entry
; CHECK-NOV-NEXT: mv a4, s1
; CHECK-NOV-NEXT: blez s1, .LBB47_4
; CHECK-NOV-NEXT: # %bb.3: # %entry
; CHECK-NOV-NEXT: li a4, 1
; CHECK-NOV-NEXT: .LBB47_4: # %entry
-; CHECK-NOV-NEXT: slti a1, a1, 1
-; CHECK-NOV-NEXT: neg a1, a1
-; CHECK-NOV-NEXT: slti a2, s1, 1
-; CHECK-NOV-NEXT: neg a2, a2
-; CHECK-NOV-NEXT: and a2, a2, s0
+; CHECK-NOV-NEXT: slti a3, a1, 1
+; CHECK-NOV-NEXT: neg a3, a3
+; CHECK-NOV-NEXT: and a3, a3, a0
+; CHECK-NOV-NEXT: addi a1, a1, -1
+; CHECK-NOV-NEXT: seqz a1, a1
+; CHECK-NOV-NEXT: addi a1, a1, -1
+; CHECK-NOV-NEXT: slti a0, s1, 1
+; CHECK-NOV-NEXT: neg a0, a0
+; CHECK-NOV-NEXT: and a0, a0, s0
+; CHECK-NOV-NEXT: addi s1, s1, -1
+; CHECK-NOV-NEXT: seqz a5, s1
+; CHECK-NOV-NEXT: addi a5, a5, -1
+; CHECK-NOV-NEXT: and a0, a5, a0
; CHECK-NOV-NEXT: beqz a4, .LBB47_6
; CHECK-NOV-NEXT: # %bb.5: # %entry
; CHECK-NOV-NEXT: sgtz a4, a4
; CHECK-NOV-NEXT: neg a4, a4
-; CHECK-NOV-NEXT: and a2, a4, a2
+; CHECK-NOV-NEXT: and a0, a4, a0
; CHECK-NOV-NEXT: .LBB47_6: # %entry
-; CHECK-NOV-NEXT: and a1, a1, a0
-; CHECK-NOV-NEXT: beqz a3, .LBB47_8
+; CHECK-NOV-NEXT: and a1, a1, a3
+; CHECK-NOV-NEXT: beqz a2, .LBB47_8
; CHECK-NOV-NEXT: # %bb.7: # %entry
-; CHECK-NOV-NEXT: sgtz a0, a3
-; CHECK-NOV-NEXT: neg a0, a0
-; CHECK-NOV-NEXT: and a1, a0, a1
+; CHECK-NOV-NEXT: sgtz a2, a2
+; CHECK-NOV-NEXT: neg a2, a2
+; CHECK-NOV-NEXT: and a1, a2, a1
; CHECK-NOV-NEXT: .LBB47_8: # %entry
-; CHECK-NOV-NEXT: mv a0, a2
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
@@ -5730,29 +5753,37 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
; CHECK-V-NEXT: li a2, 1
; CHECK-V-NEXT: .LBB47_2: # %entry
; CHECK-V-NEXT: slti a3, s0, 1
-; CHECK-V-NEXT: neg a4, a3
-; CHECK-V-NEXT: slti a1, a1, 1
+; CHECK-V-NEXT: neg a3, a3
+; CHECK-V-NEXT: and a3, a3, s1
+; CHECK-V-NEXT: addi a4, s0, -1
+; CHECK-V-NEXT: seqz a4, a4
+; CHECK-V-NEXT: addi a4, a4, -1
+; CHECK-V-NEXT: slti a5, a1, 1
+; CHECK-V-NEXT: neg a5, a5
+; CHECK-V-NEXT: addi a1, a1, -1
+; CHECK-V-NEXT: seqz a6, a1
; CHECK-V-NEXT: blez s0, .LBB47_4
; CHECK-V-NEXT: # %bb.3: # %entry
; CHECK-V-NEXT: li s0, 1
; CHECK-V-NEXT: .LBB47_4: # %entry
-; CHECK-V-NEXT: neg a3, a1
-; CHECK-V-NEXT: and a1, a4, s1
+; CHECK-V-NEXT: and a1, a5, a0
+; CHECK-V-NEXT: addi a5, a6, -1
+; CHECK-V-NEXT: and a0, a4, a3
; CHECK-V-NEXT: beqz s0, .LBB47_6
; CHECK-V-NEXT: # %bb.5: # %entry
-; CHECK-V-NEXT: sgtz a4, s0
-; CHECK-V-NEXT: neg a4, a4
-; CHECK-V-NEXT: and a1, a4, a1
-; CHECK-V-NEXT: .LBB47_6: # %entry
+; CHECK-V-NEXT: sgtz a3, s0
+; CHECK-V-NEXT: neg a3, a3
; CHECK-V-NEXT: and a0, a3, a0
+; CHECK-V-NEXT: .LBB47_6: # %entry
+; CHECK-V-NEXT: and a1, a5, a1
; CHECK-V-NEXT: beqz a2, .LBB47_8
; CHECK-V-NEXT: # %bb.7: # %entry
; CHECK-V-NEXT: sgtz a2, a2
; CHECK-V-NEXT: neg a2, a2
-; CHECK-V-NEXT: and a0, a2, a0
+; CHECK-V-NEXT: and a1, a2, a1
; CHECK-V-NEXT: .LBB47_8: # %entry
-; CHECK-V-NEXT: sd a0, 24(sp)
-; CHECK-V-NEXT: sd a1, 32(sp)
+; CHECK-V-NEXT: sd a1, 24(sp)
+; CHECK-V-NEXT: sd a0, 32(sp)
; CHECK-V-NEXT: addi a0, sp, 24
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-V-NEXT: vle64.v v8, (a0)
@@ -6031,12 +6062,20 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
; CHECK-NOV-NEXT: mv s1, a1
; CHECK-NOV-NEXT: fmv.s fa0, fs0
; CHECK-NOV-NEXT: call __fixunssfti@plt
-; CHECK-NOV-NEXT: snez a1, a1
+; CHECK-NOV-NEXT: snez a2, a1
+; CHECK-NOV-NEXT: addi a2, a2, -1
+; CHECK-NOV-NEXT: and a0, a2, a0
+; CHECK-NOV-NEXT: addi a1, a1, -1
+; CHECK-NOV-NEXT: seqz a1, a1
; CHECK-NOV-NEXT: addi a1, a1, -1
; CHECK-NOV-NEXT: and a0, a1, a0
; CHECK-NOV-NEXT: snez a1, s1
; CHECK-NOV-NEXT: addi a1, a1, -1
; CHECK-NOV-NEXT: and a1, a1, s0
+; CHECK-NOV-NEXT: addi s1, s1, -1
+; CHECK-NOV-NEXT: seqz a2, s1
+; CHECK-NOV-NEXT: addi a2, a2, -1
+; CHECK-NOV-NEXT: and a1, a2, a1
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
@@ -6074,7 +6113,15 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
; CHECK-V-NEXT: snez a2, s1
; CHECK-V-NEXT: addi a2, a2, -1
; CHECK-V-NEXT: and a2, a2, s0
-; CHECK-V-NEXT: snez a1, a1
+; CHECK-V-NEXT: addi s1, s1, -1
+; CHECK-V-NEXT: seqz a3, s1
+; CHECK-V-NEXT: addi a3, a3, -1
+; CHECK-V-NEXT: and a2, a3, a2
+; CHECK-V-NEXT: snez a3, a1
+; CHECK-V-NEXT: addi a3, a3, -1
+; CHECK-V-NEXT: and a0, a3, a0
+; CHECK-V-NEXT: addi a1, a1, -1
+; CHECK-V-NEXT: seqz a1, a1
; CHECK-V-NEXT: addi a1, a1, -1
; CHECK-V-NEXT: and a0, a1, a0
; CHECK-V-NEXT: sd a0, 24(sp)
@@ -6120,35 +6167,42 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
; CHECK-NOV-NEXT: mv s1, a1
; CHECK-NOV-NEXT: fmv.s fa0, fs0
; CHECK-NOV-NEXT: call __fixsfti@plt
-; CHECK-NOV-NEXT: mv a3, a1
+; CHECK-NOV-NEXT: mv a2, a1
; CHECK-NOV-NEXT: blez a1, .LBB50_2
; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: li a3, 1
+; CHECK-NOV-NEXT: li a2, 1
; CHECK-NOV-NEXT: .LBB50_2: # %entry
; CHECK-NOV-NEXT: mv a4, s1
; CHECK-NOV-NEXT: blez s1, .LBB50_4
; CHECK-NOV-NEXT: # %bb.3: # %entry
; CHECK-NOV-NEXT: li a4, 1
; CHECK-NOV-NEXT: .LBB50_4: # %entry
-; CHECK-NOV-NEXT: slti a1, a1, 1
-; CHECK-NOV-NEXT: neg a1, a1
-; CHECK-NOV-NEXT: slti a2, s1, 1
-; CHECK-NOV-NEXT: neg a2, a2
-; CHECK-NOV-NEXT: and a2, a2, s0
+; CHECK-NOV-NEXT: slti a3, a1, 1
+; CHECK-NOV-NEXT: neg a3, a3
+; CHECK-NOV-NEXT: and a3, a3, a0
+; CHECK-NOV-NEXT: addi a1, a1, -1
+; CHECK-NOV-NEXT: seqz a1, a1
+; CHECK-NOV-NEXT: addi a1, a1, -1
+; CHECK-NOV-NEXT: slti a0, s1, 1
+; CHECK-NOV-NEXT: neg a0, a0
+; CHECK-NOV-NEXT: and a0, a0, s0
+; CHECK-NOV-NEXT: addi s1, s1, -1
+; CHECK-NOV-NEXT: seqz a5, s1
+; CHECK-NOV-NEXT: addi a5, a5, -1
+; CHECK-NOV-NEXT: and a0, a5, a0
; CHECK-NOV-NEXT: beqz a4, .LBB50_6
; CHECK-NOV-NEXT: # %bb.5: # %entry
; CHECK-NOV-NEXT: sgtz a4, a4
; CHECK-NOV-NEXT: neg a4, a4
-; CHECK-NOV-NEXT: and a2, a4, a2
+; CHECK-NOV-NEXT: and a0, a4, a0
; CHECK-NOV-NEXT: .LBB50_6: # %entry
-; CHECK-NOV-NEXT: and a1, a1, a0
-; CHECK-NOV-NEXT: beqz a3, .LBB50_8
+; CHECK-NOV-NEXT: and a1, a1, a3
+; CHECK-NOV-NEXT: beqz a2, .LBB50_8
; CHECK-NOV-NEXT: # %bb.7: # %entry
-; CHECK-NOV-NEXT: sgtz a0, a3
-; CHECK-NOV-NEXT: neg a0, a0
-; CHECK-NOV-NEXT: and a1, a0, a1
+; CHECK-NOV-NEXT: sgtz a2, a2
+; CHECK-NOV-NEXT: neg a2, a2
+; CHECK-NOV-NEXT: and a1, a2, a1
; CHECK-NOV-NEXT: .LBB50_8: # %entry
-; CHECK-NOV-NEXT: mv a0, a2
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
@@ -6189,29 +6243,37 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
; CHECK-V-NEXT: li a2, 1
; CHECK-V-NEXT: .LBB50_2: # %entry
; CHECK-V-NEXT: slti a3, s0, 1
-; CHECK-V-NEXT: neg a4, a3
-; CHECK-V-NEXT: slti a1, a1, 1
+; CHECK-V-NEXT: neg a3, a3
+; CHECK-V-NEXT: and a3, a3, s1
+; CHECK-V-NEXT: addi a4, s0, -1
+; CHECK-V-NEXT: seqz a4, a4
+; CHECK-V-NEXT: addi a4, a4, -1
+; CHECK-V-NEXT: slti a5, a1, 1
+; CHECK-V-NEXT: neg a5, a5
+; CHECK-V-NEXT: addi a1, a1, -1
+; CHECK-V-NEXT: seqz a6, a1
; CHECK-V-NEXT: blez s0, .LBB50_4
; CHECK-V-NEXT: # %bb.3: # %entry
; CHECK-V-NEXT: li s0, 1
; CHECK-V-NEXT: .LBB50_4: # %entry
-; CHECK-V-NEXT: neg a3, a1
-; CHECK-V-NEXT: and a1, a4, s1
+; CHECK-V-NEXT: and a1, a5, a0
+; CHECK-V-NEXT: addi a5, a6, -1
+; CHECK-V-NEXT: and a0, a4, a3
; CHECK-V-NEXT: beqz s0, .LBB50_6
; CHECK-V-NEXT: # %bb.5: # %entry
-; CHECK-V-NEXT: sgtz a4, s0
-; CHECK-V-NEXT: neg a4, a4
-; CHECK-V-NEXT: and a1, a4, a1
-; CHECK-V-NEXT: .LBB50_6: # %entry
+; CHECK-V-NEXT: sgtz a3, s0
+; CHECK-V-NEXT: neg a3, a3
; CHECK-V-NEXT: and a0, a3, a0
+; CHECK-V-NEXT: .LBB50_6: # %entry
+; CHECK-V-NEXT: and a1, a5, a1
; CHECK-V-NEXT: beqz a2, .LBB50_8
; CHECK-V-NEXT: # %bb.7: # %entry
; CHECK-V-NEXT: sgtz a2, a2
; CHECK-V-NEXT: neg a2, a2
-; CHECK-V-NEXT: and a0, a2, a0
+; CHECK-V-NEXT: and a1, a2, a1
; CHECK-V-NEXT: .LBB50_8: # %entry
-; CHECK-V-NEXT: sd a0, 24(sp)
-; CHECK-V-NEXT: sd a1, 32(sp)
+; CHECK-V-NEXT: sd a1, 24(sp)
+; CHECK-V-NEXT: sd a0, 32(sp)
; CHECK-V-NEXT: addi a0, sp, 24
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-V-NEXT: vle64.v v8, (a0)
@@ -6485,12 +6547,20 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
; CHECK-NOV-NEXT: mv a0, s0
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: call __fixunssfti@plt
-; CHECK-NOV-NEXT: snez a1, a1
+; CHECK-NOV-NEXT: snez a2, a1
+; CHECK-NOV-NEXT: addi a2, a2, -1
+; CHECK-NOV-NEXT: and a0, a2, a0
+; CHECK-NOV-NEXT: addi a1, a1, -1
+; CHECK-NOV-NEXT: seqz a1, a1
; CHECK-NOV-NEXT: addi a1, a1, -1
; CHECK-NOV-NEXT: and a0, a1, a0
; CHECK-NOV-NEXT: snez a1, s2
; CHECK-NOV-NEXT: addi a1, a1, -1
; CHECK-NOV-NEXT: and a1, a1, s1
+; CHECK-NOV-NEXT: addi s2, s2, -1
+; CHECK-NOV-NEXT: seqz a2, s2
+; CHECK-NOV-NEXT: addi a2, a2, -1
+; CHECK-NOV-NEXT: and a1, a2, a1
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
@@ -6519,12 +6589,20 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
; CHECK-V-NEXT: mv a0, s0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: call __fixunssfti@plt
-; CHECK-V-NEXT: snez a1, a1
+; CHECK-V-NEXT: snez a2, a1
+; CHECK-V-NEXT: addi a2, a2, -1
+; CHECK-V-NEXT: and a0, a2, a0
+; CHECK-V-NEXT: addi a1, a1, -1
+; CHECK-V-NEXT: seqz a1, a1
; CHECK-V-NEXT: addi a1, a1, -1
; CHECK-V-NEXT: and a0, a1, a0
; CHECK-V-NEXT: snez a1, s2
; CHECK-V-NEXT: addi a1, a1, -1
; CHECK-V-NEXT: and a1, a1, s1
+; CHECK-V-NEXT: addi s2, s2, -1
+; CHECK-V-NEXT: seqz a2, s2
+; CHECK-V-NEXT: addi a2, a2, -1
+; CHECK-V-NEXT: and a1, a2, a1
; CHECK-V-NEXT: sd a1, 8(sp)
; CHECK-V-NEXT: sd a0, 0(sp)
; CHECK-V-NEXT: addi a0, sp, 8
@@ -6568,35 +6646,42 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
; CHECK-NOV-NEXT: mv a0, s2
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: call __fixsfti@plt
-; CHECK-NOV-NEXT: mv a3, a1
+; CHECK-NOV-NEXT: mv a2, a1
; CHECK-NOV-NEXT: blez a1, .LBB53_2
; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: li a3, 1
+; CHECK-NOV-NEXT: li a2, 1
; CHECK-NOV-NEXT: .LBB53_2: # %entry
; CHECK-NOV-NEXT: mv a4, s1
; CHECK-NOV-NEXT: blez s1, .LBB53_4
; CHECK-NOV-NEXT: # %bb.3: # %entry
; CHECK-NOV-NEXT: li a4, 1
; CHECK-NOV-NEXT: .LBB53_4: # %entry
-; CHECK-NOV-NEXT: slti a1, a1, 1
-; CHECK-NOV-NEXT: neg a1, a1
-; CHECK-NOV-NEXT: slti a2, s1, 1
-; CHECK-NOV-NEXT: neg a2, a2
-; CHECK-NOV-NEXT: and a2, a2, s0
+; CHECK-NOV-NEXT: slti a3, a1, 1
+; CHECK-NOV-NEXT: neg a3, a3
+; CHECK-NOV-NEXT: and a3, a3, a0
+; CHECK-NOV-NEXT: addi a1, a1, -1
+; CHECK-NOV-NEXT: seqz a1, a1
+; CHECK-NOV-NEXT: addi a1, a1, -1
+; CHECK-NOV-NEXT: slti a0, s1, 1
+; CHECK-NOV-NEXT: neg a0, a0
+; CHECK-NOV-NEXT: and a0, a0, s0
+; CHECK-NOV-NEXT: addi s1, s1, -1
+; CHECK-NOV-NEXT: seqz a5, s1
+; CHECK-NOV-NEXT: addi a5, a5, -1
+; CHECK-NOV-NEXT: and a0, a5, a0
; CHECK-NOV-NEXT: beqz a4, .LBB53_6
; CHECK-NOV-NEXT: # %bb.5: # %entry
; CHECK-NOV-NEXT: sgtz a4, a4
; CHECK-NOV-NEXT: neg a4, a4
-; CHECK-NOV-NEXT: and a2, a4, a2
+; CHECK-NOV-NEXT: and a0, a4, a0
; CHECK-NOV-NEXT: .LBB53_6: # %entry
-; CHECK-NOV-NEXT: and a1, a1, a0
-; CHECK-NOV-NEXT: beqz a3, .LBB53_8
+; CHECK-NOV-NEXT: and a1, a1, a3
+; CHECK-NOV-NEXT: beqz a2, .LBB53_8
; CHECK-NOV-NEXT: # %bb.7: # %entry
-; CHECK-NOV-NEXT: sgtz a0, a3
-; CHECK-NOV-NEXT: neg a0, a0
-; CHECK-NOV-NEXT: and a1, a0, a1
+; CHECK-NOV-NEXT: sgtz a2, a2
+; CHECK-NOV-NEXT: neg a2, a2
+; CHECK-NOV-NEXT: and a1, a2, a1
; CHECK-NOV-NEXT: .LBB53_8: # %entry
-; CHECK-NOV-NEXT: mv a0, a2
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
@@ -6634,26 +6719,34 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
; CHECK-V-NEXT: # %bb.3: # %entry
; CHECK-V-NEXT: li a4, 1
; CHECK-V-NEXT: .LBB53_4: # %entry
-; CHECK-V-NEXT: slti a1, a1, 1
-; CHECK-V-NEXT: neg a3, a1
-; CHECK-V-NEXT: slti a1, s1, 1
-; CHECK-V-NEXT: neg a1, a1
-; CHECK-V-NEXT: and a1, a1, s0
+; CHECK-V-NEXT: slti a3, a1, 1
+; CHECK-V-NEXT: neg a3, a3
+; CHECK-V-NEXT: and a3, a3, a0
+; CHECK-V-NEXT: addi a1, a1, -1
+; CHECK-V-NEXT: seqz a1, a1
+; CHECK-V-NEXT: addi a1, a1, -1
+; CHECK-V-NEXT: slti a0, s1, 1
+; CHECK-V-NEXT: neg a0, a0
+; CHECK-V-NEXT: and a0, a0, s0
+; CHECK-V-NEXT: addi s1, s1, -1
+; CHECK-V-NEXT: seqz a5, s1
+; CHECK-V-NEXT: addi a5, a5, -1
+; CHECK-V-NEXT: and a0, a5, a0
; CHECK-V-NEXT: beqz a4, .LBB53_6
; CHECK-V-NEXT: # %bb.5: # %entry
; CHECK-V-NEXT: sgtz a4, a4
; CHECK-V-NEXT: neg a4, a4
-; CHECK-V-NEXT: and a1, a4, a1
+; CHECK-V-NEXT: and a0, a4, a0
; CHECK-V-NEXT: .LBB53_6: # %entry
-; CHECK-V-NEXT: and a0, a3, a0
+; CHECK-V-NEXT: and a1, a1, a3
; CHECK-V-NEXT: beqz a2, .LBB53_8
; CHECK-V-NEXT: # %bb.7: # %entry
; CHECK-V-NEXT: sgtz a2, a2
; CHECK-V-NEXT: neg a2, a2
-; CHECK-V-NEXT: and a0, a2, a0
+; CHECK-V-NEXT: and a1, a2, a1
; CHECK-V-NEXT: .LBB53_8: # %entry
-; CHECK-V-NEXT: sd a0, 8(sp)
-; CHECK-V-NEXT: sd a1, 0(sp)
+; CHECK-V-NEXT: sd a1, 8(sp)
+; CHECK-V-NEXT: sd a0, 0(sp)
; CHECK-V-NEXT: addi a0, sp, 8
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-V-NEXT: vle64.v v9, (a0)
diff --git a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
index d6376d3c884b1..d3297d2a18089 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
@@ -1817,94 +1817,100 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: bl __fixdfti
-; CHECK-NEXT: mvn r11, #-2147483648
-; CHECK-NEXT: cmp r1, r11
; CHECK-NEXT: mov r10, r0
-; CHECK-NEXT: csel r0, r1, r11, lo
+; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: mov r9, r1
+; CHECK-NEXT: mov r6, r1
+; CHECK-NEXT: csel r1, r3, r0, mi
+; CHECK-NEXT: mov r0, r3
+; CHECK-NEXT: it ne
+; CHECK-NEXT: andne.w r0, r2, r0, asr #31
+; CHECK-NEXT: mvn r11, #-2147483648
+; CHECK-NEXT: ands r0, r1
+; CHECK-NEXT: cmp r6, r11
; CHECK-NEXT: mov r5, r3
-; CHECK-NEXT: csel r1, r1, r11, mi
-; CHECK-NEXT: orrs r3, r2
-; CHECK-NEXT: str r3, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT: csel r4, r0, r1, eq
-; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: add.w r3, r0, #1
+; CHECK-NEXT: csel r0, r6, r11, lo
; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: mov.w r1, #-2147483648
-; CHECK-NEXT: csel r8, r5, r0, mi
-; CHECK-NEXT: and.w r0, r2, r5, asr #31
-; CHECK-NEXT: and.w r0, r0, r8
-; CHECK-NEXT: cmp.w r8, #-1
-; CHECK-NEXT: add.w r2, r0, #1
-; CHECK-NEXT: csel r0, r4, r1, gt
-; CHECK-NEXT: cmp.w r4, #-2147483648
+; CHECK-NEXT: csel r7, r6, r11, mi
+; CHECK-NEXT: orrs r2, r5
; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: csel r1, r4, r1, hi
-; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: csel r8, r0, r7, eq
+; CHECK-NEXT: mov.w r2, #-2147483648
+; CHECK-NEXT: cmp.w r1, #-1
+; CHECK-NEXT: csel r0, r8, r2, gt
+; CHECK-NEXT: cmp.w r8, #-2147483648
+; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: csel r1, r8, r2, hi
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: mov.w r9, #0
; CHECK-NEXT: csel r0, r1, r0, eq
+; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill
; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
; CHECK-NEXT: vmov r0, r1, d8
; CHECK-NEXT: bl __fixdfti
; CHECK-NEXT: cmp r1, r11
-; CHECK-NEXT: mov r12, r0
+; CHECK-NEXT: mov lr, r0
; CHECK-NEXT: csel r7, r1, r11, lo
; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: csel r6, r1, r11, mi
-; CHECK-NEXT: orrs.w r0, r2, r3
-; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT: csel r6, r7, r6, eq
-; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: mov.w r0, #0
-; CHECK-NEXT: csel r7, r3, r0, mi
-; CHECK-NEXT: and.w r2, r2, r3, asr #31
-; CHECK-NEXT: cmp.w r7, #-1
-; CHECK-NEXT: mov.w r0, #-2147483648
-; CHECK-NEXT: csel lr, r6, r0, gt
-; CHECK-NEXT: cmp.w r6, #-2147483648
-; CHECK-NEXT: and.w r2, r2, r7
-; CHECK-NEXT: csel r0, r6, r0, hi
+; CHECK-NEXT: mov r0, r3
+; CHECK-NEXT: csel r4, r1, r11, mi
+; CHECK-NEXT: orrs r3, r2
+; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: csel r7, r7, r4, eq
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: csel r4, r0, r9, mi
+; CHECK-NEXT: mov.w r3, #-2147483648
+; CHECK-NEXT: cmp.w r4, #-1
+; CHECK-NEXT: csel r9, r7, r3, gt
+; CHECK-NEXT: cmp.w r7, #-2147483648
+; CHECK-NEXT: csel r12, r7, r3, hi
+; CHECK-NEXT: mov r3, r0
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: andne.w r3, r2, r3, asr #31
+; CHECK-NEXT: and.w r2, r3, r4
+; CHECK-NEXT: mov.w r3, #-1
; CHECK-NEXT: adds r2, #1
; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
-; CHECK-NEXT: csel r0, r0, lr, eq
-; CHECK-NEXT: mov.w lr, #-1
-; CHECK-NEXT: cmp r9, r11
-; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: csel r2, r10, lr, lo
-; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: csel r2, r10, r2, eq
+; CHECK-NEXT: csel r12, r12, r9, eq
+; CHECK-NEXT: cmp r6, r11
+; CHECK-NEXT: csel r6, r10, r3, lo
+; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: csel r6, r10, r6, eq
; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: csel r5, r10, lr, mi
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: csel r2, r2, r5, eq
-; CHECK-NEXT: cmp.w r4, #-2147483648
-; CHECK-NEXT: mov.w r5, #0
-; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: csel r4, r2, r5, hi
-; CHECK-NEXT: csel r4, r2, r4, eq
-; CHECK-NEXT: cmp.w r8, #-1
-; CHECK-NEXT: csel r2, r2, r5, gt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: csel r2, r4, r2, eq
+; CHECK-NEXT: csel r5, r10, r3, mi
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: csel r5, r6, r5, eq
+; CHECK-NEXT: cmp.w r8, #-2147483648
+; CHECK-NEXT: mov.w r8, #0
+; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: csel r6, r5, r8, hi
+; CHECK-NEXT: csel r6, r5, r6, eq
+; CHECK-NEXT: cmp.w r2, #-1
+; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: csel r5, r5, r8, gt
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: csel r5, r6, r5, eq
; CHECK-NEXT: cmp r1, r11
-; CHECK-NEXT: csel r1, r12, lr, lo
-; CHECK-NEXT: csel r1, r12, r1, eq
-; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: csel r0, r12, lr, mi
-; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload
+; CHECK-NEXT: csel r1, lr, r3, lo
+; CHECK-NEXT: csel r1, lr, r1, eq
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: csel r0, lr, r3, mi
+; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csel r0, r1, r0, eq
-; CHECK-NEXT: cmp.w r6, #-2147483648
-; CHECK-NEXT: csel r1, r0, r5, hi
+; CHECK-NEXT: cmp.w r7, #-2147483648
+; CHECK-NEXT: csel r1, r0, r8, hi
+; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload
; CHECK-NEXT: csel r1, r0, r1, eq
-; CHECK-NEXT: cmp.w r7, #-1
-; CHECK-NEXT: csel r0, r0, r5, gt
-; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: cmp.w r4, #-1
+; CHECK-NEXT: csel r0, r0, r8, gt
+; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csel r0, r1, r0, eq
-; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r2
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r5
; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
+; CHECK-NEXT: vmov q0[3], q0[1], r12, r0
; CHECK-NEXT: add sp, #24
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
@@ -2068,96 +2074,105 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-NEXT: .pad #20
-; CHECK-NEXT: sub sp, #20
-; CHECK-NEXT: vmov r7, r0, d0
+; CHECK-NEXT: .pad #28
+; CHECK-NEXT: sub sp, #28
+; CHECK-NEXT: vmov r4, r0, d0
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: mvn r9, #-2147483648
-; CHECK-NEXT: cmp r1, r9
; CHECK-NEXT: mov r11, r0
-; CHECK-NEXT: csel r0, r1, r9, lo
+; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: mov r6, r1
+; CHECK-NEXT: csel r1, r3, r0, mi
+; CHECK-NEXT: mov r0, r3
+; CHECK-NEXT: it ne
+; CHECK-NEXT: andne.w r0, r2, r0, asr #31
+; CHECK-NEXT: mvn r10, #-2147483648
+; CHECK-NEXT: ands r0, r1
+; CHECK-NEXT: cmp r6, r10
; CHECK-NEXT: mov r5, r3
-; CHECK-NEXT: csel r1, r1, r9, mi
-; CHECK-NEXT: orrs r3, r2
-; CHECK-NEXT: mov.w r8, #-2147483648
-; CHECK-NEXT: csel r4, r0, r1, eq
-; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: add.w r3, r0, #1
+; CHECK-NEXT: csel r0, r6, r10, lo
; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: csel r10, r5, r0, mi
-; CHECK-NEXT: and.w r0, r2, r5, asr #31
-; CHECK-NEXT: and.w r0, r0, r10
-; CHECK-NEXT: cmp.w r10, #-1
-; CHECK-NEXT: add.w r2, r0, #1
-; CHECK-NEXT: csel r0, r4, r8, gt
-; CHECK-NEXT: cmp.w r4, #-2147483648
-; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT: csel r1, r4, r8, hi
-; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: csel r7, r6, r10, mi
+; CHECK-NEXT: orrs r2, r5
+; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: csel r8, r0, r7, eq
+; CHECK-NEXT: mov.w r2, #-2147483648
+; CHECK-NEXT: cmp.w r1, #-1
+; CHECK-NEXT: csel r0, r8, r2, gt
+; CHECK-NEXT: cmp.w r8, #-2147483648
+; CHECK-NEXT: str r1, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: csel r1, r8, r2, hi
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: mov.w r9, #0
; CHECK-NEXT: csel r0, r1, r0, eq
-; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: str r0, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: cmp r1, r9
+; CHECK-NEXT: cmp r1, r10
; CHECK-NEXT: mov lr, r0
-; CHECK-NEXT: csel r12, r1, r9, lo
+; CHECK-NEXT: csel r7, r1, r10, lo
; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: csel r7, r1, r9, mi
-; CHECK-NEXT: orrs.w r0, r2, r3
-; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: csel r7, r12, r7, eq
-; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: mov.w r0, #0
-; CHECK-NEXT: csel r12, r3, r0, mi
-; CHECK-NEXT: and.w r2, r2, r3, asr #31
-; CHECK-NEXT: cmp.w r12, #-1
-; CHECK-NEXT: and.w r2, r2, r12
-; CHECK-NEXT: csel r0, r7, r8, gt
+; CHECK-NEXT: mov r0, r3
+; CHECK-NEXT: csel r4, r1, r10, mi
+; CHECK-NEXT: orrs.w r3, r2, r0
+; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: csel r7, r7, r4, eq
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: csel r4, r0, r9, mi
+; CHECK-NEXT: mov.w r3, #-2147483648
+; CHECK-NEXT: cmp.w r4, #-1
+; CHECK-NEXT: csel r9, r7, r3, gt
; CHECK-NEXT: cmp.w r7, #-2147483648
-; CHECK-NEXT: csel r8, r7, r8, hi
+; CHECK-NEXT: csel r12, r7, r3, hi
+; CHECK-NEXT: mov r3, r0
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: andne.w r3, r2, r3, asr #31
+; CHECK-NEXT: and.w r2, r3, r4
+; CHECK-NEXT: mov.w r3, #-1
; CHECK-NEXT: adds r2, #1
-; CHECK-NEXT: csel r8, r8, r0, eq
-; CHECK-NEXT: mov.w r0, #-1
-; CHECK-NEXT: cmp r6, r9
-; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
-; CHECK-NEXT: csel r6, r11, r0, lo
-; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: csel r12, r12, r9, eq
+; CHECK-NEXT: cmp r6, r10
+; CHECK-NEXT: csel r6, r11, r3, lo
+; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
; CHECK-NEXT: csel r6, r11, r6, eq
; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: csel r5, r11, r0, mi
+; CHECK-NEXT: csel r5, r11, r3, mi
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csel r5, r6, r5, eq
-; CHECK-NEXT: cmp.w r4, #-2147483648
-; CHECK-NEXT: mov.w r6, #0
-; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: csel r4, r5, r6, hi
-; CHECK-NEXT: csel r4, r5, r4, eq
-; CHECK-NEXT: cmp.w r10, #-1
-; CHECK-NEXT: csel r5, r5, r6, gt
+; CHECK-NEXT: cmp.w r8, #-2147483648
+; CHECK-NEXT: mov.w r8, #0
+; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: csel r6, r5, r8, hi
+; CHECK-NEXT: csel r6, r5, r6, eq
+; CHECK-NEXT: cmp.w r2, #-1
+; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: csel r5, r5, r8, gt
; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: csel r4, r4, r5, eq
-; CHECK-NEXT: cmp r1, r9
-; CHECK-NEXT: csel r1, lr, r0, lo
-; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: csel r5, r6, r5, eq
+; CHECK-NEXT: cmp r1, r10
+; CHECK-NEXT: csel r1, lr, r3, lo
; CHECK-NEXT: csel r1, lr, r1, eq
-; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: csel r0, lr, r0, mi
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: csel r0, lr, r3, mi
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csel r0, r1, r0, eq
; CHECK-NEXT: cmp.w r7, #-2147483648
-; CHECK-NEXT: csel r1, r0, r6, hi
-; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload
+; CHECK-NEXT: csel r1, r0, r8, hi
+; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: csel r1, r0, r1, eq
-; CHECK-NEXT: cmp.w r12, #-1
-; CHECK-NEXT: csel r0, r0, r6, gt
+; CHECK-NEXT: cmp.w r4, #-1
+; CHECK-NEXT: csel r0, r0, r8, gt
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csel r0, r1, r0, eq
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r4
-; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: vmov q0[3], q0[1], r8, r0
-; CHECK-NEXT: add sp, #20
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r5
+; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT: vmov q0[3], q0[1], r12, r0
+; CHECK-NEXT: add sp, #28
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
entry:
%conv = fptosi <2 x float> %x to <2 x i128>
@@ -2320,91 +2335,100 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
; CHECK-NEXT: vmov.u16 r0, q0[1]
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: bl __fixhfti
-; CHECK-NEXT: mvn r11, #-2147483648
-; CHECK-NEXT: cmp r1, r11
; CHECK-NEXT: mov r10, r0
-; CHECK-NEXT: csel r0, r1, r11, lo
+; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: mov r6, r1
+; CHECK-NEXT: csel r1, r3, r0, mi
+; CHECK-NEXT: mov r0, r3
+; CHECK-NEXT: it ne
+; CHECK-NEXT: andne.w r0, r2, r0, asr #31
+; CHECK-NEXT: mvn r11, #-2147483648
+; CHECK-NEXT: ands r0, r1
+; CHECK-NEXT: cmp r6, r11
; CHECK-NEXT: mov r5, r3
-; CHECK-NEXT: csel r1, r1, r11, mi
-; CHECK-NEXT: orrs r3, r2
-; CHECK-NEXT: mov.w r9, #-2147483648
-; CHECK-NEXT: csel r4, r0, r1, eq
-; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: add.w r3, r0, #1
+; CHECK-NEXT: csel r0, r6, r11, lo
; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: str r3, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT: csel r8, r5, r0, mi
-; CHECK-NEXT: and.w r0, r2, r5, asr #31
-; CHECK-NEXT: and.w r0, r0, r8
-; CHECK-NEXT: cmp.w r8, #-1
-; CHECK-NEXT: add.w r2, r0, #1
-; CHECK-NEXT: csel r0, r4, r9, gt
-; CHECK-NEXT: cmp.w r4, #-2147483648
+; CHECK-NEXT: csel r7, r6, r11, mi
+; CHECK-NEXT: orrs r2, r5
; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: csel r1, r4, r9, hi
-; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: csel r8, r0, r7, eq
+; CHECK-NEXT: mov.w r2, #-2147483648
+; CHECK-NEXT: cmp.w r1, #-1
+; CHECK-NEXT: csel r0, r8, r2, gt
+; CHECK-NEXT: cmp.w r8, #-2147483648
+; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: csel r1, r8, r2, hi
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: mov.w r9, #0
; CHECK-NEXT: csel r0, r1, r0, eq
+; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill
; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
; CHECK-NEXT: vmov.u16 r0, q4[0]
; CHECK-NEXT: bl __fixhfti
; CHECK-NEXT: cmp r1, r11
; CHECK-NEXT: mov lr, r0
-; CHECK-NEXT: csel r12, r1, r11, lo
-; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: csel r7, r1, r11, mi
-; CHECK-NEXT: orrs.w r0, r2, r3
-; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT: csel r7, r12, r7, eq
+; CHECK-NEXT: csel r7, r1, r11, lo
; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: mov.w r0, #0
-; CHECK-NEXT: csel r12, r3, r0, mi
-; CHECK-NEXT: and.w r2, r2, r3, asr #31
-; CHECK-NEXT: cmp.w r12, #-1
-; CHECK-NEXT: and.w r2, r2, r12
-; CHECK-NEXT: csel r0, r7, r9, gt
+; CHECK-NEXT: mov r0, r3
+; CHECK-NEXT: csel r4, r1, r11, mi
+; CHECK-NEXT: orrs r3, r2
+; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: csel r7, r7, r4, eq
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: csel r4, r0, r9, mi
+; CHECK-NEXT: mov.w r3, #-2147483648
+; CHECK-NEXT: cmp.w r4, #-1
+; CHECK-NEXT: csel r9, r7, r3, gt
; CHECK-NEXT: cmp.w r7, #-2147483648
-; CHECK-NEXT: csel r9, r7, r9, hi
+; CHECK-NEXT: csel r12, r7, r3, hi
+; CHECK-NEXT: mov r3, r0
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: andne.w r3, r2, r3, asr #31
+; CHECK-NEXT: and.w r2, r3, r4
+; CHECK-NEXT: mov.w r3, #-1
; CHECK-NEXT: adds r2, #1
-; CHECK-NEXT: csel r9, r9, r0, eq
-; CHECK-NEXT: mov.w r0, #-1
+; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
+; CHECK-NEXT: csel r12, r12, r9, eq
; CHECK-NEXT: cmp r6, r11
-; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: csel r6, r10, r0, lo
-; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: csel r6, r10, r3, lo
+; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: csel r6, r10, r6, eq
; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: csel r5, r10, r0, mi
+; CHECK-NEXT: csel r5, r10, r3, mi
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csel r5, r6, r5, eq
-; CHECK-NEXT: cmp.w r4, #-2147483648
-; CHECK-NEXT: mov.w r6, #0
-; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: csel r4, r5, r6, hi
-; CHECK-NEXT: csel r4, r5, r4, eq
-; CHECK-NEXT: cmp.w r8, #-1
-; CHECK-NEXT: csel r5, r5, r6, gt
+; CHECK-NEXT: cmp.w r8, #-2147483648
+; CHECK-NEXT: mov.w r8, #0
+; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: csel r6, r5, r8, hi
+; CHECK-NEXT: csel r6, r5, r6, eq
+; CHECK-NEXT: cmp.w r2, #-1
+; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: csel r5, r5, r8, gt
; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: csel r4, r4, r5, eq
+; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: csel r5, r6, r5, eq
; CHECK-NEXT: cmp r1, r11
-; CHECK-NEXT: csel r1, lr, r0, lo
-; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: csel r1, lr, r3, lo
; CHECK-NEXT: csel r1, lr, r1, eq
-; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: csel r0, lr, r0, mi
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: csel r0, lr, r3, mi
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csel r0, r1, r0, eq
; CHECK-NEXT: cmp.w r7, #-2147483648
-; CHECK-NEXT: csel r1, r0, r6, hi
-; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: csel r1, r0, r8, hi
+; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload
; CHECK-NEXT: csel r1, r0, r1, eq
-; CHECK-NEXT: cmp.w r12, #-1
-; CHECK-NEXT: csel r0, r0, r6, gt
+; CHECK-NEXT: cmp.w r4, #-1
+; CHECK-NEXT: csel r0, r0, r8, gt
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csel r0, r1, r0, eq
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r4
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r5
; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: vmov q0[3], q0[1], r9, r0
+; CHECK-NEXT: vmov q0[3], q0[1], r12, r0
; CHECK-NEXT: add sp, #24
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
index e539f00d8e518..9a39c62591e1b 100644
--- a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
@@ -574,11 +574,16 @@ define i64 @utest_f64i64_cse_combine(double %x) #0 {
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 2
; CHECK-NEXT: i64.eqz
; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptoui double %x to i128
@@ -672,6 +677,7 @@ define i64 @ustest_f64i64_cse_combine(double %x) #0 {
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 2
@@ -679,6 +685,10 @@ define i64 @ustest_f64i64_cse_combine(double %x) #0 {
; CHECK-NEXT: i64.lt_s
; CHECK-NEXT: local.tee 1
; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
; CHECK-NEXT: local.tee 3
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i64.const 0
@@ -857,6 +867,7 @@ define i64 @ustest_f32i64_cse_combine(float %x) #0 {
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 2
@@ -864,6 +875,10 @@ define i64 @ustest_f32i64_cse_combine(float %x) #0 {
; CHECK-NEXT: i64.lt_s
; CHECK-NEXT: local.tee 1
; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
; CHECK-NEXT: local.tee 3
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i64.const 0
@@ -1494,11 +1509,16 @@ define i64 @utest_f64i64_mm(double %x) {
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 2
; CHECK-NEXT: i64.eqz
; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptoui double %x to i128
@@ -1532,6 +1552,7 @@ define i64 @ustest_f64i64_mm(double %x) {
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 2
@@ -1539,6 +1560,10 @@ define i64 @ustest_f64i64_mm(double %x) {
; CHECK-NEXT: i64.lt_s
; CHECK-NEXT: local.tee 1
; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
; CHECK-NEXT: local.tee 3
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i64.const 0
@@ -1602,11 +1627,16 @@ define i64 @utest_f32i64_mm(float %x) {
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 2
; CHECK-NEXT: i64.eqz
; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptoui float %x to i128
@@ -1640,6 +1670,7 @@ define i64 @ustest_f32i64_mm(float %x) {
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 2
@@ -1647,6 +1678,10 @@ define i64 @ustest_f32i64_mm(float %x) {
; CHECK-NEXT: i64.lt_s
; CHECK-NEXT: local.tee 1
; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
; CHECK-NEXT: local.tee 3
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i64.const 0
@@ -1714,11 +1749,16 @@ define i64 @utesth_f16i64_mm(half %x) {
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 2
; CHECK-NEXT: i64.eqz
; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptoui half %x to i128
@@ -1754,6 +1794,7 @@ define i64 @ustest_f16i64_mm(half %x) {
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 2
@@ -1761,6 +1802,10 @@ define i64 @ustest_f16i64_mm(half %x) {
; CHECK-NEXT: i64.lt_s
; CHECK-NEXT: local.tee 1
; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
; CHECK-NEXT: local.tee 3
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i64.const 0
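
Reading the stacked WebAssembly sequences above: i64.select pops the condition, then the false value, then the true value, so the extra leading i64.const 0 in each reverted sequence is the value produced when the final i64.eq test succeeds. The inner i64.select, keyed on i64.eqz, still yields the local only when the tested value is 0; the added i64.const 1 / i64.eq / i64.select then substitutes that leading 0 when the same value equals 1, a case the inner select already mapped to 0 since 1 is nonzero.
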
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
index facd15f3270d3..007802dd0c035 100644
--- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
@@ -2309,17 +2309,27 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
; CHECK-NEXT: i32.const 32
; CHECK-NEXT: i32.add
; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 5
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 4
; CHECK-NEXT: i64.eqz
; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 2
; CHECK-NEXT: i64.eqz
; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
; CHECK-NEXT: i64x2.replace_lane 1
; CHECK-NEXT: # fallthrough-return
entry:
@@ -2371,6 +2381,7 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
; CHECK-NEXT: i32.const 32
; CHECK-NEXT: i32.add
; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 5
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 4
@@ -2378,6 +2389,10 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
; CHECK-NEXT: i64.lt_s
; CHECK-NEXT: local.tee 1
; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
; CHECK-NEXT: local.tee 5
; CHECK-NEXT: local.get 5
; CHECK-NEXT: i64.const 0
@@ -2393,6 +2408,7 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
; CHECK-NEXT: i64.eqz
; CHECK-NEXT: i64.select
; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 2
@@ -2400,6 +2416,10 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
; CHECK-NEXT: i64.lt_s
; CHECK-NEXT: local.tee 1
; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
; CHECK-NEXT: local.tee 4
; CHECK-NEXT: local.get 4
; CHECK-NEXT: i64.const 0
@@ -2591,17 +2611,27 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
; CHECK-NEXT: i32.const 32
; CHECK-NEXT: i32.add
; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 5
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 4
; CHECK-NEXT: i64.eqz
; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 2
; CHECK-NEXT: i64.eqz
; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
; CHECK-NEXT: i64x2.replace_lane 1
; CHECK-NEXT: # fallthrough-return
entry:
@@ -2653,6 +2683,7 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
; CHECK-NEXT: i32.const 32
; CHECK-NEXT: i32.add
; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 5
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 4
@@ -2660,6 +2691,10 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
; CHECK-NEXT: i64.lt_s
; CHECK-NEXT: local.tee 1
; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
; CHECK-NEXT: local.tee 5
; CHECK-NEXT: local.get 5
; CHECK-NEXT: i64.const 0
@@ -2675,6 +2710,7 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
; CHECK-NEXT: i64.eqz
; CHECK-NEXT: i64.select
; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 2
@@ -2682,6 +2718,10 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
; CHECK-NEXT: i64.lt_s
; CHECK-NEXT: local.tee 1
; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
; CHECK-NEXT: local.tee 4
; CHECK-NEXT: local.get 4
; CHECK-NEXT: i64.const 0
@@ -2877,17 +2917,27 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
; CHECK-NEXT: i32.const 32
; CHECK-NEXT: i32.add
; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 6
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 5
; CHECK-NEXT: i64.eqz
; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 4
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i64.eqz
; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
; CHECK-NEXT: i64x2.replace_lane 1
; CHECK-NEXT: # fallthrough-return
entry:
@@ -2941,6 +2991,7 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
; CHECK-NEXT: i32.const 32
; CHECK-NEXT: i32.add
; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 6
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 5
@@ -2948,6 +2999,10 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
; CHECK-NEXT: i64.lt_s
; CHECK-NEXT: local.tee 2
; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
; CHECK-NEXT: local.tee 6
; CHECK-NEXT: local.get 6
; CHECK-NEXT: i64.const 0
@@ -2963,6 +3018,7 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
; CHECK-NEXT: i64.eqz
; CHECK-NEXT: i64.select
; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 4
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 3
@@ -2970,6 +3026,10 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
; CHECK-NEXT: i64.lt_s
; CHECK-NEXT: local.tee 2
; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
; CHECK-NEXT: local.tee 5
; CHECK-NEXT: local.get 5
; CHECK-NEXT: i64.const 0
diff --git a/llvm/test/CodeGen/X86/fpclamptosat.ll b/llvm/test/CodeGen/X86/fpclamptosat.ll
index 87e9a587d31ea..ac6d9d359e5eb 100644
--- a/llvm/test/CodeGen/X86/fpclamptosat.ll
+++ b/llvm/test/CodeGen/X86/fpclamptosat.ll
@@ -1081,6 +1081,8 @@ define i64 @utest_f64i64_mm(double %x) {
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: testq %rdx, %rdx
; CHECK-NEXT: cmovneq %rcx, %rax
+; CHECK-NEXT: cmpq $1, %rdx
+; CHECK-NEXT: cmoveq %rcx, %rax
; CHECK-NEXT: popq %rcx
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
@@ -1099,9 +1101,11 @@ define i64 @ustest_f64i64_mm(double %x) {
; CHECK-NEXT: callq __fixdfti@PLT
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: testq %rdx, %rdx
-; CHECK-NEXT: cmovgq %rcx, %rax
; CHECK-NEXT: movl $1, %esi
; CHECK-NEXT: cmovleq %rdx, %rsi
+; CHECK-NEXT: cmovgq %rcx, %rax
+; CHECK-NEXT: cmpq $1, %rdx
+; CHECK-NEXT: cmoveq %rcx, %rax
; CHECK-NEXT: testq %rsi, %rsi
; CHECK-NEXT: cmovsq %rcx, %rax
; CHECK-NEXT: popq %rcx
@@ -1143,6 +1147,8 @@ define i64 @utest_f32i64_mm(float %x) {
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: testq %rdx, %rdx
; CHECK-NEXT: cmovneq %rcx, %rax
+; CHECK-NEXT: cmpq $1, %rdx
+; CHECK-NEXT: cmoveq %rcx, %rax
; CHECK-NEXT: popq %rcx
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
@@ -1161,9 +1167,11 @@ define i64 @ustest_f32i64_mm(float %x) {
; CHECK-NEXT: callq __fixsfti@PLT
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: testq %rdx, %rdx
-; CHECK-NEXT: cmovgq %rcx, %rax
; CHECK-NEXT: movl $1, %esi
; CHECK-NEXT: cmovleq %rdx, %rsi
+; CHECK-NEXT: cmovgq %rcx, %rax
+; CHECK-NEXT: cmpq $1, %rdx
+; CHECK-NEXT: cmoveq %rcx, %rax
; CHECK-NEXT: testq %rsi, %rsi
; CHECK-NEXT: cmovsq %rcx, %rax
; CHECK-NEXT: popq %rcx
@@ -1213,6 +1221,8 @@ define i64 @utesth_f16i64_mm(half %x) {
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: testq %rdx, %rdx
; CHECK-NEXT: cmovneq %rcx, %rax
+; CHECK-NEXT: cmpq $1, %rdx
+; CHECK-NEXT: cmoveq %rcx, %rax
; CHECK-NEXT: popq %rcx
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
@@ -1231,9 +1241,11 @@ define i64 @ustest_f16i64_mm(half %x) {
; CHECK-NEXT: callq __fixhfti@PLT
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: testq %rdx, %rdx
-; CHECK-NEXT: cmovgq %rcx, %rax
; CHECK-NEXT: movl $1, %esi
; CHECK-NEXT: cmovleq %rdx, %rsi
+; CHECK-NEXT: cmovgq %rcx, %rax
+; CHECK-NEXT: cmpq $1, %rdx
+; CHECK-NEXT: cmoveq %rcx, %rax
; CHECK-NEXT: testq %rsi, %rsi
; CHECK-NEXT: cmovsq %rcx, %rax
; CHECK-NEXT: popq %rcx
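
The scalar X86 output shows the same chain as two conditional moves: testq %rdx, %rdx / cmovgq %rcx, %rax already zeroes %rax for every positive %rdx, so the reintroduced cmpq $1, %rdx / cmoveq %rcx, %rax can only fire for %rdx == 1, a case the cmovgq has covered. A hypothetical IR reduction of this signed (ustest) variant, again with invented names:

define i64 @chained_selects_signed(i64 %hi, i64 %lo) {
entry:
  ; inner select: zero %lo for any positive high word (cmovgq)
  %pos = icmp sgt i64 %hi, 0
  %inner = select i1 %pos, i64 0, i64 %lo
  ; outer select: zero the result again when the high word is exactly
  ; 1 (cmpq $1 + cmoveq); since 1 is positive, this is a no-op
  %isone = icmp eq i64 %hi, 1
  %outer = select i1 %isone, i64 0, i64 %inner
  ret i64 %outer
}
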
diff --git a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll
index 22fbc1c55b3d6..c351c1b82cf19 100644
--- a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll
@@ -2762,8 +2762,12 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: testq %rdx, %rdx
; CHECK-NEXT: cmovneq %rcx, %rax
+; CHECK-NEXT: cmpq $1, %rdx
+; CHECK-NEXT: cmoveq %rcx, %rax
; CHECK-NEXT: testq %r14, %r14
; CHECK-NEXT: cmovneq %rcx, %rbx
+; CHECK-NEXT: cmpq $1, %r14
+; CHECK-NEXT: cmoveq %rcx, %rbx
; CHECK-NEXT: movq %rbx, %xmm0
; CHECK-NEXT: movq %rax, %xmm1
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
@@ -2801,15 +2805,20 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
; CHECK-NEXT: callq __fixdfti@PLT
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: testq %rdx, %rdx
-; CHECK-NEXT: cmovgq %rcx, %rax
; CHECK-NEXT: movl $1, %esi
-; CHECK-NEXT: cmovgq %rsi, %rdx
+; CHECK-NEXT: movl $1, %edi
+; CHECK-NEXT: cmovleq %rdx, %rdi
+; CHECK-NEXT: cmovgq %rcx, %rax
+; CHECK-NEXT: cmpq $1, %rdx
+; CHECK-NEXT: cmoveq %rcx, %rax
; CHECK-NEXT: testq %r14, %r14
-; CHECK-NEXT: cmovgq %rcx, %rbx
; CHECK-NEXT: cmovleq %r14, %rsi
+; CHECK-NEXT: cmovgq %rcx, %rbx
+; CHECK-NEXT: cmpq $1, %r14
+; CHECK-NEXT: cmoveq %rcx, %rbx
; CHECK-NEXT: testq %rsi, %rsi
; CHECK-NEXT: cmovsq %rcx, %rbx
-; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: testq %rdi, %rdi
; CHECK-NEXT: cmovsq %rcx, %rax
; CHECK-NEXT: movq %rax, %xmm0
; CHECK-NEXT: movq %rbx, %xmm1
@@ -2917,8 +2926,12 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: testq %rdx, %rdx
; CHECK-NEXT: cmovneq %rcx, %rax
+; CHECK-NEXT: cmpq $1, %rdx
+; CHECK-NEXT: cmoveq %rcx, %rax
; CHECK-NEXT: testq %r14, %r14
; CHECK-NEXT: cmovneq %rcx, %rbx
+; CHECK-NEXT: cmpq $1, %r14
+; CHECK-NEXT: cmoveq %rcx, %rbx
; CHECK-NEXT: movq %rbx, %xmm0
; CHECK-NEXT: movq %rax, %xmm1
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
@@ -2956,15 +2969,20 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
; CHECK-NEXT: callq __fixsfti@PLT
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: testq %rdx, %rdx
-; CHECK-NEXT: cmovgq %rcx, %rax
; CHECK-NEXT: movl $1, %esi
-; CHECK-NEXT: cmovgq %rsi, %rdx
+; CHECK-NEXT: movl $1, %edi
+; CHECK-NEXT: cmovleq %rdx, %rdi
+; CHECK-NEXT: cmovgq %rcx, %rax
+; CHECK-NEXT: cmpq $1, %rdx
+; CHECK-NEXT: cmoveq %rcx, %rax
; CHECK-NEXT: testq %r14, %r14
-; CHECK-NEXT: cmovgq %rcx, %rbx
; CHECK-NEXT: cmovleq %r14, %rsi
+; CHECK-NEXT: cmovgq %rcx, %rbx
+; CHECK-NEXT: cmpq $1, %r14
+; CHECK-NEXT: cmoveq %rcx, %rbx
; CHECK-NEXT: testq %rsi, %rsi
; CHECK-NEXT: cmovsq %rcx, %rbx
-; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: testq %rdi, %rdi
; CHECK-NEXT: cmovsq %rcx, %rax
; CHECK-NEXT: movq %rax, %xmm0
; CHECK-NEXT: movq %rbx, %xmm1
@@ -3073,8 +3091,12 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: testq %rdx, %rdx
; CHECK-NEXT: cmovneq %rcx, %rax
+; CHECK-NEXT: cmpq $1, %rdx
+; CHECK-NEXT: cmoveq %rcx, %rax
; CHECK-NEXT: testq %r14, %r14
; CHECK-NEXT: cmovneq %rcx, %rbx
+; CHECK-NEXT: cmpq $1, %r14
+; CHECK-NEXT: cmoveq %rcx, %rbx
; CHECK-NEXT: movq %rbx, %xmm0
; CHECK-NEXT: movq %rax, %xmm1
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
@@ -3112,15 +3134,20 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
; CHECK-NEXT: callq __fixhfti@PLT
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: testq %rdx, %rdx
-; CHECK-NEXT: cmovgq %rcx, %rax
; CHECK-NEXT: movl $1, %esi
-; CHECK-NEXT: cmovgq %rsi, %rdx
+; CHECK-NEXT: movl $1, %edi
+; CHECK-NEXT: cmovleq %rdx, %rdi
+; CHECK-NEXT: cmovgq %rcx, %rax
+; CHECK-NEXT: cmpq $1, %rdx
+; CHECK-NEXT: cmoveq %rcx, %rax
; CHECK-NEXT: testq %r14, %r14
-; CHECK-NEXT: cmovgq %rcx, %rbx
; CHECK-NEXT: cmovleq %r14, %rsi
+; CHECK-NEXT: cmovgq %rcx, %rbx
+; CHECK-NEXT: cmpq $1, %r14
+; CHECK-NEXT: cmoveq %rcx, %rbx
; CHECK-NEXT: testq %rsi, %rsi
; CHECK-NEXT: cmovsq %rcx, %rbx
-; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: testq %rdi, %rdi
; CHECK-NEXT: cmovsq %rcx, %rax
; CHECK-NEXT: movq %rax, %xmm0
; CHECK-NEXT: movq %rbx, %xmm1
diff --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
index f0ce2312c1c68..b7387651f8c44 100644
--- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
@@ -370,117 +370,123 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $64, %esp
+; X86-NEXT: subl $88, %esp
; X86-NEXT: movl 8(%ebp), %ecx
-; X86-NEXT: movl 12(%ebp), %ebx
-; X86-NEXT: movl 20(%ebp), %edx
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: sarl $31, %esi
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: sarl $31, %eax
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: shldl $31, %ebx, %edi
-; X86-NEXT: shldl $31, %ecx, %ebx
+; X86-NEXT: movl 12(%ebp), %eax
+; X86-NEXT: movl 20(%ebp), %edi
+; X86-NEXT: sarl $31, %edi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: sarl $31, %ebx
+; X86-NEXT: movl %ebx, %edx
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl $31, %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl $31, %ecx, %eax
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: shll $31, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %edx
-; X86-NEXT: pushl 16(%ebp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %edi
+; X86-NEXT: pushl 20(%ebp)
+; X86-NEXT: pushl 16(%ebp)
; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edx
+; X86-NEXT: pushl %esi
; X86-NEXT: pushl %ecx
-; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl %eax
-; X86-NEXT: calll __modti3
+; X86-NEXT: calll __divti3
; X86-NEXT: addl $32, %esp
-; X86-NEXT: testl %esi, %esi
-; X86-NEXT: sets %al
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: testl %edx, %edx
-; X86-NEXT: sets %cl
-; X86-NEXT: xorb %al, %cl
-; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: subl $1, %esi
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sbbl $0, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: sbbl $0, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl $0, %ebx
+; X86-NEXT: testl %edi, %edi
+; X86-NEXT: sets %al
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: sets %dl
+; X86-NEXT: xorb %al, %dl
+; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: setne %bh
-; X86-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %esi
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl 20(%ebp)
; X86-NEXT: pushl 16(%ebp)
-; X86-NEXT: pushl %edx
-; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT: pushl %eax
-; X86-NEXT: calll __divti3
+; X86-NEXT: calll __modti3
; X86-NEXT: addl $32, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: subl $1, %eax
-; X86-NEXT: movl %esi, %edx
-; X86-NEXT: sbbl $0, %edx
-; X86-NEXT: setb %bl
-; X86-NEXT: testb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
-; X86-NEXT: cmovel %ecx, %eax
-; X86-NEXT: cmovel %esi, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: addb $255, %bl
-; X86-NEXT: sbbl $0, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, %ecx
-; X86-NEXT: sbbl $0, %ecx
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
-; X86-NEXT: testb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: cmovel %esi, %ecx
+; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: setne %al
+; X86-NEXT: testb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT: testl %ebx, %ebx
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: cmovsl %ebx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-NEXT: cmovsl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: cmovsl %esi, %eax
+; X86-NEXT: movl %ebx, %edi
+; X86-NEXT: sarl $31, %edi
+; X86-NEXT: andl %ecx, %edi
+; X86-NEXT: testl %ebx, %ebx
+; X86-NEXT: cmovel %ebx, %edi
+; X86-NEXT: movl %edx, %ecx
; X86-NEXT: cmpl $2147483647, %edx # imm = 0x7FFFFFFF
-; X86-NEXT: movl $2147483647, %esi # imm = 0x7FFFFFFF
-; X86-NEXT: cmovbl %edx, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: testl %edx, %edx
-; X86-NEXT: movl $-1, %esi
-; X86-NEXT: cmovnsl %eax, %esi
+; X86-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF
+; X86-NEXT: cmovbl %ecx, %edx
; X86-NEXT: testl %ecx, %ecx
-; X86-NEXT: movl $2147483647, %ebx # imm = 0x7FFFFFFF
-; X86-NEXT: cmovnsl %ebx, %edx
-; X86-NEXT: movl $-1, %ebx
-; X86-NEXT: cmovnsl %ebx, %eax
-; X86-NEXT: movl $0, %ebx
-; X86-NEXT: cmovsl %ecx, %ebx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT: orl %ecx, %edi
-; X86-NEXT: cmovel %esi, %eax
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl $-1, %ecx
+; X86-NEXT: cmovsl %ecx, %esi
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT: cmovnel %eax, %esi
+; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: cmpl $-2147483647, %edx # imm = 0x80000001
-; X86-NEXT: movl $-2147483648, %esi # imm = 0x80000000
-; X86-NEXT: cmovael %edx, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: sarl $31, %esi
-; X86-NEXT: andl %eax, %esi
-; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: movl $-2147483648, %edi # imm = 0x80000000
-; X86-NEXT: cmovsl %edi, %edx
-; X86-NEXT: movl $0, %edi
-; X86-NEXT: cmovsl %edi, %eax
+; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT: cmovael %edx, %eax
+; X86-NEXT: movl %edx, %ecx
; X86-NEXT: sarl $31, %ecx
-; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: andl %ebx, %ecx
-; X86-NEXT: cmpl $-1, %ecx
-; X86-NEXT: cmovel %esi, %eax
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: andl %esi, %ecx
+; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: movl $-2147483648, %ebx # imm = 0x80000000
+; X86-NEXT: cmovsl %ebx, %edx
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: cmovsl %ebx, %esi
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: cmpl $-1, %edi
+; X86-NEXT: cmovel %ecx, %esi
+; X86-NEXT: cmovel %eax, %edx
+; X86-NEXT: movl %esi, %eax
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
@@ -836,78 +842,100 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $192, %esp
-; X86-NEXT: movl 24(%ebp), %ebx
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl 28(%ebp), %eax
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: sarl $31, %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: subl $256, %esp # imm = 0x100
+; X86-NEXT: movl 16(%ebp), %edi
+; X86-NEXT: movl 32(%ebp), %eax
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: movl %eax, %ecx
; X86-NEXT: sarl $31, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: leal (%ecx,%ecx), %eax
-; X86-NEXT: shrl $31, %ecx
-; X86-NEXT: shldl $31, %eax, %ecx
-; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X86-NEXT: movl %edi, %ebx
+; X86-NEXT: sarl $31, %ebx
+; X86-NEXT: leal (%edi,%edi), %eax
+; X86-NEXT: shrl $31, %edi
+; X86-NEXT: shldl $31, %eax, %edi
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edx
+; X86-NEXT: pushl %esi
; X86-NEXT: pushl %esi
; X86-NEXT: pushl %esi
; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl $0
; X86-NEXT: pushl %eax
-; X86-NEXT: calll __modti3
+; X86-NEXT: calll __divti3
; X86-NEXT: addl $32, %esp
-; X86-NEXT: movl 40(%ebp), %ecx
-; X86-NEXT: movl %ecx, %esi
-; X86-NEXT: sarl $31, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ebx, %edi
-; X86-NEXT: sarl $31, %edi
-; X86-NEXT: leal (%ebx,%ebx), %eax
-; X86-NEXT: shrl $31, %ebx
-; X86-NEXT: shldl $31, %eax, %ebx
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl %esi
; X86-NEXT: pushl %esi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edi
+; X86-NEXT: pushl 32(%ebp)
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl $0
; X86-NEXT: pushl %eax
; X86-NEXT: calll __modti3
; X86-NEXT: addl $32, %esp
+; X86-NEXT: movl 36(%ebp), %edx
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: sarl $31, %ebx
+; X86-NEXT: movl 20(%ebp), %ecx
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: sarl $31, %esi
+; X86-NEXT: leal (%ecx,%ecx), %eax
+; X86-NEXT: shrl $31, %ecx
+; X86-NEXT: shldl $31, %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edx
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: pushl %esi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl 40(%ebp)
+; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll __modti3
+; X86-NEXT: addl $32, %esp
+; X86-NEXT: movl 28(%ebp), %edx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: sarl $31, %edi
+; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: sarl $31, %esi
+; X86-NEXT: leal (%ecx,%ecx), %eax
+; X86-NEXT: shrl $31, %ecx
+; X86-NEXT: shldl $31, %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %edx
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: pushl %esi
+; X86-NEXT: pushl %esi
+; X86-NEXT: pushl %ecx
; X86-NEXT: pushl $0
; X86-NEXT: pushl %eax
; X86-NEXT: calll __divti3
; X86-NEXT: addl $32, %esp
-; X86-NEXT: movl 36(%ebp), %edx
+; X86-NEXT: movl 40(%ebp), %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: sarl $31, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 20(%ebp), %ecx
+; X86-NEXT: movl 24(%ebp), %ecx
; X86-NEXT: movl %ecx, %edi
; X86-NEXT: sarl $31, %edi
; X86-NEXT: leal (%ecx,%ecx), %eax
; X86-NEXT: shrl $31, %ecx
; X86-NEXT: shldl $31, %eax, %ecx
-; X86-NEXT: movl %ecx, %ebx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl %esi
; X86-NEXT: pushl %esi
@@ -921,51 +949,27 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: calll __modti3
; X86-NEXT: addl $32, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: pushl %esi
; X86-NEXT: pushl %esi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl 36(%ebp)
+; X86-NEXT: pushl 40(%ebp)
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT: pushl $0
; X86-NEXT: pushl %eax
; X86-NEXT: calll __divti3
; X86-NEXT: addl $32, %esp
-; X86-NEXT: movl 32(%ebp), %edx
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: sarl $31, %edi
-; X86-NEXT: movl 16(%ebp), %ecx
-; X86-NEXT: movl %ecx, %esi
-; X86-NEXT: sarl $31, %esi
-; X86-NEXT: leal (%ecx,%ecx), %eax
-; X86-NEXT: shrl $31, %ecx
-; X86-NEXT: shldl $31, %eax, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edx
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl %eax
-; X86-NEXT: calll __modti3
-; X86-NEXT: addl $32, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl 28(%ebp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl 36(%ebp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT: pushl $0
; X86-NEXT: pushl %eax
@@ -973,282 +977,327 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: addl $32, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: subl $1, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl $0, %edi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl $0, %esi
+; X86-NEXT: movl %esi, %edx
+; X86-NEXT: sbbl $0, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl $0, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl $0, %ecx
-; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: sets %al
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sbbl $0, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: sets %dl
-; X86-NEXT: xorb %al, %dl
+; X86-NEXT: sets %bl
+; X86-NEXT: testl %edi, %edi
+; X86-NEXT: sets %bh
+; X86-NEXT: xorb %bl, %bh
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: orl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: orl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: orl %eax, %ebx
+; X86-NEXT: orl %edi, %eax
; X86-NEXT: setne %al
-; X86-NEXT: testb %dl, %al
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-NEXT: testb %bh, %al
+; X86-NEXT: cmovel %esi, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: sarl $31, %edx
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: andl %edi, %eax
-; X86-NEXT: testl %ecx, %ecx
-; X86-NEXT: movl $0, %ebx
-; X86-NEXT: cmovsl %ecx, %ebx
-; X86-NEXT: movl $-1, %esi
-; X86-NEXT: cmovsl (%esp), %esi # 4-byte Folded Reload
-; X86-NEXT: negl %edi
-; X86-NEXT: movl $0, %edi
-; X86-NEXT: sbbl %edi, %edi
-; X86-NEXT: orl (%esp), %edi # 4-byte Folded Reload
-; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: cmovnel %esi, %edi
-; X86-NEXT: movl $0, %esi
-; X86-NEXT: cmovel %esi, %eax
-; X86-NEXT: cmpl $-1, %eax
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: cmovel %edi, %ecx
-; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: cmovsl %esi, %edi
-; X86-NEXT: movl $-1, %esi
-; X86-NEXT: cmovsl %esi, %eax
-; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT: andl %ebx, %edx
-; X86-NEXT: cmpl $-1, %edx
-; X86-NEXT: cmovnel %eax, %edx
-; X86-NEXT: cmovel %ecx, %edi
-; X86-NEXT: shrdl $1, %edx, %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl 32(%ebp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl %eax
-; X86-NEXT: calll __divti3
-; X86-NEXT: addl $32, %esp
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmovel %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: subl $1, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl $0, %ecx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl %esi, %ecx
+; X86-NEXT: sbbl $0, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: sbbl $0, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl $0, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl $0, %edi
; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: sets %al
-; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: sets %dl
-; X86-NEXT: xorb %al, %dl
+; X86-NEXT: sets %bl
+; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: sets %bh
+; X86-NEXT: xorb %bl, %bh
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: orl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: orl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: orl %eax, %ebx
+; X86-NEXT: orl %edi, %eax
; X86-NEXT: setne %al
-; X86-NEXT: testb %dl, %al
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: cmovel %esi, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-NEXT: testb %bh, %al
+; X86-NEXT: cmovel %esi, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmovel %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X86-NEXT: movl %edi, %esi
-; X86-NEXT: sarl $31, %esi
-; X86-NEXT: movl %esi, %ebx
-; X86-NEXT: andl %ecx, %ebx
-; X86-NEXT: testl %edi, %edi
-; X86-NEXT: movl $0, %eax
-; X86-NEXT: cmovsl %edi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl $-1, %eax
-; X86-NEXT: cmovsl (%esp), %eax # 4-byte Folded Reload
-; X86-NEXT: negl %ecx
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: sbbl %ecx, %ecx
-; X86-NEXT: orl (%esp), %ecx # 4-byte Folded Reload
-; X86-NEXT: orl %edx, %edi
-; X86-NEXT: cmovnel %eax, %ecx
-; X86-NEXT: movl $0, %edi
-; X86-NEXT: cmovel %edi, %ebx
-; X86-NEXT: cmpl $-1, %ebx
-; X86-NEXT: movl $0, %eax
-; X86-NEXT: cmovel %ecx, %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: testl %edx, %edx
-; X86-NEXT: cmovsl %edi, %ecx
-; X86-NEXT: movl %ecx, %edi
-; X86-NEXT: movl $-1, %ecx
-; X86-NEXT: cmovsl %ecx, %ebx
-; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT: andl %edx, %esi
-; X86-NEXT: cmpl $-1, %esi
-; X86-NEXT: cmovnel %ebx, %esi
-; X86-NEXT: cmovel %eax, %edi
-; X86-NEXT: shrdl $1, %esi, %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: subl $1, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl $0, %ecx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, %esi
+; X86-NEXT: sbbl $0, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl $0, %eax
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl $0, %ebx
-; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl $0, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: testl %edx, %edx
; X86-NEXT: sets %al
-; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: sets %dl
-; X86-NEXT: xorb %al, %dl
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: sets %bl
+; X86-NEXT: xorb %al, %bl
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl %edx
+; X86-NEXT: pushl %edx
+; X86-NEXT: pushl %edx
+; X86-NEXT: pushl 28(%ebp)
+; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll __modti3
+; X86-NEXT: addl $32, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: orl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: orl %eax, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl %eax, %ecx
; X86-NEXT: setne %al
-; X86-NEXT: testb %dl, %al
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl (%esp), %edx # 4-byte Reload
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT: movl %edx, (%esp) # 4-byte Spill
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X86-NEXT: movl %ebx, %esi
-; X86-NEXT: sarl $31, %esi
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: andl %ecx, %eax
-; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: movl $0, %edi
-; X86-NEXT: cmovsl %ebx, %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl $-1, %edi
-; X86-NEXT: cmovsl %edx, %edi
-; X86-NEXT: negl %ecx
-; X86-NEXT: movl $0, %edx
-; X86-NEXT: sbbl %edx, %edx
-; X86-NEXT: orl (%esp), %edx # 4-byte Folded Reload
-; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X86-NEXT: cmovnel %edi, %edx
-; X86-NEXT: movl $0, %edi
-; X86-NEXT: cmovel %edi, %eax
-; X86-NEXT: cmpl $-1, %eax
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: cmovel %edx, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: cmovsl %edi, %edx
-; X86-NEXT: movl $-1, %edi
-; X86-NEXT: cmovsl %edi, %eax
-; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT: andl %ebx, %esi
-; X86-NEXT: cmpl $-1, %esi
-; X86-NEXT: cmovnel %eax, %esi
-; X86-NEXT: cmovel %ecx, %edx
-; X86-NEXT: shrdl $1, %esi, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: testb %bl, %al
+; X86-NEXT: cmovel %edi, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: subl $1, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: subl $1, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl $0, %esi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sbbl $0, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl $0, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl $0, %ecx
-; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: sets %al
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl $0, %edx
; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT: sets %bl
-; X86-NEXT: xorb %al, %bl
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: sets %bh
+; X86-NEXT: xorb %bl, %bh
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: orl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: orl %eax, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl %edi, %eax
; X86-NEXT: setne %al
-; X86-NEXT: testb %bl, %al
-; X86-NEXT: movl (%esp), %eax # 4-byte Reload
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT: testb %bh, %al
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmovel %esi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: testl %edx, %edx
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: cmovsl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: cmovsl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: cmovel %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: cmovsl %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: sarl $31, %edi
+; X86-NEXT: movl %edi, %ecx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: cmovel %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: cmovsl %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, %ebx
; X86-NEXT: sarl $31, %ebx
; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: andl %esi, %eax
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: testl %ecx, %ecx
-; X86-NEXT: movl $0, %edi
-; X86-NEXT: cmovsl %ecx, %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl $-1, %edi
-; X86-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X86-NEXT: negl %esi
+; X86-NEXT: cmovel %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: cmovsl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: movl $-1, %esi
+; X86-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: cmovel %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl %eax, %ebx
+; X86-NEXT: negl %eax
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: sbbl %ecx, %ecx
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovnel %esi, %ecx
+; X86-NEXT: movl $0, %edx
+; X86-NEXT: cmovel %edx, %ebx
+; X86-NEXT: cmpl $-1, %ebx
; X86-NEXT: movl $0, %esi
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT: orl (%esp), %ecx # 4-byte Folded Reload
-; X86-NEXT: cmovnel %edi, %esi
-; X86-NEXT: movl $0, %edi
-; X86-NEXT: cmovel %edi, %eax
+; X86-NEXT: cmovel %ecx, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: cmovsl %edx, %ecx
+; X86-NEXT: movl $-1, %edx
+; X86-NEXT: cmovsl %edx, %ebx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: cmpl $-1, %eax
-; X86-NEXT: movl $0, %ecx
; X86-NEXT: cmovel %esi, %ecx
-; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: cmovsl %edi, %esi
+; X86-NEXT: cmovnel %ebx, %eax
+; X86-NEXT: shldl $31, %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl %eax, %edi
+; X86-NEXT: negl %eax
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl $0, %esi
+; X86-NEXT: cmovel %esi, %edi
+; X86-NEXT: cmpl $-1, %edi
+; X86-NEXT: movl $0, %edx
+; X86-NEXT: cmovel %eax, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: cmovsl %esi, %eax
+; X86-NEXT: movl $-1, %ebx
+; X86-NEXT: cmovsl %ebx, %edi
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: cmpl $-1, %ecx
+; X86-NEXT: cmovel %edx, %eax
+; X86-NEXT: cmovnel %edi, %ecx
+; X86-NEXT: shldl $31, %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: andl %eax, %edx
+; X86-NEXT: negl %eax
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: cmovel %esi, %edx
+; X86-NEXT: cmpl $-1, %edx
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: cmovel %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: testl %ebx, %ebx
+; X86-NEXT: cmovsl %esi, %eax
; X86-NEXT: movl $-1, %edi
-; X86-NEXT: cmovsl %edi, %eax
-; X86-NEXT: andl (%esp), %ebx # 4-byte Folded Reload
+; X86-NEXT: cmovsl %edi, %edx
; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X86-NEXT: cmpl $-1, %ebx
-; X86-NEXT: cmovnel %eax, %ebx
-; X86-NEXT: cmovel %ecx, %esi
-; X86-NEXT: shrdl $1, %ebx, %esi
+; X86-NEXT: cmovel %ecx, %eax
+; X86-NEXT: cmovnel %edx, %ebx
+; X86-NEXT: shldl $31, %eax, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: andl %eax, %edi
+; X86-NEXT: negl %eax
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: cmovel %esi, %edi
+; X86-NEXT: cmpl $-1, %edi
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: cmovel %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: testl %edx, %edx
+; X86-NEXT: cmovsl %esi, %eax
+; X86-NEXT: movl $-1, %esi
+; X86-NEXT: cmovsl %esi, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: andl %edx, %esi
+; X86-NEXT: cmpl $-1, %esi
+; X86-NEXT: cmovel %ecx, %eax
+; X86-NEXT: cmovnel %edi, %esi
+; X86-NEXT: shldl $31, %eax, %esi
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %esi, 12(%eax)
-; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: movl %ebx, 8(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 4(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload