[llvm] 2ec3ca7 - [ARM] Extend IsCMPZCSINC to handle CMOV

David Green via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 27 06:15:09 PST 2021


Author: David Green
Date: 2021-12-27T14:15:03Z
New Revision: 2ec3ca747732e9cb9b051c1f979902c0291091c4

URL: https://github.com/llvm/llvm-project/commit/2ec3ca747732e9cb9b051c1f979902c0291091c4
DIFF: https://github.com/llvm/llvm-project/commit/2ec3ca747732e9cb9b051c1f979902c0291091c4.diff

LOG: [ARM] Extend IsCMPZCSINC to handle CMOV

A 'CMOV 1, 0, CC, $cpsr, Cmp' is the same as a 'CSINC 0, 0, CC, Cmp',
and can be treated the same in IsCMPZCSINC added in D114013. This allows
us to remove the unnecessary CMOV in the same way that we could remove a
CSINC.

Differential Revision: https://reviews.llvm.org/D115188

Added: 
    

Modified: 
    llvm/lib/Target/ARM/ARMISelLowering.cpp
    llvm/test/CodeGen/ARM/fp16-fullfp16.ll
    llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
    llvm/test/CodeGen/Thumb2/active_lane_mask.ll
    llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
    llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll
    llvm/test/CodeGen/Thumb2/mve-masked-load.ll
    llvm/test/CodeGen/Thumb2/mve-masked-store.ll
    llvm/test/CodeGen/Thumb2/mve-minmax.ll
    llvm/test/CodeGen/Thumb2/mve-minmaxi.ll
    llvm/test/CodeGen/Thumb2/mve-pred-ext.ll
    llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
    llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll
    llvm/test/CodeGen/Thumb2/mve-vcmp.ll
    llvm/test/CodeGen/Thumb2/mve-vcmpf.ll
    llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
    llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll
    llvm/test/CodeGen/Thumb2/mve-vcmpr.ll
    llvm/test/CodeGen/Thumb2/mve-vqmovn.ll
    llvm/test/CodeGen/Thumb2/mve-vqshrn.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 3d45db3496447..31c6234f02d63 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -14682,7 +14682,9 @@ static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG) {
   return SDValue();
 }
 
-// Check that N is CMPZ(CSINC(0, 0, CC, X)), return X if valid.
+// Check that N is CMPZ(CSINC(0, 0, CC, X)),
+//              or CMPZ(CMOV(1, 0, CC, $cpsr, X))
+// return X if valid.
 static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) {
   if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1)))
     return SDValue();
@@ -14696,12 +14698,24 @@ static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) {
          CSInc.getConstantOperandVal(1) == 1 && CSInc->hasOneUse())
     CSInc = CSInc.getOperand(0);
 
-  if (CSInc.getOpcode() != ARMISD::CSINC ||
-      !isNullConstant(CSInc.getOperand(0)) ||
-      !isNullConstant(CSInc.getOperand(1)) || !CSInc->hasOneUse())
-    return SDValue();
-  CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);
-  return CSInc.getOperand(3);
+  if (CSInc.getOpcode() == ARMISD::CSINC &&
+      isNullConstant(CSInc.getOperand(0)) &&
+      isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
+    CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);
+    return CSInc.getOperand(3);
+  }
+  if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(0)) &&
+      isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
+    CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);
+    return CSInc.getOperand(4);
+  }
+  if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(1)) &&
+      isNullConstant(CSInc.getOperand(0)) && CSInc->hasOneUse()) {
+    CC = ARMCC::getOppositeCondition(
+        (ARMCC::CondCodes)CSInc.getConstantOperandVal(2));
+    return CSInc.getOperand(4);
+  }
+  return SDValue();
 }
 
 static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG) {

diff  --git a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
index 0ff7e0635450d..c6db4d3ae47e4 100644
--- a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
+++ b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
@@ -482,11 +482,9 @@ define void @test_copysign(half* %p, half* %q) {
 ; CHECK-NEXT:    vstr.16 s0, [sp]
 ; CHECK-NEXT:    vldr.16 s0, [r0]
 ; CHECK-NEXT:    ldrb r1, [sp, #1]
-; CHECK-NEXT:    ands r1, r1, #128
 ; CHECK-NEXT:    vabs.f16 s0, s0
-; CHECK-NEXT:    movwne r1, #1
+; CHECK-NEXT:    tst r1, #128
 ; CHECK-NEXT:    vneg.f16 s2, s0
-; CHECK-NEXT:    cmp r1, #0
 ; CHECK-NEXT:    vseleq.f16 s0, s0, s2
 ; CHECK-NEXT:    vstr.16 s0, [r0]
 ; CHECK-NEXT:    add sp, sp, #4

diff  --git a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
index 74dd9fe154aa4..a4d470b72d4ea 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
@@ -2379,67 +2379,42 @@ define <2 x i32> @stest_f64i32_mm(<2 x double> %x) {
 ; CHECK-NEXT:    vorr q4, q0, q0
 ; CHECK-NEXT:    vmov r0, r1, d8
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    mov r2, r1
 ; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    vmov r0, r1, d9
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    mov r3, #0
-; CHECK-NEXT:    clz r7, r2
-; CHECK-NEXT:    movwmi r3, #1
-; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    vmov r0, r2, d9
+; CHECK-NEXT:    cmn r4, #-2147483647
 ; CHECK-NEXT:    mvn r3, #-2147483648
+; CHECK-NEXT:    movlo r3, r4
 ; CHECK-NEXT:    mvn r5, #-2147483648
-; CHECK-NEXT:    movne r3, r4
-; CHECK-NEXT:    cmn r4, #-2147483647
-; CHECK-NEXT:    movhs r4, r5
-; CHECK-NEXT:    lsr r7, r7, #5
-; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    cmp r1, #0
 ; CHECK-NEXT:    mov r6, #0
+; CHECK-NEXT:    movpl r4, r5
+; CHECK-NEXT:    movpl r1, r6
 ; CHECK-NEXT:    moveq r4, r3
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    movpl r2, r6
-; CHECK-NEXT:    cmn r2, #1
-; CHECK-NEXT:    mov r3, #0
-; CHECK-NEXT:    add r2, r2, #1
-; CHECK-NEXT:    movwgt r3, #1
-; CHECK-NEXT:    clz r2, r2
-; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    cmn r1, #1
 ; CHECK-NEXT:    mov r3, #-2147483648
-; CHECK-NEXT:    movne r3, r4
 ; CHECK-NEXT:    mov r7, #-2147483648
+; CHECK-NEXT:    movgt r3, r4
 ; CHECK-NEXT:    cmp r4, #-2147483648
-; CHECK-NEXT:    lsr r2, r2, #5
 ; CHECK-NEXT:    movls r4, r7
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    moveq r4, r3
+; CHECK-NEXT:    cmn r1, #1
+; CHECK-NEXT:    movne r4, r3
+; CHECK-NEXT:    mov r1, r2
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    mov r2, #0
-; CHECK-NEXT:    movwmi r2, #1
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    mvn r2, #-2147483648
-; CHECK-NEXT:    vmov.32 d0[0], r4
-; CHECK-NEXT:    movne r2, r0
 ; CHECK-NEXT:    cmn r0, #-2147483647
-; CHECK-NEXT:    movlo r5, r0
-; CHECK-NEXT:    clz r0, r1
-; CHECK-NEXT:    lsr r0, r0, #5
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    moveq r5, r2
+; CHECK-NEXT:    mvn r2, #-2147483648
+; CHECK-NEXT:    movlo r2, r0
 ; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    movpl r1, r6
-; CHECK-NEXT:    cmn r1, #1
-; CHECK-NEXT:    add r1, r1, #1
-; CHECK-NEXT:    movwgt r6, #1
-; CHECK-NEXT:    clz r1, r1
-; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    movmi r5, r0
+; CHECK-NEXT:    movmi r6, r1
+; CHECK-NEXT:    moveq r5, r2
+; CHECK-NEXT:    cmn r6, #1
 ; CHECK-NEXT:    mov r0, #-2147483648
-; CHECK-NEXT:    movne r0, r5
+; CHECK-NEXT:    vmov.32 d0[0], r4
+; CHECK-NEXT:    movgt r0, r5
 ; CHECK-NEXT:    cmp r5, #-2147483648
 ; CHECK-NEXT:    movls r5, r7
-; CHECK-NEXT:    lsr r1, r1, #5
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    moveq r5, r0
+; CHECK-NEXT:    cmn r6, #1
+; CHECK-NEXT:    movne r5, r0
 ; CHECK-NEXT:    vmov.32 d0[1], r5
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    pop {r4, r5, r6, r7, r11, pc}
@@ -2485,63 +2460,45 @@ entry:
 define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
 ; CHECK-LABEL: ustest_f64i32_mm:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT:    .save {r4, r5, r6, lr}
+; CHECK-NEXT:    push {r4, r5, r6, lr}
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
 ; CHECK-NEXT:    vorr q4, q0, q0
-; CHECK-NEXT:    vmov r0, r1, d9
+; CHECK-NEXT:    vmov r0, r1, d8
 ; CHECK-NEXT:    bl __aeabi_d2lz
-; CHECK-NEXT:    mov r4, r1
-; CHECK-NEXT:    vmov r2, r1, d8
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    mov r3, #0
-; CHECK-NEXT:    movwmi r3, #1
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    clz r3, r4
-; CHECK-NEXT:    mvn r8, #0
-; CHECK-NEXT:    movne r8, r0
+; CHECK-NEXT:    vmov r2, r12, d9
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    mvn r3, #0
 ; CHECK-NEXT:    mov r5, #0
-; CHECK-NEXT:    lsr r3, r3, #5
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    movne r8, r0
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    movpl r4, r5
-; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    movmi r3, r0
+; CHECK-NEXT:    movpl r1, r5
+; CHECK-NEXT:    moveq r3, r0
+; CHECK-NEXT:    cmp r1, #0
 ; CHECK-NEXT:    mov r6, #0
-; CHECK-NEXT:    mvn r7, #0
+; CHECK-NEXT:    mvn r4, #0
 ; CHECK-NEXT:    movwgt r6, #1
 ; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    movne r6, r8
+; CHECK-NEXT:    movne r6, r3
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    moveq r6, r3
 ; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    mov r1, r12
 ; CHECK-NEXT:    bl __aeabi_d2lz
 ; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    mov r2, #0
-; CHECK-NEXT:    movwmi r2, #1
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    clz r2, r1
-; CHECK-NEXT:    movne r7, r0
-; CHECK-NEXT:    lsr r2, r2, #5
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    movne r7, r0
-; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    vmov.32 d0[0], r6
+; CHECK-NEXT:    movmi r4, r0
 ; CHECK-NEXT:    movpl r1, r5
-; CHECK-NEXT:    clz r0, r1
+; CHECK-NEXT:    moveq r4, r0
 ; CHECK-NEXT:    cmp r1, #0
 ; CHECK-NEXT:    movwgt r5, #1
 ; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    movne r5, r7
-; CHECK-NEXT:    lsr r0, r0, #5
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    clz r0, r4
-; CHECK-NEXT:    movne r5, r7
-; CHECK-NEXT:    vmov.32 d0[0], r5
-; CHECK-NEXT:    lsr r0, r0, #5
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    movne r6, r8
-; CHECK-NEXT:    vmov.32 d0[1], r6
+; CHECK-NEXT:    movne r5, r4
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    moveq r5, r4
+; CHECK-NEXT:    vmov.32 d0[1], r5
 ; CHECK-NEXT:    vpop {d8, d9}
-; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT:    pop {r4, r5, r6, pc}
 entry:
   %conv = fptosi <2 x double> %x to <2 x i64>
   %spec.store.select = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %conv, <2 x i64> <i64 4294967295, i64 4294967295>)
@@ -2560,129 +2517,78 @@ define <4 x i32> @stest_f32i32_mm(<4 x float> %x) {
 ; CHECK-NEXT:    vorr q4, q0, q0
 ; CHECK-NEXT:    mov r8, #-2147483648
 ; CHECK-NEXT:    mvn r7, #-2147483648
-; CHECK-NEXT:    mov r9, #0
 ; CHECK-NEXT:    vmov r0, s19
-; CHECK-NEXT:    vmov r5, s18
+; CHECK-NEXT:    vmov r5, s16
 ; CHECK-NEXT:    bl __aeabi_f2lz
 ; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    mov r0, #0
-; CHECK-NEXT:    clz r2, r1
-; CHECK-NEXT:    movwmi r0, #1
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    cmn r0, #-2147483647
 ; CHECK-NEXT:    mvn r0, #-2147483648
-; CHECK-NEXT:    lsr r2, r2, #5
-; CHECK-NEXT:    movne r0, r4
-; CHECK-NEXT:    cmn r4, #-2147483647
-; CHECK-NEXT:    movhs r4, r7
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    moveq r4, r0
+; CHECK-NEXT:    mov r9, #0
+; CHECK-NEXT:    movlo r0, r4
 ; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    movpl r4, r7
 ; CHECK-NEXT:    movpl r1, r9
+; CHECK-NEXT:    moveq r4, r0
 ; CHECK-NEXT:    cmn r1, #1
-; CHECK-NEXT:    mov r0, #0
-; CHECK-NEXT:    add r1, r1, #1
-; CHECK-NEXT:    movwgt r0, #1
-; CHECK-NEXT:    clz r1, r1
-; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    mov r0, #-2147483648
-; CHECK-NEXT:    movne r0, r4
+; CHECK-NEXT:    movgt r0, r4
 ; CHECK-NEXT:    cmp r4, #-2147483648
 ; CHECK-NEXT:    movls r4, r8
-; CHECK-NEXT:    lsr r1, r1, #5
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    moveq r4, r0
+; CHECK-NEXT:    cmn r1, #1
+; CHECK-NEXT:    movne r4, r0
 ; CHECK-NEXT:    mov r0, r5
 ; CHECK-NEXT:    bl __aeabi_f2lz
 ; CHECK-NEXT:    mov r5, r0
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    mov r0, #0
-; CHECK-NEXT:    clz r2, r1
-; CHECK-NEXT:    movwmi r0, #1
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    cmn r0, #-2147483647
 ; CHECK-NEXT:    mvn r0, #-2147483648
-; CHECK-NEXT:    lsr r2, r2, #5
-; CHECK-NEXT:    movne r0, r5
-; CHECK-NEXT:    cmn r5, #-2147483647
-; CHECK-NEXT:    movhs r5, r7
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    moveq r5, r0
+; CHECK-NEXT:    mov r2, #-2147483648
+; CHECK-NEXT:    movlo r0, r5
 ; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    movpl r5, r7
 ; CHECK-NEXT:    movpl r1, r9
+; CHECK-NEXT:    moveq r5, r0
+; CHECK-NEXT:    vmov r0, s18
 ; CHECK-NEXT:    cmn r1, #1
-; CHECK-NEXT:    mov r0, #0
-; CHECK-NEXT:    mov r2, #-2147483648
-; CHECK-NEXT:    movwgt r0, #1
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    add r0, r1, #1
-; CHECK-NEXT:    movne r2, r5
-; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    movgt r2, r5
 ; CHECK-NEXT:    cmp r5, #-2147483648
 ; CHECK-NEXT:    movls r5, r8
-; CHECK-NEXT:    lsr r1, r0, #5
-; CHECK-NEXT:    vmov r0, s16
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    moveq r5, r2
+; CHECK-NEXT:    cmn r1, #1
+; CHECK-NEXT:    movne r5, r2
 ; CHECK-NEXT:    bl __aeabi_f2lz
 ; CHECK-NEXT:    mov r6, r0
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    mov r0, #0
-; CHECK-NEXT:    clz r2, r1
-; CHECK-NEXT:    movwmi r0, #1
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    cmn r0, #-2147483647
 ; CHECK-NEXT:    mvn r0, #-2147483648
-; CHECK-NEXT:    lsr r2, r2, #5
-; CHECK-NEXT:    movne r0, r6
-; CHECK-NEXT:    cmn r6, #-2147483647
-; CHECK-NEXT:    movhs r6, r7
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    moveq r6, r0
+; CHECK-NEXT:    mov r2, #-2147483648
+; CHECK-NEXT:    movlo r0, r6
 ; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    movpl r6, r7
 ; CHECK-NEXT:    movpl r1, r9
+; CHECK-NEXT:    moveq r6, r0
+; CHECK-NEXT:    vmov r0, s17
 ; CHECK-NEXT:    cmn r1, #1
-; CHECK-NEXT:    mov r0, #0
-; CHECK-NEXT:    mov r2, #-2147483648
-; CHECK-NEXT:    movwgt r0, #1
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    add r0, r1, #1
-; CHECK-NEXT:    movne r2, r6
-; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    movgt r2, r6
 ; CHECK-NEXT:    cmp r6, #-2147483648
 ; CHECK-NEXT:    movls r6, r8
-; CHECK-NEXT:    lsr r1, r0, #5
-; CHECK-NEXT:    vmov r0, s17
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    moveq r6, r2
+; CHECK-NEXT:    cmn r1, #1
+; CHECK-NEXT:    movne r6, r2
 ; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    mov r2, #0
-; CHECK-NEXT:    movwmi r2, #1
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    mvn r2, #-2147483648
-; CHECK-NEXT:    vmov.32 d0[0], r6
-; CHECK-NEXT:    movne r2, r0
 ; CHECK-NEXT:    cmn r0, #-2147483647
-; CHECK-NEXT:    movlo r7, r0
-; CHECK-NEXT:    clz r0, r1
-; CHECK-NEXT:    vmov.32 d1[0], r5
-; CHECK-NEXT:    lsr r0, r0, #5
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    moveq r7, r2
+; CHECK-NEXT:    mvn r2, #-2147483648
+; CHECK-NEXT:    movlo r2, r0
 ; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    movpl r1, r9
-; CHECK-NEXT:    cmn r1, #1
-; CHECK-NEXT:    add r1, r1, #1
-; CHECK-NEXT:    movwgt r9, #1
-; CHECK-NEXT:    clz r1, r1
-; CHECK-NEXT:    cmp r9, #0
+; CHECK-NEXT:    movmi r7, r0
+; CHECK-NEXT:    movmi r9, r1
+; CHECK-NEXT:    moveq r7, r2
+; CHECK-NEXT:    cmn r9, #1
 ; CHECK-NEXT:    mov r0, #-2147483648
-; CHECK-NEXT:    vmov.32 d1[1], r4
-; CHECK-NEXT:    movne r0, r7
+; CHECK-NEXT:    vmov.32 d1[0], r6
+; CHECK-NEXT:    movgt r0, r7
 ; CHECK-NEXT:    cmp r7, #-2147483648
+; CHECK-NEXT:    vmov.32 d0[0], r5
 ; CHECK-NEXT:    movls r7, r8
-; CHECK-NEXT:    lsr r1, r1, #5
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    moveq r7, r0
+; CHECK-NEXT:    cmn r9, #1
+; CHECK-NEXT:    vmov.32 d1[1], r4
+; CHECK-NEXT:    movne r7, r0
 ; CHECK-NEXT:    vmov.32 d0[1], r7
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
@@ -2743,115 +2649,75 @@ entry:
 define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
 ; CHECK-LABEL: ustest_f32i32_mm:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-NEXT:    .pad #4
-; CHECK-NEXT:    sub sp, sp, #4
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
 ; CHECK-NEXT:    vorr q4, q0, q0
+; CHECK-NEXT:    mvn r9, #0
 ; CHECK-NEXT:    vmov r0, s19
+; CHECK-NEXT:    vmov r5, s16
+; CHECK-NEXT:    vmov r8, s18
 ; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    mov r2, r0
-; CHECK-NEXT:    vmov r0, s17
 ; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    mov r3, #0
-; CHECK-NEXT:    movwmi r3, #1
-; CHECK-NEXT:    clz r6, r1
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    mvn r3, #0
-; CHECK-NEXT:    movne r3, r2
-; CHECK-NEXT:    lsr r6, r6, #5
-; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    mvn r2, #0
+; CHECK-NEXT:    movmi r2, r0
 ; CHECK-NEXT:    mov r7, #0
-; CHECK-NEXT:    movne r3, r2
-; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    moveq r2, r0
 ; CHECK-NEXT:    movpl r1, r7
 ; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    mov r11, #0
-; CHECK-NEXT:    clz r1, r1
-; CHECK-NEXT:    movwgt r11, #1
-; CHECK-NEXT:    cmp r11, #0
-; CHECK-NEXT:    movne r11, r3
-; CHECK-NEXT:    lsr r1, r1, #5
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    mvn r9, #0
-; CHECK-NEXT:    vmov r8, s16
-; CHECK-NEXT:    movne r11, r3
-; CHECK-NEXT:    bl __aeabi_f2lz
-; CHECK-NEXT:    mov r4, r1
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    mov r1, #0
-; CHECK-NEXT:    mvn r10, #0
-; CHECK-NEXT:    movwmi r1, #1
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    clz r1, r4
-; CHECK-NEXT:    movne r10, r0
-; CHECK-NEXT:    mov r6, #0
-; CHECK-NEXT:    lsr r1, r1, #5
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    movne r10, r0
-; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    movpl r4, r7
+; CHECK-NEXT:    mov r4, #0
+; CHECK-NEXT:    movwgt r4, #1
 ; CHECK-NEXT:    cmp r4, #0
-; CHECK-NEXT:    movwgt r6, #1
-; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    mov r0, r8
-; CHECK-NEXT:    movne r6, r10
+; CHECK-NEXT:    movne r4, r2
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    moveq r4, r2
 ; CHECK-NEXT:    bl __aeabi_f2lz
 ; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    mov r2, #0
-; CHECK-NEXT:    movwmi r2, #1
-; CHECK-NEXT:    clz r3, r1
-; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    mvn r2, #0
-; CHECK-NEXT:    movne r2, r0
-; CHECK-NEXT:    lsr r3, r3, #5
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    mov r5, #0
-; CHECK-NEXT:    movne r2, r0
-; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    movmi r2, r0
 ; CHECK-NEXT:    movpl r1, r7
-; CHECK-NEXT:    clz r0, r1
+; CHECK-NEXT:    moveq r2, r0
 ; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    mov r5, #0
+; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    movwgt r5, #1
 ; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    lsr r0, r0, #5
-; CHECK-NEXT:    movne r5, r2
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    vmov r0, s18
 ; CHECK-NEXT:    movne r5, r2
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    moveq r5, r2
 ; CHECK-NEXT:    bl __aeabi_f2lz
 ; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    mov r2, #0
-; CHECK-NEXT:    movwmi r2, #1
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    clz r2, r1
-; CHECK-NEXT:    movne r9, r0
-; CHECK-NEXT:    vmov.32 d0[0], r5
-; CHECK-NEXT:    lsr r2, r2, #5
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    movne r9, r0
+; CHECK-NEXT:    mvn r2, #0
+; CHECK-NEXT:    movmi r2, r0
+; CHECK-NEXT:    movpl r1, r7
+; CHECK-NEXT:    moveq r2, r0
+; CHECK-NEXT:    vmov r0, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    mov r6, #0
+; CHECK-NEXT:    movwgt r6, #1
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    movne r6, r2
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    moveq r6, r2
+; CHECK-NEXT:    bl __aeabi_f2lz
 ; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    vmov.32 d1[0], r6
+; CHECK-NEXT:    movmi r9, r0
 ; CHECK-NEXT:    movpl r1, r7
-; CHECK-NEXT:    clz r0, r1
+; CHECK-NEXT:    moveq r9, r0
 ; CHECK-NEXT:    cmp r1, #0
 ; CHECK-NEXT:    movwgt r7, #1
 ; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    vmov.32 d0[0], r5
 ; CHECK-NEXT:    movne r7, r9
-; CHECK-NEXT:    lsr r0, r0, #5
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    clz r0, r4
-; CHECK-NEXT:    movne r7, r9
-; CHECK-NEXT:    vmov.32 d1[0], r7
-; CHECK-NEXT:    lsr r0, r0, #5
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    movne r6, r10
-; CHECK-NEXT:    vmov.32 d1[1], r11
-; CHECK-NEXT:    vmov.32 d0[1], r6
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    vmov.32 d1[1], r4
+; CHECK-NEXT:    moveq r7, r9
+; CHECK-NEXT:    vmov.32 d0[1], r7
 ; CHECK-NEXT:    vpop {d8, d9}
-; CHECK-NEXT:    add sp, sp, #4
-; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
 entry:
   %conv = fptosi <4 x float> %x to <4 x i64>
   %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
@@ -2868,136 +2734,85 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
 ; CHECK-NEON-NEXT:    .vsave {d8, d9, d10}
 ; CHECK-NEON-NEXT:    vpush {d8, d9, d10}
 ; CHECK-NEON-NEXT:    vmov r0, s3
-; CHECK-NEON-NEXT:    vmov.f32 s20, s2
+; CHECK-NEON-NEXT:    vmov.f32 s18, s2
 ; CHECK-NEON-NEXT:    vmov.f32 s16, s1
-; CHECK-NEON-NEXT:    vmov.f32 s18, s0
+; CHECK-NEON-NEXT:    vmov.f32 s20, s0
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    bl __aeabi_f2lz
 ; CHECK-NEON-NEXT:    mov r4, r0
 ; CHECK-NEON-NEXT:    vmov r0, s20
-; CHECK-NEON-NEXT:    cmp r1, #0
-; CHECK-NEON-NEXT:    mov r2, #0
-; CHECK-NEON-NEXT:    movwmi r2, #1
-; CHECK-NEON-NEXT:    clz r3, r1
-; CHECK-NEON-NEXT:    cmp r2, #0
+; CHECK-NEON-NEXT:    cmn r4, #-2147483647
 ; CHECK-NEON-NEXT:    mvn r2, #-2147483648
-; CHECK-NEON-NEXT:    movne r2, r4
+; CHECK-NEON-NEXT:    movlo r2, r4
 ; CHECK-NEON-NEXT:    mvn r7, #-2147483648
-; CHECK-NEON-NEXT:    cmn r4, #-2147483647
-; CHECK-NEON-NEXT:    lsr r3, r3, #5
-; CHECK-NEON-NEXT:    movhs r4, r7
-; CHECK-NEON-NEXT:    cmp r3, #0
-; CHECK-NEON-NEXT:    moveq r4, r2
-; CHECK-NEON-NEXT:    mov r9, #0
 ; CHECK-NEON-NEXT:    cmp r1, #0
-; CHECK-NEON-NEXT:    mov r2, #0
+; CHECK-NEON-NEXT:    mov r9, #0
+; CHECK-NEON-NEXT:    movpl r4, r7
 ; CHECK-NEON-NEXT:    movpl r1, r9
+; CHECK-NEON-NEXT:    moveq r4, r2
 ; CHECK-NEON-NEXT:    cmn r1, #1
-; CHECK-NEON-NEXT:    movwgt r2, #1
-; CHECK-NEON-NEXT:    add r1, r1, #1
-; CHECK-NEON-NEXT:    clz r1, r1
-; CHECK-NEON-NEXT:    cmp r2, #0
 ; CHECK-NEON-NEXT:    mov r2, #-2147483648
 ; CHECK-NEON-NEXT:    mov r8, #-2147483648
-; CHECK-NEON-NEXT:    movne r2, r4
+; CHECK-NEON-NEXT:    movgt r2, r4
 ; CHECK-NEON-NEXT:    cmp r4, #-2147483648
 ; CHECK-NEON-NEXT:    movls r4, r8
-; CHECK-NEON-NEXT:    lsr r1, r1, #5
-; CHECK-NEON-NEXT:    cmp r1, #0
-; CHECK-NEON-NEXT:    moveq r4, r2
+; CHECK-NEON-NEXT:    cmn r1, #1
+; CHECK-NEON-NEXT:    movne r4, r2
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    bl __aeabi_f2lz
 ; CHECK-NEON-NEXT:    mov r5, r0
-; CHECK-NEON-NEXT:    cmp r1, #0
-; CHECK-NEON-NEXT:    mov r0, #0
-; CHECK-NEON-NEXT:    clz r2, r1
-; CHECK-NEON-NEXT:    movwmi r0, #1
-; CHECK-NEON-NEXT:    cmp r0, #0
+; CHECK-NEON-NEXT:    cmn r0, #-2147483647
 ; CHECK-NEON-NEXT:    mvn r0, #-2147483648
-; CHECK-NEON-NEXT:    lsr r2, r2, #5
-; CHECK-NEON-NEXT:    movne r0, r5
-; CHECK-NEON-NEXT:    cmn r5, #-2147483647
-; CHECK-NEON-NEXT:    movhs r5, r7
-; CHECK-NEON-NEXT:    cmp r2, #0
-; CHECK-NEON-NEXT:    moveq r5, r0
+; CHECK-NEON-NEXT:    mov r2, #-2147483648
+; CHECK-NEON-NEXT:    movlo r0, r5
 ; CHECK-NEON-NEXT:    cmp r1, #0
+; CHECK-NEON-NEXT:    movpl r5, r7
 ; CHECK-NEON-NEXT:    movpl r1, r9
+; CHECK-NEON-NEXT:    moveq r5, r0
+; CHECK-NEON-NEXT:    vmov r0, s18
 ; CHECK-NEON-NEXT:    cmn r1, #1
-; CHECK-NEON-NEXT:    mov r0, #0
-; CHECK-NEON-NEXT:    mov r2, #-2147483648
-; CHECK-NEON-NEXT:    movwgt r0, #1
-; CHECK-NEON-NEXT:    cmp r0, #0
-; CHECK-NEON-NEXT:    add r0, r1, #1
-; CHECK-NEON-NEXT:    movne r2, r5
-; CHECK-NEON-NEXT:    clz r0, r0
+; CHECK-NEON-NEXT:    movgt r2, r5
 ; CHECK-NEON-NEXT:    cmp r5, #-2147483648
 ; CHECK-NEON-NEXT:    movls r5, r8
-; CHECK-NEON-NEXT:    lsr r1, r0, #5
-; CHECK-NEON-NEXT:    vmov r0, s18
-; CHECK-NEON-NEXT:    cmp r1, #0
-; CHECK-NEON-NEXT:    moveq r5, r2
+; CHECK-NEON-NEXT:    cmn r1, #1
+; CHECK-NEON-NEXT:    movne r5, r2
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    bl __aeabi_f2lz
 ; CHECK-NEON-NEXT:    mov r6, r0
-; CHECK-NEON-NEXT:    cmp r1, #0
-; CHECK-NEON-NEXT:    mov r0, #0
-; CHECK-NEON-NEXT:    clz r2, r1
-; CHECK-NEON-NEXT:    movwmi r0, #1
-; CHECK-NEON-NEXT:    cmp r0, #0
+; CHECK-NEON-NEXT:    cmn r0, #-2147483647
 ; CHECK-NEON-NEXT:    mvn r0, #-2147483648
-; CHECK-NEON-NEXT:    lsr r2, r2, #5
-; CHECK-NEON-NEXT:    movne r0, r6
-; CHECK-NEON-NEXT:    cmn r6, #-2147483647
-; CHECK-NEON-NEXT:    movhs r6, r7
-; CHECK-NEON-NEXT:    cmp r2, #0
-; CHECK-NEON-NEXT:    moveq r6, r0
+; CHECK-NEON-NEXT:    mov r2, #-2147483648
+; CHECK-NEON-NEXT:    movlo r0, r6
 ; CHECK-NEON-NEXT:    cmp r1, #0
+; CHECK-NEON-NEXT:    movpl r6, r7
 ; CHECK-NEON-NEXT:    movpl r1, r9
+; CHECK-NEON-NEXT:    moveq r6, r0
+; CHECK-NEON-NEXT:    vmov r0, s16
 ; CHECK-NEON-NEXT:    cmn r1, #1
-; CHECK-NEON-NEXT:    mov r0, #0
-; CHECK-NEON-NEXT:    mov r2, #-2147483648
-; CHECK-NEON-NEXT:    movwgt r0, #1
-; CHECK-NEON-NEXT:    cmp r0, #0
-; CHECK-NEON-NEXT:    add r0, r1, #1
-; CHECK-NEON-NEXT:    movne r2, r6
-; CHECK-NEON-NEXT:    clz r0, r0
+; CHECK-NEON-NEXT:    movgt r2, r6
 ; CHECK-NEON-NEXT:    cmp r6, #-2147483648
 ; CHECK-NEON-NEXT:    movls r6, r8
-; CHECK-NEON-NEXT:    lsr r1, r0, #5
-; CHECK-NEON-NEXT:    vmov r0, s16
-; CHECK-NEON-NEXT:    cmp r1, #0
-; CHECK-NEON-NEXT:    moveq r6, r2
+; CHECK-NEON-NEXT:    cmn r1, #1
+; CHECK-NEON-NEXT:    movne r6, r2
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    bl __aeabi_f2lz
-; CHECK-NEON-NEXT:    cmp r1, #0
-; CHECK-NEON-NEXT:    mov r2, #0
-; CHECK-NEON-NEXT:    movwmi r2, #1
-; CHECK-NEON-NEXT:    cmp r2, #0
-; CHECK-NEON-NEXT:    mvn r2, #-2147483648
-; CHECK-NEON-NEXT:    vmov.32 d0[0], r6
-; CHECK-NEON-NEXT:    movne r2, r0
 ; CHECK-NEON-NEXT:    cmn r0, #-2147483647
-; CHECK-NEON-NEXT:    movlo r7, r0
-; CHECK-NEON-NEXT:    clz r0, r1
-; CHECK-NEON-NEXT:    vmov.32 d1[0], r5
-; CHECK-NEON-NEXT:    lsr r0, r0, #5
-; CHECK-NEON-NEXT:    cmp r0, #0
-; CHECK-NEON-NEXT:    moveq r7, r2
+; CHECK-NEON-NEXT:    mvn r2, #-2147483648
+; CHECK-NEON-NEXT:    movlo r2, r0
 ; CHECK-NEON-NEXT:    cmp r1, #0
-; CHECK-NEON-NEXT:    movpl r1, r9
-; CHECK-NEON-NEXT:    cmn r1, #1
-; CHECK-NEON-NEXT:    add r1, r1, #1
-; CHECK-NEON-NEXT:    movwgt r9, #1
-; CHECK-NEON-NEXT:    clz r1, r1
-; CHECK-NEON-NEXT:    cmp r9, #0
+; CHECK-NEON-NEXT:    movmi r7, r0
+; CHECK-NEON-NEXT:    movmi r9, r1
+; CHECK-NEON-NEXT:    moveq r7, r2
+; CHECK-NEON-NEXT:    cmn r9, #1
 ; CHECK-NEON-NEXT:    mov r0, #-2147483648
-; CHECK-NEON-NEXT:    vmov.32 d1[1], r4
-; CHECK-NEON-NEXT:    movne r0, r7
+; CHECK-NEON-NEXT:    vmov.32 d1[0], r6
+; CHECK-NEON-NEXT:    movgt r0, r7
 ; CHECK-NEON-NEXT:    cmp r7, #-2147483648
+; CHECK-NEON-NEXT:    vmov.32 d0[0], r5
 ; CHECK-NEON-NEXT:    movls r7, r8
-; CHECK-NEON-NEXT:    lsr r1, r1, #5
-; CHECK-NEON-NEXT:    cmp r1, #0
-; CHECK-NEON-NEXT:    moveq r7, r0
+; CHECK-NEON-NEXT:    cmn r9, #1
+; CHECK-NEON-NEXT:    vmov.32 d1[1], r4
+; CHECK-NEON-NEXT:    movne r7, r0
 ; CHECK-NEON-NEXT:    vmov.32 d0[1], r7
 ; CHECK-NEON-NEXT:    vpop {d8, d9, d10}
 ; CHECK-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
@@ -3013,131 +2828,80 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
 ; CHECK-FP16-NEXT:    vmov s0, r0
 ; CHECK-FP16-NEXT:    bl __fixhfdi
 ; CHECK-FP16-NEXT:    mov r4, r0
-; CHECK-FP16-NEXT:    vmov.u16 r0, d8[0]
-; CHECK-FP16-NEXT:    vmov.u16 r2, d8[2]
-; CHECK-FP16-NEXT:    cmp r1, #0
+; CHECK-FP16-NEXT:    vmov.u16 r0, d8[2]
+; CHECK-FP16-NEXT:    vmov.u16 r2, d8[0]
+; CHECK-FP16-NEXT:    cmn r4, #-2147483647
 ; CHECK-FP16-NEXT:    mvn r7, #-2147483648
 ; CHECK-FP16-NEXT:    mov r9, #0
 ; CHECK-FP16-NEXT:    mov r8, #-2147483648
 ; CHECK-FP16-NEXT:    vmov s18, r0
-; CHECK-FP16-NEXT:    mov r0, #0
-; CHECK-FP16-NEXT:    movwmi r0, #1
-; CHECK-FP16-NEXT:    vmov s0, r2
-; CHECK-FP16-NEXT:    clz r2, r1
-; CHECK-FP16-NEXT:    cmp r0, #0
 ; CHECK-FP16-NEXT:    mvn r0, #-2147483648
-; CHECK-FP16-NEXT:    movne r0, r4
-; CHECK-FP16-NEXT:    cmn r4, #-2147483647
-; CHECK-FP16-NEXT:    movhs r4, r7
-; CHECK-FP16-NEXT:    lsr r2, r2, #5
-; CHECK-FP16-NEXT:    cmp r2, #0
-; CHECK-FP16-NEXT:    moveq r4, r0
+; CHECK-FP16-NEXT:    movlo r0, r4
 ; CHECK-FP16-NEXT:    cmp r1, #0
+; CHECK-FP16-NEXT:    movpl r4, r7
 ; CHECK-FP16-NEXT:    movpl r1, r9
+; CHECK-FP16-NEXT:    moveq r4, r0
 ; CHECK-FP16-NEXT:    cmn r1, #1
-; CHECK-FP16-NEXT:    mov r0, #0
-; CHECK-FP16-NEXT:    add r1, r1, #1
-; CHECK-FP16-NEXT:    movwgt r0, #1
-; CHECK-FP16-NEXT:    clz r1, r1
-; CHECK-FP16-NEXT:    cmp r0, #0
 ; CHECK-FP16-NEXT:    mov r0, #-2147483648
-; CHECK-FP16-NEXT:    movne r0, r4
+; CHECK-FP16-NEXT:    vmov s0, r2
+; CHECK-FP16-NEXT:    movgt r0, r4
 ; CHECK-FP16-NEXT:    cmp r4, #-2147483648
 ; CHECK-FP16-NEXT:    movls r4, r8
-; CHECK-FP16-NEXT:    lsr r1, r1, #5
-; CHECK-FP16-NEXT:    cmp r1, #0
-; CHECK-FP16-NEXT:    moveq r4, r0
+; CHECK-FP16-NEXT:    cmn r1, #1
+; CHECK-FP16-NEXT:    movne r4, r0
 ; CHECK-FP16-NEXT:    bl __fixhfdi
+; CHECK-FP16-NEXT:    vmov.f32 s0, s18
 ; CHECK-FP16-NEXT:    mov r5, r0
-; CHECK-FP16-NEXT:    cmp r1, #0
-; CHECK-FP16-NEXT:    mov r0, #0
-; CHECK-FP16-NEXT:    clz r2, r1
-; CHECK-FP16-NEXT:    movwmi r0, #1
-; CHECK-FP16-NEXT:    cmp r0, #0
+; CHECK-FP16-NEXT:    cmn r0, #-2147483647
 ; CHECK-FP16-NEXT:    mvn r0, #-2147483648
-; CHECK-FP16-NEXT:    vmov.f32 s0, s18
-; CHECK-FP16-NEXT:    movne r0, r5
-; CHECK-FP16-NEXT:    cmn r5, #-2147483647
-; CHECK-FP16-NEXT:    lsr r2, r2, #5
-; CHECK-FP16-NEXT:    movhs r5, r7
-; CHECK-FP16-NEXT:    cmp r2, #0
-; CHECK-FP16-NEXT:    moveq r5, r0
+; CHECK-FP16-NEXT:    movlo r0, r5
 ; CHECK-FP16-NEXT:    cmp r1, #0
+; CHECK-FP16-NEXT:    movpl r5, r7
 ; CHECK-FP16-NEXT:    movpl r1, r9
+; CHECK-FP16-NEXT:    moveq r5, r0
 ; CHECK-FP16-NEXT:    cmn r1, #1
-; CHECK-FP16-NEXT:    mov r0, #0
-; CHECK-FP16-NEXT:    add r1, r1, #1
-; CHECK-FP16-NEXT:    movwgt r0, #1
-; CHECK-FP16-NEXT:    clz r1, r1
-; CHECK-FP16-NEXT:    cmp r0, #0
 ; CHECK-FP16-NEXT:    mov r0, #-2147483648
-; CHECK-FP16-NEXT:    movne r0, r5
+; CHECK-FP16-NEXT:    movgt r0, r5
 ; CHECK-FP16-NEXT:    cmp r5, #-2147483648
 ; CHECK-FP16-NEXT:    movls r5, r8
-; CHECK-FP16-NEXT:    lsr r1, r1, #5
-; CHECK-FP16-NEXT:    cmp r1, #0
-; CHECK-FP16-NEXT:    moveq r5, r0
+; CHECK-FP16-NEXT:    cmn r1, #1
+; CHECK-FP16-NEXT:    movne r5, r0
 ; CHECK-FP16-NEXT:    bl __fixhfdi
+; CHECK-FP16-NEXT:    vmov.u16 r2, d8[1]
 ; CHECK-FP16-NEXT:    mov r6, r0
-; CHECK-FP16-NEXT:    cmp r1, #0
-; CHECK-FP16-NEXT:    mov r0, #0
-; CHECK-FP16-NEXT:    clz r2, r1
-; CHECK-FP16-NEXT:    movwmi r0, #1
-; CHECK-FP16-NEXT:    cmp r0, #0
+; CHECK-FP16-NEXT:    cmn r0, #-2147483647
 ; CHECK-FP16-NEXT:    mvn r0, #-2147483648
-; CHECK-FP16-NEXT:    lsr r2, r2, #5
-; CHECK-FP16-NEXT:    movne r0, r6
-; CHECK-FP16-NEXT:    cmn r6, #-2147483647
-; CHECK-FP16-NEXT:    movhs r6, r7
-; CHECK-FP16-NEXT:    cmp r2, #0
-; CHECK-FP16-NEXT:    vmov.u16 r2, d8[1]
-; CHECK-FP16-NEXT:    moveq r6, r0
+; CHECK-FP16-NEXT:    movlo r0, r6
 ; CHECK-FP16-NEXT:    cmp r1, #0
-; CHECK-FP16-NEXT:    mov r0, #0
+; CHECK-FP16-NEXT:    movpl r6, r7
 ; CHECK-FP16-NEXT:    movpl r1, r9
+; CHECK-FP16-NEXT:    moveq r6, r0
 ; CHECK-FP16-NEXT:    cmn r1, #1
-; CHECK-FP16-NEXT:    movwgt r0, #1
-; CHECK-FP16-NEXT:    add r1, r1, #1
-; CHECK-FP16-NEXT:    clz r1, r1
-; CHECK-FP16-NEXT:    cmp r0, #0
 ; CHECK-FP16-NEXT:    mov r0, #-2147483648
-; CHECK-FP16-NEXT:    movne r0, r6
+; CHECK-FP16-NEXT:    movgt r0, r6
 ; CHECK-FP16-NEXT:    cmp r6, #-2147483648
 ; CHECK-FP16-NEXT:    movls r6, r8
-; CHECK-FP16-NEXT:    lsr r1, r1, #5
-; CHECK-FP16-NEXT:    cmp r1, #0
-; CHECK-FP16-NEXT:    moveq r6, r0
+; CHECK-FP16-NEXT:    cmn r1, #1
+; CHECK-FP16-NEXT:    movne r6, r0
 ; CHECK-FP16-NEXT:    vmov s0, r2
 ; CHECK-FP16-NEXT:    bl __fixhfdi
-; CHECK-FP16-NEXT:    cmp r1, #0
-; CHECK-FP16-NEXT:    mov r2, #0
-; CHECK-FP16-NEXT:    movwmi r2, #1
-; CHECK-FP16-NEXT:    cmp r2, #0
-; CHECK-FP16-NEXT:    mvn r2, #-2147483648
-; CHECK-FP16-NEXT:    vmov.32 d0[0], r6
-; CHECK-FP16-NEXT:    movne r2, r0
 ; CHECK-FP16-NEXT:    cmn r0, #-2147483647
-; CHECK-FP16-NEXT:    movlo r7, r0
-; CHECK-FP16-NEXT:    clz r0, r1
-; CHECK-FP16-NEXT:    vmov.32 d1[0], r5
-; CHECK-FP16-NEXT:    lsr r0, r0, #5
-; CHECK-FP16-NEXT:    cmp r0, #0
-; CHECK-FP16-NEXT:    moveq r7, r2
+; CHECK-FP16-NEXT:    mvn r2, #-2147483648
+; CHECK-FP16-NEXT:    movlo r2, r0
 ; CHECK-FP16-NEXT:    cmp r1, #0
-; CHECK-FP16-NEXT:    movpl r1, r9
-; CHECK-FP16-NEXT:    cmn r1, #1
-; CHECK-FP16-NEXT:    add r1, r1, #1
-; CHECK-FP16-NEXT:    movwgt r9, #1
-; CHECK-FP16-NEXT:    clz r1, r1
-; CHECK-FP16-NEXT:    cmp r9, #0
+; CHECK-FP16-NEXT:    movmi r7, r0
+; CHECK-FP16-NEXT:    movmi r9, r1
+; CHECK-FP16-NEXT:    moveq r7, r2
+; CHECK-FP16-NEXT:    cmn r9, #1
 ; CHECK-FP16-NEXT:    mov r0, #-2147483648
-; CHECK-FP16-NEXT:    vmov.32 d1[1], r4
-; CHECK-FP16-NEXT:    movne r0, r7
+; CHECK-FP16-NEXT:    vmov.32 d1[0], r6
+; CHECK-FP16-NEXT:    movgt r0, r7
 ; CHECK-FP16-NEXT:    cmp r7, #-2147483648
+; CHECK-FP16-NEXT:    vmov.32 d0[0], r5
 ; CHECK-FP16-NEXT:    movls r7, r8
-; CHECK-FP16-NEXT:    lsr r1, r1, #5
-; CHECK-FP16-NEXT:    cmp r1, #0
-; CHECK-FP16-NEXT:    moveq r7, r0
+; CHECK-FP16-NEXT:    cmn r9, #1
+; CHECK-FP16-NEXT:    vmov.32 d1[1], r4
+; CHECK-FP16-NEXT:    movne r7, r0
 ; CHECK-FP16-NEXT:    vmov.32 d0[1], r7
 ; CHECK-FP16-NEXT:    vpop {d8, d9}
 ; CHECK-FP16-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
@@ -3247,233 +3011,157 @@ entry:
 define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
 ; CHECK-NEON-LABEL: ustest_f16i32_mm:
 ; CHECK-NEON:       @ %bb.0: @ %entry
-; CHECK-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-NEON-NEXT:    .pad #4
-; CHECK-NEON-NEXT:    sub sp, sp, #4
+; CHECK-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
 ; CHECK-NEON-NEXT:    .vsave {d8, d9, d10}
 ; CHECK-NEON-NEXT:    vpush {d8, d9, d10}
 ; CHECK-NEON-NEXT:    vmov r0, s3
-; CHECK-NEON-NEXT:    vmov.f32 s16, s2
-; CHECK-NEON-NEXT:    vmov.f32 s18, s1
+; CHECK-NEON-NEXT:    vmov.f32 s18, s2
+; CHECK-NEON-NEXT:    vmov.f32 s16, s1
 ; CHECK-NEON-NEXT:    vmov.f32 s20, s0
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    bl __aeabi_f2lz
-; CHECK-NEON-NEXT:    mov r2, r0
-; CHECK-NEON-NEXT:    vmov r0, s18
+; CHECK-NEON-NEXT:    vmov r2, s20
 ; CHECK-NEON-NEXT:    cmp r1, #0
-; CHECK-NEON-NEXT:    mov r3, #0
-; CHECK-NEON-NEXT:    movwmi r3, #1
-; CHECK-NEON-NEXT:    clz r6, r1
-; CHECK-NEON-NEXT:    cmp r3, #0
 ; CHECK-NEON-NEXT:    mvn r3, #0
-; CHECK-NEON-NEXT:    movne r3, r2
-; CHECK-NEON-NEXT:    lsr r6, r6, #5
-; CHECK-NEON-NEXT:    cmp r6, #0
-; CHECK-NEON-NEXT:    mov r7, #0
-; CHECK-NEON-NEXT:    movne r3, r2
-; CHECK-NEON-NEXT:    cmp r1, #0
-; CHECK-NEON-NEXT:    movpl r1, r7
+; CHECK-NEON-NEXT:    mov r6, #0
+; CHECK-NEON-NEXT:    movmi r3, r0
+; CHECK-NEON-NEXT:    movpl r1, r6
+; CHECK-NEON-NEXT:    moveq r3, r0
 ; CHECK-NEON-NEXT:    cmp r1, #0
-; CHECK-NEON-NEXT:    mov r11, #0
-; CHECK-NEON-NEXT:    clz r1, r1
-; CHECK-NEON-NEXT:    movwgt r11, #1
-; CHECK-NEON-NEXT:    cmp r11, #0
-; CHECK-NEON-NEXT:    movne r11, r3
-; CHECK-NEON-NEXT:    lsr r1, r1, #5
+; CHECK-NEON-NEXT:    mov r7, #0
+; CHECK-NEON-NEXT:    vmov r8, s18
+; CHECK-NEON-NEXT:    movwgt r7, #1
+; CHECK-NEON-NEXT:    cmp r7, #0
+; CHECK-NEON-NEXT:    movne r7, r3
 ; CHECK-NEON-NEXT:    cmp r1, #0
 ; CHECK-NEON-NEXT:    mvn r9, #0
-; CHECK-NEON-NEXT:    vmov r8, s20
-; CHECK-NEON-NEXT:    movne r11, r3
+; CHECK-NEON-NEXT:    moveq r7, r3
+; CHECK-NEON-NEXT:    mov r0, r2
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    bl __aeabi_f2lz
-; CHECK-NEON-NEXT:    mov r4, r1
 ; CHECK-NEON-NEXT:    cmp r1, #0
-; CHECK-NEON-NEXT:    mov r1, #0
-; CHECK-NEON-NEXT:    mvn r10, #0
-; CHECK-NEON-NEXT:    movwmi r1, #1
-; CHECK-NEON-NEXT:    cmp r1, #0
-; CHECK-NEON-NEXT:    clz r1, r4
-; CHECK-NEON-NEXT:    movne r10, r0
-; CHECK-NEON-NEXT:    mov r6, #0
-; CHECK-NEON-NEXT:    lsr r1, r1, #5
+; CHECK-NEON-NEXT:    mvn r2, #0
+; CHECK-NEON-NEXT:    movmi r2, r0
+; CHECK-NEON-NEXT:    movpl r1, r6
+; CHECK-NEON-NEXT:    moveq r2, r0
 ; CHECK-NEON-NEXT:    cmp r1, #0
-; CHECK-NEON-NEXT:    movne r10, r0
-; CHECK-NEON-NEXT:    cmp r4, #0
-; CHECK-NEON-NEXT:    movpl r4, r7
-; CHECK-NEON-NEXT:    cmp r4, #0
-; CHECK-NEON-NEXT:    movwgt r6, #1
-; CHECK-NEON-NEXT:    cmp r6, #0
+; CHECK-NEON-NEXT:    mov r4, #0
 ; CHECK-NEON-NEXT:    mov r0, r8
-; CHECK-NEON-NEXT:    movne r6, r10
+; CHECK-NEON-NEXT:    movwgt r4, #1
+; CHECK-NEON-NEXT:    cmp r4, #0
+; CHECK-NEON-NEXT:    movne r4, r2
+; CHECK-NEON-NEXT:    cmp r1, #0
+; CHECK-NEON-NEXT:    moveq r4, r2
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    bl __aeabi_f2lz
 ; CHECK-NEON-NEXT:    cmp r1, #0
-; CHECK-NEON-NEXT:    mov r2, #0
-; CHECK-NEON-NEXT:    movwmi r2, #1
-; CHECK-NEON-NEXT:    clz r3, r1
-; CHECK-NEON-NEXT:    cmp r2, #0
 ; CHECK-NEON-NEXT:    mvn r2, #0
-; CHECK-NEON-NEXT:    movne r2, r0
-; CHECK-NEON-NEXT:    lsr r3, r3, #5
-; CHECK-NEON-NEXT:    cmp r3, #0
-; CHECK-NEON-NEXT:    mov r5, #0
-; CHECK-NEON-NEXT:    movne r2, r0
-; CHECK-NEON-NEXT:    cmp r1, #0
-; CHECK-NEON-NEXT:    movpl r1, r7
-; CHECK-NEON-NEXT:    clz r0, r1
+; CHECK-NEON-NEXT:    movmi r2, r0
+; CHECK-NEON-NEXT:    movpl r1, r6
+; CHECK-NEON-NEXT:    moveq r2, r0
+; CHECK-NEON-NEXT:    vmov r0, s16
 ; CHECK-NEON-NEXT:    cmp r1, #0
+; CHECK-NEON-NEXT:    mov r5, #0
 ; CHECK-NEON-NEXT:    movwgt r5, #1
 ; CHECK-NEON-NEXT:    cmp r5, #0
-; CHECK-NEON-NEXT:    lsr r0, r0, #5
-; CHECK-NEON-NEXT:    movne r5, r2
-; CHECK-NEON-NEXT:    cmp r0, #0
-; CHECK-NEON-NEXT:    vmov r0, s16
 ; CHECK-NEON-NEXT:    movne r5, r2
+; CHECK-NEON-NEXT:    cmp r1, #0
+; CHECK-NEON-NEXT:    moveq r5, r2
 ; CHECK-NEON-NEXT:    bl __aeabi_h2f
 ; CHECK-NEON-NEXT:    bl __aeabi_f2lz
 ; CHECK-NEON-NEXT:    cmp r1, #0
-; CHECK-NEON-NEXT:    mov r2, #0
-; CHECK-NEON-NEXT:    movwmi r2, #1
-; CHECK-NEON-NEXT:    cmp r2, #0
-; CHECK-NEON-NEXT:    clz r2, r1
-; CHECK-NEON-NEXT:    movne r9, r0
-; CHECK-NEON-NEXT:    vmov.32 d0[0], r5
-; CHECK-NEON-NEXT:    lsr r2, r2, #5
-; CHECK-NEON-NEXT:    cmp r2, #0
-; CHECK-NEON-NEXT:    movne r9, r0
+; CHECK-NEON-NEXT:    vmov.32 d1[0], r5
+; CHECK-NEON-NEXT:    movmi r9, r0
+; CHECK-NEON-NEXT:    movpl r1, r6
+; CHECK-NEON-NEXT:    moveq r9, r0
 ; CHECK-NEON-NEXT:    cmp r1, #0
-; CHECK-NEON-NEXT:    movpl r1, r7
-; CHECK-NEON-NEXT:    clz r0, r1
+; CHECK-NEON-NEXT:    movwgt r6, #1
+; CHECK-NEON-NEXT:    cmp r6, #0
+; CHECK-NEON-NEXT:    vmov.32 d0[0], r4
+; CHECK-NEON-NEXT:    movne r6, r9
 ; CHECK-NEON-NEXT:    cmp r1, #0
-; CHECK-NEON-NEXT:    movwgt r7, #1
-; CHECK-NEON-NEXT:    cmp r7, #0
-; CHECK-NEON-NEXT:    movne r7, r9
-; CHECK-NEON-NEXT:    lsr r0, r0, #5
-; CHECK-NEON-NEXT:    cmp r0, #0
-; CHECK-NEON-NEXT:    clz r0, r4
-; CHECK-NEON-NEXT:    movne r7, r9
-; CHECK-NEON-NEXT:    vmov.32 d1[0], r7
-; CHECK-NEON-NEXT:    lsr r0, r0, #5
-; CHECK-NEON-NEXT:    cmp r0, #0
-; CHECK-NEON-NEXT:    movne r6, r10
-; CHECK-NEON-NEXT:    vmov.32 d1[1], r11
+; CHECK-NEON-NEXT:    vmov.32 d1[1], r7
+; CHECK-NEON-NEXT:    moveq r6, r9
 ; CHECK-NEON-NEXT:    vmov.32 d0[1], r6
 ; CHECK-NEON-NEXT:    vpop {d8, d9, d10}
-; CHECK-NEON-NEXT:    add sp, sp, #4
-; CHECK-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+; CHECK-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
 ;
 ; CHECK-FP16-LABEL: ustest_f16i32_mm:
 ; CHECK-FP16:       @ %bb.0: @ %entry
-; CHECK-FP16-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-FP16-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-FP16-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; CHECK-FP16-NEXT:    push {r4, r5, r6, r7, r8, lr}
 ; CHECK-FP16-NEXT:    .vsave {d8, d9}
 ; CHECK-FP16-NEXT:    vpush {d8, d9}
 ; CHECK-FP16-NEXT:    vmov.u16 r0, d0[3]
 ; CHECK-FP16-NEXT:    vorr d8, d0, d0
-; CHECK-FP16-NEXT:    vmov.u16 r4, d0[1]
 ; CHECK-FP16-NEXT:    vmov s0, r0
 ; CHECK-FP16-NEXT:    bl __fixhfdi
-; CHECK-FP16-NEXT:    vmov.u16 r2, d8[0]
+; CHECK-FP16-NEXT:    vmov.u16 r2, d8[1]
 ; CHECK-FP16-NEXT:    cmp r1, #0
-; CHECK-FP16-NEXT:    clz r3, r1
+; CHECK-FP16-NEXT:    vmov.u16 r7, d8[0]
+; CHECK-FP16-NEXT:    mov r5, #0
+; CHECK-FP16-NEXT:    vmov.u16 r3, d8[2]
+; CHECK-FP16-NEXT:    movpl r1, r5
 ; CHECK-FP16-NEXT:    mov r6, #0
-; CHECK-FP16-NEXT:    mov r10, #0
-; CHECK-FP16-NEXT:    vmov s0, r4
-; CHECK-FP16-NEXT:    lsr r3, r3, #5
 ; CHECK-FP16-NEXT:    mvn r8, #0
-; CHECK-FP16-NEXT:    vmov s18, r2
-; CHECK-FP16-NEXT:    mov r2, #0
-; CHECK-FP16-NEXT:    movwmi r2, #1
-; CHECK-FP16-NEXT:    cmp r2, #0
+; CHECK-FP16-NEXT:    vmov s16, r2
 ; CHECK-FP16-NEXT:    mvn r2, #0
-; CHECK-FP16-NEXT:    movne r2, r0
-; CHECK-FP16-NEXT:    cmp r3, #0
-; CHECK-FP16-NEXT:    movne r2, r0
+; CHECK-FP16-NEXT:    movmi r2, r0
+; CHECK-FP16-NEXT:    vmov s0, r7
+; CHECK-FP16-NEXT:    moveq r2, r0
 ; CHECK-FP16-NEXT:    cmp r1, #0
-; CHECK-FP16-NEXT:    movpl r1, r6
-; CHECK-FP16-NEXT:    clz r0, r1
+; CHECK-FP16-NEXT:    movwgt r6, #1
+; CHECK-FP16-NEXT:    cmp r6, #0
+; CHECK-FP16-NEXT:    movne r6, r2
 ; CHECK-FP16-NEXT:    cmp r1, #0
-; CHECK-FP16-NEXT:    movwgt r10, #1
-; CHECK-FP16-NEXT:    cmp r10, #0
-; CHECK-FP16-NEXT:    movne r10, r2
-; CHECK-FP16-NEXT:    lsr r0, r0, #5
-; CHECK-FP16-NEXT:    cmp r0, #0
-; CHECK-FP16-NEXT:    movne r10, r2
+; CHECK-FP16-NEXT:    vmov s18, r3
+; CHECK-FP16-NEXT:    moveq r6, r2
 ; CHECK-FP16-NEXT:    bl __fixhfdi
-; CHECK-FP16-NEXT:    mov r4, r1
-; CHECK-FP16-NEXT:    cmp r1, #0
-; CHECK-FP16-NEXT:    mov r1, #0
 ; CHECK-FP16-NEXT:    vmov.f32 s0, s18
-; CHECK-FP16-NEXT:    movwmi r1, #1
-; CHECK-FP16-NEXT:    cmp r1, #0
-; CHECK-FP16-NEXT:    clz r1, r4
-; CHECK-FP16-NEXT:    mvn r9, #0
-; CHECK-FP16-NEXT:    movne r9, r0
-; CHECK-FP16-NEXT:    mov r5, #0
-; CHECK-FP16-NEXT:    lsr r1, r1, #5
 ; CHECK-FP16-NEXT:    cmp r1, #0
-; CHECK-FP16-NEXT:    movne r9, r0
-; CHECK-FP16-NEXT:    cmp r4, #0
-; CHECK-FP16-NEXT:    movpl r4, r6
-; CHECK-FP16-NEXT:    cmp r4, #0
-; CHECK-FP16-NEXT:    movwgt r5, #1
-; CHECK-FP16-NEXT:    cmp r5, #0
-; CHECK-FP16-NEXT:    movne r5, r9
-; CHECK-FP16-NEXT:    bl __fixhfdi
-; CHECK-FP16-NEXT:    cmp r1, #0
-; CHECK-FP16-NEXT:    mov r2, #0
-; CHECK-FP16-NEXT:    movwmi r2, #1
-; CHECK-FP16-NEXT:    clz r3, r1
-; CHECK-FP16-NEXT:    cmp r2, #0
 ; CHECK-FP16-NEXT:    mvn r2, #0
-; CHECK-FP16-NEXT:    movne r2, r0
-; CHECK-FP16-NEXT:    lsr r3, r3, #5
-; CHECK-FP16-NEXT:    cmp r3, #0
+; CHECK-FP16-NEXT:    movpl r1, r5
+; CHECK-FP16-NEXT:    movmi r2, r0
 ; CHECK-FP16-NEXT:    mov r7, #0
-; CHECK-FP16-NEXT:    movne r2, r0
+; CHECK-FP16-NEXT:    moveq r2, r0
 ; CHECK-FP16-NEXT:    cmp r1, #0
-; CHECK-FP16-NEXT:    movpl r1, r6
-; CHECK-FP16-NEXT:    clz r0, r1
-; CHECK-FP16-NEXT:    cmp r1, #0
-; CHECK-FP16-NEXT:    vmov.u16 r1, d8[2]
 ; CHECK-FP16-NEXT:    movwgt r7, #1
 ; CHECK-FP16-NEXT:    cmp r7, #0
 ; CHECK-FP16-NEXT:    movne r7, r2
-; CHECK-FP16-NEXT:    lsr r0, r0, #5
-; CHECK-FP16-NEXT:    cmp r0, #0
-; CHECK-FP16-NEXT:    movne r7, r2
-; CHECK-FP16-NEXT:    vmov s0, r1
+; CHECK-FP16-NEXT:    cmp r1, #0
+; CHECK-FP16-NEXT:    moveq r7, r2
 ; CHECK-FP16-NEXT:    bl __fixhfdi
+; CHECK-FP16-NEXT:    vmov.f32 s0, s16
 ; CHECK-FP16-NEXT:    cmp r1, #0
-; CHECK-FP16-NEXT:    mov r2, #0
-; CHECK-FP16-NEXT:    movwmi r2, #1
-; CHECK-FP16-NEXT:    cmp r2, #0
-; CHECK-FP16-NEXT:    clz r2, r1
-; CHECK-FP16-NEXT:    movne r8, r0
-; CHECK-FP16-NEXT:    vmov.32 d0[0], r7
-; CHECK-FP16-NEXT:    lsr r2, r2, #5
-; CHECK-FP16-NEXT:    cmp r2, #0
-; CHECK-FP16-NEXT:    movne r8, r0
+; CHECK-FP16-NEXT:    mvn r2, #0
+; CHECK-FP16-NEXT:    movpl r1, r5
+; CHECK-FP16-NEXT:    movmi r2, r0
+; CHECK-FP16-NEXT:    mov r4, #0
+; CHECK-FP16-NEXT:    moveq r2, r0
 ; CHECK-FP16-NEXT:    cmp r1, #0
-; CHECK-FP16-NEXT:    movpl r1, r6
-; CHECK-FP16-NEXT:    clz r0, r1
+; CHECK-FP16-NEXT:    movwgt r4, #1
+; CHECK-FP16-NEXT:    cmp r4, #0
+; CHECK-FP16-NEXT:    movne r4, r2
 ; CHECK-FP16-NEXT:    cmp r1, #0
-; CHECK-FP16-NEXT:    movwgt r6, #1
-; CHECK-FP16-NEXT:    cmp r6, #0
-; CHECK-FP16-NEXT:    movne r6, r8
-; CHECK-FP16-NEXT:    lsr r0, r0, #5
-; CHECK-FP16-NEXT:    cmp r0, #0
-; CHECK-FP16-NEXT:    clz r0, r4
-; CHECK-FP16-NEXT:    movne r6, r8
-; CHECK-FP16-NEXT:    vmov.32 d1[0], r6
-; CHECK-FP16-NEXT:    lsr r0, r0, #5
-; CHECK-FP16-NEXT:    cmp r0, #0
-; CHECK-FP16-NEXT:    movne r5, r9
-; CHECK-FP16-NEXT:    vmov.32 d1[1], r10
+; CHECK-FP16-NEXT:    moveq r4, r2
+; CHECK-FP16-NEXT:    bl __fixhfdi
+; CHECK-FP16-NEXT:    cmp r1, #0
+; CHECK-FP16-NEXT:    vmov.32 d1[0], r4
+; CHECK-FP16-NEXT:    movmi r8, r0
+; CHECK-FP16-NEXT:    movpl r1, r5
+; CHECK-FP16-NEXT:    moveq r8, r0
+; CHECK-FP16-NEXT:    cmp r1, #0
+; CHECK-FP16-NEXT:    movwgt r5, #1
+; CHECK-FP16-NEXT:    cmp r5, #0
+; CHECK-FP16-NEXT:    vmov.32 d0[0], r7
+; CHECK-FP16-NEXT:    movne r5, r8
+; CHECK-FP16-NEXT:    cmp r1, #0
+; CHECK-FP16-NEXT:    vmov.32 d1[1], r6
+; CHECK-FP16-NEXT:    moveq r5, r8
 ; CHECK-FP16-NEXT:    vmov.32 d0[1], r5
 ; CHECK-FP16-NEXT:    vpop {d8, d9}
-; CHECK-FP16-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+; CHECK-FP16-NEXT:    pop {r4, r5, r6, r7, r8, pc}
 entry:
   %conv = fptosi <4 x half> %x to <4 x i64>
   %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)

diff  --git a/llvm/test/CodeGen/Thumb2/active_lane_mask.ll b/llvm/test/CodeGen/Thumb2/active_lane_mask.ll
index f63477e038264..a5f9c511e0680 100644
--- a/llvm/test/CodeGen/Thumb2/active_lane_mask.ll
+++ b/llvm/test/CodeGen/Thumb2/active_lane_mask.ll
@@ -15,19 +15,15 @@ define <2 x i64> @v2i64(i32 %index, i32 %TC, <2 x i64> %V1, <2 x i64> %V2) {
 ; CHECK-NEXT:    adds r6, r0, #1
 ; CHECK-NEXT:    adc r4, r4, #0
 ; CHECK-NEXT:    subs.w r0, lr, #-1
-; CHECK-NEXT:    sbcs r0, r12, #0
 ; CHECK-NEXT:    vmov q1[2], q1[0], lr, r6
-; CHECK-NEXT:    cset r0, lo
+; CHECK-NEXT:    sbcs r0, r12, #0
 ; CHECK-NEXT:    vmov q1[3], q1[1], r12, r4
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov.w r0, #0
-; CHECK-NEXT:    csetm r12, ne
+; CHECK-NEXT:    csetm r12, lo
 ; CHECK-NEXT:    subs.w r6, r6, #-1
-; CHECK-NEXT:    sbcs r6, r4, #0
 ; CHECK-NEXT:    bfi r5, r12, #0, #8
-; CHECK-NEXT:    cset r6, lo
-; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    csetm r6, ne
+; CHECK-NEXT:    sbcs r6, r4, #0
+; CHECK-NEXT:    mov.w r0, #0
+; CHECK-NEXT:    csetm r6, lo
 ; CHECK-NEXT:    bfi r5, r6, #8, #8
 ; CHECK-NEXT:    vmsr p0, r5
 ; CHECK-NEXT:    vpsel q1, q1, q0
@@ -38,17 +34,13 @@ define <2 x i64> @v2i64(i32 %index, i32 %TC, <2 x i64> %V1, <2 x i64> %V2) {
 ; CHECK-NEXT:    subs r1, r6, r1
 ; CHECK-NEXT:    sbcs.w r1, r5, r4
 ; CHECK-NEXT:    vmov r5, r4, d1
-; CHECK-NEXT:    cset r1, lo
+; CHECK-NEXT:    csetm r1, lo
 ; CHECK-NEXT:    vldr d1, [sp, #16]
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csetm r1, ne
 ; CHECK-NEXT:    bfi r0, r1, #0, #8
 ; CHECK-NEXT:    vmov r1, r6, d3
 ; CHECK-NEXT:    subs r1, r1, r5
 ; CHECK-NEXT:    sbcs.w r1, r6, r4
-; CHECK-NEXT:    cset r1, lo
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    csetm r1, lo
 ; CHECK-NEXT:    bfi r0, r1, #8, #8
 ; CHECK-NEXT:    vmsr p0, r0
 ; CHECK-NEXT:    add r0, sp, #24

diff  --git a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
index 35b418f692265..08bcba9b5cd7d 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
@@ -23,37 +23,29 @@ define arm_aapcs_vfpcc <2 x i32> @stest_f64i32(<2 x double> %x) {
 ; CHECK-NEXT:    subs.w r3, r4, r12
 ; CHECK-NEXT:    sbcs r3, r5, #0
 ; CHECK-NEXT:    vmov q1[2], q1[0], r4, r0
-; CHECK-NEXT:    cset r3, lt
-; CHECK-NEXT:    vmov q1[3], q1[1], r5, r1
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    mov.w r5, #0
-; CHECK-NEXT:    csetm r3, ne
+; CHECK-NEXT:    csetm r3, lt
 ; CHECK-NEXT:    subs.w r0, r0, r12
 ; CHECK-NEXT:    sbcs r0, r1, #0
+; CHECK-NEXT:    vmov q1[3], q1[1], r5, r1
+; CHECK-NEXT:    mov.w r5, #0
+; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    bfi r5, r3, #0, #8
-; CHECK-NEXT:    cset r0, lt
 ; CHECK-NEXT:    mov.w r12, #-1
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov.w r2, #0
-; CHECK-NEXT:    csetm r0, ne
-; CHECK-NEXT:    adr r4, .LCPI0_1
 ; CHECK-NEXT:    bfi r5, r0, #8, #8
+; CHECK-NEXT:    movs r2, #0
 ; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    adr r4, .LCPI0_1
 ; CHECK-NEXT:    vpsel q0, q1, q0
 ; CHECK-NEXT:    vldrw.u32 q1, [r4]
 ; CHECK-NEXT:    vmov r0, r1, d0
 ; CHECK-NEXT:    vmov r3, r5, d1
 ; CHECK-NEXT:    rsbs.w r0, r0, #-2147483648
 ; CHECK-NEXT:    sbcs.w r0, r12, r1
-; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    bfi r2, r0, #0, #8
 ; CHECK-NEXT:    rsbs.w r0, r3, #-2147483648
 ; CHECK-NEXT:    sbcs.w r0, r12, r5
-; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    bfi r2, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r2
 ; CHECK-NEXT:    vpsel q0, q0, q1
@@ -99,18 +91,14 @@ define arm_aapcs_vfpcc <2 x i32> @utest_f64i32(<2 x double> %x) {
 ; CHECK-NEXT:    vmov q1[2], q1[0], r4, r0
 ; CHECK-NEXT:    sbcs r3, r5, #0
 ; CHECK-NEXT:    mov.w r2, #0
-; CHECK-NEXT:    cset r3, lo
-; CHECK-NEXT:    vmov.i64 q0, #0xffffffff
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    vmov q1[3], q1[1], r5, r1
-; CHECK-NEXT:    csetm r3, ne
+; CHECK-NEXT:    csetm r3, lo
 ; CHECK-NEXT:    subs.w r0, r0, #-1
 ; CHECK-NEXT:    sbcs r0, r1, #0
 ; CHECK-NEXT:    bfi r2, r3, #0, #8
-; CHECK-NEXT:    cset r0, lo
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lo
+; CHECK-NEXT:    vmov.i64 q0, #0xffffffff
 ; CHECK-NEXT:    bfi r2, r0, #8, #8
+; CHECK-NEXT:    vmov q1[3], q1[1], r5, r1
 ; CHECK-NEXT:    vmsr p0, r2
 ; CHECK-NEXT:    vpsel q0, q1, q0
 ; CHECK-NEXT:    vpop {d8, d9}
@@ -141,35 +129,27 @@ define arm_aapcs_vfpcc <2 x i32> @ustest_f64i32(<2 x double> %x) {
 ; CHECK-NEXT:    vmov q1[2], q1[0], r4, r0
 ; CHECK-NEXT:    sbcs r3, r5, #0
 ; CHECK-NEXT:    vmov q1[3], q1[1], r5, r1
-; CHECK-NEXT:    cset r3, lt
-; CHECK-NEXT:    movs r5, #0
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    vmov.i64 q0, #0xffffffff
-; CHECK-NEXT:    csetm r3, ne
+; CHECK-NEXT:    csetm r3, lt
 ; CHECK-NEXT:    subs.w r0, r0, #-1
+; CHECK-NEXT:    mov.w r5, #0
 ; CHECK-NEXT:    sbcs r0, r1, #0
 ; CHECK-NEXT:    bfi r5, r3, #0, #8
-; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    movs r2, #0
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    bfi r5, r0, #8, #8
+; CHECK-NEXT:    vmov.i64 q0, #0xffffffff
 ; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    movs r2, #0
 ; CHECK-NEXT:    vpsel q0, q1, q0
 ; CHECK-NEXT:    vmov.i32 q1, #0x0
 ; CHECK-NEXT:    vmov r0, r1, d0
 ; CHECK-NEXT:    vmov r3, r5, d1
 ; CHECK-NEXT:    rsbs r0, r0, #0
 ; CHECK-NEXT:    sbcs.w r0, r2, r1
-; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    rsbs r1, r3, #0
 ; CHECK-NEXT:    sbcs.w r1, r2, r5
 ; CHECK-NEXT:    bfi r2, r0, #0, #8
-; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    bfi r2, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r2
 ; CHECK-NEXT:    vpsel q0, q0, q1
@@ -220,21 +200,17 @@ define arm_aapcs_vfpcc <4 x i32> @utest_f32i32(<4 x float> %x) {
 ; CHECK-NEXT:    vmov q0[2], q0[0], r5, r0
 ; CHECK-NEXT:    sbcs r2, r6, #0
 ; CHECK-NEXT:    mov.w r3, #0
-; CHECK-NEXT:    cset r2, lo
-; CHECK-NEXT:    vmov.i64 q5, #0xffffffff
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    vmov q0[3], q0[1], r6, r1
-; CHECK-NEXT:    csetm r2, ne
+; CHECK-NEXT:    csetm r2, lo
 ; CHECK-NEXT:    subs.w r0, r0, #-1
 ; CHECK-NEXT:    sbcs r0, r1, #0
 ; CHECK-NEXT:    bfi r3, r2, #0, #8
-; CHECK-NEXT:    cset r0, lo
-; CHECK-NEXT:    movs r7, #0
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lo
+; CHECK-NEXT:    vmov.i64 q5, #0xffffffff
 ; CHECK-NEXT:    bfi r3, r0, #8, #8
 ; CHECK-NEXT:    vmov r0, r4, d8
+; CHECK-NEXT:    vmov q0[3], q0[1], r6, r1
 ; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r7, #0
 ; CHECK-NEXT:    vpsel q6, q0, q5
 ; CHECK-NEXT:    bl __aeabi_f2ulz
 ; CHECK-NEXT:    mov r5, r0
@@ -245,15 +221,11 @@ define arm_aapcs_vfpcc <4 x i32> @utest_f32i32(<4 x float> %x) {
 ; CHECK-NEXT:    vmov q0[2], q0[0], r5, r0
 ; CHECK-NEXT:    sbcs r2, r6, #0
 ; CHECK-NEXT:    vmov q0[3], q0[1], r6, r1
-; CHECK-NEXT:    cset r2, lo
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csetm r2, ne
+; CHECK-NEXT:    csetm r2, lo
 ; CHECK-NEXT:    subs.w r0, r0, #-1
 ; CHECK-NEXT:    sbcs r0, r1, #0
 ; CHECK-NEXT:    bfi r7, r2, #0, #8
-; CHECK-NEXT:    cset r0, lo
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lo
 ; CHECK-NEXT:    bfi r7, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r7
 ; CHECK-NEXT:    vpsel q0, q0, q5
@@ -368,20 +340,16 @@ define arm_aapcs_vfpcc <4 x i32> @ustest_f16i32(<4 x half> %x) {
 ; CHECK-NEXT:    mov.w r6, #0
 ; CHECK-NEXT:    sbcs.w r2, r6, r5
 ; CHECK-NEXT:    vmov q0[2], q0[0], r4, r0
-; CHECK-NEXT:    cset r2, lt
-; CHECK-NEXT:    movs r3, #0
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    vmov.i32 q5, #0x0
-; CHECK-NEXT:    csetm r2, ne
+; CHECK-NEXT:    csetm r2, lt
 ; CHECK-NEXT:    rsbs r0, r0, #0
+; CHECK-NEXT:    mov.w r3, #0
 ; CHECK-NEXT:    sbcs.w r0, r6, r1
 ; CHECK-NEXT:    bfi r3, r2, #0, #8
-; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    vmov q0[3], q0[1], r5, r1
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    bfi r3, r0, #8, #8
 ; CHECK-NEXT:    vmov.u16 r0, q4[0]
+; CHECK-NEXT:    vmov.i32 q5, #0x0
+; CHECK-NEXT:    vmov q0[3], q0[1], r5, r1
 ; CHECK-NEXT:    vmsr p0, r3
 ; CHECK-NEXT:    vpsel q6, q0, q5
 ; CHECK-NEXT:    bl __fixhfdi
@@ -393,15 +361,11 @@ define arm_aapcs_vfpcc <4 x i32> @ustest_f16i32(<4 x half> %x) {
 ; CHECK-NEXT:    vmov q0[2], q0[0], r4, r0
 ; CHECK-NEXT:    sbcs.w r2, r6, r5
 ; CHECK-NEXT:    vmov q0[3], q0[1], r5, r1
-; CHECK-NEXT:    cset r2, lt
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csetm r2, ne
+; CHECK-NEXT:    csetm r2, lt
 ; CHECK-NEXT:    rsbs r0, r0, #0
 ; CHECK-NEXT:    sbcs.w r0, r6, r1
 ; CHECK-NEXT:    bfi r6, r2, #0, #8
-; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    bfi r6, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r6
 ; CHECK-NEXT:    vpsel q0, q0, q5
@@ -450,16 +414,12 @@ define arm_aapcs_vfpcc <2 x i16> @stest_f64i16(<2 x double> %x) {
 ; CHECK-NEXT:    subs r1, r1, r4
 ; CHECK-NEXT:    sbcs r1, r2, #0
 ; CHECK-NEXT:    mov.w r2, #0
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    csetm r1, lt
 ; CHECK-NEXT:    bfi r2, r1, #0, #8
 ; CHECK-NEXT:    subs r1, r3, r4
 ; CHECK-NEXT:    sbcs r1, r5, #0
 ; CHECK-NEXT:    adr r4, .LCPI9_1
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    csetm r1, lt
 ; CHECK-NEXT:    bfi r2, r1, #8, #8
 ; CHECK-NEXT:    vmsr p0, r2
 ; CHECK-NEXT:    vpsel q0, q0, q1
@@ -468,15 +428,11 @@ define arm_aapcs_vfpcc <2 x i16> @stest_f64i16(<2 x double> %x) {
 ; CHECK-NEXT:    vmov r3, r5, d1
 ; CHECK-NEXT:    subs.w r1, lr, r1
 ; CHECK-NEXT:    sbcs.w r1, r12, r2
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    csetm r1, lt
 ; CHECK-NEXT:    bfi r0, r1, #0, #8
 ; CHECK-NEXT:    subs.w r1, lr, r3
 ; CHECK-NEXT:    sbcs.w r1, r12, r5
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    csetm r1, lt
 ; CHECK-NEXT:    bfi r0, r1, #8, #8
 ; CHECK-NEXT:    vmsr p0, r0
 ; CHECK-NEXT:    vpsel q0, q0, q1
@@ -527,15 +483,11 @@ define arm_aapcs_vfpcc <2 x i16> @utest_f64i16(<2 x double> %x) {
 ; CHECK-NEXT:    vmov r2, r3, d1
 ; CHECK-NEXT:    subs r0, r0, r4
 ; CHECK-NEXT:    sbcs r0, r1, #0
-; CHECK-NEXT:    cset r0, lo
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lo
 ; CHECK-NEXT:    bfi r5, r0, #0, #8
 ; CHECK-NEXT:    subs r0, r2, r4
 ; CHECK-NEXT:    sbcs r0, r3, #0
-; CHECK-NEXT:    cset r0, lo
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lo
 ; CHECK-NEXT:    bfi r5, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r5
 ; CHECK-NEXT:    vpsel q0, q0, q1
@@ -573,15 +525,11 @@ define arm_aapcs_vfpcc <2 x i16> @ustest_f64i16(<2 x double> %x) {
 ; CHECK-NEXT:    subs r1, r1, r4
 ; CHECK-NEXT:    sbcs r1, r2, #0
 ; CHECK-NEXT:    mov.w r2, #0
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    csetm r1, lt
 ; CHECK-NEXT:    bfi r2, r1, #0, #8
 ; CHECK-NEXT:    subs r1, r3, r4
 ; CHECK-NEXT:    sbcs r1, r5, #0
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    csetm r1, lt
 ; CHECK-NEXT:    bfi r2, r1, #8, #8
 ; CHECK-NEXT:    vmsr p0, r2
 ; CHECK-NEXT:    vpsel q0, q0, q1
@@ -590,15 +538,11 @@ define arm_aapcs_vfpcc <2 x i16> @ustest_f64i16(<2 x double> %x) {
 ; CHECK-NEXT:    vmov r3, r5, d1
 ; CHECK-NEXT:    rsbs r1, r1, #0
 ; CHECK-NEXT:    sbcs.w r1, r0, r2
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    csetm r1, lt
 ; CHECK-NEXT:    rsbs r2, r3, #0
 ; CHECK-NEXT:    sbcs.w r2, r0, r5
 ; CHECK-NEXT:    bfi r0, r1, #0, #8
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    csetm r1, lt
 ; CHECK-NEXT:    bfi r0, r1, #8, #8
 ; CHECK-NEXT:    vmsr p0, r0
 ; CHECK-NEXT:    vpsel q0, q0, q1
@@ -1299,37 +1243,29 @@ define arm_aapcs_vfpcc <2 x i32> @stest_f64i32_mm(<2 x double> %x) {
 ; CHECK-NEXT:    subs.w r3, r4, r12
 ; CHECK-NEXT:    sbcs r3, r5, #0
 ; CHECK-NEXT:    vmov q1[2], q1[0], r4, r0
-; CHECK-NEXT:    cset r3, lt
-; CHECK-NEXT:    vmov q1[3], q1[1], r5, r1
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    mov.w r5, #0
-; CHECK-NEXT:    csetm r3, ne
+; CHECK-NEXT:    csetm r3, lt
 ; CHECK-NEXT:    subs.w r0, r0, r12
 ; CHECK-NEXT:    sbcs r0, r1, #0
+; CHECK-NEXT:    vmov q1[3], q1[1], r5, r1
+; CHECK-NEXT:    mov.w r5, #0
+; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    bfi r5, r3, #0, #8
-; CHECK-NEXT:    cset r0, lt
 ; CHECK-NEXT:    mov.w r12, #-1
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov.w r2, #0
-; CHECK-NEXT:    csetm r0, ne
-; CHECK-NEXT:    adr r4, .LCPI27_1
 ; CHECK-NEXT:    bfi r5, r0, #8, #8
+; CHECK-NEXT:    movs r2, #0
 ; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    adr r4, .LCPI27_1
 ; CHECK-NEXT:    vpsel q0, q1, q0
 ; CHECK-NEXT:    vldrw.u32 q1, [r4]
 ; CHECK-NEXT:    vmov r0, r1, d0
 ; CHECK-NEXT:    vmov r3, r5, d1
 ; CHECK-NEXT:    rsbs.w r0, r0, #-2147483648
 ; CHECK-NEXT:    sbcs.w r0, r12, r1
-; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    bfi r2, r0, #0, #8
 ; CHECK-NEXT:    rsbs.w r0, r3, #-2147483648
 ; CHECK-NEXT:    sbcs.w r0, r12, r5
-; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    bfi r2, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r2
 ; CHECK-NEXT:    vpsel q0, q0, q1
@@ -1373,18 +1309,14 @@ define arm_aapcs_vfpcc <2 x i32> @utest_f64i32_mm(<2 x double> %x) {
 ; CHECK-NEXT:    vmov q1[2], q1[0], r4, r0
 ; CHECK-NEXT:    sbcs r3, r5, #0
 ; CHECK-NEXT:    mov.w r2, #0
-; CHECK-NEXT:    cset r3, lo
-; CHECK-NEXT:    vmov.i64 q0, #0xffffffff
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    vmov q1[3], q1[1], r5, r1
-; CHECK-NEXT:    csetm r3, ne
+; CHECK-NEXT:    csetm r3, lo
 ; CHECK-NEXT:    subs.w r0, r0, #-1
 ; CHECK-NEXT:    sbcs r0, r1, #0
 ; CHECK-NEXT:    bfi r2, r3, #0, #8
-; CHECK-NEXT:    cset r0, lo
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lo
+; CHECK-NEXT:    vmov.i64 q0, #0xffffffff
 ; CHECK-NEXT:    bfi r2, r0, #8, #8
+; CHECK-NEXT:    vmov q1[3], q1[1], r5, r1
 ; CHECK-NEXT:    vmsr p0, r2
 ; CHECK-NEXT:    vpsel q0, q1, q0
 ; CHECK-NEXT:    vpop {d8, d9}
@@ -1414,35 +1346,27 @@ define arm_aapcs_vfpcc <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
 ; CHECK-NEXT:    vmov q1[2], q1[0], r4, r0
 ; CHECK-NEXT:    sbcs r3, r5, #0
 ; CHECK-NEXT:    vmov q1[3], q1[1], r5, r1
-; CHECK-NEXT:    cset r3, lt
-; CHECK-NEXT:    movs r5, #0
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    vmov.i64 q0, #0xffffffff
-; CHECK-NEXT:    csetm r3, ne
+; CHECK-NEXT:    csetm r3, lt
 ; CHECK-NEXT:    subs.w r0, r0, #-1
+; CHECK-NEXT:    mov.w r5, #0
 ; CHECK-NEXT:    sbcs r0, r1, #0
 ; CHECK-NEXT:    bfi r5, r3, #0, #8
-; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    movs r2, #0
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    bfi r5, r0, #8, #8
+; CHECK-NEXT:    vmov.i64 q0, #0xffffffff
 ; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    movs r2, #0
 ; CHECK-NEXT:    vpsel q0, q1, q0
 ; CHECK-NEXT:    vmov.i32 q1, #0x0
 ; CHECK-NEXT:    vmov r0, r1, d0
 ; CHECK-NEXT:    vmov r3, r5, d1
 ; CHECK-NEXT:    rsbs r0, r0, #0
 ; CHECK-NEXT:    sbcs.w r0, r2, r1
-; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    rsbs r1, r3, #0
 ; CHECK-NEXT:    sbcs.w r1, r2, r5
 ; CHECK-NEXT:    bfi r2, r0, #0, #8
-; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    bfi r2, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r2
 ; CHECK-NEXT:    vpsel q0, q0, q1
@@ -1489,21 +1413,17 @@ define arm_aapcs_vfpcc <4 x i32> @utest_f32i32_mm(<4 x float> %x) {
 ; CHECK-NEXT:    vmov q0[2], q0[0], r5, r0
 ; CHECK-NEXT:    sbcs r2, r6, #0
 ; CHECK-NEXT:    mov.w r3, #0
-; CHECK-NEXT:    cset r2, lo
-; CHECK-NEXT:    vmov.i64 q5, #0xffffffff
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    vmov q0[3], q0[1], r6, r1
-; CHECK-NEXT:    csetm r2, ne
+; CHECK-NEXT:    csetm r2, lo
 ; CHECK-NEXT:    subs.w r0, r0, #-1
 ; CHECK-NEXT:    sbcs r0, r1, #0
 ; CHECK-NEXT:    bfi r3, r2, #0, #8
-; CHECK-NEXT:    cset r0, lo
-; CHECK-NEXT:    movs r7, #0
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lo
+; CHECK-NEXT:    vmov.i64 q5, #0xffffffff
 ; CHECK-NEXT:    bfi r3, r0, #8, #8
 ; CHECK-NEXT:    vmov r0, r4, d8
+; CHECK-NEXT:    vmov q0[3], q0[1], r6, r1
 ; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r7, #0
 ; CHECK-NEXT:    vpsel q6, q0, q5
 ; CHECK-NEXT:    bl __aeabi_f2ulz
 ; CHECK-NEXT:    mov r5, r0
@@ -1514,15 +1434,11 @@ define arm_aapcs_vfpcc <4 x i32> @utest_f32i32_mm(<4 x float> %x) {
 ; CHECK-NEXT:    vmov q0[2], q0[0], r5, r0
 ; CHECK-NEXT:    sbcs r2, r6, #0
 ; CHECK-NEXT:    vmov q0[3], q0[1], r6, r1
-; CHECK-NEXT:    cset r2, lo
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csetm r2, ne
+; CHECK-NEXT:    csetm r2, lo
 ; CHECK-NEXT:    subs.w r0, r0, #-1
 ; CHECK-NEXT:    sbcs r0, r1, #0
 ; CHECK-NEXT:    bfi r7, r2, #0, #8
-; CHECK-NEXT:    cset r0, lo
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lo
 ; CHECK-NEXT:    bfi r7, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r7
 ; CHECK-NEXT:    vpsel q0, q0, q5
@@ -1631,20 +1547,16 @@ define arm_aapcs_vfpcc <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
 ; CHECK-NEXT:    mov.w r6, #0
 ; CHECK-NEXT:    sbcs.w r2, r6, r5
 ; CHECK-NEXT:    vmov q0[2], q0[0], r4, r0
-; CHECK-NEXT:    cset r2, lt
-; CHECK-NEXT:    movs r3, #0
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    vmov.i32 q5, #0x0
-; CHECK-NEXT:    csetm r2, ne
+; CHECK-NEXT:    csetm r2, lt
 ; CHECK-NEXT:    rsbs r0, r0, #0
+; CHECK-NEXT:    mov.w r3, #0
 ; CHECK-NEXT:    sbcs.w r0, r6, r1
 ; CHECK-NEXT:    bfi r3, r2, #0, #8
-; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    vmov q0[3], q0[1], r5, r1
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    bfi r3, r0, #8, #8
 ; CHECK-NEXT:    vmov.u16 r0, q4[0]
+; CHECK-NEXT:    vmov.i32 q5, #0x0
+; CHECK-NEXT:    vmov q0[3], q0[1], r5, r1
 ; CHECK-NEXT:    vmsr p0, r3
 ; CHECK-NEXT:    vpsel q6, q0, q5
 ; CHECK-NEXT:    bl __fixhfdi
@@ -1656,15 +1568,11 @@ define arm_aapcs_vfpcc <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
 ; CHECK-NEXT:    vmov q0[2], q0[0], r4, r0
 ; CHECK-NEXT:    sbcs.w r2, r6, r5
 ; CHECK-NEXT:    vmov q0[3], q0[1], r5, r1
-; CHECK-NEXT:    cset r2, lt
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csetm r2, ne
+; CHECK-NEXT:    csetm r2, lt
 ; CHECK-NEXT:    rsbs r0, r0, #0
 ; CHECK-NEXT:    sbcs.w r0, r6, r1
 ; CHECK-NEXT:    bfi r6, r2, #0, #8
-; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    bfi r6, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r6
 ; CHECK-NEXT:    vpsel q0, q0, q5
@@ -1711,16 +1619,12 @@ define arm_aapcs_vfpcc <2 x i16> @stest_f64i16_mm(<2 x double> %x) {
 ; CHECK-NEXT:    subs r1, r1, r4
 ; CHECK-NEXT:    sbcs r1, r2, #0
 ; CHECK-NEXT:    mov.w r2, #0
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    csetm r1, lt
 ; CHECK-NEXT:    bfi r2, r1, #0, #8
 ; CHECK-NEXT:    subs r1, r3, r4
 ; CHECK-NEXT:    sbcs r1, r5, #0
 ; CHECK-NEXT:    adr r4, .LCPI36_1
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    csetm r1, lt
 ; CHECK-NEXT:    bfi r2, r1, #8, #8
 ; CHECK-NEXT:    vmsr p0, r2
 ; CHECK-NEXT:    vpsel q0, q0, q1
@@ -1729,15 +1633,11 @@ define arm_aapcs_vfpcc <2 x i16> @stest_f64i16_mm(<2 x double> %x) {
 ; CHECK-NEXT:    vmov r3, r5, d1
 ; CHECK-NEXT:    subs.w r1, lr, r1
 ; CHECK-NEXT:    sbcs.w r1, r12, r2
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    csetm r1, lt
 ; CHECK-NEXT:    bfi r0, r1, #0, #8
 ; CHECK-NEXT:    subs.w r1, lr, r3
 ; CHECK-NEXT:    sbcs.w r1, r12, r5
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    csetm r1, lt
 ; CHECK-NEXT:    bfi r0, r1, #8, #8
 ; CHECK-NEXT:    vmsr p0, r0
 ; CHECK-NEXT:    vpsel q0, q0, q1
@@ -1786,15 +1686,11 @@ define arm_aapcs_vfpcc <2 x i16> @utest_f64i16_mm(<2 x double> %x) {
 ; CHECK-NEXT:    vmov r2, r3, d1
 ; CHECK-NEXT:    subs r0, r0, r4
 ; CHECK-NEXT:    sbcs r0, r1, #0
-; CHECK-NEXT:    cset r0, lo
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lo
 ; CHECK-NEXT:    bfi r5, r0, #0, #8
 ; CHECK-NEXT:    subs r0, r2, r4
 ; CHECK-NEXT:    sbcs r0, r3, #0
-; CHECK-NEXT:    cset r0, lo
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lo
 ; CHECK-NEXT:    bfi r5, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r5
 ; CHECK-NEXT:    vpsel q0, q0, q1
@@ -1831,15 +1727,11 @@ define arm_aapcs_vfpcc <2 x i16> @ustest_f64i16_mm(<2 x double> %x) {
 ; CHECK-NEXT:    subs r1, r1, r4
 ; CHECK-NEXT:    sbcs r1, r2, #0
 ; CHECK-NEXT:    mov.w r2, #0
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    csetm r1, lt
 ; CHECK-NEXT:    bfi r2, r1, #0, #8
 ; CHECK-NEXT:    subs r1, r3, r4
 ; CHECK-NEXT:    sbcs r1, r5, #0
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    csetm r1, lt
 ; CHECK-NEXT:    bfi r2, r1, #8, #8
 ; CHECK-NEXT:    vmsr p0, r2
 ; CHECK-NEXT:    vpsel q0, q0, q1
@@ -1848,15 +1740,11 @@ define arm_aapcs_vfpcc <2 x i16> @ustest_f64i16_mm(<2 x double> %x) {
 ; CHECK-NEXT:    vmov r3, r5, d1
 ; CHECK-NEXT:    rsbs r1, r1, #0
 ; CHECK-NEXT:    sbcs.w r1, r0, r2
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    csetm r1, lt
 ; CHECK-NEXT:    rsbs r2, r3, #0
 ; CHECK-NEXT:    sbcs.w r2, r0, r5
 ; CHECK-NEXT:    bfi r0, r1, #0, #8
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    csetm r1, lt
 ; CHECK-NEXT:    bfi r0, r1, #8, #8
 ; CHECK-NEXT:    vmsr p0, r0
 ; CHECK-NEXT:    vpsel q0, q0, q1

diff  --git a/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll b/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll
index f92c575b7d221..c3d4276c712c6 100644
--- a/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll
@@ -95,55 +95,47 @@ define void @foo_sext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32>
 ; CHECK-LE-NEXT:    sub sp, #4
 ; CHECK-LE-NEXT:    ldrd r12, lr, [r1]
 ; CHECK-LE-NEXT:    movs r3, #0
-; CHECK-LE-NEXT:    @ implicit-def: $q0
+; CHECK-LE-NEXT:    @ implicit-def: $q1
 ; CHECK-LE-NEXT:    rsbs.w r1, r12, #0
-; CHECK-LE-NEXT:    vmov q1[2], q1[0], r12, lr
+; CHECK-LE-NEXT:    vmov q0[2], q0[0], r12, lr
 ; CHECK-LE-NEXT:    sbcs.w r1, r3, r12, asr #31
-; CHECK-LE-NEXT:    cset r1, lt
-; CHECK-LE-NEXT:    cmp r1, #0
-; CHECK-LE-NEXT:    csetm r1, ne
+; CHECK-LE-NEXT:    csetm r1, lt
 ; CHECK-LE-NEXT:    rsbs.w r4, lr, #0
 ; CHECK-LE-NEXT:    sbcs.w r4, r3, lr, asr #31
 ; CHECK-LE-NEXT:    bfi r3, r1, #0, #1
-; CHECK-LE-NEXT:    cset r1, lt
-; CHECK-LE-NEXT:    cmp r1, #0
-; CHECK-LE-NEXT:    csetm r1, ne
+; CHECK-LE-NEXT:    csetm r1, lt
 ; CHECK-LE-NEXT:    bfi r3, r1, #1, #1
 ; CHECK-LE-NEXT:    lsls r1, r3, #31
 ; CHECK-LE-NEXT:    itt ne
 ; CHECK-LE-NEXT:    ldrne r1, [r2]
-; CHECK-LE-NEXT:    vmovne.32 q0[0], r1
+; CHECK-LE-NEXT:    vmovne.32 q1[0], r1
 ; CHECK-LE-NEXT:    lsls r1, r3, #30
 ; CHECK-LE-NEXT:    itt mi
 ; CHECK-LE-NEXT:    ldrmi r1, [r2, #4]
-; CHECK-LE-NEXT:    vmovmi.32 q0[2], r1
-; CHECK-LE-NEXT:    vmov r2, s2
+; CHECK-LE-NEXT:    vmovmi.32 q1[2], r1
+; CHECK-LE-NEXT:    vmov r2, s6
 ; CHECK-LE-NEXT:    movs r1, #0
-; CHECK-LE-NEXT:    vmov r3, s4
-; CHECK-LE-NEXT:    vmov r4, s0
-; CHECK-LE-NEXT:    vmov q0[2], q0[0], r4, r2
+; CHECK-LE-NEXT:    vmov r3, s0
+; CHECK-LE-NEXT:    vmov r4, s4
+; CHECK-LE-NEXT:    vmov q1[2], q1[0], r4, r2
 ; CHECK-LE-NEXT:    rsbs r5, r3, #0
 ; CHECK-LE-NEXT:    asr.w r12, r2, #31
 ; CHECK-LE-NEXT:    sbcs.w r2, r1, r3, asr #31
-; CHECK-LE-NEXT:    vmov r3, s6
-; CHECK-LE-NEXT:    cset r2, lt
+; CHECK-LE-NEXT:    vmov r3, s2
+; CHECK-LE-NEXT:    csetm r2, lt
 ; CHECK-LE-NEXT:    asr.w lr, r4, #31
-; CHECK-LE-NEXT:    cmp r2, #0
-; CHECK-LE-NEXT:    vmov q0[3], q0[1], lr, r12
-; CHECK-LE-NEXT:    csetm r2, ne
+; CHECK-LE-NEXT:    vmov q1[3], q1[1], lr, r12
 ; CHECK-LE-NEXT:    rsbs r5, r3, #0
 ; CHECK-LE-NEXT:    sbcs.w r3, r1, r3, asr #31
 ; CHECK-LE-NEXT:    bfi r1, r2, #0, #1
-; CHECK-LE-NEXT:    cset r2, lt
-; CHECK-LE-NEXT:    cmp r2, #0
-; CHECK-LE-NEXT:    csetm r2, ne
+; CHECK-LE-NEXT:    csetm r2, lt
 ; CHECK-LE-NEXT:    bfi r1, r2, #1, #1
 ; CHECK-LE-NEXT:    lsls r2, r1, #31
 ; CHECK-LE-NEXT:    it ne
-; CHECK-LE-NEXT:    vstrne d0, [r0]
+; CHECK-LE-NEXT:    vstrne d2, [r0]
 ; CHECK-LE-NEXT:    lsls r1, r1, #30
 ; CHECK-LE-NEXT:    it mi
-; CHECK-LE-NEXT:    vstrmi d1, [r0, #8]
+; CHECK-LE-NEXT:    vstrmi d3, [r0, #8]
 ; CHECK-LE-NEXT:    add sp, #4
 ; CHECK-LE-NEXT:    pop {r4, r5, r7, pc}
 ;
@@ -157,17 +149,13 @@ define void @foo_sext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32>
 ; CHECK-BE-NEXT:    rsbs.w r3, lr, #0
 ; CHECK-BE-NEXT:    mov.w r1, #0
 ; CHECK-BE-NEXT:    sbcs.w r3, r1, lr, asr #31
-; CHECK-BE-NEXT:    cset r3, lt
 ; CHECK-BE-NEXT:    vmov q0[3], q0[1], r12, lr
-; CHECK-BE-NEXT:    cmp r3, #0
-; CHECK-BE-NEXT:    @ implicit-def: $q2
-; CHECK-BE-NEXT:    csetm lr, ne
+; CHECK-BE-NEXT:    csetm lr, lt
 ; CHECK-BE-NEXT:    rsbs.w r3, r12, #0
+; CHECK-BE-NEXT:    @ implicit-def: $q2
 ; CHECK-BE-NEXT:    sbcs.w r3, r1, r12, asr #31
 ; CHECK-BE-NEXT:    bfi r1, lr, #0, #1
-; CHECK-BE-NEXT:    cset r3, lt
-; CHECK-BE-NEXT:    cmp r3, #0
-; CHECK-BE-NEXT:    csetm r3, ne
+; CHECK-BE-NEXT:    csetm r3, lt
 ; CHECK-BE-NEXT:    bfi r1, r3, #1, #1
 ; CHECK-BE-NEXT:    lsls r3, r1, #30
 ; CHECK-BE-NEXT:    bpl .LBB5_2
@@ -198,16 +186,12 @@ define void @foo_sext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32>
 ; CHECK-BE-NEXT:    sbcs.w r4, r1, r4, asr #31
 ; CHECK-BE-NEXT:    vmov q1[3], q1[1], r3, r2
 ; CHECK-BE-NEXT:    vmov r3, s9
-; CHECK-BE-NEXT:    cset r2, lt
+; CHECK-BE-NEXT:    csetm r2, lt
 ; CHECK-BE-NEXT:    vrev64.32 q0, q1
-; CHECK-BE-NEXT:    cmp r2, #0
-; CHECK-BE-NEXT:    csetm r2, ne
 ; CHECK-BE-NEXT:    rsbs r5, r3, #0
 ; CHECK-BE-NEXT:    sbcs.w r3, r1, r3, asr #31
 ; CHECK-BE-NEXT:    bfi r1, r2, #0, #1
-; CHECK-BE-NEXT:    cset r2, lt
-; CHECK-BE-NEXT:    cmp r2, #0
-; CHECK-BE-NEXT:    csetm r2, ne
+; CHECK-BE-NEXT:    csetm r2, lt
 ; CHECK-BE-NEXT:    bfi r1, r2, #1, #1
 ; CHECK-BE-NEXT:    lsls r2, r1, #30
 ; CHECK-BE-NEXT:    it mi
@@ -239,15 +223,11 @@ define void @foo_sext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask,
 ; CHECK-LE-NEXT:    rsbs.w r1, r12, #0
 ; CHECK-LE-NEXT:    vmov q1[2], q1[0], r12, lr
 ; CHECK-LE-NEXT:    sbcs.w r1, r3, r12, asr #31
-; CHECK-LE-NEXT:    cset r1, lt
-; CHECK-LE-NEXT:    cmp r1, #0
-; CHECK-LE-NEXT:    csetm r1, ne
+; CHECK-LE-NEXT:    csetm r1, lt
 ; CHECK-LE-NEXT:    rsbs.w r4, lr, #0
 ; CHECK-LE-NEXT:    sbcs.w r4, r3, lr, asr #31
 ; CHECK-LE-NEXT:    bfi r3, r1, #0, #1
-; CHECK-LE-NEXT:    cset r1, lt
-; CHECK-LE-NEXT:    cmp r1, #0
-; CHECK-LE-NEXT:    csetm r1, ne
+; CHECK-LE-NEXT:    csetm r1, lt
 ; CHECK-LE-NEXT:    bfi r3, r1, #1, #1
 ; CHECK-LE-NEXT:    lsls r1, r3, #31
 ; CHECK-LE-NEXT:    itt ne
@@ -266,17 +246,13 @@ define void @foo_sext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask,
 ; CHECK-LE-NEXT:    asr.w r12, r2, #31
 ; CHECK-LE-NEXT:    sbcs.w r2, r1, r3, asr #31
 ; CHECK-LE-NEXT:    vmov r3, s6
-; CHECK-LE-NEXT:    cset r2, lt
+; CHECK-LE-NEXT:    csetm r2, lt
 ; CHECK-LE-NEXT:    asr.w lr, r4, #31
-; CHECK-LE-NEXT:    cmp r2, #0
 ; CHECK-LE-NEXT:    vmov q0[3], q0[1], lr, r12
-; CHECK-LE-NEXT:    csetm r2, ne
 ; CHECK-LE-NEXT:    rsbs r5, r3, #0
 ; CHECK-LE-NEXT:    sbcs.w r3, r1, r3, asr #31
 ; CHECK-LE-NEXT:    bfi r1, r2, #0, #1
-; CHECK-LE-NEXT:    cset r2, lt
-; CHECK-LE-NEXT:    cmp r2, #0
-; CHECK-LE-NEXT:    csetm r2, ne
+; CHECK-LE-NEXT:    csetm r2, lt
 ; CHECK-LE-NEXT:    bfi r1, r2, #1, #1
 ; CHECK-LE-NEXT:    lsls r2, r1, #31
 ; CHECK-LE-NEXT:    itt ne
@@ -299,17 +275,13 @@ define void @foo_sext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask,
 ; CHECK-BE-NEXT:    rsbs.w r3, lr, #0
 ; CHECK-BE-NEXT:    mov.w r1, #0
 ; CHECK-BE-NEXT:    sbcs.w r3, r1, lr, asr #31
-; CHECK-BE-NEXT:    cset r3, lt
 ; CHECK-BE-NEXT:    vmov q0[3], q0[1], r12, lr
-; CHECK-BE-NEXT:    cmp r3, #0
-; CHECK-BE-NEXT:    @ implicit-def: $q2
-; CHECK-BE-NEXT:    csetm lr, ne
+; CHECK-BE-NEXT:    csetm lr, lt
 ; CHECK-BE-NEXT:    rsbs.w r3, r12, #0
+; CHECK-BE-NEXT:    @ implicit-def: $q2
 ; CHECK-BE-NEXT:    sbcs.w r3, r1, r12, asr #31
 ; CHECK-BE-NEXT:    bfi r1, lr, #0, #1
-; CHECK-BE-NEXT:    cset r3, lt
-; CHECK-BE-NEXT:    cmp r3, #0
-; CHECK-BE-NEXT:    csetm r3, ne
+; CHECK-BE-NEXT:    csetm r3, lt
 ; CHECK-BE-NEXT:    bfi r1, r3, #1, #1
 ; CHECK-BE-NEXT:    lsls r3, r1, #30
 ; CHECK-BE-NEXT:    bpl .LBB6_2
@@ -340,16 +312,12 @@ define void @foo_sext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask,
 ; CHECK-BE-NEXT:    sbcs.w r4, r1, r4, asr #31
 ; CHECK-BE-NEXT:    vmov q1[3], q1[1], r3, r2
 ; CHECK-BE-NEXT:    vmov r3, s9
-; CHECK-BE-NEXT:    cset r2, lt
+; CHECK-BE-NEXT:    csetm r2, lt
 ; CHECK-BE-NEXT:    vrev64.32 q0, q1
-; CHECK-BE-NEXT:    cmp r2, #0
-; CHECK-BE-NEXT:    csetm r2, ne
 ; CHECK-BE-NEXT:    rsbs r5, r3, #0
 ; CHECK-BE-NEXT:    sbcs.w r3, r1, r3, asr #31
 ; CHECK-BE-NEXT:    bfi r1, r2, #0, #1
-; CHECK-BE-NEXT:    cset r2, lt
-; CHECK-BE-NEXT:    cmp r2, #0
-; CHECK-BE-NEXT:    csetm r2, ne
+; CHECK-BE-NEXT:    csetm r2, lt
 ; CHECK-BE-NEXT:    bfi r1, r2, #1, #1
 ; CHECK-BE-NEXT:    lsls r2, r1, #30
 ; CHECK-BE-NEXT:    itt mi
@@ -384,15 +352,11 @@ define void @foo_zext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32>
 ; CHECK-LE-NEXT:    rsbs.w r1, r12, #0
 ; CHECK-LE-NEXT:    vmov q1[2], q1[0], r12, lr
 ; CHECK-LE-NEXT:    sbcs.w r1, r3, r12, asr #31
-; CHECK-LE-NEXT:    cset r1, lt
-; CHECK-LE-NEXT:    cmp r1, #0
-; CHECK-LE-NEXT:    csetm r1, ne
+; CHECK-LE-NEXT:    csetm r1, lt
 ; CHECK-LE-NEXT:    rsbs.w r4, lr, #0
 ; CHECK-LE-NEXT:    sbcs.w r4, r3, lr, asr #31
 ; CHECK-LE-NEXT:    bfi r3, r1, #0, #1
-; CHECK-LE-NEXT:    cset r1, lt
-; CHECK-LE-NEXT:    cmp r1, #0
-; CHECK-LE-NEXT:    csetm r1, ne
+; CHECK-LE-NEXT:    csetm r1, lt
 ; CHECK-LE-NEXT:    bfi r3, r1, #1, #1
 ; CHECK-LE-NEXT:    lsls r1, r3, #31
 ; CHECK-LE-NEXT:    itt ne
@@ -408,15 +372,11 @@ define void @foo_zext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32>
 ; CHECK-LE-NEXT:    rsbs r3, r2, #0
 ; CHECK-LE-NEXT:    vmov r3, s6
 ; CHECK-LE-NEXT:    sbcs.w r2, r1, r2, asr #31
-; CHECK-LE-NEXT:    cset r2, lt
-; CHECK-LE-NEXT:    cmp r2, #0
-; CHECK-LE-NEXT:    csetm r2, ne
+; CHECK-LE-NEXT:    csetm r2, lt
 ; CHECK-LE-NEXT:    rsbs r4, r3, #0
 ; CHECK-LE-NEXT:    sbcs.w r3, r1, r3, asr #31
 ; CHECK-LE-NEXT:    bfi r1, r2, #0, #1
-; CHECK-LE-NEXT:    cset r2, lt
-; CHECK-LE-NEXT:    cmp r2, #0
-; CHECK-LE-NEXT:    csetm r2, ne
+; CHECK-LE-NEXT:    csetm r2, lt
 ; CHECK-LE-NEXT:    bfi r1, r2, #1, #1
 ; CHECK-LE-NEXT:    lsls r2, r1, #31
 ; CHECK-LE-NEXT:    it ne
@@ -437,51 +397,43 @@ define void @foo_zext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32>
 ; CHECK-BE-NEXT:    rsbs.w r3, lr, #0
 ; CHECK-BE-NEXT:    mov.w r1, #0
 ; CHECK-BE-NEXT:    sbcs.w r3, r1, lr, asr #31
-; CHECK-BE-NEXT:    cset r3, lt
-; CHECK-BE-NEXT:    vmov q0[3], q0[1], r12, lr
-; CHECK-BE-NEXT:    cmp r3, #0
-; CHECK-BE-NEXT:    @ implicit-def: $q1
-; CHECK-BE-NEXT:    csetm lr, ne
+; CHECK-BE-NEXT:    vmov q1[3], q1[1], r12, lr
+; CHECK-BE-NEXT:    csetm lr, lt
 ; CHECK-BE-NEXT:    rsbs.w r3, r12, #0
+; CHECK-BE-NEXT:    @ implicit-def: $q0
 ; CHECK-BE-NEXT:    sbcs.w r3, r1, r12, asr #31
 ; CHECK-BE-NEXT:    bfi r1, lr, #0, #1
-; CHECK-BE-NEXT:    cset r3, lt
-; CHECK-BE-NEXT:    cmp r3, #0
-; CHECK-BE-NEXT:    csetm r3, ne
+; CHECK-BE-NEXT:    csetm r3, lt
 ; CHECK-BE-NEXT:    bfi r1, r3, #1, #1
 ; CHECK-BE-NEXT:    lsls r3, r1, #30
 ; CHECK-BE-NEXT:    bpl .LBB7_2
 ; CHECK-BE-NEXT:  @ %bb.1: @ %cond.load
 ; CHECK-BE-NEXT:    ldr r3, [r2]
 ; CHECK-BE-NEXT:    vmov.32 q2[1], r3
-; CHECK-BE-NEXT:    vrev64.32 q1, q2
+; CHECK-BE-NEXT:    vrev64.32 q0, q2
 ; CHECK-BE-NEXT:  .LBB7_2: @ %else
-; CHECK-BE-NEXT:    vrev64.32 q2, q0
+; CHECK-BE-NEXT:    vrev64.32 q2, q1
 ; CHECK-BE-NEXT:    lsls r1, r1, #31
 ; CHECK-BE-NEXT:    beq .LBB7_4
 ; CHECK-BE-NEXT:  @ %bb.3: @ %cond.load1
 ; CHECK-BE-NEXT:    ldr r1, [r2, #4]
-; CHECK-BE-NEXT:    vrev64.32 q0, q1
-; CHECK-BE-NEXT:    vmov.32 q0[3], r1
 ; CHECK-BE-NEXT:    vrev64.32 q1, q0
+; CHECK-BE-NEXT:    vmov.32 q1[3], r1
+; CHECK-BE-NEXT:    vrev64.32 q0, q1
 ; CHECK-BE-NEXT:  .LBB7_4: @ %else2
 ; CHECK-BE-NEXT:    vrev64.32 q3, q2
 ; CHECK-BE-NEXT:    movs r1, #0
 ; CHECK-BE-NEXT:    vmov r2, s15
-; CHECK-BE-NEXT:    vmov.i64 q0, #0xffffffff
-; CHECK-BE-NEXT:    vand q0, q1, q0
+; CHECK-BE-NEXT:    vmov.i64 q1, #0xffffffff
+; CHECK-BE-NEXT:    vand q0, q0, q1
 ; CHECK-BE-NEXT:    rsbs r3, r2, #0
 ; CHECK-BE-NEXT:    vmov r3, s13
 ; CHECK-BE-NEXT:    sbcs.w r2, r1, r2, asr #31
-; CHECK-BE-NEXT:    cset r2, lt
-; CHECK-BE-NEXT:    cmp r2, #0
-; CHECK-BE-NEXT:    csetm r12, ne
+; CHECK-BE-NEXT:    csetm r12, lt
 ; CHECK-BE-NEXT:    rsbs r2, r3, #0
 ; CHECK-BE-NEXT:    sbcs.w r2, r1, r3, asr #31
 ; CHECK-BE-NEXT:    bfi r1, r12, #0, #1
-; CHECK-BE-NEXT:    cset r2, lt
-; CHECK-BE-NEXT:    cmp r2, #0
-; CHECK-BE-NEXT:    csetm r2, ne
+; CHECK-BE-NEXT:    csetm r2, lt
 ; CHECK-BE-NEXT:    bfi r1, r2, #1, #1
 ; CHECK-BE-NEXT:    lsls r2, r1, #30
 ; CHECK-BE-NEXT:    it mi
@@ -514,15 +466,11 @@ define void @foo_zext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask,
 ; CHECK-LE-NEXT:    rsbs.w r1, r12, #0
 ; CHECK-LE-NEXT:    vmov q1[2], q1[0], r12, lr
 ; CHECK-LE-NEXT:    sbcs.w r1, r3, r12, asr #31
-; CHECK-LE-NEXT:    cset r1, lt
-; CHECK-LE-NEXT:    cmp r1, #0
-; CHECK-LE-NEXT:    csetm r1, ne
+; CHECK-LE-NEXT:    csetm r1, lt
 ; CHECK-LE-NEXT:    rsbs.w r4, lr, #0
 ; CHECK-LE-NEXT:    sbcs.w r4, r3, lr, asr #31
 ; CHECK-LE-NEXT:    bfi r3, r1, #0, #1
-; CHECK-LE-NEXT:    cset r1, lt
-; CHECK-LE-NEXT:    cmp r1, #0
-; CHECK-LE-NEXT:    csetm r1, ne
+; CHECK-LE-NEXT:    csetm r1, lt
 ; CHECK-LE-NEXT:    bfi r3, r1, #1, #1
 ; CHECK-LE-NEXT:    lsls r1, r3, #31
 ; CHECK-LE-NEXT:    itt ne
@@ -538,15 +486,11 @@ define void @foo_zext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask,
 ; CHECK-LE-NEXT:    rsbs r3, r2, #0
 ; CHECK-LE-NEXT:    vmov r3, s6
 ; CHECK-LE-NEXT:    sbcs.w r2, r1, r2, asr #31
-; CHECK-LE-NEXT:    cset r2, lt
-; CHECK-LE-NEXT:    cmp r2, #0
-; CHECK-LE-NEXT:    csetm r2, ne
+; CHECK-LE-NEXT:    csetm r2, lt
 ; CHECK-LE-NEXT:    rsbs r4, r3, #0
 ; CHECK-LE-NEXT:    sbcs.w r3, r1, r3, asr #31
 ; CHECK-LE-NEXT:    bfi r1, r2, #0, #1
-; CHECK-LE-NEXT:    cset r2, lt
-; CHECK-LE-NEXT:    cmp r2, #0
-; CHECK-LE-NEXT:    csetm r2, ne
+; CHECK-LE-NEXT:    csetm r2, lt
 ; CHECK-LE-NEXT:    bfi r1, r2, #1, #1
 ; CHECK-LE-NEXT:    lsls r2, r1, #31
 ; CHECK-LE-NEXT:    itt ne
@@ -569,51 +513,43 @@ define void @foo_zext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask,
 ; CHECK-BE-NEXT:    rsbs.w r3, lr, #0
 ; CHECK-BE-NEXT:    mov.w r1, #0
 ; CHECK-BE-NEXT:    sbcs.w r3, r1, lr, asr #31
-; CHECK-BE-NEXT:    cset r3, lt
-; CHECK-BE-NEXT:    vmov q0[3], q0[1], r12, lr
-; CHECK-BE-NEXT:    cmp r3, #0
-; CHECK-BE-NEXT:    @ implicit-def: $q1
-; CHECK-BE-NEXT:    csetm lr, ne
+; CHECK-BE-NEXT:    vmov q1[3], q1[1], r12, lr
+; CHECK-BE-NEXT:    csetm lr, lt
 ; CHECK-BE-NEXT:    rsbs.w r3, r12, #0
+; CHECK-BE-NEXT:    @ implicit-def: $q0
 ; CHECK-BE-NEXT:    sbcs.w r3, r1, r12, asr #31
 ; CHECK-BE-NEXT:    bfi r1, lr, #0, #1
-; CHECK-BE-NEXT:    cset r3, lt
-; CHECK-BE-NEXT:    cmp r3, #0
-; CHECK-BE-NEXT:    csetm r3, ne
+; CHECK-BE-NEXT:    csetm r3, lt
 ; CHECK-BE-NEXT:    bfi r1, r3, #1, #1
 ; CHECK-BE-NEXT:    lsls r3, r1, #30
 ; CHECK-BE-NEXT:    bpl .LBB8_2
 ; CHECK-BE-NEXT:  @ %bb.1: @ %cond.load
 ; CHECK-BE-NEXT:    ldr r3, [r2]
 ; CHECK-BE-NEXT:    vmov.32 q2[1], r3
-; CHECK-BE-NEXT:    vrev64.32 q1, q2
+; CHECK-BE-NEXT:    vrev64.32 q0, q2
 ; CHECK-BE-NEXT:  .LBB8_2: @ %else
-; CHECK-BE-NEXT:    vrev64.32 q2, q0
+; CHECK-BE-NEXT:    vrev64.32 q2, q1
 ; CHECK-BE-NEXT:    lsls r1, r1, #31
 ; CHECK-BE-NEXT:    beq .LBB8_4
 ; CHECK-BE-NEXT:  @ %bb.3: @ %cond.load1
 ; CHECK-BE-NEXT:    ldr r1, [r2, #4]
-; CHECK-BE-NEXT:    vrev64.32 q0, q1
-; CHECK-BE-NEXT:    vmov.32 q0[3], r1
 ; CHECK-BE-NEXT:    vrev64.32 q1, q0
+; CHECK-BE-NEXT:    vmov.32 q1[3], r1
+; CHECK-BE-NEXT:    vrev64.32 q0, q1
 ; CHECK-BE-NEXT:  .LBB8_4: @ %else2
 ; CHECK-BE-NEXT:    vrev64.32 q3, q2
 ; CHECK-BE-NEXT:    movs r1, #0
 ; CHECK-BE-NEXT:    vmov r2, s15
-; CHECK-BE-NEXT:    vmov.i64 q0, #0xffffffff
-; CHECK-BE-NEXT:    vand q0, q1, q0
+; CHECK-BE-NEXT:    vmov.i64 q1, #0xffffffff
+; CHECK-BE-NEXT:    vand q0, q0, q1
 ; CHECK-BE-NEXT:    rsbs r3, r2, #0
 ; CHECK-BE-NEXT:    vmov r3, s13
 ; CHECK-BE-NEXT:    sbcs.w r2, r1, r2, asr #31
-; CHECK-BE-NEXT:    cset r2, lt
-; CHECK-BE-NEXT:    cmp r2, #0
-; CHECK-BE-NEXT:    csetm r12, ne
+; CHECK-BE-NEXT:    csetm r12, lt
 ; CHECK-BE-NEXT:    rsbs r2, r3, #0
 ; CHECK-BE-NEXT:    sbcs.w r2, r1, r3, asr #31
 ; CHECK-BE-NEXT:    bfi r1, r12, #0, #1
-; CHECK-BE-NEXT:    cset r2, lt
-; CHECK-BE-NEXT:    cmp r2, #0
-; CHECK-BE-NEXT:    csetm r2, ne
+; CHECK-BE-NEXT:    csetm r2, lt
 ; CHECK-BE-NEXT:    bfi r1, r2, #1, #1
 ; CHECK-BE-NEXT:    lsls r2, r1, #30
 ; CHECK-BE-NEXT:    itt mi

diff  --git a/llvm/test/CodeGen/Thumb2/mve-masked-load.ll b/llvm/test/CodeGen/Thumb2/mve-masked-load.ll
index 2adaf7cf577a7..afcea7901ccf7 100644
--- a/llvm/test/CodeGen/Thumb2/mve-masked-load.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-masked-load.ll
@@ -1759,15 +1759,11 @@ define arm_aapcs_vfpcc <2 x i64> @masked_v2i64_align4_zero(<2 x i64> *%dest, <2
 ; CHECK-LE-NEXT:    vmov r12, lr, d1
 ; CHECK-LE-NEXT:    rsbs r2, r2, #0
 ; CHECK-LE-NEXT:    sbcs.w r2, r1, r3
-; CHECK-LE-NEXT:    cset r2, lt
-; CHECK-LE-NEXT:    cmp r2, #0
-; CHECK-LE-NEXT:    csetm r2, ne
+; CHECK-LE-NEXT:    csetm r2, lt
 ; CHECK-LE-NEXT:    rsbs.w r3, r12, #0
 ; CHECK-LE-NEXT:    sbcs.w r3, r1, lr
 ; CHECK-LE-NEXT:    bfi r1, r2, #0, #1
-; CHECK-LE-NEXT:    cset r2, lt
-; CHECK-LE-NEXT:    cmp r2, #0
-; CHECK-LE-NEXT:    csetm r2, ne
+; CHECK-LE-NEXT:    csetm r2, lt
 ; CHECK-LE-NEXT:    bfi r1, r2, #1, #1
 ; CHECK-LE-NEXT:    lsls r2, r1, #31
 ; CHECK-LE-NEXT:    beq .LBB49_2
@@ -1801,15 +1797,11 @@ define arm_aapcs_vfpcc <2 x i64> @masked_v2i64_align4_zero(<2 x i64> *%dest, <2
 ; CHECK-BE-NEXT:    vmov r12, lr, d2
 ; CHECK-BE-NEXT:    rsbs r3, r3, #0
 ; CHECK-BE-NEXT:    sbcs.w r2, r1, r2
-; CHECK-BE-NEXT:    cset r2, lt
-; CHECK-BE-NEXT:    cmp r2, #0
-; CHECK-BE-NEXT:    csetm r2, ne
+; CHECK-BE-NEXT:    csetm r2, lt
 ; CHECK-BE-NEXT:    rsbs.w r3, lr, #0
 ; CHECK-BE-NEXT:    sbcs.w r3, r1, r12
 ; CHECK-BE-NEXT:    bfi r1, r2, #0, #1
-; CHECK-BE-NEXT:    cset r2, lt
-; CHECK-BE-NEXT:    cmp r2, #0
-; CHECK-BE-NEXT:    csetm r2, ne
+; CHECK-BE-NEXT:    csetm r2, lt
 ; CHECK-BE-NEXT:    bfi r1, r2, #1, #1
 ; CHECK-BE-NEXT:    lsls r2, r1, #30
 ; CHECK-BE-NEXT:    bpl .LBB49_2
@@ -1848,15 +1840,11 @@ define arm_aapcs_vfpcc <2 x double> @masked_v2f64_align4_zero(<2 x double> *%des
 ; CHECK-LE-NEXT:    vmov r12, lr, d3
 ; CHECK-LE-NEXT:    rsbs r2, r2, #0
 ; CHECK-LE-NEXT:    sbcs.w r2, r1, r3
-; CHECK-LE-NEXT:    cset r2, lt
-; CHECK-LE-NEXT:    cmp r2, #0
-; CHECK-LE-NEXT:    csetm r2, ne
+; CHECK-LE-NEXT:    csetm r2, lt
 ; CHECK-LE-NEXT:    rsbs.w r3, r12, #0
 ; CHECK-LE-NEXT:    sbcs.w r3, r1, lr
 ; CHECK-LE-NEXT:    bfi r1, r2, #0, #1
-; CHECK-LE-NEXT:    cset r2, lt
-; CHECK-LE-NEXT:    cmp r2, #0
-; CHECK-LE-NEXT:    csetm r2, ne
+; CHECK-LE-NEXT:    csetm r2, lt
 ; CHECK-LE-NEXT:    bfi r1, r2, #1, #1
 ; CHECK-LE-NEXT:    lsls r2, r1, #31
 ; CHECK-LE-NEXT:    beq .LBB50_2
@@ -1890,15 +1878,11 @@ define arm_aapcs_vfpcc <2 x double> @masked_v2f64_align4_zero(<2 x double> *%des
 ; CHECK-BE-NEXT:    vmov r12, lr, d0
 ; CHECK-BE-NEXT:    rsbs r3, r3, #0
 ; CHECK-BE-NEXT:    sbcs.w r2, r1, r2
-; CHECK-BE-NEXT:    cset r2, lt
-; CHECK-BE-NEXT:    cmp r2, #0
-; CHECK-BE-NEXT:    csetm r2, ne
+; CHECK-BE-NEXT:    csetm r2, lt
 ; CHECK-BE-NEXT:    rsbs.w r3, lr, #0
 ; CHECK-BE-NEXT:    sbcs.w r3, r1, r12
 ; CHECK-BE-NEXT:    bfi r1, r2, #0, #1
-; CHECK-BE-NEXT:    cset r2, lt
-; CHECK-BE-NEXT:    cmp r2, #0
-; CHECK-BE-NEXT:    csetm r2, ne
+; CHECK-BE-NEXT:    csetm r2, lt
 ; CHECK-BE-NEXT:    bfi r1, r2, #1, #1
 ; CHECK-BE-NEXT:    lsls r2, r1, #30
 ; CHECK-BE-NEXT:    bpl .LBB50_2

diff  --git a/llvm/test/CodeGen/Thumb2/mve-masked-store.ll b/llvm/test/CodeGen/Thumb2/mve-masked-store.ll
index 6c598cf71b2e1..29b29859e8629 100644
--- a/llvm/test/CodeGen/Thumb2/mve-masked-store.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-masked-store.ll
@@ -944,15 +944,11 @@ define arm_aapcs_vfpcc void @masked_v2i64(<2 x i64> *%dest, <2 x i64> %a) {
 ; CHECK-LE-NEXT:    vmov r12, lr, d1
 ; CHECK-LE-NEXT:    rsbs r2, r2, #0
 ; CHECK-LE-NEXT:    sbcs.w r2, r1, r3
-; CHECK-LE-NEXT:    cset r2, lt
-; CHECK-LE-NEXT:    cmp r2, #0
-; CHECK-LE-NEXT:    csetm r2, ne
+; CHECK-LE-NEXT:    csetm r2, lt
 ; CHECK-LE-NEXT:    rsbs.w r3, r12, #0
 ; CHECK-LE-NEXT:    sbcs.w r3, r1, lr
 ; CHECK-LE-NEXT:    bfi r1, r2, #0, #1
-; CHECK-LE-NEXT:    cset r2, lt
-; CHECK-LE-NEXT:    cmp r2, #0
-; CHECK-LE-NEXT:    csetm r2, ne
+; CHECK-LE-NEXT:    csetm r2, lt
 ; CHECK-LE-NEXT:    bfi r1, r2, #1, #1
 ; CHECK-LE-NEXT:    lsls r2, r1, #31
 ; CHECK-LE-NEXT:    it ne
@@ -975,15 +971,11 @@ define arm_aapcs_vfpcc void @masked_v2i64(<2 x i64> *%dest, <2 x i64> %a) {
 ; CHECK-BE-NEXT:    vmov r12, lr, d2
 ; CHECK-BE-NEXT:    rsbs r3, r3, #0
 ; CHECK-BE-NEXT:    sbcs.w r2, r1, r2
-; CHECK-BE-NEXT:    cset r2, lt
-; CHECK-BE-NEXT:    cmp r2, #0
-; CHECK-BE-NEXT:    csetm r2, ne
+; CHECK-BE-NEXT:    csetm r2, lt
 ; CHECK-BE-NEXT:    rsbs.w r3, lr, #0
 ; CHECK-BE-NEXT:    sbcs.w r3, r1, r12
 ; CHECK-BE-NEXT:    bfi r1, r2, #0, #1
-; CHECK-BE-NEXT:    cset r2, lt
-; CHECK-BE-NEXT:    cmp r2, #0
-; CHECK-BE-NEXT:    csetm r2, ne
+; CHECK-BE-NEXT:    csetm r2, lt
 ; CHECK-BE-NEXT:    bfi r1, r2, #1, #1
 ; CHECK-BE-NEXT:    lsls r2, r1, #30
 ; CHECK-BE-NEXT:    it mi
@@ -1011,15 +1003,11 @@ define arm_aapcs_vfpcc void @masked_v2f64(<2 x double> *%dest, <2 x double> %a,
 ; CHECK-LE-NEXT:    vmov r12, lr, d3
 ; CHECK-LE-NEXT:    rsbs r2, r2, #0
 ; CHECK-LE-NEXT:    sbcs.w r2, r1, r3
-; CHECK-LE-NEXT:    cset r2, lt
-; CHECK-LE-NEXT:    cmp r2, #0
-; CHECK-LE-NEXT:    csetm r2, ne
+; CHECK-LE-NEXT:    csetm r2, lt
 ; CHECK-LE-NEXT:    rsbs.w r3, r12, #0
 ; CHECK-LE-NEXT:    sbcs.w r3, r1, lr
 ; CHECK-LE-NEXT:    bfi r1, r2, #0, #1
-; CHECK-LE-NEXT:    cset r2, lt
-; CHECK-LE-NEXT:    cmp r2, #0
-; CHECK-LE-NEXT:    csetm r2, ne
+; CHECK-LE-NEXT:    csetm r2, lt
 ; CHECK-LE-NEXT:    bfi r1, r2, #1, #1
 ; CHECK-LE-NEXT:    lsls r2, r1, #31
 ; CHECK-LE-NEXT:    it ne
@@ -1042,15 +1030,11 @@ define arm_aapcs_vfpcc void @masked_v2f64(<2 x double> *%dest, <2 x double> %a,
 ; CHECK-BE-NEXT:    vmov r12, lr, d4
 ; CHECK-BE-NEXT:    rsbs r3, r3, #0
 ; CHECK-BE-NEXT:    sbcs.w r2, r1, r2
-; CHECK-BE-NEXT:    cset r2, lt
-; CHECK-BE-NEXT:    cmp r2, #0
-; CHECK-BE-NEXT:    csetm r2, ne
+; CHECK-BE-NEXT:    csetm r2, lt
 ; CHECK-BE-NEXT:    rsbs.w r3, lr, #0
 ; CHECK-BE-NEXT:    sbcs.w r3, r1, r12
 ; CHECK-BE-NEXT:    bfi r1, r2, #0, #1
-; CHECK-BE-NEXT:    cset r2, lt
-; CHECK-BE-NEXT:    cmp r2, #0
-; CHECK-BE-NEXT:    csetm r2, ne
+; CHECK-BE-NEXT:    csetm r2, lt
 ; CHECK-BE-NEXT:    bfi r1, r2, #1, #1
 ; CHECK-BE-NEXT:    lsls r2, r1, #30
 ; CHECK-BE-NEXT:    it mi
@@ -1216,33 +1200,25 @@ define arm_aapcs_vfpcc void @masked_v4f16_align4(<4 x half> *%dest, <4 x float>
 ; CHECK-LE-NEXT:    .pad #4
 ; CHECK-LE-NEXT:    sub sp, #4
 ; CHECK-LE-NEXT:    vcmp.f32 s0, #0
-; CHECK-LE-NEXT:    vcvtb.f16.f32 s4, s0
+; CHECK-LE-NEXT:    movs r1, #0
 ; CHECK-LE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-LE-NEXT:    vcmp.f32 s1, #0
-; CHECK-LE-NEXT:    vcvtt.f16.f32 s4, s1
+; CHECK-LE-NEXT:    vcvtb.f16.f32 s4, s0
 ; CHECK-LE-NEXT:    vcvtb.f16.f32 s6, s2
+; CHECK-LE-NEXT:    vcvtt.f16.f32 s4, s1
 ; CHECK-LE-NEXT:    vcvtt.f16.f32 s5, s3
-; CHECK-LE-NEXT:    cset r1, gt
-; CHECK-LE-NEXT:    cmp r1, #0
-; CHECK-LE-NEXT:    mov.w r1, #0
-; CHECK-LE-NEXT:    csetm r2, ne
+; CHECK-LE-NEXT:    csetm r2, gt
 ; CHECK-LE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-LE-NEXT:    bfi r1, r2, #0, #1
 ; CHECK-LE-NEXT:    vcmp.f32 s2, #0
-; CHECK-LE-NEXT:    cset r2, gt
-; CHECK-LE-NEXT:    cmp r2, #0
-; CHECK-LE-NEXT:    csetm r2, ne
+; CHECK-LE-NEXT:    bfi r1, r2, #0, #1
+; CHECK-LE-NEXT:    csetm r2, gt
 ; CHECK-LE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-LE-NEXT:    bfi r1, r2, #1, #1
 ; CHECK-LE-NEXT:    vcmp.f32 s3, #0
-; CHECK-LE-NEXT:    cset r2, gt
-; CHECK-LE-NEXT:    cmp r2, #0
-; CHECK-LE-NEXT:    csetm r2, ne
+; CHECK-LE-NEXT:    bfi r1, r2, #1, #1
+; CHECK-LE-NEXT:    csetm r2, gt
 ; CHECK-LE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-LE-NEXT:    bfi r1, r2, #2, #1
-; CHECK-LE-NEXT:    cset r2, gt
-; CHECK-LE-NEXT:    cmp r2, #0
-; CHECK-LE-NEXT:    csetm r2, ne
+; CHECK-LE-NEXT:    csetm r2, gt
 ; CHECK-LE-NEXT:    bfi r1, r2, #3, #1
 ; CHECK-LE-NEXT:    lsls r2, r1, #31
 ; CHECK-LE-NEXT:    bne .LBB25_5
@@ -1282,6 +1258,7 @@ define arm_aapcs_vfpcc void @masked_v4f16_align4(<4 x half> *%dest, <4 x float>
 ; CHECK-BE-NEXT:    .pad #4
 ; CHECK-BE-NEXT:    sub sp, #4
 ; CHECK-BE-NEXT:    vrev64.32 q1, q0
+; CHECK-BE-NEXT:    movs r1, #0
 ; CHECK-BE-NEXT:    vcmp.f32 s7, #0
 ; CHECK-BE-NEXT:    vcvtb.f16.f32 s0, s4
 ; CHECK-BE-NEXT:    vmrs APSR_nzcv, fpscr
@@ -1289,27 +1266,18 @@ define arm_aapcs_vfpcc void @masked_v4f16_align4(<4 x half> *%dest, <4 x float>
 ; CHECK-BE-NEXT:    vcvtt.f16.f32 s0, s5
 ; CHECK-BE-NEXT:    vcvtb.f16.f32 s2, s6
 ; CHECK-BE-NEXT:    vcvtt.f16.f32 s1, s7
-; CHECK-BE-NEXT:    cset r1, gt
-; CHECK-BE-NEXT:    cmp r1, #0
-; CHECK-BE-NEXT:    mov.w r1, #0
-; CHECK-BE-NEXT:    csetm r2, ne
+; CHECK-BE-NEXT:    csetm r2, gt
 ; CHECK-BE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-BE-NEXT:    bfi r1, r2, #0, #1
 ; CHECK-BE-NEXT:    vcmp.f32 s5, #0
-; CHECK-BE-NEXT:    cset r2, gt
-; CHECK-BE-NEXT:    cmp r2, #0
-; CHECK-BE-NEXT:    csetm r2, ne
+; CHECK-BE-NEXT:    bfi r1, r2, #0, #1
+; CHECK-BE-NEXT:    csetm r2, gt
 ; CHECK-BE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-BE-NEXT:    bfi r1, r2, #1, #1
 ; CHECK-BE-NEXT:    vcmp.f32 s4, #0
-; CHECK-BE-NEXT:    cset r2, gt
-; CHECK-BE-NEXT:    cmp r2, #0
-; CHECK-BE-NEXT:    csetm r2, ne
+; CHECK-BE-NEXT:    bfi r1, r2, #1, #1
+; CHECK-BE-NEXT:    csetm r2, gt
 ; CHECK-BE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-BE-NEXT:    bfi r1, r2, #2, #1
-; CHECK-BE-NEXT:    cset r2, gt
-; CHECK-BE-NEXT:    cmp r2, #0
-; CHECK-BE-NEXT:    csetm r2, ne
+; CHECK-BE-NEXT:    csetm r2, gt
 ; CHECK-BE-NEXT:    bfi r1, r2, #3, #1
 ; CHECK-BE-NEXT:    lsls r2, r1, #28
 ; CHECK-BE-NEXT:    bmi .LBB25_5
@@ -1356,33 +1324,25 @@ define arm_aapcs_vfpcc void @masked_v4f16_align2(<4 x half> *%dest, <4 x float>
 ; CHECK-LE-NEXT:    .pad #4
 ; CHECK-LE-NEXT:    sub sp, #4
 ; CHECK-LE-NEXT:    vcmp.f32 s0, #0
-; CHECK-LE-NEXT:    vcvtb.f16.f32 s4, s0
+; CHECK-LE-NEXT:    movs r1, #0
 ; CHECK-LE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-LE-NEXT:    vcmp.f32 s1, #0
-; CHECK-LE-NEXT:    vcvtt.f16.f32 s4, s1
+; CHECK-LE-NEXT:    vcvtb.f16.f32 s4, s0
 ; CHECK-LE-NEXT:    vcvtb.f16.f32 s6, s2
+; CHECK-LE-NEXT:    vcvtt.f16.f32 s4, s1
 ; CHECK-LE-NEXT:    vcvtt.f16.f32 s5, s3
-; CHECK-LE-NEXT:    cset r1, gt
-; CHECK-LE-NEXT:    cmp r1, #0
-; CHECK-LE-NEXT:    mov.w r1, #0
-; CHECK-LE-NEXT:    csetm r2, ne
+; CHECK-LE-NEXT:    csetm r2, gt
 ; CHECK-LE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-LE-NEXT:    bfi r1, r2, #0, #1
 ; CHECK-LE-NEXT:    vcmp.f32 s2, #0
-; CHECK-LE-NEXT:    cset r2, gt
-; CHECK-LE-NEXT:    cmp r2, #0
-; CHECK-LE-NEXT:    csetm r2, ne
+; CHECK-LE-NEXT:    bfi r1, r2, #0, #1
+; CHECK-LE-NEXT:    csetm r2, gt
 ; CHECK-LE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-LE-NEXT:    bfi r1, r2, #1, #1
 ; CHECK-LE-NEXT:    vcmp.f32 s3, #0
-; CHECK-LE-NEXT:    cset r2, gt
-; CHECK-LE-NEXT:    cmp r2, #0
-; CHECK-LE-NEXT:    csetm r2, ne
+; CHECK-LE-NEXT:    bfi r1, r2, #1, #1
+; CHECK-LE-NEXT:    csetm r2, gt
 ; CHECK-LE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-LE-NEXT:    bfi r1, r2, #2, #1
-; CHECK-LE-NEXT:    cset r2, gt
-; CHECK-LE-NEXT:    cmp r2, #0
-; CHECK-LE-NEXT:    csetm r2, ne
+; CHECK-LE-NEXT:    csetm r2, gt
 ; CHECK-LE-NEXT:    bfi r1, r2, #3, #1
 ; CHECK-LE-NEXT:    lsls r2, r1, #31
 ; CHECK-LE-NEXT:    bne .LBB26_5
@@ -1422,6 +1382,7 @@ define arm_aapcs_vfpcc void @masked_v4f16_align2(<4 x half> *%dest, <4 x float>
 ; CHECK-BE-NEXT:    .pad #4
 ; CHECK-BE-NEXT:    sub sp, #4
 ; CHECK-BE-NEXT:    vrev64.32 q1, q0
+; CHECK-BE-NEXT:    movs r1, #0
 ; CHECK-BE-NEXT:    vcmp.f32 s7, #0
 ; CHECK-BE-NEXT:    vcvtb.f16.f32 s0, s4
 ; CHECK-BE-NEXT:    vmrs APSR_nzcv, fpscr
@@ -1429,27 +1390,18 @@ define arm_aapcs_vfpcc void @masked_v4f16_align2(<4 x half> *%dest, <4 x float>
 ; CHECK-BE-NEXT:    vcvtt.f16.f32 s0, s5
 ; CHECK-BE-NEXT:    vcvtb.f16.f32 s2, s6
 ; CHECK-BE-NEXT:    vcvtt.f16.f32 s1, s7
-; CHECK-BE-NEXT:    cset r1, gt
-; CHECK-BE-NEXT:    cmp r1, #0
-; CHECK-BE-NEXT:    mov.w r1, #0
-; CHECK-BE-NEXT:    csetm r2, ne
+; CHECK-BE-NEXT:    csetm r2, gt
 ; CHECK-BE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-BE-NEXT:    bfi r1, r2, #0, #1
 ; CHECK-BE-NEXT:    vcmp.f32 s5, #0
-; CHECK-BE-NEXT:    cset r2, gt
-; CHECK-BE-NEXT:    cmp r2, #0
-; CHECK-BE-NEXT:    csetm r2, ne
+; CHECK-BE-NEXT:    bfi r1, r2, #0, #1
+; CHECK-BE-NEXT:    csetm r2, gt
 ; CHECK-BE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-BE-NEXT:    bfi r1, r2, #1, #1
 ; CHECK-BE-NEXT:    vcmp.f32 s4, #0
-; CHECK-BE-NEXT:    cset r2, gt
-; CHECK-BE-NEXT:    cmp r2, #0
-; CHECK-BE-NEXT:    csetm r2, ne
+; CHECK-BE-NEXT:    bfi r1, r2, #1, #1
+; CHECK-BE-NEXT:    csetm r2, gt
 ; CHECK-BE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-BE-NEXT:    bfi r1, r2, #2, #1
-; CHECK-BE-NEXT:    cset r2, gt
-; CHECK-BE-NEXT:    cmp r2, #0
-; CHECK-BE-NEXT:    csetm r2, ne
+; CHECK-BE-NEXT:    csetm r2, gt
 ; CHECK-BE-NEXT:    bfi r1, r2, #3, #1
 ; CHECK-BE-NEXT:    lsls r2, r1, #28
 ; CHECK-BE-NEXT:    bmi .LBB26_5
@@ -1496,33 +1448,25 @@ define arm_aapcs_vfpcc void @masked_v4f16_align1(<4 x half> *%dest, <4 x float>
 ; CHECK-LE-NEXT:    .pad #20
 ; CHECK-LE-NEXT:    sub sp, #20
 ; CHECK-LE-NEXT:    vcmp.f32 s0, #0
-; CHECK-LE-NEXT:    vcvtb.f16.f32 s4, s0
+; CHECK-LE-NEXT:    movs r1, #0
 ; CHECK-LE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-LE-NEXT:    vcmp.f32 s1, #0
-; CHECK-LE-NEXT:    vcvtt.f16.f32 s4, s1
+; CHECK-LE-NEXT:    vcvtb.f16.f32 s4, s0
 ; CHECK-LE-NEXT:    vcvtb.f16.f32 s6, s2
+; CHECK-LE-NEXT:    vcvtt.f16.f32 s4, s1
 ; CHECK-LE-NEXT:    vcvtt.f16.f32 s5, s3
-; CHECK-LE-NEXT:    cset r1, gt
-; CHECK-LE-NEXT:    cmp r1, #0
-; CHECK-LE-NEXT:    mov.w r1, #0
-; CHECK-LE-NEXT:    csetm r2, ne
+; CHECK-LE-NEXT:    csetm r2, gt
 ; CHECK-LE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-LE-NEXT:    bfi r1, r2, #0, #1
 ; CHECK-LE-NEXT:    vcmp.f32 s2, #0
-; CHECK-LE-NEXT:    cset r2, gt
-; CHECK-LE-NEXT:    cmp r2, #0
-; CHECK-LE-NEXT:    csetm r2, ne
+; CHECK-LE-NEXT:    bfi r1, r2, #0, #1
+; CHECK-LE-NEXT:    csetm r2, gt
 ; CHECK-LE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-LE-NEXT:    bfi r1, r2, #1, #1
 ; CHECK-LE-NEXT:    vcmp.f32 s3, #0
-; CHECK-LE-NEXT:    cset r2, gt
-; CHECK-LE-NEXT:    cmp r2, #0
-; CHECK-LE-NEXT:    csetm r2, ne
+; CHECK-LE-NEXT:    bfi r1, r2, #1, #1
+; CHECK-LE-NEXT:    csetm r2, gt
 ; CHECK-LE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-LE-NEXT:    bfi r1, r2, #2, #1
-; CHECK-LE-NEXT:    cset r2, gt
-; CHECK-LE-NEXT:    cmp r2, #0
-; CHECK-LE-NEXT:    csetm r2, ne
+; CHECK-LE-NEXT:    csetm r2, gt
 ; CHECK-LE-NEXT:    bfi r1, r2, #3, #1
 ; CHECK-LE-NEXT:    lsls r2, r1, #31
 ; CHECK-LE-NEXT:    bne .LBB27_5
@@ -1570,6 +1514,7 @@ define arm_aapcs_vfpcc void @masked_v4f16_align1(<4 x half> *%dest, <4 x float>
 ; CHECK-BE-NEXT:    .pad #20
 ; CHECK-BE-NEXT:    sub sp, #20
 ; CHECK-BE-NEXT:    vrev64.32 q1, q0
+; CHECK-BE-NEXT:    movs r1, #0
 ; CHECK-BE-NEXT:    vcmp.f32 s7, #0
 ; CHECK-BE-NEXT:    vcvtb.f16.f32 s0, s4
 ; CHECK-BE-NEXT:    vmrs APSR_nzcv, fpscr
@@ -1577,27 +1522,18 @@ define arm_aapcs_vfpcc void @masked_v4f16_align1(<4 x half> *%dest, <4 x float>
 ; CHECK-BE-NEXT:    vcvtt.f16.f32 s0, s5
 ; CHECK-BE-NEXT:    vcvtb.f16.f32 s2, s6
 ; CHECK-BE-NEXT:    vcvtt.f16.f32 s1, s7
-; CHECK-BE-NEXT:    cset r1, gt
-; CHECK-BE-NEXT:    cmp r1, #0
-; CHECK-BE-NEXT:    mov.w r1, #0
-; CHECK-BE-NEXT:    csetm r2, ne
+; CHECK-BE-NEXT:    csetm r2, gt
 ; CHECK-BE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-BE-NEXT:    bfi r1, r2, #0, #1
 ; CHECK-BE-NEXT:    vcmp.f32 s5, #0
-; CHECK-BE-NEXT:    cset r2, gt
-; CHECK-BE-NEXT:    cmp r2, #0
-; CHECK-BE-NEXT:    csetm r2, ne
+; CHECK-BE-NEXT:    bfi r1, r2, #0, #1
+; CHECK-BE-NEXT:    csetm r2, gt
 ; CHECK-BE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-BE-NEXT:    bfi r1, r2, #1, #1
 ; CHECK-BE-NEXT:    vcmp.f32 s4, #0
-; CHECK-BE-NEXT:    cset r2, gt
-; CHECK-BE-NEXT:    cmp r2, #0
-; CHECK-BE-NEXT:    csetm r2, ne
+; CHECK-BE-NEXT:    bfi r1, r2, #1, #1
+; CHECK-BE-NEXT:    csetm r2, gt
 ; CHECK-BE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-BE-NEXT:    bfi r1, r2, #2, #1
-; CHECK-BE-NEXT:    cset r2, gt
-; CHECK-BE-NEXT:    cmp r2, #0
-; CHECK-BE-NEXT:    csetm r2, ne
+; CHECK-BE-NEXT:    csetm r2, gt
 ; CHECK-BE-NEXT:    bfi r1, r2, #3, #1
 ; CHECK-BE-NEXT:    lsls r2, r1, #28
 ; CHECK-BE-NEXT:    bmi .LBB27_5

diff  --git a/llvm/test/CodeGen/Thumb2/mve-minmax.ll b/llvm/test/CodeGen/Thumb2/mve-minmax.ll
index 92355a8256eb4..d536e6b72ac9c 100644
--- a/llvm/test/CodeGen/Thumb2/mve-minmax.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-minmax.ll
@@ -47,15 +47,11 @@ define arm_aapcs_vfpcc <2 x i64> @smin_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
 ; CHECK-NEXT:    subs r0, r2, r0
 ; CHECK-NEXT:    sbcs.w r0, r3, r1
 ; CHECK-NEXT:    mov.w r1, #0
-; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    bfi r1, r0, #0, #8
 ; CHECK-NEXT:    subs.w r0, r4, r12
 ; CHECK-NEXT:    sbcs.w r0, r5, lr
-; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    bfi r1, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r1
 ; CHECK-NEXT:    vpsel q0, q0, q1
@@ -111,15 +107,11 @@ define arm_aapcs_vfpcc <2 x i64> @umin_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
 ; CHECK-NEXT:    subs r0, r2, r0
 ; CHECK-NEXT:    sbcs.w r0, r3, r1
 ; CHECK-NEXT:    mov.w r1, #0
-; CHECK-NEXT:    cset r0, lo
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lo
 ; CHECK-NEXT:    bfi r1, r0, #0, #8
 ; CHECK-NEXT:    subs.w r0, r4, r12
 ; CHECK-NEXT:    sbcs.w r0, r5, lr
-; CHECK-NEXT:    cset r0, lo
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lo
 ; CHECK-NEXT:    bfi r1, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r1
 ; CHECK-NEXT:    vpsel q0, q0, q1
@@ -176,15 +168,11 @@ define arm_aapcs_vfpcc <2 x i64> @smax_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
 ; CHECK-NEXT:    subs r0, r2, r0
 ; CHECK-NEXT:    sbcs.w r0, r3, r1
 ; CHECK-NEXT:    mov.w r1, #0
-; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    bfi r1, r0, #0, #8
 ; CHECK-NEXT:    subs.w r0, r4, r12
 ; CHECK-NEXT:    sbcs.w r0, r5, lr
-; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    bfi r1, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r1
 ; CHECK-NEXT:    vpsel q0, q0, q1
@@ -240,15 +228,11 @@ define arm_aapcs_vfpcc <2 x i64> @umax_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
 ; CHECK-NEXT:    subs r0, r2, r0
 ; CHECK-NEXT:    sbcs.w r0, r3, r1
 ; CHECK-NEXT:    mov.w r1, #0
-; CHECK-NEXT:    cset r0, lo
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lo
 ; CHECK-NEXT:    bfi r1, r0, #0, #8
 ; CHECK-NEXT:    subs.w r0, r4, r12
 ; CHECK-NEXT:    sbcs.w r0, r5, lr
-; CHECK-NEXT:    cset r0, lo
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lo
 ; CHECK-NEXT:    bfi r1, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r1
 ; CHECK-NEXT:    vpsel q0, q0, q1
@@ -329,18 +313,12 @@ define arm_aapcs_vfpcc <2 x double> @maxnm_float64_t(<2 x double> %src1, <2 x do
 ; CHECK-NEXT:    vmov r12, r1, d9
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    vmov r2, r3, d11
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r0, #1
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov.w r4, #0
 ; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    movs r4, #0
 ; CHECK-NEXT:    bfi r4, r0, #0, #8
 ; CHECK-NEXT:    mov r0, r12
 ; CHECK-NEXT:    bl __aeabi_dcmpgt
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne r0, #1
-; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    csetm r0, ne
 ; CHECK-NEXT:    bfi r4, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r4

diff  --git a/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll b/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll
index 2b4f3d66fe64f..892be9a433073 100644
--- a/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll
@@ -153,21 +153,17 @@ define arm_aapcs_vfpcc <2 x i32> @smax2i32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-NEXT:    subs r1, r1, r3
 ; CHECK-NEXT:    sbcs.w r1, lr, r3, asr #31
 ; CHECK-NEXT:    asr.w r5, r3, #31
-; CHECK-NEXT:    cset r1, lt
 ; CHECK-NEXT:    asr.w r12, r0, #31
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    mov.w r3, #0
-; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    csetm r1, lt
 ; CHECK-NEXT:    subs r0, r0, r2
+; CHECK-NEXT:    mov.w r3, #0
 ; CHECK-NEXT:    sbcs.w r0, r12, r2, asr #31
 ; CHECK-NEXT:    bfi r3, r1, #0, #8
-; CHECK-NEXT:    cset r0, lt
+; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    asrs r4, r2, #31
-; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    bfi r3, r0, #8, #8
 ; CHECK-NEXT:    vmov q1[3], q1[1], lr, r12
-; CHECK-NEXT:    csetm r0, ne
 ; CHECK-NEXT:    vmov q0[3], q0[1], r5, r4
-; CHECK-NEXT:    bfi r3, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r3
 ; CHECK-NEXT:    vpsel q0, q0, q1
 ; CHECK-NEXT:    pop {r4, r5, r7, pc}
@@ -233,17 +229,13 @@ define arm_aapcs_vfpcc <2 x i64> @smax2i64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-NEXT:    subs r0, r2, r0
 ; CHECK-NEXT:    sbcs.w r0, r3, r1
 ; CHECK-NEXT:    mov.w r1, #0
-; CHECK-NEXT:    cset r0, lt
+; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    vmov r3, r2, d3
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
 ; CHECK-NEXT:    bfi r1, r0, #0, #8
 ; CHECK-NEXT:    vmov r0, r12, d1
 ; CHECK-NEXT:    subs r0, r3, r0
 ; CHECK-NEXT:    sbcs.w r0, r2, r12
-; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    bfi r1, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r1
 ; CHECK-NEXT:    vpsel q0, q0, q1
@@ -265,17 +257,13 @@ define arm_aapcs_vfpcc void @smax4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p)
 ; CHECK-NEXT:    mov.w r3, #0
 ; CHECK-NEXT:    sbcs.w r1, r2, r12
 ; CHECK-NEXT:    vmov lr, r12, d3
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    mov.w r1, #0
-; CHECK-NEXT:    csetm r2, ne
+; CHECK-NEXT:    csetm r2, lt
+; CHECK-NEXT:    movs r1, #0
 ; CHECK-NEXT:    bfi r3, r2, #0, #8
 ; CHECK-NEXT:    vmov r2, r4, d7
 ; CHECK-NEXT:    subs.w r2, r2, lr
 ; CHECK-NEXT:    sbcs.w r2, r4, r12
-; CHECK-NEXT:    cset r2, lt
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csetm r2, ne
+; CHECK-NEXT:    csetm r2, lt
 ; CHECK-NEXT:    bfi r3, r2, #8, #8
 ; CHECK-NEXT:    vmov r2, r12, d0
 ; CHECK-NEXT:    vmsr p0, r3
@@ -285,16 +273,12 @@ define arm_aapcs_vfpcc void @smax4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p)
 ; CHECK-NEXT:    subs r2, r4, r2
 ; CHECK-NEXT:    sbcs.w r2, r3, r12
 ; CHECK-NEXT:    vmov r4, r3, d5
-; CHECK-NEXT:    cset r2, lt
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csetm r2, ne
+; CHECK-NEXT:    csetm r2, lt
 ; CHECK-NEXT:    bfi r1, r2, #0, #8
 ; CHECK-NEXT:    vmov r2, r12, d1
 ; CHECK-NEXT:    subs r2, r4, r2
 ; CHECK-NEXT:    sbcs.w r2, r3, r12
-; CHECK-NEXT:    cset r2, lt
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csetm r2, ne
+; CHECK-NEXT:    csetm r2, lt
 ; CHECK-NEXT:    bfi r1, r2, #8, #8
 ; CHECK-NEXT:    vmsr p0, r1
 ; CHECK-NEXT:    vpsel q0, q0, q2
@@ -453,17 +437,13 @@ define arm_aapcs_vfpcc <2 x i32> @umax2i32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-NEXT:    subs r0, r2, r0
 ; CHECK-NEXT:    sbcs.w r0, r3, r1
 ; CHECK-NEXT:    mov.w r1, #0
-; CHECK-NEXT:    cset r0, lo
+; CHECK-NEXT:    csetm r0, lo
 ; CHECK-NEXT:    vmov r3, r2, d3
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
 ; CHECK-NEXT:    bfi r1, r0, #0, #8
 ; CHECK-NEXT:    vmov r0, r12, d1
 ; CHECK-NEXT:    subs r0, r3, r0
 ; CHECK-NEXT:    sbcs.w r0, r2, r12
-; CHECK-NEXT:    cset r0, lo
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lo
 ; CHECK-NEXT:    bfi r1, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r1
 ; CHECK-NEXT:    vpsel q0, q0, q1
@@ -530,17 +510,13 @@ define arm_aapcs_vfpcc <2 x i64> @umax2i64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-NEXT:    subs r0, r2, r0
 ; CHECK-NEXT:    sbcs.w r0, r3, r1
 ; CHECK-NEXT:    mov.w r1, #0
-; CHECK-NEXT:    cset r0, lo
+; CHECK-NEXT:    csetm r0, lo
 ; CHECK-NEXT:    vmov r3, r2, d3
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
 ; CHECK-NEXT:    bfi r1, r0, #0, #8
 ; CHECK-NEXT:    vmov r0, r12, d1
 ; CHECK-NEXT:    subs r0, r3, r0
 ; CHECK-NEXT:    sbcs.w r0, r2, r12
-; CHECK-NEXT:    cset r0, lo
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lo
 ; CHECK-NEXT:    bfi r1, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r1
 ; CHECK-NEXT:    vpsel q0, q0, q1
@@ -562,17 +538,13 @@ define arm_aapcs_vfpcc void @umax4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p)
 ; CHECK-NEXT:    mov.w r3, #0
 ; CHECK-NEXT:    sbcs.w r1, r2, r12
 ; CHECK-NEXT:    vmov lr, r12, d3
-; CHECK-NEXT:    cset r1, lo
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    mov.w r1, #0
-; CHECK-NEXT:    csetm r2, ne
+; CHECK-NEXT:    csetm r2, lo
+; CHECK-NEXT:    movs r1, #0
 ; CHECK-NEXT:    bfi r3, r2, #0, #8
 ; CHECK-NEXT:    vmov r2, r4, d7
 ; CHECK-NEXT:    subs.w r2, r2, lr
 ; CHECK-NEXT:    sbcs.w r2, r4, r12
-; CHECK-NEXT:    cset r2, lo
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csetm r2, ne
+; CHECK-NEXT:    csetm r2, lo
 ; CHECK-NEXT:    bfi r3, r2, #8, #8
 ; CHECK-NEXT:    vmov r2, r12, d0
 ; CHECK-NEXT:    vmsr p0, r3
@@ -582,16 +554,12 @@ define arm_aapcs_vfpcc void @umax4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p)
 ; CHECK-NEXT:    subs r2, r4, r2
 ; CHECK-NEXT:    sbcs.w r2, r3, r12
 ; CHECK-NEXT:    vmov r4, r3, d5
-; CHECK-NEXT:    cset r2, lo
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csetm r2, ne
+; CHECK-NEXT:    csetm r2, lo
 ; CHECK-NEXT:    bfi r1, r2, #0, #8
 ; CHECK-NEXT:    vmov r2, r12, d1
 ; CHECK-NEXT:    subs r2, r4, r2
 ; CHECK-NEXT:    sbcs.w r2, r3, r12
-; CHECK-NEXT:    cset r2, lo
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csetm r2, ne
+; CHECK-NEXT:    csetm r2, lo
 ; CHECK-NEXT:    bfi r1, r2, #8, #8
 ; CHECK-NEXT:    vmsr p0, r1
 ; CHECK-NEXT:    vpsel q0, q0, q2
@@ -747,29 +715,25 @@ define arm_aapcs_vfpcc <2 x i32> @smin2i32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-NEXT:    vmov r0, s6
 ; CHECK-NEXT:    vmov r1, s4
 ; CHECK-NEXT:    vmov q1[2], q1[0], r1, r0
-; CHECK-NEXT:    vmov lr, s2
 ; CHECK-NEXT:    asrs r2, r0, #31
 ; CHECK-NEXT:    asrs r3, r1, #31
 ; CHECK-NEXT:    vmov q1[3], q1[1], r3, r2
 ; CHECK-NEXT:    vmov r3, s0
-; CHECK-NEXT:    vmov q0[2], q0[0], r3, lr
-; CHECK-NEXT:    asr.w r12, lr, #31
-; CHECK-NEXT:    asrs r2, r3, #31
+; CHECK-NEXT:    vmov r2, s2
+; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
+; CHECK-NEXT:    asr.w lr, r3, #31
 ; CHECK-NEXT:    subs r3, r3, r1
-; CHECK-NEXT:    sbcs.w r1, r2, r1, asr #31
-; CHECK-NEXT:    vmov q0[3], q0[1], r2, r12
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    movs r2, #0
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csetm r1, ne
-; CHECK-NEXT:    bfi r2, r1, #0, #8
-; CHECK-NEXT:    subs.w r1, lr, r0
+; CHECK-NEXT:    sbcs.w r1, lr, r1, asr #31
+; CHECK-NEXT:    mov.w r3, #0
+; CHECK-NEXT:    csetm r1, lt
+; CHECK-NEXT:    asr.w r12, r2, #31
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    subs r1, r2, r0
 ; CHECK-NEXT:    sbcs.w r0, r12, r0, asr #31
-; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
-; CHECK-NEXT:    bfi r2, r0, #8, #8
-; CHECK-NEXT:    vmsr p0, r2
+; CHECK-NEXT:    vmov q0[3], q0[1], lr, r12
+; CHECK-NEXT:    csetm r0, lt
+; CHECK-NEXT:    bfi r3, r0, #8, #8
+; CHECK-NEXT:    vmsr p0, r3
 ; CHECK-NEXT:    vpsel q0, q0, q1
 ; CHECK-NEXT:    pop {r7, pc}
   %c = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %a, <2 x i32> %b)
@@ -834,17 +798,13 @@ define arm_aapcs_vfpcc <2 x i64> @smin2i64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-NEXT:    subs r0, r2, r0
 ; CHECK-NEXT:    sbcs.w r0, r3, r1
 ; CHECK-NEXT:    mov.w r1, #0
-; CHECK-NEXT:    cset r0, lt
+; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    vmov r3, r2, d1
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
 ; CHECK-NEXT:    bfi r1, r0, #0, #8
 ; CHECK-NEXT:    vmov r0, r12, d3
 ; CHECK-NEXT:    subs r0, r3, r0
 ; CHECK-NEXT:    sbcs.w r0, r2, r12
-; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    bfi r1, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r1
 ; CHECK-NEXT:    vpsel q0, q0, q1
@@ -866,17 +826,13 @@ define arm_aapcs_vfpcc void @smin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p)
 ; CHECK-NEXT:    mov.w r3, #0
 ; CHECK-NEXT:    sbcs.w r1, r2, r12
 ; CHECK-NEXT:    vmov lr, r12, d7
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    mov.w r1, #0
-; CHECK-NEXT:    csetm r2, ne
+; CHECK-NEXT:    csetm r2, lt
+; CHECK-NEXT:    movs r1, #0
 ; CHECK-NEXT:    bfi r3, r2, #0, #8
 ; CHECK-NEXT:    vmov r2, r4, d3
 ; CHECK-NEXT:    subs.w r2, r2, lr
 ; CHECK-NEXT:    sbcs.w r2, r4, r12
-; CHECK-NEXT:    cset r2, lt
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csetm r2, ne
+; CHECK-NEXT:    csetm r2, lt
 ; CHECK-NEXT:    bfi r3, r2, #8, #8
 ; CHECK-NEXT:    vmov r2, r12, d4
 ; CHECK-NEXT:    vmsr p0, r3
@@ -886,16 +842,12 @@ define arm_aapcs_vfpcc void @smin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p)
 ; CHECK-NEXT:    subs r2, r4, r2
 ; CHECK-NEXT:    sbcs.w r2, r3, r12
 ; CHECK-NEXT:    vmov r4, r3, d1
-; CHECK-NEXT:    cset r2, lt
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csetm r2, ne
+; CHECK-NEXT:    csetm r2, lt
 ; CHECK-NEXT:    bfi r1, r2, #0, #8
 ; CHECK-NEXT:    vmov r2, r12, d5
 ; CHECK-NEXT:    subs r2, r4, r2
 ; CHECK-NEXT:    sbcs.w r2, r3, r12
-; CHECK-NEXT:    cset r2, lt
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csetm r2, ne
+; CHECK-NEXT:    csetm r2, lt
 ; CHECK-NEXT:    bfi r1, r2, #8, #8
 ; CHECK-NEXT:    vmsr p0, r1
 ; CHECK-NEXT:    vpsel q0, q0, q2
@@ -1054,17 +1006,13 @@ define arm_aapcs_vfpcc <2 x i32> @umin2i32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-NEXT:    subs r0, r2, r0
 ; CHECK-NEXT:    sbcs.w r0, r3, r1
 ; CHECK-NEXT:    mov.w r1, #0
-; CHECK-NEXT:    cset r0, lo
+; CHECK-NEXT:    csetm r0, lo
 ; CHECK-NEXT:    vmov r3, r2, d1
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
 ; CHECK-NEXT:    bfi r1, r0, #0, #8
 ; CHECK-NEXT:    vmov r0, r12, d3
 ; CHECK-NEXT:    subs r0, r3, r0
 ; CHECK-NEXT:    sbcs.w r0, r2, r12
-; CHECK-NEXT:    cset r0, lo
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lo
 ; CHECK-NEXT:    bfi r1, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r1
 ; CHECK-NEXT:    vpsel q0, q0, q1
@@ -1131,17 +1079,13 @@ define arm_aapcs_vfpcc <2 x i64> @umin2i64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-NEXT:    subs r0, r2, r0
 ; CHECK-NEXT:    sbcs.w r0, r3, r1
 ; CHECK-NEXT:    mov.w r1, #0
-; CHECK-NEXT:    cset r0, lo
+; CHECK-NEXT:    csetm r0, lo
 ; CHECK-NEXT:    vmov r3, r2, d1
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
 ; CHECK-NEXT:    bfi r1, r0, #0, #8
 ; CHECK-NEXT:    vmov r0, r12, d3
 ; CHECK-NEXT:    subs r0, r3, r0
 ; CHECK-NEXT:    sbcs.w r0, r2, r12
-; CHECK-NEXT:    cset r0, lo
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lo
 ; CHECK-NEXT:    bfi r1, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r1
 ; CHECK-NEXT:    vpsel q0, q0, q1
@@ -1163,17 +1107,13 @@ define arm_aapcs_vfpcc void @umin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p)
 ; CHECK-NEXT:    mov.w r3, #0
 ; CHECK-NEXT:    sbcs.w r1, r2, r12
 ; CHECK-NEXT:    vmov lr, r12, d7
-; CHECK-NEXT:    cset r1, lo
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    mov.w r1, #0
-; CHECK-NEXT:    csetm r2, ne
+; CHECK-NEXT:    csetm r2, lo
+; CHECK-NEXT:    movs r1, #0
 ; CHECK-NEXT:    bfi r3, r2, #0, #8
 ; CHECK-NEXT:    vmov r2, r4, d3
 ; CHECK-NEXT:    subs.w r2, r2, lr
 ; CHECK-NEXT:    sbcs.w r2, r4, r12
-; CHECK-NEXT:    cset r2, lo
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csetm r2, ne
+; CHECK-NEXT:    csetm r2, lo
 ; CHECK-NEXT:    bfi r3, r2, #8, #8
 ; CHECK-NEXT:    vmov r2, r12, d4
 ; CHECK-NEXT:    vmsr p0, r3
@@ -1183,16 +1123,12 @@ define arm_aapcs_vfpcc void @umin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p)
 ; CHECK-NEXT:    subs r2, r4, r2
 ; CHECK-NEXT:    sbcs.w r2, r3, r12
 ; CHECK-NEXT:    vmov r4, r3, d1
-; CHECK-NEXT:    cset r2, lo
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csetm r2, ne
+; CHECK-NEXT:    csetm r2, lo
 ; CHECK-NEXT:    bfi r1, r2, #0, #8
 ; CHECK-NEXT:    vmov r2, r12, d5
 ; CHECK-NEXT:    subs r2, r4, r2
 ; CHECK-NEXT:    sbcs.w r2, r3, r12
-; CHECK-NEXT:    cset r2, lo
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csetm r2, ne
+; CHECK-NEXT:    csetm r2, lo
 ; CHECK-NEXT:    bfi r1, r2, #8, #8
 ; CHECK-NEXT:    vmsr p0, r1
 ; CHECK-NEXT:    vpsel q0, q0, q2

diff  --git a/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll b/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll
index e919891c446e5..ea7a26ee3a9ee 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll
@@ -79,14 +79,10 @@ define arm_aapcs_vfpcc <2 x i64> @sext_v2i1_v2i64(<2 x i64> %src) {
 ; CHECK-NEXT:    vmov r2, r3, d0
 ; CHECK-NEXT:    rsbs r0, r0, #0
 ; CHECK-NEXT:    sbcs.w r0, r12, r1
-; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    rsbs r1, r2, #0
 ; CHECK-NEXT:    sbcs.w r1, r12, r3
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    csetm r1, lt
 ; CHECK-NEXT:    vmov q0[2], q0[0], r1, r0
 ; CHECK-NEXT:    vmov q0[3], q0[1], r1, r0
 ; CHECK-NEXT:    bx lr
@@ -105,24 +101,22 @@ define arm_aapcs_vfpcc <2 x i64> @sext_v2i1_v2f64(<2 x double> %src) {
 ; CHECK-NEXT:    vpush {d8, d9}
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI6_0
-; CHECK-NEXT:    vmov r0, r1, d8
+; CHECK-NEXT:    vmov r0, r1, d9
 ; CHECK-NEXT:    vmov r4, r5, d0
 ; CHECK-NEXT:    mov r2, r4
 ; CHECK-NEXT:    mov r3, r5
 ; CHECK-NEXT:    bl __aeabi_dcmpeq
 ; CHECK-NEXT:    mov r6, r0
-; CHECK-NEXT:    vmov r0, r1, d9
+; CHECK-NEXT:    vmov r0, r1, d8
 ; CHECK-NEXT:    mov r2, r4
 ; CHECK-NEXT:    mov r3, r5
 ; CHECK-NEXT:    bl __aeabi_dcmpeq
-; CHECK-NEXT:    clz r0, r0
-; CHECK-NEXT:    clz r1, r6
-; CHECK-NEXT:    lsrs r0, r0, #5
-; CHECK-NEXT:    csetm r0, ne
-; CHECK-NEXT:    lsrs r1, r1, #5
-; CHECK-NEXT:    csetm r1, ne
-; CHECK-NEXT:    vmov q0[2], q0[0], r1, r0
-; CHECK-NEXT:    vmov q0[3], q0[1], r1, r0
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    csetm r0, eq
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r1
+; CHECK-NEXT:    vmov q0[3], q0[1], r0, r1
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    pop {r4, r5, r6, pc}
 ; CHECK-NEXT:    .p2align 3
@@ -218,14 +212,10 @@ define arm_aapcs_vfpcc <2 x i64> @zext_v2i1_v2i64(<2 x i64> %src) {
 ; CHECK-NEXT:    rsbs r0, r0, #0
 ; CHECK-NEXT:    sbcs.w r0, r12, r1
 ; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    cset r0, ne
 ; CHECK-NEXT:    rsbs r1, r2, #0
 ; CHECK-NEXT:    sbcs.w r1, r12, r3
 ; CHECK-NEXT:    vmov s2, r0
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    cset r0, ne
+; CHECK-NEXT:    cset r0, lt
 ; CHECK-NEXT:    vmov s0, r0
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 2
@@ -247,27 +237,24 @@ define arm_aapcs_vfpcc <2 x i64> @zext_v2i1_v2f64(<2 x double> %src) {
 ; CHECK-NEXT:    vpush {d8, d9}
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vldr d0, .LCPI13_0
-; CHECK-NEXT:    vmov r0, r1, d9
+; CHECK-NEXT:    vmov r0, r1, d8
 ; CHECK-NEXT:    vmov r4, r5, d0
 ; CHECK-NEXT:    mov r2, r4
 ; CHECK-NEXT:    mov r3, r5
 ; CHECK-NEXT:    bl __aeabi_dcmpeq
-; CHECK-NEXT:    vmov r2, r1, d8
-; CHECK-NEXT:    clz r0, r0
-; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:    vldr s17, .LCPI13_1
-; CHECK-NEXT:    lsrs r0, r0, #5
-; CHECK-NEXT:    cset r6, ne
-; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    vmov r0, r1, d9
 ; CHECK-NEXT:    mov r2, r4
+; CHECK-NEXT:    mov r3, r5
 ; CHECK-NEXT:    bl __aeabi_dcmpeq
-; CHECK-NEXT:    clz r0, r0
-; CHECK-NEXT:    vmov s18, r6
-; CHECK-NEXT:    vmov.f32 s19, s17
-; CHECK-NEXT:    lsrs r0, r0, #5
-; CHECK-NEXT:    cset r0, ne
-; CHECK-NEXT:    vmov s16, r0
-; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    vldr s1, .LCPI13_1
+; CHECK-NEXT:    cset r0, eq
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    vmov s2, r0
+; CHECK-NEXT:    cset r0, eq
+; CHECK-NEXT:    vmov s0, r0
+; CHECK-NEXT:    vmov.f32 s3, s1
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    pop {r4, r5, r6, pc}
 ; CHECK-NEXT:    .p2align 3
@@ -472,21 +459,18 @@ define arm_aapcs_vfpcc <2 x double> @uitofp_v2i1_v2f64(<2 x i64> %src) {
 ; CHECK-NEXT:    push {r4, lr}
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    vmov r0, r1, d0
-; CHECK-NEXT:    mov.w r12, #0
-; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    vmov q4, q0
+; CHECK-NEXT:    movs r4, #0
+; CHECK-NEXT:    vmov r0, r1, d9
 ; CHECK-NEXT:    rsbs r0, r0, #0
-; CHECK-NEXT:    sbcs.w r0, r12, r1
-; CHECK-NEXT:    cset r4, lt
-; CHECK-NEXT:    rsbs r0, r2, #0
-; CHECK-NEXT:    sbcs.w r0, r12, r3
+; CHECK-NEXT:    sbcs.w r0, r4, r1
 ; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    cset r0, ne
 ; CHECK-NEXT:    bl __aeabi_ui2d
-; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    vmov r2, r3, d8
 ; CHECK-NEXT:    vmov d9, r0, r1
-; CHECK-NEXT:    cset r2, ne
+; CHECK-NEXT:    rsbs r2, r2, #0
+; CHECK-NEXT:    sbcs.w r2, r4, r3
+; CHECK-NEXT:    cset r2, lt
 ; CHECK-NEXT:    mov r0, r2
 ; CHECK-NEXT:    bl __aeabi_ui2d
 ; CHECK-NEXT:    vmov d8, r0, r1
@@ -506,21 +490,18 @@ define arm_aapcs_vfpcc <2 x double> @sitofp_v2i1_v2f64(<2 x i64> %src) {
 ; CHECK-NEXT:    push {r4, lr}
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    vmov r0, r1, d0
-; CHECK-NEXT:    mov.w r12, #0
-; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    vmov q4, q0
+; CHECK-NEXT:    movs r4, #0
+; CHECK-NEXT:    vmov r0, r1, d9
 ; CHECK-NEXT:    rsbs r0, r0, #0
-; CHECK-NEXT:    sbcs.w r0, r12, r1
-; CHECK-NEXT:    cset r4, lt
-; CHECK-NEXT:    rsbs r0, r2, #0
-; CHECK-NEXT:    sbcs.w r0, r12, r3
-; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    sbcs.w r0, r4, r1
+; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    bl __aeabi_i2d
-; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    vmov r2, r3, d8
 ; CHECK-NEXT:    vmov d9, r0, r1
-; CHECK-NEXT:    csetm r2, ne
+; CHECK-NEXT:    rsbs r2, r2, #0
+; CHECK-NEXT:    sbcs.w r2, r4, r3
+; CHECK-NEXT:    csetm r2, lt
 ; CHECK-NEXT:    mov r0, r2
 ; CHECK-NEXT:    bl __aeabi_i2d
 ; CHECK-NEXT:    vmov d8, r0, r1

diff  --git a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
index 20112715a0a45..251b187e7bcf2 100644
--- a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
@@ -6,8 +6,8 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32*
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-NEXT:    .pad #8
-; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    .pad #12
+; CHECK-NEXT:    sub sp, #12
 ; CHECK-NEXT:    cmp r3, #0
 ; CHECK-NEXT:    beq.w .LBB0_8
 ; CHECK-NEXT:  @ %bb.1: @ %entry
@@ -16,64 +16,57 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32*
 ; CHECK-NEXT:  @ %bb.2:
 ; CHECK-NEXT:    movs r7, #0
 ; CHECK-NEXT:    mov r12, r0
-; CHECK-NEXT:    mov r10, r1
-; CHECK-NEXT:    mov r11, r2
+; CHECK-NEXT:    mov r6, r1
+; CHECK-NEXT:    mov r10, r2
 ; CHECK-NEXT:    b .LBB0_6
 ; CHECK-NEXT:  .LBB0_3: @ %vector.ph
-; CHECK-NEXT:    str r3, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    bic r3, r3, #1
-; CHECK-NEXT:    subs r7, r3, #2
+; CHECK-NEXT:    bic r5, r3, #1
 ; CHECK-NEXT:    adr r4, .LCPI0_0
+; CHECK-NEXT:    subs r7, r5, #2
+; CHECK-NEXT:    str r3, [sp, #4] @ 4-byte Spill
 ; CHECK-NEXT:    movs r6, #1
+; CHECK-NEXT:    add.w r3, r1, r5, lsl #2
 ; CHECK-NEXT:    vldrw.u32 q0, [r4]
 ; CHECK-NEXT:    adr r4, .LCPI0_1
 ; CHECK-NEXT:    add.w lr, r6, r7, lsr #1
-; CHECK-NEXT:    str r3, [sp] @ 4-byte Spill
-; CHECK-NEXT:    add.w r11, r2, r3, lsl #2
-; CHECK-NEXT:    add.w r10, r1, r3, lsl #2
-; CHECK-NEXT:    add.w r12, r0, r3, lsl #2
+; CHECK-NEXT:    str r3, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    str r5, [sp] @ 4-byte Spill
+; CHECK-NEXT:    add.w r10, r2, r5, lsl #2
+; CHECK-NEXT:    add.w r12, r0, r5, lsl #2
 ; CHECK-NEXT:    vldrw.u32 q1, [r4]
 ; CHECK-NEXT:  .LBB0_4: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldrd r4, r5, [r0], #8
-; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    ldrd r4, r6, [r0], #8
+; CHECK-NEXT:    movs r5, #0
 ; CHECK-NEXT:    ldrd r7, r8, [r1], #8
-; CHECK-NEXT:    smull r8, r5, r8, r5
-; CHECK-NEXT:    smull r4, r7, r7, r4
-; CHECK-NEXT:    asrl r8, r5, #31
-; CHECK-NEXT:    asrl r4, r7, #31
+; CHECK-NEXT:    smull r4, r11, r7, r4
+; CHECK-NEXT:    asrl r4, r11, #31
 ; CHECK-NEXT:    rsbs.w r9, r4, #-2147483648
-; CHECK-NEXT:    vmov q2[2], q2[0], r4, r8
 ; CHECK-NEXT:    mov.w r9, #-1
-; CHECK-NEXT:    sbcs.w r3, r9, r7
-; CHECK-NEXT:    cset r3, lt
-; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    csetm r3, ne
-; CHECK-NEXT:    bfi r6, r3, #0, #8
-; CHECK-NEXT:    rsbs.w r3, r8, #-2147483648
-; CHECK-NEXT:    sbcs.w r3, r9, r5
-; CHECK-NEXT:    cset r3, lt
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    csetm r3, ne
-; CHECK-NEXT:    bfi r6, r3, #8, #8
-; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    sbcs.w r3, r9, r11
+; CHECK-NEXT:    csetm r3, lt
+; CHECK-NEXT:    bfi r5, r3, #0, #8
+; CHECK-NEXT:    smull r6, r3, r8, r6
+; CHECK-NEXT:    asrl r6, r3, #31
+; CHECK-NEXT:    rsbs.w r7, r6, #-2147483648
+; CHECK-NEXT:    vmov q2[2], q2[0], r4, r6
+; CHECK-NEXT:    sbcs.w r7, r9, r3
+; CHECK-NEXT:    vmov q2[3], q2[1], r11, r3
+; CHECK-NEXT:    csetm r7, lt
 ; CHECK-NEXT:    mvn r6, #-2147483648
+; CHECK-NEXT:    bfi r5, r7, #8, #8
+; CHECK-NEXT:    vmsr p0, r5
 ; CHECK-NEXT:    vpsel q2, q2, q0
 ; CHECK-NEXT:    vmov r3, r4, d4
 ; CHECK-NEXT:    subs r3, r3, r6
 ; CHECK-NEXT:    sbcs r3, r4, #0
 ; CHECK-NEXT:    mov.w r4, #0
-; CHECK-NEXT:    cset r3, lt
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    csetm r3, ne
+; CHECK-NEXT:    csetm r3, lt
 ; CHECK-NEXT:    bfi r4, r3, #0, #8
 ; CHECK-NEXT:    vmov r3, r5, d5
 ; CHECK-NEXT:    subs r3, r3, r6
 ; CHECK-NEXT:    sbcs r3, r5, #0
-; CHECK-NEXT:    cset r3, lt
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    csetm r3, ne
+; CHECK-NEXT:    csetm r3, lt
 ; CHECK-NEXT:    bfi r4, r3, #8, #8
 ; CHECK-NEXT:    vmsr p0, r4
 ; CHECK-NEXT:    vpsel q2, q2, q1
@@ -83,6 +76,7 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32*
 ; CHECK-NEXT:    le lr, .LBB0_4
 ; CHECK-NEXT:  @ %bb.5: @ %middle.block
 ; CHECK-NEXT:    ldrd r7, r3, [sp] @ 8-byte Folded Reload
+; CHECK-NEXT:    ldr r6, [sp, #8] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r7, r3
 ; CHECK-NEXT:    beq .LBB0_8
 ; CHECK-NEXT:  .LBB0_6: @ %for.body.preheader
@@ -93,7 +87,7 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32*
 ; CHECK-NEXT:  .LBB0_7: @ %for.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ldr r3, [r12], #4
-; CHECK-NEXT:    ldr r4, [r10], #4
+; CHECK-NEXT:    ldr r4, [r6], #4
 ; CHECK-NEXT:    smull r4, r3, r4, r3
 ; CHECK-NEXT:    asrl r4, r3, #31
 ; CHECK-NEXT:    subs r5, r1, r4
@@ -105,10 +99,10 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32*
 ; CHECK-NEXT:    subs r5, r4, r2
 ; CHECK-NEXT:    sbcs r3, r3, #0
 ; CHECK-NEXT:    csel r3, r4, r2, lt
-; CHECK-NEXT:    str r3, [r11], #4
+; CHECK-NEXT:    str r3, [r10], #4
 ; CHECK-NEXT:    le lr, .LBB0_7
 ; CHECK-NEXT:  .LBB0_8: @ %for.cond.cleanup
-; CHECK-NEXT:    add sp, #8
+; CHECK-NEXT:    add sp, #12
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.9:
@@ -212,122 +206,108 @@ define arm_aapcs_vfpcc void @ssatmul_4_q31(i32* nocapture readonly %pSrcA, i32*
 ; CHECK-NEXT:    cmp r3, #0
 ; CHECK-NEXT:    beq.w .LBB1_8
 ; CHECK-NEXT:  @ %bb.1: @ %for.body.preheader
-; CHECK-NEXT:    movs r7, #0
+; CHECK-NEXT:    mov r5, r1
+; CHECK-NEXT:    movs r1, #0
 ; CHECK-NEXT:    cmp r3, #3
 ; CHECK-NEXT:    bhi .LBB1_3
 ; CHECK-NEXT:  @ %bb.2:
 ; CHECK-NEXT:    mov r12, r0
-; CHECK-NEXT:    mov r0, r1
+; CHECK-NEXT:    mov r9, r5
 ; CHECK-NEXT:    mov r11, r2
 ; CHECK-NEXT:    b .LBB1_6
 ; CHECK-NEXT:  .LBB1_3: @ %vector.ph
-; CHECK-NEXT:    str r3, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    bic r3, r3, #3
-; CHECK-NEXT:    subs r7, r3, #4
+; CHECK-NEXT:    bic r1, r3, #3
 ; CHECK-NEXT:    adr r4, .LCPI1_0
+; CHECK-NEXT:    subs r7, r1, #4
 ; CHECK-NEXT:    movs r6, #1
 ; CHECK-NEXT:    vldrw.u32 q0, [r4]
-; CHECK-NEXT:    add.w lr, r6, r7, lsr #2
-; CHECK-NEXT:    add.w r7, r1, r3, lsl #2
-; CHECK-NEXT:    strd r7, r3, [sp, #4] @ 8-byte Folded Spill
 ; CHECK-NEXT:    adr r4, .LCPI1_1
-; CHECK-NEXT:    add.w r11, r2, r3, lsl #2
-; CHECK-NEXT:    add.w r12, r0, r3, lsl #2
+; CHECK-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    add.w lr, r6, r7, lsr #2
+; CHECK-NEXT:    str r1, [sp] @ 4-byte Spill
+; CHECK-NEXT:    add.w r11, r2, r1, lsl #2
+; CHECK-NEXT:    add.w r9, r5, r1, lsl #2
+; CHECK-NEXT:    add.w r12, r0, r1, lsl #2
 ; CHECK-NEXT:    vldrw.u32 q1, [r4]
-; CHECK-NEXT:    mov.w r9, #-1
 ; CHECK-NEXT:  .LBB1_4: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vldrw.u32 q4, [r5], #16
 ; CHECK-NEXT:    vldrw.u32 q3, [r0], #16
-; CHECK-NEXT:    vldrw.u32 q4, [r1], #16
-; CHECK-NEXT:    mov.w r3, #-1
-; CHECK-NEXT:    mov.w r8, #0
+; CHECK-NEXT:    str r2, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    mov.w r2, #-1
 ; CHECK-NEXT:    vmov.f32 s8, s14
-; CHECK-NEXT:    mov.w r6, #-1
+; CHECK-NEXT:    str r5, [sp, #8] @ 4-byte Spill
 ; CHECK-NEXT:    vmov.f32 s20, s18
+; CHECK-NEXT:    mov.w r8, #0
 ; CHECK-NEXT:    vmov.f32 s10, s15
 ; CHECK-NEXT:    vmov.f32 s22, s19
 ; CHECK-NEXT:    vmullb.s32 q6, q5, q2
-; CHECK-NEXT:    vmov.f32 s14, s13
+; CHECK-NEXT:    vmov.f32 s18, s17
 ; CHECK-NEXT:    vmov r4, r7, d12
 ; CHECK-NEXT:    asrl r4, r7, #31
-; CHECK-NEXT:    vmov.f32 s18, s17
+; CHECK-NEXT:    vmov.f32 s14, s13
 ; CHECK-NEXT:    rsbs.w r5, r4, #-2147483648
-; CHECK-NEXT:    sbcs.w r5, r3, r7
-; CHECK-NEXT:    cset r5, lt
-; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    csetm r5, ne
+; CHECK-NEXT:    sbcs.w r5, r2, r7
+; CHECK-NEXT:    csetm r5, lt
 ; CHECK-NEXT:    bfi r8, r5, #0, #8
 ; CHECK-NEXT:    vmov r10, r5, d13
 ; CHECK-NEXT:    asrl r10, r5, #31
+; CHECK-NEXT:    vmov r6, s18
 ; CHECK-NEXT:    rsbs.w r3, r10, #-2147483648
 ; CHECK-NEXT:    vmov q2[2], q2[0], r4, r10
-; CHECK-NEXT:    sbcs.w r3, r6, r5
+; CHECK-NEXT:    sbcs.w r3, r2, r5
 ; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
-; CHECK-NEXT:    cset r3, lt
-; CHECK-NEXT:    mvn r10, #-2147483648
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    mov.w r6, #0
-; CHECK-NEXT:    csetm r3, ne
+; CHECK-NEXT:    csetm r3, lt
 ; CHECK-NEXT:    bfi r8, r3, #8, #8
 ; CHECK-NEXT:    vmsr p0, r8
+; CHECK-NEXT:    mvn r8, #-2147483648
 ; CHECK-NEXT:    vpsel q2, q2, q0
 ; CHECK-NEXT:    vmov r3, r4, d4
-; CHECK-NEXT:    subs.w r3, r3, r10
+; CHECK-NEXT:    subs.w r3, r3, r8
 ; CHECK-NEXT:    sbcs r3, r4, #0
 ; CHECK-NEXT:    mov.w r4, #0
-; CHECK-NEXT:    cset r3, lt
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    csetm r3, ne
+; CHECK-NEXT:    csetm r3, lt
 ; CHECK-NEXT:    bfi r4, r3, #0, #8
 ; CHECK-NEXT:    vmov r3, r5, d5
-; CHECK-NEXT:    subs.w r3, r3, r10
+; CHECK-NEXT:    subs.w r3, r3, r8
 ; CHECK-NEXT:    sbcs r3, r5, #0
-; CHECK-NEXT:    vmov r5, s18
-; CHECK-NEXT:    cset r3, lt
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    csetm r3, ne
+; CHECK-NEXT:    mov.w r5, #0
+; CHECK-NEXT:    csetm r3, lt
 ; CHECK-NEXT:    bfi r4, r3, #8, #8
 ; CHECK-NEXT:    vmov r3, s12
 ; CHECK-NEXT:    vmsr p0, r4
 ; CHECK-NEXT:    vmov r4, s16
 ; CHECK-NEXT:    vpsel q2, q2, q1
-; CHECK-NEXT:    smull r8, r7, r4, r3
-; CHECK-NEXT:    asrl r8, r7, #31
-; CHECK-NEXT:    rsbs.w r3, r8, #-2147483648
-; CHECK-NEXT:    sbcs.w r3, r9, r7
-; CHECK-NEXT:    cset r3, lt
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    csetm r3, ne
-; CHECK-NEXT:    bfi r6, r3, #0, #8
-; CHECK-NEXT:    vmov r3, s14
-; CHECK-NEXT:    smull r4, r5, r5, r3
-; CHECK-NEXT:    asrl r4, r5, #31
+; CHECK-NEXT:    smull r4, r7, r4, r3
+; CHECK-NEXT:    asrl r4, r7, #31
 ; CHECK-NEXT:    rsbs.w r3, r4, #-2147483648
-; CHECK-NEXT:    vmov q3[2], q3[0], r8, r4
-; CHECK-NEXT:    sbcs.w r3, r9, r5
-; CHECK-NEXT:    vmov q3[3], q3[1], r7, r5
-; CHECK-NEXT:    cset r3, lt
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    csetm r3, ne
-; CHECK-NEXT:    bfi r6, r3, #8, #8
-; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    sbcs.w r3, r2, r7
+; CHECK-NEXT:    csetm r3, lt
+; CHECK-NEXT:    bfi r5, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s14
+; CHECK-NEXT:    smull r6, r3, r6, r3
+; CHECK-NEXT:    asrl r6, r3, #31
+; CHECK-NEXT:    rsbs.w r1, r6, #-2147483648
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r6
+; CHECK-NEXT:    sbcs.w r1, r2, r3
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r3
+; CHECK-NEXT:    csetm r1, lt
+; CHECK-NEXT:    bfi r5, r1, #8, #8
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    ldrd r5, r2, [sp, #8] @ 8-byte Folded Reload
 ; CHECK-NEXT:    vpsel q3, q3, q0
-; CHECK-NEXT:    vmov r3, r4, d6
-; CHECK-NEXT:    subs.w r3, r3, r10
-; CHECK-NEXT:    sbcs r3, r4, #0
-; CHECK-NEXT:    mov.w r4, #0
-; CHECK-NEXT:    cset r3, lt
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    csetm r3, ne
-; CHECK-NEXT:    bfi r4, r3, #0, #8
-; CHECK-NEXT:    vmov r3, r5, d7
-; CHECK-NEXT:    subs.w r3, r3, r10
-; CHECK-NEXT:    sbcs r3, r5, #0
-; CHECK-NEXT:    cset r3, lt
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    csetm r3, ne
-; CHECK-NEXT:    bfi r4, r3, #8, #8
-; CHECK-NEXT:    vmsr p0, r4
+; CHECK-NEXT:    vmov r1, r3, d6
+; CHECK-NEXT:    subs.w r1, r1, r8
+; CHECK-NEXT:    sbcs r1, r3, #0
+; CHECK-NEXT:    mov.w r3, #0
+; CHECK-NEXT:    csetm r1, lt
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, r4, d7
+; CHECK-NEXT:    subs.w r1, r1, r8
+; CHECK-NEXT:    sbcs r1, r4, #0
+; CHECK-NEXT:    csetm r1, lt
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    vmsr p0, r3
 ; CHECK-NEXT:    vpsel q3, q3, q1
 ; CHECK-NEXT:    vmov.f32 s13, s14
 ; CHECK-NEXT:    vmov.f32 s14, s8
@@ -335,31 +315,30 @@ define arm_aapcs_vfpcc void @ssatmul_4_q31(i32* nocapture readonly %pSrcA, i32*
 ; CHECK-NEXT:    vstrb.8 q3, [r2], #16
 ; CHECK-NEXT:    le lr, .LBB1_4
 ; CHECK-NEXT:  @ %bb.5: @ %middle.block
-; CHECK-NEXT:    ldrd r7, r3, [sp, #8] @ 8-byte Folded Reload
-; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    cmp r7, r3
+; CHECK-NEXT:    ldrd r1, r3, [sp] @ 8-byte Folded Reload
+; CHECK-NEXT:    cmp r1, r3
 ; CHECK-NEXT:    beq .LBB1_8
 ; CHECK-NEXT:  .LBB1_6: @ %for.body.preheader21
-; CHECK-NEXT:    sub.w lr, r3, r7
-; CHECK-NEXT:    mov.w r1, #-1
+; CHECK-NEXT:    sub.w lr, r3, r1
+; CHECK-NEXT:    mov.w r0, #-1
 ; CHECK-NEXT:    mov.w r3, #-2147483648
 ; CHECK-NEXT:    mvn r2, #-2147483648
 ; CHECK-NEXT:  .LBB1_7: @ %for.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldr r4, [r12], #4
-; CHECK-NEXT:    ldr r5, [r0], #4
-; CHECK-NEXT:    smull r4, r5, r5, r4
-; CHECK-NEXT:    asrl r4, r5, #31
-; CHECK-NEXT:    subs r6, r3, r4
-; CHECK-NEXT:    sbcs.w r6, r1, r5
-; CHECK-NEXT:    cset r6, lt
-; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    ldr r1, [r12], #4
+; CHECK-NEXT:    ldr r4, [r9], #4
+; CHECK-NEXT:    smull r4, r1, r4, r1
+; CHECK-NEXT:    asrl r4, r1, #31
+; CHECK-NEXT:    subs r5, r3, r4
+; CHECK-NEXT:    sbcs.w r5, r0, r1
+; CHECK-NEXT:    cset r5, lt
+; CHECK-NEXT:    cmp r5, #0
 ; CHECK-NEXT:    csel r4, r4, r3, ne
-; CHECK-NEXT:    csel r5, r5, r1, ne
-; CHECK-NEXT:    subs r6, r4, r2
-; CHECK-NEXT:    sbcs r5, r5, #0
-; CHECK-NEXT:    csel r4, r4, r2, lt
-; CHECK-NEXT:    str r4, [r11], #4
+; CHECK-NEXT:    csel r1, r1, r0, ne
+; CHECK-NEXT:    subs r5, r4, r2
+; CHECK-NEXT:    sbcs r1, r1, #0
+; CHECK-NEXT:    csel r1, r4, r2, lt
+; CHECK-NEXT:    str r1, [r11], #4
 ; CHECK-NEXT:    le lr, .LBB1_7
 ; CHECK-NEXT:  .LBB1_8: @ %for.cond.cleanup
 ; CHECK-NEXT:    add sp, #16
@@ -468,21 +447,21 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(i32* nocapture readonly %pSrcA, i32*
 ; CHECK-NEXT:    cmp r3, #0
 ; CHECK-NEXT:    beq.w .LBB2_3
 ; CHECK-NEXT:  @ %bb.1: @ %vector.ph
-; CHECK-NEXT:    adds r7, r3, #3
-; CHECK-NEXT:    movs r6, #1
-; CHECK-NEXT:    bic r7, r7, #3
+; CHECK-NEXT:    adds r6, r3, #3
+; CHECK-NEXT:    movs r5, #1
+; CHECK-NEXT:    bic r6, r6, #3
 ; CHECK-NEXT:    adr r4, .LCPI2_1
-; CHECK-NEXT:    subs r7, #4
-; CHECK-NEXT:    adr r5, .LCPI2_2
+; CHECK-NEXT:    subs r6, #4
 ; CHECK-NEXT:    vldrw.u32 q2, [r4]
-; CHECK-NEXT:    vldrw.u32 q3, [r5]
-; CHECK-NEXT:    add.w lr, r6, r7, lsr #2
-; CHECK-NEXT:    adr r6, .LCPI2_0
-; CHECK-NEXT:    subs r7, r3, #1
-; CHECK-NEXT:    vldrw.u32 q0, [r6]
 ; CHECK-NEXT:    mov.w r9, #0
-; CHECK-NEXT:    vdup.32 q1, r7
 ; CHECK-NEXT:    mov.w r12, #-1
+; CHECK-NEXT:    add.w lr, r5, r6, lsr #2
+; CHECK-NEXT:    adr r5, .LCPI2_0
+; CHECK-NEXT:    vldrw.u32 q0, [r5]
+; CHECK-NEXT:    adr r5, .LCPI2_2
+; CHECK-NEXT:    subs r6, r3, #1
+; CHECK-NEXT:    vldrw.u32 q3, [r5]
+; CHECK-NEXT:    vdup.32 q1, r6
 ; CHECK-NEXT:    mvn r8, #-2147483648
 ; CHECK-NEXT:    vstrw.32 q0, [sp] @ 16-byte Spill
 ; CHECK-NEXT:  .LBB2_2: @ %vector.body
@@ -502,14 +481,12 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(i32* nocapture readonly %pSrcA, i32*
 ; CHECK-NEXT:    vmov.f32 s28, s26
 ; CHECK-NEXT:    vmov.f32 s30, s27
 ; CHECK-NEXT:    vmullb.s32 q0, q7, q4
-; CHECK-NEXT:    vmov.f32 s22, s21
+; CHECK-NEXT:    vmov.f32 s22, s25
 ; CHECK-NEXT:    vmov r10, r5, d0
 ; CHECK-NEXT:    asrl r10, r5, #31
 ; CHECK-NEXT:    rsbs.w r7, r10, #-2147483648
 ; CHECK-NEXT:    sbcs.w r7, r12, r5
-; CHECK-NEXT:    cset r7, lt
-; CHECK-NEXT:    cmp r7, #0
-; CHECK-NEXT:    csetm r7, ne
+; CHECK-NEXT:    csetm r7, lt
 ; CHECK-NEXT:    bfi r4, r7, #0, #8
 ; CHECK-NEXT:    vmov r6, r7, d1
 ; CHECK-NEXT:    asrl r6, r7, #31
@@ -517,72 +494,58 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(i32* nocapture readonly %pSrcA, i32*
 ; CHECK-NEXT:    vmov q0[2], q0[0], r10, r6
 ; CHECK-NEXT:    sbcs.w r3, r12, r7
 ; CHECK-NEXT:    vmov q0[3], q0[1], r5, r7
-; CHECK-NEXT:    cset r3, lt
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    csetm r3, ne
+; CHECK-NEXT:    csetm r3, lt
+; CHECK-NEXT:    vmov r7, s22
 ; CHECK-NEXT:    bfi r4, r3, #8, #8
 ; CHECK-NEXT:    vmsr p0, r4
-; CHECK-NEXT:    vpsel q4, q0, q2
-; CHECK-NEXT:    vmov.f32 s2, s25
-; CHECK-NEXT:    vmov r3, r4, d8
-; CHECK-NEXT:    vmov r7, s2
+; CHECK-NEXT:    vpsel q0, q0, q2
+; CHECK-NEXT:    vmov r3, r4, d0
 ; CHECK-NEXT:    subs.w r3, r3, r8
 ; CHECK-NEXT:    sbcs r3, r4, #0
 ; CHECK-NEXT:    mov.w r4, #0
-; CHECK-NEXT:    cset r3, lt
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    csetm r3, ne
+; CHECK-NEXT:    csetm r3, lt
 ; CHECK-NEXT:    bfi r4, r3, #0, #8
-; CHECK-NEXT:    vmov r3, r5, d9
+; CHECK-NEXT:    vmov r3, r5, d1
 ; CHECK-NEXT:    subs.w r3, r3, r8
 ; CHECK-NEXT:    sbcs r3, r5, #0
-; CHECK-NEXT:    cset r3, lt
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    csetm r3, ne
+; CHECK-NEXT:    csetm r3, lt
 ; CHECK-NEXT:    bfi r4, r3, #8, #8
 ; CHECK-NEXT:    vmov r3, s20
 ; CHECK-NEXT:    vmsr p0, r4
 ; CHECK-NEXT:    vmov r4, s24
-; CHECK-NEXT:    vpsel q4, q4, q3
+; CHECK-NEXT:    vpsel q4, q0, q3
+; CHECK-NEXT:    vmov.f32 s2, s21
 ; CHECK-NEXT:    smull r10, r5, r4, r3
 ; CHECK-NEXT:    movs r4, #0
 ; CHECK-NEXT:    asrl r10, r5, #31
 ; CHECK-NEXT:    rsbs.w r3, r10, #-2147483648
 ; CHECK-NEXT:    sbcs.w r3, r12, r5
-; CHECK-NEXT:    cset r3, lt
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    csetm r3, ne
+; CHECK-NEXT:    csetm r3, lt
 ; CHECK-NEXT:    bfi r4, r3, #0, #8
-; CHECK-NEXT:    vmov r3, s22
-; CHECK-NEXT:    smull r6, r7, r7, r3
-; CHECK-NEXT:    asrl r6, r7, #31
-; CHECK-NEXT:    rsbs.w r3, r6, #-2147483648
+; CHECK-NEXT:    vmov r3, s2
+; CHECK-NEXT:    smull r6, r3, r7, r3
+; CHECK-NEXT:    asrl r6, r3, #31
+; CHECK-NEXT:    rsbs.w r7, r6, #-2147483648
 ; CHECK-NEXT:    vmov q0[2], q0[0], r10, r6
-; CHECK-NEXT:    sbcs.w r3, r12, r7
-; CHECK-NEXT:    vmov q0[3], q0[1], r5, r7
-; CHECK-NEXT:    cset r3, lt
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    csetm r3, ne
-; CHECK-NEXT:    bfi r4, r3, #8, #8
+; CHECK-NEXT:    sbcs.w r7, r12, r3
+; CHECK-NEXT:    vmov q0[3], q0[1], r5, r3
+; CHECK-NEXT:    csetm r7, lt
+; CHECK-NEXT:    bfi r4, r7, #8, #8
 ; CHECK-NEXT:    vmsr p0, r4
-; CHECK-NEXT:    vpsel q5, q0, q2
-; CHECK-NEXT:    vmov r3, r4, d10
+; CHECK-NEXT:    vpsel q0, q0, q2
+; CHECK-NEXT:    vmov r3, r4, d0
 ; CHECK-NEXT:    subs.w r3, r3, r8
 ; CHECK-NEXT:    sbcs r3, r4, #0
 ; CHECK-NEXT:    mov.w r4, #0
-; CHECK-NEXT:    cset r3, lt
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    csetm r3, ne
+; CHECK-NEXT:    csetm r3, lt
 ; CHECK-NEXT:    bfi r4, r3, #0, #8
-; CHECK-NEXT:    vmov r3, r5, d11
+; CHECK-NEXT:    vmov r3, r5, d1
 ; CHECK-NEXT:    subs.w r3, r3, r8
 ; CHECK-NEXT:    sbcs r3, r5, #0
-; CHECK-NEXT:    cset r3, lt
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    csetm r3, ne
+; CHECK-NEXT:    csetm r3, lt
 ; CHECK-NEXT:    bfi r4, r3, #8, #8
 ; CHECK-NEXT:    vmsr p0, r4
-; CHECK-NEXT:    vpsel q0, q5, q3
+; CHECK-NEXT:    vpsel q0, q0, q3
 ; CHECK-NEXT:    vldr p0, [sp, #20] @ 4-byte Reload
 ; CHECK-NEXT:    vmov.f32 s1, s2
 ; CHECK-NEXT:    vmov.f32 s2, s16
@@ -693,9 +656,7 @@ define arm_aapcs_vfpcc void @usatmul_2_q31(i32* nocapture readonly %pSrcA, i32*
 ; CHECK-NEXT:    lsrl r4, r9, #31
 ; CHECK-NEXT:    subs.w r5, r4, #-1
 ; CHECK-NEXT:    sbcs r5, r9, #0
-; CHECK-NEXT:    cset r5, lo
-; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    csetm r5, ne
+; CHECK-NEXT:    csetm r5, lo
 ; CHECK-NEXT:    bfi r8, r5, #0, #8
 ; CHECK-NEXT:    umull r6, r5, r3, r6
 ; CHECK-NEXT:    lsrl r6, r5, #31
@@ -703,9 +664,7 @@ define arm_aapcs_vfpcc void @usatmul_2_q31(i32* nocapture readonly %pSrcA, i32*
 ; CHECK-NEXT:    vmov q1[2], q1[0], r4, r6
 ; CHECK-NEXT:    sbcs r3, r5, #0
 ; CHECK-NEXT:    vmov q1[3], q1[1], r9, r5
-; CHECK-NEXT:    cset r3, lo
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    csetm r3, ne
+; CHECK-NEXT:    csetm r3, lo
 ; CHECK-NEXT:    bfi r8, r3, #8, #8
 ; CHECK-NEXT:    vmsr p0, r8
 ; CHECK-NEXT:    vpsel q1, q1, q0
@@ -858,9 +817,7 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(i32* nocapture readonly %pSrcA, i32*
 ; CHECK-NEXT:    subs.w r5, r4, #-1
 ; CHECK-NEXT:    vmullb.u32 q4, q3, q1
 ; CHECK-NEXT:    sbcs r5, r9, #0
-; CHECK-NEXT:    cset r5, lo
-; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    csetm r5, ne
+; CHECK-NEXT:    csetm r5, lo
 ; CHECK-NEXT:    bfi r6, r5, #0, #8
 ; CHECK-NEXT:    vmov r8, r5, d11
 ; CHECK-NEXT:    lsrl r8, r5, #31
@@ -868,9 +825,7 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(i32* nocapture readonly %pSrcA, i32*
 ; CHECK-NEXT:    vmov q2[2], q2[0], r4, r8
 ; CHECK-NEXT:    sbcs r7, r5, #0
 ; CHECK-NEXT:    vmov q2[3], q2[1], r9, r5
-; CHECK-NEXT:    cset r7, lo
-; CHECK-NEXT:    cmp r7, #0
-; CHECK-NEXT:    csetm r7, ne
+; CHECK-NEXT:    csetm r7, lo
 ; CHECK-NEXT:    bfi r6, r7, #8, #8
 ; CHECK-NEXT:    vmov r4, r7, d8
 ; CHECK-NEXT:    lsrl r4, r7, #31
@@ -879,9 +834,7 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(i32* nocapture readonly %pSrcA, i32*
 ; CHECK-NEXT:    mov.w r6, #0
 ; CHECK-NEXT:    sbcs r5, r7, #0
 ; CHECK-NEXT:    vpsel q2, q2, q0
-; CHECK-NEXT:    cset r5, lo
-; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    csetm r5, ne
+; CHECK-NEXT:    csetm r5, lo
 ; CHECK-NEXT:    bfi r6, r5, #0, #8
 ; CHECK-NEXT:    vmov r2, r5, d9
 ; CHECK-NEXT:    lsrl r2, r5, #31
@@ -889,9 +842,7 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(i32* nocapture readonly %pSrcA, i32*
 ; CHECK-NEXT:    vmov q1[2], q1[0], r4, r2
 ; CHECK-NEXT:    sbcs r3, r5, #0
 ; CHECK-NEXT:    vmov q1[3], q1[1], r7, r5
-; CHECK-NEXT:    cset r3, lo
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    csetm r3, ne
+; CHECK-NEXT:    csetm r3, lo
 ; CHECK-NEXT:    bfi r6, r3, #8, #8
 ; CHECK-NEXT:    vmsr p0, r6
 ; CHECK-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload

diff  --git a/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll b/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll
index 10ecdc01918dd..bbc0ff9bd1be5 100644
--- a/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll
@@ -36,21 +36,19 @@ define arm_aapcs_vfpcc <2 x i64> @sadd_int64_t(<2 x i64> %src1, <2 x i64> %src2)
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .save {r4, r5, r7, lr}
 ; CHECK-NEXT:    push {r4, r5, r7, lr}
-; CHECK-NEXT:    vmov r0, r2, d2
-; CHECK-NEXT:    vmov r3, r1, d0
-; CHECK-NEXT:    adds.w r12, r3, r0
+; CHECK-NEXT:    vmov r0, r1, d2
+; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    adds.w r12, r2, r0
 ; CHECK-NEXT:    vmov r0, r4, d1
-; CHECK-NEXT:    adc.w lr, r1, r2
-; CHECK-NEXT:    subs.w r3, r12, r3
-; CHECK-NEXT:    sbcs.w r1, lr, r1
-; CHECK-NEXT:    cset r1, lt
+; CHECK-NEXT:    adc.w lr, r3, r1
+; CHECK-NEXT:    subs.w r2, r12, r2
+; CHECK-NEXT:    sbcs.w r2, lr, r3
+; CHECK-NEXT:    cset r2, lt
 ; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    cset r1, ne
-; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    it mi
-; CHECK-NEXT:    eormi r1, r1, #1
+; CHECK-NEXT:    eormi r2, r2, #1
+; CHECK-NEXT:    rsbs r1, r2, #0
 ; CHECK-NEXT:    movs r2, #0
-; CHECK-NEXT:    rsbs r1, r1, #0
 ; CHECK-NEXT:    bfi r2, r1, #0, #8
 ; CHECK-NEXT:    vmov r1, r3, d3
 ; CHECK-NEXT:    adds r1, r1, r0
@@ -59,14 +57,12 @@ define arm_aapcs_vfpcc <2 x i64> @sadd_int64_t(<2 x i64> %src1, <2 x i64> %src2)
 ; CHECK-NEXT:    sbcs.w r0, r5, r4
 ; CHECK-NEXT:    vmov q0[2], q0[0], r12, r1
 ; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    asr.w r1, lr, #31
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    vmov q0[3], q0[1], lr, r5
-; CHECK-NEXT:    cset r0, ne
 ; CHECK-NEXT:    cmp r3, #0
 ; CHECK-NEXT:    it mi
 ; CHECK-NEXT:    eormi r0, r0, #1
+; CHECK-NEXT:    asr.w r1, lr, #31
 ; CHECK-NEXT:    rsbs r0, r0, #0
+; CHECK-NEXT:    vmov q0[3], q0[1], lr, r5
 ; CHECK-NEXT:    bfi r2, r0, #8, #8
 ; CHECK-NEXT:    asrs r0, r5, #31
 ; CHECK-NEXT:    vmov q1[2], q1[0], r1, r0
@@ -122,32 +118,28 @@ entry:
 define arm_aapcs_vfpcc <2 x i64> @uadd_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
 ; CHECK-LABEL: uadd_int64_t:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, lr}
-; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    .save {r4, r5, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r7, lr}
 ; CHECK-NEXT:    vmov r0, r1, d3
 ; CHECK-NEXT:    vmov r2, r3, d1
-; CHECK-NEXT:    adds.w lr, r2, r0
-; CHECK-NEXT:    vmov r0, r4, d0
-; CHECK-NEXT:    adc.w r12, r3, r1
-; CHECK-NEXT:    subs.w r2, lr, r2
-; CHECK-NEXT:    sbcs.w r2, r12, r3
-; CHECK-NEXT:    vmov r3, r1, d2
-; CHECK-NEXT:    cset r2, lo
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csetm r2, ne
-; CHECK-NEXT:    adds r3, r3, r0
-; CHECK-NEXT:    adcs r1, r4
-; CHECK-NEXT:    subs r0, r3, r0
-; CHECK-NEXT:    sbcs.w r0, r1, r4
-; CHECK-NEXT:    vmov q1[2], q1[0], r3, lr
-; CHECK-NEXT:    cset r0, lo
-; CHECK-NEXT:    vmov q1[3], q1[1], r1, r12
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
-; CHECK-NEXT:    vmov q0[2], q0[0], r0, r2
-; CHECK-NEXT:    vmov q0[3], q0[1], r0, r2
+; CHECK-NEXT:    adds r5, r2, r0
+; CHECK-NEXT:    adc.w lr, r3, r1
+; CHECK-NEXT:    subs r2, r5, r2
+; CHECK-NEXT:    sbcs.w r2, lr, r3
+; CHECK-NEXT:    vmov r3, r12, d2
+; CHECK-NEXT:    vmov r1, r4, d0
+; CHECK-NEXT:    csetm r2, lo
+; CHECK-NEXT:    adds r3, r3, r1
+; CHECK-NEXT:    adc.w r0, r4, r12
+; CHECK-NEXT:    subs r1, r3, r1
+; CHECK-NEXT:    sbcs.w r1, r0, r4
+; CHECK-NEXT:    vmov q1[2], q1[0], r3, r5
+; CHECK-NEXT:    csetm r1, lo
+; CHECK-NEXT:    vmov q1[3], q1[1], r0, lr
+; CHECK-NEXT:    vmov q0[2], q0[0], r1, r2
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r2
 ; CHECK-NEXT:    vorr q0, q1, q0
-; CHECK-NEXT:    pop {r4, pc}
+; CHECK-NEXT:    pop {r4, r5, r7, pc}
 entry:
   %0 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %src1, <2 x i64> %src2)
   ret <2 x i64> %0
@@ -187,55 +179,47 @@ entry:
 define arm_aapcs_vfpcc <2 x i64> @ssub_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
 ; CHECK-LABEL: ssub_int64_t:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r7, lr}
-; CHECK-NEXT:    push {r4, r5, r7, lr}
-; CHECK-NEXT:    vmov r1, r3, d2
-; CHECK-NEXT:    movs r0, #0
-; CHECK-NEXT:    rsbs r2, r1, #0
-; CHECK-NEXT:    sbcs.w r2, r0, r3
-; CHECK-NEXT:    vmov r2, r4, d0
-; CHECK-NEXT:    cset lr, lt
-; CHECK-NEXT:    subs.w r12, r2, r1
-; CHECK-NEXT:    sbc.w r5, r4, r3
-; CHECK-NEXT:    subs.w r2, r12, r2
-; CHECK-NEXT:    sbcs.w r2, r5, r4
-; CHECK-NEXT:    vmov r3, r4, d3
-; CHECK-NEXT:    cset r2, lt
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    cset r2, ne
-; CHECK-NEXT:    cmp.w lr, #0
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    eorne r2, r2, #1
+; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    vmov r2, r3, d2
+; CHECK-NEXT:    vmov r1, r0, d0
+; CHECK-NEXT:    vmov r4, r5, d1
+; CHECK-NEXT:    subs.w r12, r1, r2
+; CHECK-NEXT:    sbc.w lr, r0, r3
+; CHECK-NEXT:    subs.w r1, r12, r1
+; CHECK-NEXT:    sbcs.w r0, lr, r0
+; CHECK-NEXT:    mov.w r1, #0
+; CHECK-NEXT:    cset r0, lt
 ; CHECK-NEXT:    rsbs r2, r2, #0
-; CHECK-NEXT:    rsbs r1, r3, #0
-; CHECK-NEXT:    sbcs.w r1, r0, r4
-; CHECK-NEXT:    bfi r0, r2, #0, #8
-; CHECK-NEXT:    vmov r2, r1, d1
-; CHECK-NEXT:    cset lr, lt
-; CHECK-NEXT:    subs r3, r2, r3
-; CHECK-NEXT:    sbc.w r4, r1, r4
-; CHECK-NEXT:    subs r2, r3, r2
-; CHECK-NEXT:    sbcs.w r1, r4, r1
-; CHECK-NEXT:    vmov q0[2], q0[0], r12, r3
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    vmov q0[3], q0[1], r5, r4
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    cset r1, ne
-; CHECK-NEXT:    cmp.w lr, #0
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    eorne r1, r1, #1
-; CHECK-NEXT:    rsbs r1, r1, #0
-; CHECK-NEXT:    bfi r0, r1, #8, #8
-; CHECK-NEXT:    asrs r1, r5, #31
-; CHECK-NEXT:    vmsr p0, r0
-; CHECK-NEXT:    asrs r0, r4, #31
+; CHECK-NEXT:    sbcs.w r2, r1, r3
+; CHECK-NEXT:    it lt
+; CHECK-NEXT:    eorlt r0, r0, #1
+; CHECK-NEXT:    vmov r2, r3, d3
+; CHECK-NEXT:    rsbs r0, r0, #0
+; CHECK-NEXT:    subs r6, r4, r2
+; CHECK-NEXT:    sbc.w r7, r5, r3
+; CHECK-NEXT:    subs r4, r6, r4
+; CHECK-NEXT:    sbcs.w r4, r7, r5
+; CHECK-NEXT:    vmov q0[2], q0[0], r12, r6
+; CHECK-NEXT:    cset r4, lt
+; CHECK-NEXT:    rsbs r2, r2, #0
+; CHECK-NEXT:    sbcs.w r2, r1, r3
+; CHECK-NEXT:    bfi r1, r0, #0, #8
+; CHECK-NEXT:    it lt
+; CHECK-NEXT:    eorlt r4, r4, #1
+; CHECK-NEXT:    rsbs r0, r4, #0
+; CHECK-NEXT:    bfi r1, r0, #8, #8
+; CHECK-NEXT:    asrs r0, r7, #31
+; CHECK-NEXT:    vmsr p0, r1
+; CHECK-NEXT:    asr.w r1, lr, #31
 ; CHECK-NEXT:    vmov q1[2], q1[0], r1, r0
+; CHECK-NEXT:    vmov q0[3], q0[1], lr, r7
 ; CHECK-NEXT:    vmov q1[3], q1[1], r1, r0
 ; CHECK-NEXT:    adr r0, .LCPI11_0
 ; CHECK-NEXT:    vldrw.u32 q2, [r0]
 ; CHECK-NEXT:    veor q1, q1, q2
 ; CHECK-NEXT:    vpsel q0, q1, q0
-; CHECK-NEXT:    pop {r4, r5, r7, pc}
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI11_0:
@@ -281,32 +265,28 @@ entry:
 define arm_aapcs_vfpcc <2 x i64> @usub_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
 ; CHECK-LABEL: usub_int64_t:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, lr}
-; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    .save {r4, r5, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r7, lr}
 ; CHECK-NEXT:    vmov r0, r1, d3
 ; CHECK-NEXT:    vmov r2, r3, d1
-; CHECK-NEXT:    subs.w lr, r2, r0
-; CHECK-NEXT:    vmov r0, r4, d0
-; CHECK-NEXT:    sbc.w r12, r3, r1
-; CHECK-NEXT:    subs.w r2, r2, lr
-; CHECK-NEXT:    sbcs.w r2, r3, r12
-; CHECK-NEXT:    vmov r3, r1, d2
-; CHECK-NEXT:    cset r2, lo
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csetm r2, ne
-; CHECK-NEXT:    subs r3, r0, r3
-; CHECK-NEXT:    sbc.w r1, r4, r1
-; CHECK-NEXT:    subs r0, r0, r3
-; CHECK-NEXT:    sbcs.w r0, r4, r1
-; CHECK-NEXT:    vmov q1[2], q1[0], r3, lr
-; CHECK-NEXT:    cset r0, lo
-; CHECK-NEXT:    vmov q1[3], q1[1], r1, r12
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
-; CHECK-NEXT:    vmov q0[2], q0[0], r0, r2
-; CHECK-NEXT:    vmov q0[3], q0[1], r0, r2
+; CHECK-NEXT:    subs r5, r2, r0
+; CHECK-NEXT:    sbc.w lr, r3, r1
+; CHECK-NEXT:    subs r2, r2, r5
+; CHECK-NEXT:    sbcs.w r2, r3, lr
+; CHECK-NEXT:    vmov r3, r12, d2
+; CHECK-NEXT:    vmov r1, r4, d0
+; CHECK-NEXT:    csetm r2, lo
+; CHECK-NEXT:    subs r3, r1, r3
+; CHECK-NEXT:    sbc.w r0, r4, r12
+; CHECK-NEXT:    subs r1, r1, r3
+; CHECK-NEXT:    sbcs.w r1, r4, r0
+; CHECK-NEXT:    vmov q1[2], q1[0], r3, r5
+; CHECK-NEXT:    csetm r1, lo
+; CHECK-NEXT:    vmov q1[3], q1[1], r0, lr
+; CHECK-NEXT:    vmov q0[2], q0[0], r1, r2
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r2
 ; CHECK-NEXT:    vbic q0, q1, q0
-; CHECK-NEXT:    pop {r4, pc}
+; CHECK-NEXT:    pop {r4, r5, r7, pc}
 entry:
   %0 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %src1, <2 x i64> %src2)
   ret <2 x i64> %0

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vcmp.ll b/llvm/test/CodeGen/Thumb2/mve-vcmp.ll
index fae8e393ea949..f8e0a493b403e 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcmp.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcmp.ll
@@ -399,17 +399,13 @@ define arm_aapcs_vfpcc <2 x i64> @vcmp_slt_v2i64(<2 x i64> %src, <2 x i64> %srcb
 ; CHECK-NEXT:    subs r0, r2, r0
 ; CHECK-NEXT:    sbcs.w r0, r3, r1
 ; CHECK-NEXT:    mov.w r1, #0
-; CHECK-NEXT:    cset r0, lt
+; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    vmov r3, r2, d1
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
 ; CHECK-NEXT:    bfi r1, r0, #0, #8
 ; CHECK-NEXT:    vmov r0, r12, d3
 ; CHECK-NEXT:    subs r0, r3, r0
 ; CHECK-NEXT:    sbcs.w r0, r2, r12
-; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    bfi r1, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r1
 ; CHECK-NEXT:    vpsel q0, q2, q3
@@ -470,8 +466,6 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_multi_v2i32(<2 x i64> %a, <2 x i32> %b, <
 ; CHECK-NEXT:    sbcs.w r2, r12, r3, asr #31
 ; CHECK-NEXT:    vmov r3, s4
 ; CHECK-NEXT:    cset r2, lt
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    cset r2, ne
 ; CHECK-NEXT:    cmp r1, #0
 ; CHECK-NEXT:    cset r1, ne
 ; CHECK-NEXT:    cmp r3, #0
@@ -487,8 +481,6 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_multi_v2i32(<2 x i64> %a, <2 x i32> %b, <
 ; CHECK-NEXT:    sbcs.w r2, r12, r3, asr #31
 ; CHECK-NEXT:    vmov r3, s6
 ; CHECK-NEXT:    cset r2, lt
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    cset r2, ne
 ; CHECK-NEXT:    cmp r1, #0
 ; CHECK-NEXT:    cset r1, ne
 ; CHECK-NEXT:    cmp r3, #0

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll
index d5cddc6fcfeb4..5802b0073f292 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll
@@ -279,25 +279,17 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_une_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_une_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s3, s7
-; CHECK-MVE-NEXT:    cset r1, ne
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, s6
-; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    cset r3, ne
-; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r3, #0
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s5
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
-; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -1122,69 +1114,53 @@ entry:
 define arm_aapcs_vfpcc <8 x half> @vcmp_une_v8f16(<8 x half> %src, <8 x half> %src2, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_une_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9}
-; CHECK-MVE-NEXT:    vpush {d8, d9}
+; CHECK-MVE-NEXT:    .vsave {d8, d9, d10, d11}
+; CHECK-MVE-NEXT:    vpush {d8, d9, d10, d11}
 ; CHECK-MVE-NEXT:    vmovx.f16 s16, s4
 ; CHECK-MVE-NEXT:    vmovx.f16 s18, s0
 ; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmovx.f16 s16, s8
+; CHECK-MVE-NEXT:    vmovx.f16 s20, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmovx.f16 s22, s12
 ; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
-; CHECK-MVE-NEXT:    vmovx.f16 s18, s12
 ; CHECK-MVE-NEXT:    vmovx.f16 s4, s5
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s16, s18, s16
+; CHECK-MVE-NEXT:    vseleq.f16 s16, s22, s20
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s12, s8
 ; CHECK-MVE-NEXT:    vmovx.f16 s8, s1
 ; CHECK-MVE-NEXT:    vcmp.f16 s8, s4
-; CHECK-MVE-NEXT:    vmovx.f16 s4, s9
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmovx.f16 s8, s13
-; CHECK-MVE-NEXT:    vcmp.f16 s1, s5
 ; CHECK-MVE-NEXT:    vins.f16 s0, s16
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s4, s8, s4
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vcmp.f16 s1, s5
+; CHECK-MVE-NEXT:    vmovx.f16 s12, s9
+; CHECK-MVE-NEXT:    vmovx.f16 s16, s13
 ; CHECK-MVE-NEXT:    vmovx.f16 s8, s2
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s14
+; CHECK-MVE-NEXT:    vseleq.f16 s4, s16, s12
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmovx.f16 s12, s10
 ; CHECK-MVE-NEXT:    vseleq.f16 s1, s13, s9
 ; CHECK-MVE-NEXT:    vins.f16 s1, s4
 ; CHECK-MVE-NEXT:    vmovx.f16 s4, s6
 ; CHECK-MVE-NEXT:    vcmp.f16 s8, s4
-; CHECK-MVE-NEXT:    vmovx.f16 s4, s10
+; CHECK-MVE-NEXT:    vmovx.f16 s8, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f16 s2, s6
-; CHECK-MVE-NEXT:    vmovx.f16 s8, s14
 ; CHECK-MVE-NEXT:    vmovx.f16 s6, s3
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s4, s8, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s4, s5, s12
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f16 s2, s14, s10
+; CHECK-MVE-NEXT:    vmovx.f16 s10, s15
 ; CHECK-MVE-NEXT:    vins.f16 s2, s4
 ; CHECK-MVE-NEXT:    vmovx.f16 s4, s7
 ; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
-; CHECK-MVE-NEXT:    vmovx.f16 s4, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmovx.f16 s6, s15
 ; CHECK-MVE-NEXT:    vcmp.f16 s3, s7
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s4, s6, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s4, s10, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f16 s3, s15, s11
 ; CHECK-MVE-NEXT:    vins.f16 s3, s4
-; CHECK-MVE-NEXT:    vpop {d8, d9}
+; CHECK-MVE-NEXT:    vpop {d8, d9, d10, d11}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_une_v8f16:

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
index 37225a44b3657..de6e85a8f5887 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
@@ -300,25 +300,17 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_une_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_une_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s1, s4
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s3, s4
-; CHECK-MVE-NEXT:    cset r1, ne
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, s4
-; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    cset r3, ne
-; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r3, #0
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vcmp.f32 s1, s4
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vcmp.f32 s0, s4
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
-; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -1137,59 +1129,43 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_une_v8f16(<8 x half> %src, half %src2, <
 ; CHECK-MVE-LABEL: vcmp_une_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
 ; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
-; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s8
 ; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
-; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
+; CHECK-MVE-NEXT:    vmovx.f16 s7, s12
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f16 s0, s4
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s7, s5
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s12, s8
-; CHECK-MVE-NEXT:    vmovx.f16 s8, s13
+; CHECK-MVE-NEXT:    vmovx.f16 s8, s9
 ; CHECK-MVE-NEXT:    vins.f16 s0, s6
 ; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
 ; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
-; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
+; CHECK-MVE-NEXT:    vmovx.f16 s12, s13
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f16 s1, s4
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s6, s8, s6
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmovx.f16 s8, s14
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    vmovx.f16 s8, s10
+; CHECK-MVE-NEXT:    vmovx.f16 s12, s14
 ; CHECK-MVE-NEXT:    vseleq.f16 s1, s13, s9
 ; CHECK-MVE-NEXT:    vins.f16 s1, s6
 ; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
 ; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
-; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f16 s2, s4
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s6, s8, s6
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmovx.f16 s8, s15
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    vmovx.f16 s8, s11
 ; CHECK-MVE-NEXT:    vseleq.f16 s2, s14, s10
+; CHECK-MVE-NEXT:    vmovx.f16 s10, s15
 ; CHECK-MVE-NEXT:    vins.f16 s2, s6
 ; CHECK-MVE-NEXT:    vmovx.f16 s6, s3
 ; CHECK-MVE-NEXT:    vcmp.f16 s6, s4
-; CHECK-MVE-NEXT:    vmovx.f16 s6, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f16 s3, s4
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s6, s8, s6
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s10, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f16 s3, s15, s11
 ; CHECK-MVE-NEXT:    vins.f16 s3, s6
 ; CHECK-MVE-NEXT:    bx lr
@@ -1961,25 +1937,17 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_r_une_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_une_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
-; CHECK-MVE-NEXT:    cset r1, ne
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s4, s2
-; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    cset r3, ne
-; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
-; CHECK-MVE-NEXT:    cmp r3, #0
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
-; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
-; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -2798,59 +2766,43 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_r_une_v8f16(<8 x half> %src, half %src2,
 ; CHECK-MVE-LABEL: vcmp_r_une_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
 ; CHECK-MVE-NEXT:    vmovx.f16 s6, s0
-; CHECK-MVE-NEXT:    vmovx.f16 s5, s12
+; CHECK-MVE-NEXT:    vmovx.f16 s5, s8
 ; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
-; CHECK-MVE-NEXT:    vmovx.f16 s6, s8
+; CHECK-MVE-NEXT:    vmovx.f16 s7, s12
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f16 s4, s0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s6, s5, s6
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s7, s5
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s12, s8
-; CHECK-MVE-NEXT:    vmovx.f16 s8, s13
+; CHECK-MVE-NEXT:    vmovx.f16 s8, s9
 ; CHECK-MVE-NEXT:    vins.f16 s0, s6
 ; CHECK-MVE-NEXT:    vmovx.f16 s6, s1
 ; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
-; CHECK-MVE-NEXT:    vmovx.f16 s6, s9
+; CHECK-MVE-NEXT:    vmovx.f16 s12, s13
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f16 s4, s1
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s6, s8, s6
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmovx.f16 s8, s14
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    vmovx.f16 s8, s10
+; CHECK-MVE-NEXT:    vmovx.f16 s12, s14
 ; CHECK-MVE-NEXT:    vseleq.f16 s1, s13, s9
 ; CHECK-MVE-NEXT:    vins.f16 s1, s6
 ; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
 ; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
-; CHECK-MVE-NEXT:    vmovx.f16 s6, s10
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f16 s4, s2
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s6, s8, s6
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmovx.f16 s8, s15
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    vmovx.f16 s8, s11
 ; CHECK-MVE-NEXT:    vseleq.f16 s2, s14, s10
+; CHECK-MVE-NEXT:    vmovx.f16 s10, s15
 ; CHECK-MVE-NEXT:    vins.f16 s2, s6
 ; CHECK-MVE-NEXT:    vmovx.f16 s6, s3
 ; CHECK-MVE-NEXT:    vcmp.f16 s4, s6
-; CHECK-MVE-NEXT:    vmovx.f16 s6, s11
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f16 s4, s3
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s6, s8, s6
+; CHECK-MVE-NEXT:    vseleq.f16 s6, s10, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f16 s3, s15, s11
 ; CHECK-MVE-NEXT:    vins.f16 s3, s6
 ; CHECK-MVE-NEXT:    bx lr

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll
index 722a5313b1d6e..809bf664fc95f 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll
@@ -279,25 +279,17 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_une_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_une_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
-; CHECK-MVE-NEXT:    cset r1, ne
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, #0
-; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    cset r3, ne
-; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT:    cmp r3, #0
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s9, s5
-; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s8, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -1074,59 +1066,43 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_une_v8f16(<8 x half> %src, <8 x half> %a
 ; CHECK-MVE-LABEL: vcmp_une_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
 ; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
+; CHECK-MVE-NEXT:    vmovx.f16 s14, s4
 ; CHECK-MVE-NEXT:    vcmp.f16 s12, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
+; CHECK-MVE-NEXT:    vmovx.f16 s13, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f16 s0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    vseleq.f16 s12, s13, s14
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s8, s4
 ; CHECK-MVE-NEXT:    vmovx.f16 s4, s1
 ; CHECK-MVE-NEXT:    vcmp.f16 s4, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s4, s5
+; CHECK-MVE-NEXT:    vins.f16 s0, s12
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmovx.f16 s8, s9
+; CHECK-MVE-NEXT:    vmovx.f16 s8, s5
+; CHECK-MVE-NEXT:    vmovx.f16 s12, s9
 ; CHECK-MVE-NEXT:    vcmp.f16 s1, #0
-; CHECK-MVE-NEXT:    vins.f16 s0, s12
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s4, s8, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s4, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmovx.f16 s8, s10
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    vmovx.f16 s8, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s12, s10
 ; CHECK-MVE-NEXT:    vseleq.f16 s1, s9, s5
 ; CHECK-MVE-NEXT:    vins.f16 s1, s4
 ; CHECK-MVE-NEXT:    vmovx.f16 s4, s2
 ; CHECK-MVE-NEXT:    vcmp.f16 s4, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s4, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f16 s2, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s4, s8, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s4, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    vmovx.f16 s8, s11
 ; CHECK-MVE-NEXT:    vseleq.f16 s2, s10, s6
-; CHECK-MVE-NEXT:    vmovx.f16 s6, s11
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s7
 ; CHECK-MVE-NEXT:    vins.f16 s2, s4
 ; CHECK-MVE-NEXT:    vmovx.f16 s4, s3
 ; CHECK-MVE-NEXT:    vcmp.f16 s4, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s4, s7
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f16 s3, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s4, s6, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s4, s8, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f16 s3, s11, s7
 ; CHECK-MVE-NEXT:    vins.f16 s3, s4
 ; CHECK-MVE-NEXT:    bx lr
@@ -1856,25 +1832,17 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @vcmp_r_une_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) {
 ; CHECK-MVE-LABEL: vcmp_r_une_v4f32:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s3, #0
-; CHECK-MVE-NEXT:    cset r1, ne
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f32 s2, #0
-; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    cset r3, ne
-; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s3, s11, s7
-; CHECK-MVE-NEXT:    cmp r3, #0
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vcmp.f32 s1, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s2, s10, s6
-; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vcmp.f32 s0, #0
 ; CHECK-MVE-NEXT:    vseleq.f32 s1, s9, s5
-; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vseleq.f32 s0, s8, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;
@@ -2651,59 +2619,43 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_r_une_v8f16(<8 x half> %src, <8 x half>
 ; CHECK-MVE-LABEL: vcmp_r_une_v8f16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
 ; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
+; CHECK-MVE-NEXT:    vmovx.f16 s14, s4
 ; CHECK-MVE-NEXT:    vcmp.f16 s12, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
+; CHECK-MVE-NEXT:    vmovx.f16 s13, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f16 s0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    vseleq.f16 s12, s13, s14
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s8, s4
 ; CHECK-MVE-NEXT:    vmovx.f16 s4, s1
 ; CHECK-MVE-NEXT:    vcmp.f16 s4, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s4, s5
+; CHECK-MVE-NEXT:    vins.f16 s0, s12
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmovx.f16 s8, s9
+; CHECK-MVE-NEXT:    vmovx.f16 s8, s5
+; CHECK-MVE-NEXT:    vmovx.f16 s12, s9
 ; CHECK-MVE-NEXT:    vcmp.f16 s1, #0
-; CHECK-MVE-NEXT:    vins.f16 s0, s12
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s4, s8, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s4, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmovx.f16 s8, s10
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    vmovx.f16 s8, s6
+; CHECK-MVE-NEXT:    vmovx.f16 s12, s10
 ; CHECK-MVE-NEXT:    vseleq.f16 s1, s9, s5
 ; CHECK-MVE-NEXT:    vins.f16 s1, s4
 ; CHECK-MVE-NEXT:    vmovx.f16 s4, s2
 ; CHECK-MVE-NEXT:    vcmp.f16 s4, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s4, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f16 s2, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s4, s8, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s4, s12, s8
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    vmovx.f16 s8, s11
 ; CHECK-MVE-NEXT:    vseleq.f16 s2, s10, s6
-; CHECK-MVE-NEXT:    vmovx.f16 s6, s11
+; CHECK-MVE-NEXT:    vmovx.f16 s6, s7
 ; CHECK-MVE-NEXT:    vins.f16 s2, s4
 ; CHECK-MVE-NEXT:    vmovx.f16 s4, s3
 ; CHECK-MVE-NEXT:    vcmp.f16 s4, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s4, s7
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vcmp.f16 s3, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    vseleq.f16 s4, s6, s4
+; CHECK-MVE-NEXT:    vseleq.f16 s4, s8, s6
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vseleq.f16 s3, s11, s7
 ; CHECK-MVE-NEXT:    vins.f16 s3, s4
 ; CHECK-MVE-NEXT:    bx lr

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vcmpr.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpr.ll
index 9b765e8ac938b..707290f4f66cd 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcmpr.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcmpr.ll
@@ -507,8 +507,6 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_multi_v2i32(<2 x i64> %a, <2 x i32> %b, <
 ; CHECK-NEXT:    sbcs.w r2, r12, r3, asr #31
 ; CHECK-NEXT:    vmov r3, s4
 ; CHECK-NEXT:    cset r2, lt
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    cset r2, ne
 ; CHECK-NEXT:    cmp r1, #0
 ; CHECK-NEXT:    cset r1, ne
 ; CHECK-NEXT:    cmp r3, #0
@@ -524,8 +522,6 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_multi_v2i32(<2 x i64> %a, <2 x i32> %b, <
 ; CHECK-NEXT:    sbcs.w r2, r12, r3, asr #31
 ; CHECK-NEXT:    vmov r3, s6
 ; CHECK-NEXT:    cset r2, lt
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    cset r2, ne
 ; CHECK-NEXT:    cmp r1, #0
 ; CHECK-NEXT:    cset r1, ne
 ; CHECK-NEXT:    cmp r3, #0
@@ -1056,8 +1052,6 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_r_multi_v2i32(<2 x i64> %a, <2 x i32> %b,
 ; CHECK-NEXT:    sbcs.w r2, r12, r3, asr #31
 ; CHECK-NEXT:    vmov r3, s4
 ; CHECK-NEXT:    cset r2, lt
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    cset r2, ne
 ; CHECK-NEXT:    cmp r1, #0
 ; CHECK-NEXT:    cset r1, ne
 ; CHECK-NEXT:    cmp r3, #0
@@ -1073,8 +1067,6 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_r_multi_v2i32(<2 x i64> %a, <2 x i32> %b,
 ; CHECK-NEXT:    sbcs.w r2, r12, r3, asr #31
 ; CHECK-NEXT:    vmov r3, s6
 ; CHECK-NEXT:    cset r2, lt
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    cset r2, ne
 ; CHECK-NEXT:    cmp r1, #0
 ; CHECK-NEXT:    cset r1, ne
 ; CHECK-NEXT:    cmp r3, #0

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vqmovn.ll b/llvm/test/CodeGen/Thumb2/mve-vqmovn.ll
index a3b1cc0a24a88..75f7350fcd5b1 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vqmovn.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vqmovn.ll
@@ -169,17 +169,13 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_smaxmin(<2 x i64> %s0) {
 ; CHECK-NEXT:    movs r3, #0
 ; CHECK-NEXT:    subs.w r0, r0, r12
 ; CHECK-NEXT:    sbcs r0, r1, #0
-; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov.w r0, #0
-; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    csetm r1, lt
+; CHECK-NEXT:    movs r0, #0
 ; CHECK-NEXT:    bfi r3, r1, #0, #8
 ; CHECK-NEXT:    vmov r1, r2, d1
 ; CHECK-NEXT:    subs.w r1, r1, r12
 ; CHECK-NEXT:    sbcs r1, r2, #0
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    csetm r1, lt
 ; CHECK-NEXT:    bfi r3, r1, #8, #8
 ; CHECK-NEXT:    adr r1, .LCPI12_0
 ; CHECK-NEXT:    vldrw.u32 q1, [r1]
@@ -189,16 +185,12 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_smaxmin(<2 x i64> %s0) {
 ; CHECK-NEXT:    vmov r1, r2, d0
 ; CHECK-NEXT:    rsbs.w r1, r1, #-2147483648
 ; CHECK-NEXT:    sbcs.w r1, r3, r2
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    csetm r1, lt
 ; CHECK-NEXT:    bfi r0, r1, #0, #8
 ; CHECK-NEXT:    vmov r1, r2, d1
 ; CHECK-NEXT:    rsbs.w r1, r1, #-2147483648
 ; CHECK-NEXT:    sbcs.w r1, r3, r2
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    csetm r1, lt
 ; CHECK-NEXT:    bfi r0, r1, #8, #8
 ; CHECK-NEXT:    vmsr p0, r0
 ; CHECK-NEXT:    adr r0, .LCPI12_1
@@ -233,17 +225,13 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_sminmax(<2 x i64> %s0) {
 ; CHECK-NEXT:    movs r3, #0
 ; CHECK-NEXT:    rsbs.w r0, r0, #-2147483648
 ; CHECK-NEXT:    sbcs.w r0, r12, r1
-; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov.w r0, #0
-; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    csetm r1, lt
+; CHECK-NEXT:    movs r0, #0
 ; CHECK-NEXT:    bfi r3, r1, #0, #8
 ; CHECK-NEXT:    vmov r1, r2, d1
 ; CHECK-NEXT:    rsbs.w r1, r1, #-2147483648
 ; CHECK-NEXT:    sbcs.w r1, r12, r2
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    csetm r1, lt
 ; CHECK-NEXT:    bfi r3, r1, #8, #8
 ; CHECK-NEXT:    adr r1, .LCPI13_0
 ; CHECK-NEXT:    vldrw.u32 q1, [r1]
@@ -253,16 +241,12 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_sminmax(<2 x i64> %s0) {
 ; CHECK-NEXT:    vmov r1, r2, d0
 ; CHECK-NEXT:    subs r1, r1, r3
 ; CHECK-NEXT:    sbcs r1, r2, #0
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    csetm r1, lt
 ; CHECK-NEXT:    bfi r0, r1, #0, #8
 ; CHECK-NEXT:    vmov r1, r2, d1
 ; CHECK-NEXT:    subs r1, r1, r3
 ; CHECK-NEXT:    sbcs r1, r2, #0
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    csetm r1, lt
 ; CHECK-NEXT:    bfi r0, r1, #8, #8
 ; CHECK-NEXT:    vmsr p0, r0
 ; CHECK-NEXT:    adr r0, .LCPI13_1
@@ -297,16 +281,12 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_umaxmin(<2 x i64> %s0) {
 ; CHECK-NEXT:    subs.w r0, r0, #-1
 ; CHECK-NEXT:    sbcs r0, r1, #0
 ; CHECK-NEXT:    mov.w r1, #0
-; CHECK-NEXT:    cset r0, lo
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lo
 ; CHECK-NEXT:    bfi r1, r0, #0, #8
 ; CHECK-NEXT:    vmov r0, r2, d1
 ; CHECK-NEXT:    subs.w r0, r0, #-1
 ; CHECK-NEXT:    sbcs r0, r2, #0
-; CHECK-NEXT:    cset r0, lo
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lo
 ; CHECK-NEXT:    bfi r1, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r1
 ; CHECK-NEXT:    vpsel q0, q0, q1
@@ -325,16 +305,12 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_uminmax(<2 x i64> %s0) {
 ; CHECK-NEXT:    subs.w r0, r0, #-1
 ; CHECK-NEXT:    sbcs r0, r1, #0
 ; CHECK-NEXT:    mov.w r1, #0
-; CHECK-NEXT:    cset r0, lo
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lo
 ; CHECK-NEXT:    bfi r1, r0, #0, #8
 ; CHECK-NEXT:    vmov r0, r2, d1
 ; CHECK-NEXT:    subs.w r0, r0, #-1
 ; CHECK-NEXT:    sbcs r0, r2, #0
-; CHECK-NEXT:    cset r0, lo
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lo
 ; CHECK-NEXT:    bfi r1, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r1
 ; CHECK-NEXT:    vpsel q0, q0, q1

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vqshrn.ll b/llvm/test/CodeGen/Thumb2/mve-vqshrn.ll
index 1220ca2f60700..f78d36222c312 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vqshrn.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vqshrn.ll
@@ -182,49 +182,41 @@ define arm_aapcs_vfpcc <2 x i64> @vqshrni64_smaxmin(<2 x i64> %so) {
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .save {r7, lr}
 ; CHECK-NEXT:    push {r7, lr}
-; CHECK-NEXT:    vmov r2, r1, d1
+; CHECK-NEXT:    vmov r0, r1, d1
 ; CHECK-NEXT:    mvn r12, #-2147483648
-; CHECK-NEXT:    vmov r0, r3, d0
-; CHECK-NEXT:    asrl r2, r1, #3
-; CHECK-NEXT:    asrl r0, r3, #3
-; CHECK-NEXT:    vmov q0[2], q0[0], r0, r2
-; CHECK-NEXT:    subs.w r0, r0, r12
-; CHECK-NEXT:    sbcs r0, r3, #0
+; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    asrl r0, r1, #3
+; CHECK-NEXT:    asrl r2, r3, #3
+; CHECK-NEXT:    vmov q0[2], q0[0], r2, r0
+; CHECK-NEXT:    subs.w r2, r2, r12
+; CHECK-NEXT:    sbcs r2, r3, #0
 ; CHECK-NEXT:    vmov q0[3], q0[1], r3, r1
-; CHECK-NEXT:    cset r0, lt
+; CHECK-NEXT:    csetm lr, lt
+; CHECK-NEXT:    subs.w r0, r0, r12
+; CHECK-NEXT:    mov.w r2, #0
+; CHECK-NEXT:    sbcs r0, r1, #0
+; CHECK-NEXT:    bfi r2, lr, #0, #8
+; CHECK-NEXT:    csetm r0, lt
+; CHECK-NEXT:    bfi r2, r0, #8, #8
+; CHECK-NEXT:    adr r0, .LCPI12_0
+; CHECK-NEXT:    vldrw.u32 q1, [r0]
+; CHECK-NEXT:    vmsr p0, r2
+; CHECK-NEXT:    mov.w r2, #-1
 ; CHECK-NEXT:    movs r3, #0
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov.w r0, #0
-; CHECK-NEXT:    csetm lr, ne
-; CHECK-NEXT:    subs.w r2, r2, r12
-; CHECK-NEXT:    sbcs r1, r1, #0
-; CHECK-NEXT:    bfi r3, lr, #0, #8
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csetm r1, ne
-; CHECK-NEXT:    bfi r3, r1, #8, #8
-; CHECK-NEXT:    adr r1, .LCPI12_0
-; CHECK-NEXT:    vldrw.u32 q1, [r1]
-; CHECK-NEXT:    vmsr p0, r3
-; CHECK-NEXT:    mov.w r3, #-1
 ; CHECK-NEXT:    vpsel q0, q0, q1
-; CHECK-NEXT:    vmov r1, r2, d0
-; CHECK-NEXT:    rsbs.w r1, r1, #-2147483648
-; CHECK-NEXT:    sbcs.w r1, r3, r2
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csetm r1, ne
-; CHECK-NEXT:    bfi r0, r1, #0, #8
-; CHECK-NEXT:    vmov r1, r2, d1
-; CHECK-NEXT:    rsbs.w r1, r1, #-2147483648
-; CHECK-NEXT:    sbcs.w r1, r3, r2
-; CHECK-NEXT:    cset r1, lt
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    csetm r1, ne
-; CHECK-NEXT:    bfi r0, r1, #8, #8
-; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    rsbs.w r0, r0, #-2147483648
+; CHECK-NEXT:    sbcs.w r0, r2, r1
+; CHECK-NEXT:    csetm r0, lt
+; CHECK-NEXT:    bfi r3, r0, #0, #8
+; CHECK-NEXT:    vmov r0, r1, d1
+; CHECK-NEXT:    rsbs.w r0, r0, #-2147483648
+; CHECK-NEXT:    sbcs.w r0, r2, r1
+; CHECK-NEXT:    csetm r0, lt
+; CHECK-NEXT:    bfi r3, r0, #8, #8
 ; CHECK-NEXT:    adr r0, .LCPI12_1
 ; CHECK-NEXT:    vldrw.u32 q1, [r0]
+; CHECK-NEXT:    vmsr p0, r3
 ; CHECK-NEXT:    vpsel q0, q0, q1
 ; CHECK-NEXT:    pop {r7, pc}
 ; CHECK-NEXT:    .p2align 4
@@ -251,53 +243,45 @@ entry:
 define arm_aapcs_vfpcc <2 x i64> @vqshrni64_sminmax(<2 x i64> %so) {
 ; CHECK-LABEL: vqshrni64_sminmax:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r7, lr}
-; CHECK-NEXT:    push {r4, r5, r7, lr}
+; CHECK-NEXT:    .save {r4, r5, r6, lr}
+; CHECK-NEXT:    push {r4, r5, r6, lr}
 ; CHECK-NEXT:    vmov r2, r1, d0
 ; CHECK-NEXT:    mov.w r12, #-1
 ; CHECK-NEXT:    asrl r2, r1, #3
-; CHECK-NEXT:    mov.w lr, #0
+; CHECK-NEXT:    vmov r4, r5, d1
 ; CHECK-NEXT:    rsbs.w r0, r2, #-2147483648
+; CHECK-NEXT:    asrl r4, r5, #3
 ; CHECK-NEXT:    sbcs.w r0, r12, r1
-; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov.w r0, #0
-; CHECK-NEXT:    csetm r3, ne
-; CHECK-NEXT:    bfi r0, r3, #0, #8
-; CHECK-NEXT:    vmov r4, r3, d1
-; CHECK-NEXT:    asrl r4, r3, #3
-; CHECK-NEXT:    rsbs.w r5, r4, #-2147483648
+; CHECK-NEXT:    mov.w r3, #0
+; CHECK-NEXT:    csetm lr, lt
+; CHECK-NEXT:    rsbs.w r0, r4, #-2147483648
+; CHECK-NEXT:    sbcs.w r0, r12, r5
+; CHECK-NEXT:    bfi r3, lr, #0, #8
+; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    vmov q0[2], q0[0], r2, r4
-; CHECK-NEXT:    sbcs.w r5, r12, r3
-; CHECK-NEXT:    vmov q0[3], q0[1], r1, r3
-; CHECK-NEXT:    cset r5, lt
-; CHECK-NEXT:    mvn r2, #-2147483648
-; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    csetm r5, ne
-; CHECK-NEXT:    bfi r0, r5, #8, #8
-; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    bfi r3, r0, #8, #8
 ; CHECK-NEXT:    adr r0, .LCPI13_0
 ; CHECK-NEXT:    vldrw.u32 q1, [r0]
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r5
+; CHECK-NEXT:    mvn r2, #-2147483648
 ; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    movs r6, #0
 ; CHECK-NEXT:    vmov r0, r1, d0
 ; CHECK-NEXT:    subs r0, r0, r2
 ; CHECK-NEXT:    sbcs r0, r1, #0
-; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
-; CHECK-NEXT:    bfi lr, r0, #0, #8
+; CHECK-NEXT:    csetm r0, lt
+; CHECK-NEXT:    bfi r6, r0, #0, #8
 ; CHECK-NEXT:    vmov r0, r1, d1
 ; CHECK-NEXT:    subs r0, r0, r2
 ; CHECK-NEXT:    sbcs r0, r1, #0
-; CHECK-NEXT:    cset r0, lt
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
-; CHECK-NEXT:    bfi lr, r0, #8, #8
+; CHECK-NEXT:    csetm r0, lt
+; CHECK-NEXT:    bfi r6, r0, #8, #8
 ; CHECK-NEXT:    adr r0, .LCPI13_1
 ; CHECK-NEXT:    vldrw.u32 q1, [r0]
-; CHECK-NEXT:    vmsr p0, lr
+; CHECK-NEXT:    vmsr p0, r6
 ; CHECK-NEXT:    vpsel q0, q0, q1
-; CHECK-NEXT:    pop {r4, r5, r7, pc}
+; CHECK-NEXT:    pop {r4, r5, r6, pc}
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI13_0:
@@ -331,16 +315,12 @@ define arm_aapcs_vfpcc <2 x i64> @vqshrni64_umaxmin(<2 x i64> %so) {
 ; CHECK-NEXT:    subs.w r2, r2, #-1
 ; CHECK-NEXT:    sbcs r2, r3, #0
 ; CHECK-NEXT:    vmov q0[3], q0[1], r3, r1
-; CHECK-NEXT:    cset r2, lo
-; CHECK-NEXT:    movs r3, #0
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csetm r2, ne
+; CHECK-NEXT:    csetm r2, lo
 ; CHECK-NEXT:    subs.w r0, r0, #-1
+; CHECK-NEXT:    mov.w r3, #0
 ; CHECK-NEXT:    sbcs r0, r1, #0
 ; CHECK-NEXT:    bfi r3, r2, #0, #8
-; CHECK-NEXT:    cset r0, lo
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lo
 ; CHECK-NEXT:    bfi r3, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r3
 ; CHECK-NEXT:    vpsel q0, q0, q1
@@ -364,16 +344,12 @@ define arm_aapcs_vfpcc <2 x i64> @vqshrni64_uminmax(<2 x i64> %so) {
 ; CHECK-NEXT:    subs.w r2, r2, #-1
 ; CHECK-NEXT:    sbcs r2, r3, #0
 ; CHECK-NEXT:    vmov q0[3], q0[1], r3, r1
-; CHECK-NEXT:    cset r2, lo
-; CHECK-NEXT:    movs r3, #0
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    csetm r2, ne
+; CHECK-NEXT:    csetm r2, lo
 ; CHECK-NEXT:    subs.w r0, r0, #-1
+; CHECK-NEXT:    mov.w r3, #0
 ; CHECK-NEXT:    sbcs r0, r1, #0
 ; CHECK-NEXT:    bfi r3, r2, #0, #8
-; CHECK-NEXT:    cset r0, lo
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    csetm r0, lo
 ; CHECK-NEXT:    bfi r3, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r3
 ; CHECK-NEXT:    vpsel q0, q0, q1


        


More information about the llvm-commits mailing list