[llvm] ad12e6e - [ARM] Turn sext_inreg(VGetLaneu) into VGetLanes

David Green via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 1 03:10:56 PST 2021


Author: David Green
Date: 2021-02-01T11:10:35Z
New Revision: ad12e6ee9579149c0efb594211fa3fb8aed2d84f

URL: https://github.com/llvm/llvm-project/commit/ad12e6ee9579149c0efb594211fa3fb8aed2d84f
DIFF: https://github.com/llvm/llvm-project/commit/ad12e6ee9579149c0efb594211fa3fb8aed2d84f.diff

LOG: [ARM] Turn sext_inreg(VGetLaneu) into VGetLanes

This adds a DAG combine that converts a sext_inreg of VGetLaneu into
VGetLanes, provided the type being sign-extended from matches the scalar
type of the vector operand.

Differential Revision: https://reviews.llvm.org/D95073

Added: 
    

Modified: 
    llvm/lib/Target/ARM/ARMISelLowering.cpp
    llvm/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
    llvm/test/CodeGen/Thumb2/mve-div-expand.ll
    llvm/test/CodeGen/Thumb2/mve-vcvt.ll
    llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll
    llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll
    llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll
    llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 8f2f07d71994..6ae12cc4c6ca 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -977,6 +977,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
     setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
     setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+    setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
     setTargetDAGCombine(ISD::STORE);
     setTargetDAGCombine(ISD::SIGN_EXTEND);
     setTargetDAGCombine(ISD::ZERO_EXTEND);
@@ -13985,6 +13986,20 @@ static SDValue PerformExtractEltCombine(SDNode *N,
   return SDValue();
 }
 
+static SDValue PerformSignExtendInregCombine(SDNode *N, SelectionDAG &DAG) {
+  SDValue Op = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+
+  // sext_inreg(VGETLANEu) from the source operand's scalar type -> VGETLANEs
+  if (Op.getOpcode() == ARMISD::VGETLANEu &&
+      cast<VTSDNode>(N->getOperand(1))->getVT() ==
+          Op.getOperand(0).getValueType().getScalarType())
+    return DAG.getNode(ARMISD::VGETLANEs, SDLoc(N), VT, Op.getOperand(0),
+                       Op.getOperand(1));
+
+  return SDValue();
+}
+
 /// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
 /// ISD::VECTOR_SHUFFLE.
 static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
@@ -16356,6 +16371,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
   case ISD::EXTRACT_VECTOR_ELT:
     return PerformExtractEltCombine(N, DCI, Subtarget);
+  case ISD::SIGN_EXTEND_INREG: return PerformSignExtendInregCombine(N, DCI.DAG);
   case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
   case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI, Subtarget);
   case ARMISD::VDUP: return PerformVDUPCombine(N, DCI, Subtarget);

diff  --git a/llvm/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll b/llvm/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
index 643468c6dc8b..cf884f340ebf 100644
--- a/llvm/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
+++ b/llvm/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple armv7 %s -o - | FileCheck %s
 
 define float @f(<4 x i16>* nocapture %in) {
@@ -64,12 +65,10 @@ define <4 x i32> @h(<4 x i8> *%in) {
 }
 
 define float @i(<4 x i16>* nocapture %in) {
-  ; FIXME: The vmov.u + sxt can convert to a vmov.s
 ; CHECK-LABEL: i:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vldr d16, [r0]
-; CHECK-NEXT:    vmov.u16 r0, d16[0]
-; CHECK-NEXT:    sxth r0, r0
+; CHECK-NEXT:    vmov.s16 r0, d16[0]
 ; CHECK-NEXT:    vmov s0, r0
 ; CHECK-NEXT:    vcvt.f32.s32 s0, s0
 ; CHECK-NEXT:    vmov r0, s0
@@ -96,12 +95,10 @@ define float @j(<8 x i8>* nocapture %in) {
 }
 
 define float @k(<8 x i8>* nocapture %in) {
-; FIXME: The vmov.u + sxt can convert to a vmov.s
 ; CHECK-LABEL: k:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vldr d16, [r0]
-; CHECK-NEXT:    vmov.u8 r0, d16[7]
-; CHECK-NEXT:    sxtb r0, r0
+; CHECK-NEXT:    vmov.s8 r0, d16[7]
 ; CHECK-NEXT:    vmov s0, r0
 ; CHECK-NEXT:    vcvt.f32.s32 s0, s0
 ; CHECK-NEXT:    vmov r0, s0

diff  --git a/llvm/test/CodeGen/Thumb2/mve-div-expand.ll b/llvm/test/CodeGen/Thumb2/mve-div-expand.ll
index 5704ca95e2b6..2c16b818d557 100644
--- a/llvm/test/CodeGen/Thumb2/mve-div-expand.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-div-expand.ll
@@ -154,58 +154,40 @@ entry:
 define arm_aapcs_vfpcc <8 x i16> @sdiv_i16(<8 x i16> %in1, <8 x i16> %in2) {
 ; CHECK-LABEL: sdiv_i16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r6, lr}
-; CHECK-NEXT:    push {r4, r5, r6, lr}
-; CHECK-NEXT:    vmov.u16 r0, q1[3]
-; CHECK-NEXT:    vmov.u16 r1, q0[3]
-; CHECK-NEXT:    sxth r0, r0
-; CHECK-NEXT:    sxth r1, r1
-; CHECK-NEXT:    vmov.u16 r2, q0[2]
-; CHECK-NEXT:    sdiv r12, r1, r0
-; CHECK-NEXT:    vmov.u16 r1, q1[2]
-; CHECK-NEXT:    sxth r2, r2
-; CHECK-NEXT:    sxth r1, r1
-; CHECK-NEXT:    vmov.u16 r4, q1[6]
-; CHECK-NEXT:    sdiv r3, r2, r1
-; CHECK-NEXT:    vmov.u16 r1, q1[1]
-; CHECK-NEXT:    vmov.u16 r2, q0[1]
-; CHECK-NEXT:    sxth r1, r1
-; CHECK-NEXT:    sxth r2, r2
-; CHECK-NEXT:    vmov.u16 r5, q0[6]
-; CHECK-NEXT:    sdiv r0, r2, r1
-; CHECK-NEXT:    vmov.u16 r1, q1[0]
-; CHECK-NEXT:    vmov.u16 r2, q0[0]
-; CHECK-NEXT:    sxth r1, r1
-; CHECK-NEXT:    sxth r2, r2
-; CHECK-NEXT:    sxth r4, r4
+; CHECK-NEXT:    vmov.s16 r0, q1[0]
+; CHECK-NEXT:    vmov.s16 r1, q0[0]
+; CHECK-NEXT:    sdiv r0, r1, r0
+; CHECK-NEXT:    vmov.s16 r1, q1[1]
+; CHECK-NEXT:    vmov.s16 r2, q0[1]
+; CHECK-NEXT:    vmov.16 q2[0], r0
 ; CHECK-NEXT:    sdiv r1, r2, r1
-; CHECK-NEXT:    vmov.u16 r2, q1[7]
-; CHECK-NEXT:    vmov.16 q2[0], r1
-; CHECK-NEXT:    sxth.w lr, r2
-; CHECK-NEXT:    vmov.16 q2[1], r0
-; CHECK-NEXT:    vmov.u16 r2, q0[7]
-; CHECK-NEXT:    vmov.16 q2[2], r3
-; CHECK-NEXT:    vmov.u16 r3, q1[4]
-; CHECK-NEXT:    sxth r6, r2
-; CHECK-NEXT:    vmov.u16 r2, q0[4]
-; CHECK-NEXT:    vmov.u16 r1, q1[5]
-; CHECK-NEXT:    vmov.u16 r0, q0[5]
-; CHECK-NEXT:    sxth r3, r3
-; CHECK-NEXT:    sxth r2, r2
-; CHECK-NEXT:    sxth r1, r1
-; CHECK-NEXT:    sxth r0, r0
-; CHECK-NEXT:    vmov.16 q2[3], r12
-; CHECK-NEXT:    sdiv r2, r2, r3
-; CHECK-NEXT:    sxth r5, r5
-; CHECK-NEXT:    vmov.16 q2[4], r2
-; CHECK-NEXT:    sdiv r0, r0, r1
+; CHECK-NEXT:    vmov.s16 r0, q1[2]
+; CHECK-NEXT:    vmov.16 q2[1], r1
+; CHECK-NEXT:    vmov.s16 r1, q0[2]
+; CHECK-NEXT:    sdiv r0, r1, r0
+; CHECK-NEXT:    vmov.s16 r1, q0[3]
+; CHECK-NEXT:    vmov.16 q2[2], r0
+; CHECK-NEXT:    vmov.s16 r0, q1[3]
+; CHECK-NEXT:    sdiv r0, r1, r0
+; CHECK-NEXT:    vmov.s16 r1, q0[4]
+; CHECK-NEXT:    vmov.16 q2[3], r0
+; CHECK-NEXT:    vmov.s16 r0, q1[4]
+; CHECK-NEXT:    sdiv r0, r1, r0
+; CHECK-NEXT:    vmov.s16 r1, q0[5]
+; CHECK-NEXT:    vmov.16 q2[4], r0
+; CHECK-NEXT:    vmov.s16 r0, q1[5]
+; CHECK-NEXT:    sdiv r0, r1, r0
+; CHECK-NEXT:    vmov.s16 r1, q0[6]
 ; CHECK-NEXT:    vmov.16 q2[5], r0
-; CHECK-NEXT:    sdiv r0, r5, r4
+; CHECK-NEXT:    vmov.s16 r0, q1[6]
+; CHECK-NEXT:    sdiv r0, r1, r0
+; CHECK-NEXT:    vmov.s16 r1, q0[7]
 ; CHECK-NEXT:    vmov.16 q2[6], r0
-; CHECK-NEXT:    sdiv r0, r6, lr
+; CHECK-NEXT:    vmov.s16 r0, q1[7]
+; CHECK-NEXT:    sdiv r0, r1, r0
 ; CHECK-NEXT:    vmov.16 q2[7], r0
 ; CHECK-NEXT:    vmov q0, q2
-; CHECK-NEXT:    pop {r4, r5, r6, pc}
+; CHECK-NEXT:    bx lr
 entry:
   %out = sdiv <8 x i16> %in1, %in2
   ret <8 x i16> %out
@@ -265,65 +247,49 @@ entry:
 define arm_aapcs_vfpcc <8 x i16> @srem_i16(<8 x i16> %in1, <8 x i16> %in2) {
 ; CHECK-LABEL: srem_i16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT:    vmov.u16 r5, q1[6]
-; CHECK-NEXT:    vmov.u16 r6, q0[6]
-; CHECK-NEXT:    sxth r5, r5
-; CHECK-NEXT:    sxth r6, r6
-; CHECK-NEXT:    vmov.u16 r0, q1[0]
-; CHECK-NEXT:    sdiv r7, r6, r5
-; CHECK-NEXT:    vmov.u16 r2, q1[7]
-; CHECK-NEXT:    sxth.w r8, r0
-; CHECK-NEXT:    vmov.u16 r0, q1[3]
-; CHECK-NEXT:    mls r12, r7, r5, r6
-; CHECK-NEXT:    vmov.u16 r7, q0[7]
-; CHECK-NEXT:    sxth r3, r0
-; CHECK-NEXT:    vmov.u16 r0, q1[2]
-; CHECK-NEXT:    sxth r2, r2
-; CHECK-NEXT:    sxth r7, r7
-; CHECK-NEXT:    sxth r4, r0
-; CHECK-NEXT:    vmov.u16 r0, q1[5]
-; CHECK-NEXT:    sdiv r6, r7, r2
-; CHECK-NEXT:    mls lr, r6, r2, r7
-; CHECK-NEXT:    vmov.u16 r2, q0[4]
-; CHECK-NEXT:    sxth r1, r0
-; CHECK-NEXT:    vmov.u16 r0, q1[4]
-; CHECK-NEXT:    sxth r0, r0
-; CHECK-NEXT:    sxth r2, r2
-; CHECK-NEXT:    sdiv r5, r2, r0
-; CHECK-NEXT:    vmov.u16 r6, q0[1]
-; CHECK-NEXT:    mls r0, r5, r0, r2
-; CHECK-NEXT:    vmov.u16 r2, q0[5]
-; CHECK-NEXT:    sxth r2, r2
-; CHECK-NEXT:    sdiv r5, r2, r1
-; CHECK-NEXT:    sxth r6, r6
-; CHECK-NEXT:    mls r1, r5, r1, r2
-; CHECK-NEXT:    vmov.u16 r2, q0[2]
-; CHECK-NEXT:    sxth r2, r2
-; CHECK-NEXT:    sdiv r5, r2, r4
-; CHECK-NEXT:    mls r2, r5, r4, r2
-; CHECK-NEXT:    vmov.u16 r4, q0[3]
-; CHECK-NEXT:    sxth r4, r4
+; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    vmov.s16 r0, q1[6]
+; CHECK-NEXT:    vmov.s16 r1, q0[6]
+; CHECK-NEXT:    sdiv r2, r1, r0
+; CHECK-NEXT:    mls r12, r2, r0, r1
+; CHECK-NEXT:    vmov.s16 r1, q1[7]
+; CHECK-NEXT:    vmov.s16 r2, q0[7]
+; CHECK-NEXT:    sdiv r3, r2, r1
+; CHECK-NEXT:    mls lr, r3, r1, r2
+; CHECK-NEXT:    vmov.s16 r2, q1[4]
+; CHECK-NEXT:    vmov.s16 r3, q0[4]
+; CHECK-NEXT:    sdiv r0, r3, r2
+; CHECK-NEXT:    mls r2, r0, r2, r3
+; CHECK-NEXT:    vmov.s16 r0, q1[5]
+; CHECK-NEXT:    vmov.s16 r3, q0[5]
+; CHECK-NEXT:    sdiv r1, r3, r0
+; CHECK-NEXT:    mls r0, r1, r0, r3
+; CHECK-NEXT:    vmov.s16 r1, q1[2]
+; CHECK-NEXT:    vmov.s16 r3, q0[2]
+; CHECK-NEXT:    sdiv r4, r3, r1
+; CHECK-NEXT:    mls r1, r4, r1, r3
+; CHECK-NEXT:    vmov.s16 r3, q1[3]
+; CHECK-NEXT:    vmov.s16 r4, q0[3]
 ; CHECK-NEXT:    sdiv r5, r4, r3
 ; CHECK-NEXT:    mls r3, r5, r3, r4
-; CHECK-NEXT:    vmov.u16 r4, q0[0]
-; CHECK-NEXT:    sxth r4, r4
-; CHECK-NEXT:    sdiv r5, r4, r8
-; CHECK-NEXT:    mls r4, r5, r8, r4
-; CHECK-NEXT:    vmov.u16 r5, q1[1]
-; CHECK-NEXT:    sxth r5, r5
+; CHECK-NEXT:    vmov.s16 r4, q1[0]
+; CHECK-NEXT:    vmov.s16 r5, q0[0]
+; CHECK-NEXT:    sdiv r6, r5, r4
+; CHECK-NEXT:    mls r4, r6, r4, r5
+; CHECK-NEXT:    vmov.s16 r6, q0[1]
+; CHECK-NEXT:    vmov.s16 r5, q1[1]
 ; CHECK-NEXT:    sdiv r7, r6, r5
 ; CHECK-NEXT:    vmov.16 q0[0], r4
 ; CHECK-NEXT:    mls r5, r7, r5, r6
 ; CHECK-NEXT:    vmov.16 q0[1], r5
-; CHECK-NEXT:    vmov.16 q0[2], r2
+; CHECK-NEXT:    vmov.16 q0[2], r1
 ; CHECK-NEXT:    vmov.16 q0[3], r3
-; CHECK-NEXT:    vmov.16 q0[4], r0
-; CHECK-NEXT:    vmov.16 q0[5], r1
+; CHECK-NEXT:    vmov.16 q0[4], r2
+; CHECK-NEXT:    vmov.16 q0[5], r0
 ; CHECK-NEXT:    vmov.16 q0[6], r12
 ; CHECK-NEXT:    vmov.16 q0[7], lr
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
 entry:
   %out = srem <8 x i16> %in1, %in2
   ret <8 x i16> %out
@@ -407,106 +373,72 @@ entry:
 define arm_aapcs_vfpcc <16 x i8> @sdiv_i8(<16 x i8> %in1, <16 x i8> %in2) {
 ; CHECK-LABEL: sdiv_i8:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
-; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
-; CHECK-NEXT:    vmov.u8 r0, q1[1]
-; CHECK-NEXT:    vmov.u8 r1, q0[1]
-; CHECK-NEXT:    sxtb r0, r0
-; CHECK-NEXT:    sxtb r1, r1
-; CHECK-NEXT:    vmov.u8 r2, q0[0]
+; CHECK-NEXT:    vmov.s8 r0, q1[0]
+; CHECK-NEXT:    vmov.s8 r1, q0[0]
 ; CHECK-NEXT:    sdiv r0, r1, r0
-; CHECK-NEXT:    vmov.u8 r1, q1[0]
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r1, r1
-; CHECK-NEXT:    vmov.u8 r4, q1[3]
+; CHECK-NEXT:    vmov.s8 r1, q1[1]
+; CHECK-NEXT:    vmov.s8 r2, q0[1]
+; CHECK-NEXT:    vmov.8 q2[0], r0
 ; CHECK-NEXT:    sdiv r1, r2, r1
-; CHECK-NEXT:    vmov.u8 r5, q0[3]
-; CHECK-NEXT:    vmov.8 q2[0], r1
-; CHECK-NEXT:    vmov.u8 r1, q1[2]
-; CHECK-NEXT:    vmov.8 q2[1], r0
-; CHECK-NEXT:    vmov.u8 r0, q0[2]
-; CHECK-NEXT:    vmov.u8 r2, q1[11]
-; CHECK-NEXT:    vmov.u8 r3, q0[11]
-; CHECK-NEXT:    sxtb r1, r1
-; CHECK-NEXT:    sxtb r0, r0
-; CHECK-NEXT:    sdiv r0, r0, r1
-; CHECK-NEXT:    sxtb.w r12, r2
-; CHECK-NEXT:    sxtb.w lr, r3
-; CHECK-NEXT:    vmov.u8 r2, q1[4]
-; CHECK-NEXT:    vmov.u8 r3, q0[4]
-; CHECK-NEXT:    sxtb r4, r4
-; CHECK-NEXT:    sxtb r5, r5
+; CHECK-NEXT:    vmov.s8 r0, q1[2]
+; CHECK-NEXT:    vmov.8 q2[1], r1
+; CHECK-NEXT:    vmov.s8 r1, q0[2]
+; CHECK-NEXT:    sdiv r0, r1, r0
+; CHECK-NEXT:    vmov.s8 r1, q0[3]
 ; CHECK-NEXT:    vmov.8 q2[2], r0
-; CHECK-NEXT:    sdiv r0, r5, r4
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r0, q1[3]
+; CHECK-NEXT:    sdiv r0, r1, r0
+; CHECK-NEXT:    vmov.s8 r1, q0[4]
 ; CHECK-NEXT:    vmov.8 q2[3], r0
-; CHECK-NEXT:    sdiv r0, r3, r2
-; CHECK-NEXT:    vmov.u8 r1, q0[10]
+; CHECK-NEXT:    vmov.s8 r0, q1[4]
+; CHECK-NEXT:    sdiv r0, r1, r0
+; CHECK-NEXT:    vmov.s8 r1, q0[5]
 ; CHECK-NEXT:    vmov.8 q2[4], r0
-; CHECK-NEXT:    vmov.u8 r0, q1[10]
-; CHECK-NEXT:    sxtb r0, r0
-; CHECK-NEXT:    sxtb r1, r1
-; CHECK-NEXT:    sdiv r12, lr, r12
-; CHECK-NEXT:    sdiv lr, r1, r0
-; CHECK-NEXT:    vmov.u8 r0, q1[9]
-; CHECK-NEXT:    vmov.u8 r1, q0[9]
-; CHECK-NEXT:    sxtb r0, r0
-; CHECK-NEXT:    sxtb r1, r1
-; CHECK-NEXT:    sdiv r2, r1, r0
-; CHECK-NEXT:    vmov.u8 r0, q1[8]
-; CHECK-NEXT:    vmov.u8 r1, q0[8]
-; CHECK-NEXT:    sxtb r0, r0
-; CHECK-NEXT:    sxtb r1, r1
-; CHECK-NEXT:    vmov.u8 r3, q0[7]
-; CHECK-NEXT:    sdiv r1, r1, r0
-; CHECK-NEXT:    vmov.u8 r0, q1[7]
-; CHECK-NEXT:    sxtb r0, r0
-; CHECK-NEXT:    sxtb r3, r3
-; CHECK-NEXT:    sdiv r4, r3, r0
-; CHECK-NEXT:    vmov.u8 r0, q1[6]
-; CHECK-NEXT:    vmov.u8 r3, q0[6]
-; CHECK-NEXT:    sxtb r0, r0
-; CHECK-NEXT:    sxtb r3, r3
-; CHECK-NEXT:    vmov.u8 r6, q0[12]
-; CHECK-NEXT:    sdiv r5, r3, r0
-; CHECK-NEXT:    vmov.u8 r0, q1[5]
-; CHECK-NEXT:    vmov.u8 r3, q0[5]
-; CHECK-NEXT:    sxtb r0, r0
-; CHECK-NEXT:    sxtb r3, r3
-; CHECK-NEXT:    sxtb r6, r6
-; CHECK-NEXT:    sdiv r0, r3, r0
-; CHECK-NEXT:    vmov.u8 r3, q1[15]
+; CHECK-NEXT:    vmov.s8 r0, q1[5]
+; CHECK-NEXT:    sdiv r0, r1, r0
+; CHECK-NEXT:    vmov.s8 r1, q0[6]
 ; CHECK-NEXT:    vmov.8 q2[5], r0
-; CHECK-NEXT:    sxtb r7, r3
-; CHECK-NEXT:    vmov.8 q2[6], r5
-; CHECK-NEXT:    vmov.u8 r3, q1[12]
-; CHECK-NEXT:    vmov.8 q2[7], r4
-; CHECK-NEXT:    sxtb r3, r3
-; CHECK-NEXT:    vmov.8 q2[8], r1
-; CHECK-NEXT:    vmov.u8 r1, q1[13]
-; CHECK-NEXT:    vmov.8 q2[9], r2
-; CHECK-NEXT:    vmov.u8 r2, q0[13]
-; CHECK-NEXT:    vmov.8 q2[10], lr
-; CHECK-NEXT:    vmov.u8 r5, q1[14]
-; CHECK-NEXT:    vmov.u8 r4, q0[14]
-; CHECK-NEXT:    sxtb r1, r1
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    vmov.8 q2[11], r12
-; CHECK-NEXT:    sdiv r3, r6, r3
-; CHECK-NEXT:    vmov.u8 r0, q0[15]
-; CHECK-NEXT:    sxtb r5, r5
-; CHECK-NEXT:    sxtb r4, r4
-; CHECK-NEXT:    vmov.8 q2[12], r3
-; CHECK-NEXT:    sdiv r1, r2, r1
-; CHECK-NEXT:    sxtb r0, r0
-; CHECK-NEXT:    vmov.8 q2[13], r1
-; CHECK-NEXT:    sdiv r1, r4, r5
-; CHECK-NEXT:    sdiv r0, r0, r7
-; CHECK-NEXT:    vmov.8 q2[14], r1
+; CHECK-NEXT:    vmov.s8 r0, q1[6]
+; CHECK-NEXT:    sdiv r0, r1, r0
+; CHECK-NEXT:    vmov.s8 r1, q0[7]
+; CHECK-NEXT:    vmov.8 q2[6], r0
+; CHECK-NEXT:    vmov.s8 r0, q1[7]
+; CHECK-NEXT:    sdiv r0, r1, r0
+; CHECK-NEXT:    vmov.s8 r1, q0[8]
+; CHECK-NEXT:    vmov.8 q2[7], r0
+; CHECK-NEXT:    vmov.s8 r0, q1[8]
+; CHECK-NEXT:    sdiv r0, r1, r0
+; CHECK-NEXT:    vmov.s8 r1, q0[9]
+; CHECK-NEXT:    vmov.8 q2[8], r0
+; CHECK-NEXT:    vmov.s8 r0, q1[9]
+; CHECK-NEXT:    sdiv r0, r1, r0
+; CHECK-NEXT:    vmov.s8 r1, q0[10]
+; CHECK-NEXT:    vmov.8 q2[9], r0
+; CHECK-NEXT:    vmov.s8 r0, q1[10]
+; CHECK-NEXT:    sdiv r0, r1, r0
+; CHECK-NEXT:    vmov.s8 r1, q0[11]
+; CHECK-NEXT:    vmov.8 q2[10], r0
+; CHECK-NEXT:    vmov.s8 r0, q1[11]
+; CHECK-NEXT:    sdiv r0, r1, r0
+; CHECK-NEXT:    vmov.s8 r1, q0[12]
+; CHECK-NEXT:    vmov.8 q2[11], r0
+; CHECK-NEXT:    vmov.s8 r0, q1[12]
+; CHECK-NEXT:    sdiv r0, r1, r0
+; CHECK-NEXT:    vmov.s8 r1, q0[13]
+; CHECK-NEXT:    vmov.8 q2[12], r0
+; CHECK-NEXT:    vmov.s8 r0, q1[13]
+; CHECK-NEXT:    sdiv r0, r1, r0
+; CHECK-NEXT:    vmov.s8 r1, q0[14]
+; CHECK-NEXT:    vmov.8 q2[13], r0
+; CHECK-NEXT:    vmov.s8 r0, q1[14]
+; CHECK-NEXT:    sdiv r0, r1, r0
+; CHECK-NEXT:    vmov.s8 r1, q0[15]
+; CHECK-NEXT:    vmov.8 q2[14], r0
+; CHECK-NEXT:    vmov.s8 r0, q1[15]
+; CHECK-NEXT:    sdiv r0, r1, r0
 ; CHECK-NEXT:    vmov.8 q2[15], r0
 ; CHECK-NEXT:    vmov q0, q2
-; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT:    bx lr
 entry:
   %out = sdiv <16 x i8> %in1, %in2
   ret <16 x i8> %out
@@ -607,122 +539,90 @@ entry:
 define arm_aapcs_vfpcc <16 x i8> @srem_i8(<16 x i8> %in1, <16 x i8> %in2) {
 ; CHECK-LABEL: srem_i8:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-NEXT:    vmov.u8 r5, q1[14]
-; CHECK-NEXT:    vmov.u8 r6, q0[14]
-; CHECK-NEXT:    sxtb r5, r5
-; CHECK-NEXT:    sxtb r6, r6
-; CHECK-NEXT:    sdiv r7, r6, r5
-; CHECK-NEXT:    vmov.u8 r4, q1[15]
-; CHECK-NEXT:    mls r12, r7, r5, r6
-; CHECK-NEXT:    vmov.u8 r7, q0[15]
-; CHECK-NEXT:    sxtb r4, r4
-; CHECK-NEXT:    vmov.u8 r2, q1[13]
-; CHECK-NEXT:    sxtb r7, r7
-; CHECK-NEXT:    sxtb r3, r2
-; CHECK-NEXT:    sdiv r6, r7, r4
-; CHECK-NEXT:    vmov.u8 r2, q1[12]
-; CHECK-NEXT:    mls lr, r6, r4, r7
-; CHECK-NEXT:    vmov.u8 r4, q0[12]
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    vmov.u8 r0, q1[8]
-; CHECK-NEXT:    sxtb r4, r4
-; CHECK-NEXT:    sxtb.w r8, r0
-; CHECK-NEXT:    sdiv r5, r4, r2
-; CHECK-NEXT:    vmov.u8 r0, q1[11]
-; CHECK-NEXT:    mls r9, r5, r2, r4
-; CHECK-NEXT:    vmov.u8 r4, q0[13]
-; CHECK-NEXT:    sxtb r4, r4
-; CHECK-NEXT:    vmov.u8 r6, q0[0]
-; CHECK-NEXT:    sdiv r5, r4, r3
-; CHECK-NEXT:    sxtb r1, r0
-; CHECK-NEXT:    vmov.u8 r0, q1[10]
-; CHECK-NEXT:    mls r3, r5, r3, r4
-; CHECK-NEXT:    vmov.u8 r4, q0[10]
-; CHECK-NEXT:    sxtb r0, r0
-; CHECK-NEXT:    sxtb r4, r4
-; CHECK-NEXT:    sxtb r6, r6
-; CHECK-NEXT:    sdiv r5, r4, r0
-; CHECK-NEXT:    mls r0, r5, r0, r4
-; CHECK-NEXT:    vmov.u8 r4, q0[11]
-; CHECK-NEXT:    sxtb r4, r4
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT:    vmov.s8 r0, q1[14]
+; CHECK-NEXT:    vmov.s8 r1, q0[14]
+; CHECK-NEXT:    sdiv r2, r1, r0
+; CHECK-NEXT:    mls r12, r2, r0, r1
+; CHECK-NEXT:    vmov.s8 r0, q1[15]
+; CHECK-NEXT:    vmov.s8 r1, q0[15]
+; CHECK-NEXT:    sdiv r2, r1, r0
+; CHECK-NEXT:    mls lr, r2, r0, r1
+; CHECK-NEXT:    vmov.s8 r0, q1[12]
+; CHECK-NEXT:    vmov.s8 r1, q0[12]
+; CHECK-NEXT:    sdiv r2, r1, r0
+; CHECK-NEXT:    mls r8, r2, r0, r1
+; CHECK-NEXT:    vmov.s8 r0, q1[13]
+; CHECK-NEXT:    vmov.s8 r1, q0[13]
+; CHECK-NEXT:    sdiv r3, r1, r0
+; CHECK-NEXT:    mls r3, r3, r0, r1
+; CHECK-NEXT:    vmov.s8 r0, q1[10]
+; CHECK-NEXT:    vmov.s8 r1, q0[10]
+; CHECK-NEXT:    sdiv r4, r1, r0
+; CHECK-NEXT:    mls r0, r4, r0, r1
+; CHECK-NEXT:    vmov.s8 r1, q1[11]
+; CHECK-NEXT:    vmov.s8 r4, q0[11]
 ; CHECK-NEXT:    sdiv r5, r4, r1
 ; CHECK-NEXT:    mls r1, r5, r1, r4
-; CHECK-NEXT:    vmov.u8 r4, q0[8]
-; CHECK-NEXT:    sxtb r4, r4
-; CHECK-NEXT:    sdiv r5, r4, r8
-; CHECK-NEXT:    mls r4, r5, r8, r4
-; CHECK-NEXT:    vmov.u8 r5, q1[0]
-; CHECK-NEXT:    sxtb r5, r5
+; CHECK-NEXT:    vmov.s8 r4, q1[8]
+; CHECK-NEXT:    vmov.s8 r5, q0[8]
+; CHECK-NEXT:    sdiv r6, r5, r4
+; CHECK-NEXT:    mls r4, r6, r4, r5
+; CHECK-NEXT:    vmov.s8 r5, q1[0]
+; CHECK-NEXT:    vmov.s8 r6, q0[0]
 ; CHECK-NEXT:    sdiv r7, r6, r5
 ; CHECK-NEXT:    mls r5, r7, r5, r6
-; CHECK-NEXT:    vmov.u8 r6, q1[1]
-; CHECK-NEXT:    vmov.u8 r7, q0[1]
-; CHECK-NEXT:    sxtb r6, r6
-; CHECK-NEXT:    sxtb r7, r7
-; CHECK-NEXT:    vmov.8 q2[0], r5
+; CHECK-NEXT:    vmov.s8 r6, q1[1]
+; CHECK-NEXT:    vmov.s8 r7, q0[1]
 ; CHECK-NEXT:    sdiv r2, r7, r6
-; CHECK-NEXT:    vmov.u8 r5, q0[2]
+; CHECK-NEXT:    vmov.8 q2[0], r5
 ; CHECK-NEXT:    mls r2, r2, r6, r7
-; CHECK-NEXT:    sxtb r5, r5
+; CHECK-NEXT:    vmov.s8 r5, q0[2]
 ; CHECK-NEXT:    vmov.8 q2[1], r2
-; CHECK-NEXT:    vmov.u8 r2, q1[2]
-; CHECK-NEXT:    sxtb r2, r2
+; CHECK-NEXT:    vmov.s8 r2, q1[2]
 ; CHECK-NEXT:    sdiv r6, r5, r2
 ; CHECK-NEXT:    mls r2, r6, r2, r5
-; CHECK-NEXT:    vmov.u8 r5, q0[3]
-; CHECK-NEXT:    sxtb r5, r5
+; CHECK-NEXT:    vmov.s8 r5, q0[3]
 ; CHECK-NEXT:    vmov.8 q2[2], r2
-; CHECK-NEXT:    vmov.u8 r2, q1[3]
-; CHECK-NEXT:    sxtb r2, r2
+; CHECK-NEXT:    vmov.s8 r2, q1[3]
 ; CHECK-NEXT:    sdiv r6, r5, r2
 ; CHECK-NEXT:    mls r2, r6, r2, r5
-; CHECK-NEXT:    vmov.u8 r5, q0[4]
-; CHECK-NEXT:    sxtb r5, r5
+; CHECK-NEXT:    vmov.s8 r5, q0[4]
 ; CHECK-NEXT:    vmov.8 q2[3], r2
-; CHECK-NEXT:    vmov.u8 r2, q1[4]
-; CHECK-NEXT:    sxtb r2, r2
+; CHECK-NEXT:    vmov.s8 r2, q1[4]
 ; CHECK-NEXT:    sdiv r6, r5, r2
 ; CHECK-NEXT:    mls r2, r6, r2, r5
-; CHECK-NEXT:    vmov.u8 r5, q0[5]
-; CHECK-NEXT:    sxtb r5, r5
+; CHECK-NEXT:    vmov.s8 r5, q0[5]
 ; CHECK-NEXT:    vmov.8 q2[4], r2
-; CHECK-NEXT:    vmov.u8 r2, q1[5]
-; CHECK-NEXT:    sxtb r2, r2
+; CHECK-NEXT:    vmov.s8 r2, q1[5]
 ; CHECK-NEXT:    sdiv r6, r5, r2
 ; CHECK-NEXT:    mls r2, r6, r2, r5
-; CHECK-NEXT:    vmov.u8 r5, q0[6]
-; CHECK-NEXT:    sxtb r5, r5
+; CHECK-NEXT:    vmov.s8 r5, q0[6]
 ; CHECK-NEXT:    vmov.8 q2[5], r2
-; CHECK-NEXT:    vmov.u8 r2, q1[6]
-; CHECK-NEXT:    sxtb r2, r2
+; CHECK-NEXT:    vmov.s8 r2, q1[6]
 ; CHECK-NEXT:    sdiv r6, r5, r2
 ; CHECK-NEXT:    mls r2, r6, r2, r5
-; CHECK-NEXT:    vmov.u8 r5, q0[7]
-; CHECK-NEXT:    sxtb r5, r5
+; CHECK-NEXT:    vmov.s8 r5, q0[7]
 ; CHECK-NEXT:    vmov.8 q2[6], r2
-; CHECK-NEXT:    vmov.u8 r2, q1[7]
-; CHECK-NEXT:    sxtb r2, r2
+; CHECK-NEXT:    vmov.s8 r2, q1[7]
 ; CHECK-NEXT:    sdiv r6, r5, r2
 ; CHECK-NEXT:    mls r2, r6, r2, r5
-; CHECK-NEXT:    vmov.u8 r5, q0[9]
-; CHECK-NEXT:    sxtb r5, r5
+; CHECK-NEXT:    vmov.s8 r5, q0[9]
 ; CHECK-NEXT:    vmov.8 q2[7], r2
-; CHECK-NEXT:    vmov.u8 r2, q1[9]
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    vmov.8 q2[8], r4
+; CHECK-NEXT:    vmov.s8 r2, q1[9]
 ; CHECK-NEXT:    sdiv r6, r5, r2
+; CHECK-NEXT:    vmov.8 q2[8], r4
 ; CHECK-NEXT:    mls r2, r6, r2, r5
 ; CHECK-NEXT:    vmov.8 q2[9], r2
 ; CHECK-NEXT:    vmov.8 q2[10], r0
 ; CHECK-NEXT:    vmov.8 q2[11], r1
-; CHECK-NEXT:    vmov.8 q2[12], r9
+; CHECK-NEXT:    vmov.8 q2[12], r8
 ; CHECK-NEXT:    vmov.8 q2[13], r3
 ; CHECK-NEXT:    vmov.8 q2[14], r12
 ; CHECK-NEXT:    vmov.8 q2[15], lr
 ; CHECK-NEXT:    vmov q0, q2
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
 entry:
   %out = srem <16 x i8> %in1, %in2
   ret <16 x i8> %out

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vcvt.ll b/llvm/test/CodeGen/Thumb2/mve-vcvt.ll
index 851be7124e5b..93c8af4928fa 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcvt.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcvt.ll
@@ -91,10 +91,8 @@ entry:
 define arm_aapcs_vfpcc <8 x half> @foo_half_int16(<8 x i16> %src) {
 ; CHECK-MVE-LABEL: foo_half_int16:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vmov.u16 r0, q0[0]
-; CHECK-MVE-NEXT:    vmov.u16 r1, q0[1]
-; CHECK-MVE-NEXT:    sxth r0, r0
-; CHECK-MVE-NEXT:    sxth r1, r1
+; CHECK-MVE-NEXT:    vmov.s16 r0, q0[0]
+; CHECK-MVE-NEXT:    vmov.s16 r1, q0[1]
 ; CHECK-MVE-NEXT:    vmov s4, r0
 ; CHECK-MVE-NEXT:    vcvt.f16.s32 s4, s4
 ; CHECK-MVE-NEXT:    vmov r0, s4
@@ -102,39 +100,33 @@ define arm_aapcs_vfpcc <8 x half> @foo_half_int16(<8 x i16> %src) {
 ; CHECK-MVE-NEXT:    vcvt.f16.s32 s4, s4
 ; CHECK-MVE-NEXT:    vmov r1, s4
 ; CHECK-MVE-NEXT:    vmov.16 q1[0], r0
-; CHECK-MVE-NEXT:    vmov.u16 r0, q0[2]
+; CHECK-MVE-NEXT:    vmov.s16 r0, q0[2]
 ; CHECK-MVE-NEXT:    vmov.16 q1[1], r1
-; CHECK-MVE-NEXT:    sxth r0, r0
 ; CHECK-MVE-NEXT:    vmov s8, r0
 ; CHECK-MVE-NEXT:    vcvt.f16.s32 s8, s8
 ; CHECK-MVE-NEXT:    vmov r0, s8
 ; CHECK-MVE-NEXT:    vmov.16 q1[2], r0
-; CHECK-MVE-NEXT:    vmov.u16 r0, q0[3]
-; CHECK-MVE-NEXT:    sxth r0, r0
+; CHECK-MVE-NEXT:    vmov.s16 r0, q0[3]
 ; CHECK-MVE-NEXT:    vmov s8, r0
 ; CHECK-MVE-NEXT:    vcvt.f16.s32 s8, s8
 ; CHECK-MVE-NEXT:    vmov r0, s8
 ; CHECK-MVE-NEXT:    vmov.16 q1[3], r0
-; CHECK-MVE-NEXT:    vmov.u16 r0, q0[4]
-; CHECK-MVE-NEXT:    sxth r0, r0
+; CHECK-MVE-NEXT:    vmov.s16 r0, q0[4]
 ; CHECK-MVE-NEXT:    vmov s8, r0
 ; CHECK-MVE-NEXT:    vcvt.f16.s32 s8, s8
 ; CHECK-MVE-NEXT:    vmov r0, s8
 ; CHECK-MVE-NEXT:    vmov.16 q1[4], r0
-; CHECK-MVE-NEXT:    vmov.u16 r0, q0[5]
-; CHECK-MVE-NEXT:    sxth r0, r0
+; CHECK-MVE-NEXT:    vmov.s16 r0, q0[5]
 ; CHECK-MVE-NEXT:    vmov s8, r0
 ; CHECK-MVE-NEXT:    vcvt.f16.s32 s8, s8
 ; CHECK-MVE-NEXT:    vmov r0, s8
 ; CHECK-MVE-NEXT:    vmov.16 q1[5], r0
-; CHECK-MVE-NEXT:    vmov.u16 r0, q0[6]
-; CHECK-MVE-NEXT:    sxth r0, r0
+; CHECK-MVE-NEXT:    vmov.s16 r0, q0[6]
 ; CHECK-MVE-NEXT:    vmov s8, r0
 ; CHECK-MVE-NEXT:    vcvt.f16.s32 s8, s8
 ; CHECK-MVE-NEXT:    vmov r0, s8
 ; CHECK-MVE-NEXT:    vmov.16 q1[6], r0
-; CHECK-MVE-NEXT:    vmov.u16 r0, q0[7]
-; CHECK-MVE-NEXT:    sxth r0, r0
+; CHECK-MVE-NEXT:    vmov.s16 r0, q0[7]
 ; CHECK-MVE-NEXT:    vmov s0, r0
 ; CHECK-MVE-NEXT:    vcvt.f16.s32 s0, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll
index 49bd421d17aa..4b48861a6fc1 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll
@@ -178,10 +178,8 @@ entry:
 define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x) {
 ; CHECK-LABEL: add_v8i16_v8i64_sext:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.u16 r0, q0[1]
-; CHECK-NEXT:    vmov.u16 r1, q0[0]
-; CHECK-NEXT:    sxth r0, r0
-; CHECK-NEXT:    sxth r1, r1
+; CHECK-NEXT:    vmov.s16 r0, q0[1]
+; CHECK-NEXT:    vmov.s16 r1, q0[0]
 ; CHECK-NEXT:    vmov q1[2], q1[0], r1, r0
 ; CHECK-NEXT:    asrs r2, r0, #31
 ; CHECK-NEXT:    asrs r1, r1, #31
@@ -190,11 +188,9 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x) {
 ; CHECK-NEXT:    vmov r3, s4
 ; CHECK-NEXT:    vmov r1, s5
 ; CHECK-NEXT:    adds r2, r2, r3
-; CHECK-NEXT:    vmov.u16 r3, q0[2]
+; CHECK-NEXT:    vmov.s16 r3, q0[2]
 ; CHECK-NEXT:    adc.w r12, r1, r0, asr #31
-; CHECK-NEXT:    vmov.u16 r1, q0[3]
-; CHECK-NEXT:    sxth r1, r1
-; CHECK-NEXT:    sxth r3, r3
+; CHECK-NEXT:    vmov.s16 r1, q0[3]
 ; CHECK-NEXT:    vmov q1[2], q1[0], r3, r1
 ; CHECK-NEXT:    asrs r0, r1, #31
 ; CHECK-NEXT:    asrs r3, r3, #31
@@ -205,11 +201,9 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x) {
 ; CHECK-NEXT:    vmov r3, s6
 ; CHECK-NEXT:    adc.w r0, r0, r12
 ; CHECK-NEXT:    adds r2, r2, r3
-; CHECK-NEXT:    vmov.u16 r3, q0[4]
+; CHECK-NEXT:    vmov.s16 r3, q0[4]
 ; CHECK-NEXT:    adc.w r12, r0, r1, asr #31
-; CHECK-NEXT:    vmov.u16 r1, q0[5]
-; CHECK-NEXT:    sxth r1, r1
-; CHECK-NEXT:    sxth r3, r3
+; CHECK-NEXT:    vmov.s16 r1, q0[5]
 ; CHECK-NEXT:    vmov q1[2], q1[0], r3, r1
 ; CHECK-NEXT:    asrs r0, r1, #31
 ; CHECK-NEXT:    asrs r3, r3, #31
@@ -220,13 +214,11 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x) {
 ; CHECK-NEXT:    vmov r3, s6
 ; CHECK-NEXT:    adc.w r0, r0, r12
 ; CHECK-NEXT:    adds r2, r2, r3
+; CHECK-NEXT:    vmov.s16 r3, q0[7]
 ; CHECK-NEXT:    adc.w r0, r0, r1, asr #31
-; CHECK-NEXT:    vmov.u16 r1, q0[6]
-; CHECK-NEXT:    sxth r1, r1
+; CHECK-NEXT:    vmov.s16 r1, q0[6]
 ; CHECK-NEXT:    adds r2, r2, r1
 ; CHECK-NEXT:    adc.w r1, r0, r1, asr #31
-; CHECK-NEXT:    vmov.u16 r0, q0[7]
-; CHECK-NEXT:    sxth r3, r0
 ; CHECK-NEXT:    adds r0, r2, r3
 ; CHECK-NEXT:    adc.w r1, r1, r3, asr #31
 ; CHECK-NEXT:    bx lr
@@ -545,10 +537,8 @@ entry:
 define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
 ; CHECK-LABEL: add_v16i8_v16i64_sext:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.u8 r0, q0[1]
-; CHECK-NEXT:    vmov.u8 r1, q0[0]
-; CHECK-NEXT:    sxtb r0, r0
-; CHECK-NEXT:    sxtb r1, r1
+; CHECK-NEXT:    vmov.s8 r0, q0[1]
+; CHECK-NEXT:    vmov.s8 r1, q0[0]
 ; CHECK-NEXT:    vmov q1[2], q1[0], r1, r0
 ; CHECK-NEXT:    asrs r2, r0, #31
 ; CHECK-NEXT:    asrs r1, r1, #31
@@ -557,11 +547,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
 ; CHECK-NEXT:    vmov r3, s4
 ; CHECK-NEXT:    vmov r1, s5
 ; CHECK-NEXT:    adds r2, r2, r3
-; CHECK-NEXT:    vmov.u8 r3, q0[2]
+; CHECK-NEXT:    vmov.s8 r3, q0[2]
 ; CHECK-NEXT:    adc.w r12, r1, r0, asr #31
-; CHECK-NEXT:    vmov.u8 r1, q0[3]
-; CHECK-NEXT:    sxtb r1, r1
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r1, q0[3]
 ; CHECK-NEXT:    vmov q1[2], q1[0], r3, r1
 ; CHECK-NEXT:    asrs r0, r1, #31
 ; CHECK-NEXT:    asrs r3, r3, #31
@@ -572,11 +560,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
 ; CHECK-NEXT:    vmov r3, s6
 ; CHECK-NEXT:    adc.w r0, r0, r12
 ; CHECK-NEXT:    adds r2, r2, r3
-; CHECK-NEXT:    vmov.u8 r3, q0[4]
+; CHECK-NEXT:    vmov.s8 r3, q0[4]
 ; CHECK-NEXT:    adc.w r12, r0, r1, asr #31
-; CHECK-NEXT:    vmov.u8 r1, q0[5]
-; CHECK-NEXT:    sxtb r1, r1
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r1, q0[5]
 ; CHECK-NEXT:    vmov q1[2], q1[0], r3, r1
 ; CHECK-NEXT:    asrs r0, r1, #31
 ; CHECK-NEXT:    asrs r3, r3, #31
@@ -587,11 +573,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
 ; CHECK-NEXT:    vmov r3, s6
 ; CHECK-NEXT:    adc.w r0, r0, r12
 ; CHECK-NEXT:    adds r2, r2, r3
-; CHECK-NEXT:    vmov.u8 r3, q0[6]
+; CHECK-NEXT:    vmov.s8 r3, q0[6]
 ; CHECK-NEXT:    adc.w r12, r0, r1, asr #31
-; CHECK-NEXT:    vmov.u8 r1, q0[7]
-; CHECK-NEXT:    sxtb r1, r1
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r1, q0[7]
 ; CHECK-NEXT:    vmov q1[2], q1[0], r3, r1
 ; CHECK-NEXT:    asrs r0, r1, #31
 ; CHECK-NEXT:    asrs r3, r3, #31
@@ -602,11 +586,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
 ; CHECK-NEXT:    vmov r3, s6
 ; CHECK-NEXT:    adc.w r0, r0, r12
 ; CHECK-NEXT:    adds r2, r2, r3
-; CHECK-NEXT:    vmov.u8 r3, q0[8]
+; CHECK-NEXT:    vmov.s8 r3, q0[8]
 ; CHECK-NEXT:    adc.w r12, r0, r1, asr #31
-; CHECK-NEXT:    vmov.u8 r1, q0[9]
-; CHECK-NEXT:    sxtb r1, r1
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r1, q0[9]
 ; CHECK-NEXT:    vmov q1[2], q1[0], r3, r1
 ; CHECK-NEXT:    asrs r0, r1, #31
 ; CHECK-NEXT:    asrs r3, r3, #31
@@ -617,11 +599,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
 ; CHECK-NEXT:    vmov r3, s6
 ; CHECK-NEXT:    adc.w r0, r0, r12
 ; CHECK-NEXT:    adds r2, r2, r3
-; CHECK-NEXT:    vmov.u8 r3, q0[10]
+; CHECK-NEXT:    vmov.s8 r3, q0[10]
 ; CHECK-NEXT:    adc.w r12, r0, r1, asr #31
-; CHECK-NEXT:    vmov.u8 r1, q0[11]
-; CHECK-NEXT:    sxtb r1, r1
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r1, q0[11]
 ; CHECK-NEXT:    vmov q1[2], q1[0], r3, r1
 ; CHECK-NEXT:    asrs r0, r1, #31
 ; CHECK-NEXT:    asrs r3, r3, #31
@@ -632,11 +612,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
 ; CHECK-NEXT:    vmov r3, s6
 ; CHECK-NEXT:    adc.w r0, r0, r12
 ; CHECK-NEXT:    adds r2, r2, r3
-; CHECK-NEXT:    vmov.u8 r3, q0[12]
+; CHECK-NEXT:    vmov.s8 r3, q0[12]
 ; CHECK-NEXT:    adc.w r12, r0, r1, asr #31
-; CHECK-NEXT:    vmov.u8 r1, q0[13]
-; CHECK-NEXT:    sxtb r1, r1
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r1, q0[13]
 ; CHECK-NEXT:    vmov q1[2], q1[0], r3, r1
 ; CHECK-NEXT:    asrs r0, r1, #31
 ; CHECK-NEXT:    asrs r3, r3, #31
@@ -647,13 +625,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
 ; CHECK-NEXT:    vmov r3, s6
 ; CHECK-NEXT:    adc.w r0, r0, r12
 ; CHECK-NEXT:    adds r2, r2, r3
+; CHECK-NEXT:    vmov.s8 r3, q0[15]
 ; CHECK-NEXT:    adc.w r0, r0, r1, asr #31
-; CHECK-NEXT:    vmov.u8 r1, q0[14]
-; CHECK-NEXT:    sxtb r1, r1
+; CHECK-NEXT:    vmov.s8 r1, q0[14]
 ; CHECK-NEXT:    adds r2, r2, r1
 ; CHECK-NEXT:    adc.w r1, r0, r1, asr #31
-; CHECK-NEXT:    vmov.u8 r0, q0[15]
-; CHECK-NEXT:    sxtb r3, r0
 ; CHECK-NEXT:    adds r0, r2, r3
 ; CHECK-NEXT:    adc.w r1, r1, r3, asr #31
 ; CHECK-NEXT:    bx lr
@@ -1051,10 +1027,8 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, i64 %a) {
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .save {r4, lr}
 ; CHECK-NEXT:    push {r4, lr}
-; CHECK-NEXT:    vmov.u16 r2, q0[1]
-; CHECK-NEXT:    vmov.u16 r3, q0[0]
-; CHECK-NEXT:    sxth r2, r2
-; CHECK-NEXT:    sxth r3, r3
+; CHECK-NEXT:    vmov.s16 r2, q0[1]
+; CHECK-NEXT:    vmov.s16 r3, q0[0]
 ; CHECK-NEXT:    vmov q1[2], q1[0], r3, r2
 ; CHECK-NEXT:    asr.w r12, r2, #31
 ; CHECK-NEXT:    asrs r3, r3, #31
@@ -1063,11 +1037,9 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, i64 %a) {
 ; CHECK-NEXT:    vmov r3, s4
 ; CHECK-NEXT:    vmov r12, s5
 ; CHECK-NEXT:    adds.w lr, lr, r3
-; CHECK-NEXT:    vmov.u16 r3, q0[2]
+; CHECK-NEXT:    vmov.s16 r3, q0[2]
 ; CHECK-NEXT:    adc.w r12, r12, r2, asr #31
-; CHECK-NEXT:    vmov.u16 r2, q0[3]
-; CHECK-NEXT:    sxth r2, r2
-; CHECK-NEXT:    sxth r3, r3
+; CHECK-NEXT:    vmov.s16 r2, q0[3]
 ; CHECK-NEXT:    vmov q1[2], q1[0], r3, r2
 ; CHECK-NEXT:    asrs r4, r2, #31
 ; CHECK-NEXT:    asrs r3, r3, #31
@@ -1078,11 +1050,9 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, i64 %a) {
 ; CHECK-NEXT:    adc.w r12, r12, r3
 ; CHECK-NEXT:    vmov r3, s6
 ; CHECK-NEXT:    adds.w lr, r4, r3
-; CHECK-NEXT:    vmov.u16 r4, q0[5]
+; CHECK-NEXT:    vmov.s16 r4, q0[5]
 ; CHECK-NEXT:    adc.w r12, r12, r2, asr #31
-; CHECK-NEXT:    vmov.u16 r2, q0[4]
-; CHECK-NEXT:    sxth r4, r4
-; CHECK-NEXT:    sxth r2, r2
+; CHECK-NEXT:    vmov.s16 r2, q0[4]
 ; CHECK-NEXT:    vmov q1[2], q1[0], r2, r4
 ; CHECK-NEXT:    asrs r3, r4, #31
 ; CHECK-NEXT:    asrs r2, r2, #31
@@ -1094,12 +1064,10 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, i64 %a) {
 ; CHECK-NEXT:    vmov r2, s6
 ; CHECK-NEXT:    adds r2, r2, r3
 ; CHECK-NEXT:    adc.w r3, r12, r4, asr #31
-; CHECK-NEXT:    vmov.u16 r4, q0[6]
-; CHECK-NEXT:    sxth r4, r4
+; CHECK-NEXT:    vmov.s16 r4, q0[6]
 ; CHECK-NEXT:    adds r2, r2, r4
 ; CHECK-NEXT:    adc.w r3, r3, r4, asr #31
-; CHECK-NEXT:    vmov.u16 r4, q0[7]
-; CHECK-NEXT:    sxth r4, r4
+; CHECK-NEXT:    vmov.s16 r4, q0[7]
 ; CHECK-NEXT:    adds r2, r2, r4
 ; CHECK-NEXT:    adc.w r3, r3, r4, asr #31
 ; CHECK-NEXT:    adds r0, r0, r2
@@ -1447,10 +1415,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .save {r4, lr}
 ; CHECK-NEXT:    push {r4, lr}
-; CHECK-NEXT:    vmov.u8 r2, q0[1]
-; CHECK-NEXT:    vmov.u8 r3, q0[0]
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r2, q0[1]
+; CHECK-NEXT:    vmov.s8 r3, q0[0]
 ; CHECK-NEXT:    vmov q1[2], q1[0], r3, r2
 ; CHECK-NEXT:    asr.w r12, r2, #31
 ; CHECK-NEXT:    asrs r3, r3, #31
@@ -1459,11 +1425,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
 ; CHECK-NEXT:    vmov r3, s4
 ; CHECK-NEXT:    vmov r12, s5
 ; CHECK-NEXT:    adds.w lr, lr, r3
-; CHECK-NEXT:    vmov.u8 r3, q0[2]
+; CHECK-NEXT:    vmov.s8 r3, q0[2]
 ; CHECK-NEXT:    adc.w r12, r12, r2, asr #31
-; CHECK-NEXT:    vmov.u8 r2, q0[3]
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r2, q0[3]
 ; CHECK-NEXT:    vmov q1[2], q1[0], r3, r2
 ; CHECK-NEXT:    asrs r4, r2, #31
 ; CHECK-NEXT:    asrs r3, r3, #31
@@ -1474,11 +1438,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
 ; CHECK-NEXT:    adc.w r12, r12, r3
 ; CHECK-NEXT:    vmov r3, s6
 ; CHECK-NEXT:    adds.w lr, r4, r3
-; CHECK-NEXT:    vmov.u8 r4, q0[5]
+; CHECK-NEXT:    vmov.s8 r4, q0[5]
 ; CHECK-NEXT:    adc.w r12, r12, r2, asr #31
-; CHECK-NEXT:    vmov.u8 r2, q0[4]
-; CHECK-NEXT:    sxtb r4, r4
-; CHECK-NEXT:    sxtb r2, r2
+; CHECK-NEXT:    vmov.s8 r2, q0[4]
 ; CHECK-NEXT:    vmov q1[2], q1[0], r2, r4
 ; CHECK-NEXT:    asrs r3, r4, #31
 ; CHECK-NEXT:    asrs r2, r2, #31
@@ -1489,11 +1451,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
 ; CHECK-NEXT:    adc.w r12, r12, r2
 ; CHECK-NEXT:    vmov r2, s6
 ; CHECK-NEXT:    adds.w lr, r3, r2
-; CHECK-NEXT:    vmov.u8 r2, q0[6]
+; CHECK-NEXT:    vmov.s8 r2, q0[6]
 ; CHECK-NEXT:    adc.w r12, r12, r4, asr #31
-; CHECK-NEXT:    vmov.u8 r4, q0[7]
-; CHECK-NEXT:    sxtb r4, r4
-; CHECK-NEXT:    sxtb r2, r2
+; CHECK-NEXT:    vmov.s8 r4, q0[7]
 ; CHECK-NEXT:    vmov q1[2], q1[0], r2, r4
 ; CHECK-NEXT:    asrs r3, r4, #31
 ; CHECK-NEXT:    asrs r2, r2, #31
@@ -1504,11 +1464,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
 ; CHECK-NEXT:    adc.w r12, r12, r2
 ; CHECK-NEXT:    vmov r2, s6
 ; CHECK-NEXT:    adds.w lr, r3, r2
-; CHECK-NEXT:    vmov.u8 r2, q0[8]
+; CHECK-NEXT:    vmov.s8 r2, q0[8]
 ; CHECK-NEXT:    adc.w r12, r12, r4, asr #31
-; CHECK-NEXT:    vmov.u8 r4, q0[9]
-; CHECK-NEXT:    sxtb r4, r4
-; CHECK-NEXT:    sxtb r2, r2
+; CHECK-NEXT:    vmov.s8 r4, q0[9]
 ; CHECK-NEXT:    vmov q1[2], q1[0], r2, r4
 ; CHECK-NEXT:    asrs r3, r4, #31
 ; CHECK-NEXT:    asrs r2, r2, #31
@@ -1519,11 +1477,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
 ; CHECK-NEXT:    adc.w r12, r12, r2
 ; CHECK-NEXT:    vmov r2, s6
 ; CHECK-NEXT:    adds.w lr, r3, r2
-; CHECK-NEXT:    vmov.u8 r2, q0[10]
+; CHECK-NEXT:    vmov.s8 r2, q0[10]
 ; CHECK-NEXT:    adc.w r12, r12, r4, asr #31
-; CHECK-NEXT:    vmov.u8 r4, q0[11]
-; CHECK-NEXT:    sxtb r4, r4
-; CHECK-NEXT:    sxtb r2, r2
+; CHECK-NEXT:    vmov.s8 r4, q0[11]
 ; CHECK-NEXT:    vmov q1[2], q1[0], r2, r4
 ; CHECK-NEXT:    asrs r3, r4, #31
 ; CHECK-NEXT:    asrs r2, r2, #31
@@ -1534,11 +1490,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
 ; CHECK-NEXT:    adc.w r12, r12, r2
 ; CHECK-NEXT:    vmov r2, s6
 ; CHECK-NEXT:    adds.w lr, r3, r2
-; CHECK-NEXT:    vmov.u8 r2, q0[12]
+; CHECK-NEXT:    vmov.s8 r2, q0[12]
 ; CHECK-NEXT:    adc.w r12, r12, r4, asr #31
-; CHECK-NEXT:    vmov.u8 r4, q0[13]
-; CHECK-NEXT:    sxtb r4, r4
-; CHECK-NEXT:    sxtb r2, r2
+; CHECK-NEXT:    vmov.s8 r4, q0[13]
 ; CHECK-NEXT:    vmov q1[2], q1[0], r2, r4
 ; CHECK-NEXT:    asrs r3, r4, #31
 ; CHECK-NEXT:    asrs r2, r2, #31
@@ -1550,12 +1504,10 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
 ; CHECK-NEXT:    vmov r2, s6
 ; CHECK-NEXT:    adds r2, r2, r3
 ; CHECK-NEXT:    adc.w r3, r12, r4, asr #31
-; CHECK-NEXT:    vmov.u8 r4, q0[14]
-; CHECK-NEXT:    sxtb r4, r4
+; CHECK-NEXT:    vmov.s8 r4, q0[14]
 ; CHECK-NEXT:    adds r2, r2, r4
 ; CHECK-NEXT:    adc.w r3, r3, r4, asr #31
-; CHECK-NEXT:    vmov.u8 r4, q0[15]
-; CHECK-NEXT:    sxtb r4, r4
+; CHECK-NEXT:    vmov.s8 r4, q0[15]
 ; CHECK-NEXT:    adds r2, r2, r4
 ; CHECK-NEXT:    adc.w r3, r3, r4, asr #31
 ; CHECK-NEXT:    adds r0, r0, r2

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll
index dc120b98961b..0d6d88c13ab7 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll
@@ -317,10 +317,8 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x, <8 x i16> %b) {
 ; CHECK-NEXT:    rsbs r1, r1, #0
 ; CHECK-NEXT:    vmov q2[2], q2[0], r2, r1
 ; CHECK-NEXT:    vmov q2[3], q2[1], r2, r1
-; CHECK-NEXT:    vmov.u16 r1, q0[1]
-; CHECK-NEXT:    vmov.u16 r2, q0[0]
-; CHECK-NEXT:    sxth r1, r1
-; CHECK-NEXT:    sxth r2, r2
+; CHECK-NEXT:    vmov.s16 r1, q0[1]
+; CHECK-NEXT:    vmov.s16 r2, q0[0]
 ; CHECK-NEXT:    vmov q3[2], q3[0], r2, r1
 ; CHECK-NEXT:    asrs r1, r1, #31
 ; CHECK-NEXT:    asrs r2, r2, #31
@@ -338,10 +336,8 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x, <8 x i16> %b) {
 ; CHECK-NEXT:    adc.w r2, r2, r12
 ; CHECK-NEXT:    vmov q2[2], q2[0], r0, r3
 ; CHECK-NEXT:    vmov q2[3], q2[1], r0, r3
-; CHECK-NEXT:    vmov.u16 r0, q0[3]
-; CHECK-NEXT:    vmov.u16 r3, q0[2]
-; CHECK-NEXT:    sxth r0, r0
-; CHECK-NEXT:    sxth r3, r3
+; CHECK-NEXT:    vmov.s16 r0, q0[3]
+; CHECK-NEXT:    vmov.s16 r3, q0[2]
 ; CHECK-NEXT:    vmov q3[2], q3[0], r3, r0
 ; CHECK-NEXT:    asrs r0, r0, #31
 ; CHECK-NEXT:    asrs r3, r3, #31
@@ -369,10 +365,8 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x, <8 x i16> %b) {
 ; CHECK-NEXT:    rsbs r3, r3, #0
 ; CHECK-NEXT:    vmov q1[2], q1[0], r0, r3
 ; CHECK-NEXT:    vmov q1[3], q1[1], r0, r3
-; CHECK-NEXT:    vmov.u16 r0, q0[5]
-; CHECK-NEXT:    vmov.u16 r3, q0[4]
-; CHECK-NEXT:    sxth r0, r0
-; CHECK-NEXT:    sxth r3, r3
+; CHECK-NEXT:    vmov.s16 r0, q0[5]
+; CHECK-NEXT:    vmov.s16 r3, q0[4]
 ; CHECK-NEXT:    vmov q2[2], q2[0], r3, r0
 ; CHECK-NEXT:    asrs r0, r0, #31
 ; CHECK-NEXT:    asrs r3, r3, #31
@@ -392,10 +386,8 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x, <8 x i16> %b) {
 ; CHECK-NEXT:    adc.w r1, r1, r12
 ; CHECK-NEXT:    vmov q1[2], q1[0], r2, r3
 ; CHECK-NEXT:    vmov q1[3], q1[1], r2, r3
-; CHECK-NEXT:    vmov.u16 r2, q0[7]
-; CHECK-NEXT:    vmov.u16 r3, q0[6]
-; CHECK-NEXT:    sxth r2, r2
-; CHECK-NEXT:    sxth r3, r3
+; CHECK-NEXT:    vmov.s16 r2, q0[7]
+; CHECK-NEXT:    vmov.s16 r3, q0[6]
 ; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
 ; CHECK-NEXT:    asrs r2, r2, #31
 ; CHECK-NEXT:    asrs r3, r3, #31
@@ -1050,10 +1042,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %b) {
 ; CHECK-NEXT:    rsbs r1, r1, #0
 ; CHECK-NEXT:    vmov q5[2], q5[0], r2, r1
 ; CHECK-NEXT:    vmov q5[3], q5[1], r2, r1
-; CHECK-NEXT:    vmov.u8 r1, q0[1]
-; CHECK-NEXT:    vmov.u8 r2, q0[0]
-; CHECK-NEXT:    sxtb r1, r1
-; CHECK-NEXT:    sxtb r2, r2
+; CHECK-NEXT:    vmov.s8 r1, q0[1]
+; CHECK-NEXT:    vmov.s8 r2, q0[0]
 ; CHECK-NEXT:    vmov q6[2], q6[0], r2, r1
 ; CHECK-NEXT:    asrs r1, r1, #31
 ; CHECK-NEXT:    asrs r2, r2, #31
@@ -1071,10 +1061,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %b) {
 ; CHECK-NEXT:    adc.w r2, r2, r12
 ; CHECK-NEXT:    vmov q5[2], q5[0], r0, r3
 ; CHECK-NEXT:    vmov q5[3], q5[1], r0, r3
-; CHECK-NEXT:    vmov.u8 r0, q0[3]
-; CHECK-NEXT:    vmov.u8 r3, q0[2]
-; CHECK-NEXT:    sxtb r0, r0
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r0, q0[3]
+; CHECK-NEXT:    vmov.s8 r3, q0[2]
 ; CHECK-NEXT:    vmov q6[2], q6[0], r3, r0
 ; CHECK-NEXT:    asrs r0, r0, #31
 ; CHECK-NEXT:    asrs r3, r3, #31
@@ -1102,10 +1090,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %b) {
 ; CHECK-NEXT:    rsbs r3, r3, #0
 ; CHECK-NEXT:    vmov q4[2], q4[0], r0, r3
 ; CHECK-NEXT:    vmov q4[3], q4[1], r0, r3
-; CHECK-NEXT:    vmov.u8 r0, q0[5]
-; CHECK-NEXT:    vmov.u8 r3, q0[4]
-; CHECK-NEXT:    sxtb r0, r0
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r0, q0[5]
+; CHECK-NEXT:    vmov.s8 r3, q0[4]
 ; CHECK-NEXT:    vmov q5[2], q5[0], r3, r0
 ; CHECK-NEXT:    asrs r0, r0, #31
 ; CHECK-NEXT:    asrs r3, r3, #31
@@ -1125,10 +1111,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %b) {
 ; CHECK-NEXT:    adc.w r1, r1, r12
 ; CHECK-NEXT:    vmov q4[2], q4[0], r2, r3
 ; CHECK-NEXT:    vmov q4[3], q4[1], r2, r3
-; CHECK-NEXT:    vmov.u8 r2, q0[7]
-; CHECK-NEXT:    vmov.u8 r3, q0[6]
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r2, q0[7]
+; CHECK-NEXT:    vmov.s8 r3, q0[6]
 ; CHECK-NEXT:    vmov q5[2], q5[0], r3, r2
 ; CHECK-NEXT:    asrs r2, r2, #31
 ; CHECK-NEXT:    asrs r3, r3, #31
@@ -1174,10 +1158,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %b) {
 ; CHECK-NEXT:    rsbs r3, r3, #0
 ; CHECK-NEXT:    vmov q2[2], q2[0], r0, r3
 ; CHECK-NEXT:    vmov q2[3], q2[1], r0, r3
-; CHECK-NEXT:    vmov.u8 r0, q0[9]
-; CHECK-NEXT:    vmov.u8 r3, q0[8]
-; CHECK-NEXT:    sxtb r0, r0
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r0, q0[9]
+; CHECK-NEXT:    vmov.s8 r3, q0[8]
 ; CHECK-NEXT:    vmov q3[2], q3[0], r3, r0
 ; CHECK-NEXT:    asrs r0, r0, #31
 ; CHECK-NEXT:    asrs r3, r3, #31
@@ -1197,10 +1179,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %b) {
 ; CHECK-NEXT:    adc.w r1, r1, r12
 ; CHECK-NEXT:    vmov q2[2], q2[0], r2, r3
 ; CHECK-NEXT:    vmov q2[3], q2[1], r2, r3
-; CHECK-NEXT:    vmov.u8 r2, q0[11]
-; CHECK-NEXT:    vmov.u8 r3, q0[10]
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r2, q0[11]
+; CHECK-NEXT:    vmov.s8 r3, q0[10]
 ; CHECK-NEXT:    vmov q3[2], q3[0], r3, r2
 ; CHECK-NEXT:    asrs r2, r2, #31
 ; CHECK-NEXT:    asrs r3, r3, #31
@@ -1228,10 +1208,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %b) {
 ; CHECK-NEXT:    rsbs r3, r3, #0
 ; CHECK-NEXT:    vmov q1[2], q1[0], r0, r3
 ; CHECK-NEXT:    vmov q1[3], q1[1], r0, r3
-; CHECK-NEXT:    vmov.u8 r0, q0[13]
-; CHECK-NEXT:    vmov.u8 r3, q0[12]
-; CHECK-NEXT:    sxtb r0, r0
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r0, q0[13]
+; CHECK-NEXT:    vmov.s8 r3, q0[12]
 ; CHECK-NEXT:    vmov q2[2], q2[0], r3, r0
 ; CHECK-NEXT:    asrs r0, r0, #31
 ; CHECK-NEXT:    asrs r3, r3, #31
@@ -1251,10 +1229,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %b) {
 ; CHECK-NEXT:    adc.w r1, r1, r12
 ; CHECK-NEXT:    vmov q1[2], q1[0], r2, r3
 ; CHECK-NEXT:    vmov q1[3], q1[1], r2, r3
-; CHECK-NEXT:    vmov.u8 r2, q0[15]
-; CHECK-NEXT:    vmov.u8 r3, q0[14]
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r2, q0[15]
+; CHECK-NEXT:    vmov.s8 r3, q0[14]
 ; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
 ; CHECK-NEXT:    asrs r2, r2, #31
 ; CHECK-NEXT:    asrs r3, r3, #31
@@ -1988,39 +1964,35 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, <8 x i16> %b,
 ; CHECK-NEXT:    vmov.u16 r2, q1[3]
 ; CHECK-NEXT:    vmov.u16 r3, q1[1]
 ; CHECK-NEXT:    vmov q2[3], q2[1], r3, r2
+; CHECK-NEXT:    vmov.s16 r2, q0[0]
 ; CHECK-NEXT:    vcmp.i32 ne, q2, zr
-; CHECK-NEXT:    vmrs r12, p0
-; CHECK-NEXT:    and r2, r12, #1
-; CHECK-NEXT:    ubfx r3, r12, #4, #1
-; CHECK-NEXT:    rsbs r2, r2, #0
+; CHECK-NEXT:    vmrs lr, p0
+; CHECK-NEXT:    ubfx r3, lr, #4, #1
+; CHECK-NEXT:    rsb.w r12, r3, #0
+; CHECK-NEXT:    and r3, lr, #1
 ; CHECK-NEXT:    rsbs r3, r3, #0
-; CHECK-NEXT:    vmov q2[2], q2[0], r2, r3
-; CHECK-NEXT:    vmov q2[3], q2[1], r2, r3
-; CHECK-NEXT:    vmov.u16 r2, q0[1]
-; CHECK-NEXT:    vmov.u16 r3, q0[0]
-; CHECK-NEXT:    sxth r2, r2
-; CHECK-NEXT:    sxth r3, r3
-; CHECK-NEXT:    vmov q3[2], q3[0], r3, r2
-; CHECK-NEXT:    asrs r2, r2, #31
+; CHECK-NEXT:    vmov q2[2], q2[0], r3, r12
+; CHECK-NEXT:    vmov q2[3], q2[1], r3, r12
+; CHECK-NEXT:    vmov.s16 r3, q0[1]
+; CHECK-NEXT:    vmov q3[2], q3[0], r2, r3
 ; CHECK-NEXT:    asrs r3, r3, #31
-; CHECK-NEXT:    vmov q3[3], q3[1], r3, r2
+; CHECK-NEXT:    asrs r2, r2, #31
+; CHECK-NEXT:    vmov q3[3], q3[1], r2, r3
 ; CHECK-NEXT:    vand q2, q3, q2
 ; CHECK-NEXT:    vmov r2, s10
 ; CHECK-NEXT:    vmov r4, s8
-; CHECK-NEXT:    vmov lr, s11
+; CHECK-NEXT:    vmov r12, s11
 ; CHECK-NEXT:    vmov r3, s9
 ; CHECK-NEXT:    adds r5, r4, r2
-; CHECK-NEXT:    ubfx r4, r12, #12, #1
-; CHECK-NEXT:    ubfx r2, r12, #8, #1
+; CHECK-NEXT:    ubfx r4, lr, #12, #1
+; CHECK-NEXT:    ubfx r2, lr, #8, #1
 ; CHECK-NEXT:    rsb.w r4, r4, #0
 ; CHECK-NEXT:    rsb.w r2, r2, #0
-; CHECK-NEXT:    adc.w r3, r3, lr
+; CHECK-NEXT:    adc.w r3, r3, r12
 ; CHECK-NEXT:    vmov q2[2], q2[0], r2, r4
 ; CHECK-NEXT:    vmov q2[3], q2[1], r2, r4
-; CHECK-NEXT:    vmov.u16 r2, q0[3]
-; CHECK-NEXT:    vmov.u16 r4, q0[2]
-; CHECK-NEXT:    sxth r2, r2
-; CHECK-NEXT:    sxth r4, r4
+; CHECK-NEXT:    vmov.s16 r2, q0[3]
+; CHECK-NEXT:    vmov.s16 r4, q0[2]
 ; CHECK-NEXT:    vmov q3[2], q3[0], r4, r2
 ; CHECK-NEXT:    asrs r2, r2, #31
 ; CHECK-NEXT:    asrs r4, r4, #31
@@ -2048,10 +2020,8 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, <8 x i16> %b,
 ; CHECK-NEXT:    rsbs r4, r4, #0
 ; CHECK-NEXT:    vmov q1[2], q1[0], r2, r4
 ; CHECK-NEXT:    vmov q1[3], q1[1], r2, r4
-; CHECK-NEXT:    vmov.u16 r2, q0[5]
-; CHECK-NEXT:    vmov.u16 r4, q0[4]
-; CHECK-NEXT:    sxth r2, r2
-; CHECK-NEXT:    sxth r4, r4
+; CHECK-NEXT:    vmov.s16 r2, q0[5]
+; CHECK-NEXT:    vmov.s16 r4, q0[4]
 ; CHECK-NEXT:    vmov q2[2], q2[0], r4, r2
 ; CHECK-NEXT:    asrs r2, r2, #31
 ; CHECK-NEXT:    asrs r4, r4, #31
@@ -2071,10 +2041,8 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, <8 x i16> %b,
 ; CHECK-NEXT:    adc.w r3, r3, r12
 ; CHECK-NEXT:    vmov q1[2], q1[0], r5, r4
 ; CHECK-NEXT:    vmov q1[3], q1[1], r5, r4
-; CHECK-NEXT:    vmov.u16 r5, q0[7]
-; CHECK-NEXT:    vmov.u16 r4, q0[6]
-; CHECK-NEXT:    sxth r5, r5
-; CHECK-NEXT:    sxth r4, r4
+; CHECK-NEXT:    vmov.s16 r5, q0[7]
+; CHECK-NEXT:    vmov.s16 r4, q0[6]
 ; CHECK-NEXT:    vmov q0[2], q0[0], r4, r5
 ; CHECK-NEXT:    asrs r5, r5, #31
 ; CHECK-NEXT:    asrs r4, r4, #31
@@ -2611,39 +2579,35 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %b
 ; CHECK-NEXT:    vmov.u16 r2, q4[3]
 ; CHECK-NEXT:    vmov.u16 r3, q4[1]
 ; CHECK-NEXT:    vmov q5[3], q5[1], r3, r2
+; CHECK-NEXT:    vmov.s8 r2, q0[0]
 ; CHECK-NEXT:    vcmp.i32 ne, q5, zr
-; CHECK-NEXT:    vmrs r12, p0
-; CHECK-NEXT:    and r2, r12, #1
-; CHECK-NEXT:    ubfx r3, r12, #4, #1
-; CHECK-NEXT:    rsbs r2, r2, #0
+; CHECK-NEXT:    vmrs lr, p0
+; CHECK-NEXT:    ubfx r3, lr, #4, #1
+; CHECK-NEXT:    rsb.w r12, r3, #0
+; CHECK-NEXT:    and r3, lr, #1
 ; CHECK-NEXT:    rsbs r3, r3, #0
-; CHECK-NEXT:    vmov q5[2], q5[0], r2, r3
-; CHECK-NEXT:    vmov q5[3], q5[1], r2, r3
-; CHECK-NEXT:    vmov.u8 r2, q0[1]
-; CHECK-NEXT:    vmov.u8 r3, q0[0]
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r3, r3
-; CHECK-NEXT:    vmov q6[2], q6[0], r3, r2
-; CHECK-NEXT:    asrs r2, r2, #31
+; CHECK-NEXT:    vmov q5[2], q5[0], r3, r12
+; CHECK-NEXT:    vmov q5[3], q5[1], r3, r12
+; CHECK-NEXT:    vmov.s8 r3, q0[1]
+; CHECK-NEXT:    vmov q6[2], q6[0], r2, r3
 ; CHECK-NEXT:    asrs r3, r3, #31
-; CHECK-NEXT:    vmov q6[3], q6[1], r3, r2
+; CHECK-NEXT:    asrs r2, r2, #31
+; CHECK-NEXT:    vmov q6[3], q6[1], r2, r3
 ; CHECK-NEXT:    vand q5, q6, q5
 ; CHECK-NEXT:    vmov r2, s22
 ; CHECK-NEXT:    vmov r4, s20
-; CHECK-NEXT:    vmov lr, s23
+; CHECK-NEXT:    vmov r12, s23
 ; CHECK-NEXT:    vmov r3, s21
 ; CHECK-NEXT:    adds r5, r4, r2
-; CHECK-NEXT:    ubfx r4, r12, #12, #1
-; CHECK-NEXT:    ubfx r2, r12, #8, #1
+; CHECK-NEXT:    ubfx r4, lr, #12, #1
+; CHECK-NEXT:    ubfx r2, lr, #8, #1
 ; CHECK-NEXT:    rsb.w r4, r4, #0
 ; CHECK-NEXT:    rsb.w r2, r2, #0
-; CHECK-NEXT:    adc.w r3, r3, lr
+; CHECK-NEXT:    adc.w r3, r3, r12
 ; CHECK-NEXT:    vmov q5[2], q5[0], r2, r4
 ; CHECK-NEXT:    vmov q5[3], q5[1], r2, r4
-; CHECK-NEXT:    vmov.u8 r2, q0[3]
-; CHECK-NEXT:    vmov.u8 r4, q0[2]
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r4, r4
+; CHECK-NEXT:    vmov.s8 r2, q0[3]
+; CHECK-NEXT:    vmov.s8 r4, q0[2]
 ; CHECK-NEXT:    vmov q6[2], q6[0], r4, r2
 ; CHECK-NEXT:    asrs r2, r2, #31
 ; CHECK-NEXT:    asrs r4, r4, #31
@@ -2671,10 +2635,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %b
 ; CHECK-NEXT:    rsbs r4, r4, #0
 ; CHECK-NEXT:    vmov q4[2], q4[0], r2, r4
 ; CHECK-NEXT:    vmov q4[3], q4[1], r2, r4
-; CHECK-NEXT:    vmov.u8 r2, q0[5]
-; CHECK-NEXT:    vmov.u8 r4, q0[4]
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r4, r4
+; CHECK-NEXT:    vmov.s8 r2, q0[5]
+; CHECK-NEXT:    vmov.s8 r4, q0[4]
 ; CHECK-NEXT:    vmov q5[2], q5[0], r4, r2
 ; CHECK-NEXT:    asrs r2, r2, #31
 ; CHECK-NEXT:    asrs r4, r4, #31
@@ -2694,10 +2656,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %b
 ; CHECK-NEXT:    adc.w r3, r3, r12
 ; CHECK-NEXT:    vmov q4[2], q4[0], r5, r4
 ; CHECK-NEXT:    vmov q4[3], q4[1], r5, r4
-; CHECK-NEXT:    vmov.u8 r5, q0[7]
-; CHECK-NEXT:    vmov.u8 r4, q0[6]
-; CHECK-NEXT:    sxtb r5, r5
-; CHECK-NEXT:    sxtb r4, r4
+; CHECK-NEXT:    vmov.s8 r5, q0[7]
+; CHECK-NEXT:    vmov.s8 r4, q0[6]
 ; CHECK-NEXT:    vmov q5[2], q5[0], r4, r5
 ; CHECK-NEXT:    asrs r5, r5, #31
 ; CHECK-NEXT:    asrs r4, r4, #31
@@ -2743,10 +2703,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %b
 ; CHECK-NEXT:    rsbs r4, r4, #0
 ; CHECK-NEXT:    vmov q2[2], q2[0], r2, r4
 ; CHECK-NEXT:    vmov q2[3], q2[1], r2, r4
-; CHECK-NEXT:    vmov.u8 r2, q0[9]
-; CHECK-NEXT:    vmov.u8 r4, q0[8]
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r4, r4
+; CHECK-NEXT:    vmov.s8 r2, q0[9]
+; CHECK-NEXT:    vmov.s8 r4, q0[8]
 ; CHECK-NEXT:    vmov q3[2], q3[0], r4, r2
 ; CHECK-NEXT:    asrs r2, r2, #31
 ; CHECK-NEXT:    asrs r4, r4, #31
@@ -2766,10 +2724,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %b
 ; CHECK-NEXT:    adc.w r3, r3, r12
 ; CHECK-NEXT:    vmov q2[2], q2[0], r5, r4
 ; CHECK-NEXT:    vmov q2[3], q2[1], r5, r4
-; CHECK-NEXT:    vmov.u8 r5, q0[11]
-; CHECK-NEXT:    vmov.u8 r4, q0[10]
-; CHECK-NEXT:    sxtb r5, r5
-; CHECK-NEXT:    sxtb r4, r4
+; CHECK-NEXT:    vmov.s8 r5, q0[11]
+; CHECK-NEXT:    vmov.s8 r4, q0[10]
 ; CHECK-NEXT:    vmov q3[2], q3[0], r4, r5
 ; CHECK-NEXT:    asrs r5, r5, #31
 ; CHECK-NEXT:    asrs r4, r4, #31
@@ -2797,10 +2753,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %b
 ; CHECK-NEXT:    rsbs r4, r4, #0
 ; CHECK-NEXT:    vmov q1[2], q1[0], r2, r4
 ; CHECK-NEXT:    vmov q1[3], q1[1], r2, r4
-; CHECK-NEXT:    vmov.u8 r2, q0[13]
-; CHECK-NEXT:    vmov.u8 r4, q0[12]
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r4, r4
+; CHECK-NEXT:    vmov.s8 r2, q0[13]
+; CHECK-NEXT:    vmov.s8 r4, q0[12]
 ; CHECK-NEXT:    vmov q2[2], q2[0], r4, r2
 ; CHECK-NEXT:    asrs r2, r2, #31
 ; CHECK-NEXT:    asrs r4, r4, #31
@@ -2820,10 +2774,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %b
 ; CHECK-NEXT:    adc.w r3, r3, r12
 ; CHECK-NEXT:    vmov q1[2], q1[0], r5, r4
 ; CHECK-NEXT:    vmov q1[3], q1[1], r5, r4
-; CHECK-NEXT:    vmov.u8 r5, q0[15]
-; CHECK-NEXT:    vmov.u8 r4, q0[14]
-; CHECK-NEXT:    sxtb r5, r5
-; CHECK-NEXT:    sxtb r4, r4
+; CHECK-NEXT:    vmov.s8 r5, q0[15]
+; CHECK-NEXT:    vmov.s8 r4, q0[14]
 ; CHECK-NEXT:    vmov q0[2], q0[0], r4, r5
 ; CHECK-NEXT:    asrs r5, r5, #31
 ; CHECK-NEXT:    asrs r4, r4, #31

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll
index 82ef43d96fe3..0b157cf511db 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll
@@ -737,14 +737,10 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) {
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .save {r7, lr}
 ; CHECK-NEXT:    push {r7, lr}
-; CHECK-NEXT:    vmov.u8 r0, q1[1]
-; CHECK-NEXT:    vmov.u8 r1, q0[1]
-; CHECK-NEXT:    vmov.u8 r2, q1[0]
-; CHECK-NEXT:    vmov.u8 r3, q0[0]
-; CHECK-NEXT:    sxtb r0, r0
-; CHECK-NEXT:    sxtb r1, r1
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r0, q1[1]
+; CHECK-NEXT:    vmov.s8 r1, q0[1]
+; CHECK-NEXT:    vmov.s8 r2, q1[0]
+; CHECK-NEXT:    vmov.s8 r3, q0[0]
 ; CHECK-NEXT:    smull r0, r1, r1, r0
 ; CHECK-NEXT:    smull r2, r3, r3, r2
 ; CHECK-NEXT:    vmov q2[2], q2[0], r2, r0
@@ -753,15 +749,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) {
 ; CHECK-NEXT:    vmov r3, s8
 ; CHECK-NEXT:    vmov r0, s9
 ; CHECK-NEXT:    adds.w lr, r3, r2
-; CHECK-NEXT:    vmov.u8 r3, q0[3]
+; CHECK-NEXT:    vmov.s8 r3, q0[3]
 ; CHECK-NEXT:    adc.w r12, r0, r1
-; CHECK-NEXT:    vmov.u8 r1, q1[3]
-; CHECK-NEXT:    vmov.u8 r0, q1[2]
-; CHECK-NEXT:    vmov.u8 r2, q0[2]
-; CHECK-NEXT:    sxtb r1, r1
-; CHECK-NEXT:    sxtb r3, r3
-; CHECK-NEXT:    sxtb r0, r0
-; CHECK-NEXT:    sxtb r2, r2
+; CHECK-NEXT:    vmov.s8 r1, q1[3]
+; CHECK-NEXT:    vmov.s8 r0, q1[2]
+; CHECK-NEXT:    vmov.s8 r2, q0[2]
 ; CHECK-NEXT:    smull r1, r3, r3, r1
 ; CHECK-NEXT:    smull r0, r2, r2, r0
 ; CHECK-NEXT:    vmov q2[2], q2[0], r0, r1
@@ -772,15 +764,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) {
 ; CHECK-NEXT:    adds.w r1, r1, lr
 ; CHECK-NEXT:    adc.w r0, r0, r12
 ; CHECK-NEXT:    adds.w lr, r1, r2
-; CHECK-NEXT:    vmov.u8 r2, q1[5]
+; CHECK-NEXT:    vmov.s8 r2, q1[5]
 ; CHECK-NEXT:    adc.w r12, r0, r3
-; CHECK-NEXT:    vmov.u8 r3, q0[5]
-; CHECK-NEXT:    vmov.u8 r0, q1[4]
-; CHECK-NEXT:    vmov.u8 r1, q0[4]
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r3, r3
-; CHECK-NEXT:    sxtb r0, r0
-; CHECK-NEXT:    sxtb r1, r1
+; CHECK-NEXT:    vmov.s8 r3, q0[5]
+; CHECK-NEXT:    vmov.s8 r0, q1[4]
+; CHECK-NEXT:    vmov.s8 r1, q0[4]
 ; CHECK-NEXT:    smull r2, r3, r3, r2
 ; CHECK-NEXT:    smull r0, r1, r1, r0
 ; CHECK-NEXT:    vmov q2[2], q2[0], r0, r2
@@ -791,15 +779,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) {
 ; CHECK-NEXT:    adds.w r1, r1, lr
 ; CHECK-NEXT:    adc.w r0, r0, r12
 ; CHECK-NEXT:    adds.w lr, r1, r2
-; CHECK-NEXT:    vmov.u8 r2, q1[7]
+; CHECK-NEXT:    vmov.s8 r2, q1[7]
 ; CHECK-NEXT:    adc.w r12, r0, r3
-; CHECK-NEXT:    vmov.u8 r3, q0[7]
-; CHECK-NEXT:    vmov.u8 r0, q1[6]
-; CHECK-NEXT:    vmov.u8 r1, q0[6]
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r3, r3
-; CHECK-NEXT:    sxtb r0, r0
-; CHECK-NEXT:    sxtb r1, r1
+; CHECK-NEXT:    vmov.s8 r3, q0[7]
+; CHECK-NEXT:    vmov.s8 r0, q1[6]
+; CHECK-NEXT:    vmov.s8 r1, q0[6]
 ; CHECK-NEXT:    smull r2, r3, r3, r2
 ; CHECK-NEXT:    smull r0, r1, r1, r0
 ; CHECK-NEXT:    vmov q2[2], q2[0], r0, r2
@@ -810,15 +794,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) {
 ; CHECK-NEXT:    adds.w r1, r1, lr
 ; CHECK-NEXT:    adc.w r0, r0, r12
 ; CHECK-NEXT:    adds.w lr, r1, r2
-; CHECK-NEXT:    vmov.u8 r2, q1[9]
+; CHECK-NEXT:    vmov.s8 r2, q1[9]
 ; CHECK-NEXT:    adc.w r12, r0, r3
-; CHECK-NEXT:    vmov.u8 r3, q0[9]
-; CHECK-NEXT:    vmov.u8 r0, q1[8]
-; CHECK-NEXT:    vmov.u8 r1, q0[8]
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r3, r3
-; CHECK-NEXT:    sxtb r0, r0
-; CHECK-NEXT:    sxtb r1, r1
+; CHECK-NEXT:    vmov.s8 r3, q0[9]
+; CHECK-NEXT:    vmov.s8 r0, q1[8]
+; CHECK-NEXT:    vmov.s8 r1, q0[8]
 ; CHECK-NEXT:    smull r2, r3, r3, r2
 ; CHECK-NEXT:    smull r0, r1, r1, r0
 ; CHECK-NEXT:    vmov q2[2], q2[0], r0, r2
@@ -829,15 +809,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) {
 ; CHECK-NEXT:    adds.w r1, r1, lr
 ; CHECK-NEXT:    adc.w r0, r0, r12
 ; CHECK-NEXT:    adds.w lr, r1, r2
-; CHECK-NEXT:    vmov.u8 r2, q1[11]
+; CHECK-NEXT:    vmov.s8 r2, q1[11]
 ; CHECK-NEXT:    adc.w r12, r0, r3
-; CHECK-NEXT:    vmov.u8 r3, q0[11]
-; CHECK-NEXT:    vmov.u8 r0, q1[10]
-; CHECK-NEXT:    vmov.u8 r1, q0[10]
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r3, r3
-; CHECK-NEXT:    sxtb r0, r0
-; CHECK-NEXT:    sxtb r1, r1
+; CHECK-NEXT:    vmov.s8 r3, q0[11]
+; CHECK-NEXT:    vmov.s8 r0, q1[10]
+; CHECK-NEXT:    vmov.s8 r1, q0[10]
 ; CHECK-NEXT:    smull r2, r3, r3, r2
 ; CHECK-NEXT:    smull r0, r1, r1, r0
 ; CHECK-NEXT:    vmov q2[2], q2[0], r0, r2
@@ -848,15 +824,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) {
 ; CHECK-NEXT:    adds.w r1, r1, lr
 ; CHECK-NEXT:    adc.w r0, r0, r12
 ; CHECK-NEXT:    adds.w lr, r1, r2
-; CHECK-NEXT:    vmov.u8 r2, q1[13]
+; CHECK-NEXT:    vmov.s8 r2, q1[13]
 ; CHECK-NEXT:    adc.w r12, r0, r3
-; CHECK-NEXT:    vmov.u8 r3, q0[13]
-; CHECK-NEXT:    vmov.u8 r0, q1[12]
-; CHECK-NEXT:    vmov.u8 r1, q0[12]
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r3, r3
-; CHECK-NEXT:    sxtb r0, r0
-; CHECK-NEXT:    sxtb r1, r1
+; CHECK-NEXT:    vmov.s8 r3, q0[13]
+; CHECK-NEXT:    vmov.s8 r0, q1[12]
+; CHECK-NEXT:    vmov.s8 r1, q0[12]
 ; CHECK-NEXT:    smull r2, r3, r3, r2
 ; CHECK-NEXT:    smull r0, r1, r1, r0
 ; CHECK-NEXT:    vmov q2[2], q2[0], r0, r2
@@ -868,15 +840,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) {
 ; CHECK-NEXT:    vmov r0, s10
 ; CHECK-NEXT:    adds r0, r0, r1
 ; CHECK-NEXT:    adc.w r1, r2, r3
-; CHECK-NEXT:    vmov.u8 r2, q1[14]
-; CHECK-NEXT:    vmov.u8 r3, q0[14]
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r2, q1[14]
+; CHECK-NEXT:    vmov.s8 r3, q0[14]
 ; CHECK-NEXT:    smlal r0, r1, r3, r2
-; CHECK-NEXT:    vmov.u8 r2, q1[15]
-; CHECK-NEXT:    vmov.u8 r3, q0[15]
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r2, q1[15]
+; CHECK-NEXT:    vmov.s8 r3, q0[15]
 ; CHECK-NEXT:    smlal r0, r1, r3, r2
 ; CHECK-NEXT:    pop {r7, pc}
 entry:
@@ -1690,20 +1658,14 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .save {r4, r5, r7, lr}
 ; CHECK-NEXT:    push {r4, r5, r7, lr}
-; CHECK-NEXT:    vmov.u8 r2, q1[1]
-; CHECK-NEXT:    vmov.u8 r3, q0[1]
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r2, q1[1]
+; CHECK-NEXT:    vmov.s8 r3, q0[1]
 ; CHECK-NEXT:    smull r12, r3, r3, r2
-; CHECK-NEXT:    vmov.u8 r2, q1[0]
-; CHECK-NEXT:    sxtb.w lr, r2
-; CHECK-NEXT:    vmov.u8 r2, q0[0]
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    vmov.u8 r4, q1[2]
+; CHECK-NEXT:    vmov.s8 lr, q1[0]
+; CHECK-NEXT:    vmov.s8 r2, q0[0]
+; CHECK-NEXT:    vmov.s8 r4, q1[2]
+; CHECK-NEXT:    vmov.s8 r5, q0[2]
 ; CHECK-NEXT:    smull r2, lr, r2, lr
-; CHECK-NEXT:    vmov.u8 r5, q0[2]
-; CHECK-NEXT:    sxtb r4, r4
-; CHECK-NEXT:    sxtb r5, r5
 ; CHECK-NEXT:    vmov q2[2], q2[0], r2, r12
 ; CHECK-NEXT:    smull r4, r5, r5, r4
 ; CHECK-NEXT:    vmov q2[3], q2[1], lr, r3
@@ -1711,11 +1673,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
 ; CHECK-NEXT:    vmov r2, s8
 ; CHECK-NEXT:    vmov r12, s9
 ; CHECK-NEXT:    adds.w lr, lr, r2
-; CHECK-NEXT:    vmov.u8 r2, q1[3]
+; CHECK-NEXT:    vmov.s8 r2, q1[3]
 ; CHECK-NEXT:    adc.w r12, r12, r3
-; CHECK-NEXT:    vmov.u8 r3, q0[3]
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r3, q0[3]
 ; CHECK-NEXT:    smull r2, r3, r3, r2
 ; CHECK-NEXT:    vmov q2[2], q2[0], r4, r2
 ; CHECK-NEXT:    vmov q2[3], q2[1], r5, r3
@@ -1725,15 +1685,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
 ; CHECK-NEXT:    vmov r4, s10
 ; CHECK-NEXT:    adc.w r2, r2, r12
 ; CHECK-NEXT:    adds.w lr, r5, r4
-; CHECK-NEXT:    vmov.u8 r4, q0[5]
+; CHECK-NEXT:    vmov.s8 r4, q0[5]
 ; CHECK-NEXT:    adc.w r12, r2, r3
-; CHECK-NEXT:    vmov.u8 r3, q1[5]
-; CHECK-NEXT:    vmov.u8 r2, q1[4]
-; CHECK-NEXT:    vmov.u8 r5, q0[4]
-; CHECK-NEXT:    sxtb r3, r3
-; CHECK-NEXT:    sxtb r4, r4
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r5, r5
+; CHECK-NEXT:    vmov.s8 r3, q1[5]
+; CHECK-NEXT:    vmov.s8 r2, q1[4]
+; CHECK-NEXT:    vmov.s8 r5, q0[4]
 ; CHECK-NEXT:    smull r3, r4, r4, r3
 ; CHECK-NEXT:    smull r2, r5, r5, r2
 ; CHECK-NEXT:    vmov q2[2], q2[0], r2, r3
@@ -1744,15 +1700,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
 ; CHECK-NEXT:    adds.w r3, r3, lr
 ; CHECK-NEXT:    adc.w r2, r2, r12
 ; CHECK-NEXT:    adds.w lr, r3, r5
-; CHECK-NEXT:    vmov.u8 r5, q1[7]
+; CHECK-NEXT:    vmov.s8 r5, q1[7]
 ; CHECK-NEXT:    adc.w r12, r2, r4
-; CHECK-NEXT:    vmov.u8 r4, q0[7]
-; CHECK-NEXT:    vmov.u8 r2, q1[6]
-; CHECK-NEXT:    vmov.u8 r3, q0[6]
-; CHECK-NEXT:    sxtb r5, r5
-; CHECK-NEXT:    sxtb r4, r4
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r4, q0[7]
+; CHECK-NEXT:    vmov.s8 r2, q1[6]
+; CHECK-NEXT:    vmov.s8 r3, q0[6]
 ; CHECK-NEXT:    smull r5, r4, r4, r5
 ; CHECK-NEXT:    smull r2, r3, r3, r2
 ; CHECK-NEXT:    vmov q2[2], q2[0], r2, r5
@@ -1763,15 +1715,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
 ; CHECK-NEXT:    adds.w r3, r3, lr
 ; CHECK-NEXT:    adc.w r2, r2, r12
 ; CHECK-NEXT:    adds.w lr, r3, r5
-; CHECK-NEXT:    vmov.u8 r5, q1[9]
+; CHECK-NEXT:    vmov.s8 r5, q1[9]
 ; CHECK-NEXT:    adc.w r12, r2, r4
-; CHECK-NEXT:    vmov.u8 r4, q0[9]
-; CHECK-NEXT:    vmov.u8 r2, q1[8]
-; CHECK-NEXT:    vmov.u8 r3, q0[8]
-; CHECK-NEXT:    sxtb r5, r5
-; CHECK-NEXT:    sxtb r4, r4
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r4, q0[9]
+; CHECK-NEXT:    vmov.s8 r2, q1[8]
+; CHECK-NEXT:    vmov.s8 r3, q0[8]
 ; CHECK-NEXT:    smull r5, r4, r4, r5
 ; CHECK-NEXT:    smull r2, r3, r3, r2
 ; CHECK-NEXT:    vmov q2[2], q2[0], r2, r5
@@ -1782,15 +1730,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
 ; CHECK-NEXT:    adds.w r3, r3, lr
 ; CHECK-NEXT:    adc.w r2, r2, r12
 ; CHECK-NEXT:    adds.w lr, r3, r5
-; CHECK-NEXT:    vmov.u8 r5, q1[11]
+; CHECK-NEXT:    vmov.s8 r5, q1[11]
 ; CHECK-NEXT:    adc.w r12, r2, r4
-; CHECK-NEXT:    vmov.u8 r4, q0[11]
-; CHECK-NEXT:    vmov.u8 r2, q1[10]
-; CHECK-NEXT:    vmov.u8 r3, q0[10]
-; CHECK-NEXT:    sxtb r5, r5
-; CHECK-NEXT:    sxtb r4, r4
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r4, q0[11]
+; CHECK-NEXT:    vmov.s8 r2, q1[10]
+; CHECK-NEXT:    vmov.s8 r3, q0[10]
 ; CHECK-NEXT:    smull r5, r4, r4, r5
 ; CHECK-NEXT:    smull r2, r3, r3, r2
 ; CHECK-NEXT:    vmov q2[2], q2[0], r2, r5
@@ -1801,15 +1745,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
 ; CHECK-NEXT:    adds.w r3, r3, lr
 ; CHECK-NEXT:    adc.w r2, r2, r12
 ; CHECK-NEXT:    adds.w lr, r3, r5
-; CHECK-NEXT:    vmov.u8 r5, q1[13]
+; CHECK-NEXT:    vmov.s8 r5, q1[13]
 ; CHECK-NEXT:    adc.w r12, r2, r4
-; CHECK-NEXT:    vmov.u8 r4, q0[13]
-; CHECK-NEXT:    vmov.u8 r2, q1[12]
-; CHECK-NEXT:    vmov.u8 r3, q0[12]
-; CHECK-NEXT:    sxtb r5, r5
-; CHECK-NEXT:    sxtb r4, r4
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r4, q0[13]
+; CHECK-NEXT:    vmov.s8 r2, q1[12]
+; CHECK-NEXT:    vmov.s8 r3, q0[12]
 ; CHECK-NEXT:    smull r5, r4, r4, r5
 ; CHECK-NEXT:    smull r2, r3, r3, r2
 ; CHECK-NEXT:    vmov q2[2], q2[0], r2, r5
@@ -1820,16 +1760,12 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
 ; CHECK-NEXT:    adds.w r3, r3, lr
 ; CHECK-NEXT:    adc.w r2, r2, r12
 ; CHECK-NEXT:    adds r3, r3, r5
-; CHECK-NEXT:    vmov.u8 r5, q1[14]
+; CHECK-NEXT:    vmov.s8 r5, q1[14]
 ; CHECK-NEXT:    adcs r2, r4
-; CHECK-NEXT:    vmov.u8 r4, q0[14]
-; CHECK-NEXT:    sxtb r5, r5
-; CHECK-NEXT:    sxtb r4, r4
+; CHECK-NEXT:    vmov.s8 r4, q0[14]
 ; CHECK-NEXT:    smlal r3, r2, r4, r5
-; CHECK-NEXT:    vmov.u8 r5, q1[15]
-; CHECK-NEXT:    vmov.u8 r4, q0[15]
-; CHECK-NEXT:    sxtb r5, r5
-; CHECK-NEXT:    sxtb r4, r4
+; CHECK-NEXT:    vmov.s8 r5, q1[15]
+; CHECK-NEXT:    vmov.s8 r4, q0[15]
 ; CHECK-NEXT:    smlal r3, r2, r4, r5
 ; CHECK-NEXT:    adds r0, r0, r3
 ; CHECK-NEXT:    adcs r1, r2

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll
index f20dc9480ce1..69aa577149ca 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll
@@ -1127,11 +1127,10 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
 ; CHECK-NEXT:    vcmp.i8 eq, q2, zr
 ; CHECK-NEXT:    vmov.i8 q2, #0x0
 ; CHECK-NEXT:    vmov.i8 q3, #0xff
-; CHECK-NEXT:    vmov.u8 r3, q1[0]
+; CHECK-NEXT:    vmov.s8 r3, q1[0]
 ; CHECK-NEXT:    vpsel q4, q3, q2
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r4, q0[4]
 ; CHECK-NEXT:    vmov.u8 r0, q4[0]
-; CHECK-NEXT:    vmov.u8 r4, q0[4]
 ; CHECK-NEXT:    vmov.16 q5[0], r0
 ; CHECK-NEXT:    vmov.u8 r0, q4[1]
 ; CHECK-NEXT:    vmov.16 q5[1], r0
@@ -1147,7 +1146,6 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
 ; CHECK-NEXT:    vmov.16 q5[6], r0
 ; CHECK-NEXT:    vmov.u8 r0, q4[7]
 ; CHECK-NEXT:    vmov.16 q5[7], r0
-; CHECK-NEXT:    sxtb r4, r4
 ; CHECK-NEXT:    vcmp.i16 ne, q5, zr
 ; CHECK-NEXT:    vpsel q5, q3, q2
 ; CHECK-NEXT:    vmov.u16 r0, q5[2]
@@ -1164,13 +1162,10 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
 ; CHECK-NEXT:    rsbs r1, r1, #0
 ; CHECK-NEXT:    vmov q6[2], q6[0], r2, r1
 ; CHECK-NEXT:    vmov q6[3], q6[1], r2, r1
-; CHECK-NEXT:    vmov.u8 r1, q1[1]
-; CHECK-NEXT:    vmov.u8 r2, q0[1]
-; CHECK-NEXT:    sxtb r1, r1
-; CHECK-NEXT:    sxtb r2, r2
+; CHECK-NEXT:    vmov.s8 r1, q1[1]
+; CHECK-NEXT:    vmov.s8 r2, q0[1]
 ; CHECK-NEXT:    smull r1, r12, r2, r1
-; CHECK-NEXT:    vmov.u8 r2, q0[0]
-; CHECK-NEXT:    sxtb r2, r2
+; CHECK-NEXT:    vmov.s8 r2, q0[0]
 ; CHECK-NEXT:    smull r2, r3, r2, r3
 ; CHECK-NEXT:    vmov q7[2], q7[0], r2, r1
 ; CHECK-NEXT:    vmov q7[3], q7[1], r3, r12
@@ -1184,17 +1179,13 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
 ; CHECK-NEXT:    ubfx r0, r0, #8, #1
 ; CHECK-NEXT:    rsb.w r3, r3, #0
 ; CHECK-NEXT:    rsb.w r0, r0, #0
-; CHECK-NEXT:    vmov.u8 r1, q1[2]
-; CHECK-NEXT:    vmov q6[2], q6[0], r0, r3
 ; CHECK-NEXT:    adc.w r12, r12, r2
+; CHECK-NEXT:    vmov q6[2], q6[0], r0, r3
+; CHECK-NEXT:    vmov.s8 r1, q1[2]
 ; CHECK-NEXT:    vmov q6[3], q6[1], r0, r3
-; CHECK-NEXT:    vmov.u8 r0, q1[3]
-; CHECK-NEXT:    vmov.u8 r3, q0[3]
-; CHECK-NEXT:    vmov.u8 r2, q0[2]
-; CHECK-NEXT:    sxtb r0, r0
-; CHECK-NEXT:    sxtb r3, r3
-; CHECK-NEXT:    sxtb r1, r1
-; CHECK-NEXT:    sxtb r2, r2
+; CHECK-NEXT:    vmov.s8 r2, q0[2]
+; CHECK-NEXT:    vmov.s8 r0, q1[3]
+; CHECK-NEXT:    vmov.s8 r3, q0[3]
 ; CHECK-NEXT:    smull r0, r3, r3, r0
 ; CHECK-NEXT:    smull r1, r2, r2, r1
 ; CHECK-NEXT:    vmov q7[2], q7[0], r1, r0
@@ -1207,15 +1198,14 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
 ; CHECK-NEXT:    adc.w r2, r12, r0
 ; CHECK-NEXT:    vmov r0, s26
 ; CHECK-NEXT:    adds.w r12, r1, r0
-; CHECK-NEXT:    vmov.u8 r1, q1[4]
+; CHECK-NEXT:    vmov.s8 r1, q1[4]
 ; CHECK-NEXT:    adc.w lr, r2, r3
 ; CHECK-NEXT:    vmov.u16 r2, q5[6]
 ; CHECK-NEXT:    vmov.u16 r3, q5[4]
-; CHECK-NEXT:    sxtb r1, r1
+; CHECK-NEXT:    smull r1, r4, r4, r1
 ; CHECK-NEXT:    vmov q6[2], q6[0], r3, r2
 ; CHECK-NEXT:    vmov.u16 r2, q5[7]
 ; CHECK-NEXT:    vmov.u16 r3, q5[5]
-; CHECK-NEXT:    smull r1, r4, r4, r1
 ; CHECK-NEXT:    vmov q6[3], q6[1], r3, r2
 ; CHECK-NEXT:    vcmp.i32 ne, q6, zr
 ; CHECK-NEXT:    vmrs r2, p0
@@ -1225,10 +1215,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
 ; CHECK-NEXT:    rsbs r3, r3, #0
 ; CHECK-NEXT:    vmov q5[2], q5[0], r0, r3
 ; CHECK-NEXT:    vmov q5[3], q5[1], r0, r3
-; CHECK-NEXT:    vmov.u8 r0, q1[5]
-; CHECK-NEXT:    vmov.u8 r3, q0[5]
-; CHECK-NEXT:    sxtb r0, r0
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r0, q1[5]
+; CHECK-NEXT:    vmov.s8 r3, q0[5]
 ; CHECK-NEXT:    smull r0, r3, r3, r0
 ; CHECK-NEXT:    vmov q6[2], q6[0], r1, r0
 ; CHECK-NEXT:    vmov q6[3], q6[1], r4, r3
@@ -1240,23 +1228,19 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
 ; CHECK-NEXT:    adds.w r1, r1, r12
 ; CHECK-NEXT:    adc.w r0, r0, lr
 ; CHECK-NEXT:    adds r1, r1, r4
-; CHECK-NEXT:    vmov.u8 r4, q1[6]
+; CHECK-NEXT:    vmov.s8 r4, q1[6]
 ; CHECK-NEXT:    adc.w r12, r0, r3
 ; CHECK-NEXT:    ubfx r3, r2, #12, #1
 ; CHECK-NEXT:    ubfx r2, r2, #8, #1
 ; CHECK-NEXT:    rsbs r3, r3, #0
 ; CHECK-NEXT:    rsbs r2, r2, #0
-; CHECK-NEXT:    vmov.u8 r0, q0[6]
+; CHECK-NEXT:    vmov.s8 r0, q0[6]
 ; CHECK-NEXT:    vmov q5[2], q5[0], r2, r3
-; CHECK-NEXT:    sxtb r4, r4
+; CHECK-NEXT:    smull r0, r4, r0, r4
 ; CHECK-NEXT:    vmov q5[3], q5[1], r2, r3
-; CHECK-NEXT:    vmov.u8 r2, q1[7]
-; CHECK-NEXT:    vmov.u8 r3, q0[7]
-; CHECK-NEXT:    sxtb r0, r0
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r2, q1[7]
+; CHECK-NEXT:    vmov.s8 r3, q0[7]
 ; CHECK-NEXT:    smull r2, r3, r3, r2
-; CHECK-NEXT:    smull r0, r4, r0, r4
 ; CHECK-NEXT:    vmov q6[2], q6[0], r0, r2
 ; CHECK-NEXT:    vmov q6[3], q6[1], r4, r3
 ; CHECK-NEXT:    vand q5, q6, q5
@@ -1267,7 +1251,7 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
 ; CHECK-NEXT:    adc.w r2, r12, r0
 ; CHECK-NEXT:    vmov r0, s22
 ; CHECK-NEXT:    adds.w r12, r1, r0
-; CHECK-NEXT:    vmov.u8 r0, q1[8]
+; CHECK-NEXT:    vmov.s8 r0, q1[8]
 ; CHECK-NEXT:    adc.w lr, r2, r3
 ; CHECK-NEXT:    vmov.u8 r2, q4[8]
 ; CHECK-NEXT:    vmov.16 q5[0], r2
@@ -1285,17 +1269,15 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
 ; CHECK-NEXT:    vmov.16 q5[6], r2
 ; CHECK-NEXT:    vmov.u8 r2, q4[15]
 ; CHECK-NEXT:    vmov.16 q5[7], r2
-; CHECK-NEXT:    vmov.u8 r1, q0[8]
+; CHECK-NEXT:    vmov.s8 r1, q0[8]
 ; CHECK-NEXT:    vcmp.i16 ne, q5, zr
-; CHECK-NEXT:    sxtb r0, r0
+; CHECK-NEXT:    smull r0, r1, r1, r0
 ; CHECK-NEXT:    vpsel q2, q3, q2
-; CHECK-NEXT:    sxtb r1, r1
 ; CHECK-NEXT:    vmov.u16 r2, q2[2]
 ; CHECK-NEXT:    vmov.u16 r3, q2[0]
 ; CHECK-NEXT:    vmov q3[2], q3[0], r3, r2
 ; CHECK-NEXT:    vmov.u16 r2, q2[3]
 ; CHECK-NEXT:    vmov.u16 r3, q2[1]
-; CHECK-NEXT:    smull r0, r1, r1, r0
 ; CHECK-NEXT:    vmov q3[3], q3[1], r3, r2
 ; CHECK-NEXT:    vcmp.i32 ne, q3, zr
 ; CHECK-NEXT:    vmrs r2, p0
@@ -1305,10 +1287,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
 ; CHECK-NEXT:    rsbs r3, r3, #0
 ; CHECK-NEXT:    vmov q3[2], q3[0], r4, r3
 ; CHECK-NEXT:    vmov q3[3], q3[1], r4, r3
-; CHECK-NEXT:    vmov.u8 r3, q1[9]
-; CHECK-NEXT:    vmov.u8 r4, q0[9]
-; CHECK-NEXT:    sxtb r3, r3
-; CHECK-NEXT:    sxtb r4, r4
+; CHECK-NEXT:    vmov.s8 r3, q1[9]
+; CHECK-NEXT:    vmov.s8 r4, q0[9]
 ; CHECK-NEXT:    smull r3, r4, r4, r3
 ; CHECK-NEXT:    vmov q4[2], q4[0], r0, r3
 ; CHECK-NEXT:    vmov q4[3], q4[1], r1, r4
@@ -1320,23 +1300,19 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
 ; CHECK-NEXT:    adds.w r1, r1, r12
 ; CHECK-NEXT:    adc.w r0, r0, lr
 ; CHECK-NEXT:    adds r1, r1, r4
-; CHECK-NEXT:    vmov.u8 r4, q1[10]
+; CHECK-NEXT:    vmov.s8 r4, q1[10]
 ; CHECK-NEXT:    adc.w r12, r0, r3
 ; CHECK-NEXT:    ubfx r3, r2, #12, #1
 ; CHECK-NEXT:    ubfx r2, r2, #8, #1
 ; CHECK-NEXT:    rsbs r3, r3, #0
 ; CHECK-NEXT:    rsbs r2, r2, #0
-; CHECK-NEXT:    vmov.u8 r0, q0[10]
+; CHECK-NEXT:    vmov.s8 r0, q0[10]
 ; CHECK-NEXT:    vmov q3[2], q3[0], r2, r3
-; CHECK-NEXT:    sxtb r4, r4
+; CHECK-NEXT:    smull r0, r4, r0, r4
 ; CHECK-NEXT:    vmov q3[3], q3[1], r2, r3
-; CHECK-NEXT:    vmov.u8 r2, q1[11]
-; CHECK-NEXT:    vmov.u8 r3, q0[11]
-; CHECK-NEXT:    sxtb r0, r0
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r2, q1[11]
+; CHECK-NEXT:    vmov.s8 r3, q0[11]
 ; CHECK-NEXT:    smull r2, r3, r3, r2
-; CHECK-NEXT:    smull r0, r4, r0, r4
 ; CHECK-NEXT:    vmov q4[2], q4[0], r0, r2
 ; CHECK-NEXT:    vmov q4[3], q4[1], r4, r3
 ; CHECK-NEXT:    vand q3, q4, q3
@@ -1347,19 +1323,17 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
 ; CHECK-NEXT:    adc.w r2, r12, r0
 ; CHECK-NEXT:    vmov r0, s14
 ; CHECK-NEXT:    adds.w r12, r1, r0
-; CHECK-NEXT:    vmov.u8 r0, q1[12]
+; CHECK-NEXT:    vmov.s8 r0, q1[12]
 ; CHECK-NEXT:    adc.w lr, r2, r3
 ; CHECK-NEXT:    vmov.u16 r2, q2[6]
 ; CHECK-NEXT:    vmov.u16 r3, q2[4]
-; CHECK-NEXT:    vmov.u8 r1, q0[12]
+; CHECK-NEXT:    vmov.s8 r1, q0[12]
 ; CHECK-NEXT:    vmov q3[2], q3[0], r3, r2
 ; CHECK-NEXT:    vmov.u16 r2, q2[7]
 ; CHECK-NEXT:    vmov.u16 r3, q2[5]
-; CHECK-NEXT:    sxtb r0, r0
+; CHECK-NEXT:    smull r0, r1, r1, r0
 ; CHECK-NEXT:    vmov q3[3], q3[1], r3, r2
-; CHECK-NEXT:    sxtb r1, r1
 ; CHECK-NEXT:    vcmp.i32 ne, q3, zr
-; CHECK-NEXT:    smull r0, r1, r1, r0
 ; CHECK-NEXT:    vmrs r2, p0
 ; CHECK-NEXT:    and r4, r2, #1
 ; CHECK-NEXT:    ubfx r3, r2, #4, #1
@@ -1367,10 +1341,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
 ; CHECK-NEXT:    rsbs r3, r3, #0
 ; CHECK-NEXT:    vmov q2[2], q2[0], r4, r3
 ; CHECK-NEXT:    vmov q2[3], q2[1], r4, r3
-; CHECK-NEXT:    vmov.u8 r3, q1[13]
-; CHECK-NEXT:    vmov.u8 r4, q0[13]
-; CHECK-NEXT:    sxtb r3, r3
-; CHECK-NEXT:    sxtb r4, r4
+; CHECK-NEXT:    vmov.s8 r3, q1[13]
+; CHECK-NEXT:    vmov.s8 r4, q0[13]
 ; CHECK-NEXT:    smull r3, r4, r4, r3
 ; CHECK-NEXT:    vmov q3[2], q3[0], r0, r3
 ; CHECK-NEXT:    vmov q3[3], q3[1], r1, r4
@@ -1382,23 +1354,19 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
 ; CHECK-NEXT:    adds.w r1, r1, r12
 ; CHECK-NEXT:    adc.w r0, r0, lr
 ; CHECK-NEXT:    adds r1, r1, r4
-; CHECK-NEXT:    vmov.u8 r4, q1[14]
+; CHECK-NEXT:    vmov.s8 r4, q1[14]
 ; CHECK-NEXT:    adc.w r12, r0, r3
 ; CHECK-NEXT:    ubfx r3, r2, #12, #1
 ; CHECK-NEXT:    ubfx r2, r2, #8, #1
 ; CHECK-NEXT:    rsbs r3, r3, #0
 ; CHECK-NEXT:    rsbs r2, r2, #0
-; CHECK-NEXT:    vmov.u8 r0, q0[14]
+; CHECK-NEXT:    vmov.s8 r0, q0[14]
 ; CHECK-NEXT:    vmov q2[2], q2[0], r2, r3
-; CHECK-NEXT:    sxtb r4, r4
+; CHECK-NEXT:    smull r0, r4, r0, r4
 ; CHECK-NEXT:    vmov q2[3], q2[1], r2, r3
-; CHECK-NEXT:    vmov.u8 r2, q1[15]
-; CHECK-NEXT:    vmov.u8 r3, q0[15]
-; CHECK-NEXT:    sxtb r0, r0
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r2, q1[15]
+; CHECK-NEXT:    vmov.s8 r3, q0[15]
 ; CHECK-NEXT:    smull r2, r3, r3, r2
-; CHECK-NEXT:    smull r0, r4, r0, r4
 ; CHECK-NEXT:    vmov q0[2], q0[0], r0, r2
 ; CHECK-NEXT:    vmov q0[3], q0[1], r4, r3
 ; CHECK-NEXT:    vand q0, q0, q2
@@ -2637,11 +2605,10 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
 ; CHECK-NEXT:    vcmp.i8 eq, q2, zr
 ; CHECK-NEXT:    vmov.i8 q2, #0x0
 ; CHECK-NEXT:    vmov.i8 q3, #0xff
-; CHECK-NEXT:    vmov.u8 r4, q0[0]
+; CHECK-NEXT:    vmov.s8 r4, q0[0]
 ; CHECK-NEXT:    vpsel q4, q3, q2
-; CHECK-NEXT:    sxtb r4, r4
+; CHECK-NEXT:    vmov.s8 r5, q0[2]
 ; CHECK-NEXT:    vmov.u8 r2, q4[0]
-; CHECK-NEXT:    vmov.u8 r5, q0[2]
 ; CHECK-NEXT:    vmov.16 q5[0], r2
 ; CHECK-NEXT:    vmov.u8 r2, q4[1]
 ; CHECK-NEXT:    vmov.16 q5[1], r2
@@ -2657,7 +2624,6 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
 ; CHECK-NEXT:    vmov.16 q5[6], r2
 ; CHECK-NEXT:    vmov.u8 r2, q4[7]
 ; CHECK-NEXT:    vmov.16 q5[7], r2
-; CHECK-NEXT:    sxtb r5, r5
 ; CHECK-NEXT:    vcmp.i16 ne, q5, zr
 ; CHECK-NEXT:    vpsel q5, q3, q2
 ; CHECK-NEXT:    vmov.u16 r2, q5[2]
@@ -2674,13 +2640,10 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
 ; CHECK-NEXT:    rsbs r3, r3, #0
 ; CHECK-NEXT:    vmov q6[2], q6[0], r2, r3
 ; CHECK-NEXT:    vmov q6[3], q6[1], r2, r3
-; CHECK-NEXT:    vmov.u8 r2, q1[1]
-; CHECK-NEXT:    vmov.u8 r3, q0[1]
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r2, q1[1]
+; CHECK-NEXT:    vmov.s8 r3, q0[1]
 ; CHECK-NEXT:    smull r2, lr, r3, r2
-; CHECK-NEXT:    vmov.u8 r3, q1[0]
-; CHECK-NEXT:    sxtb r3, r3
+; CHECK-NEXT:    vmov.s8 r3, q1[0]
 ; CHECK-NEXT:    smull r3, r4, r4, r3
 ; CHECK-NEXT:    vmov q7[2], q7[0], r3, r2
 ; CHECK-NEXT:    vmov q7[3], q7[1], r4, lr
@@ -2696,223 +2659,196 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
 ; CHECK-NEXT:    rsb.w r2, r2, #0
 ; CHECK-NEXT:    adc.w lr, lr, r3
 ; CHECK-NEXT:    vmov q6[2], q6[0], r2, r4
-; CHECK-NEXT:    vmov.u8 r3, q1[2]
+; CHECK-NEXT:    vmov.s8 r3, q1[2]
 ; CHECK-NEXT:    vmov q6[3], q6[1], r2, r4
-; CHECK-NEXT:    vmov.u8 r2, q1[3]
-; CHECK-NEXT:    vmov.u8 r4, q0[3]
-; CHECK-NEXT:    sxtb r3, r3
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r4, r4
-; CHECK-NEXT:    smull r2, r4, r4, r2
+; CHECK-NEXT:    vmov.s8 r2, q1[3]
+; CHECK-NEXT:    vmov.s8 r4, q0[3]
 ; CHECK-NEXT:    smull r3, r5, r5, r3
+; CHECK-NEXT:    smull r2, r4, r4, r2
 ; CHECK-NEXT:    vmov q7[2], q7[0], r3, r2
 ; CHECK-NEXT:    vmov q7[3], q7[1], r5, r4
-; CHECK-NEXT:    vmov.u8 r4, q1[4]
 ; CHECK-NEXT:    vand q6, q7, q6
-; CHECK-NEXT:    sxtb r4, r4
 ; CHECK-NEXT:    vmov r3, s24
 ; CHECK-NEXT:    vmov r2, s25
-; CHECK-NEXT:    vmov r5, s26
+; CHECK-NEXT:    vmov r5, s27
 ; CHECK-NEXT:    adds r3, r3, r6
-; CHECK-NEXT:    vmov r6, s27
-; CHECK-NEXT:    adc.w r2, r2, lr
-; CHECK-NEXT:    adds.w r12, r3, r5
-; CHECK-NEXT:    vmov.u8 r3, q0[4]
-; CHECK-NEXT:    sxtb r3, r3
-; CHECK-NEXT:    smull r3, r4, r3, r4
-; CHECK-NEXT:    adc.w lr, r2, r6
-; CHECK-NEXT:    vmov.u16 r2, q5[6]
-; CHECK-NEXT:    vmov.u16 r6, q5[4]
-; CHECK-NEXT:    vmov q6[2], q6[0], r6, r2
-; CHECK-NEXT:    vmov.u16 r2, q5[7]
-; CHECK-NEXT:    vmov.u16 r6, q5[5]
-; CHECK-NEXT:    vmov q6[3], q6[1], r6, r2
+; CHECK-NEXT:    adc.w r6, lr, r2
+; CHECK-NEXT:    vmov r2, s26
+; CHECK-NEXT:    adds.w r12, r3, r2
+; CHECK-NEXT:    vmov.s8 r2, q1[4]
+; CHECK-NEXT:    adc.w lr, r6, r5
+; CHECK-NEXT:    vmov.u16 r6, q5[6]
+; CHECK-NEXT:    vmov.u16 r5, q5[4]
+; CHECK-NEXT:    vmov.s8 r3, q0[4]
+; CHECK-NEXT:    vmov q6[2], q6[0], r5, r6
+; CHECK-NEXT:    vmov.u16 r6, q5[7]
+; CHECK-NEXT:    vmov.u16 r5, q5[5]
+; CHECK-NEXT:    smull r2, r3, r3, r2
+; CHECK-NEXT:    vmov q6[3], q6[1], r5, r6
 ; CHECK-NEXT:    vcmp.i32 ne, q6, zr
-; CHECK-NEXT:    vmrs r2, p0
-; CHECK-NEXT:    and r5, r2, #1
-; CHECK-NEXT:    ubfx r6, r2, #4, #1
+; CHECK-NEXT:    vmrs r6, p0
+; CHECK-NEXT:    and r4, r6, #1
+; CHECK-NEXT:    ubfx r5, r6, #4, #1
+; CHECK-NEXT:    rsbs r4, r4, #0
 ; CHECK-NEXT:    rsbs r5, r5, #0
-; CHECK-NEXT:    rsbs r6, r6, #0
-; CHECK-NEXT:    vmov q5[2], q5[0], r5, r6
-; CHECK-NEXT:    vmov q5[3], q5[1], r5, r6
-; CHECK-NEXT:    vmov.u8 r6, q1[5]
-; CHECK-NEXT:    vmov.u8 r5, q0[5]
-; CHECK-NEXT:    sxtb r6, r6
-; CHECK-NEXT:    sxtb r5, r5
-; CHECK-NEXT:    smull r6, r5, r5, r6
-; CHECK-NEXT:    vmov q6[2], q6[0], r3, r6
-; CHECK-NEXT:    vmov q6[3], q6[1], r4, r5
+; CHECK-NEXT:    vmov q5[2], q5[0], r4, r5
+; CHECK-NEXT:    vmov q5[3], q5[1], r4, r5
+; CHECK-NEXT:    vmov.s8 r5, q1[5]
+; CHECK-NEXT:    vmov.s8 r4, q0[5]
+; CHECK-NEXT:    smull r5, r4, r4, r5
+; CHECK-NEXT:    vmov q6[2], q6[0], r2, r5
+; CHECK-NEXT:    vmov q6[3], q6[1], r3, r4
 ; CHECK-NEXT:    vand q5, q6, q5
-; CHECK-NEXT:    vmov r4, s20
-; CHECK-NEXT:    vmov r3, s21
-; CHECK-NEXT:    vmov r5, s23
-; CHECK-NEXT:    adds.w r6, r12, r4
+; CHECK-NEXT:    vmov r3, s20
+; CHECK-NEXT:    vmov r2, s21
 ; CHECK-NEXT:    vmov r4, s22
-; CHECK-NEXT:    adc.w r3, r3, lr
-; CHECK-NEXT:    adds r6, r6, r4
-; CHECK-NEXT:    vmov.u8 r4, q1[6]
-; CHECK-NEXT:    adc.w r12, r3, r5
-; CHECK-NEXT:    ubfx r5, r2, #12, #1
-; CHECK-NEXT:    ubfx r2, r2, #8, #1
+; CHECK-NEXT:    vmov r5, s23
+; CHECK-NEXT:    adds.w r3, r3, r12
+; CHECK-NEXT:    adc.w r2, r2, lr
+; CHECK-NEXT:    adds r3, r3, r4
+; CHECK-NEXT:    vmov.s8 r4, q1[6]
+; CHECK-NEXT:    adc.w r12, r2, r5
+; CHECK-NEXT:    ubfx r5, r6, #12, #1
+; CHECK-NEXT:    ubfx r6, r6, #8, #1
 ; CHECK-NEXT:    rsbs r5, r5, #0
-; CHECK-NEXT:    rsbs r2, r2, #0
-; CHECK-NEXT:    vmov.u8 r3, q0[6]
-; CHECK-NEXT:    vmov q5[2], q5[0], r2, r5
-; CHECK-NEXT:    sxtb r4, r4
-; CHECK-NEXT:    vmov q5[3], q5[1], r2, r5
-; CHECK-NEXT:    vmov.u8 r2, q1[7]
-; CHECK-NEXT:    vmov.u8 r5, q0[7]
-; CHECK-NEXT:    sxtb r3, r3
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r5, r5
-; CHECK-NEXT:    smull r2, r5, r5, r2
-; CHECK-NEXT:    smull r3, r4, r3, r4
-; CHECK-NEXT:    vmov q6[2], q6[0], r3, r2
+; CHECK-NEXT:    rsbs r6, r6, #0
+; CHECK-NEXT:    vmov.s8 r2, q0[6]
+; CHECK-NEXT:    vmov q5[2], q5[0], r6, r5
+; CHECK-NEXT:    smull r2, r4, r2, r4
+; CHECK-NEXT:    vmov q5[3], q5[1], r6, r5
+; CHECK-NEXT:    vmov.s8 r6, q1[7]
+; CHECK-NEXT:    vmov.s8 r5, q0[7]
+; CHECK-NEXT:    smull r6, r5, r5, r6
+; CHECK-NEXT:    vmov q6[2], q6[0], r2, r6
 ; CHECK-NEXT:    vmov q6[3], q6[1], r4, r5
-; CHECK-NEXT:    vmov.u8 r4, q1[8]
 ; CHECK-NEXT:    vand q5, q6, q5
-; CHECK-NEXT:    sxtb r4, r4
-; CHECK-NEXT:    vmov r3, s20
+; CHECK-NEXT:    vmov r6, s20
 ; CHECK-NEXT:    vmov r2, s21
-; CHECK-NEXT:    vmov r5, s22
+; CHECK-NEXT:    vmov r5, s23
 ; CHECK-NEXT:    adds r3, r3, r6
-; CHECK-NEXT:    vmov r6, s23
-; CHECK-NEXT:    adc.w r2, r2, r12
-; CHECK-NEXT:    adds.w r12, r3, r5
-; CHECK-NEXT:    vmov.u8 r3, q0[8]
-; CHECK-NEXT:    sxtb r3, r3
-; CHECK-NEXT:    smull r3, r4, r3, r4
-; CHECK-NEXT:    adc.w lr, r2, r6
-; CHECK-NEXT:    vmov.u8 r2, q4[8]
-; CHECK-NEXT:    vmov.16 q5[0], r2
-; CHECK-NEXT:    vmov.u8 r2, q4[9]
-; CHECK-NEXT:    vmov.16 q5[1], r2
-; CHECK-NEXT:    vmov.u8 r2, q4[10]
-; CHECK-NEXT:    vmov.16 q5[2], r2
-; CHECK-NEXT:    vmov.u8 r2, q4[11]
-; CHECK-NEXT:    vmov.16 q5[3], r2
-; CHECK-NEXT:    vmov.u8 r2, q4[12]
-; CHECK-NEXT:    vmov.16 q5[4], r2
-; CHECK-NEXT:    vmov.u8 r2, q4[13]
-; CHECK-NEXT:    vmov.16 q5[5], r2
-; CHECK-NEXT:    vmov.u8 r2, q4[14]
-; CHECK-NEXT:    vmov.16 q5[6], r2
-; CHECK-NEXT:    vmov.u8 r2, q4[15]
-; CHECK-NEXT:    vmov.16 q5[7], r2
+; CHECK-NEXT:    adc.w r6, r12, r2
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    adds.w r12, r3, r2
+; CHECK-NEXT:    vmov.s8 r2, q1[8]
+; CHECK-NEXT:    adc.w lr, r6, r5
+; CHECK-NEXT:    vmov.u8 r6, q4[8]
+; CHECK-NEXT:    vmov.16 q5[0], r6
+; CHECK-NEXT:    vmov.u8 r6, q4[9]
+; CHECK-NEXT:    vmov.16 q5[1], r6
+; CHECK-NEXT:    vmov.u8 r6, q4[10]
+; CHECK-NEXT:    vmov.16 q5[2], r6
+; CHECK-NEXT:    vmov.u8 r6, q4[11]
+; CHECK-NEXT:    vmov.16 q5[3], r6
+; CHECK-NEXT:    vmov.u8 r6, q4[12]
+; CHECK-NEXT:    vmov.16 q5[4], r6
+; CHECK-NEXT:    vmov.u8 r6, q4[13]
+; CHECK-NEXT:    vmov.16 q5[5], r6
+; CHECK-NEXT:    vmov.u8 r6, q4[14]
+; CHECK-NEXT:    vmov.16 q5[6], r6
+; CHECK-NEXT:    vmov.u8 r6, q4[15]
+; CHECK-NEXT:    vmov.16 q5[7], r6
+; CHECK-NEXT:    vmov.s8 r3, q0[8]
 ; CHECK-NEXT:    vcmp.i16 ne, q5, zr
+; CHECK-NEXT:    smull r2, r3, r3, r2
 ; CHECK-NEXT:    vpsel q2, q3, q2
-; CHECK-NEXT:    vmov.u16 r2, q2[2]
-; CHECK-NEXT:    vmov.u16 r6, q2[0]
-; CHECK-NEXT:    vmov q3[2], q3[0], r6, r2
-; CHECK-NEXT:    vmov.u16 r2, q2[3]
-; CHECK-NEXT:    vmov.u16 r6, q2[1]
-; CHECK-NEXT:    vmov q3[3], q3[1], r6, r2
-; CHECK-NEXT:    vcmp.i32 ne, q3, zr
-; CHECK-NEXT:    vmrs r2, p0
-; CHECK-NEXT:    and r5, r2, #1
-; CHECK-NEXT:    ubfx r6, r2, #4, #1
-; CHECK-NEXT:    rsbs r5, r5, #0
-; CHECK-NEXT:    rsbs r6, r6, #0
+; CHECK-NEXT:    vmov.u16 r6, q2[2]
+; CHECK-NEXT:    vmov.u16 r5, q2[0]
 ; CHECK-NEXT:    vmov q3[2], q3[0], r5, r6
+; CHECK-NEXT:    vmov.u16 r6, q2[3]
+; CHECK-NEXT:    vmov.u16 r5, q2[1]
 ; CHECK-NEXT:    vmov q3[3], q3[1], r5, r6
-; CHECK-NEXT:    vmov.u8 r6, q1[9]
-; CHECK-NEXT:    vmov.u8 r5, q0[9]
-; CHECK-NEXT:    sxtb r6, r6
-; CHECK-NEXT:    sxtb r5, r5
-; CHECK-NEXT:    smull r6, r5, r5, r6
-; CHECK-NEXT:    vmov q4[2], q4[0], r3, r6
-; CHECK-NEXT:    vmov q4[3], q4[1], r4, r5
+; CHECK-NEXT:    vcmp.i32 ne, q3, zr
+; CHECK-NEXT:    vmrs r6, p0
+; CHECK-NEXT:    and r4, r6, #1
+; CHECK-NEXT:    ubfx r5, r6, #4, #1
+; CHECK-NEXT:    rsbs r4, r4, #0
+; CHECK-NEXT:    rsbs r5, r5, #0
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r5
+; CHECK-NEXT:    vmov q3[3], q3[1], r4, r5
+; CHECK-NEXT:    vmov.s8 r5, q1[9]
+; CHECK-NEXT:    vmov.s8 r4, q0[9]
+; CHECK-NEXT:    smull r5, r4, r4, r5
+; CHECK-NEXT:    vmov q4[2], q4[0], r2, r5
+; CHECK-NEXT:    vmov q4[3], q4[1], r3, r4
 ; CHECK-NEXT:    vand q3, q4, q3
-; CHECK-NEXT:    vmov r4, s12
-; CHECK-NEXT:    vmov r3, s13
-; CHECK-NEXT:    vmov r5, s15
-; CHECK-NEXT:    adds.w r6, r12, r4
+; CHECK-NEXT:    vmov r3, s12
+; CHECK-NEXT:    vmov r2, s13
 ; CHECK-NEXT:    vmov r4, s14
-; CHECK-NEXT:    adc.w r3, r3, lr
-; CHECK-NEXT:    adds r6, r6, r4
-; CHECK-NEXT:    vmov.u8 r4, q1[10]
-; CHECK-NEXT:    adc.w r12, r3, r5
-; CHECK-NEXT:    ubfx r5, r2, #12, #1
-; CHECK-NEXT:    ubfx r2, r2, #8, #1
+; CHECK-NEXT:    vmov r5, s15
+; CHECK-NEXT:    adds.w r3, r3, r12
+; CHECK-NEXT:    adc.w r2, r2, lr
+; CHECK-NEXT:    adds r3, r3, r4
+; CHECK-NEXT:    vmov.s8 r4, q1[10]
+; CHECK-NEXT:    adc.w r12, r2, r5
+; CHECK-NEXT:    ubfx r5, r6, #12, #1
+; CHECK-NEXT:    ubfx r6, r6, #8, #1
 ; CHECK-NEXT:    rsbs r5, r5, #0
-; CHECK-NEXT:    rsbs r2, r2, #0
-; CHECK-NEXT:    vmov.u8 r3, q0[10]
-; CHECK-NEXT:    vmov q3[2], q3[0], r2, r5
-; CHECK-NEXT:    sxtb r4, r4
-; CHECK-NEXT:    vmov q3[3], q3[1], r2, r5
-; CHECK-NEXT:    vmov.u8 r2, q1[11]
-; CHECK-NEXT:    vmov.u8 r5, q0[11]
-; CHECK-NEXT:    sxtb r3, r3
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r5, r5
-; CHECK-NEXT:    smull r2, r5, r5, r2
-; CHECK-NEXT:    smull r3, r4, r3, r4
-; CHECK-NEXT:    vmov q4[2], q4[0], r3, r2
+; CHECK-NEXT:    rsbs r6, r6, #0
+; CHECK-NEXT:    vmov.s8 r2, q0[10]
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r5
+; CHECK-NEXT:    smull r2, r4, r2, r4
+; CHECK-NEXT:    vmov q3[3], q3[1], r6, r5
+; CHECK-NEXT:    vmov.s8 r6, q1[11]
+; CHECK-NEXT:    vmov.s8 r5, q0[11]
+; CHECK-NEXT:    smull r6, r5, r5, r6
+; CHECK-NEXT:    vmov q4[2], q4[0], r2, r6
 ; CHECK-NEXT:    vmov q4[3], q4[1], r4, r5
-; CHECK-NEXT:    vmov.u8 r4, q1[12]
 ; CHECK-NEXT:    vand q3, q4, q3
-; CHECK-NEXT:    sxtb r4, r4
-; CHECK-NEXT:    vmov r3, s12
+; CHECK-NEXT:    vmov r6, s12
 ; CHECK-NEXT:    vmov r2, s13
-; CHECK-NEXT:    vmov r5, s14
+; CHECK-NEXT:    vmov r5, s15
 ; CHECK-NEXT:    adds r3, r3, r6
-; CHECK-NEXT:    vmov r6, s15
-; CHECK-NEXT:    adc.w r2, r2, r12
-; CHECK-NEXT:    adds.w r12, r3, r5
-; CHECK-NEXT:    vmov.u8 r3, q0[12]
-; CHECK-NEXT:    sxtb r3, r3
-; CHECK-NEXT:    smull r3, r4, r3, r4
-; CHECK-NEXT:    adc.w lr, r2, r6
-; CHECK-NEXT:    vmov.u16 r2, q2[6]
-; CHECK-NEXT:    vmov.u16 r6, q2[4]
-; CHECK-NEXT:    vmov q3[2], q3[0], r6, r2
-; CHECK-NEXT:    vmov.u16 r2, q2[7]
-; CHECK-NEXT:    vmov.u16 r6, q2[5]
-; CHECK-NEXT:    vmov q3[3], q3[1], r6, r2
+; CHECK-NEXT:    adc.w r6, r12, r2
+; CHECK-NEXT:    vmov r2, s14
+; CHECK-NEXT:    adds.w r12, r3, r2
+; CHECK-NEXT:    vmov.s8 r2, q1[12]
+; CHECK-NEXT:    adc.w lr, r6, r5
+; CHECK-NEXT:    vmov.u16 r6, q2[6]
+; CHECK-NEXT:    vmov.u16 r5, q2[4]
+; CHECK-NEXT:    vmov.s8 r3, q0[12]
+; CHECK-NEXT:    vmov q3[2], q3[0], r5, r6
+; CHECK-NEXT:    vmov.u16 r6, q2[7]
+; CHECK-NEXT:    vmov.u16 r5, q2[5]
+; CHECK-NEXT:    smull r2, r3, r3, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r5, r6
 ; CHECK-NEXT:    vcmp.i32 ne, q3, zr
-; CHECK-NEXT:    vmrs r2, p0
-; CHECK-NEXT:    and r5, r2, #1
-; CHECK-NEXT:    ubfx r6, r2, #4, #1
+; CHECK-NEXT:    vmrs r6, p0
+; CHECK-NEXT:    and r4, r6, #1
+; CHECK-NEXT:    ubfx r5, r6, #4, #1
+; CHECK-NEXT:    rsbs r4, r4, #0
 ; CHECK-NEXT:    rsbs r5, r5, #0
-; CHECK-NEXT:    rsbs r6, r6, #0
-; CHECK-NEXT:    vmov q2[2], q2[0], r5, r6
-; CHECK-NEXT:    vmov q2[3], q2[1], r5, r6
-; CHECK-NEXT:    vmov.u8 r6, q1[13]
-; CHECK-NEXT:    vmov.u8 r5, q0[13]
-; CHECK-NEXT:    sxtb r6, r6
-; CHECK-NEXT:    sxtb r5, r5
-; CHECK-NEXT:    smull r6, r5, r5, r6
-; CHECK-NEXT:    vmov q3[2], q3[0], r3, r6
-; CHECK-NEXT:    vmov q3[3], q3[1], r4, r5
+; CHECK-NEXT:    vmov q2[2], q2[0], r4, r5
+; CHECK-NEXT:    vmov q2[3], q2[1], r4, r5
+; CHECK-NEXT:    vmov.s8 r5, q1[13]
+; CHECK-NEXT:    vmov.s8 r4, q0[13]
+; CHECK-NEXT:    smull r5, r4, r4, r5
+; CHECK-NEXT:    vmov q3[2], q3[0], r2, r5
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r4
 ; CHECK-NEXT:    vand q2, q3, q2
-; CHECK-NEXT:    vmov r4, s8
-; CHECK-NEXT:    vmov r3, s9
-; CHECK-NEXT:    vmov r5, s11
-; CHECK-NEXT:    adds.w r6, r12, r4
+; CHECK-NEXT:    vmov r3, s8
+; CHECK-NEXT:    vmov r2, s9
 ; CHECK-NEXT:    vmov r4, s10
-; CHECK-NEXT:    adc.w r3, r3, lr
-; CHECK-NEXT:    adds r6, r6, r4
-; CHECK-NEXT:    vmov.u8 r4, q1[14]
-; CHECK-NEXT:    adc.w r12, r3, r5
-; CHECK-NEXT:    ubfx r5, r2, #12, #1
-; CHECK-NEXT:    ubfx r2, r2, #8, #1
+; CHECK-NEXT:    vmov r5, s11
+; CHECK-NEXT:    adds.w r3, r3, r12
+; CHECK-NEXT:    adc.w r2, r2, lr
+; CHECK-NEXT:    adds r3, r3, r4
+; CHECK-NEXT:    vmov.s8 r4, q1[14]
+; CHECK-NEXT:    adc.w r12, r2, r5
+; CHECK-NEXT:    ubfx r5, r6, #12, #1
+; CHECK-NEXT:    ubfx r6, r6, #8, #1
 ; CHECK-NEXT:    rsbs r5, r5, #0
-; CHECK-NEXT:    rsbs r2, r2, #0
-; CHECK-NEXT:    vmov.u8 r3, q0[14]
-; CHECK-NEXT:    vmov q2[2], q2[0], r2, r5
-; CHECK-NEXT:    sxtb r4, r4
-; CHECK-NEXT:    vmov q2[3], q2[1], r2, r5
-; CHECK-NEXT:    vmov.u8 r2, q1[15]
-; CHECK-NEXT:    vmov.u8 r5, q0[15]
-; CHECK-NEXT:    sxtb r3, r3
-; CHECK-NEXT:    sxtb r2, r2
-; CHECK-NEXT:    sxtb r5, r5
-; CHECK-NEXT:    smull r2, r5, r5, r2
-; CHECK-NEXT:    smull r3, r4, r3, r4
-; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
+; CHECK-NEXT:    rsbs r6, r6, #0
+; CHECK-NEXT:    vmov.s8 r2, q0[14]
+; CHECK-NEXT:    vmov q2[2], q2[0], r6, r5
+; CHECK-NEXT:    smull r2, r4, r2, r4
+; CHECK-NEXT:    vmov q2[3], q2[1], r6, r5
+; CHECK-NEXT:    vmov.s8 r6, q1[15]
+; CHECK-NEXT:    vmov.s8 r5, q0[15]
+; CHECK-NEXT:    smull r6, r5, r5, r6
+; CHECK-NEXT:    vmov q0[2], q0[0], r2, r6
 ; CHECK-NEXT:    vmov q0[3], q0[1], r4, r5
 ; CHECK-NEXT:    vand q0, q0, q2
-; CHECK-NEXT:    vmov r3, s0
+; CHECK-NEXT:    vmov r6, s0
 ; CHECK-NEXT:    vmov r2, s1
 ; CHECK-NEXT:    vmov r5, s2
 ; CHECK-NEXT:    adds r3, r3, r6


        


More information about the llvm-commits mailing list