[llvm] ad12e6e - [ARM] Turn sext_inreg(VGetLaneu) into VGetLanes
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 1 03:10:56 PST 2021
Author: David Green
Date: 2021-02-01T11:10:35Z
New Revision: ad12e6ee9579149c0efb594211fa3fb8aed2d84f
URL: https://github.com/llvm/llvm-project/commit/ad12e6ee9579149c0efb594211fa3fb8aed2d84f
DIFF: https://github.com/llvm/llvm-project/commit/ad12e6ee9579149c0efb594211fa3fb8aed2d84f.diff
LOG: [ARM] Turn sext_inreg(VGetLaneu) into VGetLanes
This adds a DAG combine that converts a sext_inreg of a VGetLaneu into a
VGetLanes, provided the types match.
Differential Revision: https://reviews.llvm.org/D95073
Added:
Modified:
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
llvm/test/CodeGen/Thumb2/mve-div-expand.ll
llvm/test/CodeGen/Thumb2/mve-vcvt.ll
llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll
llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll
llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll
llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 8f2f07d71994..6ae12cc4c6ca 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -977,6 +977,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
@@ -13985,6 +13986,20 @@ static SDValue PerformExtractEltCombine(SDNode *N,
return SDValue();
}
+static SDValue PerformSignExtendInregCombine(SDNode *N, SelectionDAG &DAG) { // Target combine invoked for ISD::SIGN_EXTEND_INREG nodes.
+ SDValue Op = N->getOperand(0); // Operand 0: the value being sign-extended in-register.
+ EVT VT = N->getValueType(0); // Result type of N; reused as the result type of the replacement node.
+
+ // sext_inreg(VGETLANEu(Vec, Lane), EltVT) -> VGETLANEs(Vec, Lane): replace the unsigned lane extract plus sign extension with a single signed lane extract.
+ if (Op.getOpcode() == ARMISD::VGETLANEu &&
+ cast<VTSDNode>(N->getOperand(1))->getVT() == // Operand 1 of N (a VTSDNode) holds the type being extended from...
+ Op.getOperand(0).getValueType().getScalarType()) // ...which must equal the element type of the vector read by VGETLANEu.
+ return DAG.getNode(ARMISD::VGETLANEs, SDLoc(N), VT, Op.getOperand(0),
+ Op.getOperand(1)); // Same two operands as the original VGETLANEu; only the opcode changes.
+
+ return SDValue(); // Pattern did not match: return an empty SDValue.
+}
+
/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
/// ISD::VECTOR_SHUFFLE.
static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
@@ -16356,6 +16371,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
case ISD::EXTRACT_VECTOR_ELT:
return PerformExtractEltCombine(N, DCI, Subtarget);
+ case ISD::SIGN_EXTEND_INREG: return PerformSignExtendInregCombine(N, DCI.DAG);
case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI, Subtarget);
case ARMISD::VDUP: return PerformVDUPCombine(N, DCI, Subtarget);
diff --git a/llvm/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll b/llvm/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
index 643468c6dc8b..cf884f340ebf 100644
--- a/llvm/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
+++ b/llvm/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple armv7 %s -o - | FileCheck %s
define float @f(<4 x i16>* nocapture %in) {
@@ -64,12 +65,10 @@ define <4 x i32> @h(<4 x i8> *%in) {
}
define float @i(<4 x i16>* nocapture %in) {
- ; FIXME: The vmov.u + sxt can convert to a vmov.s
; CHECK-LABEL: i:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r0]
-; CHECK-NEXT: vmov.u16 r0, d16[0]
-; CHECK-NEXT: sxth r0, r0
+; CHECK-NEXT: vmov.s16 r0, d16[0]
; CHECK-NEXT: vmov s0, r0
; CHECK-NEXT: vcvt.f32.s32 s0, s0
; CHECK-NEXT: vmov r0, s0
@@ -96,12 +95,10 @@ define float @j(<8 x i8>* nocapture %in) {
}
define float @k(<8 x i8>* nocapture %in) {
-; FIXME: The vmov.u + sxt can convert to a vmov.s
; CHECK-LABEL: k:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r0]
-; CHECK-NEXT: vmov.u8 r0, d16[7]
-; CHECK-NEXT: sxtb r0, r0
+; CHECK-NEXT: vmov.s8 r0, d16[7]
; CHECK-NEXT: vmov s0, r0
; CHECK-NEXT: vcvt.f32.s32 s0, s0
; CHECK-NEXT: vmov r0, s0
diff --git a/llvm/test/CodeGen/Thumb2/mve-div-expand.ll b/llvm/test/CodeGen/Thumb2/mve-div-expand.ll
index 5704ca95e2b6..2c16b818d557 100644
--- a/llvm/test/CodeGen/Thumb2/mve-div-expand.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-div-expand.ll
@@ -154,58 +154,40 @@ entry:
define arm_aapcs_vfpcc <8 x i16> @sdiv_i16(<8 x i16> %in1, <8 x i16> %in2) {
; CHECK-LABEL: sdiv_i16:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, lr}
-; CHECK-NEXT: push {r4, r5, r6, lr}
-; CHECK-NEXT: vmov.u16 r0, q1[3]
-; CHECK-NEXT: vmov.u16 r1, q0[3]
-; CHECK-NEXT: sxth r0, r0
-; CHECK-NEXT: sxth r1, r1
-; CHECK-NEXT: vmov.u16 r2, q0[2]
-; CHECK-NEXT: sdiv r12, r1, r0
-; CHECK-NEXT: vmov.u16 r1, q1[2]
-; CHECK-NEXT: sxth r2, r2
-; CHECK-NEXT: sxth r1, r1
-; CHECK-NEXT: vmov.u16 r4, q1[6]
-; CHECK-NEXT: sdiv r3, r2, r1
-; CHECK-NEXT: vmov.u16 r1, q1[1]
-; CHECK-NEXT: vmov.u16 r2, q0[1]
-; CHECK-NEXT: sxth r1, r1
-; CHECK-NEXT: sxth r2, r2
-; CHECK-NEXT: vmov.u16 r5, q0[6]
-; CHECK-NEXT: sdiv r0, r2, r1
-; CHECK-NEXT: vmov.u16 r1, q1[0]
-; CHECK-NEXT: vmov.u16 r2, q0[0]
-; CHECK-NEXT: sxth r1, r1
-; CHECK-NEXT: sxth r2, r2
-; CHECK-NEXT: sxth r4, r4
+; CHECK-NEXT: vmov.s16 r0, q1[0]
+; CHECK-NEXT: vmov.s16 r1, q0[0]
+; CHECK-NEXT: sdiv r0, r1, r0
+; CHECK-NEXT: vmov.s16 r1, q1[1]
+; CHECK-NEXT: vmov.s16 r2, q0[1]
+; CHECK-NEXT: vmov.16 q2[0], r0
; CHECK-NEXT: sdiv r1, r2, r1
-; CHECK-NEXT: vmov.u16 r2, q1[7]
-; CHECK-NEXT: vmov.16 q2[0], r1
-; CHECK-NEXT: sxth.w lr, r2
-; CHECK-NEXT: vmov.16 q2[1], r0
-; CHECK-NEXT: vmov.u16 r2, q0[7]
-; CHECK-NEXT: vmov.16 q2[2], r3
-; CHECK-NEXT: vmov.u16 r3, q1[4]
-; CHECK-NEXT: sxth r6, r2
-; CHECK-NEXT: vmov.u16 r2, q0[4]
-; CHECK-NEXT: vmov.u16 r1, q1[5]
-; CHECK-NEXT: vmov.u16 r0, q0[5]
-; CHECK-NEXT: sxth r3, r3
-; CHECK-NEXT: sxth r2, r2
-; CHECK-NEXT: sxth r1, r1
-; CHECK-NEXT: sxth r0, r0
-; CHECK-NEXT: vmov.16 q2[3], r12
-; CHECK-NEXT: sdiv r2, r2, r3
-; CHECK-NEXT: sxth r5, r5
-; CHECK-NEXT: vmov.16 q2[4], r2
-; CHECK-NEXT: sdiv r0, r0, r1
+; CHECK-NEXT: vmov.s16 r0, q1[2]
+; CHECK-NEXT: vmov.16 q2[1], r1
+; CHECK-NEXT: vmov.s16 r1, q0[2]
+; CHECK-NEXT: sdiv r0, r1, r0
+; CHECK-NEXT: vmov.s16 r1, q0[3]
+; CHECK-NEXT: vmov.16 q2[2], r0
+; CHECK-NEXT: vmov.s16 r0, q1[3]
+; CHECK-NEXT: sdiv r0, r1, r0
+; CHECK-NEXT: vmov.s16 r1, q0[4]
+; CHECK-NEXT: vmov.16 q2[3], r0
+; CHECK-NEXT: vmov.s16 r0, q1[4]
+; CHECK-NEXT: sdiv r0, r1, r0
+; CHECK-NEXT: vmov.s16 r1, q0[5]
+; CHECK-NEXT: vmov.16 q2[4], r0
+; CHECK-NEXT: vmov.s16 r0, q1[5]
+; CHECK-NEXT: sdiv r0, r1, r0
+; CHECK-NEXT: vmov.s16 r1, q0[6]
; CHECK-NEXT: vmov.16 q2[5], r0
-; CHECK-NEXT: sdiv r0, r5, r4
+; CHECK-NEXT: vmov.s16 r0, q1[6]
+; CHECK-NEXT: sdiv r0, r1, r0
+; CHECK-NEXT: vmov.s16 r1, q0[7]
; CHECK-NEXT: vmov.16 q2[6], r0
-; CHECK-NEXT: sdiv r0, r6, lr
+; CHECK-NEXT: vmov.s16 r0, q1[7]
+; CHECK-NEXT: sdiv r0, r1, r0
; CHECK-NEXT: vmov.16 q2[7], r0
; CHECK-NEXT: vmov q0, q2
-; CHECK-NEXT: pop {r4, r5, r6, pc}
+; CHECK-NEXT: bx lr
entry:
%out = sdiv <8 x i16> %in1, %in2
ret <8 x i16> %out
@@ -265,65 +247,49 @@ entry:
define arm_aapcs_vfpcc <8 x i16> @srem_i16(<8 x i16> %in1, <8 x i16> %in2) {
; CHECK-LABEL: srem_i16:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT: vmov.u16 r5, q1[6]
-; CHECK-NEXT: vmov.u16 r6, q0[6]
-; CHECK-NEXT: sxth r5, r5
-; CHECK-NEXT: sxth r6, r6
-; CHECK-NEXT: vmov.u16 r0, q1[0]
-; CHECK-NEXT: sdiv r7, r6, r5
-; CHECK-NEXT: vmov.u16 r2, q1[7]
-; CHECK-NEXT: sxth.w r8, r0
-; CHECK-NEXT: vmov.u16 r0, q1[3]
-; CHECK-NEXT: mls r12, r7, r5, r6
-; CHECK-NEXT: vmov.u16 r7, q0[7]
-; CHECK-NEXT: sxth r3, r0
-; CHECK-NEXT: vmov.u16 r0, q1[2]
-; CHECK-NEXT: sxth r2, r2
-; CHECK-NEXT: sxth r7, r7
-; CHECK-NEXT: sxth r4, r0
-; CHECK-NEXT: vmov.u16 r0, q1[5]
-; CHECK-NEXT: sdiv r6, r7, r2
-; CHECK-NEXT: mls lr, r6, r2, r7
-; CHECK-NEXT: vmov.u16 r2, q0[4]
-; CHECK-NEXT: sxth r1, r0
-; CHECK-NEXT: vmov.u16 r0, q1[4]
-; CHECK-NEXT: sxth r0, r0
-; CHECK-NEXT: sxth r2, r2
-; CHECK-NEXT: sdiv r5, r2, r0
-; CHECK-NEXT: vmov.u16 r6, q0[1]
-; CHECK-NEXT: mls r0, r5, r0, r2
-; CHECK-NEXT: vmov.u16 r2, q0[5]
-; CHECK-NEXT: sxth r2, r2
-; CHECK-NEXT: sdiv r5, r2, r1
-; CHECK-NEXT: sxth r6, r6
-; CHECK-NEXT: mls r1, r5, r1, r2
-; CHECK-NEXT: vmov.u16 r2, q0[2]
-; CHECK-NEXT: sxth r2, r2
-; CHECK-NEXT: sdiv r5, r2, r4
-; CHECK-NEXT: mls r2, r5, r4, r2
-; CHECK-NEXT: vmov.u16 r4, q0[3]
-; CHECK-NEXT: sxth r4, r4
+; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-NEXT: vmov.s16 r0, q1[6]
+; CHECK-NEXT: vmov.s16 r1, q0[6]
+; CHECK-NEXT: sdiv r2, r1, r0
+; CHECK-NEXT: mls r12, r2, r0, r1
+; CHECK-NEXT: vmov.s16 r1, q1[7]
+; CHECK-NEXT: vmov.s16 r2, q0[7]
+; CHECK-NEXT: sdiv r3, r2, r1
+; CHECK-NEXT: mls lr, r3, r1, r2
+; CHECK-NEXT: vmov.s16 r2, q1[4]
+; CHECK-NEXT: vmov.s16 r3, q0[4]
+; CHECK-NEXT: sdiv r0, r3, r2
+; CHECK-NEXT: mls r2, r0, r2, r3
+; CHECK-NEXT: vmov.s16 r0, q1[5]
+; CHECK-NEXT: vmov.s16 r3, q0[5]
+; CHECK-NEXT: sdiv r1, r3, r0
+; CHECK-NEXT: mls r0, r1, r0, r3
+; CHECK-NEXT: vmov.s16 r1, q1[2]
+; CHECK-NEXT: vmov.s16 r3, q0[2]
+; CHECK-NEXT: sdiv r4, r3, r1
+; CHECK-NEXT: mls r1, r4, r1, r3
+; CHECK-NEXT: vmov.s16 r3, q1[3]
+; CHECK-NEXT: vmov.s16 r4, q0[3]
; CHECK-NEXT: sdiv r5, r4, r3
; CHECK-NEXT: mls r3, r5, r3, r4
-; CHECK-NEXT: vmov.u16 r4, q0[0]
-; CHECK-NEXT: sxth r4, r4
-; CHECK-NEXT: sdiv r5, r4, r8
-; CHECK-NEXT: mls r4, r5, r8, r4
-; CHECK-NEXT: vmov.u16 r5, q1[1]
-; CHECK-NEXT: sxth r5, r5
+; CHECK-NEXT: vmov.s16 r4, q1[0]
+; CHECK-NEXT: vmov.s16 r5, q0[0]
+; CHECK-NEXT: sdiv r6, r5, r4
+; CHECK-NEXT: mls r4, r6, r4, r5
+; CHECK-NEXT: vmov.s16 r6, q0[1]
+; CHECK-NEXT: vmov.s16 r5, q1[1]
; CHECK-NEXT: sdiv r7, r6, r5
; CHECK-NEXT: vmov.16 q0[0], r4
; CHECK-NEXT: mls r5, r7, r5, r6
; CHECK-NEXT: vmov.16 q0[1], r5
-; CHECK-NEXT: vmov.16 q0[2], r2
+; CHECK-NEXT: vmov.16 q0[2], r1
; CHECK-NEXT: vmov.16 q0[3], r3
-; CHECK-NEXT: vmov.16 q0[4], r0
-; CHECK-NEXT: vmov.16 q0[5], r1
+; CHECK-NEXT: vmov.16 q0[4], r2
+; CHECK-NEXT: vmov.16 q0[5], r0
; CHECK-NEXT: vmov.16 q0[6], r12
; CHECK-NEXT: vmov.16 q0[7], lr
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
entry:
%out = srem <8 x i16> %in1, %in2
ret <8 x i16> %out
@@ -407,106 +373,72 @@ entry:
define arm_aapcs_vfpcc <16 x i8> @sdiv_i8(<16 x i8> %in1, <16 x i8> %in2) {
; CHECK-LABEL: sdiv_i8:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, lr}
-; CHECK-NEXT: vmov.u8 r0, q1[1]
-; CHECK-NEXT: vmov.u8 r1, q0[1]
-; CHECK-NEXT: sxtb r0, r0
-; CHECK-NEXT: sxtb r1, r1
-; CHECK-NEXT: vmov.u8 r2, q0[0]
+; CHECK-NEXT: vmov.s8 r0, q1[0]
+; CHECK-NEXT: vmov.s8 r1, q0[0]
; CHECK-NEXT: sdiv r0, r1, r0
-; CHECK-NEXT: vmov.u8 r1, q1[0]
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r1, r1
-; CHECK-NEXT: vmov.u8 r4, q1[3]
+; CHECK-NEXT: vmov.s8 r1, q1[1]
+; CHECK-NEXT: vmov.s8 r2, q0[1]
+; CHECK-NEXT: vmov.8 q2[0], r0
; CHECK-NEXT: sdiv r1, r2, r1
-; CHECK-NEXT: vmov.u8 r5, q0[3]
-; CHECK-NEXT: vmov.8 q2[0], r1
-; CHECK-NEXT: vmov.u8 r1, q1[2]
-; CHECK-NEXT: vmov.8 q2[1], r0
-; CHECK-NEXT: vmov.u8 r0, q0[2]
-; CHECK-NEXT: vmov.u8 r2, q1[11]
-; CHECK-NEXT: vmov.u8 r3, q0[11]
-; CHECK-NEXT: sxtb r1, r1
-; CHECK-NEXT: sxtb r0, r0
-; CHECK-NEXT: sdiv r0, r0, r1
-; CHECK-NEXT: sxtb.w r12, r2
-; CHECK-NEXT: sxtb.w lr, r3
-; CHECK-NEXT: vmov.u8 r2, q1[4]
-; CHECK-NEXT: vmov.u8 r3, q0[4]
-; CHECK-NEXT: sxtb r4, r4
-; CHECK-NEXT: sxtb r5, r5
+; CHECK-NEXT: vmov.s8 r0, q1[2]
+; CHECK-NEXT: vmov.8 q2[1], r1
+; CHECK-NEXT: vmov.s8 r1, q0[2]
+; CHECK-NEXT: sdiv r0, r1, r0
+; CHECK-NEXT: vmov.s8 r1, q0[3]
; CHECK-NEXT: vmov.8 q2[2], r0
-; CHECK-NEXT: sdiv r0, r5, r4
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r0, q1[3]
+; CHECK-NEXT: sdiv r0, r1, r0
+; CHECK-NEXT: vmov.s8 r1, q0[4]
; CHECK-NEXT: vmov.8 q2[3], r0
-; CHECK-NEXT: sdiv r0, r3, r2
-; CHECK-NEXT: vmov.u8 r1, q0[10]
+; CHECK-NEXT: vmov.s8 r0, q1[4]
+; CHECK-NEXT: sdiv r0, r1, r0
+; CHECK-NEXT: vmov.s8 r1, q0[5]
; CHECK-NEXT: vmov.8 q2[4], r0
-; CHECK-NEXT: vmov.u8 r0, q1[10]
-; CHECK-NEXT: sxtb r0, r0
-; CHECK-NEXT: sxtb r1, r1
-; CHECK-NEXT: sdiv r12, lr, r12
-; CHECK-NEXT: sdiv lr, r1, r0
-; CHECK-NEXT: vmov.u8 r0, q1[9]
-; CHECK-NEXT: vmov.u8 r1, q0[9]
-; CHECK-NEXT: sxtb r0, r0
-; CHECK-NEXT: sxtb r1, r1
-; CHECK-NEXT: sdiv r2, r1, r0
-; CHECK-NEXT: vmov.u8 r0, q1[8]
-; CHECK-NEXT: vmov.u8 r1, q0[8]
-; CHECK-NEXT: sxtb r0, r0
-; CHECK-NEXT: sxtb r1, r1
-; CHECK-NEXT: vmov.u8 r3, q0[7]
-; CHECK-NEXT: sdiv r1, r1, r0
-; CHECK-NEXT: vmov.u8 r0, q1[7]
-; CHECK-NEXT: sxtb r0, r0
-; CHECK-NEXT: sxtb r3, r3
-; CHECK-NEXT: sdiv r4, r3, r0
-; CHECK-NEXT: vmov.u8 r0, q1[6]
-; CHECK-NEXT: vmov.u8 r3, q0[6]
-; CHECK-NEXT: sxtb r0, r0
-; CHECK-NEXT: sxtb r3, r3
-; CHECK-NEXT: vmov.u8 r6, q0[12]
-; CHECK-NEXT: sdiv r5, r3, r0
-; CHECK-NEXT: vmov.u8 r0, q1[5]
-; CHECK-NEXT: vmov.u8 r3, q0[5]
-; CHECK-NEXT: sxtb r0, r0
-; CHECK-NEXT: sxtb r3, r3
-; CHECK-NEXT: sxtb r6, r6
-; CHECK-NEXT: sdiv r0, r3, r0
-; CHECK-NEXT: vmov.u8 r3, q1[15]
+; CHECK-NEXT: vmov.s8 r0, q1[5]
+; CHECK-NEXT: sdiv r0, r1, r0
+; CHECK-NEXT: vmov.s8 r1, q0[6]
; CHECK-NEXT: vmov.8 q2[5], r0
-; CHECK-NEXT: sxtb r7, r3
-; CHECK-NEXT: vmov.8 q2[6], r5
-; CHECK-NEXT: vmov.u8 r3, q1[12]
-; CHECK-NEXT: vmov.8 q2[7], r4
-; CHECK-NEXT: sxtb r3, r3
-; CHECK-NEXT: vmov.8 q2[8], r1
-; CHECK-NEXT: vmov.u8 r1, q1[13]
-; CHECK-NEXT: vmov.8 q2[9], r2
-; CHECK-NEXT: vmov.u8 r2, q0[13]
-; CHECK-NEXT: vmov.8 q2[10], lr
-; CHECK-NEXT: vmov.u8 r5, q1[14]
-; CHECK-NEXT: vmov.u8 r4, q0[14]
-; CHECK-NEXT: sxtb r1, r1
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: vmov.8 q2[11], r12
-; CHECK-NEXT: sdiv r3, r6, r3
-; CHECK-NEXT: vmov.u8 r0, q0[15]
-; CHECK-NEXT: sxtb r5, r5
-; CHECK-NEXT: sxtb r4, r4
-; CHECK-NEXT: vmov.8 q2[12], r3
-; CHECK-NEXT: sdiv r1, r2, r1
-; CHECK-NEXT: sxtb r0, r0
-; CHECK-NEXT: vmov.8 q2[13], r1
-; CHECK-NEXT: sdiv r1, r4, r5
-; CHECK-NEXT: sdiv r0, r0, r7
-; CHECK-NEXT: vmov.8 q2[14], r1
+; CHECK-NEXT: vmov.s8 r0, q1[6]
+; CHECK-NEXT: sdiv r0, r1, r0
+; CHECK-NEXT: vmov.s8 r1, q0[7]
+; CHECK-NEXT: vmov.8 q2[6], r0
+; CHECK-NEXT: vmov.s8 r0, q1[7]
+; CHECK-NEXT: sdiv r0, r1, r0
+; CHECK-NEXT: vmov.s8 r1, q0[8]
+; CHECK-NEXT: vmov.8 q2[7], r0
+; CHECK-NEXT: vmov.s8 r0, q1[8]
+; CHECK-NEXT: sdiv r0, r1, r0
+; CHECK-NEXT: vmov.s8 r1, q0[9]
+; CHECK-NEXT: vmov.8 q2[8], r0
+; CHECK-NEXT: vmov.s8 r0, q1[9]
+; CHECK-NEXT: sdiv r0, r1, r0
+; CHECK-NEXT: vmov.s8 r1, q0[10]
+; CHECK-NEXT: vmov.8 q2[9], r0
+; CHECK-NEXT: vmov.s8 r0, q1[10]
+; CHECK-NEXT: sdiv r0, r1, r0
+; CHECK-NEXT: vmov.s8 r1, q0[11]
+; CHECK-NEXT: vmov.8 q2[10], r0
+; CHECK-NEXT: vmov.s8 r0, q1[11]
+; CHECK-NEXT: sdiv r0, r1, r0
+; CHECK-NEXT: vmov.s8 r1, q0[12]
+; CHECK-NEXT: vmov.8 q2[11], r0
+; CHECK-NEXT: vmov.s8 r0, q1[12]
+; CHECK-NEXT: sdiv r0, r1, r0
+; CHECK-NEXT: vmov.s8 r1, q0[13]
+; CHECK-NEXT: vmov.8 q2[12], r0
+; CHECK-NEXT: vmov.s8 r0, q1[13]
+; CHECK-NEXT: sdiv r0, r1, r0
+; CHECK-NEXT: vmov.s8 r1, q0[14]
+; CHECK-NEXT: vmov.8 q2[13], r0
+; CHECK-NEXT: vmov.s8 r0, q1[14]
+; CHECK-NEXT: sdiv r0, r1, r0
+; CHECK-NEXT: vmov.s8 r1, q0[15]
+; CHECK-NEXT: vmov.8 q2[14], r0
+; CHECK-NEXT: vmov.s8 r0, q1[15]
+; CHECK-NEXT: sdiv r0, r1, r0
; CHECK-NEXT: vmov.8 q2[15], r0
; CHECK-NEXT: vmov q0, q2
-; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT: bx lr
entry:
%out = sdiv <16 x i8> %in1, %in2
ret <16 x i8> %out
@@ -607,122 +539,90 @@ entry:
define arm_aapcs_vfpcc <16 x i8> @srem_i8(<16 x i8> %in1, <16 x i8> %in2) {
; CHECK-LABEL: srem_i8:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-NEXT: vmov.u8 r5, q1[14]
-; CHECK-NEXT: vmov.u8 r6, q0[14]
-; CHECK-NEXT: sxtb r5, r5
-; CHECK-NEXT: sxtb r6, r6
-; CHECK-NEXT: sdiv r7, r6, r5
-; CHECK-NEXT: vmov.u8 r4, q1[15]
-; CHECK-NEXT: mls r12, r7, r5, r6
-; CHECK-NEXT: vmov.u8 r7, q0[15]
-; CHECK-NEXT: sxtb r4, r4
-; CHECK-NEXT: vmov.u8 r2, q1[13]
-; CHECK-NEXT: sxtb r7, r7
-; CHECK-NEXT: sxtb r3, r2
-; CHECK-NEXT: sdiv r6, r7, r4
-; CHECK-NEXT: vmov.u8 r2, q1[12]
-; CHECK-NEXT: mls lr, r6, r4, r7
-; CHECK-NEXT: vmov.u8 r4, q0[12]
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: vmov.u8 r0, q1[8]
-; CHECK-NEXT: sxtb r4, r4
-; CHECK-NEXT: sxtb.w r8, r0
-; CHECK-NEXT: sdiv r5, r4, r2
-; CHECK-NEXT: vmov.u8 r0, q1[11]
-; CHECK-NEXT: mls r9, r5, r2, r4
-; CHECK-NEXT: vmov.u8 r4, q0[13]
-; CHECK-NEXT: sxtb r4, r4
-; CHECK-NEXT: vmov.u8 r6, q0[0]
-; CHECK-NEXT: sdiv r5, r4, r3
-; CHECK-NEXT: sxtb r1, r0
-; CHECK-NEXT: vmov.u8 r0, q1[10]
-; CHECK-NEXT: mls r3, r5, r3, r4
-; CHECK-NEXT: vmov.u8 r4, q0[10]
-; CHECK-NEXT: sxtb r0, r0
-; CHECK-NEXT: sxtb r4, r4
-; CHECK-NEXT: sxtb r6, r6
-; CHECK-NEXT: sdiv r5, r4, r0
-; CHECK-NEXT: mls r0, r5, r0, r4
-; CHECK-NEXT: vmov.u8 r4, q0[11]
-; CHECK-NEXT: sxtb r4, r4
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: vmov.s8 r0, q1[14]
+; CHECK-NEXT: vmov.s8 r1, q0[14]
+; CHECK-NEXT: sdiv r2, r1, r0
+; CHECK-NEXT: mls r12, r2, r0, r1
+; CHECK-NEXT: vmov.s8 r0, q1[15]
+; CHECK-NEXT: vmov.s8 r1, q0[15]
+; CHECK-NEXT: sdiv r2, r1, r0
+; CHECK-NEXT: mls lr, r2, r0, r1
+; CHECK-NEXT: vmov.s8 r0, q1[12]
+; CHECK-NEXT: vmov.s8 r1, q0[12]
+; CHECK-NEXT: sdiv r2, r1, r0
+; CHECK-NEXT: mls r8, r2, r0, r1
+; CHECK-NEXT: vmov.s8 r0, q1[13]
+; CHECK-NEXT: vmov.s8 r1, q0[13]
+; CHECK-NEXT: sdiv r3, r1, r0
+; CHECK-NEXT: mls r3, r3, r0, r1
+; CHECK-NEXT: vmov.s8 r0, q1[10]
+; CHECK-NEXT: vmov.s8 r1, q0[10]
+; CHECK-NEXT: sdiv r4, r1, r0
+; CHECK-NEXT: mls r0, r4, r0, r1
+; CHECK-NEXT: vmov.s8 r1, q1[11]
+; CHECK-NEXT: vmov.s8 r4, q0[11]
; CHECK-NEXT: sdiv r5, r4, r1
; CHECK-NEXT: mls r1, r5, r1, r4
-; CHECK-NEXT: vmov.u8 r4, q0[8]
-; CHECK-NEXT: sxtb r4, r4
-; CHECK-NEXT: sdiv r5, r4, r8
-; CHECK-NEXT: mls r4, r5, r8, r4
-; CHECK-NEXT: vmov.u8 r5, q1[0]
-; CHECK-NEXT: sxtb r5, r5
+; CHECK-NEXT: vmov.s8 r4, q1[8]
+; CHECK-NEXT: vmov.s8 r5, q0[8]
+; CHECK-NEXT: sdiv r6, r5, r4
+; CHECK-NEXT: mls r4, r6, r4, r5
+; CHECK-NEXT: vmov.s8 r5, q1[0]
+; CHECK-NEXT: vmov.s8 r6, q0[0]
; CHECK-NEXT: sdiv r7, r6, r5
; CHECK-NEXT: mls r5, r7, r5, r6
-; CHECK-NEXT: vmov.u8 r6, q1[1]
-; CHECK-NEXT: vmov.u8 r7, q0[1]
-; CHECK-NEXT: sxtb r6, r6
-; CHECK-NEXT: sxtb r7, r7
-; CHECK-NEXT: vmov.8 q2[0], r5
+; CHECK-NEXT: vmov.s8 r6, q1[1]
+; CHECK-NEXT: vmov.s8 r7, q0[1]
; CHECK-NEXT: sdiv r2, r7, r6
-; CHECK-NEXT: vmov.u8 r5, q0[2]
+; CHECK-NEXT: vmov.8 q2[0], r5
; CHECK-NEXT: mls r2, r2, r6, r7
-; CHECK-NEXT: sxtb r5, r5
+; CHECK-NEXT: vmov.s8 r5, q0[2]
; CHECK-NEXT: vmov.8 q2[1], r2
-; CHECK-NEXT: vmov.u8 r2, q1[2]
-; CHECK-NEXT: sxtb r2, r2
+; CHECK-NEXT: vmov.s8 r2, q1[2]
; CHECK-NEXT: sdiv r6, r5, r2
; CHECK-NEXT: mls r2, r6, r2, r5
-; CHECK-NEXT: vmov.u8 r5, q0[3]
-; CHECK-NEXT: sxtb r5, r5
+; CHECK-NEXT: vmov.s8 r5, q0[3]
; CHECK-NEXT: vmov.8 q2[2], r2
-; CHECK-NEXT: vmov.u8 r2, q1[3]
-; CHECK-NEXT: sxtb r2, r2
+; CHECK-NEXT: vmov.s8 r2, q1[3]
; CHECK-NEXT: sdiv r6, r5, r2
; CHECK-NEXT: mls r2, r6, r2, r5
-; CHECK-NEXT: vmov.u8 r5, q0[4]
-; CHECK-NEXT: sxtb r5, r5
+; CHECK-NEXT: vmov.s8 r5, q0[4]
; CHECK-NEXT: vmov.8 q2[3], r2
-; CHECK-NEXT: vmov.u8 r2, q1[4]
-; CHECK-NEXT: sxtb r2, r2
+; CHECK-NEXT: vmov.s8 r2, q1[4]
; CHECK-NEXT: sdiv r6, r5, r2
; CHECK-NEXT: mls r2, r6, r2, r5
-; CHECK-NEXT: vmov.u8 r5, q0[5]
-; CHECK-NEXT: sxtb r5, r5
+; CHECK-NEXT: vmov.s8 r5, q0[5]
; CHECK-NEXT: vmov.8 q2[4], r2
-; CHECK-NEXT: vmov.u8 r2, q1[5]
-; CHECK-NEXT: sxtb r2, r2
+; CHECK-NEXT: vmov.s8 r2, q1[5]
; CHECK-NEXT: sdiv r6, r5, r2
; CHECK-NEXT: mls r2, r6, r2, r5
-; CHECK-NEXT: vmov.u8 r5, q0[6]
-; CHECK-NEXT: sxtb r5, r5
+; CHECK-NEXT: vmov.s8 r5, q0[6]
; CHECK-NEXT: vmov.8 q2[5], r2
-; CHECK-NEXT: vmov.u8 r2, q1[6]
-; CHECK-NEXT: sxtb r2, r2
+; CHECK-NEXT: vmov.s8 r2, q1[6]
; CHECK-NEXT: sdiv r6, r5, r2
; CHECK-NEXT: mls r2, r6, r2, r5
-; CHECK-NEXT: vmov.u8 r5, q0[7]
-; CHECK-NEXT: sxtb r5, r5
+; CHECK-NEXT: vmov.s8 r5, q0[7]
; CHECK-NEXT: vmov.8 q2[6], r2
-; CHECK-NEXT: vmov.u8 r2, q1[7]
-; CHECK-NEXT: sxtb r2, r2
+; CHECK-NEXT: vmov.s8 r2, q1[7]
; CHECK-NEXT: sdiv r6, r5, r2
; CHECK-NEXT: mls r2, r6, r2, r5
-; CHECK-NEXT: vmov.u8 r5, q0[9]
-; CHECK-NEXT: sxtb r5, r5
+; CHECK-NEXT: vmov.s8 r5, q0[9]
; CHECK-NEXT: vmov.8 q2[7], r2
-; CHECK-NEXT: vmov.u8 r2, q1[9]
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: vmov.8 q2[8], r4
+; CHECK-NEXT: vmov.s8 r2, q1[9]
; CHECK-NEXT: sdiv r6, r5, r2
+; CHECK-NEXT: vmov.8 q2[8], r4
; CHECK-NEXT: mls r2, r6, r2, r5
; CHECK-NEXT: vmov.8 q2[9], r2
; CHECK-NEXT: vmov.8 q2[10], r0
; CHECK-NEXT: vmov.8 q2[11], r1
-; CHECK-NEXT: vmov.8 q2[12], r9
+; CHECK-NEXT: vmov.8 q2[12], r8
; CHECK-NEXT: vmov.8 q2[13], r3
; CHECK-NEXT: vmov.8 q2[14], r12
; CHECK-NEXT: vmov.8 q2[15], lr
; CHECK-NEXT: vmov q0, q2
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
entry:
%out = srem <16 x i8> %in1, %in2
ret <16 x i8> %out
diff --git a/llvm/test/CodeGen/Thumb2/mve-vcvt.ll b/llvm/test/CodeGen/Thumb2/mve-vcvt.ll
index 851be7124e5b..93c8af4928fa 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcvt.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcvt.ll
@@ -91,10 +91,8 @@ entry:
define arm_aapcs_vfpcc <8 x half> @foo_half_int16(<8 x i16> %src) {
; CHECK-MVE-LABEL: foo_half_int16:
; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: vmov.u16 r0, q0[0]
-; CHECK-MVE-NEXT: vmov.u16 r1, q0[1]
-; CHECK-MVE-NEXT: sxth r0, r0
-; CHECK-MVE-NEXT: sxth r1, r1
+; CHECK-MVE-NEXT: vmov.s16 r0, q0[0]
+; CHECK-MVE-NEXT: vmov.s16 r1, q0[1]
; CHECK-MVE-NEXT: vmov s4, r0
; CHECK-MVE-NEXT: vcvt.f16.s32 s4, s4
; CHECK-MVE-NEXT: vmov r0, s4
@@ -102,39 +100,33 @@ define arm_aapcs_vfpcc <8 x half> @foo_half_int16(<8 x i16> %src) {
; CHECK-MVE-NEXT: vcvt.f16.s32 s4, s4
; CHECK-MVE-NEXT: vmov r1, s4
; CHECK-MVE-NEXT: vmov.16 q1[0], r0
-; CHECK-MVE-NEXT: vmov.u16 r0, q0[2]
+; CHECK-MVE-NEXT: vmov.s16 r0, q0[2]
; CHECK-MVE-NEXT: vmov.16 q1[1], r1
-; CHECK-MVE-NEXT: sxth r0, r0
; CHECK-MVE-NEXT: vmov s8, r0
; CHECK-MVE-NEXT: vcvt.f16.s32 s8, s8
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.16 q1[2], r0
-; CHECK-MVE-NEXT: vmov.u16 r0, q0[3]
-; CHECK-MVE-NEXT: sxth r0, r0
+; CHECK-MVE-NEXT: vmov.s16 r0, q0[3]
; CHECK-MVE-NEXT: vmov s8, r0
; CHECK-MVE-NEXT: vcvt.f16.s32 s8, s8
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.16 q1[3], r0
-; CHECK-MVE-NEXT: vmov.u16 r0, q0[4]
-; CHECK-MVE-NEXT: sxth r0, r0
+; CHECK-MVE-NEXT: vmov.s16 r0, q0[4]
; CHECK-MVE-NEXT: vmov s8, r0
; CHECK-MVE-NEXT: vcvt.f16.s32 s8, s8
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.16 q1[4], r0
-; CHECK-MVE-NEXT: vmov.u16 r0, q0[5]
-; CHECK-MVE-NEXT: sxth r0, r0
+; CHECK-MVE-NEXT: vmov.s16 r0, q0[5]
; CHECK-MVE-NEXT: vmov s8, r0
; CHECK-MVE-NEXT: vcvt.f16.s32 s8, s8
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.16 q1[5], r0
-; CHECK-MVE-NEXT: vmov.u16 r0, q0[6]
-; CHECK-MVE-NEXT: sxth r0, r0
+; CHECK-MVE-NEXT: vmov.s16 r0, q0[6]
; CHECK-MVE-NEXT: vmov s8, r0
; CHECK-MVE-NEXT: vcvt.f16.s32 s8, s8
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.16 q1[6], r0
-; CHECK-MVE-NEXT: vmov.u16 r0, q0[7]
-; CHECK-MVE-NEXT: sxth r0, r0
+; CHECK-MVE-NEXT: vmov.s16 r0, q0[7]
; CHECK-MVE-NEXT: vmov s0, r0
; CHECK-MVE-NEXT: vcvt.f16.s32 s0, s0
; CHECK-MVE-NEXT: vmov r0, s0
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll
index 49bd421d17aa..4b48861a6fc1 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll
@@ -178,10 +178,8 @@ entry:
define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x) {
; CHECK-LABEL: add_v8i16_v8i64_sext:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.u16 r0, q0[1]
-; CHECK-NEXT: vmov.u16 r1, q0[0]
-; CHECK-NEXT: sxth r0, r0
-; CHECK-NEXT: sxth r1, r1
+; CHECK-NEXT: vmov.s16 r0, q0[1]
+; CHECK-NEXT: vmov.s16 r1, q0[0]
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
; CHECK-NEXT: asrs r2, r0, #31
; CHECK-NEXT: asrs r1, r1, #31
@@ -190,11 +188,9 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x) {
; CHECK-NEXT: vmov r3, s4
; CHECK-NEXT: vmov r1, s5
; CHECK-NEXT: adds r2, r2, r3
-; CHECK-NEXT: vmov.u16 r3, q0[2]
+; CHECK-NEXT: vmov.s16 r3, q0[2]
; CHECK-NEXT: adc.w r12, r1, r0, asr #31
-; CHECK-NEXT: vmov.u16 r1, q0[3]
-; CHECK-NEXT: sxth r1, r1
-; CHECK-NEXT: sxth r3, r3
+; CHECK-NEXT: vmov.s16 r1, q0[3]
; CHECK-NEXT: vmov q1[2], q1[0], r3, r1
; CHECK-NEXT: asrs r0, r1, #31
; CHECK-NEXT: asrs r3, r3, #31
@@ -205,11 +201,9 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x) {
; CHECK-NEXT: vmov r3, s6
; CHECK-NEXT: adc.w r0, r0, r12
; CHECK-NEXT: adds r2, r2, r3
-; CHECK-NEXT: vmov.u16 r3, q0[4]
+; CHECK-NEXT: vmov.s16 r3, q0[4]
; CHECK-NEXT: adc.w r12, r0, r1, asr #31
-; CHECK-NEXT: vmov.u16 r1, q0[5]
-; CHECK-NEXT: sxth r1, r1
-; CHECK-NEXT: sxth r3, r3
+; CHECK-NEXT: vmov.s16 r1, q0[5]
; CHECK-NEXT: vmov q1[2], q1[0], r3, r1
; CHECK-NEXT: asrs r0, r1, #31
; CHECK-NEXT: asrs r3, r3, #31
@@ -220,13 +214,11 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x) {
; CHECK-NEXT: vmov r3, s6
; CHECK-NEXT: adc.w r0, r0, r12
; CHECK-NEXT: adds r2, r2, r3
+; CHECK-NEXT: vmov.s16 r3, q0[7]
; CHECK-NEXT: adc.w r0, r0, r1, asr #31
-; CHECK-NEXT: vmov.u16 r1, q0[6]
-; CHECK-NEXT: sxth r1, r1
+; CHECK-NEXT: vmov.s16 r1, q0[6]
; CHECK-NEXT: adds r2, r2, r1
; CHECK-NEXT: adc.w r1, r0, r1, asr #31
-; CHECK-NEXT: vmov.u16 r0, q0[7]
-; CHECK-NEXT: sxth r3, r0
; CHECK-NEXT: adds r0, r2, r3
; CHECK-NEXT: adc.w r1, r1, r3, asr #31
; CHECK-NEXT: bx lr
@@ -545,10 +537,8 @@ entry:
define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
; CHECK-LABEL: add_v16i8_v16i64_sext:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.u8 r0, q0[1]
-; CHECK-NEXT: vmov.u8 r1, q0[0]
-; CHECK-NEXT: sxtb r0, r0
-; CHECK-NEXT: sxtb r1, r1
+; CHECK-NEXT: vmov.s8 r0, q0[1]
+; CHECK-NEXT: vmov.s8 r1, q0[0]
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
; CHECK-NEXT: asrs r2, r0, #31
; CHECK-NEXT: asrs r1, r1, #31
@@ -557,11 +547,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
; CHECK-NEXT: vmov r3, s4
; CHECK-NEXT: vmov r1, s5
; CHECK-NEXT: adds r2, r2, r3
-; CHECK-NEXT: vmov.u8 r3, q0[2]
+; CHECK-NEXT: vmov.s8 r3, q0[2]
; CHECK-NEXT: adc.w r12, r1, r0, asr #31
-; CHECK-NEXT: vmov.u8 r1, q0[3]
-; CHECK-NEXT: sxtb r1, r1
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r1, q0[3]
; CHECK-NEXT: vmov q1[2], q1[0], r3, r1
; CHECK-NEXT: asrs r0, r1, #31
; CHECK-NEXT: asrs r3, r3, #31
@@ -572,11 +560,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
; CHECK-NEXT: vmov r3, s6
; CHECK-NEXT: adc.w r0, r0, r12
; CHECK-NEXT: adds r2, r2, r3
-; CHECK-NEXT: vmov.u8 r3, q0[4]
+; CHECK-NEXT: vmov.s8 r3, q0[4]
; CHECK-NEXT: adc.w r12, r0, r1, asr #31
-; CHECK-NEXT: vmov.u8 r1, q0[5]
-; CHECK-NEXT: sxtb r1, r1
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r1, q0[5]
; CHECK-NEXT: vmov q1[2], q1[0], r3, r1
; CHECK-NEXT: asrs r0, r1, #31
; CHECK-NEXT: asrs r3, r3, #31
@@ -587,11 +573,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
; CHECK-NEXT: vmov r3, s6
; CHECK-NEXT: adc.w r0, r0, r12
; CHECK-NEXT: adds r2, r2, r3
-; CHECK-NEXT: vmov.u8 r3, q0[6]
+; CHECK-NEXT: vmov.s8 r3, q0[6]
; CHECK-NEXT: adc.w r12, r0, r1, asr #31
-; CHECK-NEXT: vmov.u8 r1, q0[7]
-; CHECK-NEXT: sxtb r1, r1
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r1, q0[7]
; CHECK-NEXT: vmov q1[2], q1[0], r3, r1
; CHECK-NEXT: asrs r0, r1, #31
; CHECK-NEXT: asrs r3, r3, #31
@@ -602,11 +586,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
; CHECK-NEXT: vmov r3, s6
; CHECK-NEXT: adc.w r0, r0, r12
; CHECK-NEXT: adds r2, r2, r3
-; CHECK-NEXT: vmov.u8 r3, q0[8]
+; CHECK-NEXT: vmov.s8 r3, q0[8]
; CHECK-NEXT: adc.w r12, r0, r1, asr #31
-; CHECK-NEXT: vmov.u8 r1, q0[9]
-; CHECK-NEXT: sxtb r1, r1
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r1, q0[9]
; CHECK-NEXT: vmov q1[2], q1[0], r3, r1
; CHECK-NEXT: asrs r0, r1, #31
; CHECK-NEXT: asrs r3, r3, #31
@@ -617,11 +599,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
; CHECK-NEXT: vmov r3, s6
; CHECK-NEXT: adc.w r0, r0, r12
; CHECK-NEXT: adds r2, r2, r3
-; CHECK-NEXT: vmov.u8 r3, q0[10]
+; CHECK-NEXT: vmov.s8 r3, q0[10]
; CHECK-NEXT: adc.w r12, r0, r1, asr #31
-; CHECK-NEXT: vmov.u8 r1, q0[11]
-; CHECK-NEXT: sxtb r1, r1
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r1, q0[11]
; CHECK-NEXT: vmov q1[2], q1[0], r3, r1
; CHECK-NEXT: asrs r0, r1, #31
; CHECK-NEXT: asrs r3, r3, #31
@@ -632,11 +612,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
; CHECK-NEXT: vmov r3, s6
; CHECK-NEXT: adc.w r0, r0, r12
; CHECK-NEXT: adds r2, r2, r3
-; CHECK-NEXT: vmov.u8 r3, q0[12]
+; CHECK-NEXT: vmov.s8 r3, q0[12]
; CHECK-NEXT: adc.w r12, r0, r1, asr #31
-; CHECK-NEXT: vmov.u8 r1, q0[13]
-; CHECK-NEXT: sxtb r1, r1
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r1, q0[13]
; CHECK-NEXT: vmov q1[2], q1[0], r3, r1
; CHECK-NEXT: asrs r0, r1, #31
; CHECK-NEXT: asrs r3, r3, #31
@@ -647,13 +625,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
; CHECK-NEXT: vmov r3, s6
; CHECK-NEXT: adc.w r0, r0, r12
; CHECK-NEXT: adds r2, r2, r3
+; CHECK-NEXT: vmov.s8 r3, q0[15]
; CHECK-NEXT: adc.w r0, r0, r1, asr #31
-; CHECK-NEXT: vmov.u8 r1, q0[14]
-; CHECK-NEXT: sxtb r1, r1
+; CHECK-NEXT: vmov.s8 r1, q0[14]
; CHECK-NEXT: adds r2, r2, r1
; CHECK-NEXT: adc.w r1, r0, r1, asr #31
-; CHECK-NEXT: vmov.u8 r0, q0[15]
-; CHECK-NEXT: sxtb r3, r0
; CHECK-NEXT: adds r0, r2, r3
; CHECK-NEXT: adc.w r1, r1, r3, asr #31
; CHECK-NEXT: bx lr
@@ -1051,10 +1027,8 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, i64 %a) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
-; CHECK-NEXT: vmov.u16 r2, q0[1]
-; CHECK-NEXT: vmov.u16 r3, q0[0]
-; CHECK-NEXT: sxth r2, r2
-; CHECK-NEXT: sxth r3, r3
+; CHECK-NEXT: vmov.s16 r2, q0[1]
+; CHECK-NEXT: vmov.s16 r3, q0[0]
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
; CHECK-NEXT: asr.w r12, r2, #31
; CHECK-NEXT: asrs r3, r3, #31
@@ -1063,11 +1037,9 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, i64 %a) {
; CHECK-NEXT: vmov r3, s4
; CHECK-NEXT: vmov r12, s5
; CHECK-NEXT: adds.w lr, lr, r3
-; CHECK-NEXT: vmov.u16 r3, q0[2]
+; CHECK-NEXT: vmov.s16 r3, q0[2]
; CHECK-NEXT: adc.w r12, r12, r2, asr #31
-; CHECK-NEXT: vmov.u16 r2, q0[3]
-; CHECK-NEXT: sxth r2, r2
-; CHECK-NEXT: sxth r3, r3
+; CHECK-NEXT: vmov.s16 r2, q0[3]
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
; CHECK-NEXT: asrs r4, r2, #31
; CHECK-NEXT: asrs r3, r3, #31
@@ -1078,11 +1050,9 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, i64 %a) {
; CHECK-NEXT: adc.w r12, r12, r3
; CHECK-NEXT: vmov r3, s6
; CHECK-NEXT: adds.w lr, r4, r3
-; CHECK-NEXT: vmov.u16 r4, q0[5]
+; CHECK-NEXT: vmov.s16 r4, q0[5]
; CHECK-NEXT: adc.w r12, r12, r2, asr #31
-; CHECK-NEXT: vmov.u16 r2, q0[4]
-; CHECK-NEXT: sxth r4, r4
-; CHECK-NEXT: sxth r2, r2
+; CHECK-NEXT: vmov.s16 r2, q0[4]
; CHECK-NEXT: vmov q1[2], q1[0], r2, r4
; CHECK-NEXT: asrs r3, r4, #31
; CHECK-NEXT: asrs r2, r2, #31
@@ -1094,12 +1064,10 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, i64 %a) {
; CHECK-NEXT: vmov r2, s6
; CHECK-NEXT: adds r2, r2, r3
; CHECK-NEXT: adc.w r3, r12, r4, asr #31
-; CHECK-NEXT: vmov.u16 r4, q0[6]
-; CHECK-NEXT: sxth r4, r4
+; CHECK-NEXT: vmov.s16 r4, q0[6]
; CHECK-NEXT: adds r2, r2, r4
; CHECK-NEXT: adc.w r3, r3, r4, asr #31
-; CHECK-NEXT: vmov.u16 r4, q0[7]
-; CHECK-NEXT: sxth r4, r4
+; CHECK-NEXT: vmov.s16 r4, q0[7]
; CHECK-NEXT: adds r2, r2, r4
; CHECK-NEXT: adc.w r3, r3, r4, asr #31
; CHECK-NEXT: adds r0, r0, r2
@@ -1447,10 +1415,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
-; CHECK-NEXT: vmov.u8 r2, q0[1]
-; CHECK-NEXT: vmov.u8 r3, q0[0]
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r2, q0[1]
+; CHECK-NEXT: vmov.s8 r3, q0[0]
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
; CHECK-NEXT: asr.w r12, r2, #31
; CHECK-NEXT: asrs r3, r3, #31
@@ -1459,11 +1425,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
; CHECK-NEXT: vmov r3, s4
; CHECK-NEXT: vmov r12, s5
; CHECK-NEXT: adds.w lr, lr, r3
-; CHECK-NEXT: vmov.u8 r3, q0[2]
+; CHECK-NEXT: vmov.s8 r3, q0[2]
; CHECK-NEXT: adc.w r12, r12, r2, asr #31
-; CHECK-NEXT: vmov.u8 r2, q0[3]
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r2, q0[3]
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
; CHECK-NEXT: asrs r4, r2, #31
; CHECK-NEXT: asrs r3, r3, #31
@@ -1474,11 +1438,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
; CHECK-NEXT: adc.w r12, r12, r3
; CHECK-NEXT: vmov r3, s6
; CHECK-NEXT: adds.w lr, r4, r3
-; CHECK-NEXT: vmov.u8 r4, q0[5]
+; CHECK-NEXT: vmov.s8 r4, q0[5]
; CHECK-NEXT: adc.w r12, r12, r2, asr #31
-; CHECK-NEXT: vmov.u8 r2, q0[4]
-; CHECK-NEXT: sxtb r4, r4
-; CHECK-NEXT: sxtb r2, r2
+; CHECK-NEXT: vmov.s8 r2, q0[4]
; CHECK-NEXT: vmov q1[2], q1[0], r2, r4
; CHECK-NEXT: asrs r3, r4, #31
; CHECK-NEXT: asrs r2, r2, #31
@@ -1489,11 +1451,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
; CHECK-NEXT: adc.w r12, r12, r2
; CHECK-NEXT: vmov r2, s6
; CHECK-NEXT: adds.w lr, r3, r2
-; CHECK-NEXT: vmov.u8 r2, q0[6]
+; CHECK-NEXT: vmov.s8 r2, q0[6]
; CHECK-NEXT: adc.w r12, r12, r4, asr #31
-; CHECK-NEXT: vmov.u8 r4, q0[7]
-; CHECK-NEXT: sxtb r4, r4
-; CHECK-NEXT: sxtb r2, r2
+; CHECK-NEXT: vmov.s8 r4, q0[7]
; CHECK-NEXT: vmov q1[2], q1[0], r2, r4
; CHECK-NEXT: asrs r3, r4, #31
; CHECK-NEXT: asrs r2, r2, #31
@@ -1504,11 +1464,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
; CHECK-NEXT: adc.w r12, r12, r2
; CHECK-NEXT: vmov r2, s6
; CHECK-NEXT: adds.w lr, r3, r2
-; CHECK-NEXT: vmov.u8 r2, q0[8]
+; CHECK-NEXT: vmov.s8 r2, q0[8]
; CHECK-NEXT: adc.w r12, r12, r4, asr #31
-; CHECK-NEXT: vmov.u8 r4, q0[9]
-; CHECK-NEXT: sxtb r4, r4
-; CHECK-NEXT: sxtb r2, r2
+; CHECK-NEXT: vmov.s8 r4, q0[9]
; CHECK-NEXT: vmov q1[2], q1[0], r2, r4
; CHECK-NEXT: asrs r3, r4, #31
; CHECK-NEXT: asrs r2, r2, #31
@@ -1519,11 +1477,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
; CHECK-NEXT: adc.w r12, r12, r2
; CHECK-NEXT: vmov r2, s6
; CHECK-NEXT: adds.w lr, r3, r2
-; CHECK-NEXT: vmov.u8 r2, q0[10]
+; CHECK-NEXT: vmov.s8 r2, q0[10]
; CHECK-NEXT: adc.w r12, r12, r4, asr #31
-; CHECK-NEXT: vmov.u8 r4, q0[11]
-; CHECK-NEXT: sxtb r4, r4
-; CHECK-NEXT: sxtb r2, r2
+; CHECK-NEXT: vmov.s8 r4, q0[11]
; CHECK-NEXT: vmov q1[2], q1[0], r2, r4
; CHECK-NEXT: asrs r3, r4, #31
; CHECK-NEXT: asrs r2, r2, #31
@@ -1534,11 +1490,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
; CHECK-NEXT: adc.w r12, r12, r2
; CHECK-NEXT: vmov r2, s6
; CHECK-NEXT: adds.w lr, r3, r2
-; CHECK-NEXT: vmov.u8 r2, q0[12]
+; CHECK-NEXT: vmov.s8 r2, q0[12]
; CHECK-NEXT: adc.w r12, r12, r4, asr #31
-; CHECK-NEXT: vmov.u8 r4, q0[13]
-; CHECK-NEXT: sxtb r4, r4
-; CHECK-NEXT: sxtb r2, r2
+; CHECK-NEXT: vmov.s8 r4, q0[13]
; CHECK-NEXT: vmov q1[2], q1[0], r2, r4
; CHECK-NEXT: asrs r3, r4, #31
; CHECK-NEXT: asrs r2, r2, #31
@@ -1550,12 +1504,10 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
; CHECK-NEXT: vmov r2, s6
; CHECK-NEXT: adds r2, r2, r3
; CHECK-NEXT: adc.w r3, r12, r4, asr #31
-; CHECK-NEXT: vmov.u8 r4, q0[14]
-; CHECK-NEXT: sxtb r4, r4
+; CHECK-NEXT: vmov.s8 r4, q0[14]
; CHECK-NEXT: adds r2, r2, r4
; CHECK-NEXT: adc.w r3, r3, r4, asr #31
-; CHECK-NEXT: vmov.u8 r4, q0[15]
-; CHECK-NEXT: sxtb r4, r4
+; CHECK-NEXT: vmov.s8 r4, q0[15]
; CHECK-NEXT: adds r2, r2, r4
; CHECK-NEXT: adc.w r3, r3, r4, asr #31
; CHECK-NEXT: adds r0, r0, r2
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll
index dc120b98961b..0d6d88c13ab7 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll
@@ -317,10 +317,8 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x, <8 x i16> %b) {
; CHECK-NEXT: rsbs r1, r1, #0
; CHECK-NEXT: vmov q2[2], q2[0], r2, r1
; CHECK-NEXT: vmov q2[3], q2[1], r2, r1
-; CHECK-NEXT: vmov.u16 r1, q0[1]
-; CHECK-NEXT: vmov.u16 r2, q0[0]
-; CHECK-NEXT: sxth r1, r1
-; CHECK-NEXT: sxth r2, r2
+; CHECK-NEXT: vmov.s16 r1, q0[1]
+; CHECK-NEXT: vmov.s16 r2, q0[0]
; CHECK-NEXT: vmov q3[2], q3[0], r2, r1
; CHECK-NEXT: asrs r1, r1, #31
; CHECK-NEXT: asrs r2, r2, #31
@@ -338,10 +336,8 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x, <8 x i16> %b) {
; CHECK-NEXT: adc.w r2, r2, r12
; CHECK-NEXT: vmov q2[2], q2[0], r0, r3
; CHECK-NEXT: vmov q2[3], q2[1], r0, r3
-; CHECK-NEXT: vmov.u16 r0, q0[3]
-; CHECK-NEXT: vmov.u16 r3, q0[2]
-; CHECK-NEXT: sxth r0, r0
-; CHECK-NEXT: sxth r3, r3
+; CHECK-NEXT: vmov.s16 r0, q0[3]
+; CHECK-NEXT: vmov.s16 r3, q0[2]
; CHECK-NEXT: vmov q3[2], q3[0], r3, r0
; CHECK-NEXT: asrs r0, r0, #31
; CHECK-NEXT: asrs r3, r3, #31
@@ -369,10 +365,8 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x, <8 x i16> %b) {
; CHECK-NEXT: rsbs r3, r3, #0
; CHECK-NEXT: vmov q1[2], q1[0], r0, r3
; CHECK-NEXT: vmov q1[3], q1[1], r0, r3
-; CHECK-NEXT: vmov.u16 r0, q0[5]
-; CHECK-NEXT: vmov.u16 r3, q0[4]
-; CHECK-NEXT: sxth r0, r0
-; CHECK-NEXT: sxth r3, r3
+; CHECK-NEXT: vmov.s16 r0, q0[5]
+; CHECK-NEXT: vmov.s16 r3, q0[4]
; CHECK-NEXT: vmov q2[2], q2[0], r3, r0
; CHECK-NEXT: asrs r0, r0, #31
; CHECK-NEXT: asrs r3, r3, #31
@@ -392,10 +386,8 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x, <8 x i16> %b) {
; CHECK-NEXT: adc.w r1, r1, r12
; CHECK-NEXT: vmov q1[2], q1[0], r2, r3
; CHECK-NEXT: vmov q1[3], q1[1], r2, r3
-; CHECK-NEXT: vmov.u16 r2, q0[7]
-; CHECK-NEXT: vmov.u16 r3, q0[6]
-; CHECK-NEXT: sxth r2, r2
-; CHECK-NEXT: sxth r3, r3
+; CHECK-NEXT: vmov.s16 r2, q0[7]
+; CHECK-NEXT: vmov.s16 r3, q0[6]
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
; CHECK-NEXT: asrs r2, r2, #31
; CHECK-NEXT: asrs r3, r3, #31
@@ -1050,10 +1042,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %b) {
; CHECK-NEXT: rsbs r1, r1, #0
; CHECK-NEXT: vmov q5[2], q5[0], r2, r1
; CHECK-NEXT: vmov q5[3], q5[1], r2, r1
-; CHECK-NEXT: vmov.u8 r1, q0[1]
-; CHECK-NEXT: vmov.u8 r2, q0[0]
-; CHECK-NEXT: sxtb r1, r1
-; CHECK-NEXT: sxtb r2, r2
+; CHECK-NEXT: vmov.s8 r1, q0[1]
+; CHECK-NEXT: vmov.s8 r2, q0[0]
; CHECK-NEXT: vmov q6[2], q6[0], r2, r1
; CHECK-NEXT: asrs r1, r1, #31
; CHECK-NEXT: asrs r2, r2, #31
@@ -1071,10 +1061,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %b) {
; CHECK-NEXT: adc.w r2, r2, r12
; CHECK-NEXT: vmov q5[2], q5[0], r0, r3
; CHECK-NEXT: vmov q5[3], q5[1], r0, r3
-; CHECK-NEXT: vmov.u8 r0, q0[3]
-; CHECK-NEXT: vmov.u8 r3, q0[2]
-; CHECK-NEXT: sxtb r0, r0
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r0, q0[3]
+; CHECK-NEXT: vmov.s8 r3, q0[2]
; CHECK-NEXT: vmov q6[2], q6[0], r3, r0
; CHECK-NEXT: asrs r0, r0, #31
; CHECK-NEXT: asrs r3, r3, #31
@@ -1102,10 +1090,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %b) {
; CHECK-NEXT: rsbs r3, r3, #0
; CHECK-NEXT: vmov q4[2], q4[0], r0, r3
; CHECK-NEXT: vmov q4[3], q4[1], r0, r3
-; CHECK-NEXT: vmov.u8 r0, q0[5]
-; CHECK-NEXT: vmov.u8 r3, q0[4]
-; CHECK-NEXT: sxtb r0, r0
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r0, q0[5]
+; CHECK-NEXT: vmov.s8 r3, q0[4]
; CHECK-NEXT: vmov q5[2], q5[0], r3, r0
; CHECK-NEXT: asrs r0, r0, #31
; CHECK-NEXT: asrs r3, r3, #31
@@ -1125,10 +1111,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %b) {
; CHECK-NEXT: adc.w r1, r1, r12
; CHECK-NEXT: vmov q4[2], q4[0], r2, r3
; CHECK-NEXT: vmov q4[3], q4[1], r2, r3
-; CHECK-NEXT: vmov.u8 r2, q0[7]
-; CHECK-NEXT: vmov.u8 r3, q0[6]
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r2, q0[7]
+; CHECK-NEXT: vmov.s8 r3, q0[6]
; CHECK-NEXT: vmov q5[2], q5[0], r3, r2
; CHECK-NEXT: asrs r2, r2, #31
; CHECK-NEXT: asrs r3, r3, #31
@@ -1174,10 +1158,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %b) {
; CHECK-NEXT: rsbs r3, r3, #0
; CHECK-NEXT: vmov q2[2], q2[0], r0, r3
; CHECK-NEXT: vmov q2[3], q2[1], r0, r3
-; CHECK-NEXT: vmov.u8 r0, q0[9]
-; CHECK-NEXT: vmov.u8 r3, q0[8]
-; CHECK-NEXT: sxtb r0, r0
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r0, q0[9]
+; CHECK-NEXT: vmov.s8 r3, q0[8]
; CHECK-NEXT: vmov q3[2], q3[0], r3, r0
; CHECK-NEXT: asrs r0, r0, #31
; CHECK-NEXT: asrs r3, r3, #31
@@ -1197,10 +1179,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %b) {
; CHECK-NEXT: adc.w r1, r1, r12
; CHECK-NEXT: vmov q2[2], q2[0], r2, r3
; CHECK-NEXT: vmov q2[3], q2[1], r2, r3
-; CHECK-NEXT: vmov.u8 r2, q0[11]
-; CHECK-NEXT: vmov.u8 r3, q0[10]
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r2, q0[11]
+; CHECK-NEXT: vmov.s8 r3, q0[10]
; CHECK-NEXT: vmov q3[2], q3[0], r3, r2
; CHECK-NEXT: asrs r2, r2, #31
; CHECK-NEXT: asrs r3, r3, #31
@@ -1228,10 +1208,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %b) {
; CHECK-NEXT: rsbs r3, r3, #0
; CHECK-NEXT: vmov q1[2], q1[0], r0, r3
; CHECK-NEXT: vmov q1[3], q1[1], r0, r3
-; CHECK-NEXT: vmov.u8 r0, q0[13]
-; CHECK-NEXT: vmov.u8 r3, q0[12]
-; CHECK-NEXT: sxtb r0, r0
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r0, q0[13]
+; CHECK-NEXT: vmov.s8 r3, q0[12]
; CHECK-NEXT: vmov q2[2], q2[0], r3, r0
; CHECK-NEXT: asrs r0, r0, #31
; CHECK-NEXT: asrs r3, r3, #31
@@ -1251,10 +1229,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %b) {
; CHECK-NEXT: adc.w r1, r1, r12
; CHECK-NEXT: vmov q1[2], q1[0], r2, r3
; CHECK-NEXT: vmov q1[3], q1[1], r2, r3
-; CHECK-NEXT: vmov.u8 r2, q0[15]
-; CHECK-NEXT: vmov.u8 r3, q0[14]
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r2, q0[15]
+; CHECK-NEXT: vmov.s8 r3, q0[14]
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
; CHECK-NEXT: asrs r2, r2, #31
; CHECK-NEXT: asrs r3, r3, #31
@@ -1988,39 +1964,35 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, <8 x i16> %b,
; CHECK-NEXT: vmov.u16 r2, q1[3]
; CHECK-NEXT: vmov.u16 r3, q1[1]
; CHECK-NEXT: vmov q2[3], q2[1], r3, r2
+; CHECK-NEXT: vmov.s16 r2, q0[0]
; CHECK-NEXT: vcmp.i32 ne, q2, zr
-; CHECK-NEXT: vmrs r12, p0
-; CHECK-NEXT: and r2, r12, #1
-; CHECK-NEXT: ubfx r3, r12, #4, #1
-; CHECK-NEXT: rsbs r2, r2, #0
+; CHECK-NEXT: vmrs lr, p0
+; CHECK-NEXT: ubfx r3, lr, #4, #1
+; CHECK-NEXT: rsb.w r12, r3, #0
+; CHECK-NEXT: and r3, lr, #1
; CHECK-NEXT: rsbs r3, r3, #0
-; CHECK-NEXT: vmov q2[2], q2[0], r2, r3
-; CHECK-NEXT: vmov q2[3], q2[1], r2, r3
-; CHECK-NEXT: vmov.u16 r2, q0[1]
-; CHECK-NEXT: vmov.u16 r3, q0[0]
-; CHECK-NEXT: sxth r2, r2
-; CHECK-NEXT: sxth r3, r3
-; CHECK-NEXT: vmov q3[2], q3[0], r3, r2
-; CHECK-NEXT: asrs r2, r2, #31
+; CHECK-NEXT: vmov q2[2], q2[0], r3, r12
+; CHECK-NEXT: vmov q2[3], q2[1], r3, r12
+; CHECK-NEXT: vmov.s16 r3, q0[1]
+; CHECK-NEXT: vmov q3[2], q3[0], r2, r3
; CHECK-NEXT: asrs r3, r3, #31
-; CHECK-NEXT: vmov q3[3], q3[1], r3, r2
+; CHECK-NEXT: asrs r2, r2, #31
+; CHECK-NEXT: vmov q3[3], q3[1], r2, r3
; CHECK-NEXT: vand q2, q3, q2
; CHECK-NEXT: vmov r2, s10
; CHECK-NEXT: vmov r4, s8
-; CHECK-NEXT: vmov lr, s11
+; CHECK-NEXT: vmov r12, s11
; CHECK-NEXT: vmov r3, s9
; CHECK-NEXT: adds r5, r4, r2
-; CHECK-NEXT: ubfx r4, r12, #12, #1
-; CHECK-NEXT: ubfx r2, r12, #8, #1
+; CHECK-NEXT: ubfx r4, lr, #12, #1
+; CHECK-NEXT: ubfx r2, lr, #8, #1
; CHECK-NEXT: rsb.w r4, r4, #0
; CHECK-NEXT: rsb.w r2, r2, #0
-; CHECK-NEXT: adc.w r3, r3, lr
+; CHECK-NEXT: adc.w r3, r3, r12
; CHECK-NEXT: vmov q2[2], q2[0], r2, r4
; CHECK-NEXT: vmov q2[3], q2[1], r2, r4
-; CHECK-NEXT: vmov.u16 r2, q0[3]
-; CHECK-NEXT: vmov.u16 r4, q0[2]
-; CHECK-NEXT: sxth r2, r2
-; CHECK-NEXT: sxth r4, r4
+; CHECK-NEXT: vmov.s16 r2, q0[3]
+; CHECK-NEXT: vmov.s16 r4, q0[2]
; CHECK-NEXT: vmov q3[2], q3[0], r4, r2
; CHECK-NEXT: asrs r2, r2, #31
; CHECK-NEXT: asrs r4, r4, #31
@@ -2048,10 +2020,8 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, <8 x i16> %b,
; CHECK-NEXT: rsbs r4, r4, #0
; CHECK-NEXT: vmov q1[2], q1[0], r2, r4
; CHECK-NEXT: vmov q1[3], q1[1], r2, r4
-; CHECK-NEXT: vmov.u16 r2, q0[5]
-; CHECK-NEXT: vmov.u16 r4, q0[4]
-; CHECK-NEXT: sxth r2, r2
-; CHECK-NEXT: sxth r4, r4
+; CHECK-NEXT: vmov.s16 r2, q0[5]
+; CHECK-NEXT: vmov.s16 r4, q0[4]
; CHECK-NEXT: vmov q2[2], q2[0], r4, r2
; CHECK-NEXT: asrs r2, r2, #31
; CHECK-NEXT: asrs r4, r4, #31
@@ -2071,10 +2041,8 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, <8 x i16> %b,
; CHECK-NEXT: adc.w r3, r3, r12
; CHECK-NEXT: vmov q1[2], q1[0], r5, r4
; CHECK-NEXT: vmov q1[3], q1[1], r5, r4
-; CHECK-NEXT: vmov.u16 r5, q0[7]
-; CHECK-NEXT: vmov.u16 r4, q0[6]
-; CHECK-NEXT: sxth r5, r5
-; CHECK-NEXT: sxth r4, r4
+; CHECK-NEXT: vmov.s16 r5, q0[7]
+; CHECK-NEXT: vmov.s16 r4, q0[6]
; CHECK-NEXT: vmov q0[2], q0[0], r4, r5
; CHECK-NEXT: asrs r5, r5, #31
; CHECK-NEXT: asrs r4, r4, #31
@@ -2611,39 +2579,35 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %b
; CHECK-NEXT: vmov.u16 r2, q4[3]
; CHECK-NEXT: vmov.u16 r3, q4[1]
; CHECK-NEXT: vmov q5[3], q5[1], r3, r2
+; CHECK-NEXT: vmov.s8 r2, q0[0]
; CHECK-NEXT: vcmp.i32 ne, q5, zr
-; CHECK-NEXT: vmrs r12, p0
-; CHECK-NEXT: and r2, r12, #1
-; CHECK-NEXT: ubfx r3, r12, #4, #1
-; CHECK-NEXT: rsbs r2, r2, #0
+; CHECK-NEXT: vmrs lr, p0
+; CHECK-NEXT: ubfx r3, lr, #4, #1
+; CHECK-NEXT: rsb.w r12, r3, #0
+; CHECK-NEXT: and r3, lr, #1
; CHECK-NEXT: rsbs r3, r3, #0
-; CHECK-NEXT: vmov q5[2], q5[0], r2, r3
-; CHECK-NEXT: vmov q5[3], q5[1], r2, r3
-; CHECK-NEXT: vmov.u8 r2, q0[1]
-; CHECK-NEXT: vmov.u8 r3, q0[0]
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r3, r3
-; CHECK-NEXT: vmov q6[2], q6[0], r3, r2
-; CHECK-NEXT: asrs r2, r2, #31
+; CHECK-NEXT: vmov q5[2], q5[0], r3, r12
+; CHECK-NEXT: vmov q5[3], q5[1], r3, r12
+; CHECK-NEXT: vmov.s8 r3, q0[1]
+; CHECK-NEXT: vmov q6[2], q6[0], r2, r3
; CHECK-NEXT: asrs r3, r3, #31
-; CHECK-NEXT: vmov q6[3], q6[1], r3, r2
+; CHECK-NEXT: asrs r2, r2, #31
+; CHECK-NEXT: vmov q6[3], q6[1], r2, r3
; CHECK-NEXT: vand q5, q6, q5
; CHECK-NEXT: vmov r2, s22
; CHECK-NEXT: vmov r4, s20
-; CHECK-NEXT: vmov lr, s23
+; CHECK-NEXT: vmov r12, s23
; CHECK-NEXT: vmov r3, s21
; CHECK-NEXT: adds r5, r4, r2
-; CHECK-NEXT: ubfx r4, r12, #12, #1
-; CHECK-NEXT: ubfx r2, r12, #8, #1
+; CHECK-NEXT: ubfx r4, lr, #12, #1
+; CHECK-NEXT: ubfx r2, lr, #8, #1
; CHECK-NEXT: rsb.w r4, r4, #0
; CHECK-NEXT: rsb.w r2, r2, #0
-; CHECK-NEXT: adc.w r3, r3, lr
+; CHECK-NEXT: adc.w r3, r3, r12
; CHECK-NEXT: vmov q5[2], q5[0], r2, r4
; CHECK-NEXT: vmov q5[3], q5[1], r2, r4
-; CHECK-NEXT: vmov.u8 r2, q0[3]
-; CHECK-NEXT: vmov.u8 r4, q0[2]
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r4, r4
+; CHECK-NEXT: vmov.s8 r2, q0[3]
+; CHECK-NEXT: vmov.s8 r4, q0[2]
; CHECK-NEXT: vmov q6[2], q6[0], r4, r2
; CHECK-NEXT: asrs r2, r2, #31
; CHECK-NEXT: asrs r4, r4, #31
@@ -2671,10 +2635,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %b
; CHECK-NEXT: rsbs r4, r4, #0
; CHECK-NEXT: vmov q4[2], q4[0], r2, r4
; CHECK-NEXT: vmov q4[3], q4[1], r2, r4
-; CHECK-NEXT: vmov.u8 r2, q0[5]
-; CHECK-NEXT: vmov.u8 r4, q0[4]
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r4, r4
+; CHECK-NEXT: vmov.s8 r2, q0[5]
+; CHECK-NEXT: vmov.s8 r4, q0[4]
; CHECK-NEXT: vmov q5[2], q5[0], r4, r2
; CHECK-NEXT: asrs r2, r2, #31
; CHECK-NEXT: asrs r4, r4, #31
@@ -2694,10 +2656,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %b
; CHECK-NEXT: adc.w r3, r3, r12
; CHECK-NEXT: vmov q4[2], q4[0], r5, r4
; CHECK-NEXT: vmov q4[3], q4[1], r5, r4
-; CHECK-NEXT: vmov.u8 r5, q0[7]
-; CHECK-NEXT: vmov.u8 r4, q0[6]
-; CHECK-NEXT: sxtb r5, r5
-; CHECK-NEXT: sxtb r4, r4
+; CHECK-NEXT: vmov.s8 r5, q0[7]
+; CHECK-NEXT: vmov.s8 r4, q0[6]
; CHECK-NEXT: vmov q5[2], q5[0], r4, r5
; CHECK-NEXT: asrs r5, r5, #31
; CHECK-NEXT: asrs r4, r4, #31
@@ -2743,10 +2703,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %b
; CHECK-NEXT: rsbs r4, r4, #0
; CHECK-NEXT: vmov q2[2], q2[0], r2, r4
; CHECK-NEXT: vmov q2[3], q2[1], r2, r4
-; CHECK-NEXT: vmov.u8 r2, q0[9]
-; CHECK-NEXT: vmov.u8 r4, q0[8]
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r4, r4
+; CHECK-NEXT: vmov.s8 r2, q0[9]
+; CHECK-NEXT: vmov.s8 r4, q0[8]
; CHECK-NEXT: vmov q3[2], q3[0], r4, r2
; CHECK-NEXT: asrs r2, r2, #31
; CHECK-NEXT: asrs r4, r4, #31
@@ -2766,10 +2724,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %b
; CHECK-NEXT: adc.w r3, r3, r12
; CHECK-NEXT: vmov q2[2], q2[0], r5, r4
; CHECK-NEXT: vmov q2[3], q2[1], r5, r4
-; CHECK-NEXT: vmov.u8 r5, q0[11]
-; CHECK-NEXT: vmov.u8 r4, q0[10]
-; CHECK-NEXT: sxtb r5, r5
-; CHECK-NEXT: sxtb r4, r4
+; CHECK-NEXT: vmov.s8 r5, q0[11]
+; CHECK-NEXT: vmov.s8 r4, q0[10]
; CHECK-NEXT: vmov q3[2], q3[0], r4, r5
; CHECK-NEXT: asrs r5, r5, #31
; CHECK-NEXT: asrs r4, r4, #31
@@ -2797,10 +2753,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %b
; CHECK-NEXT: rsbs r4, r4, #0
; CHECK-NEXT: vmov q1[2], q1[0], r2, r4
; CHECK-NEXT: vmov q1[3], q1[1], r2, r4
-; CHECK-NEXT: vmov.u8 r2, q0[13]
-; CHECK-NEXT: vmov.u8 r4, q0[12]
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r4, r4
+; CHECK-NEXT: vmov.s8 r2, q0[13]
+; CHECK-NEXT: vmov.s8 r4, q0[12]
; CHECK-NEXT: vmov q2[2], q2[0], r4, r2
; CHECK-NEXT: asrs r2, r2, #31
; CHECK-NEXT: asrs r4, r4, #31
@@ -2820,10 +2774,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %b
; CHECK-NEXT: adc.w r3, r3, r12
; CHECK-NEXT: vmov q1[2], q1[0], r5, r4
; CHECK-NEXT: vmov q1[3], q1[1], r5, r4
-; CHECK-NEXT: vmov.u8 r5, q0[15]
-; CHECK-NEXT: vmov.u8 r4, q0[14]
-; CHECK-NEXT: sxtb r5, r5
-; CHECK-NEXT: sxtb r4, r4
+; CHECK-NEXT: vmov.s8 r5, q0[15]
+; CHECK-NEXT: vmov.s8 r4, q0[14]
; CHECK-NEXT: vmov q0[2], q0[0], r4, r5
; CHECK-NEXT: asrs r5, r5, #31
; CHECK-NEXT: asrs r4, r4, #31
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll
index 82ef43d96fe3..0b157cf511db 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll
@@ -737,14 +737,10 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: vmov.u8 r0, q1[1]
-; CHECK-NEXT: vmov.u8 r1, q0[1]
-; CHECK-NEXT: vmov.u8 r2, q1[0]
-; CHECK-NEXT: vmov.u8 r3, q0[0]
-; CHECK-NEXT: sxtb r0, r0
-; CHECK-NEXT: sxtb r1, r1
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r0, q1[1]
+; CHECK-NEXT: vmov.s8 r1, q0[1]
+; CHECK-NEXT: vmov.s8 r2, q1[0]
+; CHECK-NEXT: vmov.s8 r3, q0[0]
; CHECK-NEXT: smull r0, r1, r1, r0
; CHECK-NEXT: smull r2, r3, r3, r2
; CHECK-NEXT: vmov q2[2], q2[0], r2, r0
@@ -753,15 +749,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) {
; CHECK-NEXT: vmov r3, s8
; CHECK-NEXT: vmov r0, s9
; CHECK-NEXT: adds.w lr, r3, r2
-; CHECK-NEXT: vmov.u8 r3, q0[3]
+; CHECK-NEXT: vmov.s8 r3, q0[3]
; CHECK-NEXT: adc.w r12, r0, r1
-; CHECK-NEXT: vmov.u8 r1, q1[3]
-; CHECK-NEXT: vmov.u8 r0, q1[2]
-; CHECK-NEXT: vmov.u8 r2, q0[2]
-; CHECK-NEXT: sxtb r1, r1
-; CHECK-NEXT: sxtb r3, r3
-; CHECK-NEXT: sxtb r0, r0
-; CHECK-NEXT: sxtb r2, r2
+; CHECK-NEXT: vmov.s8 r1, q1[3]
+; CHECK-NEXT: vmov.s8 r0, q1[2]
+; CHECK-NEXT: vmov.s8 r2, q0[2]
; CHECK-NEXT: smull r1, r3, r3, r1
; CHECK-NEXT: smull r0, r2, r2, r0
; CHECK-NEXT: vmov q2[2], q2[0], r0, r1
@@ -772,15 +764,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) {
; CHECK-NEXT: adds.w r1, r1, lr
; CHECK-NEXT: adc.w r0, r0, r12
; CHECK-NEXT: adds.w lr, r1, r2
-; CHECK-NEXT: vmov.u8 r2, q1[5]
+; CHECK-NEXT: vmov.s8 r2, q1[5]
; CHECK-NEXT: adc.w r12, r0, r3
-; CHECK-NEXT: vmov.u8 r3, q0[5]
-; CHECK-NEXT: vmov.u8 r0, q1[4]
-; CHECK-NEXT: vmov.u8 r1, q0[4]
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r3, r3
-; CHECK-NEXT: sxtb r0, r0
-; CHECK-NEXT: sxtb r1, r1
+; CHECK-NEXT: vmov.s8 r3, q0[5]
+; CHECK-NEXT: vmov.s8 r0, q1[4]
+; CHECK-NEXT: vmov.s8 r1, q0[4]
; CHECK-NEXT: smull r2, r3, r3, r2
; CHECK-NEXT: smull r0, r1, r1, r0
; CHECK-NEXT: vmov q2[2], q2[0], r0, r2
@@ -791,15 +779,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) {
; CHECK-NEXT: adds.w r1, r1, lr
; CHECK-NEXT: adc.w r0, r0, r12
; CHECK-NEXT: adds.w lr, r1, r2
-; CHECK-NEXT: vmov.u8 r2, q1[7]
+; CHECK-NEXT: vmov.s8 r2, q1[7]
; CHECK-NEXT: adc.w r12, r0, r3
-; CHECK-NEXT: vmov.u8 r3, q0[7]
-; CHECK-NEXT: vmov.u8 r0, q1[6]
-; CHECK-NEXT: vmov.u8 r1, q0[6]
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r3, r3
-; CHECK-NEXT: sxtb r0, r0
-; CHECK-NEXT: sxtb r1, r1
+; CHECK-NEXT: vmov.s8 r3, q0[7]
+; CHECK-NEXT: vmov.s8 r0, q1[6]
+; CHECK-NEXT: vmov.s8 r1, q0[6]
; CHECK-NEXT: smull r2, r3, r3, r2
; CHECK-NEXT: smull r0, r1, r1, r0
; CHECK-NEXT: vmov q2[2], q2[0], r0, r2
@@ -810,15 +794,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) {
; CHECK-NEXT: adds.w r1, r1, lr
; CHECK-NEXT: adc.w r0, r0, r12
; CHECK-NEXT: adds.w lr, r1, r2
-; CHECK-NEXT: vmov.u8 r2, q1[9]
+; CHECK-NEXT: vmov.s8 r2, q1[9]
; CHECK-NEXT: adc.w r12, r0, r3
-; CHECK-NEXT: vmov.u8 r3, q0[9]
-; CHECK-NEXT: vmov.u8 r0, q1[8]
-; CHECK-NEXT: vmov.u8 r1, q0[8]
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r3, r3
-; CHECK-NEXT: sxtb r0, r0
-; CHECK-NEXT: sxtb r1, r1
+; CHECK-NEXT: vmov.s8 r3, q0[9]
+; CHECK-NEXT: vmov.s8 r0, q1[8]
+; CHECK-NEXT: vmov.s8 r1, q0[8]
; CHECK-NEXT: smull r2, r3, r3, r2
; CHECK-NEXT: smull r0, r1, r1, r0
; CHECK-NEXT: vmov q2[2], q2[0], r0, r2
@@ -829,15 +809,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) {
; CHECK-NEXT: adds.w r1, r1, lr
; CHECK-NEXT: adc.w r0, r0, r12
; CHECK-NEXT: adds.w lr, r1, r2
-; CHECK-NEXT: vmov.u8 r2, q1[11]
+; CHECK-NEXT: vmov.s8 r2, q1[11]
; CHECK-NEXT: adc.w r12, r0, r3
-; CHECK-NEXT: vmov.u8 r3, q0[11]
-; CHECK-NEXT: vmov.u8 r0, q1[10]
-; CHECK-NEXT: vmov.u8 r1, q0[10]
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r3, r3
-; CHECK-NEXT: sxtb r0, r0
-; CHECK-NEXT: sxtb r1, r1
+; CHECK-NEXT: vmov.s8 r3, q0[11]
+; CHECK-NEXT: vmov.s8 r0, q1[10]
+; CHECK-NEXT: vmov.s8 r1, q0[10]
; CHECK-NEXT: smull r2, r3, r3, r2
; CHECK-NEXT: smull r0, r1, r1, r0
; CHECK-NEXT: vmov q2[2], q2[0], r0, r2
@@ -848,15 +824,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) {
; CHECK-NEXT: adds.w r1, r1, lr
; CHECK-NEXT: adc.w r0, r0, r12
; CHECK-NEXT: adds.w lr, r1, r2
-; CHECK-NEXT: vmov.u8 r2, q1[13]
+; CHECK-NEXT: vmov.s8 r2, q1[13]
; CHECK-NEXT: adc.w r12, r0, r3
-; CHECK-NEXT: vmov.u8 r3, q0[13]
-; CHECK-NEXT: vmov.u8 r0, q1[12]
-; CHECK-NEXT: vmov.u8 r1, q0[12]
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r3, r3
-; CHECK-NEXT: sxtb r0, r0
-; CHECK-NEXT: sxtb r1, r1
+; CHECK-NEXT: vmov.s8 r3, q0[13]
+; CHECK-NEXT: vmov.s8 r0, q1[12]
+; CHECK-NEXT: vmov.s8 r1, q0[12]
; CHECK-NEXT: smull r2, r3, r3, r2
; CHECK-NEXT: smull r0, r1, r1, r0
; CHECK-NEXT: vmov q2[2], q2[0], r0, r2
@@ -868,15 +840,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) {
; CHECK-NEXT: vmov r0, s10
; CHECK-NEXT: adds r0, r0, r1
; CHECK-NEXT: adc.w r1, r2, r3
-; CHECK-NEXT: vmov.u8 r2, q1[14]
-; CHECK-NEXT: vmov.u8 r3, q0[14]
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r2, q1[14]
+; CHECK-NEXT: vmov.s8 r3, q0[14]
; CHECK-NEXT: smlal r0, r1, r3, r2
-; CHECK-NEXT: vmov.u8 r2, q1[15]
-; CHECK-NEXT: vmov.u8 r3, q0[15]
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r2, q1[15]
+; CHECK-NEXT: vmov.s8 r3, q0[15]
; CHECK-NEXT: smlal r0, r1, r3, r2
; CHECK-NEXT: pop {r7, pc}
entry:
@@ -1690,20 +1658,14 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
-; CHECK-NEXT: vmov.u8 r2, q1[1]
-; CHECK-NEXT: vmov.u8 r3, q0[1]
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r2, q1[1]
+; CHECK-NEXT: vmov.s8 r3, q0[1]
; CHECK-NEXT: smull r12, r3, r3, r2
-; CHECK-NEXT: vmov.u8 r2, q1[0]
-; CHECK-NEXT: sxtb.w lr, r2
-; CHECK-NEXT: vmov.u8 r2, q0[0]
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: vmov.u8 r4, q1[2]
+; CHECK-NEXT: vmov.s8 lr, q1[0]
+; CHECK-NEXT: vmov.s8 r2, q0[0]
+; CHECK-NEXT: vmov.s8 r4, q1[2]
+; CHECK-NEXT: vmov.s8 r5, q0[2]
; CHECK-NEXT: smull r2, lr, r2, lr
-; CHECK-NEXT: vmov.u8 r5, q0[2]
-; CHECK-NEXT: sxtb r4, r4
-; CHECK-NEXT: sxtb r5, r5
; CHECK-NEXT: vmov q2[2], q2[0], r2, r12
; CHECK-NEXT: smull r4, r5, r5, r4
; CHECK-NEXT: vmov q2[3], q2[1], lr, r3
@@ -1711,11 +1673,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
; CHECK-NEXT: vmov r2, s8
; CHECK-NEXT: vmov r12, s9
; CHECK-NEXT: adds.w lr, lr, r2
-; CHECK-NEXT: vmov.u8 r2, q1[3]
+; CHECK-NEXT: vmov.s8 r2, q1[3]
; CHECK-NEXT: adc.w r12, r12, r3
-; CHECK-NEXT: vmov.u8 r3, q0[3]
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r3, q0[3]
; CHECK-NEXT: smull r2, r3, r3, r2
; CHECK-NEXT: vmov q2[2], q2[0], r4, r2
; CHECK-NEXT: vmov q2[3], q2[1], r5, r3
@@ -1725,15 +1685,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
; CHECK-NEXT: vmov r4, s10
; CHECK-NEXT: adc.w r2, r2, r12
; CHECK-NEXT: adds.w lr, r5, r4
-; CHECK-NEXT: vmov.u8 r4, q0[5]
+; CHECK-NEXT: vmov.s8 r4, q0[5]
; CHECK-NEXT: adc.w r12, r2, r3
-; CHECK-NEXT: vmov.u8 r3, q1[5]
-; CHECK-NEXT: vmov.u8 r2, q1[4]
-; CHECK-NEXT: vmov.u8 r5, q0[4]
-; CHECK-NEXT: sxtb r3, r3
-; CHECK-NEXT: sxtb r4, r4
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r5, r5
+; CHECK-NEXT: vmov.s8 r3, q1[5]
+; CHECK-NEXT: vmov.s8 r2, q1[4]
+; CHECK-NEXT: vmov.s8 r5, q0[4]
; CHECK-NEXT: smull r3, r4, r4, r3
; CHECK-NEXT: smull r2, r5, r5, r2
; CHECK-NEXT: vmov q2[2], q2[0], r2, r3
@@ -1744,15 +1700,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
; CHECK-NEXT: adds.w r3, r3, lr
; CHECK-NEXT: adc.w r2, r2, r12
; CHECK-NEXT: adds.w lr, r3, r5
-; CHECK-NEXT: vmov.u8 r5, q1[7]
+; CHECK-NEXT: vmov.s8 r5, q1[7]
; CHECK-NEXT: adc.w r12, r2, r4
-; CHECK-NEXT: vmov.u8 r4, q0[7]
-; CHECK-NEXT: vmov.u8 r2, q1[6]
-; CHECK-NEXT: vmov.u8 r3, q0[6]
-; CHECK-NEXT: sxtb r5, r5
-; CHECK-NEXT: sxtb r4, r4
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r4, q0[7]
+; CHECK-NEXT: vmov.s8 r2, q1[6]
+; CHECK-NEXT: vmov.s8 r3, q0[6]
; CHECK-NEXT: smull r5, r4, r4, r5
; CHECK-NEXT: smull r2, r3, r3, r2
; CHECK-NEXT: vmov q2[2], q2[0], r2, r5
@@ -1763,15 +1715,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
; CHECK-NEXT: adds.w r3, r3, lr
; CHECK-NEXT: adc.w r2, r2, r12
; CHECK-NEXT: adds.w lr, r3, r5
-; CHECK-NEXT: vmov.u8 r5, q1[9]
+; CHECK-NEXT: vmov.s8 r5, q1[9]
; CHECK-NEXT: adc.w r12, r2, r4
-; CHECK-NEXT: vmov.u8 r4, q0[9]
-; CHECK-NEXT: vmov.u8 r2, q1[8]
-; CHECK-NEXT: vmov.u8 r3, q0[8]
-; CHECK-NEXT: sxtb r5, r5
-; CHECK-NEXT: sxtb r4, r4
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r4, q0[9]
+; CHECK-NEXT: vmov.s8 r2, q1[8]
+; CHECK-NEXT: vmov.s8 r3, q0[8]
; CHECK-NEXT: smull r5, r4, r4, r5
; CHECK-NEXT: smull r2, r3, r3, r2
; CHECK-NEXT: vmov q2[2], q2[0], r2, r5
@@ -1782,15 +1730,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
; CHECK-NEXT: adds.w r3, r3, lr
; CHECK-NEXT: adc.w r2, r2, r12
; CHECK-NEXT: adds.w lr, r3, r5
-; CHECK-NEXT: vmov.u8 r5, q1[11]
+; CHECK-NEXT: vmov.s8 r5, q1[11]
; CHECK-NEXT: adc.w r12, r2, r4
-; CHECK-NEXT: vmov.u8 r4, q0[11]
-; CHECK-NEXT: vmov.u8 r2, q1[10]
-; CHECK-NEXT: vmov.u8 r3, q0[10]
-; CHECK-NEXT: sxtb r5, r5
-; CHECK-NEXT: sxtb r4, r4
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r4, q0[11]
+; CHECK-NEXT: vmov.s8 r2, q1[10]
+; CHECK-NEXT: vmov.s8 r3, q0[10]
; CHECK-NEXT: smull r5, r4, r4, r5
; CHECK-NEXT: smull r2, r3, r3, r2
; CHECK-NEXT: vmov q2[2], q2[0], r2, r5
@@ -1801,15 +1745,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
; CHECK-NEXT: adds.w r3, r3, lr
; CHECK-NEXT: adc.w r2, r2, r12
; CHECK-NEXT: adds.w lr, r3, r5
-; CHECK-NEXT: vmov.u8 r5, q1[13]
+; CHECK-NEXT: vmov.s8 r5, q1[13]
; CHECK-NEXT: adc.w r12, r2, r4
-; CHECK-NEXT: vmov.u8 r4, q0[13]
-; CHECK-NEXT: vmov.u8 r2, q1[12]
-; CHECK-NEXT: vmov.u8 r3, q0[12]
-; CHECK-NEXT: sxtb r5, r5
-; CHECK-NEXT: sxtb r4, r4
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r4, q0[13]
+; CHECK-NEXT: vmov.s8 r2, q1[12]
+; CHECK-NEXT: vmov.s8 r3, q0[12]
; CHECK-NEXT: smull r5, r4, r4, r5
; CHECK-NEXT: smull r2, r3, r3, r2
; CHECK-NEXT: vmov q2[2], q2[0], r2, r5
@@ -1820,16 +1760,12 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
; CHECK-NEXT: adds.w r3, r3, lr
; CHECK-NEXT: adc.w r2, r2, r12
; CHECK-NEXT: adds r3, r3, r5
-; CHECK-NEXT: vmov.u8 r5, q1[14]
+; CHECK-NEXT: vmov.s8 r5, q1[14]
; CHECK-NEXT: adcs r2, r4
-; CHECK-NEXT: vmov.u8 r4, q0[14]
-; CHECK-NEXT: sxtb r5, r5
-; CHECK-NEXT: sxtb r4, r4
+; CHECK-NEXT: vmov.s8 r4, q0[14]
; CHECK-NEXT: smlal r3, r2, r4, r5
-; CHECK-NEXT: vmov.u8 r5, q1[15]
-; CHECK-NEXT: vmov.u8 r4, q0[15]
-; CHECK-NEXT: sxtb r5, r5
-; CHECK-NEXT: sxtb r4, r4
+; CHECK-NEXT: vmov.s8 r5, q1[15]
+; CHECK-NEXT: vmov.s8 r4, q0[15]
; CHECK-NEXT: smlal r3, r2, r4, r5
; CHECK-NEXT: adds r0, r0, r3
; CHECK-NEXT: adcs r1, r2
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll
index f20dc9480ce1..69aa577149ca 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll
@@ -1127,11 +1127,10 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
; CHECK-NEXT: vcmp.i8 eq, q2, zr
; CHECK-NEXT: vmov.i8 q2, #0x0
; CHECK-NEXT: vmov.i8 q3, #0xff
-; CHECK-NEXT: vmov.u8 r3, q1[0]
+; CHECK-NEXT: vmov.s8 r3, q1[0]
; CHECK-NEXT: vpsel q4, q3, q2
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r4, q0[4]
; CHECK-NEXT: vmov.u8 r0, q4[0]
-; CHECK-NEXT: vmov.u8 r4, q0[4]
; CHECK-NEXT: vmov.16 q5[0], r0
; CHECK-NEXT: vmov.u8 r0, q4[1]
; CHECK-NEXT: vmov.16 q5[1], r0
@@ -1147,7 +1146,6 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
; CHECK-NEXT: vmov.16 q5[6], r0
; CHECK-NEXT: vmov.u8 r0, q4[7]
; CHECK-NEXT: vmov.16 q5[7], r0
-; CHECK-NEXT: sxtb r4, r4
; CHECK-NEXT: vcmp.i16 ne, q5, zr
; CHECK-NEXT: vpsel q5, q3, q2
; CHECK-NEXT: vmov.u16 r0, q5[2]
@@ -1164,13 +1162,10 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
; CHECK-NEXT: rsbs r1, r1, #0
; CHECK-NEXT: vmov q6[2], q6[0], r2, r1
; CHECK-NEXT: vmov q6[3], q6[1], r2, r1
-; CHECK-NEXT: vmov.u8 r1, q1[1]
-; CHECK-NEXT: vmov.u8 r2, q0[1]
-; CHECK-NEXT: sxtb r1, r1
-; CHECK-NEXT: sxtb r2, r2
+; CHECK-NEXT: vmov.s8 r1, q1[1]
+; CHECK-NEXT: vmov.s8 r2, q0[1]
; CHECK-NEXT: smull r1, r12, r2, r1
-; CHECK-NEXT: vmov.u8 r2, q0[0]
-; CHECK-NEXT: sxtb r2, r2
+; CHECK-NEXT: vmov.s8 r2, q0[0]
; CHECK-NEXT: smull r2, r3, r2, r3
; CHECK-NEXT: vmov q7[2], q7[0], r2, r1
; CHECK-NEXT: vmov q7[3], q7[1], r3, r12
@@ -1184,17 +1179,13 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
; CHECK-NEXT: ubfx r0, r0, #8, #1
; CHECK-NEXT: rsb.w r3, r3, #0
; CHECK-NEXT: rsb.w r0, r0, #0
-; CHECK-NEXT: vmov.u8 r1, q1[2]
-; CHECK-NEXT: vmov q6[2], q6[0], r0, r3
; CHECK-NEXT: adc.w r12, r12, r2
+; CHECK-NEXT: vmov q6[2], q6[0], r0, r3
+; CHECK-NEXT: vmov.s8 r1, q1[2]
; CHECK-NEXT: vmov q6[3], q6[1], r0, r3
-; CHECK-NEXT: vmov.u8 r0, q1[3]
-; CHECK-NEXT: vmov.u8 r3, q0[3]
-; CHECK-NEXT: vmov.u8 r2, q0[2]
-; CHECK-NEXT: sxtb r0, r0
-; CHECK-NEXT: sxtb r3, r3
-; CHECK-NEXT: sxtb r1, r1
-; CHECK-NEXT: sxtb r2, r2
+; CHECK-NEXT: vmov.s8 r2, q0[2]
+; CHECK-NEXT: vmov.s8 r0, q1[3]
+; CHECK-NEXT: vmov.s8 r3, q0[3]
; CHECK-NEXT: smull r0, r3, r3, r0
; CHECK-NEXT: smull r1, r2, r2, r1
; CHECK-NEXT: vmov q7[2], q7[0], r1, r0
@@ -1207,15 +1198,14 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
; CHECK-NEXT: adc.w r2, r12, r0
; CHECK-NEXT: vmov r0, s26
; CHECK-NEXT: adds.w r12, r1, r0
-; CHECK-NEXT: vmov.u8 r1, q1[4]
+; CHECK-NEXT: vmov.s8 r1, q1[4]
; CHECK-NEXT: adc.w lr, r2, r3
; CHECK-NEXT: vmov.u16 r2, q5[6]
; CHECK-NEXT: vmov.u16 r3, q5[4]
-; CHECK-NEXT: sxtb r1, r1
+; CHECK-NEXT: smull r1, r4, r4, r1
; CHECK-NEXT: vmov q6[2], q6[0], r3, r2
; CHECK-NEXT: vmov.u16 r2, q5[7]
; CHECK-NEXT: vmov.u16 r3, q5[5]
-; CHECK-NEXT: smull r1, r4, r4, r1
; CHECK-NEXT: vmov q6[3], q6[1], r3, r2
; CHECK-NEXT: vcmp.i32 ne, q6, zr
; CHECK-NEXT: vmrs r2, p0
@@ -1225,10 +1215,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
; CHECK-NEXT: rsbs r3, r3, #0
; CHECK-NEXT: vmov q5[2], q5[0], r0, r3
; CHECK-NEXT: vmov q5[3], q5[1], r0, r3
-; CHECK-NEXT: vmov.u8 r0, q1[5]
-; CHECK-NEXT: vmov.u8 r3, q0[5]
-; CHECK-NEXT: sxtb r0, r0
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r0, q1[5]
+; CHECK-NEXT: vmov.s8 r3, q0[5]
; CHECK-NEXT: smull r0, r3, r3, r0
; CHECK-NEXT: vmov q6[2], q6[0], r1, r0
; CHECK-NEXT: vmov q6[3], q6[1], r4, r3
@@ -1240,23 +1228,19 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
; CHECK-NEXT: adds.w r1, r1, r12
; CHECK-NEXT: adc.w r0, r0, lr
; CHECK-NEXT: adds r1, r1, r4
-; CHECK-NEXT: vmov.u8 r4, q1[6]
+; CHECK-NEXT: vmov.s8 r4, q1[6]
; CHECK-NEXT: adc.w r12, r0, r3
; CHECK-NEXT: ubfx r3, r2, #12, #1
; CHECK-NEXT: ubfx r2, r2, #8, #1
; CHECK-NEXT: rsbs r3, r3, #0
; CHECK-NEXT: rsbs r2, r2, #0
-; CHECK-NEXT: vmov.u8 r0, q0[6]
+; CHECK-NEXT: vmov.s8 r0, q0[6]
; CHECK-NEXT: vmov q5[2], q5[0], r2, r3
-; CHECK-NEXT: sxtb r4, r4
+; CHECK-NEXT: smull r0, r4, r0, r4
; CHECK-NEXT: vmov q5[3], q5[1], r2, r3
-; CHECK-NEXT: vmov.u8 r2, q1[7]
-; CHECK-NEXT: vmov.u8 r3, q0[7]
-; CHECK-NEXT: sxtb r0, r0
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r2, q1[7]
+; CHECK-NEXT: vmov.s8 r3, q0[7]
; CHECK-NEXT: smull r2, r3, r3, r2
-; CHECK-NEXT: smull r0, r4, r0, r4
; CHECK-NEXT: vmov q6[2], q6[0], r0, r2
; CHECK-NEXT: vmov q6[3], q6[1], r4, r3
; CHECK-NEXT: vand q5, q6, q5
@@ -1267,7 +1251,7 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
; CHECK-NEXT: adc.w r2, r12, r0
; CHECK-NEXT: vmov r0, s22
; CHECK-NEXT: adds.w r12, r1, r0
-; CHECK-NEXT: vmov.u8 r0, q1[8]
+; CHECK-NEXT: vmov.s8 r0, q1[8]
; CHECK-NEXT: adc.w lr, r2, r3
; CHECK-NEXT: vmov.u8 r2, q4[8]
; CHECK-NEXT: vmov.16 q5[0], r2
@@ -1285,17 +1269,15 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
; CHECK-NEXT: vmov.16 q5[6], r2
; CHECK-NEXT: vmov.u8 r2, q4[15]
; CHECK-NEXT: vmov.16 q5[7], r2
-; CHECK-NEXT: vmov.u8 r1, q0[8]
+; CHECK-NEXT: vmov.s8 r1, q0[8]
; CHECK-NEXT: vcmp.i16 ne, q5, zr
-; CHECK-NEXT: sxtb r0, r0
+; CHECK-NEXT: smull r0, r1, r1, r0
; CHECK-NEXT: vpsel q2, q3, q2
-; CHECK-NEXT: sxtb r1, r1
; CHECK-NEXT: vmov.u16 r2, q2[2]
; CHECK-NEXT: vmov.u16 r3, q2[0]
; CHECK-NEXT: vmov q3[2], q3[0], r3, r2
; CHECK-NEXT: vmov.u16 r2, q2[3]
; CHECK-NEXT: vmov.u16 r3, q2[1]
-; CHECK-NEXT: smull r0, r1, r1, r0
; CHECK-NEXT: vmov q3[3], q3[1], r3, r2
; CHECK-NEXT: vcmp.i32 ne, q3, zr
; CHECK-NEXT: vmrs r2, p0
@@ -1305,10 +1287,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
; CHECK-NEXT: rsbs r3, r3, #0
; CHECK-NEXT: vmov q3[2], q3[0], r4, r3
; CHECK-NEXT: vmov q3[3], q3[1], r4, r3
-; CHECK-NEXT: vmov.u8 r3, q1[9]
-; CHECK-NEXT: vmov.u8 r4, q0[9]
-; CHECK-NEXT: sxtb r3, r3
-; CHECK-NEXT: sxtb r4, r4
+; CHECK-NEXT: vmov.s8 r3, q1[9]
+; CHECK-NEXT: vmov.s8 r4, q0[9]
; CHECK-NEXT: smull r3, r4, r4, r3
; CHECK-NEXT: vmov q4[2], q4[0], r0, r3
; CHECK-NEXT: vmov q4[3], q4[1], r1, r4
@@ -1320,23 +1300,19 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
; CHECK-NEXT: adds.w r1, r1, r12
; CHECK-NEXT: adc.w r0, r0, lr
; CHECK-NEXT: adds r1, r1, r4
-; CHECK-NEXT: vmov.u8 r4, q1[10]
+; CHECK-NEXT: vmov.s8 r4, q1[10]
; CHECK-NEXT: adc.w r12, r0, r3
; CHECK-NEXT: ubfx r3, r2, #12, #1
; CHECK-NEXT: ubfx r2, r2, #8, #1
; CHECK-NEXT: rsbs r3, r3, #0
; CHECK-NEXT: rsbs r2, r2, #0
-; CHECK-NEXT: vmov.u8 r0, q0[10]
+; CHECK-NEXT: vmov.s8 r0, q0[10]
; CHECK-NEXT: vmov q3[2], q3[0], r2, r3
-; CHECK-NEXT: sxtb r4, r4
+; CHECK-NEXT: smull r0, r4, r0, r4
; CHECK-NEXT: vmov q3[3], q3[1], r2, r3
-; CHECK-NEXT: vmov.u8 r2, q1[11]
-; CHECK-NEXT: vmov.u8 r3, q0[11]
-; CHECK-NEXT: sxtb r0, r0
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r2, q1[11]
+; CHECK-NEXT: vmov.s8 r3, q0[11]
; CHECK-NEXT: smull r2, r3, r3, r2
-; CHECK-NEXT: smull r0, r4, r0, r4
; CHECK-NEXT: vmov q4[2], q4[0], r0, r2
; CHECK-NEXT: vmov q4[3], q4[1], r4, r3
; CHECK-NEXT: vand q3, q4, q3
@@ -1347,19 +1323,17 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
; CHECK-NEXT: adc.w r2, r12, r0
; CHECK-NEXT: vmov r0, s14
; CHECK-NEXT: adds.w r12, r1, r0
-; CHECK-NEXT: vmov.u8 r0, q1[12]
+; CHECK-NEXT: vmov.s8 r0, q1[12]
; CHECK-NEXT: adc.w lr, r2, r3
; CHECK-NEXT: vmov.u16 r2, q2[6]
; CHECK-NEXT: vmov.u16 r3, q2[4]
-; CHECK-NEXT: vmov.u8 r1, q0[12]
+; CHECK-NEXT: vmov.s8 r1, q0[12]
; CHECK-NEXT: vmov q3[2], q3[0], r3, r2
; CHECK-NEXT: vmov.u16 r2, q2[7]
; CHECK-NEXT: vmov.u16 r3, q2[5]
-; CHECK-NEXT: sxtb r0, r0
+; CHECK-NEXT: smull r0, r1, r1, r0
; CHECK-NEXT: vmov q3[3], q3[1], r3, r2
-; CHECK-NEXT: sxtb r1, r1
; CHECK-NEXT: vcmp.i32 ne, q3, zr
-; CHECK-NEXT: smull r0, r1, r1, r0
; CHECK-NEXT: vmrs r2, p0
; CHECK-NEXT: and r4, r2, #1
; CHECK-NEXT: ubfx r3, r2, #4, #1
@@ -1367,10 +1341,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
; CHECK-NEXT: rsbs r3, r3, #0
; CHECK-NEXT: vmov q2[2], q2[0], r4, r3
; CHECK-NEXT: vmov q2[3], q2[1], r4, r3
-; CHECK-NEXT: vmov.u8 r3, q1[13]
-; CHECK-NEXT: vmov.u8 r4, q0[13]
-; CHECK-NEXT: sxtb r3, r3
-; CHECK-NEXT: sxtb r4, r4
+; CHECK-NEXT: vmov.s8 r3, q1[13]
+; CHECK-NEXT: vmov.s8 r4, q0[13]
; CHECK-NEXT: smull r3, r4, r4, r3
; CHECK-NEXT: vmov q3[2], q3[0], r0, r3
; CHECK-NEXT: vmov q3[3], q3[1], r1, r4
@@ -1382,23 +1354,19 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
; CHECK-NEXT: adds.w r1, r1, r12
; CHECK-NEXT: adc.w r0, r0, lr
; CHECK-NEXT: adds r1, r1, r4
-; CHECK-NEXT: vmov.u8 r4, q1[14]
+; CHECK-NEXT: vmov.s8 r4, q1[14]
; CHECK-NEXT: adc.w r12, r0, r3
; CHECK-NEXT: ubfx r3, r2, #12, #1
; CHECK-NEXT: ubfx r2, r2, #8, #1
; CHECK-NEXT: rsbs r3, r3, #0
; CHECK-NEXT: rsbs r2, r2, #0
-; CHECK-NEXT: vmov.u8 r0, q0[14]
+; CHECK-NEXT: vmov.s8 r0, q0[14]
; CHECK-NEXT: vmov q2[2], q2[0], r2, r3
-; CHECK-NEXT: sxtb r4, r4
+; CHECK-NEXT: smull r0, r4, r0, r4
; CHECK-NEXT: vmov q2[3], q2[1], r2, r3
-; CHECK-NEXT: vmov.u8 r2, q1[15]
-; CHECK-NEXT: vmov.u8 r3, q0[15]
-; CHECK-NEXT: sxtb r0, r0
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r2, q1[15]
+; CHECK-NEXT: vmov.s8 r3, q0[15]
; CHECK-NEXT: smull r2, r3, r3, r2
-; CHECK-NEXT: smull r0, r4, r0, r4
; CHECK-NEXT: vmov q0[2], q0[0], r0, r2
; CHECK-NEXT: vmov q0[3], q0[1], r4, r3
; CHECK-NEXT: vand q0, q0, q2
@@ -2637,11 +2605,10 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
; CHECK-NEXT: vcmp.i8 eq, q2, zr
; CHECK-NEXT: vmov.i8 q2, #0x0
; CHECK-NEXT: vmov.i8 q3, #0xff
-; CHECK-NEXT: vmov.u8 r4, q0[0]
+; CHECK-NEXT: vmov.s8 r4, q0[0]
; CHECK-NEXT: vpsel q4, q3, q2
-; CHECK-NEXT: sxtb r4, r4
+; CHECK-NEXT: vmov.s8 r5, q0[2]
; CHECK-NEXT: vmov.u8 r2, q4[0]
-; CHECK-NEXT: vmov.u8 r5, q0[2]
; CHECK-NEXT: vmov.16 q5[0], r2
; CHECK-NEXT: vmov.u8 r2, q4[1]
; CHECK-NEXT: vmov.16 q5[1], r2
@@ -2657,7 +2624,6 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
; CHECK-NEXT: vmov.16 q5[6], r2
; CHECK-NEXT: vmov.u8 r2, q4[7]
; CHECK-NEXT: vmov.16 q5[7], r2
-; CHECK-NEXT: sxtb r5, r5
; CHECK-NEXT: vcmp.i16 ne, q5, zr
; CHECK-NEXT: vpsel q5, q3, q2
; CHECK-NEXT: vmov.u16 r2, q5[2]
@@ -2674,13 +2640,10 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
; CHECK-NEXT: rsbs r3, r3, #0
; CHECK-NEXT: vmov q6[2], q6[0], r2, r3
; CHECK-NEXT: vmov q6[3], q6[1], r2, r3
-; CHECK-NEXT: vmov.u8 r2, q1[1]
-; CHECK-NEXT: vmov.u8 r3, q0[1]
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r2, q1[1]
+; CHECK-NEXT: vmov.s8 r3, q0[1]
; CHECK-NEXT: smull r2, lr, r3, r2
-; CHECK-NEXT: vmov.u8 r3, q1[0]
-; CHECK-NEXT: sxtb r3, r3
+; CHECK-NEXT: vmov.s8 r3, q1[0]
; CHECK-NEXT: smull r3, r4, r4, r3
; CHECK-NEXT: vmov q7[2], q7[0], r3, r2
; CHECK-NEXT: vmov q7[3], q7[1], r4, lr
@@ -2696,223 +2659,196 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
; CHECK-NEXT: rsb.w r2, r2, #0
; CHECK-NEXT: adc.w lr, lr, r3
; CHECK-NEXT: vmov q6[2], q6[0], r2, r4
-; CHECK-NEXT: vmov.u8 r3, q1[2]
+; CHECK-NEXT: vmov.s8 r3, q1[2]
; CHECK-NEXT: vmov q6[3], q6[1], r2, r4
-; CHECK-NEXT: vmov.u8 r2, q1[3]
-; CHECK-NEXT: vmov.u8 r4, q0[3]
-; CHECK-NEXT: sxtb r3, r3
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r4, r4
-; CHECK-NEXT: smull r2, r4, r4, r2
+; CHECK-NEXT: vmov.s8 r2, q1[3]
+; CHECK-NEXT: vmov.s8 r4, q0[3]
; CHECK-NEXT: smull r3, r5, r5, r3
+; CHECK-NEXT: smull r2, r4, r4, r2
; CHECK-NEXT: vmov q7[2], q7[0], r3, r2
; CHECK-NEXT: vmov q7[3], q7[1], r5, r4
-; CHECK-NEXT: vmov.u8 r4, q1[4]
; CHECK-NEXT: vand q6, q7, q6
-; CHECK-NEXT: sxtb r4, r4
; CHECK-NEXT: vmov r3, s24
; CHECK-NEXT: vmov r2, s25
-; CHECK-NEXT: vmov r5, s26
+; CHECK-NEXT: vmov r5, s27
; CHECK-NEXT: adds r3, r3, r6
-; CHECK-NEXT: vmov r6, s27
-; CHECK-NEXT: adc.w r2, r2, lr
-; CHECK-NEXT: adds.w r12, r3, r5
-; CHECK-NEXT: vmov.u8 r3, q0[4]
-; CHECK-NEXT: sxtb r3, r3
-; CHECK-NEXT: smull r3, r4, r3, r4
-; CHECK-NEXT: adc.w lr, r2, r6
-; CHECK-NEXT: vmov.u16 r2, q5[6]
-; CHECK-NEXT: vmov.u16 r6, q5[4]
-; CHECK-NEXT: vmov q6[2], q6[0], r6, r2
-; CHECK-NEXT: vmov.u16 r2, q5[7]
-; CHECK-NEXT: vmov.u16 r6, q5[5]
-; CHECK-NEXT: vmov q6[3], q6[1], r6, r2
+; CHECK-NEXT: adc.w r6, lr, r2
+; CHECK-NEXT: vmov r2, s26
+; CHECK-NEXT: adds.w r12, r3, r2
+; CHECK-NEXT: vmov.s8 r2, q1[4]
+; CHECK-NEXT: adc.w lr, r6, r5
+; CHECK-NEXT: vmov.u16 r6, q5[6]
+; CHECK-NEXT: vmov.u16 r5, q5[4]
+; CHECK-NEXT: vmov.s8 r3, q0[4]
+; CHECK-NEXT: vmov q6[2], q6[0], r5, r6
+; CHECK-NEXT: vmov.u16 r6, q5[7]
+; CHECK-NEXT: vmov.u16 r5, q5[5]
+; CHECK-NEXT: smull r2, r3, r3, r2
+; CHECK-NEXT: vmov q6[3], q6[1], r5, r6
; CHECK-NEXT: vcmp.i32 ne, q6, zr
-; CHECK-NEXT: vmrs r2, p0
-; CHECK-NEXT: and r5, r2, #1
-; CHECK-NEXT: ubfx r6, r2, #4, #1
+; CHECK-NEXT: vmrs r6, p0
+; CHECK-NEXT: and r4, r6, #1
+; CHECK-NEXT: ubfx r5, r6, #4, #1
+; CHECK-NEXT: rsbs r4, r4, #0
; CHECK-NEXT: rsbs r5, r5, #0
-; CHECK-NEXT: rsbs r6, r6, #0
-; CHECK-NEXT: vmov q5[2], q5[0], r5, r6
-; CHECK-NEXT: vmov q5[3], q5[1], r5, r6
-; CHECK-NEXT: vmov.u8 r6, q1[5]
-; CHECK-NEXT: vmov.u8 r5, q0[5]
-; CHECK-NEXT: sxtb r6, r6
-; CHECK-NEXT: sxtb r5, r5
-; CHECK-NEXT: smull r6, r5, r5, r6
-; CHECK-NEXT: vmov q6[2], q6[0], r3, r6
-; CHECK-NEXT: vmov q6[3], q6[1], r4, r5
+; CHECK-NEXT: vmov q5[2], q5[0], r4, r5
+; CHECK-NEXT: vmov q5[3], q5[1], r4, r5
+; CHECK-NEXT: vmov.s8 r5, q1[5]
+; CHECK-NEXT: vmov.s8 r4, q0[5]
+; CHECK-NEXT: smull r5, r4, r4, r5
+; CHECK-NEXT: vmov q6[2], q6[0], r2, r5
+; CHECK-NEXT: vmov q6[3], q6[1], r3, r4
; CHECK-NEXT: vand q5, q6, q5
-; CHECK-NEXT: vmov r4, s20
-; CHECK-NEXT: vmov r3, s21
-; CHECK-NEXT: vmov r5, s23
-; CHECK-NEXT: adds.w r6, r12, r4
+; CHECK-NEXT: vmov r3, s20
+; CHECK-NEXT: vmov r2, s21
; CHECK-NEXT: vmov r4, s22
-; CHECK-NEXT: adc.w r3, r3, lr
-; CHECK-NEXT: adds r6, r6, r4
-; CHECK-NEXT: vmov.u8 r4, q1[6]
-; CHECK-NEXT: adc.w r12, r3, r5
-; CHECK-NEXT: ubfx r5, r2, #12, #1
-; CHECK-NEXT: ubfx r2, r2, #8, #1
+; CHECK-NEXT: vmov r5, s23
+; CHECK-NEXT: adds.w r3, r3, r12
+; CHECK-NEXT: adc.w r2, r2, lr
+; CHECK-NEXT: adds r3, r3, r4
+; CHECK-NEXT: vmov.s8 r4, q1[6]
+; CHECK-NEXT: adc.w r12, r2, r5
+; CHECK-NEXT: ubfx r5, r6, #12, #1
+; CHECK-NEXT: ubfx r6, r6, #8, #1
; CHECK-NEXT: rsbs r5, r5, #0
-; CHECK-NEXT: rsbs r2, r2, #0
-; CHECK-NEXT: vmov.u8 r3, q0[6]
-; CHECK-NEXT: vmov q5[2], q5[0], r2, r5
-; CHECK-NEXT: sxtb r4, r4
-; CHECK-NEXT: vmov q5[3], q5[1], r2, r5
-; CHECK-NEXT: vmov.u8 r2, q1[7]
-; CHECK-NEXT: vmov.u8 r5, q0[7]
-; CHECK-NEXT: sxtb r3, r3
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r5, r5
-; CHECK-NEXT: smull r2, r5, r5, r2
-; CHECK-NEXT: smull r3, r4, r3, r4
-; CHECK-NEXT: vmov q6[2], q6[0], r3, r2
+; CHECK-NEXT: rsbs r6, r6, #0
+; CHECK-NEXT: vmov.s8 r2, q0[6]
+; CHECK-NEXT: vmov q5[2], q5[0], r6, r5
+; CHECK-NEXT: smull r2, r4, r2, r4
+; CHECK-NEXT: vmov q5[3], q5[1], r6, r5
+; CHECK-NEXT: vmov.s8 r6, q1[7]
+; CHECK-NEXT: vmov.s8 r5, q0[7]
+; CHECK-NEXT: smull r6, r5, r5, r6
+; CHECK-NEXT: vmov q6[2], q6[0], r2, r6
; CHECK-NEXT: vmov q6[3], q6[1], r4, r5
-; CHECK-NEXT: vmov.u8 r4, q1[8]
; CHECK-NEXT: vand q5, q6, q5
-; CHECK-NEXT: sxtb r4, r4
-; CHECK-NEXT: vmov r3, s20
+; CHECK-NEXT: vmov r6, s20
; CHECK-NEXT: vmov r2, s21
-; CHECK-NEXT: vmov r5, s22
+; CHECK-NEXT: vmov r5, s23
; CHECK-NEXT: adds r3, r3, r6
-; CHECK-NEXT: vmov r6, s23
-; CHECK-NEXT: adc.w r2, r2, r12
-; CHECK-NEXT: adds.w r12, r3, r5
-; CHECK-NEXT: vmov.u8 r3, q0[8]
-; CHECK-NEXT: sxtb r3, r3
-; CHECK-NEXT: smull r3, r4, r3, r4
-; CHECK-NEXT: adc.w lr, r2, r6
-; CHECK-NEXT: vmov.u8 r2, q4[8]
-; CHECK-NEXT: vmov.16 q5[0], r2
-; CHECK-NEXT: vmov.u8 r2, q4[9]
-; CHECK-NEXT: vmov.16 q5[1], r2
-; CHECK-NEXT: vmov.u8 r2, q4[10]
-; CHECK-NEXT: vmov.16 q5[2], r2
-; CHECK-NEXT: vmov.u8 r2, q4[11]
-; CHECK-NEXT: vmov.16 q5[3], r2
-; CHECK-NEXT: vmov.u8 r2, q4[12]
-; CHECK-NEXT: vmov.16 q5[4], r2
-; CHECK-NEXT: vmov.u8 r2, q4[13]
-; CHECK-NEXT: vmov.16 q5[5], r2
-; CHECK-NEXT: vmov.u8 r2, q4[14]
-; CHECK-NEXT: vmov.16 q5[6], r2
-; CHECK-NEXT: vmov.u8 r2, q4[15]
-; CHECK-NEXT: vmov.16 q5[7], r2
+; CHECK-NEXT: adc.w r6, r12, r2
+; CHECK-NEXT: vmov r2, s22
+; CHECK-NEXT: adds.w r12, r3, r2
+; CHECK-NEXT: vmov.s8 r2, q1[8]
+; CHECK-NEXT: adc.w lr, r6, r5
+; CHECK-NEXT: vmov.u8 r6, q4[8]
+; CHECK-NEXT: vmov.16 q5[0], r6
+; CHECK-NEXT: vmov.u8 r6, q4[9]
+; CHECK-NEXT: vmov.16 q5[1], r6
+; CHECK-NEXT: vmov.u8 r6, q4[10]
+; CHECK-NEXT: vmov.16 q5[2], r6
+; CHECK-NEXT: vmov.u8 r6, q4[11]
+; CHECK-NEXT: vmov.16 q5[3], r6
+; CHECK-NEXT: vmov.u8 r6, q4[12]
+; CHECK-NEXT: vmov.16 q5[4], r6
+; CHECK-NEXT: vmov.u8 r6, q4[13]
+; CHECK-NEXT: vmov.16 q5[5], r6
+; CHECK-NEXT: vmov.u8 r6, q4[14]
+; CHECK-NEXT: vmov.16 q5[6], r6
+; CHECK-NEXT: vmov.u8 r6, q4[15]
+; CHECK-NEXT: vmov.16 q5[7], r6
+; CHECK-NEXT: vmov.s8 r3, q0[8]
; CHECK-NEXT: vcmp.i16 ne, q5, zr
+; CHECK-NEXT: smull r2, r3, r3, r2
; CHECK-NEXT: vpsel q2, q3, q2
-; CHECK-NEXT: vmov.u16 r2, q2[2]
-; CHECK-NEXT: vmov.u16 r6, q2[0]
-; CHECK-NEXT: vmov q3[2], q3[0], r6, r2
-; CHECK-NEXT: vmov.u16 r2, q2[3]
-; CHECK-NEXT: vmov.u16 r6, q2[1]
-; CHECK-NEXT: vmov q3[3], q3[1], r6, r2
-; CHECK-NEXT: vcmp.i32 ne, q3, zr
-; CHECK-NEXT: vmrs r2, p0
-; CHECK-NEXT: and r5, r2, #1
-; CHECK-NEXT: ubfx r6, r2, #4, #1
-; CHECK-NEXT: rsbs r5, r5, #0
-; CHECK-NEXT: rsbs r6, r6, #0
+; CHECK-NEXT: vmov.u16 r6, q2[2]
+; CHECK-NEXT: vmov.u16 r5, q2[0]
; CHECK-NEXT: vmov q3[2], q3[0], r5, r6
+; CHECK-NEXT: vmov.u16 r6, q2[3]
+; CHECK-NEXT: vmov.u16 r5, q2[1]
; CHECK-NEXT: vmov q3[3], q3[1], r5, r6
-; CHECK-NEXT: vmov.u8 r6, q1[9]
-; CHECK-NEXT: vmov.u8 r5, q0[9]
-; CHECK-NEXT: sxtb r6, r6
-; CHECK-NEXT: sxtb r5, r5
-; CHECK-NEXT: smull r6, r5, r5, r6
-; CHECK-NEXT: vmov q4[2], q4[0], r3, r6
-; CHECK-NEXT: vmov q4[3], q4[1], r4, r5
+; CHECK-NEXT: vcmp.i32 ne, q3, zr
+; CHECK-NEXT: vmrs r6, p0
+; CHECK-NEXT: and r4, r6, #1
+; CHECK-NEXT: ubfx r5, r6, #4, #1
+; CHECK-NEXT: rsbs r4, r4, #0
+; CHECK-NEXT: rsbs r5, r5, #0
+; CHECK-NEXT: vmov q3[2], q3[0], r4, r5
+; CHECK-NEXT: vmov q3[3], q3[1], r4, r5
+; CHECK-NEXT: vmov.s8 r5, q1[9]
+; CHECK-NEXT: vmov.s8 r4, q0[9]
+; CHECK-NEXT: smull r5, r4, r4, r5
+; CHECK-NEXT: vmov q4[2], q4[0], r2, r5
+; CHECK-NEXT: vmov q4[3], q4[1], r3, r4
; CHECK-NEXT: vand q3, q4, q3
-; CHECK-NEXT: vmov r4, s12
-; CHECK-NEXT: vmov r3, s13
-; CHECK-NEXT: vmov r5, s15
-; CHECK-NEXT: adds.w r6, r12, r4
+; CHECK-NEXT: vmov r3, s12
+; CHECK-NEXT: vmov r2, s13
; CHECK-NEXT: vmov r4, s14
-; CHECK-NEXT: adc.w r3, r3, lr
-; CHECK-NEXT: adds r6, r6, r4
-; CHECK-NEXT: vmov.u8 r4, q1[10]
-; CHECK-NEXT: adc.w r12, r3, r5
-; CHECK-NEXT: ubfx r5, r2, #12, #1
-; CHECK-NEXT: ubfx r2, r2, #8, #1
+; CHECK-NEXT: vmov r5, s15
+; CHECK-NEXT: adds.w r3, r3, r12
+; CHECK-NEXT: adc.w r2, r2, lr
+; CHECK-NEXT: adds r3, r3, r4
+; CHECK-NEXT: vmov.s8 r4, q1[10]
+; CHECK-NEXT: adc.w r12, r2, r5
+; CHECK-NEXT: ubfx r5, r6, #12, #1
+; CHECK-NEXT: ubfx r6, r6, #8, #1
; CHECK-NEXT: rsbs r5, r5, #0
-; CHECK-NEXT: rsbs r2, r2, #0
-; CHECK-NEXT: vmov.u8 r3, q0[10]
-; CHECK-NEXT: vmov q3[2], q3[0], r2, r5
-; CHECK-NEXT: sxtb r4, r4
-; CHECK-NEXT: vmov q3[3], q3[1], r2, r5
-; CHECK-NEXT: vmov.u8 r2, q1[11]
-; CHECK-NEXT: vmov.u8 r5, q0[11]
-; CHECK-NEXT: sxtb r3, r3
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r5, r5
-; CHECK-NEXT: smull r2, r5, r5, r2
-; CHECK-NEXT: smull r3, r4, r3, r4
-; CHECK-NEXT: vmov q4[2], q4[0], r3, r2
+; CHECK-NEXT: rsbs r6, r6, #0
+; CHECK-NEXT: vmov.s8 r2, q0[10]
+; CHECK-NEXT: vmov q3[2], q3[0], r6, r5
+; CHECK-NEXT: smull r2, r4, r2, r4
+; CHECK-NEXT: vmov q3[3], q3[1], r6, r5
+; CHECK-NEXT: vmov.s8 r6, q1[11]
+; CHECK-NEXT: vmov.s8 r5, q0[11]
+; CHECK-NEXT: smull r6, r5, r5, r6
+; CHECK-NEXT: vmov q4[2], q4[0], r2, r6
; CHECK-NEXT: vmov q4[3], q4[1], r4, r5
-; CHECK-NEXT: vmov.u8 r4, q1[12]
; CHECK-NEXT: vand q3, q4, q3
-; CHECK-NEXT: sxtb r4, r4
-; CHECK-NEXT: vmov r3, s12
+; CHECK-NEXT: vmov r6, s12
; CHECK-NEXT: vmov r2, s13
-; CHECK-NEXT: vmov r5, s14
+; CHECK-NEXT: vmov r5, s15
; CHECK-NEXT: adds r3, r3, r6
-; CHECK-NEXT: vmov r6, s15
-; CHECK-NEXT: adc.w r2, r2, r12
-; CHECK-NEXT: adds.w r12, r3, r5
-; CHECK-NEXT: vmov.u8 r3, q0[12]
-; CHECK-NEXT: sxtb r3, r3
-; CHECK-NEXT: smull r3, r4, r3, r4
-; CHECK-NEXT: adc.w lr, r2, r6
-; CHECK-NEXT: vmov.u16 r2, q2[6]
-; CHECK-NEXT: vmov.u16 r6, q2[4]
-; CHECK-NEXT: vmov q3[2], q3[0], r6, r2
-; CHECK-NEXT: vmov.u16 r2, q2[7]
-; CHECK-NEXT: vmov.u16 r6, q2[5]
-; CHECK-NEXT: vmov q3[3], q3[1], r6, r2
+; CHECK-NEXT: adc.w r6, r12, r2
+; CHECK-NEXT: vmov r2, s14
+; CHECK-NEXT: adds.w r12, r3, r2
+; CHECK-NEXT: vmov.s8 r2, q1[12]
+; CHECK-NEXT: adc.w lr, r6, r5
+; CHECK-NEXT: vmov.u16 r6, q2[6]
+; CHECK-NEXT: vmov.u16 r5, q2[4]
+; CHECK-NEXT: vmov.s8 r3, q0[12]
+; CHECK-NEXT: vmov q3[2], q3[0], r5, r6
+; CHECK-NEXT: vmov.u16 r6, q2[7]
+; CHECK-NEXT: vmov.u16 r5, q2[5]
+; CHECK-NEXT: smull r2, r3, r3, r2
+; CHECK-NEXT: vmov q3[3], q3[1], r5, r6
; CHECK-NEXT: vcmp.i32 ne, q3, zr
-; CHECK-NEXT: vmrs r2, p0
-; CHECK-NEXT: and r5, r2, #1
-; CHECK-NEXT: ubfx r6, r2, #4, #1
+; CHECK-NEXT: vmrs r6, p0
+; CHECK-NEXT: and r4, r6, #1
+; CHECK-NEXT: ubfx r5, r6, #4, #1
+; CHECK-NEXT: rsbs r4, r4, #0
; CHECK-NEXT: rsbs r5, r5, #0
-; CHECK-NEXT: rsbs r6, r6, #0
-; CHECK-NEXT: vmov q2[2], q2[0], r5, r6
-; CHECK-NEXT: vmov q2[3], q2[1], r5, r6
-; CHECK-NEXT: vmov.u8 r6, q1[13]
-; CHECK-NEXT: vmov.u8 r5, q0[13]
-; CHECK-NEXT: sxtb r6, r6
-; CHECK-NEXT: sxtb r5, r5
-; CHECK-NEXT: smull r6, r5, r5, r6
-; CHECK-NEXT: vmov q3[2], q3[0], r3, r6
-; CHECK-NEXT: vmov q3[3], q3[1], r4, r5
+; CHECK-NEXT: vmov q2[2], q2[0], r4, r5
+; CHECK-NEXT: vmov q2[3], q2[1], r4, r5
+; CHECK-NEXT: vmov.s8 r5, q1[13]
+; CHECK-NEXT: vmov.s8 r4, q0[13]
+; CHECK-NEXT: smull r5, r4, r4, r5
+; CHECK-NEXT: vmov q3[2], q3[0], r2, r5
+; CHECK-NEXT: vmov q3[3], q3[1], r3, r4
; CHECK-NEXT: vand q2, q3, q2
-; CHECK-NEXT: vmov r4, s8
-; CHECK-NEXT: vmov r3, s9
-; CHECK-NEXT: vmov r5, s11
-; CHECK-NEXT: adds.w r6, r12, r4
+; CHECK-NEXT: vmov r3, s8
+; CHECK-NEXT: vmov r2, s9
; CHECK-NEXT: vmov r4, s10
-; CHECK-NEXT: adc.w r3, r3, lr
-; CHECK-NEXT: adds r6, r6, r4
-; CHECK-NEXT: vmov.u8 r4, q1[14]
-; CHECK-NEXT: adc.w r12, r3, r5
-; CHECK-NEXT: ubfx r5, r2, #12, #1
-; CHECK-NEXT: ubfx r2, r2, #8, #1
+; CHECK-NEXT: vmov r5, s11
+; CHECK-NEXT: adds.w r3, r3, r12
+; CHECK-NEXT: adc.w r2, r2, lr
+; CHECK-NEXT: adds r3, r3, r4
+; CHECK-NEXT: vmov.s8 r4, q1[14]
+; CHECK-NEXT: adc.w r12, r2, r5
+; CHECK-NEXT: ubfx r5, r6, #12, #1
+; CHECK-NEXT: ubfx r6, r6, #8, #1
; CHECK-NEXT: rsbs r5, r5, #0
-; CHECK-NEXT: rsbs r2, r2, #0
-; CHECK-NEXT: vmov.u8 r3, q0[14]
-; CHECK-NEXT: vmov q2[2], q2[0], r2, r5
-; CHECK-NEXT: sxtb r4, r4
-; CHECK-NEXT: vmov q2[3], q2[1], r2, r5
-; CHECK-NEXT: vmov.u8 r2, q1[15]
-; CHECK-NEXT: vmov.u8 r5, q0[15]
-; CHECK-NEXT: sxtb r3, r3
-; CHECK-NEXT: sxtb r2, r2
-; CHECK-NEXT: sxtb r5, r5
-; CHECK-NEXT: smull r2, r5, r5, r2
-; CHECK-NEXT: smull r3, r4, r3, r4
-; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
+; CHECK-NEXT: rsbs r6, r6, #0
+; CHECK-NEXT: vmov.s8 r2, q0[14]
+; CHECK-NEXT: vmov q2[2], q2[0], r6, r5
+; CHECK-NEXT: smull r2, r4, r2, r4
+; CHECK-NEXT: vmov q2[3], q2[1], r6, r5
+; CHECK-NEXT: vmov.s8 r6, q1[15]
+; CHECK-NEXT: vmov.s8 r5, q0[15]
+; CHECK-NEXT: smull r6, r5, r5, r6
+; CHECK-NEXT: vmov q0[2], q0[0], r2, r6
; CHECK-NEXT: vmov q0[3], q0[1], r4, r5
; CHECK-NEXT: vand q0, q0, q2
-; CHECK-NEXT: vmov r3, s0
+; CHECK-NEXT: vmov r6, s0
; CHECK-NEXT: vmov r2, s1
; CHECK-NEXT: vmov r5, s2
; CHECK-NEXT: adds r3, r3, r6
More information about the llvm-commits
mailing list