[llvm] r371419 - [ARM] Fix loads and stores for predicate vectors
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 9 09:35:49 PDT 2019
Author: dmgreen
Date: Mon Sep 9 09:35:49 2019
New Revision: 371419
URL: http://llvm.org/viewvc/llvm-project?rev=371419&view=rev
Log:
[ARM] Fix loads and stores for predicate vectors
These predicate vectors can usually be loaded and stored with a single
instruction, a VSTR_P0. However this instruction will store the entire P0
predicate, 16 bits, zero-extended to 32 bits, with each lane of the
v4i1/v8i1/v16i1 representing 4/2/1 bits.
As far as I understand, when llvm says "store this v4i1", it really does need
to store 4 bits (or 8, that being the size of a byte, with the bottom 4 as the
interesting bits). For example, a bitcast from a v8i1 to an i8 is defined as a
store followed by a load, which is how the code is expanded.
So this instead lowers the v4i1/v8i1 load/store through some shuffles to get
the bits into the correct positions. This, as you might imagine, is not as
efficient as a single instruction. But I believe it is needed for correctness.
v16i1 equally should not load/store 32 bits, only the 16 bits of data.
Stack loads/stores are still using the VSTR_P0 (as can be seen by the test not
changing). This is fine as they are self-consistent; it is only "externally
observable loads/stores" (from our point of view) that need to be corrected.
Differential revision: https://reviews.llvm.org/D67085
Modified:
llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
llvm/trunk/lib/Target/ARM/ARMInstrMVE.td
llvm/trunk/test/CodeGen/Thumb2/mve-masked-ldst.ll
llvm/trunk/test/CodeGen/Thumb2/mve-masked-load.ll
llvm/trunk/test/CodeGen/Thumb2/mve-masked-store.ll
llvm/trunk/test/CodeGen/Thumb2/mve-pred-bitcast.ll
llvm/trunk/test/CodeGen/Thumb2/mve-pred-loadstore.ll
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=371419&r1=371418&r2=371419&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Mon Sep 9 09:35:49 2019
@@ -378,6 +378,8 @@ void ARMTargetLowering::addMVEVectorType
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
}
}
@@ -8783,6 +8785,65 @@ void ARMTargetLowering::ExpandDIV_Window
Results.push_back(Upper);
}
+static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
+ EVT MemVT = LD->getMemoryVT();
+ assert((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 || MemVT == MVT::v16i1) &&
+ "Expected a predicate type!");
+ assert(MemVT == Op.getValueType());
+ assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
+ "Expected a non-extending load");
+ assert(LD->isUnindexed() && "Expected a unindexed load");
+
+ // The basic MVE VLDR on a v4i1/v8i1 actually loads the entire 16bit
+ // predicate, with the "v4i1" bits spread out over the 16 bits loaded. We
+ // need to make sure that 8/4 bits are actually loaded into the correct
+ // place, which means loading the value and then shuffling the values into
+ // the bottom bits of the predicate.
+ // Equally, VLDR for an v16i1 will actually load 32bits (so will be incorrect
+ // for BE).
+
+ SDLoc dl(Op);
+ SDValue Load = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, MVT::i32, LD->getChain(), LD->getBasePtr(),
+ EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
+ LD->getMemOperand());
+ SDValue Pred = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Load);
+ if (MemVT != MVT::v16i1)
+ Pred = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Pred,
+ DAG.getConstant(0, dl, MVT::i32));
+ return DAG.getMergeValues({Pred, Load.getValue(1)}, dl);
+}
+
+static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
+ StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
+ EVT MemVT = ST->getMemoryVT();
+ assert((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 || MemVT == MVT::v16i1) &&
+ "Expected a predicate type!");
+ assert(MemVT == ST->getValue().getValueType());
+ assert(!ST->isTruncatingStore() && "Expected a non-extending store");
+ assert(ST->isUnindexed() && "Expected a unindexed store");
+
+ // Only store the v4i1 or v8i1 worth of bits, via a buildvector with top bits
+ // unset and a scalar store.
+ SDLoc dl(Op);
+ SDValue Build = ST->getValue();
+ if (MemVT != MVT::v16i1) {
+ SmallVector<SDValue, 16> Ops;
+ for (unsigned I = 0; I < MemVT.getVectorNumElements(); I++)
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Build,
+ DAG.getConstant(I, dl, MVT::i32)));
+ for (unsigned I = MemVT.getVectorNumElements(); I < 16; I++)
+ Ops.push_back(DAG.getUNDEF(MVT::i32));
+ Build = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i1, Ops);
+ }
+ SDValue GRP = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Build);
+ return DAG.getTruncStore(
+ ST->getChain(), dl, GRP, ST->getBasePtr(),
+ EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
+ ST->getMemOperand());
+}
+
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getOrdering()))
// Acquire/Release load/store is not legal for targets without a dmb or
@@ -8982,6 +9043,10 @@ SDValue ARMTargetLowering::LowerOperatio
case ISD::UADDO:
case ISD::USUBO:
return LowerUnsignedALUO(Op, DAG);
+ case ISD::LOAD:
+ return LowerPredicateLoad(Op, DAG);
+ case ISD::STORE:
+ return LowerPredicateStore(Op, DAG);
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
Modified: llvm/trunk/lib/Target/ARM/ARMInstrMVE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrMVE.td?rev=371419&r1=371418&r2=371419&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrMVE.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrMVE.td Mon Sep 9 09:35:49 2019
@@ -4999,24 +4999,6 @@ let Predicates = [HasMVEInt, IsBE] in {
def : MVE_vector_offset_store_typed<v4f32, MVE_VSTRWU32_post, aligned32_post_store, 2>;
}
-let Predicates = [HasMVEInt] in {
- // Predicate loads
- def : Pat<(v16i1 (load t2addrmode_imm7<2>:$addr)),
- (v16i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>;
- def : Pat<(v8i1 (load t2addrmode_imm7<2>:$addr)),
- (v8i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>;
- def : Pat<(v4i1 (load t2addrmode_imm7<2>:$addr)),
- (v4i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>;
-
- // Predicate stores
- def : Pat<(store (v4i1 VCCR:$val), t2addrmode_imm7<2>:$addr),
- (VSTR_P0_off VCCR:$val, t2addrmode_imm7<2>:$addr)>;
- def : Pat<(store (v8i1 VCCR:$val), t2addrmode_imm7<2>:$addr),
- (VSTR_P0_off VCCR:$val, t2addrmode_imm7<2>:$addr)>;
- def : Pat<(store (v16i1 VCCR:$val), t2addrmode_imm7<2>:$addr),
- (VSTR_P0_off VCCR:$val, t2addrmode_imm7<2>:$addr)>;
-}
-
// Widening/Narrowing Loads/Stores
Modified: llvm/trunk/test/CodeGen/Thumb2/mve-masked-ldst.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-masked-ldst.ll?rev=371419&r1=371418&r2=371419&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-masked-ldst.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-masked-ldst.ll Mon Sep 9 09:35:49 2019
@@ -8,11 +8,23 @@ define void @foo_v4i32_v4i32(<4 x i32> *
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: add r3, sp, #4
+; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vcmp.s32 gt, q0, zr
; CHECK-NEXT: @ implicit-def: $q0
-; CHECK-NEXT: vstr p0, [r3]
-; CHECK-NEXT: ldrb.w r1, [sp, #4]
+; CHECK-NEXT: vmrs r12, p0
+; CHECK-NEXT: and r1, r12, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #0, #1
+; CHECK-NEXT: ubfx r1, r12, #4, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #1, #1
+; CHECK-NEXT: ubfx r1, r12, #8, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #2, #1
+; CHECK-NEXT: ubfx r1, r12, #12, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #3, #1
+; CHECK-NEXT: and r1, r3, #15
; CHECK-NEXT: lsls r3, r1, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: ldrne r3, [r2]
@@ -29,9 +41,21 @@ define void @foo_v4i32_v4i32(<4 x i32> *
; CHECK-NEXT: itt mi
; CHECK-NEXT: ldrmi r1, [r2, #12]
; CHECK-NEXT: vmovmi.32 q0[3], r1
-; CHECK-NEXT: mov r1, sp
-; CHECK-NEXT: vstr p0, [r1]
-; CHECK-NEXT: ldrb.w r1, [sp]
+; CHECK-NEXT: vmrs r2, p0
+; CHECK-NEXT: movs r1, #0
+; CHECK-NEXT: and r3, r2, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #0, #1
+; CHECK-NEXT: ubfx r3, r2, #4, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #1, #1
+; CHECK-NEXT: ubfx r3, r2, #8, #1
+; CHECK-NEXT: ubfx r2, r2, #12, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #2, #1
+; CHECK-NEXT: rsbs r2, r2, #0
+; CHECK-NEXT: bfi r1, r2, #3, #1
+; CHECK-NEXT: and r1, r1, #15
; CHECK-NEXT: lsls r2, r1, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: vmovne r2, s0
@@ -64,11 +88,23 @@ define void @foo_sext_v4i32_v4i8(<4 x i3
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: add r3, sp, #4
+; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vcmp.s32 gt, q0, zr
; CHECK-NEXT: @ implicit-def: $q0
-; CHECK-NEXT: vstr p0, [r3]
-; CHECK-NEXT: ldrb.w r1, [sp, #4]
+; CHECK-NEXT: vmrs r12, p0
+; CHECK-NEXT: and r1, r12, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #0, #1
+; CHECK-NEXT: ubfx r1, r12, #4, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #1, #1
+; CHECK-NEXT: ubfx r1, r12, #8, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #2, #1
+; CHECK-NEXT: ubfx r1, r12, #12, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #3, #1
+; CHECK-NEXT: and r1, r3, #15
; CHECK-NEXT: lsls r3, r1, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: ldrbne r3, [r2]
@@ -85,11 +121,23 @@ define void @foo_sext_v4i32_v4i8(<4 x i3
; CHECK-NEXT: itt mi
; CHECK-NEXT: ldrbmi r1, [r2, #3]
; CHECK-NEXT: vmovmi.32 q0[3], r1
-; CHECK-NEXT: mov r1, sp
+; CHECK-NEXT: vmrs r2, p0
+; CHECK-NEXT: movs r1, #0
; CHECK-NEXT: vmovlb.s8 q0, q0
-; CHECK-NEXT: vstr p0, [r1]
; CHECK-NEXT: vmovlb.s16 q0, q0
-; CHECK-NEXT: ldrb.w r1, [sp]
+; CHECK-NEXT: and r3, r2, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #0, #1
+; CHECK-NEXT: ubfx r3, r2, #4, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #1, #1
+; CHECK-NEXT: ubfx r3, r2, #8, #1
+; CHECK-NEXT: ubfx r2, r2, #12, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #2, #1
+; CHECK-NEXT: rsbs r2, r2, #0
+; CHECK-NEXT: bfi r1, r2, #3, #1
+; CHECK-NEXT: and r1, r1, #15
; CHECK-NEXT: lsls r2, r1, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: vmovne r2, s0
@@ -123,11 +171,23 @@ define void @foo_sext_v4i32_v4i16(<4 x i
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: add r3, sp, #4
+; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vcmp.s32 gt, q0, zr
; CHECK-NEXT: @ implicit-def: $q0
-; CHECK-NEXT: vstr p0, [r3]
-; CHECK-NEXT: ldrb.w r1, [sp, #4]
+; CHECK-NEXT: vmrs r12, p0
+; CHECK-NEXT: and r1, r12, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #0, #1
+; CHECK-NEXT: ubfx r1, r12, #4, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #1, #1
+; CHECK-NEXT: ubfx r1, r12, #8, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #2, #1
+; CHECK-NEXT: ubfx r1, r12, #12, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #3, #1
+; CHECK-NEXT: and r1, r3, #15
; CHECK-NEXT: lsls r3, r1, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: ldrhne r3, [r2]
@@ -144,10 +204,22 @@ define void @foo_sext_v4i32_v4i16(<4 x i
; CHECK-NEXT: itt mi
; CHECK-NEXT: ldrhmi r1, [r2, #6]
; CHECK-NEXT: vmovmi.32 q0[3], r1
-; CHECK-NEXT: mov r1, sp
+; CHECK-NEXT: vmrs r2, p0
+; CHECK-NEXT: movs r1, #0
; CHECK-NEXT: vmovlb.s16 q0, q0
-; CHECK-NEXT: vstr p0, [r1]
-; CHECK-NEXT: ldrb.w r1, [sp]
+; CHECK-NEXT: and r3, r2, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #0, #1
+; CHECK-NEXT: ubfx r3, r2, #4, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #1, #1
+; CHECK-NEXT: ubfx r3, r2, #8, #1
+; CHECK-NEXT: ubfx r2, r2, #12, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #2, #1
+; CHECK-NEXT: rsbs r2, r2, #0
+; CHECK-NEXT: bfi r1, r2, #3, #1
+; CHECK-NEXT: and r1, r1, #15
; CHECK-NEXT: lsls r2, r1, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: vmovne r2, s0
@@ -181,12 +253,24 @@ define void @foo_zext_v4i32_v4i8(<4 x i3
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: add r3, sp, #4
+; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vmov.i32 q1, #0xff
; CHECK-NEXT: vcmp.s32 gt, q0, zr
; CHECK-NEXT: @ implicit-def: $q0
-; CHECK-NEXT: vstr p0, [r3]
-; CHECK-NEXT: ldrb.w r1, [sp, #4]
+; CHECK-NEXT: vmrs r12, p0
+; CHECK-NEXT: and r1, r12, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #0, #1
+; CHECK-NEXT: ubfx r1, r12, #4, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #1, #1
+; CHECK-NEXT: ubfx r1, r12, #8, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #2, #1
+; CHECK-NEXT: ubfx r1, r12, #12, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #3, #1
+; CHECK-NEXT: and r1, r3, #15
; CHECK-NEXT: lsls r3, r1, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: ldrbne r3, [r2]
@@ -203,10 +287,22 @@ define void @foo_zext_v4i32_v4i8(<4 x i3
; CHECK-NEXT: itt mi
; CHECK-NEXT: ldrbmi r1, [r2, #3]
; CHECK-NEXT: vmovmi.32 q0[3], r1
-; CHECK-NEXT: mov r1, sp
+; CHECK-NEXT: vmrs r2, p0
+; CHECK-NEXT: movs r1, #0
; CHECK-NEXT: vand q0, q0, q1
-; CHECK-NEXT: vstr p0, [r1]
-; CHECK-NEXT: ldrb.w r1, [sp]
+; CHECK-NEXT: and r3, r2, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #0, #1
+; CHECK-NEXT: ubfx r3, r2, #4, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #1, #1
+; CHECK-NEXT: ubfx r3, r2, #8, #1
+; CHECK-NEXT: ubfx r2, r2, #12, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #2, #1
+; CHECK-NEXT: rsbs r2, r2, #0
+; CHECK-NEXT: bfi r1, r2, #3, #1
+; CHECK-NEXT: and r1, r1, #15
; CHECK-NEXT: lsls r2, r1, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: vmovne r2, s0
@@ -240,11 +336,23 @@ define void @foo_zext_v4i32_v4i16(<4 x i
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: add r3, sp, #4
+; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vcmp.s32 gt, q0, zr
; CHECK-NEXT: @ implicit-def: $q0
-; CHECK-NEXT: vstr p0, [r3]
-; CHECK-NEXT: ldrb.w r1, [sp, #4]
+; CHECK-NEXT: vmrs r12, p0
+; CHECK-NEXT: and r1, r12, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #0, #1
+; CHECK-NEXT: ubfx r1, r12, #4, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #1, #1
+; CHECK-NEXT: ubfx r1, r12, #8, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #2, #1
+; CHECK-NEXT: ubfx r1, r12, #12, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #3, #1
+; CHECK-NEXT: and r1, r3, #15
; CHECK-NEXT: lsls r3, r1, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: ldrhne r3, [r2]
@@ -261,10 +369,22 @@ define void @foo_zext_v4i32_v4i16(<4 x i
; CHECK-NEXT: itt mi
; CHECK-NEXT: ldrhmi r1, [r2, #6]
; CHECK-NEXT: vmovmi.32 q0[3], r1
-; CHECK-NEXT: mov r1, sp
+; CHECK-NEXT: vmrs r2, p0
+; CHECK-NEXT: movs r1, #0
; CHECK-NEXT: vmovlb.u16 q0, q0
-; CHECK-NEXT: vstr p0, [r1]
-; CHECK-NEXT: ldrb.w r1, [sp]
+; CHECK-NEXT: and r3, r2, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #0, #1
+; CHECK-NEXT: ubfx r3, r2, #4, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #1, #1
+; CHECK-NEXT: ubfx r3, r2, #8, #1
+; CHECK-NEXT: ubfx r2, r2, #12, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #2, #1
+; CHECK-NEXT: rsbs r2, r2, #0
+; CHECK-NEXT: bfi r1, r2, #3, #1
+; CHECK-NEXT: and r1, r1, #15
; CHECK-NEXT: lsls r2, r1, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: vmovne r2, s0
@@ -298,12 +418,36 @@ define void @foo_v8i16_v8i16(<8 x i16> *
; CHECK-NEXT: .pad #16
; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: add r3, sp, #8
+; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vcmp.s16 gt, q0, zr
; CHECK-NEXT: @ implicit-def: $q0
-; CHECK-NEXT: vstr p0, [r3]
-; CHECK-NEXT: ldrb.w r1, [sp, #8]
-; CHECK-NEXT: lsls r3, r1, #31
+; CHECK-NEXT: vmrs r12, p0
+; CHECK-NEXT: and r1, r12, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #0, #1
+; CHECK-NEXT: ubfx r1, r12, #2, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #1, #1
+; CHECK-NEXT: ubfx r1, r12, #4, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #2, #1
+; CHECK-NEXT: ubfx r1, r12, #6, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #3, #1
+; CHECK-NEXT: ubfx r1, r12, #8, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #4, #1
+; CHECK-NEXT: ubfx r1, r12, #10, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #5, #1
+; CHECK-NEXT: ubfx r1, r12, #12, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #6, #1
+; CHECK-NEXT: ubfx r1, r12, #14, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #7, #1
+; CHECK-NEXT: uxtb r1, r3
+; CHECK-NEXT: lsls r3, r3, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: ldrhne r3, [r2]
; CHECK-NEXT: vmovne.16 q0[0], r3
@@ -335,10 +479,34 @@ define void @foo_v8i16_v8i16(<8 x i16> *
; CHECK-NEXT: itt mi
; CHECK-NEXT: ldrhmi r1, [r2, #14]
; CHECK-NEXT: vmovmi.16 q0[7], r1
-; CHECK-NEXT: mov r1, sp
-; CHECK-NEXT: vstr p0, [r1]
-; CHECK-NEXT: ldrb.w r1, [sp]
-; CHECK-NEXT: lsls r2, r1, #31
+; CHECK-NEXT: movs r2, #0
+; CHECK-NEXT: vmrs r1, p0
+; CHECK-NEXT: and r3, r1, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #0, #1
+; CHECK-NEXT: ubfx r3, r1, #2, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #1, #1
+; CHECK-NEXT: ubfx r3, r1, #4, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #2, #1
+; CHECK-NEXT: ubfx r3, r1, #6, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #3, #1
+; CHECK-NEXT: ubfx r3, r1, #8, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #4, #1
+; CHECK-NEXT: ubfx r3, r1, #10, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #5, #1
+; CHECK-NEXT: ubfx r3, r1, #12, #1
+; CHECK-NEXT: ubfx r1, r1, #14, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #6, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r2, r1, #7, #1
+; CHECK-NEXT: uxtb r1, r2
+; CHECK-NEXT: lsls r2, r2, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: vmovne.u16 r2, q0[0]
; CHECK-NEXT: strhne r2, [r0]
@@ -386,12 +554,36 @@ define void @foo_sext_v8i16_v8i8(<8 x i1
; CHECK-NEXT: .pad #16
; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: add r3, sp, #8
+; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vcmp.s16 gt, q0, zr
; CHECK-NEXT: @ implicit-def: $q0
-; CHECK-NEXT: vstr p0, [r3]
-; CHECK-NEXT: ldrb.w r1, [sp, #8]
-; CHECK-NEXT: lsls r3, r1, #31
+; CHECK-NEXT: vmrs r12, p0
+; CHECK-NEXT: and r1, r12, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #0, #1
+; CHECK-NEXT: ubfx r1, r12, #2, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #1, #1
+; CHECK-NEXT: ubfx r1, r12, #4, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #2, #1
+; CHECK-NEXT: ubfx r1, r12, #6, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #3, #1
+; CHECK-NEXT: ubfx r1, r12, #8, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #4, #1
+; CHECK-NEXT: ubfx r1, r12, #10, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #5, #1
+; CHECK-NEXT: ubfx r1, r12, #12, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #6, #1
+; CHECK-NEXT: ubfx r1, r12, #14, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #7, #1
+; CHECK-NEXT: uxtb r1, r3
+; CHECK-NEXT: lsls r3, r3, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: ldrbne r3, [r2]
; CHECK-NEXT: vmovne.16 q0[0], r3
@@ -423,11 +615,35 @@ define void @foo_sext_v8i16_v8i8(<8 x i1
; CHECK-NEXT: itt mi
; CHECK-NEXT: ldrbmi r1, [r2, #7]
; CHECK-NEXT: vmovmi.16 q0[7], r1
-; CHECK-NEXT: mov r1, sp
+; CHECK-NEXT: movs r2, #0
+; CHECK-NEXT: vmrs r1, p0
; CHECK-NEXT: vmovlb.s8 q0, q0
-; CHECK-NEXT: vstr p0, [r1]
-; CHECK-NEXT: ldrb.w r1, [sp]
-; CHECK-NEXT: lsls r2, r1, #31
+; CHECK-NEXT: and r3, r1, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #0, #1
+; CHECK-NEXT: ubfx r3, r1, #2, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #1, #1
+; CHECK-NEXT: ubfx r3, r1, #4, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #2, #1
+; CHECK-NEXT: ubfx r3, r1, #6, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #3, #1
+; CHECK-NEXT: ubfx r3, r1, #8, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #4, #1
+; CHECK-NEXT: ubfx r3, r1, #10, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #5, #1
+; CHECK-NEXT: ubfx r3, r1, #12, #1
+; CHECK-NEXT: ubfx r1, r1, #14, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #6, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r2, r1, #7, #1
+; CHECK-NEXT: uxtb r1, r2
+; CHECK-NEXT: lsls r2, r2, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: vmovne.u16 r2, q0[0]
; CHECK-NEXT: strhne r2, [r0]
@@ -476,12 +692,36 @@ define void @foo_zext_v8i16_v8i8(<8 x i1
; CHECK-NEXT: .pad #16
; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: add r3, sp, #8
+; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vcmp.s16 gt, q0, zr
; CHECK-NEXT: @ implicit-def: $q0
-; CHECK-NEXT: vstr p0, [r3]
-; CHECK-NEXT: ldrb.w r1, [sp, #8]
-; CHECK-NEXT: lsls r3, r1, #31
+; CHECK-NEXT: vmrs r12, p0
+; CHECK-NEXT: and r1, r12, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #0, #1
+; CHECK-NEXT: ubfx r1, r12, #2, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #1, #1
+; CHECK-NEXT: ubfx r1, r12, #4, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #2, #1
+; CHECK-NEXT: ubfx r1, r12, #6, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #3, #1
+; CHECK-NEXT: ubfx r1, r12, #8, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #4, #1
+; CHECK-NEXT: ubfx r1, r12, #10, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #5, #1
+; CHECK-NEXT: ubfx r1, r12, #12, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #6, #1
+; CHECK-NEXT: ubfx r1, r12, #14, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #7, #1
+; CHECK-NEXT: uxtb r1, r3
+; CHECK-NEXT: lsls r3, r3, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: ldrbne r3, [r2]
; CHECK-NEXT: vmovne.16 q0[0], r3
@@ -513,11 +753,35 @@ define void @foo_zext_v8i16_v8i8(<8 x i1
; CHECK-NEXT: itt mi
; CHECK-NEXT: ldrbmi r1, [r2, #7]
; CHECK-NEXT: vmovmi.16 q0[7], r1
-; CHECK-NEXT: mov r1, sp
+; CHECK-NEXT: movs r2, #0
+; CHECK-NEXT: vmrs r1, p0
; CHECK-NEXT: vmovlb.u8 q0, q0
-; CHECK-NEXT: vstr p0, [r1]
-; CHECK-NEXT: ldrb.w r1, [sp]
-; CHECK-NEXT: lsls r2, r1, #31
+; CHECK-NEXT: and r3, r1, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #0, #1
+; CHECK-NEXT: ubfx r3, r1, #2, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #1, #1
+; CHECK-NEXT: ubfx r3, r1, #4, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #2, #1
+; CHECK-NEXT: ubfx r3, r1, #6, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #3, #1
+; CHECK-NEXT: ubfx r3, r1, #8, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #4, #1
+; CHECK-NEXT: ubfx r3, r1, #10, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #5, #1
+; CHECK-NEXT: ubfx r3, r1, #12, #1
+; CHECK-NEXT: ubfx r1, r1, #14, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #6, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r2, r1, #7, #1
+; CHECK-NEXT: uxtb r1, r2
+; CHECK-NEXT: lsls r2, r2, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: vmovne.u16 r2, q0[0]
; CHECK-NEXT: strhne r2, [r0]
@@ -573,13 +837,12 @@ define void @foo_v16i8_v16i8(<16 x i8> *
; CHECK-NEXT: bfc r4, #0, #4
; CHECK-NEXT: mov sp, r4
; CHECK-NEXT: vldrb.u8 q0, [r1]
-; CHECK-NEXT: add r3, sp, #16
; CHECK-NEXT: sub.w r4, r7, #8
; CHECK-NEXT: vcmp.s8 gt, q0, zr
; CHECK-NEXT: @ implicit-def: $q0
-; CHECK-NEXT: vstr p0, [r3]
-; CHECK-NEXT: ldrh.w r1, [sp, #16]
-; CHECK-NEXT: lsls r3, r1, #31
+; CHECK-NEXT: vmrs r3, p0
+; CHECK-NEXT: uxth r1, r3
+; CHECK-NEXT: lsls r3, r3, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: ldrbne r3, [r2]
; CHECK-NEXT: vmovne.8 q0[0], r3
@@ -643,10 +906,9 @@ define void @foo_v16i8_v16i8(<16 x i8> *
; CHECK-NEXT: itt mi
; CHECK-NEXT: ldrbmi r1, [r2, #15]
; CHECK-NEXT: vmovmi.8 q0[15], r1
-; CHECK-NEXT: mov r1, sp
-; CHECK-NEXT: vstr p0, [r1]
-; CHECK-NEXT: ldrh.w r1, [sp]
-; CHECK-NEXT: lsls r2, r1, #31
+; CHECK-NEXT: vmrs r2, p0
+; CHECK-NEXT: uxth r1, r2
+; CHECK-NEXT: lsls r2, r2, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: vmovne.u8 r2, q0[0]
; CHECK-NEXT: strbne r2, [r0]
@@ -726,12 +988,36 @@ define void @foo_trunc_v8i8_v8i16(<8 x i
; CHECK-NEXT: .pad #16
; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: add r3, sp, #8
+; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vcmp.s16 gt, q0, zr
; CHECK-NEXT: @ implicit-def: $q0
-; CHECK-NEXT: vstr p0, [r3]
-; CHECK-NEXT: ldrb.w r1, [sp, #8]
-; CHECK-NEXT: lsls r3, r1, #31
+; CHECK-NEXT: vmrs r12, p0
+; CHECK-NEXT: and r1, r12, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #0, #1
+; CHECK-NEXT: ubfx r1, r12, #2, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #1, #1
+; CHECK-NEXT: ubfx r1, r12, #4, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #2, #1
+; CHECK-NEXT: ubfx r1, r12, #6, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #3, #1
+; CHECK-NEXT: ubfx r1, r12, #8, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #4, #1
+; CHECK-NEXT: ubfx r1, r12, #10, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #5, #1
+; CHECK-NEXT: ubfx r1, r12, #12, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #6, #1
+; CHECK-NEXT: ubfx r1, r12, #14, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #7, #1
+; CHECK-NEXT: uxtb r1, r3
+; CHECK-NEXT: lsls r3, r3, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: ldrhne r3, [r2]
; CHECK-NEXT: vmovne.16 q0[0], r3
@@ -763,10 +1049,34 @@ define void @foo_trunc_v8i8_v8i16(<8 x i
; CHECK-NEXT: itt mi
; CHECK-NEXT: ldrhmi r1, [r2, #14]
; CHECK-NEXT: vmovmi.16 q0[7], r1
-; CHECK-NEXT: mov r1, sp
-; CHECK-NEXT: vstr p0, [r1]
-; CHECK-NEXT: ldrb.w r1, [sp]
-; CHECK-NEXT: lsls r2, r1, #31
+; CHECK-NEXT: movs r2, #0
+; CHECK-NEXT: vmrs r1, p0
+; CHECK-NEXT: and r3, r1, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #0, #1
+; CHECK-NEXT: ubfx r3, r1, #2, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #1, #1
+; CHECK-NEXT: ubfx r3, r1, #4, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #2, #1
+; CHECK-NEXT: ubfx r3, r1, #6, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #3, #1
+; CHECK-NEXT: ubfx r3, r1, #8, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #4, #1
+; CHECK-NEXT: ubfx r3, r1, #10, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #5, #1
+; CHECK-NEXT: ubfx r3, r1, #12, #1
+; CHECK-NEXT: ubfx r1, r1, #14, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #6, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r2, r1, #7, #1
+; CHECK-NEXT: uxtb r1, r2
+; CHECK-NEXT: lsls r2, r2, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: vmovne.u16 r2, q0[0]
; CHECK-NEXT: strbne r2, [r0]
@@ -815,11 +1125,23 @@ define void @foo_trunc_v4i8_v4i32(<4 x i
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: add r3, sp, #4
+; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vcmp.s32 gt, q0, zr
; CHECK-NEXT: @ implicit-def: $q0
-; CHECK-NEXT: vstr p0, [r3]
-; CHECK-NEXT: ldrb.w r1, [sp, #4]
+; CHECK-NEXT: vmrs r12, p0
+; CHECK-NEXT: and r1, r12, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #0, #1
+; CHECK-NEXT: ubfx r1, r12, #4, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #1, #1
+; CHECK-NEXT: ubfx r1, r12, #8, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #2, #1
+; CHECK-NEXT: ubfx r1, r12, #12, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #3, #1
+; CHECK-NEXT: and r1, r3, #15
; CHECK-NEXT: lsls r3, r1, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: ldrne r3, [r2]
@@ -836,9 +1158,21 @@ define void @foo_trunc_v4i8_v4i32(<4 x i
; CHECK-NEXT: itt mi
; CHECK-NEXT: ldrmi r1, [r2, #12]
; CHECK-NEXT: vmovmi.32 q0[3], r1
-; CHECK-NEXT: mov r1, sp
-; CHECK-NEXT: vstr p0, [r1]
-; CHECK-NEXT: ldrb.w r1, [sp]
+; CHECK-NEXT: vmrs r2, p0
+; CHECK-NEXT: movs r1, #0
+; CHECK-NEXT: and r3, r2, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #0, #1
+; CHECK-NEXT: ubfx r3, r2, #4, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #1, #1
+; CHECK-NEXT: ubfx r3, r2, #8, #1
+; CHECK-NEXT: ubfx r2, r2, #12, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #2, #1
+; CHECK-NEXT: rsbs r2, r2, #0
+; CHECK-NEXT: bfi r1, r2, #3, #1
+; CHECK-NEXT: and r1, r1, #15
; CHECK-NEXT: lsls r2, r1, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: vmovne r2, s0
@@ -872,11 +1206,23 @@ define void @foo_trunc_v4i16_v4i32(<4 x
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: add r3, sp, #4
+; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vcmp.s32 gt, q0, zr
; CHECK-NEXT: @ implicit-def: $q0
-; CHECK-NEXT: vstr p0, [r3]
-; CHECK-NEXT: ldrb.w r1, [sp, #4]
+; CHECK-NEXT: vmrs r12, p0
+; CHECK-NEXT: and r1, r12, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #0, #1
+; CHECK-NEXT: ubfx r1, r12, #4, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #1, #1
+; CHECK-NEXT: ubfx r1, r12, #8, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #2, #1
+; CHECK-NEXT: ubfx r1, r12, #12, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #3, #1
+; CHECK-NEXT: and r1, r3, #15
; CHECK-NEXT: lsls r3, r1, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: ldrne r3, [r2]
@@ -893,9 +1239,21 @@ define void @foo_trunc_v4i16_v4i32(<4 x
; CHECK-NEXT: itt mi
; CHECK-NEXT: ldrmi r1, [r2, #12]
; CHECK-NEXT: vmovmi.32 q0[3], r1
-; CHECK-NEXT: mov r1, sp
-; CHECK-NEXT: vstr p0, [r1]
-; CHECK-NEXT: ldrb.w r1, [sp]
+; CHECK-NEXT: vmrs r2, p0
+; CHECK-NEXT: movs r1, #0
+; CHECK-NEXT: and r3, r2, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #0, #1
+; CHECK-NEXT: ubfx r3, r2, #4, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #1, #1
+; CHECK-NEXT: ubfx r3, r2, #8, #1
+; CHECK-NEXT: ubfx r2, r2, #12, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #2, #1
+; CHECK-NEXT: rsbs r2, r2, #0
+; CHECK-NEXT: bfi r1, r2, #3, #1
+; CHECK-NEXT: and r1, r1, #15
; CHECK-NEXT: lsls r2, r1, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: vmovne r2, s0
@@ -929,11 +1287,23 @@ define void @foo_v4f32_v4f32(<4 x float>
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: add r3, sp, #4
+; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vcmp.s32 gt, q0, zr
; CHECK-NEXT: @ implicit-def: $q0
-; CHECK-NEXT: vstr p0, [r3]
-; CHECK-NEXT: ldrb.w r1, [sp, #4]
+; CHECK-NEXT: vmrs r12, p0
+; CHECK-NEXT: and r1, r12, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #0, #1
+; CHECK-NEXT: ubfx r1, r12, #4, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #1, #1
+; CHECK-NEXT: ubfx r1, r12, #8, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #2, #1
+; CHECK-NEXT: ubfx r1, r12, #12, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #3, #1
+; CHECK-NEXT: and r1, r3, #15
; CHECK-NEXT: lsls r3, r1, #31
; CHECK-NEXT: it ne
; CHECK-NEXT: vldrne s0, [r2]
@@ -946,9 +1316,21 @@ define void @foo_v4f32_v4f32(<4 x float>
; CHECK-NEXT: lsls r1, r1, #28
; CHECK-NEXT: it mi
; CHECK-NEXT: vldrmi s3, [r2, #12]
-; CHECK-NEXT: mov r1, sp
-; CHECK-NEXT: vstr p0, [r1]
-; CHECK-NEXT: ldrb.w r1, [sp]
+; CHECK-NEXT: vmrs r2, p0
+; CHECK-NEXT: movs r1, #0
+; CHECK-NEXT: and r3, r2, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #0, #1
+; CHECK-NEXT: ubfx r3, r2, #4, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #1, #1
+; CHECK-NEXT: ubfx r3, r2, #8, #1
+; CHECK-NEXT: ubfx r2, r2, #12, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #2, #1
+; CHECK-NEXT: rsbs r2, r2, #0
+; CHECK-NEXT: bfi r1, r2, #3, #1
+; CHECK-NEXT: and r1, r1, #15
; CHECK-NEXT: lsls r2, r1, #31
; CHECK-NEXT: it ne
; CHECK-NEXT: vstrne s0, [r0]
@@ -977,12 +1359,36 @@ define void @foo_v8f16_v8f16(<8 x half>
; CHECK-NEXT: .pad #16
; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: add r3, sp, #8
+; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vcmp.s16 gt, q0, zr
; CHECK-NEXT: @ implicit-def: $q0
-; CHECK-NEXT: vstr p0, [r3]
-; CHECK-NEXT: ldrb.w r1, [sp, #8]
-; CHECK-NEXT: lsls r3, r1, #31
+; CHECK-NEXT: vmrs r12, p0
+; CHECK-NEXT: and r1, r12, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #0, #1
+; CHECK-NEXT: ubfx r1, r12, #2, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #1, #1
+; CHECK-NEXT: ubfx r1, r12, #4, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #2, #1
+; CHECK-NEXT: ubfx r1, r12, #6, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #3, #1
+; CHECK-NEXT: ubfx r1, r12, #8, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #4, #1
+; CHECK-NEXT: ubfx r1, r12, #10, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #5, #1
+; CHECK-NEXT: ubfx r1, r12, #12, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #6, #1
+; CHECK-NEXT: ubfx r1, r12, #14, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r1, #7, #1
+; CHECK-NEXT: uxtb r1, r3
+; CHECK-NEXT: lsls r3, r3, #31
; CHECK-NEXT: bne .LBB13_18
; CHECK-NEXT: @ %bb.1: @ %else
; CHECK-NEXT: lsls r3, r1, #30
@@ -1010,10 +1416,34 @@ define void @foo_v8f16_v8f16(<8 x half>
; CHECK-NEXT: vmov r1, s4
; CHECK-NEXT: vmov.16 q0[7], r1
; CHECK-NEXT: .LBB13_9: @ %else20
-; CHECK-NEXT: mov r1, sp
-; CHECK-NEXT: vstr p0, [r1]
-; CHECK-NEXT: ldrb.w r1, [sp]
-; CHECK-NEXT: lsls r2, r1, #31
+; CHECK-NEXT: vmrs r1, p0
+; CHECK-NEXT: movs r2, #0
+; CHECK-NEXT: and r3, r1, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #0, #1
+; CHECK-NEXT: ubfx r3, r1, #2, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #1, #1
+; CHECK-NEXT: ubfx r3, r1, #4, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #2, #1
+; CHECK-NEXT: ubfx r3, r1, #6, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #3, #1
+; CHECK-NEXT: ubfx r3, r1, #8, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #4, #1
+; CHECK-NEXT: ubfx r3, r1, #10, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #5, #1
+; CHECK-NEXT: ubfx r3, r1, #12, #1
+; CHECK-NEXT: ubfx r1, r1, #14, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #6, #1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r2, r1, #7, #1
+; CHECK-NEXT: uxtb r1, r2
+; CHECK-NEXT: lsls r2, r2, #31
; CHECK-NEXT: bne .LBB13_25
; CHECK-NEXT: @ %bb.10: @ %else23
; CHECK-NEXT: lsls r2, r1, #30
@@ -1072,13 +1502,13 @@ define void @foo_v8f16_v8f16(<8 x half>
; CHECK-NEXT: vmov r3, s4
; CHECK-NEXT: vmov.16 q0[5], r3
; CHECK-NEXT: lsls r3, r1, #25
-; CHECK-NEXT: bpl .LBB13_7
+; CHECK-NEXT: bpl.w .LBB13_7
; CHECK-NEXT: .LBB13_24: @ %cond.load16
; CHECK-NEXT: vldr.16 s4, [r2, #12]
; CHECK-NEXT: vmov r3, s4
; CHECK-NEXT: vmov.16 q0[6], r3
; CHECK-NEXT: lsls r1, r1, #24
-; CHECK-NEXT: bmi .LBB13_8
+; CHECK-NEXT: bmi.w .LBB13_8
; CHECK-NEXT: b .LBB13_9
; CHECK-NEXT: .LBB13_25: @ %cond.store
; CHECK-NEXT: vstr.16 s0, [r0]
Modified: llvm/trunk/test/CodeGen/Thumb2/mve-masked-load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-masked-load.ll?rev=371419&r1=371418&r2=371419&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-masked-load.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-masked-load.ll Mon Sep 9 09:35:49 2019
@@ -7,17 +7,29 @@ define arm_aapcs_vfpcc <4 x i32> @masked
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .pad #4
; CHECK-LE-NEXT: sub sp, #4
-; CHECK-LE-NEXT: mov r1, sp
; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr
-; CHECK-LE-NEXT: vstr p0, [r1]
-; CHECK-LE-NEXT: ldrb.w r1, [sp]
+; CHECK-LE-NEXT: movs r2, #0
+; CHECK-LE-NEXT: vmrs r1, p0
+; CHECK-LE-NEXT: mov.w r12, #0
+; CHECK-LE-NEXT: and r3, r1, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #8, #1
+; CHECK-LE-NEXT: ubfx r1, r1, #12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #2, #1
+; CHECK-LE-NEXT: rsbs r1, r1, #0
+; CHECK-LE-NEXT: bfi r2, r1, #3, #1
+; CHECK-LE-NEXT: and r1, r2, #15
; CHECK-LE-NEXT: lsls r2, r1, #31
; CHECK-LE-NEXT: beq .LBB0_2
; CHECK-LE-NEXT: @ %bb.1: @ %cond.load
-; CHECK-LE-NEXT: movs r2, #0
-; CHECK-LE-NEXT: ldr r3, [r0]
-; CHECK-LE-NEXT: vdup.32 q0, r2
-; CHECK-LE-NEXT: vmov.32 q0[0], r3
+; CHECK-LE-NEXT: ldr r2, [r0]
+; CHECK-LE-NEXT: vdup.32 q0, r12
+; CHECK-LE-NEXT: vmov.32 q0[0], r2
; CHECK-LE-NEXT: b .LBB0_3
; CHECK-LE-NEXT: .LBB0_2:
; CHECK-LE-NEXT: vmov.i32 q0, #0x0
@@ -42,17 +54,29 @@ define arm_aapcs_vfpcc <4 x i32> @masked
; CHECK-BE-NEXT: .pad #4
; CHECK-BE-NEXT: sub sp, #4
; CHECK-BE-NEXT: vrev64.32 q1, q0
-; CHECK-BE-NEXT: mov r1, sp
+; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr
-; CHECK-BE-NEXT: vstr p0, [r1]
-; CHECK-BE-NEXT: ldrb.w r1, [sp]
+; CHECK-BE-NEXT: mov.w r12, #0
+; CHECK-BE-NEXT: vmrs r1, p0
+; CHECK-BE-NEXT: and r3, r1, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #8, #1
+; CHECK-BE-NEXT: ubfx r1, r1, #12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #2, #1
+; CHECK-BE-NEXT: rsbs r1, r1, #0
+; CHECK-BE-NEXT: bfi r2, r1, #3, #1
+; CHECK-BE-NEXT: and r1, r2, #15
; CHECK-BE-NEXT: lsls r2, r1, #31
; CHECK-BE-NEXT: beq .LBB0_2
; CHECK-BE-NEXT: @ %bb.1: @ %cond.load
-; CHECK-BE-NEXT: movs r2, #0
-; CHECK-BE-NEXT: ldr r3, [r0]
-; CHECK-BE-NEXT: vdup.32 q1, r2
-; CHECK-BE-NEXT: vmov.32 q1[0], r3
+; CHECK-BE-NEXT: ldr r2, [r0]
+; CHECK-BE-NEXT: vdup.32 q1, r12
+; CHECK-BE-NEXT: vmov.32 q1[0], r2
; CHECK-BE-NEXT: b .LBB0_3
; CHECK-BE-NEXT: .LBB0_2:
; CHECK-BE-NEXT: vmov.i32 q1, #0x0
@@ -84,10 +108,22 @@ define arm_aapcs_vfpcc <4 x i32> @masked
; CHECK-LE-NEXT: .pad #4
; CHECK-LE-NEXT: sub sp, #4
; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr
-; CHECK-LE-NEXT: mov r1, sp
-; CHECK-LE-NEXT: vstr p0, [r1]
+; CHECK-LE-NEXT: movs r1, #0
+; CHECK-LE-NEXT: vmrs r2, p0
; CHECK-LE-NEXT: @ implicit-def: $q0
-; CHECK-LE-NEXT: ldrb.w r1, [sp]
+; CHECK-LE-NEXT: and r3, r2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #8, #1
+; CHECK-LE-NEXT: ubfx r2, r2, #12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #2, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r1, r2, #3, #1
+; CHECK-LE-NEXT: and r1, r1, #15
; CHECK-LE-NEXT: lsls r2, r1, #31
; CHECK-LE-NEXT: itt ne
; CHECK-LE-NEXT: ldrne r2, [r0]
@@ -112,11 +148,23 @@ define arm_aapcs_vfpcc <4 x i32> @masked
; CHECK-BE-NEXT: .pad #4
; CHECK-BE-NEXT: sub sp, #4
; CHECK-BE-NEXT: vrev64.32 q1, q0
-; CHECK-BE-NEXT: mov r1, sp
+; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr
; CHECK-BE-NEXT: @ implicit-def: $q1
-; CHECK-BE-NEXT: vstr p0, [r1]
-; CHECK-BE-NEXT: ldrb.w r1, [sp]
+; CHECK-BE-NEXT: vmrs r2, p0
+; CHECK-BE-NEXT: and r3, r2, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r2, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r2, #8, #1
+; CHECK-BE-NEXT: ubfx r2, r2, #12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #2, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r1, r2, #3, #1
+; CHECK-BE-NEXT: and r1, r1, #15
; CHECK-BE-NEXT: lsls r2, r1, #31
; CHECK-BE-NEXT: itt ne
; CHECK-BE-NEXT: ldrne r2, [r0]
@@ -148,10 +196,22 @@ define arm_aapcs_vfpcc <4 x i32> @masked
; CHECK-LE-NEXT: .pad #4
; CHECK-LE-NEXT: sub sp, #4
; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr
-; CHECK-LE-NEXT: mov r1, sp
-; CHECK-LE-NEXT: vstr p0, [r1]
+; CHECK-LE-NEXT: movs r1, #0
+; CHECK-LE-NEXT: vmrs r2, p0
; CHECK-LE-NEXT: @ implicit-def: $q0
-; CHECK-LE-NEXT: ldrb.w r1, [sp]
+; CHECK-LE-NEXT: and r3, r2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #8, #1
+; CHECK-LE-NEXT: ubfx r2, r2, #12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #2, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r1, r2, #3, #1
+; CHECK-LE-NEXT: and r1, r1, #15
; CHECK-LE-NEXT: lsls r2, r1, #31
; CHECK-LE-NEXT: itt ne
; CHECK-LE-NEXT: ldrne r2, [r0]
@@ -176,11 +236,23 @@ define arm_aapcs_vfpcc <4 x i32> @masked
; CHECK-BE-NEXT: .pad #4
; CHECK-BE-NEXT: sub sp, #4
; CHECK-BE-NEXT: vrev64.32 q1, q0
-; CHECK-BE-NEXT: mov r1, sp
+; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr
; CHECK-BE-NEXT: @ implicit-def: $q1
-; CHECK-BE-NEXT: vstr p0, [r1]
-; CHECK-BE-NEXT: ldrb.w r1, [sp]
+; CHECK-BE-NEXT: vmrs r2, p0
+; CHECK-BE-NEXT: and r3, r2, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r2, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r2, #8, #1
+; CHECK-BE-NEXT: ubfx r2, r2, #12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #2, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r1, r2, #3, #1
+; CHECK-BE-NEXT: and r1, r1, #15
; CHECK-BE-NEXT: lsls r2, r1, #31
; CHECK-BE-NEXT: itt ne
; CHECK-BE-NEXT: ldrne r2, [r0]
@@ -211,10 +283,22 @@ define arm_aapcs_vfpcc <4 x i32> @masked
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .pad #4
; CHECK-LE-NEXT: sub sp, #4
-; CHECK-LE-NEXT: mov r1, sp
; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr
-; CHECK-LE-NEXT: vstr p0, [r1]
-; CHECK-LE-NEXT: ldrb.w r1, [sp]
+; CHECK-LE-NEXT: movs r1, #0
+; CHECK-LE-NEXT: vmrs r2, p0
+; CHECK-LE-NEXT: and r3, r2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #8, #1
+; CHECK-LE-NEXT: ubfx r2, r2, #12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #2, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r1, r2, #3, #1
+; CHECK-LE-NEXT: and r1, r1, #15
; CHECK-LE-NEXT: lsls r2, r1, #31
; CHECK-LE-NEXT: itt ne
; CHECK-LE-NEXT: ldrne r2, [r0]
@@ -239,10 +323,22 @@ define arm_aapcs_vfpcc <4 x i32> @masked
; CHECK-BE-NEXT: .pad #4
; CHECK-BE-NEXT: sub sp, #4
; CHECK-BE-NEXT: vrev64.32 q1, q0
-; CHECK-BE-NEXT: mov r1, sp
+; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr
-; CHECK-BE-NEXT: vstr p0, [r1]
-; CHECK-BE-NEXT: ldrb.w r1, [sp]
+; CHECK-BE-NEXT: vmrs r2, p0
+; CHECK-BE-NEXT: and r3, r2, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r2, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r2, #8, #1
+; CHECK-BE-NEXT: ubfx r2, r2, #12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #2, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r1, r2, #3, #1
+; CHECK-BE-NEXT: and r1, r1, #15
; CHECK-BE-NEXT: lsls r2, r1, #31
; CHECK-BE-NEXT: itt ne
; CHECK-BE-NEXT: ldrne r2, [r0]
@@ -274,11 +370,23 @@ define arm_aapcs_vfpcc i8* @masked_v4i32
; CHECK-LE-NEXT: .pad #4
; CHECK-LE-NEXT: sub sp, #4
; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr
-; CHECK-LE-NEXT: mov r2, sp
-; CHECK-LE-NEXT: vstr p0, [r2]
+; CHECK-LE-NEXT: movs r2, #0
+; CHECK-LE-NEXT: vmrs r12, p0
; CHECK-LE-NEXT: @ implicit-def: $q0
; CHECK-LE-NEXT: adds r0, #4
-; CHECK-LE-NEXT: ldrb.w r2, [sp]
+; CHECK-LE-NEXT: and r3, r12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #8, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #2, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #3, #1
+; CHECK-LE-NEXT: and r2, r2, #15
; CHECK-LE-NEXT: lsls r3, r2, #31
; CHECK-LE-NEXT: itt ne
; CHECK-LE-NEXT: ldrne r3, [r0]
@@ -304,12 +412,24 @@ define arm_aapcs_vfpcc i8* @masked_v4i32
; CHECK-BE-NEXT: .pad #4
; CHECK-BE-NEXT: sub sp, #4
; CHECK-BE-NEXT: vrev64.32 q1, q0
-; CHECK-BE-NEXT: mov r2, sp
+; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr
; CHECK-BE-NEXT: @ implicit-def: $q0
; CHECK-BE-NEXT: adds r0, #4
-; CHECK-BE-NEXT: vstr p0, [r2]
-; CHECK-BE-NEXT: ldrb.w r2, [sp]
+; CHECK-BE-NEXT: vmrs r12, p0
+; CHECK-BE-NEXT: and r3, r12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #8, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #2, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #3, #1
+; CHECK-BE-NEXT: and r2, r2, #15
; CHECK-BE-NEXT: lsls r3, r2, #31
; CHECK-BE-NEXT: itt ne
; CHECK-BE-NEXT: ldrne r3, [r0]
@@ -345,11 +465,23 @@ define arm_aapcs_vfpcc i8* @masked_v4i32
; CHECK-LE-NEXT: .pad #4
; CHECK-LE-NEXT: sub sp, #4
; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr
-; CHECK-LE-NEXT: mov r2, sp
-; CHECK-LE-NEXT: vstr p0, [r2]
+; CHECK-LE-NEXT: movs r2, #0
+; CHECK-LE-NEXT: vmrs r12, p0
; CHECK-LE-NEXT: @ implicit-def: $q0
+; CHECK-LE-NEXT: and r3, r12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #8, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #2, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: add.w r12, r0, #4
-; CHECK-LE-NEXT: ldrb.w r3, [sp]
+; CHECK-LE-NEXT: bfi r2, r3, #3, #1
+; CHECK-LE-NEXT: and r3, r2, #15
; CHECK-LE-NEXT: lsls r2, r3, #31
; CHECK-LE-NEXT: itt ne
; CHECK-LE-NEXT: ldrne r2, [r0]
@@ -376,12 +508,24 @@ define arm_aapcs_vfpcc i8* @masked_v4i32
; CHECK-BE-NEXT: .pad #4
; CHECK-BE-NEXT: sub sp, #4
; CHECK-BE-NEXT: vrev64.32 q1, q0
-; CHECK-BE-NEXT: mov r2, sp
+; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr
; CHECK-BE-NEXT: @ implicit-def: $q0
+; CHECK-BE-NEXT: vmrs r12, p0
+; CHECK-BE-NEXT: and r3, r12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #8, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #2, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: add.w r12, r0, #4
-; CHECK-BE-NEXT: vstr p0, [r2]
-; CHECK-BE-NEXT: ldrb.w r3, [sp]
+; CHECK-BE-NEXT: bfi r2, r3, #3, #1
+; CHECK-BE-NEXT: and r3, r2, #15
; CHECK-BE-NEXT: lsls r2, r3, #31
; CHECK-BE-NEXT: itt ne
; CHECK-BE-NEXT: ldrne r2, [r0]
@@ -419,17 +563,41 @@ define arm_aapcs_vfpcc <8 x i16> @masked
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .pad #8
; CHECK-LE-NEXT: sub sp, #8
-; CHECK-LE-NEXT: mov r1, sp
; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr
-; CHECK-LE-NEXT: vstr p0, [r1]
-; CHECK-LE-NEXT: ldrb.w r1, [sp]
-; CHECK-LE-NEXT: lsls r2, r1, #31
+; CHECK-LE-NEXT: mov.w r12, #0
+; CHECK-LE-NEXT: vmrs r1, p0
+; CHECK-LE-NEXT: and r3, r1, #1
+; CHECK-LE-NEXT: rsbs r2, r3, #0
+; CHECK-LE-NEXT: movs r3, #0
+; CHECK-LE-NEXT: bfi r3, r2, #0, #1
+; CHECK-LE-NEXT: ubfx r2, r1, #2, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #1, #1
+; CHECK-LE-NEXT: ubfx r2, r1, #4, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #2, #1
+; CHECK-LE-NEXT: ubfx r2, r1, #6, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #3, #1
+; CHECK-LE-NEXT: ubfx r2, r1, #8, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #4, #1
+; CHECK-LE-NEXT: ubfx r2, r1, #10, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #5, #1
+; CHECK-LE-NEXT: ubfx r2, r1, #12, #1
+; CHECK-LE-NEXT: ubfx r1, r1, #14, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #6, #1
+; CHECK-LE-NEXT: rsbs r1, r1, #0
+; CHECK-LE-NEXT: bfi r3, r1, #7, #1
+; CHECK-LE-NEXT: uxtb r1, r3
+; CHECK-LE-NEXT: lsls r2, r3, #31
; CHECK-LE-NEXT: beq .LBB6_2
; CHECK-LE-NEXT: @ %bb.1: @ %cond.load
-; CHECK-LE-NEXT: movs r2, #0
-; CHECK-LE-NEXT: ldrh r3, [r0]
-; CHECK-LE-NEXT: vdup.16 q0, r2
-; CHECK-LE-NEXT: vmov.16 q0[0], r3
+; CHECK-LE-NEXT: ldrh r2, [r0]
+; CHECK-LE-NEXT: vdup.16 q0, r12
+; CHECK-LE-NEXT: vmov.16 q0[0], r2
; CHECK-LE-NEXT: b .LBB6_3
; CHECK-LE-NEXT: .LBB6_2:
; CHECK-LE-NEXT: vmov.i32 q0, #0x0
@@ -470,17 +638,41 @@ define arm_aapcs_vfpcc <8 x i16> @masked
; CHECK-BE-NEXT: .pad #8
; CHECK-BE-NEXT: sub sp, #8
; CHECK-BE-NEXT: vrev64.16 q1, q0
-; CHECK-BE-NEXT: mov r1, sp
+; CHECK-BE-NEXT: mov.w r12, #0
; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr
-; CHECK-BE-NEXT: vstr p0, [r1]
-; CHECK-BE-NEXT: ldrb.w r1, [sp]
-; CHECK-BE-NEXT: lsls r2, r1, #31
+; CHECK-BE-NEXT: vmrs r1, p0
+; CHECK-BE-NEXT: and r3, r1, #1
+; CHECK-BE-NEXT: rsbs r2, r3, #0
+; CHECK-BE-NEXT: movs r3, #0
+; CHECK-BE-NEXT: bfi r3, r2, #0, #1
+; CHECK-BE-NEXT: ubfx r2, r1, #2, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #1, #1
+; CHECK-BE-NEXT: ubfx r2, r1, #4, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #2, #1
+; CHECK-BE-NEXT: ubfx r2, r1, #6, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #3, #1
+; CHECK-BE-NEXT: ubfx r2, r1, #8, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #4, #1
+; CHECK-BE-NEXT: ubfx r2, r1, #10, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #5, #1
+; CHECK-BE-NEXT: ubfx r2, r1, #12, #1
+; CHECK-BE-NEXT: ubfx r1, r1, #14, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #6, #1
+; CHECK-BE-NEXT: rsbs r1, r1, #0
+; CHECK-BE-NEXT: bfi r3, r1, #7, #1
+; CHECK-BE-NEXT: uxtb r1, r3
+; CHECK-BE-NEXT: lsls r2, r3, #31
; CHECK-BE-NEXT: beq .LBB6_2
; CHECK-BE-NEXT: @ %bb.1: @ %cond.load
-; CHECK-BE-NEXT: movs r2, #0
-; CHECK-BE-NEXT: ldrh r3, [r0]
-; CHECK-BE-NEXT: vdup.16 q1, r2
-; CHECK-BE-NEXT: vmov.16 q1[0], r3
+; CHECK-BE-NEXT: ldrh r2, [r0]
+; CHECK-BE-NEXT: vdup.16 q1, r12
+; CHECK-BE-NEXT: vmov.16 q1[0], r2
; CHECK-BE-NEXT: b .LBB6_3
; CHECK-BE-NEXT: .LBB6_2:
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
@@ -529,11 +721,35 @@ define arm_aapcs_vfpcc <8 x i16> @masked
; CHECK-LE-NEXT: .pad #8
; CHECK-LE-NEXT: sub sp, #8
; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr
-; CHECK-LE-NEXT: mov r1, sp
-; CHECK-LE-NEXT: vstr p0, [r1]
+; CHECK-LE-NEXT: movs r2, #0
+; CHECK-LE-NEXT: vmrs r1, p0
; CHECK-LE-NEXT: @ implicit-def: $q0
-; CHECK-LE-NEXT: ldrb.w r1, [sp]
-; CHECK-LE-NEXT: lsls r2, r1, #31
+; CHECK-LE-NEXT: and r3, r1, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #2, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #6, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #3, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #8, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #4, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #10, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #5, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #12, #1
+; CHECK-LE-NEXT: ubfx r1, r1, #14, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #6, #1
+; CHECK-LE-NEXT: rsbs r1, r1, #0
+; CHECK-LE-NEXT: bfi r2, r1, #7, #1
+; CHECK-LE-NEXT: uxtb r1, r2
+; CHECK-LE-NEXT: lsls r2, r2, #31
; CHECK-LE-NEXT: itt ne
; CHECK-LE-NEXT: ldrhne r2, [r0]
; CHECK-LE-NEXT: vmovne.16 q0[0], r2
@@ -573,12 +789,36 @@ define arm_aapcs_vfpcc <8 x i16> @masked
; CHECK-BE-NEXT: .pad #8
; CHECK-BE-NEXT: sub sp, #8
; CHECK-BE-NEXT: vrev64.16 q1, q0
-; CHECK-BE-NEXT: mov r1, sp
+; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr
; CHECK-BE-NEXT: @ implicit-def: $q1
-; CHECK-BE-NEXT: vstr p0, [r1]
-; CHECK-BE-NEXT: ldrb.w r1, [sp]
-; CHECK-BE-NEXT: lsls r2, r1, #31
+; CHECK-BE-NEXT: vmrs r1, p0
+; CHECK-BE-NEXT: and r3, r1, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #2, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #2, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #6, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #3, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #8, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #4, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #10, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #5, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #12, #1
+; CHECK-BE-NEXT: ubfx r1, r1, #14, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #6, #1
+; CHECK-BE-NEXT: rsbs r1, r1, #0
+; CHECK-BE-NEXT: bfi r2, r1, #7, #1
+; CHECK-BE-NEXT: uxtb r1, r2
+; CHECK-BE-NEXT: lsls r2, r2, #31
; CHECK-BE-NEXT: itt ne
; CHECK-BE-NEXT: ldrhne r2, [r0]
; CHECK-BE-NEXT: vmovne.16 q1[0], r2
@@ -625,11 +865,35 @@ define arm_aapcs_vfpcc <8 x i16> @masked
; CHECK-LE-NEXT: .pad #8
; CHECK-LE-NEXT: sub sp, #8
; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr
-; CHECK-LE-NEXT: mov r1, sp
-; CHECK-LE-NEXT: vstr p0, [r1]
+; CHECK-LE-NEXT: movs r2, #0
+; CHECK-LE-NEXT: vmrs r1, p0
; CHECK-LE-NEXT: @ implicit-def: $q0
-; CHECK-LE-NEXT: ldrb.w r1, [sp]
-; CHECK-LE-NEXT: lsls r2, r1, #31
+; CHECK-LE-NEXT: and r3, r1, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #2, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #6, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #3, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #8, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #4, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #10, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #5, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #12, #1
+; CHECK-LE-NEXT: ubfx r1, r1, #14, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #6, #1
+; CHECK-LE-NEXT: rsbs r1, r1, #0
+; CHECK-LE-NEXT: bfi r2, r1, #7, #1
+; CHECK-LE-NEXT: uxtb r1, r2
+; CHECK-LE-NEXT: lsls r2, r2, #31
; CHECK-LE-NEXT: itt ne
; CHECK-LE-NEXT: ldrhne r2, [r0]
; CHECK-LE-NEXT: vmovne.16 q0[0], r2
@@ -669,12 +933,36 @@ define arm_aapcs_vfpcc <8 x i16> @masked
; CHECK-BE-NEXT: .pad #8
; CHECK-BE-NEXT: sub sp, #8
; CHECK-BE-NEXT: vrev64.16 q1, q0
-; CHECK-BE-NEXT: mov r1, sp
+; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr
; CHECK-BE-NEXT: @ implicit-def: $q1
-; CHECK-BE-NEXT: vstr p0, [r1]
-; CHECK-BE-NEXT: ldrb.w r1, [sp]
-; CHECK-BE-NEXT: lsls r2, r1, #31
+; CHECK-BE-NEXT: vmrs r1, p0
+; CHECK-BE-NEXT: and r3, r1, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #2, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #2, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #6, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #3, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #8, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #4, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #10, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #5, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #12, #1
+; CHECK-BE-NEXT: ubfx r1, r1, #14, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #6, #1
+; CHECK-BE-NEXT: rsbs r1, r1, #0
+; CHECK-BE-NEXT: bfi r2, r1, #7, #1
+; CHECK-BE-NEXT: uxtb r1, r2
+; CHECK-BE-NEXT: lsls r2, r2, #31
; CHECK-BE-NEXT: itt ne
; CHECK-BE-NEXT: ldrhne r2, [r0]
; CHECK-BE-NEXT: vmovne.16 q1[0], r2
@@ -720,11 +1008,35 @@ define arm_aapcs_vfpcc <8 x i16> @masked
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .pad #8
; CHECK-LE-NEXT: sub sp, #8
-; CHECK-LE-NEXT: mov r1, sp
; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr
-; CHECK-LE-NEXT: vstr p0, [r1]
-; CHECK-LE-NEXT: ldrb.w r1, [sp]
-; CHECK-LE-NEXT: lsls r2, r1, #31
+; CHECK-LE-NEXT: movs r2, #0
+; CHECK-LE-NEXT: vmrs r1, p0
+; CHECK-LE-NEXT: and r3, r1, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #2, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #6, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #3, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #8, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #4, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #10, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #5, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #12, #1
+; CHECK-LE-NEXT: ubfx r1, r1, #14, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #6, #1
+; CHECK-LE-NEXT: rsbs r1, r1, #0
+; CHECK-LE-NEXT: bfi r2, r1, #7, #1
+; CHECK-LE-NEXT: uxtb r1, r2
+; CHECK-LE-NEXT: lsls r2, r2, #31
; CHECK-LE-NEXT: itt ne
; CHECK-LE-NEXT: ldrhne r2, [r0]
; CHECK-LE-NEXT: vmovne.16 q0[0], r2
@@ -764,11 +1076,35 @@ define arm_aapcs_vfpcc <8 x i16> @masked
; CHECK-BE-NEXT: .pad #8
; CHECK-BE-NEXT: sub sp, #8
; CHECK-BE-NEXT: vrev64.16 q1, q0
-; CHECK-BE-NEXT: mov r1, sp
+; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr
-; CHECK-BE-NEXT: vstr p0, [r1]
-; CHECK-BE-NEXT: ldrb.w r1, [sp]
-; CHECK-BE-NEXT: lsls r2, r1, #31
+; CHECK-BE-NEXT: vmrs r1, p0
+; CHECK-BE-NEXT: and r3, r1, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #2, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #2, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #6, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #3, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #8, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #4, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #10, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #5, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #12, #1
+; CHECK-BE-NEXT: ubfx r1, r1, #14, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #6, #1
+; CHECK-BE-NEXT: rsbs r1, r1, #0
+; CHECK-BE-NEXT: bfi r2, r1, #7, #1
+; CHECK-BE-NEXT: uxtb r1, r2
+; CHECK-BE-NEXT: lsls r2, r2, #31
; CHECK-BE-NEXT: itt ne
; CHECK-BE-NEXT: ldrhne r2, [r0]
; CHECK-BE-NEXT: vmovne.16 q1[0], r2
@@ -817,12 +1153,36 @@ define i8* @masked_v8i16_preinc(i8* %x,
; CHECK-LE-NEXT: vldr d1, [sp, #8]
; CHECK-LE-NEXT: adds r0, #4
; CHECK-LE-NEXT: vmov d0, r2, r3
-; CHECK-LE-NEXT: mov r2, sp
+; CHECK-LE-NEXT: movs r3, #0
; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr
; CHECK-LE-NEXT: @ implicit-def: $q0
-; CHECK-LE-NEXT: vstr p0, [r2]
-; CHECK-LE-NEXT: ldrb.w r2, [sp]
-; CHECK-LE-NEXT: lsls r3, r2, #31
+; CHECK-LE-NEXT: vmrs r12, p0
+; CHECK-LE-NEXT: and r2, r12, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #0, #1
+; CHECK-LE-NEXT: ubfx r2, r12, #2, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #1, #1
+; CHECK-LE-NEXT: ubfx r2, r12, #4, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #2, #1
+; CHECK-LE-NEXT: ubfx r2, r12, #6, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #3, #1
+; CHECK-LE-NEXT: ubfx r2, r12, #8, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #4, #1
+; CHECK-LE-NEXT: ubfx r2, r12, #10, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #5, #1
+; CHECK-LE-NEXT: ubfx r2, r12, #12, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #6, #1
+; CHECK-LE-NEXT: ubfx r2, r12, #14, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #7, #1
+; CHECK-LE-NEXT: uxtb r2, r3
+; CHECK-LE-NEXT: lsls r3, r3, #31
; CHECK-LE-NEXT: itt ne
; CHECK-LE-NEXT: ldrhne r3, [r0]
; CHECK-LE-NEXT: vmovne.16 q0[0], r3
@@ -865,13 +1225,37 @@ define i8* @masked_v8i16_preinc(i8* %x,
; CHECK-BE-NEXT: vldr d1, [sp, #8]
; CHECK-BE-NEXT: adds r0, #4
; CHECK-BE-NEXT: vmov d0, r3, r2
-; CHECK-BE-NEXT: mov r2, sp
+; CHECK-BE-NEXT: movs r3, #0
; CHECK-BE-NEXT: vrev64.16 q1, q0
; CHECK-BE-NEXT: @ implicit-def: $q0
; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr
-; CHECK-BE-NEXT: vstr p0, [r2]
-; CHECK-BE-NEXT: ldrb.w r2, [sp]
-; CHECK-BE-NEXT: lsls r3, r2, #31
+; CHECK-BE-NEXT: vmrs r12, p0
+; CHECK-BE-NEXT: and r2, r12, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #0, #1
+; CHECK-BE-NEXT: ubfx r2, r12, #2, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #1, #1
+; CHECK-BE-NEXT: ubfx r2, r12, #4, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #2, #1
+; CHECK-BE-NEXT: ubfx r2, r12, #6, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #3, #1
+; CHECK-BE-NEXT: ubfx r2, r12, #8, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #4, #1
+; CHECK-BE-NEXT: ubfx r2, r12, #10, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #5, #1
+; CHECK-BE-NEXT: ubfx r2, r12, #12, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #6, #1
+; CHECK-BE-NEXT: ubfx r2, r12, #14, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #7, #1
+; CHECK-BE-NEXT: uxtb r2, r3
+; CHECK-BE-NEXT: lsls r3, r3, #31
; CHECK-BE-NEXT: itt ne
; CHECK-BE-NEXT: ldrhne r3, [r0]
; CHECK-BE-NEXT: vmovne.16 q0[0], r3
@@ -922,12 +1306,36 @@ define arm_aapcs_vfpcc i8* @masked_v8i16
; CHECK-LE-NEXT: .pad #8
; CHECK-LE-NEXT: sub sp, #8
; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr
-; CHECK-LE-NEXT: mov r2, sp
-; CHECK-LE-NEXT: vstr p0, [r2]
+; CHECK-LE-NEXT: movs r2, #0
+; CHECK-LE-NEXT: vmrs r12, p0
; CHECK-LE-NEXT: @ implicit-def: $q0
+; CHECK-LE-NEXT: and r3, r12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #2, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #6, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #3, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #8, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #4, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #10, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #5, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #6, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #14, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: add.w r12, r0, #4
-; CHECK-LE-NEXT: ldrb.w r3, [sp]
-; CHECK-LE-NEXT: lsls r2, r3, #31
+; CHECK-LE-NEXT: bfi r2, r3, #7, #1
+; CHECK-LE-NEXT: uxtb r3, r2
+; CHECK-LE-NEXT: lsls r2, r2, #31
; CHECK-LE-NEXT: itt ne
; CHECK-LE-NEXT: ldrhne r2, [r0]
; CHECK-LE-NEXT: vmovne.16 q0[0], r2
@@ -969,13 +1377,37 @@ define arm_aapcs_vfpcc i8* @masked_v8i16
; CHECK-BE-NEXT: .pad #8
; CHECK-BE-NEXT: sub sp, #8
; CHECK-BE-NEXT: vrev64.16 q1, q0
-; CHECK-BE-NEXT: mov r2, sp
+; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr
; CHECK-BE-NEXT: @ implicit-def: $q0
+; CHECK-BE-NEXT: vmrs r12, p0
+; CHECK-BE-NEXT: and r3, r12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #2, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #2, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #6, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #3, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #8, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #4, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #10, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #5, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #6, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #14, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: add.w r12, r0, #4
-; CHECK-BE-NEXT: vstr p0, [r2]
-; CHECK-BE-NEXT: ldrb.w r3, [sp]
-; CHECK-BE-NEXT: lsls r2, r3, #31
+; CHECK-BE-NEXT: bfi r2, r3, #7, #1
+; CHECK-BE-NEXT: uxtb r3, r2
+; CHECK-BE-NEXT: lsls r2, r2, #31
; CHECK-BE-NEXT: itt ne
; CHECK-BE-NEXT: ldrhne r2, [r0]
; CHECK-BE-NEXT: vmovne.16 q0[0], r2
@@ -1034,11 +1466,10 @@ define arm_aapcs_vfpcc <16 x i8> @masked
; CHECK-LE-NEXT: mov r4, sp
; CHECK-LE-NEXT: bfc r4, #0, #4
; CHECK-LE-NEXT: mov sp, r4
-; CHECK-LE-NEXT: mov r1, sp
; CHECK-LE-NEXT: vcmp.s8 gt, q0, zr
-; CHECK-LE-NEXT: vstr p0, [r1]
-; CHECK-LE-NEXT: ldrh.w r1, [sp]
-; CHECK-LE-NEXT: lsls r2, r1, #31
+; CHECK-LE-NEXT: vmrs r2, p0
+; CHECK-LE-NEXT: uxth r1, r2
+; CHECK-LE-NEXT: lsls r2, r2, #31
; CHECK-LE-NEXT: beq .LBB12_2
; CHECK-LE-NEXT: @ %bb.1: @ %cond.load
; CHECK-LE-NEXT: movs r2, #0
@@ -1125,11 +1556,10 @@ define arm_aapcs_vfpcc <16 x i8> @masked
; CHECK-BE-NEXT: bfc r4, #0, #4
; CHECK-BE-NEXT: mov sp, r4
; CHECK-BE-NEXT: vrev64.8 q1, q0
-; CHECK-BE-NEXT: mov r1, sp
; CHECK-BE-NEXT: vcmp.s8 gt, q1, zr
-; CHECK-BE-NEXT: vstr p0, [r1]
-; CHECK-BE-NEXT: ldrh.w r1, [sp]
-; CHECK-BE-NEXT: lsls r2, r1, #31
+; CHECK-BE-NEXT: vmrs r2, p0
+; CHECK-BE-NEXT: uxth r1, r2
+; CHECK-BE-NEXT: lsls r2, r2, #31
; CHECK-BE-NEXT: beq .LBB12_2
; CHECK-BE-NEXT: @ %bb.1: @ %cond.load
; CHECK-BE-NEXT: movs r2, #0
@@ -1224,12 +1654,11 @@ define arm_aapcs_vfpcc <16 x i8> @masked
; CHECK-LE-NEXT: bfc r4, #0, #4
; CHECK-LE-NEXT: mov sp, r4
; CHECK-LE-NEXT: vcmp.s8 gt, q0, zr
-; CHECK-LE-NEXT: mov r1, sp
-; CHECK-LE-NEXT: vstr p0, [r1]
; CHECK-LE-NEXT: @ implicit-def: $q0
; CHECK-LE-NEXT: sub.w r4, r7, #8
-; CHECK-LE-NEXT: ldrh.w r1, [sp]
-; CHECK-LE-NEXT: lsls r2, r1, #31
+; CHECK-LE-NEXT: vmrs r2, p0
+; CHECK-LE-NEXT: uxth r1, r2
+; CHECK-LE-NEXT: lsls r2, r2, #31
; CHECK-LE-NEXT: itt ne
; CHECK-LE-NEXT: ldrbne r2, [r0]
; CHECK-LE-NEXT: vmovne.8 q0[0], r2
@@ -1308,13 +1737,12 @@ define arm_aapcs_vfpcc <16 x i8> @masked
; CHECK-BE-NEXT: bfc r4, #0, #4
; CHECK-BE-NEXT: mov sp, r4
; CHECK-BE-NEXT: vrev64.8 q1, q0
-; CHECK-BE-NEXT: mov r1, sp
+; CHECK-BE-NEXT: sub.w r4, r7, #8
; CHECK-BE-NEXT: vcmp.s8 gt, q1, zr
; CHECK-BE-NEXT: @ implicit-def: $q1
-; CHECK-BE-NEXT: sub.w r4, r7, #8
-; CHECK-BE-NEXT: vstr p0, [r1]
-; CHECK-BE-NEXT: ldrh.w r1, [sp]
-; CHECK-BE-NEXT: lsls r2, r1, #31
+; CHECK-BE-NEXT: vmrs r2, p0
+; CHECK-BE-NEXT: uxth r1, r2
+; CHECK-BE-NEXT: lsls r2, r2, #31
; CHECK-BE-NEXT: itt ne
; CHECK-BE-NEXT: ldrbne r2, [r0]
; CHECK-BE-NEXT: vmovne.8 q1[0], r2
@@ -1399,12 +1827,11 @@ define arm_aapcs_vfpcc <16 x i8> @masked
; CHECK-LE-NEXT: mov r4, sp
; CHECK-LE-NEXT: bfc r4, #0, #4
; CHECK-LE-NEXT: mov sp, r4
-; CHECK-LE-NEXT: mov r1, sp
; CHECK-LE-NEXT: vcmp.s8 gt, q0, zr
-; CHECK-LE-NEXT: vstr p0, [r1]
; CHECK-LE-NEXT: sub.w r4, r7, #8
-; CHECK-LE-NEXT: ldrh.w r1, [sp]
-; CHECK-LE-NEXT: lsls r2, r1, #31
+; CHECK-LE-NEXT: vmrs r2, p0
+; CHECK-LE-NEXT: uxth r1, r2
+; CHECK-LE-NEXT: lsls r2, r2, #31
; CHECK-LE-NEXT: itt ne
; CHECK-LE-NEXT: ldrbne r2, [r0]
; CHECK-LE-NEXT: vmovne.8 q0[0], r2
@@ -1483,12 +1910,11 @@ define arm_aapcs_vfpcc <16 x i8> @masked
; CHECK-BE-NEXT: bfc r4, #0, #4
; CHECK-BE-NEXT: mov sp, r4
; CHECK-BE-NEXT: vrev64.8 q1, q0
-; CHECK-BE-NEXT: mov r1, sp
-; CHECK-BE-NEXT: vcmp.s8 gt, q1, zr
; CHECK-BE-NEXT: sub.w r4, r7, #8
-; CHECK-BE-NEXT: vstr p0, [r1]
-; CHECK-BE-NEXT: ldrh.w r1, [sp]
-; CHECK-BE-NEXT: lsls r2, r1, #31
+; CHECK-BE-NEXT: vcmp.s8 gt, q1, zr
+; CHECK-BE-NEXT: vmrs r2, p0
+; CHECK-BE-NEXT: uxth r1, r2
+; CHECK-BE-NEXT: lsls r2, r2, #31
; CHECK-BE-NEXT: itt ne
; CHECK-BE-NEXT: ldrbne r2, [r0]
; CHECK-BE-NEXT: vmovne.8 q1[0], r2
@@ -1574,13 +2000,12 @@ define arm_aapcs_vfpcc i8* @masked_v16i8
; CHECK-LE-NEXT: bfc r4, #0, #4
; CHECK-LE-NEXT: mov sp, r4
; CHECK-LE-NEXT: vcmp.s8 gt, q0, zr
-; CHECK-LE-NEXT: mov r2, sp
-; CHECK-LE-NEXT: vstr p0, [r2]
; CHECK-LE-NEXT: @ implicit-def: $q0
; CHECK-LE-NEXT: adds r0, #4
-; CHECK-LE-NEXT: ldrh.w r2, [sp]
+; CHECK-LE-NEXT: vmrs r3, p0
; CHECK-LE-NEXT: sub.w r4, r7, #8
-; CHECK-LE-NEXT: lsls r3, r2, #31
+; CHECK-LE-NEXT: uxth r2, r3
+; CHECK-LE-NEXT: lsls r3, r3, #31
; CHECK-LE-NEXT: itt ne
; CHECK-LE-NEXT: ldrbne r3, [r0]
; CHECK-LE-NEXT: vmovne.8 q0[0], r3
@@ -1660,14 +2085,13 @@ define arm_aapcs_vfpcc i8* @masked_v16i8
; CHECK-BE-NEXT: bfc r4, #0, #4
; CHECK-BE-NEXT: mov sp, r4
; CHECK-BE-NEXT: vrev64.8 q1, q0
-; CHECK-BE-NEXT: mov r2, sp
-; CHECK-BE-NEXT: vcmp.s8 gt, q1, zr
; CHECK-BE-NEXT: @ implicit-def: $q0
; CHECK-BE-NEXT: adds r0, #4
-; CHECK-BE-NEXT: vstr p0, [r2]
+; CHECK-BE-NEXT: vcmp.s8 gt, q1, zr
; CHECK-BE-NEXT: sub.w r4, r7, #8
-; CHECK-BE-NEXT: ldrh.w r2, [sp]
-; CHECK-BE-NEXT: lsls r3, r2, #31
+; CHECK-BE-NEXT: vmrs r3, p0
+; CHECK-BE-NEXT: uxth r2, r3
+; CHECK-BE-NEXT: lsls r3, r3, #31
; CHECK-BE-NEXT: itt ne
; CHECK-BE-NEXT: ldrbne r3, [r0]
; CHECK-BE-NEXT: vmovne.8 q0[0], r3
@@ -1757,13 +2181,12 @@ define arm_aapcs_vfpcc i8* @masked_v16i8
; CHECK-LE-NEXT: bfc r4, #0, #4
; CHECK-LE-NEXT: mov sp, r4
; CHECK-LE-NEXT: vcmp.s8 gt, q0, zr
-; CHECK-LE-NEXT: mov r2, sp
-; CHECK-LE-NEXT: vstr p0, [r2]
; CHECK-LE-NEXT: @ implicit-def: $q0
; CHECK-LE-NEXT: sub.w r4, r7, #8
-; CHECK-LE-NEXT: ldrh.w r3, [sp]
+; CHECK-LE-NEXT: vmrs r2, p0
; CHECK-LE-NEXT: add.w r12, r0, #4
-; CHECK-LE-NEXT: lsls r2, r3, #31
+; CHECK-LE-NEXT: uxth r3, r2
+; CHECK-LE-NEXT: lsls r2, r2, #31
; CHECK-LE-NEXT: itt ne
; CHECK-LE-NEXT: ldrbne r2, [r0]
; CHECK-LE-NEXT: vmovne.8 q0[0], r2
@@ -1844,14 +2267,13 @@ define arm_aapcs_vfpcc i8* @masked_v16i8
; CHECK-BE-NEXT: bfc r4, #0, #4
; CHECK-BE-NEXT: mov sp, r4
; CHECK-BE-NEXT: vrev64.8 q1, q0
-; CHECK-BE-NEXT: mov r2, sp
-; CHECK-BE-NEXT: vcmp.s8 gt, q1, zr
; CHECK-BE-NEXT: @ implicit-def: $q0
; CHECK-BE-NEXT: sub.w r4, r7, #8
-; CHECK-BE-NEXT: vstr p0, [r2]
+; CHECK-BE-NEXT: vcmp.s8 gt, q1, zr
; CHECK-BE-NEXT: add.w r12, r0, #4
-; CHECK-BE-NEXT: ldrh.w r3, [sp]
-; CHECK-BE-NEXT: lsls r2, r3, #31
+; CHECK-BE-NEXT: vmrs r2, p0
+; CHECK-BE-NEXT: uxth r3, r2
+; CHECK-BE-NEXT: lsls r2, r2, #31
; CHECK-BE-NEXT: itt ne
; CHECK-BE-NEXT: ldrbne r2, [r0]
; CHECK-BE-NEXT: vmovne.8 q0[0], r2
@@ -1935,10 +2357,22 @@ define arm_aapcs_vfpcc <4 x float> @mask
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .pad #4
; CHECK-LE-NEXT: sub sp, #4
-; CHECK-LE-NEXT: mov r1, sp
; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr
-; CHECK-LE-NEXT: vstr p0, [r1]
-; CHECK-LE-NEXT: ldrb.w r1, [sp]
+; CHECK-LE-NEXT: movs r1, #0
+; CHECK-LE-NEXT: vmrs r2, p0
+; CHECK-LE-NEXT: and r3, r2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #8, #1
+; CHECK-LE-NEXT: ubfx r2, r2, #12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #2, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r1, r2, #3, #1
+; CHECK-LE-NEXT: and r1, r1, #15
; CHECK-LE-NEXT: lsls r2, r1, #31
; CHECK-LE-NEXT: beq .LBB17_2
; CHECK-LE-NEXT: @ %bb.1: @ %cond.load
@@ -1972,10 +2406,22 @@ define arm_aapcs_vfpcc <4 x float> @mask
; CHECK-BE-NEXT: .pad #4
; CHECK-BE-NEXT: sub sp, #4
; CHECK-BE-NEXT: vrev64.32 q1, q0
-; CHECK-BE-NEXT: mov r1, sp
+; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr
-; CHECK-BE-NEXT: vstr p0, [r1]
-; CHECK-BE-NEXT: ldrb.w r1, [sp]
+; CHECK-BE-NEXT: vmrs r2, p0
+; CHECK-BE-NEXT: and r3, r2, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r2, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r2, #8, #1
+; CHECK-BE-NEXT: ubfx r2, r2, #12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #2, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r1, r2, #3, #1
+; CHECK-BE-NEXT: and r1, r1, #15
; CHECK-BE-NEXT: lsls r2, r1, #31
; CHECK-BE-NEXT: beq .LBB17_2
; CHECK-BE-NEXT: @ %bb.1: @ %cond.load
@@ -2016,10 +2462,22 @@ define arm_aapcs_vfpcc <4 x float> @mask
; CHECK-LE-NEXT: .pad #4
; CHECK-LE-NEXT: sub sp, #4
; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr
-; CHECK-LE-NEXT: mov r1, sp
-; CHECK-LE-NEXT: vstr p0, [r1]
+; CHECK-LE-NEXT: movs r1, #0
+; CHECK-LE-NEXT: vmrs r2, p0
; CHECK-LE-NEXT: @ implicit-def: $q0
-; CHECK-LE-NEXT: ldrb.w r1, [sp]
+; CHECK-LE-NEXT: and r3, r2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #8, #1
+; CHECK-LE-NEXT: ubfx r2, r2, #12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #2, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r1, r2, #3, #1
+; CHECK-LE-NEXT: and r1, r1, #15
; CHECK-LE-NEXT: lsls r2, r1, #31
; CHECK-LE-NEXT: it ne
; CHECK-LE-NEXT: vldrne s0, [r0]
@@ -2040,11 +2498,23 @@ define arm_aapcs_vfpcc <4 x float> @mask
; CHECK-BE-NEXT: .pad #4
; CHECK-BE-NEXT: sub sp, #4
; CHECK-BE-NEXT: vrev64.32 q1, q0
-; CHECK-BE-NEXT: mov r1, sp
+; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr
; CHECK-BE-NEXT: @ implicit-def: $q1
-; CHECK-BE-NEXT: vstr p0, [r1]
-; CHECK-BE-NEXT: ldrb.w r1, [sp]
+; CHECK-BE-NEXT: vmrs r2, p0
+; CHECK-BE-NEXT: and r3, r2, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r2, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r2, #8, #1
+; CHECK-BE-NEXT: ubfx r2, r2, #12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #2, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r1, r2, #3, #1
+; CHECK-BE-NEXT: and r1, r1, #15
; CHECK-BE-NEXT: lsls r2, r1, #31
; CHECK-BE-NEXT: it ne
; CHECK-BE-NEXT: vldrne s4, [r0]
@@ -2072,10 +2542,22 @@ define arm_aapcs_vfpcc <4 x float> @mask
; CHECK-LE-NEXT: .pad #4
; CHECK-LE-NEXT: sub sp, #4
; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr
-; CHECK-LE-NEXT: mov r1, sp
-; CHECK-LE-NEXT: vstr p0, [r1]
+; CHECK-LE-NEXT: movs r1, #0
+; CHECK-LE-NEXT: vmrs r2, p0
; CHECK-LE-NEXT: @ implicit-def: $q0
-; CHECK-LE-NEXT: ldrb.w r1, [sp]
+; CHECK-LE-NEXT: and r3, r2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #8, #1
+; CHECK-LE-NEXT: ubfx r2, r2, #12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #2, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r1, r2, #3, #1
+; CHECK-LE-NEXT: and r1, r1, #15
; CHECK-LE-NEXT: lsls r2, r1, #31
; CHECK-LE-NEXT: itt ne
; CHECK-LE-NEXT: ldrne r2, [r0]
@@ -2100,11 +2582,23 @@ define arm_aapcs_vfpcc <4 x float> @mask
; CHECK-BE-NEXT: .pad #4
; CHECK-BE-NEXT: sub sp, #4
; CHECK-BE-NEXT: vrev64.32 q1, q0
-; CHECK-BE-NEXT: mov r1, sp
+; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr
; CHECK-BE-NEXT: @ implicit-def: $q1
-; CHECK-BE-NEXT: vstr p0, [r1]
-; CHECK-BE-NEXT: ldrb.w r1, [sp]
+; CHECK-BE-NEXT: vmrs r2, p0
+; CHECK-BE-NEXT: and r3, r2, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r2, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r2, #8, #1
+; CHECK-BE-NEXT: ubfx r2, r2, #12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #2, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r1, r2, #3, #1
+; CHECK-BE-NEXT: and r1, r1, #15
; CHECK-BE-NEXT: lsls r2, r1, #31
; CHECK-BE-NEXT: itt ne
; CHECK-BE-NEXT: ldrne r2, [r0]
@@ -2135,10 +2629,22 @@ define arm_aapcs_vfpcc <4 x float> @mask
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .pad #4
; CHECK-LE-NEXT: sub sp, #4
-; CHECK-LE-NEXT: mov r1, sp
; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr
-; CHECK-LE-NEXT: vstr p0, [r1]
-; CHECK-LE-NEXT: ldrb.w r1, [sp]
+; CHECK-LE-NEXT: movs r1, #0
+; CHECK-LE-NEXT: vmrs r2, p0
+; CHECK-LE-NEXT: and r3, r2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #8, #1
+; CHECK-LE-NEXT: ubfx r2, r2, #12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #2, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r1, r2, #3, #1
+; CHECK-LE-NEXT: and r1, r1, #15
; CHECK-LE-NEXT: lsls r2, r1, #31
; CHECK-LE-NEXT: it ne
; CHECK-LE-NEXT: vldrne s4, [r0]
@@ -2160,11 +2666,23 @@ define arm_aapcs_vfpcc <4 x float> @mask
; CHECK-BE-NEXT: .pad #4
; CHECK-BE-NEXT: sub sp, #4
; CHECK-BE-NEXT: vrev64.32 q2, q0
-; CHECK-BE-NEXT: mov r1, sp
+; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: vcmp.s32 gt, q2, zr
; CHECK-BE-NEXT: vrev64.32 q2, q1
-; CHECK-BE-NEXT: vstr p0, [r1]
-; CHECK-BE-NEXT: ldrb.w r1, [sp]
+; CHECK-BE-NEXT: vmrs r2, p0
+; CHECK-BE-NEXT: and r3, r2, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r2, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r2, #8, #1
+; CHECK-BE-NEXT: ubfx r2, r2, #12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #2, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r1, r2, #3, #1
+; CHECK-BE-NEXT: and r1, r1, #15
; CHECK-BE-NEXT: lsls r2, r1, #31
; CHECK-BE-NEXT: it ne
; CHECK-BE-NEXT: vldrne s8, [r0]
@@ -2192,11 +2710,23 @@ define arm_aapcs_vfpcc i8* @masked_v4f32
; CHECK-LE-NEXT: .pad #4
; CHECK-LE-NEXT: sub sp, #4
; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr
-; CHECK-LE-NEXT: mov r2, sp
-; CHECK-LE-NEXT: vstr p0, [r2]
+; CHECK-LE-NEXT: movs r2, #0
+; CHECK-LE-NEXT: vmrs r12, p0
; CHECK-LE-NEXT: @ implicit-def: $q0
; CHECK-LE-NEXT: adds r0, #4
-; CHECK-LE-NEXT: ldrb.w r2, [sp]
+; CHECK-LE-NEXT: and r3, r12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #8, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #2, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #3, #1
+; CHECK-LE-NEXT: and r2, r2, #15
; CHECK-LE-NEXT: lsls r3, r2, #31
; CHECK-LE-NEXT: it ne
; CHECK-LE-NEXT: vldrne s0, [r0]
@@ -2218,12 +2748,24 @@ define arm_aapcs_vfpcc i8* @masked_v4f32
; CHECK-BE-NEXT: .pad #4
; CHECK-BE-NEXT: sub sp, #4
; CHECK-BE-NEXT: vrev64.32 q1, q0
-; CHECK-BE-NEXT: mov r2, sp
+; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr
; CHECK-BE-NEXT: @ implicit-def: $q0
; CHECK-BE-NEXT: adds r0, #4
-; CHECK-BE-NEXT: vstr p0, [r2]
-; CHECK-BE-NEXT: ldrb.w r2, [sp]
+; CHECK-BE-NEXT: vmrs r12, p0
+; CHECK-BE-NEXT: and r3, r12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #8, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #2, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #3, #1
+; CHECK-BE-NEXT: and r2, r2, #15
; CHECK-BE-NEXT: lsls r3, r2, #31
; CHECK-BE-NEXT: it ne
; CHECK-BE-NEXT: vldrne s0, [r0]
@@ -2255,11 +2797,23 @@ define arm_aapcs_vfpcc i8* @masked_v4f32
; CHECK-LE-NEXT: .pad #4
; CHECK-LE-NEXT: sub sp, #4
; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr
-; CHECK-LE-NEXT: mov r2, sp
-; CHECK-LE-NEXT: vstr p0, [r2]
+; CHECK-LE-NEXT: movs r2, #0
+; CHECK-LE-NEXT: vmrs r12, p0
; CHECK-LE-NEXT: @ implicit-def: $q0
+; CHECK-LE-NEXT: and r3, r12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #8, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #2, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: add.w r12, r0, #4
-; CHECK-LE-NEXT: ldrb.w r3, [sp]
+; CHECK-LE-NEXT: bfi r2, r3, #3, #1
+; CHECK-LE-NEXT: and r3, r2, #15
; CHECK-LE-NEXT: lsls r2, r3, #31
; CHECK-LE-NEXT: it ne
; CHECK-LE-NEXT: vldrne s0, [r0]
@@ -2282,12 +2836,24 @@ define arm_aapcs_vfpcc i8* @masked_v4f32
; CHECK-BE-NEXT: .pad #4
; CHECK-BE-NEXT: sub sp, #4
; CHECK-BE-NEXT: vrev64.32 q1, q0
-; CHECK-BE-NEXT: mov r2, sp
+; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr
; CHECK-BE-NEXT: @ implicit-def: $q0
+; CHECK-BE-NEXT: vmrs r12, p0
+; CHECK-BE-NEXT: and r3, r12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #8, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #2, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: add.w r12, r0, #4
-; CHECK-BE-NEXT: vstr p0, [r2]
-; CHECK-BE-NEXT: ldrb.w r3, [sp]
+; CHECK-BE-NEXT: bfi r2, r3, #3, #1
+; CHECK-BE-NEXT: and r3, r2, #15
; CHECK-BE-NEXT: lsls r2, r3, #31
; CHECK-BE-NEXT: it ne
; CHECK-BE-NEXT: vldrne s0, [r0]
@@ -2320,11 +2886,35 @@ define arm_aapcs_vfpcc <8 x half> @maske
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .pad #8
; CHECK-LE-NEXT: sub sp, #8
-; CHECK-LE-NEXT: mov r1, sp
; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr
-; CHECK-LE-NEXT: vstr p0, [r1]
-; CHECK-LE-NEXT: ldrb.w r1, [sp]
-; CHECK-LE-NEXT: lsls r2, r1, #31
+; CHECK-LE-NEXT: movs r2, #0
+; CHECK-LE-NEXT: vmrs r1, p0
+; CHECK-LE-NEXT: and r3, r1, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #2, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #6, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #3, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #8, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #4, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #10, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #5, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #12, #1
+; CHECK-LE-NEXT: ubfx r1, r1, #14, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #6, #1
+; CHECK-LE-NEXT: rsbs r1, r1, #0
+; CHECK-LE-NEXT: bfi r2, r1, #7, #1
+; CHECK-LE-NEXT: uxtb r1, r2
+; CHECK-LE-NEXT: lsls r2, r2, #31
; CHECK-LE-NEXT: beq .LBB23_2
; CHECK-LE-NEXT: @ %bb.1: @ %cond.load
; CHECK-LE-NEXT: vldr.16 s0, .LCPI23_0
@@ -2411,11 +3001,35 @@ define arm_aapcs_vfpcc <8 x half> @maske
; CHECK-BE-NEXT: .pad #8
; CHECK-BE-NEXT: sub sp, #8
; CHECK-BE-NEXT: vrev64.16 q1, q0
-; CHECK-BE-NEXT: mov r1, sp
+; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr
-; CHECK-BE-NEXT: vstr p0, [r1]
-; CHECK-BE-NEXT: ldrb.w r1, [sp]
-; CHECK-BE-NEXT: lsls r2, r1, #31
+; CHECK-BE-NEXT: vmrs r1, p0
+; CHECK-BE-NEXT: and r3, r1, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #2, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #2, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #6, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #3, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #8, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #4, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #10, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #5, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #12, #1
+; CHECK-BE-NEXT: ubfx r1, r1, #14, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #6, #1
+; CHECK-BE-NEXT: rsbs r1, r1, #0
+; CHECK-BE-NEXT: bfi r2, r1, #7, #1
+; CHECK-BE-NEXT: uxtb r1, r2
+; CHECK-BE-NEXT: lsls r2, r2, #31
; CHECK-BE-NEXT: beq .LBB23_2
; CHECK-BE-NEXT: @ %bb.1: @ %cond.load
; CHECK-BE-NEXT: vldr.16 s0, .LCPI23_0
@@ -2509,11 +3123,35 @@ define arm_aapcs_vfpcc <8 x half> @maske
; CHECK-LE-NEXT: .pad #8
; CHECK-LE-NEXT: sub sp, #8
; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr
-; CHECK-LE-NEXT: mov r1, sp
-; CHECK-LE-NEXT: vstr p0, [r1]
+; CHECK-LE-NEXT: movs r2, #0
+; CHECK-LE-NEXT: vmrs r1, p0
; CHECK-LE-NEXT: @ implicit-def: $q0
-; CHECK-LE-NEXT: ldrb.w r1, [sp]
-; CHECK-LE-NEXT: lsls r2, r1, #31
+; CHECK-LE-NEXT: and r3, r1, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #2, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #6, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #3, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #8, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #4, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #10, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #5, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #12, #1
+; CHECK-LE-NEXT: ubfx r1, r1, #14, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #6, #1
+; CHECK-LE-NEXT: rsbs r1, r1, #0
+; CHECK-LE-NEXT: bfi r2, r1, #7, #1
+; CHECK-LE-NEXT: uxtb r1, r2
+; CHECK-LE-NEXT: lsls r2, r2, #31
; CHECK-LE-NEXT: bne .LBB24_9
; CHECK-LE-NEXT: @ %bb.1: @ %else
; CHECK-LE-NEXT: lsls r2, r1, #30
@@ -2591,12 +3229,36 @@ define arm_aapcs_vfpcc <8 x half> @maske
; CHECK-BE-NEXT: .pad #8
; CHECK-BE-NEXT: sub sp, #8
; CHECK-BE-NEXT: vrev64.16 q1, q0
-; CHECK-BE-NEXT: mov r1, sp
+; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr
; CHECK-BE-NEXT: @ implicit-def: $q1
-; CHECK-BE-NEXT: vstr p0, [r1]
-; CHECK-BE-NEXT: ldrb.w r1, [sp]
-; CHECK-BE-NEXT: lsls r2, r1, #31
+; CHECK-BE-NEXT: vmrs r1, p0
+; CHECK-BE-NEXT: and r3, r1, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #2, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #2, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #6, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #3, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #8, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #4, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #10, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #5, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #12, #1
+; CHECK-BE-NEXT: ubfx r1, r1, #14, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #6, #1
+; CHECK-BE-NEXT: rsbs r1, r1, #0
+; CHECK-BE-NEXT: bfi r2, r1, #7, #1
+; CHECK-BE-NEXT: uxtb r1, r2
+; CHECK-BE-NEXT: lsls r2, r2, #31
; CHECK-BE-NEXT: bne .LBB24_10
; CHECK-BE-NEXT: @ %bb.1: @ %else
; CHECK-BE-NEXT: lsls r2, r1, #30
@@ -2680,11 +3342,35 @@ define arm_aapcs_vfpcc <8 x half> @maske
; CHECK-LE-NEXT: .pad #40
; CHECK-LE-NEXT: sub sp, #40
; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr
-; CHECK-LE-NEXT: add r1, sp, #32
-; CHECK-LE-NEXT: vstr p0, [r1]
+; CHECK-LE-NEXT: movs r2, #0
+; CHECK-LE-NEXT: vmrs r1, p0
; CHECK-LE-NEXT: @ implicit-def: $q0
-; CHECK-LE-NEXT: ldrb.w r1, [sp, #32]
-; CHECK-LE-NEXT: lsls r2, r1, #31
+; CHECK-LE-NEXT: and r3, r1, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #2, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #6, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #3, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #8, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #4, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #10, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #5, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #12, #1
+; CHECK-LE-NEXT: ubfx r1, r1, #14, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #6, #1
+; CHECK-LE-NEXT: rsbs r1, r1, #0
+; CHECK-LE-NEXT: bfi r2, r1, #7, #1
+; CHECK-LE-NEXT: uxtb r1, r2
+; CHECK-LE-NEXT: lsls r2, r2, #31
; CHECK-LE-NEXT: bne .LBB25_9
; CHECK-LE-NEXT: @ %bb.1: @ %else
; CHECK-LE-NEXT: lsls r2, r1, #30
@@ -2778,12 +3464,36 @@ define arm_aapcs_vfpcc <8 x half> @maske
; CHECK-BE-NEXT: .pad #40
; CHECK-BE-NEXT: sub sp, #40
; CHECK-BE-NEXT: vrev64.16 q1, q0
-; CHECK-BE-NEXT: add r1, sp, #32
+; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr
; CHECK-BE-NEXT: @ implicit-def: $q1
-; CHECK-BE-NEXT: vstr p0, [r1]
-; CHECK-BE-NEXT: ldrb.w r1, [sp, #32]
-; CHECK-BE-NEXT: lsls r2, r1, #31
+; CHECK-BE-NEXT: vmrs r1, p0
+; CHECK-BE-NEXT: and r3, r1, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #2, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #2, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #6, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #3, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #8, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #4, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #10, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #5, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #12, #1
+; CHECK-BE-NEXT: ubfx r1, r1, #14, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #6, #1
+; CHECK-BE-NEXT: rsbs r1, r1, #0
+; CHECK-BE-NEXT: bfi r2, r1, #7, #1
+; CHECK-BE-NEXT: uxtb r1, r2
+; CHECK-BE-NEXT: lsls r2, r2, #31
; CHECK-BE-NEXT: bne .LBB25_10
; CHECK-BE-NEXT: @ %bb.1: @ %else
; CHECK-BE-NEXT: lsls r2, r1, #30
@@ -2882,11 +3592,35 @@ define arm_aapcs_vfpcc <8 x half> @maske
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .pad #8
; CHECK-LE-NEXT: sub sp, #8
-; CHECK-LE-NEXT: mov r1, sp
; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr
-; CHECK-LE-NEXT: vstr p0, [r1]
-; CHECK-LE-NEXT: ldrb.w r1, [sp]
-; CHECK-LE-NEXT: lsls r2, r1, #31
+; CHECK-LE-NEXT: movs r2, #0
+; CHECK-LE-NEXT: vmrs r1, p0
+; CHECK-LE-NEXT: and r3, r1, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #2, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #6, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #3, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #8, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #4, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #10, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #5, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #12, #1
+; CHECK-LE-NEXT: ubfx r1, r1, #14, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #6, #1
+; CHECK-LE-NEXT: rsbs r1, r1, #0
+; CHECK-LE-NEXT: bfi r2, r1, #7, #1
+; CHECK-LE-NEXT: uxtb r1, r2
+; CHECK-LE-NEXT: lsls r2, r2, #31
; CHECK-LE-NEXT: bne .LBB26_10
; CHECK-LE-NEXT: @ %bb.1: @ %else
; CHECK-LE-NEXT: lsls r2, r1, #30
@@ -2966,12 +3700,36 @@ define arm_aapcs_vfpcc <8 x half> @maske
; CHECK-BE-NEXT: .pad #8
; CHECK-BE-NEXT: sub sp, #8
; CHECK-BE-NEXT: vrev64.16 q2, q0
-; CHECK-BE-NEXT: mov r1, sp
+; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vcmp.s16 gt, q2, zr
; CHECK-BE-NEXT: vrev64.16 q2, q1
-; CHECK-BE-NEXT: vstr p0, [r1]
-; CHECK-BE-NEXT: ldrb.w r1, [sp]
-; CHECK-BE-NEXT: lsls r2, r1, #31
+; CHECK-BE-NEXT: vmrs r1, p0
+; CHECK-BE-NEXT: and r3, r1, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #2, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #2, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #6, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #3, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #8, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #4, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #10, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #5, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #12, #1
+; CHECK-BE-NEXT: ubfx r1, r1, #14, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #6, #1
+; CHECK-BE-NEXT: rsbs r1, r1, #0
+; CHECK-BE-NEXT: bfi r2, r1, #7, #1
+; CHECK-BE-NEXT: uxtb r1, r2
+; CHECK-BE-NEXT: lsls r2, r2, #31
; CHECK-BE-NEXT: bne .LBB26_10
; CHECK-BE-NEXT: @ %bb.1: @ %else
; CHECK-BE-NEXT: lsls r2, r1, #30
@@ -3057,12 +3815,36 @@ define arm_aapcs_vfpcc i8* @masked_v8f16
; CHECK-LE-NEXT: .pad #8
; CHECK-LE-NEXT: sub sp, #8
; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr
-; CHECK-LE-NEXT: mov r2, sp
-; CHECK-LE-NEXT: vstr p0, [r2]
+; CHECK-LE-NEXT: movs r3, #0
+; CHECK-LE-NEXT: vmrs r12, p0
; CHECK-LE-NEXT: adds r0, #4
-; CHECK-LE-NEXT: ldrb.w r2, [sp]
; CHECK-LE-NEXT: @ implicit-def: $q0
-; CHECK-LE-NEXT: lsls r3, r2, #31
+; CHECK-LE-NEXT: and r2, r12, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #0, #1
+; CHECK-LE-NEXT: ubfx r2, r12, #2, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #1, #1
+; CHECK-LE-NEXT: ubfx r2, r12, #4, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #2, #1
+; CHECK-LE-NEXT: ubfx r2, r12, #6, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #3, #1
+; CHECK-LE-NEXT: ubfx r2, r12, #8, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #4, #1
+; CHECK-LE-NEXT: ubfx r2, r12, #10, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #5, #1
+; CHECK-LE-NEXT: ubfx r2, r12, #12, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #6, #1
+; CHECK-LE-NEXT: ubfx r2, r12, #14, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #7, #1
+; CHECK-LE-NEXT: uxtb r2, r3
+; CHECK-LE-NEXT: lsls r3, r3, #31
; CHECK-LE-NEXT: bne .LBB27_10
; CHECK-LE-NEXT: @ %bb.1: @ %else
; CHECK-LE-NEXT: lsls r3, r2, #30
@@ -3140,13 +3922,37 @@ define arm_aapcs_vfpcc i8* @masked_v8f16
; CHECK-BE-NEXT: .pad #8
; CHECK-BE-NEXT: sub sp, #8
; CHECK-BE-NEXT: vrev64.16 q1, q0
-; CHECK-BE-NEXT: mov r2, sp
+; CHECK-BE-NEXT: movs r3, #0
; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr
; CHECK-BE-NEXT: adds r0, #4
-; CHECK-BE-NEXT: vstr p0, [r2]
+; CHECK-BE-NEXT: vmrs r12, p0
; CHECK-BE-NEXT: @ implicit-def: $q0
-; CHECK-BE-NEXT: ldrb.w r2, [sp]
-; CHECK-BE-NEXT: lsls r3, r2, #31
+; CHECK-BE-NEXT: and r2, r12, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #0, #1
+; CHECK-BE-NEXT: ubfx r2, r12, #2, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #1, #1
+; CHECK-BE-NEXT: ubfx r2, r12, #4, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #2, #1
+; CHECK-BE-NEXT: ubfx r2, r12, #6, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #3, #1
+; CHECK-BE-NEXT: ubfx r2, r12, #8, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #4, #1
+; CHECK-BE-NEXT: ubfx r2, r12, #10, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #5, #1
+; CHECK-BE-NEXT: ubfx r2, r12, #12, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #6, #1
+; CHECK-BE-NEXT: ubfx r2, r12, #14, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #7, #1
+; CHECK-BE-NEXT: uxtb r2, r3
+; CHECK-BE-NEXT: lsls r3, r3, #31
; CHECK-BE-NEXT: bne .LBB27_10
; CHECK-BE-NEXT: @ %bb.1: @ %else
; CHECK-BE-NEXT: lsls r3, r2, #30
@@ -3234,11 +4040,35 @@ define arm_aapcs_vfpcc i8* @masked_v8f16
; CHECK-LE-NEXT: .pad #8
; CHECK-LE-NEXT: sub sp, #8
; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr
-; CHECK-LE-NEXT: mov r2, sp
-; CHECK-LE-NEXT: vstr p0, [r2]
+; CHECK-LE-NEXT: movs r2, #0
+; CHECK-LE-NEXT: vmrs r12, p0
; CHECK-LE-NEXT: @ implicit-def: $q0
-; CHECK-LE-NEXT: ldrb.w r3, [sp]
-; CHECK-LE-NEXT: lsls r2, r3, #31
+; CHECK-LE-NEXT: and r3, r12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #2, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #6, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #3, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #8, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #4, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #10, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #5, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #6, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #14, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #7, #1
+; CHECK-LE-NEXT: uxtb r3, r2
+; CHECK-LE-NEXT: lsls r2, r2, #31
; CHECK-LE-NEXT: bne .LBB28_12
; CHECK-LE-NEXT: @ %bb.1: @ %else
; CHECK-LE-NEXT: lsls r2, r3, #30
@@ -3314,12 +4144,36 @@ define arm_aapcs_vfpcc i8* @masked_v8f16
; CHECK-BE-NEXT: .pad #8
; CHECK-BE-NEXT: sub sp, #8
; CHECK-BE-NEXT: vrev64.16 q1, q0
-; CHECK-BE-NEXT: mov r2, sp
+; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr
; CHECK-BE-NEXT: @ implicit-def: $q0
-; CHECK-BE-NEXT: vstr p0, [r2]
-; CHECK-BE-NEXT: ldrb.w r3, [sp]
-; CHECK-BE-NEXT: lsls r2, r3, #31
+; CHECK-BE-NEXT: vmrs r12, p0
+; CHECK-BE-NEXT: and r3, r12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #2, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #2, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #6, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #3, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #8, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #4, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #10, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #5, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #6, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #14, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #7, #1
+; CHECK-BE-NEXT: uxtb r3, r2
+; CHECK-BE-NEXT: lsls r2, r2, #31
; CHECK-BE-NEXT: bne .LBB28_12
; CHECK-BE-NEXT: @ %bb.1: @ %else
; CHECK-BE-NEXT: lsls r2, r3, #30
Modified: llvm/trunk/test/CodeGen/Thumb2/mve-masked-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-masked-store.ll?rev=371419&r1=371418&r2=371419&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-masked-store.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-masked-store.ll Mon Sep 9 09:35:49 2019
@@ -7,10 +7,22 @@ define arm_aapcs_vfpcc void @masked_v4i3
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .pad #4
; CHECK-LE-NEXT: sub sp, #4
-; CHECK-LE-NEXT: mov r1, sp
; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr
-; CHECK-LE-NEXT: vstr p0, [r1]
-; CHECK-LE-NEXT: ldrb.w r1, [sp]
+; CHECK-LE-NEXT: movs r1, #0
+; CHECK-LE-NEXT: vmrs r2, p0
+; CHECK-LE-NEXT: and r3, r2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #8, #1
+; CHECK-LE-NEXT: ubfx r2, r2, #12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #2, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r1, r2, #3, #1
+; CHECK-LE-NEXT: and r1, r1, #15
; CHECK-LE-NEXT: lsls r2, r1, #31
; CHECK-LE-NEXT: itt ne
; CHECK-LE-NEXT: vmovne r2, s0
@@ -35,10 +47,22 @@ define arm_aapcs_vfpcc void @masked_v4i3
; CHECK-BE-NEXT: .pad #4
; CHECK-BE-NEXT: sub sp, #4
; CHECK-BE-NEXT: vrev64.32 q1, q0
-; CHECK-BE-NEXT: mov r1, sp
+; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr
-; CHECK-BE-NEXT: vstr p0, [r1]
-; CHECK-BE-NEXT: ldrb.w r1, [sp]
+; CHECK-BE-NEXT: vmrs r2, p0
+; CHECK-BE-NEXT: and r3, r2, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r2, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r2, #8, #1
+; CHECK-BE-NEXT: ubfx r2, r2, #12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #2, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r1, r2, #3, #1
+; CHECK-BE-NEXT: and r1, r1, #15
; CHECK-BE-NEXT: lsls r2, r1, #31
; CHECK-BE-NEXT: itt ne
; CHECK-BE-NEXT: vmovne r2, s4
@@ -68,10 +92,22 @@ define arm_aapcs_vfpcc void @masked_v4i3
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .pad #4
; CHECK-LE-NEXT: sub sp, #4
-; CHECK-LE-NEXT: mov r1, sp
; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr
-; CHECK-LE-NEXT: vstr p0, [r1]
-; CHECK-LE-NEXT: ldrb.w r1, [sp]
+; CHECK-LE-NEXT: movs r1, #0
+; CHECK-LE-NEXT: vmrs r2, p0
+; CHECK-LE-NEXT: and r3, r2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #8, #1
+; CHECK-LE-NEXT: ubfx r2, r2, #12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #2, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r1, r2, #3, #1
+; CHECK-LE-NEXT: and r1, r1, #15
; CHECK-LE-NEXT: lsls r2, r1, #31
; CHECK-LE-NEXT: itt ne
; CHECK-LE-NEXT: vmovne r2, s0
@@ -96,10 +132,22 @@ define arm_aapcs_vfpcc void @masked_v4i3
; CHECK-BE-NEXT: .pad #4
; CHECK-BE-NEXT: sub sp, #4
; CHECK-BE-NEXT: vrev64.32 q1, q0
-; CHECK-BE-NEXT: mov r1, sp
+; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr
-; CHECK-BE-NEXT: vstr p0, [r1]
-; CHECK-BE-NEXT: ldrb.w r1, [sp]
+; CHECK-BE-NEXT: vmrs r2, p0
+; CHECK-BE-NEXT: and r3, r2, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r2, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r2, #8, #1
+; CHECK-BE-NEXT: ubfx r2, r2, #12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #2, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r1, r2, #3, #1
+; CHECK-BE-NEXT: and r1, r1, #15
; CHECK-BE-NEXT: lsls r2, r1, #31
; CHECK-BE-NEXT: itt ne
; CHECK-BE-NEXT: vmovne r2, s4
@@ -132,24 +180,36 @@ define i8* @masked_v4i32_pre(i8* %y, i8*
; CHECK-LE-NEXT: vldr d1, [sp, #8]
; CHECK-LE-NEXT: adds r0, #4
; CHECK-LE-NEXT: vmov d0, r2, r3
-; CHECK-LE-NEXT: add r2, sp, #4
+; CHECK-LE-NEXT: movs r2, #0
; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr
-; CHECK-LE-NEXT: vstr p0, [r2]
-; CHECK-LE-NEXT: ldrb.w r2, [sp, #4]
+; CHECK-LE-NEXT: vmrs r12, p0
+; CHECK-LE-NEXT: and r3, r12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #8, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #2, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
-; CHECK-LE-NEXT: lsls r1, r2, #31
+; CHECK-LE-NEXT: bfi r2, r3, #3, #1
+; CHECK-LE-NEXT: and r1, r2, #15
+; CHECK-LE-NEXT: lsls r2, r1, #31
; CHECK-LE-NEXT: itt ne
-; CHECK-LE-NEXT: vmovne r1, s0
-; CHECK-LE-NEXT: strne r1, [r0]
-; CHECK-LE-NEXT: lsls r1, r2, #30
+; CHECK-LE-NEXT: vmovne r2, s0
+; CHECK-LE-NEXT: strne r2, [r0]
+; CHECK-LE-NEXT: lsls r2, r1, #30
; CHECK-LE-NEXT: itt mi
-; CHECK-LE-NEXT: vmovmi r1, s1
-; CHECK-LE-NEXT: strmi r1, [r0, #4]
-; CHECK-LE-NEXT: lsls r1, r2, #29
+; CHECK-LE-NEXT: vmovmi r2, s1
+; CHECK-LE-NEXT: strmi r2, [r0, #4]
+; CHECK-LE-NEXT: lsls r2, r1, #29
; CHECK-LE-NEXT: itt mi
-; CHECK-LE-NEXT: vmovmi r1, s2
-; CHECK-LE-NEXT: strmi r1, [r0, #8]
-; CHECK-LE-NEXT: lsls r1, r2, #28
+; CHECK-LE-NEXT: vmovmi r2, s2
+; CHECK-LE-NEXT: strmi r2, [r0, #8]
+; CHECK-LE-NEXT: lsls r1, r1, #28
; CHECK-LE-NEXT: itt mi
; CHECK-LE-NEXT: vmovmi r1, s3
; CHECK-LE-NEXT: strmi r1, [r0, #12]
@@ -163,25 +223,37 @@ define i8* @masked_v4i32_pre(i8* %y, i8*
; CHECK-BE-NEXT: vldr d1, [sp, #8]
; CHECK-BE-NEXT: adds r0, #4
; CHECK-BE-NEXT: vmov d0, r3, r2
-; CHECK-BE-NEXT: add r2, sp, #4
+; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vrev64.32 q1, q0
; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr
-; CHECK-BE-NEXT: vstr p0, [r2]
-; CHECK-BE-NEXT: ldrb.w r2, [sp, #4]
+; CHECK-BE-NEXT: vmrs r12, p0
+; CHECK-BE-NEXT: and r3, r12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #8, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #2, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
-; CHECK-BE-NEXT: lsls r1, r2, #31
+; CHECK-BE-NEXT: bfi r2, r3, #3, #1
+; CHECK-BE-NEXT: and r1, r2, #15
+; CHECK-BE-NEXT: lsls r2, r1, #31
; CHECK-BE-NEXT: itt ne
-; CHECK-BE-NEXT: vmovne r1, s0
-; CHECK-BE-NEXT: strne r1, [r0]
-; CHECK-BE-NEXT: lsls r1, r2, #30
+; CHECK-BE-NEXT: vmovne r2, s0
+; CHECK-BE-NEXT: strne r2, [r0]
+; CHECK-BE-NEXT: lsls r2, r1, #30
; CHECK-BE-NEXT: itt mi
-; CHECK-BE-NEXT: vmovmi r1, s1
-; CHECK-BE-NEXT: strmi r1, [r0, #4]
-; CHECK-BE-NEXT: lsls r1, r2, #29
+; CHECK-BE-NEXT: vmovmi r2, s1
+; CHECK-BE-NEXT: strmi r2, [r0, #4]
+; CHECK-BE-NEXT: lsls r2, r1, #29
; CHECK-BE-NEXT: itt mi
-; CHECK-BE-NEXT: vmovmi r1, s2
-; CHECK-BE-NEXT: strmi r1, [r0, #8]
-; CHECK-BE-NEXT: lsls r1, r2, #28
+; CHECK-BE-NEXT: vmovmi r2, s2
+; CHECK-BE-NEXT: strmi r2, [r0, #8]
+; CHECK-BE-NEXT: lsls r1, r1, #28
; CHECK-BE-NEXT: itt mi
; CHECK-BE-NEXT: vmovmi r1, s3
; CHECK-BE-NEXT: strmi r1, [r0, #12]
@@ -204,11 +276,23 @@ define i8* @masked_v4i32_post(i8* %y, i8
; CHECK-LE-NEXT: sub sp, #8
; CHECK-LE-NEXT: vldr d1, [sp, #8]
; CHECK-LE-NEXT: vmov d0, r2, r3
-; CHECK-LE-NEXT: add r2, sp, #4
+; CHECK-LE-NEXT: movs r2, #0
; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr
-; CHECK-LE-NEXT: vstr p0, [r2]
-; CHECK-LE-NEXT: ldrb.w r2, [sp, #4]
+; CHECK-LE-NEXT: vmrs r12, p0
+; CHECK-LE-NEXT: and r3, r12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #8, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #2, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT: bfi r2, r3, #3, #1
+; CHECK-LE-NEXT: and r2, r2, #15
; CHECK-LE-NEXT: lsls r1, r2, #31
; CHECK-LE-NEXT: itt ne
; CHECK-LE-NEXT: vmovne r1, s0
@@ -236,12 +320,24 @@ define i8* @masked_v4i32_post(i8* %y, i8
; CHECK-BE-NEXT: sub sp, #8
; CHECK-BE-NEXT: vldr d1, [sp, #8]
; CHECK-BE-NEXT: vmov d0, r3, r2
-; CHECK-BE-NEXT: add r2, sp, #4
+; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vrev64.32 q1, q0
; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr
-; CHECK-BE-NEXT: vstr p0, [r2]
-; CHECK-BE-NEXT: ldrb.w r2, [sp, #4]
+; CHECK-BE-NEXT: vmrs r12, p0
+; CHECK-BE-NEXT: and r3, r12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #8, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #2, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT: bfi r2, r3, #3, #1
+; CHECK-BE-NEXT: and r2, r2, #15
; CHECK-BE-NEXT: lsls r1, r2, #31
; CHECK-BE-NEXT: itt ne
; CHECK-BE-NEXT: vmovne r1, s0
@@ -278,11 +374,35 @@ define arm_aapcs_vfpcc void @masked_v8i1
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .pad #8
; CHECK-LE-NEXT: sub sp, #8
-; CHECK-LE-NEXT: mov r1, sp
; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr
-; CHECK-LE-NEXT: vstr p0, [r1]
-; CHECK-LE-NEXT: ldrb.w r1, [sp]
-; CHECK-LE-NEXT: lsls r2, r1, #31
+; CHECK-LE-NEXT: movs r2, #0
+; CHECK-LE-NEXT: vmrs r1, p0
+; CHECK-LE-NEXT: and r3, r1, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #2, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #6, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #3, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #8, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #4, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #10, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #5, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #12, #1
+; CHECK-LE-NEXT: ubfx r1, r1, #14, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #6, #1
+; CHECK-LE-NEXT: rsbs r1, r1, #0
+; CHECK-LE-NEXT: bfi r2, r1, #7, #1
+; CHECK-LE-NEXT: uxtb r1, r2
+; CHECK-LE-NEXT: lsls r2, r2, #31
; CHECK-LE-NEXT: itt ne
; CHECK-LE-NEXT: vmovne.u16 r2, q0[0]
; CHECK-LE-NEXT: strhne r2, [r0]
@@ -322,11 +442,35 @@ define arm_aapcs_vfpcc void @masked_v8i1
; CHECK-BE-NEXT: .pad #8
; CHECK-BE-NEXT: sub sp, #8
; CHECK-BE-NEXT: vrev64.16 q1, q0
-; CHECK-BE-NEXT: mov r1, sp
+; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr
-; CHECK-BE-NEXT: vstr p0, [r1]
-; CHECK-BE-NEXT: ldrb.w r1, [sp]
-; CHECK-BE-NEXT: lsls r2, r1, #31
+; CHECK-BE-NEXT: vmrs r1, p0
+; CHECK-BE-NEXT: and r3, r1, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #2, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #2, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #6, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #3, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #8, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #4, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #10, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #5, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #12, #1
+; CHECK-BE-NEXT: ubfx r1, r1, #14, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #6, #1
+; CHECK-BE-NEXT: rsbs r1, r1, #0
+; CHECK-BE-NEXT: bfi r2, r1, #7, #1
+; CHECK-BE-NEXT: uxtb r1, r2
+; CHECK-BE-NEXT: lsls r2, r2, #31
; CHECK-BE-NEXT: itt ne
; CHECK-BE-NEXT: vmovne.u16 r2, q1[0]
; CHECK-BE-NEXT: strhne r2, [r0]
@@ -371,11 +515,35 @@ define arm_aapcs_vfpcc void @masked_v8i1
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .pad #8
; CHECK-LE-NEXT: sub sp, #8
-; CHECK-LE-NEXT: mov r1, sp
; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr
-; CHECK-LE-NEXT: vstr p0, [r1]
-; CHECK-LE-NEXT: ldrb.w r1, [sp]
-; CHECK-LE-NEXT: lsls r2, r1, #31
+; CHECK-LE-NEXT: movs r2, #0
+; CHECK-LE-NEXT: vmrs r1, p0
+; CHECK-LE-NEXT: and r3, r1, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #2, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #6, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #3, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #8, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #4, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #10, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #5, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #12, #1
+; CHECK-LE-NEXT: ubfx r1, r1, #14, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #6, #1
+; CHECK-LE-NEXT: rsbs r1, r1, #0
+; CHECK-LE-NEXT: bfi r2, r1, #7, #1
+; CHECK-LE-NEXT: uxtb r1, r2
+; CHECK-LE-NEXT: lsls r2, r2, #31
; CHECK-LE-NEXT: itt ne
; CHECK-LE-NEXT: vmovne.u16 r2, q0[0]
; CHECK-LE-NEXT: strhne r2, [r0]
@@ -415,11 +583,35 @@ define arm_aapcs_vfpcc void @masked_v8i1
; CHECK-BE-NEXT: .pad #8
; CHECK-BE-NEXT: sub sp, #8
; CHECK-BE-NEXT: vrev64.16 q1, q0
-; CHECK-BE-NEXT: mov r1, sp
+; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr
-; CHECK-BE-NEXT: vstr p0, [r1]
-; CHECK-BE-NEXT: ldrb.w r1, [sp]
-; CHECK-BE-NEXT: lsls r2, r1, #31
+; CHECK-BE-NEXT: vmrs r1, p0
+; CHECK-BE-NEXT: and r3, r1, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #2, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #2, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #6, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #3, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #8, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #4, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #10, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #5, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #12, #1
+; CHECK-BE-NEXT: ubfx r1, r1, #14, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #6, #1
+; CHECK-BE-NEXT: rsbs r1, r1, #0
+; CHECK-BE-NEXT: bfi r2, r1, #7, #1
+; CHECK-BE-NEXT: uxtb r1, r2
+; CHECK-BE-NEXT: lsls r2, r2, #31
; CHECK-BE-NEXT: itt ne
; CHECK-BE-NEXT: vmovne.u16 r2, q1[0]
; CHECK-BE-NEXT: strhne r2, [r0]
@@ -467,40 +659,64 @@ define i8* @masked_v8i16_pre(i8* %y, i8*
; CHECK-LE-NEXT: vldr d1, [sp, #8]
; CHECK-LE-NEXT: adds r0, #4
; CHECK-LE-NEXT: vmov d0, r2, r3
-; CHECK-LE-NEXT: mov r2, sp
+; CHECK-LE-NEXT: movs r2, #0
; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr
-; CHECK-LE-NEXT: vstr p0, [r2]
-; CHECK-LE-NEXT: ldrb.w r2, [sp]
+; CHECK-LE-NEXT: vmrs r12, p0
+; CHECK-LE-NEXT: and r3, r12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #2, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #6, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #3, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #8, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #4, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #10, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #5, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #6, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #14, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
-; CHECK-LE-NEXT: lsls r1, r2, #31
+; CHECK-LE-NEXT: bfi r2, r3, #7, #1
+; CHECK-LE-NEXT: uxtb r1, r2
+; CHECK-LE-NEXT: lsls r2, r2, #31
; CHECK-LE-NEXT: itt ne
-; CHECK-LE-NEXT: vmovne.u16 r1, q0[0]
-; CHECK-LE-NEXT: strhne r1, [r0]
-; CHECK-LE-NEXT: lsls r1, r2, #30
+; CHECK-LE-NEXT: vmovne.u16 r2, q0[0]
+; CHECK-LE-NEXT: strhne r2, [r0]
+; CHECK-LE-NEXT: lsls r2, r1, #30
; CHECK-LE-NEXT: itt mi
-; CHECK-LE-NEXT: vmovmi.u16 r1, q0[1]
-; CHECK-LE-NEXT: strhmi r1, [r0, #2]
-; CHECK-LE-NEXT: lsls r1, r2, #29
+; CHECK-LE-NEXT: vmovmi.u16 r2, q0[1]
+; CHECK-LE-NEXT: strhmi r2, [r0, #2]
+; CHECK-LE-NEXT: lsls r2, r1, #29
; CHECK-LE-NEXT: itt mi
-; CHECK-LE-NEXT: vmovmi.u16 r1, q0[2]
-; CHECK-LE-NEXT: strhmi r1, [r0, #4]
-; CHECK-LE-NEXT: lsls r1, r2, #28
+; CHECK-LE-NEXT: vmovmi.u16 r2, q0[2]
+; CHECK-LE-NEXT: strhmi r2, [r0, #4]
+; CHECK-LE-NEXT: lsls r2, r1, #28
; CHECK-LE-NEXT: itt mi
-; CHECK-LE-NEXT: vmovmi.u16 r1, q0[3]
-; CHECK-LE-NEXT: strhmi r1, [r0, #6]
-; CHECK-LE-NEXT: lsls r1, r2, #27
+; CHECK-LE-NEXT: vmovmi.u16 r2, q0[3]
+; CHECK-LE-NEXT: strhmi r2, [r0, #6]
+; CHECK-LE-NEXT: lsls r2, r1, #27
; CHECK-LE-NEXT: itt mi
-; CHECK-LE-NEXT: vmovmi.u16 r1, q0[4]
-; CHECK-LE-NEXT: strhmi r1, [r0, #8]
-; CHECK-LE-NEXT: lsls r1, r2, #26
+; CHECK-LE-NEXT: vmovmi.u16 r2, q0[4]
+; CHECK-LE-NEXT: strhmi r2, [r0, #8]
+; CHECK-LE-NEXT: lsls r2, r1, #26
; CHECK-LE-NEXT: itt mi
-; CHECK-LE-NEXT: vmovmi.u16 r1, q0[5]
-; CHECK-LE-NEXT: strhmi r1, [r0, #10]
-; CHECK-LE-NEXT: lsls r1, r2, #25
+; CHECK-LE-NEXT: vmovmi.u16 r2, q0[5]
+; CHECK-LE-NEXT: strhmi r2, [r0, #10]
+; CHECK-LE-NEXT: lsls r2, r1, #25
; CHECK-LE-NEXT: itt mi
-; CHECK-LE-NEXT: vmovmi.u16 r1, q0[6]
-; CHECK-LE-NEXT: strhmi r1, [r0, #12]
-; CHECK-LE-NEXT: lsls r1, r2, #24
+; CHECK-LE-NEXT: vmovmi.u16 r2, q0[6]
+; CHECK-LE-NEXT: strhmi r2, [r0, #12]
+; CHECK-LE-NEXT: lsls r1, r1, #24
; CHECK-LE-NEXT: itt mi
; CHECK-LE-NEXT: vmovmi.u16 r1, q0[7]
; CHECK-LE-NEXT: strhmi r1, [r0, #14]
@@ -514,41 +730,65 @@ define i8* @masked_v8i16_pre(i8* %y, i8*
; CHECK-BE-NEXT: vldr d1, [sp, #8]
; CHECK-BE-NEXT: adds r0, #4
; CHECK-BE-NEXT: vmov d0, r3, r2
-; CHECK-BE-NEXT: mov r2, sp
+; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vrev64.16 q1, q0
; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr
-; CHECK-BE-NEXT: vstr p0, [r2]
-; CHECK-BE-NEXT: ldrb.w r2, [sp]
+; CHECK-BE-NEXT: vmrs r12, p0
+; CHECK-BE-NEXT: and r3, r12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #2, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #2, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #6, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #3, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #8, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #4, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #10, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #5, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #6, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #14, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
-; CHECK-BE-NEXT: lsls r1, r2, #31
+; CHECK-BE-NEXT: bfi r2, r3, #7, #1
+; CHECK-BE-NEXT: uxtb r1, r2
+; CHECK-BE-NEXT: lsls r2, r2, #31
; CHECK-BE-NEXT: itt ne
-; CHECK-BE-NEXT: vmovne.u16 r1, q0[0]
-; CHECK-BE-NEXT: strhne r1, [r0]
-; CHECK-BE-NEXT: lsls r1, r2, #30
+; CHECK-BE-NEXT: vmovne.u16 r2, q0[0]
+; CHECK-BE-NEXT: strhne r2, [r0]
+; CHECK-BE-NEXT: lsls r2, r1, #30
; CHECK-BE-NEXT: itt mi
-; CHECK-BE-NEXT: vmovmi.u16 r1, q0[1]
-; CHECK-BE-NEXT: strhmi r1, [r0, #2]
-; CHECK-BE-NEXT: lsls r1, r2, #29
+; CHECK-BE-NEXT: vmovmi.u16 r2, q0[1]
+; CHECK-BE-NEXT: strhmi r2, [r0, #2]
+; CHECK-BE-NEXT: lsls r2, r1, #29
; CHECK-BE-NEXT: itt mi
-; CHECK-BE-NEXT: vmovmi.u16 r1, q0[2]
-; CHECK-BE-NEXT: strhmi r1, [r0, #4]
-; CHECK-BE-NEXT: lsls r1, r2, #28
+; CHECK-BE-NEXT: vmovmi.u16 r2, q0[2]
+; CHECK-BE-NEXT: strhmi r2, [r0, #4]
+; CHECK-BE-NEXT: lsls r2, r1, #28
; CHECK-BE-NEXT: itt mi
-; CHECK-BE-NEXT: vmovmi.u16 r1, q0[3]
-; CHECK-BE-NEXT: strhmi r1, [r0, #6]
-; CHECK-BE-NEXT: lsls r1, r2, #27
+; CHECK-BE-NEXT: vmovmi.u16 r2, q0[3]
+; CHECK-BE-NEXT: strhmi r2, [r0, #6]
+; CHECK-BE-NEXT: lsls r2, r1, #27
; CHECK-BE-NEXT: itt mi
-; CHECK-BE-NEXT: vmovmi.u16 r1, q0[4]
-; CHECK-BE-NEXT: strhmi r1, [r0, #8]
-; CHECK-BE-NEXT: lsls r1, r2, #26
+; CHECK-BE-NEXT: vmovmi.u16 r2, q0[4]
+; CHECK-BE-NEXT: strhmi r2, [r0, #8]
+; CHECK-BE-NEXT: lsls r2, r1, #26
; CHECK-BE-NEXT: itt mi
-; CHECK-BE-NEXT: vmovmi.u16 r1, q0[5]
-; CHECK-BE-NEXT: strhmi r1, [r0, #10]
-; CHECK-BE-NEXT: lsls r1, r2, #25
+; CHECK-BE-NEXT: vmovmi.u16 r2, q0[5]
+; CHECK-BE-NEXT: strhmi r2, [r0, #10]
+; CHECK-BE-NEXT: lsls r2, r1, #25
; CHECK-BE-NEXT: itt mi
-; CHECK-BE-NEXT: vmovmi.u16 r1, q0[6]
-; CHECK-BE-NEXT: strhmi r1, [r0, #12]
-; CHECK-BE-NEXT: lsls r1, r2, #24
+; CHECK-BE-NEXT: vmovmi.u16 r2, q0[6]
+; CHECK-BE-NEXT: strhmi r2, [r0, #12]
+; CHECK-BE-NEXT: lsls r1, r1, #24
; CHECK-BE-NEXT: itt mi
; CHECK-BE-NEXT: vmovmi.u16 r1, q0[7]
; CHECK-BE-NEXT: strhmi r1, [r0, #14]
@@ -571,12 +811,36 @@ define i8* @masked_v8i16_post(i8* %y, i8
; CHECK-LE-NEXT: sub sp, #8
; CHECK-LE-NEXT: vldr d1, [sp, #8]
; CHECK-LE-NEXT: vmov d0, r2, r3
-; CHECK-LE-NEXT: mov r2, sp
+; CHECK-LE-NEXT: movs r3, #0
; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr
-; CHECK-LE-NEXT: vstr p0, [r2]
-; CHECK-LE-NEXT: ldrb.w r2, [sp]
+; CHECK-LE-NEXT: vmrs r12, p0
+; CHECK-LE-NEXT: and r2, r12, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #0, #1
+; CHECK-LE-NEXT: ubfx r2, r12, #2, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #1, #1
+; CHECK-LE-NEXT: ubfx r2, r12, #4, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #2, #1
+; CHECK-LE-NEXT: ubfx r2, r12, #6, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #3, #1
+; CHECK-LE-NEXT: ubfx r2, r12, #8, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #4, #1
+; CHECK-LE-NEXT: ubfx r2, r12, #10, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #5, #1
+; CHECK-LE-NEXT: ubfx r2, r12, #12, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #6, #1
+; CHECK-LE-NEXT: ubfx r2, r12, #14, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
-; CHECK-LE-NEXT: lsls r1, r2, #31
+; CHECK-LE-NEXT: bfi r3, r2, #7, #1
+; CHECK-LE-NEXT: lsls r1, r3, #31
+; CHECK-LE-NEXT: uxtb r2, r3
; CHECK-LE-NEXT: itt ne
; CHECK-LE-NEXT: vmovne.u16 r1, q0[0]
; CHECK-LE-NEXT: strhne r1, [r0]
@@ -600,8 +864,8 @@ define i8* @masked_v8i16_post(i8* %y, i8
; CHECK-LE-NEXT: itt mi
; CHECK-LE-NEXT: vmovmi.u16 r1, q0[5]
; CHECK-LE-NEXT: strhmi r1, [r0, #10]
-; CHECK-LE-NEXT: adds r1, r0, #4
; CHECK-LE-NEXT: lsls r3, r2, #25
+; CHECK-LE-NEXT: add.w r1, r0, #4
; CHECK-LE-NEXT: itt mi
; CHECK-LE-NEXT: vmovmi.u16 r3, q0[6]
; CHECK-LE-NEXT: strhmi r3, [r0, #12]
@@ -619,13 +883,37 @@ define i8* @masked_v8i16_post(i8* %y, i8
; CHECK-BE-NEXT: sub sp, #8
; CHECK-BE-NEXT: vldr d1, [sp, #8]
; CHECK-BE-NEXT: vmov d0, r3, r2
-; CHECK-BE-NEXT: mov r2, sp
+; CHECK-BE-NEXT: movs r3, #0
; CHECK-BE-NEXT: vrev64.16 q1, q0
; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr
-; CHECK-BE-NEXT: vstr p0, [r2]
-; CHECK-BE-NEXT: ldrb.w r2, [sp]
+; CHECK-BE-NEXT: vmrs r12, p0
+; CHECK-BE-NEXT: and r2, r12, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #0, #1
+; CHECK-BE-NEXT: ubfx r2, r12, #2, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #1, #1
+; CHECK-BE-NEXT: ubfx r2, r12, #4, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #2, #1
+; CHECK-BE-NEXT: ubfx r2, r12, #6, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #3, #1
+; CHECK-BE-NEXT: ubfx r2, r12, #8, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #4, #1
+; CHECK-BE-NEXT: ubfx r2, r12, #10, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #5, #1
+; CHECK-BE-NEXT: ubfx r2, r12, #12, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #6, #1
+; CHECK-BE-NEXT: ubfx r2, r12, #14, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
-; CHECK-BE-NEXT: lsls r1, r2, #31
+; CHECK-BE-NEXT: bfi r3, r2, #7, #1
+; CHECK-BE-NEXT: lsls r1, r3, #31
+; CHECK-BE-NEXT: uxtb r2, r3
; CHECK-BE-NEXT: itt ne
; CHECK-BE-NEXT: vmovne.u16 r1, q0[0]
; CHECK-BE-NEXT: strhne r1, [r0]
@@ -649,8 +937,8 @@ define i8* @masked_v8i16_post(i8* %y, i8
; CHECK-BE-NEXT: itt mi
; CHECK-BE-NEXT: vmovmi.u16 r1, q0[5]
; CHECK-BE-NEXT: strhmi r1, [r0, #10]
-; CHECK-BE-NEXT: adds r1, r0, #4
; CHECK-BE-NEXT: lsls r3, r2, #25
+; CHECK-BE-NEXT: add.w r1, r0, #4
; CHECK-BE-NEXT: itt mi
; CHECK-BE-NEXT: vmovmi.u16 r3, q0[6]
; CHECK-BE-NEXT: strhmi r3, [r0, #12]
@@ -684,12 +972,11 @@ define arm_aapcs_vfpcc void @masked_v16i
; CHECK-LE-NEXT: mov r4, sp
; CHECK-LE-NEXT: bfc r4, #0, #4
; CHECK-LE-NEXT: mov sp, r4
-; CHECK-LE-NEXT: mov r1, sp
; CHECK-LE-NEXT: vcmp.s8 gt, q0, zr
-; CHECK-LE-NEXT: vstr p0, [r1]
; CHECK-LE-NEXT: sub.w r4, r7, #8
-; CHECK-LE-NEXT: ldrh.w r1, [sp]
-; CHECK-LE-NEXT: lsls r2, r1, #31
+; CHECK-LE-NEXT: vmrs r2, p0
+; CHECK-LE-NEXT: uxth r1, r2
+; CHECK-LE-NEXT: lsls r2, r2, #31
; CHECK-LE-NEXT: itt ne
; CHECK-LE-NEXT: vmovne.u8 r2, q0[0]
; CHECK-LE-NEXT: strbne r2, [r0]
@@ -768,12 +1055,11 @@ define arm_aapcs_vfpcc void @masked_v16i
; CHECK-BE-NEXT: bfc r4, #0, #4
; CHECK-BE-NEXT: mov sp, r4
; CHECK-BE-NEXT: vrev64.8 q1, q0
-; CHECK-BE-NEXT: mov r1, sp
-; CHECK-BE-NEXT: vcmp.s8 gt, q1, zr
; CHECK-BE-NEXT: sub.w r4, r7, #8
-; CHECK-BE-NEXT: vstr p0, [r1]
-; CHECK-BE-NEXT: ldrh.w r1, [sp]
-; CHECK-BE-NEXT: lsls r2, r1, #31
+; CHECK-BE-NEXT: vcmp.s8 gt, q1, zr
+; CHECK-BE-NEXT: vmrs r2, p0
+; CHECK-BE-NEXT: uxth r1, r2
+; CHECK-BE-NEXT: lsls r2, r2, #31
; CHECK-BE-NEXT: itt ne
; CHECK-BE-NEXT: vmovne.u8 r2, q1[0]
; CHECK-BE-NEXT: strbne r2, [r0]
@@ -860,73 +1146,72 @@ define i8* @masked_v16i8_pre(i8* %y, i8*
; CHECK-LE-NEXT: vldr d1, [r7, #8]
; CHECK-LE-NEXT: adds r0, #4
; CHECK-LE-NEXT: vmov d0, r2, r3
-; CHECK-LE-NEXT: mov r2, sp
-; CHECK-LE-NEXT: vcmp.s8 gt, q0, zr
; CHECK-LE-NEXT: sub.w r4, r7, #8
-; CHECK-LE-NEXT: vstr p0, [r2]
-; CHECK-LE-NEXT: ldrh.w r2, [sp]
+; CHECK-LE-NEXT: vcmp.s8 gt, q0, zr
; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
-; CHECK-LE-NEXT: lsls r1, r2, #31
+; CHECK-LE-NEXT: vmrs r2, p0
+; CHECK-LE-NEXT: uxth r1, r2
+; CHECK-LE-NEXT: lsls r2, r2, #31
; CHECK-LE-NEXT: itt ne
-; CHECK-LE-NEXT: vmovne.u8 r1, q0[0]
-; CHECK-LE-NEXT: strbne r1, [r0]
-; CHECK-LE-NEXT: lsls r1, r2, #30
+; CHECK-LE-NEXT: vmovne.u8 r2, q0[0]
+; CHECK-LE-NEXT: strbne r2, [r0]
+; CHECK-LE-NEXT: lsls r2, r1, #30
; CHECK-LE-NEXT: itt mi
-; CHECK-LE-NEXT: vmovmi.u8 r1, q0[1]
-; CHECK-LE-NEXT: strbmi r1, [r0, #1]
-; CHECK-LE-NEXT: lsls r1, r2, #29
+; CHECK-LE-NEXT: vmovmi.u8 r2, q0[1]
+; CHECK-LE-NEXT: strbmi r2, [r0, #1]
+; CHECK-LE-NEXT: lsls r2, r1, #29
; CHECK-LE-NEXT: itt mi
-; CHECK-LE-NEXT: vmovmi.u8 r1, q0[2]
-; CHECK-LE-NEXT: strbmi r1, [r0, #2]
-; CHECK-LE-NEXT: lsls r1, r2, #28
+; CHECK-LE-NEXT: vmovmi.u8 r2, q0[2]
+; CHECK-LE-NEXT: strbmi r2, [r0, #2]
+; CHECK-LE-NEXT: lsls r2, r1, #28
; CHECK-LE-NEXT: itt mi
-; CHECK-LE-NEXT: vmovmi.u8 r1, q0[3]
-; CHECK-LE-NEXT: strbmi r1, [r0, #3]
-; CHECK-LE-NEXT: lsls r1, r2, #27
+; CHECK-LE-NEXT: vmovmi.u8 r2, q0[3]
+; CHECK-LE-NEXT: strbmi r2, [r0, #3]
+; CHECK-LE-NEXT: lsls r2, r1, #27
; CHECK-LE-NEXT: itt mi
-; CHECK-LE-NEXT: vmovmi.u8 r1, q0[4]
-; CHECK-LE-NEXT: strbmi r1, [r0, #4]
-; CHECK-LE-NEXT: lsls r1, r2, #26
+; CHECK-LE-NEXT: vmovmi.u8 r2, q0[4]
+; CHECK-LE-NEXT: strbmi r2, [r0, #4]
+; CHECK-LE-NEXT: lsls r2, r1, #26
; CHECK-LE-NEXT: itt mi
-; CHECK-LE-NEXT: vmovmi.u8 r1, q0[5]
-; CHECK-LE-NEXT: strbmi r1, [r0, #5]
-; CHECK-LE-NEXT: lsls r1, r2, #25
+; CHECK-LE-NEXT: vmovmi.u8 r2, q0[5]
+; CHECK-LE-NEXT: strbmi r2, [r0, #5]
+; CHECK-LE-NEXT: lsls r2, r1, #25
; CHECK-LE-NEXT: itt mi
-; CHECK-LE-NEXT: vmovmi.u8 r1, q0[6]
-; CHECK-LE-NEXT: strbmi r1, [r0, #6]
-; CHECK-LE-NEXT: lsls r1, r2, #24
+; CHECK-LE-NEXT: vmovmi.u8 r2, q0[6]
+; CHECK-LE-NEXT: strbmi r2, [r0, #6]
+; CHECK-LE-NEXT: lsls r2, r1, #24
; CHECK-LE-NEXT: itt mi
-; CHECK-LE-NEXT: vmovmi.u8 r1, q0[7]
-; CHECK-LE-NEXT: strbmi r1, [r0, #7]
-; CHECK-LE-NEXT: lsls r1, r2, #23
+; CHECK-LE-NEXT: vmovmi.u8 r2, q0[7]
+; CHECK-LE-NEXT: strbmi r2, [r0, #7]
+; CHECK-LE-NEXT: lsls r2, r1, #23
; CHECK-LE-NEXT: itt mi
-; CHECK-LE-NEXT: vmovmi.u8 r1, q0[8]
-; CHECK-LE-NEXT: strbmi r1, [r0, #8]
-; CHECK-LE-NEXT: lsls r1, r2, #22
+; CHECK-LE-NEXT: vmovmi.u8 r2, q0[8]
+; CHECK-LE-NEXT: strbmi r2, [r0, #8]
+; CHECK-LE-NEXT: lsls r2, r1, #22
; CHECK-LE-NEXT: itt mi
-; CHECK-LE-NEXT: vmovmi.u8 r1, q0[9]
-; CHECK-LE-NEXT: strbmi r1, [r0, #9]
-; CHECK-LE-NEXT: lsls r1, r2, #21
+; CHECK-LE-NEXT: vmovmi.u8 r2, q0[9]
+; CHECK-LE-NEXT: strbmi r2, [r0, #9]
+; CHECK-LE-NEXT: lsls r2, r1, #21
; CHECK-LE-NEXT: itt mi
-; CHECK-LE-NEXT: vmovmi.u8 r1, q0[10]
-; CHECK-LE-NEXT: strbmi r1, [r0, #10]
-; CHECK-LE-NEXT: lsls r1, r2, #20
+; CHECK-LE-NEXT: vmovmi.u8 r2, q0[10]
+; CHECK-LE-NEXT: strbmi r2, [r0, #10]
+; CHECK-LE-NEXT: lsls r2, r1, #20
; CHECK-LE-NEXT: itt mi
-; CHECK-LE-NEXT: vmovmi.u8 r1, q0[11]
-; CHECK-LE-NEXT: strbmi r1, [r0, #11]
-; CHECK-LE-NEXT: lsls r1, r2, #19
+; CHECK-LE-NEXT: vmovmi.u8 r2, q0[11]
+; CHECK-LE-NEXT: strbmi r2, [r0, #11]
+; CHECK-LE-NEXT: lsls r2, r1, #19
; CHECK-LE-NEXT: itt mi
-; CHECK-LE-NEXT: vmovmi.u8 r1, q0[12]
-; CHECK-LE-NEXT: strbmi r1, [r0, #12]
-; CHECK-LE-NEXT: lsls r1, r2, #18
+; CHECK-LE-NEXT: vmovmi.u8 r2, q0[12]
+; CHECK-LE-NEXT: strbmi r2, [r0, #12]
+; CHECK-LE-NEXT: lsls r2, r1, #18
; CHECK-LE-NEXT: itt mi
-; CHECK-LE-NEXT: vmovmi.u8 r1, q0[13]
-; CHECK-LE-NEXT: strbmi r1, [r0, #13]
-; CHECK-LE-NEXT: lsls r1, r2, #17
+; CHECK-LE-NEXT: vmovmi.u8 r2, q0[13]
+; CHECK-LE-NEXT: strbmi r2, [r0, #13]
+; CHECK-LE-NEXT: lsls r2, r1, #17
; CHECK-LE-NEXT: itt mi
-; CHECK-LE-NEXT: vmovmi.u8 r1, q0[14]
-; CHECK-LE-NEXT: strbmi r1, [r0, #14]
-; CHECK-LE-NEXT: lsls r1, r2, #16
+; CHECK-LE-NEXT: vmovmi.u8 r2, q0[14]
+; CHECK-LE-NEXT: strbmi r2, [r0, #14]
+; CHECK-LE-NEXT: lsls r1, r1, #16
; CHECK-LE-NEXT: itt mi
; CHECK-LE-NEXT: vmovmi.u8 r1, q0[15]
; CHECK-LE-NEXT: strbmi r1, [r0, #15]
@@ -947,74 +1232,73 @@ define i8* @masked_v16i8_pre(i8* %y, i8*
; CHECK-BE-NEXT: vldr d1, [r7, #8]
; CHECK-BE-NEXT: adds r0, #4
; CHECK-BE-NEXT: vmov d0, r3, r2
-; CHECK-BE-NEXT: mov r2, sp
-; CHECK-BE-NEXT: vrev64.8 q1, q0
; CHECK-BE-NEXT: sub.w r4, r7, #8
-; CHECK-BE-NEXT: vcmp.s8 gt, q1, zr
-; CHECK-BE-NEXT: vstr p0, [r2]
-; CHECK-BE-NEXT: ldrh.w r2, [sp]
+; CHECK-BE-NEXT: vrev64.8 q1, q0
; CHECK-BE-NEXT: vldrb.u8 q0, [r1]
-; CHECK-BE-NEXT: lsls r1, r2, #31
+; CHECK-BE-NEXT: vcmp.s8 gt, q1, zr
+; CHECK-BE-NEXT: vmrs r2, p0
+; CHECK-BE-NEXT: uxth r1, r2
+; CHECK-BE-NEXT: lsls r2, r2, #31
; CHECK-BE-NEXT: itt ne
-; CHECK-BE-NEXT: vmovne.u8 r1, q0[0]
-; CHECK-BE-NEXT: strbne r1, [r0]
-; CHECK-BE-NEXT: lsls r1, r2, #30
+; CHECK-BE-NEXT: vmovne.u8 r2, q0[0]
+; CHECK-BE-NEXT: strbne r2, [r0]
+; CHECK-BE-NEXT: lsls r2, r1, #30
; CHECK-BE-NEXT: itt mi
-; CHECK-BE-NEXT: vmovmi.u8 r1, q0[1]
-; CHECK-BE-NEXT: strbmi r1, [r0, #1]
-; CHECK-BE-NEXT: lsls r1, r2, #29
+; CHECK-BE-NEXT: vmovmi.u8 r2, q0[1]
+; CHECK-BE-NEXT: strbmi r2, [r0, #1]
+; CHECK-BE-NEXT: lsls r2, r1, #29
; CHECK-BE-NEXT: itt mi
-; CHECK-BE-NEXT: vmovmi.u8 r1, q0[2]
-; CHECK-BE-NEXT: strbmi r1, [r0, #2]
-; CHECK-BE-NEXT: lsls r1, r2, #28
+; CHECK-BE-NEXT: vmovmi.u8 r2, q0[2]
+; CHECK-BE-NEXT: strbmi r2, [r0, #2]
+; CHECK-BE-NEXT: lsls r2, r1, #28
; CHECK-BE-NEXT: itt mi
-; CHECK-BE-NEXT: vmovmi.u8 r1, q0[3]
-; CHECK-BE-NEXT: strbmi r1, [r0, #3]
-; CHECK-BE-NEXT: lsls r1, r2, #27
+; CHECK-BE-NEXT: vmovmi.u8 r2, q0[3]
+; CHECK-BE-NEXT: strbmi r2, [r0, #3]
+; CHECK-BE-NEXT: lsls r2, r1, #27
; CHECK-BE-NEXT: itt mi
-; CHECK-BE-NEXT: vmovmi.u8 r1, q0[4]
-; CHECK-BE-NEXT: strbmi r1, [r0, #4]
-; CHECK-BE-NEXT: lsls r1, r2, #26
+; CHECK-BE-NEXT: vmovmi.u8 r2, q0[4]
+; CHECK-BE-NEXT: strbmi r2, [r0, #4]
+; CHECK-BE-NEXT: lsls r2, r1, #26
; CHECK-BE-NEXT: itt mi
-; CHECK-BE-NEXT: vmovmi.u8 r1, q0[5]
-; CHECK-BE-NEXT: strbmi r1, [r0, #5]
-; CHECK-BE-NEXT: lsls r1, r2, #25
+; CHECK-BE-NEXT: vmovmi.u8 r2, q0[5]
+; CHECK-BE-NEXT: strbmi r2, [r0, #5]
+; CHECK-BE-NEXT: lsls r2, r1, #25
; CHECK-BE-NEXT: itt mi
-; CHECK-BE-NEXT: vmovmi.u8 r1, q0[6]
-; CHECK-BE-NEXT: strbmi r1, [r0, #6]
-; CHECK-BE-NEXT: lsls r1, r2, #24
+; CHECK-BE-NEXT: vmovmi.u8 r2, q0[6]
+; CHECK-BE-NEXT: strbmi r2, [r0, #6]
+; CHECK-BE-NEXT: lsls r2, r1, #24
; CHECK-BE-NEXT: itt mi
-; CHECK-BE-NEXT: vmovmi.u8 r1, q0[7]
-; CHECK-BE-NEXT: strbmi r1, [r0, #7]
-; CHECK-BE-NEXT: lsls r1, r2, #23
+; CHECK-BE-NEXT: vmovmi.u8 r2, q0[7]
+; CHECK-BE-NEXT: strbmi r2, [r0, #7]
+; CHECK-BE-NEXT: lsls r2, r1, #23
; CHECK-BE-NEXT: itt mi
-; CHECK-BE-NEXT: vmovmi.u8 r1, q0[8]
-; CHECK-BE-NEXT: strbmi r1, [r0, #8]
-; CHECK-BE-NEXT: lsls r1, r2, #22
+; CHECK-BE-NEXT: vmovmi.u8 r2, q0[8]
+; CHECK-BE-NEXT: strbmi r2, [r0, #8]
+; CHECK-BE-NEXT: lsls r2, r1, #22
; CHECK-BE-NEXT: itt mi
-; CHECK-BE-NEXT: vmovmi.u8 r1, q0[9]
-; CHECK-BE-NEXT: strbmi r1, [r0, #9]
-; CHECK-BE-NEXT: lsls r1, r2, #21
+; CHECK-BE-NEXT: vmovmi.u8 r2, q0[9]
+; CHECK-BE-NEXT: strbmi r2, [r0, #9]
+; CHECK-BE-NEXT: lsls r2, r1, #21
; CHECK-BE-NEXT: itt mi
-; CHECK-BE-NEXT: vmovmi.u8 r1, q0[10]
-; CHECK-BE-NEXT: strbmi r1, [r0, #10]
-; CHECK-BE-NEXT: lsls r1, r2, #20
+; CHECK-BE-NEXT: vmovmi.u8 r2, q0[10]
+; CHECK-BE-NEXT: strbmi r2, [r0, #10]
+; CHECK-BE-NEXT: lsls r2, r1, #20
; CHECK-BE-NEXT: itt mi
-; CHECK-BE-NEXT: vmovmi.u8 r1, q0[11]
-; CHECK-BE-NEXT: strbmi r1, [r0, #11]
-; CHECK-BE-NEXT: lsls r1, r2, #19
+; CHECK-BE-NEXT: vmovmi.u8 r2, q0[11]
+; CHECK-BE-NEXT: strbmi r2, [r0, #11]
+; CHECK-BE-NEXT: lsls r2, r1, #19
; CHECK-BE-NEXT: itt mi
-; CHECK-BE-NEXT: vmovmi.u8 r1, q0[12]
-; CHECK-BE-NEXT: strbmi r1, [r0, #12]
-; CHECK-BE-NEXT: lsls r1, r2, #18
+; CHECK-BE-NEXT: vmovmi.u8 r2, q0[12]
+; CHECK-BE-NEXT: strbmi r2, [r0, #12]
+; CHECK-BE-NEXT: lsls r2, r1, #18
; CHECK-BE-NEXT: itt mi
-; CHECK-BE-NEXT: vmovmi.u8 r1, q0[13]
-; CHECK-BE-NEXT: strbmi r1, [r0, #13]
-; CHECK-BE-NEXT: lsls r1, r2, #17
+; CHECK-BE-NEXT: vmovmi.u8 r2, q0[13]
+; CHECK-BE-NEXT: strbmi r2, [r0, #13]
+; CHECK-BE-NEXT: lsls r2, r1, #17
; CHECK-BE-NEXT: itt mi
-; CHECK-BE-NEXT: vmovmi.u8 r1, q0[14]
-; CHECK-BE-NEXT: strbmi r1, [r0, #14]
-; CHECK-BE-NEXT: lsls r1, r2, #16
+; CHECK-BE-NEXT: vmovmi.u8 r2, q0[14]
+; CHECK-BE-NEXT: strbmi r2, [r0, #14]
+; CHECK-BE-NEXT: lsls r1, r1, #16
; CHECK-BE-NEXT: itt mi
; CHECK-BE-NEXT: vmovmi.u8 r1, q0[15]
; CHECK-BE-NEXT: strbmi r1, [r0, #15]
@@ -1045,12 +1329,11 @@ define i8* @masked_v16i8_post(i8* %y, i8
; CHECK-LE-NEXT: vldr d1, [r7, #8]
; CHECK-LE-NEXT: sub.w r4, r7, #8
; CHECK-LE-NEXT: vmov d0, r2, r3
-; CHECK-LE-NEXT: mov r2, sp
; CHECK-LE-NEXT: vcmp.s8 gt, q0, zr
-; CHECK-LE-NEXT: vstr p0, [r2]
-; CHECK-LE-NEXT: ldrh.w r2, [sp]
; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
-; CHECK-LE-NEXT: lsls r1, r2, #31
+; CHECK-LE-NEXT: vmrs r1, p0
+; CHECK-LE-NEXT: uxth r2, r1
+; CHECK-LE-NEXT: lsls r1, r1, #31
; CHECK-LE-NEXT: itt ne
; CHECK-LE-NEXT: vmovne.u8 r1, q0[0]
; CHECK-LE-NEXT: strbne r1, [r0]
@@ -1106,8 +1389,8 @@ define i8* @masked_v16i8_post(i8* %y, i8
; CHECK-LE-NEXT: itt mi
; CHECK-LE-NEXT: vmovmi.u8 r1, q0[13]
; CHECK-LE-NEXT: strbmi r1, [r0, #13]
-; CHECK-LE-NEXT: adds r1, r0, #4
; CHECK-LE-NEXT: lsls r3, r2, #17
+; CHECK-LE-NEXT: add.w r1, r0, #4
; CHECK-LE-NEXT: itt mi
; CHECK-LE-NEXT: vmovmi.u8 r3, q0[14]
; CHECK-LE-NEXT: strbmi r3, [r0, #14]
@@ -1133,13 +1416,12 @@ define i8* @masked_v16i8_post(i8* %y, i8
; CHECK-BE-NEXT: vldr d1, [r7, #8]
; CHECK-BE-NEXT: sub.w r4, r7, #8
; CHECK-BE-NEXT: vmov d0, r3, r2
-; CHECK-BE-NEXT: mov r2, sp
; CHECK-BE-NEXT: vrev64.8 q1, q0
-; CHECK-BE-NEXT: vcmp.s8 gt, q1, zr
-; CHECK-BE-NEXT: vstr p0, [r2]
-; CHECK-BE-NEXT: ldrh.w r2, [sp]
; CHECK-BE-NEXT: vldrb.u8 q0, [r1]
-; CHECK-BE-NEXT: lsls r1, r2, #31
+; CHECK-BE-NEXT: vcmp.s8 gt, q1, zr
+; CHECK-BE-NEXT: vmrs r1, p0
+; CHECK-BE-NEXT: uxth r2, r1
+; CHECK-BE-NEXT: lsls r1, r1, #31
; CHECK-BE-NEXT: itt ne
; CHECK-BE-NEXT: vmovne.u8 r1, q0[0]
; CHECK-BE-NEXT: strbne r1, [r0]
@@ -1195,8 +1477,8 @@ define i8* @masked_v16i8_post(i8* %y, i8
; CHECK-BE-NEXT: itt mi
; CHECK-BE-NEXT: vmovmi.u8 r1, q0[13]
; CHECK-BE-NEXT: strbmi r1, [r0, #13]
-; CHECK-BE-NEXT: adds r1, r0, #4
; CHECK-BE-NEXT: lsls r3, r2, #17
+; CHECK-BE-NEXT: add.w r1, r0, #4
; CHECK-BE-NEXT: itt mi
; CHECK-BE-NEXT: vmovmi.u8 r3, q0[14]
; CHECK-BE-NEXT: strbmi r3, [r0, #14]
@@ -1223,10 +1505,22 @@ define arm_aapcs_vfpcc void @masked_v4f3
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .pad #4
; CHECK-LE-NEXT: sub sp, #4
-; CHECK-LE-NEXT: mov r1, sp
; CHECK-LE-NEXT: vcmp.i32 ne, q1, zr
-; CHECK-LE-NEXT: vstr p0, [r1]
-; CHECK-LE-NEXT: ldrb.w r1, [sp]
+; CHECK-LE-NEXT: movs r1, #0
+; CHECK-LE-NEXT: vmrs r2, p0
+; CHECK-LE-NEXT: and r3, r2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #8, #1
+; CHECK-LE-NEXT: ubfx r2, r2, #12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #2, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r1, r2, #3, #1
+; CHECK-LE-NEXT: and r1, r1, #15
; CHECK-LE-NEXT: lsls r2, r1, #31
; CHECK-LE-NEXT: it ne
; CHECK-LE-NEXT: vstrne s0, [r0]
@@ -1247,11 +1541,23 @@ define arm_aapcs_vfpcc void @masked_v4f3
; CHECK-BE-NEXT: .pad #4
; CHECK-BE-NEXT: sub sp, #4
; CHECK-BE-NEXT: vrev64.32 q2, q1
-; CHECK-BE-NEXT: mov r1, sp
+; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: vcmp.i32 ne, q2, zr
; CHECK-BE-NEXT: vrev64.32 q1, q0
-; CHECK-BE-NEXT: vstr p0, [r1]
-; CHECK-BE-NEXT: ldrb.w r1, [sp]
+; CHECK-BE-NEXT: vmrs r2, p0
+; CHECK-BE-NEXT: and r3, r2, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r2, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r2, #8, #1
+; CHECK-BE-NEXT: ubfx r2, r2, #12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #2, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r1, r2, #3, #1
+; CHECK-BE-NEXT: and r1, r1, #15
; CHECK-BE-NEXT: lsls r2, r1, #31
; CHECK-BE-NEXT: it ne
; CHECK-BE-NEXT: vstrne s4, [r0]
@@ -1277,10 +1583,22 @@ define arm_aapcs_vfpcc void @masked_v4f3
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .pad #20
; CHECK-LE-NEXT: sub sp, #20
-; CHECK-LE-NEXT: add r1, sp, #16
; CHECK-LE-NEXT: vcmp.i32 ne, q1, zr
-; CHECK-LE-NEXT: vstr p0, [r1]
-; CHECK-LE-NEXT: ldrb.w r1, [sp, #16]
+; CHECK-LE-NEXT: movs r1, #0
+; CHECK-LE-NEXT: vmrs r2, p0
+; CHECK-LE-NEXT: and r3, r2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #8, #1
+; CHECK-LE-NEXT: ubfx r2, r2, #12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #2, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r1, r2, #3, #1
+; CHECK-LE-NEXT: and r1, r1, #15
; CHECK-LE-NEXT: lsls r2, r1, #31
; CHECK-LE-NEXT: ittt ne
; CHECK-LE-NEXT: vstrne s0, [sp, #12]
@@ -1309,11 +1627,23 @@ define arm_aapcs_vfpcc void @masked_v4f3
; CHECK-BE-NEXT: .pad #20
; CHECK-BE-NEXT: sub sp, #20
; CHECK-BE-NEXT: vrev64.32 q2, q1
-; CHECK-BE-NEXT: add r1, sp, #16
+; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: vcmp.i32 ne, q2, zr
; CHECK-BE-NEXT: vrev64.32 q1, q0
-; CHECK-BE-NEXT: vstr p0, [r1]
-; CHECK-BE-NEXT: ldrb.w r1, [sp, #16]
+; CHECK-BE-NEXT: vmrs r2, p0
+; CHECK-BE-NEXT: and r3, r2, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r2, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r2, #8, #1
+; CHECK-BE-NEXT: ubfx r2, r2, #12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #2, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r1, r2, #3, #1
+; CHECK-BE-NEXT: and r1, r1, #15
; CHECK-BE-NEXT: lsls r2, r1, #31
; CHECK-BE-NEXT: ittt ne
; CHECK-BE-NEXT: vstrne s4, [sp, #12]
@@ -1350,21 +1680,33 @@ define i8* @masked_v4f32_pre(i8* %y, i8*
; CHECK-LE-NEXT: vldr d1, [sp, #8]
; CHECK-LE-NEXT: adds r0, #4
; CHECK-LE-NEXT: vmov d0, r2, r3
-; CHECK-LE-NEXT: add r2, sp, #4
+; CHECK-LE-NEXT: movs r2, #0
; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr
-; CHECK-LE-NEXT: vstr p0, [r2]
-; CHECK-LE-NEXT: ldrb.w r2, [sp, #4]
+; CHECK-LE-NEXT: vmrs r12, p0
+; CHECK-LE-NEXT: and r3, r12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #8, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #2, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
-; CHECK-LE-NEXT: lsls r1, r2, #31
+; CHECK-LE-NEXT: bfi r2, r3, #3, #1
+; CHECK-LE-NEXT: and r1, r2, #15
+; CHECK-LE-NEXT: lsls r2, r1, #31
; CHECK-LE-NEXT: it ne
; CHECK-LE-NEXT: vstrne s0, [r0]
-; CHECK-LE-NEXT: lsls r1, r2, #30
+; CHECK-LE-NEXT: lsls r2, r1, #30
; CHECK-LE-NEXT: it mi
; CHECK-LE-NEXT: vstrmi s1, [r0, #4]
-; CHECK-LE-NEXT: lsls r1, r2, #29
+; CHECK-LE-NEXT: lsls r2, r1, #29
; CHECK-LE-NEXT: it mi
; CHECK-LE-NEXT: vstrmi s2, [r0, #8]
-; CHECK-LE-NEXT: lsls r1, r2, #28
+; CHECK-LE-NEXT: lsls r1, r1, #28
; CHECK-LE-NEXT: it mi
; CHECK-LE-NEXT: vstrmi s3, [r0, #12]
; CHECK-LE-NEXT: add sp, #8
@@ -1377,22 +1719,34 @@ define i8* @masked_v4f32_pre(i8* %y, i8*
; CHECK-BE-NEXT: vldr d1, [sp, #8]
; CHECK-BE-NEXT: adds r0, #4
; CHECK-BE-NEXT: vmov d0, r3, r2
-; CHECK-BE-NEXT: add r2, sp, #4
+; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vrev64.32 q1, q0
; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr
-; CHECK-BE-NEXT: vstr p0, [r2]
-; CHECK-BE-NEXT: ldrb.w r2, [sp, #4]
+; CHECK-BE-NEXT: vmrs r12, p0
+; CHECK-BE-NEXT: and r3, r12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #8, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #2, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
-; CHECK-BE-NEXT: lsls r1, r2, #31
+; CHECK-BE-NEXT: bfi r2, r3, #3, #1
+; CHECK-BE-NEXT: and r1, r2, #15
+; CHECK-BE-NEXT: lsls r2, r1, #31
; CHECK-BE-NEXT: it ne
; CHECK-BE-NEXT: vstrne s0, [r0]
-; CHECK-BE-NEXT: lsls r1, r2, #30
+; CHECK-BE-NEXT: lsls r2, r1, #30
; CHECK-BE-NEXT: it mi
; CHECK-BE-NEXT: vstrmi s1, [r0, #4]
-; CHECK-BE-NEXT: lsls r1, r2, #29
+; CHECK-BE-NEXT: lsls r2, r1, #29
; CHECK-BE-NEXT: it mi
; CHECK-BE-NEXT: vstrmi s2, [r0, #8]
-; CHECK-BE-NEXT: lsls r1, r2, #28
+; CHECK-BE-NEXT: lsls r1, r1, #28
; CHECK-BE-NEXT: it mi
; CHECK-BE-NEXT: vstrmi s3, [r0, #12]
; CHECK-BE-NEXT: add sp, #8
@@ -1414,11 +1768,23 @@ define i8* @masked_v4f32_post(i8* %y, i8
; CHECK-LE-NEXT: sub sp, #8
; CHECK-LE-NEXT: vldr d1, [sp, #8]
; CHECK-LE-NEXT: vmov d0, r2, r3
-; CHECK-LE-NEXT: add r2, sp, #4
+; CHECK-LE-NEXT: movs r2, #0
; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr
-; CHECK-LE-NEXT: vstr p0, [r2]
-; CHECK-LE-NEXT: ldrb.w r2, [sp, #4]
+; CHECK-LE-NEXT: vmrs r12, p0
+; CHECK-LE-NEXT: and r3, r12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #8, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #2, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT: bfi r2, r3, #3, #1
+; CHECK-LE-NEXT: and r2, r2, #15
; CHECK-LE-NEXT: lsls r1, r2, #31
; CHECK-LE-NEXT: it ne
; CHECK-LE-NEXT: vstrne s0, [r0]
@@ -1442,12 +1808,24 @@ define i8* @masked_v4f32_post(i8* %y, i8
; CHECK-BE-NEXT: sub sp, #8
; CHECK-BE-NEXT: vldr d1, [sp, #8]
; CHECK-BE-NEXT: vmov d0, r3, r2
-; CHECK-BE-NEXT: add r2, sp, #4
+; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vrev64.32 q1, q0
; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr
-; CHECK-BE-NEXT: vstr p0, [r2]
-; CHECK-BE-NEXT: ldrb.w r2, [sp, #4]
+; CHECK-BE-NEXT: vmrs r12, p0
+; CHECK-BE-NEXT: and r3, r12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #8, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #2, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT: bfi r2, r3, #3, #1
+; CHECK-BE-NEXT: and r2, r2, #15
; CHECK-BE-NEXT: lsls r1, r2, #31
; CHECK-BE-NEXT: it ne
; CHECK-BE-NEXT: vstrne s0, [r0]
@@ -1480,11 +1858,35 @@ define arm_aapcs_vfpcc void @masked_v8f1
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .pad #8
; CHECK-LE-NEXT: sub sp, #8
-; CHECK-LE-NEXT: mov r1, sp
; CHECK-LE-NEXT: vcmp.i16 ne, q1, zr
-; CHECK-LE-NEXT: vstr p0, [r1]
-; CHECK-LE-NEXT: ldrb.w r1, [sp]
-; CHECK-LE-NEXT: lsls r2, r1, #31
+; CHECK-LE-NEXT: movs r2, #0
+; CHECK-LE-NEXT: vmrs r1, p0
+; CHECK-LE-NEXT: and r3, r1, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #2, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #6, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #3, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #8, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #4, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #10, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #5, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #12, #1
+; CHECK-LE-NEXT: ubfx r1, r1, #14, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #6, #1
+; CHECK-LE-NEXT: rsbs r1, r1, #0
+; CHECK-LE-NEXT: bfi r2, r1, #7, #1
+; CHECK-LE-NEXT: uxtb r1, r2
+; CHECK-LE-NEXT: lsls r2, r2, #31
; CHECK-LE-NEXT: bne .LBB15_9
; CHECK-LE-NEXT: @ %bb.1: @ %else
; CHECK-LE-NEXT: lsls r2, r1, #30
@@ -1552,12 +1954,36 @@ define arm_aapcs_vfpcc void @masked_v8f1
; CHECK-BE-NEXT: .pad #8
; CHECK-BE-NEXT: sub sp, #8
; CHECK-BE-NEXT: vrev64.16 q2, q1
-; CHECK-BE-NEXT: mov r1, sp
+; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vcmp.i16 ne, q2, zr
; CHECK-BE-NEXT: vrev64.16 q1, q0
-; CHECK-BE-NEXT: vstr p0, [r1]
-; CHECK-BE-NEXT: ldrb.w r1, [sp]
-; CHECK-BE-NEXT: lsls r2, r1, #31
+; CHECK-BE-NEXT: vmrs r1, p0
+; CHECK-BE-NEXT: and r3, r1, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #2, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #2, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #6, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #3, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #8, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #4, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #10, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #5, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #12, #1
+; CHECK-BE-NEXT: ubfx r1, r1, #14, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #6, #1
+; CHECK-BE-NEXT: rsbs r1, r1, #0
+; CHECK-BE-NEXT: bfi r2, r1, #7, #1
+; CHECK-BE-NEXT: uxtb r1, r2
+; CHECK-BE-NEXT: lsls r2, r2, #31
; CHECK-BE-NEXT: bne .LBB15_9
; CHECK-BE-NEXT: @ %bb.1: @ %else
; CHECK-BE-NEXT: lsls r2, r1, #30
@@ -1630,11 +2056,35 @@ define arm_aapcs_vfpcc void @masked_v8f1
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .pad #40
; CHECK-LE-NEXT: sub sp, #40
-; CHECK-LE-NEXT: add r1, sp, #32
; CHECK-LE-NEXT: vcmp.i16 ne, q1, zr
-; CHECK-LE-NEXT: vstr p0, [r1]
-; CHECK-LE-NEXT: ldrb.w r1, [sp, #32]
-; CHECK-LE-NEXT: lsls r2, r1, #31
+; CHECK-LE-NEXT: movs r2, #0
+; CHECK-LE-NEXT: vmrs r1, p0
+; CHECK-LE-NEXT: and r3, r1, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #2, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #6, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #3, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #8, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #4, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #10, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #5, #1
+; CHECK-LE-NEXT: ubfx r3, r1, #12, #1
+; CHECK-LE-NEXT: ubfx r1, r1, #14, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #6, #1
+; CHECK-LE-NEXT: rsbs r1, r1, #0
+; CHECK-LE-NEXT: bfi r2, r1, #7, #1
+; CHECK-LE-NEXT: uxtb r1, r2
+; CHECK-LE-NEXT: lsls r2, r2, #31
; CHECK-LE-NEXT: bne .LBB16_9
; CHECK-LE-NEXT: @ %bb.1: @ %else
; CHECK-LE-NEXT: lsls r2, r1, #30
@@ -1718,12 +2168,36 @@ define arm_aapcs_vfpcc void @masked_v8f1
; CHECK-BE-NEXT: .pad #40
; CHECK-BE-NEXT: sub sp, #40
; CHECK-BE-NEXT: vrev64.16 q2, q1
-; CHECK-BE-NEXT: add r1, sp, #32
+; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vcmp.i16 ne, q2, zr
; CHECK-BE-NEXT: vrev64.16 q1, q0
-; CHECK-BE-NEXT: vstr p0, [r1]
-; CHECK-BE-NEXT: ldrb.w r1, [sp, #32]
-; CHECK-BE-NEXT: lsls r2, r1, #31
+; CHECK-BE-NEXT: vmrs r1, p0
+; CHECK-BE-NEXT: and r3, r1, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #2, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #2, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #6, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #3, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #8, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #4, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #10, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #5, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #12, #1
+; CHECK-BE-NEXT: ubfx r1, r1, #14, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #6, #1
+; CHECK-BE-NEXT: rsbs r1, r1, #0
+; CHECK-BE-NEXT: bfi r2, r1, #7, #1
+; CHECK-BE-NEXT: uxtb r1, r2
+; CHECK-BE-NEXT: lsls r2, r2, #31
; CHECK-BE-NEXT: bne .LBB16_9
; CHECK-BE-NEXT: @ %bb.1: @ %else
; CHECK-BE-NEXT: lsls r2, r1, #30
@@ -1815,12 +2289,36 @@ define i8* @masked_v8f16_pre(i8* %y, i8*
; CHECK-LE-NEXT: vldr d1, [sp, #8]
; CHECK-LE-NEXT: adds r0, #4
; CHECK-LE-NEXT: vmov d0, r2, r3
-; CHECK-LE-NEXT: mov r2, sp
+; CHECK-LE-NEXT: movs r2, #0
; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr
-; CHECK-LE-NEXT: vstr p0, [r2]
+; CHECK-LE-NEXT: vmrs r12, p0
+; CHECK-LE-NEXT: and r3, r12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #2, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #6, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #3, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #8, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #4, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #10, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #5, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r2, r3, #6, #1
+; CHECK-LE-NEXT: ubfx r3, r12, #14, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
-; CHECK-LE-NEXT: ldrb.w r1, [sp]
-; CHECK-LE-NEXT: lsls r2, r1, #31
+; CHECK-LE-NEXT: bfi r2, r3, #7, #1
+; CHECK-LE-NEXT: uxtb r1, r2
+; CHECK-LE-NEXT: lsls r2, r2, #31
; CHECK-LE-NEXT: bne .LBB17_9
; CHECK-LE-NEXT: @ %bb.1: @ %else
; CHECK-LE-NEXT: lsls r2, r1, #30
@@ -1890,13 +2388,37 @@ define i8* @masked_v8f16_pre(i8* %y, i8*
; CHECK-BE-NEXT: vldr d1, [sp, #8]
; CHECK-BE-NEXT: adds r0, #4
; CHECK-BE-NEXT: vmov d0, r3, r2
-; CHECK-BE-NEXT: mov r2, sp
+; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vrev64.16 q1, q0
; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr
-; CHECK-BE-NEXT: vstr p0, [r2]
+; CHECK-BE-NEXT: vmrs r12, p0
+; CHECK-BE-NEXT: and r3, r12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #2, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #2, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #6, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #3, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #8, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #4, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #10, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #5, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r2, r3, #6, #1
+; CHECK-BE-NEXT: ubfx r3, r12, #14, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
-; CHECK-BE-NEXT: ldrb.w r1, [sp]
-; CHECK-BE-NEXT: lsls r2, r1, #31
+; CHECK-BE-NEXT: bfi r2, r3, #7, #1
+; CHECK-BE-NEXT: uxtb r1, r2
+; CHECK-BE-NEXT: lsls r2, r2, #31
; CHECK-BE-NEXT: bne .LBB17_9
; CHECK-BE-NEXT: @ %bb.1: @ %else
; CHECK-BE-NEXT: lsls r2, r1, #30
@@ -1975,12 +2497,36 @@ define i8* @masked_v8f16_post(i8* %y, i8
; CHECK-LE-NEXT: sub sp, #8
; CHECK-LE-NEXT: vldr d1, [sp, #8]
; CHECK-LE-NEXT: vmov d0, r2, r3
-; CHECK-LE-NEXT: mov r2, sp
+; CHECK-LE-NEXT: movs r3, #0
; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr
-; CHECK-LE-NEXT: vstr p0, [r2]
-; CHECK-LE-NEXT: ldrb.w r2, [sp]
+; CHECK-LE-NEXT: vmrs r12, p0
+; CHECK-LE-NEXT: and r2, r12, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #0, #1
+; CHECK-LE-NEXT: ubfx r2, r12, #2, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #1, #1
+; CHECK-LE-NEXT: ubfx r2, r12, #4, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #2, #1
+; CHECK-LE-NEXT: ubfx r2, r12, #6, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #3, #1
+; CHECK-LE-NEXT: ubfx r2, r12, #8, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #4, #1
+; CHECK-LE-NEXT: ubfx r2, r12, #10, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #5, #1
+; CHECK-LE-NEXT: ubfx r2, r12, #12, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #6, #1
+; CHECK-LE-NEXT: ubfx r2, r12, #14, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
-; CHECK-LE-NEXT: lsls r1, r2, #31
+; CHECK-LE-NEXT: bfi r3, r2, #7, #1
+; CHECK-LE-NEXT: uxtb r2, r3
+; CHECK-LE-NEXT: lsls r1, r3, #31
; CHECK-LE-NEXT: bne .LBB18_12
; CHECK-LE-NEXT: @ %bb.1: @ %else
; CHECK-LE-NEXT: lsls r1, r2, #30
@@ -2046,13 +2592,37 @@ define i8* @masked_v8f16_post(i8* %y, i8
; CHECK-BE-NEXT: sub sp, #8
; CHECK-BE-NEXT: vldr d1, [sp, #8]
; CHECK-BE-NEXT: vmov d0, r3, r2
-; CHECK-BE-NEXT: mov r2, sp
+; CHECK-BE-NEXT: movs r3, #0
; CHECK-BE-NEXT: vrev64.16 q1, q0
; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr
-; CHECK-BE-NEXT: vstr p0, [r2]
-; CHECK-BE-NEXT: ldrb.w r2, [sp]
+; CHECK-BE-NEXT: vmrs r12, p0
+; CHECK-BE-NEXT: and r2, r12, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #0, #1
+; CHECK-BE-NEXT: ubfx r2, r12, #2, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #1, #1
+; CHECK-BE-NEXT: ubfx r2, r12, #4, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #2, #1
+; CHECK-BE-NEXT: ubfx r2, r12, #6, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #3, #1
+; CHECK-BE-NEXT: ubfx r2, r12, #8, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #4, #1
+; CHECK-BE-NEXT: ubfx r2, r12, #10, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #5, #1
+; CHECK-BE-NEXT: ubfx r2, r12, #12, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #6, #1
+; CHECK-BE-NEXT: ubfx r2, r12, #14, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
-; CHECK-BE-NEXT: lsls r1, r2, #31
+; CHECK-BE-NEXT: bfi r3, r2, #7, #1
+; CHECK-BE-NEXT: uxtb r2, r3
+; CHECK-BE-NEXT: lsls r1, r3, #31
; CHECK-BE-NEXT: bne .LBB18_12
; CHECK-BE-NEXT: @ %bb.1: @ %else
; CHECK-BE-NEXT: lsls r1, r2, #30
Modified: llvm/trunk/test/CodeGen/Thumb2/mve-pred-bitcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-pred-bitcast.ll?rev=371419&r1=371418&r2=371419&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-pred-bitcast.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-pred-bitcast.ll Mon Sep 9 09:35:49 2019
@@ -1,19 +1,55 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
+; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
define arm_aapcs_vfpcc <4 x i32> @bitcast_to_v4i1(i4 %b, <4 x i32> %a) {
-; CHECK-LABEL: bitcast_to_v4i1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: and r0, r0, #15
-; CHECK-NEXT: strb.w r0, [sp]
-; CHECK-NEXT: mov r0, sp
-; CHECK-NEXT: vmov.i32 q1, #0x0
-; CHECK-NEXT: vldr p0, [r0]
-; CHECK-NEXT: vpsel q0, q0, q1
-; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: bitcast_to_v4i1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: .pad #4
+; CHECK-LE-NEXT: sub sp, #4
+; CHECK-LE-NEXT: and r0, r0, #15
+; CHECK-LE-NEXT: vmov.i8 q1, #0x0
+; CHECK-LE-NEXT: vmov.i8 q2, #0xff
+; CHECK-LE-NEXT: vmsr p0, r0
+; CHECK-LE-NEXT: vpsel q1, q2, q1
+; CHECK-LE-NEXT: vmov.u8 r0, q1[0]
+; CHECK-LE-NEXT: vmov.32 q2[0], r0
+; CHECK-LE-NEXT: vmov.u8 r0, q1[1]
+; CHECK-LE-NEXT: vmov.32 q2[1], r0
+; CHECK-LE-NEXT: vmov.u8 r0, q1[2]
+; CHECK-LE-NEXT: vmov.32 q2[2], r0
+; CHECK-LE-NEXT: vmov.u8 r0, q1[3]
+; CHECK-LE-NEXT: vmov.32 q2[3], r0
+; CHECK-LE-NEXT: vmov.i32 q1, #0x0
+; CHECK-LE-NEXT: vcmp.i32 ne, q2, zr
+; CHECK-LE-NEXT: vpsel q0, q0, q1
+; CHECK-LE-NEXT: add sp, #4
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: bitcast_to_v4i1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: .pad #4
+; CHECK-BE-NEXT: sub sp, #4
+; CHECK-BE-NEXT: and r0, r0, #15
+; CHECK-BE-NEXT: vmov.i8 q1, #0x0
+; CHECK-BE-NEXT: vmov.i8 q2, #0xff
+; CHECK-BE-NEXT: vmsr p0, r0
+; CHECK-BE-NEXT: vpsel q1, q2, q1
+; CHECK-BE-NEXT: vmov.u8 r0, q1[0]
+; CHECK-BE-NEXT: vmov.32 q2[0], r0
+; CHECK-BE-NEXT: vmov.u8 r0, q1[1]
+; CHECK-BE-NEXT: vmov.32 q2[1], r0
+; CHECK-BE-NEXT: vmov.u8 r0, q1[2]
+; CHECK-BE-NEXT: vmov.32 q2[2], r0
+; CHECK-BE-NEXT: vmov.u8 r0, q1[3]
+; CHECK-BE-NEXT: vmov.32 q2[3], r0
+; CHECK-BE-NEXT: vrev64.32 q1, q0
+; CHECK-BE-NEXT: vcmp.i32 ne, q2, zr
+; CHECK-BE-NEXT: vmov.i32 q0, #0x0
+; CHECK-BE-NEXT: vpsel q1, q1, q0
+; CHECK-BE-NEXT: vrev64.32 q0, q1
+; CHECK-BE-NEXT: add sp, #4
+; CHECK-BE-NEXT: bx lr
entry:
%c = bitcast i4 %b to <4 x i1>
%s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer
@@ -21,17 +57,70 @@ entry:
}
define arm_aapcs_vfpcc <8 x i16> @bitcast_to_v8i1(i8 %b, <8 x i16> %a) {
-; CHECK-LABEL: bitcast_to_v8i1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .pad #8
-; CHECK-NEXT: sub sp, #8
-; CHECK-NEXT: strb.w r0, [sp]
-; CHECK-NEXT: mov r0, sp
-; CHECK-NEXT: vldr p0, [r0]
-; CHECK-NEXT: vmov.i32 q1, #0x0
-; CHECK-NEXT: vpsel q0, q0, q1
-; CHECK-NEXT: add sp, #8
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: bitcast_to_v8i1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: .pad #8
+; CHECK-LE-NEXT: sub sp, #8
+; CHECK-LE-NEXT: uxtb r0, r0
+; CHECK-LE-NEXT: vmov.i8 q1, #0x0
+; CHECK-LE-NEXT: vmov.i8 q2, #0xff
+; CHECK-LE-NEXT: vmsr p0, r0
+; CHECK-LE-NEXT: vpsel q2, q2, q1
+; CHECK-LE-NEXT: vmov.u8 r0, q2[0]
+; CHECK-LE-NEXT: vmov.16 q1[0], r0
+; CHECK-LE-NEXT: vmov.u8 r0, q2[1]
+; CHECK-LE-NEXT: vmov.16 q1[1], r0
+; CHECK-LE-NEXT: vmov.u8 r0, q2[2]
+; CHECK-LE-NEXT: vmov.16 q1[2], r0
+; CHECK-LE-NEXT: vmov.u8 r0, q2[3]
+; CHECK-LE-NEXT: vmov.16 q1[3], r0
+; CHECK-LE-NEXT: vmov.u8 r0, q2[4]
+; CHECK-LE-NEXT: vmov.16 q1[4], r0
+; CHECK-LE-NEXT: vmov.u8 r0, q2[5]
+; CHECK-LE-NEXT: vmov.16 q1[5], r0
+; CHECK-LE-NEXT: vmov.u8 r0, q2[6]
+; CHECK-LE-NEXT: vmov.16 q1[6], r0
+; CHECK-LE-NEXT: vmov.u8 r0, q2[7]
+; CHECK-LE-NEXT: vmov.16 q1[7], r0
+; CHECK-LE-NEXT: vcmp.i16 ne, q1, zr
+; CHECK-LE-NEXT: vmov.i32 q1, #0x0
+; CHECK-LE-NEXT: vpsel q0, q0, q1
+; CHECK-LE-NEXT: add sp, #8
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: bitcast_to_v8i1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: .pad #8
+; CHECK-BE-NEXT: sub sp, #8
+; CHECK-BE-NEXT: uxtb r0, r0
+; CHECK-BE-NEXT: vmov.i8 q1, #0x0
+; CHECK-BE-NEXT: vmov.i8 q2, #0xff
+; CHECK-BE-NEXT: vmsr p0, r0
+; CHECK-BE-NEXT: vpsel q2, q2, q1
+; CHECK-BE-NEXT: vmov.u8 r0, q2[0]
+; CHECK-BE-NEXT: vmov.16 q1[0], r0
+; CHECK-BE-NEXT: vmov.u8 r0, q2[1]
+; CHECK-BE-NEXT: vmov.16 q1[1], r0
+; CHECK-BE-NEXT: vmov.u8 r0, q2[2]
+; CHECK-BE-NEXT: vmov.16 q1[2], r0
+; CHECK-BE-NEXT: vmov.u8 r0, q2[3]
+; CHECK-BE-NEXT: vmov.16 q1[3], r0
+; CHECK-BE-NEXT: vmov.u8 r0, q2[4]
+; CHECK-BE-NEXT: vmov.16 q1[4], r0
+; CHECK-BE-NEXT: vmov.u8 r0, q2[5]
+; CHECK-BE-NEXT: vmov.16 q1[5], r0
+; CHECK-BE-NEXT: vmov.u8 r0, q2[6]
+; CHECK-BE-NEXT: vmov.16 q1[6], r0
+; CHECK-BE-NEXT: vmov.u8 r0, q2[7]
+; CHECK-BE-NEXT: vmov.16 q1[7], r0
+; CHECK-BE-NEXT: vcmp.i16 ne, q1, zr
+; CHECK-BE-NEXT: vrev64.16 q1, q0
+; CHECK-BE-NEXT: vmov.i32 q0, #0x0
+; CHECK-BE-NEXT: vrev32.16 q0, q0
+; CHECK-BE-NEXT: vpsel q1, q1, q0
+; CHECK-BE-NEXT: vrev64.16 q0, q1
+; CHECK-BE-NEXT: add sp, #8
+; CHECK-BE-NEXT: bx lr
entry:
%c = bitcast i8 %b to <8 x i1>
%s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> zeroinitializer
@@ -39,25 +128,46 @@ entry:
}
define arm_aapcs_vfpcc <16 x i8> @bitcast_to_v16i1(i16 %b, <16 x i8> %a) {
-; CHECK-LABEL: bitcast_to_v16i1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r6, r7, lr}
-; CHECK-NEXT: push {r4, r6, r7, lr}
-; CHECK-NEXT: .setfp r7, sp, #8
-; CHECK-NEXT: add r7, sp, #8
-; CHECK-NEXT: .pad #16
-; CHECK-NEXT: sub sp, #16
-; CHECK-NEXT: mov r4, sp
-; CHECK-NEXT: bfc r4, #0, #4
-; CHECK-NEXT: mov sp, r4
-; CHECK-NEXT: strh.w r0, [sp]
-; CHECK-NEXT: mov r0, sp
-; CHECK-NEXT: sub.w r4, r7, #8
-; CHECK-NEXT: vldr p0, [r0]
-; CHECK-NEXT: vmov.i32 q1, #0x0
-; CHECK-NEXT: vpsel q0, q0, q1
-; CHECK-NEXT: mov sp, r4
-; CHECK-NEXT: pop {r4, r6, r7, pc}
+; CHECK-LE-LABEL: bitcast_to_v16i1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: .save {r4, r6, r7, lr}
+; CHECK-LE-NEXT: push {r4, r6, r7, lr}
+; CHECK-LE-NEXT: .setfp r7, sp, #8
+; CHECK-LE-NEXT: add r7, sp, #8
+; CHECK-LE-NEXT: .pad #16
+; CHECK-LE-NEXT: sub sp, #16
+; CHECK-LE-NEXT: mov r4, sp
+; CHECK-LE-NEXT: bfc r4, #0, #4
+; CHECK-LE-NEXT: mov sp, r4
+; CHECK-LE-NEXT: uxth r0, r0
+; CHECK-LE-NEXT: sub.w r4, r7, #8
+; CHECK-LE-NEXT: vmov.i32 q1, #0x0
+; CHECK-LE-NEXT: vmsr p0, r0
+; CHECK-LE-NEXT: vpsel q0, q0, q1
+; CHECK-LE-NEXT: mov sp, r4
+; CHECK-LE-NEXT: pop {r4, r6, r7, pc}
+;
+; CHECK-BE-LABEL: bitcast_to_v16i1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: .save {r4, r6, r7, lr}
+; CHECK-BE-NEXT: push {r4, r6, r7, lr}
+; CHECK-BE-NEXT: .setfp r7, sp, #8
+; CHECK-BE-NEXT: add r7, sp, #8
+; CHECK-BE-NEXT: .pad #16
+; CHECK-BE-NEXT: sub sp, #16
+; CHECK-BE-NEXT: mov r4, sp
+; CHECK-BE-NEXT: bfc r4, #0, #4
+; CHECK-BE-NEXT: mov sp, r4
+; CHECK-BE-NEXT: vrev64.8 q1, q0
+; CHECK-BE-NEXT: vmov.i32 q0, #0x0
+; CHECK-BE-NEXT: uxth r0, r0
+; CHECK-BE-NEXT: sub.w r4, r7, #8
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vmsr p0, r0
+; CHECK-BE-NEXT: vpsel q1, q1, q0
+; CHECK-BE-NEXT: vrev64.8 q0, q1
+; CHECK-BE-NEXT: mov sp, r4
+; CHECK-BE-NEXT: pop {r4, r6, r7, pc}
entry:
%c = bitcast i16 %b to <16 x i1>
%s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> zeroinitializer
@@ -65,20 +175,36 @@ entry:
}
define arm_aapcs_vfpcc <2 x i64> @bitcast_to_v2i1(i2 %b, <2 x i64> %a) {
-; CHECK-LABEL: bitcast_to_v2i1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: and r0, r0, #3
-; CHECK-NEXT: sbfx r1, r0, #0, #1
-; CHECK-NEXT: sbfx r0, r0, #1, #1
-; CHECK-NEXT: vmov.32 q1[0], r1
-; CHECK-NEXT: vmov.32 q1[1], r1
-; CHECK-NEXT: vmov.32 q1[2], r0
-; CHECK-NEXT: vmov.32 q1[3], r0
-; CHECK-NEXT: vand q0, q0, q1
-; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: bitcast_to_v2i1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: .pad #4
+; CHECK-LE-NEXT: sub sp, #4
+; CHECK-LE-NEXT: and r0, r0, #3
+; CHECK-LE-NEXT: sbfx r1, r0, #0, #1
+; CHECK-LE-NEXT: sbfx r0, r0, #1, #1
+; CHECK-LE-NEXT: vmov.32 q1[0], r1
+; CHECK-LE-NEXT: vmov.32 q1[1], r1
+; CHECK-LE-NEXT: vmov.32 q1[2], r0
+; CHECK-LE-NEXT: vmov.32 q1[3], r0
+; CHECK-LE-NEXT: vand q0, q0, q1
+; CHECK-LE-NEXT: add sp, #4
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: bitcast_to_v2i1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: .pad #4
+; CHECK-BE-NEXT: sub sp, #4
+; CHECK-BE-NEXT: and r0, r0, #3
+; CHECK-BE-NEXT: sbfx r1, r0, #0, #1
+; CHECK-BE-NEXT: sbfx r0, r0, #1, #1
+; CHECK-BE-NEXT: vmov.32 q1[0], r1
+; CHECK-BE-NEXT: vmov.32 q1[1], r1
+; CHECK-BE-NEXT: vmov.32 q1[2], r0
+; CHECK-BE-NEXT: vmov.32 q1[3], r0
+; CHECK-BE-NEXT: vrev64.32 q2, q1
+; CHECK-BE-NEXT: vand q0, q0, q2
+; CHECK-BE-NEXT: add sp, #4
+; CHECK-BE-NEXT: bx lr
entry:
%c = bitcast i2 %b to <2 x i1>
%s = select <2 x i1> %c, <2 x i64> %a, <2 x i64> zeroinitializer
@@ -87,16 +213,52 @@ entry:
define arm_aapcs_vfpcc i4 @bitcast_from_v4i1(<4 x i32> %a) {
-; CHECK-LABEL: bitcast_from_v4i1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: mov r0, sp
-; CHECK-NEXT: vstr p0, [r0]
-; CHECK-NEXT: ldrb.w r0, [sp]
-; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: bitcast_from_v4i1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: .pad #4
+; CHECK-LE-NEXT: sub sp, #4
+; CHECK-LE-NEXT: vcmp.i32 eq, q0, zr
+; CHECK-LE-NEXT: movs r0, #0
+; CHECK-LE-NEXT: vmrs r1, p0
+; CHECK-LE-NEXT: and r2, r1, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r0, r2, #0, #1
+; CHECK-LE-NEXT: ubfx r2, r1, #4, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r0, r2, #1, #1
+; CHECK-LE-NEXT: ubfx r2, r1, #8, #1
+; CHECK-LE-NEXT: ubfx r1, r1, #12, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r0, r2, #2, #1
+; CHECK-LE-NEXT: rsbs r1, r1, #0
+; CHECK-LE-NEXT: bfi r0, r1, #3, #1
+; CHECK-LE-NEXT: and r0, r0, #15
+; CHECK-LE-NEXT: add sp, #4
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: bitcast_from_v4i1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: .pad #4
+; CHECK-BE-NEXT: sub sp, #4
+; CHECK-BE-NEXT: vrev64.32 q1, q0
+; CHECK-BE-NEXT: movs r3, #0
+; CHECK-BE-NEXT: vcmp.i32 eq, q1, zr
+; CHECK-BE-NEXT: vmrs r0, p0
+; CHECK-BE-NEXT: and r2, r0, #1
+; CHECK-BE-NEXT: ubfx r1, r0, #4, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: rsbs r1, r1, #0
+; CHECK-BE-NEXT: bfi r3, r2, #0, #1
+; CHECK-BE-NEXT: bfi r3, r1, #1, #1
+; CHECK-BE-NEXT: ubfx r1, r0, #8, #1
+; CHECK-BE-NEXT: ubfx r0, r0, #12, #1
+; CHECK-BE-NEXT: rsbs r1, r1, #0
+; CHECK-BE-NEXT: bfi r3, r1, #2, #1
+; CHECK-BE-NEXT: rsbs r0, r0, #0
+; CHECK-BE-NEXT: bfi r3, r0, #3, #1
+; CHECK-BE-NEXT: and r0, r3, #15
+; CHECK-BE-NEXT: add sp, #4
+; CHECK-BE-NEXT: bx lr
entry:
%c = icmp eq <4 x i32> %a, zeroinitializer
%b = bitcast <4 x i1> %c to i4
@@ -104,16 +266,76 @@ entry:
}
define arm_aapcs_vfpcc i8 @bitcast_from_v8i1(<8 x i16> %a) {
-; CHECK-LABEL: bitcast_from_v8i1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .pad #8
-; CHECK-NEXT: sub sp, #8
-; CHECK-NEXT: vcmp.i16 eq, q0, zr
-; CHECK-NEXT: mov r0, sp
-; CHECK-NEXT: vstr p0, [r0]
-; CHECK-NEXT: ldrb.w r0, [sp]
-; CHECK-NEXT: add sp, #8
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: bitcast_from_v8i1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: .pad #8
+; CHECK-LE-NEXT: sub sp, #8
+; CHECK-LE-NEXT: vcmp.i16 eq, q0, zr
+; CHECK-LE-NEXT: movs r0, #0
+; CHECK-LE-NEXT: vmrs r1, p0
+; CHECK-LE-NEXT: and r2, r1, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r0, r2, #0, #1
+; CHECK-LE-NEXT: ubfx r2, r1, #2, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r0, r2, #1, #1
+; CHECK-LE-NEXT: ubfx r2, r1, #4, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r0, r2, #2, #1
+; CHECK-LE-NEXT: ubfx r2, r1, #6, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r0, r2, #3, #1
+; CHECK-LE-NEXT: ubfx r2, r1, #8, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r0, r2, #4, #1
+; CHECK-LE-NEXT: ubfx r2, r1, #10, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r0, r2, #5, #1
+; CHECK-LE-NEXT: ubfx r2, r1, #12, #1
+; CHECK-LE-NEXT: ubfx r1, r1, #14, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r0, r2, #6, #1
+; CHECK-LE-NEXT: rsbs r1, r1, #0
+; CHECK-LE-NEXT: bfi r0, r1, #7, #1
+; CHECK-LE-NEXT: uxtb r0, r0
+; CHECK-LE-NEXT: add sp, #8
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: bitcast_from_v8i1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: .pad #8
+; CHECK-BE-NEXT: sub sp, #8
+; CHECK-BE-NEXT: vrev64.16 q1, q0
+; CHECK-BE-NEXT: vcmp.i16 eq, q1, zr
+; CHECK-BE-NEXT: vmrs r1, p0
+; CHECK-BE-NEXT: ubfx r0, r1, #2, #1
+; CHECK-BE-NEXT: rsbs r2, r0, #0
+; CHECK-BE-NEXT: and r0, r1, #1
+; CHECK-BE-NEXT: rsbs r3, r0, #0
+; CHECK-BE-NEXT: movs r0, #0
+; CHECK-BE-NEXT: bfi r0, r3, #0, #1
+; CHECK-BE-NEXT: bfi r0, r2, #1, #1
+; CHECK-BE-NEXT: ubfx r2, r1, #4, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r0, r2, #2, #1
+; CHECK-BE-NEXT: ubfx r2, r1, #6, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r0, r2, #3, #1
+; CHECK-BE-NEXT: ubfx r2, r1, #8, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r0, r2, #4, #1
+; CHECK-BE-NEXT: ubfx r2, r1, #10, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r0, r2, #5, #1
+; CHECK-BE-NEXT: ubfx r2, r1, #12, #1
+; CHECK-BE-NEXT: ubfx r1, r1, #14, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r0, r2, #6, #1
+; CHECK-BE-NEXT: rsbs r1, r1, #0
+; CHECK-BE-NEXT: bfi r0, r1, #7, #1
+; CHECK-BE-NEXT: uxtb r0, r0
+; CHECK-BE-NEXT: add sp, #8
+; CHECK-BE-NEXT: bx lr
entry:
%c = icmp eq <8 x i16> %a, zeroinitializer
%b = bitcast <8 x i1> %c to i8
@@ -121,24 +343,42 @@ entry:
}
define arm_aapcs_vfpcc i16 @bitcast_from_v16i1(<16 x i8> %a) {
-; CHECK-LABEL: bitcast_from_v16i1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r6, r7, lr}
-; CHECK-NEXT: push {r4, r6, r7, lr}
-; CHECK-NEXT: .setfp r7, sp, #8
-; CHECK-NEXT: add r7, sp, #8
-; CHECK-NEXT: .pad #16
-; CHECK-NEXT: sub sp, #16
-; CHECK-NEXT: mov r4, sp
-; CHECK-NEXT: bfc r4, #0, #4
-; CHECK-NEXT: mov sp, r4
-; CHECK-NEXT: sub.w r4, r7, #8
-; CHECK-NEXT: vcmp.i8 eq, q0, zr
-; CHECK-NEXT: mov r0, sp
-; CHECK-NEXT: vstr p0, [r0]
-; CHECK-NEXT: ldrh.w r0, [sp]
-; CHECK-NEXT: mov sp, r4
-; CHECK-NEXT: pop {r4, r6, r7, pc}
+; CHECK-LE-LABEL: bitcast_from_v16i1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: .save {r4, r6, r7, lr}
+; CHECK-LE-NEXT: push {r4, r6, r7, lr}
+; CHECK-LE-NEXT: .setfp r7, sp, #8
+; CHECK-LE-NEXT: add r7, sp, #8
+; CHECK-LE-NEXT: .pad #16
+; CHECK-LE-NEXT: sub sp, #16
+; CHECK-LE-NEXT: mov r4, sp
+; CHECK-LE-NEXT: bfc r4, #0, #4
+; CHECK-LE-NEXT: mov sp, r4
+; CHECK-LE-NEXT: vcmp.i8 eq, q0, zr
+; CHECK-LE-NEXT: sub.w r4, r7, #8
+; CHECK-LE-NEXT: vmrs r0, p0
+; CHECK-LE-NEXT: uxth r0, r0
+; CHECK-LE-NEXT: mov sp, r4
+; CHECK-LE-NEXT: pop {r4, r6, r7, pc}
+;
+; CHECK-BE-LABEL: bitcast_from_v16i1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: .save {r4, r6, r7, lr}
+; CHECK-BE-NEXT: push {r4, r6, r7, lr}
+; CHECK-BE-NEXT: .setfp r7, sp, #8
+; CHECK-BE-NEXT: add r7, sp, #8
+; CHECK-BE-NEXT: .pad #16
+; CHECK-BE-NEXT: sub sp, #16
+; CHECK-BE-NEXT: mov r4, sp
+; CHECK-BE-NEXT: bfc r4, #0, #4
+; CHECK-BE-NEXT: mov sp, r4
+; CHECK-BE-NEXT: vrev64.8 q1, q0
+; CHECK-BE-NEXT: sub.w r4, r7, #8
+; CHECK-BE-NEXT: vcmp.i8 eq, q1, zr
+; CHECK-BE-NEXT: vmrs r0, p0
+; CHECK-BE-NEXT: uxth r0, r0
+; CHECK-BE-NEXT: mov sp, r4
+; CHECK-BE-NEXT: pop {r4, r6, r7, pc}
entry:
%c = icmp eq <16 x i8> %a, zeroinitializer
%b = bitcast <16 x i1> %c to i16
@@ -146,25 +386,46 @@ entry:
}
define arm_aapcs_vfpcc i2 @bitcast_from_v2i1(<2 x i64> %a) {
-; CHECK-LABEL: bitcast_from_v2i1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: vmov r0, s1
-; CHECK-NEXT: vmov r1, s0
-; CHECK-NEXT: vmov r2, s2
-; CHECK-NEXT: orrs r0, r1
-; CHECK-NEXT: vmov r1, s3
-; CHECK-NEXT: cset r0, eq
-; CHECK-NEXT: orrs r1, r2
-; CHECK-NEXT: cset r1, eq
-; CHECK-NEXT: ands r1, r1, #1
-; CHECK-NEXT: it ne
-; CHECK-NEXT: mvnne r1, #1
-; CHECK-NEXT: bfi r1, r0, #0, #1
-; CHECK-NEXT: and r0, r1, #3
-; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: bitcast_from_v2i1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: .pad #4
+; CHECK-LE-NEXT: sub sp, #4
+; CHECK-LE-NEXT: vmov r0, s1
+; CHECK-LE-NEXT: vmov r1, s0
+; CHECK-LE-NEXT: vmov r2, s2
+; CHECK-LE-NEXT: orrs r0, r1
+; CHECK-LE-NEXT: vmov r1, s3
+; CHECK-LE-NEXT: cset r0, eq
+; CHECK-LE-NEXT: orrs r1, r2
+; CHECK-LE-NEXT: cset r1, eq
+; CHECK-LE-NEXT: ands r1, r1, #1
+; CHECK-LE-NEXT: it ne
+; CHECK-LE-NEXT: mvnne r1, #1
+; CHECK-LE-NEXT: bfi r1, r0, #0, #1
+; CHECK-LE-NEXT: and r0, r1, #3
+; CHECK-LE-NEXT: add sp, #4
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: bitcast_from_v2i1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: .pad #4
+; CHECK-BE-NEXT: sub sp, #4
+; CHECK-BE-NEXT: vrev64.32 q1, q0
+; CHECK-BE-NEXT: vmov r0, s6
+; CHECK-BE-NEXT: vmov r1, s7
+; CHECK-BE-NEXT: vmov r2, s5
+; CHECK-BE-NEXT: orrs r0, r1
+; CHECK-BE-NEXT: vmov r1, s4
+; CHECK-BE-NEXT: cset r0, eq
+; CHECK-BE-NEXT: orrs r1, r2
+; CHECK-BE-NEXT: cset r1, eq
+; CHECK-BE-NEXT: ands r1, r1, #1
+; CHECK-BE-NEXT: it ne
+; CHECK-BE-NEXT: mvnne r1, #1
+; CHECK-BE-NEXT: bfi r1, r0, #0, #1
+; CHECK-BE-NEXT: and r0, r1, #3
+; CHECK-BE-NEXT: add sp, #4
+; CHECK-BE-NEXT: bx lr
entry:
%c = icmp eq <2 x i64> %a, zeroinitializer
%b = bitcast <2 x i1> %c to i2
Modified: llvm/trunk/test/CodeGen/Thumb2/mve-pred-loadstore.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-pred-loadstore.ll?rev=371419&r1=371418&r2=371419&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-pred-loadstore.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-pred-loadstore.ll Mon Sep 9 09:35:49 2019
@@ -5,15 +5,41 @@
define arm_aapcs_vfpcc <4 x i32> @load_v4i1(<4 x i1> *%src, <4 x i32> %a) {
; CHECK-LE-LABEL: load_v4i1:
; CHECK-LE: @ %bb.0: @ %entry
-; CHECK-LE-NEXT: vldr p0, [r0]
+; CHECK-LE-NEXT: ldrb r0, [r0]
+; CHECK-LE-NEXT: vmov.i8 q1, #0x0
+; CHECK-LE-NEXT: vmov.i8 q2, #0xff
+; CHECK-LE-NEXT: vmsr p0, r0
+; CHECK-LE-NEXT: vpsel q1, q2, q1
+; CHECK-LE-NEXT: vmov.u8 r0, q1[0]
+; CHECK-LE-NEXT: vmov.32 q2[0], r0
+; CHECK-LE-NEXT: vmov.u8 r0, q1[1]
+; CHECK-LE-NEXT: vmov.32 q2[1], r0
+; CHECK-LE-NEXT: vmov.u8 r0, q1[2]
+; CHECK-LE-NEXT: vmov.32 q2[2], r0
+; CHECK-LE-NEXT: vmov.u8 r0, q1[3]
+; CHECK-LE-NEXT: vmov.32 q2[3], r0
; CHECK-LE-NEXT: vmov.i32 q1, #0x0
+; CHECK-LE-NEXT: vcmp.i32 ne, q2, zr
; CHECK-LE-NEXT: vpsel q0, q0, q1
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: load_v4i1:
; CHECK-BE: @ %bb.0: @ %entry
-; CHECK-BE-NEXT: vldr p0, [r0]
+; CHECK-BE-NEXT: ldrb r0, [r0]
+; CHECK-BE-NEXT: vmov.i8 q1, #0x0
+; CHECK-BE-NEXT: vmov.i8 q2, #0xff
+; CHECK-BE-NEXT: vmsr p0, r0
+; CHECK-BE-NEXT: vpsel q1, q2, q1
+; CHECK-BE-NEXT: vmov.u8 r0, q1[0]
+; CHECK-BE-NEXT: vmov.32 q2[0], r0
+; CHECK-BE-NEXT: vmov.u8 r0, q1[1]
+; CHECK-BE-NEXT: vmov.32 q2[1], r0
+; CHECK-BE-NEXT: vmov.u8 r0, q1[2]
+; CHECK-BE-NEXT: vmov.32 q2[2], r0
+; CHECK-BE-NEXT: vmov.u8 r0, q1[3]
+; CHECK-BE-NEXT: vmov.32 q2[3], r0
; CHECK-BE-NEXT: vrev64.32 q1, q0
+; CHECK-BE-NEXT: vcmp.i32 ne, q2, zr
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
; CHECK-BE-NEXT: vpsel q1, q1, q0
; CHECK-BE-NEXT: vrev64.32 q0, q1
@@ -27,16 +53,58 @@ entry:
define arm_aapcs_vfpcc <8 x i16> @load_v8i1(<8 x i1> *%src, <8 x i16> %a) {
; CHECK-LE-LABEL: load_v8i1:
; CHECK-LE: @ %bb.0: @ %entry
-; CHECK-LE-NEXT: vldr p0, [r0]
+; CHECK-LE-NEXT: ldrb r0, [r0]
+; CHECK-LE-NEXT: vmov.i8 q1, #0x0
+; CHECK-LE-NEXT: vmov.i8 q2, #0xff
+; CHECK-LE-NEXT: vmsr p0, r0
+; CHECK-LE-NEXT: vpsel q2, q2, q1
+; CHECK-LE-NEXT: vmov.u8 r0, q2[0]
+; CHECK-LE-NEXT: vmov.16 q1[0], r0
+; CHECK-LE-NEXT: vmov.u8 r0, q2[1]
+; CHECK-LE-NEXT: vmov.16 q1[1], r0
+; CHECK-LE-NEXT: vmov.u8 r0, q2[2]
+; CHECK-LE-NEXT: vmov.16 q1[2], r0
+; CHECK-LE-NEXT: vmov.u8 r0, q2[3]
+; CHECK-LE-NEXT: vmov.16 q1[3], r0
+; CHECK-LE-NEXT: vmov.u8 r0, q2[4]
+; CHECK-LE-NEXT: vmov.16 q1[4], r0
+; CHECK-LE-NEXT: vmov.u8 r0, q2[5]
+; CHECK-LE-NEXT: vmov.16 q1[5], r0
+; CHECK-LE-NEXT: vmov.u8 r0, q2[6]
+; CHECK-LE-NEXT: vmov.16 q1[6], r0
+; CHECK-LE-NEXT: vmov.u8 r0, q2[7]
+; CHECK-LE-NEXT: vmov.16 q1[7], r0
+; CHECK-LE-NEXT: vcmp.i16 ne, q1, zr
; CHECK-LE-NEXT: vmov.i32 q1, #0x0
; CHECK-LE-NEXT: vpsel q0, q0, q1
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: load_v8i1:
; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: ldrb r0, [r0]
+; CHECK-BE-NEXT: vmov.i8 q1, #0x0
+; CHECK-BE-NEXT: vmov.i8 q2, #0xff
+; CHECK-BE-NEXT: vmsr p0, r0
+; CHECK-BE-NEXT: vpsel q2, q2, q1
+; CHECK-BE-NEXT: vmov.u8 r0, q2[0]
+; CHECK-BE-NEXT: vmov.16 q1[0], r0
+; CHECK-BE-NEXT: vmov.u8 r0, q2[1]
+; CHECK-BE-NEXT: vmov.16 q1[1], r0
+; CHECK-BE-NEXT: vmov.u8 r0, q2[2]
+; CHECK-BE-NEXT: vmov.16 q1[2], r0
+; CHECK-BE-NEXT: vmov.u8 r0, q2[3]
+; CHECK-BE-NEXT: vmov.16 q1[3], r0
+; CHECK-BE-NEXT: vmov.u8 r0, q2[4]
+; CHECK-BE-NEXT: vmov.16 q1[4], r0
+; CHECK-BE-NEXT: vmov.u8 r0, q2[5]
+; CHECK-BE-NEXT: vmov.16 q1[5], r0
+; CHECK-BE-NEXT: vmov.u8 r0, q2[6]
+; CHECK-BE-NEXT: vmov.16 q1[6], r0
+; CHECK-BE-NEXT: vmov.u8 r0, q2[7]
+; CHECK-BE-NEXT: vmov.16 q1[7], r0
+; CHECK-BE-NEXT: vcmp.i16 ne, q1, zr
; CHECK-BE-NEXT: vrev64.16 q1, q0
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
-; CHECK-BE-NEXT: vldr p0, [r0]
; CHECK-BE-NEXT: vrev32.16 q0, q0
; CHECK-BE-NEXT: vpsel q1, q1, q0
; CHECK-BE-NEXT: vrev64.16 q0, q1
@@ -50,17 +118,19 @@ entry:
define arm_aapcs_vfpcc <16 x i8> @load_v16i1(<16 x i1> *%src, <16 x i8> %a) {
; CHECK-LE-LABEL: load_v16i1:
; CHECK-LE: @ %bb.0: @ %entry
-; CHECK-LE-NEXT: vldr p0, [r0]
+; CHECK-LE-NEXT: ldrh r0, [r0]
; CHECK-LE-NEXT: vmov.i32 q1, #0x0
+; CHECK-LE-NEXT: vmsr p0, r0
; CHECK-LE-NEXT: vpsel q0, q0, q1
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: load_v16i1:
; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: ldrh r0, [r0]
; CHECK-BE-NEXT: vrev64.8 q1, q0
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
-; CHECK-BE-NEXT: vldr p0, [r0]
; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vmsr p0, r0
; CHECK-BE-NEXT: vpsel q1, q1, q0
; CHECK-BE-NEXT: vrev64.8 q0, q1
; CHECK-BE-NEXT: bx lr
@@ -106,14 +176,44 @@ define arm_aapcs_vfpcc void @store_v4i1(
; CHECK-LE-LABEL: store_v4i1:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-LE-NEXT: vstr p0, [r0]
+; CHECK-LE-NEXT: movs r1, #0
+; CHECK-LE-NEXT: vmrs r2, p0
+; CHECK-LE-NEXT: and r3, r2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #8, #1
+; CHECK-LE-NEXT: ubfx r2, r2, #12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #2, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r1, r2, #3, #1
+; CHECK-LE-NEXT: and r1, r1, #15
+; CHECK-LE-NEXT: strb r1, [r0]
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: store_v4i1:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vrev64.32 q1, q0
; CHECK-BE-NEXT: vcmp.i32 eq, q1, zr
-; CHECK-BE-NEXT: vstr p0, [r0]
+; CHECK-BE-NEXT: vmrs r1, p0
+; CHECK-BE-NEXT: and r3, r1, #1
+; CHECK-BE-NEXT: ubfx r2, r1, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: rsb.w r12, r2, #0
+; CHECK-BE-NEXT: movs r2, #0
+; CHECK-BE-NEXT: bfi r2, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r1, #8, #1
+; CHECK-BE-NEXT: ubfx r1, r1, #12, #1
+; CHECK-BE-NEXT: bfi r2, r12, #1, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: rsbs r1, r1, #0
+; CHECK-BE-NEXT: bfi r2, r3, #2, #1
+; CHECK-BE-NEXT: bfi r2, r1, #3, #1
+; CHECK-BE-NEXT: and r1, r2, #15
+; CHECK-BE-NEXT: strb r1, [r0]
; CHECK-BE-NEXT: bx lr
entry:
%c = icmp eq <4 x i32> %a, zeroinitializer
@@ -125,14 +225,66 @@ define arm_aapcs_vfpcc void @store_v8i1(
; CHECK-LE-LABEL: store_v8i1:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vcmp.i16 eq, q0, zr
-; CHECK-LE-NEXT: vstr p0, [r0]
+; CHECK-LE-NEXT: movs r1, #0
+; CHECK-LE-NEXT: vmrs r2, p0
+; CHECK-LE-NEXT: and r3, r2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #2, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #6, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #3, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #8, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #4, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #10, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #5, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #12, #1
+; CHECK-LE-NEXT: ubfx r2, r2, #14, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #6, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r1, r2, #7, #1
+; CHECK-LE-NEXT: strb r1, [r0]
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: store_v8i1:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vrev64.16 q1, q0
; CHECK-BE-NEXT: vcmp.i16 eq, q1, zr
-; CHECK-BE-NEXT: vstr p0, [r0]
+; CHECK-BE-NEXT: vmrs r2, p0
+; CHECK-BE-NEXT: ubfx r1, r2, #2, #1
+; CHECK-BE-NEXT: rsb.w r12, r1, #0
+; CHECK-BE-NEXT: and r1, r2, #1
+; CHECK-BE-NEXT: rsbs r3, r1, #0
+; CHECK-BE-NEXT: movs r1, #0
+; CHECK-BE-NEXT: bfi r1, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r2, #4, #1
+; CHECK-BE-NEXT: bfi r1, r12, #1, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #2, #1
+; CHECK-BE-NEXT: ubfx r3, r2, #6, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #3, #1
+; CHECK-BE-NEXT: ubfx r3, r2, #8, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #4, #1
+; CHECK-BE-NEXT: ubfx r3, r2, #10, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #5, #1
+; CHECK-BE-NEXT: ubfx r3, r2, #12, #1
+; CHECK-BE-NEXT: ubfx r2, r2, #14, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #6, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r1, r2, #7, #1
+; CHECK-BE-NEXT: strb r1, [r0]
; CHECK-BE-NEXT: bx lr
entry:
%c = icmp eq <8 x i16> %a, zeroinitializer
@@ -144,14 +296,16 @@ define arm_aapcs_vfpcc void @store_v16i1
; CHECK-LE-LABEL: store_v16i1:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vcmp.i8 eq, q0, zr
-; CHECK-LE-NEXT: vstr p0, [r0]
+; CHECK-LE-NEXT: vmrs r1, p0
+; CHECK-LE-NEXT: strh r1, [r0]
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: store_v16i1:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vrev64.8 q1, q0
; CHECK-BE-NEXT: vcmp.i8 eq, q1, zr
-; CHECK-BE-NEXT: vstr p0, [r0]
+; CHECK-BE-NEXT: vmrs r1, p0
+; CHECK-BE-NEXT: strh r1, [r0]
; CHECK-BE-NEXT: bx lr
entry:
%c = icmp eq <16 x i8> %a, zeroinitializer
More information about the llvm-commits
mailing list