[llvm] r320962 - [DAGCombine] Move AND nodes to multiple load leaves
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 28 16:26:12 PST 2017
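Possibly because the patch treats ANY_EXTEND like ZERO_EXTEND, but that's
not safe: the high bits of an ANY_EXTEND result are undefined, so the mask
cannot simply be dropped for those nodes.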
~Craig
On Thu, Dec 28, 2017 at 4:14 PM, Craig Topper <craig.topper at gmail.com>
wrote:
> I believe this may be responsible for PR35765
>
> ~Craig
>
> On Mon, Dec 18, 2017 at 2:04 AM, Sam Parker via llvm-commits <
> llvm-commits at lists.llvm.org> wrote:
>
>> Author: sam_parker
>> Date: Mon Dec 18 02:04:27 2017
>> New Revision: 320962
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=320962&view=rev
>> Log:
>> [DAGCombine] Move AND nodes to multiple load leaves
>>
>> Search from AND nodes to find whether they can be propagated back to
>> loads, so that the AND and load can be combined into a narrow load.
>> We search through OR, XOR and other AND nodes and all bar one of the
>> leaves are required to be loads or constants. The exception node then
>> needs to be masked off meaning that the 'and' isn't removed, but the
>> load(s) are still narrowed.
>>
>> Differential Revision: https://reviews.llvm.org/D41177
>>
>> Modified:
>> llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>> llvm/trunk/test/CodeGen/ARM/and-load-combine.ll
>>
>> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/
>> SelectionDAG/DAGCombiner.cpp?rev=320962&r1=320961&r2=320962&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
>> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Dec 18
>> 02:04:27 2017
>> @@ -505,6 +505,14 @@ namespace {
>> bool isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
>> EVT &ExtVT, unsigned ShAmt = 0);
>>
>> + /// Used by BackwardsPropagateMask to find suitable loads.
>> + bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*>
>> &Loads,
>> + SmallPtrSetImpl<SDNode*> &NodeWithConsts,
>> + ConstantSDNode *Mask, SDNode
>> *&UncombinedNode);
>> + /// Attempt to propagate a given AND node back to load leaves so
>> that they
>> + /// can be combined into narrow loads.
>> + bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
>> +
>> /// Helper function for MergeConsecutiveStores which merges the
>> /// component store chains.
>> SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
>> @@ -3798,6 +3806,132 @@ bool DAGCombiner::isLegalNarrowLoad(Load
>> return true;
>> }
>>
>> +bool DAGCombiner::SearchForAndLoads(SDNode *N,
>> + SmallPtrSetImpl<LoadSDNode*> &Loads,
>> + SmallPtrSetImpl<SDNode*>
>> &NodesWithConsts,
>> + ConstantSDNode *Mask,
>> + SDNode *&NodeToMask) {
>> + // Recursively search for the operands, looking for loads which can be
>> + // narrowed.
>> + for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
>> + SDValue Op = N->getOperand(i);
>> +
>> + if (Op.getValueType().isVector())
>> + return false;
>> +
>> + // Some constants may need fixing up later if they are too large.
>> + if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
>> + if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
>> + (Mask->getAPIntValue() & C->getAPIntValue()) !=
>> C->getAPIntValue())
>> + NodesWithConsts.insert(N);
>> + continue;
>> + }
>> +
>> + if (!Op.hasOneUse())
>> + return false;
>> +
>> + switch(Op.getOpcode()) {
>> + case ISD::LOAD: {
>> + auto *Load = cast<LoadSDNode>(Op);
>> + EVT ExtVT;
>> + if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
>> + isLegalNarrowLoad(Load, ISD::ZEXTLOAD, ExtVT)) {
>> + // Only add this load if we can make it more narrow.
>> + if (ExtVT.bitsLT(Load->getMemoryVT()))
>> + Loads.insert(Load);
>> + continue;
>> + }
>> + return false;
>> + }
>> + case ISD::ZERO_EXTEND:
>> + case ISD::ANY_EXTEND:
>> + case ISD::AssertZext: {
>> + unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
>> + EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
>> + EVT VT = Op.getOpcode() == ISD::AssertZext ?
>> + cast<VTSDNode>(Op.getOperand(1))->getVT() :
>> + Op.getOperand(0).getValueType();
>> +
>> + // We can accept extending nodes if the mask is wider or an equal
>> + // width to the original type.
>> + if (ExtVT.bitsGE(VT))
>> + continue;
>> + break;
>> + }
>> + case ISD::OR:
>> + case ISD::XOR:
>> + case ISD::AND:
>> + if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
>> + NodeToMask))
>> + return false;
>> + continue;
>> + }
>> +
>> + // Allow one node which will masked along with any loads found.
>> + if (NodeToMask)
>> + return false;
>> + NodeToMask = Op.getNode();
>> + }
>> + return true;
>> +}
>> +
>> +bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
>> + auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
>> + if (!Mask)
>> + return false;
>> +
>> + if (!Mask->getAPIntValue().isMask())
>> + return false;
>> +
>> + // No need to do anything if the and directly uses a load.
>> + if (isa<LoadSDNode>(N->getOperand(0)))
>> + return false;
>> +
>> + SmallPtrSet<LoadSDNode*, 8> Loads;
>> + SmallPtrSet<SDNode*, 2> NodesWithConsts;
>> + SDNode *FixupNode = nullptr;
>> + if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
>> + if (Loads.size() == 0)
>> + return false;
>> +
>> + SDValue MaskOp = N->getOperand(1);
>> +
>> + // If it exists, fixup the single node we allow in the tree that
>> needs
>> + // masking.
>> + if (FixupNode) {
>> + SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
>> + FixupNode->getValueType(0),
>> + SDValue(FixupNode, 0), MaskOp);
>> + DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
>> + DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0),
>> + MaskOp);
>> + }
>> +
>> + // Narrow any constants that need it.
>> + for (auto *LogicN : NodesWithConsts) {
>> + auto *C = cast<ConstantSDNode>(LogicN->getOperand(1));
>> + SDValue And = DAG.getNode(ISD::AND, SDLoc(C), C->getValueType(0),
>> + SDValue(C, 0), MaskOp);
>> + DAG.UpdateNodeOperands(LogicN, LogicN->getOperand(0), And);
>> + }
>> +
>> + // Create narrow loads.
>> + for (auto *Load : Loads) {
>> + SDValue And = DAG.getNode(ISD::AND, SDLoc(Load),
>> Load->getValueType(0),
>> + SDValue(Load, 0), MaskOp);
>> + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
>> + DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp);
>> + SDValue NewLoad = ReduceLoadWidth(And.getNode());
>> + assert(NewLoad &&
>> + "Shouldn't be masking the load if it can't be narrowed");
>> + CombineTo(Load, NewLoad, NewLoad.getValue(1));
>> + }
>> + DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
>> + return true;
>> + }
>> + return false;
>> +}
>> +
>> SDValue DAGCombiner::visitAND(SDNode *N) {
>> SDValue N0 = N->getOperand(0);
>> SDValue N1 = N->getOperand(1);
>> @@ -3998,6 +4132,16 @@ SDValue DAGCombiner::visitAND(SDNode *N)
>> return SDValue(N, 0);
>> }
>> }
>> +
>> + if (Level >= AfterLegalizeTypes) {
>> + // Attempt to propagate the AND back up to the leaves which, if
>> they're
>> + // loads, can be combined to narrow loads and the AND node can be
>> removed.
>> + // Perform after legalization so that extend nodes will already be
>> + // combined into the loads.
>> + if (BackwardsPropagateMask(N, DAG)) {
>> + return SDValue(N, 0);
>> + }
>> + }
>>
>> if (SDValue Combined = visitANDLike(N0, N1, N))
>> return Combined;
>>
>> Modified: llvm/trunk/test/CodeGen/ARM/and-load-combine.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/
>> ARM/and-load-combine.ll?rev=320962&r1=320961&r2=320962&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/test/CodeGen/ARM/and-load-combine.ll (original)
>> +++ llvm/trunk/test/CodeGen/ARM/and-load-combine.ll Mon Dec 18 02:04:27
>> 2017
>> @@ -5,34 +5,30 @@
>> ; RUN: llc -mtriple=thumbv8m.main %s -o - | FileCheck %s
>> --check-prefix=THUMB2
>>
>> define arm_aapcscc zeroext i1 @cmp_xor8_short_short(i16* nocapture
>> readonly %a,
>> + i16* nocapture
>> readonly %b) {
>> ; ARM-LABEL: cmp_xor8_short_short:
>> -; ARM: @ %bb.0: @ %entry
>> -; ARM-NEXT: ldrh r0, [r0]
>> -; ARM-NEXT: ldrh r1, [r1]
>> -; ARM-NEXT: eor r1, r1, r0
>> +; ARM: ldrb r2, [r0]
>> ; ARM-NEXT: mov r0, #0
>> -; ARM-NEXT: tst r1, #255
>> +; ARM-NEXT: ldrb r1, [r1]
>> +; ARM-NEXT: teq r1, r2
>> ; ARM-NEXT: movweq r0, #1
>> ; ARM-NEXT: bx lr
>> ;
>> ; ARMEB-LABEL: cmp_xor8_short_short:
>> -; ARMEB: @ %bb.0: @ %entry
>> -; ARMEB-NEXT: ldrh r0, [r0]
>> -; ARMEB-NEXT: ldrh r1, [r1]
>> -; ARMEB-NEXT: eor r1, r1, r0
>> +; ARMEB: ldrb r2, [r0, #1]
>> ; ARMEB-NEXT: mov r0, #0
>> -; ARMEB-NEXT: tst r1, #255
>> +; ARMEB-NEXT: ldrb r1, [r1, #1]
>> +; ARMEB-NEXT: teq r1, r2
>> ; ARMEB-NEXT: movweq r0, #1
>> ; ARMEB-NEXT: bx lr
>> ;
>> ; THUMB1-LABEL: cmp_xor8_short_short:
>> -; THUMB1: @ %bb.0: @ %entry
>> -; THUMB1-NEXT: ldrh r0, [r0]
>> -; THUMB1-NEXT: ldrh r2, [r1]
>> +; THUMB1: ldrb r0, [r0]
>> +; THUMB1-NEXT: ldrb r2, [r1]
>> ; THUMB1-NEXT: eors r2, r0
>> ; THUMB1-NEXT: movs r0, #1
>> ; THUMB1-NEXT: movs r1, #0
>> -; THUMB1-NEXT: lsls r2, r2, #24
>> +; THUMB1-NEXT: cmp r2, #0
>> ; THUMB1-NEXT: beq .LBB0_2
>> ; THUMB1-NEXT: @ %bb.1: @ %entry
>> ; THUMB1-NEXT: mov r0, r1
>> @@ -40,16 +36,13 @@ define arm_aapcscc zeroext i1 @cmp_xor8_
>> ; THUMB1-NEXT: bx lr
>> ;
>> ; THUMB2-LABEL: cmp_xor8_short_short:
>> -; THUMB2: @ %bb.0: @ %entry
>> -; THUMB2-NEXT: ldrh r0, [r0]
>> -; THUMB2-NEXT: ldrh r1, [r1]
>> -; THUMB2-NEXT: eors r0, r1
>> -; THUMB2-NEXT: lsls r0, r0, #24
>> -; THUMB2-NEXT: mov.w r0, #0
>> +; THUMB2: ldrb r2, [r0]
>> +; THUMB2-NEXT: movs r0, #0
>> +; THUMB2-NEXT: ldrb r1, [r1]
>> +; THUMB2-NEXT: teq.w r1, r2
>> ; THUMB2-NEXT: it eq
>> ; THUMB2-NEXT: moveq r0, #1
>> ; THUMB2-NEXT: bx lr
>> - i16* nocapture
>> readonly %b) {
>> entry:
>> %0 = load i16, i16* %a, align 2
>> %1 = load i16, i16* %b, align 2
>> @@ -60,34 +53,30 @@ entry:
>> }
>>
>> define arm_aapcscc zeroext i1 @cmp_xor8_short_int(i16* nocapture
>> readonly %a,
>> + i32* nocapture
>> readonly %b) {
>> ; ARM-LABEL: cmp_xor8_short_int:
>> -; ARM: @ %bb.0: @ %entry
>> -; ARM-NEXT: ldrh r0, [r0]
>> -; ARM-NEXT: ldr r1, [r1]
>> -; ARM-NEXT: eor r1, r1, r0
>> +; ARM: ldrb r2, [r0]
>> ; ARM-NEXT: mov r0, #0
>> -; ARM-NEXT: tst r1, #255
>> +; ARM-NEXT: ldrb r1, [r1]
>> +; ARM-NEXT: teq r1, r2
>> ; ARM-NEXT: movweq r0, #1
>> ; ARM-NEXT: bx lr
>> ;
>> ; ARMEB-LABEL: cmp_xor8_short_int:
>> -; ARMEB: @ %bb.0: @ %entry
>> -; ARMEB-NEXT: ldrh r0, [r0]
>> -; ARMEB-NEXT: ldr r1, [r1]
>> -; ARMEB-NEXT: eor r1, r1, r0
>> +; ARMEB: ldrb r2, [r0, #1]
>> ; ARMEB-NEXT: mov r0, #0
>> -; ARMEB-NEXT: tst r1, #255
>> +; ARMEB-NEXT: ldrb r1, [r1, #3]
>> +; ARMEB-NEXT: teq r1, r2
>> ; ARMEB-NEXT: movweq r0, #1
>> ; ARMEB-NEXT: bx lr
>> ;
>> ; THUMB1-LABEL: cmp_xor8_short_int:
>> -; THUMB1: @ %bb.0: @ %entry
>> -; THUMB1-NEXT: ldrh r0, [r0]
>> -; THUMB1-NEXT: ldr r2, [r1]
>> +; THUMB1: ldrb r0, [r0]
>> +; THUMB1-NEXT: ldrb r2, [r1]
>> ; THUMB1-NEXT: eors r2, r0
>> ; THUMB1-NEXT: movs r0, #1
>> ; THUMB1-NEXT: movs r1, #0
>> -; THUMB1-NEXT: lsls r2, r2, #24
>> +; THUMB1-NEXT: cmp r2, #0
>> ; THUMB1-NEXT: beq .LBB1_2
>> ; THUMB1-NEXT: @ %bb.1: @ %entry
>> ; THUMB1-NEXT: mov r0, r1
>> @@ -95,16 +84,13 @@ define arm_aapcscc zeroext i1 @cmp_xor8_
>> ; THUMB1-NEXT: bx lr
>> ;
>> ; THUMB2-LABEL: cmp_xor8_short_int:
>> -; THUMB2: @ %bb.0: @ %entry
>> -; THUMB2-NEXT: ldrh r0, [r0]
>> -; THUMB2-NEXT: ldr r1, [r1]
>> -; THUMB2-NEXT: eors r0, r1
>> -; THUMB2-NEXT: lsls r0, r0, #24
>> -; THUMB2-NEXT: mov.w r0, #0
>> +; THUMB2: ldrb r2, [r0]
>> +; THUMB2-NEXT: movs r0, #0
>> +; THUMB2-NEXT: ldrb r1, [r1]
>> +; THUMB2-NEXT: teq.w r1, r2
>> ; THUMB2-NEXT: it eq
>> ; THUMB2-NEXT: moveq r0, #1
>> ; THUMB2-NEXT: bx lr
>> - i32* nocapture
>> readonly %b) {
>> entry:
>> %0 = load i16, i16* %a, align 2
>> %conv = zext i16 %0 to i32
>> @@ -116,34 +102,30 @@ entry:
>> }
>>
>> define arm_aapcscc zeroext i1 @cmp_xor8_int_int(i32* nocapture readonly
>> %a,
>> + i32* nocapture readonly
>> %b) {
>> ; ARM-LABEL: cmp_xor8_int_int:
>> -; ARM: @ %bb.0: @ %entry
>> -; ARM-NEXT: ldr r0, [r0]
>> -; ARM-NEXT: ldr r1, [r1]
>> -; ARM-NEXT: eor r1, r1, r0
>> +; ARM: ldrb r2, [r0]
>> ; ARM-NEXT: mov r0, #0
>> -; ARM-NEXT: tst r1, #255
>> +; ARM-NEXT: ldrb r1, [r1]
>> +; ARM-NEXT: teq r1, r2
>> ; ARM-NEXT: movweq r0, #1
>> ; ARM-NEXT: bx lr
>> ;
>> ; ARMEB-LABEL: cmp_xor8_int_int:
>> -; ARMEB: @ %bb.0: @ %entry
>> -; ARMEB-NEXT: ldr r0, [r0]
>> -; ARMEB-NEXT: ldr r1, [r1]
>> -; ARMEB-NEXT: eor r1, r1, r0
>> +; ARMEB: ldrb r2, [r0, #3]
>> ; ARMEB-NEXT: mov r0, #0
>> -; ARMEB-NEXT: tst r1, #255
>> +; ARMEB-NEXT: ldrb r1, [r1, #3]
>> +; ARMEB-NEXT: teq r1, r2
>> ; ARMEB-NEXT: movweq r0, #1
>> ; ARMEB-NEXT: bx lr
>> ;
>> ; THUMB1-LABEL: cmp_xor8_int_int:
>> -; THUMB1: @ %bb.0: @ %entry
>> -; THUMB1-NEXT: ldr r0, [r0]
>> -; THUMB1-NEXT: ldr r2, [r1]
>> +; THUMB1: ldrb r0, [r0]
>> +; THUMB1-NEXT: ldrb r2, [r1]
>> ; THUMB1-NEXT: eors r2, r0
>> ; THUMB1-NEXT: movs r0, #1
>> ; THUMB1-NEXT: movs r1, #0
>> -; THUMB1-NEXT: lsls r2, r2, #24
>> +; THUMB1-NEXT: cmp r2, #0
>> ; THUMB1-NEXT: beq .LBB2_2
>> ; THUMB1-NEXT: @ %bb.1: @ %entry
>> ; THUMB1-NEXT: mov r0, r1
>> @@ -151,16 +133,13 @@ define arm_aapcscc zeroext i1 @cmp_xor8_
>> ; THUMB1-NEXT: bx lr
>> ;
>> ; THUMB2-LABEL: cmp_xor8_int_int:
>> -; THUMB2: @ %bb.0: @ %entry
>> -; THUMB2-NEXT: ldr r0, [r0]
>> -; THUMB2-NEXT: ldr r1, [r1]
>> -; THUMB2-NEXT: eors r0, r1
>> -; THUMB2-NEXT: lsls r0, r0, #24
>> -; THUMB2-NEXT: mov.w r0, #0
>> +; THUMB2: ldrb r2, [r0]
>> +; THUMB2-NEXT: movs r0, #0
>> +; THUMB2-NEXT: ldrb r1, [r1]
>> +; THUMB2-NEXT: teq.w r1, r2
>> ; THUMB2-NEXT: it eq
>> ; THUMB2-NEXT: moveq r0, #1
>> ; THUMB2-NEXT: bx lr
>> - i32* nocapture readonly
>> %b) {
>> entry:
>> %0 = load i32, i32* %a, align 4
>> %1 = load i32, i32* %b, align 4
>> @@ -171,36 +150,30 @@ entry:
>> }
>>
>> define arm_aapcscc zeroext i1 @cmp_xor16(i32* nocapture readonly %a,
>> + i32* nocapture readonly %b) {
>> ; ARM-LABEL: cmp_xor16:
>> -; ARM: @ %bb.0: @ %entry
>> -; ARM-NEXT: ldr r0, [r0]
>> -; ARM-NEXT: movw r2, #65535
>> -; ARM-NEXT: ldr r1, [r1]
>> -; ARM-NEXT: eor r1, r1, r0
>> +; ARM: ldrh r2, [r0]
>> ; ARM-NEXT: mov r0, #0
>> -; ARM-NEXT: tst r1, r2
>> +; ARM-NEXT: ldrh r1, [r1]
>> +; ARM-NEXT: teq r1, r2
>> ; ARM-NEXT: movweq r0, #1
>> ; ARM-NEXT: bx lr
>> ;
>> ; ARMEB-LABEL: cmp_xor16:
>> -; ARMEB: @ %bb.0: @ %entry
>> -; ARMEB-NEXT: ldr r0, [r0]
>> -; ARMEB-NEXT: movw r2, #65535
>> -; ARMEB-NEXT: ldr r1, [r1]
>> -; ARMEB-NEXT: eor r1, r1, r0
>> +; ARMEB: ldrh r2, [r0, #2]
>> ; ARMEB-NEXT: mov r0, #0
>> -; ARMEB-NEXT: tst r1, r2
>> +; ARMEB-NEXT: ldrh r1, [r1, #2]
>> +; ARMEB-NEXT: teq r1, r2
>> ; ARMEB-NEXT: movweq r0, #1
>> ; ARMEB-NEXT: bx lr
>> ;
>> ; THUMB1-LABEL: cmp_xor16:
>> -; THUMB1: @ %bb.0: @ %entry
>> -; THUMB1-NEXT: ldr r0, [r0]
>> -; THUMB1-NEXT: ldr r2, [r1]
>> +; THUMB1: ldrh r0, [r0]
>> +; THUMB1-NEXT: ldrh r2, [r1]
>> ; THUMB1-NEXT: eors r2, r0
>> ; THUMB1-NEXT: movs r0, #1
>> ; THUMB1-NEXT: movs r1, #0
>> -; THUMB1-NEXT: lsls r2, r2, #16
>> +; THUMB1-NEXT: cmp r2, #0
>> ; THUMB1-NEXT: beq .LBB3_2
>> ; THUMB1-NEXT: @ %bb.1: @ %entry
>> ; THUMB1-NEXT: mov r0, r1
>> @@ -208,16 +181,13 @@ define arm_aapcscc zeroext i1 @cmp_xor16
>> ; THUMB1-NEXT: bx lr
>> ;
>> ; THUMB2-LABEL: cmp_xor16:
>> -; THUMB2: @ %bb.0: @ %entry
>> -; THUMB2-NEXT: ldr r0, [r0]
>> -; THUMB2-NEXT: ldr r1, [r1]
>> -; THUMB2-NEXT: eors r0, r1
>> -; THUMB2-NEXT: lsls r0, r0, #16
>> -; THUMB2-NEXT: mov.w r0, #0
>> +; THUMB2: ldrh r2, [r0]
>> +; THUMB2-NEXT: movs r0, #0
>> +; THUMB2-NEXT: ldrh r1, [r1]
>> +; THUMB2-NEXT: teq.w r1, r2
>> ; THUMB2-NEXT: it eq
>> ; THUMB2-NEXT: moveq r0, #1
>> ; THUMB2-NEXT: bx lr
>> - i32* nocapture readonly %b) {
>> entry:
>> %0 = load i32, i32* %a, align 4
>> %1 = load i32, i32* %b, align 4
>> @@ -228,34 +198,30 @@ entry:
>> }
>>
>> define arm_aapcscc zeroext i1 @cmp_or8_short_short(i16* nocapture
>> readonly %a,
>> + i16* nocapture
>> readonly %b) {
>> ; ARM-LABEL: cmp_or8_short_short:
>> -; ARM: @ %bb.0: @ %entry
>> -; ARM-NEXT: ldrh r0, [r0]
>> -; ARM-NEXT: ldrh r1, [r1]
>> -; ARM-NEXT: orr r1, r1, r0
>> +; ARM: ldrb r0, [r0]
>> +; ARM-NEXT: ldrb r1, [r1]
>> +; ARM-NEXT: orrs r0, r1, r0
>> ; ARM-NEXT: mov r0, #0
>> -; ARM-NEXT: tst r1, #255
>> ; ARM-NEXT: movweq r0, #1
>> ; ARM-NEXT: bx lr
>> ;
>> ; ARMEB-LABEL: cmp_or8_short_short:
>> -; ARMEB: @ %bb.0: @ %entry
>> -; ARMEB-NEXT: ldrh r0, [r0]
>> -; ARMEB-NEXT: ldrh r1, [r1]
>> -; ARMEB-NEXT: orr r1, r1, r0
>> +; ARMEB: ldrb r0, [r0, #1]
>> +; ARMEB-NEXT: ldrb r1, [r1, #1]
>> +; ARMEB-NEXT: orrs r0, r1, r0
>> ; ARMEB-NEXT: mov r0, #0
>> -; ARMEB-NEXT: tst r1, #255
>> ; ARMEB-NEXT: movweq r0, #1
>> ; ARMEB-NEXT: bx lr
>> ;
>> ; THUMB1-LABEL: cmp_or8_short_short:
>> -; THUMB1: @ %bb.0: @ %entry
>> -; THUMB1-NEXT: ldrh r0, [r0]
>> -; THUMB1-NEXT: ldrh r2, [r1]
>> +; THUMB1: ldrb r0, [r0]
>> +; THUMB1-NEXT: ldrb r2, [r1]
>> ; THUMB1-NEXT: orrs r2, r0
>> ; THUMB1-NEXT: movs r0, #1
>> ; THUMB1-NEXT: movs r1, #0
>> -; THUMB1-NEXT: lsls r2, r2, #24
>> +; THUMB1-NEXT: cmp r2, #0
>> ; THUMB1-NEXT: beq .LBB4_2
>> ; THUMB1-NEXT: @ %bb.1: @ %entry
>> ; THUMB1-NEXT: mov r0, r1
>> @@ -263,16 +229,13 @@ define arm_aapcscc zeroext i1 @cmp_or8_s
>> ; THUMB1-NEXT: bx lr
>> ;
>> ; THUMB2-LABEL: cmp_or8_short_short:
>> -; THUMB2: @ %bb.0: @ %entry
>> -; THUMB2-NEXT: ldrh r0, [r0]
>> -; THUMB2-NEXT: ldrh r1, [r1]
>> +; THUMB2: ldrb r0, [r0]
>> +; THUMB2-NEXT: ldrb r1, [r1]
>> ; THUMB2-NEXT: orrs r0, r1
>> -; THUMB2-NEXT: lsls r0, r0, #24
>> ; THUMB2-NEXT: mov.w r0, #0
>> ; THUMB2-NEXT: it eq
>> ; THUMB2-NEXT: moveq r0, #1
>> ; THUMB2-NEXT: bx lr
>> - i16* nocapture
>> readonly %b) {
>> entry:
>> %0 = load i16, i16* %a, align 2
>> %1 = load i16, i16* %b, align 2
>> @@ -283,34 +246,30 @@ entry:
>> }
>>
>> define arm_aapcscc zeroext i1 @cmp_or8_short_int(i16* nocapture readonly
>> %a,
>> + i32* nocapture readonly
>> %b) {
>> ; ARM-LABEL: cmp_or8_short_int:
>> -; ARM: @ %bb.0: @ %entry
>> -; ARM-NEXT: ldrh r0, [r0]
>> -; ARM-NEXT: ldr r1, [r1]
>> -; ARM-NEXT: orr r1, r1, r0
>> +; ARM: ldrb r0, [r0]
>> +; ARM-NEXT: ldrb r1, [r1]
>> +; ARM-NEXT: orrs r0, r1, r0
>> ; ARM-NEXT: mov r0, #0
>> -; ARM-NEXT: tst r1, #255
>> ; ARM-NEXT: movweq r0, #1
>> ; ARM-NEXT: bx lr
>> ;
>> ; ARMEB-LABEL: cmp_or8_short_int:
>> -; ARMEB: @ %bb.0: @ %entry
>> -; ARMEB-NEXT: ldrh r0, [r0]
>> -; ARMEB-NEXT: ldr r1, [r1]
>> -; ARMEB-NEXT: orr r1, r1, r0
>> +; ARMEB: ldrb r0, [r0, #1]
>> +; ARMEB-NEXT: ldrb r1, [r1, #3]
>> +; ARMEB-NEXT: orrs r0, r1, r0
>> ; ARMEB-NEXT: mov r0, #0
>> -; ARMEB-NEXT: tst r1, #255
>> ; ARMEB-NEXT: movweq r0, #1
>> ; ARMEB-NEXT: bx lr
>> ;
>> ; THUMB1-LABEL: cmp_or8_short_int:
>> -; THUMB1: @ %bb.0: @ %entry
>> -; THUMB1-NEXT: ldrh r0, [r0]
>> -; THUMB1-NEXT: ldr r2, [r1]
>> +; THUMB1: ldrb r0, [r0]
>> +; THUMB1-NEXT: ldrb r2, [r1]
>> ; THUMB1-NEXT: orrs r2, r0
>> ; THUMB1-NEXT: movs r0, #1
>> ; THUMB1-NEXT: movs r1, #0
>> -; THUMB1-NEXT: lsls r2, r2, #24
>> +; THUMB1-NEXT: cmp r2, #0
>> ; THUMB1-NEXT: beq .LBB5_2
>> ; THUMB1-NEXT: @ %bb.1: @ %entry
>> ; THUMB1-NEXT: mov r0, r1
>> @@ -318,16 +277,13 @@ define arm_aapcscc zeroext i1 @cmp_or8_s
>> ; THUMB1-NEXT: bx lr
>> ;
>> ; THUMB2-LABEL: cmp_or8_short_int:
>> -; THUMB2: @ %bb.0: @ %entry
>> -; THUMB2-NEXT: ldrh r0, [r0]
>> -; THUMB2-NEXT: ldr r1, [r1]
>> +; THUMB2: ldrb r0, [r0]
>> +; THUMB2-NEXT: ldrb r1, [r1]
>> ; THUMB2-NEXT: orrs r0, r1
>> -; THUMB2-NEXT: lsls r0, r0, #24
>> ; THUMB2-NEXT: mov.w r0, #0
>> ; THUMB2-NEXT: it eq
>> ; THUMB2-NEXT: moveq r0, #1
>> ; THUMB2-NEXT: bx lr
>> - i32* nocapture readonly
>> %b) {
>> entry:
>> %0 = load i16, i16* %a, align 2
>> %conv = zext i16 %0 to i32
>> @@ -339,34 +295,30 @@ entry:
>> }
>>
>> define arm_aapcscc zeroext i1 @cmp_or8_int_int(i32* nocapture readonly
>> %a,
>> + i32* nocapture readonly
>> %b) {
>> ; ARM-LABEL: cmp_or8_int_int:
>> -; ARM: @ %bb.0: @ %entry
>> -; ARM-NEXT: ldr r0, [r0]
>> -; ARM-NEXT: ldr r1, [r1]
>> -; ARM-NEXT: orr r1, r1, r0
>> +; ARM: ldrb r0, [r0]
>> +; ARM-NEXT: ldrb r1, [r1]
>> +; ARM-NEXT: orrs r0, r1, r0
>> ; ARM-NEXT: mov r0, #0
>> -; ARM-NEXT: tst r1, #255
>> ; ARM-NEXT: movweq r0, #1
>> ; ARM-NEXT: bx lr
>> ;
>> ; ARMEB-LABEL: cmp_or8_int_int:
>> -; ARMEB: @ %bb.0: @ %entry
>> -; ARMEB-NEXT: ldr r0, [r0]
>> -; ARMEB-NEXT: ldr r1, [r1]
>> -; ARMEB-NEXT: orr r1, r1, r0
>> +; ARMEB: ldrb r0, [r0, #3]
>> +; ARMEB-NEXT: ldrb r1, [r1, #3]
>> +; ARMEB-NEXT: orrs r0, r1, r0
>> ; ARMEB-NEXT: mov r0, #0
>> -; ARMEB-NEXT: tst r1, #255
>> ; ARMEB-NEXT: movweq r0, #1
>> ; ARMEB-NEXT: bx lr
>> ;
>> ; THUMB1-LABEL: cmp_or8_int_int:
>> -; THUMB1: @ %bb.0: @ %entry
>> -; THUMB1-NEXT: ldr r0, [r0]
>> -; THUMB1-NEXT: ldr r2, [r1]
>> +; THUMB1: ldrb r0, [r0]
>> +; THUMB1-NEXT: ldrb r2, [r1]
>> ; THUMB1-NEXT: orrs r2, r0
>> ; THUMB1-NEXT: movs r0, #1
>> ; THUMB1-NEXT: movs r1, #0
>> -; THUMB1-NEXT: lsls r2, r2, #24
>> +; THUMB1-NEXT: cmp r2, #0
>> ; THUMB1-NEXT: beq .LBB6_2
>> ; THUMB1-NEXT: @ %bb.1: @ %entry
>> ; THUMB1-NEXT: mov r0, r1
>> @@ -374,16 +326,13 @@ define arm_aapcscc zeroext i1 @cmp_or8_i
>> ; THUMB1-NEXT: bx lr
>> ;
>> ; THUMB2-LABEL: cmp_or8_int_int:
>> -; THUMB2: @ %bb.0: @ %entry
>> -; THUMB2-NEXT: ldr r0, [r0]
>> -; THUMB2-NEXT: ldr r1, [r1]
>> +; THUMB2: ldrb r0, [r0]
>> +; THUMB2-NEXT: ldrb r1, [r1]
>> ; THUMB2-NEXT: orrs r0, r1
>> -; THUMB2-NEXT: lsls r0, r0, #24
>> ; THUMB2-NEXT: mov.w r0, #0
>> ; THUMB2-NEXT: it eq
>> ; THUMB2-NEXT: moveq r0, #1
>> ; THUMB2-NEXT: bx lr
>> - i32* nocapture readonly
>> %b) {
>> entry:
>> %0 = load i32, i32* %a, align 4
>> %1 = load i32, i32* %b, align 4
>> @@ -394,36 +343,30 @@ entry:
>> }
>>
>> define arm_aapcscc zeroext i1 @cmp_or16(i32* nocapture readonly %a,
>> + i32* nocapture readonly %b) {
>> ; ARM-LABEL: cmp_or16:
>> -; ARM: @ %bb.0: @ %entry
>> -; ARM-NEXT: ldr r0, [r0]
>> -; ARM-NEXT: movw r2, #65535
>> -; ARM-NEXT: ldr r1, [r1]
>> -; ARM-NEXT: orr r1, r1, r0
>> +; ARM: ldrh r0, [r0]
>> +; ARM-NEXT: ldrh r1, [r1]
>> +; ARM-NEXT: orrs r0, r1, r0
>> ; ARM-NEXT: mov r0, #0
>> -; ARM-NEXT: tst r1, r2
>> ; ARM-NEXT: movweq r0, #1
>> ; ARM-NEXT: bx lr
>> ;
>> ; ARMEB-LABEL: cmp_or16:
>> -; ARMEB: @ %bb.0: @ %entry
>> -; ARMEB-NEXT: ldr r0, [r0]
>> -; ARMEB-NEXT: movw r2, #65535
>> -; ARMEB-NEXT: ldr r1, [r1]
>> -; ARMEB-NEXT: orr r1, r1, r0
>> +; ARMEB: ldrh r0, [r0, #2]
>> +; ARMEB-NEXT: ldrh r1, [r1, #2]
>> +; ARMEB-NEXT: orrs r0, r1, r0
>> ; ARMEB-NEXT: mov r0, #0
>> -; ARMEB-NEXT: tst r1, r2
>> ; ARMEB-NEXT: movweq r0, #1
>> ; ARMEB-NEXT: bx lr
>> ;
>> ; THUMB1-LABEL: cmp_or16:
>> -; THUMB1: @ %bb.0: @ %entry
>> -; THUMB1-NEXT: ldr r0, [r0]
>> -; THUMB1-NEXT: ldr r2, [r1]
>> +; THUMB1: ldrh r0, [r0]
>> +; THUMB1-NEXT: ldrh r2, [r1]
>> ; THUMB1-NEXT: orrs r2, r0
>> ; THUMB1-NEXT: movs r0, #1
>> ; THUMB1-NEXT: movs r1, #0
>> -; THUMB1-NEXT: lsls r2, r2, #16
>> +; THUMB1-NEXT: cmp r2, #0
>> ; THUMB1-NEXT: beq .LBB7_2
>> ; THUMB1-NEXT: @ %bb.1: @ %entry
>> ; THUMB1-NEXT: mov r0, r1
>> @@ -431,16 +374,13 @@ define arm_aapcscc zeroext i1 @cmp_or16(
>> ; THUMB1-NEXT: bx lr
>> ;
>> ; THUMB2-LABEL: cmp_or16:
>> -; THUMB2: @ %bb.0: @ %entry
>> -; THUMB2-NEXT: ldr r0, [r0]
>> -; THUMB2-NEXT: ldr r1, [r1]
>> +; THUMB2: ldrh r0, [r0]
>> +; THUMB2-NEXT: ldrh r1, [r1]
>> ; THUMB2-NEXT: orrs r0, r1
>> -; THUMB2-NEXT: lsls r0, r0, #16
>> ; THUMB2-NEXT: mov.w r0, #0
>> ; THUMB2-NEXT: it eq
>> ; THUMB2-NEXT: moveq r0, #1
>> ; THUMB2-NEXT: bx lr
>> - i32* nocapture readonly %b) {
>> entry:
>> %0 = load i32, i32* %a, align 4
>> %1 = load i32, i32* %b, align 4
>> @@ -451,34 +391,29 @@ entry:
>> }
>>
>> define arm_aapcscc zeroext i1 @cmp_and8_short_short(i16* nocapture
>> readonly %a,
>> + i16* nocapture
>> readonly %b) {
>> ; ARM-LABEL: cmp_and8_short_short:
>> -; ARM: @ %bb.0: @ %entry
>> -; ARM-NEXT: ldrh r1, [r1]
>> -; ARM-NEXT: ldrh r0, [r0]
>> -; ARM-NEXT: and r1, r0, r1
>> +; ARM: ldrb r2, [r0]
>> ; ARM-NEXT: mov r0, #0
>> -; ARM-NEXT: tst r1, #255
>> +; ARM-NEXT: ldrb r1, [r1]
>> +; ARM-NEXT: tst r2, r1
>> ; ARM-NEXT: movweq r0, #1
>> ; ARM-NEXT: bx lr
>> ;
>> ; ARMEB-LABEL: cmp_and8_short_short:
>> -; ARMEB: @ %bb.0: @ %entry
>> -; ARMEB-NEXT: ldrh r1, [r1]
>> -; ARMEB-NEXT: ldrh r0, [r0]
>> -; ARMEB-NEXT: and r1, r0, r1
>> +; ARMEB: ldrb r2, [r0, #1]
>> ; ARMEB-NEXT: mov r0, #0
>> -; ARMEB-NEXT: tst r1, #255
>> +; ARMEB-NEXT: ldrb r1, [r1, #1]
>> +; ARMEB-NEXT: tst r2, r1
>> ; ARMEB-NEXT: movweq r0, #1
>> ; ARMEB-NEXT: bx lr
>> ;
>> ; THUMB1-LABEL: cmp_and8_short_short:
>> -; THUMB1: @ %bb.0: @ %entry
>> -; THUMB1-NEXT: ldrh r1, [r1]
>> -; THUMB1-NEXT: ldrh r2, [r0]
>> -; THUMB1-NEXT: ands r2, r1
>> +; THUMB1: ldrb r2, [r1]
>> +; THUMB1-NEXT: ldrb r3, [r0]
>> ; THUMB1-NEXT: movs r0, #1
>> ; THUMB1-NEXT: movs r1, #0
>> -; THUMB1-NEXT: lsls r2, r2, #24
>> +; THUMB1-NEXT: tst r3, r2
>> ; THUMB1-NEXT: beq .LBB8_2
>> ; THUMB1-NEXT: @ %bb.1: @ %entry
>> ; THUMB1-NEXT: mov r0, r1
>> @@ -486,16 +421,13 @@ define arm_aapcscc zeroext i1 @cmp_and8_
>> ; THUMB1-NEXT: bx lr
>> ;
>> ; THUMB2-LABEL: cmp_and8_short_short:
>> -; THUMB2: @ %bb.0: @ %entry
>> -; THUMB2-NEXT: ldrh r1, [r1]
>> -; THUMB2-NEXT: ldrh r0, [r0]
>> -; THUMB2-NEXT: ands r0, r1
>> -; THUMB2-NEXT: lsls r0, r0, #24
>> -; THUMB2-NEXT: mov.w r0, #0
>> +; THUMB2: ldrb r2, [r0]
>> +; THUMB2-NEXT: movs r0, #0
>> +; THUMB2-NEXT: ldrb r1, [r1]
>> +; THUMB2-NEXT: tst r2, r1
>> ; THUMB2-NEXT: it eq
>> ; THUMB2-NEXT: moveq r0, #1
>> ; THUMB2-NEXT: bx lr
>> - i16* nocapture
>> readonly %b) {
>> entry:
>> %0 = load i16, i16* %a, align 2
>> %1 = load i16, i16* %b, align 2
>> @@ -506,34 +438,29 @@ entry:
>> }
>>
>> define arm_aapcscc zeroext i1 @cmp_and8_short_int(i16* nocapture
>> readonly %a,
>> + i32* nocapture
>> readonly %b) {
>> ; ARM-LABEL: cmp_and8_short_int:
>> -; ARM: @ %bb.0: @ %entry
>> -; ARM-NEXT: ldrh r0, [r0]
>> -; ARM-NEXT: ldr r1, [r1]
>> -; ARM-NEXT: and r1, r1, r0
>> +; ARM: ldrb r2, [r0]
>> ; ARM-NEXT: mov r0, #0
>> -; ARM-NEXT: tst r1, #255
>> +; ARM-NEXT: ldrb r1, [r1]
>> +; ARM-NEXT: tst r1, r2
>> ; ARM-NEXT: movweq r0, #1
>> ; ARM-NEXT: bx lr
>> ;
>> ; ARMEB-LABEL: cmp_and8_short_int:
>> -; ARMEB: @ %bb.0: @ %entry
>> -; ARMEB-NEXT: ldrh r0, [r0]
>> -; ARMEB-NEXT: ldr r1, [r1]
>> -; ARMEB-NEXT: and r1, r1, r0
>> +; ARMEB: ldrb r2, [r0, #1]
>> ; ARMEB-NEXT: mov r0, #0
>> -; ARMEB-NEXT: tst r1, #255
>> +; ARMEB-NEXT: ldrb r1, [r1, #3]
>> +; ARMEB-NEXT: tst r1, r2
>> ; ARMEB-NEXT: movweq r0, #1
>> ; ARMEB-NEXT: bx lr
>> ;
>> ; THUMB1-LABEL: cmp_and8_short_int:
>> -; THUMB1: @ %bb.0: @ %entry
>> -; THUMB1-NEXT: ldrh r0, [r0]
>> -; THUMB1-NEXT: ldr r2, [r1]
>> -; THUMB1-NEXT: ands r2, r0
>> +; THUMB1: ldrb r2, [r0]
>> +; THUMB1-NEXT: ldrb r3, [r1]
>> ; THUMB1-NEXT: movs r0, #1
>> ; THUMB1-NEXT: movs r1, #0
>> -; THUMB1-NEXT: lsls r2, r2, #24
>> +; THUMB1-NEXT: tst r3, r2
>> ; THUMB1-NEXT: beq .LBB9_2
>> ; THUMB1-NEXT: @ %bb.1: @ %entry
>> ; THUMB1-NEXT: mov r0, r1
>> @@ -541,16 +468,13 @@ define arm_aapcscc zeroext i1 @cmp_and8_
>> ; THUMB1-NEXT: bx lr
>> ;
>> ; THUMB2-LABEL: cmp_and8_short_int:
>> -; THUMB2: @ %bb.0: @ %entry
>> -; THUMB2-NEXT: ldrh r0, [r0]
>> -; THUMB2-NEXT: ldr r1, [r1]
>> -; THUMB2-NEXT: ands r0, r1
>> -; THUMB2-NEXT: lsls r0, r0, #24
>> -; THUMB2-NEXT: mov.w r0, #0
>> +; THUMB2: ldrb r2, [r0]
>> +; THUMB2-NEXT: movs r0, #0
>> +; THUMB2-NEXT: ldrb r1, [r1]
>> +; THUMB2-NEXT: tst r1, r2
>> ; THUMB2-NEXT: it eq
>> ; THUMB2-NEXT: moveq r0, #1
>> ; THUMB2-NEXT: bx lr
>> - i32* nocapture
>> readonly %b) {
>> entry:
>> %0 = load i16, i16* %a, align 2
>> %1 = load i32, i32* %b, align 4
>> @@ -562,34 +486,29 @@ entry:
>> }
>>
>> define arm_aapcscc zeroext i1 @cmp_and8_int_int(i32* nocapture readonly
>> %a,
>> + i32* nocapture readonly
>> %b) {
>> ; ARM-LABEL: cmp_and8_int_int:
>> -; ARM: @ %bb.0: @ %entry
>> -; ARM-NEXT: ldr r1, [r1]
>> -; ARM-NEXT: ldr r0, [r0]
>> -; ARM-NEXT: and r1, r0, r1
>> +; ARM: ldrb r2, [r0]
>> ; ARM-NEXT: mov r0, #0
>> -; ARM-NEXT: tst r1, #255
>> +; ARM-NEXT: ldrb r1, [r1]
>> +; ARM-NEXT: tst r2, r1
>> ; ARM-NEXT: movweq r0, #1
>> ; ARM-NEXT: bx lr
>> ;
>> ; ARMEB-LABEL: cmp_and8_int_int:
>> -; ARMEB: @ %bb.0: @ %entry
>> -; ARMEB-NEXT: ldr r1, [r1]
>> -; ARMEB-NEXT: ldr r0, [r0]
>> -; ARMEB-NEXT: and r1, r0, r1
>> +; ARMEB: ldrb r2, [r0, #3]
>> ; ARMEB-NEXT: mov r0, #0
>> -; ARMEB-NEXT: tst r1, #255
>> +; ARMEB-NEXT: ldrb r1, [r1, #3]
>> +; ARMEB-NEXT: tst r2, r1
>> ; ARMEB-NEXT: movweq r0, #1
>> ; ARMEB-NEXT: bx lr
>> ;
>> ; THUMB1-LABEL: cmp_and8_int_int:
>> -; THUMB1: @ %bb.0: @ %entry
>> -; THUMB1-NEXT: ldr r1, [r1]
>> -; THUMB1-NEXT: ldr r2, [r0]
>> -; THUMB1-NEXT: ands r2, r1
>> +; THUMB1: ldrb r2, [r1]
>> +; THUMB1-NEXT: ldrb r3, [r0]
>> ; THUMB1-NEXT: movs r0, #1
>> ; THUMB1-NEXT: movs r1, #0
>> -; THUMB1-NEXT: lsls r2, r2, #24
>> +; THUMB1-NEXT: tst r3, r2
>> ; THUMB1-NEXT: beq .LBB10_2
>> ; THUMB1-NEXT: @ %bb.1: @ %entry
>> ; THUMB1-NEXT: mov r0, r1
>> @@ -597,16 +516,13 @@ define arm_aapcscc zeroext i1 @cmp_and8_
>> ; THUMB1-NEXT: bx lr
>> ;
>> ; THUMB2-LABEL: cmp_and8_int_int:
>> -; THUMB2: @ %bb.0: @ %entry
>> -; THUMB2-NEXT: ldr r1, [r1]
>> -; THUMB2-NEXT: ldr r0, [r0]
>> -; THUMB2-NEXT: ands r0, r1
>> -; THUMB2-NEXT: lsls r0, r0, #24
>> -; THUMB2-NEXT: mov.w r0, #0
>> +; THUMB2: ldrb r2, [r0]
>> +; THUMB2-NEXT: movs r0, #0
>> +; THUMB2-NEXT: ldrb r1, [r1]
>> +; THUMB2-NEXT: tst r2, r1
>> ; THUMB2-NEXT: it eq
>> ; THUMB2-NEXT: moveq r0, #1
>> ; THUMB2-NEXT: bx lr
>> - i32* nocapture readonly
>> %b) {
>> entry:
>> %0 = load i32, i32* %a, align 4
>> %1 = load i32, i32* %b, align 4
>> @@ -617,36 +533,29 @@ entry:
>> }
>>
>> define arm_aapcscc zeroext i1 @cmp_and16(i32* nocapture readonly %a,
>> + i32* nocapture readonly %b) {
>> ; ARM-LABEL: cmp_and16:
>> -; ARM: @ %bb.0: @ %entry
>> -; ARM-NEXT: ldr r1, [r1]
>> -; ARM-NEXT: movw r2, #65535
>> -; ARM-NEXT: ldr r0, [r0]
>> -; ARM-NEXT: and r1, r0, r1
>> +; ARM: ldrh r2, [r0]
>> ; ARM-NEXT: mov r0, #0
>> -; ARM-NEXT: tst r1, r2
>> +; ARM-NEXT: ldrh r1, [r1]
>> +; ARM-NEXT: tst r2, r1
>> ; ARM-NEXT: movweq r0, #1
>> ; ARM-NEXT: bx lr
>> ;
>> ; ARMEB-LABEL: cmp_and16:
>> -; ARMEB: @ %bb.0: @ %entry
>> -; ARMEB-NEXT: ldr r1, [r1]
>> -; ARMEB-NEXT: movw r2, #65535
>> -; ARMEB-NEXT: ldr r0, [r0]
>> -; ARMEB-NEXT: and r1, r0, r1
>> +; ARMEB: ldrh r2, [r0, #2]
>> ; ARMEB-NEXT: mov r0, #0
>> -; ARMEB-NEXT: tst r1, r2
>> +; ARMEB-NEXT: ldrh r1, [r1, #2]
>> +; ARMEB-NEXT: tst r2, r1
>> ; ARMEB-NEXT: movweq r0, #1
>> ; ARMEB-NEXT: bx lr
>> ;
>> ; THUMB1-LABEL: cmp_and16:
>> -; THUMB1: @ %bb.0: @ %entry
>> -; THUMB1-NEXT: ldr r1, [r1]
>> -; THUMB1-NEXT: ldr r2, [r0]
>> -; THUMB1-NEXT: ands r2, r1
>> +; THUMB1: ldrh r2, [r1]
>> +; THUMB1-NEXT: ldrh r3, [r0]
>> ; THUMB1-NEXT: movs r0, #1
>> ; THUMB1-NEXT: movs r1, #0
>> -; THUMB1-NEXT: lsls r2, r2, #16
>> +; THUMB1-NEXT: tst r3, r2
>> ; THUMB1-NEXT: beq .LBB11_2
>> ; THUMB1-NEXT: @ %bb.1: @ %entry
>> ; THUMB1-NEXT: mov r0, r1
>> @@ -654,16 +563,13 @@ define arm_aapcscc zeroext i1 @cmp_and16
>> ; THUMB1-NEXT: bx lr
>> ;
>> ; THUMB2-LABEL: cmp_and16:
>> -; THUMB2: @ %bb.0: @ %entry
>> -; THUMB2-NEXT: ldr r1, [r1]
>> -; THUMB2-NEXT: ldr r0, [r0]
>> -; THUMB2-NEXT: ands r0, r1
>> -; THUMB2-NEXT: lsls r0, r0, #16
>> -; THUMB2-NEXT: mov.w r0, #0
>> +; THUMB2: ldrh r2, [r0]
>> +; THUMB2-NEXT: movs r0, #0
>> +; THUMB2-NEXT: ldrh r1, [r1]
>> +; THUMB2-NEXT: tst r2, r1
>> ; THUMB2-NEXT: it eq
>> ; THUMB2-NEXT: moveq r0, #1
>> ; THUMB2-NEXT: bx lr
>> - i32* nocapture readonly %b) {
>> entry:
>> %0 = load i32, i32* %a, align 4
>> %1 = load i32, i32* %b, align 4
>> @@ -675,35 +581,31 @@ entry:
>>
>> define arm_aapcscc i32 @add_and16(i32* nocapture readonly %a, i32 %y,
>> i32 %z) {
>> ; ARM-LABEL: add_and16:
>> -; ARM: @ %bb.0: @ %entry
>> -; ARM-NEXT: ldr r0, [r0]
>> -; ARM-NEXT: add r1, r1, r2
>> +; ARM: add r1, r1, r2
>> +; ARM-NEXT: ldrh r0, [r0]
>> +; ARM-NEXT: uxth r1, r1
>> ; ARM-NEXT: orr r0, r0, r1
>> -; ARM-NEXT: uxth r0, r0
>> ; ARM-NEXT: bx lr
>> ;
>> ; ARMEB-LABEL: add_and16:
>> -; ARMEB: @ %bb.0: @ %entry
>> -; ARMEB-NEXT: ldr r0, [r0]
>> -; ARMEB-NEXT: add r1, r1, r2
>> +; ARMEB: add r1, r1, r2
>> +; ARMEB-NEXT: ldrh r0, [r0, #2]
>> +; ARMEB-NEXT: uxth r1, r1
>> ; ARMEB-NEXT: orr r0, r0, r1
>> -; ARMEB-NEXT: uxth r0, r0
>> ; ARMEB-NEXT: bx lr
>> ;
>> ; THUMB1-LABEL: add_and16:
>> -; THUMB1: @ %bb.0: @ %entry
>> -; THUMB1-NEXT: adds r1, r1, r2
>> -; THUMB1-NEXT: ldr r0, [r0]
>> +; THUMB1: adds r1, r1, r2
>> +; THUMB1-NEXT: uxth r1, r1
>> +; THUMB1-NEXT: ldrh r0, [r0]
>> ; THUMB1-NEXT: orrs r0, r1
>> -; THUMB1-NEXT: uxth r0, r0
>> ; THUMB1-NEXT: bx lr
>> ;
>> ; THUMB2-LABEL: add_and16:
>> -; THUMB2: @ %bb.0: @ %entry
>> -; THUMB2-NEXT: ldr r0, [r0]
>> -; THUMB2-NEXT: add r1, r2
>> +; THUMB2: add r1, r2
>> +; THUMB2-NEXT: ldrh r0, [r0]
>> +; THUMB2-NEXT: uxth r1, r1
>> ; THUMB2-NEXT: orrs r0, r1
>> -; THUMB2-NEXT: uxth r0, r0
>> ; THUMB2-NEXT: bx lr
>> entry:
>> %x = load i32, i32* %a, align 4
>> @@ -715,43 +617,39 @@ entry:
>>
>> define arm_aapcscc i32 @test1(i32* %a, i32* %b, i32 %x, i32 %y) {
>> ; ARM-LABEL: test1:
>> -; ARM: @ %bb.0: @ %entry
>> -; ARM-NEXT: mul r2, r2, r3
>> -; ARM-NEXT: ldr r1, [r1]
>> -; ARM-NEXT: ldr r0, [r0]
>> +; ARM: mul r2, r2, r3
>> +; ARM-NEXT: ldrh r1, [r1]
>> +; ARM-NEXT: ldrh r0, [r0]
>> ; ARM-NEXT: eor r0, r0, r1
>> -; ARM-NEXT: orr r0, r0, r2
>> -; ARM-NEXT: uxth r0, r0
>> +; ARM-NEXT: uxth r1, r2
>> +; ARM-NEXT: orr r0, r0, r1
>> ; ARM-NEXT: bx lr
>> ;
>> ; ARMEB-LABEL: test1:
>> -; ARMEB: @ %bb.0: @ %entry
>> -; ARMEB-NEXT: mul r2, r2, r3
>> -; ARMEB-NEXT: ldr r1, [r1]
>> -; ARMEB-NEXT: ldr r0, [r0]
>> +; ARMEB: mul r2, r2, r3
>> +; ARMEB-NEXT: ldrh r1, [r1, #2]
>> +; ARMEB-NEXT: ldrh r0, [r0, #2]
>> ; ARMEB-NEXT: eor r0, r0, r1
>> -; ARMEB-NEXT: orr r0, r0, r2
>> -; ARMEB-NEXT: uxth r0, r0
>> +; ARMEB-NEXT: uxth r1, r2
>> +; ARMEB-NEXT: orr r0, r0, r1
>> ; ARMEB-NEXT: bx lr
>> ;
>> ; THUMB1-LABEL: test1:
>> -; THUMB1: @ %bb.0: @ %entry
>> +; THUMB1: ldrh r1, [r1]
>> +; THUMB1-NEXT: ldrh r4, [r0]
>> +; THUMB1-NEXT: eors r4, r1
>> ; THUMB1-NEXT: muls r2, r3, r2
>> -; THUMB1-NEXT: ldr r1, [r1]
>> -; THUMB1-NEXT: ldr r0, [r0]
>> -; THUMB1-NEXT: eors r0, r1
>> -; THUMB1-NEXT: orrs r0, r2
>> -; THUMB1-NEXT: uxth r0, r0
>> -; THUMB1-NEXT: bx lr
>> +; THUMB1-NEXT: uxth r0, r2
>> +; THUMB1-NEXT: orrs r0, r4
>> +; THUMB1-NEXT: pop
>> ;
>> ; THUMB2-LABEL: test1:
>> -; THUMB2: @ %bb.0: @ %entry
>> -; THUMB2-NEXT: muls r2, r3, r2
>> -; THUMB2-NEXT: ldr r1, [r1]
>> -; THUMB2-NEXT: ldr r0, [r0]
>> +; THUMB2: ldrh r1, [r1]
>> +; THUMB2-NEXT: ldrh r0, [r0]
>> ; THUMB2-NEXT: eors r0, r1
>> -; THUMB2-NEXT: orrs r0, r2
>> -; THUMB2-NEXT: uxth r0, r0
>> +; THUMB2-NEXT: mul r1, r2, r3
>> +; THUMB2-NEXT: uxth r1, r1
>> +; THUMB2-NEXT: orrs r0, r1
>> ; THUMB2-NEXT: bx lr
>> entry:
>> %0 = load i32, i32* %a, align 4
>> @@ -765,8 +663,7 @@ entry:
>>
>> define arm_aapcscc i32 @test2(i32* %a, i32* %b, i32 %x, i32 %y) {
>> ; ARM-LABEL: test2:
>> -; ARM: @ %bb.0: @ %entry
>> -; ARM-NEXT: ldr r1, [r1]
>> +; ARM: ldr r1, [r1]
>> ; ARM-NEXT: ldr r0, [r0]
>> ; ARM-NEXT: mul r1, r2, r1
>> ; ARM-NEXT: eor r0, r0, r3
>> @@ -775,8 +672,7 @@ define arm_aapcscc i32 @test2(i32* %a, i
>> ; ARM-NEXT: bx lr
>> ;
>> ; ARMEB-LABEL: test2:
>> -; ARMEB: @ %bb.0: @ %entry
>> -; ARMEB-NEXT: ldr r1, [r1]
>> +; ARMEB: ldr r1, [r1]
>> ; ARMEB-NEXT: ldr r0, [r0]
>> ; ARMEB-NEXT: mul r1, r2, r1
>> ; ARMEB-NEXT: eor r0, r0, r3
>> @@ -785,8 +681,7 @@ define arm_aapcscc i32 @test2(i32* %a, i
>> ; ARMEB-NEXT: bx lr
>> ;
>> ; THUMB1-LABEL: test2:
>> -; THUMB1: @ %bb.0: @ %entry
>> -; THUMB1-NEXT: ldr r1, [r1]
>> +; THUMB1: ldr r1, [r1]
>> ; THUMB1-NEXT: muls r1, r2, r1
>> ; THUMB1-NEXT: ldr r0, [r0]
>> ; THUMB1-NEXT: eors r0, r3
>> @@ -795,8 +690,7 @@ define arm_aapcscc i32 @test2(i32* %a, i
>> ; THUMB1-NEXT: bx lr
>> ;
>> ; THUMB2-LABEL: test2:
>> -; THUMB2: @ %bb.0: @ %entry
>> -; THUMB2-NEXT: ldr r1, [r1]
>> +; THUMB2: ldr r1, [r1]
>> ; THUMB2-NEXT: ldr r0, [r0]
>> ; THUMB2-NEXT: muls r1, r2, r1
>> ; THUMB2-NEXT: eors r0, r3
>> @@ -815,8 +709,7 @@ entry:
>>
>> define arm_aapcscc i32 @test3(i32* %a, i32* %b, i32 %x, i16* %y) {
>> ; ARM-LABEL: test3:
>> -; ARM: @ %bb.0: @ %entry
>> -; ARM-NEXT: ldr r0, [r0]
>> +; ARM: ldr r0, [r0]
>> ; ARM-NEXT: mul r1, r2, r0
>> ; ARM-NEXT: ldrh r2, [r3]
>> ; ARM-NEXT: eor r0, r0, r2
>> @@ -825,8 +718,7 @@ define arm_aapcscc i32 @test3(i32* %a, i
>> ; ARM-NEXT: bx lr
>> ;
>> ; ARMEB-LABEL: test3:
>> -; ARMEB: @ %bb.0: @ %entry
>> -; ARMEB-NEXT: ldr r0, [r0]
>> +; ARMEB: ldr r0, [r0]
>> ; ARMEB-NEXT: mul r1, r2, r0
>> ; ARMEB-NEXT: ldrh r2, [r3]
>> ; ARMEB-NEXT: eor r0, r0, r2
>> @@ -835,8 +727,7 @@ define arm_aapcscc i32 @test3(i32* %a, i
>> ; ARMEB-NEXT: bx lr
>> ;
>> ; THUMB1-LABEL: test3:
>> -; THUMB1: @ %bb.0: @ %entry
>> -; THUMB1-NEXT: ldr r0, [r0]
>> +; THUMB1: ldr r0, [r0]
>> ; THUMB1-NEXT: muls r2, r0, r2
>> ; THUMB1-NEXT: ldrh r1, [r3]
>> ; THUMB1-NEXT: eors r1, r0
>> @@ -845,8 +736,7 @@ define arm_aapcscc i32 @test3(i32* %a, i
>> ; THUMB1-NEXT: bx lr
>> ;
>> ; THUMB2-LABEL: test3:
>> -; THUMB2: @ %bb.0: @ %entry
>> -; THUMB2-NEXT: ldr r0, [r0]
>> +; THUMB2: ldr r0, [r0]
>> ; THUMB2-NEXT: mul r1, r2, r0
>> ; THUMB2-NEXT: ldrh r2, [r3]
>> ; THUMB2-NEXT: eors r0, r2
>> @@ -866,43 +756,39 @@ entry:
>>
>> define arm_aapcscc i32 @test4(i32* %a, i32* %b, i32 %x, i32 %y) {
>> ; ARM-LABEL: test4:
>> -; ARM: @ %bb.0: @ %entry
>> -; ARM-NEXT: mul r2, r2, r3
>> -; ARM-NEXT: ldr r1, [r1]
>> -; ARM-NEXT: ldr r0, [r0]
>> +; ARM: mul r2, r2, r3
>> +; ARM-NEXT: ldrh r1, [r1]
>> +; ARM-NEXT: ldrh r0, [r0]
>> ; ARM-NEXT: eor r0, r0, r1
>> -; ARM-NEXT: orr r0, r0, r2
>> -; ARM-NEXT: uxth r0, r0
>> +; ARM-NEXT: uxth r1, r2
>> +; ARM-NEXT: orr r0, r0, r1
>> ; ARM-NEXT: bx lr
>> ;
>> ; ARMEB-LABEL: test4:
>> -; ARMEB: @ %bb.0: @ %entry
>> -; ARMEB-NEXT: mul r2, r2, r3
>> -; ARMEB-NEXT: ldr r1, [r1]
>> -; ARMEB-NEXT: ldr r0, [r0]
>> +; ARMEB: mul r2, r2, r3
>> +; ARMEB-NEXT: ldrh r1, [r1, #2]
>> +; ARMEB-NEXT: ldrh r0, [r0, #2]
>> ; ARMEB-NEXT: eor r0, r0, r1
>> -; ARMEB-NEXT: orr r0, r0, r2
>> -; ARMEB-NEXT: uxth r0, r0
>> +; ARMEB-NEXT: uxth r1, r2
>> +; ARMEB-NEXT: orr r0, r0, r1
>> ; ARMEB-NEXT: bx lr
>> ;
>> ; THUMB1-LABEL: test4:
>> -; THUMB1: @ %bb.0: @ %entry
>> +; THUMB1: ldrh r1, [r1]
>> +; THUMB1-NEXT: ldrh r4, [r0]
>> +; THUMB1-NEXT: eors r4, r1
>> ; THUMB1-NEXT: muls r2, r3, r2
>> -; THUMB1-NEXT: ldr r1, [r1]
>> -; THUMB1-NEXT: ldr r0, [r0]
>> -; THUMB1-NEXT: eors r0, r1
>> -; THUMB1-NEXT: orrs r0, r2
>> -; THUMB1-NEXT: uxth r0, r0
>> -; THUMB1-NEXT: bx lr
>> +; THUMB1-NEXT: uxth r0, r2
>> +; THUMB1-NEXT: orrs r0, r4
>> +; THUMB1-NEXT: pop
>> ;
>> ; THUMB2-LABEL: test4:
>> -; THUMB2: @ %bb.0: @ %entry
>> -; THUMB2-NEXT: muls r2, r3, r2
>> -; THUMB2-NEXT: ldr r1, [r1]
>> -; THUMB2-NEXT: ldr r0, [r0]
>> +; THUMB2: ldrh r1, [r1]
>> +; THUMB2-NEXT: ldrh r0, [r0]
>> ; THUMB2-NEXT: eors r0, r1
>> -; THUMB2-NEXT: orrs r0, r2
>> -; THUMB2-NEXT: uxth r0, r0
>> +; THUMB2-NEXT: mul r1, r2, r3
>> +; THUMB2-NEXT: uxth r1, r1
>> +; THUMB2-NEXT: orrs r0, r1
>> ; THUMB2-NEXT: bx lr
>> entry:
>> %0 = load i32, i32* %a, align 4
>> @@ -916,43 +802,39 @@ entry:
>>
>> define arm_aapcscc i32 @test5(i32* %a, i32* %b, i32 %x, i16 zeroext %y) {
>> ; ARM-LABEL: test5:
>> -; ARM: @ %bb.0: @ %entry
>> -; ARM-NEXT: ldr r1, [r1]
>> -; ARM-NEXT: ldr r0, [r0]
>> +; ARM: ldr r1, [r1]
>> +; ARM-NEXT: ldrh r0, [r0]
>> ; ARM-NEXT: mul r1, r2, r1
>> ; ARM-NEXT: eor r0, r0, r3
>> +; ARM-NEXT: uxth r1, r1
>> ; ARM-NEXT: orr r0, r0, r1
>> -; ARM-NEXT: uxth r0, r0
>> ; ARM-NEXT: bx lr
>> ;
>> ; ARMEB-LABEL: test5:
>> -; ARMEB: @ %bb.0: @ %entry
>> -; ARMEB-NEXT: ldr r1, [r1]
>> -; ARMEB-NEXT: ldr r0, [r0]
>> +; ARMEB: ldr r1, [r1]
>> +; ARMEB-NEXT: ldrh r0, [r0, #2]
>> ; ARMEB-NEXT: mul r1, r2, r1
>> ; ARMEB-NEXT: eor r0, r0, r3
>> +; ARMEB-NEXT: uxth r1, r1
>> ; ARMEB-NEXT: orr r0, r0, r1
>> -; ARMEB-NEXT: uxth r0, r0
>> ; ARMEB-NEXT: bx lr
>> ;
>> ; THUMB1-LABEL: test5:
>> -; THUMB1: @ %bb.0: @ %entry
>> -; THUMB1-NEXT: ldr r1, [r1]
>> -; THUMB1-NEXT: muls r1, r2, r1
>> -; THUMB1-NEXT: ldr r0, [r0]
>> -; THUMB1-NEXT: eors r0, r3
>> -; THUMB1-NEXT: orrs r0, r1
>> +; THUMB1: ldrh r4, [r0]
>> +; THUMB1-NEXT: eors r4, r3
>> +; THUMB1-NEXT: ldr r0, [r1]
>> +; THUMB1-NEXT: muls r0, r2, r0
>> ; THUMB1-NEXT: uxth r0, r0
>> -; THUMB1-NEXT: bx lr
>> +; THUMB1-NEXT: orrs r0, r4
>> +; THUMB1-NEXT: pop
>> ;
>> ; THUMB2-LABEL: test5:
>> -; THUMB2: @ %bb.0: @ %entry
>> -; THUMB2-NEXT: ldr r1, [r1]
>> -; THUMB2-NEXT: ldr r0, [r0]
>> +; THUMB2: ldr r1, [r1]
>> +; THUMB2-NEXT: ldrh r0, [r0]
>> ; THUMB2-NEXT: muls r1, r2, r1
>> ; THUMB2-NEXT: eors r0, r3
>> +; THUMB2-NEXT: uxth r1, r1
>> ; THUMB2-NEXT: orrs r0, r1
>> -; THUMB2-NEXT: uxth r0, r0
>> ; THUMB2-NEXT: bx lr
>> entry:
>> %0 = load i32, i32* %a, align 4
>> @@ -1024,10 +906,9 @@ entry:
>> define arm_aapcscc i1 @test7(i16* %x, i16 %y, i8 %z) {
>> ; ARM-LABEL: test7:
>> ; ARM: @ %bb.0: @ %entry
>> -; ARM-NEXT: ldrh r0, [r0]
>> +; ARM-NEXT: ldrb r0, [r0]
>> ; ARM-NEXT: uxtb r2, r2
>> -; ARM-NEXT: and r0, r0, r1
>> -; ARM-NEXT: uxtb r1, r0
>> +; ARM-NEXT: and r1, r0, r1
>> ; ARM-NEXT: mov r0, #0
>> ; ARM-NEXT: cmp r1, r2
>> ; ARM-NEXT: movweq r0, #1
>> @@ -1035,10 +916,9 @@ define arm_aapcscc i1 @test7(i16* %x, i1
>> ;
>> ; ARMEB-LABEL: test7:
>> ; ARMEB: @ %bb.0: @ %entry
>> -; ARMEB-NEXT: ldrh r0, [r0]
>> +; ARMEB-NEXT: ldrb r0, [r0, #1]
>> ; ARMEB-NEXT: uxtb r2, r2
>> -; ARMEB-NEXT: and r0, r0, r1
>> -; ARMEB-NEXT: uxtb r1, r0
>> +; ARMEB-NEXT: and r1, r0, r1
>> ; ARMEB-NEXT: mov r0, #0
>> ; ARMEB-NEXT: cmp r1, r2
>> ; ARMEB-NEXT: movweq r0, #1
>> @@ -1046,9 +926,8 @@ define arm_aapcscc i1 @test7(i16* %x, i1
>> ;
>> ; THUMB1-LABEL: test7:
>> ; THUMB1: @ %bb.0: @ %entry
>> -; THUMB1-NEXT: ldrh r0, [r0]
>> -; THUMB1-NEXT: ands r0, r1
>> -; THUMB1-NEXT: uxtb r3, r0
>> +; THUMB1-NEXT: ldrb r3, [r0]
>> +; THUMB1-NEXT: ands r3, r1
>> ; THUMB1-NEXT: uxtb r2, r2
>> ; THUMB1-NEXT: movs r0, #1
>> ; THUMB1-NEXT: movs r1, #0
>> @@ -1061,10 +940,9 @@ define arm_aapcscc i1 @test7(i16* %x, i1
>> ;
>> ; THUMB2-LABEL: test7:
>> ; THUMB2: @ %bb.0: @ %entry
>> -; THUMB2-NEXT: ldrh r0, [r0]
>> +; THUMB2-NEXT: ldrb r0, [r0]
>> ; THUMB2-NEXT: uxtb r2, r2
>> -; THUMB2-NEXT: ands r0, r1
>> -; THUMB2-NEXT: uxtb r1, r0
>> +; THUMB2-NEXT: ands r1, r0
>> ; THUMB2-NEXT: movs r0, #0
>> ; THUMB2-NEXT: cmp r1, r2
>> ; THUMB2-NEXT: it eq
>> @@ -1081,33 +959,30 @@ entry:
>> define arm_aapcscc void @test8(i32* nocapture %p) {
>> ; ARM-LABEL: test8:
>> ; ARM: @ %bb.0: @ %entry
>> -; ARM-NEXT: ldr r1, [r0]
>> -; ARM-NEXT: mvn r1, r1
>> -; ARM-NEXT: uxtb r1, r1
>> +; ARM-NEXT: ldrb r1, [r0]
>> +; ARM-NEXT: eor r1, r1, #255
>> ; ARM-NEXT: str r1, [r0]
>> ; ARM-NEXT: bx lr
>> ;
>> ; ARMEB-LABEL: test8:
>> ; ARMEB: @ %bb.0: @ %entry
>> -; ARMEB-NEXT: ldr r1, [r0]
>> -; ARMEB-NEXT: mvn r1, r1
>> -; ARMEB-NEXT: uxtb r1, r1
>> +; ARMEB-NEXT: ldrb r1, [r0, #3]
>> +; ARMEB-NEXT: eor r1, r1, #255
>> ; ARMEB-NEXT: str r1, [r0]
>> ; ARMEB-NEXT: bx lr
>> ;
>> ; THUMB1-LABEL: test8:
>> ; THUMB1: @ %bb.0: @ %entry
>> -; THUMB1-NEXT: ldr r1, [r0]
>> +; THUMB1-NEXT: ldrb r1, [r0]
>> ; THUMB1-NEXT: movs r2, #255
>> -; THUMB1-NEXT: bics r2, r1
>> +; THUMB1-NEXT: eors r2, r1
>> ; THUMB1-NEXT: str r2, [r0]
>> ; THUMB1-NEXT: bx lr
>> ;
>> ; THUMB2-LABEL: test8:
>> ; THUMB2: @ %bb.0: @ %entry
>> -; THUMB2-NEXT: ldr r1, [r0]
>> -; THUMB2-NEXT: mvns r1, r1
>> -; THUMB2-NEXT: uxtb r1, r1
>> +; THUMB2-NEXT: ldrb r1, [r0]
>> +; THUMB2-NEXT: eor r1, r1, #255
>> ; THUMB2-NEXT: str r1, [r0]
>> ; THUMB2-NEXT: bx lr
>> entry:
>> @@ -1117,3 +992,78 @@ entry:
>> store i32 %and, i32* %p, align 4
>> ret void
>> }
>> +
>> +define arm_aapcscc void @test9(i32* nocapture %p) {
>> +; ARM-LABEL: test9:
>> +; ARM: @ %bb.0: @ %entry
>> +; ARM-NEXT: ldrb r1, [r0]
>> +; ARM-NEXT: eor r1, r1, #255
>> +; ARM-NEXT: str r1, [r0]
>> +; ARM-NEXT: bx lr
>> +;
>> +; ARMEB-LABEL: test9:
>> +; ARMEB: @ %bb.0: @ %entry
>> +; ARMEB-NEXT: ldrb r1, [r0, #3]
>> +; ARMEB-NEXT: eor r1, r1, #255
>> +; ARMEB-NEXT: str r1, [r0]
>> +; ARMEB-NEXT: bx lr
>> +;
>> +; THUMB1-LABEL: test9:
>> +; THUMB1: @ %bb.0: @ %entry
>> +; THUMB1-NEXT: ldrb r1, [r0]
>> +; THUMB1-NEXT: movs r2, #255
>> +; THUMB1-NEXT: eors r2, r1
>> +; THUMB1-NEXT: str r2, [r0]
>> +; THUMB1-NEXT: bx lr
>> +;
>> +; THUMB2-LABEL: test9:
>> +; THUMB2: @ %bb.0: @ %entry
>> +; THUMB2-NEXT: ldrb r1, [r0]
>> +; THUMB2-NEXT: eor r1, r1, #255
>> +; THUMB2-NEXT: str r1, [r0]
>> +; THUMB2-NEXT: bx lr
>> +entry:
>> + %0 = load i32, i32* %p, align 4
>> + %neg = xor i32 %0, -1
>> + %and = and i32 %neg, 255
>> + store i32 %and, i32* %p, align 4
>> + ret void
>> +}
>> +
>> +; ARM-LABEL: test10:
>> +; ARM: @ %bb.0: @ %entry
>> +; ARM-NEXT: ldrb r1, [r0]
>> +; ARM-NEXT: eor r1, r1, #255
>> +; ARM-NEXT: str r1, [r0]
>> +; ARM-NEXT: bx lr
>> +;
>> +; ARMEB-LABEL: test10:
>> +; ARMEB: @ %bb.0: @ %entry
>> +; ARMEB-NEXT: ldrb r1, [r0, #3]
>> +; ARMEB-NEXT: eor r1, r1, #255
>> +; ARMEB-NEXT: str r1, [r0]
>> +; ARMEB-NEXT: bx lr
>> +;
>> +; THUMB1-LABEL: test10:
>> +; THUMB1: @ %bb.0: @ %entry
>> +; THUMB1-NEXT: ldrb r1, [r0]
>> +; THUMB1-NEXT: movs r2, #255
>> +; THUMB1-NEXT: eors r2, r1
>> +; THUMB1-NEXT: str r2, [r0]
>> +; THUMB1-NEXT: bx lr
>> +;
>> +; THUMB2-LABEL: test10:
>> +; THUMB2: @ %bb.0: @ %entry
>> +; THUMB2-NEXT: ldrb r1, [r0]
>> +; THUMB2-NEXT: eor r1, r1, #255
>> +; THUMB2-NEXT: str r1, [r0]
>> +; THUMB2-NEXT: bx lr
>> +define arm_aapcscc void @test10(i32* nocapture %p) {
>> +entry:
>> + %0 = load i32, i32* %p, align 4
>> + %neg = and i32 %0, 255
>> + %and = xor i32 %neg, 255
>> + store i32 %and, i32* %p, align 4
>> + ret void
>> +}
>> +
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20171228/02b37795/attachment.html>
More information about the llvm-commits mailing list