[llvm] r320962 - [DAGCombine] Move AND nodes to multiple load leaves

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 28 16:43:39 PST 2017


This is also responsible for PR35763, which occurs even at O0 due to a
fast-isel abort. It does not look to be related to ANY_EXTEND.

~Craig

On Thu, Dec 28, 2017 at 4:26 PM, Craig Topper <craig.topper at gmail.com>
wrote:

> Possibly because the patch treats ANY_EXTEND like ZERO_EXTEND, but that's
> not safe.
>
> ~Craig
>
> On Thu, Dec 28, 2017 at 4:14 PM, Craig Topper <craig.topper at gmail.com>
> wrote:
>
>> I believe this may be responsible for PR35765
>>
>> ~Craig
>>
>> On Mon, Dec 18, 2017 at 2:04 AM, Sam Parker via llvm-commits <
>> llvm-commits at lists.llvm.org> wrote:
>>
>>> Author: sam_parker
>>> Date: Mon Dec 18 02:04:27 2017
>>> New Revision: 320962
>>>
>>> URL: http://llvm.org/viewvc/llvm-project?rev=320962&view=rev
>>> Log:
>>> [DAGCombine] Move AND nodes to multiple load leaves
>>>
>>> Search from AND nodes to find whether they can be propagated back to
>>> loads, so that the AND and load can be combined into a narrow load.
>>> We search through OR, XOR and other AND nodes and all bar one of the
>>> leaves are required to be loads or constants. The exception node then
>>> needs to be masked off meaning that the 'and' isn't removed, but the
>>> load(s) are still narrowed.
>>>
>>> Differential Revision: https://reviews.llvm.org/D41177
>>>
>>> Modified:
>>>     llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>>>     llvm/trunk/test/CodeGen/ARM/and-load-combine.ll
>>>
>>> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/S
>>> electionDAG/DAGCombiner.cpp?rev=320962&r1=320961&r2=320962&view=diff
>>> ============================================================
>>> ==================
>>> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
>>> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Dec 18
>>> 02:04:27 2017
>>> @@ -505,6 +505,14 @@ namespace {
>>>      bool isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
>>>                             EVT &ExtVT, unsigned ShAmt = 0);
>>>
>>> +    /// Used by BackwardsPropagateMask to find suitable loads.
>>> +    bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*>
>>> &Loads,
>>> +                           SmallPtrSetImpl<SDNode*> &NodeWithConsts,
>>> +                           ConstantSDNode *Mask, SDNode
>>> *&UncombinedNode);
>>> +    /// Attempt to propagate a given AND node back to load leaves so
>>> that they
>>> +    /// can be combined into narrow loads.
>>> +    bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
>>> +
>>>      /// Helper function for MergeConsecutiveStores which merges the
>>>      /// component store chains.
>>>      SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
>>> @@ -3798,6 +3806,132 @@ bool DAGCombiner::isLegalNarrowLoad(Load
>>>    return true;
>>>  }
>>>
>>> +bool DAGCombiner::SearchForAndLoads(SDNode *N,
>>> +                                    SmallPtrSetImpl<LoadSDNode*> &Loads,
>>> +                                    SmallPtrSetImpl<SDNode*>
>>> &NodesWithConsts,
>>> +                                    ConstantSDNode *Mask,
>>> +                                    SDNode *&NodeToMask) {
>>> +  // Recursively search for the operands, looking for loads which can be
>>> +  // narrowed.
>>> +  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
>>> +    SDValue Op = N->getOperand(i);
>>> +
>>> +    if (Op.getValueType().isVector())
>>> +      return false;
>>> +
>>> +    // Some constants may need fixing up later if they are too large.
>>> +    if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
>>> +      if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
>>> +          (Mask->getAPIntValue() & C->getAPIntValue()) !=
>>> C->getAPIntValue())
>>> +        NodesWithConsts.insert(N);
>>> +      continue;
>>> +    }
>>> +
>>> +    if (!Op.hasOneUse())
>>> +      return false;
>>> +
>>> +    switch(Op.getOpcode()) {
>>> +    case ISD::LOAD: {
>>> +      auto *Load = cast<LoadSDNode>(Op);
>>> +      EVT ExtVT;
>>> +      if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
>>> +          isLegalNarrowLoad(Load, ISD::ZEXTLOAD, ExtVT)) {
>>> +        // Only add this load if we can make it more narrow.
>>> +        if (ExtVT.bitsLT(Load->getMemoryVT()))
>>> +          Loads.insert(Load);
>>> +        continue;
>>> +      }
>>> +      return false;
>>> +    }
>>> +    case ISD::ZERO_EXTEND:
>>> +    case ISD::ANY_EXTEND:
>>> +    case ISD::AssertZext: {
>>> +      unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
>>> +      EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
>>> +      EVT VT = Op.getOpcode() == ISD::AssertZext ?
>>> +        cast<VTSDNode>(Op.getOperand(1))->getVT() :
>>> +        Op.getOperand(0).getValueType();
>>> +
>>> +      // We can accept extending nodes if the mask is wider or an equal
>>> +      // width to the original type.
>>> +      if (ExtVT.bitsGE(VT))
>>> +        continue;
>>> +      break;
>>> +    }
>>> +    case ISD::OR:
>>> +    case ISD::XOR:
>>> +    case ISD::AND:
>>> +      if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts,
>>> Mask,
>>> +                             NodeToMask))
>>> +        return false;
>>> +      continue;
>>> +    }
>>> +
>>> +    // Allow one node which will masked along with any loads found.
>>> +    if (NodeToMask)
>>> +      return false;
>>> +    NodeToMask = Op.getNode();
>>> +  }
>>> +  return true;
>>> +}
>>> +
>>> +bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG)
>>> {
>>> +  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
>>> +  if (!Mask)
>>> +    return false;
>>> +
>>> +  if (!Mask->getAPIntValue().isMask())
>>> +    return false;
>>> +
>>> +  // No need to do anything if the and directly uses a load.
>>> +  if (isa<LoadSDNode>(N->getOperand(0)))
>>> +    return false;
>>> +
>>> +  SmallPtrSet<LoadSDNode*, 8> Loads;
>>> +  SmallPtrSet<SDNode*, 2> NodesWithConsts;
>>> +  SDNode *FixupNode = nullptr;
>>> +  if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
>>> +    if (Loads.size() == 0)
>>> +      return false;
>>> +
>>> +    SDValue MaskOp = N->getOperand(1);
>>> +
>>> +    // If it exists, fixup the single node we allow in the tree that
>>> needs
>>> +    // masking.
>>> +    if (FixupNode) {
>>> +      SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
>>> +                                FixupNode->getValueType(0),
>>> +                                SDValue(FixupNode, 0), MaskOp);
>>> +      DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
>>> +      DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0),
>>> +                             MaskOp);
>>> +    }
>>> +
>>> +    // Narrow any constants that need it.
>>> +    for (auto *LogicN : NodesWithConsts) {
>>> +      auto *C = cast<ConstantSDNode>(LogicN->getOperand(1));
>>> +      SDValue And = DAG.getNode(ISD::AND, SDLoc(C), C->getValueType(0),
>>> +                                SDValue(C, 0), MaskOp);
>>> +      DAG.UpdateNodeOperands(LogicN, LogicN->getOperand(0), And);
>>> +    }
>>> +
>>> +    // Create narrow loads.
>>> +    for (auto *Load : Loads) {
>>> +      SDValue And = DAG.getNode(ISD::AND, SDLoc(Load),
>>> Load->getValueType(0),
>>> +                                SDValue(Load, 0), MaskOp);
>>> +      DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
>>> +      DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp);
>>> +      SDValue NewLoad = ReduceLoadWidth(And.getNode());
>>> +      assert(NewLoad &&
>>> +             "Shouldn't be masking the load if it can't be narrowed");
>>> +      CombineTo(Load, NewLoad, NewLoad.getValue(1));
>>> +    }
>>> +    DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
>>> +    return true;
>>> +  }
>>> +  return false;
>>> +}
>>> +
>>>  SDValue DAGCombiner::visitAND(SDNode *N) {
>>>    SDValue N0 = N->getOperand(0);
>>>    SDValue N1 = N->getOperand(1);
>>> @@ -3998,6 +4132,16 @@ SDValue DAGCombiner::visitAND(SDNode *N)
>>>        return SDValue(N, 0);
>>>      }
>>>    }
>>> +
>>> +  if (Level >= AfterLegalizeTypes) {
>>> +    // Attempt to propagate the AND back up to the leaves which, if
>>> they're
>>> +    // loads, can be combined to narrow loads and the AND node can be
>>> removed.
>>> +    // Perform after legalization so that extend nodes will already be
>>> +    // combined into the loads.
>>> +    if (BackwardsPropagateMask(N, DAG)) {
>>> +      return SDValue(N, 0);
>>> +    }
>>> +  }
>>>
>>>    if (SDValue Combined = visitANDLike(N0, N1, N))
>>>      return Combined;
>>>
>>> Modified: llvm/trunk/test/CodeGen/ARM/and-load-combine.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/
>>> ARM/and-load-combine.ll?rev=320962&r1=320961&r2=320962&view=diff
>>> ============================================================
>>> ==================
>>> --- llvm/trunk/test/CodeGen/ARM/and-load-combine.ll (original)
>>> +++ llvm/trunk/test/CodeGen/ARM/and-load-combine.ll Mon Dec 18 02:04:27
>>> 2017
>>> @@ -5,34 +5,30 @@
>>>  ; RUN: llc -mtriple=thumbv8m.main %s -o - | FileCheck %s
>>> --check-prefix=THUMB2
>>>
>>>  define arm_aapcscc zeroext i1 @cmp_xor8_short_short(i16* nocapture
>>> readonly %a,
>>> +                                                    i16* nocapture
>>> readonly %b) {
>>>  ; ARM-LABEL: cmp_xor8_short_short:
>>> -; ARM:       @ %bb.0: @ %entry
>>> -; ARM-NEXT:    ldrh r0, [r0]
>>> -; ARM-NEXT:    ldrh r1, [r1]
>>> -; ARM-NEXT:    eor r1, r1, r0
>>> +; ARM:         ldrb r2, [r0]
>>>  ; ARM-NEXT:    mov r0, #0
>>> -; ARM-NEXT:    tst r1, #255
>>> +; ARM-NEXT:    ldrb r1, [r1]
>>> +; ARM-NEXT:    teq r1, r2
>>>  ; ARM-NEXT:    movweq r0, #1
>>>  ; ARM-NEXT:    bx lr
>>>  ;
>>>  ; ARMEB-LABEL: cmp_xor8_short_short:
>>> -; ARMEB:       @ %bb.0: @ %entry
>>> -; ARMEB-NEXT:    ldrh r0, [r0]
>>> -; ARMEB-NEXT:    ldrh r1, [r1]
>>> -; ARMEB-NEXT:    eor r1, r1, r0
>>> +; ARMEB:        ldrb r2, [r0, #1]
>>>  ; ARMEB-NEXT:    mov r0, #0
>>> -; ARMEB-NEXT:    tst r1, #255
>>> +; ARMEB-NEXT:    ldrb r1, [r1, #1]
>>> +; ARMEB-NEXT:    teq r1, r2
>>>  ; ARMEB-NEXT:    movweq r0, #1
>>>  ; ARMEB-NEXT:    bx lr
>>>  ;
>>>  ; THUMB1-LABEL: cmp_xor8_short_short:
>>> -; THUMB1:       @ %bb.0: @ %entry
>>> -; THUMB1-NEXT:    ldrh r0, [r0]
>>> -; THUMB1-NEXT:    ldrh r2, [r1]
>>> +; THUMB1:         ldrb r0, [r0]
>>> +; THUMB1-NEXT:    ldrb r2, [r1]
>>>  ; THUMB1-NEXT:    eors r2, r0
>>>  ; THUMB1-NEXT:    movs r0, #1
>>>  ; THUMB1-NEXT:    movs r1, #0
>>> -; THUMB1-NEXT:    lsls r2, r2, #24
>>> +; THUMB1-NEXT:    cmp r2, #0
>>>  ; THUMB1-NEXT:    beq .LBB0_2
>>>  ; THUMB1-NEXT:  @ %bb.1: @ %entry
>>>  ; THUMB1-NEXT:    mov r0, r1
>>> @@ -40,16 +36,13 @@ define arm_aapcscc zeroext i1 @cmp_xor8_
>>>  ; THUMB1-NEXT:    bx lr
>>>  ;
>>>  ; THUMB2-LABEL: cmp_xor8_short_short:
>>> -; THUMB2:       @ %bb.0: @ %entry
>>> -; THUMB2-NEXT:    ldrh r0, [r0]
>>> -; THUMB2-NEXT:    ldrh r1, [r1]
>>> -; THUMB2-NEXT:    eors r0, r1
>>> -; THUMB2-NEXT:    lsls r0, r0, #24
>>> -; THUMB2-NEXT:    mov.w r0, #0
>>> +; THUMB2:         ldrb r2, [r0]
>>> +; THUMB2-NEXT:    movs r0, #0
>>> +; THUMB2-NEXT:    ldrb r1, [r1]
>>> +; THUMB2-NEXT:    teq.w r1, r2
>>>  ; THUMB2-NEXT:    it eq
>>>  ; THUMB2-NEXT:    moveq r0, #1
>>>  ; THUMB2-NEXT:    bx lr
>>> -                                                    i16* nocapture
>>> readonly %b) {
>>>  entry:
>>>    %0 = load i16, i16* %a, align 2
>>>    %1 = load i16, i16* %b, align 2
>>> @@ -60,34 +53,30 @@ entry:
>>>  }
>>>
>>>  define arm_aapcscc zeroext i1 @cmp_xor8_short_int(i16* nocapture
>>> readonly %a,
>>> +                                                  i32* nocapture
>>> readonly %b) {
>>>  ; ARM-LABEL: cmp_xor8_short_int:
>>> -; ARM:       @ %bb.0: @ %entry
>>> -; ARM-NEXT:    ldrh r0, [r0]
>>> -; ARM-NEXT:    ldr r1, [r1]
>>> -; ARM-NEXT:    eor r1, r1, r0
>>> +; ARM:         ldrb r2, [r0]
>>>  ; ARM-NEXT:    mov r0, #0
>>> -; ARM-NEXT:    tst r1, #255
>>> +; ARM-NEXT:    ldrb r1, [r1]
>>> +; ARM-NEXT:    teq r1, r2
>>>  ; ARM-NEXT:    movweq r0, #1
>>>  ; ARM-NEXT:    bx lr
>>>  ;
>>>  ; ARMEB-LABEL: cmp_xor8_short_int:
>>> -; ARMEB:       @ %bb.0: @ %entry
>>> -; ARMEB-NEXT:    ldrh r0, [r0]
>>> -; ARMEB-NEXT:    ldr r1, [r1]
>>> -; ARMEB-NEXT:    eor r1, r1, r0
>>> +; ARMEB:         ldrb r2, [r0, #1]
>>>  ; ARMEB-NEXT:    mov r0, #0
>>> -; ARMEB-NEXT:    tst r1, #255
>>> +; ARMEB-NEXT:    ldrb r1, [r1, #3]
>>> +; ARMEB-NEXT:    teq r1, r2
>>>  ; ARMEB-NEXT:    movweq r0, #1
>>>  ; ARMEB-NEXT:    bx lr
>>>  ;
>>>  ; THUMB1-LABEL: cmp_xor8_short_int:
>>> -; THUMB1:       @ %bb.0: @ %entry
>>> -; THUMB1-NEXT:    ldrh r0, [r0]
>>> -; THUMB1-NEXT:    ldr r2, [r1]
>>> +; THUMB1:         ldrb r0, [r0]
>>> +; THUMB1-NEXT:    ldrb r2, [r1]
>>>  ; THUMB1-NEXT:    eors r2, r0
>>>  ; THUMB1-NEXT:    movs r0, #1
>>>  ; THUMB1-NEXT:    movs r1, #0
>>> -; THUMB1-NEXT:    lsls r2, r2, #24
>>> +; THUMB1-NEXT:    cmp r2, #0
>>>  ; THUMB1-NEXT:    beq .LBB1_2
>>>  ; THUMB1-NEXT:  @ %bb.1: @ %entry
>>>  ; THUMB1-NEXT:    mov r0, r1
>>> @@ -95,16 +84,13 @@ define arm_aapcscc zeroext i1 @cmp_xor8_
>>>  ; THUMB1-NEXT:    bx lr
>>>  ;
>>>  ; THUMB2-LABEL: cmp_xor8_short_int:
>>> -; THUMB2:       @ %bb.0: @ %entry
>>> -; THUMB2-NEXT:    ldrh r0, [r0]
>>> -; THUMB2-NEXT:    ldr r1, [r1]
>>> -; THUMB2-NEXT:    eors r0, r1
>>> -; THUMB2-NEXT:    lsls r0, r0, #24
>>> -; THUMB2-NEXT:    mov.w r0, #0
>>> +; THUMB2:         ldrb r2, [r0]
>>> +; THUMB2-NEXT:    movs r0, #0
>>> +; THUMB2-NEXT:    ldrb r1, [r1]
>>> +; THUMB2-NEXT:    teq.w r1, r2
>>>  ; THUMB2-NEXT:    it eq
>>>  ; THUMB2-NEXT:    moveq r0, #1
>>>  ; THUMB2-NEXT:    bx lr
>>> -                                                  i32* nocapture
>>> readonly %b) {
>>>  entry:
>>>    %0 = load i16, i16* %a, align 2
>>>    %conv = zext i16 %0 to i32
>>> @@ -116,34 +102,30 @@ entry:
>>>  }
>>>
>>>  define arm_aapcscc zeroext i1 @cmp_xor8_int_int(i32* nocapture readonly
>>> %a,
>>> +                                                i32* nocapture readonly
>>> %b) {
>>>  ; ARM-LABEL: cmp_xor8_int_int:
>>> -; ARM:       @ %bb.0: @ %entry
>>> -; ARM-NEXT:    ldr r0, [r0]
>>> -; ARM-NEXT:    ldr r1, [r1]
>>> -; ARM-NEXT:    eor r1, r1, r0
>>> +; ARM:         ldrb r2, [r0]
>>>  ; ARM-NEXT:    mov r0, #0
>>> -; ARM-NEXT:    tst r1, #255
>>> +; ARM-NEXT:    ldrb r1, [r1]
>>> +; ARM-NEXT:    teq r1, r2
>>>  ; ARM-NEXT:    movweq r0, #1
>>>  ; ARM-NEXT:    bx lr
>>>  ;
>>>  ; ARMEB-LABEL: cmp_xor8_int_int:
>>> -; ARMEB:       @ %bb.0: @ %entry
>>> -; ARMEB-NEXT:    ldr r0, [r0]
>>> -; ARMEB-NEXT:    ldr r1, [r1]
>>> -; ARMEB-NEXT:    eor r1, r1, r0
>>> +; ARMEB:         ldrb r2, [r0, #3]
>>>  ; ARMEB-NEXT:    mov r0, #0
>>> -; ARMEB-NEXT:    tst r1, #255
>>> +; ARMEB-NEXT:    ldrb r1, [r1, #3]
>>> +; ARMEB-NEXT:    teq r1, r2
>>>  ; ARMEB-NEXT:    movweq r0, #1
>>>  ; ARMEB-NEXT:    bx lr
>>>  ;
>>>  ; THUMB1-LABEL: cmp_xor8_int_int:
>>> -; THUMB1:       @ %bb.0: @ %entry
>>> -; THUMB1-NEXT:    ldr r0, [r0]
>>> -; THUMB1-NEXT:    ldr r2, [r1]
>>> +; THUMB1:         ldrb r0, [r0]
>>> +; THUMB1-NEXT:    ldrb r2, [r1]
>>>  ; THUMB1-NEXT:    eors r2, r0
>>>  ; THUMB1-NEXT:    movs r0, #1
>>>  ; THUMB1-NEXT:    movs r1, #0
>>> -; THUMB1-NEXT:    lsls r2, r2, #24
>>> +; THUMB1-NEXT:    cmp r2, #0
>>>  ; THUMB1-NEXT:    beq .LBB2_2
>>>  ; THUMB1-NEXT:  @ %bb.1: @ %entry
>>>  ; THUMB1-NEXT:    mov r0, r1
>>> @@ -151,16 +133,13 @@ define arm_aapcscc zeroext i1 @cmp_xor8_
>>>  ; THUMB1-NEXT:    bx lr
>>>  ;
>>>  ; THUMB2-LABEL: cmp_xor8_int_int:
>>> -; THUMB2:       @ %bb.0: @ %entry
>>> -; THUMB2-NEXT:    ldr r0, [r0]
>>> -; THUMB2-NEXT:    ldr r1, [r1]
>>> -; THUMB2-NEXT:    eors r0, r1
>>> -; THUMB2-NEXT:    lsls r0, r0, #24
>>> -; THUMB2-NEXT:    mov.w r0, #0
>>> +; THUMB2:         ldrb r2, [r0]
>>> +; THUMB2-NEXT:    movs r0, #0
>>> +; THUMB2-NEXT:    ldrb r1, [r1]
>>> +; THUMB2-NEXT:    teq.w r1, r2
>>>  ; THUMB2-NEXT:    it eq
>>>  ; THUMB2-NEXT:    moveq r0, #1
>>>  ; THUMB2-NEXT:    bx lr
>>> -                                                i32* nocapture readonly
>>> %b) {
>>>  entry:
>>>    %0 = load i32, i32* %a, align 4
>>>    %1 = load i32, i32* %b, align 4
>>> @@ -171,36 +150,30 @@ entry:
>>>  }
>>>
>>>  define arm_aapcscc zeroext i1 @cmp_xor16(i32* nocapture readonly %a,
>>> +                                         i32* nocapture readonly %b) {
>>>  ; ARM-LABEL: cmp_xor16:
>>> -; ARM:       @ %bb.0: @ %entry
>>> -; ARM-NEXT:    ldr r0, [r0]
>>> -; ARM-NEXT:    movw r2, #65535
>>> -; ARM-NEXT:    ldr r1, [r1]
>>> -; ARM-NEXT:    eor r1, r1, r0
>>> +; ARM:         ldrh r2, [r0]
>>>  ; ARM-NEXT:    mov r0, #0
>>> -; ARM-NEXT:    tst r1, r2
>>> +; ARM-NEXT:    ldrh r1, [r1]
>>> +; ARM-NEXT:    teq r1, r2
>>>  ; ARM-NEXT:    movweq r0, #1
>>>  ; ARM-NEXT:    bx lr
>>>  ;
>>>  ; ARMEB-LABEL: cmp_xor16:
>>> -; ARMEB:       @ %bb.0: @ %entry
>>> -; ARMEB-NEXT:    ldr r0, [r0]
>>> -; ARMEB-NEXT:    movw r2, #65535
>>> -; ARMEB-NEXT:    ldr r1, [r1]
>>> -; ARMEB-NEXT:    eor r1, r1, r0
>>> +; ARMEB:         ldrh r2, [r0, #2]
>>>  ; ARMEB-NEXT:    mov r0, #0
>>> -; ARMEB-NEXT:    tst r1, r2
>>> +; ARMEB-NEXT:    ldrh r1, [r1, #2]
>>> +; ARMEB-NEXT:    teq r1, r2
>>>  ; ARMEB-NEXT:    movweq r0, #1
>>>  ; ARMEB-NEXT:    bx lr
>>>  ;
>>>  ; THUMB1-LABEL: cmp_xor16:
>>> -; THUMB1:       @ %bb.0: @ %entry
>>> -; THUMB1-NEXT:    ldr r0, [r0]
>>> -; THUMB1-NEXT:    ldr r2, [r1]
>>> +; THUMB1:         ldrh r0, [r0]
>>> +; THUMB1-NEXT:    ldrh r2, [r1]
>>>  ; THUMB1-NEXT:    eors r2, r0
>>>  ; THUMB1-NEXT:    movs r0, #1
>>>  ; THUMB1-NEXT:    movs r1, #0
>>> -; THUMB1-NEXT:    lsls r2, r2, #16
>>> +; THUMB1-NEXT:    cmp r2, #0
>>>  ; THUMB1-NEXT:    beq .LBB3_2
>>>  ; THUMB1-NEXT:  @ %bb.1: @ %entry
>>>  ; THUMB1-NEXT:    mov r0, r1
>>> @@ -208,16 +181,13 @@ define arm_aapcscc zeroext i1 @cmp_xor16
>>>  ; THUMB1-NEXT:    bx lr
>>>  ;
>>>  ; THUMB2-LABEL: cmp_xor16:
>>> -; THUMB2:       @ %bb.0: @ %entry
>>> -; THUMB2-NEXT:    ldr r0, [r0]
>>> -; THUMB2-NEXT:    ldr r1, [r1]
>>> -; THUMB2-NEXT:    eors r0, r1
>>> -; THUMB2-NEXT:    lsls r0, r0, #16
>>> -; THUMB2-NEXT:    mov.w r0, #0
>>> +; THUMB2:         ldrh r2, [r0]
>>> +; THUMB2-NEXT:    movs r0, #0
>>> +; THUMB2-NEXT:    ldrh r1, [r1]
>>> +; THUMB2-NEXT:    teq.w r1, r2
>>>  ; THUMB2-NEXT:    it eq
>>>  ; THUMB2-NEXT:    moveq r0, #1
>>>  ; THUMB2-NEXT:    bx lr
>>> -                                         i32* nocapture readonly %b) {
>>>  entry:
>>>    %0 = load i32, i32* %a, align 4
>>>    %1 = load i32, i32* %b, align 4
>>> @@ -228,34 +198,30 @@ entry:
>>>  }
>>>
>>>  define arm_aapcscc zeroext i1 @cmp_or8_short_short(i16* nocapture
>>> readonly %a,
>>> +                                                   i16* nocapture
>>> readonly %b) {
>>>  ; ARM-LABEL: cmp_or8_short_short:
>>> -; ARM:       @ %bb.0: @ %entry
>>> -; ARM-NEXT:    ldrh r0, [r0]
>>> -; ARM-NEXT:    ldrh r1, [r1]
>>> -; ARM-NEXT:    orr r1, r1, r0
>>> +; ARM:         ldrb r0, [r0]
>>> +; ARM-NEXT:    ldrb r1, [r1]
>>> +; ARM-NEXT:    orrs r0, r1, r0
>>>  ; ARM-NEXT:    mov r0, #0
>>> -; ARM-NEXT:    tst r1, #255
>>>  ; ARM-NEXT:    movweq r0, #1
>>>  ; ARM-NEXT:    bx lr
>>>  ;
>>>  ; ARMEB-LABEL: cmp_or8_short_short:
>>> -; ARMEB:       @ %bb.0: @ %entry
>>> -; ARMEB-NEXT:    ldrh r0, [r0]
>>> -; ARMEB-NEXT:    ldrh r1, [r1]
>>> -; ARMEB-NEXT:    orr r1, r1, r0
>>> +; ARMEB:         ldrb r0, [r0, #1]
>>> +; ARMEB-NEXT:    ldrb r1, [r1, #1]
>>> +; ARMEB-NEXT:    orrs r0, r1, r0
>>>  ; ARMEB-NEXT:    mov r0, #0
>>> -; ARMEB-NEXT:    tst r1, #255
>>>  ; ARMEB-NEXT:    movweq r0, #1
>>>  ; ARMEB-NEXT:    bx lr
>>>  ;
>>>  ; THUMB1-LABEL: cmp_or8_short_short:
>>> -; THUMB1:       @ %bb.0: @ %entry
>>> -; THUMB1-NEXT:    ldrh r0, [r0]
>>> -; THUMB1-NEXT:    ldrh r2, [r1]
>>> +; THUMB1:         ldrb r0, [r0]
>>> +; THUMB1-NEXT:    ldrb r2, [r1]
>>>  ; THUMB1-NEXT:    orrs r2, r0
>>>  ; THUMB1-NEXT:    movs r0, #1
>>>  ; THUMB1-NEXT:    movs r1, #0
>>> -; THUMB1-NEXT:    lsls r2, r2, #24
>>> +; THUMB1-NEXT:    cmp r2, #0
>>>  ; THUMB1-NEXT:    beq .LBB4_2
>>>  ; THUMB1-NEXT:  @ %bb.1: @ %entry
>>>  ; THUMB1-NEXT:    mov r0, r1
>>> @@ -263,16 +229,13 @@ define arm_aapcscc zeroext i1 @cmp_or8_s
>>>  ; THUMB1-NEXT:    bx lr
>>>  ;
>>>  ; THUMB2-LABEL: cmp_or8_short_short:
>>> -; THUMB2:       @ %bb.0: @ %entry
>>> -; THUMB2-NEXT:    ldrh r0, [r0]
>>> -; THUMB2-NEXT:    ldrh r1, [r1]
>>> +; THUMB2:         ldrb r0, [r0]
>>> +; THUMB2-NEXT:    ldrb r1, [r1]
>>>  ; THUMB2-NEXT:    orrs r0, r1
>>> -; THUMB2-NEXT:    lsls r0, r0, #24
>>>  ; THUMB2-NEXT:    mov.w r0, #0
>>>  ; THUMB2-NEXT:    it eq
>>>  ; THUMB2-NEXT:    moveq r0, #1
>>>  ; THUMB2-NEXT:    bx lr
>>> -                                                   i16* nocapture
>>> readonly %b) {
>>>  entry:
>>>    %0 = load i16, i16* %a, align 2
>>>    %1 = load i16, i16* %b, align 2
>>> @@ -283,34 +246,30 @@ entry:
>>>  }
>>>
>>>  define arm_aapcscc zeroext i1 @cmp_or8_short_int(i16* nocapture
>>> readonly %a,
>>> +                                                 i32* nocapture
>>> readonly %b) {
>>>  ; ARM-LABEL: cmp_or8_short_int:
>>> -; ARM:       @ %bb.0: @ %entry
>>> -; ARM-NEXT:    ldrh r0, [r0]
>>> -; ARM-NEXT:    ldr r1, [r1]
>>> -; ARM-NEXT:    orr r1, r1, r0
>>> +; ARM:         ldrb r0, [r0]
>>> +; ARM-NEXT:    ldrb r1, [r1]
>>> +; ARM-NEXT:    orrs r0, r1, r0
>>>  ; ARM-NEXT:    mov r0, #0
>>> -; ARM-NEXT:    tst r1, #255
>>>  ; ARM-NEXT:    movweq r0, #1
>>>  ; ARM-NEXT:    bx lr
>>>  ;
>>>  ; ARMEB-LABEL: cmp_or8_short_int:
>>> -; ARMEB:       @ %bb.0: @ %entry
>>> -; ARMEB-NEXT:    ldrh r0, [r0]
>>> -; ARMEB-NEXT:    ldr r1, [r1]
>>> -; ARMEB-NEXT:    orr r1, r1, r0
>>> +; ARMEB:         ldrb r0, [r0, #1]
>>> +; ARMEB-NEXT:    ldrb r1, [r1, #3]
>>> +; ARMEB-NEXT:    orrs r0, r1, r0
>>>  ; ARMEB-NEXT:    mov r0, #0
>>> -; ARMEB-NEXT:    tst r1, #255
>>>  ; ARMEB-NEXT:    movweq r0, #1
>>>  ; ARMEB-NEXT:    bx lr
>>>  ;
>>>  ; THUMB1-LABEL: cmp_or8_short_int:
>>> -; THUMB1:       @ %bb.0: @ %entry
>>> -; THUMB1-NEXT:    ldrh r0, [r0]
>>> -; THUMB1-NEXT:    ldr r2, [r1]
>>> +; THUMB1:         ldrb r0, [r0]
>>> +; THUMB1-NEXT:    ldrb r2, [r1]
>>>  ; THUMB1-NEXT:    orrs r2, r0
>>>  ; THUMB1-NEXT:    movs r0, #1
>>>  ; THUMB1-NEXT:    movs r1, #0
>>> -; THUMB1-NEXT:    lsls r2, r2, #24
>>> +; THUMB1-NEXT:    cmp r2, #0
>>>  ; THUMB1-NEXT:    beq .LBB5_2
>>>  ; THUMB1-NEXT:  @ %bb.1: @ %entry
>>>  ; THUMB1-NEXT:    mov r0, r1
>>> @@ -318,16 +277,13 @@ define arm_aapcscc zeroext i1 @cmp_or8_s
>>>  ; THUMB1-NEXT:    bx lr
>>>  ;
>>>  ; THUMB2-LABEL: cmp_or8_short_int:
>>> -; THUMB2:       @ %bb.0: @ %entry
>>> -; THUMB2-NEXT:    ldrh r0, [r0]
>>> -; THUMB2-NEXT:    ldr r1, [r1]
>>> +; THUMB2:         ldrb r0, [r0]
>>> +; THUMB2-NEXT:    ldrb r1, [r1]
>>>  ; THUMB2-NEXT:    orrs r0, r1
>>> -; THUMB2-NEXT:    lsls r0, r0, #24
>>>  ; THUMB2-NEXT:    mov.w r0, #0
>>>  ; THUMB2-NEXT:    it eq
>>>  ; THUMB2-NEXT:    moveq r0, #1
>>>  ; THUMB2-NEXT:    bx lr
>>> -                                                 i32* nocapture
>>> readonly %b) {
>>>  entry:
>>>    %0 = load i16, i16* %a, align 2
>>>    %conv = zext i16 %0 to i32
>>> @@ -339,34 +295,30 @@ entry:
>>>  }
>>>
>>>  define arm_aapcscc zeroext i1 @cmp_or8_int_int(i32* nocapture readonly
>>> %a,
>>> +                                               i32* nocapture readonly
>>> %b) {
>>>  ; ARM-LABEL: cmp_or8_int_int:
>>> -; ARM:       @ %bb.0: @ %entry
>>> -; ARM-NEXT:    ldr r0, [r0]
>>> -; ARM-NEXT:    ldr r1, [r1]
>>> -; ARM-NEXT:    orr r1, r1, r0
>>> +; ARM:         ldrb r0, [r0]
>>> +; ARM-NEXT:    ldrb r1, [r1]
>>> +; ARM-NEXT:    orrs r0, r1, r0
>>>  ; ARM-NEXT:    mov r0, #0
>>> -; ARM-NEXT:    tst r1, #255
>>>  ; ARM-NEXT:    movweq r0, #1
>>>  ; ARM-NEXT:    bx lr
>>>  ;
>>>  ; ARMEB-LABEL: cmp_or8_int_int:
>>> -; ARMEB:       @ %bb.0: @ %entry
>>> -; ARMEB-NEXT:    ldr r0, [r0]
>>> -; ARMEB-NEXT:    ldr r1, [r1]
>>> -; ARMEB-NEXT:    orr r1, r1, r0
>>> +; ARMEB:         ldrb r0, [r0, #3]
>>> +; ARMEB-NEXT:    ldrb r1, [r1, #3]
>>> +; ARMEB-NEXT:    orrs r0, r1, r0
>>>  ; ARMEB-NEXT:    mov r0, #0
>>> -; ARMEB-NEXT:    tst r1, #255
>>>  ; ARMEB-NEXT:    movweq r0, #1
>>>  ; ARMEB-NEXT:    bx lr
>>>  ;
>>>  ; THUMB1-LABEL: cmp_or8_int_int:
>>> -; THUMB1:       @ %bb.0: @ %entry
>>> -; THUMB1-NEXT:    ldr r0, [r0]
>>> -; THUMB1-NEXT:    ldr r2, [r1]
>>> +; THUMB1:         ldrb r0, [r0]
>>> +; THUMB1-NEXT:    ldrb r2, [r1]
>>>  ; THUMB1-NEXT:    orrs r2, r0
>>>  ; THUMB1-NEXT:    movs r0, #1
>>>  ; THUMB1-NEXT:    movs r1, #0
>>> -; THUMB1-NEXT:    lsls r2, r2, #24
>>> +; THUMB1-NEXT:    cmp r2, #0
>>>  ; THUMB1-NEXT:    beq .LBB6_2
>>>  ; THUMB1-NEXT:  @ %bb.1: @ %entry
>>>  ; THUMB1-NEXT:    mov r0, r1
>>> @@ -374,16 +326,13 @@ define arm_aapcscc zeroext i1 @cmp_or8_i
>>>  ; THUMB1-NEXT:    bx lr
>>>  ;
>>>  ; THUMB2-LABEL: cmp_or8_int_int:
>>> -; THUMB2:       @ %bb.0: @ %entry
>>> -; THUMB2-NEXT:    ldr r0, [r0]
>>> -; THUMB2-NEXT:    ldr r1, [r1]
>>> +; THUMB2:         ldrb r0, [r0]
>>> +; THUMB2-NEXT:    ldrb r1, [r1]
>>>  ; THUMB2-NEXT:    orrs r0, r1
>>> -; THUMB2-NEXT:    lsls r0, r0, #24
>>>  ; THUMB2-NEXT:    mov.w r0, #0
>>>  ; THUMB2-NEXT:    it eq
>>>  ; THUMB2-NEXT:    moveq r0, #1
>>>  ; THUMB2-NEXT:    bx lr
>>> -                                               i32* nocapture readonly
>>> %b) {
>>>  entry:
>>>    %0 = load i32, i32* %a, align 4
>>>    %1 = load i32, i32* %b, align 4
>>> @@ -394,36 +343,30 @@ entry:
>>>  }
>>>
>>>  define arm_aapcscc zeroext i1 @cmp_or16(i32* nocapture readonly %a,
>>> +                                        i32* nocapture readonly %b) {
>>>  ; ARM-LABEL: cmp_or16:
>>> -; ARM:       @ %bb.0: @ %entry
>>> -; ARM-NEXT:    ldr r0, [r0]
>>> -; ARM-NEXT:    movw r2, #65535
>>> -; ARM-NEXT:    ldr r1, [r1]
>>> -; ARM-NEXT:    orr r1, r1, r0
>>> +; ARM:         ldrh r0, [r0]
>>> +; ARM-NEXT:    ldrh r1, [r1]
>>> +; ARM-NEXT:    orrs r0, r1, r0
>>>  ; ARM-NEXT:    mov r0, #0
>>> -; ARM-NEXT:    tst r1, r2
>>>  ; ARM-NEXT:    movweq r0, #1
>>>  ; ARM-NEXT:    bx lr
>>>  ;
>>>  ; ARMEB-LABEL: cmp_or16:
>>> -; ARMEB:       @ %bb.0: @ %entry
>>> -; ARMEB-NEXT:    ldr r0, [r0]
>>> -; ARMEB-NEXT:    movw r2, #65535
>>> -; ARMEB-NEXT:    ldr r1, [r1]
>>> -; ARMEB-NEXT:    orr r1, r1, r0
>>> +; ARMEB:         ldrh r0, [r0, #2]
>>> +; ARMEB-NEXT:    ldrh r1, [r1, #2]
>>> +; ARMEB-NEXT:    orrs r0, r1, r0
>>>  ; ARMEB-NEXT:    mov r0, #0
>>> -; ARMEB-NEXT:    tst r1, r2
>>>  ; ARMEB-NEXT:    movweq r0, #1
>>>  ; ARMEB-NEXT:    bx lr
>>>  ;
>>>  ; THUMB1-LABEL: cmp_or16:
>>> -; THUMB1:       @ %bb.0: @ %entry
>>> -; THUMB1-NEXT:    ldr r0, [r0]
>>> -; THUMB1-NEXT:    ldr r2, [r1]
>>> +; THUMB1:         ldrh r0, [r0]
>>> +; THUMB1-NEXT:    ldrh r2, [r1]
>>>  ; THUMB1-NEXT:    orrs r2, r0
>>>  ; THUMB1-NEXT:    movs r0, #1
>>>  ; THUMB1-NEXT:    movs r1, #0
>>> -; THUMB1-NEXT:    lsls r2, r2, #16
>>> +; THUMB1-NEXT:    cmp r2, #0
>>>  ; THUMB1-NEXT:    beq .LBB7_2
>>>  ; THUMB1-NEXT:  @ %bb.1: @ %entry
>>>  ; THUMB1-NEXT:    mov r0, r1
>>> @@ -431,16 +374,13 @@ define arm_aapcscc zeroext i1 @cmp_or16(
>>>  ; THUMB1-NEXT:    bx lr
>>>  ;
>>>  ; THUMB2-LABEL: cmp_or16:
>>> -; THUMB2:       @ %bb.0: @ %entry
>>> -; THUMB2-NEXT:    ldr r0, [r0]
>>> -; THUMB2-NEXT:    ldr r1, [r1]
>>> +; THUMB2:         ldrh r0, [r0]
>>> +; THUMB2-NEXT:    ldrh r1, [r1]
>>>  ; THUMB2-NEXT:    orrs r0, r1
>>> -; THUMB2-NEXT:    lsls r0, r0, #16
>>>  ; THUMB2-NEXT:    mov.w r0, #0
>>>  ; THUMB2-NEXT:    it eq
>>>  ; THUMB2-NEXT:    moveq r0, #1
>>>  ; THUMB2-NEXT:    bx lr
>>> -                                        i32* nocapture readonly %b) {
>>>  entry:
>>>    %0 = load i32, i32* %a, align 4
>>>    %1 = load i32, i32* %b, align 4
>>> @@ -451,34 +391,29 @@ entry:
>>>  }
>>>
>>>  define arm_aapcscc zeroext i1 @cmp_and8_short_short(i16* nocapture
>>> readonly %a,
>>> +                                                    i16* nocapture
>>> readonly %b) {
>>>  ; ARM-LABEL: cmp_and8_short_short:
>>> -; ARM:       @ %bb.0: @ %entry
>>> -; ARM-NEXT:    ldrh r1, [r1]
>>> -; ARM-NEXT:    ldrh r0, [r0]
>>> -; ARM-NEXT:    and r1, r0, r1
>>> +; ARM:         ldrb r2, [r0]
>>>  ; ARM-NEXT:    mov r0, #0
>>> -; ARM-NEXT:    tst r1, #255
>>> +; ARM-NEXT:    ldrb r1, [r1]
>>> +; ARM-NEXT:    tst r2, r1
>>>  ; ARM-NEXT:    movweq r0, #1
>>>  ; ARM-NEXT:    bx lr
>>>  ;
>>>  ; ARMEB-LABEL: cmp_and8_short_short:
>>> -; ARMEB:       @ %bb.0: @ %entry
>>> -; ARMEB-NEXT:    ldrh r1, [r1]
>>> -; ARMEB-NEXT:    ldrh r0, [r0]
>>> -; ARMEB-NEXT:    and r1, r0, r1
>>> +; ARMEB:         ldrb r2, [r0, #1]
>>>  ; ARMEB-NEXT:    mov r0, #0
>>> -; ARMEB-NEXT:    tst r1, #255
>>> +; ARMEB-NEXT:    ldrb r1, [r1, #1]
>>> +; ARMEB-NEXT:    tst r2, r1
>>>  ; ARMEB-NEXT:    movweq r0, #1
>>>  ; ARMEB-NEXT:    bx lr
>>>  ;
>>>  ; THUMB1-LABEL: cmp_and8_short_short:
>>> -; THUMB1:       @ %bb.0: @ %entry
>>> -; THUMB1-NEXT:    ldrh r1, [r1]
>>> -; THUMB1-NEXT:    ldrh r2, [r0]
>>> -; THUMB1-NEXT:    ands r2, r1
>>> +; THUMB1:         ldrb r2, [r1]
>>> +; THUMB1-NEXT:    ldrb r3, [r0]
>>>  ; THUMB1-NEXT:    movs r0, #1
>>>  ; THUMB1-NEXT:    movs r1, #0
>>> -; THUMB1-NEXT:    lsls r2, r2, #24
>>> +; THUMB1-NEXT:    tst r3, r2
>>>  ; THUMB1-NEXT:    beq .LBB8_2
>>>  ; THUMB1-NEXT:  @ %bb.1: @ %entry
>>>  ; THUMB1-NEXT:    mov r0, r1
>>> @@ -486,16 +421,13 @@ define arm_aapcscc zeroext i1 @cmp_and8_
>>>  ; THUMB1-NEXT:    bx lr
>>>  ;
>>>  ; THUMB2-LABEL: cmp_and8_short_short:
>>> -; THUMB2:       @ %bb.0: @ %entry
>>> -; THUMB2-NEXT:    ldrh r1, [r1]
>>> -; THUMB2-NEXT:    ldrh r0, [r0]
>>> -; THUMB2-NEXT:    ands r0, r1
>>> -; THUMB2-NEXT:    lsls r0, r0, #24
>>> -; THUMB2-NEXT:    mov.w r0, #0
>>> +; THUMB2:         ldrb r2, [r0]
>>> +; THUMB2-NEXT:    movs r0, #0
>>> +; THUMB2-NEXT:    ldrb r1, [r1]
>>> +; THUMB2-NEXT:    tst r2, r1
>>>  ; THUMB2-NEXT:    it eq
>>>  ; THUMB2-NEXT:    moveq r0, #1
>>>  ; THUMB2-NEXT:    bx lr
>>> -                                                    i16* nocapture
>>> readonly %b) {
>>>  entry:
>>>    %0 = load i16, i16* %a, align 2
>>>    %1 = load i16, i16* %b, align 2
>>> @@ -506,34 +438,29 @@ entry:
>>>  }
>>>
>>>  define arm_aapcscc zeroext i1 @cmp_and8_short_int(i16* nocapture
>>> readonly %a,
>>> +                                                  i32* nocapture
>>> readonly %b) {
>>>  ; ARM-LABEL: cmp_and8_short_int:
>>> -; ARM:       @ %bb.0: @ %entry
>>> -; ARM-NEXT:    ldrh r0, [r0]
>>> -; ARM-NEXT:    ldr r1, [r1]
>>> -; ARM-NEXT:    and r1, r1, r0
>>> +; ARM:         ldrb r2, [r0]
>>>  ; ARM-NEXT:    mov r0, #0
>>> -; ARM-NEXT:    tst r1, #255
>>> +; ARM-NEXT:    ldrb r1, [r1]
>>> +; ARM-NEXT:    tst r1, r2
>>>  ; ARM-NEXT:    movweq r0, #1
>>>  ; ARM-NEXT:    bx lr
>>>  ;
>>>  ; ARMEB-LABEL: cmp_and8_short_int:
>>> -; ARMEB:       @ %bb.0: @ %entry
>>> -; ARMEB-NEXT:    ldrh r0, [r0]
>>> -; ARMEB-NEXT:    ldr r1, [r1]
>>> -; ARMEB-NEXT:    and r1, r1, r0
>>> +; ARMEB:         ldrb r2, [r0, #1]
>>>  ; ARMEB-NEXT:    mov r0, #0
>>> -; ARMEB-NEXT:    tst r1, #255
>>> +; ARMEB-NEXT:    ldrb r1, [r1, #3]
>>> +; ARMEB-NEXT:    tst r1, r2
>>>  ; ARMEB-NEXT:    movweq r0, #1
>>>  ; ARMEB-NEXT:    bx lr
>>>  ;
>>>  ; THUMB1-LABEL: cmp_and8_short_int:
>>> -; THUMB1:       @ %bb.0: @ %entry
>>> -; THUMB1-NEXT:    ldrh r0, [r0]
>>> -; THUMB1-NEXT:    ldr r2, [r1]
>>> -; THUMB1-NEXT:    ands r2, r0
>>> +; THUMB1:         ldrb r2, [r0]
>>> +; THUMB1-NEXT:    ldrb r3, [r1]
>>>  ; THUMB1-NEXT:    movs r0, #1
>>>  ; THUMB1-NEXT:    movs r1, #0
>>> -; THUMB1-NEXT:    lsls r2, r2, #24
>>> +; THUMB1-NEXT:    tst r3, r2
>>>  ; THUMB1-NEXT:    beq .LBB9_2
>>>  ; THUMB1-NEXT:  @ %bb.1: @ %entry
>>>  ; THUMB1-NEXT:    mov r0, r1
>>> @@ -541,16 +468,13 @@ define arm_aapcscc zeroext i1 @cmp_and8_
>>>  ; THUMB1-NEXT:    bx lr
>>>  ;
>>>  ; THUMB2-LABEL: cmp_and8_short_int:
>>> -; THUMB2:       @ %bb.0: @ %entry
>>> -; THUMB2-NEXT:    ldrh r0, [r0]
>>> -; THUMB2-NEXT:    ldr r1, [r1]
>>> -; THUMB2-NEXT:    ands r0, r1
>>> -; THUMB2-NEXT:    lsls r0, r0, #24
>>> -; THUMB2-NEXT:    mov.w r0, #0
>>> +; THUMB2:         ldrb r2, [r0]
>>> +; THUMB2-NEXT:    movs r0, #0
>>> +; THUMB2-NEXT:    ldrb r1, [r1]
>>> +; THUMB2-NEXT:    tst r1, r2
>>>  ; THUMB2-NEXT:    it eq
>>>  ; THUMB2-NEXT:    moveq r0, #1
>>>  ; THUMB2-NEXT:    bx lr
>>> -                                                  i32* nocapture readonly %b) {
>>>  entry:
>>>    %0 = load i16, i16* %a, align 2
>>>    %1 = load i32, i32* %b, align 4
>>> @@ -562,34 +486,29 @@ entry:
>>>  }
>>>
>>>  define arm_aapcscc zeroext i1 @cmp_and8_int_int(i32* nocapture readonly %a,
>>> +                                                i32* nocapture readonly %b) {
>>>  ; ARM-LABEL: cmp_and8_int_int:
>>> -; ARM:       @ %bb.0: @ %entry
>>> -; ARM-NEXT:    ldr r1, [r1]
>>> -; ARM-NEXT:    ldr r0, [r0]
>>> -; ARM-NEXT:    and r1, r0, r1
>>> +; ARM:         ldrb r2, [r0]
>>>  ; ARM-NEXT:    mov r0, #0
>>> -; ARM-NEXT:    tst r1, #255
>>> +; ARM-NEXT:    ldrb r1, [r1]
>>> +; ARM-NEXT:    tst r2, r1
>>>  ; ARM-NEXT:    movweq r0, #1
>>>  ; ARM-NEXT:    bx lr
>>>  ;
>>>  ; ARMEB-LABEL: cmp_and8_int_int:
>>> -; ARMEB:       @ %bb.0: @ %entry
>>> -; ARMEB-NEXT:    ldr r1, [r1]
>>> -; ARMEB-NEXT:    ldr r0, [r0]
>>> -; ARMEB-NEXT:    and r1, r0, r1
>>> +; ARMEB:         ldrb r2, [r0, #3]
>>>  ; ARMEB-NEXT:    mov r0, #0
>>> -; ARMEB-NEXT:    tst r1, #255
>>> +; ARMEB-NEXT:    ldrb r1, [r1, #3]
>>> +; ARMEB-NEXT:    tst r2, r1
>>>  ; ARMEB-NEXT:    movweq r0, #1
>>>  ; ARMEB-NEXT:    bx lr
>>>  ;
>>>  ; THUMB1-LABEL: cmp_and8_int_int:
>>> -; THUMB1:       @ %bb.0: @ %entry
>>> -; THUMB1-NEXT:    ldr r1, [r1]
>>> -; THUMB1-NEXT:    ldr r2, [r0]
>>> -; THUMB1-NEXT:    ands r2, r1
>>> +; THUMB1:         ldrb r2, [r1]
>>> +; THUMB1-NEXT:    ldrb r3, [r0]
>>>  ; THUMB1-NEXT:    movs r0, #1
>>>  ; THUMB1-NEXT:    movs r1, #0
>>> -; THUMB1-NEXT:    lsls r2, r2, #24
>>> +; THUMB1-NEXT:    tst r3, r2
>>>  ; THUMB1-NEXT:    beq .LBB10_2
>>>  ; THUMB1-NEXT:  @ %bb.1: @ %entry
>>>  ; THUMB1-NEXT:    mov r0, r1
>>> @@ -597,16 +516,13 @@ define arm_aapcscc zeroext i1 @cmp_and8_
>>>  ; THUMB1-NEXT:    bx lr
>>>  ;
>>>  ; THUMB2-LABEL: cmp_and8_int_int:
>>> -; THUMB2:       @ %bb.0: @ %entry
>>> -; THUMB2-NEXT:    ldr r1, [r1]
>>> -; THUMB2-NEXT:    ldr r0, [r0]
>>> -; THUMB2-NEXT:    ands r0, r1
>>> -; THUMB2-NEXT:    lsls r0, r0, #24
>>> -; THUMB2-NEXT:    mov.w r0, #0
>>> +; THUMB2:         ldrb r2, [r0]
>>> +; THUMB2-NEXT:    movs r0, #0
>>> +; THUMB2-NEXT:    ldrb r1, [r1]
>>> +; THUMB2-NEXT:    tst r2, r1
>>>  ; THUMB2-NEXT:    it eq
>>>  ; THUMB2-NEXT:    moveq r0, #1
>>>  ; THUMB2-NEXT:    bx lr
>>> -                                                i32* nocapture readonly %b) {
>>>  entry:
>>>    %0 = load i32, i32* %a, align 4
>>>    %1 = load i32, i32* %b, align 4
>>> @@ -617,36 +533,29 @@ entry:
>>>  }
>>>
>>>  define arm_aapcscc zeroext i1 @cmp_and16(i32* nocapture readonly %a,
>>> +                                         i32* nocapture readonly %b) {
>>>  ; ARM-LABEL: cmp_and16:
>>> -; ARM:       @ %bb.0: @ %entry
>>> -; ARM-NEXT:    ldr r1, [r1]
>>> -; ARM-NEXT:    movw r2, #65535
>>> -; ARM-NEXT:    ldr r0, [r0]
>>> -; ARM-NEXT:    and r1, r0, r1
>>> +; ARM:         ldrh r2, [r0]
>>>  ; ARM-NEXT:    mov r0, #0
>>> -; ARM-NEXT:    tst r1, r2
>>> +; ARM-NEXT:    ldrh r1, [r1]
>>> +; ARM-NEXT:    tst r2, r1
>>>  ; ARM-NEXT:    movweq r0, #1
>>>  ; ARM-NEXT:    bx lr
>>>  ;
>>>  ; ARMEB-LABEL: cmp_and16:
>>> -; ARMEB:       @ %bb.0: @ %entry
>>> -; ARMEB-NEXT:    ldr r1, [r1]
>>> -; ARMEB-NEXT:    movw r2, #65535
>>> -; ARMEB-NEXT:    ldr r0, [r0]
>>> -; ARMEB-NEXT:    and r1, r0, r1
>>> +; ARMEB:         ldrh r2, [r0, #2]
>>>  ; ARMEB-NEXT:    mov r0, #0
>>> -; ARMEB-NEXT:    tst r1, r2
>>> +; ARMEB-NEXT:    ldrh r1, [r1, #2]
>>> +; ARMEB-NEXT:    tst r2, r1
>>>  ; ARMEB-NEXT:    movweq r0, #1
>>>  ; ARMEB-NEXT:    bx lr
>>>  ;
>>>  ; THUMB1-LABEL: cmp_and16:
>>> -; THUMB1:       @ %bb.0: @ %entry
>>> -; THUMB1-NEXT:    ldr r1, [r1]
>>> -; THUMB1-NEXT:    ldr r2, [r0]
>>> -; THUMB1-NEXT:    ands r2, r1
>>> +; THUMB1:         ldrh r2, [r1]
>>> +; THUMB1-NEXT:    ldrh r3, [r0]
>>>  ; THUMB1-NEXT:    movs r0, #1
>>>  ; THUMB1-NEXT:    movs r1, #0
>>> -; THUMB1-NEXT:    lsls r2, r2, #16
>>> +; THUMB1-NEXT:    tst r3, r2
>>>  ; THUMB1-NEXT:    beq .LBB11_2
>>>  ; THUMB1-NEXT:  @ %bb.1: @ %entry
>>>  ; THUMB1-NEXT:    mov r0, r1
>>> @@ -654,16 +563,13 @@ define arm_aapcscc zeroext i1 @cmp_and16
>>>  ; THUMB1-NEXT:    bx lr
>>>  ;
>>>  ; THUMB2-LABEL: cmp_and16:
>>> -; THUMB2:       @ %bb.0: @ %entry
>>> -; THUMB2-NEXT:    ldr r1, [r1]
>>> -; THUMB2-NEXT:    ldr r0, [r0]
>>> -; THUMB2-NEXT:    ands r0, r1
>>> -; THUMB2-NEXT:    lsls r0, r0, #16
>>> -; THUMB2-NEXT:    mov.w r0, #0
>>> +; THUMB2:         ldrh r2, [r0]
>>> +; THUMB2-NEXT:    movs r0, #0
>>> +; THUMB2-NEXT:    ldrh r1, [r1]
>>> +; THUMB2-NEXT:    tst r2, r1
>>>  ; THUMB2-NEXT:    it eq
>>>  ; THUMB2-NEXT:    moveq r0, #1
>>>  ; THUMB2-NEXT:    bx lr
>>> -                                         i32* nocapture readonly %b) {
>>>  entry:
>>>    %0 = load i32, i32* %a, align 4
>>>    %1 = load i32, i32* %b, align 4
>>> @@ -675,35 +581,31 @@ entry:
>>>
>>>  define arm_aapcscc i32 @add_and16(i32* nocapture readonly %a, i32 %y, i32 %z) {
>>>  ; ARM-LABEL: add_and16:
>>> -; ARM:       @ %bb.0: @ %entry
>>> -; ARM-NEXT:    ldr r0, [r0]
>>> -; ARM-NEXT:    add r1, r1, r2
>>> +; ARM:         add r1, r1, r2
>>> +; ARM-NEXT:    ldrh r0, [r0]
>>> +; ARM-NEXT:    uxth r1, r1
>>>  ; ARM-NEXT:    orr r0, r0, r1
>>> -; ARM-NEXT:    uxth r0, r0
>>>  ; ARM-NEXT:    bx lr
>>>  ;
>>>  ; ARMEB-LABEL: add_and16:
>>> -; ARMEB:       @ %bb.0: @ %entry
>>> -; ARMEB-NEXT:    ldr r0, [r0]
>>> -; ARMEB-NEXT:    add r1, r1, r2
>>> +; ARMEB:         add r1, r1, r2
>>> +; ARMEB-NEXT:    ldrh r0, [r0, #2]
>>> +; ARMEB-NEXT:    uxth r1, r1
>>>  ; ARMEB-NEXT:    orr r0, r0, r1
>>> -; ARMEB-NEXT:    uxth r0, r0
>>>  ; ARMEB-NEXT:    bx lr
>>>  ;
>>>  ; THUMB1-LABEL: add_and16:
>>> -; THUMB1:       @ %bb.0: @ %entry
>>> -; THUMB1-NEXT:    adds r1, r1, r2
>>> -; THUMB1-NEXT:    ldr r0, [r0]
>>> +; THUMB1:         adds r1, r1, r2
>>> +; THUMB1-NEXT:    uxth r1, r1
>>> +; THUMB1-NEXT:    ldrh r0, [r0]
>>>  ; THUMB1-NEXT:    orrs r0, r1
>>> -; THUMB1-NEXT:    uxth r0, r0
>>>  ; THUMB1-NEXT:    bx lr
>>>  ;
>>>  ; THUMB2-LABEL: add_and16:
>>> -; THUMB2:       @ %bb.0: @ %entry
>>> -; THUMB2-NEXT:    ldr r0, [r0]
>>> -; THUMB2-NEXT:    add r1, r2
>>> +; THUMB2:         add r1, r2
>>> +; THUMB2-NEXT:    ldrh r0, [r0]
>>> +; THUMB2-NEXT:    uxth r1, r1
>>>  ; THUMB2-NEXT:    orrs r0, r1
>>> -; THUMB2-NEXT:    uxth r0, r0
>>>  ; THUMB2-NEXT:    bx lr
>>>  entry:
>>>    %x = load i32, i32* %a, align 4
>>> @@ -715,43 +617,39 @@ entry:
>>>
>>>  define arm_aapcscc i32 @test1(i32* %a, i32* %b, i32 %x, i32 %y) {
>>>  ; ARM-LABEL: test1:
>>> -; ARM:       @ %bb.0: @ %entry
>>> -; ARM-NEXT:    mul r2, r2, r3
>>> -; ARM-NEXT:    ldr r1, [r1]
>>> -; ARM-NEXT:    ldr r0, [r0]
>>> +; ARM:         mul r2, r2, r3
>>> +; ARM-NEXT:    ldrh r1, [r1]
>>> +; ARM-NEXT:    ldrh r0, [r0]
>>>  ; ARM-NEXT:    eor r0, r0, r1
>>> -; ARM-NEXT:    orr r0, r0, r2
>>> -; ARM-NEXT:    uxth r0, r0
>>> +; ARM-NEXT:    uxth r1, r2
>>> +; ARM-NEXT:    orr r0, r0, r1
>>>  ; ARM-NEXT:    bx lr
>>>  ;
>>>  ; ARMEB-LABEL: test1:
>>> -; ARMEB:       @ %bb.0: @ %entry
>>> -; ARMEB-NEXT:    mul r2, r2, r3
>>> -; ARMEB-NEXT:    ldr r1, [r1]
>>> -; ARMEB-NEXT:    ldr r0, [r0]
>>> +; ARMEB:         mul r2, r2, r3
>>> +; ARMEB-NEXT:    ldrh r1, [r1, #2]
>>> +; ARMEB-NEXT:    ldrh r0, [r0, #2]
>>>  ; ARMEB-NEXT:    eor r0, r0, r1
>>> -; ARMEB-NEXT:    orr r0, r0, r2
>>> -; ARMEB-NEXT:    uxth r0, r0
>>> +; ARMEB-NEXT:    uxth r1, r2
>>> +; ARMEB-NEXT:    orr r0, r0, r1
>>>  ; ARMEB-NEXT:    bx lr
>>>  ;
>>>  ; THUMB1-LABEL: test1:
>>> -; THUMB1:       @ %bb.0: @ %entry
>>> +; THUMB1:         ldrh r1, [r1]
>>> +; THUMB1-NEXT:    ldrh r4, [r0]
>>> +; THUMB1-NEXT:    eors r4, r1
>>>  ; THUMB1-NEXT:    muls r2, r3, r2
>>> -; THUMB1-NEXT:    ldr r1, [r1]
>>> -; THUMB1-NEXT:    ldr r0, [r0]
>>> -; THUMB1-NEXT:    eors r0, r1
>>> -; THUMB1-NEXT:    orrs r0, r2
>>> -; THUMB1-NEXT:    uxth r0, r0
>>> -; THUMB1-NEXT:    bx lr
>>> +; THUMB1-NEXT:    uxth r0, r2
>>> +; THUMB1-NEXT:    orrs r0, r4
>>> +; THUMB1-NEXT:    pop
>>>  ;
>>>  ; THUMB2-LABEL: test1:
>>> -; THUMB2:       @ %bb.0: @ %entry
>>> -; THUMB2-NEXT:    muls r2, r3, r2
>>> -; THUMB2-NEXT:    ldr r1, [r1]
>>> -; THUMB2-NEXT:    ldr r0, [r0]
>>> +; THUMB2:         ldrh r1, [r1]
>>> +; THUMB2-NEXT:    ldrh r0, [r0]
>>>  ; THUMB2-NEXT:    eors r0, r1
>>> -; THUMB2-NEXT:    orrs r0, r2
>>> -; THUMB2-NEXT:    uxth r0, r0
>>> +; THUMB2-NEXT:    mul r1, r2, r3
>>> +; THUMB2-NEXT:    uxth r1, r1
>>> +; THUMB2-NEXT:    orrs r0, r1
>>>  ; THUMB2-NEXT:    bx lr
>>>  entry:
>>>    %0 = load i32, i32* %a, align 4
>>> @@ -765,8 +663,7 @@ entry:
>>>
>>>  define arm_aapcscc i32 @test2(i32* %a, i32* %b, i32 %x, i32 %y) {
>>>  ; ARM-LABEL: test2:
>>> -; ARM:       @ %bb.0: @ %entry
>>> -; ARM-NEXT:    ldr r1, [r1]
>>> +; ARM:         ldr r1, [r1]
>>>  ; ARM-NEXT:    ldr r0, [r0]
>>>  ; ARM-NEXT:    mul r1, r2, r1
>>>  ; ARM-NEXT:    eor r0, r0, r3
>>> @@ -775,8 +672,7 @@ define arm_aapcscc i32 @test2(i32* %a, i
>>>  ; ARM-NEXT:    bx lr
>>>  ;
>>>  ; ARMEB-LABEL: test2:
>>> -; ARMEB:       @ %bb.0: @ %entry
>>> -; ARMEB-NEXT:    ldr r1, [r1]
>>> +; ARMEB:         ldr r1, [r1]
>>>  ; ARMEB-NEXT:    ldr r0, [r0]
>>>  ; ARMEB-NEXT:    mul r1, r2, r1
>>>  ; ARMEB-NEXT:    eor r0, r0, r3
>>> @@ -785,8 +681,7 @@ define arm_aapcscc i32 @test2(i32* %a, i
>>>  ; ARMEB-NEXT:    bx lr
>>>  ;
>>>  ; THUMB1-LABEL: test2:
>>> -; THUMB1:       @ %bb.0: @ %entry
>>> -; THUMB1-NEXT:    ldr r1, [r1]
>>> +; THUMB1:         ldr r1, [r1]
>>>  ; THUMB1-NEXT:    muls r1, r2, r1
>>>  ; THUMB1-NEXT:    ldr r0, [r0]
>>>  ; THUMB1-NEXT:    eors r0, r3
>>> @@ -795,8 +690,7 @@ define arm_aapcscc i32 @test2(i32* %a, i
>>>  ; THUMB1-NEXT:    bx lr
>>>  ;
>>>  ; THUMB2-LABEL: test2:
>>> -; THUMB2:       @ %bb.0: @ %entry
>>> -; THUMB2-NEXT:    ldr r1, [r1]
>>> +; THUMB2:         ldr r1, [r1]
>>>  ; THUMB2-NEXT:    ldr r0, [r0]
>>>  ; THUMB2-NEXT:    muls r1, r2, r1
>>>  ; THUMB2-NEXT:    eors r0, r3
>>> @@ -815,8 +709,7 @@ entry:
>>>
>>>  define arm_aapcscc i32 @test3(i32* %a, i32* %b, i32 %x, i16* %y) {
>>>  ; ARM-LABEL: test3:
>>> -; ARM:       @ %bb.0: @ %entry
>>> -; ARM-NEXT:    ldr r0, [r0]
>>> +; ARM:         ldr r0, [r0]
>>>  ; ARM-NEXT:    mul r1, r2, r0
>>>  ; ARM-NEXT:    ldrh r2, [r3]
>>>  ; ARM-NEXT:    eor r0, r0, r2
>>> @@ -825,8 +718,7 @@ define arm_aapcscc i32 @test3(i32* %a, i
>>>  ; ARM-NEXT:    bx lr
>>>  ;
>>>  ; ARMEB-LABEL: test3:
>>> -; ARMEB:       @ %bb.0: @ %entry
>>> -; ARMEB-NEXT:    ldr r0, [r0]
>>> +; ARMEB:         ldr r0, [r0]
>>>  ; ARMEB-NEXT:    mul r1, r2, r0
>>>  ; ARMEB-NEXT:    ldrh r2, [r3]
>>>  ; ARMEB-NEXT:    eor r0, r0, r2
>>> @@ -835,8 +727,7 @@ define arm_aapcscc i32 @test3(i32* %a, i
>>>  ; ARMEB-NEXT:    bx lr
>>>  ;
>>>  ; THUMB1-LABEL: test3:
>>> -; THUMB1:       @ %bb.0: @ %entry
>>> -; THUMB1-NEXT:    ldr r0, [r0]
>>> +; THUMB1:         ldr r0, [r0]
>>>  ; THUMB1-NEXT:    muls r2, r0, r2
>>>  ; THUMB1-NEXT:    ldrh r1, [r3]
>>>  ; THUMB1-NEXT:    eors r1, r0
>>> @@ -845,8 +736,7 @@ define arm_aapcscc i32 @test3(i32* %a, i
>>>  ; THUMB1-NEXT:    bx lr
>>>  ;
>>>  ; THUMB2-LABEL: test3:
>>> -; THUMB2:       @ %bb.0: @ %entry
>>> -; THUMB2-NEXT:    ldr r0, [r0]
>>> +; THUMB2:         ldr r0, [r0]
>>>  ; THUMB2-NEXT:    mul r1, r2, r0
>>>  ; THUMB2-NEXT:    ldrh r2, [r3]
>>>  ; THUMB2-NEXT:    eors r0, r2
>>> @@ -866,43 +756,39 @@ entry:
>>>
>>>  define arm_aapcscc i32 @test4(i32* %a, i32* %b, i32 %x, i32 %y) {
>>>  ; ARM-LABEL: test4:
>>> -; ARM:       @ %bb.0: @ %entry
>>> -; ARM-NEXT:    mul r2, r2, r3
>>> -; ARM-NEXT:    ldr r1, [r1]
>>> -; ARM-NEXT:    ldr r0, [r0]
>>> +; ARM:         mul r2, r2, r3
>>> +; ARM-NEXT:    ldrh r1, [r1]
>>> +; ARM-NEXT:    ldrh r0, [r0]
>>>  ; ARM-NEXT:    eor r0, r0, r1
>>> -; ARM-NEXT:    orr r0, r0, r2
>>> -; ARM-NEXT:    uxth r0, r0
>>> +; ARM-NEXT:    uxth r1, r2
>>> +; ARM-NEXT:    orr r0, r0, r1
>>>  ; ARM-NEXT:    bx lr
>>>  ;
>>>  ; ARMEB-LABEL: test4:
>>> -; ARMEB:       @ %bb.0: @ %entry
>>> -; ARMEB-NEXT:    mul r2, r2, r3
>>> -; ARMEB-NEXT:    ldr r1, [r1]
>>> -; ARMEB-NEXT:    ldr r0, [r0]
>>> +; ARMEB:         mul r2, r2, r3
>>> +; ARMEB-NEXT:    ldrh r1, [r1, #2]
>>> +; ARMEB-NEXT:    ldrh r0, [r0, #2]
>>>  ; ARMEB-NEXT:    eor r0, r0, r1
>>> -; ARMEB-NEXT:    orr r0, r0, r2
>>> -; ARMEB-NEXT:    uxth r0, r0
>>> +; ARMEB-NEXT:    uxth r1, r2
>>> +; ARMEB-NEXT:    orr r0, r0, r1
>>>  ; ARMEB-NEXT:    bx lr
>>>  ;
>>>  ; THUMB1-LABEL: test4:
>>> -; THUMB1:       @ %bb.0: @ %entry
>>> +; THUMB1:         ldrh r1, [r1]
>>> +; THUMB1-NEXT:    ldrh r4, [r0]
>>> +; THUMB1-NEXT:    eors r4, r1
>>>  ; THUMB1-NEXT:    muls r2, r3, r2
>>> -; THUMB1-NEXT:    ldr r1, [r1]
>>> -; THUMB1-NEXT:    ldr r0, [r0]
>>> -; THUMB1-NEXT:    eors r0, r1
>>> -; THUMB1-NEXT:    orrs r0, r2
>>> -; THUMB1-NEXT:    uxth r0, r0
>>> -; THUMB1-NEXT:    bx lr
>>> +; THUMB1-NEXT:    uxth r0, r2
>>> +; THUMB1-NEXT:    orrs r0, r4
>>> +; THUMB1-NEXT:    pop
>>>  ;
>>>  ; THUMB2-LABEL: test4:
>>> -; THUMB2:       @ %bb.0: @ %entry
>>> -; THUMB2-NEXT:    muls r2, r3, r2
>>> -; THUMB2-NEXT:    ldr r1, [r1]
>>> -; THUMB2-NEXT:    ldr r0, [r0]
>>> +; THUMB2:         ldrh r1, [r1]
>>> +; THUMB2-NEXT:    ldrh r0, [r0]
>>>  ; THUMB2-NEXT:    eors r0, r1
>>> -; THUMB2-NEXT:    orrs r0, r2
>>> -; THUMB2-NEXT:    uxth r0, r0
>>> +; THUMB2-NEXT:    mul r1, r2, r3
>>> +; THUMB2-NEXT:    uxth r1, r1
>>> +; THUMB2-NEXT:    orrs r0, r1
>>>  ; THUMB2-NEXT:    bx lr
>>>  entry:
>>>    %0 = load i32, i32* %a, align 4
>>> @@ -916,43 +802,39 @@ entry:
>>>
>>>  define arm_aapcscc i32 @test5(i32* %a, i32* %b, i32 %x, i16 zeroext %y) {
>>>  ; ARM-LABEL: test5:
>>> -; ARM:       @ %bb.0: @ %entry
>>> -; ARM-NEXT:    ldr r1, [r1]
>>> -; ARM-NEXT:    ldr r0, [r0]
>>> +; ARM:         ldr r1, [r1]
>>> +; ARM-NEXT:    ldrh r0, [r0]
>>>  ; ARM-NEXT:    mul r1, r2, r1
>>>  ; ARM-NEXT:    eor r0, r0, r3
>>> +; ARM-NEXT:    uxth r1, r1
>>>  ; ARM-NEXT:    orr r0, r0, r1
>>> -; ARM-NEXT:    uxth r0, r0
>>>  ; ARM-NEXT:    bx lr
>>>  ;
>>>  ; ARMEB-LABEL: test5:
>>> -; ARMEB:       @ %bb.0: @ %entry
>>> -; ARMEB-NEXT:    ldr r1, [r1]
>>> -; ARMEB-NEXT:    ldr r0, [r0]
>>> +; ARMEB:         ldr r1, [r1]
>>> +; ARMEB-NEXT:    ldrh r0, [r0, #2]
>>>  ; ARMEB-NEXT:    mul r1, r2, r1
>>>  ; ARMEB-NEXT:    eor r0, r0, r3
>>> +; ARMEB-NEXT:    uxth r1, r1
>>>  ; ARMEB-NEXT:    orr r0, r0, r1
>>> -; ARMEB-NEXT:    uxth r0, r0
>>>  ; ARMEB-NEXT:    bx lr
>>>  ;
>>>  ; THUMB1-LABEL: test5:
>>> -; THUMB1:       @ %bb.0: @ %entry
>>> -; THUMB1-NEXT:    ldr r1, [r1]
>>> -; THUMB1-NEXT:    muls r1, r2, r1
>>> -; THUMB1-NEXT:    ldr r0, [r0]
>>> -; THUMB1-NEXT:    eors r0, r3
>>> -; THUMB1-NEXT:    orrs r0, r1
>>> +; THUMB1:         ldrh r4, [r0]
>>> +; THUMB1-NEXT:    eors r4, r3
>>> +; THUMB1-NEXT:    ldr r0, [r1]
>>> +; THUMB1-NEXT:    muls r0, r2, r0
>>>  ; THUMB1-NEXT:    uxth r0, r0
>>> -; THUMB1-NEXT:    bx lr
>>> +; THUMB1-NEXT:    orrs r0, r4
>>> +; THUMB1-NEXT:    pop
>>>  ;
>>>  ; THUMB2-LABEL: test5:
>>> -; THUMB2:       @ %bb.0: @ %entry
>>> -; THUMB2-NEXT:    ldr r1, [r1]
>>> -; THUMB2-NEXT:    ldr r0, [r0]
>>> +; THUMB2:         ldr r1, [r1]
>>> +; THUMB2-NEXT:    ldrh r0, [r0]
>>>  ; THUMB2-NEXT:    muls r1, r2, r1
>>>  ; THUMB2-NEXT:    eors r0, r3
>>> +; THUMB2-NEXT:    uxth r1, r1
>>>  ; THUMB2-NEXT:    orrs r0, r1
>>> -; THUMB2-NEXT:    uxth r0, r0
>>>  ; THUMB2-NEXT:    bx lr
>>>  entry:
>>>    %0 = load i32, i32* %a, align 4
>>> @@ -1024,10 +906,9 @@ entry:
>>>  define arm_aapcscc i1 @test7(i16* %x, i16 %y, i8 %z) {
>>>  ; ARM-LABEL: test7:
>>>  ; ARM:       @ %bb.0: @ %entry
>>> -; ARM-NEXT:    ldrh r0, [r0]
>>> +; ARM-NEXT:    ldrb r0, [r0]
>>>  ; ARM-NEXT:    uxtb r2, r2
>>> -; ARM-NEXT:    and r0, r0, r1
>>> -; ARM-NEXT:    uxtb r1, r0
>>> +; ARM-NEXT:    and r1, r0, r1
>>>  ; ARM-NEXT:    mov r0, #0
>>>  ; ARM-NEXT:    cmp r1, r2
>>>  ; ARM-NEXT:    movweq r0, #1
>>> @@ -1035,10 +916,9 @@ define arm_aapcscc i1 @test7(i16* %x, i1
>>>  ;
>>>  ; ARMEB-LABEL: test7:
>>>  ; ARMEB:       @ %bb.0: @ %entry
>>> -; ARMEB-NEXT:    ldrh r0, [r0]
>>> +; ARMEB-NEXT:    ldrb r0, [r0, #1]
>>>  ; ARMEB-NEXT:    uxtb r2, r2
>>> -; ARMEB-NEXT:    and r0, r0, r1
>>> -; ARMEB-NEXT:    uxtb r1, r0
>>> +; ARMEB-NEXT:    and r1, r0, r1
>>>  ; ARMEB-NEXT:    mov r0, #0
>>>  ; ARMEB-NEXT:    cmp r1, r2
>>>  ; ARMEB-NEXT:    movweq r0, #1
>>> @@ -1046,9 +926,8 @@ define arm_aapcscc i1 @test7(i16* %x, i1
>>>  ;
>>>  ; THUMB1-LABEL: test7:
>>>  ; THUMB1:       @ %bb.0: @ %entry
>>> -; THUMB1-NEXT:    ldrh r0, [r0]
>>> -; THUMB1-NEXT:    ands r0, r1
>>> -; THUMB1-NEXT:    uxtb r3, r0
>>> +; THUMB1-NEXT:    ldrb r3, [r0]
>>> +; THUMB1-NEXT:    ands r3, r1
>>>  ; THUMB1-NEXT:    uxtb r2, r2
>>>  ; THUMB1-NEXT:    movs r0, #1
>>>  ; THUMB1-NEXT:    movs r1, #0
>>> @@ -1061,10 +940,9 @@ define arm_aapcscc i1 @test7(i16* %x, i1
>>>  ;
>>>  ; THUMB2-LABEL: test7:
>>>  ; THUMB2:       @ %bb.0: @ %entry
>>> -; THUMB2-NEXT:    ldrh r0, [r0]
>>> +; THUMB2-NEXT:    ldrb r0, [r0]
>>>  ; THUMB2-NEXT:    uxtb r2, r2
>>> -; THUMB2-NEXT:    ands r0, r1
>>> -; THUMB2-NEXT:    uxtb r1, r0
>>> +; THUMB2-NEXT:    ands r1, r0
>>>  ; THUMB2-NEXT:    movs r0, #0
>>>  ; THUMB2-NEXT:    cmp r1, r2
>>>  ; THUMB2-NEXT:    it eq
>>> @@ -1081,33 +959,30 @@ entry:
>>>  define arm_aapcscc void @test8(i32* nocapture %p) {
>>>  ; ARM-LABEL: test8:
>>>  ; ARM:       @ %bb.0: @ %entry
>>> -; ARM-NEXT:    ldr r1, [r0]
>>> -; ARM-NEXT:    mvn r1, r1
>>> -; ARM-NEXT:    uxtb r1, r1
>>> +; ARM-NEXT:    ldrb r1, [r0]
>>> +; ARM-NEXT:    eor r1, r1, #255
>>>  ; ARM-NEXT:    str r1, [r0]
>>>  ; ARM-NEXT:    bx lr
>>>  ;
>>>  ; ARMEB-LABEL: test8:
>>>  ; ARMEB:       @ %bb.0: @ %entry
>>> -; ARMEB-NEXT:    ldr r1, [r0]
>>> -; ARMEB-NEXT:    mvn r1, r1
>>> -; ARMEB-NEXT:    uxtb r1, r1
>>> +; ARMEB-NEXT:    ldrb r1, [r0, #3]
>>> +; ARMEB-NEXT:    eor r1, r1, #255
>>>  ; ARMEB-NEXT:    str r1, [r0]
>>>  ; ARMEB-NEXT:    bx lr
>>>  ;
>>>  ; THUMB1-LABEL: test8:
>>>  ; THUMB1:       @ %bb.0: @ %entry
>>> -; THUMB1-NEXT:    ldr r1, [r0]
>>> +; THUMB1-NEXT:    ldrb r1, [r0]
>>>  ; THUMB1-NEXT:    movs r2, #255
>>> -; THUMB1-NEXT:    bics r2, r1
>>> +; THUMB1-NEXT:    eors r2, r1
>>>  ; THUMB1-NEXT:    str r2, [r0]
>>>  ; THUMB1-NEXT:    bx lr
>>>  ;
>>>  ; THUMB2-LABEL: test8:
>>>  ; THUMB2:       @ %bb.0: @ %entry
>>> -; THUMB2-NEXT:    ldr r1, [r0]
>>> -; THUMB2-NEXT:    mvns r1, r1
>>> -; THUMB2-NEXT:    uxtb r1, r1
>>> +; THUMB2-NEXT:    ldrb r1, [r0]
>>> +; THUMB2-NEXT:    eor r1, r1, #255
>>>  ; THUMB2-NEXT:    str r1, [r0]
>>>  ; THUMB2-NEXT:    bx lr
>>>  entry:
>>> @@ -1117,3 +992,78 @@ entry:
>>>    store i32 %and, i32* %p, align 4
>>>    ret void
>>>  }
>>> +
>>> +define arm_aapcscc void @test9(i32* nocapture %p) {
>>> +; ARM-LABEL: test9:
>>> +; ARM:       @ %bb.0: @ %entry
>>> +; ARM-NEXT:    ldrb r1, [r0]
>>> +; ARM-NEXT:    eor r1, r1, #255
>>> +; ARM-NEXT:    str r1, [r0]
>>> +; ARM-NEXT:    bx lr
>>> +;
>>> +; ARMEB-LABEL: test9:
>>> +; ARMEB:       @ %bb.0: @ %entry
>>> +; ARMEB-NEXT:    ldrb r1, [r0, #3]
>>> +; ARMEB-NEXT:    eor r1, r1, #255
>>> +; ARMEB-NEXT:    str r1, [r0]
>>> +; ARMEB-NEXT:    bx lr
>>> +;
>>> +; THUMB1-LABEL: test9:
>>> +; THUMB1:       @ %bb.0: @ %entry
>>> +; THUMB1-NEXT:    ldrb r1, [r0]
>>> +; THUMB1-NEXT:    movs r2, #255
>>> +; THUMB1-NEXT:    eors r2, r1
>>> +; THUMB1-NEXT:    str r2, [r0]
>>> +; THUMB1-NEXT:    bx lr
>>> +;
>>> +; THUMB2-LABEL: test9:
>>> +; THUMB2:       @ %bb.0: @ %entry
>>> +; THUMB2-NEXT:    ldrb r1, [r0]
>>> +; THUMB2-NEXT:    eor r1, r1, #255
>>> +; THUMB2-NEXT:    str r1, [r0]
>>> +; THUMB2-NEXT:    bx lr
>>> +entry:
>>> +  %0 = load i32, i32* %p, align 4
>>> +  %neg = xor i32 %0, -1
>>> +  %and = and i32 %neg, 255
>>> +  store i32 %and, i32* %p, align 4
>>> +  ret void
>>> +}
>>> +
>>> +; ARM-LABEL: test10:
>>> +; ARM:       @ %bb.0: @ %entry
>>> +; ARM-NEXT:    ldrb r1, [r0]
>>> +; ARM-NEXT:    eor r1, r1, #255
>>> +; ARM-NEXT:    str r1, [r0]
>>> +; ARM-NEXT:    bx lr
>>> +;
>>> +; ARMEB-LABEL: test10:
>>> +; ARMEB:       @ %bb.0: @ %entry
>>> +; ARMEB-NEXT:    ldrb r1, [r0, #3]
>>> +; ARMEB-NEXT:    eor r1, r1, #255
>>> +; ARMEB-NEXT:    str r1, [r0]
>>> +; ARMEB-NEXT:    bx lr
>>> +;
>>> +; THUMB1-LABEL: test10:
>>> +; THUMB1:       @ %bb.0: @ %entry
>>> +; THUMB1-NEXT:    ldrb r1, [r0]
>>> +; THUMB1-NEXT:    movs r2, #255
>>> +; THUMB1-NEXT:    eors r2, r1
>>> +; THUMB1-NEXT:    str r2, [r0]
>>> +; THUMB1-NEXT:    bx lr
>>> +;
>>> +; THUMB2-LABEL: test10:
>>> +; THUMB2:       @ %bb.0: @ %entry
>>> +; THUMB2-NEXT:    ldrb r1, [r0]
>>> +; THUMB2-NEXT:    eor r1, r1, #255
>>> +; THUMB2-NEXT:    str r1, [r0]
>>> +; THUMB2-NEXT:    bx lr
>>> +define arm_aapcscc void @test10(i32* nocapture %p) {
>>> +entry:
>>> +  %0 = load i32, i32* %p, align 4
>>> +  %neg = and i32 %0, 255
>>> +  %and = xor i32 %neg, 255
>>> +  store i32 %and, i32* %p, align 4
>>> +  ret void
>>> +}
>>> +
>>>
>>>
>>> _______________________________________________
>>> llvm-commits mailing list
>>> llvm-commits at lists.llvm.org
>>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>>>
>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20171228/5e2e7474/attachment-0001.html>


More information about the llvm-commits mailing list