[llvm] r319867 - Revert "[DAGCombine] Move AND nodes to multiple load leaves"

Vlad Tsyrklevich via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 5 17:16:08 PST 2017


Author: vlad.tsyrklevich
Date: Tue Dec  5 17:16:08 2017
New Revision: 319867

URL: http://llvm.org/viewvc/llvm-project?rev=319867&view=rev
Log:
Revert "[DAGCombine] Move AND nodes to multiple load leaves"

This reverts commit r319773. It was causing some buildbots to hang, e.g.
http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-android/builds/5589

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/test/CodeGen/ARM/and-load-combine.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=319867&r1=319866&r2=319867&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Dec  5 17:16:08 2017
@@ -505,14 +505,6 @@ namespace {
     bool isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
                            EVT &ExtVT, unsigned ShAmt = 0);
 
-    /// Used by BackwardsPropagateMask to find suitable loads.
-    bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads,
-                           ConstantSDNode *Mask, SDNode *&UncombinedNode);
-
-    /// Attempt to propagate a given AND node back to load leaves so that they
-    /// can be combined into narrow loads.
-    bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
-
     /// Helper function for MergeConsecutiveStores which merges the
     /// component store chains.
     SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
@@ -3789,111 +3781,6 @@ bool DAGCombiner::isLegalNarrowLoad(Load
   return true;
 }
 
-bool DAGCombiner::SearchForAndLoads(SDNode *N,
-                                    SmallPtrSetImpl<LoadSDNode*> &Loads,
-                                    ConstantSDNode *Mask,
-                                    SDNode *&NodeToMask) {
-  // Recursively search for the operands, looking for loads which can be
-  // narrowed.
-  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
-    SDValue Op = N->getOperand(i);
-
-    // Constants should already be fixed up...
-    if (isa<ConstantSDNode>(Op))
-      continue;
-
-    if (!Op.hasOneUse() || Op.getValueType().isVector())
-      return false;
-
-    switch(Op.getOpcode()) {
-    case ISD::LOAD: {
-      auto *Load = cast<LoadSDNode>(Op);
-      EVT ExtVT;
-      if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
-          isLegalNarrowLoad(Load, ISD::ZEXTLOAD, ExtVT)) {
-        Loads.insert(Load);
-        continue;
-      }
-      return false;
-    }
-    case ISD::ZERO_EXTEND:
-    case ISD::ANY_EXTEND:
-    case ISD::AssertZext: {
-      unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
-      EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
-      EVT VT = Op.getOpcode() == ISD::AssertZext ?
-        cast<VTSDNode>(Op.getOperand(1))->getVT() :
-        Op.getOperand(0).getValueType();
-
-      // We can accept extending nodes if the mask is wider or an equal
-      // width to the original type.
-      if (ExtVT.bitsGE(VT))
-        continue;
-      break;
-    }
-    case ISD::OR:
-    case ISD::XOR:
-    case ISD::AND:
-      if (!SearchForAndLoads(Op.getNode(), Loads, Mask, NodeToMask))
-        return false;
-      continue;
-    }
-
-    // Allow one node which will masked along with any loads found.
-    if (NodeToMask)
-      return false;
-    NodeToMask = Op.getNode();
-  }
-  return true;
-}
-
-bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
-  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
-  if (!Mask)
-    return false;
-
-  if (!Mask->getAPIntValue().isMask())
-    return false;
-
-  // No need to do anything if the and directly uses a load.
-  if (isa<LoadSDNode>(N->getOperand(0)))
-    return false;
-
-  SmallPtrSet<LoadSDNode*, 8> Loads;
-  SDNode *FixupNode = nullptr;
-  if (SearchForAndLoads(N, Loads, Mask, FixupNode)) {
-    if (Loads.size() == 0)
-      return false;
-
-    SDValue MaskOp = N->getOperand(1);
-
-    // If it exists, fixup the single node we allow in the tree that needs
-    // masking.
-    if (FixupNode) {
-      SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
-                                FixupNode->getValueType(0),
-                                SDValue(FixupNode, 0), MaskOp);
-      DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
-      DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0),
-                             MaskOp);
-    }
-
-    for (auto *Load : Loads) {
-      SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
-                                SDValue(Load, 0), MaskOp);
-      DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
-      DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp);
-      SDValue NewLoad = ReduceLoadWidth(And.getNode());
-      assert(NewLoad &&
-             "Shouldn't be masking the load if it can't be narrowed");
-      CombineTo(Load, NewLoad, NewLoad.getValue(1));
-    }
-    DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
-    return true;
-  }
-  return false;
-}
-
 SDValue DAGCombiner::visitAND(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -4094,16 +3981,6 @@ SDValue DAGCombiner::visitAND(SDNode *N)
       return SDValue(N, 0);
     }
   }
-
-  if (Level >= AfterLegalizeTypes) {
-    // Attempt to propagate the AND back up to the leaves which, if they're
-    // loads, can be combined to narrow loads and the AND node can be removed.
-    // Perform after legalization so that extend nodes will already be
-    // combined into the loads.
-    if (BackwardsPropagateMask(N, DAG)) {
-      return SDValue(N, 0);
-    }
-  }
 
   if (SDValue Combined = visitANDLike(N0, N1, N))
     return Combined;

Modified: llvm/trunk/test/CodeGen/ARM/and-load-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/and-load-combine.ll?rev=319867&r1=319866&r2=319867&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/and-load-combine.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/and-load-combine.ll Tue Dec  5 17:16:08 2017
@@ -5,30 +5,34 @@
 ; RUN: llc -mtriple=thumbv8m.main %s -o - | FileCheck %s --check-prefix=THUMB2
 
 define arm_aapcscc zeroext i1 @cmp_xor8_short_short(i16* nocapture readonly %a,
-                                                    i16* nocapture readonly %b) {
 ; ARM-LABEL: cmp_xor8_short_short:
-; ARM:         ldrb r2, [r0]
+; ARM:       @ %bb.0: @ %entry
+; ARM-NEXT:    ldrh r0, [r0]
+; ARM-NEXT:    ldrh r1, [r1]
+; ARM-NEXT:    eor r1, r1, r0
 ; ARM-NEXT:    mov r0, #0
-; ARM-NEXT:    ldrb r1, [r1]
-; ARM-NEXT:    teq r1, r2
+; ARM-NEXT:    tst r1, #255
 ; ARM-NEXT:    movweq r0, #1
 ; ARM-NEXT:    bx lr
 ;
 ; ARMEB-LABEL: cmp_xor8_short_short:
-; ARMEB:        ldrb r2, [r0, #1]
+; ARMEB:       @ %bb.0: @ %entry
+; ARMEB-NEXT:    ldrh r0, [r0]
+; ARMEB-NEXT:    ldrh r1, [r1]
+; ARMEB-NEXT:    eor r1, r1, r0
 ; ARMEB-NEXT:    mov r0, #0
-; ARMEB-NEXT:    ldrb r1, [r1, #1]
-; ARMEB-NEXT:    teq r1, r2
+; ARMEB-NEXT:    tst r1, #255
 ; ARMEB-NEXT:    movweq r0, #1
 ; ARMEB-NEXT:    bx lr
 ;
 ; THUMB1-LABEL: cmp_xor8_short_short:
-; THUMB1:         ldrb r0, [r0]
-; THUMB1-NEXT:    ldrb r2, [r1]
+; THUMB1:       @ %bb.0: @ %entry
+; THUMB1-NEXT:    ldrh r0, [r0]
+; THUMB1-NEXT:    ldrh r2, [r1]
 ; THUMB1-NEXT:    eors r2, r0
 ; THUMB1-NEXT:    movs r0, #1
 ; THUMB1-NEXT:    movs r1, #0
-; THUMB1-NEXT:    cmp r2, #0
+; THUMB1-NEXT:    lsls r2, r2, #24
 ; THUMB1-NEXT:    beq .LBB0_2
 ; THUMB1-NEXT:  @ %bb.1: @ %entry
 ; THUMB1-NEXT:    mov r0, r1
@@ -36,13 +40,16 @@ define arm_aapcscc zeroext i1 @cmp_xor8_
 ; THUMB1-NEXT:    bx lr
 ;
 ; THUMB2-LABEL: cmp_xor8_short_short:
-; THUMB2:         ldrb r2, [r0]
-; THUMB2-NEXT:    movs r0, #0
-; THUMB2-NEXT:    ldrb r1, [r1]
-; THUMB2-NEXT:    teq.w r1, r2
+; THUMB2:       @ %bb.0: @ %entry
+; THUMB2-NEXT:    ldrh r0, [r0]
+; THUMB2-NEXT:    ldrh r1, [r1]
+; THUMB2-NEXT:    eors r0, r1
+; THUMB2-NEXT:    lsls r0, r0, #24
+; THUMB2-NEXT:    mov.w r0, #0
 ; THUMB2-NEXT:    it eq
 ; THUMB2-NEXT:    moveq r0, #1
 ; THUMB2-NEXT:    bx lr
+                                                    i16* nocapture readonly %b) {
 entry:
   %0 = load i16, i16* %a, align 2
   %1 = load i16, i16* %b, align 2
@@ -53,30 +60,34 @@ entry:
 }
 
 define arm_aapcscc zeroext i1 @cmp_xor8_short_int(i16* nocapture readonly %a,
-                                                  i32* nocapture readonly %b) {
 ; ARM-LABEL: cmp_xor8_short_int:
-; ARM:         ldrb r2, [r0]
+; ARM:       @ %bb.0: @ %entry
+; ARM-NEXT:    ldrh r0, [r0]
+; ARM-NEXT:    ldr r1, [r1]
+; ARM-NEXT:    eor r1, r1, r0
 ; ARM-NEXT:    mov r0, #0
-; ARM-NEXT:    ldrb r1, [r1]
-; ARM-NEXT:    teq r1, r2
+; ARM-NEXT:    tst r1, #255
 ; ARM-NEXT:    movweq r0, #1
 ; ARM-NEXT:    bx lr
 ;
 ; ARMEB-LABEL: cmp_xor8_short_int:
-; ARMEB:         ldrb r2, [r0, #1]
+; ARMEB:       @ %bb.0: @ %entry
+; ARMEB-NEXT:    ldrh r0, [r0]
+; ARMEB-NEXT:    ldr r1, [r1]
+; ARMEB-NEXT:    eor r1, r1, r0
 ; ARMEB-NEXT:    mov r0, #0
-; ARMEB-NEXT:    ldrb r1, [r1, #3]
-; ARMEB-NEXT:    teq r1, r2
+; ARMEB-NEXT:    tst r1, #255
 ; ARMEB-NEXT:    movweq r0, #1
 ; ARMEB-NEXT:    bx lr
 ;
 ; THUMB1-LABEL: cmp_xor8_short_int:
-; THUMB1:         ldrb r0, [r0]
-; THUMB1-NEXT:    ldrb r2, [r1]
+; THUMB1:       @ %bb.0: @ %entry
+; THUMB1-NEXT:    ldrh r0, [r0]
+; THUMB1-NEXT:    ldr r2, [r1]
 ; THUMB1-NEXT:    eors r2, r0
 ; THUMB1-NEXT:    movs r0, #1
 ; THUMB1-NEXT:    movs r1, #0
-; THUMB1-NEXT:    cmp r2, #0
+; THUMB1-NEXT:    lsls r2, r2, #24
 ; THUMB1-NEXT:    beq .LBB1_2
 ; THUMB1-NEXT:  @ %bb.1: @ %entry
 ; THUMB1-NEXT:    mov r0, r1
@@ -84,13 +95,16 @@ define arm_aapcscc zeroext i1 @cmp_xor8_
 ; THUMB1-NEXT:    bx lr
 ;
 ; THUMB2-LABEL: cmp_xor8_short_int:
-; THUMB2:         ldrb r2, [r0]
-; THUMB2-NEXT:    movs r0, #0
-; THUMB2-NEXT:    ldrb r1, [r1]
-; THUMB2-NEXT:    teq.w r1, r2
+; THUMB2:       @ %bb.0: @ %entry
+; THUMB2-NEXT:    ldrh r0, [r0]
+; THUMB2-NEXT:    ldr r1, [r1]
+; THUMB2-NEXT:    eors r0, r1
+; THUMB2-NEXT:    lsls r0, r0, #24
+; THUMB2-NEXT:    mov.w r0, #0
 ; THUMB2-NEXT:    it eq
 ; THUMB2-NEXT:    moveq r0, #1
 ; THUMB2-NEXT:    bx lr
+                                                  i32* nocapture readonly %b) {
 entry:
   %0 = load i16, i16* %a, align 2
   %conv = zext i16 %0 to i32
@@ -102,30 +116,34 @@ entry:
 }
 
 define arm_aapcscc zeroext i1 @cmp_xor8_int_int(i32* nocapture readonly %a,
-                                                i32* nocapture readonly %b) {
 ; ARM-LABEL: cmp_xor8_int_int:
-; ARM:         ldrb r2, [r0]
+; ARM:       @ %bb.0: @ %entry
+; ARM-NEXT:    ldr r0, [r0]
+; ARM-NEXT:    ldr r1, [r1]
+; ARM-NEXT:    eor r1, r1, r0
 ; ARM-NEXT:    mov r0, #0
-; ARM-NEXT:    ldrb r1, [r1]
-; ARM-NEXT:    teq r1, r2
+; ARM-NEXT:    tst r1, #255
 ; ARM-NEXT:    movweq r0, #1
 ; ARM-NEXT:    bx lr
 ;
 ; ARMEB-LABEL: cmp_xor8_int_int:
-; ARMEB:         ldrb r2, [r0, #3]
+; ARMEB:       @ %bb.0: @ %entry
+; ARMEB-NEXT:    ldr r0, [r0]
+; ARMEB-NEXT:    ldr r1, [r1]
+; ARMEB-NEXT:    eor r1, r1, r0
 ; ARMEB-NEXT:    mov r0, #0
-; ARMEB-NEXT:    ldrb r1, [r1, #3]
-; ARMEB-NEXT:    teq r1, r2
+; ARMEB-NEXT:    tst r1, #255
 ; ARMEB-NEXT:    movweq r0, #1
 ; ARMEB-NEXT:    bx lr
 ;
 ; THUMB1-LABEL: cmp_xor8_int_int:
-; THUMB1:         ldrb r0, [r0]
-; THUMB1-NEXT:    ldrb r2, [r1]
+; THUMB1:       @ %bb.0: @ %entry
+; THUMB1-NEXT:    ldr r0, [r0]
+; THUMB1-NEXT:    ldr r2, [r1]
 ; THUMB1-NEXT:    eors r2, r0
 ; THUMB1-NEXT:    movs r0, #1
 ; THUMB1-NEXT:    movs r1, #0
-; THUMB1-NEXT:    cmp r2, #0
+; THUMB1-NEXT:    lsls r2, r2, #24
 ; THUMB1-NEXT:    beq .LBB2_2
 ; THUMB1-NEXT:  @ %bb.1: @ %entry
 ; THUMB1-NEXT:    mov r0, r1
@@ -133,13 +151,16 @@ define arm_aapcscc zeroext i1 @cmp_xor8_
 ; THUMB1-NEXT:    bx lr
 ;
 ; THUMB2-LABEL: cmp_xor8_int_int:
-; THUMB2:         ldrb r2, [r0]
-; THUMB2-NEXT:    movs r0, #0
-; THUMB2-NEXT:    ldrb r1, [r1]
-; THUMB2-NEXT:    teq.w r1, r2
+; THUMB2:       @ %bb.0: @ %entry
+; THUMB2-NEXT:    ldr r0, [r0]
+; THUMB2-NEXT:    ldr r1, [r1]
+; THUMB2-NEXT:    eors r0, r1
+; THUMB2-NEXT:    lsls r0, r0, #24
+; THUMB2-NEXT:    mov.w r0, #0
 ; THUMB2-NEXT:    it eq
 ; THUMB2-NEXT:    moveq r0, #1
 ; THUMB2-NEXT:    bx lr
+                                                i32* nocapture readonly %b) {
 entry:
   %0 = load i32, i32* %a, align 4
   %1 = load i32, i32* %b, align 4
@@ -150,30 +171,36 @@ entry:
 }
 
 define arm_aapcscc zeroext i1 @cmp_xor16(i32* nocapture readonly %a,
-                                         i32* nocapture readonly %b) {
 ; ARM-LABEL: cmp_xor16:
-; ARM:         ldrh r2, [r0]
+; ARM:       @ %bb.0: @ %entry
+; ARM-NEXT:    ldr r0, [r0]
+; ARM-NEXT:    movw r2, #65535
+; ARM-NEXT:    ldr r1, [r1]
+; ARM-NEXT:    eor r1, r1, r0
 ; ARM-NEXT:    mov r0, #0
-; ARM-NEXT:    ldrh r1, [r1]
-; ARM-NEXT:    teq r1, r2
+; ARM-NEXT:    tst r1, r2
 ; ARM-NEXT:    movweq r0, #1
 ; ARM-NEXT:    bx lr
 ;
 ; ARMEB-LABEL: cmp_xor16:
-; ARMEB:         ldrh r2, [r0, #2]
+; ARMEB:       @ %bb.0: @ %entry
+; ARMEB-NEXT:    ldr r0, [r0]
+; ARMEB-NEXT:    movw r2, #65535
+; ARMEB-NEXT:    ldr r1, [r1]
+; ARMEB-NEXT:    eor r1, r1, r0
 ; ARMEB-NEXT:    mov r0, #0
-; ARMEB-NEXT:    ldrh r1, [r1, #2]
-; ARMEB-NEXT:    teq r1, r2
+; ARMEB-NEXT:    tst r1, r2
 ; ARMEB-NEXT:    movweq r0, #1
 ; ARMEB-NEXT:    bx lr
 ;
 ; THUMB1-LABEL: cmp_xor16:
-; THUMB1:         ldrh r0, [r0]
-; THUMB1-NEXT:    ldrh r2, [r1]
+; THUMB1:       @ %bb.0: @ %entry
+; THUMB1-NEXT:    ldr r0, [r0]
+; THUMB1-NEXT:    ldr r2, [r1]
 ; THUMB1-NEXT:    eors r2, r0
 ; THUMB1-NEXT:    movs r0, #1
 ; THUMB1-NEXT:    movs r1, #0
-; THUMB1-NEXT:    cmp r2, #0
+; THUMB1-NEXT:    lsls r2, r2, #16
 ; THUMB1-NEXT:    beq .LBB3_2
 ; THUMB1-NEXT:  @ %bb.1: @ %entry
 ; THUMB1-NEXT:    mov r0, r1
@@ -181,13 +208,16 @@ define arm_aapcscc zeroext i1 @cmp_xor16
 ; THUMB1-NEXT:    bx lr
 ;
 ; THUMB2-LABEL: cmp_xor16:
-; THUMB2:         ldrh r2, [r0]
-; THUMB2-NEXT:    movs r0, #0
-; THUMB2-NEXT:    ldrh r1, [r1]
-; THUMB2-NEXT:    teq.w r1, r2
+; THUMB2:       @ %bb.0: @ %entry
+; THUMB2-NEXT:    ldr r0, [r0]
+; THUMB2-NEXT:    ldr r1, [r1]
+; THUMB2-NEXT:    eors r0, r1
+; THUMB2-NEXT:    lsls r0, r0, #16
+; THUMB2-NEXT:    mov.w r0, #0
 ; THUMB2-NEXT:    it eq
 ; THUMB2-NEXT:    moveq r0, #1
 ; THUMB2-NEXT:    bx lr
+                                         i32* nocapture readonly %b) {
 entry:
   %0 = load i32, i32* %a, align 4
   %1 = load i32, i32* %b, align 4
@@ -198,30 +228,34 @@ entry:
 }
 
 define arm_aapcscc zeroext i1 @cmp_or8_short_short(i16* nocapture readonly %a,
-                                                   i16* nocapture readonly %b) {
 ; ARM-LABEL: cmp_or8_short_short:
-; ARM:         ldrb r0, [r0]
-; ARM-NEXT:    ldrb r1, [r1]
-; ARM-NEXT:    orrs r0, r1, r0
+; ARM:       @ %bb.0: @ %entry
+; ARM-NEXT:    ldrh r0, [r0]
+; ARM-NEXT:    ldrh r1, [r1]
+; ARM-NEXT:    orr r1, r1, r0
 ; ARM-NEXT:    mov r0, #0
+; ARM-NEXT:    tst r1, #255
 ; ARM-NEXT:    movweq r0, #1
 ; ARM-NEXT:    bx lr
 ;
 ; ARMEB-LABEL: cmp_or8_short_short:
-; ARMEB:         ldrb r0, [r0, #1]
-; ARMEB-NEXT:    ldrb r1, [r1, #1]
-; ARMEB-NEXT:    orrs r0, r1, r0
+; ARMEB:       @ %bb.0: @ %entry
+; ARMEB-NEXT:    ldrh r0, [r0]
+; ARMEB-NEXT:    ldrh r1, [r1]
+; ARMEB-NEXT:    orr r1, r1, r0
 ; ARMEB-NEXT:    mov r0, #0
+; ARMEB-NEXT:    tst r1, #255
 ; ARMEB-NEXT:    movweq r0, #1
 ; ARMEB-NEXT:    bx lr
 ;
 ; THUMB1-LABEL: cmp_or8_short_short:
-; THUMB1:         ldrb r0, [r0]
-; THUMB1-NEXT:    ldrb r2, [r1]
+; THUMB1:       @ %bb.0: @ %entry
+; THUMB1-NEXT:    ldrh r0, [r0]
+; THUMB1-NEXT:    ldrh r2, [r1]
 ; THUMB1-NEXT:    orrs r2, r0
 ; THUMB1-NEXT:    movs r0, #1
 ; THUMB1-NEXT:    movs r1, #0
-; THUMB1-NEXT:    cmp r2, #0
+; THUMB1-NEXT:    lsls r2, r2, #24
 ; THUMB1-NEXT:    beq .LBB4_2
 ; THUMB1-NEXT:  @ %bb.1: @ %entry
 ; THUMB1-NEXT:    mov r0, r1
@@ -229,13 +263,16 @@ define arm_aapcscc zeroext i1 @cmp_or8_s
 ; THUMB1-NEXT:    bx lr
 ;
 ; THUMB2-LABEL: cmp_or8_short_short:
-; THUMB2:         ldrb r0, [r0]
-; THUMB2-NEXT:    ldrb r1, [r1]
+; THUMB2:       @ %bb.0: @ %entry
+; THUMB2-NEXT:    ldrh r0, [r0]
+; THUMB2-NEXT:    ldrh r1, [r1]
 ; THUMB2-NEXT:    orrs r0, r1
+; THUMB2-NEXT:    lsls r0, r0, #24
 ; THUMB2-NEXT:    mov.w r0, #0
 ; THUMB2-NEXT:    it eq
 ; THUMB2-NEXT:    moveq r0, #1
 ; THUMB2-NEXT:    bx lr
+                                                   i16* nocapture readonly %b) {
 entry:
   %0 = load i16, i16* %a, align 2
   %1 = load i16, i16* %b, align 2
@@ -246,30 +283,34 @@ entry:
 }
 
 define arm_aapcscc zeroext i1 @cmp_or8_short_int(i16* nocapture readonly %a,
-                                                 i32* nocapture readonly %b) {
 ; ARM-LABEL: cmp_or8_short_int:
-; ARM:         ldrb r0, [r0]
-; ARM-NEXT:    ldrb r1, [r1]
-; ARM-NEXT:    orrs r0, r1, r0
+; ARM:       @ %bb.0: @ %entry
+; ARM-NEXT:    ldrh r0, [r0]
+; ARM-NEXT:    ldr r1, [r1]
+; ARM-NEXT:    orr r1, r1, r0
 ; ARM-NEXT:    mov r0, #0
+; ARM-NEXT:    tst r1, #255
 ; ARM-NEXT:    movweq r0, #1
 ; ARM-NEXT:    bx lr
 ;
 ; ARMEB-LABEL: cmp_or8_short_int:
-; ARMEB:         ldrb r0, [r0, #1]
-; ARMEB-NEXT:    ldrb r1, [r1, #3]
-; ARMEB-NEXT:    orrs r0, r1, r0
+; ARMEB:       @ %bb.0: @ %entry
+; ARMEB-NEXT:    ldrh r0, [r0]
+; ARMEB-NEXT:    ldr r1, [r1]
+; ARMEB-NEXT:    orr r1, r1, r0
 ; ARMEB-NEXT:    mov r0, #0
+; ARMEB-NEXT:    tst r1, #255
 ; ARMEB-NEXT:    movweq r0, #1
 ; ARMEB-NEXT:    bx lr
 ;
 ; THUMB1-LABEL: cmp_or8_short_int:
-; THUMB1:         ldrb r0, [r0]
-; THUMB1-NEXT:    ldrb r2, [r1]
+; THUMB1:       @ %bb.0: @ %entry
+; THUMB1-NEXT:    ldrh r0, [r0]
+; THUMB1-NEXT:    ldr r2, [r1]
 ; THUMB1-NEXT:    orrs r2, r0
 ; THUMB1-NEXT:    movs r0, #1
 ; THUMB1-NEXT:    movs r1, #0
-; THUMB1-NEXT:    cmp r2, #0
+; THUMB1-NEXT:    lsls r2, r2, #24
 ; THUMB1-NEXT:    beq .LBB5_2
 ; THUMB1-NEXT:  @ %bb.1: @ %entry
 ; THUMB1-NEXT:    mov r0, r1
@@ -277,13 +318,16 @@ define arm_aapcscc zeroext i1 @cmp_or8_s
 ; THUMB1-NEXT:    bx lr
 ;
 ; THUMB2-LABEL: cmp_or8_short_int:
-; THUMB2:         ldrb r0, [r0]
-; THUMB2-NEXT:    ldrb r1, [r1]
+; THUMB2:       @ %bb.0: @ %entry
+; THUMB2-NEXT:    ldrh r0, [r0]
+; THUMB2-NEXT:    ldr r1, [r1]
 ; THUMB2-NEXT:    orrs r0, r1
+; THUMB2-NEXT:    lsls r0, r0, #24
 ; THUMB2-NEXT:    mov.w r0, #0
 ; THUMB2-NEXT:    it eq
 ; THUMB2-NEXT:    moveq r0, #1
 ; THUMB2-NEXT:    bx lr
+                                                 i32* nocapture readonly %b) {
 entry:
   %0 = load i16, i16* %a, align 2
   %conv = zext i16 %0 to i32
@@ -295,30 +339,34 @@ entry:
 }
 
 define arm_aapcscc zeroext i1 @cmp_or8_int_int(i32* nocapture readonly %a,
-                                               i32* nocapture readonly %b) {
 ; ARM-LABEL: cmp_or8_int_int:
-; ARM:         ldrb r0, [r0]
-; ARM-NEXT:    ldrb r1, [r1]
-; ARM-NEXT:    orrs r0, r1, r0
+; ARM:       @ %bb.0: @ %entry
+; ARM-NEXT:    ldr r0, [r0]
+; ARM-NEXT:    ldr r1, [r1]
+; ARM-NEXT:    orr r1, r1, r0
 ; ARM-NEXT:    mov r0, #0
+; ARM-NEXT:    tst r1, #255
 ; ARM-NEXT:    movweq r0, #1
 ; ARM-NEXT:    bx lr
 ;
 ; ARMEB-LABEL: cmp_or8_int_int:
-; ARMEB:         ldrb r0, [r0, #3]
-; ARMEB-NEXT:    ldrb r1, [r1, #3]
-; ARMEB-NEXT:    orrs r0, r1, r0
+; ARMEB:       @ %bb.0: @ %entry
+; ARMEB-NEXT:    ldr r0, [r0]
+; ARMEB-NEXT:    ldr r1, [r1]
+; ARMEB-NEXT:    orr r1, r1, r0
 ; ARMEB-NEXT:    mov r0, #0
+; ARMEB-NEXT:    tst r1, #255
 ; ARMEB-NEXT:    movweq r0, #1
 ; ARMEB-NEXT:    bx lr
 ;
 ; THUMB1-LABEL: cmp_or8_int_int:
-; THUMB1:         ldrb r0, [r0]
-; THUMB1-NEXT:    ldrb r2, [r1]
+; THUMB1:       @ %bb.0: @ %entry
+; THUMB1-NEXT:    ldr r0, [r0]
+; THUMB1-NEXT:    ldr r2, [r1]
 ; THUMB1-NEXT:    orrs r2, r0
 ; THUMB1-NEXT:    movs r0, #1
 ; THUMB1-NEXT:    movs r1, #0
-; THUMB1-NEXT:    cmp r2, #0
+; THUMB1-NEXT:    lsls r2, r2, #24
 ; THUMB1-NEXT:    beq .LBB6_2
 ; THUMB1-NEXT:  @ %bb.1: @ %entry
 ; THUMB1-NEXT:    mov r0, r1
@@ -326,13 +374,16 @@ define arm_aapcscc zeroext i1 @cmp_or8_i
 ; THUMB1-NEXT:    bx lr
 ;
 ; THUMB2-LABEL: cmp_or8_int_int:
-; THUMB2:         ldrb r0, [r0]
-; THUMB2-NEXT:    ldrb r1, [r1]
+; THUMB2:       @ %bb.0: @ %entry
+; THUMB2-NEXT:    ldr r0, [r0]
+; THUMB2-NEXT:    ldr r1, [r1]
 ; THUMB2-NEXT:    orrs r0, r1
+; THUMB2-NEXT:    lsls r0, r0, #24
 ; THUMB2-NEXT:    mov.w r0, #0
 ; THUMB2-NEXT:    it eq
 ; THUMB2-NEXT:    moveq r0, #1
 ; THUMB2-NEXT:    bx lr
+                                               i32* nocapture readonly %b) {
 entry:
   %0 = load i32, i32* %a, align 4
   %1 = load i32, i32* %b, align 4
@@ -343,30 +394,36 @@ entry:
 }
 
 define arm_aapcscc zeroext i1 @cmp_or16(i32* nocapture readonly %a,
-                                        i32* nocapture readonly %b) {
 ; ARM-LABEL: cmp_or16:
-; ARM:         ldrh r0, [r0]
-; ARM-NEXT:    ldrh r1, [r1]
-; ARM-NEXT:    orrs r0, r1, r0
+; ARM:       @ %bb.0: @ %entry
+; ARM-NEXT:    ldr r0, [r0]
+; ARM-NEXT:    movw r2, #65535
+; ARM-NEXT:    ldr r1, [r1]
+; ARM-NEXT:    orr r1, r1, r0
 ; ARM-NEXT:    mov r0, #0
+; ARM-NEXT:    tst r1, r2
 ; ARM-NEXT:    movweq r0, #1
 ; ARM-NEXT:    bx lr
 ;
 ; ARMEB-LABEL: cmp_or16:
-; ARMEB:         ldrh r0, [r0, #2]
-; ARMEB-NEXT:    ldrh r1, [r1, #2]
-; ARMEB-NEXT:    orrs r0, r1, r0
+; ARMEB:       @ %bb.0: @ %entry
+; ARMEB-NEXT:    ldr r0, [r0]
+; ARMEB-NEXT:    movw r2, #65535
+; ARMEB-NEXT:    ldr r1, [r1]
+; ARMEB-NEXT:    orr r1, r1, r0
 ; ARMEB-NEXT:    mov r0, #0
+; ARMEB-NEXT:    tst r1, r2
 ; ARMEB-NEXT:    movweq r0, #1
 ; ARMEB-NEXT:    bx lr
 ;
 ; THUMB1-LABEL: cmp_or16:
-; THUMB1:         ldrh r0, [r0]
-; THUMB1-NEXT:    ldrh r2, [r1]
+; THUMB1:       @ %bb.0: @ %entry
+; THUMB1-NEXT:    ldr r0, [r0]
+; THUMB1-NEXT:    ldr r2, [r1]
 ; THUMB1-NEXT:    orrs r2, r0
 ; THUMB1-NEXT:    movs r0, #1
 ; THUMB1-NEXT:    movs r1, #0
-; THUMB1-NEXT:    cmp r2, #0
+; THUMB1-NEXT:    lsls r2, r2, #16
 ; THUMB1-NEXT:    beq .LBB7_2
 ; THUMB1-NEXT:  @ %bb.1: @ %entry
 ; THUMB1-NEXT:    mov r0, r1
@@ -374,13 +431,16 @@ define arm_aapcscc zeroext i1 @cmp_or16(
 ; THUMB1-NEXT:    bx lr
 ;
 ; THUMB2-LABEL: cmp_or16:
-; THUMB2:         ldrh r0, [r0]
-; THUMB2-NEXT:    ldrh r1, [r1]
+; THUMB2:       @ %bb.0: @ %entry
+; THUMB2-NEXT:    ldr r0, [r0]
+; THUMB2-NEXT:    ldr r1, [r1]
 ; THUMB2-NEXT:    orrs r0, r1
+; THUMB2-NEXT:    lsls r0, r0, #16
 ; THUMB2-NEXT:    mov.w r0, #0
 ; THUMB2-NEXT:    it eq
 ; THUMB2-NEXT:    moveq r0, #1
 ; THUMB2-NEXT:    bx lr
+                                        i32* nocapture readonly %b) {
 entry:
   %0 = load i32, i32* %a, align 4
   %1 = load i32, i32* %b, align 4
@@ -391,29 +451,34 @@ entry:
 }
 
 define arm_aapcscc zeroext i1 @cmp_and8_short_short(i16* nocapture readonly %a,
-                                                    i16* nocapture readonly %b) {
 ; ARM-LABEL: cmp_and8_short_short:
-; ARM:         ldrb r2, [r0]
+; ARM:       @ %bb.0: @ %entry
+; ARM-NEXT:    ldrh r1, [r1]
+; ARM-NEXT:    ldrh r0, [r0]
+; ARM-NEXT:    and r1, r0, r1
 ; ARM-NEXT:    mov r0, #0
-; ARM-NEXT:    ldrb r1, [r1]
-; ARM-NEXT:    tst r2, r1
+; ARM-NEXT:    tst r1, #255
 ; ARM-NEXT:    movweq r0, #1
 ; ARM-NEXT:    bx lr
 ;
 ; ARMEB-LABEL: cmp_and8_short_short:
-; ARMEB:         ldrb r2, [r0, #1]
+; ARMEB:       @ %bb.0: @ %entry
+; ARMEB-NEXT:    ldrh r1, [r1]
+; ARMEB-NEXT:    ldrh r0, [r0]
+; ARMEB-NEXT:    and r1, r0, r1
 ; ARMEB-NEXT:    mov r0, #0
-; ARMEB-NEXT:    ldrb r1, [r1, #1]
-; ARMEB-NEXT:    tst r2, r1
+; ARMEB-NEXT:    tst r1, #255
 ; ARMEB-NEXT:    movweq r0, #1
 ; ARMEB-NEXT:    bx lr
 ;
 ; THUMB1-LABEL: cmp_and8_short_short:
-; THUMB1:         ldrb r2, [r1]
-; THUMB1-NEXT:    ldrb r3, [r0]
+; THUMB1:       @ %bb.0: @ %entry
+; THUMB1-NEXT:    ldrh r1, [r1]
+; THUMB1-NEXT:    ldrh r2, [r0]
+; THUMB1-NEXT:    ands r2, r1
 ; THUMB1-NEXT:    movs r0, #1
 ; THUMB1-NEXT:    movs r1, #0
-; THUMB1-NEXT:    tst r3, r2
+; THUMB1-NEXT:    lsls r2, r2, #24
 ; THUMB1-NEXT:    beq .LBB8_2
 ; THUMB1-NEXT:  @ %bb.1: @ %entry
 ; THUMB1-NEXT:    mov r0, r1
@@ -421,13 +486,16 @@ define arm_aapcscc zeroext i1 @cmp_and8_
 ; THUMB1-NEXT:    bx lr
 ;
 ; THUMB2-LABEL: cmp_and8_short_short:
-; THUMB2:         ldrb r2, [r0]
-; THUMB2-NEXT:    movs r0, #0
-; THUMB2-NEXT:    ldrb r1, [r1]
-; THUMB2-NEXT:    tst r2, r1
+; THUMB2:       @ %bb.0: @ %entry
+; THUMB2-NEXT:    ldrh r1, [r1]
+; THUMB2-NEXT:    ldrh r0, [r0]
+; THUMB2-NEXT:    ands r0, r1
+; THUMB2-NEXT:    lsls r0, r0, #24
+; THUMB2-NEXT:    mov.w r0, #0
 ; THUMB2-NEXT:    it eq
 ; THUMB2-NEXT:    moveq r0, #1
 ; THUMB2-NEXT:    bx lr
+                                                    i16* nocapture readonly %b) {
 entry:
   %0 = load i16, i16* %a, align 2
   %1 = load i16, i16* %b, align 2
@@ -438,29 +506,34 @@ entry:
 }
 
 define arm_aapcscc zeroext i1 @cmp_and8_short_int(i16* nocapture readonly %a,
-                                                  i32* nocapture readonly %b) {
 ; ARM-LABEL: cmp_and8_short_int:
-; ARM:         ldrb r2, [r0]
+; ARM:       @ %bb.0: @ %entry
+; ARM-NEXT:    ldrh r0, [r0]
+; ARM-NEXT:    ldr r1, [r1]
+; ARM-NEXT:    and r1, r1, r0
 ; ARM-NEXT:    mov r0, #0
-; ARM-NEXT:    ldrb r1, [r1]
-; ARM-NEXT:    tst r1, r2
+; ARM-NEXT:    tst r1, #255
 ; ARM-NEXT:    movweq r0, #1
 ; ARM-NEXT:    bx lr
 ;
 ; ARMEB-LABEL: cmp_and8_short_int:
-; ARMEB:         ldrb r2, [r0, #1]
+; ARMEB:       @ %bb.0: @ %entry
+; ARMEB-NEXT:    ldrh r0, [r0]
+; ARMEB-NEXT:    ldr r1, [r1]
+; ARMEB-NEXT:    and r1, r1, r0
 ; ARMEB-NEXT:    mov r0, #0
-; ARMEB-NEXT:    ldrb r1, [r1, #3]
-; ARMEB-NEXT:    tst r1, r2
+; ARMEB-NEXT:    tst r1, #255
 ; ARMEB-NEXT:    movweq r0, #1
 ; ARMEB-NEXT:    bx lr
 ;
 ; THUMB1-LABEL: cmp_and8_short_int:
-; THUMB1:         ldrb r2, [r0]
-; THUMB1-NEXT:    ldrb r3, [r1]
+; THUMB1:       @ %bb.0: @ %entry
+; THUMB1-NEXT:    ldrh r0, [r0]
+; THUMB1-NEXT:    ldr r2, [r1]
+; THUMB1-NEXT:    ands r2, r0
 ; THUMB1-NEXT:    movs r0, #1
 ; THUMB1-NEXT:    movs r1, #0
-; THUMB1-NEXT:    tst r3, r2
+; THUMB1-NEXT:    lsls r2, r2, #24
 ; THUMB1-NEXT:    beq .LBB9_2
 ; THUMB1-NEXT:  @ %bb.1: @ %entry
 ; THUMB1-NEXT:    mov r0, r1
@@ -468,13 +541,16 @@ define arm_aapcscc zeroext i1 @cmp_and8_
 ; THUMB1-NEXT:    bx lr
 ;
 ; THUMB2-LABEL: cmp_and8_short_int:
-; THUMB2:         ldrb r2, [r0]
-; THUMB2-NEXT:    movs r0, #0
-; THUMB2-NEXT:    ldrb r1, [r1]
-; THUMB2-NEXT:    tst r1, r2
+; THUMB2:       @ %bb.0: @ %entry
+; THUMB2-NEXT:    ldrh r0, [r0]
+; THUMB2-NEXT:    ldr r1, [r1]
+; THUMB2-NEXT:    ands r0, r1
+; THUMB2-NEXT:    lsls r0, r0, #24
+; THUMB2-NEXT:    mov.w r0, #0
 ; THUMB2-NEXT:    it eq
 ; THUMB2-NEXT:    moveq r0, #1
 ; THUMB2-NEXT:    bx lr
+                                                  i32* nocapture readonly %b) {
 entry:
   %0 = load i16, i16* %a, align 2
   %1 = load i32, i32* %b, align 4
@@ -486,29 +562,34 @@ entry:
 }
 
 define arm_aapcscc zeroext i1 @cmp_and8_int_int(i32* nocapture readonly %a,
-                                                i32* nocapture readonly %b) {
 ; ARM-LABEL: cmp_and8_int_int:
-; ARM:         ldrb r2, [r0]
+; ARM:       @ %bb.0: @ %entry
+; ARM-NEXT:    ldr r1, [r1]
+; ARM-NEXT:    ldr r0, [r0]
+; ARM-NEXT:    and r1, r0, r1
 ; ARM-NEXT:    mov r0, #0
-; ARM-NEXT:    ldrb r1, [r1]
-; ARM-NEXT:    tst r2, r1
+; ARM-NEXT:    tst r1, #255
 ; ARM-NEXT:    movweq r0, #1
 ; ARM-NEXT:    bx lr
 ;
 ; ARMEB-LABEL: cmp_and8_int_int:
-; ARMEB:         ldrb r2, [r0, #3]
+; ARMEB:       @ %bb.0: @ %entry
+; ARMEB-NEXT:    ldr r1, [r1]
+; ARMEB-NEXT:    ldr r0, [r0]
+; ARMEB-NEXT:    and r1, r0, r1
 ; ARMEB-NEXT:    mov r0, #0
-; ARMEB-NEXT:    ldrb r1, [r1, #3]
-; ARMEB-NEXT:    tst r2, r1
+; ARMEB-NEXT:    tst r1, #255
 ; ARMEB-NEXT:    movweq r0, #1
 ; ARMEB-NEXT:    bx lr
 ;
 ; THUMB1-LABEL: cmp_and8_int_int:
-; THUMB1:         ldrb r2, [r1]
-; THUMB1-NEXT:    ldrb r3, [r0]
+; THUMB1:       @ %bb.0: @ %entry
+; THUMB1-NEXT:    ldr r1, [r1]
+; THUMB1-NEXT:    ldr r2, [r0]
+; THUMB1-NEXT:    ands r2, r1
 ; THUMB1-NEXT:    movs r0, #1
 ; THUMB1-NEXT:    movs r1, #0
-; THUMB1-NEXT:    tst r3, r2
+; THUMB1-NEXT:    lsls r2, r2, #24
 ; THUMB1-NEXT:    beq .LBB10_2
 ; THUMB1-NEXT:  @ %bb.1: @ %entry
 ; THUMB1-NEXT:    mov r0, r1
@@ -516,13 +597,16 @@ define arm_aapcscc zeroext i1 @cmp_and8_
 ; THUMB1-NEXT:    bx lr
 ;
 ; THUMB2-LABEL: cmp_and8_int_int:
-; THUMB2:         ldrb r2, [r0]
-; THUMB2-NEXT:    movs r0, #0
-; THUMB2-NEXT:    ldrb r1, [r1]
-; THUMB2-NEXT:    tst r2, r1
+; THUMB2:       @ %bb.0: @ %entry
+; THUMB2-NEXT:    ldr r1, [r1]
+; THUMB2-NEXT:    ldr r0, [r0]
+; THUMB2-NEXT:    ands r0, r1
+; THUMB2-NEXT:    lsls r0, r0, #24
+; THUMB2-NEXT:    mov.w r0, #0
 ; THUMB2-NEXT:    it eq
 ; THUMB2-NEXT:    moveq r0, #1
 ; THUMB2-NEXT:    bx lr
+                                                i32* nocapture readonly %b) {
 entry:
   %0 = load i32, i32* %a, align 4
   %1 = load i32, i32* %b, align 4
@@ -533,29 +617,36 @@ entry:
 }
 
 define arm_aapcscc zeroext i1 @cmp_and16(i32* nocapture readonly %a,
-                                         i32* nocapture readonly %b) {
 ; ARM-LABEL: cmp_and16:
-; ARM:         ldrh r2, [r0]
+; ARM:       @ %bb.0: @ %entry
+; ARM-NEXT:    ldr r1, [r1]
+; ARM-NEXT:    movw r2, #65535
+; ARM-NEXT:    ldr r0, [r0]
+; ARM-NEXT:    and r1, r0, r1
 ; ARM-NEXT:    mov r0, #0
-; ARM-NEXT:    ldrh r1, [r1]
-; ARM-NEXT:    tst r2, r1
+; ARM-NEXT:    tst r1, r2
 ; ARM-NEXT:    movweq r0, #1
 ; ARM-NEXT:    bx lr
 ;
 ; ARMEB-LABEL: cmp_and16:
-; ARMEB:         ldrh r2, [r0, #2]
+; ARMEB:       @ %bb.0: @ %entry
+; ARMEB-NEXT:    ldr r1, [r1]
+; ARMEB-NEXT:    movw r2, #65535
+; ARMEB-NEXT:    ldr r0, [r0]
+; ARMEB-NEXT:    and r1, r0, r1
 ; ARMEB-NEXT:    mov r0, #0
-; ARMEB-NEXT:    ldrh r1, [r1, #2]
-; ARMEB-NEXT:    tst r2, r1
+; ARMEB-NEXT:    tst r1, r2
 ; ARMEB-NEXT:    movweq r0, #1
 ; ARMEB-NEXT:    bx lr
 ;
 ; THUMB1-LABEL: cmp_and16:
-; THUMB1:         ldrh r2, [r1]
-; THUMB1-NEXT:    ldrh r3, [r0]
+; THUMB1:       @ %bb.0: @ %entry
+; THUMB1-NEXT:    ldr r1, [r1]
+; THUMB1-NEXT:    ldr r2, [r0]
+; THUMB1-NEXT:    ands r2, r1
 ; THUMB1-NEXT:    movs r0, #1
 ; THUMB1-NEXT:    movs r1, #0
-; THUMB1-NEXT:    tst r3, r2
+; THUMB1-NEXT:    lsls r2, r2, #16
 ; THUMB1-NEXT:    beq .LBB11_2
 ; THUMB1-NEXT:  @ %bb.1: @ %entry
 ; THUMB1-NEXT:    mov r0, r1
@@ -563,13 +654,16 @@ define arm_aapcscc zeroext i1 @cmp_and16
 ; THUMB1-NEXT:    bx lr
 ;
 ; THUMB2-LABEL: cmp_and16:
-; THUMB2:         ldrh r2, [r0]
-; THUMB2-NEXT:    movs r0, #0
-; THUMB2-NEXT:    ldrh r1, [r1]
-; THUMB2-NEXT:    tst r2, r1
+; THUMB2:       @ %bb.0: @ %entry
+; THUMB2-NEXT:    ldr r1, [r1]
+; THUMB2-NEXT:    ldr r0, [r0]
+; THUMB2-NEXT:    ands r0, r1
+; THUMB2-NEXT:    lsls r0, r0, #16
+; THUMB2-NEXT:    mov.w r0, #0
 ; THUMB2-NEXT:    it eq
 ; THUMB2-NEXT:    moveq r0, #1
 ; THUMB2-NEXT:    bx lr
+                                         i32* nocapture readonly %b) {
 entry:
   %0 = load i32, i32* %a, align 4
   %1 = load i32, i32* %b, align 4
@@ -581,31 +675,35 @@ entry:
 
 define arm_aapcscc i32 @add_and16(i32* nocapture readonly %a, i32 %y, i32 %z) {
 ; ARM-LABEL: add_and16:
-; ARM:         add r1, r1, r2
-; ARM-NEXT:    ldrh r0, [r0]
-; ARM-NEXT:    uxth r1, r1
+; ARM:       @ %bb.0: @ %entry
+; ARM-NEXT:    ldr r0, [r0]
+; ARM-NEXT:    add r1, r1, r2
 ; ARM-NEXT:    orr r0, r0, r1
+; ARM-NEXT:    uxth r0, r0
 ; ARM-NEXT:    bx lr
 ;
 ; ARMEB-LABEL: add_and16:
-; ARMEB:         add r1, r1, r2
-; ARMEB-NEXT:    ldrh r0, [r0, #2]
-; ARMEB-NEXT:    uxth r1, r1
+; ARMEB:       @ %bb.0: @ %entry
+; ARMEB-NEXT:    ldr r0, [r0]
+; ARMEB-NEXT:    add r1, r1, r2
 ; ARMEB-NEXT:    orr r0, r0, r1
+; ARMEB-NEXT:    uxth r0, r0
 ; ARMEB-NEXT:    bx lr
 ;
 ; THUMB1-LABEL: add_and16:
-; THUMB1:         adds r1, r1, r2
-; THUMB1-NEXT:    uxth r1, r1
-; THUMB1-NEXT:    ldrh r0, [r0]
+; THUMB1:       @ %bb.0: @ %entry
+; THUMB1-NEXT:    adds r1, r1, r2
+; THUMB1-NEXT:    ldr r0, [r0]
 ; THUMB1-NEXT:    orrs r0, r1
+; THUMB1-NEXT:    uxth r0, r0
 ; THUMB1-NEXT:    bx lr
 ;
 ; THUMB2-LABEL: add_and16:
-; THUMB2:         add r1, r2
-; THUMB2-NEXT:    ldrh r0, [r0]
-; THUMB2-NEXT:    uxth r1, r1
+; THUMB2:       @ %bb.0: @ %entry
+; THUMB2-NEXT:    ldr r0, [r0]
+; THUMB2-NEXT:    add r1, r2
 ; THUMB2-NEXT:    orrs r0, r1
+; THUMB2-NEXT:    uxth r0, r0
 ; THUMB2-NEXT:    bx lr
 entry:
   %x = load i32, i32* %a, align 4
@@ -617,39 +715,43 @@ entry:
 
 define arm_aapcscc i32 @test1(i32* %a, i32* %b, i32 %x, i32 %y) {
 ; ARM-LABEL: test1:
-; ARM:         mul r2, r2, r3
-; ARM-NEXT:    ldrh r1, [r1]
-; ARM-NEXT:    ldrh r0, [r0]
+; ARM:       @ %bb.0: @ %entry
+; ARM-NEXT:    mul r2, r2, r3
+; ARM-NEXT:    ldr r1, [r1]
+; ARM-NEXT:    ldr r0, [r0]
 ; ARM-NEXT:    eor r0, r0, r1
-; ARM-NEXT:    uxth r1, r2
-; ARM-NEXT:    orr r0, r0, r1
+; ARM-NEXT:    orr r0, r0, r2
+; ARM-NEXT:    uxth r0, r0
 ; ARM-NEXT:    bx lr
 ;
 ; ARMEB-LABEL: test1:
-; ARMEB:         mul r2, r2, r3
-; ARMEB-NEXT:    ldrh r1, [r1, #2]
-; ARMEB-NEXT:    ldrh r0, [r0, #2]
+; ARMEB:       @ %bb.0: @ %entry
+; ARMEB-NEXT:    mul r2, r2, r3
+; ARMEB-NEXT:    ldr r1, [r1]
+; ARMEB-NEXT:    ldr r0, [r0]
 ; ARMEB-NEXT:    eor r0, r0, r1
-; ARMEB-NEXT:    uxth r1, r2
-; ARMEB-NEXT:    orr r0, r0, r1
+; ARMEB-NEXT:    orr r0, r0, r2
+; ARMEB-NEXT:    uxth r0, r0
 ; ARMEB-NEXT:    bx lr
 ;
 ; THUMB1-LABEL: test1:
-; THUMB1:         ldrh r1, [r1]
-; THUMB1-NEXT:    ldrh r4, [r0]
-; THUMB1-NEXT:    eors r4, r1
+; THUMB1:       @ %bb.0: @ %entry
 ; THUMB1-NEXT:    muls r2, r3, r2
-; THUMB1-NEXT:    uxth r0, r2
-; THUMB1-NEXT:    orrs r0, r4
-; THUMB1-NEXT:    pop
+; THUMB1-NEXT:    ldr r1, [r1]
+; THUMB1-NEXT:    ldr r0, [r0]
+; THUMB1-NEXT:    eors r0, r1
+; THUMB1-NEXT:    orrs r0, r2
+; THUMB1-NEXT:    uxth r0, r0
+; THUMB1-NEXT:    bx lr
 ;
 ; THUMB2-LABEL: test1:
-; THUMB2:         ldrh r1, [r1]
-; THUMB2-NEXT:    ldrh r0, [r0]
+; THUMB2:       @ %bb.0: @ %entry
+; THUMB2-NEXT:    muls r2, r3, r2
+; THUMB2-NEXT:    ldr r1, [r1]
+; THUMB2-NEXT:    ldr r0, [r0]
 ; THUMB2-NEXT:    eors r0, r1
-; THUMB2-NEXT:    mul r1, r2, r3
-; THUMB2-NEXT:    uxth r1, r1
-; THUMB2-NEXT:    orrs r0, r1
+; THUMB2-NEXT:    orrs r0, r2
+; THUMB2-NEXT:    uxth r0, r0
 ; THUMB2-NEXT:    bx lr
 entry:
   %0 = load i32, i32* %a, align 4
@@ -663,7 +765,8 @@ entry:
 
 define arm_aapcscc i32 @test2(i32* %a, i32* %b, i32 %x, i32 %y) {
 ; ARM-LABEL: test2:
-; ARM:         ldr r1, [r1]
+; ARM:       @ %bb.0: @ %entry
+; ARM-NEXT:    ldr r1, [r1]
 ; ARM-NEXT:    ldr r0, [r0]
 ; ARM-NEXT:    mul r1, r2, r1
 ; ARM-NEXT:    eor r0, r0, r3
@@ -672,7 +775,8 @@ define arm_aapcscc i32 @test2(i32* %a, i
 ; ARM-NEXT:    bx lr
 ;
 ; ARMEB-LABEL: test2:
-; ARMEB:         ldr r1, [r1]
+; ARMEB:       @ %bb.0: @ %entry
+; ARMEB-NEXT:    ldr r1, [r1]
 ; ARMEB-NEXT:    ldr r0, [r0]
 ; ARMEB-NEXT:    mul r1, r2, r1
 ; ARMEB-NEXT:    eor r0, r0, r3
@@ -681,7 +785,8 @@ define arm_aapcscc i32 @test2(i32* %a, i
 ; ARMEB-NEXT:    bx lr
 ;
 ; THUMB1-LABEL: test2:
-; THUMB1:         ldr r1, [r1]
+; THUMB1:       @ %bb.0: @ %entry
+; THUMB1-NEXT:    ldr r1, [r1]
 ; THUMB1-NEXT:    muls r1, r2, r1
 ; THUMB1-NEXT:    ldr r0, [r0]
 ; THUMB1-NEXT:    eors r0, r3
@@ -690,7 +795,8 @@ define arm_aapcscc i32 @test2(i32* %a, i
 ; THUMB1-NEXT:    bx lr
 ;
 ; THUMB2-LABEL: test2:
-; THUMB2:         ldr r1, [r1]
+; THUMB2:       @ %bb.0: @ %entry
+; THUMB2-NEXT:    ldr r1, [r1]
 ; THUMB2-NEXT:    ldr r0, [r0]
 ; THUMB2-NEXT:    muls r1, r2, r1
 ; THUMB2-NEXT:    eors r0, r3
@@ -709,7 +815,8 @@ entry:
 
 define arm_aapcscc i32 @test3(i32* %a, i32* %b, i32 %x, i16* %y) {
 ; ARM-LABEL: test3:
-; ARM:         ldr r0, [r0]
+; ARM:       @ %bb.0: @ %entry
+; ARM-NEXT:    ldr r0, [r0]
 ; ARM-NEXT:    mul r1, r2, r0
 ; ARM-NEXT:    ldrh r2, [r3]
 ; ARM-NEXT:    eor r0, r0, r2
@@ -718,7 +825,8 @@ define arm_aapcscc i32 @test3(i32* %a, i
 ; ARM-NEXT:    bx lr
 ;
 ; ARMEB-LABEL: test3:
-; ARMEB:         ldr r0, [r0]
+; ARMEB:       @ %bb.0: @ %entry
+; ARMEB-NEXT:    ldr r0, [r0]
 ; ARMEB-NEXT:    mul r1, r2, r0
 ; ARMEB-NEXT:    ldrh r2, [r3]
 ; ARMEB-NEXT:    eor r0, r0, r2
@@ -727,7 +835,8 @@ define arm_aapcscc i32 @test3(i32* %a, i
 ; ARMEB-NEXT:    bx lr
 ;
 ; THUMB1-LABEL: test3:
-; THUMB1:         ldr r0, [r0]
+; THUMB1:       @ %bb.0: @ %entry
+; THUMB1-NEXT:    ldr r0, [r0]
 ; THUMB1-NEXT:    muls r2, r0, r2
 ; THUMB1-NEXT:    ldrh r1, [r3]
 ; THUMB1-NEXT:    eors r1, r0
@@ -736,7 +845,8 @@ define arm_aapcscc i32 @test3(i32* %a, i
 ; THUMB1-NEXT:    bx lr
 ;
 ; THUMB2-LABEL: test3:
-; THUMB2:         ldr r0, [r0]
+; THUMB2:       @ %bb.0: @ %entry
+; THUMB2-NEXT:    ldr r0, [r0]
 ; THUMB2-NEXT:    mul r1, r2, r0
 ; THUMB2-NEXT:    ldrh r2, [r3]
 ; THUMB2-NEXT:    eors r0, r2
@@ -756,39 +866,43 @@ entry:
 
 define arm_aapcscc i32 @test4(i32* %a, i32* %b, i32 %x, i32 %y) {
 ; ARM-LABEL: test4:
-; ARM:         mul r2, r2, r3
-; ARM-NEXT:    ldrh r1, [r1]
-; ARM-NEXT:    ldrh r0, [r0]
+; ARM:       @ %bb.0: @ %entry
+; ARM-NEXT:    mul r2, r2, r3
+; ARM-NEXT:    ldr r1, [r1]
+; ARM-NEXT:    ldr r0, [r0]
 ; ARM-NEXT:    eor r0, r0, r1
-; ARM-NEXT:    uxth r1, r2
-; ARM-NEXT:    orr r0, r0, r1
+; ARM-NEXT:    orr r0, r0, r2
+; ARM-NEXT:    uxth r0, r0
 ; ARM-NEXT:    bx lr
 ;
 ; ARMEB-LABEL: test4:
-; ARMEB:         mul r2, r2, r3
-; ARMEB-NEXT:    ldrh r1, [r1, #2]
-; ARMEB-NEXT:    ldrh r0, [r0, #2]
+; ARMEB:       @ %bb.0: @ %entry
+; ARMEB-NEXT:    mul r2, r2, r3
+; ARMEB-NEXT:    ldr r1, [r1]
+; ARMEB-NEXT:    ldr r0, [r0]
 ; ARMEB-NEXT:    eor r0, r0, r1
-; ARMEB-NEXT:    uxth r1, r2
-; ARMEB-NEXT:    orr r0, r0, r1
+; ARMEB-NEXT:    orr r0, r0, r2
+; ARMEB-NEXT:    uxth r0, r0
 ; ARMEB-NEXT:    bx lr
 ;
 ; THUMB1-LABEL: test4:
-; THUMB1:         ldrh r1, [r1]
-; THUMB1-NEXT:    ldrh r4, [r0]
-; THUMB1-NEXT:    eors r4, r1
+; THUMB1:       @ %bb.0: @ %entry
 ; THUMB1-NEXT:    muls r2, r3, r2
-; THUMB1-NEXT:    uxth r0, r2
-; THUMB1-NEXT:    orrs r0, r4
-; THUMB1-NEXT:    pop
+; THUMB1-NEXT:    ldr r1, [r1]
+; THUMB1-NEXT:    ldr r0, [r0]
+; THUMB1-NEXT:    eors r0, r1
+; THUMB1-NEXT:    orrs r0, r2
+; THUMB1-NEXT:    uxth r0, r0
+; THUMB1-NEXT:    bx lr
 ;
 ; THUMB2-LABEL: test4:
-; THUMB2:         ldrh r1, [r1]
-; THUMB2-NEXT:    ldrh r0, [r0]
+; THUMB2:       @ %bb.0: @ %entry
+; THUMB2-NEXT:    muls r2, r3, r2
+; THUMB2-NEXT:    ldr r1, [r1]
+; THUMB2-NEXT:    ldr r0, [r0]
 ; THUMB2-NEXT:    eors r0, r1
-; THUMB2-NEXT:    mul r1, r2, r3
-; THUMB2-NEXT:    uxth r1, r1
-; THUMB2-NEXT:    orrs r0, r1
+; THUMB2-NEXT:    orrs r0, r2
+; THUMB2-NEXT:    uxth r0, r0
 ; THUMB2-NEXT:    bx lr
 entry:
   %0 = load i32, i32* %a, align 4
@@ -802,39 +916,43 @@ entry:
 
 define arm_aapcscc i32 @test5(i32* %a, i32* %b, i32 %x, i16 zeroext %y) {
 ; ARM-LABEL: test5:
-; ARM:         ldr r1, [r1]
-; ARM-NEXT:    ldrh r0, [r0]
+; ARM:       @ %bb.0: @ %entry
+; ARM-NEXT:    ldr r1, [r1]
+; ARM-NEXT:    ldr r0, [r0]
 ; ARM-NEXT:    mul r1, r2, r1
 ; ARM-NEXT:    eor r0, r0, r3
-; ARM-NEXT:    uxth r1, r1
 ; ARM-NEXT:    orr r0, r0, r1
+; ARM-NEXT:    uxth r0, r0
 ; ARM-NEXT:    bx lr
 ;
 ; ARMEB-LABEL: test5:
-; ARMEB:         ldr r1, [r1]
-; ARMEB-NEXT:    ldrh r0, [r0, #2]
+; ARMEB:       @ %bb.0: @ %entry
+; ARMEB-NEXT:    ldr r1, [r1]
+; ARMEB-NEXT:    ldr r0, [r0]
 ; ARMEB-NEXT:    mul r1, r2, r1
 ; ARMEB-NEXT:    eor r0, r0, r3
-; ARMEB-NEXT:    uxth r1, r1
 ; ARMEB-NEXT:    orr r0, r0, r1
+; ARMEB-NEXT:    uxth r0, r0
 ; ARMEB-NEXT:    bx lr
 ;
 ; THUMB1-LABEL: test5:
-; THUMB1:         ldrh r4, [r0]
-; THUMB1-NEXT:    eors r4, r3
-; THUMB1-NEXT:    ldr r0, [r1]
-; THUMB1-NEXT:    muls r0, r2, r0
+; THUMB1:       @ %bb.0: @ %entry
+; THUMB1-NEXT:    ldr r1, [r1]
+; THUMB1-NEXT:    muls r1, r2, r1
+; THUMB1-NEXT:    ldr r0, [r0]
+; THUMB1-NEXT:    eors r0, r3
+; THUMB1-NEXT:    orrs r0, r1
 ; THUMB1-NEXT:    uxth r0, r0
-; THUMB1-NEXT:    orrs r0, r4
-; THUMB1-NEXT:    pop
+; THUMB1-NEXT:    bx lr
 ;
 ; THUMB2-LABEL: test5:
-; THUMB2:         ldr r1, [r1]
-; THUMB2-NEXT:    ldrh r0, [r0]
+; THUMB2:       @ %bb.0: @ %entry
+; THUMB2-NEXT:    ldr r1, [r1]
+; THUMB2-NEXT:    ldr r0, [r0]
 ; THUMB2-NEXT:    muls r1, r2, r1
 ; THUMB2-NEXT:    eors r0, r3
-; THUMB2-NEXT:    uxth r1, r1
 ; THUMB2-NEXT:    orrs r0, r1
+; THUMB2-NEXT:    uxth r0, r0
 ; THUMB2-NEXT:    bx lr
 entry:
   %0 = load i32, i32* %a, align 4
@@ -846,4 +964,3 @@ entry:
   %and = and i32 %or, 65535
   ret i32 %and
 }
-




More information about the llvm-commits mailing list