[llvm] 1626ee6 - [DAGCombine] Hoist shifts out of a logic operations tree.

Filipp Zhinkin via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 12 02:42:23 PDT 2022


Author: Filipp Zhinkin
Date: 2022-08-12T12:42:16+03:00
New Revision: 1626ee6a9581be98d4a2dc251524ad6dc2725696

URL: https://github.com/llvm/llvm-project/commit/1626ee6a9581be98d4a2dc251524ad6dc2725696
DIFF: https://github.com/llvm/llvm-project/commit/1626ee6a9581be98d4a2dc251524ad6dc2725696.diff

LOG: [DAGCombine] Hoist shifts out of a logic operations tree.

Hoist and combine shift operations from logic operations tree:
logic (logic (SH x0, s), y), (logic (SH x1, s), z)  --> logic (SH (logic x0, x1), s), (logic y, z)

The transformation improves code generated for some cases related to the issue https://github.com/llvm/llvm-project/issues/49541.

Correctness:
https://alive2.llvm.org/ce/z/pVqVgY
https://alive2.llvm.org/ce/z/YVvT-q
https://alive2.llvm.org/ce/z/W5zTBq
https://alive2.llvm.org/ce/z/YfJsvJ
https://alive2.llvm.org/ce/z/3YSyDM
https://alive2.llvm.org/ce/z/Bs2kzk
https://alive2.llvm.org/ce/z/EoQpzU
https://alive2.llvm.org/ce/z/Jnc_5H
https://alive2.llvm.org/ce/z/_LP6k_
https://alive2.llvm.org/ce/z/KvZNC9

Reviewed By: spatel

Differential Revision: https://reviews.llvm.org/D131189

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/ARM/icmp-shift-opt.ll
    llvm/test/CodeGen/ARM/shift-combine.ll
    llvm/test/CodeGen/PowerPC/p10-handle-split-promote-vec.ll
    llvm/test/CodeGen/X86/bswap_tree2.ll
    llvm/test/CodeGen/X86/shift-combine.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 17451b40f7502..d396f5f130a9c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6157,6 +6157,43 @@ static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
   return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
 }
 
+/// Given a tree of logic operations that all use N's opcode, shaped like
+/// LOGIC (LOGIC (A, B), LOGIC (C, D)), try to hoist two shifts with the same
+/// shift amount out of both inner operations and merge them into one shift:
+/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
+/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
+/// Returns an empty SDValue when the tree does not match.
+static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
+                                     SDValue RightHand, SelectionDAG &DAG) {
+  unsigned LogicOpcode = N->getOpcode();
+  assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
+          LogicOpcode == ISD::XOR));
+  if (LeftHand.getOpcode() != LogicOpcode ||
+      RightHand.getOpcode() != LogicOpcode)
+    return SDValue(); // Both operands must be the same logic op as N.
+  if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
+    return SDValue(); // Only rewrite inner ops that have no other users.
+
+  // Try to match one of the following patterns:
+  // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
+  // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
+  // Note that foldLogicOfShifts will handle commuted versions of the left hand
+  // itself.
+  SDValue CombinedShifts, W;
+  SDValue R0 = RightHand.getOperand(0);
+  SDValue R1 = RightHand.getOperand(1);
+  if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
+    W = R1; // R0 was folded into the left hand; R1 is the leftover operand.
+  else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
+    W = R0; // R1 was folded into the left hand; R0 is the leftover operand.
+  else
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  SDLoc DL(N);
+  return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
+}
+
 SDValue DAGCombiner::visitAND(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -6530,6 +6567,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
     if (SDValue V = foldAndToUsubsat(N, DAG))
       return V;
 
+  // Postpone until legalization is completed to avoid interference with bswap
+  // folding.
+  if (LegalOperations || VT.isVector())
+    if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
+      return R;
+
   return SDValue();
 }
 
@@ -7130,6 +7173,12 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
     if (SDValue Combined = visitADDLike(N))
       return Combined;
 
+  // Postpone until legalization is completed to avoid interference with bswap
+  // folding.
+  if (LegalOperations || VT.isVector())
+    if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
+      return R;
+
   return SDValue();
 }
 
@@ -8614,6 +8663,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
     return R;
   if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
     return R;
+  if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
+    return R;
 
   // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
   if (SDValue MM = unfoldMaskedMerge(N))

diff  --git a/llvm/test/CodeGen/ARM/icmp-shift-opt.ll b/llvm/test/CodeGen/ARM/icmp-shift-opt.ll
index a78978f977f86..b51eb846f24b3 100644
--- a/llvm/test/CodeGen/ARM/icmp-shift-opt.ll
+++ b/llvm/test/CodeGen/ARM/icmp-shift-opt.ll
@@ -139,15 +139,13 @@ define i1 @opt_setcc_expanded_shl_wrong_shifts(i32 %a, i32 %b) nounwind {
 define i1 @opt_setcc_shl_ne_zero_i128(i128 %a) nounwind {
 ; CHECK-LABEL: opt_setcc_shl_ne_zero_i128:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    lsl r3, r3, #17
-; CHECK-NEXT:    orr r12, r3, r2, lsr #15
-; CHECK-NEXT:    lsl r3, r1, #17
-; CHECK-NEXT:    orr r3, r3, r0, lsr #15
+; CHECK-NEXT:    orr r3, r1, r3
 ; CHECK-NEXT:    orr r0, r2, r0
-; CHECK-NEXT:    orr r3, r3, r12
-; CHECK-NEXT:    lsl r0, r0, #17
-; CHECK-NEXT:    orr r0, r0, r1, lsr #15
-; CHECK-NEXT:    orrs r0, r0, r3
+; CHECK-NEXT:    orr r2, r0, r3
+; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    lsr r0, r0, #15
+; CHECK-NEXT:    orr r0, r0, r2, lsl #17
+; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    movwne r0, #1
 ; CHECK-NEXT:    bx lr
   %shl = shl i128 %a, 17

diff  --git a/llvm/test/CodeGen/ARM/shift-combine.ll b/llvm/test/CodeGen/ARM/shift-combine.ll
index 3529947339b00..ea2b9dcac9059 100644
--- a/llvm/test/CodeGen/ARM/shift-combine.ll
+++ b/llvm/test/CodeGen/ARM/shift-combine.ll
@@ -900,12 +900,11 @@ define i64 @or_tree_with_shifts_i64(i64 %a, i64 %b, i64 %c, i64 %d) {
 ; CHECK-ARM-NEXT:    .save {r11, lr}
 ; CHECK-ARM-NEXT:    push {r11, lr}
 ; CHECK-ARM-NEXT:    ldr lr, [sp, #16]
-; CHECK-ARM-NEXT:    lsl r3, r3, #16
-; CHECK-ARM-NEXT:    ldr r12, [sp, #8]
-; CHECK-ARM-NEXT:    orr r3, r3, r2, lsr #16
 ; CHECK-ARM-NEXT:    orr r0, r0, r2, lsl #16
-; CHECK-ARM-NEXT:    orr r1, r1, lr, lsl #16
-; CHECK-ARM-NEXT:    orr r1, r1, r3
+; CHECK-ARM-NEXT:    ldr r12, [sp, #8]
+; CHECK-ARM-NEXT:    orr r3, lr, r3
+; CHECK-ARM-NEXT:    orr r1, r1, r3, lsl #16
+; CHECK-ARM-NEXT:    orr r1, r1, r2, lsr #16
 ; CHECK-ARM-NEXT:    orr r1, r1, r12
 ; CHECK-ARM-NEXT:    pop {r11, pc}
 ;
@@ -914,41 +913,38 @@ define i64 @or_tree_with_shifts_i64(i64 %a, i64 %b, i64 %c, i64 %d) {
 ; CHECK-BE-NEXT:    .save {r11, lr}
 ; CHECK-BE-NEXT:    push {r11, lr}
 ; CHECK-BE-NEXT:    ldr lr, [sp, #20]
-; CHECK-BE-NEXT:    lsl r2, r2, #16
-; CHECK-BE-NEXT:    ldr r12, [sp, #12]
-; CHECK-BE-NEXT:    orr r2, r2, r3, lsr #16
 ; CHECK-BE-NEXT:    orr r1, r1, r3, lsl #16
-; CHECK-BE-NEXT:    orr r0, r0, lr, lsl #16
-; CHECK-BE-NEXT:    orr r0, r0, r2
+; CHECK-BE-NEXT:    ldr r12, [sp, #12]
+; CHECK-BE-NEXT:    orr r2, lr, r2
+; CHECK-BE-NEXT:    orr r0, r0, r2, lsl #16
+; CHECK-BE-NEXT:    orr r0, r0, r3, lsr #16
 ; CHECK-BE-NEXT:    orr r0, r0, r12
 ; CHECK-BE-NEXT:    pop {r11, pc}
 ;
 ; CHECK-ALIGN-LABEL: or_tree_with_shifts_i64:
 ; CHECK-ALIGN:       @ %bb.0:
 ; CHECK-ALIGN-NEXT:    ldr.w r12, [sp, #8]
-; CHECK-ALIGN-NEXT:    lsls r3, r3, #16
-; CHECK-ALIGN-NEXT:    orr.w r3, r3, r2, lsr #16
 ; CHECK-ALIGN-NEXT:    orr.w r0, r0, r2, lsl #16
-; CHECK-ALIGN-NEXT:    orr.w r1, r1, r12, lsl #16
-; CHECK-ALIGN-NEXT:    orrs r1, r3
-; CHECK-ALIGN-NEXT:    ldr r3, [sp]
-; CHECK-ALIGN-NEXT:    orrs r1, r3
+; CHECK-ALIGN-NEXT:    orr.w r3, r3, r12
+; CHECK-ALIGN-NEXT:    orr.w r1, r1, r3, lsl #16
+; CHECK-ALIGN-NEXT:    orr.w r1, r1, r2, lsr #16
+; CHECK-ALIGN-NEXT:    ldr r2, [sp]
+; CHECK-ALIGN-NEXT:    orrs r1, r2
 ; CHECK-ALIGN-NEXT:    bx lr
 ;
 ; CHECK-V6M-LABEL: or_tree_with_shifts_i64:
 ; CHECK-V6M:       @ %bb.0:
 ; CHECK-V6M-NEXT:    push {r4, lr}
-; CHECK-V6M-NEXT:    lsrs r4, r2, #16
-; CHECK-V6M-NEXT:    lsls r3, r3, #16
-; CHECK-V6M-NEXT:    adds r3, r3, r4
+; CHECK-V6M-NEXT:    lsls r4, r2, #16
+; CHECK-V6M-NEXT:    orrs r0, r4
 ; CHECK-V6M-NEXT:    ldr r4, [sp, #16]
-; CHECK-V6M-NEXT:    lsls r4, r4, #16
-; CHECK-V6M-NEXT:    orrs r1, r4
+; CHECK-V6M-NEXT:    orrs r4, r3
+; CHECK-V6M-NEXT:    lsls r3, r4, #16
 ; CHECK-V6M-NEXT:    orrs r1, r3
-; CHECK-V6M-NEXT:    ldr r3, [sp, #8]
-; CHECK-V6M-NEXT:    orrs r1, r3
-; CHECK-V6M-NEXT:    lsls r2, r2, #16
-; CHECK-V6M-NEXT:    orrs r0, r2
+; CHECK-V6M-NEXT:    lsrs r2, r2, #16
+; CHECK-V6M-NEXT:    orrs r1, r2
+; CHECK-V6M-NEXT:    ldr r2, [sp, #8]
+; CHECK-V6M-NEXT:    orrs r1, r2
 ; CHECK-V6M-NEXT:    pop {r4, pc}
   %b.shifted = shl i64 %b, 16
   %c.shifted = shl i64 %c, 32
@@ -962,39 +958,38 @@ define i64 @or_tree_with_shifts_i64(i64 %a, i64 %b, i64 %c, i64 %d) {
 define i32 @or_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
 ; CHECK-ARM-LABEL: or_tree_with_shifts_i32:
 ; CHECK-ARM:       @ %bb.0:
-; CHECK-ARM-NEXT:    orr r2, r3, r2, lsl #16
-; CHECK-ARM-NEXT:    orr r0, r1, r0, lsl #16
 ; CHECK-ARM-NEXT:    orr r0, r0, r2
+; CHECK-ARM-NEXT:    orr r0, r1, r0, lsl #16
+; CHECK-ARM-NEXT:    orr r0, r0, r3
 ; CHECK-ARM-NEXT:    bx lr
 ;
 ; CHECK-BE-LABEL: or_tree_with_shifts_i32:
 ; CHECK-BE:       @ %bb.0:
-; CHECK-BE-NEXT:    orr r2, r3, r2, lsl #16
-; CHECK-BE-NEXT:    orr r0, r1, r0, lsl #16
 ; CHECK-BE-NEXT:    orr r0, r0, r2
+; CHECK-BE-NEXT:    orr r0, r1, r0, lsl #16
+; CHECK-BE-NEXT:    orr r0, r0, r3
 ; CHECK-BE-NEXT:    bx lr
 ;
 ; CHECK-THUMB-LABEL: or_tree_with_shifts_i32:
 ; CHECK-THUMB:       @ %bb.0:
-; CHECK-THUMB-NEXT:    orr.w r2, r3, r2, lsl #16
-; CHECK-THUMB-NEXT:    orr.w r0, r1, r0, lsl #16
 ; CHECK-THUMB-NEXT:    orrs r0, r2
+; CHECK-THUMB-NEXT:    orr.w r0, r1, r0, lsl #16
+; CHECK-THUMB-NEXT:    orrs r0, r3
 ; CHECK-THUMB-NEXT:    bx lr
 ;
 ; CHECK-ALIGN-LABEL: or_tree_with_shifts_i32:
 ; CHECK-ALIGN:       @ %bb.0:
-; CHECK-ALIGN-NEXT:    orr.w r2, r3, r2, lsl #16
-; CHECK-ALIGN-NEXT:    orr.w r0, r1, r0, lsl #16
 ; CHECK-ALIGN-NEXT:    orrs r0, r2
+; CHECK-ALIGN-NEXT:    orr.w r0, r1, r0, lsl #16
+; CHECK-ALIGN-NEXT:    orrs r0, r3
 ; CHECK-ALIGN-NEXT:    bx lr
 ;
 ; CHECK-V6M-LABEL: or_tree_with_shifts_i32:
 ; CHECK-V6M:       @ %bb.0:
-; CHECK-V6M-NEXT:    lsls r2, r2, #16
-; CHECK-V6M-NEXT:    orrs r2, r3
+; CHECK-V6M-NEXT:    orrs r0, r2
 ; CHECK-V6M-NEXT:    lsls r0, r0, #16
 ; CHECK-V6M-NEXT:    orrs r0, r1
-; CHECK-V6M-NEXT:    orrs r0, r2
+; CHECK-V6M-NEXT:    orrs r0, r3
 ; CHECK-V6M-NEXT:    bx lr
   %a.shifted = shl i32 %a, 16
   %c.shifted = shl i32 %c, 16
@@ -1007,39 +1002,38 @@ define i32 @or_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
 define i32 @xor_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
 ; CHECK-ARM-LABEL: xor_tree_with_shifts_i32:
 ; CHECK-ARM:       @ %bb.0:
-; CHECK-ARM-NEXT:    eor r2, r3, r2, lsr #16
-; CHECK-ARM-NEXT:    eor r0, r1, r0, lsr #16
 ; CHECK-ARM-NEXT:    eor r0, r0, r2
+; CHECK-ARM-NEXT:    eor r0, r1, r0, lsr #16
+; CHECK-ARM-NEXT:    eor r0, r0, r3
 ; CHECK-ARM-NEXT:    bx lr
 ;
 ; CHECK-BE-LABEL: xor_tree_with_shifts_i32:
 ; CHECK-BE:       @ %bb.0:
-; CHECK-BE-NEXT:    eor r2, r3, r2, lsr #16
-; CHECK-BE-NEXT:    eor r0, r1, r0, lsr #16
 ; CHECK-BE-NEXT:    eor r0, r0, r2
+; CHECK-BE-NEXT:    eor r0, r1, r0, lsr #16
+; CHECK-BE-NEXT:    eor r0, r0, r3
 ; CHECK-BE-NEXT:    bx lr
 ;
 ; CHECK-THUMB-LABEL: xor_tree_with_shifts_i32:
 ; CHECK-THUMB:       @ %bb.0:
-; CHECK-THUMB-NEXT:    eor.w r2, r3, r2, lsr #16
-; CHECK-THUMB-NEXT:    eor.w r0, r1, r0, lsr #16
 ; CHECK-THUMB-NEXT:    eors r0, r2
+; CHECK-THUMB-NEXT:    eor.w r0, r1, r0, lsr #16
+; CHECK-THUMB-NEXT:    eors r0, r3
 ; CHECK-THUMB-NEXT:    bx lr
 ;
 ; CHECK-ALIGN-LABEL: xor_tree_with_shifts_i32:
 ; CHECK-ALIGN:       @ %bb.0:
-; CHECK-ALIGN-NEXT:    eor.w r2, r3, r2, lsr #16
-; CHECK-ALIGN-NEXT:    eor.w r0, r1, r0, lsr #16
 ; CHECK-ALIGN-NEXT:    eors r0, r2
+; CHECK-ALIGN-NEXT:    eor.w r0, r1, r0, lsr #16
+; CHECK-ALIGN-NEXT:    eors r0, r3
 ; CHECK-ALIGN-NEXT:    bx lr
 ;
 ; CHECK-V6M-LABEL: xor_tree_with_shifts_i32:
 ; CHECK-V6M:       @ %bb.0:
-; CHECK-V6M-NEXT:    lsrs r2, r2, #16
-; CHECK-V6M-NEXT:    eors r2, r3
+; CHECK-V6M-NEXT:    eors r0, r2
 ; CHECK-V6M-NEXT:    lsrs r0, r0, #16
 ; CHECK-V6M-NEXT:    eors r0, r1
-; CHECK-V6M-NEXT:    eors r0, r2
+; CHECK-V6M-NEXT:    eors r0, r3
 ; CHECK-V6M-NEXT:    bx lr
   %a.shifted = lshr i32 %a, 16
   %c.shifted = lshr i32 %c, 16
@@ -1052,39 +1046,38 @@ define i32 @xor_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
 define i32 @and_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
 ; CHECK-ARM-LABEL: and_tree_with_shifts_i32:
 ; CHECK-ARM:       @ %bb.0:
-; CHECK-ARM-NEXT:    and r2, r3, r2, asr #16
-; CHECK-ARM-NEXT:    and r0, r1, r0, asr #16
 ; CHECK-ARM-NEXT:    and r0, r0, r2
+; CHECK-ARM-NEXT:    and r0, r1, r0, asr #16
+; CHECK-ARM-NEXT:    and r0, r0, r3
 ; CHECK-ARM-NEXT:    bx lr
 ;
 ; CHECK-BE-LABEL: and_tree_with_shifts_i32:
 ; CHECK-BE:       @ %bb.0:
-; CHECK-BE-NEXT:    and r2, r3, r2, asr #16
-; CHECK-BE-NEXT:    and r0, r1, r0, asr #16
 ; CHECK-BE-NEXT:    and r0, r0, r2
+; CHECK-BE-NEXT:    and r0, r1, r0, asr #16
+; CHECK-BE-NEXT:    and r0, r0, r3
 ; CHECK-BE-NEXT:    bx lr
 ;
 ; CHECK-THUMB-LABEL: and_tree_with_shifts_i32:
 ; CHECK-THUMB:       @ %bb.0:
-; CHECK-THUMB-NEXT:    and.w r2, r3, r2, asr #16
-; CHECK-THUMB-NEXT:    and.w r0, r1, r0, asr #16
 ; CHECK-THUMB-NEXT:    ands r0, r2
+; CHECK-THUMB-NEXT:    and.w r0, r1, r0, asr #16
+; CHECK-THUMB-NEXT:    ands r0, r3
 ; CHECK-THUMB-NEXT:    bx lr
 ;
 ; CHECK-ALIGN-LABEL: and_tree_with_shifts_i32:
 ; CHECK-ALIGN:       @ %bb.0:
-; CHECK-ALIGN-NEXT:    and.w r2, r3, r2, asr #16
-; CHECK-ALIGN-NEXT:    and.w r0, r1, r0, asr #16
 ; CHECK-ALIGN-NEXT:    ands r0, r2
+; CHECK-ALIGN-NEXT:    and.w r0, r1, r0, asr #16
+; CHECK-ALIGN-NEXT:    ands r0, r3
 ; CHECK-ALIGN-NEXT:    bx lr
 ;
 ; CHECK-V6M-LABEL: and_tree_with_shifts_i32:
 ; CHECK-V6M:       @ %bb.0:
-; CHECK-V6M-NEXT:    asrs r2, r2, #16
-; CHECK-V6M-NEXT:    ands r2, r3
+; CHECK-V6M-NEXT:    ands r0, r2
 ; CHECK-V6M-NEXT:    asrs r0, r0, #16
 ; CHECK-V6M-NEXT:    ands r0, r1
-; CHECK-V6M-NEXT:    ands r0, r2
+; CHECK-V6M-NEXT:    ands r0, r3
 ; CHECK-V6M-NEXT:    bx lr
   %a.shifted = ashr i32 %a, 16
   %c.shifted = ashr i32 %c, 16
@@ -1098,49 +1091,36 @@ define i32 @logic_tree_with_shifts_var_i32(i32 %a, i32 %b, i32 %c, i32 %d, i32 %
 ; CHECK-ARM-LABEL: logic_tree_with_shifts_var_i32:
 ; CHECK-ARM:       @ %bb.0:
 ; CHECK-ARM-NEXT:    ldr r12, [sp]
-; CHECK-ARM-NEXT:    orr r2, r3, r2, lsl r12
-; CHECK-ARM-NEXT:    orr r0, r1, r0, lsl r12
 ; CHECK-ARM-NEXT:    orr r0, r0, r2
+; CHECK-ARM-NEXT:    orr r0, r1, r0, lsl r12
+; CHECK-ARM-NEXT:    orr r0, r0, r3
 ; CHECK-ARM-NEXT:    bx lr
 ;
 ; CHECK-BE-LABEL: logic_tree_with_shifts_var_i32:
 ; CHECK-BE:       @ %bb.0:
 ; CHECK-BE-NEXT:    ldr r12, [sp]
-; CHECK-BE-NEXT:    orr r2, r3, r2, lsl r12
-; CHECK-BE-NEXT:    orr r0, r1, r0, lsl r12
 ; CHECK-BE-NEXT:    orr r0, r0, r2
+; CHECK-BE-NEXT:    orr r0, r1, r0, lsl r12
+; CHECK-BE-NEXT:    orr r0, r0, r3
 ; CHECK-BE-NEXT:    bx lr
 ;
-; CHECK-THUMB-LABEL: logic_tree_with_shifts_var_i32:
-; CHECK-THUMB:       @ %bb.0:
-; CHECK-THUMB-NEXT:    ldr.w r12, [sp]
-; CHECK-THUMB-NEXT:    lsl.w r2, r2, r12
-; CHECK-THUMB-NEXT:    lsl.w r0, r0, r12
-; CHECK-THUMB-NEXT:    orrs r2, r3
-; CHECK-THUMB-NEXT:    orrs r0, r1
-; CHECK-THUMB-NEXT:    orrs r0, r2
-; CHECK-THUMB-NEXT:    bx lr
-;
 ; CHECK-ALIGN-LABEL: logic_tree_with_shifts_var_i32:
 ; CHECK-ALIGN:       @ %bb.0:
-; CHECK-ALIGN-NEXT:    ldr.w r12, [sp]
-; CHECK-ALIGN-NEXT:    lsl.w r2, r2, r12
-; CHECK-ALIGN-NEXT:    lsl.w r0, r0, r12
-; CHECK-ALIGN-NEXT:    orrs r2, r3
-; CHECK-ALIGN-NEXT:    orrs r0, r1
 ; CHECK-ALIGN-NEXT:    orrs r0, r2
+; CHECK-ALIGN-NEXT:    ldr r2, [sp]
+; CHECK-ALIGN-NEXT:    lsls r0, r2
+; CHECK-ALIGN-NEXT:    orrs r0, r1
+; CHECK-ALIGN-NEXT:    orrs r0, r3
 ; CHECK-ALIGN-NEXT:    bx lr
 ;
 ; CHECK-V6M-LABEL: logic_tree_with_shifts_var_i32:
 ; CHECK-V6M:       @ %bb.0:
-; CHECK-V6M-NEXT:    push {r4, lr}
-; CHECK-V6M-NEXT:    ldr r4, [sp, #8]
-; CHECK-V6M-NEXT:    lsls r2, r4
-; CHECK-V6M-NEXT:    orrs r2, r3
-; CHECK-V6M-NEXT:    lsls r0, r4
-; CHECK-V6M-NEXT:    orrs r0, r1
 ; CHECK-V6M-NEXT:    orrs r0, r2
-; CHECK-V6M-NEXT:    pop {r4, pc}
+; CHECK-V6M-NEXT:    ldr r2, [sp]
+; CHECK-V6M-NEXT:    lsls r0, r2
+; CHECK-V6M-NEXT:    orrs r0, r1
+; CHECK-V6M-NEXT:    orrs r0, r3
+; CHECK-V6M-NEXT:    bx lr
   %a.shifted = shl i32 %a, %s
   %c.shifted = shl i32 %c, %s
   %or.ab = or i32 %b, %a.shifted
@@ -1242,24 +1222,22 @@ define i32 @logic_tree_with_mismatching_shifts2_i32(i32 %a, i32 %b, i32 %c, i32
 define <4 x i32> @or_tree_with_shifts_vec_i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
 ; CHECK-ARM-LABEL: or_tree_with_shifts_vec_i32:
 ; CHECK-ARM:       @ %bb.0:
-; CHECK-ARM-NEXT:    vshl.i32 q8, q2, #16
-; CHECK-ARM-NEXT:    vshl.i32 q9, q0, #16
-; CHECK-ARM-NEXT:    vorr q8, q8, q3
-; CHECK-ARM-NEXT:    vorr q9, q9, q1
-; CHECK-ARM-NEXT:    vorr q0, q9, q8
+; CHECK-ARM-NEXT:    vorr q8, q0, q2
+; CHECK-ARM-NEXT:    vshl.i32 q8, q8, #16
+; CHECK-ARM-NEXT:    vorr q8, q8, q1
+; CHECK-ARM-NEXT:    vorr q0, q8, q3
 ; CHECK-ARM-NEXT:    bx lr
 ;
 ; CHECK-BE-LABEL: or_tree_with_shifts_vec_i32:
 ; CHECK-BE:       @ %bb.0:
 ; CHECK-BE-NEXT:    vrev64.32 q8, q2
 ; CHECK-BE-NEXT:    vrev64.32 q9, q0
-; CHECK-BE-NEXT:    vshl.i32 q8, q8, #16
+; CHECK-BE-NEXT:    vorr q8, q9, q8
+; CHECK-BE-NEXT:    vrev64.32 q9, q1
 ; CHECK-BE-NEXT:    vrev64.32 q10, q3
-; CHECK-BE-NEXT:    vshl.i32 q9, q9, #16
-; CHECK-BE-NEXT:    vrev64.32 q11, q1
+; CHECK-BE-NEXT:    vshl.i32 q8, q8, #16
+; CHECK-BE-NEXT:    vorr q8, q8, q9
 ; CHECK-BE-NEXT:    vorr q8, q8, q10
-; CHECK-BE-NEXT:    vorr q9, q9, q11
-; CHECK-BE-NEXT:    vorr q8, q9, q8
 ; CHECK-BE-NEXT:    vrev64.32 q0, q8
 ; CHECK-BE-NEXT:    bx lr
   %a.shifted = shl <4 x i32> %a, <i32 16, i32 16, i32 16, i32 16>

diff  --git a/llvm/test/CodeGen/PowerPC/p10-handle-split-promote-vec.ll b/llvm/test/CodeGen/PowerPC/p10-handle-split-promote-vec.ll
index ad0bd404d313e..82641312666bc 100644
--- a/llvm/test/CodeGen/PowerPC/p10-handle-split-promote-vec.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-handle-split-promote-vec.ll
@@ -8,90 +8,91 @@
 define i32 @SplitPromoteVectorTest(i32 %Opc) align 2 {
 ; CHECK-LABEL: SplitPromoteVectorTest:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plxv v3, .LCPI0_0@PCREL(0), 1
-; CHECK-NEXT:    mtvsrws v2, r3
-; CHECK-NEXT:    li r5, 4
+; CHECK-NEXT:    plxv v2, .LCPI0_0@PCREL(0), 1
+; CHECK-NEXT:    plxv v4, .LCPI0_1@PCREL(0), 1
+; CHECK-NEXT:    mtvsrws v3, r3
+; CHECK-NEXT:    li r5, 12
 ; CHECK-NEXT:    li r8, 0
-; CHECK-NEXT:    vcmpequw v3, v2, v3
-; CHECK-NEXT:    vextubrx r6, r5, v3
-; CHECK-NEXT:    vextubrx r4, r8, v3
-; CHECK-NEXT:    rlwimi r4, r6, 1, 30, 30
-; CHECK-NEXT:    li r6, 8
-; CHECK-NEXT:    vextubrx r7, r6, v3
-; CHECK-NEXT:    rlwimi r4, r7, 2, 29, 29
-; CHECK-NEXT:    li r7, 12
-; CHECK-NEXT:    vextubrx r9, r7, v3
-; CHECK-NEXT:    plxv v3, .LCPI0_1@PCREL(0), 1
-; CHECK-NEXT:    rlwimi r4, r9, 3, 28, 28
-; CHECK-NEXT:    vcmpequw v3, v2, v3
-; CHECK-NEXT:    vextubrx r9, r8, v3
-; CHECK-NEXT:    rlwimi r4, r9, 4, 27, 27
-; CHECK-NEXT:    vextubrx r9, r5, v3
-; CHECK-NEXT:    rlwimi r4, r9, 5, 26, 26
-; CHECK-NEXT:    vextubrx r9, r6, v3
-; CHECK-NEXT:    rlwimi r4, r9, 6, 25, 25
-; CHECK-NEXT:    vextubrx r9, r7, v3
-; CHECK-NEXT:    plxv v3, .LCPI0_2@PCREL(0), 1
-; CHECK-NEXT:    rlwimi r4, r9, 7, 24, 24
-; CHECK-NEXT:    vcmpequw v3, v2, v3
-; CHECK-NEXT:    vextubrx r9, r8, v3
-; CHECK-NEXT:    rlwimi r4, r9, 8, 23, 23
-; CHECK-NEXT:    vextubrx r9, r5, v3
-; CHECK-NEXT:    rlwimi r4, r9, 9, 22, 22
-; CHECK-NEXT:    vextubrx r9, r6, v3
-; CHECK-NEXT:    rlwimi r4, r9, 10, 21, 21
-; CHECK-NEXT:    vextubrx r9, r7, v3
-; CHECK-NEXT:    plxv v3, .LCPI0_3@PCREL(0), 1
-; CHECK-NEXT:    rlwimi r4, r9, 11, 20, 20
-; CHECK-NEXT:    vcmpequw v3, v2, v3
+; CHECK-NEXT:    vcmpequw v2, v3, v2
+; CHECK-NEXT:    plxv v5, .LCPI0_2@PCREL(0), 1
+; CHECK-NEXT:    vcmpequw v4, v3, v4
+; CHECK-NEXT:    vcmpequw v5, v3, v5
+; CHECK-NEXT:    vextubrx r4, r5, v2
+; CHECK-NEXT:    vextubrx r6, r5, v4
+; CHECK-NEXT:    or r9, r6, r4
+; CHECK-NEXT:    li r6, 4
+; CHECK-NEXT:    vextubrx r4, r8, v5
+; CHECK-NEXT:    vextubrx r7, r6, v5
+; CHECK-NEXT:    rlwimi r4, r7, 1, 30, 30
+; CHECK-NEXT:    li r7, 8
+; CHECK-NEXT:    vextubrx r10, r7, v5
+; CHECK-NEXT:    rlwimi r4, r10, 2, 29, 29
+; CHECK-NEXT:    vextubrx r10, r5, v5
+; CHECK-NEXT:    plxv v5, .LCPI0_3@PCREL(0), 1
+; CHECK-NEXT:    rlwimi r4, r10, 3, 28, 28
+; CHECK-NEXT:    vcmpequw v5, v3, v5
+; CHECK-NEXT:    vextubrx r10, r8, v5
+; CHECK-NEXT:    rlwimi r4, r10, 4, 27, 27
+; CHECK-NEXT:    vextubrx r10, r6, v5
+; CHECK-NEXT:    rlwimi r4, r10, 5, 26, 26
+; CHECK-NEXT:    vextubrx r10, r7, v5
+; CHECK-NEXT:    rlwimi r4, r10, 6, 25, 25
+; CHECK-NEXT:    vextubrx r10, r5, v5
+; CHECK-NEXT:    plxv v5, .LCPI0_4@PCREL(0), 1
+; CHECK-NEXT:    rlwimi r4, r10, 7, 24, 24
+; CHECK-NEXT:    vcmpequw v5, v3, v5
+; CHECK-NEXT:    vextubrx r10, r8, v5
+; CHECK-NEXT:    rlwimi r4, r10, 8, 23, 23
+; CHECK-NEXT:    vextubrx r10, r6, v5
+; CHECK-NEXT:    rlwimi r4, r10, 9, 22, 22
+; CHECK-NEXT:    vextubrx r10, r7, v5
+; CHECK-NEXT:    rlwimi r4, r10, 10, 21, 21
+; CHECK-NEXT:    vextubrx r10, r5, v5
+; CHECK-NEXT:    rlwimi r4, r10, 11, 20, 20
+; CHECK-NEXT:    vextubrx r10, r8, v4
+; CHECK-NEXT:    rlwimi r4, r10, 12, 19, 19
+; CHECK-NEXT:    vextubrx r10, r6, v4
+; CHECK-NEXT:    rlwimi r4, r10, 13, 18, 18
+; CHECK-NEXT:    vextubrx r10, r7, v4
+; CHECK-NEXT:    plxv v4, .LCPI0_5@PCREL(0), 1
+; CHECK-NEXT:    rlwimi r4, r10, 14, 17, 17
+; CHECK-NEXT:    rlwimi r4, r9, 15, 0, 16
+; CHECK-NEXT:    vcmpequw v4, v3, v4
+; CHECK-NEXT:    vextubrx r10, r8, v4
+; CHECK-NEXT:    vextubrx r9, r6, v4
+; CHECK-NEXT:    clrlwi r10, r10, 31
+; CHECK-NEXT:    rlwimi r10, r9, 1, 30, 30
+; CHECK-NEXT:    vextubrx r9, r7, v4
+; CHECK-NEXT:    rlwimi r10, r9, 2, 29, 29
+; CHECK-NEXT:    vextubrx r9, r5, v4
+; CHECK-NEXT:    plxv v4, .LCPI0_6@PCREL(0), 1
+; CHECK-NEXT:    rlwimi r10, r9, 3, 28, 28
+; CHECK-NEXT:    vcmpequw v4, v3, v4
+; CHECK-NEXT:    vextubrx r9, r8, v4
+; CHECK-NEXT:    rlwimi r10, r9, 4, 27, 27
+; CHECK-NEXT:    vextubrx r9, r6, v4
+; CHECK-NEXT:    rlwimi r10, r9, 5, 26, 26
+; CHECK-NEXT:    vextubrx r9, r7, v4
+; CHECK-NEXT:    rlwimi r10, r9, 6, 25, 25
+; CHECK-NEXT:    vextubrx r9, r5, v4
+; CHECK-NEXT:    plxv v4, .LCPI0_7@PCREL(0), 1
+; CHECK-NEXT:    rlwimi r10, r9, 7, 24, 24
+; CHECK-NEXT:    vcmpequw v3, v3, v4
 ; CHECK-NEXT:    vextubrx r9, r8, v3
-; CHECK-NEXT:    rlwimi r4, r9, 12, 19, 19
-; CHECK-NEXT:    vextubrx r9, r5, v3
-; CHECK-NEXT:    rlwimi r4, r9, 13, 18, 18
+; CHECK-NEXT:    vextubrx r5, r5, v3
+; CHECK-NEXT:    rlwimi r10, r9, 8, 23, 23
 ; CHECK-NEXT:    vextubrx r9, r6, v3
-; CHECK-NEXT:    rlwimi r4, r9, 14, 17, 17
+; CHECK-NEXT:    rlwimi r10, r9, 9, 22, 22
 ; CHECK-NEXT:    vextubrx r9, r7, v3
-; CHECK-NEXT:    plxv v3, .LCPI0_4@PCREL(0), 1
-; CHECK-NEXT:    rlwimi r4, r9, 15, 0, 16
-; CHECK-NEXT:    vcmpequw v3, v2, v3
-; CHECK-NEXT:    vextubrx r10, r5, v3
-; CHECK-NEXT:    vextubrx r9, r8, v3
-; CHECK-NEXT:    rlwimi r9, r10, 1, 30, 30
-; CHECK-NEXT:    vextubrx r10, r6, v3
-; CHECK-NEXT:    rlwimi r9, r10, 2, 29, 29
-; CHECK-NEXT:    vextubrx r10, r7, v3
-; CHECK-NEXT:    plxv v3, .LCPI0_5@PCREL(0), 1
-; CHECK-NEXT:    rlwimi r9, r10, 3, 28, 28
-; CHECK-NEXT:    vcmpequw v3, v2, v3
-; CHECK-NEXT:    vextubrx r10, r8, v3
-; CHECK-NEXT:    rlwimi r9, r10, 4, 27, 27
-; CHECK-NEXT:    vextubrx r10, r5, v3
-; CHECK-NEXT:    rlwimi r9, r10, 5, 26, 26
-; CHECK-NEXT:    vextubrx r10, r6, v3
-; CHECK-NEXT:    rlwimi r9, r10, 6, 25, 25
-; CHECK-NEXT:    vextubrx r10, r7, v3
-; CHECK-NEXT:    plxv v3, .LCPI0_6@PCREL(0), 1
-; CHECK-NEXT:    rlwimi r9, r10, 7, 24, 24
-; CHECK-NEXT:    vcmpequw v3, v2, v3
-; CHECK-NEXT:    vextubrx r10, r8, v3
-; CHECK-NEXT:    rlwimi r9, r10, 8, 23, 23
-; CHECK-NEXT:    vextubrx r10, r5, v3
-; CHECK-NEXT:    rlwimi r9, r10, 9, 22, 22
-; CHECK-NEXT:    vextubrx r10, r6, v3
-; CHECK-NEXT:    rlwimi r9, r10, 10, 21, 21
-; CHECK-NEXT:    vextubrx r10, r7, v3
-; CHECK-NEXT:    plxv v3, .LCPI0_7@PCREL(0), 1
-; CHECK-NEXT:    rlwimi r9, r10, 11, 20, 20
-; CHECK-NEXT:    vcmpequw v2, v2, v3
-; CHECK-NEXT:    vextubrx r8, r8, v2
-; CHECK-NEXT:    vextubrx r5, r5, v2
-; CHECK-NEXT:    rlwimi r9, r8, 12, 19, 19
-; CHECK-NEXT:    rlwimi r9, r5, 13, 18, 18
+; CHECK-NEXT:    rlwimi r10, r9, 10, 21, 21
+; CHECK-NEXT:    rlwimi r10, r5, 11, 20, 20
+; CHECK-NEXT:    vextubrx r5, r8, v2
+; CHECK-NEXT:    rlwimi r10, r5, 12, 19, 19
 ; CHECK-NEXT:    vextubrx r5, r6, v2
-; CHECK-NEXT:    rlwimi r9, r5, 14, 17, 17
+; CHECK-NEXT:    rlwimi r10, r5, 13, 18, 18
 ; CHECK-NEXT:    vextubrx r5, r7, v2
-; CHECK-NEXT:    rlwimi r9, r5, 15, 0, 16
-; CHECK-NEXT:    or r4, r9, r4
+; CHECK-NEXT:    rlwimi r10, r5, 14, 17, 17
+; CHECK-NEXT:    or r4, r4, r10
 ; CHECK-NEXT:    andi. r4, r4, 65535
 ; CHECK-NEXT:    iseleq r3, 0, r3
 ; CHECK-NEXT:    blr

diff  --git a/llvm/test/CodeGen/X86/bswap_tree2.ll b/llvm/test/CodeGen/X86/bswap_tree2.ll
index da238da46767d..ead7f4baec414 100644
--- a/llvm/test/CodeGen/X86/bswap_tree2.ll
+++ b/llvm/test/CodeGen/X86/bswap_tree2.ll
@@ -10,29 +10,23 @@
 ; CHECK-LABEL: test1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    andl $16711680, %ecx # imm = 0xFF0000
-; CHECK-NEXT:    movl %eax, %edx
-; CHECK-NEXT:    orl $-16777216, %edx # imm = 0xFF000000
-; CHECK-NEXT:    shll $8, %ecx
-; CHECK-NEXT:    shrl $8, %edx
-; CHECK-NEXT:    orl %ecx, %edx
-; CHECK-NEXT:    bswapl %eax
-; CHECK-NEXT:    shrl $16, %eax
-; CHECK-NEXT:    orl %edx, %eax
+; CHECK-NEXT:    movzwl %ax, %ecx
+; CHECK-NEXT:    orl %eax, %ecx
+; CHECK-NEXT:    orl $-16777216, %ecx # imm = 0xFF000000
+; CHECK-NEXT:    shrl $8, %ecx
+; CHECK-NEXT:    andl $16711935, %eax # imm = 0xFF00FF
+; CHECK-NEXT:    shll $8, %eax
+; CHECK-NEXT:    orl %ecx, %eax
 ; CHECK-NEXT:    retl
 ;
 ; CHECK64-LABEL: test1:
 ; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    movl %edi, %ecx
-; CHECK64-NEXT:    andl $16711680, %ecx # imm = 0xFF0000
-; CHECK64-NEXT:    movl %edi, %eax
+; CHECK64-NEXT:    movzwl %di, %eax
+; CHECK64-NEXT:    orl %edi, %eax
 ; CHECK64-NEXT:    orl $-16777216, %eax # imm = 0xFF000000
-; CHECK64-NEXT:    shll $8, %ecx
 ; CHECK64-NEXT:    shrl $8, %eax
-; CHECK64-NEXT:    orl %ecx, %eax
-; CHECK64-NEXT:    bswapl %edi
-; CHECK64-NEXT:    shrl $16, %edi
+; CHECK64-NEXT:    andl $16711935, %edi # imm = 0xFF00FF
+; CHECK64-NEXT:    shll $8, %edi
 ; CHECK64-NEXT:    orl %edi, %eax
 ; CHECK64-NEXT:    retq
   %byte0 = and i32 %x, 255        ; 0x000000ff

diff  --git a/llvm/test/CodeGen/X86/shift-combine.ll b/llvm/test/CodeGen/X86/shift-combine.ll
index 82b1afd4c93d7..1795cbda1e8c1 100644
--- a/llvm/test/CodeGen/X86/shift-combine.ll
+++ b/llvm/test/CodeGen/X86/shift-combine.ll
@@ -511,21 +511,18 @@ define i32 @or_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
 ; X32-LABEL: or_tree_with_shifts_i32:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    shll $16, %ecx
+; X32-NEXT:    orl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    shll $16, %eax
-; X32-NEXT:    orl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    orl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    orl %ecx, %eax
+; X32-NEXT:    orl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: or_tree_with_shifts_i32:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edx, %eax
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    orl %edx, %edi
 ; X64-NEXT:    shll $16, %edi
-; X64-NEXT:    shll $16, %eax
 ; X64-NEXT:    orl %ecx, %eax
-; X64-NEXT:    orl %esi, %eax
 ; X64-NEXT:    orl %edi, %eax
 ; X64-NEXT:    retq
   %a.shifted = shl i32 %a, 16
@@ -539,20 +536,19 @@ define i32 @or_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
 define i32 @xor_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
 ; X32-LABEL: xor_tree_with_shifts_i32:
 ; X32:       # %bb.0:
-; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    shrl $16, %eax
+; X32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    xorl %ecx, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: xor_tree_with_shifts_i32:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edx, %eax
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    xorl %edx, %edi
 ; X64-NEXT:    shrl $16, %edi
-; X64-NEXT:    shrl $16, %eax
 ; X64-NEXT:    xorl %ecx, %eax
-; X64-NEXT:    xorl %esi, %eax
 ; X64-NEXT:    xorl %edi, %eax
 ; X64-NEXT:    retq
   %a.shifted = lshr i32 %a, 16
@@ -575,12 +571,11 @@ define i32 @and_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
 ;
 ; X64-LABEL: and_tree_with_shifts_i32:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edx, %eax
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    andl %edx, %edi
 ; X64-NEXT:    sarl $16, %edi
-; X64-NEXT:    sarl $16, %eax
 ; X64-NEXT:    andl %ecx, %eax
 ; X64-NEXT:    andl %edi, %eax
-; X64-NEXT:    andl %esi, %eax
 ; X64-NEXT:    retq
   %a.shifted = ashr i32 %a, 16
   %c.shifted = ashr i32 %c, 16
@@ -593,25 +588,22 @@ define i32 @and_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
 define i32 @logic_tree_with_shifts_var_i32(i32 %a, i32 %b, i32 %c, i32 %d, i32 %s) {
 ; X32-LABEL: logic_tree_with_shifts_var_i32:
 ; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    shll %cl, %edx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    orl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    shll %cl, %eax
-; X32-NEXT:    orl {{[0-9]+}}(%esp), %edx
 ; X32-NEXT:    orl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    orl %edx, %eax
+; X32-NEXT:    orl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: logic_tree_with_shifts_var_i32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %ecx, %eax
+; X64-NEXT:    orl %edx, %edi
 ; X64-NEXT:    movl %r8d, %ecx
 ; X64-NEXT:    shll %cl, %edi
-; X64-NEXT:    shll %cl, %edx
-; X64-NEXT:    orl %edx, %eax
-; X64-NEXT:    orl %edi, %eax
 ; X64-NEXT:    orl %esi, %eax
+; X64-NEXT:    orl %edi, %eax
 ; X64-NEXT:    retq
   %a.shifted = shl i32 %a, %s
   %c.shifted = shl i32 %c, %s
@@ -681,11 +673,10 @@ define i32 @logic_tree_with_mismatching_shifts2_i32(i32 %a, i32 %b, i32 %c, i32
 define <4 x i32> @or_tree_with_shifts_vec_i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
 ; X64-LABEL: or_tree_with_shifts_vec_i32:
 ; X64:       # %bb.0:
-; X64-NEXT:    pslld $16, %xmm0
-; X64-NEXT:    pslld $16, %xmm2
-; X64-NEXT:    por %xmm3, %xmm2
-; X64-NEXT:    por %xmm1, %xmm2
 ; X64-NEXT:    por %xmm2, %xmm0
+; X64-NEXT:    pslld $16, %xmm0
+; X64-NEXT:    por %xmm3, %xmm1
+; X64-NEXT:    por %xmm1, %xmm0
 ; X64-NEXT:    retq
   %a.shifted = shl <4 x i32> %a, <i32 16, i32 16, i32 16, i32 16>
   %c.shifted = shl <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>


        


More information about the llvm-commits mailing list