[llvm] 1626ee6 - [DAGCombine] Hoist shifts out of a logic operations tree.
Filipp Zhinkin via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 12 02:42:23 PDT 2022
Author: Filipp Zhinkin
Date: 2022-08-12T12:42:16+03:00
New Revision: 1626ee6a9581be98d4a2dc251524ad6dc2725696
URL: https://github.com/llvm/llvm-project/commit/1626ee6a9581be98d4a2dc251524ad6dc2725696
DIFF: https://github.com/llvm/llvm-project/commit/1626ee6a9581be98d4a2dc251524ad6dc2725696.diff
LOG: [DAGCombine] Hoist shifts out of a logic operations tree.
Hoist and combine shift operations from logic operations tree:
logic (logic (SH x0, s), y), (logic (SH x1, s), z) --> logic (SH (logic x0, x1), s), (logic y, z)
The transformation improves code generated for some cases related to the issue https://github.com/llvm/llvm-project/issues/49541.
Correctness:
https://alive2.llvm.org/ce/z/pVqVgY
https://alive2.llvm.org/ce/z/YVvT-q
https://alive2.llvm.org/ce/z/W5zTBq
https://alive2.llvm.org/ce/z/YfJsvJ
https://alive2.llvm.org/ce/z/3YSyDM
https://alive2.llvm.org/ce/z/Bs2kzk
https://alive2.llvm.org/ce/z/EoQpzU
https://alive2.llvm.org/ce/z/Jnc_5H
https://alive2.llvm.org/ce/z/_LP6k_
https://alive2.llvm.org/ce/z/KvZNC9
Reviewed By: spatel
Differential Revision: https://reviews.llvm.org/D131189
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/ARM/icmp-shift-opt.ll
llvm/test/CodeGen/ARM/shift-combine.ll
llvm/test/CodeGen/PowerPC/p10-handle-split-promote-vec.ll
llvm/test/CodeGen/X86/bswap_tree2.ll
llvm/test/CodeGen/X86/shift-combine.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 17451b40f7502..d396f5f130a9c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6157,6 +6157,43 @@ static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
}
+/// Given a tree of logic operations with shape like
+/// (LOGIC (LOGIC (A, B), LOGIC (C, D))), where both operands use N's opcode
+/// and have a single use, try to match and fold shift operations with the
+/// same shift amount. Returns an empty SDValue when nothing folds. For example:
+/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
+/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
+static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
+ SDValue RightHand, SelectionDAG &DAG) {
+ unsigned LogicOpcode = N->getOpcode();
+ assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
+ LogicOpcode == ISD::XOR));
+ if (LeftHand.getOpcode() != LogicOpcode ||
+ RightHand.getOpcode() != LogicOpcode)
+ return SDValue();
+ if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
+ return SDValue();
+
+ // Try to match one of the following patterns:
+ // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
+ // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
+ // Note that foldLogicOfShifts will handle commuted versions of the left-hand
+ // operand itself; W is whichever right-hand operand was not folded.
+ SDValue CombinedShifts, W;
+ SDValue R0 = RightHand.getOperand(0);
+ SDValue R1 = RightHand.getOperand(1);
+ if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
+ W = R1;
+ else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
+ W = R0;
+ else
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
+}
+
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -6530,6 +6567,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue V = foldAndToUsubsat(N, DAG))
return V;
+ // For scalar types, postpone until LegalOperations is set to avoid
+ // interference with bswap folding; vector types are not postponed.
+ if (LegalOperations || VT.isVector())
+ if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
+ return R;
+
return SDValue();
}
@@ -7130,6 +7173,12 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (SDValue Combined = visitADDLike(N))
return Combined;
+ // For scalar types, postpone until LegalOperations is set to avoid
+ // interference with bswap folding; vector types are not postponed.
+ if (LegalOperations || VT.isVector())
+ if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
+ return R;
+
return SDValue();
}
@@ -8614,6 +8663,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
return R;
if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
return R;
+ if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
+ return R;
// Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
if (SDValue MM = unfoldMaskedMerge(N))
diff --git a/llvm/test/CodeGen/ARM/icmp-shift-opt.ll b/llvm/test/CodeGen/ARM/icmp-shift-opt.ll
index a78978f977f86..b51eb846f24b3 100644
--- a/llvm/test/CodeGen/ARM/icmp-shift-opt.ll
+++ b/llvm/test/CodeGen/ARM/icmp-shift-opt.ll
@@ -139,15 +139,13 @@ define i1 @opt_setcc_expanded_shl_wrong_shifts(i32 %a, i32 %b) nounwind {
define i1 @opt_setcc_shl_ne_zero_i128(i128 %a) nounwind {
; CHECK-LABEL: opt_setcc_shl_ne_zero_i128:
; CHECK: @ %bb.0:
-; CHECK-NEXT: lsl r3, r3, #17
-; CHECK-NEXT: orr r12, r3, r2, lsr #15
-; CHECK-NEXT: lsl r3, r1, #17
-; CHECK-NEXT: orr r3, r3, r0, lsr #15
+; CHECK-NEXT: orr r3, r1, r3
; CHECK-NEXT: orr r0, r2, r0
-; CHECK-NEXT: orr r3, r3, r12
-; CHECK-NEXT: lsl r0, r0, #17
-; CHECK-NEXT: orr r0, r0, r1, lsr #15
-; CHECK-NEXT: orrs r0, r0, r3
+; CHECK-NEXT: orr r2, r0, r3
+; CHECK-NEXT: orr r0, r0, r1
+; CHECK-NEXT: lsr r0, r0, #15
+; CHECK-NEXT: orr r0, r0, r2, lsl #17
+; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: movwne r0, #1
; CHECK-NEXT: bx lr
%shl = shl i128 %a, 17
diff --git a/llvm/test/CodeGen/ARM/shift-combine.ll b/llvm/test/CodeGen/ARM/shift-combine.ll
index 3529947339b00..ea2b9dcac9059 100644
--- a/llvm/test/CodeGen/ARM/shift-combine.ll
+++ b/llvm/test/CodeGen/ARM/shift-combine.ll
@@ -900,12 +900,11 @@ define i64 @or_tree_with_shifts_i64(i64 %a, i64 %b, i64 %c, i64 %d) {
; CHECK-ARM-NEXT: .save {r11, lr}
; CHECK-ARM-NEXT: push {r11, lr}
; CHECK-ARM-NEXT: ldr lr, [sp, #16]
-; CHECK-ARM-NEXT: lsl r3, r3, #16
-; CHECK-ARM-NEXT: ldr r12, [sp, #8]
-; CHECK-ARM-NEXT: orr r3, r3, r2, lsr #16
; CHECK-ARM-NEXT: orr r0, r0, r2, lsl #16
-; CHECK-ARM-NEXT: orr r1, r1, lr, lsl #16
-; CHECK-ARM-NEXT: orr r1, r1, r3
+; CHECK-ARM-NEXT: ldr r12, [sp, #8]
+; CHECK-ARM-NEXT: orr r3, lr, r3
+; CHECK-ARM-NEXT: orr r1, r1, r3, lsl #16
+; CHECK-ARM-NEXT: orr r1, r1, r2, lsr #16
; CHECK-ARM-NEXT: orr r1, r1, r12
; CHECK-ARM-NEXT: pop {r11, pc}
;
@@ -914,41 +913,38 @@ define i64 @or_tree_with_shifts_i64(i64 %a, i64 %b, i64 %c, i64 %d) {
; CHECK-BE-NEXT: .save {r11, lr}
; CHECK-BE-NEXT: push {r11, lr}
; CHECK-BE-NEXT: ldr lr, [sp, #20]
-; CHECK-BE-NEXT: lsl r2, r2, #16
-; CHECK-BE-NEXT: ldr r12, [sp, #12]
-; CHECK-BE-NEXT: orr r2, r2, r3, lsr #16
; CHECK-BE-NEXT: orr r1, r1, r3, lsl #16
-; CHECK-BE-NEXT: orr r0, r0, lr, lsl #16
-; CHECK-BE-NEXT: orr r0, r0, r2
+; CHECK-BE-NEXT: ldr r12, [sp, #12]
+; CHECK-BE-NEXT: orr r2, lr, r2
+; CHECK-BE-NEXT: orr r0, r0, r2, lsl #16
+; CHECK-BE-NEXT: orr r0, r0, r3, lsr #16
; CHECK-BE-NEXT: orr r0, r0, r12
; CHECK-BE-NEXT: pop {r11, pc}
;
; CHECK-ALIGN-LABEL: or_tree_with_shifts_i64:
; CHECK-ALIGN: @ %bb.0:
; CHECK-ALIGN-NEXT: ldr.w r12, [sp, #8]
-; CHECK-ALIGN-NEXT: lsls r3, r3, #16
-; CHECK-ALIGN-NEXT: orr.w r3, r3, r2, lsr #16
; CHECK-ALIGN-NEXT: orr.w r0, r0, r2, lsl #16
-; CHECK-ALIGN-NEXT: orr.w r1, r1, r12, lsl #16
-; CHECK-ALIGN-NEXT: orrs r1, r3
-; CHECK-ALIGN-NEXT: ldr r3, [sp]
-; CHECK-ALIGN-NEXT: orrs r1, r3
+; CHECK-ALIGN-NEXT: orr.w r3, r3, r12
+; CHECK-ALIGN-NEXT: orr.w r1, r1, r3, lsl #16
+; CHECK-ALIGN-NEXT: orr.w r1, r1, r2, lsr #16
+; CHECK-ALIGN-NEXT: ldr r2, [sp]
+; CHECK-ALIGN-NEXT: orrs r1, r2
; CHECK-ALIGN-NEXT: bx lr
;
; CHECK-V6M-LABEL: or_tree_with_shifts_i64:
; CHECK-V6M: @ %bb.0:
; CHECK-V6M-NEXT: push {r4, lr}
-; CHECK-V6M-NEXT: lsrs r4, r2, #16
-; CHECK-V6M-NEXT: lsls r3, r3, #16
-; CHECK-V6M-NEXT: adds r3, r3, r4
+; CHECK-V6M-NEXT: lsls r4, r2, #16
+; CHECK-V6M-NEXT: orrs r0, r4
; CHECK-V6M-NEXT: ldr r4, [sp, #16]
-; CHECK-V6M-NEXT: lsls r4, r4, #16
-; CHECK-V6M-NEXT: orrs r1, r4
+; CHECK-V6M-NEXT: orrs r4, r3
+; CHECK-V6M-NEXT: lsls r3, r4, #16
; CHECK-V6M-NEXT: orrs r1, r3
-; CHECK-V6M-NEXT: ldr r3, [sp, #8]
-; CHECK-V6M-NEXT: orrs r1, r3
-; CHECK-V6M-NEXT: lsls r2, r2, #16
-; CHECK-V6M-NEXT: orrs r0, r2
+; CHECK-V6M-NEXT: lsrs r2, r2, #16
+; CHECK-V6M-NEXT: orrs r1, r2
+; CHECK-V6M-NEXT: ldr r2, [sp, #8]
+; CHECK-V6M-NEXT: orrs r1, r2
; CHECK-V6M-NEXT: pop {r4, pc}
%b.shifted = shl i64 %b, 16
%c.shifted = shl i64 %c, 32
@@ -962,39 +958,38 @@ define i64 @or_tree_with_shifts_i64(i64 %a, i64 %b, i64 %c, i64 %d) {
define i32 @or_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-ARM-LABEL: or_tree_with_shifts_i32:
; CHECK-ARM: @ %bb.0:
-; CHECK-ARM-NEXT: orr r2, r3, r2, lsl #16
-; CHECK-ARM-NEXT: orr r0, r1, r0, lsl #16
; CHECK-ARM-NEXT: orr r0, r0, r2
+; CHECK-ARM-NEXT: orr r0, r1, r0, lsl #16
+; CHECK-ARM-NEXT: orr r0, r0, r3
; CHECK-ARM-NEXT: bx lr
;
; CHECK-BE-LABEL: or_tree_with_shifts_i32:
; CHECK-BE: @ %bb.0:
-; CHECK-BE-NEXT: orr r2, r3, r2, lsl #16
-; CHECK-BE-NEXT: orr r0, r1, r0, lsl #16
; CHECK-BE-NEXT: orr r0, r0, r2
+; CHECK-BE-NEXT: orr r0, r1, r0, lsl #16
+; CHECK-BE-NEXT: orr r0, r0, r3
; CHECK-BE-NEXT: bx lr
;
; CHECK-THUMB-LABEL: or_tree_with_shifts_i32:
; CHECK-THUMB: @ %bb.0:
-; CHECK-THUMB-NEXT: orr.w r2, r3, r2, lsl #16
-; CHECK-THUMB-NEXT: orr.w r0, r1, r0, lsl #16
; CHECK-THUMB-NEXT: orrs r0, r2
+; CHECK-THUMB-NEXT: orr.w r0, r1, r0, lsl #16
+; CHECK-THUMB-NEXT: orrs r0, r3
; CHECK-THUMB-NEXT: bx lr
;
; CHECK-ALIGN-LABEL: or_tree_with_shifts_i32:
; CHECK-ALIGN: @ %bb.0:
-; CHECK-ALIGN-NEXT: orr.w r2, r3, r2, lsl #16
-; CHECK-ALIGN-NEXT: orr.w r0, r1, r0, lsl #16
; CHECK-ALIGN-NEXT: orrs r0, r2
+; CHECK-ALIGN-NEXT: orr.w r0, r1, r0, lsl #16
+; CHECK-ALIGN-NEXT: orrs r0, r3
; CHECK-ALIGN-NEXT: bx lr
;
; CHECK-V6M-LABEL: or_tree_with_shifts_i32:
; CHECK-V6M: @ %bb.0:
-; CHECK-V6M-NEXT: lsls r2, r2, #16
-; CHECK-V6M-NEXT: orrs r2, r3
+; CHECK-V6M-NEXT: orrs r0, r2
; CHECK-V6M-NEXT: lsls r0, r0, #16
; CHECK-V6M-NEXT: orrs r0, r1
-; CHECK-V6M-NEXT: orrs r0, r2
+; CHECK-V6M-NEXT: orrs r0, r3
; CHECK-V6M-NEXT: bx lr
%a.shifted = shl i32 %a, 16
%c.shifted = shl i32 %c, 16
@@ -1007,39 +1002,38 @@ define i32 @or_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
define i32 @xor_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-ARM-LABEL: xor_tree_with_shifts_i32:
; CHECK-ARM: @ %bb.0:
-; CHECK-ARM-NEXT: eor r2, r3, r2, lsr #16
-; CHECK-ARM-NEXT: eor r0, r1, r0, lsr #16
; CHECK-ARM-NEXT: eor r0, r0, r2
+; CHECK-ARM-NEXT: eor r0, r1, r0, lsr #16
+; CHECK-ARM-NEXT: eor r0, r0, r3
; CHECK-ARM-NEXT: bx lr
;
; CHECK-BE-LABEL: xor_tree_with_shifts_i32:
; CHECK-BE: @ %bb.0:
-; CHECK-BE-NEXT: eor r2, r3, r2, lsr #16
-; CHECK-BE-NEXT: eor r0, r1, r0, lsr #16
; CHECK-BE-NEXT: eor r0, r0, r2
+; CHECK-BE-NEXT: eor r0, r1, r0, lsr #16
+; CHECK-BE-NEXT: eor r0, r0, r3
; CHECK-BE-NEXT: bx lr
;
; CHECK-THUMB-LABEL: xor_tree_with_shifts_i32:
; CHECK-THUMB: @ %bb.0:
-; CHECK-THUMB-NEXT: eor.w r2, r3, r2, lsr #16
-; CHECK-THUMB-NEXT: eor.w r0, r1, r0, lsr #16
; CHECK-THUMB-NEXT: eors r0, r2
+; CHECK-THUMB-NEXT: eor.w r0, r1, r0, lsr #16
+; CHECK-THUMB-NEXT: eors r0, r3
; CHECK-THUMB-NEXT: bx lr
;
; CHECK-ALIGN-LABEL: xor_tree_with_shifts_i32:
; CHECK-ALIGN: @ %bb.0:
-; CHECK-ALIGN-NEXT: eor.w r2, r3, r2, lsr #16
-; CHECK-ALIGN-NEXT: eor.w r0, r1, r0, lsr #16
; CHECK-ALIGN-NEXT: eors r0, r2
+; CHECK-ALIGN-NEXT: eor.w r0, r1, r0, lsr #16
+; CHECK-ALIGN-NEXT: eors r0, r3
; CHECK-ALIGN-NEXT: bx lr
;
; CHECK-V6M-LABEL: xor_tree_with_shifts_i32:
; CHECK-V6M: @ %bb.0:
-; CHECK-V6M-NEXT: lsrs r2, r2, #16
-; CHECK-V6M-NEXT: eors r2, r3
+; CHECK-V6M-NEXT: eors r0, r2
; CHECK-V6M-NEXT: lsrs r0, r0, #16
; CHECK-V6M-NEXT: eors r0, r1
-; CHECK-V6M-NEXT: eors r0, r2
+; CHECK-V6M-NEXT: eors r0, r3
; CHECK-V6M-NEXT: bx lr
%a.shifted = lshr i32 %a, 16
%c.shifted = lshr i32 %c, 16
@@ -1052,39 +1046,38 @@ define i32 @xor_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
define i32 @and_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-ARM-LABEL: and_tree_with_shifts_i32:
; CHECK-ARM: @ %bb.0:
-; CHECK-ARM-NEXT: and r2, r3, r2, asr #16
-; CHECK-ARM-NEXT: and r0, r1, r0, asr #16
; CHECK-ARM-NEXT: and r0, r0, r2
+; CHECK-ARM-NEXT: and r0, r1, r0, asr #16
+; CHECK-ARM-NEXT: and r0, r0, r3
; CHECK-ARM-NEXT: bx lr
;
; CHECK-BE-LABEL: and_tree_with_shifts_i32:
; CHECK-BE: @ %bb.0:
-; CHECK-BE-NEXT: and r2, r3, r2, asr #16
-; CHECK-BE-NEXT: and r0, r1, r0, asr #16
; CHECK-BE-NEXT: and r0, r0, r2
+; CHECK-BE-NEXT: and r0, r1, r0, asr #16
+; CHECK-BE-NEXT: and r0, r0, r3
; CHECK-BE-NEXT: bx lr
;
; CHECK-THUMB-LABEL: and_tree_with_shifts_i32:
; CHECK-THUMB: @ %bb.0:
-; CHECK-THUMB-NEXT: and.w r2, r3, r2, asr #16
-; CHECK-THUMB-NEXT: and.w r0, r1, r0, asr #16
; CHECK-THUMB-NEXT: ands r0, r2
+; CHECK-THUMB-NEXT: and.w r0, r1, r0, asr #16
+; CHECK-THUMB-NEXT: ands r0, r3
; CHECK-THUMB-NEXT: bx lr
;
; CHECK-ALIGN-LABEL: and_tree_with_shifts_i32:
; CHECK-ALIGN: @ %bb.0:
-; CHECK-ALIGN-NEXT: and.w r2, r3, r2, asr #16
-; CHECK-ALIGN-NEXT: and.w r0, r1, r0, asr #16
; CHECK-ALIGN-NEXT: ands r0, r2
+; CHECK-ALIGN-NEXT: and.w r0, r1, r0, asr #16
+; CHECK-ALIGN-NEXT: ands r0, r3
; CHECK-ALIGN-NEXT: bx lr
;
; CHECK-V6M-LABEL: and_tree_with_shifts_i32:
; CHECK-V6M: @ %bb.0:
-; CHECK-V6M-NEXT: asrs r2, r2, #16
-; CHECK-V6M-NEXT: ands r2, r3
+; CHECK-V6M-NEXT: ands r0, r2
; CHECK-V6M-NEXT: asrs r0, r0, #16
; CHECK-V6M-NEXT: ands r0, r1
-; CHECK-V6M-NEXT: ands r0, r2
+; CHECK-V6M-NEXT: ands r0, r3
; CHECK-V6M-NEXT: bx lr
%a.shifted = ashr i32 %a, 16
%c.shifted = ashr i32 %c, 16
@@ -1098,49 +1091,36 @@ define i32 @logic_tree_with_shifts_var_i32(i32 %a, i32 %b, i32 %c, i32 %d, i32 %
; CHECK-ARM-LABEL: logic_tree_with_shifts_var_i32:
; CHECK-ARM: @ %bb.0:
; CHECK-ARM-NEXT: ldr r12, [sp]
-; CHECK-ARM-NEXT: orr r2, r3, r2, lsl r12
-; CHECK-ARM-NEXT: orr r0, r1, r0, lsl r12
; CHECK-ARM-NEXT: orr r0, r0, r2
+; CHECK-ARM-NEXT: orr r0, r1, r0, lsl r12
+; CHECK-ARM-NEXT: orr r0, r0, r3
; CHECK-ARM-NEXT: bx lr
;
; CHECK-BE-LABEL: logic_tree_with_shifts_var_i32:
; CHECK-BE: @ %bb.0:
; CHECK-BE-NEXT: ldr r12, [sp]
-; CHECK-BE-NEXT: orr r2, r3, r2, lsl r12
-; CHECK-BE-NEXT: orr r0, r1, r0, lsl r12
; CHECK-BE-NEXT: orr r0, r0, r2
+; CHECK-BE-NEXT: orr r0, r1, r0, lsl r12
+; CHECK-BE-NEXT: orr r0, r0, r3
; CHECK-BE-NEXT: bx lr
;
-; CHECK-THUMB-LABEL: logic_tree_with_shifts_var_i32:
-; CHECK-THUMB: @ %bb.0:
-; CHECK-THUMB-NEXT: ldr.w r12, [sp]
-; CHECK-THUMB-NEXT: lsl.w r2, r2, r12
-; CHECK-THUMB-NEXT: lsl.w r0, r0, r12
-; CHECK-THUMB-NEXT: orrs r2, r3
-; CHECK-THUMB-NEXT: orrs r0, r1
-; CHECK-THUMB-NEXT: orrs r0, r2
-; CHECK-THUMB-NEXT: bx lr
-;
; CHECK-ALIGN-LABEL: logic_tree_with_shifts_var_i32:
; CHECK-ALIGN: @ %bb.0:
-; CHECK-ALIGN-NEXT: ldr.w r12, [sp]
-; CHECK-ALIGN-NEXT: lsl.w r2, r2, r12
-; CHECK-ALIGN-NEXT: lsl.w r0, r0, r12
-; CHECK-ALIGN-NEXT: orrs r2, r3
-; CHECK-ALIGN-NEXT: orrs r0, r1
; CHECK-ALIGN-NEXT: orrs r0, r2
+; CHECK-ALIGN-NEXT: ldr r2, [sp]
+; CHECK-ALIGN-NEXT: lsls r0, r2
+; CHECK-ALIGN-NEXT: orrs r0, r1
+; CHECK-ALIGN-NEXT: orrs r0, r3
; CHECK-ALIGN-NEXT: bx lr
;
; CHECK-V6M-LABEL: logic_tree_with_shifts_var_i32:
; CHECK-V6M: @ %bb.0:
-; CHECK-V6M-NEXT: push {r4, lr}
-; CHECK-V6M-NEXT: ldr r4, [sp, #8]
-; CHECK-V6M-NEXT: lsls r2, r4
-; CHECK-V6M-NEXT: orrs r2, r3
-; CHECK-V6M-NEXT: lsls r0, r4
-; CHECK-V6M-NEXT: orrs r0, r1
; CHECK-V6M-NEXT: orrs r0, r2
-; CHECK-V6M-NEXT: pop {r4, pc}
+; CHECK-V6M-NEXT: ldr r2, [sp]
+; CHECK-V6M-NEXT: lsls r0, r2
+; CHECK-V6M-NEXT: orrs r0, r1
+; CHECK-V6M-NEXT: orrs r0, r3
+; CHECK-V6M-NEXT: bx lr
%a.shifted = shl i32 %a, %s
%c.shifted = shl i32 %c, %s
%or.ab = or i32 %b, %a.shifted
@@ -1242,24 +1222,22 @@ define i32 @logic_tree_with_mismatching_shifts2_i32(i32 %a, i32 %b, i32 %c, i32
define <4 x i32> @or_tree_with_shifts_vec_i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
; CHECK-ARM-LABEL: or_tree_with_shifts_vec_i32:
; CHECK-ARM: @ %bb.0:
-; CHECK-ARM-NEXT: vshl.i32 q8, q2, #16
-; CHECK-ARM-NEXT: vshl.i32 q9, q0, #16
-; CHECK-ARM-NEXT: vorr q8, q8, q3
-; CHECK-ARM-NEXT: vorr q9, q9, q1
-; CHECK-ARM-NEXT: vorr q0, q9, q8
+; CHECK-ARM-NEXT: vorr q8, q0, q2
+; CHECK-ARM-NEXT: vshl.i32 q8, q8, #16
+; CHECK-ARM-NEXT: vorr q8, q8, q1
+; CHECK-ARM-NEXT: vorr q0, q8, q3
; CHECK-ARM-NEXT: bx lr
;
; CHECK-BE-LABEL: or_tree_with_shifts_vec_i32:
; CHECK-BE: @ %bb.0:
; CHECK-BE-NEXT: vrev64.32 q8, q2
; CHECK-BE-NEXT: vrev64.32 q9, q0
-; CHECK-BE-NEXT: vshl.i32 q8, q8, #16
+; CHECK-BE-NEXT: vorr q8, q9, q8
+; CHECK-BE-NEXT: vrev64.32 q9, q1
; CHECK-BE-NEXT: vrev64.32 q10, q3
-; CHECK-BE-NEXT: vshl.i32 q9, q9, #16
-; CHECK-BE-NEXT: vrev64.32 q11, q1
+; CHECK-BE-NEXT: vshl.i32 q8, q8, #16
+; CHECK-BE-NEXT: vorr q8, q8, q9
; CHECK-BE-NEXT: vorr q8, q8, q10
-; CHECK-BE-NEXT: vorr q9, q9, q11
-; CHECK-BE-NEXT: vorr q8, q9, q8
; CHECK-BE-NEXT: vrev64.32 q0, q8
; CHECK-BE-NEXT: bx lr
%a.shifted = shl <4 x i32> %a, <i32 16, i32 16, i32 16, i32 16>
diff --git a/llvm/test/CodeGen/PowerPC/p10-handle-split-promote-vec.ll b/llvm/test/CodeGen/PowerPC/p10-handle-split-promote-vec.ll
index ad0bd404d313e..82641312666bc 100644
--- a/llvm/test/CodeGen/PowerPC/p10-handle-split-promote-vec.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-handle-split-promote-vec.ll
@@ -8,90 +8,91 @@
define i32 @SplitPromoteVectorTest(i32 %Opc) align 2 {
; CHECK-LABEL: SplitPromoteVectorTest:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: plxv v3, .LCPI0_0@PCREL(0), 1
-; CHECK-NEXT: mtvsrws v2, r3
-; CHECK-NEXT: li r5, 4
+; CHECK-NEXT: plxv v2, .LCPI0_0@PCREL(0), 1
+; CHECK-NEXT: plxv v4, .LCPI0_1@PCREL(0), 1
+; CHECK-NEXT: mtvsrws v3, r3
+; CHECK-NEXT: li r5, 12
; CHECK-NEXT: li r8, 0
-; CHECK-NEXT: vcmpequw v3, v2, v3
-; CHECK-NEXT: vextubrx r6, r5, v3
-; CHECK-NEXT: vextubrx r4, r8, v3
-; CHECK-NEXT: rlwimi r4, r6, 1, 30, 30
-; CHECK-NEXT: li r6, 8
-; CHECK-NEXT: vextubrx r7, r6, v3
-; CHECK-NEXT: rlwimi r4, r7, 2, 29, 29
-; CHECK-NEXT: li r7, 12
-; CHECK-NEXT: vextubrx r9, r7, v3
-; CHECK-NEXT: plxv v3, .LCPI0_1@PCREL(0), 1
-; CHECK-NEXT: rlwimi r4, r9, 3, 28, 28
-; CHECK-NEXT: vcmpequw v3, v2, v3
-; CHECK-NEXT: vextubrx r9, r8, v3
-; CHECK-NEXT: rlwimi r4, r9, 4, 27, 27
-; CHECK-NEXT: vextubrx r9, r5, v3
-; CHECK-NEXT: rlwimi r4, r9, 5, 26, 26
-; CHECK-NEXT: vextubrx r9, r6, v3
-; CHECK-NEXT: rlwimi r4, r9, 6, 25, 25
-; CHECK-NEXT: vextubrx r9, r7, v3
-; CHECK-NEXT: plxv v3, .LCPI0_2@PCREL(0), 1
-; CHECK-NEXT: rlwimi r4, r9, 7, 24, 24
-; CHECK-NEXT: vcmpequw v3, v2, v3
-; CHECK-NEXT: vextubrx r9, r8, v3
-; CHECK-NEXT: rlwimi r4, r9, 8, 23, 23
-; CHECK-NEXT: vextubrx r9, r5, v3
-; CHECK-NEXT: rlwimi r4, r9, 9, 22, 22
-; CHECK-NEXT: vextubrx r9, r6, v3
-; CHECK-NEXT: rlwimi r4, r9, 10, 21, 21
-; CHECK-NEXT: vextubrx r9, r7, v3
-; CHECK-NEXT: plxv v3, .LCPI0_3@PCREL(0), 1
-; CHECK-NEXT: rlwimi r4, r9, 11, 20, 20
-; CHECK-NEXT: vcmpequw v3, v2, v3
+; CHECK-NEXT: vcmpequw v2, v3, v2
+; CHECK-NEXT: plxv v5, .LCPI0_2@PCREL(0), 1
+; CHECK-NEXT: vcmpequw v4, v3, v4
+; CHECK-NEXT: vcmpequw v5, v3, v5
+; CHECK-NEXT: vextubrx r4, r5, v2
+; CHECK-NEXT: vextubrx r6, r5, v4
+; CHECK-NEXT: or r9, r6, r4
+; CHECK-NEXT: li r6, 4
+; CHECK-NEXT: vextubrx r4, r8, v5
+; CHECK-NEXT: vextubrx r7, r6, v5
+; CHECK-NEXT: rlwimi r4, r7, 1, 30, 30
+; CHECK-NEXT: li r7, 8
+; CHECK-NEXT: vextubrx r10, r7, v5
+; CHECK-NEXT: rlwimi r4, r10, 2, 29, 29
+; CHECK-NEXT: vextubrx r10, r5, v5
+; CHECK-NEXT: plxv v5, .LCPI0_3@PCREL(0), 1
+; CHECK-NEXT: rlwimi r4, r10, 3, 28, 28
+; CHECK-NEXT: vcmpequw v5, v3, v5
+; CHECK-NEXT: vextubrx r10, r8, v5
+; CHECK-NEXT: rlwimi r4, r10, 4, 27, 27
+; CHECK-NEXT: vextubrx r10, r6, v5
+; CHECK-NEXT: rlwimi r4, r10, 5, 26, 26
+; CHECK-NEXT: vextubrx r10, r7, v5
+; CHECK-NEXT: rlwimi r4, r10, 6, 25, 25
+; CHECK-NEXT: vextubrx r10, r5, v5
+; CHECK-NEXT: plxv v5, .LCPI0_4@PCREL(0), 1
+; CHECK-NEXT: rlwimi r4, r10, 7, 24, 24
+; CHECK-NEXT: vcmpequw v5, v3, v5
+; CHECK-NEXT: vextubrx r10, r8, v5
+; CHECK-NEXT: rlwimi r4, r10, 8, 23, 23
+; CHECK-NEXT: vextubrx r10, r6, v5
+; CHECK-NEXT: rlwimi r4, r10, 9, 22, 22
+; CHECK-NEXT: vextubrx r10, r7, v5
+; CHECK-NEXT: rlwimi r4, r10, 10, 21, 21
+; CHECK-NEXT: vextubrx r10, r5, v5
+; CHECK-NEXT: rlwimi r4, r10, 11, 20, 20
+; CHECK-NEXT: vextubrx r10, r8, v4
+; CHECK-NEXT: rlwimi r4, r10, 12, 19, 19
+; CHECK-NEXT: vextubrx r10, r6, v4
+; CHECK-NEXT: rlwimi r4, r10, 13, 18, 18
+; CHECK-NEXT: vextubrx r10, r7, v4
+; CHECK-NEXT: plxv v4, .LCPI0_5@PCREL(0), 1
+; CHECK-NEXT: rlwimi r4, r10, 14, 17, 17
+; CHECK-NEXT: rlwimi r4, r9, 15, 0, 16
+; CHECK-NEXT: vcmpequw v4, v3, v4
+; CHECK-NEXT: vextubrx r10, r8, v4
+; CHECK-NEXT: vextubrx r9, r6, v4
+; CHECK-NEXT: clrlwi r10, r10, 31
+; CHECK-NEXT: rlwimi r10, r9, 1, 30, 30
+; CHECK-NEXT: vextubrx r9, r7, v4
+; CHECK-NEXT: rlwimi r10, r9, 2, 29, 29
+; CHECK-NEXT: vextubrx r9, r5, v4
+; CHECK-NEXT: plxv v4, .LCPI0_6@PCREL(0), 1
+; CHECK-NEXT: rlwimi r10, r9, 3, 28, 28
+; CHECK-NEXT: vcmpequw v4, v3, v4
+; CHECK-NEXT: vextubrx r9, r8, v4
+; CHECK-NEXT: rlwimi r10, r9, 4, 27, 27
+; CHECK-NEXT: vextubrx r9, r6, v4
+; CHECK-NEXT: rlwimi r10, r9, 5, 26, 26
+; CHECK-NEXT: vextubrx r9, r7, v4
+; CHECK-NEXT: rlwimi r10, r9, 6, 25, 25
+; CHECK-NEXT: vextubrx r9, r5, v4
+; CHECK-NEXT: plxv v4, .LCPI0_7@PCREL(0), 1
+; CHECK-NEXT: rlwimi r10, r9, 7, 24, 24
+; CHECK-NEXT: vcmpequw v3, v3, v4
; CHECK-NEXT: vextubrx r9, r8, v3
-; CHECK-NEXT: rlwimi r4, r9, 12, 19, 19
-; CHECK-NEXT: vextubrx r9, r5, v3
-; CHECK-NEXT: rlwimi r4, r9, 13, 18, 18
+; CHECK-NEXT: vextubrx r5, r5, v3
+; CHECK-NEXT: rlwimi r10, r9, 8, 23, 23
; CHECK-NEXT: vextubrx r9, r6, v3
-; CHECK-NEXT: rlwimi r4, r9, 14, 17, 17
+; CHECK-NEXT: rlwimi r10, r9, 9, 22, 22
; CHECK-NEXT: vextubrx r9, r7, v3
-; CHECK-NEXT: plxv v3, .LCPI0_4@PCREL(0), 1
-; CHECK-NEXT: rlwimi r4, r9, 15, 0, 16
-; CHECK-NEXT: vcmpequw v3, v2, v3
-; CHECK-NEXT: vextubrx r10, r5, v3
-; CHECK-NEXT: vextubrx r9, r8, v3
-; CHECK-NEXT: rlwimi r9, r10, 1, 30, 30
-; CHECK-NEXT: vextubrx r10, r6, v3
-; CHECK-NEXT: rlwimi r9, r10, 2, 29, 29
-; CHECK-NEXT: vextubrx r10, r7, v3
-; CHECK-NEXT: plxv v3, .LCPI0_5@PCREL(0), 1
-; CHECK-NEXT: rlwimi r9, r10, 3, 28, 28
-; CHECK-NEXT: vcmpequw v3, v2, v3
-; CHECK-NEXT: vextubrx r10, r8, v3
-; CHECK-NEXT: rlwimi r9, r10, 4, 27, 27
-; CHECK-NEXT: vextubrx r10, r5, v3
-; CHECK-NEXT: rlwimi r9, r10, 5, 26, 26
-; CHECK-NEXT: vextubrx r10, r6, v3
-; CHECK-NEXT: rlwimi r9, r10, 6, 25, 25
-; CHECK-NEXT: vextubrx r10, r7, v3
-; CHECK-NEXT: plxv v3, .LCPI0_6@PCREL(0), 1
-; CHECK-NEXT: rlwimi r9, r10, 7, 24, 24
-; CHECK-NEXT: vcmpequw v3, v2, v3
-; CHECK-NEXT: vextubrx r10, r8, v3
-; CHECK-NEXT: rlwimi r9, r10, 8, 23, 23
-; CHECK-NEXT: vextubrx r10, r5, v3
-; CHECK-NEXT: rlwimi r9, r10, 9, 22, 22
-; CHECK-NEXT: vextubrx r10, r6, v3
-; CHECK-NEXT: rlwimi r9, r10, 10, 21, 21
-; CHECK-NEXT: vextubrx r10, r7, v3
-; CHECK-NEXT: plxv v3, .LCPI0_7@PCREL(0), 1
-; CHECK-NEXT: rlwimi r9, r10, 11, 20, 20
-; CHECK-NEXT: vcmpequw v2, v2, v3
-; CHECK-NEXT: vextubrx r8, r8, v2
-; CHECK-NEXT: vextubrx r5, r5, v2
-; CHECK-NEXT: rlwimi r9, r8, 12, 19, 19
-; CHECK-NEXT: rlwimi r9, r5, 13, 18, 18
+; CHECK-NEXT: rlwimi r10, r9, 10, 21, 21
+; CHECK-NEXT: rlwimi r10, r5, 11, 20, 20
+; CHECK-NEXT: vextubrx r5, r8, v2
+; CHECK-NEXT: rlwimi r10, r5, 12, 19, 19
; CHECK-NEXT: vextubrx r5, r6, v2
-; CHECK-NEXT: rlwimi r9, r5, 14, 17, 17
+; CHECK-NEXT: rlwimi r10, r5, 13, 18, 18
; CHECK-NEXT: vextubrx r5, r7, v2
-; CHECK-NEXT: rlwimi r9, r5, 15, 0, 16
-; CHECK-NEXT: or r4, r9, r4
+; CHECK-NEXT: rlwimi r10, r5, 14, 17, 17
+; CHECK-NEXT: or r4, r4, r10
; CHECK-NEXT: andi. r4, r4, 65535
; CHECK-NEXT: iseleq r3, 0, r3
; CHECK-NEXT: blr
diff --git a/llvm/test/CodeGen/X86/bswap_tree2.ll b/llvm/test/CodeGen/X86/bswap_tree2.ll
index da238da46767d..ead7f4baec414 100644
--- a/llvm/test/CodeGen/X86/bswap_tree2.ll
+++ b/llvm/test/CodeGen/X86/bswap_tree2.ll
@@ -10,29 +10,23 @@
; CHECK-LABEL: test1:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: andl $16711680, %ecx # imm = 0xFF0000
-; CHECK-NEXT: movl %eax, %edx
-; CHECK-NEXT: orl $-16777216, %edx # imm = 0xFF000000
-; CHECK-NEXT: shll $8, %ecx
-; CHECK-NEXT: shrl $8, %edx
-; CHECK-NEXT: orl %ecx, %edx
-; CHECK-NEXT: bswapl %eax
-; CHECK-NEXT: shrl $16, %eax
-; CHECK-NEXT: orl %edx, %eax
+; CHECK-NEXT: movzwl %ax, %ecx
+; CHECK-NEXT: orl %eax, %ecx
+; CHECK-NEXT: orl $-16777216, %ecx # imm = 0xFF000000
+; CHECK-NEXT: shrl $8, %ecx
+; CHECK-NEXT: andl $16711935, %eax # imm = 0xFF00FF
+; CHECK-NEXT: shll $8, %eax
+; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: test1:
; CHECK64: # %bb.0:
-; CHECK64-NEXT: movl %edi, %ecx
-; CHECK64-NEXT: andl $16711680, %ecx # imm = 0xFF0000
-; CHECK64-NEXT: movl %edi, %eax
+; CHECK64-NEXT: movzwl %di, %eax
+; CHECK64-NEXT: orl %edi, %eax
; CHECK64-NEXT: orl $-16777216, %eax # imm = 0xFF000000
-; CHECK64-NEXT: shll $8, %ecx
; CHECK64-NEXT: shrl $8, %eax
-; CHECK64-NEXT: orl %ecx, %eax
-; CHECK64-NEXT: bswapl %edi
-; CHECK64-NEXT: shrl $16, %edi
+; CHECK64-NEXT: andl $16711935, %edi # imm = 0xFF00FF
+; CHECK64-NEXT: shll $8, %edi
; CHECK64-NEXT: orl %edi, %eax
; CHECK64-NEXT: retq
%byte0 = and i32 %x, 255 ; 0x000000ff
diff --git a/llvm/test/CodeGen/X86/shift-combine.ll b/llvm/test/CodeGen/X86/shift-combine.ll
index 82b1afd4c93d7..1795cbda1e8c1 100644
--- a/llvm/test/CodeGen/X86/shift-combine.ll
+++ b/llvm/test/CodeGen/X86/shift-combine.ll
@@ -511,21 +511,18 @@ define i32 @or_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
; X32-LABEL: or_tree_with_shifts_i32:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: shll $16, %ecx
+; X32-NEXT: orl {{[0-9]+}}(%esp), %eax
; X32-NEXT: shll $16, %eax
-; X32-NEXT: orl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: orl %ecx, %eax
+; X32-NEXT: orl {{[0-9]+}}(%esp), %eax
; X32-NEXT: retl
;
; X64-LABEL: or_tree_with_shifts_i32:
; X64: # %bb.0:
-; X64-NEXT: movl %edx, %eax
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: orl %edx, %edi
; X64-NEXT: shll $16, %edi
-; X64-NEXT: shll $16, %eax
; X64-NEXT: orl %ecx, %eax
-; X64-NEXT: orl %esi, %eax
; X64-NEXT: orl %edi, %eax
; X64-NEXT: retq
%a.shifted = shl i32 %a, 16
@@ -539,20 +536,19 @@ define i32 @or_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
define i32 @xor_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
; X32-LABEL: xor_tree_with_shifts_i32:
; X32: # %bb.0:
-; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: shrl $16, %eax
+; X32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; X32-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: xorl %ecx, %eax
; X32-NEXT: retl
;
; X64-LABEL: xor_tree_with_shifts_i32:
; X64: # %bb.0:
-; X64-NEXT: movl %edx, %eax
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: xorl %edx, %edi
; X64-NEXT: shrl $16, %edi
-; X64-NEXT: shrl $16, %eax
; X64-NEXT: xorl %ecx, %eax
-; X64-NEXT: xorl %esi, %eax
; X64-NEXT: xorl %edi, %eax
; X64-NEXT: retq
%a.shifted = lshr i32 %a, 16
@@ -575,12 +571,11 @@ define i32 @and_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
;
; X64-LABEL: and_tree_with_shifts_i32:
; X64: # %bb.0:
-; X64-NEXT: movl %edx, %eax
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: andl %edx, %edi
; X64-NEXT: sarl $16, %edi
-; X64-NEXT: sarl $16, %eax
; X64-NEXT: andl %ecx, %eax
; X64-NEXT: andl %edi, %eax
-; X64-NEXT: andl %esi, %eax
; X64-NEXT: retq
%a.shifted = ashr i32 %a, 16
%c.shifted = ashr i32 %c, 16
@@ -593,25 +588,22 @@ define i32 @and_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
define i32 @logic_tree_with_shifts_var_i32(i32 %a, i32 %b, i32 %c, i32 %d, i32 %s) {
; X32-LABEL: logic_tree_with_shifts_var_i32:
; X32: # %bb.0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: shll %cl, %edx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: orl {{[0-9]+}}(%esp), %eax
; X32-NEXT: shll %cl, %eax
-; X32-NEXT: orl {{[0-9]+}}(%esp), %edx
; X32-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: orl %edx, %eax
+; X32-NEXT: orl {{[0-9]+}}(%esp), %eax
; X32-NEXT: retl
;
; X64-LABEL: logic_tree_with_shifts_var_i32:
; X64: # %bb.0:
; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: orl %edx, %edi
; X64-NEXT: movl %r8d, %ecx
; X64-NEXT: shll %cl, %edi
-; X64-NEXT: shll %cl, %edx
-; X64-NEXT: orl %edx, %eax
-; X64-NEXT: orl %edi, %eax
; X64-NEXT: orl %esi, %eax
+; X64-NEXT: orl %edi, %eax
; X64-NEXT: retq
%a.shifted = shl i32 %a, %s
%c.shifted = shl i32 %c, %s
@@ -681,11 +673,10 @@ define i32 @logic_tree_with_mismatching_shifts2_i32(i32 %a, i32 %b, i32 %c, i32
define <4 x i32> @or_tree_with_shifts_vec_i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
; X64-LABEL: or_tree_with_shifts_vec_i32:
; X64: # %bb.0:
-; X64-NEXT: pslld $16, %xmm0
-; X64-NEXT: pslld $16, %xmm2
-; X64-NEXT: por %xmm3, %xmm2
-; X64-NEXT: por %xmm1, %xmm2
; X64-NEXT: por %xmm2, %xmm0
+; X64-NEXT: pslld $16, %xmm0
+; X64-NEXT: por %xmm3, %xmm1
+; X64-NEXT: por %xmm1, %xmm0
; X64-NEXT: retq
%a.shifted = shl <4 x i32> %a, <i32 16, i32 16, i32 16, i32 16>
%c.shifted = shl <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
More information about the llvm-commits
mailing list