[llvm] f4b5397 - [SDAG] fold bitwise logic with shifted operands

Sanjay Patel via llvm-commits <llvm-commits at lists.llvm.org>
Sat Mar 5 08:24:30 PST 2022


Author: Sanjay Patel
Date: 2022-03-05T11:14:45-05:00
New Revision: f4b53972ced23adbc5e6ef64ffd1c34f0f8f223f

URL: https://github.com/llvm/llvm-project/commit/f4b53972ced23adbc5e6ef64ffd1c34f0f8f223f
DIFF: https://github.com/llvm/llvm-project/commit/f4b53972ced23adbc5e6ef64ffd1c34f0f8f223f.diff

LOG: [SDAG] fold bitwise logic with shifted operands

This extends acb96ffd149d to 'and' and 'xor' opcodes.

Copying from that message:

LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z

https://alive2.llvm.org/ce/z/QmR9rR

This is a reassociation + factoring fold: the common shift operation is moved
after a bitwise logic op on the 2 input operands.
Simpler cases of these patterns are already folded in IR, but I suspect the
IR passes would still miss all of these exact tests. We also handle the
simpler form of this plus several other folds in
DAGCombiner::hoistLogicOpWithSameOpcodeHands().
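
The rewrite is sound because the logic op is associative and commutative, and
a shift by a common amount distributes over bitwise logic:

    LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y)
      = LOGIC (LOGIC (SH X0, Y), (SH X1, Y)), Z   ; reassociate
      = LOGIC (SH (LOGIC X0, X1), Y), Z           ; factor out the common shift

For instance, with arbitrarily chosen 8-bit values, OR as the logic op, and
lshr as the shift:

    x0 = 0b11011010, x1 = 0b01101110, y = 2, z = 0b00110101
    before: ((x0 >> 2) | z) | (x1 >> 2)
          = (0b00110110 | 0b00110101) | 0b00011011
          = 0b00110111 | 0b00011011 = 0b00111111
    after:  ((x0 | x1) >> 2) | z
          = (0b11111110 >> 2) | 0b00110101
          = 0b00111111 | 0b00110101 = 0b00111111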

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/AArch64/logic-shift.ll
    llvm/test/CodeGen/X86/logic-shift.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4f669f15f8ef3..299ba51d606d6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5950,6 +5950,53 @@ static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) {
   return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask);
 }
 
+/// Given a bitwise logic operation N with a matching bitwise logic operand,
+/// fold a pattern where 2 of the source operands are identically shifted
+/// values. For example:
+/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
+static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
+                                 SelectionDAG &DAG) {
+  unsigned LogicOpcode = N->getOpcode();
+  assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
+          LogicOpcode == ISD::XOR)
+         && "Expected bitwise logic operation");
+
+  if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
+    return SDValue();
+
+  // Match another bitwise logic op and a shift.
+  unsigned ShiftOpcode = ShiftOp.getOpcode();
+  if (LogicOp.getOpcode() != LogicOpcode ||
+      !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
+        ShiftOpcode == ISD::SRA))
+    return SDValue();
+
+  // Match another shift op inside the first logic operand. Handle both commuted
+  // possibilities.
+  // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
+  // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
+  SDValue X1 = ShiftOp.getOperand(0);
+  SDValue Y = ShiftOp.getOperand(1);
+  SDValue X0, Z;
+  if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
+      LogicOp.getOperand(0).getOperand(1) == Y) {
+    X0 = LogicOp.getOperand(0).getOperand(0);
+    Z = LogicOp.getOperand(1);
+  } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
+             LogicOp.getOperand(1).getOperand(1) == Y) {
+    X0 = LogicOp.getOperand(1).getOperand(0);
+    Z = LogicOp.getOperand(0);
+  } else {
+    return SDValue();
+  }
+
+  EVT VT = N->getValueType(0);
+  SDLoc DL(N);
+  SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
+  SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
+  return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
+}
+
 SDValue DAGCombiner::visitAND(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -6219,6 +6266,11 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
       return V;
 
+  if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
+    return R;
+  if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
+    return R;
+
   // Masking the negated extension of a boolean is just the zero-extended
   // boolean:
   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
@@ -6696,52 +6748,6 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
   return SDValue();
 }
 
-/// Given a bitwise logic operation N with a matching bitwise logic operand,
-/// fold a pattern where 2 of the source operands are identically shifted
-/// values. For example:
-/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
-static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
-                                 SelectionDAG &DAG) {
-  // TODO: This should be extended to allow AND/XOR.
-  assert(N->getOpcode() == ISD::OR && "Expected bitwise logic operation");
-
-  if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
-    return SDValue();
-
-  // Match another bitwise logic op and a shift.
-  unsigned LogicOpcode = N->getOpcode();
-  unsigned ShiftOpcode = ShiftOp.getOpcode();
-  if (LogicOp.getOpcode() != LogicOpcode ||
-      !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
-        ShiftOpcode == ISD::SRA))
-    return SDValue();
-
-  // Match another shift op inside the first logic operand. Handle both commuted
-  // possibilities.
-  // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
-  // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
-  SDValue X1 = ShiftOp.getOperand(0);
-  SDValue Y = ShiftOp.getOperand(1);
-  SDValue X0, Z;
-  if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
-      LogicOp.getOperand(0).getOperand(1) == Y) {
-    X0 = LogicOp.getOperand(0).getOperand(0);
-    Z = LogicOp.getOperand(1);
-  } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
-             LogicOp.getOperand(1).getOperand(1) == Y) {
-    X0 = LogicOp.getOperand(1).getOperand(0);
-    Z = LogicOp.getOperand(0);
-  } else {
-    return SDValue();
-  }
-
-  EVT VT = N->getValueType(0);
-  SDLoc DL(N);
-  SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
-  SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
-  return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
-}
-
 /// OR combines for which the commuted variant will be tried as well.
 static SDValue visitORCommutative(
     SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
@@ -8394,6 +8400,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
       return V;
 
+  if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
+    return R;
+  if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
+    return R;
+
   // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
   if (SDValue MM = unfoldMaskedMerge(N))
     return MM;

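As a quick sanity check of the identity outside of SelectionDAG, it can be
verified exhaustively for i8 and all three logic opcodes. A minimal
standalone C++ sketch (independent of this commit; lshr only, with the Z
operand sampled to keep the loop count down):

    #include <cassert>
    #include <cstdint>

    using LogicFn = uint8_t (*)(uint8_t, uint8_t);

    int main() {
      LogicFn Ops[] = {
          [](uint8_t A, uint8_t B) { return uint8_t(A & B); }, // ISD::AND
          [](uint8_t A, uint8_t B) { return uint8_t(A | B); }, // ISD::OR
          [](uint8_t A, uint8_t B) { return uint8_t(A ^ B); }, // ISD::XOR
      };
      for (LogicFn Logic : Ops)
        for (unsigned X0 = 0; X0 < 256; ++X0)
          for (unsigned X1 = 0; X1 < 256; ++X1)
            for (unsigned Y = 0; Y < 8; ++Y)
              for (unsigned Z = 0; Z < 256; Z += 51) { // Z = 0, 51, ..., 255
                uint8_t Sh0 = uint8_t(X0) >> Y;
                uint8_t Sh1 = uint8_t(X1) >> Y;
                // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y)
                uint8_t Before = Logic(Logic(Sh0, uint8_t(Z)), Sh1);
                // LOGIC (SH (LOGIC X0, X1), Y), Z
                uint8_t After =
                    Logic(uint8_t(Logic(uint8_t(X0), uint8_t(X1)) >> Y),
                          uint8_t(Z));
                assert(Before == After);
              }
      return 0;
    }

Note that the hasOneUse() checks in the real fold are about profitability
rather than correctness: if the inner logic op or the shifts have other
users, the original nodes stay live and the transform would add nodes
instead of removing them.
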
diff --git a/llvm/test/CodeGen/AArch64/logic-shift.ll b/llvm/test/CodeGen/AArch64/logic-shift.ll
index 058458c4dcd55..7889bda08a4f6 100644
--- a/llvm/test/CodeGen/AArch64/logic-shift.ll
+++ b/llvm/test/CodeGen/AArch64/logic-shift.ll
@@ -232,13 +232,11 @@ define i64 @mix_logic_lshr(i64 %x0, i64 %x1, i64 %y, i64 %z) {
 define i8 @xor_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 ; CHECK-LABEL: xor_lshr_commute0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    eor w8, w0, w1
 ; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
-; CHECK-NEXT:    and w9, w1, #0xff
+; CHECK-NEXT:    and w8, w8, #0xff
 ; CHECK-NEXT:    lsr w8, w8, w2
-; CHECK-NEXT:    lsr w9, w9, w2
-; CHECK-NEXT:    eor w8, w8, w3
-; CHECK-NEXT:    eor w0, w8, w9
+; CHECK-NEXT:    eor w0, w8, w3
 ; CHECK-NEXT:    ret
   %sh1 = lshr i8 %x0, %y
   %sh2 = lshr i8 %x1, %y
@@ -250,10 +248,9 @@ define i8 @xor_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 define i32 @xor_lshr_commute1(i32 %x0, i32 %x1, i32 %y, i32 %z) {
 ; CHECK-LABEL: xor_lshr_commute1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr w8, w0, w2
-; CHECK-NEXT:    lsr w9, w1, w2
-; CHECK-NEXT:    eor w8, w3, w8
-; CHECK-NEXT:    eor w0, w8, w9
+; CHECK-NEXT:    eor w8, w0, w1
+; CHECK-NEXT:    lsr w8, w8, w2
+; CHECK-NEXT:    eor w0, w8, w3
 ; CHECK-NEXT:    ret
   %sh1 = lshr i32 %x0, %y
   %sh2 = lshr i32 %x1, %y
@@ -266,10 +263,9 @@ define <8 x i16> @xor_lshr_commute2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y,
 ; CHECK-LABEL: xor_lshr_commute2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    neg v2.8h, v2.8h
+; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ushl v0.8h, v0.8h, v2.8h
-; CHECK-NEXT:    ushl v1.8h, v1.8h, v2.8h
 ; CHECK-NEXT:    eor v0.16b, v0.16b, v3.16b
-; CHECK-NEXT:    eor v0.16b, v1.16b, v0.16b
 ; CHECK-NEXT:    ret
   %sh1 = lshr <8 x i16> %x0, %y
   %sh2 = lshr <8 x i16> %x1, %y
@@ -282,10 +278,9 @@ define <2 x i64> @xor_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y,
 ; CHECK-LABEL: xor_lshr_commute3:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    neg v2.2d, v2.2d
+; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ushl v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    ushl v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    eor v0.16b, v3.16b, v0.16b
-; CHECK-NEXT:    eor v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    eor v0.16b, v0.16b, v3.16b
 ; CHECK-NEXT:    ret
   %sh1 = lshr <2 x i64> %x0, %y
   %sh2 = lshr <2 x i64> %x1, %y
@@ -297,13 +292,11 @@ define <2 x i64> @xor_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y,
 define i16 @xor_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) {
 ; CHECK-LABEL: xor_ashr_commute0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxth w8, w0
+; CHECK-NEXT:    eor w8, w0, w1
 ; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
-; CHECK-NEXT:    sxth w9, w1
+; CHECK-NEXT:    sxth w8, w8
 ; CHECK-NEXT:    asr w8, w8, w2
-; CHECK-NEXT:    asr w9, w9, w2
-; CHECK-NEXT:    eor w8, w8, w3
-; CHECK-NEXT:    eor w0, w8, w9
+; CHECK-NEXT:    eor w0, w8, w3
 ; CHECK-NEXT:    ret
   %sh1 = ashr i16 %x0, %y
   %sh2 = ashr i16 %x1, %y
@@ -315,10 +308,9 @@ define i16 @xor_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) {
 define i64 @xor_ashr_commute1(i64 %x0, i64 %x1, i64 %y, i64 %z) {
 ; CHECK-LABEL: xor_ashr_commute1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    asr x8, x0, x2
-; CHECK-NEXT:    asr x9, x1, x2
-; CHECK-NEXT:    eor x8, x3, x8
-; CHECK-NEXT:    eor x0, x8, x9
+; CHECK-NEXT:    eor x8, x0, x1
+; CHECK-NEXT:    asr x8, x8, x2
+; CHECK-NEXT:    eor x0, x8, x3
 ; CHECK-NEXT:    ret
   %sh1 = ashr i64 %x0, %y
   %sh2 = ashr i64 %x1, %y
@@ -331,10 +323,9 @@ define <4 x i32> @xor_ashr_commute2(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %y,
 ; CHECK-LABEL: xor_ashr_commute2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    neg v2.4s, v2.4s
+; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    sshl v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    sshl v1.4s, v1.4s, v2.4s
 ; CHECK-NEXT:    eor v0.16b, v0.16b, v3.16b
-; CHECK-NEXT:    eor v0.16b, v1.16b, v0.16b
 ; CHECK-NEXT:    ret
   %sh1 = ashr <4 x i32> %x0, %y
   %sh2 = ashr <4 x i32> %x1, %y
@@ -347,10 +338,9 @@ define <16 x i8> @xor_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y,
 ; CHECK-LABEL: xor_ashr_commute3:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    neg v2.16b, v2.16b
+; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    sshl v0.16b, v0.16b, v2.16b
-; CHECK-NEXT:    sshl v1.16b, v1.16b, v2.16b
-; CHECK-NEXT:    eor v0.16b, v3.16b, v0.16b
-; CHECK-NEXT:    eor v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    eor v0.16b, v0.16b, v3.16b
 ; CHECK-NEXT:    ret
   %sh1 = ashr <16 x i8> %x0, %y
   %sh2 = ashr <16 x i8> %x1, %y
@@ -362,10 +352,9 @@ define <16 x i8> @xor_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y,
 define i32 @xor_shl_commute0(i32 %x0, i32 %x1, i32 %y, i32 %z) {
 ; CHECK-LABEL: xor_shl_commute0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl w8, w0, w2
-; CHECK-NEXT:    lsl w9, w1, w2
-; CHECK-NEXT:    eor w8, w8, w3
-; CHECK-NEXT:    eor w0, w8, w9
+; CHECK-NEXT:    eor w8, w0, w1
+; CHECK-NEXT:    lsl w8, w8, w2
+; CHECK-NEXT:    eor w0, w8, w3
 ; CHECK-NEXT:    ret
   %sh1 = shl i32 %x0, %y
   %sh2 = shl i32 %x1, %y
@@ -377,11 +366,10 @@ define i32 @xor_shl_commute0(i32 %x0, i32 %x1, i32 %y, i32 %z) {
 define i8 @xor_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 ; CHECK-LABEL: xor_shl_commute1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    eor w8, w0, w1
 ; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
-; CHECK-NEXT:    lsl w8, w0, w2
-; CHECK-NEXT:    lsl w9, w1, w2
-; CHECK-NEXT:    eor w8, w3, w8
-; CHECK-NEXT:    eor w0, w8, w9
+; CHECK-NEXT:    lsl w8, w8, w2
+; CHECK-NEXT:    eor w0, w8, w3
 ; CHECK-NEXT:    ret
   %sh1 = shl i8 %x0, %y
   %sh2 = shl i8 %x1, %y
@@ -393,10 +381,9 @@ define i8 @xor_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 define <2 x i64> @xor_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) {
 ; CHECK-LABEL: xor_shl_commute2:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ushl v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    ushl v1.2d, v1.2d, v2.2d
 ; CHECK-NEXT:    eor v0.16b, v0.16b, v3.16b
-; CHECK-NEXT:    eor v0.16b, v1.16b, v0.16b
 ; CHECK-NEXT:    ret
   %sh1 = shl <2 x i64> %x0, %y
   %sh2 = shl <2 x i64> %x1, %y
@@ -408,10 +395,9 @@ define <2 x i64> @xor_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <
 define <8 x i16> @xor_shl_commute3(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) {
 ; CHECK-LABEL: xor_shl_commute3:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ushl v0.8h, v0.8h, v2.8h
-; CHECK-NEXT:    ushl v1.8h, v1.8h, v2.8h
-; CHECK-NEXT:    eor v0.16b, v3.16b, v0.16b
-; CHECK-NEXT:    eor v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    eor v0.16b, v0.16b, v3.16b
 ; CHECK-NEXT:    ret
   %sh1 = shl <8 x i16> %x0, %y
   %sh2 = shl <8 x i16> %x1, %y
@@ -474,13 +460,11 @@ define i64 @mix_logic_ashr(i64 %x0, i64 %x1, i64 %y, i64 %z) {
 define i8 @and_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 ; CHECK-LABEL: and_lshr_commute0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    and w8, w0, w1
 ; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
-; CHECK-NEXT:    and w9, w1, #0xff
+; CHECK-NEXT:    and w8, w8, #0xff
 ; CHECK-NEXT:    lsr w8, w8, w2
-; CHECK-NEXT:    lsr w9, w9, w2
-; CHECK-NEXT:    and w8, w8, w3
-; CHECK-NEXT:    and w0, w8, w9
+; CHECK-NEXT:    and w0, w8, w3
 ; CHECK-NEXT:    ret
   %sh1 = lshr i8 %x0, %y
   %sh2 = lshr i8 %x1, %y
@@ -492,10 +476,9 @@ define i8 @and_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 define i32 @and_lshr_commute1(i32 %x0, i32 %x1, i32 %y, i32 %z) {
 ; CHECK-LABEL: and_lshr_commute1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr w8, w0, w2
-; CHECK-NEXT:    lsr w9, w1, w2
-; CHECK-NEXT:    and w8, w3, w8
-; CHECK-NEXT:    and w0, w8, w9
+; CHECK-NEXT:    and w8, w0, w1
+; CHECK-NEXT:    lsr w8, w8, w2
+; CHECK-NEXT:    and w0, w8, w3
 ; CHECK-NEXT:    ret
   %sh1 = lshr i32 %x0, %y
   %sh2 = lshr i32 %x1, %y
@@ -508,10 +491,9 @@ define <8 x i16> @and_lshr_commute2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y,
 ; CHECK-LABEL: and_lshr_commute2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    neg v2.8h, v2.8h
+; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ushl v0.8h, v0.8h, v2.8h
-; CHECK-NEXT:    ushl v1.8h, v1.8h, v2.8h
 ; CHECK-NEXT:    and v0.16b, v0.16b, v3.16b
-; CHECK-NEXT:    and v0.16b, v1.16b, v0.16b
 ; CHECK-NEXT:    ret
   %sh1 = lshr <8 x i16> %x0, %y
   %sh2 = lshr <8 x i16> %x1, %y
@@ -524,10 +506,9 @@ define <2 x i64> @and_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y,
 ; CHECK-LABEL: and_lshr_commute3:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    neg v2.2d, v2.2d
+; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ushl v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    ushl v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    and v0.16b, v3.16b, v0.16b
-; CHECK-NEXT:    and v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    and v0.16b, v0.16b, v3.16b
 ; CHECK-NEXT:    ret
   %sh1 = lshr <2 x i64> %x0, %y
   %sh2 = lshr <2 x i64> %x1, %y
@@ -539,13 +520,11 @@ define <2 x i64> @and_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y,
 define i16 @and_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) {
 ; CHECK-LABEL: and_ashr_commute0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxth w8, w0
+; CHECK-NEXT:    and w8, w0, w1
 ; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
-; CHECK-NEXT:    sxth w9, w1
+; CHECK-NEXT:    sxth w8, w8
 ; CHECK-NEXT:    asr w8, w8, w2
-; CHECK-NEXT:    asr w9, w9, w2
-; CHECK-NEXT:    and w8, w8, w3
-; CHECK-NEXT:    and w0, w8, w9
+; CHECK-NEXT:    and w0, w8, w3
 ; CHECK-NEXT:    ret
   %sh1 = ashr i16 %x0, %y
   %sh2 = ashr i16 %x1, %y
@@ -557,10 +536,9 @@ define i16 @and_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) {
 define i64 @and_ashr_commute1(i64 %x0, i64 %x1, i64 %y, i64 %z) {
 ; CHECK-LABEL: and_ashr_commute1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    asr x8, x0, x2
-; CHECK-NEXT:    asr x9, x1, x2
-; CHECK-NEXT:    and x8, x3, x8
-; CHECK-NEXT:    and x0, x8, x9
+; CHECK-NEXT:    and x8, x0, x1
+; CHECK-NEXT:    asr x8, x8, x2
+; CHECK-NEXT:    and x0, x8, x3
 ; CHECK-NEXT:    ret
   %sh1 = ashr i64 %x0, %y
   %sh2 = ashr i64 %x1, %y
@@ -573,10 +551,9 @@ define <4 x i32> @and_ashr_commute2(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %y,
 ; CHECK-LABEL: and_ashr_commute2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    neg v2.4s, v2.4s
+; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    sshl v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    sshl v1.4s, v1.4s, v2.4s
 ; CHECK-NEXT:    and v0.16b, v0.16b, v3.16b
-; CHECK-NEXT:    and v0.16b, v1.16b, v0.16b
 ; CHECK-NEXT:    ret
   %sh1 = ashr <4 x i32> %x0, %y
   %sh2 = ashr <4 x i32> %x1, %y
@@ -589,10 +566,9 @@ define <16 x i8> @and_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y,
 ; CHECK-LABEL: and_ashr_commute3:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    neg v2.16b, v2.16b
+; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    sshl v0.16b, v0.16b, v2.16b
-; CHECK-NEXT:    sshl v1.16b, v1.16b, v2.16b
-; CHECK-NEXT:    and v0.16b, v3.16b, v0.16b
-; CHECK-NEXT:    and v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    and v0.16b, v0.16b, v3.16b
 ; CHECK-NEXT:    ret
   %sh1 = ashr <16 x i8> %x0, %y
   %sh2 = ashr <16 x i8> %x1, %y
@@ -604,10 +580,9 @@ define <16 x i8> @and_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y,
 define i32 @and_shl_commute0(i32 %x0, i32 %x1, i32 %y, i32 %z) {
 ; CHECK-LABEL: and_shl_commute0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl w8, w0, w2
-; CHECK-NEXT:    lsl w9, w1, w2
-; CHECK-NEXT:    and w8, w8, w3
-; CHECK-NEXT:    and w0, w8, w9
+; CHECK-NEXT:    and w8, w0, w1
+; CHECK-NEXT:    lsl w8, w8, w2
+; CHECK-NEXT:    and w0, w8, w3
 ; CHECK-NEXT:    ret
   %sh1 = shl i32 %x0, %y
   %sh2 = shl i32 %x1, %y
@@ -619,11 +594,10 @@ define i32 @and_shl_commute0(i32 %x0, i32 %x1, i32 %y, i32 %z) {
 define i8 @and_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 ; CHECK-LABEL: and_shl_commute1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    and w8, w0, w1
 ; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
-; CHECK-NEXT:    lsl w8, w0, w2
-; CHECK-NEXT:    lsl w9, w1, w2
-; CHECK-NEXT:    and w8, w3, w8
-; CHECK-NEXT:    and w0, w8, w9
+; CHECK-NEXT:    lsl w8, w8, w2
+; CHECK-NEXT:    and w0, w8, w3
 ; CHECK-NEXT:    ret
   %sh1 = shl i8 %x0, %y
   %sh2 = shl i8 %x1, %y
@@ -635,10 +609,9 @@ define i8 @and_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 define <2 x i64> @and_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) {
 ; CHECK-LABEL: and_shl_commute2:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ushl v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    ushl v1.2d, v1.2d, v2.2d
 ; CHECK-NEXT:    and v0.16b, v0.16b, v3.16b
-; CHECK-NEXT:    and v0.16b, v1.16b, v0.16b
 ; CHECK-NEXT:    ret
   %sh1 = shl <2 x i64> %x0, %y
   %sh2 = shl <2 x i64> %x1, %y
@@ -650,10 +623,9 @@ define <2 x i64> @and_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <
 define <8 x i16> @and_shl_commute3(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) {
 ; CHECK-LABEL: and_shl_commute3:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ushl v0.8h, v0.8h, v2.8h
-; CHECK-NEXT:    ushl v1.8h, v1.8h, v2.8h
-; CHECK-NEXT:    and v0.16b, v3.16b, v0.16b
-; CHECK-NEXT:    and v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    and v0.16b, v0.16b, v3.16b
 ; CHECK-NEXT:    ret
   %sh1 = shl <8 x i16> %x0, %y
   %sh2 = shl <8 x i16> %x1, %y

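For reference, the and_lshr_commute0 test above roughly corresponds to source
like the following (a hypothetical C++ reconstruction, not taken from the
test suite); the 'and w8, w8, #0xff' in the folded output is the
zero-extension that comes from promoting the i8 shift to 32 bits:

    #include <cstdint>

    uint8_t and_lshr_commute0(uint8_t x0, uint8_t x1, uint8_t y, uint8_t z) {
      uint8_t sh1 = uint8_t(x0 >> y);   // lshr i8 %x0, %y
      uint8_t sh2 = uint8_t(x1 >> y);   // lshr i8 %x1, %y
      uint8_t logic = uint8_t(sh1 & z); // and i8 %sh1, %z
      return uint8_t(logic & sh2);      // and i8 %logic, %sh2
    }

With the fold, the AArch64 output needs one lsr and one mask instead of two
of each: four data-processing instructions instead of six.
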
diff --git a/llvm/test/CodeGen/X86/logic-shift.ll b/llvm/test/CodeGen/X86/logic-shift.ll
index 1a413a44f99a8..829ed4f748dd3 100644
--- a/llvm/test/CodeGen/X86/logic-shift.ll
+++ b/llvm/test/CodeGen/X86/logic-shift.ll
@@ -290,10 +290,9 @@ define i8 @xor_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %ecx, %eax
 ; CHECK-NEXT:    movl %edx, %ecx
-; CHECK-NEXT:    shrb %cl, %dil
+; CHECK-NEXT:    xorl %esi, %edi
 ; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT:    shrb %cl, %sil
-; CHECK-NEXT:    xorb %sil, %al
+; CHECK-NEXT:    shrb %cl, %dil
 ; CHECK-NEXT:    xorb %dil, %al
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
@@ -309,11 +308,10 @@ define i32 @xor_lshr_commute1(i32 %x0, i32 %x1, i32 %y, i32 %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %ecx, %eax
 ; CHECK-NEXT:    movl %edx, %ecx
-; CHECK-NEXT:    shrl %cl, %edi
+; CHECK-NEXT:    xorl %esi, %edi
 ; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT:    shrl %cl, %esi
-; CHECK-NEXT:    xorl %edi, %esi
-; CHECK-NEXT:    xorl %esi, %eax
+; CHECK-NEXT:    shrl %cl, %edi
+; CHECK-NEXT:    xorl %edi, %eax
 ; CHECK-NEXT:    retq
   %sh1 = lshr i32 %x0, %y
   %sh2 = lshr i32 %x1, %y
@@ -325,17 +323,13 @@ define i32 @xor_lshr_commute1(i32 %x0, i32 %x1, i32 %y, i32 %z) {
 define <8 x i16> @xor_lshr_commute2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) {
 ; CHECK-LABEL: xor_lshr_commute2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
-; CHECK-NEXT:    vpsrlvd %ymm2, %ymm0, %ymm0
-; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm4
-; CHECK-NEXT:    vpackusdw %xmm4, %xmm0, %xmm0
-; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; CHECK-NEXT:    vpsrlvd %ymm2, %ymm1, %ymm1
-; CHECK-NEXT:    vextracti128 $1, %ymm1, %xmm2
-; CHECK-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
-; CHECK-NEXT:    vpxor %xmm1, %xmm3, %xmm1
 ; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; CHECK-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; CHECK-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpxor %xmm3, %xmm0, %xmm0
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
   %sh1 = lshr <8 x i16> %x0, %y
@@ -348,10 +342,9 @@ define <8 x i16> @xor_lshr_commute2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y,
 define <2 x i64> @xor_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) {
 ; CHECK-LABEL: xor_lshr_commute3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpsrlvq %xmm2, %xmm0, %xmm0
-; CHECK-NEXT:    vpsrlvq %xmm2, %xmm1, %xmm1
 ; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vpxor %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    vpsrlvq %xmm2, %xmm0, %xmm0
+; CHECK-NEXT:    vpxor %xmm3, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %sh1 = lshr <2 x i64> %x0, %y
   %sh2 = lshr <2 x i64> %x1, %y
@@ -365,13 +358,11 @@ define i16 @xor_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %ecx, %r8d
 ; CHECK-NEXT:    movl %edx, %ecx
-; CHECK-NEXT:    movswl %si, %eax
-; CHECK-NEXT:    movswl %di, %edx
-; CHECK-NEXT:    sarl %cl, %edx
+; CHECK-NEXT:    xorl %esi, %edi
+; CHECK-NEXT:    movswl %di, %eax
 ; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; CHECK-NEXT:    sarl %cl, %eax
 ; CHECK-NEXT:    xorl %r8d, %eax
-; CHECK-NEXT:    xorl %edx, %eax
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
   %sh1 = ashr i16 %x0, %y
@@ -386,11 +377,10 @@ define i64 @xor_ashr_commute1(i64 %x0, i64 %x1, i64 %y, i64 %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rcx, %rax
 ; CHECK-NEXT:    movq %rdx, %rcx
-; CHECK-NEXT:    sarq %cl, %rdi
+; CHECK-NEXT:    xorq %rsi, %rdi
 ; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
-; CHECK-NEXT:    sarq %cl, %rsi
-; CHECK-NEXT:    xorq %rdi, %rsi
-; CHECK-NEXT:    xorq %rsi, %rax
+; CHECK-NEXT:    sarq %cl, %rdi
+; CHECK-NEXT:    xorq %rdi, %rax
 ; CHECK-NEXT:    retq
   %sh1 = ashr i64 %x0, %y
   %sh2 = ashr i64 %x1, %y
@@ -402,10 +392,9 @@ define i64 @xor_ashr_commute1(i64 %x0, i64 %x1, i64 %y, i64 %z) {
 define <4 x i32> @xor_ashr_commute2(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: xor_ashr_commute2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpsravd %xmm2, %xmm0, %xmm0
-; CHECK-NEXT:    vpsravd %xmm2, %xmm1, %xmm1
-; CHECK-NEXT:    vpxor %xmm1, %xmm3, %xmm1
 ; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpsravd %xmm2, %xmm0, %xmm0
+; CHECK-NEXT:    vpxor %xmm3, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %sh1 = ashr <4 x i32> %x0, %y
   %sh2 = ashr <4 x i32> %x1, %y
@@ -417,49 +406,32 @@ define <4 x i32> @xor_ashr_commute2(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %y,
 define <16 x i8> @xor_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, <16 x i8> %z) {
 ; CHECK-LABEL: xor_ashr_commute3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpunpckhbw {{.*#+}} xmm4 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; CHECK-NEXT:    vpsraw $4, %xmm4, %xmm5
 ; CHECK-NEXT:    vpsllw $5, %xmm2, %xmm2
-; CHECK-NEXT:    vpunpckhbw {{.*#+}} xmm6 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; CHECK-NEXT:    vpblendvb %xmm6, %xmm5, %xmm4, %xmm4
-; CHECK-NEXT:    vpsraw $2, %xmm4, %xmm5
-; CHECK-NEXT:    vpaddw %xmm6, %xmm6, %xmm7
-; CHECK-NEXT:    vpblendvb %xmm7, %xmm5, %xmm4, %xmm4
-; CHECK-NEXT:    vpsraw $1, %xmm4, %xmm5
-; CHECK-NEXT:    vpaddw %xmm7, %xmm7, %xmm8
-; CHECK-NEXT:    vpblendvb %xmm8, %xmm5, %xmm4, %xmm4
-; CHECK-NEXT:    vpsrlw $8, %xmm4, %xmm9
-; CHECK-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; CHECK-NEXT:    vpsraw $4, %xmm0, %xmm5
-; CHECK-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; CHECK-NEXT:    vpblendvb %xmm2, %xmm5, %xmm0, %xmm0
-; CHECK-NEXT:    vpsraw $2, %xmm0, %xmm5
-; CHECK-NEXT:    vpaddw %xmm2, %xmm2, %xmm4
-; CHECK-NEXT:    vpblendvb %xmm4, %xmm5, %xmm0, %xmm0
-; CHECK-NEXT:    vpsraw $1, %xmm0, %xmm5
-; CHECK-NEXT:    vpaddw %xmm4, %xmm4, %xmm10
-; CHECK-NEXT:    vpblendvb %xmm10, %xmm5, %xmm0, %xmm0
-; CHECK-NEXT:    vpsrlw $8, %xmm0, %xmm0
-; CHECK-NEXT:    vpackuswb %xmm9, %xmm0, %xmm9
-; CHECK-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; CHECK-NEXT:    vpsraw $4, %xmm5, %xmm0
-; CHECK-NEXT:    vpblendvb %xmm6, %xmm0, %xmm5, %xmm0
-; CHECK-NEXT:    vpsraw $2, %xmm0, %xmm5
-; CHECK-NEXT:    vpblendvb %xmm7, %xmm5, %xmm0, %xmm0
-; CHECK-NEXT:    vpsraw $1, %xmm0, %xmm5
-; CHECK-NEXT:    vpblendvb %xmm8, %xmm5, %xmm0, %xmm0
-; CHECK-NEXT:    vpsrlw $8, %xmm0, %xmm0
-; CHECK-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; CHECK-NEXT:    vpunpckhbw {{.*#+}} xmm4 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; CHECK-NEXT:    vpsraw $4, %xmm1, %xmm5
-; CHECK-NEXT:    vpblendvb %xmm2, %xmm5, %xmm1, %xmm1
-; CHECK-NEXT:    vpsraw $2, %xmm1, %xmm2
-; CHECK-NEXT:    vpblendvb %xmm4, %xmm2, %xmm1, %xmm1
-; CHECK-NEXT:    vpsraw $1, %xmm1, %xmm2
-; CHECK-NEXT:    vpblendvb %xmm10, %xmm2, %xmm1, %xmm1
+; CHECK-NEXT:    vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
+; CHECK-NEXT:    vpsraw $2, %xmm1, %xmm5
+; CHECK-NEXT:    vpaddw %xmm4, %xmm4, %xmm4
+; CHECK-NEXT:    vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
+; CHECK-NEXT:    vpsraw $1, %xmm1, %xmm5
+; CHECK-NEXT:    vpaddw %xmm4, %xmm4, %xmm4
+; CHECK-NEXT:    vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
 ; CHECK-NEXT:    vpsrlw $8, %xmm1, %xmm1
-; CHECK-NEXT:    vpackuswb %xmm0, %xmm1, %xmm0
-; CHECK-NEXT:    vpxor %xmm0, %xmm9, %xmm0
-; CHECK-NEXT:    vpxor %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; CHECK-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; CHECK-NEXT:    vpsraw $4, %xmm0, %xmm4
+; CHECK-NEXT:    vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
+; CHECK-NEXT:    vpsraw $2, %xmm0, %xmm4
+; CHECK-NEXT:    vpaddw %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
+; CHECK-NEXT:    vpsraw $1, %xmm0, %xmm4
+; CHECK-NEXT:    vpaddw %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
+; CHECK-NEXT:    vpsrlw $8, %xmm0, %xmm0
+; CHECK-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpxor %xmm3, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %sh1 = ashr <16 x i8> %x0, %y
   %sh2 = ashr <16 x i8> %x1, %y
@@ -473,10 +445,9 @@ define i32 @xor_shl_commute0(i32 %x0, i32 %x1, i32 %y, i32 %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %ecx, %eax
 ; CHECK-NEXT:    movl %edx, %ecx
-; CHECK-NEXT:    shll %cl, %edi
+; CHECK-NEXT:    xorl %esi, %edi
 ; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT:    shll %cl, %esi
-; CHECK-NEXT:    xorl %esi, %eax
+; CHECK-NEXT:    shll %cl, %edi
 ; CHECK-NEXT:    xorl %edi, %eax
 ; CHECK-NEXT:    retq
   %sh1 = shl i32 %x0, %y
@@ -491,11 +462,10 @@ define i8 @xor_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %ecx, %eax
 ; CHECK-NEXT:    movl %edx, %ecx
-; CHECK-NEXT:    shlb %cl, %dil
+; CHECK-NEXT:    xorl %esi, %edi
 ; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT:    shlb %cl, %sil
-; CHECK-NEXT:    xorb %dil, %sil
-; CHECK-NEXT:    xorb %sil, %al
+; CHECK-NEXT:    shlb %cl, %dil
+; CHECK-NEXT:    xorb %dil, %al
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %sh1 = shl i8 %x0, %y
@@ -508,10 +478,9 @@ define i8 @xor_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 define <2 x i64> @xor_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) {
 ; CHECK-LABEL: xor_shl_commute2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpsllvq %xmm2, %xmm0, %xmm0
-; CHECK-NEXT:    vpsllvq %xmm2, %xmm1, %xmm1
-; CHECK-NEXT:    vpxor %xmm1, %xmm3, %xmm1
 ; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpsllvq %xmm2, %xmm0, %xmm0
+; CHECK-NEXT:    vpxor %xmm3, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %sh1 = shl <2 x i64> %x0, %y
   %sh2 = shl <2 x i64> %x1, %y
@@ -523,18 +492,13 @@ define <2 x i64> @xor_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <
 define <8 x i16> @xor_shl_commute3(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) {
 ; CHECK-LABEL: xor_shl_commute3:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
-; CHECK-NEXT:    vpsllvd %ymm2, %ymm0, %ymm0
-; CHECK-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
-; CHECK-NEXT:    vpshufb %ymm4, %ymm0, %ymm0
+; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; CHECK-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
 ; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; CHECK-NEXT:    vpsllvd %ymm2, %ymm1, %ymm1
-; CHECK-NEXT:    vpshufb %ymm4, %ymm1, %ymm1
-; CHECK-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
-; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vpxor %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    vpxor %xmm3, %xmm0, %xmm0
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
   %sh1 = shl <8 x i16> %x0, %y
@@ -610,10 +574,9 @@ define i8 @and_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %ecx, %eax
 ; CHECK-NEXT:    movl %edx, %ecx
-; CHECK-NEXT:    shrb %cl, %dil
+; CHECK-NEXT:    andl %esi, %edi
 ; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT:    shrb %cl, %sil
-; CHECK-NEXT:    andb %sil, %al
+; CHECK-NEXT:    shrb %cl, %dil
 ; CHECK-NEXT:    andb %dil, %al
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
@@ -629,11 +592,10 @@ define i32 @and_lshr_commute1(i32 %x0, i32 %x1, i32 %y, i32 %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %ecx, %eax
 ; CHECK-NEXT:    movl %edx, %ecx
-; CHECK-NEXT:    shrl %cl, %edi
+; CHECK-NEXT:    andl %esi, %edi
 ; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT:    shrl %cl, %esi
-; CHECK-NEXT:    andl %edi, %esi
-; CHECK-NEXT:    andl %esi, %eax
+; CHECK-NEXT:    shrl %cl, %edi
+; CHECK-NEXT:    andl %edi, %eax
 ; CHECK-NEXT:    retq
   %sh1 = lshr i32 %x0, %y
   %sh2 = lshr i32 %x1, %y
@@ -645,17 +607,13 @@ define i32 @and_lshr_commute1(i32 %x0, i32 %x1, i32 %y, i32 %z) {
 define <8 x i16> @and_lshr_commute2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) {
 ; CHECK-LABEL: and_lshr_commute2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
-; CHECK-NEXT:    vpsrlvd %ymm2, %ymm0, %ymm0
-; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm4
-; CHECK-NEXT:    vpackusdw %xmm4, %xmm0, %xmm0
-; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; CHECK-NEXT:    vpsrlvd %ymm2, %ymm1, %ymm1
-; CHECK-NEXT:    vextracti128 $1, %ymm1, %xmm2
-; CHECK-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
-; CHECK-NEXT:    vpand %xmm1, %xmm3, %xmm1
 ; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; CHECK-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; CHECK-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
   %sh1 = lshr <8 x i16> %x0, %y
@@ -668,10 +626,9 @@ define <8 x i16> @and_lshr_commute2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y,
 define <2 x i64> @and_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) {
 ; CHECK-LABEL: and_lshr_commute3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpsrlvq %xmm2, %xmm0, %xmm0
-; CHECK-NEXT:    vpsrlvq %xmm2, %xmm1, %xmm1
 ; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vpand %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    vpsrlvq %xmm2, %xmm0, %xmm0
+; CHECK-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %sh1 = lshr <2 x i64> %x0, %y
   %sh2 = lshr <2 x i64> %x1, %y
@@ -685,13 +642,11 @@ define i16 @and_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %ecx, %r8d
 ; CHECK-NEXT:    movl %edx, %ecx
-; CHECK-NEXT:    movswl %si, %eax
-; CHECK-NEXT:    movswl %di, %edx
-; CHECK-NEXT:    sarl %cl, %edx
+; CHECK-NEXT:    andl %esi, %edi
+; CHECK-NEXT:    movswl %di, %eax
 ; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; CHECK-NEXT:    sarl %cl, %eax
 ; CHECK-NEXT:    andl %r8d, %eax
-; CHECK-NEXT:    andl %edx, %eax
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
   %sh1 = ashr i16 %x0, %y
@@ -706,11 +661,10 @@ define i64 @and_ashr_commute1(i64 %x0, i64 %x1, i64 %y, i64 %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rcx, %rax
 ; CHECK-NEXT:    movq %rdx, %rcx
-; CHECK-NEXT:    sarq %cl, %rdi
+; CHECK-NEXT:    andq %rsi, %rdi
 ; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
-; CHECK-NEXT:    sarq %cl, %rsi
-; CHECK-NEXT:    andq %rdi, %rsi
-; CHECK-NEXT:    andq %rsi, %rax
+; CHECK-NEXT:    sarq %cl, %rdi
+; CHECK-NEXT:    andq %rdi, %rax
 ; CHECK-NEXT:    retq
   %sh1 = ashr i64 %x0, %y
   %sh2 = ashr i64 %x1, %y
@@ -722,10 +676,9 @@ define i64 @and_ashr_commute1(i64 %x0, i64 %x1, i64 %y, i64 %z) {
 define <4 x i32> @and_ashr_commute2(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: and_ashr_commute2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpsravd %xmm2, %xmm0, %xmm0
-; CHECK-NEXT:    vpsravd %xmm2, %xmm1, %xmm1
-; CHECK-NEXT:    vpand %xmm1, %xmm3, %xmm1
 ; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpsravd %xmm2, %xmm0, %xmm0
+; CHECK-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %sh1 = ashr <4 x i32> %x0, %y
   %sh2 = ashr <4 x i32> %x1, %y
@@ -737,49 +690,32 @@ define <4 x i32> @and_ashr_commute2(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %y,
 define <16 x i8> @and_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, <16 x i8> %z) {
 ; CHECK-LABEL: and_ashr_commute3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpunpckhbw {{.*#+}} xmm4 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; CHECK-NEXT:    vpsraw $4, %xmm4, %xmm5
 ; CHECK-NEXT:    vpsllw $5, %xmm2, %xmm2
-; CHECK-NEXT:    vpunpckhbw {{.*#+}} xmm6 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; CHECK-NEXT:    vpblendvb %xmm6, %xmm5, %xmm4, %xmm4
-; CHECK-NEXT:    vpsraw $2, %xmm4, %xmm5
-; CHECK-NEXT:    vpaddw %xmm6, %xmm6, %xmm7
-; CHECK-NEXT:    vpblendvb %xmm7, %xmm5, %xmm4, %xmm4
-; CHECK-NEXT:    vpsraw $1, %xmm4, %xmm5
-; CHECK-NEXT:    vpaddw %xmm7, %xmm7, %xmm8
-; CHECK-NEXT:    vpblendvb %xmm8, %xmm5, %xmm4, %xmm4
-; CHECK-NEXT:    vpsrlw $8, %xmm4, %xmm9
-; CHECK-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; CHECK-NEXT:    vpsraw $4, %xmm0, %xmm5
-; CHECK-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; CHECK-NEXT:    vpblendvb %xmm2, %xmm5, %xmm0, %xmm0
-; CHECK-NEXT:    vpsraw $2, %xmm0, %xmm5
-; CHECK-NEXT:    vpaddw %xmm2, %xmm2, %xmm4
-; CHECK-NEXT:    vpblendvb %xmm4, %xmm5, %xmm0, %xmm0
-; CHECK-NEXT:    vpsraw $1, %xmm0, %xmm5
-; CHECK-NEXT:    vpaddw %xmm4, %xmm4, %xmm10
-; CHECK-NEXT:    vpblendvb %xmm10, %xmm5, %xmm0, %xmm0
-; CHECK-NEXT:    vpsrlw $8, %xmm0, %xmm0
-; CHECK-NEXT:    vpackuswb %xmm9, %xmm0, %xmm9
-; CHECK-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; CHECK-NEXT:    vpsraw $4, %xmm5, %xmm0
-; CHECK-NEXT:    vpblendvb %xmm6, %xmm0, %xmm5, %xmm0
-; CHECK-NEXT:    vpsraw $2, %xmm0, %xmm5
-; CHECK-NEXT:    vpblendvb %xmm7, %xmm5, %xmm0, %xmm0
-; CHECK-NEXT:    vpsraw $1, %xmm0, %xmm5
-; CHECK-NEXT:    vpblendvb %xmm8, %xmm5, %xmm0, %xmm0
-; CHECK-NEXT:    vpsrlw $8, %xmm0, %xmm0
-; CHECK-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; CHECK-NEXT:    vpunpckhbw {{.*#+}} xmm4 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; CHECK-NEXT:    vpsraw $4, %xmm1, %xmm5
-; CHECK-NEXT:    vpblendvb %xmm2, %xmm5, %xmm1, %xmm1
-; CHECK-NEXT:    vpsraw $2, %xmm1, %xmm2
-; CHECK-NEXT:    vpblendvb %xmm4, %xmm2, %xmm1, %xmm1
-; CHECK-NEXT:    vpsraw $1, %xmm1, %xmm2
-; CHECK-NEXT:    vpblendvb %xmm10, %xmm2, %xmm1, %xmm1
+; CHECK-NEXT:    vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
+; CHECK-NEXT:    vpsraw $2, %xmm1, %xmm5
+; CHECK-NEXT:    vpaddw %xmm4, %xmm4, %xmm4
+; CHECK-NEXT:    vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
+; CHECK-NEXT:    vpsraw $1, %xmm1, %xmm5
+; CHECK-NEXT:    vpaddw %xmm4, %xmm4, %xmm4
+; CHECK-NEXT:    vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
 ; CHECK-NEXT:    vpsrlw $8, %xmm1, %xmm1
-; CHECK-NEXT:    vpackuswb %xmm0, %xmm1, %xmm0
-; CHECK-NEXT:    vpand %xmm0, %xmm9, %xmm0
-; CHECK-NEXT:    vpand %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; CHECK-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; CHECK-NEXT:    vpsraw $4, %xmm0, %xmm4
+; CHECK-NEXT:    vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
+; CHECK-NEXT:    vpsraw $2, %xmm0, %xmm4
+; CHECK-NEXT:    vpaddw %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
+; CHECK-NEXT:    vpsraw $1, %xmm0, %xmm4
+; CHECK-NEXT:    vpaddw %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
+; CHECK-NEXT:    vpsrlw $8, %xmm0, %xmm0
+; CHECK-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %sh1 = ashr <16 x i8> %x0, %y
   %sh2 = ashr <16 x i8> %x1, %y
@@ -793,10 +729,9 @@ define i32 @and_shl_commute0(i32 %x0, i32 %x1, i32 %y, i32 %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %ecx, %eax
 ; CHECK-NEXT:    movl %edx, %ecx
-; CHECK-NEXT:    shll %cl, %edi
+; CHECK-NEXT:    andl %esi, %edi
 ; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT:    shll %cl, %esi
-; CHECK-NEXT:    andl %esi, %eax
+; CHECK-NEXT:    shll %cl, %edi
 ; CHECK-NEXT:    andl %edi, %eax
 ; CHECK-NEXT:    retq
   %sh1 = shl i32 %x0, %y
@@ -811,11 +746,10 @@ define i8 @and_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %ecx, %eax
 ; CHECK-NEXT:    movl %edx, %ecx
-; CHECK-NEXT:    shlb %cl, %dil
+; CHECK-NEXT:    andl %esi, %edi
 ; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT:    shlb %cl, %sil
-; CHECK-NEXT:    andb %dil, %sil
-; CHECK-NEXT:    andb %sil, %al
+; CHECK-NEXT:    shlb %cl, %dil
+; CHECK-NEXT:    andb %dil, %al
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %sh1 = shl i8 %x0, %y
@@ -828,10 +762,9 @@ define i8 @and_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 define <2 x i64> @and_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) {
 ; CHECK-LABEL: and_shl_commute2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpsllvq %xmm2, %xmm0, %xmm0
-; CHECK-NEXT:    vpsllvq %xmm2, %xmm1, %xmm1
-; CHECK-NEXT:    vpand %xmm1, %xmm3, %xmm1
 ; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpsllvq %xmm2, %xmm0, %xmm0
+; CHECK-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %sh1 = shl <2 x i64> %x0, %y
   %sh2 = shl <2 x i64> %x1, %y
@@ -843,18 +776,13 @@ define <2 x i64> @and_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <
 define <8 x i16> @and_shl_commute3(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) {
 ; CHECK-LABEL: and_shl_commute3:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
-; CHECK-NEXT:    vpsllvd %ymm2, %ymm0, %ymm0
-; CHECK-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
-; CHECK-NEXT:    vpshufb %ymm4, %ymm0, %ymm0
+; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; CHECK-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
 ; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; CHECK-NEXT:    vpsllvd %ymm2, %ymm1, %ymm1
-; CHECK-NEXT:    vpshufb %ymm4, %ymm1, %ymm1
-; CHECK-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
-; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vpand %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
   %sh1 = shl <8 x i16> %x0, %y

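The largest x86 diffs are the variable-amount <16 x i8> ashr cases: AVX2 has
no per-element arithmetic byte shift, so each shift expands to a long
vpsllw/vpblendvb sequence, and doing the logic op first emits that expansion
once instead of twice. These CHECK lines match the output format of
utils/update_llc_test_checks.py, so after a combine like this they would
typically be regenerated rather than hand-edited, e.g.:

    llvm/utils/update_llc_test_checks.py llvm/test/CodeGen/X86/logic-shift.ll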

        

