[llvm] r364791 - [InstCombine] Shift amount reassociation in bittest (PR42399)

Roman Lebedev via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 1 08:55:15 PDT 2019


Author: lebedevri
Date: Mon Jul  1 08:55:15 2019
New Revision: 364791

URL: http://llvm.org/viewvc/llvm-project?rev=364791&view=rev
Log:
[InstCombine] Shift amount reassociation in bittest (PR42399)

Summary:
Given pattern:
`icmp eq/ne (and ((x shift Q), (y oppositeshift K))), 0`
we should move shifts to the same hand of 'and', i.e. rewrite as
`icmp eq/ne (and (x shift (Q+K)), y), 0`  iff `(Q+K) u< bitwidth(x)`
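
For illustration, with Q = K = 1 on i32 (this mirrors the updated
@t0_const_lshr_shl_ne test below; the value names are only for readability):

  %t0 = lshr i32 %x, 1
  %t1 = shl i32 %y, 1
  %t2 = and i32 %t1, %t0
  %t3 = icmp ne i32 %t2, 0

is rewritten into

  %t0 = lshr i32 %x, 2
  %t1 = and i32 %t0, %y
  %t3 = icmp ne i32 %t1, 0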

It might be tempting to not restrict this to situations where we know
we'd fold the two shifts together, but I'm not sure what the rules should
be to avoid endless combine loops.

For the merged shift we keep the direction of the shift that was originally
applied to the variable that remains shifted:
https://rise4fun.com/Alive/6x1v

Should fix [[ https://bugs.llvm.org/show_bug.cgi?id=42399 | PR42399]].

Reviewers: spatel, nikic, RKSimon

Reviewed By: spatel

Subscribers: llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D63829

Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
    llvm/trunk/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp?rev=364791&r1=364790&r2=364791&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp Mon Jul  1 08:55:15 2019
@@ -3270,6 +3270,63 @@ foldICmpWithTruncSignExtendedVal(ICmpIns
   return T1;
 }
 
+// Given pattern:
+//   icmp eq/ne (and ((x shift Q), (y oppositeshift K))), 0
+// we should move shifts to the same hand of 'and', i.e. rewrite as
+//   icmp eq/ne (and (x shift (Q+K)), y), 0  iff (Q+K) u< bitwidth(x)
+// We are only interested in opposite logical shifts here.
+// If we can, we want to end up creating 'lshr' shift.
+static Value *
+foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
+                                           InstCombiner::BuilderTy &Builder) {
+  if (!I.isEquality() || !match(I.getOperand(1), m_Zero()) ||
+      !I.getOperand(0)->hasOneUse())
+    return nullptr;
+
+  auto m_AnyLogicalShift = m_LogicalShift(m_Value(), m_Value());
+  auto m_AnyLShr = m_LShr(m_Value(), m_Value());
+
+  // Look for an 'and' of two (opposite) logical shifts.
+  // Pick the single-use shift as XShift.
+  Value *XShift, *YShift;
+  if (!match(I.getOperand(0),
+             m_c_And(m_OneUse(m_CombineAnd(m_AnyLogicalShift, m_Value(XShift))),
+                     m_CombineAnd(m_AnyLogicalShift, m_Value(YShift)))))
+    return nullptr;
+
+  // If YShift is a single-use 'lshr', swap the shifts around.
+  if (match(YShift, m_OneUse(m_AnyLShr)))
+    std::swap(XShift, YShift);
+
+  // The shifts must be in opposite directions.
+  Instruction::BinaryOps XShiftOpcode =
+      cast<BinaryOperator>(XShift)->getOpcode();
+  if (XShiftOpcode == cast<BinaryOperator>(YShift)->getOpcode())
+    return nullptr; // Do not care about same-direction shifts here.
+
+  Value *X, *XShAmt, *Y, *YShAmt;
+  match(XShift, m_BinOp(m_Value(X), m_Value(XShAmt)));
+  match(YShift, m_BinOp(m_Value(Y), m_Value(YShAmt)));
+
+  // Can we fold (XShAmt+YShAmt) ?
+  Value *NewShAmt = SimplifyBinOp(Instruction::BinaryOps::Add, XShAmt, YShAmt,
+                                  SQ.getWithInstruction(&I));
+  if (!NewShAmt)
+    return nullptr;
+  // Is the new shift amount smaller than the bit width?
+  // FIXME: could also rely on ConstantRange.
+  unsigned BitWidth = X->getType()->getScalarSizeInBits();
+  if (!match(NewShAmt, m_SpecificInt_ULT(APInt(BitWidth, BitWidth))))
+    return nullptr;
+  // All good, we can do this fold. The shift is the same that was for X.
+  Value *T0 = XShiftOpcode == Instruction::BinaryOps::LShr
+                  ? Builder.CreateLShr(X, NewShAmt)
+                  : Builder.CreateShl(X, NewShAmt);
+  Value *T1 = Builder.CreateAnd(T0, Y);
+  return Builder.CreateICmp(I.getPredicate(), T1,
+                            Constant::getNullValue(X->getType()));
+}
+
 /// Try to fold icmp (binop), X or icmp X, (binop).
 /// TODO: A large part of this logic is duplicated in InstSimplify's
 /// simplifyICmpWithBinOp(). We should be able to share that and avoid the code
@@ -3625,6 +3682,9 @@ Instruction *InstCombiner::foldICmpBinOp
   if (Value *V = foldICmpWithTruncSignExtendedVal(I, Builder))
     return replaceInstUsesWith(I, V);
 
+  if (Value *V = foldShiftIntoShiftInAnotherHandOfAndInICmp(I, SQ, Builder))
+    return replaceInstUsesWith(I, V);
+
   return nullptr;
 }
 

Modified: llvm/trunk/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll?rev=364791&r1=364790&r2=364791&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll Mon Jul  1 08:55:15 2019
@@ -11,11 +11,10 @@
 
 define i1 @t0_const_lshr_shl_ne(i32 %x, i32 %y) {
 ; CHECK-LABEL: @t0_const_lshr_shl_ne(
-; CHECK-NEXT:    [[T0:%.*]] = lshr i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[Y:%.*]], 1
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %t0 = lshr i32 %x, 1
   %t1 = shl i32 %y, 1
@@ -25,11 +24,10 @@ define i1 @t0_const_lshr_shl_ne(i32 %x,
 }
 define i1 @t1_const_shl_lshr_ne(i32 %x, i32 %y) {
 ; CHECK-LABEL: @t1_const_shl_lshr_ne(
-; CHECK-NEXT:    [[T0:%.*]] = shl i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[T1:%.*]] = lshr i32 [[Y:%.*]], 1
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[Y:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %t0 = shl i32 %x, 1
   %t1 = lshr i32 %y, 1
@@ -41,11 +39,10 @@ define i1 @t1_const_shl_lshr_ne(i32 %x,
 ; We are ok with 'eq' predicate too.
 define i1 @t2_const_lshr_shl_eq(i32 %x, i32 %y) {
 ; CHECK-LABEL: @t2_const_lshr_shl_eq(
-; CHECK-NEXT:    [[T0:%.*]] = lshr i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[Y:%.*]], 1
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp eq i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %t0 = lshr i32 %x, 1
   %t1 = shl i32 %y, 1
@@ -58,13 +55,10 @@ define i1 @t2_const_lshr_shl_eq(i32 %x,
 
 define i1 @t3_const_after_fold_lshr_shl_ne(i32 %x, i32 %y, i32 %len) {
 ; CHECK-LABEL: @t3_const_after_fold_lshr_shl_ne(
-; CHECK-NEXT:    [[T0:%.*]] = sub i32 32, [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = lshr i32 [[X:%.*]], [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[LEN]], -1
-; CHECK-NEXT:    [[T3:%.*]] = shl i32 [[Y:%.*]], [[T2]]
-; CHECK-NEXT:    [[T4:%.*]] = and i32 [[T1]], [[T3]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne i32 [[T4]], 0
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 31
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %t0 = sub i32 32, %len
   %t1 = lshr i32 %x, %t0
@@ -76,13 +70,10 @@ define i1 @t3_const_after_fold_lshr_shl_
 }
 define i1 @t4_const_after_fold_lshr_shl_ne(i32 %x, i32 %y, i32 %len) {
 ; CHECK-LABEL: @t4_const_after_fold_lshr_shl_ne(
-; CHECK-NEXT:    [[T0:%.*]] = sub i32 32, [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[X:%.*]], [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[LEN]], -1
-; CHECK-NEXT:    [[T3:%.*]] = lshr i32 [[Y:%.*]], [[T2]]
-; CHECK-NEXT:    [[T4:%.*]] = and i32 [[T1]], [[T3]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne i32 [[T4]], 0
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[Y:%.*]], 31
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %t0 = sub i32 32, %len
   %t1 = shl i32 %x, %t0
@@ -128,11 +119,10 @@ define i1 @t6_const_shl_lshr_ne(i32 %x,
 
 define <2 x i1> @t7_const_lshr_shl_ne_vec_splat(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @t7_const_lshr_shl_ne_vec_splat(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[T1:%.*]] = shl <2 x i32> [[Y:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[T2:%.*]] = and <2 x i32> [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <2 x i32> [[T2]], zeroinitializer
-; CHECK-NEXT:    ret <2 x i1> [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[TMP3]]
 ;
   %t0 = lshr <2 x i32> %x, <i32 1, i32 1>
   %t1 = shl <2 x i32> %y, <i32 1, i32 1>
@@ -142,11 +132,10 @@ define <2 x i1> @t7_const_lshr_shl_ne_ve
 }
 define <2 x i1> @t8_const_lshr_shl_ne_vec_nonsplat(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @t8_const_lshr_shl_ne_vec_nonsplat(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 1, i32 2>
-; CHECK-NEXT:    [[T1:%.*]] = shl <2 x i32> [[Y:%.*]], <i32 3, i32 4>
-; CHECK-NEXT:    [[T2:%.*]] = and <2 x i32> [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <2 x i32> [[T2]], zeroinitializer
-; CHECK-NEXT:    ret <2 x i1> [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 4, i32 6>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[TMP3]]
 ;
   %t0 = lshr <2 x i32> %x, <i32 1, i32 2>
   %t1 = shl <2 x i32> %y, <i32 3, i32 4>
@@ -156,11 +145,10 @@ define <2 x i1> @t8_const_lshr_shl_ne_ve
 }
 define <3 x i1> @t9_const_lshr_shl_ne_vec_undef0(<3 x i32> %x, <3 x i32> %y) {
 ; CHECK-LABEL: @t9_const_lshr_shl_ne_vec_undef0(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 1, i32 undef, i32 1>
-; CHECK-NEXT:    [[T1:%.*]] = shl <3 x i32> [[Y:%.*]], <i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[T2:%.*]] = and <3 x i32> [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <3 x i32> [[T2]], zeroinitializer
-; CHECK-NEXT:    ret <3 x i1> [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 undef, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP3]]
 ;
   %t0 = lshr <3 x i32> %x, <i32 1, i32 undef, i32 1>
   %t1 = shl <3 x i32> %y, <i32 1, i32 1, i32 1>
@@ -170,11 +158,10 @@ define <3 x i1> @t9_const_lshr_shl_ne_ve
 }
 define <3 x i1> @t10_const_lshr_shl_ne_vec_undef1(<3 x i32> %x, <3 x i32> %y) {
 ; CHECK-LABEL: @t10_const_lshr_shl_ne_vec_undef1(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[T1:%.*]] = shl <3 x i32> [[Y:%.*]], <i32 1, i32 undef, i32 1>
-; CHECK-NEXT:    [[T2:%.*]] = and <3 x i32> [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <3 x i32> [[T2]], zeroinitializer
-; CHECK-NEXT:    ret <3 x i1> [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 undef, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP3]]
 ;
   %t0 = lshr <3 x i32> %x, <i32 1, i32 1, i32 1>
   %t1 = shl <3 x i32> %y, <i32 1, i32 undef, i32 1>
@@ -184,11 +171,10 @@ define <3 x i1> @t10_const_lshr_shl_ne_v
 }
 define <3 x i1> @t11_const_lshr_shl_ne_vec_undef2(<3 x i32> %x, <3 x i32> %y) {
 ; CHECK-LABEL: @t11_const_lshr_shl_ne_vec_undef2(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[T1:%.*]] = shl <3 x i32> [[Y:%.*]], <i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[T2:%.*]] = and <3 x i32> [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <3 x i32> [[T2]], <i32 0, i32 undef, i32 0>
-; CHECK-NEXT:    ret <3 x i1> [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP3]]
 ;
   %t0 = lshr <3 x i32> %x, <i32 1, i32 1, i32 1>
   %t1 = shl <3 x i32> %y, <i32 1, i32 1, i32 1>
@@ -198,11 +184,10 @@ define <3 x i1> @t11_const_lshr_shl_ne_v
 }
 define <3 x i1> @t12_const_lshr_shl_ne_vec_undef3(<3 x i32> %x, <3 x i32> %y) {
 ; CHECK-LABEL: @t12_const_lshr_shl_ne_vec_undef3(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 1, i32 undef, i32 1>
-; CHECK-NEXT:    [[T1:%.*]] = shl <3 x i32> [[Y:%.*]], <i32 1, i32 undef, i32 1>
-; CHECK-NEXT:    [[T2:%.*]] = and <3 x i32> [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <3 x i32> [[T2]], zeroinitializer
-; CHECK-NEXT:    ret <3 x i1> [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 undef, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP3]]
 ;
   %t0 = lshr <3 x i32> %x, <i32 1, i32 undef, i32 1>
   %t1 = shl <3 x i32> %y, <i32 1, i32 undef, i32 1>
@@ -212,11 +197,10 @@ define <3 x i1> @t12_const_lshr_shl_ne_v
 }
 define <3 x i1> @t13_const_lshr_shl_ne_vec_undef4(<3 x i32> %x, <3 x i32> %y) {
 ; CHECK-LABEL: @t13_const_lshr_shl_ne_vec_undef4(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[T1:%.*]] = shl <3 x i32> [[Y:%.*]], <i32 1, i32 undef, i32 1>
-; CHECK-NEXT:    [[T2:%.*]] = and <3 x i32> [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <3 x i32> [[T2]], <i32 0, i32 undef, i32 0>
-; CHECK-NEXT:    ret <3 x i1> [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 undef, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP3]]
 ;
   %t0 = lshr <3 x i32> %x, <i32 1, i32 1, i32 1>
   %t1 = shl <3 x i32> %y, <i32 1, i32 undef, i32 1>
@@ -226,11 +210,10 @@ define <3 x i1> @t13_const_lshr_shl_ne_v
 }
 define <3 x i1> @t14_const_lshr_shl_ne_vec_undef5(<3 x i32> %x, <3 x i32> %y) {
 ; CHECK-LABEL: @t14_const_lshr_shl_ne_vec_undef5(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 1, i32 undef, i32 1>
-; CHECK-NEXT:    [[T1:%.*]] = shl <3 x i32> [[Y:%.*]], <i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[T2:%.*]] = and <3 x i32> [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <3 x i32> [[T2]], <i32 0, i32 undef, i32 0>
-; CHECK-NEXT:    ret <3 x i1> [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 undef, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP3]]
 ;
   %t0 = lshr <3 x i32> %x, <i32 1, i32 undef, i32 1>
   %t1 = shl <3 x i32> %y, <i32 1, i32 1, i32 1>
@@ -240,11 +223,10 @@ define <3 x i1> @t14_const_lshr_shl_ne_v
 }
 define <3 x i1> @t15_const_lshr_shl_ne_vec_undef6(<3 x i32> %x, <3 x i32> %y) {
 ; CHECK-LABEL: @t15_const_lshr_shl_ne_vec_undef6(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 1, i32 undef, i32 1>
-; CHECK-NEXT:    [[T1:%.*]] = shl <3 x i32> [[Y:%.*]], <i32 1, i32 undef, i32 1>
-; CHECK-NEXT:    [[T2:%.*]] = and <3 x i32> [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <3 x i32> [[T2]], <i32 0, i32 undef, i32 0>
-; CHECK-NEXT:    ret <3 x i1> [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 undef, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP3]]
 ;
   %t0 = lshr <3 x i32> %x, <i32 1, i32 undef, i32 1>
   %t1 = shl <3 x i32> %y, <i32 1, i32 undef, i32 1>
@@ -260,11 +242,10 @@ declare i32 @gen32()
 define i1 @t16_commutativity0(i32 %x) {
 ; CHECK-LABEL: @t16_commutativity0(
 ; CHECK-NEXT:    [[Y:%.*]] = call i32 @gen32()
-; CHECK-NEXT:    [[T0:%.*]] = lshr i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[Y]], 1
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[Y]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %y = call i32 @gen32()
   %t0 = lshr i32 %x, 1
@@ -277,11 +258,10 @@ define i1 @t16_commutativity0(i32 %x) {
 define i1 @t17_commutativity1(i32 %y) {
 ; CHECK-LABEL: @t17_commutativity1(
 ; CHECK-NEXT:    [[X:%.*]] = call i32 @gen32()
-; CHECK-NEXT:    [[T0:%.*]] = lshr i32 [[X]], 1
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[Y:%.*]], 1
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T0]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %x = call i32 @gen32()
   %t0 = lshr i32 %x, 1
@@ -299,10 +279,10 @@ define i1 @t18_const_oneuse0(i32 %x, i32
 ; CHECK-LABEL: @t18_const_oneuse0(
 ; CHECK-NEXT:    [[T0:%.*]] = lshr i32 [[X:%.*]], 1
 ; CHECK-NEXT:    call void @use32(i32 [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[Y:%.*]], 1
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[Y:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %t0 = lshr i32 %x, 1
   call void @use32(i32 %t0)
@@ -313,12 +293,12 @@ define i1 @t18_const_oneuse0(i32 %x, i32
 }
 define i1 @t19_const_oneuse1(i32 %x, i32 %y) {
 ; CHECK-LABEL: @t19_const_oneuse1(
-; CHECK-NEXT:    [[T0:%.*]] = lshr i32 [[X:%.*]], 1
 ; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[Y:%.*]], 1
 ; CHECK-NEXT:    call void @use32(i32 [[T1]])
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[Y]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %t0 = lshr i32 %x, 1
   %t1 = shl i32 %y, 1
@@ -548,10 +528,10 @@ define i1 @t32_commutativity0_oneuse0(i3
 ; CHECK-NEXT:    [[Y:%.*]] = call i32 @gen32()
 ; CHECK-NEXT:    [[T0:%.*]] = lshr i32 [[X:%.*]], 1
 ; CHECK-NEXT:    call void @use32(i32 [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[Y]], 1
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[Y]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %y = call i32 @gen32()
   %t0 = lshr i32 %x, 1
@@ -564,12 +544,12 @@ define i1 @t32_commutativity0_oneuse0(i3
 define i1 @t33_commutativity0_oneuse1(i32 %x) {
 ; CHECK-LABEL: @t33_commutativity0_oneuse1(
 ; CHECK-NEXT:    [[Y:%.*]] = call i32 @gen32()
-; CHECK-NEXT:    [[T0:%.*]] = lshr i32 [[X:%.*]], 1
 ; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[Y]], 1
 ; CHECK-NEXT:    call void @use32(i32 [[T1]])
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[Y]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %y = call i32 @gen32()
   %t0 = lshr i32 %x, 1
@@ -585,10 +565,10 @@ define i1 @t34_commutativity1_oneuse0(i3
 ; CHECK-NEXT:    [[X:%.*]] = call i32 @gen32()
 ; CHECK-NEXT:    [[T0:%.*]] = lshr i32 [[X]], 1
 ; CHECK-NEXT:    call void @use32(i32 [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[Y:%.*]], 1
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T0]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[Y:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %x = call i32 @gen32()
   %t0 = lshr i32 %x, 1
@@ -601,12 +581,12 @@ define i1 @t34_commutativity1_oneuse0(i3
 define i1 @t35_commutativity1_oneuse1(i32 %y) {
 ; CHECK-LABEL: @t35_commutativity1_oneuse1(
 ; CHECK-NEXT:    [[X:%.*]] = call i32 @gen32()
-; CHECK-NEXT:    [[T0:%.*]] = lshr i32 [[X]], 1
 ; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[Y:%.*]], 1
 ; CHECK-NEXT:    call void @use32(i32 [[T1]])
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T0]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[Y]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %x = call i32 @gen32()
   %t0 = lshr i32 %x, 1



