[llvm] r329492 - [InstCombine] Get rid of select of bittest (PR36950 / PR17564)

Sat Apr 7 03:37:24 PDT 2018

Author: lebedevri
Date: Sat Apr  7 03:37:24 2018
New Revision: 329492

URL: http://llvm.org/viewvc/llvm-project?rev=329492&view=rev
Log:
[InstCombine] Get rid of select of bittest (PR36950 / PR17564)

Summary:
See PR36950 (https://bugs.llvm.org/show_bug.cgi?id=36950), PR17564 (https://bugs.llvm.org/show_bug.cgi?id=17564), D45065, D45107
https://godbolt.org/g/iAYRup

Alive proof: https://rise4fun.com/Alive/uiH

Testing: `ninja check-llvm`

Reviewers: spatel, craig.topper

Reviewed By: spatel

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D45108

Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
    llvm/trunk/test/Transforms/InstCombine/select-of-bittest.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp?rev=329492&r1=329491&r2=329492&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp Sat Apr  7 03:37:24 2018
@@ -397,6 +397,51 @@ Instruction *InstCombiner::foldSelectInt
 }
 
 /// We want to turn:
+///   (select (icmp eq (and X, Y), 0), (and (lshr X, Z), 1), 1)
+/// into:
+///   zext (icmp ne i32 (and X, (or Y, (shl 1, Z))), 0)
+/// Note:
+///   Z may be 0 if lshr is missing.
+/// Worst case scenario is that we will replace 5 instructions with 5 different
+/// instructions, but we got rid of select.
+static Instruction *foldSelectICmpAndAnd(Type *SelType, const ICmpInst *IC,
+                                         Value *TrueVal, Value *FalseVal,
+                                         InstCombiner::BuilderTy &Builder) {
+  if (!(IC->hasOneUse() && IC->getOperand(0)->hasOneUse()))
+    return nullptr;
+
+  Value *X, *Y;
+  ICmpInst::Predicate EqPred;
+  if (!(match(IC, m_ICmp(EqPred, m_And(m_Value(X), m_Value(Y)), m_Zero())) &&
+        ICmpInst::Predicate::ICMP_EQ == EqPred && match(FalseVal, m_One())))
+    return nullptr;
+
+  // The TrueVal has general form of:
+  //   and %B, 1
+  Value *B;
+  if (!match(TrueVal, m_OneUse(m_And(m_Value(B), m_One()))))
+    return nullptr;
+
+  // Where %B can be one of:
+  //        %X
+  // or
+  //   lshr %X, %Z
+  // Where %Z may or may not be a constant.
+  Value *MaskB, *Z;
+  if (match(B, m_Specific(X))) {
+    MaskB = ConstantInt::get(SelType, 1);
+  } else if (match(B, m_OneUse(m_LShr(m_Specific(X), m_Value(Z))))) {
+    MaskB = Builder.CreateShl(ConstantInt::get(SelType, 1), Z);
+  } else
+    return nullptr;
+
+  Value *FullMask = Builder.CreateOr(Y, MaskB);
+  Value *MaskedX = Builder.CreateAnd(X, FullMask);
+  Value *ICmpNeZero = Builder.CreateIsNotNull(MaskedX);
+  return new ZExtInst(ICmpNeZero, SelType);
+}
+
+/// We want to turn:
 ///   (select (icmp eq (and X, C1), 0), Y, (or Y, C2))
 /// into:
 ///   (or (shl (and X, C1), C3), Y)
@@ -863,6 +908,10 @@ Instruction *InstCombiner::foldSelectIns
     }
   }
 
+  if (Instruction *V =
+          foldSelectICmpAndAnd(SI.getType(), ICI, TrueVal, FalseVal, Builder))
+    return V;
+
   if (Value *V = foldSelectICmpAndOr(ICI, TrueVal, FalseVal, Builder))
     return replaceInstUsesWith(SI, V);
 

Modified: llvm/trunk/test/Transforms/InstCombine/select-of-bittest.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/select-of-bittest.ll?rev=329492&r1=329491&r2=329492&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/select-of-bittest.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/select-of-bittest.ll Sat Apr  7 03:37:24 2018
@@ -4,15 +4,12 @@
 ; https://bugs.llvm.org/show_bug.cgi?id=36950
 
 ; These all should be just and+icmp, there should be no select.
-; https://rise4fun.com/Alive/uiH
 
 define i32 @and_lshr_and(i32 %arg) {
 ; CHECK-LABEL: @and_lshr_and(
-; CHECK-NEXT:    [[TMP:%.*]] = and i32 [[ARG:%.*]], 1
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP]], 0
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[ARG]], 1
-; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], 1
-; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP1]], i32 [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[ARG:%.*]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i1 [[TMP2]] to i32
 ; CHECK-NEXT:    ret i32 [[TMP4]]
 ;
   %tmp = and i32 %arg, 1
@@ -25,11 +22,9 @@ define i32 @and_lshr_and(i32 %arg) {
 
 define <2 x i32> @and_lshr_and_splatvec(<2 x i32> %arg) {
 ; CHECK-LABEL: @and_lshr_and_splatvec(
-; CHECK-NEXT:    [[TMP:%.*]] = and <2 x i32> [[ARG:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i32> [[TMP]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr <2 x i32> [[ARG]], <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i32> [[TMP2]], <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[ARG:%.*]], <i32 3, i32 3>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[TMP4]]
 ;
   %tmp = and <2 x i32> %arg, <i32 1, i32 1>
@@ -42,11 +37,9 @@ define <2 x i32> @and_lshr_and_splatvec(
 
 define <2 x i32> @and_lshr_and_vec_v0(<2 x i32> %arg) {
 ; CHECK-LABEL: @and_lshr_and_vec_v0(
-; CHECK-NEXT:    [[TMP:%.*]] = and <2 x i32> [[ARG:%.*]], <i32 1, i32 4>
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i32> [[TMP]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr <2 x i32> [[ARG]], <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i32> [[TMP2]], <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[ARG:%.*]], <i32 3, i32 6>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[TMP4]]
 ;
   %tmp = and <2 x i32> %arg, <i32 1, i32 4> ; mask is not splat
@@ -59,11 +52,9 @@ define <2 x i32> @and_lshr_and_vec_v0(<2
 
 define <2 x i32> @and_lshr_and_vec_v1(<2 x i32> %arg) {
 ; CHECK-LABEL: @and_lshr_and_vec_v1(
-; CHECK-NEXT:    [[TMP:%.*]] = and <2 x i32> [[ARG:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i32> [[TMP]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr <2 x i32> [[ARG]], <i32 1, i32 2>
-; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i32> [[TMP2]], <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[ARG:%.*]], <i32 3, i32 5>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[TMP4]]
 ;
   %tmp = and <2 x i32> %arg, <i32 1, i32 1>
@@ -76,11 +67,9 @@ define <2 x i32> @and_lshr_and_vec_v1(<2
 
 define <2 x i32> @and_lshr_and_vec_v2(<2 x i32> %arg) {
 ; CHECK-LABEL: @and_lshr_and_vec_v2(
-; CHECK-NEXT:    [[TMP:%.*]] = and <2 x i32> [[ARG:%.*]], <i32 8, i32 1>
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i32> [[TMP]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr <2 x i32> [[ARG]], <i32 2, i32 1>
-; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i32> [[TMP2]], <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[ARG:%.*]], <i32 12, i32 3>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[TMP4]]
 ;
   %tmp = and <2 x i32> %arg, <i32 8, i32 1> ; mask is not splat
@@ -110,10 +99,9 @@ define <3 x i32> @and_lshr_and_vec_undef
 
 define i32 @and_and(i32 %arg) {
 ; CHECK-LABEL: @and_and(
-; CHECK-NEXT:    [[TMP:%.*]] = and i32 [[ARG:%.*]], 2
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP]], 0
-; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[ARG]], 1
-; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP1]], i32 [[TMP2]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[ARG:%.*]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
 ; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %tmp = and i32 %arg, 2
@@ -125,10 +113,9 @@ define i32 @and_and(i32 %arg) {
 
 define <2 x i32> @and_and_splatvec(<2 x i32> %arg) {
 ; CHECK-LABEL: @and_and_splatvec(
-; CHECK-NEXT:    [[TMP:%.*]] = and <2 x i32> [[ARG:%.*]], <i32 2, i32 2>
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i32> [[TMP]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[ARG]], <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[ARG:%.*]], <i32 3, i32 3>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
 ;
   %tmp = and <2 x i32> %arg, <i32 2, i32 2>
@@ -140,10 +127,9 @@ define <2 x i32> @and_and_splatvec(<2 x
 
 define <2 x i32> @and_and_vec(<2 x i32> %arg) {
 ; CHECK-LABEL: @and_and_vec(
-; CHECK-NEXT:    [[TMP:%.*]] = and <2 x i32> [[ARG:%.*]], <i32 6, i32 2>
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i32> [[TMP]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[ARG]], <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[ARG:%.*]], <i32 7, i32 3>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
 ;
   %tmp = and <2 x i32> %arg, <i32 6, i32 2> ; mask is not splat
@@ -174,11 +160,10 @@ define <3 x i32> @and_and_vec_undef(<3 x
 
 define i32 @f_var0(i32 %arg, i32 %arg1) {
 ; CHECK-LABEL: @f_var0(
-; CHECK-NEXT:    [[TMP:%.*]] = and i32 [[ARG:%.*]], [[ARG1:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr i32 [[ARG]], 1
-; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[TMP3]], 1
-; CHECK-NEXT:    [[TMP5:%.*]] = select i1 [[TMP2]], i32 [[TMP4]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[ARG1:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[ARG:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = zext i1 [[TMP3]] to i32
 ; CHECK-NEXT:    ret i32 [[TMP5]]
 ;
   %tmp = and i32 %arg, %arg1
@@ -191,11 +176,10 @@ define i32 @f_var0(i32 %arg, i32 %arg1)
 
 define <2 x i32> @f_var0_splatvec(<2 x i32> %arg, <2 x i32> %arg1) {
 ; CHECK-LABEL: @f_var0_splatvec(
-; CHECK-NEXT:    [[TMP:%.*]] = and <2 x i32> [[ARG:%.*]], [[ARG1:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr <2 x i32> [[ARG]], <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = and <2 x i32> [[TMP3]], <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[TMP4]], <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[ARG1:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[ARG:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[TMP5]]
 ;
   %tmp = and <2 x i32> %arg, %arg1
@@ -208,11 +192,10 @@ define <2 x i32> @f_var0_splatvec(<2 x i
 
 define <2 x i32> @f_var0_vec(<2 x i32> %arg, <2 x i32> %arg1) {
 ; CHECK-LABEL: @f_var0_vec(
-; CHECK-NEXT:    [[TMP:%.*]] = and <2 x i32> [[ARG:%.*]], [[ARG1:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr <2 x i32> [[ARG]], <i32 1, i32 2>
-; CHECK-NEXT:    [[TMP4:%.*]] = and <2 x i32> [[TMP3]], <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[TMP4]], <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[ARG1:%.*]], <i32 2, i32 4>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[ARG:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[TMP5]]
 ;
   %tmp = and <2 x i32> %arg, %arg1
@@ -242,10 +225,10 @@ define <3 x i32> @f_var0_vec_undef(<3 x
 
 define i32 @f_var1(i32 %arg, i32 %arg1) {
 ; CHECK-LABEL: @f_var1(
-; CHECK-NEXT:    [[TMP:%.*]] = and i32 [[ARG:%.*]], [[ARG1:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[ARG]], 1
-; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP2]], i32 [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[ARG1:%.*]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[ARG:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
 ; CHECK-NEXT:    ret i32 [[TMP4]]
 ;
   %tmp = and i32 %arg, %arg1
@@ -257,10 +240,10 @@ define i32 @f_var1(i32 %arg, i32 %arg1)
 
 define <2 x i32> @f_var1_vec(<2 x i32> %arg, <2 x i32> %arg1) {
 ; CHECK-LABEL: @f_var1_vec(
-; CHECK-NEXT:    [[TMP:%.*]] = and <2 x i32> [[ARG:%.*]], [[ARG1:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i32> [[ARG]], <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[ARG1:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[ARG:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[TMP4]]
 ;
   %tmp = and <2 x i32> %arg, %arg1
@@ -291,11 +274,11 @@ define <3 x i32> @f_var1_vec_undef(<3 x
 
 define i32 @f_var2(i32 %arg, i32 %arg1) {
 ; CHECK-LABEL: @f_var2(
-; CHECK-NEXT:    [[TMP:%.*]] = and i32 [[ARG:%.*]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr i32 [[ARG]], [[ARG1:%.*]]
-; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[TMP3]], 1
-; CHECK-NEXT:    [[TMP5:%.*]] = select i1 [[TMP2]], i32 [[TMP4]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 1, [[ARG1:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = or i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], [[ARG:%.*]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = zext i1 [[TMP4]] to i32
 ; CHECK-NEXT:    ret i32 [[TMP5]]
 ;
   %tmp = and i32 %arg, 1
@@ -308,11 +291,11 @@ define i32 @f_var2(i32 %arg, i32 %arg1)
 
 define <2 x i32> @f_var2_splatvec(<2 x i32> %arg, <2 x i32> %arg1) {
 ; CHECK-LABEL: @f_var2_splatvec(
-; CHECK-NEXT:    [[TMP:%.*]] = and <2 x i32> [[ARG:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr <2 x i32> [[ARG]], [[ARG1:%.*]]
-; CHECK-NEXT:    [[TMP4:%.*]] = and <2 x i32> [[TMP3]], <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[TMP4]], <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> <i32 1, i32 1>, [[ARG1:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = or <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i32> [[TMP2]], [[ARG:%.*]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne <2 x i32> [[TMP3]], zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = zext <2 x i1> [[TMP4]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[TMP5]]
 ;
   %tmp = and <2 x i32> %arg, <i32 1, i32 1>
@@ -325,11 +308,11 @@ define <2 x i32> @f_var2_splatvec(<2 x i
 
 define <2 x i32> @f_var2_vec(<2 x i32> %arg, <2 x i32> %arg1) {
 ; CHECK-LABEL: @f_var2_vec(
-; CHECK-NEXT:    [[TMP:%.*]] = and <2 x i32> [[ARG:%.*]], <i32 2, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr <2 x i32> [[ARG]], [[ARG1:%.*]]
-; CHECK-NEXT:    [[TMP4:%.*]] = and <2 x i32> [[TMP3]], <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[TMP4]], <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> <i32 1, i32 1>, [[ARG1:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = or <2 x i32> [[TMP1]], <i32 2, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i32> [[TMP2]], [[ARG:%.*]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne <2 x i32> [[TMP3]], zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = zext <2 x i1> [[TMP4]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[TMP5]]
 ;
   %tmp = and <2 x i32> %arg, <i32 2, i32 1>; mask is not splat
@@ -363,11 +346,11 @@ define <3 x i32> @f_var2_vec_undef(<3 x
 
 define i32 @f_var3(i32 %arg, i32 %arg1, i32 %arg2) {
 ; CHECK-LABEL: @f_var3(
-; CHECK-NEXT:    [[TMP:%.*]] = and i32 [[ARG:%.*]], [[ARG1:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP]], 0
-; CHECK-NEXT:    [[TMP4:%.*]] = lshr i32 [[ARG]], [[ARG2:%.*]]
-; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP4]], 1
-; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP5]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 1, [[ARG2:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = or i32 [[TMP1]], [[ARG1:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], [[ARG:%.*]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT:    [[TMP6:%.*]] = zext i1 [[TMP4]] to i32
 ; CHECK-NEXT:    ret i32 [[TMP6]]
 ;
   %tmp = and i32 %arg, %arg1
@@ -380,11 +363,11 @@ define i32 @f_var3(i32 %arg, i32 %arg1,
 
 define <2 x i32> @f_var3_splatvec(<2 x i32> %arg, <2 x i32> %arg1, <2 x i32> %arg2) {
 ; CHECK-LABEL: @f_var3_splatvec(
-; CHECK-NEXT:    [[TMP:%.*]] = and <2 x i32> [[ARG:%.*]], [[ARG1:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <2 x i32> [[TMP]], zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = lshr <2 x i32> [[ARG]], [[ARG2:%.*]]
-; CHECK-NEXT:    [[TMP5:%.*]] = and <2 x i32> [[TMP4]], <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP6:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> <i32 1, i32 1>, [[ARG2:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = or <2 x i32> [[TMP1]], [[ARG1:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i32> [[TMP2]], [[ARG:%.*]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne <2 x i32> [[TMP3]], zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = zext <2 x i1> [[TMP4]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[TMP6]]
 ;
   %tmp = and <2 x i32> %arg, %arg1