[llvm] de7a6ae - [InstCombine] Optimize shl+lshr+and conversion pattern

Thu Jun 9 18:58:17 PDT 2022

Author: chenglin.bi
Date: 2022-06-10T09:36:58+08:00
New Revision: de7a6ae1ffc36baa16188d3dd5befa385d05700d

URL: https://github.com/llvm/llvm-project/commit/de7a6ae1ffc36baa16188d3dd5befa385d05700d
DIFF: https://github.com/llvm/llvm-project/commit/de7a6ae1ffc36baa16188d3dd5befa385d05700d.diff

LOG:  [InstCombine] Optimize shl+lshr+and conversion pattern

If `C1` and `C3` are powers of 2 and `Log2(C3)+C2 < BitWidth`, fold:
    ((C1 << X) >> C2) & C3 -> X == (Log2(C3)+C2-Log2(C1)) ? C3 : 0

https://alive2.llvm.org/ce/z/Pus5bd

Fixes https://github.com/llvm/llvm-project/issues/55739

Reviewed By: spatel

Differential Revision: https://reviews.llvm.org/D126617

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
    llvm/test/Transforms/InstCombine/and.ll
    llvm/test/Transforms/InstCombine/icmp-and-shift.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 01097a0fd0c11..d04cdd8e1ec26 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1904,6 +1904,30 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
         return new ZExtInst(NewBO, Ty);
       }
     }
+
+    Constant *C1, *C2;
+    const APInt *C3 = C;
+    Value *X;
+    if (C3->isPowerOf2() &&
+        match(Op0, m_OneUse(m_LShr(m_Shl(m_ImmConstant(C1), m_Value(X)),
+                                   m_ImmConstant(C2)))) &&
+        match(C1, m_Power2())) {
+      Constant *Log2C1 = ConstantExpr::getExactLogBase2(C1);
+      Constant *Log2C3 = ConstantInt::get(Ty, C3->countTrailingZeros());
+      Constant *LshrC = ConstantExpr::getAdd(C2, Log2C3);
+      KnownBits KnownLShrc = computeKnownBits(LshrC, 0, nullptr);
+      if (KnownLShrc.getMaxValue().ult(Width)) {
+        // iff C1 and C3 are pow2 and C2 + cttz(C3) < BitWidth:
+        // ((C1 << X) >> C2) & C3 -> X == (cttz(C3)+C2-cttz(C1)) ? C3 : 0
+        Constant *CmpC = ConstantExpr::getSub(LshrC, Log2C1);
+        Value *Cmp = Builder.CreateICmpEQ(X, CmpC);
+        return SelectInst::Create(Cmp, ConstantInt::get(Ty, *C3),
+                                  ConstantInt::getNullValue(Ty));
+      }
+      // TODO: Symmetrical case
+      // iff C1 and C3 are pow2 and Log2(C3) >= C2:
+      // ((C1 >> X) << C2) & C3 -> X == (cttz(C1)+C2-cttz(C3)) ? C3 : 0
+    }
   }
 
   if (match(&I, m_And(m_OneUse(m_Shl(m_ZExt(m_Value(X)), m_Value(Y))),

diff  --git a/llvm/test/Transforms/InstCombine/and.ll b/llvm/test/Transforms/InstCombine/and.ll
index c06074c79fc52..2a66b5a528cc2 100644
--- a/llvm/test/Transforms/InstCombine/and.ll
+++ b/llvm/test/Transforms/InstCombine/and.ll
@@ -1778,9 +1778,8 @@ define i8 @not_lshr_bitwidth_mask(i8 %x, i8 %y) {
 
 define i16 @shl_lshr_pow2_const_case1(i16 %x) {
 ; CHECK-LABEL: @shl_lshr_pow2_const_case1(
-; CHECK-NEXT:    [[SHL:%.*]] = shl i16 4, [[X:%.*]]
-; CHECK-NEXT:    [[LSHR:%.*]] = lshr i16 [[SHL]], 6
-; CHECK-NEXT:    [[R:%.*]] = and i16 [[LSHR]], 8
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i16 [[X:%.*]], 7
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[TMP1]], i16 8, i16 0
 ; CHECK-NEXT:    ret i16 [[R]]
 ;
   %shl = shl i16 4, %x
@@ -1791,9 +1790,8 @@ define i16 @shl_lshr_pow2_const_case1(i16 %x) {
 
 define <3 x i16> @shl_lshr_pow2_const_case1_uniform_vec(<3 x i16> %x) {
 ; CHECK-LABEL: @shl_lshr_pow2_const_case1_uniform_vec(
-; CHECK-NEXT:    [[SHL:%.*]] = shl <3 x i16> <i16 4, i16 4, i16 4>, [[X:%.*]]
-; CHECK-NEXT:    [[LSHR:%.*]] = lshr <3 x i16> [[SHL]], <i16 6, i16 6, i16 6>
-; CHECK-NEXT:    [[R:%.*]] = and <3 x i16> [[LSHR]], <i16 8, i16 8, i16 8>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], <i16 7, i16 7, i16 7>
+; CHECK-NEXT:    [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> <i16 8, i16 8, i16 8>, <3 x i16> zeroinitializer
 ; CHECK-NEXT:    ret <3 x i16> [[R]]
 ;
   %shl = shl <3 x i16> <i16 4, i16 4, i16 4>, %x
@@ -1804,22 +1802,20 @@ define <3 x i16> @shl_lshr_pow2_const_case1_uniform_vec(<3 x i16> %x) {
 
 define <3 x i16> @shl_lshr_pow2_const_case1_non_uniform_vec(<3 x i16> %x) {
 ; CHECK-LABEL: @shl_lshr_pow2_const_case1_non_uniform_vec(
-; CHECK-NEXT:    [[SHL:%.*]] = shl <3 x i16> <i16 16, i16 8, i16 4>, [[X:%.*]]
-; CHECK-NEXT:    [[LSHR:%.*]] = lshr <3 x i16> [[SHL]], <i16 5, i16 4, i16 3>
-; CHECK-NEXT:    [[R:%.*]] = and <3 x i16> [[LSHR]], <i16 8, i16 16, i16 4>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], <i16 7, i16 6, i16 1>
+; CHECK-NEXT:    [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> <i16 8, i16 8, i16 8>, <3 x i16> zeroinitializer
 ; CHECK-NEXT:    ret <3 x i16> [[R]]
 ;
-  %shl = shl <3 x i16> <i16 16, i16 8, i16 4>, %x
-  %lshr = lshr <3 x i16> %shl, <i16 5, i16 4, i16 3>
-  %r = and <3 x i16> %lshr, <i16 8, i16 16, i16 4>
+  %shl = shl <3 x i16> <i16 2, i16 8, i16 32>, %x
+  %lshr = lshr <3 x i16> %shl, <i16 5, i16 6, i16 3>
+  %r = and <3 x i16> %lshr, <i16 8, i16 8, i16 8>
   ret <3 x i16> %r
 }
 
 define <3 x i16> @shl_lshr_pow2_const_case1_undef1_vec(<3 x i16> %x) {
 ; CHECK-LABEL: @shl_lshr_pow2_const_case1_undef1_vec(
-; CHECK-NEXT:    [[SHL:%.*]] = shl <3 x i16> <i16 undef, i16 16, i16 16>, [[X:%.*]]
-; CHECK-NEXT:    [[LSHR:%.*]] = lshr <3 x i16> [[SHL]], <i16 5, i16 5, i16 5>
-; CHECK-NEXT:    [[R:%.*]] = and <3 x i16> [[LSHR]], <i16 8, i16 8, i16 8>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], <i16 8, i16 4, i16 4>
+; CHECK-NEXT:    [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> <i16 8, i16 8, i16 8>, <3 x i16> zeroinitializer
 ; CHECK-NEXT:    ret <3 x i16> [[R]]
 ;
   %shl = shl <3 x i16> <i16 undef, i16 16, i16 16>, %x
@@ -1868,6 +1864,8 @@ define i16 @shl_lshr_pow2_const_case2(i16 %x) {
   ret i16 %r
 }
 
+; TODO: this pattern can be transformed to icmp+select
+
 define i16 @shl_lshr_pow2_not_const_case2(i16 %x) {
 ; CHECK-LABEL: @shl_lshr_pow2_not_const_case2(
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl i16 2, [[X:%.*]]

diff  --git a/llvm/test/Transforms/InstCombine/icmp-and-shift.ll b/llvm/test/Transforms/InstCombine/icmp-and-shift.ll
index 9c9f1a99e82dd..87b594231daf4 100644
--- a/llvm/test/Transforms/InstCombine/icmp-and-shift.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-and-shift.ll
@@ -57,11 +57,9 @@ define <2 x i32> @icmp_ne_and_pow2_shl1_vec(<2 x i32> %0) {
 
 define i32 @icmp_eq_and_pow2_shl_pow2(i32 %0) {
 ; CHECK-LABEL: @icmp_eq_and_pow2_shl_pow2(
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 2, [[TMP0:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = lshr i32 [[SHL]], 4
-; CHECK-NEXT:    [[AND_LOBIT:%.*]] = and i32 [[AND]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = xor i32 [[AND_LOBIT]], 1
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP0:%.*]], 3
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %shl = shl i32 2, %0
   %and = and i32 %shl, 16
@@ -72,11 +70,9 @@ define i32 @icmp_eq_and_pow2_shl_pow2(i32 %0) {
 
 define <2 x i32> @icmp_eq_and_pow2_shl_pow2_vec(<2 x i32> %0) {
 ; CHECK-LABEL: @icmp_eq_and_pow2_shl_pow2_vec(
-; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i32> <i32 4, i32 4>, [[TMP0:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = lshr <2 x i32> [[SHL]], <i32 4, i32 4>
-; CHECK-NEXT:    [[AND_LOBIT:%.*]] = and <2 x i32> [[AND]], <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = xor <2 x i32> [[AND_LOBIT]], <i32 1, i32 1>
-; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
+; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
 ;
   %shl = shl <2 x i32> <i32 4, i32 4>, %0
   %and = and <2 x i32> %shl, <i32 16, i32 16>
@@ -87,9 +83,8 @@ define <2 x i32> @icmp_eq_and_pow2_shl_pow2_vec(<2 x i32> %0) {
 
 define i32 @icmp_ne_and_pow2_shl_pow2(i32 %0) {
 ; CHECK-LABEL: @icmp_ne_and_pow2_shl_pow2(
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 2, [[TMP0:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = lshr i32 [[SHL]], 4
-; CHECK-NEXT:    [[AND_LOBIT:%.*]] = and i32 [[AND]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP0:%.*]], 3
+; CHECK-NEXT:    [[AND_LOBIT:%.*]] = zext i1 [[TMP2]] to i32
 ; CHECK-NEXT:    ret i32 [[AND_LOBIT]]
 ;
   %shl = shl i32 2, %0
@@ -101,9 +96,8 @@ define i32 @icmp_ne_and_pow2_shl_pow2(i32 %0) {
 
 define <2 x i32> @icmp_ne_and_pow2_shl_pow2_vec(<2 x i32> %0) {
 ; CHECK-LABEL: @icmp_ne_and_pow2_shl_pow2_vec(
-; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i32> <i32 4, i32 4>, [[TMP0:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = lshr <2 x i32> [[SHL]], <i32 4, i32 4>
-; CHECK-NEXT:    [[AND_LOBIT:%.*]] = and <2 x i32> [[AND]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[AND_LOBIT:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[AND_LOBIT]]
 ;
   %shl = shl <2 x i32> <i32 4, i32 4>, %0