[llvm] bfde861 - [InstCombine] convert mask and shift of power-of-2 to cmp+select
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 17 07:52:16 PDT 2022
Author: Sanjay Patel
Date: 2022-06-17T10:51:57-04:00
New Revision: bfde8619355a06d9f1a199e9cb3f8d41aef5c05b
URL: https://github.com/llvm/llvm-project/commit/bfde8619355a06d9f1a199e9cb3f8d41aef5c05b
DIFF: https://github.com/llvm/llvm-project/commit/bfde8619355a06d9f1a199e9cb3f8d41aef5c05b.diff
LOG: [InstCombine] convert mask and shift of power-of-2 to cmp+select
When the mask is a power-of-2 constant and op0 is a shifted-power-of-2
constant, test if the shift amount equals the offset bit index:
(ShiftC << X) & C --> X == (log2(C) - log2(ShiftC)) ? C : 0
(ShiftC >> X) & C --> X == (log2(ShiftC) - log2(C)) ? C : 0
This is an alternate to D127610 with a more general pattern.
We match only shift+and instead of the trailing xor, so we see a few
more test diffs. I think we discussed this initially in D126617.
Here are proofs for shifts in both directions:
https://alive2.llvm.org/ce/z/CFrLs4
The test diffs look equal or better, and this makes the IR patterns
more uniform. The backend can partially invert this transform in both
cases if that is profitable. It is not trivially reversible, however,
so if we find perf regressions that are not easy to undo, we may want
to revert this.
Differential Revision: https://reviews.llvm.org/D127801
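[Editorial note: for a quick sanity check beyond the Alive2 proofs, here is a
minimal brute-force C++ sketch, not part of the commit, of the left-shift
identity. The constants ShiftC = 2 and C = 8 mirror the canonical form in the
shl_lshr_pow2_const_case2 test below, where log2(8) - log2(2) = 2 is the
expected bit index.]

#include <cassert>
#include <cstdint>

// Verify (ShiftC << X) & C == (X == log2(C) - log2(ShiftC)) ? C : 0
// for every in-range i16 shift amount (IR shifts by >= bitwidth are poison,
// so only X in [0, 16) matters).
int main() {
  const uint16_t ShiftC = 2, C = 8;
  const int BitNum = 3 - 1; // log2(C) - log2(ShiftC)
  for (int X = 0; X < 16; ++X) {
    uint16_t Masked = (uint16_t)(ShiftC << X) & C;
    uint16_t Selected = X == BitNum ? C : 0;
    assert(Masked == Selected);
  }
  return 0;
}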
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
llvm/test/Transforms/InstCombine/and.ll
llvm/test/Transforms/InstCombine/icmp-and-shift.ll
llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index af0efe12c16e7..cc06e2fd45dbf 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1914,6 +1914,23 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
}
}
+ // When the mask is a power-of-2 constant and op0 is a shifted-power-of-2
+ // constant, test if the shift amount equals the offset bit index:
+ // (ShiftC << X) & C --> X == (log2(C) - log2(ShiftC)) ? C : 0
+ // (ShiftC >> X) & C --> X == (log2(ShiftC) - log2(C)) ? C : 0
+ if (C->isPowerOf2() &&
+ match(Op0, m_OneUse(m_LogicalShift(m_Power2(ShiftC), m_Value(X))))) {
+ int Log2ShiftC = ShiftC->exactLogBase2();
+ int Log2C = C->exactLogBase2();
+ bool IsShiftLeft =
+ cast<BinaryOperator>(Op0)->getOpcode() == Instruction::Shl;
+ int BitNum = IsShiftLeft ? Log2C - Log2ShiftC : Log2ShiftC - Log2C;
+ assert(BitNum >= 0 && "Expected demanded bits to handle impossible mask");
+ Value *Cmp = Builder.CreateICmpEQ(X, ConstantInt::get(Ty, BitNum));
+ return SelectInst::Create(Cmp, ConstantInt::get(Ty, *C),
+ ConstantInt::getNullValue(Ty));
+ }
+
Constant *C1, *C2;
const APInt *C3 = C;
Value *X;
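[Editorial note: the same arithmetic holds for the right-shift direction. A
companion sketch, again illustrative only, uses the constants from the
lshr_shl_pow2_const_case1 test below, where log2(1024) - log2(8) = 7.]

#include <cassert>
#include <cstdint>

// Verify (ShiftC >> X) & C == (X == log2(ShiftC) - log2(C)) ? C : 0.
int main() {
  const uint16_t ShiftC = 1024, C = 8;
  const int BitNum = 10 - 3; // log2(ShiftC) - log2(C)
  for (int X = 0; X < 16; ++X) {
    uint16_t Masked = (ShiftC >> X) & C;
    uint16_t Selected = X == BitNum ? C : 0;
    assert(Masked == Selected);
  }
  return 0;
}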
diff --git a/llvm/test/Transforms/InstCombine/and.ll b/llvm/test/Transforms/InstCombine/and.ll
index fcf1db2e4012c..6143e61c783be 100644
--- a/llvm/test/Transforms/InstCombine/and.ll
+++ b/llvm/test/Transforms/InstCombine/and.ll
@@ -1876,8 +1876,8 @@ define <3 x i16> @shl_lshr_pow2_const_case1_undef3_vec(<3 x i16> %x) {
define i16 @shl_lshr_pow2_const_case2(i16 %x) {
; CHECK-LABEL: @shl_lshr_pow2_const_case2(
-; CHECK-NEXT: [[TMP1:%.*]] = shl i16 2, [[X:%.*]]
-; CHECK-NEXT: [[R:%.*]] = and i16 [[TMP1]], 8
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[X:%.*]], 2
+; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i16 8, i16 0
; CHECK-NEXT: ret i16 [[R]]
;
%shl = shl i16 16, %x
@@ -1886,13 +1886,10 @@ define i16 @shl_lshr_pow2_const_case2(i16 %x) {
ret i16 %r
}
-; TODO: this pattern can be transform to icmp+select
-
define i16 @shl_lshr_pow2_not_const_case2(i16 %x) {
; CHECK-LABEL: @shl_lshr_pow2_not_const_case2(
-; CHECK-NEXT: [[TMP1:%.*]] = shl i16 2, [[X:%.*]]
-; CHECK-NEXT: [[AND:%.*]] = and i16 [[TMP1]], 8
-; CHECK-NEXT: [[R:%.*]] = xor i16 [[AND]], 8
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[X:%.*]], 2
+; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i16 0, i16 8
; CHECK-NEXT: ret i16 [[R]]
;
%shl = shl i16 16, %x
@@ -1969,8 +1966,8 @@ define i16 @shl_lshr_pow2_const_negative_nopow2_2(i16 %x) {
define i16 @lshr_lshr_pow2_const(i16 %x) {
; CHECK-LABEL: @lshr_lshr_pow2_const(
-; CHECK-NEXT: [[LSHR2:%.*]] = lshr i16 32, [[X:%.*]]
-; CHECK-NEXT: [[R:%.*]] = and i16 [[LSHR2]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[X:%.*]], 3
+; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i16 4, i16 0
; CHECK-NEXT: ret i16 [[R]]
;
%lshr1 = lshr i16 2048, %x
@@ -2032,8 +2029,8 @@ define i16 @lshr_lshr_pow2_const_negative_overflow(i16 %x) {
define i16 @lshr_shl_pow2_const_case1(i16 %x) {
; CHECK-LABEL: @lshr_shl_pow2_const_case1(
-; CHECK-NEXT: [[TMP1:%.*]] = lshr i16 1024, [[X:%.*]]
-; CHECK-NEXT: [[R:%.*]] = and i16 [[TMP1]], 8
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[X:%.*]], 7
+; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i16 8, i16 0
; CHECK-NEXT: ret i16 [[R]]
;
%lshr1 = lshr i16 256, %x
@@ -2042,13 +2039,10 @@ define i16 @lshr_shl_pow2_const_case1(i16 %x) {
ret i16 %r
}
-; TODO: this pattern can be transform to icmp+select
-
define i16 @lshr_shl_pow2_const_xor(i16 %x) {
; CHECK-LABEL: @lshr_shl_pow2_const_xor(
-; CHECK-NEXT: [[TMP1:%.*]] = lshr i16 1024, [[X:%.*]]
-; CHECK-NEXT: [[AND:%.*]] = and i16 [[TMP1]], 8
-; CHECK-NEXT: [[R:%.*]] = xor i16 [[AND]], 8
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[X:%.*]], 7
+; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i16 0, i16 8
; CHECK-NEXT: ret i16 [[R]]
;
%lshr1 = lshr i16 256, %x
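[Editorial note: the xor variants above fall out of the same fold, since the
trailing xor just swaps the select arms. A scalar spot-check of
shl_lshr_pow2_not_const_case2, written against the pre-transform canonical
form visible in its old CHECK lines; a sketch, not the actual pipeline.]

#include <cassert>
#include <cstdint>

// Old canonical form: ((2 << X) & 8) ^ 8; new form: X == 2 ? 0 : 8.
int main() {
  for (int X = 0; X < 16; ++X) {
    uint16_t Old = ((uint16_t)(2 << X) & 8) ^ 8;
    uint16_t New = X == 2 ? 0 : 8;
    assert(Old == New);
  }
  return 0;
}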
diff --git a/llvm/test/Transforms/InstCombine/icmp-and-shift.ll b/llvm/test/Transforms/InstCombine/icmp-and-shift.ll
index 87b594231daf4..a67017fb81599 100644
--- a/llvm/test/Transforms/InstCombine/icmp-and-shift.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-and-shift.ll
@@ -329,9 +329,9 @@ define i32 @icmp_ne_and1_lshr_pow2(i32 %0) {
define <2 x i32> @icmp_ne_and1_lshr_pow2_vec(<2 x i32> %0) {
; CHECK-LABEL: @icmp_ne_and1_lshr_pow2_vec(
-; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> <i32 2, i32 2>, [[TMP0:%.*]]
-; CHECK-NEXT: [[AND_LOBIT:%.*]] = and <2 x i32> [[AND]], <i32 1, i32 1>
-; CHECK-NEXT: ret <2 x i32> [[AND_LOBIT]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[CONV]]
;
%lshr = lshr <2 x i32> <i32 8, i32 8>, %0
%and = and <2 x i32> %lshr, <i32 4, i32 4>
@@ -342,10 +342,9 @@ define <2 x i32> @icmp_ne_and1_lshr_pow2_vec(<2 x i32> %0) {
define i32 @icmp_eq_and_pow2_lshr_pow2(i32 %0) {
; CHECK-LABEL: @icmp_eq_and_pow2_lshr_pow2(
-; CHECK-NEXT: [[AND:%.*]] = lshr i32 2, [[TMP0:%.*]]
-; CHECK-NEXT: [[AND_LOBIT:%.*]] = and i32 [[AND]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[AND_LOBIT]], 1
-; CHECK-NEXT: ret i32 [[TMP2]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP0:%.*]], 1
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
;
%lshr = lshr i32 8, %0
%and = and i32 %lshr, 4
@@ -367,10 +366,9 @@ define i32 @icmp_eq_and_pow2_lshr_pow2_case2(i32 %0) {
define <2 x i32> @icmp_eq_and_pow2_lshr_pow2_vec(<2 x i32> %0) {
; CHECK-LABEL: @icmp_eq_and_pow2_lshr_pow2_vec(
-; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> <i32 2, i32 2>, [[TMP0:%.*]]
-; CHECK-NEXT: [[AND_LOBIT:%.*]] = and <2 x i32> [[AND]], <i32 1, i32 1>
-; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[AND_LOBIT]], <i32 1, i32 1>
-; CHECK-NEXT: ret <2 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[CONV]]
;
%lshr = lshr <2 x i32> <i32 8, i32 8>, %0
%and = and <2 x i32> %lshr, <i32 4, i32 4>
@@ -381,10 +379,9 @@ define <2 x i32> @icmp_eq_and_pow2_lshr_pow2_vec(<2 x i32> %0) {
define i32 @icmp_ne_and_pow2_lshr_pow2(i32 %0) {
; CHECK-LABEL: @icmp_ne_and_pow2_lshr_pow2(
-; CHECK-NEXT: [[AND:%.*]] = lshr i32 2, [[TMP0:%.*]]
-; CHECK-NEXT: [[AND_LOBIT:%.*]] = and i32 [[AND]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[AND_LOBIT]], 1
-; CHECK-NEXT: ret i32 [[TMP2]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP0:%.*]], 1
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
;
%lshr = lshr i32 8, %0
%and = and i32 %lshr, 4
@@ -406,9 +403,9 @@ define i32 @icmp_ne_and_pow2_lshr_pow2_case2(i32 %0) {
define <2 x i32> @icmp_ne_and_pow2_lshr_pow2_vec(<2 x i32> %0) {
; CHECK-LABEL: @icmp_ne_and_pow2_lshr_pow2_vec(
-; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> <i32 2, i32 2>, [[TMP0:%.*]]
-; CHECK-NEXT: [[AND_LOBIT:%.*]] = and <2 x i32> [[AND]], <i32 1, i32 1>
-; CHECK-NEXT: ret <2 x i32> [[AND_LOBIT]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[CONV]]
;
%lshr = lshr <2 x i32> <i32 8, i32 8>, %0
%and = and <2 x i32> %lshr, <i32 4, i32 4>
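[Editorial note: the icmp-and-shift diffs show the new select form composing
with existing icmp+zext folds into a single compare. Scalarizing
icmp_ne_and1_lshr_pow2_vec, and assuming the "icmp ne ..., 0" tail that the
hunk truncates, the equivalence is easy to spot-check.]

#include <cassert>
#include <cstdint>

// ((8 >> X) & 4) != 0 reduces to X == 1, i.e. zext(icmp eq X, 1).
int main() {
  for (uint32_t X = 0; X < 32; ++X) {
    uint32_t Old = (((uint32_t)8 >> X) & 4) != 0;
    uint32_t New = X == 1;
    assert(Old == New);
  }
  return 0;
}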
diff --git a/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll
index 1388e5a016cb7..66860981aacbc 100644
--- a/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll
+++ b/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll
@@ -193,9 +193,8 @@ define i1 @scalar_i32_lshr_and_signbit_eq_X_is_constant1(i32 %y) {
define i1 @scalar_i32_lshr_and_negC_eq_X_is_constant2(i32 %y) {
; CHECK-LABEL: @scalar_i32_lshr_and_negC_eq_X_is_constant2(
-; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 -2147483648, [[Y:%.*]]
-; CHECK-NEXT: [[R:%.*]] = icmp sgt i32 [[LSHR]], -1
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[Y:%.*]], 0
+; CHECK-NEXT: ret i1 [[TMP1]]
;
%lshr = lshr i32 2147483648, %y
%and = and i32 %lshr, 2147483648
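[Editorial note: the signbit case works the same way: (0x80000000 >> y) &
0x80000000 is nonzero only when y is 0, so testing the result against zero
collapses to a compare on y. A spot-check, assuming the "icmp eq ..., 0" tail
implied by the file name.]

#include <cassert>
#include <cstdint>

// ((0x80000000u >> Y) & 0x80000000u) == 0 reduces to Y != 0.
int main() {
  for (uint32_t Y = 0; Y < 32; ++Y) {
    bool Old = ((0x80000000u >> Y) & 0x80000000u) == 0;
    bool New = Y != 0;
    assert(Old == New);
  }
  return 0;
}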