[llvm] r363956 - [InstCombine] canonicalize check for power-of-2

Thu Jun 20 10:41:15 PDT 2019

Author: spatel
Date: Thu Jun 20 10:41:15 2019
New Revision: 363956

URL: http://llvm.org/viewvc/llvm-project?rev=363956&view=rev
Log:
[InstCombine] canonicalize check for power-of-2

The form that compares against 0 is better because:
1. It removes a use of the input value.
2. It's the more standard form for this pattern: https://graphics.stanford.edu/~seander/bithacks.html#DetermineIfPowerOf2
3. It results in equal or better codegen (tested with x86, AArch64, ARM, PowerPC, MIPS).

This is a root cause for PR42314, but probably doesn't completely answer the codegen request:
https://bugs.llvm.org/show_bug.cgi?id=42314

Alive proof:
https://rise4fun.com/Alive/9kG

  Name: is power-of-2
  %neg = sub i32 0, %x
  %a = and i32 %neg, %x
  %r = icmp eq i32 %a, %x
  =>
  %dec = add i32 %x, -1
  %a2 = and i32 %dec, %x
  %r = icmp eq i32 %a2, 0

  Name: is not power-of-2
  %neg = sub i32 0, %x
  %a = and i32 %neg, %x
  %r = icmp ne i32 %a, %x
  =>
  %dec = add i32 %x, -1
  %a2 = and i32 %dec, %x
  %r = icmp ne i32 %a2, 0

Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
    llvm/trunk/test/Transforms/InstCombine/ispow2.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp?rev=363956&r1=363955&r2=363956&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp Thu Jun 20 10:41:15 2019
@@ -3830,6 +3830,26 @@ Instruction *InstCombiner::foldICmpEqual
        match(Op1, m_BitReverse(m_Value(B)))))
     return new ICmpInst(Pred, A, B);
 
+  // Canonicalize checking for a power-of-2-or-zero value:
+  // (A & -A) == A --> (A & (A - 1)) == 0
+  // (-A & A) == A --> (A & (A - 1)) == 0
+  // A == (A & -A) --> (A & (A - 1)) == 0
+  // A == (-A & A) --> (A & (A - 1)) == 0
+  // TODO: This could be reduced by using the popct intrinsic.
+  A = nullptr;
+  if (match(Op0, m_OneUse(m_c_And(m_OneUse(m_Neg(m_Specific(Op1))),
+                                  m_Specific(Op1)))))
+    A = Op1;
+  else if (match(Op1, m_OneUse(m_c_And(m_OneUse(m_Neg(m_Specific(Op0))),
+                                       m_Specific(Op0)))))
+    A = Op0;
+  if (A) {
+    Type *Ty = A->getType();
+    Value *Dec = Builder.CreateAdd(A, ConstantInt::getAllOnesValue(Ty));
+    Value *And = Builder.CreateAnd(A, Dec);
+    return new ICmpInst(Pred, And, ConstantInt::getNullValue(Ty));
+  }
+
   return nullptr;
 }
 

Modified: llvm/trunk/test/Transforms/InstCombine/ispow2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/ispow2.ll?rev=363956&r1=363955&r2=363956&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/ispow2.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/ispow2.ll Thu Jun 20 10:41:15 2019
@@ -3,9 +3,9 @@
 
 define i1 @is_pow2or0_negate_op(i32 %x) {
 ; CHECK-LABEL: @is_pow2or0_negate_op(
-; CHECK-NEXT:    [[NEG:%.*]] = sub i32 0, [[X:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[NEG]], [[X]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[AND]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X:%.*]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP2]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %neg = sub i32 0, %x
@@ -16,9 +16,9 @@ define i1 @is_pow2or0_negate_op(i32 %x)
 
 define <2 x i1> @is_pow2or0_negate_op_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @is_pow2or0_negate_op_vec(
-; CHECK-NEXT:    [[NEG:%.*]] = sub <2 x i32> zeroinitializer, [[X:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[NEG]], [[X]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[AND]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[X]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[TMP2]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %neg = sub <2 x i32> zeroinitializer, %x
@@ -55,9 +55,9 @@ define <2 x i1> @is_pow2or0_decrement_op
 
 define i1 @isnot_pow2or0_negate_op(i32 %x) {
 ; CHECK-LABEL: @isnot_pow2or0_negate_op(
-; CHECK-NEXT:    [[NEG:%.*]] = sub i32 0, [[X:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[NEG]], [[X]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[AND]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X:%.*]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP2]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %neg = sub i32 0, %x
@@ -68,9 +68,9 @@ define i1 @isnot_pow2or0_negate_op(i32 %
 
 define <2 x i1> @isnot_pow2or0_negate_op_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @isnot_pow2or0_negate_op_vec(
-; CHECK-NEXT:    [[NEG:%.*]] = sub <2 x i32> zeroinitializer, [[X:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[NEG]], [[X]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[AND]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[X]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %neg = sub <2 x i32> zeroinitializer, %x
@@ -108,9 +108,9 @@ define <2 x i1> @isnot_pow2or0_decrement
 define i1 @is_pow2or0_negate_op_commute1(i32 %p) {
 ; CHECK-LABEL: @is_pow2or0_negate_op_commute1(
 ; CHECK-NEXT:    [[X:%.*]] = srem i32 42, [[P:%.*]]
-; CHECK-NEXT:    [[NEG:%.*]] = sub nsw i32 0, [[X]]
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X]], [[NEG]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[AND]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i32 [[X]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[X]], [[TMP1]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP2]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %x = srem i32 42, %p ; thwart complexity-based canonicalization
@@ -125,9 +125,9 @@ define i1 @is_pow2or0_negate_op_commute1
 define i1 @isnot_pow2or0_negate_op_commute2(i32 %p) {
 ; CHECK-LABEL: @isnot_pow2or0_negate_op_commute2(
 ; CHECK-NEXT:    [[X:%.*]] = urem i32 42, [[P:%.*]]
-; CHECK-NEXT:    [[NEG:%.*]] = sub nsw i32 0, [[X]]
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X]], [[NEG]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[X]], [[AND]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i32 [[X]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[X]], [[TMP1]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP2]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %x = urem i32 42, %p ; thwart complexity-based canonicalization
@@ -140,9 +140,9 @@ define i1 @isnot_pow2or0_negate_op_commu
 define i1 @isnot_pow2or0_negate_op_commute3(i32 %p) {
 ; CHECK-LABEL: @isnot_pow2or0_negate_op_commute3(
 ; CHECK-NEXT:    [[X:%.*]] = urem i32 42, [[P:%.*]]
-; CHECK-NEXT:    [[NEG:%.*]] = sub nsw i32 0, [[X]]
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X]], [[NEG]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[X]], [[AND]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i32 [[X]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[X]], [[TMP1]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP2]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %x = urem i32 42, %p ; thwart complexity-based canonicalization