[llvm] 2f217c1 - [InstCombine] Canonicalize ((X & -X) - 1) --> ((X - 1) & ~X) (PR51784)

Wed Aug 24 08:51:07 PDT 2022

Author: Simon Pilgrim
Date: 2022-08-24T16:50:43+01:00
New Revision: 2f217c12142639900a0d25c5d9ce67142f7bd840

URL: https://github.com/llvm/llvm-project/commit/2f217c12142639900a0d25c5d9ce67142f7bd840
DIFF: https://github.com/llvm/llvm-project/commit/2f217c12142639900a0d25c5d9ce67142f7bd840.diff

LOG: [InstCombine] Canonicalize ((X & -X) - 1) --> ((X - 1) & ~X) (PR51784)

Enables the ctpop((x & -x) - 1) -> cttz(x, false) fold.

Alive2: https://alive2.llvm.org/ce/z/EDk4h7 (((X & -X) - 1) --> (~X & (X - 1)))

Alive2: https://alive2.llvm.org/ce/z/8Yr3XG (CTPOP -> CTTZ)

Fixes #51126

Differential Revision: https://reviews.llvm.org/D110488

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
    llvm/test/Transforms/InstCombine/add-mask-neg.ll
    llvm/test/Transforms/InstCombine/ctpop-cttz.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 94f21dd3c6cc..8dfe72e36b72 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1415,6 +1415,17 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
     return &I;
   }
 
+  // Canonicalize ((A & -A) - 1) --> ((A - 1) & ~A)
+  // Forms all commutable operations, and simplifies ctpop -> cttz folds.
+  if (match(&I,
+            m_Add(m_OneUse(m_c_And(m_Value(A), m_OneUse(m_Neg(m_Deferred(A))))),
+                  m_AllOnes()))) {
+    Constant *AllOnes = ConstantInt::getAllOnesValue(RHS->getType());
+    Value *Dec = Builder.CreateAdd(A, AllOnes);
+    Value *Not = Builder.CreateXor(A, AllOnes);
+    return BinaryOperator::CreateAnd(Dec, Not);
+  }
+
   // TODO(jingyue): Consider willNotOverflowSignedAdd and
   // willNotOverflowUnsignedAdd to reduce the number of invocations of
   // computeKnownBits.

diff  --git a/llvm/test/Transforms/InstCombine/add-mask-neg.ll b/llvm/test/Transforms/InstCombine/add-mask-neg.ll
index 0fe1b743d7fd..5fad6155d348 100644
--- a/llvm/test/Transforms/InstCombine/add-mask-neg.ll
+++ b/llvm/test/Transforms/InstCombine/add-mask-neg.ll
@@ -2,14 +2,14 @@
 ; RUN: opt < %s -S -passes=instcombine | FileCheck %s
 
 ;
-; TODO: Canonicalize ((X & -X) - 1) --> (~X & (X - 1))
+; Canonicalize ((X & -X) - 1) --> ((X - 1) & ~X)
 ;
 
 define i32 @dec_mask_neg_i32(i32 %X) {
 ; CHECK-LABEL: @dec_mask_neg_i32(
-; CHECK-NEXT:    [[NEG:%.*]] = sub i32 0, [[X:%.*]]
-; CHECK-NEXT:    [[MASK:%.*]] = and i32 [[NEG]], [[X]]
-; CHECK-NEXT:    [[DEC:%.*]] = add i32 [[MASK]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X:%.*]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i32 [[X]], -1
+; CHECK-NEXT:    [[DEC:%.*]] = and i32 [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    ret i32 [[DEC]]
 ;
   %neg = sub i32 0, %X
@@ -21,9 +21,9 @@ define i32 @dec_mask_neg_i32(i32 %X) {
 define i32 @dec_mask_commute_neg_i32(i32 %A) {
 ; CHECK-LABEL: @dec_mask_commute_neg_i32(
 ; CHECK-NEXT:    [[X:%.*]] = sdiv i32 42, [[A:%.*]]
-; CHECK-NEXT:    [[NEG:%.*]] = sub nsw i32 0, [[X]]
-; CHECK-NEXT:    [[MASK:%.*]] = and i32 [[X]], [[NEG]]
-; CHECK-NEXT:    [[DEC:%.*]] = add i32 [[MASK]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i32 [[X]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i32 [[X]], -1
+; CHECK-NEXT:    [[DEC:%.*]] = and i32 [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    ret i32 [[DEC]]
 ;
   %X = sdiv i32 42, %A ; thwart complexity-based canonicalization
@@ -35,9 +35,9 @@ define i32 @dec_mask_commute_neg_i32(i32 %A) {
 
 define i32 @dec_commute_mask_neg_i32(i32 %X) {
 ; CHECK-LABEL: @dec_commute_mask_neg_i32(
-; CHECK-NEXT:    [[NEG:%.*]] = sub i32 0, [[X:%.*]]
-; CHECK-NEXT:    [[MASK:%.*]] = and i32 [[NEG]], [[X]]
-; CHECK-NEXT:    [[DEC:%.*]] = add i32 [[MASK]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X:%.*]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i32 [[X]], -1
+; CHECK-NEXT:    [[DEC:%.*]] = and i32 [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    ret i32 [[DEC]]
 ;
   %neg = sub i32 0, %X
@@ -78,9 +78,9 @@ define i32 @dec_mask_multiuse_neg_i32(i32 %X) {
 
 define <2 x i32> @dec_mask_neg_v2i32(<2 x i32> %X) {
 ; CHECK-LABEL: @dec_mask_neg_v2i32(
-; CHECK-NEXT:    [[NEG:%.*]] = sub <2 x i32> zeroinitializer, [[X:%.*]]
-; CHECK-NEXT:    [[MASK:%.*]] = and <2 x i32> [[NEG]], [[X]]
-; CHECK-NEXT:    [[DEC:%.*]] = add <2 x i32> [[MASK]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP2:%.*]] = xor <2 x i32> [[X]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[DEC:%.*]] = and <2 x i32> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    ret <2 x i32> [[DEC]]
 ;
   %neg = sub <2 x i32> zeroinitializer, %X
@@ -91,9 +91,9 @@ define <2 x i32> @dec_mask_neg_v2i32(<2 x i32> %X) {
 
 define <2 x i32> @dec_mask_neg_v2i32_undef(<2 x i32> %X) {
 ; CHECK-LABEL: @dec_mask_neg_v2i32_undef(
-; CHECK-NEXT:    [[NEG:%.*]] = sub <2 x i32> zeroinitializer, [[X:%.*]]
-; CHECK-NEXT:    [[MASK:%.*]] = and <2 x i32> [[NEG]], [[X]]
-; CHECK-NEXT:    [[DEC:%.*]] = add <2 x i32> [[MASK]], <i32 -1, i32 undef>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP2:%.*]] = xor <2 x i32> [[X]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[DEC:%.*]] = and <2 x i32> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    ret <2 x i32> [[DEC]]
 ;
   %neg = sub <2 x i32> zeroinitializer, %X

diff  --git a/llvm/test/Transforms/InstCombine/ctpop-cttz.ll b/llvm/test/Transforms/InstCombine/ctpop-cttz.ll
index 84af8b37a7a6..ae3f1d49d762 100644
--- a/llvm/test/Transforms/InstCombine/ctpop-cttz.ll
+++ b/llvm/test/Transforms/InstCombine/ctpop-cttz.ll
@@ -94,11 +94,8 @@ define i32 @ctpop2_multiuse(i32 %0) {
 ; __builtin_popcount((i & -i) - 1) -> __builtin_cttz(i, false)
 define i32 @ctpop3(i32 %0) {
 ; CHECK-LABEL: @ctpop3(
-; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 0, [[TMP0:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], [[TMP0]]
-; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[TMP3]], -1
-; CHECK-NEXT:    [[TMP5:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[TMP4]]), !range [[RNG0]]
-; CHECK-NEXT:    ret i32 [[TMP5]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP0:%.*]], i1 false), !range [[RNG0]]
+; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
   %2 = sub i32 0, %0
   %3 = and i32 %2, %0
@@ -109,11 +106,8 @@ define i32 @ctpop3(i32 %0) {
 
 define <2 x i32> @ctpop3v(<2 x i32> %0) {
 ; CHECK-LABEL: @ctpop3v(
-; CHECK-NEXT:    [[TMP2:%.*]] = sub <2 x i32> zeroinitializer, [[TMP0:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i32> [[TMP2]], [[TMP0]]
-; CHECK-NEXT:    [[TMP4:%.*]] = add <2 x i32> [[TMP3]], <i32 -1, i32 -1>
-; CHECK-NEXT:    [[TMP5:%.*]] = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[TMP4]])
-; CHECK-NEXT:    ret <2 x i32> [[TMP5]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP0:%.*]], i1 false)
+; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
 ;
   %2 = sub <2 x i32> zeroinitializer, %0
   %3 = and <2 x i32> %2, %0
@@ -124,11 +118,8 @@ define <2 x i32> @ctpop3v(<2 x i32> %0) {
 
 define <2 x i32> @ctpop3v_undef(<2 x i32> %0) {
 ; CHECK-LABEL: @ctpop3v_undef(
-; CHECK-NEXT:    [[TMP2:%.*]] = sub <2 x i32> zeroinitializer, [[TMP0:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i32> [[TMP2]], [[TMP0]]
-; CHECK-NEXT:    [[TMP4:%.*]] = add <2 x i32> [[TMP3]], <i32 -1, i32 undef>
-; CHECK-NEXT:    [[TMP5:%.*]] = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[TMP4]])
-; CHECK-NEXT:    ret <2 x i32> [[TMP5]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP0:%.*]], i1 false)
+; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
 ;
   %2 = sub <2 x i32> zeroinitializer, %0
   %3 = and <2 x i32> %2, %0