[llvm] d4ec8ea - [InstCombine] ctpop(X) + ctpop(Y) => ctpop(X | Y) if X and Y have no common bits (PR48999)

Dávid Bolvanský via llvm-commits llvm-commits at lists.llvm.org
Sat Apr 24 08:52:20 PDT 2021


Author: Dávid Bolvanský
Date: 2021-04-24T17:52:10+02:00
New Revision: d4ec8ea19cb1c944f70b584b2bdb8a12ea875261

URL: https://github.com/llvm/llvm-project/commit/d4ec8ea19cb1c944f70b584b2bdb8a12ea875261
DIFF: https://github.com/llvm/llvm-project/commit/d4ec8ea19cb1c944f70b584b2bdb8a12ea875261.diff

LOG: [InstCombine] ctpop(X) + ctpop(Y) => ctpop(X | Y) if X and Y have no common bits (PR48999)

For example:

```
int src(unsigned int a, unsigned int b)
{
    return __builtin_popcount(a << 16) + __builtin_popcount(b >> 16);
}

int tgt(unsigned int a, unsigned int b)
{
    return __builtin_popcount((a << 16)  | (b >> 16));
}
```

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D101210

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
    llvm/test/Transforms/InstCombine/ctpop.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 9573d1d9fee5..48808db30446 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1448,6 +1448,14 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
         Builder.CreateIntrinsic(Intrinsic::umax, {I.getType()}, {A, B}));
   }
 
+  // ctpop(A) + ctpop(B) => ctpop(A | B) if A and B have no bits set in common.
+  if (match(LHS, m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(A)))) &&
+      match(RHS, m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(B)))) &&
+      haveNoCommonBitsSet(A, B, DL, &AC, &I, &DT))
+    return replaceInstUsesWith(
+        I, Builder.CreateIntrinsic(Intrinsic::ctpop, {I.getType()},
+                                   {Builder.CreateOr(A, B)}));
+
   return Changed ? &I : nullptr;
 }
 

diff  --git a/llvm/test/Transforms/InstCombine/ctpop.ll b/llvm/test/Transforms/InstCombine/ctpop.ll
index 92e15a98f74d..9a238d27fd31 100644
--- a/llvm/test/Transforms/InstCombine/ctpop.ll
+++ b/llvm/test/Transforms/InstCombine/ctpop.ll
@@ -203,12 +203,9 @@ define i32 @ctpop_add(i32 %a, i32 %b) {
 
 define i32 @ctpop_add_no_common_bits(i32 %a, i32 %b) {
 ; CHECK-LABEL: @ctpop_add_no_common_bits(
-; CHECK-NEXT:    [[SHL16:%.*]] = shl i32 [[B:%.*]], 16
-; CHECK-NEXT:    [[CTPOP1:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[SHL16]]), !range [[RNG3:![0-9]+]]
-; CHECK-NEXT:    [[LSHL16:%.*]] = lshr i32 [[B]], 16
-; CHECK-NEXT:    [[CTPOP2:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[LSHL16]]), !range [[RNG3]]
-; CHECK-NEXT:    [[RES:%.*]] = add nuw nsw i32 [[CTPOP1]], [[CTPOP2]]
-; CHECK-NEXT:    ret i32 [[RES]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.fshl.i32(i32 [[B:%.*]], i32 [[B]], i32 16)
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.ctpop.i32(i32 [[TMP1]]), !range [[RNG1]]
+; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
   %shl16 = shl i32 %b, 16
   %ctpop1 = tail call i32 @llvm.ctpop.i32(i32 %shl16)
@@ -220,12 +217,9 @@ define i32 @ctpop_add_no_common_bits(i32 %a, i32 %b) {
 
 define <2 x i32> @ctpop_add_no_common_bits_vec(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: @ctpop_add_no_common_bits_vec(
-; CHECK-NEXT:    [[SHL16:%.*]] = shl <2 x i32> [[A:%.*]], <i32 16, i32 16>
-; CHECK-NEXT:    [[CTPOP1:%.*]] = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[SHL16]])
-; CHECK-NEXT:    [[LSHL16:%.*]] = lshr <2 x i32> [[B:%.*]], <i32 16, i32 16>
-; CHECK-NEXT:    [[CTPOP2:%.*]] = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[LSHL16]])
-; CHECK-NEXT:    [[RES:%.*]] = add nuw nsw <2 x i32> [[CTPOP1]], [[CTPOP2]]
-; CHECK-NEXT:    ret <2 x i32> [[RES]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> <i32 16, i32 16>)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[TMP1]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
 ;
   %shl16 = shl <2 x i32> %a, <i32 16, i32 16>
   %ctpop1 = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %shl16)


        


More information about the llvm-commits mailing list