[llvm] 80b897e - [InstCombine] ctpop(X) ^ ctpop(Y) & 1 --> ctpop(X^Y) & 1 (PR50094)

Tue May 4 04:16:38 PDT 2021

Author: Dávid Bolvanský
Date: 2021-05-04T13:16:18+02:00
New Revision: 80b897e21bf0ac56b04d415cf9bf671f81a84416

URL: https://github.com/llvm/llvm-project/commit/80b897e21bf0ac56b04d415cf9bf671f81a84416
DIFF: https://github.com/llvm/llvm-project/commit/80b897e21bf0ac56b04d415cf9bf671f81a84416.diff

LOG: [InstCombine] ctpop(X) ^ ctpop(Y) & 1 --> ctpop(X^Y) & 1 (PR50094)

Original pattern: (__builtin_parity(x) ^ __builtin_parity(y))

LLVM rewrites it as: (__builtin_popcount(x) ^ __builtin_popcount(y)) & 1

Optimized form:  __builtin_popcount(X^Y) & 1

Alive proof: https://alive2.llvm.org/ce/z/-GdWFr

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D101802

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
    llvm/test/Transforms/InstCombine/ctpop.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 6175c921a06c..2b9c1f2ad3df 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -220,6 +220,17 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
         SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Depth + 1))
       return I;
+    Value *LHS, *RHS;
+    if (DemandedMask == 1 &&
+        match(I->getOperand(0), m_Intrinsic<Intrinsic::ctpop>(m_Value(LHS))) &&
+        match(I->getOperand(1), m_Intrinsic<Intrinsic::ctpop>(m_Value(RHS)))) {
+      // (ctpop(X) ^ ctpop(Y)) & 1 --> ctpop(X^Y) & 1
+      IRBuilderBase::InsertPointGuard Guard(Builder);
+      Builder.SetInsertPoint(I);
+      auto *Xor = Builder.CreateXor(LHS, RHS);
+      return Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Xor);
+    }
+
     assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
     assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
 

diff  --git a/llvm/test/Transforms/InstCombine/ctpop.ll b/llvm/test/Transforms/InstCombine/ctpop.ll
index 9666eb552356..54d8ed99943b 100644
--- a/llvm/test/Transforms/InstCombine/ctpop.ll
+++ b/llvm/test/Transforms/InstCombine/ctpop.ll
@@ -386,10 +386,9 @@ define i32 @zext_ctpop_extra_use(i16 %x, i32* %q) {
 
 define i32 @parity_xor(i32 %arg, i32 %arg1) {
 ; CHECK-LABEL: @parity_xor(
-; CHECK-NEXT:    [[I:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[ARG:%.*]]), !range [[RNG1]]
-; CHECK-NEXT:    [[I2:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[ARG1:%.*]]), !range [[RNG1]]
-; CHECK-NEXT:    [[I3:%.*]] = xor i32 [[I2]], [[I]]
-; CHECK-NEXT:    [[I4:%.*]] = and i32 [[I3]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[ARG1:%.*]], [[ARG:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.ctpop.i32(i32 [[TMP1]]), !range [[RNG1]]
+; CHECK-NEXT:    [[I4:%.*]] = and i32 [[TMP2]], 1
 ; CHECK-NEXT:    ret i32 [[I4]]
 ;
   %i = tail call i32 @llvm.ctpop.i32(i32 %arg)
@@ -401,10 +400,9 @@ define i32 @parity_xor(i32 %arg, i32 %arg1) {
 
 define i32 @parity_xor_trunc(i64 %arg, i64 %arg1) {
 ; CHECK-LABEL: @parity_xor_trunc(
-; CHECK-NEXT:    [[I:%.*]] = tail call i64 @llvm.ctpop.i64(i64 [[ARG:%.*]]), !range [[RNG5:![0-9]+]]
-; CHECK-NEXT:    [[I2:%.*]] = tail call i64 @llvm.ctpop.i64(i64 [[ARG1:%.*]]), !range [[RNG5]]
-; CHECK-NEXT:    [[I3:%.*]] = xor i64 [[I2]], [[I]]
-; CHECK-NEXT:    [[I4:%.*]] = trunc i64 [[I3]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i64 [[ARG1:%.*]], [[ARG:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP1]]), !range [[RNG5:![0-9]+]]
+; CHECK-NEXT:    [[I4:%.*]] = trunc i64 [[TMP2]] to i32
 ; CHECK-NEXT:    [[I5:%.*]] = and i32 [[I4]], 1
 ; CHECK-NEXT:    ret i32 [[I5]]
 ;
@@ -418,10 +416,9 @@ define i32 @parity_xor_trunc(i64 %arg, i64 %arg1) {
 
 define <2 x i32> @parity_xor_vec(<2 x i32> %arg, <2 x i32> %arg1) {
 ; CHECK-LABEL: @parity_xor_vec(
-; CHECK-NEXT:    [[I:%.*]] = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[ARG:%.*]])
-; CHECK-NEXT:    [[I2:%.*]] = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[ARG1:%.*]])
-; CHECK-NEXT:    [[I3:%.*]] = xor <2 x i32> [[I2]], [[I]]
-; CHECK-NEXT:    [[I4:%.*]] = and <2 x i32> [[I3]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[ARG1:%.*]], [[ARG:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[TMP1]])
+; CHECK-NEXT:    [[I4:%.*]] = and <2 x i32> [[TMP2]], <i32 1, i32 1>
 ; CHECK-NEXT:    ret <2 x i32> [[I4]]
 ;
   %i = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %arg)