[PATCH] D101802: [InstCombine] ctpop(X) ^ ctpop(Y) & 1 --> ctpop(X^Y) & 1 (PR50094)

Mon May 3 17:04:25 PDT 2021

xbolva00 created this revision.
xbolva00 added reviewers: RKSimon, spatel.
Herald added a subscriber: hiraditya.
xbolva00 requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Original pattern: (__builtin_parity(x) ^ __builtin_parity(y))

LLVM rewrites it as: (__builtin_popcount(x) ^ __builtin_popcount(y)) & 1

Optimized form:  __builtin_popcount(X^Y) & 1

Alive proof: https://alive2.llvm.org/ce/z/-GdWFr


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D101802

Files:
  llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
  llvm/test/Transforms/InstCombine/ctpop.ll


Index: llvm/test/Transforms/InstCombine/ctpop.ll
===================================================================

--- llvm/test/Transforms/InstCombine/ctpop.ll
+++ llvm/test/Transforms/InstCombine/ctpop.ll
@@ -386,10 +386,9 @@
 
 define i32 @parity_xor(i32 %arg, i32 %arg1) {
 ; CHECK-LABEL: @parity_xor(
-; CHECK-NEXT:    [[I:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[ARG:%.*]]), !range [[RNG1]]
-; CHECK-NEXT:    [[I2:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[ARG1:%.*]]), !range [[RNG1]]
-; CHECK-NEXT:    [[I3:%.*]] = xor i32 [[I2]], [[I]]
-; CHECK-NEXT:    [[I4:%.*]] = and i32 [[I3]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[ARG1:%.*]], [[ARG:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.ctpop.i32(i32 [[TMP1]]), !range [[RNG1]]
+; CHECK-NEXT:    [[I4:%.*]] = and i32 [[TMP2]], 1
 ; CHECK-NEXT:    ret i32 [[I4]]
 ;
   %i = tail call i32 @llvm.ctpop.i32(i32 %arg)
@@ -401,10 +400,9 @@
 
 define i32 @parity_xor_trunc(i64 %arg, i64 %arg1) {
 ; CHECK-LABEL: @parity_xor_trunc(
-; CHECK-NEXT:    [[I:%.*]] = tail call i64 @llvm.ctpop.i64(i64 [[ARG:%.*]]), !range [[RNG5:![0-9]+]]
-; CHECK-NEXT:    [[I2:%.*]] = tail call i64 @llvm.ctpop.i64(i64 [[ARG1:%.*]]), !range [[RNG5]]
-; CHECK-NEXT:    [[I3:%.*]] = xor i64 [[I2]], [[I]]
-; CHECK-NEXT:    [[I4:%.*]] = trunc i64 [[I3]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i64 [[ARG1:%.*]], [[ARG:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP1]]), !range [[RNG5:![0-9]+]]
+; CHECK-NEXT:    [[I4:%.*]] = trunc i64 [[TMP2]] to i32
 ; CHECK-NEXT:    [[I5:%.*]] = and i32 [[I4]], 1
 ; CHECK-NEXT:    ret i32 [[I5]]
 ;
@@ -418,10 +416,9 @@
 
 define <2 x i32> @parity_xor_vec(<2 x i32> %arg, <2 x i32> %arg1) {
 ; CHECK-LABEL: @parity_xor_vec(
-; CHECK-NEXT:    [[I:%.*]] = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[ARG:%.*]])
-; CHECK-NEXT:    [[I2:%.*]] = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[ARG1:%.*]])
-; CHECK-NEXT:    [[I3:%.*]] = xor <2 x i32> [[I2]], [[I]]
-; CHECK-NEXT:    [[I4:%.*]] = and <2 x i32> [[I3]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[ARG1:%.*]], [[ARG:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[TMP1]])
+; CHECK-NEXT:    [[I4:%.*]] = and <2 x i32> [[TMP2]], <i32 1, i32 1>
 ; CHECK-NEXT:    ret <2 x i32> [[I4]]
 ;
   %i = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %arg)
Index: llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -220,6 +220,17 @@
     if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
         SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Depth + 1))
       return I;
+    Value *LHS, *RHS;
+    if (DemandedMask == 1 &&
+        match(I->getOperand(0), m_Intrinsic<Intrinsic::ctpop>(m_Value(LHS))) &&
+        match(I->getOperand(1), m_Intrinsic<Intrinsic::ctpop>(m_Value(RHS)))) {
+      // ctpop(X) ^ ctpop(Y) & 1 --> ctpop(X^Y) & 1
+      IRBuilderBase::InsertPointGuard Guard(Builder);
+      Builder.SetInsertPoint(I);
+      auto *Xor = Builder.CreateXor(LHS, RHS);
+      return Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Xor);
+    }
+
     assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
     assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D101802.342595.patch
Type: text/x-patch
Size: 3473 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210504/566ad7be/attachment.bin>