[llvm] 80b897e - [InstCombine] (ctpop(X) ^ ctpop(Y)) & 1 --> ctpop(X^Y) & 1 (PR50094)
Dávid Bolvanský via llvm-commits
llvm-commits at lists.llvm.org
Tue May 4 04:16:38 PDT 2021
Author: Dávid Bolvanský
Date: 2021-05-04T13:16:18+02:00
New Revision: 80b897e21bf0ac56b04d415cf9bf671f81a84416
URL: https://github.com/llvm/llvm-project/commit/80b897e21bf0ac56b04d415cf9bf671f81a84416
DIFF: https://github.com/llvm/llvm-project/commit/80b897e21bf0ac56b04d415cf9bf671f81a84416.diff
LOG: [InstCombine] (ctpop(X) ^ ctpop(Y)) & 1 --> ctpop(X^Y) & 1 (PR50094)
Original pattern: (__builtin_parity(x) ^ __builtin_parity(y))
LLVM rewrites it as: (__builtin_popcount(x) ^ __builtin_popcount(y)) & 1
Optimized form: __builtin_popcount(x ^ y) & 1
Alive proof: https://alive2.llvm.org/ce/z/-GdWFr
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D101802
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
llvm/test/Transforms/InstCombine/ctpop.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 6175c921a06c..2b9c1f2ad3df 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -220,6 +220,17 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Depth + 1))
return I;
+ Value *LHS, *RHS;
+ if (DemandedMask == 1 &&
+ match(I->getOperand(0), m_Intrinsic<Intrinsic::ctpop>(m_Value(LHS))) &&
+ match(I->getOperand(1), m_Intrinsic<Intrinsic::ctpop>(m_Value(RHS)))) {
+ // (ctpop(X) ^ ctpop(Y)) & 1 --> ctpop(X^Y) & 1
+ IRBuilderBase::InsertPointGuard Guard(Builder);
+ Builder.SetInsertPoint(I);
+ auto *Xor = Builder.CreateXor(LHS, RHS);
+ return Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Xor);
+ }
+
assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
diff --git a/llvm/test/Transforms/InstCombine/ctpop.ll b/llvm/test/Transforms/InstCombine/ctpop.ll
index 9666eb552356..54d8ed99943b 100644
--- a/llvm/test/Transforms/InstCombine/ctpop.ll
+++ b/llvm/test/Transforms/InstCombine/ctpop.ll
@@ -386,10 +386,9 @@ define i32 @zext_ctpop_extra_use(i16 %x, i32* %q) {
define i32 @parity_xor(i32 %arg, i32 %arg1) {
; CHECK-LABEL: @parity_xor(
-; CHECK-NEXT: [[I:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[ARG:%.*]]), !range [[RNG1]]
-; CHECK-NEXT: [[I2:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[ARG1:%.*]]), !range [[RNG1]]
-; CHECK-NEXT: [[I3:%.*]] = xor i32 [[I2]], [[I]]
-; CHECK-NEXT: [[I4:%.*]] = and i32 [[I3]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[ARG1:%.*]], [[ARG:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctpop.i32(i32 [[TMP1]]), !range [[RNG1]]
+; CHECK-NEXT: [[I4:%.*]] = and i32 [[TMP2]], 1
; CHECK-NEXT: ret i32 [[I4]]
;
%i = tail call i32 @llvm.ctpop.i32(i32 %arg)
@@ -401,10 +400,9 @@ define i32 @parity_xor(i32 %arg, i32 %arg1) {
define i32 @parity_xor_trunc(i64 %arg, i64 %arg1) {
; CHECK-LABEL: @parity_xor_trunc(
-; CHECK-NEXT: [[I:%.*]] = tail call i64 @llvm.ctpop.i64(i64 [[ARG:%.*]]), !range [[RNG5:![0-9]+]]
-; CHECK-NEXT: [[I2:%.*]] = tail call i64 @llvm.ctpop.i64(i64 [[ARG1:%.*]]), !range [[RNG5]]
-; CHECK-NEXT: [[I3:%.*]] = xor i64 [[I2]], [[I]]
-; CHECK-NEXT: [[I4:%.*]] = trunc i64 [[I3]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = xor i64 [[ARG1:%.*]], [[ARG:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP1]]), !range [[RNG5:![0-9]+]]
+; CHECK-NEXT: [[I4:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[I5:%.*]] = and i32 [[I4]], 1
; CHECK-NEXT: ret i32 [[I5]]
;
@@ -418,10 +416,9 @@ define i32 @parity_xor_trunc(i64 %arg, i64 %arg1) {
define <2 x i32> @parity_xor_vec(<2 x i32> %arg, <2 x i32> %arg1) {
; CHECK-LABEL: @parity_xor_vec(
-; CHECK-NEXT: [[I:%.*]] = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[ARG:%.*]])
-; CHECK-NEXT: [[I2:%.*]] = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[ARG1:%.*]])
-; CHECK-NEXT: [[I3:%.*]] = xor <2 x i32> [[I2]], [[I]]
-; CHECK-NEXT: [[I4:%.*]] = and <2 x i32> [[I3]], <i32 1, i32 1>
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[ARG1:%.*]], [[ARG:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[TMP1]])
+; CHECK-NEXT: [[I4:%.*]] = and <2 x i32> [[TMP2]], <i32 1, i32 1>
; CHECK-NEXT: ret <2 x i32> [[I4]]
;
%i = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %arg)
More information about the llvm-commits mailing list