[llvm] 72d1419 - [InstCombine] Add CTPOP -> CTTZ simplifications (PR43513)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 23 04:05:08 PDT 2020
Author: Simon Pilgrim
Date: 2020-03-23T11:04:33Z
New Revision: 72d1419bfba2a1c4d842ab57849a22d9260a1bb6
URL: https://github.com/llvm/llvm-project/commit/72d1419bfba2a1c4d842ab57849a22d9260a1bb6
DIFF: https://github.com/llvm/llvm-project/commit/72d1419bfba2a1c4d842ab57849a22d9260a1bb6.diff
LOG: [InstCombine] Add CTPOP -> CTTZ simplifications (PR43513)
As detailed on PR43513, we can simplify:
ctpop(x | -x) -> bitwidth - cttz(x, false)
Alive2: http://volta.cs.utah.edu:8080/z/caw49X
ctpop(~x & (x - 1)) -> cttz(x, false)
Alive2: http://volta.cs.utah.edu:8080/z/5zfVrx
I've tweaked the initial test cases I added at rG2d712fb75584 to increase commutativity testing.
Differential Revision: https://reviews.llvm.org/D76568
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
llvm/test/Transforms/InstCombine/ctpop-cttz.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index d4fc6594f28d..b8d899df65ad 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1322,19 +1322,38 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
static Instruction *foldCtpop(IntrinsicInst &II, InstCombiner &IC) {
assert(II.getIntrinsicID() == Intrinsic::ctpop &&
"Expected ctpop intrinsic");
+ Type *Ty = II.getType();
+ unsigned BitWidth = Ty->getScalarSizeInBits();
Value *Op0 = II.getArgOperand(0);
Value *X;
+
// ctpop(bitreverse(x)) -> ctpop(x)
// ctpop(bswap(x)) -> ctpop(x)
if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X))))
return IC.replaceOperand(II, 0, X);
+ // ctpop(x | -x) -> bitwidth - cttz(x, false)
+ if (match(Op0, m_c_Or(m_Value(X), m_Neg(m_Deferred(X))))) {
+ Function *F =
+ Intrinsic::getDeclaration(II.getModule(), Intrinsic::cttz, Ty);
+ auto *Cttz = IC.Builder.CreateCall(F, {X, IC.Builder.getFalse()});
+ auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
+ return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
+ }
+
+ // ctpop(~x & (x - 1)) -> cttz(x, false)
+ if (match(Op0,
+ m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes())))) {
+ Function *F =
+ Intrinsic::getDeclaration(II.getModule(), Intrinsic::cttz, Ty);
+ return CallInst::Create(F, {X, IC.Builder.getFalse()});
+ }
+
// FIXME: Try to simplify vectors of integers.
- auto *IT = dyn_cast<IntegerType>(Op0->getType());
+ auto *IT = dyn_cast<IntegerType>(Ty);
if (!IT)
return nullptr;
- unsigned BitWidth = IT->getBitWidth();
KnownBits Known(BitWidth);
IC.computeKnownBits(Op0, Known, 0, &II);
diff --git a/llvm/test/Transforms/InstCombine/ctpop-cttz.ll b/llvm/test/Transforms/InstCombine/ctpop-cttz.ll
index 52e7ef5eb3c1..15244d212dfb 100644
--- a/llvm/test/Transforms/InstCombine/ctpop-cttz.ll
+++ b/llvm/test/Transforms/InstCombine/ctpop-cttz.ll
@@ -8,14 +8,11 @@ declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
; __builtin_popcount(i | -i) -> 32 - __builtin_cttz(i, false)
define i32 @ctpop1(i32 %0) {
; CHECK-LABEL: @ctpop1(
-; CHECK-NEXT: [[TMP2:%.*]] = sub i32 0, [[TMP0:%.*]]
-; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP2]], [[TMP0]]
-; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[TMP3]]), !range !0
-; CHECK-NEXT: [[TMP5:%.*]] = sub nuw nsw i32 32, [[TMP4]]
-; CHECK-NEXT: ret i32 [[TMP5]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP0:%.*]], i1 false), !range !0
+; CHECK-NEXT: ret i32 [[TMP2]]
;
%2 = sub i32 0, %0
- %3 = or i32 %2, %0
+ %3 = or i32 %0, %2
%4 = tail call i32 @llvm.ctpop.i32(i32 %3)
%5 = sub i32 32, %4
ret i32 %5
@@ -23,31 +20,25 @@ define i32 @ctpop1(i32 %0) {
define <2 x i32> @ctpop1v(<2 x i32> %0) {
; CHECK-LABEL: @ctpop1v(
-; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i32> zeroinitializer, [[TMP0:%.*]]
-; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP2]], [[TMP0]]
-; CHECK-NEXT: [[TMP4:%.*]] = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[TMP3]])
-; CHECK-NEXT: [[TMP5:%.*]] = sub nsw <2 x i32> <i32 32, i32 32>, [[TMP4]]
-; CHECK-NEXT: ret <2 x i32> [[TMP5]]
+; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP0:%.*]], i1 false)
+; CHECK-NEXT: [[TMP3:%.*]] = sub nsw <2 x i32> <i32 32, i32 32>, [[TMP2]]
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
;
%2 = sub <2 x i32> zeroinitializer, %0
%3 = or <2 x i32> %2, %0
%4 = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %3)
- %5 = sub <2 x i32> <i32 32, i32 32>, %4
- ret <2 x i32> %5
+ ret <2 x i32> %4
}
; PR43513
; __builtin_popcount(~i & (i-1)) -> __builtin_cttz(i, false)
define i32 @ctpop2(i32 %0) {
; CHECK-LABEL: @ctpop2(
-; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP0:%.*]], -1
-; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], -1
-; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], [[TMP2]]
-; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[TMP4]]), !range !0
-; CHECK-NEXT: ret i32 [[TMP5]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP0:%.*]], i1 false), !range !0
+; CHECK-NEXT: ret i32 [[TMP2]]
;
%2 = xor i32 %0, -1
- %3 = add i32 %0, -1
+ %3 = sub i32 %0, 1
%4 = and i32 %3, %2
%5 = tail call i32 @llvm.ctpop.i32(i32 %4)
ret i32 %5
@@ -55,15 +46,12 @@ define i32 @ctpop2(i32 %0) {
define <2 x i32> @ctpop2v(<2 x i32> %0) {
; CHECK-LABEL: @ctpop2v(
-; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[TMP0:%.*]], <i32 -1, i32 -1>
-; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP0]], <i32 -1, i32 -1>
-; CHECK-NEXT: [[TMP4:%.*]] = and <2 x i32> [[TMP3]], [[TMP2]]
-; CHECK-NEXT: [[TMP5:%.*]] = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[TMP4]])
-; CHECK-NEXT: ret <2 x i32> [[TMP5]]
+; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP0:%.*]], i1 false)
+; CHECK-NEXT: ret <2 x i32> [[TMP2]]
;
%2 = xor <2 x i32> %0, <i32 -1, i32 -1>
%3 = add <2 x i32> %0, <i32 -1, i32 -1>
- %4 = and <2 x i32> %3, %2
+ %4 = and <2 x i32> %2, %3
%5 = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %4)
ret <2 x i32> %5
}
More information about the llvm-commits
mailing list