[llvm] 72d1419 - [InstCombine] Add CTPOP -> CTTZ simplifications (PR43513)

Mon Mar 23 04:05:08 PDT 2020

Author: Simon Pilgrim
Date: 2020-03-23T11:04:33Z
New Revision: 72d1419bfba2a1c4d842ab57849a22d9260a1bb6

URL: https://github.com/llvm/llvm-project/commit/72d1419bfba2a1c4d842ab57849a22d9260a1bb6
DIFF: https://github.com/llvm/llvm-project/commit/72d1419bfba2a1c4d842ab57849a22d9260a1bb6.diff

LOG: [InstCombine] Add CTPOP -> CTTZ simplifications (PR43513)

As detailed on PR43513, we can simplify:

ctpop(x | -x) -> bitwidth - cttz(x, false)
Alive2: http://volta.cs.utah.edu:8080/z/caw49X

ctpop(~x & (x - 1)) -> cttz(x, false)
Alive2: http://volta.cs.utah.edu:8080/z/5zfVrx

I've tweaked the initial test cases I added at rG2d712fb75584 to increase commutativity testing.

Differential Revision: https://reviews.llvm.org/D76568

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
    llvm/test/Transforms/InstCombine/ctpop-cttz.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index d4fc6594f28d..b8d899df65ad 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1322,19 +1322,38 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
 static Instruction *foldCtpop(IntrinsicInst &II, InstCombiner &IC) {
   assert(II.getIntrinsicID() == Intrinsic::ctpop &&
          "Expected ctpop intrinsic");
+  Type *Ty = II.getType();
+  unsigned BitWidth = Ty->getScalarSizeInBits();
   Value *Op0 = II.getArgOperand(0);
   Value *X;
+
   // ctpop(bitreverse(x)) -> ctpop(x)
   // ctpop(bswap(x)) -> ctpop(x)
   if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X))))
     return IC.replaceOperand(II, 0, X);
 
+  // ctpop(x | -x) -> bitwidth - cttz(x, false)
+  if (match(Op0, m_c_Or(m_Value(X), m_Neg(m_Deferred(X))))) {
+    Function *F =
+        Intrinsic::getDeclaration(II.getModule(), Intrinsic::cttz, Ty);
+    auto *Cttz = IC.Builder.CreateCall(F, {X, IC.Builder.getFalse()});
+    auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
+    return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
+  }
+
+  // ctpop(~x & (x - 1)) -> cttz(x, false)
+  if (match(Op0,
+            m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes())))) {
+    Function *F =
+        Intrinsic::getDeclaration(II.getModule(), Intrinsic::cttz, Ty);
+    return CallInst::Create(F, {X, IC.Builder.getFalse()});
+  }
+
   // FIXME: Try to simplify vectors of integers.
-  auto *IT = dyn_cast<IntegerType>(Op0->getType());
+  auto *IT = dyn_cast<IntegerType>(Ty);
   if (!IT)
     return nullptr;
 
-  unsigned BitWidth = IT->getBitWidth();
   KnownBits Known(BitWidth);
   IC.computeKnownBits(Op0, Known, 0, &II);
 

diff  --git a/llvm/test/Transforms/InstCombine/ctpop-cttz.ll b/llvm/test/Transforms/InstCombine/ctpop-cttz.ll
index 52e7ef5eb3c1..15244d212dfb 100644
--- a/llvm/test/Transforms/InstCombine/ctpop-cttz.ll
+++ b/llvm/test/Transforms/InstCombine/ctpop-cttz.ll
@@ -8,14 +8,11 @@ declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
 ; __builtin_popcount(i | -i) -> 32 - __builtin_cttz(i, false)
 define i32 @ctpop1(i32 %0) {
 ; CHECK-LABEL: @ctpop1(
-; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 0, [[TMP0:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP2]], [[TMP0]]
-; CHECK-NEXT:    [[TMP4:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[TMP3]]), !range !0
-; CHECK-NEXT:    [[TMP5:%.*]] = sub nuw nsw i32 32, [[TMP4]]
-; CHECK-NEXT:    ret i32 [[TMP5]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP0:%.*]], i1 false), !range !0
+; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
   %2 = sub i32 0, %0
-  %3 = or i32 %2, %0
+  %3 = or i32 %0, %2
   %4 = tail call i32 @llvm.ctpop.i32(i32 %3)
   %5 = sub i32 32, %4
   ret i32 %5
@@ -23,31 +20,25 @@ define i32 @ctpop1(i32 %0) {
 
 define <2 x i32> @ctpop1v(<2 x i32> %0) {
 ; CHECK-LABEL: @ctpop1v(
-; CHECK-NEXT:    [[TMP2:%.*]] = sub <2 x i32> zeroinitializer, [[TMP0:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i32> [[TMP2]], [[TMP0]]
-; CHECK-NEXT:    [[TMP4:%.*]] = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[TMP3]])
-; CHECK-NEXT:    [[TMP5:%.*]] = sub nsw <2 x i32> <i32 32, i32 32>, [[TMP4]]
-; CHECK-NEXT:    ret <2 x i32> [[TMP5]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP0:%.*]], i1 false)
+; CHECK-NEXT:    [[TMP3:%.*]] = sub nsw <2 x i32> <i32 32, i32 32>, [[TMP2]]
+; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
 ;
   %2 = sub <2 x i32> zeroinitializer, %0
   %3 = or <2 x i32> %2, %0
   %4 = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %3)
-  %5 = sub <2 x i32> <i32 32, i32 32>, %4
-  ret <2 x i32> %5
+  ret <2 x i32> %4
 }
 
 ; PR43513
 ; __builtin_popcount(~i & (i-1)) -> __builtin_cttz(i, false)
 define i32 @ctpop2(i32 %0) {
 ; CHECK-LABEL: @ctpop2(
-; CHECK-NEXT:    [[TMP2:%.*]] = xor i32 [[TMP0:%.*]], -1
-; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP0]], -1
-; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[TMP3]], [[TMP2]]
-; CHECK-NEXT:    [[TMP5:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[TMP4]]), !range !0
-; CHECK-NEXT:    ret i32 [[TMP5]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP0:%.*]], i1 false), !range !0
+; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
   %2 = xor i32 %0, -1
-  %3 = add i32 %0, -1
+  %3 = sub i32 %0, 1
   %4 = and i32 %3, %2
   %5 = tail call i32 @llvm.ctpop.i32(i32 %4)
   ret i32 %5
@@ -55,15 +46,12 @@ define i32 @ctpop2(i32 %0) {
 
 define <2 x i32> @ctpop2v(<2 x i32> %0) {
 ; CHECK-LABEL: @ctpop2v(
-; CHECK-NEXT:    [[TMP2:%.*]] = xor <2 x i32> [[TMP0:%.*]], <i32 -1, i32 -1>
-; CHECK-NEXT:    [[TMP3:%.*]] = add <2 x i32> [[TMP0]], <i32 -1, i32 -1>
-; CHECK-NEXT:    [[TMP4:%.*]] = and <2 x i32> [[TMP3]], [[TMP2]]
-; CHECK-NEXT:    [[TMP5:%.*]] = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[TMP4]])
-; CHECK-NEXT:    ret <2 x i32> [[TMP5]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP0:%.*]], i1 false)
+; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
 ;
   %2 = xor <2 x i32> %0, <i32 -1, i32 -1>
   %3 = add <2 x i32> %0, <i32 -1, i32 -1>
-  %4 = and <2 x i32> %3, %2
+  %4 = and <2 x i32> %2, %3
   %5 = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %4)
   ret <2 x i32> %5
 }