[llvm] 413c119 - [InstCombine] If there is a known-bit transform is_pow2 check to just check for any other bits

Noah Goldstein via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 21 12:15:24 PDT 2023


Author: Noah Goldstein
Date: 2023-07-21T14:15:06-05:00
New Revision: 413c119c6a812f91e480c4a67df512e340c41ff3

URL: https://github.com/llvm/llvm-project/commit/413c119c6a812f91e480c4a67df512e340c41ff3
DIFF: https://github.com/llvm/llvm-project/commit/413c119c6a812f91e480c4a67df512e340c41ff3.diff

LOG: [InstCombine] If there is a known-bit transform is_pow2 check to just check for any other bits

in `ctpop(X) eq/ne 1` or `ctpop(X) ugt/ule 1`, if there is any
known-bit in `X`, instead of going through `ctpop`, we can just test
if there are any other known bits in `X`. If there are, `X` is not a
power of 2. If there aren't, `X` is a power of 2.

https://alive2.llvm.org/ce/z/eLMJgU

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D152677

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
    llvm/test/Transforms/InstCombine/ispow2.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index e43c10cb533c7c..a79cc6bd62275c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3404,6 +3404,44 @@ Instruction *InstCombinerImpl::foldICmpBinOpEqualityWithConstant(
   return nullptr;
 }
 
+static Instruction *foldCtpopPow2Test(ICmpInst &I, IntrinsicInst *CtpopLhs,
+                                      const APInt &CRhs,
+                                      InstCombiner::BuilderTy &Builder,
+                                      const SimplifyQuery &Q) {
+  assert(CtpopLhs->getIntrinsicID() == Intrinsic::ctpop &&
+         "Non-ctpop intrin in ctpop fold");
+  if (!CtpopLhs->hasOneUse())
+    return nullptr;
+
+  // Power of 2 test:
+  //    isPow2OrZero : ctpop(X) u< 2
+  //    isPow2       : ctpop(X) == 1
+  //    NotPow2OrZero: ctpop(X) u> 1
+  //    NotPow2      : ctpop(X) != 1
+  // If we know any bit of X can be folded to:
+  //    IsPow2       : X & (~Bit) == 0
+  //    NotPow2      : X & (~Bit) != 0
+  const ICmpInst::Predicate Pred = I.getPredicate();
+  if (((I.isEquality() || Pred == ICmpInst::ICMP_UGT) && CRhs == 1) ||
+      (Pred == ICmpInst::ICMP_ULT && CRhs == 2)) {
+    Value *Op = CtpopLhs->getArgOperand(0);
+    KnownBits OpKnown = computeKnownBits(Op, Q.DL,
+                                         /*Depth*/ 0, Q.AC, Q.CxtI, Q.DT);
+    // No need to check for count > 1, that should be already constant folded.
+    if (OpKnown.countMinPopulation() == 1) {
+      Value *And = Builder.CreateAnd(
+          Op, Constant::getIntegerValue(Op->getType(), ~(OpKnown.One)));
+      return new ICmpInst(
+          (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_ULT)
+              ? ICmpInst::ICMP_EQ
+              : ICmpInst::ICMP_NE,
+          And, Constant::getNullValue(Op->getType()));
+    }
+  }
+
+  return nullptr;
+}
+
 /// Fold an equality icmp with LLVM intrinsic and constant operand.
 Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant(
     ICmpInst &Cmp, IntrinsicInst *II, const APInt &C) {
@@ -3749,6 +3787,11 @@ Instruction *InstCombinerImpl::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
             Pred, cast<SaturatingInst>(II), C, Builder))
       return Folded;
     break;
+  case Intrinsic::ctpop: {
+    const SimplifyQuery Q = SQ.getWithInstruction(&Cmp);
+    if (Instruction *R = foldCtpopPow2Test(Cmp, II, C, Builder, Q))
+      return R;
+  } break;
   }
 
   if (Cmp.isEquality())

diff  --git a/llvm/test/Transforms/InstCombine/ispow2.ll b/llvm/test/Transforms/InstCombine/ispow2.ll
index f253fed9ec54be..4d6e4327721000 100644
--- a/llvm/test/Transforms/InstCombine/ispow2.ll
+++ b/llvm/test/Transforms/InstCombine/ispow2.ll
@@ -1269,9 +1269,8 @@ define i1 @blsmsk_is_p2_or_z_fail_bad_cmp(i32 %x, i32 %z) {
 declare <2 x i32> @llvm.ctpop.2xi32(<2 x i32>)
 define i1 @is_pow2_nz_known_bits(i32 %xin) {
 ; CHECK-LABEL: @is_pow2_nz_known_bits(
-; CHECK-NEXT:    [[X:%.*]] = or i32 [[XIN:%.*]], 64
-; CHECK-NEXT:    [[CNT:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG3:![0-9]+]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i32 [[CNT]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[XIN:%.*]], -65
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i32 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = or i32 %xin, 64
@@ -1283,7 +1282,7 @@ define i1 @is_pow2_nz_known_bits(i32 %xin) {
 define i1 @is_pow2_nz_known_bits_fail_multiuse(i32 %xin) {
 ; CHECK-LABEL: @is_pow2_nz_known_bits_fail_multiuse(
 ; CHECK-NEXT:    [[X:%.*]] = or i32 [[XIN:%.*]], 64
-; CHECK-NEXT:    [[CNT:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG3]]
+; CHECK-NEXT:    [[CNT:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG3:![0-9]+]]
 ; CHECK-NEXT:    call void @use.i32(i32 [[CNT]])
 ; CHECK-NEXT:    [[R:%.*]] = icmp eq i32 [[CNT]], 1
 ; CHECK-NEXT:    ret i1 [[R]]
@@ -1297,9 +1296,7 @@ define i1 @is_pow2_nz_known_bits_fail_multiuse(i32 %xin) {
 
 define i1 @not_pow2_nz_known_bits(i32 %xin) {
 ; CHECK-LABEL: @not_pow2_nz_known_bits(
-; CHECK-NEXT:    [[X:%.*]] = or i32 [[XIN:%.*]], 1
-; CHECK-NEXT:    [[CNT:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG3]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i32 [[CNT]], 1
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt i32 [[XIN:%.*]], 1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = or i32 %xin, 1
@@ -1323,9 +1320,8 @@ define i1 @not_pow2_nz_known_bits_fail_not_p2_test(i32 %xin) {
 
 define i1 @is_pow2_or_z_known_bits(i32 %xin) {
 ; CHECK-LABEL: @is_pow2_or_z_known_bits(
-; CHECK-NEXT:    [[X:%.*]] = or i32 [[XIN:%.*]], -2147483648
-; CHECK-NEXT:    [[CNT:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG3]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ult i32 [[CNT]], 2
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[XIN:%.*]], 2147483647
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i32 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = or i32 %xin, 2147483648
@@ -1336,9 +1332,8 @@ define i1 @is_pow2_or_z_known_bits(i32 %xin) {
 
 define <2 x i1> @not_pow2_or_z_known_bits(<2 x i32> %xin) {
 ; CHECK-LABEL: @not_pow2_or_z_known_bits(
-; CHECK-NEXT:    [[X:%.*]] = or <2 x i32> [[XIN:%.*]], <i32 64, i32 64>
-; CHECK-NEXT:    [[CNT:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X]]), !range [[RNG3]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ugt <2 x i32> [[CNT]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[XIN:%.*]], <i32 -65, i32 -65>
+; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %x = or <2 x i32> %xin, <i32 64, i32 64>


        


More information about the llvm-commits mailing list