[llvm] 413c119 - [InstCombine] If there is a known-bit transform is_pow2 check to just check for any other bits
Noah Goldstein via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 21 12:15:24 PDT 2023
Author: Noah Goldstein
Date: 2023-07-21T14:15:06-05:00
New Revision: 413c119c6a812f91e480c4a67df512e340c41ff3
URL: https://github.com/llvm/llvm-project/commit/413c119c6a812f91e480c4a67df512e340c41ff3
DIFF: https://github.com/llvm/llvm-project/commit/413c119c6a812f91e480c4a67df512e340c41ff3.diff
LOG: [InstCombine] If there is a known-bit transform is_pow2 check to just check for any other bits
In `ctpop(X) eq/ne 1` or `ctpop(X) ugt/ule 1`, if any bit of `X` is
known to be set, then instead of going through `ctpop` we can just test
whether any other bit of `X` is set. If one is, `X` is not a
power of 2. If none is, `X` is a power of 2.
https://alive2.llvm.org/ce/z/eLMJgU
Reviewed By: nikic
Differential Revision: https://reviews.llvm.org/D152677
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
llvm/test/Transforms/InstCombine/ispow2.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index e43c10cb533c7c..a79cc6bd62275c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3404,6 +3404,44 @@ Instruction *InstCombinerImpl::foldICmpBinOpEqualityWithConstant(
return nullptr;
}
+static Instruction *foldCtpopPow2Test(ICmpInst &I, IntrinsicInst *CtpopLhs,
+ const APInt &CRhs,
+ InstCombiner::BuilderTy &Builder,
+ const SimplifyQuery &Q) {
+ assert(CtpopLhs->getIntrinsicID() == Intrinsic::ctpop &&
+ "Non-ctpop intrin in ctpop fold");
+ if (!CtpopLhs->hasOneUse())
+ return nullptr;
+
+ // Power-of-2 tests on ctpop(X) recognized here (the ctpop must have one use):
+ // isPow2OrZero : ctpop(X) u< 2
+ // isPow2 : ctpop(X) == 1
+ // NotPow2OrZero: ctpop(X) u> 1
+ // NotPow2 : ctpop(X) != 1
+ // With exactly one known-one bit (Bit), X is non-zero, so these fold to:
+ // IsPow2 (eq, u<) : X & (~Bit) == 0
+ // NotPow2 (ne, u>) : X & (~Bit) != 0
+ const ICmpInst::Predicate Pred = I.getPredicate();
+ if (((I.isEquality() || Pred == ICmpInst::ICMP_UGT) && CRhs == 1) ||
+ (Pred == ICmpInst::ICMP_ULT && CRhs == 2)) {
+ Value *Op = CtpopLhs->getArgOperand(0);
+ KnownBits OpKnown = computeKnownBits(Op, Q.DL,
+ /*Depth*/ 0, Q.AC, Q.CxtI, Q.DT);
+ // A min population > 1 needs no handling; it should already be constant folded.
+ if (OpKnown.countMinPopulation() == 1) {
+ Value *And = Builder.CreateAnd(
+ Op, Constant::getIntegerValue(Op->getType(), ~(OpKnown.One)));
+ return new ICmpInst(
+ (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_ULT)
+ ? ICmpInst::ICMP_EQ
+ : ICmpInst::ICMP_NE,
+ And, Constant::getNullValue(Op->getType()));
+ }
+ }
+
+ return nullptr;
+}
+
/// Fold an equality icmp with LLVM intrinsic and constant operand.
Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant(
ICmpInst &Cmp, IntrinsicInst *II, const APInt &C) {
@@ -3749,6 +3787,11 @@ Instruction *InstCombinerImpl::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
Pred, cast<SaturatingInst>(II), C, Builder))
return Folded;
break;
+ case Intrinsic::ctpop: {
+ const SimplifyQuery Q = SQ.getWithInstruction(&Cmp);
+ if (Instruction *R = foldCtpopPow2Test(Cmp, II, C, Builder, Q))
+ return R;
+ } break;
}
if (Cmp.isEquality())
diff --git a/llvm/test/Transforms/InstCombine/ispow2.ll b/llvm/test/Transforms/InstCombine/ispow2.ll
index f253fed9ec54be..4d6e4327721000 100644
--- a/llvm/test/Transforms/InstCombine/ispow2.ll
+++ b/llvm/test/Transforms/InstCombine/ispow2.ll
@@ -1269,9 +1269,8 @@ define i1 @blsmsk_is_p2_or_z_fail_bad_cmp(i32 %x, i32 %z) {
declare <2 x i32> @llvm.ctpop.2xi32(<2 x i32>)
define i1 @is_pow2_nz_known_bits(i32 %xin) {
; CHECK-LABEL: @is_pow2_nz_known_bits(
-; CHECK-NEXT: [[X:%.*]] = or i32 [[XIN:%.*]], 64
-; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG3:![0-9]+]]
-; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[CNT]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[XIN:%.*]], -65
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%x = or i32 %xin, 64
@@ -1283,7 +1282,7 @@ define i1 @is_pow2_nz_known_bits(i32 %xin) {
define i1 @is_pow2_nz_known_bits_fail_multiuse(i32 %xin) {
; CHECK-LABEL: @is_pow2_nz_known_bits_fail_multiuse(
; CHECK-NEXT: [[X:%.*]] = or i32 [[XIN:%.*]], 64
-; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG3]]
+; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG3:![0-9]+]]
; CHECK-NEXT: call void @use.i32(i32 [[CNT]])
; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[CNT]], 1
; CHECK-NEXT: ret i1 [[R]]
@@ -1297,9 +1296,7 @@ define i1 @is_pow2_nz_known_bits_fail_multiuse(i32 %xin) {
define i1 @not_pow2_nz_known_bits(i32 %xin) {
; CHECK-LABEL: @not_pow2_nz_known_bits(
-; CHECK-NEXT: [[X:%.*]] = or i32 [[XIN:%.*]], 1
-; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG3]]
-; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[CNT]], 1
+; CHECK-NEXT: [[R:%.*]] = icmp ugt i32 [[XIN:%.*]], 1
; CHECK-NEXT: ret i1 [[R]]
;
%x = or i32 %xin, 1
@@ -1323,9 +1320,8 @@ define i1 @not_pow2_nz_known_bits_fail_not_p2_test(i32 %xin) {
define i1 @is_pow2_or_z_known_bits(i32 %xin) {
; CHECK-LABEL: @is_pow2_or_z_known_bits(
-; CHECK-NEXT: [[X:%.*]] = or i32 [[XIN:%.*]], -2147483648
-; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG3]]
-; CHECK-NEXT: [[R:%.*]] = icmp ult i32 [[CNT]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[XIN:%.*]], 2147483647
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%x = or i32 %xin, 2147483648
@@ -1336,9 +1332,8 @@ define i1 @is_pow2_or_z_known_bits(i32 %xin) {
define <2 x i1> @not_pow2_or_z_known_bits(<2 x i32> %xin) {
; CHECK-LABEL: @not_pow2_or_z_known_bits(
-; CHECK-NEXT: [[X:%.*]] = or <2 x i32> [[XIN:%.*]], <i32 64, i32 64>
-; CHECK-NEXT: [[CNT:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X]]), !range [[RNG3]]
-; CHECK-NEXT: [[R:%.*]] = icmp ugt <2 x i32> [[CNT]], <i32 1, i32 1>
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[XIN:%.*]], <i32 -65, i32 -65>
+; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT: ret <2 x i1> [[R]]
;
%x = or <2 x i32> %xin, <i32 64, i32 64>
More information about the llvm-commits
mailing list