[llvm] [InstCombine] Fold `ctpop(X) <u 2` into `ctpop(X) == 1` if X is non-zero (PR #67268)

Yingwei Zheng via llvm-commits llvm-commits at lists.llvm.org
Sun Sep 24 10:28:37 PDT 2023


https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/67268

From ae93a1ed893c10d950fa529d5933d92cc9259262 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sun, 24 Sep 2023 23:19:25 +0800
Subject: [PATCH 1/2] [InstCombine] Simplify pattern `isPow2OrZero` if X is
 non-zero

---
 llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 9 ++++++++-
 llvm/test/Transforms/InstCombine/ispow2.ll              | 2 +-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index a219dac7acfbe16..9aafd83d42d0756 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3412,6 +3412,14 @@ static Instruction *foldCtpopPow2Test(ICmpInst &I, IntrinsicInst *CtpopLhs,
                                       const SimplifyQuery &Q) {
   assert(CtpopLhs->getIntrinsicID() == Intrinsic::ctpop &&
          "Non-ctpop intrin in ctpop fold");
+
+  const ICmpInst::Predicate Pred = I.getPredicate();
+  // If we know X is non-zero, we can fold isPow2OrZero into isPow2.
+  if (Pred == ICmpInst::ICMP_ULT && CRhs == 2 &&
+      isKnownNonZero(CtpopLhs, Q.DL, /*Depth*/ 0, Q.AC, Q.CxtI, Q.DT))
+    return ICmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ, CtpopLhs,
+                            ConstantInt::get(CtpopLhs->getType(), 1));
+
   if (!CtpopLhs->hasOneUse())
     return nullptr;
 
@@ -3423,7 +3431,6 @@ static Instruction *foldCtpopPow2Test(ICmpInst &I, IntrinsicInst *CtpopLhs,
   // If we know any bit of X can be folded to:
   //    IsPow2       : X & (~Bit) == 0
   //    NotPow2      : X & (~Bit) != 0
-  const ICmpInst::Predicate Pred = I.getPredicate();
   if (((I.isEquality() || Pred == ICmpInst::ICMP_UGT) && CRhs == 1) ||
       (Pred == ICmpInst::ICMP_ULT && CRhs == 2)) {
     Value *Op = CtpopLhs->getArgOperand(0);
diff --git a/llvm/test/Transforms/InstCombine/ispow2.ll b/llvm/test/Transforms/InstCombine/ispow2.ll
index 356f23e1a01d3ff..90bba6aefac971d 100644
--- a/llvm/test/Transforms/InstCombine/ispow2.ll
+++ b/llvm/test/Transforms/InstCombine/ispow2.ll
@@ -198,7 +198,7 @@ define i1 @is_pow2_non_zero_ult_2(i32 %x) {
 ; CHECK-NEXT:    [[NOTZERO:%.*]] = icmp ne i32 [[X:%.*]], 0
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[NOTZERO]])
 ; CHECK-NEXT:    [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[T0]], 2
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[T0]], 1
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %notzero = icmp ne i32 %x, 0

From 0b124db71b6a9ab622055c787be0c12e45e3a405 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Mon, 25 Sep 2023 01:25:32 +0800
Subject: [PATCH 2/2] [InstCombine] Fold pattern `ctpop(X) == 1` into `ctpop(X)
 <u 2` if X is non-zero

---
 llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 9 +++++----
 llvm/test/Transforms/InstCombine/icmp-ne-pow2.ll        | 4 ++--
 llvm/test/Transforms/InstCombine/ispow2.ll              | 6 +++---
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 9aafd83d42d0756..2aadc0a4bd7ee00 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3414,11 +3414,12 @@ static Instruction *foldCtpopPow2Test(ICmpInst &I, IntrinsicInst *CtpopLhs,
          "Non-ctpop intrin in ctpop fold");
 
   const ICmpInst::Predicate Pred = I.getPredicate();
-  // If we know X is non-zero, we can fold isPow2OrZero into isPow2.
-  if (Pred == ICmpInst::ICMP_ULT && CRhs == 2 &&
+  // If we know X is non-zero, we can fold `ctpop(X) == 1` into `ctpop(X) <u 2`
+  // since the latter gives better codegen.
+  if (Pred == ICmpInst::ICMP_EQ && CRhs == 1 &&
       isKnownNonZero(CtpopLhs, Q.DL, /*Depth*/ 0, Q.AC, Q.CxtI, Q.DT))
-    return ICmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ, CtpopLhs,
-                            ConstantInt::get(CtpopLhs->getType(), 1));
+    return ICmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_ULT, CtpopLhs,
+                            ConstantInt::get(CtpopLhs->getType(), 2));
 
   if (!CtpopLhs->hasOneUse())
     return nullptr;
diff --git a/llvm/test/Transforms/InstCombine/icmp-ne-pow2.ll b/llvm/test/Transforms/InstCombine/icmp-ne-pow2.ll
index 224ea3cd76cc6d1..6ee38de4d7fb089 100644
--- a/llvm/test/Transforms/InstCombine/icmp-ne-pow2.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-ne-pow2.ll
@@ -311,7 +311,7 @@ define i32 @pow2_32_nonconst_assume(i32 %x, i32 %y) {
 define i32 @pow2_32_gtnonconst_assume(i32 %x, i32 %y) {
 ; CHECK-LABEL: @pow2_32_gtnonconst_assume(
 ; CHECK-NEXT:    [[CTPOP:%.*]] = call i32 @llvm.ctpop.i32(i32 [[Y:%.*]]), !range [[RNG0]]
-; CHECK-NEXT:    [[YP2:%.*]] = icmp eq i32 [[CTPOP]], 1
+; CHECK-NEXT:    [[YP2:%.*]] = icmp ult i32 [[CTPOP]], 2
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[YP2]])
 ; CHECK-NEXT:    [[YGT:%.*]] = icmp ugt i32 [[Y]], [[X:%.*]]
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[YGT]])
@@ -518,7 +518,7 @@ define i32 @maybe_pow2_32_noncont(i32 %x, i32 %y) {
 ; CHECK-NEXT:    br i1 [[YGT8]], label [[CONT1:%.*]], label [[CONT2:%.*]]
 ; CHECK:       Cont1:
 ; CHECK-NEXT:    [[CTPOP:%.*]] = call i32 @llvm.ctpop.i32(i32 [[Y]]), !range [[RNG0]]
-; CHECK-NEXT:    [[YP2:%.*]] = icmp eq i32 [[CTPOP]], 1
+; CHECK-NEXT:    [[YP2:%.*]] = icmp ult i32 [[CTPOP]], 2
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[YP2]])
 ; CHECK-NEXT:    br i1 true, label [[CONT2]], label [[FALSE:%.*]]
 ; CHECK:       Cont2:
diff --git a/llvm/test/Transforms/InstCombine/ispow2.ll b/llvm/test/Transforms/InstCombine/ispow2.ll
index 90bba6aefac971d..ef910ef0b51bd5a 100644
--- a/llvm/test/Transforms/InstCombine/ispow2.ll
+++ b/llvm/test/Transforms/InstCombine/ispow2.ll
@@ -198,7 +198,7 @@ define i1 @is_pow2_non_zero_ult_2(i32 %x) {
 ; CHECK-NEXT:    [[NOTZERO:%.*]] = icmp ne i32 [[X:%.*]], 0
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[NOTZERO]])
 ; CHECK-NEXT:    [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[T0]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[T0]], 2
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %notzero = icmp ne i32 %x, 0
@@ -213,7 +213,7 @@ define i1 @is_pow2_non_zero_eq_1(i32 %x) {
 ; CHECK-NEXT:    [[NOTZERO:%.*]] = icmp ne i32 [[X:%.*]], 0
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[NOTZERO]])
 ; CHECK-NEXT:    [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[T0]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[T0]], 2
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %notzero = icmp ne i32 %x, 0
@@ -1436,7 +1436,7 @@ define i1 @is_pow2_nz_known_bits_fail_multiuse(i32 %xin) {
 ; CHECK-NEXT:    [[X:%.*]] = or i32 [[XIN:%.*]], 64
 ; CHECK-NEXT:    [[CNT:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG3:![0-9]+]]
 ; CHECK-NEXT:    call void @use.i32(i32 [[CNT]])
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i32 [[CNT]], 1
+; CHECK-NEXT:    [[R:%.*]] = icmp ult i32 [[CNT]], 2
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = or i32 %xin, 64



More information about the llvm-commits mailing list