[llvm] [InstCombine] Fold the `log2_ceil` idiom (PR #76661)

Yingwei Zheng via llvm-commits llvm-commits at lists.llvm.org
Sat Jan 6 10:03:57 PST 2024


https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/76661

>From a3404fb4a585525b2626f860e34563664e3d31b2 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sun, 31 Dec 2023 23:13:26 +0800
Subject: [PATCH 1/3] [InstCombine] Add pre-commit tests. NFC.

---
 .../InstCombine/fold-log2-ceil-idiom.ll       | 236 ++++++++++++++++++
 1 file changed, 236 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/fold-log2-ceil-idiom.ll

diff --git a/llvm/test/Transforms/InstCombine/fold-log2-ceil-idiom.ll b/llvm/test/Transforms/InstCombine/fold-log2-ceil-idiom.ll
new file mode 100644
index 00000000000000..84069470b3827b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-log2-ceil-idiom.ll
@@ -0,0 +1,236 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i32 @log2_ceil_idiom(i32 %x) {
+; CHECK-LABEL: define i32 @log2_ceil_idiom(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0:![0-9]+]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[CTLZ]], 31
+; CHECK-NEXT:    [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %xor = xor i32 %ctlz, 31
+  %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp ugt i32 %ctpop, 1
+  %zext = zext i1 %cmp to i32
+  %ret = add i32 %xor, %zext
+  ret i32 %ret
+}
+
+define i5 @log2_ceil_idiom_trunc(i32 %x) {
+; CHECK-LABEL: define i5 @log2_ceil_idiom_trunc(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[CTLZ]] to i5
+; CHECK-NEXT:    [[XOR:%.*]] = xor i5 [[TRUNC]], -1
+; CHECK-NEXT:    [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i5
+; CHECK-NEXT:    [[RET:%.*]] = add i5 [[XOR]], [[ZEXT]]
+; CHECK-NEXT:    ret i5 [[RET]]
+;
+  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %trunc = trunc i32 %ctlz to i5
+  %xor = xor i5 %trunc, 31
+  %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp ugt i32 %ctpop, 1
+  %zext = zext i1 %cmp to i5
+  %ret = add i5 %xor, %zext
+  ret i5 %ret
+}
+
+define i64 @log2_ceil_idiom_zext(i32 %x) {
+; CHECK-LABEL: define i64 @log2_ceil_idiom_zext(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[CTLZ]], 31
+; CHECK-NEXT:    [[EXT:%.*]] = zext nneg i32 [[XOR]] to i64
+; CHECK-NEXT:    [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i64
+; CHECK-NEXT:    [[RET:%.*]] = add nuw nsw i64 [[EXT]], [[ZEXT]]
+; CHECK-NEXT:    ret i64 [[RET]]
+;
+  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %xor = xor i32 %ctlz, 31
+  %ext = zext nneg i32 %xor to i64
+  %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp ugt i32 %ctpop, 1
+  %zext = zext i1 %cmp to i64
+  %ret = add i64 %ext, %zext
+  ret i64 %ret
+}
+
+define i32 @log2_ceil_idiom_power2_test2(i32 %x) {
+; CHECK-LABEL: define i32 @log2_ceil_idiom_power2_test2(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[CTLZ]], 31
+; CHECK-NEXT:    [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[CTPOP]], 1
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %xor = xor i32 %ctlz, 31
+  %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp ne i32 %ctpop, 1
+  %zext = zext i1 %cmp to i32
+  %ret = add i32 %xor, %zext
+  ret i32 %ret
+}
+
+define i32 @log2_ceil_idiom_commuted(i32 %x) {
+; CHECK-LABEL: define i32 @log2_ceil_idiom_commuted(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[CTLZ]], 31
+; CHECK-NEXT:    [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %xor = xor i32 %ctlz, 31
+  %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp ugt i32 %ctpop, 1
+  %zext = zext i1 %cmp to i32
+  %ret = add i32 %zext, %xor
+  ret i32 %ret
+}
+
+; Negative tests
+
+define i32 @log2_ceil_idiom_x_may_be_zero(i32 %x) {
+; CHECK-LABEL: define i32 @log2_ceil_idiom_x_may_be_zero(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 false), !range [[RNG0]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[CTLZ]], 31
+; CHECK-NEXT:    [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+  %xor = xor i32 %ctlz, 31
+  %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp ugt i32 %ctpop, 1
+  %zext = zext i1 %cmp to i32
+  %ret = add i32 %xor, %zext
+  ret i32 %ret
+}
+
+define i4 @log2_ceil_idiom_trunc_too_short(i32 %x) {
+; CHECK-LABEL: define i4 @log2_ceil_idiom_trunc_too_short(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[CTLZ]] to i4
+; CHECK-NEXT:    [[XOR:%.*]] = xor i4 [[TRUNC]], -1
+; CHECK-NEXT:    [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i4
+; CHECK-NEXT:    [[RET:%.*]] = add i4 [[XOR]], [[ZEXT]]
+; CHECK-NEXT:    ret i4 [[RET]]
+;
+  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %trunc = trunc i32 %ctlz to i4
+  %xor = xor i4 %trunc, 31
+  %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp ugt i32 %ctpop, 1
+  %zext = zext i1 %cmp to i4
+  %ret = add i4 %xor, %zext
+  ret i4 %ret
+}
+
+define i32 @log2_ceil_idiom_mismatched_operands(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @log2_ceil_idiom_mismatched_operands(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[CTLZ]], 31
+; CHECK-NEXT:    [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[Y]]), !range [[RNG0]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %xor = xor i32 %ctlz, 31
+  %ctpop = tail call i32 @llvm.ctpop.i32(i32 %y)
+  %cmp = icmp ugt i32 %ctpop, 1
+  %zext = zext i1 %cmp to i32
+  %ret = add i32 %xor, %zext
+  ret i32 %ret
+}
+
+define i32 @log2_ceil_idiom_wrong_constant(i32 %x) {
+; CHECK-LABEL: define i32 @log2_ceil_idiom_wrong_constant(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[CTLZ]], 30
+; CHECK-NEXT:    [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %xor = xor i32 %ctlz, 30
+  %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp ugt i32 %ctpop, 1
+  %zext = zext i1 %cmp to i32
+  %ret = add i32 %xor, %zext
+  ret i32 %ret
+}
+
+define i32 @log2_ceil_idiom_not_a_power2_test1(i32 %x) {
+; CHECK-LABEL: define i32 @log2_ceil_idiom_not_a_power2_test1(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[CTLZ]], 31
+; CHECK-NEXT:    [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CTPOP]], 1
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %xor = xor i32 %ctlz, 31
+  %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp eq i32 %ctpop, 1
+  %zext = zext i1 %cmp to i32
+  %ret = add i32 %xor, %zext
+  ret i32 %ret
+}
+
+define i32 @log2_ceil_idiom_not_a_power2_test2(i32 %x) {
+; CHECK-LABEL: define i32 @log2_ceil_idiom_not_a_power2_test2(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[CTLZ]], 31
+; CHECK-NEXT:    [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 2
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %xor = xor i32 %ctlz, 31
+  %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp ugt i32 %ctpop, 2
+  %zext = zext i1 %cmp to i32
+  %ret = add i32 %xor, %zext
+  ret i32 %ret
+}
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i32 @llvm.ctpop.i32(i32)
+;.
+; CHECK: [[RNG0]] = !{i32 0, i32 33}
+;.

>From 8f79898dc9a11ff5157b4965281528ef32d4bf6b Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sun, 31 Dec 2023 23:19:02 +0800
Subject: [PATCH 2/3] [InstCombine] Fold the log2_ceil idiom

---
 .../InstCombine/InstCombineAddSub.cpp         | 22 +++++++++
 .../InstCombine/fold-log2-ceil-idiom.ll       | 49 +++++++------------
 2 files changed, 39 insertions(+), 32 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 96b612254ca500..11b2a69703779a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1723,6 +1723,28 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
         I, Builder.CreateIntrinsic(Intrinsic::ctpop, {I.getType()},
                                    {Builder.CreateOr(A, B)}));
 
+  // Fold the log2_ceil idiom:
+  // zext(ctpop(A, true) >u/!= 1) + (ctlz(A) ^ (BW - 1)) --> BW - ctlz(A - 1)
+  // TODO: Add one-use checks?
+  const APInt *XorC;
+  if (match(
+          &I,
+          m_c_Add(m_ZExt(m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value(A)),
+                                m_One())),
+                  m_ZExtOrSelf(m_Xor(m_TruncOrSelf(m_Intrinsic<Intrinsic::ctlz>(
+                                         m_Deferred(A), m_One())),
+                                     m_APInt(XorC))))) &&
+      (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_NE) &&
+      *XorC == A->getType()->getScalarSizeInBits() - 1) {
+    Value *Sub = Builder.CreateAdd(A, Constant::getAllOnesValue(A->getType()));
+    Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {A->getType()},
+                                          {Sub, Builder.getFalse()});
+    Value *Ret = Builder.CreateSub(
+        ConstantInt::get(A->getType(), A->getType()->getScalarSizeInBits()),
+        Ctlz, "", /*HasNUW*/ true, /*HasNSW*/ true);
+    return replaceInstUsesWith(I, Builder.CreateZExtOrTrunc(Ret, I.getType()));
+  }
+
   if (Instruction *Res = foldSquareSumInt(I))
     return Res;
 
diff --git a/llvm/test/Transforms/InstCombine/fold-log2-ceil-idiom.ll b/llvm/test/Transforms/InstCombine/fold-log2-ceil-idiom.ll
index 84069470b3827b..7e5977d67195a4 100644
--- a/llvm/test/Transforms/InstCombine/fold-log2-ceil-idiom.ll
+++ b/llvm/test/Transforms/InstCombine/fold-log2-ceil-idiom.ll
@@ -4,12 +4,9 @@
 define i32 @log2_ceil_idiom(i32 %x) {
 ; CHECK-LABEL: define i32 @log2_ceil_idiom(
 ; CHECK-SAME: i32 [[X:%.*]]) {
-; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0:![0-9]+]]
-; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[CTLZ]], 31
-; CHECK-NEXT:    [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
-; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
-; CHECK-NEXT:    [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false), !range [[RNG0:![0-9]+]]
+; CHECK-NEXT:    [[RET:%.*]] = sub nuw nsw i32 32, [[TMP2]]
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
   %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
@@ -24,13 +21,10 @@ define i32 @log2_ceil_idiom(i32 %x) {
 define i5 @log2_ceil_idiom_trunc(i32 %x) {
 ; CHECK-LABEL: define i5 @log2_ceil_idiom_trunc(
 ; CHECK-SAME: i32 [[X:%.*]]) {
-; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
-; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[CTLZ]] to i5
-; CHECK-NEXT:    [[XOR:%.*]] = xor i5 [[TRUNC]], -1
-; CHECK-NEXT:    [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
-; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i5
-; CHECK-NEXT:    [[RET:%.*]] = add i5 [[XOR]], [[ZEXT]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false), !range [[RNG0]]
+; CHECK-NEXT:    [[TMP3:%.*]] = sub nsw i32 0, [[TMP2]]
+; CHECK-NEXT:    [[RET:%.*]] = trunc i32 [[TMP3]] to i5
 ; CHECK-NEXT:    ret i5 [[RET]]
 ;
   %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
@@ -46,13 +40,10 @@ define i5 @log2_ceil_idiom_trunc(i32 %x) {
 define i64 @log2_ceil_idiom_zext(i32 %x) {
 ; CHECK-LABEL: define i64 @log2_ceil_idiom_zext(
 ; CHECK-SAME: i32 [[X:%.*]]) {
-; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
-; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[CTLZ]], 31
-; CHECK-NEXT:    [[EXT:%.*]] = zext nneg i32 [[XOR]] to i64
-; CHECK-NEXT:    [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
-; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i64
-; CHECK-NEXT:    [[RET:%.*]] = add nuw nsw i64 [[EXT]], [[ZEXT]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false), !range [[RNG0]]
+; CHECK-NEXT:    [[TMP3:%.*]] = sub nuw nsw i32 32, [[TMP2]]
+; CHECK-NEXT:    [[RET:%.*]] = zext i32 [[TMP3]] to i64
 ; CHECK-NEXT:    ret i64 [[RET]]
 ;
   %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
@@ -68,12 +59,9 @@ define i64 @log2_ceil_idiom_zext(i32 %x) {
 define i32 @log2_ceil_idiom_power2_test2(i32 %x) {
 ; CHECK-LABEL: define i32 @log2_ceil_idiom_power2_test2(
 ; CHECK-SAME: i32 [[X:%.*]]) {
-; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
-; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[CTLZ]], 31
-; CHECK-NEXT:    [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[CTPOP]], 1
-; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
-; CHECK-NEXT:    [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false), !range [[RNG0]]
+; CHECK-NEXT:    [[RET:%.*]] = sub nuw nsw i32 32, [[TMP2]]
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
   %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
@@ -88,12 +76,9 @@ define i32 @log2_ceil_idiom_power2_test2(i32 %x) {
 define i32 @log2_ceil_idiom_commuted(i32 %x) {
 ; CHECK-LABEL: define i32 @log2_ceil_idiom_commuted(
 ; CHECK-SAME: i32 [[X:%.*]]) {
-; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
-; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[CTLZ]], 31
-; CHECK-NEXT:    [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
-; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
-; CHECK-NEXT:    [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false), !range [[RNG0]]
+; CHECK-NEXT:    [[RET:%.*]] = sub nuw nsw i32 32, [[TMP2]]
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
   %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)

>From 9a36cbf926e556a6817aa28c506c9def0de45f24 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sun, 7 Jan 2024 01:52:49 +0800
Subject: [PATCH 3/3] [InstCombine] Add one-use constraints for folding of
 log2-ceil idioms

---
 .../InstCombine/InstCombineAddSub.cpp         |  20 +--
 .../InstCombine/fold-log2-ceil-idiom.ll       | 116 ++++++++++++++++++
 2 files changed, 127 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 11b2a69703779a..c7e6f32c5406a6 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1724,16 +1724,18 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
                                    {Builder.CreateOr(A, B)}));
 
   // Fold the log2_ceil idiom:
-  // zext(ctpop(A, true) >u/!= 1) + (ctlz(A) ^ (BW - 1)) --> BW - ctlz(A - 1)
-  // TODO: Add one-use checks?
+  // zext(ctpop(A) >u/!= 1) + (ctlz(A, true) ^ (BW - 1))
+  // -->
+  // BW - ctlz(A - 1, false)
   const APInt *XorC;
-  if (match(
-          &I,
-          m_c_Add(m_ZExt(m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value(A)),
-                                m_One())),
-                  m_ZExtOrSelf(m_Xor(m_TruncOrSelf(m_Intrinsic<Intrinsic::ctlz>(
-                                         m_Deferred(A), m_One())),
-                                     m_APInt(XorC))))) &&
+  if (match(&I,
+            m_c_Add(
+                m_ZExt(m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value(A)),
+                              m_One())),
+                m_OneUse(m_ZExtOrSelf(m_OneUse(m_Xor(
+                    m_OneUse(m_TruncOrSelf(m_OneUse(
+                        m_Intrinsic<Intrinsic::ctlz>(m_Deferred(A), m_One())))),
+                    m_APInt(XorC))))))) &&
       (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_NE) &&
       *XorC == A->getType()->getScalarSizeInBits() - 1) {
     Value *Sub = Builder.CreateAdd(A, Constant::getAllOnesValue(A->getType()));
diff --git a/llvm/test/Transforms/InstCombine/fold-log2-ceil-idiom.ll b/llvm/test/Transforms/InstCombine/fold-log2-ceil-idiom.ll
index 7e5977d67195a4..2594c3fce81464 100644
--- a/llvm/test/Transforms/InstCombine/fold-log2-ceil-idiom.ll
+++ b/llvm/test/Transforms/InstCombine/fold-log2-ceil-idiom.ll
@@ -90,6 +90,26 @@ define i32 @log2_ceil_idiom_commuted(i32 %x) {
   ret i32 %ret
 }
 
+define i32 @log2_ceil_idiom_multiuse1(i32 %x) {
+; CHECK-LABEL: define i32 @log2_ceil_idiom_multiuse1(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
+; CHECK-NEXT:    call void @use32(i32 [[CTPOP]])
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false), !range [[RNG0]]
+; CHECK-NEXT:    [[RET:%.*]] = sub nuw nsw i32 32, [[TMP2]]
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %xor = xor i32 %ctlz, 31
+  %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+  call void @use32(i32 %ctpop)
+  %cmp = icmp ugt i32 %ctpop, 1
+  %zext = zext i1 %cmp to i32
+  %ret = add i32 %xor, %zext
+  ret i32 %ret
+}
+
 ; Negative tests
 
 define i32 @log2_ceil_idiom_x_may_be_zero(i32 %x) {
@@ -214,6 +234,102 @@ define i32 @log2_ceil_idiom_not_a_power2_test2(i32 %x) {
   ret i32 %ret
 }
 
+define i32 @log2_ceil_idiom_multiuse2(i32 %x) {
+; CHECK-LABEL: define i32 @log2_ceil_idiom_multiuse2(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
+; CHECK-NEXT:    call void @use32(i32 [[CTLZ]])
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[CTLZ]], 31
+; CHECK-NEXT:    [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  call void @use32(i32 %ctlz)
+  %xor = xor i32 %ctlz, 31
+  %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp ugt i32 %ctpop, 1
+  %zext = zext i1 %cmp to i32
+  %ret = add i32 %xor, %zext
+  ret i32 %ret
+}
+
+define i32 @log2_ceil_idiom_multiuse3(i32 %x) {
+; CHECK-LABEL: define i32 @log2_ceil_idiom_multiuse3(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[CTLZ]], 31
+; CHECK-NEXT:    call void @use32(i32 [[XOR]])
+; CHECK-NEXT:    [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %xor = xor i32 %ctlz, 31
+  call void @use32(i32 %xor)
+  %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp ugt i32 %ctpop, 1
+  %zext = zext i1 %cmp to i32
+  %ret = add i32 %xor, %zext
+  ret i32 %ret
+}
+
+define i5 @log2_ceil_idiom_trunc_multiuse4(i32 %x) {
+; CHECK-LABEL: define i5 @log2_ceil_idiom_trunc_multiuse4(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[CTLZ]] to i5
+; CHECK-NEXT:    call void @use5(i5 [[TRUNC]])
+; CHECK-NEXT:    [[XOR:%.*]] = xor i5 [[TRUNC]], -1
+; CHECK-NEXT:    [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i5
+; CHECK-NEXT:    [[RET:%.*]] = add i5 [[XOR]], [[ZEXT]]
+; CHECK-NEXT:    ret i5 [[RET]]
+;
+  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %trunc = trunc i32 %ctlz to i5
+  call void @use5(i5 %trunc)
+  %xor = xor i5 %trunc, 31
+  %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp ugt i32 %ctpop, 1
+  %zext = zext i1 %cmp to i5
+  %ret = add i5 %xor, %zext
+  ret i5 %ret
+}
+
+define i64 @log2_ceil_idiom_zext_multiuse5(i32 %x) {
+; CHECK-LABEL: define i64 @log2_ceil_idiom_zext_multiuse5(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[CTLZ]], 31
+; CHECK-NEXT:    [[EXT:%.*]] = zext nneg i32 [[XOR]] to i64
+; CHECK-NEXT:    call void @use64(i64 [[EXT]])
+; CHECK-NEXT:    [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i64
+; CHECK-NEXT:    [[RET:%.*]] = add nuw nsw i64 [[EXT]], [[ZEXT]]
+; CHECK-NEXT:    ret i64 [[RET]]
+;
+  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %xor = xor i32 %ctlz, 31
+  %ext = zext nneg i32 %xor to i64
+  call void @use64(i64 %ext)
+  %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp ugt i32 %ctpop, 1
+  %zext = zext i1 %cmp to i64
+  %ret = add i64 %ext, %zext
+  ret i64 %ret
+}
+
+declare void @use5(i5)
+declare void @use32(i32)
+declare void @use64(i64)
+
 declare i32 @llvm.ctlz.i32(i32, i1)
 declare i32 @llvm.ctpop.i32(i32)
 ;.



More information about the llvm-commits mailing list