[llvm] [InstCombine] Add CTLZ -> CTTZ simplification (PR #164733)

Mihail Mihov via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 22 16:56:01 PDT 2025


https://github.com/MihailMihov created https://github.com/llvm/llvm-project/pull/164733

This PR adds the simplification `ctlz(~x & (x - 1)) -> bitwidth - cttz(x, false)` ([Alive2](https://alive2.llvm.org/ce/z/vVDRCu)).

For the implementation I initially referenced [D76568](https://reviews.llvm.org/D76568), which added `ctpop(~x & (x - 1)) -> cttz(x, false)` and `ctpop(x | -x) -> bitwidth - cttz(x, false)`. I also saw that fdcb27105537f77c78c4473d4f7c47146ddbab69 later restricted both of those folds to cases where the and/or has one use, and that a1fe6beb1ec3903e9c9351ae58c6747545189a58 then limited that restriction to the fold that emits more than one instruction. From this and from reading the contributor guide I concluded that the current simplification should also check for one use. While writing the tests, however, I noticed that checking one use on `Op0` (%and) only excludes the case where %and is used again; I don't see why %not and %dec shouldn't be checked too, so I've added one-use checks for those as well. Is this incorrect?

For the tests I couldn't find a good place to add them, so I made a new `llvm/test/Transforms/InstCombine/ctlz-cttz.ll`, but if there is already a suitable file for them I'll move them there. Also, while attempting to follow the contributor guide, I added a few different tests, but I'm not sure whether there are too few or too many, and I don't know if the `sub` in the simplification requires any flag tests.

issue #164436 

>From 4e481e15eb5c6458d37ef8c736bc9f539a135dad Mon Sep 17 00:00:00 2001
From: Mihail Mihov <mihovmihailp at gmail.com>
Date: Thu, 23 Oct 2025 01:26:25 +0200
Subject: [PATCH 1/2] [InstCombine] Add failing test for CTLZ -> CTTZ
 simplification

---
 llvm/test/Transforms/InstCombine/ctlz-cttz.ll | 107 ++++++++++++++++++
 1 file changed, 107 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/ctlz-cttz.ll

diff --git a/llvm/test/Transforms/InstCombine/ctlz-cttz.ll b/llvm/test/Transforms/InstCombine/ctlz-cttz.ll
new file mode 100644
index 0000000000000..5357b793ee6ea
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/ctlz-cttz.ll
@@ -0,0 +1,107 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -S -passes=instcombine | FileCheck %s
+
+; ctlz(~i & (i - 1)) -> bitwidth - cttz(i, false)
+define i8 @ctlz_to_sub_bw_cttz(i8 %a0) {
+; CHECK-LABEL: define i8 @ctlz_to_sub_bw_cttz(
+; CHECK-SAME: i8 [[A0:%.*]]) {
+; CHECK-NEXT:    [[DEC:%.*]] = add i8 [[A0]], -1
+; CHECK-NEXT:    [[NOT:%.*]] = xor i8 [[A0]], -1
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[DEC]], [[NOT]]
+; CHECK-NEXT:    [[CLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[AND]], i1 false)
+; CHECK-NEXT:    ret i8 [[CLZ]]
+;
+  %dec = add i8 %a0, -1
+  %not = xor i8 %a0, -1
+  %and = and i8 %dec, %not
+  %clz = tail call i8 @llvm.ctlz.i8(i8 %and, i1 false)
+  ret i8 %clz
+}
+
+declare void @use(i8)
+
+define i8 @ctlz_to_sub_bw_cttz_multi_use_dec(i8 %a0) {
+; CHECK-LABEL: define i8 @ctlz_to_sub_bw_cttz_multi_use_dec(
+; CHECK-SAME: i8 [[A0:%.*]]) {
+; CHECK-NEXT:    [[DEC:%.*]] = add i8 [[A0]], -1
+; CHECK-NEXT:    call void @use(i8 [[DEC]])
+; CHECK-NEXT:    [[NOT:%.*]] = xor i8 [[A0]], -1
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[DEC]], [[NOT]]
+; CHECK-NEXT:    [[CLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[AND]], i1 false)
+; CHECK-NEXT:    ret i8 [[CLZ]]
+;
+  %dec = add i8 %a0, -1
+  call void @use(i8 %dec)
+  %not = xor i8 %a0, -1
+  %and = and i8 %dec, %not
+  %clz = tail call i8 @llvm.ctlz.i8(i8 %and, i1 false)
+  ret i8 %clz
+}
+
+define i8 @ctlz_to_sub_bw_cttz_multi_use_not(i8 %a0) {
+; CHECK-LABEL: define i8 @ctlz_to_sub_bw_cttz_multi_use_not(
+; CHECK-SAME: i8 [[A0:%.*]]) {
+; CHECK-NEXT:    [[DEC:%.*]] = add i8 [[A0]], -1
+; CHECK-NEXT:    [[NOT:%.*]] = xor i8 [[A0]], -1
+; CHECK-NEXT:    call void @use(i8 [[NOT]])
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[DEC]], [[NOT]]
+; CHECK-NEXT:    [[CLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[AND]], i1 false)
+; CHECK-NEXT:    ret i8 [[CLZ]]
+;
+  %dec = add i8 %a0, -1
+  %not = xor i8 %a0, -1
+  call void @use(i8 %not)
+  %and = and i8 %dec, %not
+  %clz = tail call i8 @llvm.ctlz.i8(i8 %and, i1 false)
+  ret i8 %clz
+}
+
+define i8 @ctlz_to_sub_bw_cttz_multi_use_and(i8 %a0) {
+; CHECK-LABEL: define i8 @ctlz_to_sub_bw_cttz_multi_use_and(
+; CHECK-SAME: i8 [[A0:%.*]]) {
+; CHECK-NEXT:    [[DEC:%.*]] = add i8 [[A0]], -1
+; CHECK-NEXT:    [[NOT:%.*]] = xor i8 [[A0]], -1
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[DEC]], [[NOT]]
+; CHECK-NEXT:    call void @use(i8 [[AND]])
+; CHECK-NEXT:    [[CLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[AND]], i1 false)
+; CHECK-NEXT:    ret i8 [[CLZ]]
+;
+  %dec = add i8 %a0, -1
+  %not = xor i8 %a0, -1
+  %and = and i8 %dec, %not
+  call void @use(i8 %and)
+  %clz = tail call i8 @llvm.ctlz.i8(i8 %and, i1 false)
+  ret i8 %clz
+}
+
+define i8 @ctlz_to_sub_bw_cttz_commute_and(i8 %a0) {
+; CHECK-LABEL: define i8 @ctlz_to_sub_bw_cttz_commute_and(
+; CHECK-SAME: i8 [[A0:%.*]]) {
+; CHECK-NEXT:    [[DEC:%.*]] = add i8 [[A0]], -1
+; CHECK-NEXT:    [[NOT:%.*]] = xor i8 [[A0]], -1
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[DEC]], [[NOT]]
+; CHECK-NEXT:    [[CLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[AND]], i1 false)
+; CHECK-NEXT:    ret i8 [[CLZ]]
+;
+  %dec = add i8 %a0, -1
+  %not = xor i8 %a0, -1
+  %and = and i8 %not, %dec
+  %clz = tail call i8 @llvm.ctlz.i8(i8 %and, i1 false)
+  ret i8 %clz
+}
+
+define <2 x i8> @ctlz_to_sub_bw_cttz_vec_splat(<2 x i8> %a0) {
+; CHECK-LABEL: define <2 x i8> @ctlz_to_sub_bw_cttz_vec_splat(
+; CHECK-SAME: <2 x i8> [[A0:%.*]]) {
+; CHECK-NEXT:    [[DEC:%.*]] = add <2 x i8> [[A0]], splat (i8 -1)
+; CHECK-NEXT:    [[NOT:%.*]] = xor <2 x i8> [[A0]], splat (i8 -1)
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i8> [[DEC]], [[NOT]]
+; CHECK-NEXT:    [[CLZ:%.*]] = tail call range(i8 0, 9) <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[AND]], i1 false)
+; CHECK-NEXT:    ret <2 x i8> [[CLZ]]
+;
+  %dec = add <2 x i8> %a0, <i8 -1, i8 -1>
+  %not = xor <2 x i8> %a0, <i8 -1, i8 -1>
+  %and = and <2 x i8> %dec, %not
+  %clz = tail call <2 x i8>@llvm.ctlz.v2i8(<2 x i8> %and, i1 false)
+  ret <2 x i8> %clz
+}

>From 2d8559db7b4f0ef98d71bc9381afa3dd2781e66b Mon Sep 17 00:00:00 2001
From: Mihail Mihov <mihovmihailp at gmail.com>
Date: Thu, 23 Oct 2025 01:28:20 +0200
Subject: [PATCH 2/2] [InstCombine] Add CTLZ -> CTTZ simplification

---
 .../InstCombine/InstCombineCalls.cpp           | 12 ++++++++++++
 llvm/test/Transforms/InstCombine/ctlz-cttz.ll  | 18 ++++++------------
 2 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e1e24a99d0474..2855263ab09c5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -585,6 +585,18 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
           IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
       return BinaryOperator::CreateSub(ConstCtlz, X);
     }
+
+    // ctlz(~x & (x - 1)) -> bitwidth - cttz(x, false)
+    if (Op0->hasOneUse() &&
+        match(Op0, m_c_And(m_OneUse(m_Not(m_Value(X))),
+                           m_OneUse(m_Add(m_Deferred(X), m_AllOnes()))))) {
+      Type *Ty = II.getType();
+      unsigned BitWidth = Ty->getScalarSizeInBits();
+      auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
+                                              {X, IC.Builder.getFalse()});
+      auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
+      return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
+    }
   }
 
   // cttz(Pow2) -> Log2(Pow2)
diff --git a/llvm/test/Transforms/InstCombine/ctlz-cttz.ll b/llvm/test/Transforms/InstCombine/ctlz-cttz.ll
index 5357b793ee6ea..2a97c1efd9f16 100644
--- a/llvm/test/Transforms/InstCombine/ctlz-cttz.ll
+++ b/llvm/test/Transforms/InstCombine/ctlz-cttz.ll
@@ -5,10 +5,8 @@
 define i8 @ctlz_to_sub_bw_cttz(i8 %a0) {
 ; CHECK-LABEL: define i8 @ctlz_to_sub_bw_cttz(
 ; CHECK-SAME: i8 [[A0:%.*]]) {
-; CHECK-NEXT:    [[DEC:%.*]] = add i8 [[A0]], -1
-; CHECK-NEXT:    [[NOT:%.*]] = xor i8 [[A0]], -1
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[DEC]], [[NOT]]
-; CHECK-NEXT:    [[CLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[AND]], i1 false)
+; CHECK-NEXT:    [[TMP1:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[A0]], i1 false)
+; CHECK-NEXT:    [[CLZ:%.*]] = sub nuw nsw i8 8, [[TMP1]]
 ; CHECK-NEXT:    ret i8 [[CLZ]]
 ;
   %dec = add i8 %a0, -1
@@ -77,10 +75,8 @@ define i8 @ctlz_to_sub_bw_cttz_multi_use_and(i8 %a0) {
 define i8 @ctlz_to_sub_bw_cttz_commute_and(i8 %a0) {
 ; CHECK-LABEL: define i8 @ctlz_to_sub_bw_cttz_commute_and(
 ; CHECK-SAME: i8 [[A0:%.*]]) {
-; CHECK-NEXT:    [[DEC:%.*]] = add i8 [[A0]], -1
-; CHECK-NEXT:    [[NOT:%.*]] = xor i8 [[A0]], -1
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[DEC]], [[NOT]]
-; CHECK-NEXT:    [[CLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[AND]], i1 false)
+; CHECK-NEXT:    [[TMP1:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[A0]], i1 false)
+; CHECK-NEXT:    [[CLZ:%.*]] = sub nuw nsw i8 8, [[TMP1]]
 ; CHECK-NEXT:    ret i8 [[CLZ]]
 ;
   %dec = add i8 %a0, -1
@@ -93,10 +89,8 @@ define i8 @ctlz_to_sub_bw_cttz_commute_and(i8 %a0) {
 define <2 x i8> @ctlz_to_sub_bw_cttz_vec_splat(<2 x i8> %a0) {
 ; CHECK-LABEL: define <2 x i8> @ctlz_to_sub_bw_cttz_vec_splat(
 ; CHECK-SAME: <2 x i8> [[A0:%.*]]) {
-; CHECK-NEXT:    [[DEC:%.*]] = add <2 x i8> [[A0]], splat (i8 -1)
-; CHECK-NEXT:    [[NOT:%.*]] = xor <2 x i8> [[A0]], splat (i8 -1)
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i8> [[DEC]], [[NOT]]
-; CHECK-NEXT:    [[CLZ:%.*]] = tail call range(i8 0, 9) <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[AND]], i1 false)
+; CHECK-NEXT:    [[TMP1:%.*]] = call range(i8 0, 9) <2 x i8> @llvm.cttz.v2i8(<2 x i8> [[A0]], i1 false)
+; CHECK-NEXT:    [[CLZ:%.*]] = sub nuw nsw <2 x i8> splat (i8 8), [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i8> [[CLZ]]
 ;
   %dec = add <2 x i8> %a0, <i8 -1, i8 -1>



More information about the llvm-commits mailing list