[llvm] [InstCombine] Fold Minimum over trailing or leading zeros (PR #173768)

Aryan Kadole via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 29 11:07:46 PST 2025


https://github.com/ak1932 updated https://github.com/llvm/llvm-project/pull/173768

>From e567baa60da736c3df09592b0a2b22e344fff0a1 Mon Sep 17 00:00:00 2001
From: ak1932 <aryankadole95 at gmail.com>
Date: Sun, 28 Dec 2025 17:29:24 +0530
Subject: [PATCH 1/2] [InstCombine] Add baseline test for minimum ctlz cttz

---
 .../Transforms/InstCombine/umin_cttz_ctlz.ll  | 126 ++++++++++++++++++
 1 file changed, 126 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll b/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
index c8228057eeb10..3a960e16cfef7 100644
--- a/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
+++ b/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
@@ -380,3 +380,129 @@ define i16 @umin_ctlz_i16_negative_two_uses(i16 %X) {
   %ret = add i16 %ctlz, %op0
   ret i16 %ret
 }
+
+define i32 @umin_cttz_i32_zero_undef(i32 %0, i32 %1) {
+; CHECK-LABEL: define i32 @umin_cttz_i32_zero_undef(
+; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; CHECK-NEXT:    [[TMP3:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP0]], i1 false)
+; CHECK-NEXT:    [[TMP5:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP1]], i1 false)
+; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP5]])
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+  %3 = call i32 @llvm.cttz.i32(i32 %0, i1 false)
+  %4 = call i32 @llvm.cttz.i32(i32 %1, i1 false)
+  %5 = call i32 @llvm.umin.i32(i32 %3, i32 %4)
+  ret i32 %5
+}
+
+define i32 @umin_cttz_i32_zero_def(i32 %0, i32 %1) {
+; CHECK-LABEL: define i32 @umin_cttz_i32_zero_def(
+; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; CHECK-NEXT:    [[TMP3:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP0]], i1 true)
+; CHECK-NEXT:    [[TMP5:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP1]], i1 true)
+; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP5]])
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+  %3 = call i32 @llvm.cttz.i32(i32 %0, i1 true)
+  %4 = call i32 @llvm.cttz.i32(i32 %1, i1 true)
+  %5 = call i32 @llvm.umin.i32(i32 %3, i32 %4)
+  ret i32 %5
+}
+
+define i32 @umin_cttz_i32_zero_def_undef(i32 %0, i32 %1) {
+; CHECK-LABEL: define i32 @umin_cttz_i32_zero_def_undef(
+; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; CHECK-NEXT:    [[TMP3:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP0]], i1 true)
+; CHECK-NEXT:    [[TMP5:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP1]], i1 false)
+; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP5]])
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+  %3 = call i32 @llvm.cttz.i32(i32 %0, i1 true)
+  %4 = call i32 @llvm.cttz.i32(i32 %1, i1 false)
+  %5 = call i32 @llvm.umin.i32(i32 %3, i32 %4)
+  ret i32 %5
+}
+
+define i32 @umin_ctlz_i32_zero_undef(i32 %0, i32 %1) {
+; CHECK-LABEL: define i32 @umin_ctlz_i32_zero_undef(
+; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; CHECK-NEXT:    [[TMP3:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
+; CHECK-NEXT:    [[TMP5:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false)
+; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP5]])
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+  %3 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
+  %4 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
+  %5 = call i32 @llvm.umin.i32(i32 %3, i32 %4)
+  ret i32 %5
+}
+
+define i32 @umin_ctlz_i32_zero_def(i32 %0, i32 %1) {
+; CHECK-LABEL: define i32 @umin_ctlz_i32_zero_def(
+; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; CHECK-NEXT:    [[TMP3:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 true)
+; CHECK-NEXT:    [[TMP5:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 true)
+; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP5]])
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+  %3 = call i32 @llvm.ctlz.i32(i32 %0, i1 true)
+  %4 = call i32 @llvm.ctlz.i32(i32 %1, i1 true)
+  %5 = call i32 @llvm.umin.i32(i32 %3, i32 %4)
+  ret i32 %5
+}
+
+define i32 @umin_ctlz_i32_zero_def_undef(i32 %0, i32 %1) {
+; CHECK-LABEL: define i32 @umin_ctlz_i32_zero_def_undef(
+; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; CHECK-NEXT:    [[TMP3:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 true)
+; CHECK-NEXT:    [[TMP5:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false)
+; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP5]])
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+  %3 = call i32 @llvm.ctlz.i32(i32 %0, i1 true)
+  %4 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
+  %5 = call i32 @llvm.umin.i32(i32 %3, i32 %4)
+  ret i32 %5
+}
+
+define i32 @neg_umin_cttz_ctlz_i32_zero_def(i32 %0, i32 %1) {
+; CHECK-LABEL: define i32 @neg_umin_cttz_ctlz_i32_zero_def(
+; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; CHECK-NEXT:    [[TMP3:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP0]], i1 true)
+; CHECK-NEXT:    [[TMP4:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 true)
+; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP4]])
+; CHECK-NEXT:    ret i32 [[TMP5]]
+;
+  %3 = call i32 @llvm.cttz.i32(i32 %0, i1 true)
+  %4 = call i32 @llvm.ctlz.i32(i32 %1, i1 true)
+  %5 = call i32 @llvm.umin.i32(i32 %3, i32 %4)
+  ret i32 %5
+}
+
+define i32 @neg_umin_cttz_ctlz_i32_zero_undef(i32 %0, i32 %1) {
+; CHECK-LABEL: define i32 @neg_umin_cttz_ctlz_i32_zero_undef(
+; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; CHECK-NEXT:    [[TMP3:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP0]], i1 false)
+; CHECK-NEXT:    [[TMP4:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false)
+; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP4]])
+; CHECK-NEXT:    ret i32 [[TMP5]]
+;
+  %3 = call i32 @llvm.cttz.i32(i32 %0, i1 false)
+  %4 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
+  %5 = call i32 @llvm.umin.i32(i32 %3, i32 %4)
+  ret i32 %5
+}
+
+define i32 @neg_umin_cttz_ctlz_i32_zero_def_undef(i32 %0, i32 %1) {
+; CHECK-LABEL: define i32 @neg_umin_cttz_ctlz_i32_zero_def_undef(
+; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; CHECK-NEXT:    [[TMP3:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP0]], i1 true)
+; CHECK-NEXT:    [[TMP4:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false)
+; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP4]])
+; CHECK-NEXT:    ret i32 [[TMP5]]
+;
+  %3 = call i32 @llvm.cttz.i32(i32 %0, i1 true)
+  %4 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
+  %5 = call i32 @llvm.umin.i32(i32 %3, i32 %4)
+  ret i32 %5
+}

>From 9925ec20e84549f98bb27d4401b378998199bd87 Mon Sep 17 00:00:00 2001
From: ak1932 <aryankadole95 at gmail.com>
Date: Tue, 30 Dec 2025 00:37:03 +0530
Subject: [PATCH 2/2] [InstCombine] Fold Minimum over trailing or leading zeros

---
 .../InstCombine/InstCombineCalls.cpp          | 24 ++++++++++-----
 .../Transforms/InstCombine/umin_cttz_ctlz.ll  | 30 ++++++++-----------
 2 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 8c64105b63fe4..bdb52e2c02c2d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1689,9 +1689,11 @@ static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) {
 }
 
 /// Fold an unsigned minimum of trailing or leading zero bits counts:
-///   umin(cttz(CtOp, ZeroUndef), ConstOp) --> cttz(CtOp | (1 << ConstOp))
-///   umin(ctlz(CtOp, ZeroUndef), ConstOp) --> ctlz(CtOp | (SignedMin
+///   umin(cttz(CtOp1, ZeroUndef), ConstOp) --> cttz(CtOp1 | (1 << ConstOp))
+///   umin(ctlz(CtOp1, ZeroUndef), ConstOp) --> ctlz(CtOp1 | (SignedMin
 ///                                              >> ConstOp))
+///   umin(cttz(CtOp1, Z1), cttz(CtOp2, Z2)) --> cttz(CtOp1 | CtOp2, Z1 | Z2)
+///   umin(ctlz(CtOp1, Z1), ctlz(CtOp2, Z2)) --> ctlz(CtOp1 | CtOp2, Z1 | Z2)
 template <Intrinsic::ID IntrID>
 static Value *
 foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1,
@@ -1700,12 +1702,18 @@ foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1,
   static_assert(IntrID == Intrinsic::cttz || IntrID == Intrinsic::ctlz,
                 "This helper only supports cttz and ctlz intrinsics");
 
-  Value *CtOp;
-  Value *ZeroUndef;
-  if (!match(I0,
-             m_OneUse(m_Intrinsic<IntrID>(m_Value(CtOp), m_Value(ZeroUndef)))))
+  Value *CtOp1, *CtOp2;
+  Value *ZeroUndef1, *ZeroUndef2;
+  if (!match(I0, m_OneUse(
+                     m_Intrinsic<IntrID>(m_Value(CtOp1), m_Value(ZeroUndef1)))))
     return nullptr;
 
+  if (match(I1,
+            m_OneUse(m_Intrinsic<IntrID>(m_Value(CtOp2), m_Value(ZeroUndef2)))))
+    return Builder.CreateBinaryIntrinsic(
+        IntrID, Builder.CreateOr(CtOp1, CtOp2),
+        Builder.CreateOr(ZeroUndef1, ZeroUndef2));
+
   unsigned BitWidth = I1->getType()->getScalarSizeInBits();
   auto LessBitWidth = [BitWidth](auto &C) { return C.ult(BitWidth); };
   if (!match(I1, m_CheckedInt(LessBitWidth)))
@@ -1721,8 +1729,8 @@ foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1,
           : ConstantInt::get(Ty, APInt::getSignedMinValue(BitWidth)),
       cast<Constant>(I1), DL);
   return Builder.CreateBinaryIntrinsic(
-      IntrID, Builder.CreateOr(CtOp, NewConst),
-      ConstantInt::getTrue(ZeroUndef->getType()));
+      IntrID, Builder.CreateOr(CtOp1, NewConst),
+      ConstantInt::getTrue(ZeroUndef1->getType()));
 }
 
 /// Return whether "X LOp (Y ROp Z)" is always equal to
diff --git a/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll b/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
index 3a960e16cfef7..9e5dd6398d273 100644
--- a/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
+++ b/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
@@ -384,9 +384,8 @@ define i16 @umin_ctlz_i16_negative_two_uses(i16 %X) {
 define i32 @umin_cttz_i32_zero_undef(i32 %0, i32 %1) {
 ; CHECK-LABEL: define i32 @umin_cttz_i32_zero_undef(
 ; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
-; CHECK-NEXT:    [[TMP3:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP0]], i1 false)
-; CHECK-NEXT:    [[TMP5:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP1]], i1 false)
-; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP5]])
+; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP3]], i1 false)
 ; CHECK-NEXT:    ret i32 [[TMP4]]
 ;
   %3 = call i32 @llvm.cttz.i32(i32 %0, i1 false)
@@ -398,9 +397,8 @@ define i32 @umin_cttz_i32_zero_undef(i32 %0, i32 %1) {
 define i32 @umin_cttz_i32_zero_def(i32 %0, i32 %1) {
 ; CHECK-LABEL: define i32 @umin_cttz_i32_zero_def(
 ; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
-; CHECK-NEXT:    [[TMP3:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP0]], i1 true)
-; CHECK-NEXT:    [[TMP5:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP1]], i1 true)
-; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP5]])
+; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP3]], i1 true)
 ; CHECK-NEXT:    ret i32 [[TMP4]]
 ;
   %3 = call i32 @llvm.cttz.i32(i32 %0, i1 true)
@@ -412,9 +410,8 @@ define i32 @umin_cttz_i32_zero_def(i32 %0, i32 %1) {
 define i32 @umin_cttz_i32_zero_def_undef(i32 %0, i32 %1) {
 ; CHECK-LABEL: define i32 @umin_cttz_i32_zero_def_undef(
 ; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
-; CHECK-NEXT:    [[TMP3:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP0]], i1 true)
-; CHECK-NEXT:    [[TMP5:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP1]], i1 false)
-; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP5]])
+; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP3]], i1 true)
 ; CHECK-NEXT:    ret i32 [[TMP4]]
 ;
   %3 = call i32 @llvm.cttz.i32(i32 %0, i1 true)
@@ -426,9 +423,8 @@ define i32 @umin_cttz_i32_zero_def_undef(i32 %0, i32 %1) {
 define i32 @umin_ctlz_i32_zero_undef(i32 %0, i32 %1) {
 ; CHECK-LABEL: define i32 @umin_ctlz_i32_zero_undef(
 ; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
-; CHECK-NEXT:    [[TMP3:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
-; CHECK-NEXT:    [[TMP5:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false)
-; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP5]])
+; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP3]], i1 false)
 ; CHECK-NEXT:    ret i32 [[TMP4]]
 ;
   %3 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
@@ -440,9 +436,8 @@ define i32 @umin_ctlz_i32_zero_undef(i32 %0, i32 %1) {
 define i32 @umin_ctlz_i32_zero_def(i32 %0, i32 %1) {
 ; CHECK-LABEL: define i32 @umin_ctlz_i32_zero_def(
 ; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
-; CHECK-NEXT:    [[TMP3:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 true)
-; CHECK-NEXT:    [[TMP5:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 true)
-; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP5]])
+; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP3]], i1 true)
 ; CHECK-NEXT:    ret i32 [[TMP4]]
 ;
   %3 = call i32 @llvm.ctlz.i32(i32 %0, i1 true)
@@ -454,9 +449,8 @@ define i32 @umin_ctlz_i32_zero_def(i32 %0, i32 %1) {
 define i32 @umin_ctlz_i32_zero_def_undef(i32 %0, i32 %1) {
 ; CHECK-LABEL: define i32 @umin_ctlz_i32_zero_def_undef(
 ; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
-; CHECK-NEXT:    [[TMP3:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 true)
-; CHECK-NEXT:    [[TMP5:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false)
-; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP5]])
+; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP3]], i1 true)
 ; CHECK-NEXT:    ret i32 [[TMP4]]
 ;
   %3 = call i32 @llvm.ctlz.i32(i32 %0, i1 true)



More information about the llvm-commits mailing list