[llvm] [InstCombine] Fold Minimum over trailing or leading zeros (PR #173768)
Aryan Kadole via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 29 11:07:46 PST 2025
https://github.com/ak1932 updated https://github.com/llvm/llvm-project/pull/173768
>From e567baa60da736c3df09592b0a2b22e344fff0a1 Mon Sep 17 00:00:00 2001
From: ak1932 <aryankadole95 at gmail.com>
Date: Sun, 28 Dec 2025 17:29:24 +0530
Subject: [PATCH 1/2] [InstCombine] Add baseline test for minimum ctlz cttz
---
.../Transforms/InstCombine/umin_cttz_ctlz.ll | 126 ++++++++++++++++++
1 file changed, 126 insertions(+)
diff --git a/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll b/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
index c8228057eeb10..3a960e16cfef7 100644
--- a/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
+++ b/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
@@ -380,3 +380,129 @@ define i16 @umin_ctlz_i16_negative_two_uses(i16 %X) {
%ret = add i16 %ctlz, %op0
ret i16 %ret
}
+
+define i32 @umin_cttz_i32_zero_undef(i32 %0, i32 %1) {
+; CHECK-LABEL: define i32 @umin_cttz_i32_zero_undef(
+; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; CHECK-NEXT: [[TMP3:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP0]], i1 false)
+; CHECK-NEXT: [[TMP5:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP1]], i1 false)
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP5]])
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+ %3 = call i32 @llvm.cttz.i32(i32 %0, i1 false)
+ %4 = call i32 @llvm.cttz.i32(i32 %1, i1 false)
+ %5 = call i32 @llvm.umin.i32(i32 %3, i32 %4)
+ ret i32 %5
+}
+
+define i32 @umin_cttz_i32_zero_def(i32 %0, i32 %1) {
+; CHECK-LABEL: define i32 @umin_cttz_i32_zero_def(
+; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; CHECK-NEXT: [[TMP3:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP0]], i1 true)
+; CHECK-NEXT: [[TMP5:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP1]], i1 true)
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP5]])
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+ %3 = call i32 @llvm.cttz.i32(i32 %0, i1 true)
+ %4 = call i32 @llvm.cttz.i32(i32 %1, i1 true)
+ %5 = call i32 @llvm.umin.i32(i32 %3, i32 %4)
+ ret i32 %5
+}
+
+define i32 @umin_cttz_i32_zero_def_undef(i32 %0, i32 %1) {
+; CHECK-LABEL: define i32 @umin_cttz_i32_zero_def_undef(
+; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; CHECK-NEXT: [[TMP3:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP0]], i1 true)
+; CHECK-NEXT: [[TMP5:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP1]], i1 false)
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP5]])
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+ %3 = call i32 @llvm.cttz.i32(i32 %0, i1 true)
+ %4 = call i32 @llvm.cttz.i32(i32 %1, i1 false)
+ %5 = call i32 @llvm.umin.i32(i32 %3, i32 %4)
+ ret i32 %5
+}
+
+define i32 @umin_ctlz_i32_zero_undef(i32 %0, i32 %1) {
+; CHECK-LABEL: define i32 @umin_ctlz_i32_zero_undef(
+; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; CHECK-NEXT: [[TMP3:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
+; CHECK-NEXT: [[TMP5:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false)
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP5]])
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+ %3 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
+ %4 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
+ %5 = call i32 @llvm.umin.i32(i32 %3, i32 %4)
+ ret i32 %5
+}
+
+define i32 @umin_ctlz_i32_zero_def(i32 %0, i32 %1) {
+; CHECK-LABEL: define i32 @umin_ctlz_i32_zero_def(
+; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; CHECK-NEXT: [[TMP3:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 true)
+; CHECK-NEXT: [[TMP5:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 true)
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP5]])
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+ %3 = call i32 @llvm.ctlz.i32(i32 %0, i1 true)
+ %4 = call i32 @llvm.ctlz.i32(i32 %1, i1 true)
+ %5 = call i32 @llvm.umin.i32(i32 %3, i32 %4)
+ ret i32 %5
+}
+
+define i32 @umin_ctlz_i32_zero_def_undef(i32 %0, i32 %1) {
+; CHECK-LABEL: define i32 @umin_ctlz_i32_zero_def_undef(
+; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; CHECK-NEXT: [[TMP3:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 true)
+; CHECK-NEXT: [[TMP5:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false)
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP5]])
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+ %3 = call i32 @llvm.ctlz.i32(i32 %0, i1 true)
+ %4 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
+ %5 = call i32 @llvm.umin.i32(i32 %3, i32 %4)
+ ret i32 %5
+}
+
+define i32 @neg_umin_cttz_ctlz_i32_zero_def(i32 %0, i32 %1) {
+; CHECK-LABEL: define i32 @neg_umin_cttz_ctlz_i32_zero_def(
+; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; CHECK-NEXT: [[TMP3:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP0]], i1 true)
+; CHECK-NEXT: [[TMP4:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 true)
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP4]])
+; CHECK-NEXT: ret i32 [[TMP5]]
+;
+ %3 = call i32 @llvm.cttz.i32(i32 %0, i1 true)
+ %4 = call i32 @llvm.ctlz.i32(i32 %1, i1 true)
+ %5 = call i32 @llvm.umin.i32(i32 %3, i32 %4)
+ ret i32 %5
+}
+
+define i32 @neg_umin_cttz_ctlz_i32_zero_undef(i32 %0, i32 %1) {
+; CHECK-LABEL: define i32 @neg_umin_cttz_ctlz_i32_zero_undef(
+; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; CHECK-NEXT: [[TMP3:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP0]], i1 false)
+; CHECK-NEXT: [[TMP4:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false)
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP4]])
+; CHECK-NEXT: ret i32 [[TMP5]]
+;
+ %3 = call i32 @llvm.cttz.i32(i32 %0, i1 false)
+ %4 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
+ %5 = call i32 @llvm.umin.i32(i32 %3, i32 %4)
+ ret i32 %5
+}
+
+define i32 @neg_umin_cttz_ctlz_i32_zero_def_undef(i32 %0, i32 %1) {
+; CHECK-LABEL: define i32 @neg_umin_cttz_ctlz_i32_zero_def_undef(
+; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; CHECK-NEXT: [[TMP3:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP0]], i1 true)
+; CHECK-NEXT: [[TMP4:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false)
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP4]])
+; CHECK-NEXT: ret i32 [[TMP5]]
+;
+ %3 = call i32 @llvm.cttz.i32(i32 %0, i1 true)
+ %4 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
+ %5 = call i32 @llvm.umin.i32(i32 %3, i32 %4)
+ ret i32 %5
+}
>From 9925ec20e84549f98bb27d4401b378998199bd87 Mon Sep 17 00:00:00 2001
From: ak1932 <aryankadole95 at gmail.com>
Date: Tue, 30 Dec 2025 00:37:03 +0530
Subject: [PATCH 2/2] [InstCombine] Fold Minimum over trailing or leading zeros
---
.../InstCombine/InstCombineCalls.cpp | 24 ++++++++++-----
.../Transforms/InstCombine/umin_cttz_ctlz.ll | 30 ++++++++-----------
2 files changed, 28 insertions(+), 26 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 8c64105b63fe4..bdb52e2c02c2d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1689,9 +1689,11 @@ static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) {
}
/// Fold an unsigned minimum of trailing or leading zero bits counts:
-/// umin(cttz(CtOp, ZeroUndef), ConstOp) --> cttz(CtOp | (1 << ConstOp))
-/// umin(ctlz(CtOp, ZeroUndef), ConstOp) --> ctlz(CtOp | (SignedMin
+/// umin(cttz(CtOp1, ZeroUndef), ConstOp) --> cttz(CtOp1 | (1 << ConstOp))
+/// umin(ctlz(CtOp1, ZeroUndef), ConstOp) --> ctlz(CtOp1 | (SignedMin
/// >> ConstOp))
+/// umin(cttz(CtOp1, ZU1), cttz(CtOp2, ZU2)) --> cttz(CtOp1 | CtOp2, ZU1 | ZU2)
+/// umin(ctlz(CtOp1, ZU1), ctlz(CtOp2, ZU2)) --> ctlz(CtOp1 | CtOp2, ZU1 | ZU2)
template <Intrinsic::ID IntrID>
static Value *
foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1,
@@ -1700,12 +1702,18 @@ foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1,
static_assert(IntrID == Intrinsic::cttz || IntrID == Intrinsic::ctlz,
"This helper only supports cttz and ctlz intrinsics");
- Value *CtOp;
- Value *ZeroUndef;
- if (!match(I0,
- m_OneUse(m_Intrinsic<IntrID>(m_Value(CtOp), m_Value(ZeroUndef)))))
+ Value *CtOp1, *CtOp2;
+ Value *ZeroUndef1, *ZeroUndef2;
+ if (!match(I0, m_OneUse(
+ m_Intrinsic<IntrID>(m_Value(CtOp1), m_Value(ZeroUndef1)))))
return nullptr;
+ if (match(I1,
+ m_OneUse(m_Intrinsic<IntrID>(m_Value(CtOp2), m_Value(ZeroUndef2)))))
+ return Builder.CreateBinaryIntrinsic(
+ IntrID, Builder.CreateOr(CtOp1, CtOp2),
+ Builder.CreateOr(ZeroUndef1, ZeroUndef2));
+
unsigned BitWidth = I1->getType()->getScalarSizeInBits();
auto LessBitWidth = [BitWidth](auto &C) { return C.ult(BitWidth); };
if (!match(I1, m_CheckedInt(LessBitWidth)))
@@ -1721,8 +1729,8 @@ foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1,
: ConstantInt::get(Ty, APInt::getSignedMinValue(BitWidth)),
cast<Constant>(I1), DL);
return Builder.CreateBinaryIntrinsic(
- IntrID, Builder.CreateOr(CtOp, NewConst),
- ConstantInt::getTrue(ZeroUndef->getType()));
+ IntrID, Builder.CreateOr(CtOp1, NewConst),
+ ConstantInt::getTrue(ZeroUndef1->getType()));
}
/// Return whether "X LOp (Y ROp Z)" is always equal to
diff --git a/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll b/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
index 3a960e16cfef7..9e5dd6398d273 100644
--- a/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
+++ b/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
@@ -384,9 +384,8 @@ define i16 @umin_ctlz_i16_negative_two_uses(i16 %X) {
define i32 @umin_cttz_i32_zero_undef(i32 %0, i32 %1) {
; CHECK-LABEL: define i32 @umin_cttz_i32_zero_undef(
; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
-; CHECK-NEXT: [[TMP3:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP0]], i1 false)
-; CHECK-NEXT: [[TMP5:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP1]], i1 false)
-; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP5]])
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP3]], i1 false)
; CHECK-NEXT: ret i32 [[TMP4]]
;
%3 = call i32 @llvm.cttz.i32(i32 %0, i1 false)
@@ -398,9 +397,8 @@ define i32 @umin_cttz_i32_zero_undef(i32 %0, i32 %1) {
define i32 @umin_cttz_i32_zero_def(i32 %0, i32 %1) {
; CHECK-LABEL: define i32 @umin_cttz_i32_zero_def(
; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
-; CHECK-NEXT: [[TMP3:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP0]], i1 true)
-; CHECK-NEXT: [[TMP5:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP1]], i1 true)
-; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP5]])
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP3]], i1 true)
; CHECK-NEXT: ret i32 [[TMP4]]
;
%3 = call i32 @llvm.cttz.i32(i32 %0, i1 true)
@@ -412,9 +410,8 @@ define i32 @umin_cttz_i32_zero_def(i32 %0, i32 %1) {
define i32 @umin_cttz_i32_zero_def_undef(i32 %0, i32 %1) {
; CHECK-LABEL: define i32 @umin_cttz_i32_zero_def_undef(
; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
-; CHECK-NEXT: [[TMP3:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP0]], i1 true)
-; CHECK-NEXT: [[TMP5:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP1]], i1 false)
-; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP5]])
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP3]], i1 true)
; CHECK-NEXT: ret i32 [[TMP4]]
;
%3 = call i32 @llvm.cttz.i32(i32 %0, i1 true)
@@ -426,9 +423,8 @@ define i32 @umin_cttz_i32_zero_def_undef(i32 %0, i32 %1) {
define i32 @umin_ctlz_i32_zero_undef(i32 %0, i32 %1) {
; CHECK-LABEL: define i32 @umin_ctlz_i32_zero_undef(
; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
-; CHECK-NEXT: [[TMP3:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
-; CHECK-NEXT: [[TMP5:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false)
-; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP5]])
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP3]], i1 false)
; CHECK-NEXT: ret i32 [[TMP4]]
;
%3 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
@@ -440,9 +436,8 @@ define i32 @umin_ctlz_i32_zero_undef(i32 %0, i32 %1) {
define i32 @umin_ctlz_i32_zero_def(i32 %0, i32 %1) {
; CHECK-LABEL: define i32 @umin_ctlz_i32_zero_def(
; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
-; CHECK-NEXT: [[TMP3:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 true)
-; CHECK-NEXT: [[TMP5:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 true)
-; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP5]])
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP3]], i1 true)
; CHECK-NEXT: ret i32 [[TMP4]]
;
%3 = call i32 @llvm.ctlz.i32(i32 %0, i1 true)
@@ -454,9 +449,8 @@ define i32 @umin_ctlz_i32_zero_def(i32 %0, i32 %1) {
define i32 @umin_ctlz_i32_zero_def_undef(i32 %0, i32 %1) {
; CHECK-LABEL: define i32 @umin_ctlz_i32_zero_def_undef(
; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
-; CHECK-NEXT: [[TMP3:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 true)
-; CHECK-NEXT: [[TMP5:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false)
-; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP5]])
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP3]], i1 true)
; CHECK-NEXT: ret i32 [[TMP4]]
;
%3 = call i32 @llvm.ctlz.i32(i32 %0, i1 true)
More information about the llvm-commits
mailing list