[llvm] 362b653 - [InstCombine] Fold Minimum over trailing or leading zeros (#173768)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 11 04:56:57 PST 2026
Author: Aryan Kadole
Date: 2026-01-11T20:56:52+08:00
New Revision: 362b653c69be12a4d444acf26c14165e1bb52537
URL: https://github.com/llvm/llvm-project/commit/362b653c69be12a4d444acf26c14165e1bb52537
DIFF: https://github.com/llvm/llvm-project/commit/362b653c69be12a4d444acf26c14165e1bb52537.diff
LOG: [InstCombine] Fold Minimum over trailing or leading zeros (#173768)
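Add support for folding the unsigned minimum of two leading- or trailing-zero counts into a single count of the OR of the operands: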
`umin(clz(x), clz(y)) => clz(x | y)`
`umin(ctz(x), ctz(y)) => ctz(x | y)`
[C++ source](https://godbolt.org/z/E8abbjT7G)
[alive proof](https://alive2.llvm.org/ce/z/mh94_n)
Fixes #173691
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 41a89aa214b30..d7d38bf716aa1 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1689,9 +1689,11 @@ static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) {
}
/// Fold an unsigned minimum of trailing or leading zero bits counts:
-/// umin(cttz(CtOp, ZeroUndef), ConstOp) --> cttz(CtOp | (1 << ConstOp))
-/// umin(ctlz(CtOp, ZeroUndef), ConstOp) --> ctlz(CtOp | (SignedMin
+/// umin(cttz(CtOp1, ZeroUndef), ConstOp) --> cttz(CtOp1 | (1 << ConstOp))
+/// umin(ctlz(CtOp1, ZeroUndef), ConstOp) --> ctlz(CtOp1 | (SignedMin
/// >> ConstOp))
+/// umin(cttz(CtOp1), cttz(CtOp2)) --> cttz(CtOp1 | CtOp2)
+/// umin(ctlz(CtOp1), ctlz(CtOp2)) --> ctlz(CtOp1 | CtOp2)
template <Intrinsic::ID IntrID>
static Value *
foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1,
@@ -1700,12 +1702,18 @@ foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1,
static_assert(IntrID == Intrinsic::cttz || IntrID == Intrinsic::ctlz,
"This helper only supports cttz and ctlz intrinsics");
- Value *CtOp;
- Value *ZeroUndef;
- if (!match(I0,
- m_OneUse(m_Intrinsic<IntrID>(m_Value(CtOp), m_Value(ZeroUndef)))))
+ Value *CtOp1, *CtOp2;
+ Value *ZeroUndef1, *ZeroUndef2;
+ if (!match(I0, m_OneUse(
+ m_Intrinsic<IntrID>(m_Value(CtOp1), m_Value(ZeroUndef1)))))
return nullptr;
+ if (match(I1,
+ m_OneUse(m_Intrinsic<IntrID>(m_Value(CtOp2), m_Value(ZeroUndef2)))))
+ return Builder.CreateBinaryIntrinsic(
+ IntrID, Builder.CreateOr(CtOp1, CtOp2),
+ Builder.CreateOr(ZeroUndef1, ZeroUndef2));
+
unsigned BitWidth = I1->getType()->getScalarSizeInBits();
auto LessBitWidth = [BitWidth](auto &C) { return C.ult(BitWidth); };
if (!match(I1, m_CheckedInt(LessBitWidth)))
@@ -1721,8 +1729,8 @@ foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1,
: ConstantInt::get(Ty, APInt::getSignedMinValue(BitWidth)),
cast<Constant>(I1), DL);
return Builder.CreateBinaryIntrinsic(
- IntrID, Builder.CreateOr(CtOp, NewConst),
- ConstantInt::getTrue(ZeroUndef->getType()));
+ IntrID, Builder.CreateOr(CtOp1, NewConst),
+ ConstantInt::getTrue(ZeroUndef1->getType()));
}
/// Return whether "X LOp (Y ROp Z)" is always equal to
diff --git a/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll b/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
index c8228057eeb10..a3da6ae25ae4f 100644
--- a/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
+++ b/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
@@ -380,3 +380,155 @@ define i16 @umin_ctlz_i16_negative_two_uses(i16 %X) {
%ret = add i16 %ctlz, %op0
ret i16 %ret
}
+
+define i32 @umin_cttz_i32_zero_undef(i32 %X, i32 %Y) {
+; CHECK-LABEL: define i32 @umin_cttz_i32_zero_undef(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X]], [[Y]]
+; CHECK-NEXT: [[RES:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP1]], i1 false)
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %cttz_X = call i32 @llvm.cttz.i32(i32 %X, i1 false)
+ %cttz_Y = call i32 @llvm.cttz.i32(i32 %Y, i1 false)
+ %res = call i32 @llvm.umin.i32(i32 %cttz_X, i32 %cttz_Y)
+ ret i32 %res
+}
+
+define i32 @umin_cttz_i32_zero_def(i32 %X, i32 %Y) {
+; CHECK-LABEL: define i32 @umin_cttz_i32_zero_def(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X]], [[Y]]
+; CHECK-NEXT: [[RES:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP1]], i1 true)
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %cttz_X = call i32 @llvm.cttz.i32(i32 %X, i1 true)
+ %cttz_Y = call i32 @llvm.cttz.i32(i32 %Y, i1 true)
+ %res = call i32 @llvm.umin.i32(i32 %cttz_X, i32 %cttz_Y)
+ ret i32 %res
+}
+
+define i32 @umin_cttz_i32_zero_def_undef(i32 %X, i32 %Y) {
+; CHECK-LABEL: define i32 @umin_cttz_i32_zero_def_undef(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X]], [[Y]]
+; CHECK-NEXT: [[RES:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP1]], i1 true)
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %cttz_X = call i32 @llvm.cttz.i32(i32 %X, i1 true)
+ %cttz_Y = call i32 @llvm.cttz.i32(i32 %Y, i1 false)
+ %res = call i32 @llvm.umin.i32(i32 %cttz_X, i32 %cttz_Y)
+ ret i32 %res
+}
+
+define i32 @umin_ctlz_i32_zero_undef(i32 %X, i32 %Y) {
+; CHECK-LABEL: define i32 @umin_ctlz_i32_zero_undef(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X]], [[Y]]
+; CHECK-NEXT: [[RES:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false)
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %ctlz_X = call i32 @llvm.ctlz.i32(i32 %X, i1 false)
+ %ctlz_Y = call i32 @llvm.ctlz.i32(i32 %Y, i1 false)
+ %res = call i32 @llvm.umin.i32(i32 %ctlz_X, i32 %ctlz_Y)
+ ret i32 %res
+}
+
+define i32 @umin_ctlz_i32_zero_def(i32 %X, i32 %Y) {
+; CHECK-LABEL: define i32 @umin_ctlz_i32_zero_def(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X]], [[Y]]
+; CHECK-NEXT: [[RES:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 true)
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %ctlz_X = call i32 @llvm.ctlz.i32(i32 %X, i1 true)
+ %ctlz_Y = call i32 @llvm.ctlz.i32(i32 %Y, i1 true)
+ %res = call i32 @llvm.umin.i32(i32 %ctlz_X, i32 %ctlz_Y)
+ ret i32 %res
+}
+
+define i32 @umin_ctlz_i32_zero_def_undef(i32 %X, i32 %Y) {
+; CHECK-LABEL: define i32 @umin_ctlz_i32_zero_def_undef(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X]], [[Y]]
+; CHECK-NEXT: [[RES:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 true)
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %ctlz_X = call i32 @llvm.ctlz.i32(i32 %X, i1 true)
+ %ctlz_Y = call i32 @llvm.ctlz.i32(i32 %Y, i1 false)
+ %res = call i32 @llvm.umin.i32(i32 %ctlz_X, i32 %ctlz_Y)
+ ret i32 %res
+}
+
+define i32 @mul_umin_cttz_i32(i32 %X, i32 %Y) {
+; CHECK-LABEL: define i32 @mul_umin_cttz_i32(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[CTTZ_X:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[X]], i1 true)
+; CHECK-NEXT: [[CTTZ_Y:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[Y]], i1 false)
+; CHECK-NEXT: [[UMIN_RES:%.*]] = call i32 @llvm.umin.i32(i32 [[CTTZ_X]], i32 [[CTTZ_Y]])
+; CHECK-NEXT: [[RES:%.*]] = add nuw nsw i32 [[UMIN_RES]], [[CTTZ_X]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %cttz_X = call i32 @llvm.cttz.i32(i32 %X, i1 true)
+ %cttz_Y = call i32 @llvm.cttz.i32(i32 %Y, i1 false)
+ %umin_res = call i32 @llvm.umin.i32(i32 %cttz_X, i32 %cttz_Y)
+ %res = add i32 %umin_res, %cttz_X
+ ret i32 %res
+}
+
+define i32 @mul_umin_ctlz_i32(i32 %X, i32 %Y) {
+; CHECK-LABEL: define i32 @mul_umin_ctlz_i32(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[CTLZ_X:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
+; CHECK-NEXT: [[CTLZ_Y:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[Y]], i1 false)
+; CHECK-NEXT: [[UMIN_RES:%.*]] = call i32 @llvm.umin.i32(i32 [[CTLZ_X]], i32 [[CTLZ_Y]])
+; CHECK-NEXT: [[RES:%.*]] = add nuw nsw i32 [[UMIN_RES]], [[CTLZ_X]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %ctlz_X = call i32 @llvm.ctlz.i32(i32 %X, i1 true)
+ %ctlz_Y = call i32 @llvm.ctlz.i32(i32 %Y, i1 false)
+ %umin_res = call i32 @llvm.umin.i32(i32 %ctlz_X, i32 %ctlz_Y)
+ %res = add i32 %umin_res, %ctlz_X
+ ret i32 %res
+}
+
+define i32 @neg_umin_cttz_ctlz_i32_zero_def(i32 %X, i32 %Y) {
+; CHECK-LABEL: define i32 @neg_umin_cttz_ctlz_i32_zero_def(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[CTTZ_X:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[X]], i1 true)
+; CHECK-NEXT: [[CTLZ_Y:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[Y]], i1 true)
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.umin.i32(i32 [[CTTZ_X]], i32 [[CTLZ_Y]])
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %cttz_X = call i32 @llvm.cttz.i32(i32 %X, i1 true)
+ %ctlz_Y = call i32 @llvm.ctlz.i32(i32 %Y, i1 true)
+ %res = call i32 @llvm.umin.i32(i32 %cttz_X, i32 %ctlz_Y)
+ ret i32 %res
+}
+
+define i32 @neg_umin_cttz_ctlz_i32_zero_undef(i32 %X, i32 %Y) {
+; CHECK-LABEL: define i32 @neg_umin_cttz_ctlz_i32_zero_undef(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[CTTZ_X:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[X]], i1 false)
+; CHECK-NEXT: [[CTLZ_Y:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[Y]], i1 false)
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.umin.i32(i32 [[CTTZ_X]], i32 [[CTLZ_Y]])
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %cttz_X = call i32 @llvm.cttz.i32(i32 %X, i1 false)
+ %ctlz_Y = call i32 @llvm.ctlz.i32(i32 %Y, i1 false)
+ %res = call i32 @llvm.umin.i32(i32 %cttz_X, i32 %ctlz_Y)
+ ret i32 %res
+}
+
+define i32 @neg_umin_cttz_ctlz_i32_zero_def_undef(i32 %X, i32 %Y) {
+; CHECK-LABEL: define i32 @neg_umin_cttz_ctlz_i32_zero_def_undef(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[CTTZ_X:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[X]], i1 true)
+; CHECK-NEXT: [[CTLZ_Y:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[Y]], i1 false)
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.umin.i32(i32 [[CTTZ_X]], i32 [[CTLZ_Y]])
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %cttz_X = call i32 @llvm.cttz.i32(i32 %X, i1 true)
+ %ctlz_Y = call i32 @llvm.ctlz.i32(i32 %Y, i1 false)
+ %res = call i32 @llvm.umin.i32(i32 %cttz_X, i32 %ctlz_Y)
+ ret i32 %res
+}
More information about the llvm-commits mailing list