[llvm] [InstCombine] Handle ceil division idiom (PR #100977)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 28 23:27:57 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Antonio Frighetto (antoniofrighetto)
<details>
<summary>Changes</summary>
The expression `add (udiv (sub A, Bias), B), Bias` can be folded to `udiv (add A, (B - 1)), B` when the sum of `A` and `B` is known not to overflow and `Bias = zext (A != 0)`.
Fixes: https://github.com/llvm/llvm-project/issues/95652.
Proof: https://alive2.llvm.org/ce/z/hiWHQA.
---
Full diff: https://github.com/llvm/llvm-project/pull/100977.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp (+71-24)
- (added) llvm/test/Transforms/InstCombine/fold-ceil-div-idiom.ll (+253)
``````````diff
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 3bd086230cbec..aded338982fcf 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1250,6 +1250,75 @@ static Instruction *foldToUnsignedSaturatedAdd(BinaryOperator &I) {
return nullptr;
}
+static Value *foldCeilIdioms(BinaryOperator &I, InstCombinerImpl &IC) {
+ assert(I.getOpcode() == Instruction::Add && "Expecting add instruction.");
+ Value *A, *B;
+ ICmpInst::Predicate Pred;
+ auto &ICB = IC.Builder;
+
+ // Fold the log2 ceil idiom:
+ // zext (ctpop(A) >u/!= 1) + (ctlz (A, true) ^ (BW - 1))
+ // -> BW - ctlz (A - 1, false)
+ const APInt *XorC;
+ if (match(&I,
+ m_c_Add(
+ m_ZExt(m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value(A)),
+ m_One())),
+ m_OneUse(m_ZExtOrSelf(m_OneUse(m_Xor(
+ m_OneUse(m_TruncOrSelf(m_OneUse(
+ m_Intrinsic<Intrinsic::ctlz>(m_Deferred(A), m_One())))),
+ m_APInt(XorC))))))) &&
+ (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_NE) &&
+ *XorC == A->getType()->getScalarSizeInBits() - 1) {
+ Value *Sub = ICB.CreateAdd(A, Constant::getAllOnesValue(A->getType()));
+ Value *Ctlz = ICB.CreateIntrinsic(Intrinsic::ctlz, {A->getType()},
+ {Sub, ICB.getFalse()});
+ Value *Ret = ICB.CreateSub(
+ ConstantInt::get(A->getType(), A->getType()->getScalarSizeInBits()),
+ Ctlz, "", /*HasNUW*/ true, /*HasNSW*/ true);
+ return ICB.CreateZExtOrTrunc(Ret, I.getType());
+ }
+
+ // Fold the ceil division idiom:
+ // add (udiv (sub A, Bias), B), Bias
+ // -> udiv (add A, B - 1), B)
+ // with Bias = A != 0; A + B not to overflow
+ auto MatchDivision = [&IC](Instruction *Div, Value *&DivOp0, Value *&DivOp1) {
+ if (match(Div, m_UDiv(m_Value(DivOp0), m_Value(DivOp1))))
+ return true;
+
+ Value *N;
+ const APInt *C;
+ if (match(Div, m_LShr(m_Value(DivOp0), m_Value(N))) &&
+ match(N,
+ m_OneUse(m_Sub(m_APInt(C), m_Intrinsic<Intrinsic::ctlz>(
+ m_Specific(DivOp1), m_Zero())))) &&
+ (*C == Div->getType()->getScalarSizeInBits() - 1) &&
+ IC.isKnownToBeAPowerOfTwo(DivOp1, true, 0, Div))
+ return true;
+
+ return false;
+ };
+
+ Instruction *Div;
+ Value *Bias, *Sub;
+ if (match(&I, m_c_Add(m_Instruction(Div), m_Value(Bias))) &&
+ MatchDivision(Div, Sub, B) &&
+ match(Sub, m_Sub(m_Value(A), m_Value(Bias))) &&
+ match(Bias, m_ZExt(m_ICmp(Pred, m_Specific(A), m_ZeroInt()))) &&
+ Pred == ICmpInst::ICMP_NE && Bias->hasNUses(2)) {
+ WithCache<const Value *> LHSCache(A), RHSCache(B);
+ auto OR = IC.computeOverflowForUnsignedAdd(LHSCache, RHSCache, &I);
+ if (OR == OverflowResult::NeverOverflows) {
+ auto *BMinusOne =
+ ICB.CreateAdd(B, Constant::getAllOnesValue(I.getType()));
+ return ICB.CreateUDiv(ICB.CreateAdd(A, BMinusOne), B);
+ }
+ }
+
+ return nullptr;
+}
+
// Transform:
// (add A, (shl (neg B), Y))
// -> (sub A, (shl B, Y))
@@ -1785,30 +1854,8 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
I, Builder.CreateIntrinsic(Intrinsic::ctpop, {I.getType()},
{Builder.CreateOr(A, B)}));
- // Fold the log2_ceil idiom:
- // zext(ctpop(A) >u/!= 1) + (ctlz(A, true) ^ (BW - 1))
- // -->
- // BW - ctlz(A - 1, false)
- const APInt *XorC;
- ICmpInst::Predicate Pred;
- if (match(&I,
- m_c_Add(
- m_ZExt(m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value(A)),
- m_One())),
- m_OneUse(m_ZExtOrSelf(m_OneUse(m_Xor(
- m_OneUse(m_TruncOrSelf(m_OneUse(
- m_Intrinsic<Intrinsic::ctlz>(m_Deferred(A), m_One())))),
- m_APInt(XorC))))))) &&
- (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_NE) &&
- *XorC == A->getType()->getScalarSizeInBits() - 1) {
- Value *Sub = Builder.CreateAdd(A, Constant::getAllOnesValue(A->getType()));
- Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {A->getType()},
- {Sub, Builder.getFalse()});
- Value *Ret = Builder.CreateSub(
- ConstantInt::get(A->getType(), A->getType()->getScalarSizeInBits()),
- Ctlz, "", /*HasNUW*/ true, /*HasNSW*/ true);
- return replaceInstUsesWith(I, Builder.CreateZExtOrTrunc(Ret, I.getType()));
- }
+ if (Value *V = foldCeilIdioms(I, *this))
+ return replaceInstUsesWith(I, V);
if (Instruction *Res = foldSquareSumInt(I))
return Res;
diff --git a/llvm/test/Transforms/InstCombine/fold-ceil-div-idiom.ll b/llvm/test/Transforms/InstCombine/fold-ceil-div-idiom.ll
new file mode 100644
index 0000000000000..b9cc0fa6ce050
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-ceil-div-idiom.ll
@@ -0,0 +1,253 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i8 @ceil_div_idiom(i8 %x, i8 %y) {
+; CHECK-LABEL: define i8 @ceil_div_idiom(
+; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT: [[WO:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 [[X]], i8 [[Y]])
+; CHECK-NEXT: [[OV:%.*]] = extractvalue { i8, i1 } [[WO]], 1
+; CHECK-NEXT: [[OV_NOT:%.*]] = xor i1 [[OV]], true
+; CHECK-NEXT: call void @llvm.assume(i1 [[OV_NOT]])
+; CHECK-NEXT: [[NONZERO:%.*]] = icmp ne i8 [[X]], 0
+; CHECK-NEXT: [[BIAS:%.*]] = zext i1 [[NONZERO]] to i8
+; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[X]], [[BIAS]]
+; CHECK-NEXT: [[DIV:%.*]] = udiv i8 [[SUB]], [[Y]]
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[DIV]], [[BIAS]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %wo = call {i8, i1} @llvm.uadd.with.overflow(i8 %x, i8 %y)
+ %ov = extractvalue {i8, i1} %wo, 1
+ %ov.not = xor i1 %ov, true
+ call void @llvm.assume(i1 %ov.not)
+
+ %nonzero = icmp ne i8 %x, 0
+ %bias = zext i1 %nonzero to i8
+ %sub = sub i8 %x, %bias
+ %div = udiv i8 %sub, %y
+ %add = add i8 %div, %bias
+ ret i8 %add
+}
+
+define i8 @ceil_div_idiom_2(i8 %x, i8 %y) {
+; CHECK-LABEL: define i8 @ceil_div_idiom_2(
+; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT: [[OV_NOT:%.*]] = add nuw i8 [[X]], [[Y]]
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[OV_NOT]] to i1
+; CHECK-NEXT: call void @llvm.assume(i1 [[TRUNC]])
+; CHECK-NEXT: [[NONZERO:%.*]] = icmp ne i8 [[X]], 0
+; CHECK-NEXT: [[BIAS:%.*]] = zext i1 [[NONZERO]] to i8
+; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[X]], [[BIAS]]
+; CHECK-NEXT: [[DIV:%.*]] = udiv i8 [[SUB]], [[Y]]
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[DIV]], [[BIAS]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %ov.not = add nuw i8 %x, %y
+ %trunc = trunc i8 %ov.not to i1
+ call void @llvm.assume(i1 %trunc)
+
+ %nonzero = icmp ne i8 %x, 0
+ %bias = zext i1 %nonzero to i8
+ %sub = sub i8 %x, %bias
+ %div = udiv i8 %sub, %y
+ %add = add i8 %div, %bias
+ ret i8 %add
+}
+
+define i8 @ceil_div_idiom_with_lshr(i8 %x, i8 %y) {
+; CHECK-LABEL: define i8 @ceil_div_idiom_with_lshr(
+; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT: [[WO:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 [[X]], i8 [[Y]])
+; CHECK-NEXT: [[OV:%.*]] = extractvalue { i8, i1 } [[WO]], 1
+; CHECK-NEXT: [[OV_NOT:%.*]] = xor i1 [[OV]], true
+; CHECK-NEXT: call void @llvm.assume(i1 [[OV_NOT]])
+; CHECK-NEXT: [[CTPOPULATION:%.*]] = call range(i8 0, 9) i8 @llvm.ctpop.i8(i8 [[Y]])
+; CHECK-NEXT: [[IS_POW_2:%.*]] = icmp eq i8 [[CTPOPULATION]], 1
+; CHECK-NEXT: call void @llvm.assume(i1 [[IS_POW_2]])
+; CHECK-NEXT: [[NONZERO:%.*]] = icmp ne i8 [[X]], 0
+; CHECK-NEXT: [[BIAS:%.*]] = zext i1 [[NONZERO]] to i8
+; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[X]], [[BIAS]]
+; CHECK-NEXT: [[CTLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[Y]], i1 true)
+; CHECK-NEXT: [[N:%.*]] = xor i8 [[CTLZ]], 7
+; CHECK-NEXT: [[DIV:%.*]] = lshr i8 [[SUB]], [[N]]
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[DIV]], [[BIAS]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %wo = call {i8, i1} @llvm.uadd.with.overflow(i8 %x, i8 %y)
+ %ov = extractvalue {i8, i1} %wo, 1
+ %ov.not = xor i1 %ov, true
+ call void @llvm.assume(i1 %ov.not)
+
+ %ctpopulation = call i8 @llvm.ctpop.i8(i8 %y)
+ %is_pow_2 = icmp eq i8 %ctpopulation, 1
+ call void @llvm.assume(i1 %is_pow_2)
+
+ %nonzero = icmp ne i8 %x, 0
+ %bias = zext i1 %nonzero to i8
+ %sub = sub i8 %x, %bias
+ %ctlz = tail call i8 @llvm.ctlz.i8(i8 %y, i1 true)
+ %n = sub i8 7, %ctlz
+ %div = lshr i8 %sub, %n
+ %add = add i8 %div, %bias
+ ret i8 %add
+}
+
+define i8 @ceil_div_idiom_add_may_overflow(i8 %x, i8 %y) {
+; CHECK-LABEL: define i8 @ceil_div_idiom_add_may_overflow(
+; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT: [[NONZERO:%.*]] = icmp ne i8 [[X]], 0
+; CHECK-NEXT: [[BIAS:%.*]] = zext i1 [[NONZERO]] to i8
+; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[X]], [[BIAS]]
+; CHECK-NEXT: [[DIV:%.*]] = udiv i8 [[SUB]], [[Y]]
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[DIV]], [[BIAS]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %nonzero = icmp ne i8 %x, 0
+ %bias = zext i1 %nonzero to i8
+ %sub = sub i8 %x, %bias
+ %div = udiv i8 %sub, %y
+ %add = add i8 %div, %bias
+ ret i8 %add
+}
+
+define i8 @ceil_div_idiom_multiuse_bias(i8 %x, i8 %y) {
+; CHECK-LABEL: define i8 @ceil_div_idiom_multiuse_bias(
+; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT: [[WO:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 [[X]], i8 [[Y]])
+; CHECK-NEXT: [[OV:%.*]] = extractvalue { i8, i1 } [[WO]], 1
+; CHECK-NEXT: [[OV_NOT:%.*]] = xor i1 [[OV]], true
+; CHECK-NEXT: call void @llvm.assume(i1 [[OV_NOT]])
+; CHECK-NEXT: [[NONZERO:%.*]] = icmp ne i8 [[X]], 0
+; CHECK-NEXT: [[BIAS:%.*]] = zext i1 [[NONZERO]] to i8
+; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[X]], [[BIAS]]
+; CHECK-NEXT: [[DIV:%.*]] = udiv i8 [[SUB]], [[Y]]
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[DIV]], [[BIAS]]
+; CHECK-NEXT: call void @use(i8 [[BIAS]])
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %wo = call {i8, i1} @llvm.uadd.with.overflow(i8 %x, i8 %y)
+ %ov = extractvalue {i8, i1} %wo, 1
+ %ov.not = xor i1 %ov, true
+ call void @llvm.assume(i1 %ov.not)
+
+ %nonzero = icmp ne i8 %x, 0
+ %bias = zext i1 %nonzero to i8
+ %sub = sub i8 %x, %bias
+ %div = udiv i8 %sub, %y
+ %add = add i8 %div, %bias
+ call void @use(i8 %bias)
+ ret i8 %add
+}
+
+define i8 @ceil_div_idiom_with_lshr_not_power_2(i8 %x, i8 %y) {
+; CHECK-LABEL: define i8 @ceil_div_idiom_with_lshr_not_power_2(
+; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT: [[WO:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 [[X]], i8 [[Y]])
+; CHECK-NEXT: [[OV:%.*]] = extractvalue { i8, i1 } [[WO]], 1
+; CHECK-NEXT: [[OV_NOT:%.*]] = xor i1 [[OV]], true
+; CHECK-NEXT: call void @llvm.assume(i1 [[OV_NOT]])
+; CHECK-NEXT: [[NONZERO:%.*]] = icmp ne i8 [[X]], 0
+; CHECK-NEXT: [[BIAS:%.*]] = zext i1 [[NONZERO]] to i8
+; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[X]], [[BIAS]]
+; CHECK-NEXT: [[CTLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[Y]], i1 true)
+; CHECK-NEXT: [[N:%.*]] = xor i8 [[CTLZ]], 7
+; CHECK-NEXT: [[DIV:%.*]] = lshr i8 [[SUB]], [[N]]
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[DIV]], [[BIAS]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %wo = call {i8, i1} @llvm.uadd.with.overflow(i8 %x, i8 %y)
+ %ov = extractvalue {i8, i1} %wo, 1
+ %ov.not = xor i1 %ov, true
+ call void @llvm.assume(i1 %ov.not)
+
+ %nonzero = icmp ne i8 %x, 0
+ %bias = zext i1 %nonzero to i8
+ %sub = sub i8 %x, %bias
+ %ctlz = tail call i8 @llvm.ctlz.i8(i8 %y, i1 true)
+ %n = sub i8 7, %ctlz
+ %div = lshr i8 %sub, %n
+ %add = add i8 %div, %bias
+ ret i8 %add
+}
+
+define i8 @ceil_div_idiom_with_lshr_wrong_bw(i8 %x, i8 %y) {
+; CHECK-LABEL: define i8 @ceil_div_idiom_with_lshr_wrong_bw(
+; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT: [[WO:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 [[X]], i8 [[Y]])
+; CHECK-NEXT: [[OV:%.*]] = extractvalue { i8, i1 } [[WO]], 1
+; CHECK-NEXT: [[OV_NOT:%.*]] = xor i1 [[OV]], true
+; CHECK-NEXT: call void @llvm.assume(i1 [[OV_NOT]])
+; CHECK-NEXT: [[CTPOPULATION:%.*]] = call range(i8 0, 9) i8 @llvm.ctpop.i8(i8 [[Y]])
+; CHECK-NEXT: [[IS_POW_2:%.*]] = icmp eq i8 [[CTPOPULATION]], 1
+; CHECK-NEXT: call void @llvm.assume(i1 [[IS_POW_2]])
+; CHECK-NEXT: [[NONZERO:%.*]] = icmp ne i8 [[X]], 0
+; CHECK-NEXT: [[BIAS:%.*]] = zext i1 [[NONZERO]] to i8
+; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[X]], [[BIAS]]
+; CHECK-NEXT: [[CTLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[Y]], i1 true)
+; CHECK-NEXT: [[N:%.*]] = sub nuw nsw i8 8, [[CTLZ]]
+; CHECK-NEXT: [[DIV:%.*]] = lshr i8 [[SUB]], [[N]]
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[DIV]], [[BIAS]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %wo = call {i8, i1} @llvm.uadd.with.overflow(i8 %x, i8 %y)
+ %ov = extractvalue {i8, i1} %wo, 1
+ %ov.not = xor i1 %ov, true
+ call void @llvm.assume(i1 %ov.not)
+
+ %ctpopulation = call i8 @llvm.ctpop.i8(i8 %y)
+ %is_pow_2 = icmp eq i8 %ctpopulation, 1
+ call void @llvm.assume(i1 %is_pow_2)
+
+ %nonzero = icmp ne i8 %x, 0
+ %bias = zext i1 %nonzero to i8
+ %sub = sub i8 %x, %bias
+ %ctlz = tail call i8 @llvm.ctlz.i8(i8 %y, i1 true)
+ %n = sub i8 8, %ctlz
+ %div = lshr i8 %sub, %n
+ %add = add i8 %div, %bias
+ ret i8 %add
+}
+
+define i8 @ceil_div_idiom_with_lshr_multiuse_n(i8 %x, i8 %y) {
+; CHECK-LABEL: define i8 @ceil_div_idiom_with_lshr_multiuse_n(
+; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT: [[WO:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 [[X]], i8 [[Y]])
+; CHECK-NEXT: [[OV:%.*]] = extractvalue { i8, i1 } [[WO]], 1
+; CHECK-NEXT: [[OV_NOT:%.*]] = xor i1 [[OV]], true
+; CHECK-NEXT: call void @llvm.assume(i1 [[OV_NOT]])
+; CHECK-NEXT: [[CTPOPULATION:%.*]] = call range(i8 0, 9) i8 @llvm.ctpop.i8(i8 [[Y]])
+; CHECK-NEXT: [[IS_POW_2:%.*]] = icmp eq i8 [[CTPOPULATION]], 1
+; CHECK-NEXT: call void @llvm.assume(i1 [[IS_POW_2]])
+; CHECK-NEXT: [[NONZERO:%.*]] = icmp ne i8 [[X]], 0
+; CHECK-NEXT: [[BIAS:%.*]] = zext i1 [[NONZERO]] to i8
+; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[X]], [[BIAS]]
+; CHECK-NEXT: [[CTLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[Y]], i1 true)
+; CHECK-NEXT: [[N:%.*]] = sub nuw nsw i8 8, [[CTLZ]]
+; CHECK-NEXT: [[DIV:%.*]] = lshr i8 [[SUB]], [[N]]
+; CHECK-NEXT: call void @use(i8 [[N]])
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[DIV]], [[BIAS]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %wo = call {i8, i1} @llvm.uadd.with.overflow(i8 %x, i8 %y)
+ %ov = extractvalue {i8, i1} %wo, 1
+ %ov.not = xor i1 %ov, true
+ call void @llvm.assume(i1 %ov.not)
+
+ %ctpopulation = call i8 @llvm.ctpop.i8(i8 %y)
+ %is_pow_2 = icmp eq i8 %ctpopulation, 1
+ call void @llvm.assume(i1 %is_pow_2)
+
+ %nonzero = icmp ne i8 %x, 0
+ %bias = zext i1 %nonzero to i8
+ %sub = sub i8 %x, %bias
+ %ctlz = tail call i8 @llvm.ctlz.i8(i8 %y, i1 true)
+ %n = sub i8 8, %ctlz
+ %div = lshr i8 %sub, %n
+ call void @use(i8 %n)
+ %add = add i8 %div, %bias
+ ret i8 %add
+}
+
+declare { i8, i1 } @llvm.uadd.with.overflow.i8(i8, i8)
+declare i8 @llvm.ctpop.i8(i8)
+declare void @llvm.assume(i1)
+declare void @use(i8)
``````````
</details>
https://github.com/llvm/llvm-project/pull/100977
More information about the llvm-commits
mailing list