[llvm] [InstCombine] Handle ceil division idiom (PR #100977)

via llvm-commits llvm-commits at lists.llvm.org
Sun Jul 28 23:27:57 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Antonio Frighetto (antoniofrighetto)

<details>
<summary>Changes</summary>

The expression `add (udiv (sub A, Bias), B), Bias` can be folded to `udiv (add A, B - 1), B` when the sum of `A` and `B` is known not to overflow and `Bias = zext (A != 0)`.

Fixes: https://github.com/llvm/llvm-project/issues/95652.

Proof: https://alive2.llvm.org/ce/z/hiWHQA.

---
Full diff: https://github.com/llvm/llvm-project/pull/100977.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp (+71-24) 
- (added) llvm/test/Transforms/InstCombine/fold-ceil-div-idiom.ll (+253) 


``````````diff
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 3bd086230cbec..aded338982fcf 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1250,6 +1250,75 @@ static Instruction *foldToUnsignedSaturatedAdd(BinaryOperator &I) {
   return nullptr;
 }
 
+static Value *foldCeilIdioms(BinaryOperator &I, InstCombinerImpl &IC) {
+  assert(I.getOpcode() == Instruction::Add && "Expecting add instruction.");
+  Value *A, *B;
+  ICmpInst::Predicate Pred;
+  auto &ICB = IC.Builder;
+
+  // Fold the log2 ceil idiom:
+  // zext (ctpop(A) >u/!= 1) + (ctlz (A, true) ^ (BW - 1))
+  //      -> BW - ctlz (A - 1, false)
+  const APInt *XorC;
+  if (match(&I,
+            m_c_Add(
+                m_ZExt(m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value(A)),
+                              m_One())),
+                m_OneUse(m_ZExtOrSelf(m_OneUse(m_Xor(
+                    m_OneUse(m_TruncOrSelf(m_OneUse(
+                        m_Intrinsic<Intrinsic::ctlz>(m_Deferred(A), m_One())))),
+                    m_APInt(XorC))))))) &&
+      (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_NE) &&
+      *XorC == A->getType()->getScalarSizeInBits() - 1) {
+    Value *Sub = ICB.CreateAdd(A, Constant::getAllOnesValue(A->getType()));
+    Value *Ctlz = ICB.CreateIntrinsic(Intrinsic::ctlz, {A->getType()},
+                                      {Sub, ICB.getFalse()});
+    Value *Ret = ICB.CreateSub(
+        ConstantInt::get(A->getType(), A->getType()->getScalarSizeInBits()),
+        Ctlz, "", /*HasNUW*/ true, /*HasNSW*/ true);
+    return ICB.CreateZExtOrTrunc(Ret, I.getType());
+  }
+
+  // Fold the ceil division idiom:
+  // add (udiv (sub A, Bias), B), Bias
+  //      -> udiv (add A, B - 1), B)
+  // with Bias = A != 0; A + B not to overflow
+  auto MatchDivision = [&IC](Instruction *Div, Value *&DivOp0, Value *&DivOp1) {
+    if (match(Div, m_UDiv(m_Value(DivOp0), m_Value(DivOp1))))
+      return true;
+
+    Value *N;
+    const APInt *C;
+    if (match(Div, m_LShr(m_Value(DivOp0), m_Value(N))) &&
+        match(N,
+              m_OneUse(m_Sub(m_APInt(C), m_Intrinsic<Intrinsic::ctlz>(
+                                             m_Specific(DivOp1), m_Zero())))) &&
+        (*C == Div->getType()->getScalarSizeInBits() - 1) &&
+        IC.isKnownToBeAPowerOfTwo(DivOp1, true, 0, Div))
+      return true;
+
+    return false;
+  };
+
+  Instruction *Div;
+  Value *Bias, *Sub;
+  if (match(&I, m_c_Add(m_Instruction(Div), m_Value(Bias))) &&
+      MatchDivision(Div, Sub, B) &&
+      match(Sub, m_Sub(m_Value(A), m_Value(Bias))) &&
+      match(Bias, m_ZExt(m_ICmp(Pred, m_Specific(A), m_ZeroInt()))) &&
+      Pred == ICmpInst::ICMP_NE && Bias->hasNUses(2)) {
+    WithCache<const Value *> LHSCache(A), RHSCache(B);
+    auto OR = IC.computeOverflowForUnsignedAdd(LHSCache, RHSCache, &I);
+    if (OR == OverflowResult::NeverOverflows) {
+      auto *BMinusOne =
+          ICB.CreateAdd(B, Constant::getAllOnesValue(I.getType()));
+      return ICB.CreateUDiv(ICB.CreateAdd(A, BMinusOne), B);
+    }
+  }
+
+  return nullptr;
+}
+
 // Transform:
 //  (add A, (shl (neg B), Y))
 //      -> (sub A, (shl B, Y))
@@ -1785,30 +1854,8 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
         I, Builder.CreateIntrinsic(Intrinsic::ctpop, {I.getType()},
                                    {Builder.CreateOr(A, B)}));
 
-  // Fold the log2_ceil idiom:
-  // zext(ctpop(A) >u/!= 1) + (ctlz(A, true) ^ (BW - 1))
-  // -->
-  // BW - ctlz(A - 1, false)
-  const APInt *XorC;
-  ICmpInst::Predicate Pred;
-  if (match(&I,
-            m_c_Add(
-                m_ZExt(m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value(A)),
-                              m_One())),
-                m_OneUse(m_ZExtOrSelf(m_OneUse(m_Xor(
-                    m_OneUse(m_TruncOrSelf(m_OneUse(
-                        m_Intrinsic<Intrinsic::ctlz>(m_Deferred(A), m_One())))),
-                    m_APInt(XorC))))))) &&
-      (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_NE) &&
-      *XorC == A->getType()->getScalarSizeInBits() - 1) {
-    Value *Sub = Builder.CreateAdd(A, Constant::getAllOnesValue(A->getType()));
-    Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {A->getType()},
-                                          {Sub, Builder.getFalse()});
-    Value *Ret = Builder.CreateSub(
-        ConstantInt::get(A->getType(), A->getType()->getScalarSizeInBits()),
-        Ctlz, "", /*HasNUW*/ true, /*HasNSW*/ true);
-    return replaceInstUsesWith(I, Builder.CreateZExtOrTrunc(Ret, I.getType()));
-  }
+  if (Value *V = foldCeilIdioms(I, *this))
+    return replaceInstUsesWith(I, V);
 
   if (Instruction *Res = foldSquareSumInt(I))
     return Res;
diff --git a/llvm/test/Transforms/InstCombine/fold-ceil-div-idiom.ll b/llvm/test/Transforms/InstCombine/fold-ceil-div-idiom.ll
new file mode 100644
index 0000000000000..b9cc0fa6ce050
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-ceil-div-idiom.ll
@@ -0,0 +1,253 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i8 @ceil_div_idiom(i8 %x, i8 %y) {
+; CHECK-LABEL: define i8 @ceil_div_idiom(
+; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT:    [[WO:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 [[X]], i8 [[Y]])
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[WO]], 1
+; CHECK-NEXT:    [[OV_NOT:%.*]] = xor i1 [[OV]], true
+; CHECK-NEXT:    call void @llvm.assume(i1 [[OV_NOT]])
+; CHECK-NEXT:    [[NONZERO:%.*]] = icmp ne i8 [[X]], 0
+; CHECK-NEXT:    [[BIAS:%.*]] = zext i1 [[NONZERO]] to i8
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[X]], [[BIAS]]
+; CHECK-NEXT:    [[DIV:%.*]] = udiv i8 [[SUB]], [[Y]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[DIV]], [[BIAS]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %wo = call {i8, i1} @llvm.uadd.with.overflow(i8 %x, i8 %y)
+  %ov = extractvalue {i8, i1} %wo, 1
+  %ov.not = xor i1 %ov, true
+  call void @llvm.assume(i1 %ov.not)
+
+  %nonzero = icmp ne i8 %x, 0
+  %bias = zext i1 %nonzero to i8
+  %sub = sub i8 %x, %bias
+  %div = udiv i8 %sub, %y
+  %add = add i8 %div, %bias
+  ret i8 %add
+}
+
+define i8 @ceil_div_idiom_2(i8 %x, i8 %y) {
+; CHECK-LABEL: define i8 @ceil_div_idiom_2(
+; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT:    [[OV_NOT:%.*]] = add nuw i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i8 [[OV_NOT]] to i1
+; CHECK-NEXT:    call void @llvm.assume(i1 [[TRUNC]])
+; CHECK-NEXT:    [[NONZERO:%.*]] = icmp ne i8 [[X]], 0
+; CHECK-NEXT:    [[BIAS:%.*]] = zext i1 [[NONZERO]] to i8
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[X]], [[BIAS]]
+; CHECK-NEXT:    [[DIV:%.*]] = udiv i8 [[SUB]], [[Y]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[DIV]], [[BIAS]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %ov.not = add nuw i8 %x, %y
+  %trunc = trunc i8 %ov.not to i1
+  call void @llvm.assume(i1 %trunc)
+
+  %nonzero = icmp ne i8 %x, 0
+  %bias = zext i1 %nonzero to i8
+  %sub = sub i8 %x, %bias
+  %div = udiv i8 %sub, %y
+  %add = add i8 %div, %bias
+  ret i8 %add
+}
+
+define i8 @ceil_div_idiom_with_lshr(i8 %x, i8 %y) {
+; CHECK-LABEL: define i8 @ceil_div_idiom_with_lshr(
+; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT:    [[WO:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 [[X]], i8 [[Y]])
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[WO]], 1
+; CHECK-NEXT:    [[OV_NOT:%.*]] = xor i1 [[OV]], true
+; CHECK-NEXT:    call void @llvm.assume(i1 [[OV_NOT]])
+; CHECK-NEXT:    [[CTPOPULATION:%.*]] = call range(i8 0, 9) i8 @llvm.ctpop.i8(i8 [[Y]])
+; CHECK-NEXT:    [[IS_POW_2:%.*]] = icmp eq i8 [[CTPOPULATION]], 1
+; CHECK-NEXT:    call void @llvm.assume(i1 [[IS_POW_2]])
+; CHECK-NEXT:    [[NONZERO:%.*]] = icmp ne i8 [[X]], 0
+; CHECK-NEXT:    [[BIAS:%.*]] = zext i1 [[NONZERO]] to i8
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[X]], [[BIAS]]
+; CHECK-NEXT:    [[CTLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[Y]], i1 true)
+; CHECK-NEXT:    [[N:%.*]] = xor i8 [[CTLZ]], 7
+; CHECK-NEXT:    [[DIV:%.*]] = lshr i8 [[SUB]], [[N]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[DIV]], [[BIAS]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %wo = call {i8, i1} @llvm.uadd.with.overflow(i8 %x, i8 %y)
+  %ov = extractvalue {i8, i1} %wo, 1
+  %ov.not = xor i1 %ov, true
+  call void @llvm.assume(i1 %ov.not)
+
+  %ctpopulation = call i8 @llvm.ctpop.i8(i8 %y)
+  %is_pow_2 = icmp eq i8 %ctpopulation, 1
+  call void @llvm.assume(i1 %is_pow_2)
+
+  %nonzero = icmp ne i8 %x, 0
+  %bias = zext i1 %nonzero to i8
+  %sub = sub i8 %x, %bias
+  %ctlz = tail call i8 @llvm.ctlz.i8(i8 %y, i1 true)
+  %n = sub i8 7, %ctlz
+  %div = lshr i8 %sub, %n
+  %add = add i8 %div, %bias
+  ret i8 %add
+}
+
+define i8 @ceil_div_idiom_add_may_overflow(i8 %x, i8 %y) {
+; CHECK-LABEL: define i8 @ceil_div_idiom_add_may_overflow(
+; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT:    [[NONZERO:%.*]] = icmp ne i8 [[X]], 0
+; CHECK-NEXT:    [[BIAS:%.*]] = zext i1 [[NONZERO]] to i8
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[X]], [[BIAS]]
+; CHECK-NEXT:    [[DIV:%.*]] = udiv i8 [[SUB]], [[Y]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[DIV]], [[BIAS]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %nonzero = icmp ne i8 %x, 0
+  %bias = zext i1 %nonzero to i8
+  %sub = sub i8 %x, %bias
+  %div = udiv i8 %sub, %y
+  %add = add i8 %div, %bias
+  ret i8 %add
+}
+
+define i8 @ceil_div_idiom_multiuse_bias(i8 %x, i8 %y) {
+; CHECK-LABEL: define i8 @ceil_div_idiom_multiuse_bias(
+; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT:    [[WO:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 [[X]], i8 [[Y]])
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[WO]], 1
+; CHECK-NEXT:    [[OV_NOT:%.*]] = xor i1 [[OV]], true
+; CHECK-NEXT:    call void @llvm.assume(i1 [[OV_NOT]])
+; CHECK-NEXT:    [[NONZERO:%.*]] = icmp ne i8 [[X]], 0
+; CHECK-NEXT:    [[BIAS:%.*]] = zext i1 [[NONZERO]] to i8
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[X]], [[BIAS]]
+; CHECK-NEXT:    [[DIV:%.*]] = udiv i8 [[SUB]], [[Y]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[DIV]], [[BIAS]]
+; CHECK-NEXT:    call void @use(i8 [[BIAS]])
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %wo = call {i8, i1} @llvm.uadd.with.overflow(i8 %x, i8 %y)
+  %ov = extractvalue {i8, i1} %wo, 1
+  %ov.not = xor i1 %ov, true
+  call void @llvm.assume(i1 %ov.not)
+
+  %nonzero = icmp ne i8 %x, 0
+  %bias = zext i1 %nonzero to i8
+  %sub = sub i8 %x, %bias
+  %div = udiv i8 %sub, %y
+  %add = add i8 %div, %bias
+  call void @use(i8 %bias)
+  ret i8 %add
+}
+
+define i8 @ceil_div_idiom_with_lshr_not_power_2(i8 %x, i8 %y) {
+; CHECK-LABEL: define i8 @ceil_div_idiom_with_lshr_not_power_2(
+; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT:    [[WO:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 [[X]], i8 [[Y]])
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[WO]], 1
+; CHECK-NEXT:    [[OV_NOT:%.*]] = xor i1 [[OV]], true
+; CHECK-NEXT:    call void @llvm.assume(i1 [[OV_NOT]])
+; CHECK-NEXT:    [[NONZERO:%.*]] = icmp ne i8 [[X]], 0
+; CHECK-NEXT:    [[BIAS:%.*]] = zext i1 [[NONZERO]] to i8
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[X]], [[BIAS]]
+; CHECK-NEXT:    [[CTLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[Y]], i1 true)
+; CHECK-NEXT:    [[N:%.*]] = xor i8 [[CTLZ]], 7
+; CHECK-NEXT:    [[DIV:%.*]] = lshr i8 [[SUB]], [[N]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[DIV]], [[BIAS]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %wo = call {i8, i1} @llvm.uadd.with.overflow(i8 %x, i8 %y)
+  %ov = extractvalue {i8, i1} %wo, 1
+  %ov.not = xor i1 %ov, true
+  call void @llvm.assume(i1 %ov.not)
+
+  %nonzero = icmp ne i8 %x, 0
+  %bias = zext i1 %nonzero to i8
+  %sub = sub i8 %x, %bias
+  %ctlz = tail call i8 @llvm.ctlz.i8(i8 %y, i1 true)
+  %n = sub i8 7, %ctlz
+  %div = lshr i8 %sub, %n
+  %add = add i8 %div, %bias
+  ret i8 %add
+}
+
+define i8 @ceil_div_idiom_with_lshr_wrong_bw(i8 %x, i8 %y) {
+; CHECK-LABEL: define i8 @ceil_div_idiom_with_lshr_wrong_bw(
+; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT:    [[WO:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 [[X]], i8 [[Y]])
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[WO]], 1
+; CHECK-NEXT:    [[OV_NOT:%.*]] = xor i1 [[OV]], true
+; CHECK-NEXT:    call void @llvm.assume(i1 [[OV_NOT]])
+; CHECK-NEXT:    [[CTPOPULATION:%.*]] = call range(i8 0, 9) i8 @llvm.ctpop.i8(i8 [[Y]])
+; CHECK-NEXT:    [[IS_POW_2:%.*]] = icmp eq i8 [[CTPOPULATION]], 1
+; CHECK-NEXT:    call void @llvm.assume(i1 [[IS_POW_2]])
+; CHECK-NEXT:    [[NONZERO:%.*]] = icmp ne i8 [[X]], 0
+; CHECK-NEXT:    [[BIAS:%.*]] = zext i1 [[NONZERO]] to i8
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[X]], [[BIAS]]
+; CHECK-NEXT:    [[CTLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[Y]], i1 true)
+; CHECK-NEXT:    [[N:%.*]] = sub nuw nsw i8 8, [[CTLZ]]
+; CHECK-NEXT:    [[DIV:%.*]] = lshr i8 [[SUB]], [[N]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[DIV]], [[BIAS]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %wo = call {i8, i1} @llvm.uadd.with.overflow(i8 %x, i8 %y)
+  %ov = extractvalue {i8, i1} %wo, 1
+  %ov.not = xor i1 %ov, true
+  call void @llvm.assume(i1 %ov.not)
+
+  %ctpopulation = call i8 @llvm.ctpop.i8(i8 %y)
+  %is_pow_2 = icmp eq i8 %ctpopulation, 1
+  call void @llvm.assume(i1 %is_pow_2)
+
+  %nonzero = icmp ne i8 %x, 0
+  %bias = zext i1 %nonzero to i8
+  %sub = sub i8 %x, %bias
+  %ctlz = tail call i8 @llvm.ctlz.i8(i8 %y, i1 true)
+  %n = sub i8 8, %ctlz
+  %div = lshr i8 %sub, %n
+  %add = add i8 %div, %bias
+  ret i8 %add
+}
+
+define i8 @ceil_div_idiom_with_lshr_multiuse_n(i8 %x, i8 %y) {
+; CHECK-LABEL: define i8 @ceil_div_idiom_with_lshr_multiuse_n(
+; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT:    [[WO:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 [[X]], i8 [[Y]])
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[WO]], 1
+; CHECK-NEXT:    [[OV_NOT:%.*]] = xor i1 [[OV]], true
+; CHECK-NEXT:    call void @llvm.assume(i1 [[OV_NOT]])
+; CHECK-NEXT:    [[CTPOPULATION:%.*]] = call range(i8 0, 9) i8 @llvm.ctpop.i8(i8 [[Y]])
+; CHECK-NEXT:    [[IS_POW_2:%.*]] = icmp eq i8 [[CTPOPULATION]], 1
+; CHECK-NEXT:    call void @llvm.assume(i1 [[IS_POW_2]])
+; CHECK-NEXT:    [[NONZERO:%.*]] = icmp ne i8 [[X]], 0
+; CHECK-NEXT:    [[BIAS:%.*]] = zext i1 [[NONZERO]] to i8
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[X]], [[BIAS]]
+; CHECK-NEXT:    [[CTLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[Y]], i1 true)
+; CHECK-NEXT:    [[N:%.*]] = sub nuw nsw i8 8, [[CTLZ]]
+; CHECK-NEXT:    [[DIV:%.*]] = lshr i8 [[SUB]], [[N]]
+; CHECK-NEXT:    call void @use(i8 [[N]])
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[DIV]], [[BIAS]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %wo = call {i8, i1} @llvm.uadd.with.overflow(i8 %x, i8 %y)
+  %ov = extractvalue {i8, i1} %wo, 1
+  %ov.not = xor i1 %ov, true
+  call void @llvm.assume(i1 %ov.not)
+
+  %ctpopulation = call i8 @llvm.ctpop.i8(i8 %y)
+  %is_pow_2 = icmp eq i8 %ctpopulation, 1
+  call void @llvm.assume(i1 %is_pow_2)
+
+  %nonzero = icmp ne i8 %x, 0
+  %bias = zext i1 %nonzero to i8
+  %sub = sub i8 %x, %bias
+  %ctlz = tail call i8 @llvm.ctlz.i8(i8 %y, i1 true)
+  %n = sub i8 8, %ctlz
+  %div = lshr i8 %sub, %n
+  call void @use(i8 %n)
+  %add = add i8 %div, %bias
+  ret i8 %add
+}
+
+declare { i8, i1 } @llvm.uadd.with.overflow.i8(i8, i8)
+declare i8 @llvm.ctpop.i8(i8)
+declare void @llvm.assume(i1)
+declare void @use(i8)

``````````

</details>


https://github.com/llvm/llvm-project/pull/100977


More information about the llvm-commits mailing list