[llvm] [InstCombine] Handle ceil division idiom (PR #100977)
Antonio Frighetto via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 29 06:59:56 PDT 2024
================
@@ -1250,6 +1250,74 @@ static Instruction *foldToUnsignedSaturatedAdd(BinaryOperator &I) {
return nullptr;
}
+static Value *foldCeilIdioms(BinaryOperator &I, InstCombinerImpl &IC) {
+ assert(I.getOpcode() == Instruction::Add && "Expecting add instruction.");
+ Value *A, *B;
+ ICmpInst::Predicate Pred;
+ auto &ICB = IC.Builder;
+
+ // Fold the log2 ceil idiom:
+ // zext (ctpop(A) >u/!= 1) + (ctlz (A, true) ^ (BW - 1))
+ // -> BW - ctlz (A - 1, false)
+ const APInt *XorC;
+ if (match(&I,
+ m_c_Add(
+ m_ZExt(m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value(A)),
+ m_One())),
+ m_OneUse(m_ZExtOrSelf(m_OneUse(m_Xor(
+ m_OneUse(m_TruncOrSelf(m_OneUse(
+ m_Intrinsic<Intrinsic::ctlz>(m_Deferred(A), m_One())))),
+ m_APInt(XorC))))))) &&
+ (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_NE) &&
+ *XorC == A->getType()->getScalarSizeInBits() - 1) {
+ Value *Sub = ICB.CreateAdd(A, Constant::getAllOnesValue(A->getType()));
+ Value *Ctlz = ICB.CreateIntrinsic(Intrinsic::ctlz, {A->getType()},
+ {Sub, ICB.getFalse()});
+ Value *Ret = ICB.CreateSub(
+ ConstantInt::get(A->getType(), A->getType()->getScalarSizeInBits()),
+ Ctlz, "", /*HasNUW*/ true, /*HasNSW*/ true);
+ return ICB.CreateZExtOrTrunc(Ret, I.getType());
+ }
+
+ // Fold the ceil division idiom:
+ // add (udiv (sub A, Bias), B), Bias
+ // -> udiv (add A, (B - 1)), B
+ // with Bias = (A != 0), provided that A + B does not overflow
+ auto MatchDivision = [&IC](Instruction *Div, Value *&DivOp0, Value *&DivOp1) {
+ if (match(Div, m_UDiv(m_Value(DivOp0), m_Value(DivOp1))))
+ return true;
+
+ Value *N;
+ const APInt *C;
+ if (match(Div, m_LShr(m_Value(DivOp0), m_Value(N))) &&
+ match(N, m_OneUse(m_Sub(m_APInt(C), m_Intrinsic<Intrinsic::ctlz>(
----------------
antoniofrighetto wrote:
I originally thought it made sense to restrict the shift amount to a single use. However, I now realize that `N` could also be used by other log2 calculations, so it is probably better to drop the one-use restriction.
https://github.com/llvm/llvm-project/pull/100977
More information about the llvm-commits mailing list