[llvm] InstCombine: fold umax/umin(shl(umax/umin(x,C1),s),C3) -> umax/umin(shl(x,s),C3) when safe (#139786) (PR #169943)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 28 09:36:57 PST 2025
https://github.com/cs25mtech12013-commits created https://github.com/llvm/llvm-project/pull/169943
Summary
-------
This patch adds an InstCombine transformation to fold patterns of the form:
umax(shl(umax(x, C1), s), C3) -> umax(shl(x, s), C3)
umin(shl(umin(x, C1), s), C3) -> umin(shl(x, s), C3)
when provably safe: C3 >= (C1 << s) for the umax form, and C3 <= (C1 << s) for the
umin form. The transform requires the LHS of the `shl` to be the inner min/max call
(a `CallInst`) so the rewritten pattern cannot be re-matched.
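For illustration, here is a minimal i32 instance of the umax form; the function
name and constants are made up for this example, chosen so that
C3 = 16 >= (C1 << s) = 3 << 2 = 12:

  declare i32 @llvm.umax.i32(i32, i32)

  define i32 @umax_clamp(i32 %x) {
    %inner = call i32 @llvm.umax.i32(i32 %x, i32 3)
    %shl   = shl i32 %inner, 2
    %outer = call i32 @llvm.umax.i32(i32 %shl, i32 16)
    ret i32 %outer
  }

After the fold the inner clamp is gone, leaving roughly `%shl = shl nuw i32 %x, 2`
followed by `%outer = call i32 @llvm.umax.i32(i32 %shl, i32 16)` (the new `shl`
carries the `nuw` flag, as noted under Motivation). For %x < 3 the inner clamp
produces 3, so the shift yields 12, which the outer clamp to 16 subsumes; for
%x >= 3 the inner clamp is a no-op, so both forms compute the same value.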
Motivation
----------
Removes redundant nested clamps that block further simplification, enabling
downstream optimizations. The newly created `shl` carries the `nuw` flag.
Local verification
------------------
- Ran `opt` from a local build and validated the output with `FileCheck`.
- Example: `./bin/opt -S -passes=instcombine llvm/test/Transforms/InstCombine/umin_pos_i32.ll | FileCheck llvm/test/Transforms/InstCombine/umin_pos_i32.ll`
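The test files themselves are not reproduced in this message. As a rough sketch of
what such a FileCheck-based test looks like for the umax form at i8 (the function
name, constants, and CHECK lines below are illustrative, not copied from the PR):

  ; RUN: opt -S -passes=instcombine %s | FileCheck %s

  declare i8 @llvm.umax.i8(i8, i8)

  ; C3 = 16 >= (C1 << s) = 3 << 2 = 12, so the inner clamp should be removed.
  ; CHECK-LABEL: @umax_pos_i8(
  ; CHECK: [[SHL:%.*]] = shl nuw i8 %x, 2
  ; CHECK-NEXT: [[MAX:%.*]] = call i8 @llvm.umax.i8(i8 [[SHL]], i8 16)
  ; CHECK-NEXT: ret i8 [[MAX]]
  define i8 @umax_pos_i8(i8 %x) {
    %inner = call i8 @llvm.umax.i8(i8 %x, i8 3)
    %shl = shl i8 %inner, 2
    %outer = call i8 @llvm.umax.i8(i8 %shl, i8 16)
    ret i8 %outer
  }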
Notes / Limitations
-------------------
- Scalar integer support only; vector types are not handled yet.
- The safety condition is implemented with APInt shifting and `uge`/`ule`
  comparisons; a concrete case where the guard fails is sketched after this list.
- Debug prints were replaced by `LLVM_DEBUG` (no `errs()` left).
- The LHS of the `shl` must be a `CallInst` (the inner umin/umax), which prevents
  the fold from immediately re-matching its own output.
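As a concrete instance of the guard failing, take the umax form with C1 = 3, s = 2
and C3 = 8 (again with an illustrative function name): here C1 << s = 12 > 8 = C3,
so the inner clamp is not redundant and no fold may be performed:

  declare i32 @llvm.umax.i32(i32, i32)

  ; For %x = 0 this returns umax(3 << 2, 8) = 12, whereas dropping the inner
  ; clamp would return umax(0, 8) = 8, so the transform must not fire here.
  define i32 @umax_neg(i32 %x) {
    %inner = call i32 @llvm.umax.i32(i32 %x, i32 3)
    %shl = shl i32 %inner, 2
    %outer = call i32 @llvm.umax.i32(i32 %shl, i32 8)
    ret i32 %outer
  }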
Fixes: https://github.com/llvm/llvm-project/issues/139786
Signed-off-by: cs25mtech12013-commits <cs25mtech12013 at iith.ac.in>
From 2fd547260b49d6cd809bc239742f86d63bf2faab Mon Sep 17 00:00:00 2001
From: cs25mtech12013-commits <cs25mtech12013 at iith.ac.in>
Date: Fri, 28 Nov 2025 22:52:22 +0530
Subject: [PATCH] InstCombine: safe folding for nested umax/umin(shl(...))
Adds guarded transforms for umax/umin nested with shl when C3 >= (C1 << s) (umax)
or C3 <= (C1 << s) (umin).
Includes positive and negative tests for i8 and i32.
Fixes: https://github.com/llvm/llvm-project/issues/139786
Signed-off-by: cs25mtech12013-commits <cs25mtech12013 at iith.ac.in>
---
.gitignore | 3 +
.../InstCombine/InstCombineCalls.cpp | 380 ++++++++++++++----
2 files changed, 303 insertions(+), 80 deletions(-)
diff --git a/.gitignore b/.gitignore
index a9d616286adf1..1fb512a741b22 100644
--- a/.gitignore
+++ b/.gitignore
@@ -78,3 +78,6 @@ pythonenv*
/clang/utils/analyzer/projects/*/RefScanBuildResults
# automodapi puts generated documentation files here.
/lldb/docs/python_api/
+build/
+CMakeCache.txt
+CMakeFiles/
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 8e4edefec42fd..bf64c784ded37 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -87,15 +87,14 @@ using namespace PatternMatch;
STATISTIC(NumSimplified, "Number of library calls simplified");
static cl::opt<unsigned> GuardWideningWindow(
- "instcombine-guard-widening-window",
- cl::init(3),
+ "instcombine-guard-widening-window", cl::init(3),
cl::desc("How wide an instruction window to bypass looking for "
"another guard"));
/// Return the specified type promoted as it would be to pass though a va_arg
/// area.
static Type *getPromotedType(Type *Ty) {
- if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
+ if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
if (ITy->getBitWidth() < 32)
return Type::getInt32Ty(Ty->getContext());
}
@@ -150,7 +149,8 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
// If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
// load/store.
ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
- if (!MemOpLength) return nullptr;
+ if (!MemOpLength)
+ return nullptr;
// Source and destination pointer types are always "i8*" for intrinsic. See
// if the size is something we can handle with a single primitive load/store.
@@ -159,8 +159,8 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
uint64_t Size = MemOpLength->getLimitedValue();
assert(Size && "0-sized memory transferring should be removed already.");
- if (Size > 8 || (Size&(Size-1)))
- return nullptr; // If not 1/2/4/8 bytes, exit.
+ if (Size > 8 || (Size & (Size - 1)))
+ return nullptr; // If not 1/2/4/8 bytes, exit.
// If it is an atomic and alignment is less than the size then we will
// introduce the unaligned memory access which will be later transformed
@@ -171,7 +171,7 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
return nullptr;
// Use an integer load+store unless we can find something better.
- IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
+ IntegerType *IntType = IntegerType::get(MI->getContext(), Size << 3);
// If the memcpy has metadata describing the members, see if we can get the
// TBAA, scope and noalias tags describing our copy.
@@ -184,7 +184,7 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
L->setAlignment(*CopySrcAlign);
L->setAAMetadata(AACopyMD);
MDNode *LoopMemParallelMD =
- MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
+ MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
if (LoopMemParallelMD)
L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
MDNode *AccessGroupMD = MI->getMetadata(LLVMContext::MD_access_group);
@@ -303,8 +303,8 @@ Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
// If we can unconditionally load from this address, replace with a
// load/select idiom. TODO: use DT for context sensitive query
- if (isDereferenceablePointer(LoadPtr, II.getType(),
- II.getDataLayout(), &II, &AC)) {
+ if (isDereferenceablePointer(LoadPtr, II.getType(), II.getDataLayout(), &II,
+ &AC)) {
LoadInst *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
"unmaskedload");
LI->copyMetadata(II);
@@ -613,10 +613,10 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
KnownBits Known = IC.computeKnownBits(Op0, &II);
// Create a mask for bits above (ctlz) or below (cttz) the first known one.
- unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
- : Known.countMaxLeadingZeros();
- unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
- : Known.countMinLeadingZeros();
+ unsigned PossibleZeros =
+ IsTZ ? Known.countMaxTrailingZeros() : Known.countMaxLeadingZeros();
+ unsigned DefiniteZeros =
+ IsTZ ? Known.countMinTrailingZeros() : Known.countMinLeadingZeros();
// If all bits above (ctlz) or below (cttz) the first known one are known
// zero, this value is constant.
@@ -650,8 +650,7 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
}
static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
- assert(II.getIntrinsicID() == Intrinsic::ctpop &&
- "Expected ctpop intrinsic");
+ assert(II.getIntrinsicID() == Intrinsic::ctpop && "Expected ctpop intrinsic");
Type *Ty = II.getType();
unsigned BitWidth = Ty->getScalarSizeInBits();
Value *Op0 = II.getArgOperand(0);
@@ -1242,7 +1241,6 @@ Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
return CastInst::Create(Instruction::SExt, Sat, Ty);
}
-
/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
/// can only be one of two possible constant values -- turn that into a select
/// of constants.
@@ -1402,7 +1400,7 @@ static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
Module *Mod = II->getModule();
Function *MinMax =
Intrinsic::getOrInsertDeclaration(Mod, MinMaxID, II->getType());
- return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
+ return CallInst::Create(MinMax, {MinMaxOp, ThirdOp});
}
/// If all arguments of the intrinsic are unary shuffles with the same mask,
@@ -1819,12 +1817,11 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (GVSrc->isConstant()) {
Module *M = CI.getModule();
Intrinsic::ID MemCpyID =
- MMI->isAtomic()
- ? Intrinsic::memcpy_element_unordered_atomic
- : Intrinsic::memcpy;
- Type *Tys[3] = { CI.getArgOperand(0)->getType(),
- CI.getArgOperand(1)->getType(),
- CI.getArgOperand(2)->getType() };
+ MMI->isAtomic() ? Intrinsic::memcpy_element_unordered_atomic
+ : Intrinsic::memcpy;
+ Type *Tys[3] = {CI.getArgOperand(0)->getType(),
+ CI.getArgOperand(1)->getType(),
+ CI.getArgOperand(2)->getType()};
CI.setCalledFunction(
Intrinsic::getOrInsertDeclaration(M, MemCpyID, Tys));
return II;
@@ -1952,6 +1949,106 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::ctlz>(
I0, I1, DL, Builder))
return replaceInstUsesWith(*II, FoldedCtlz);
+    // Fold umin(shl(umin(x, C1), s), C3) -> umin(shl(x, s), C3)
+    // when C3 <= (C1 << s).
+
+ // --- Begin safe transform block for umin ---
+ Value *Op0u = CI.getArgOperand(0);
+ Value *Op1u = CI.getArgOperand(1);
+
+ ConstantInt *C3ConstU = nullptr;
+ Value *MaybeShlU = nullptr;
+
+ if (match(Op1u, m_ConstantInt(C3ConstU)) &&
+ match(Op0u, m_Value(MaybeShlU))) {
+ // umin(shl(...), C3)
+ } else if (match(Op0u, m_ConstantInt(C3ConstU)) &&
+ match(Op1u, m_Value(MaybeShlU))) {
+ // umin(C3, shl(...))
+ } else {
+ // Not the pattern we care about.
+ return nullptr;
+ }
+
+ // shl(match)
+ Value *ShlLHSU = nullptr;
+ ConstantInt *ShiftAmtConstU = nullptr;
+ if (!match(MaybeShlU,
+ m_Shl(m_Value(ShlLHSU), m_ConstantInt(ShiftAmtConstU)))) {
+ return nullptr;
+ }
+
+ // require LHS of shl to be a call (inner umin)
+ CallInst *InnerCallU = dyn_cast<CallInst>(ShlLHSU);
+ if (!InnerCallU)
+ return nullptr;
+
+ Function *CalledFU = InnerCallU->getCalledFunction();
+ if (!CalledFU || CalledFU->getIntrinsicID() != Intrinsic::umin)
+ return nullptr;
+
+ // inner args
+ Value *InnerOp0U = InnerCallU->getArgOperand(0);
+ Value *InnerOp1U = InnerCallU->getArgOperand(1);
+
+ Value *X = nullptr;
+ ConstantInt *C1ConstU = nullptr;
+ if ((match(InnerOp0U, m_Value(X)) &&
+ match(InnerOp1U, m_ConstantInt(C1ConstU))) ||
+ (match(InnerOp1U, m_Value(X)) &&
+ match(InnerOp0U, m_ConstantInt(C1ConstU)))) {
+ // matched
+ } else {
+ return nullptr;
+ }
+
+ // Ensure scalar integer type
+ Type *CIType = CI.getType();
+ if (!CIType->isIntegerTy()) {
+ // not scalar integer -> bail
+ return nullptr;
+ }
+ IntegerType *ITy = cast<IntegerType>(CIType);
+ unsigned BitWidthU = ITy->getBitWidth();
+
+ // compute safe APInt values
+ APInt C1ShiftU = (C1ConstU->getValue().zextOrTrunc(BitWidthU))
+ .shl(ShiftAmtConstU->getZExtValue());
+ APInt C3APU = C3ConstU->getValue().zextOrTrunc(BitWidthU);
+
+ // Condition for umin: C3 <= (C1 << shift)
+ if (!C3APU.ule(C1ShiftU)) {
+      LLVM_DEBUG(dbgs() << "umin transform condition failed: C3 > C1<<shift. "
+                           "No transform.\n");
+ return nullptr;
+ }
+
+    LLVM_DEBUG(dbgs() << "Pattern matched and condition true for umin: "
+                         "applying transform\n");
+ IRBuilder<> BuilderU(&CI);
+
+ // Create new shl: shl nuw X, shift
+ Value *ShiftAmtValU = ConstantInt::get(ITy, ShiftAmtConstU->getZExtValue());
+ Value *NewShlU = BuilderU.CreateShl(X, ShiftAmtValU);
+ if (auto *BO = dyn_cast<BinaryOperator>(NewShlU))
+ BO->setHasNoUnsignedWrap(true);
+
+ // Create umin intrinsic declaration for this integer type
+ Function *UMinDecl =
+ Intrinsic::getOrInsertDeclaration(CI.getModule(), Intrinsic::umin, ITy);
+ if (!UMinDecl) {
+ // unexpected: intrinsic declaration missing
+ return nullptr;
+ }
+
+ Value *C3ValU = ConstantInt::get(ITy, C3ConstU->getZExtValue());
+ Value *NewMin = BuilderU.CreateCall(UMinDecl, {NewShlU, C3ValU});
+
+ // Replace outer call's uses with NewMin and return new instruction
+ return replaceInstUsesWith(CI, NewMin);
+ // --- End safe transform block for umin ---
+
[[fallthrough]];
}
case Intrinsic::umax: {
@@ -1975,6 +2072,131 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// If C is not 0 or 1:
// umax(nuw_mul(x, C), x + 1) -> x == 0 ? 1 : nuw_mul(x, C)
auto foldMaxMulShift = [&](Value *A, Value *B) -> Instruction * {
+      // Fold umax(shl(umax(x, C1), s), C3) -> umax(shl(x, s), C3)
+      // when C3 >= (C1 << s).
+
+ // --- Begin safe transform block ---
+ // CI is the outer CallInst (the umax call) that we're visiting.
+
+ Value *Op0 = CI.getArgOperand(0);
+ Value *Op1 = CI.getArgOperand(1);
+
+ // We'll try both cases of which operand is the constant C3 and which is
+ // the shl.
+ ConstantInt *C3Const = nullptr;
+ Value *MaybeShl = nullptr;
+
+ if (match(Op1, m_ConstantInt(C3Const)) && match(Op0, m_Value(MaybeShl))) {
+ // OK: outer form is umax(shl(...), C3)
+ } else if (match(Op0, m_ConstantInt(C3Const)) &&
+ match(Op1, m_Value(MaybeShl))) {
+ // OK: outer form is umax(C3, shl(...))
+ } else {
+ // Not the pattern we care about.
+ return nullptr;
+ }
+
+ // Match the shl: require LHS be a CallInst (important to avoid
+ // re-matching).
+ Value *ShlLHS = nullptr;
+ ConstantInt *ShiftAmtConst = nullptr;
+ if (!match(MaybeShl,
+ m_Shl(m_Value(ShlLHS), m_ConstantInt(ShiftAmtConst)))) {
+ // Not a shl with constant shift amount.
+ return nullptr;
+ }
+
+ // **Critical**: require the shl LHS to be a CallInst (inner umax). This
+ // prevents the transformation from recreating a pattern that would match
+ // again.
+ CallInst *InnerCall = dyn_cast<CallInst>(ShlLHS);
+ if (!InnerCall) {
+ // LHS of shl is not a call -> don't transform.
+ return nullptr;
+ }
+
+ // Ensure the call is an intrinsic umax (both scalar integer case)
+ Function *CalledF = InnerCall->getCalledFunction();
+ if (!CalledF || CalledF->getIntrinsicID() != Intrinsic::umax) {
+ return nullptr;
+ }
+
+ // Get the inner call operands (the umax args)
+ Value *InnerOp0 = InnerCall->getArgOperand(0);
+ Value *InnerOp1 = InnerCall->getArgOperand(1);
+
+ // Accept both inner orders: umax(x, C1) or umax(C1, x)
+ // Value *X = nullptr;
+ ConstantInt *C1Const = nullptr;
+ if ((match(InnerOp0, m_Value(X)) &&
+ match(InnerOp1, m_ConstantInt(C1Const))) ||
+ (match(InnerOp1, m_Value(X)) &&
+ match(InnerOp0, m_ConstantInt(C1Const)))) {
+ // matched
+ } else {
+ return nullptr;
+ }
+
+ // Compute APInt condition safely
+ unsigned BitWidth = CI.getType()->getIntegerBitWidth();
+ APInt C1Shift = (C1Const->getValue().zextOrTrunc(BitWidth))
+ .shl(ShiftAmtConst->getZExtValue());
+ APInt C3AP = C3Const->getValue().zextOrTrunc(BitWidth);
+
+ // Check condition: C3 >= C1 << shift
+ if (!C3AP.uge(C1Shift)) {
+ // Condition fails => do not transform
+        LLVM_DEBUG(dbgs() << "Condition failed: C3 < C1<<shift. "
+                             "No transform.\n");
+ return nullptr;
+ }
+
+ // Condition true => perform transform
+      LLVM_DEBUG(dbgs() << "Pattern matched and condition true: "
+                           "applying transform\n");
+ IRBuilder<> Builder(&CI);
+
+ // Create new shl: shl nuw X, shift
+ Value *NewShl = Builder.CreateShl(
+ X, ConstantInt::get(X->getType(), ShiftAmtConst->getZExtValue()));
+ if (auto *BO = dyn_cast<BinaryOperator>(NewShl))
+ BO->setHasNoUnsignedWrap(true);
+
+ // Create umax intrinsic declaration for this integer type
+      Function *UMaxDecl = Intrinsic::getOrInsertDeclaration(
+          CI.getModule(), Intrinsic::umax, X->getType());
+ Value *C3Val = ConstantInt::get(X->getType(), C3Const->getZExtValue());
+ Value *NewMax = Builder.CreateCall(UMaxDecl, {NewShl, C3Val});
+
+ // Replace outer call's uses with NewMax and return
+ return replaceInstUsesWith(CI, NewMax);
+ // --- End safe transform block ---
+
+ // If C is not 0:
+ // umax(nuw_shl(x, C), x + 1) -> x == 0 ? 1 : nuw_shl(x, C)
+ // If C is not 0 or 1:
+ // umax(nuw_mul(x, C), x + 1) -> x == 0 ? 1 : nuw_mul(x, C)
+ auto foldMaxMulShift = [&](Value *A, Value *B) -> Instruction * {
+ const APInt *C;
+ Value *X;
+ if (!match(A, m_NUWShl(m_Value(X), m_APInt(C))) &&
+ !(match(A, m_NUWMul(m_Value(X), m_APInt(C))) && !C->isOne()))
+ return nullptr;
+ if (C->isZero())
+ return nullptr;
+ if (!match(B, m_OneUse(m_Add(m_Specific(X), m_One()))))
+ return nullptr;
+
+ Value *Cmp = Builder.CreateICmpEQ(X, ConstantInt::get(X->getType(), 0));
+ Value *NewSelect =
+ Builder.CreateSelect(Cmp, ConstantInt::get(X->getType(), 1), A);
+ return replaceInstUsesWith(*II, NewSelect);
+ };
+
+ if (IID == Intrinsic::umax) {
+ if (Instruction *I = foldMaxMulShift(I0, I1))
+ return I;
+ if (Instruction *I = foldMaxMulShift(I1, I0))
+ return I;
+ }
+
const APInt *C;
Value *X;
if (!match(A, m_NUWShl(m_Value(X), m_APInt(C))) &&
@@ -2198,7 +2420,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
return R;
if (Instruction *NewMinMax = factorizeMinMaxTree(II))
- return NewMinMax;
+ return NewMinMax;
// Try to fold minmax with constant RHS based on range information
if (match(I1, m_APIntAllowPoison(RHSC))) {
@@ -2243,7 +2465,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
if (Instruction *crossLogicOpFold =
- foldBitOrderCrossLogicOp<Intrinsic::bitreverse>(IIOperand, Builder))
+ foldBitOrderCrossLogicOp<Intrinsic::bitreverse>(IIOperand, Builder))
return crossLogicOpFold;
break;
@@ -2276,12 +2498,12 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// bswap(x) -> shift(x) if x has exactly one "active byte"
if (BW - LZ - TZ == 8) {
assert(LZ != TZ && "active byte cannot be in the middle");
- if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
+ if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
return BinaryOperator::CreateNUWShl(
IIOperand, ConstantInt::get(IIOperand->getType(), LZ - TZ));
// -> lshr(x) if the "active byte" is in the high part of x
return BinaryOperator::CreateExactLShr(
- IIOperand, ConstantInt::get(IIOperand->getType(), TZ - LZ));
+ IIOperand, ConstantInt::get(IIOperand->getType(), TZ - LZ));
}
// bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
@@ -2389,7 +2611,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
Module *Mod = II->getModule();
Function *Fshl =
Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::fshl, Ty);
- return CallInst::Create(Fshl, { Op0, Op1, LeftShiftC });
+ return CallInst::Create(Fshl, {Op0, Op1, LeftShiftC});
}
assert(IID == Intrinsic::fshl &&
"All funnel shifts by simple constants should go left");
@@ -2410,7 +2632,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
Module *Mod = II->getModule();
Function *Bswap =
Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::bswap, Ty);
- return CallInst::Create(Bswap, { Op0 });
+ return CallInst::Create(Bswap, {Op0});
}
if (Instruction *BitOp =
matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ true,
@@ -2584,26 +2806,26 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
Value *Arg1 = SI->getRHS();
// Make use of known overflow information.
- OverflowResult OR = computeOverflow(SI->getBinaryOp(), SI->isSigned(),
- Arg0, Arg1, SI);
+ OverflowResult OR =
+ computeOverflow(SI->getBinaryOp(), SI->isSigned(), Arg0, Arg1, SI);
switch (OR) {
- case OverflowResult::MayOverflow:
- break;
- case OverflowResult::NeverOverflows:
- if (SI->isSigned())
- return BinaryOperator::CreateNSW(SI->getBinaryOp(), Arg0, Arg1);
- else
- return BinaryOperator::CreateNUW(SI->getBinaryOp(), Arg0, Arg1);
- case OverflowResult::AlwaysOverflowsLow: {
- unsigned BitWidth = Ty->getScalarSizeInBits();
- APInt Min = APSInt::getMinValue(BitWidth, !SI->isSigned());
- return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Min));
- }
- case OverflowResult::AlwaysOverflowsHigh: {
- unsigned BitWidth = Ty->getScalarSizeInBits();
- APInt Max = APSInt::getMaxValue(BitWidth, !SI->isSigned());
- return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Max));
- }
+ case OverflowResult::MayOverflow:
+ break;
+ case OverflowResult::NeverOverflows:
+ if (SI->isSigned())
+ return BinaryOperator::CreateNSW(SI->getBinaryOp(), Arg0, Arg1);
+ else
+ return BinaryOperator::CreateNUW(SI->getBinaryOp(), Arg0, Arg1);
+ case OverflowResult::AlwaysOverflowsLow: {
+ unsigned BitWidth = Ty->getScalarSizeInBits();
+ APInt Min = APSInt::getMinValue(BitWidth, !SI->isSigned());
+ return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Min));
+ }
+ case OverflowResult::AlwaysOverflowsHigh: {
+ unsigned BitWidth = Ty->getScalarSizeInBits();
+ APInt Max = APSInt::getMaxValue(BitWidth, !SI->isSigned());
+ return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Max));
+ }
}
// usub_sat((sub nuw C, A), C1) -> usub_sat(usub_sat(C, C1), A)
@@ -2625,9 +2847,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (IID == Intrinsic::ssub_sat && match(Arg1, m_Constant(C)) &&
C->isNotMinSignedValue()) {
Value *NegVal = ConstantExpr::getNeg(C);
- return replaceInstUsesWith(
- *II, Builder.CreateBinaryIntrinsic(
- Intrinsic::sadd_sat, Arg0, NegVal));
+ return replaceInstUsesWith(*II, Builder.CreateBinaryIntrinsic(
+ Intrinsic::sadd_sat, Arg0, NegVal));
}
// sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
@@ -2639,8 +2860,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
APInt NewVal;
bool IsUnsigned =
IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat;
- if (Other->getIntrinsicID() == IID &&
- match(Arg1, m_APInt(Val)) &&
+ if (Other->getIntrinsicID() == IID && match(Arg1, m_APInt(Val)) &&
match(Other->getArgOperand(0), m_Value(X)) &&
match(Other->getArgOperand(1), m_APInt(Val2))) {
if (IsUnsigned)
@@ -2847,8 +3067,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
// fma fabs(x), fabs(x), z -> fma x, x, z
- if (match(Src0, m_FAbs(m_Value(X))) &&
- match(Src1, m_FAbs(m_Specific(X)))) {
+ if (match(Src0, m_FAbs(m_Value(X))) && match(Src1, m_FAbs(m_Specific(X)))) {
replaceOperand(*II, 0, X);
replaceOperand(*II, 1, X);
return II;
@@ -2923,8 +3142,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
Value *X;
// fabs (-X) --> fabs (X)
if (match(Arg, m_FNeg(m_Value(X)))) {
- CallInst *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, II);
- return replaceInstUsesWith(CI, Fabs);
+ CallInst *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, II);
+ return replaceInstUsesWith(CI, Fabs);
}
if (match(Arg, m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))) {
@@ -3205,7 +3424,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
case Intrinsic::aarch64_sve_aesd:
case Intrinsic::aarch64_sve_aese: {
Value *DataArg = II->getArgOperand(0);
- Value *KeyArg = II->getArgOperand(1);
+ Value *KeyArg = II->getArgOperand(1);
// Accept zero on either operand.
if (!match(KeyArg, m_ZeroInt()))
@@ -3520,15 +3739,14 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
auto &BOI = II->bundle_op_info_begin()[Idx];
RetainedKnowledge RK =
- llvm::getKnowledgeFromBundle(cast<AssumeInst>(*II), BOI);
+ llvm::getKnowledgeFromBundle(cast<AssumeInst>(*II), BOI);
if (BOI.End - BOI.Begin > 2)
continue; // Prevent reducing knowledge in an align with offset since
// extracting a RetainedKnowledge from them looses offset
// information
- RetainedKnowledge CanonRK =
- llvm::simplifyRetainedKnowledge(cast<AssumeInst>(II), RK,
- &getAssumptionCache(),
- &getDominatorTree());
+ RetainedKnowledge CanonRK = llvm::simplifyRetainedKnowledge(
+ cast<AssumeInst>(II), RK, &getAssumptionCache(),
+ &getDominatorTree());
if (CanonRK == RK)
continue;
if (!CanonRK) {
@@ -4157,7 +4375,8 @@ static Value *optimizeModularFormat(CallInst *CI, IRBuilderBase &B) {
}
Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
- if (!CI->getCalledFunction()) return nullptr;
+ if (!CI->getCalledFunction())
+ return nullptr;
// Skip optimizing notail and musttail calls so
// LibCallSimplifier::optimizeCall doesn't have to preserve those invariants.
@@ -4168,9 +4387,7 @@ Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
auto InstCombineRAUW = [this](Instruction *From, Value *With) {
replaceInstUsesWith(*From, With);
};
- auto InstCombineErase = [this](Instruction *I) {
- eraseInstFromFunction(*I);
- };
+ auto InstCombineErase = [this](Instruction *I) { eraseInstFromFunction(*I); };
LibCallSimplifier Simplifier(DL, &TLI, &DT, &DC, &AC, ORE, BFI, PSI,
InstCombineRAUW, InstCombineErase);
if (Value *With = Simplifier.optimizeCall(CI, Builder)) {
@@ -4551,7 +4768,8 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
Instruction *I = tryOptimizeCall(CI);
// If we changed something return the result, etc. Otherwise let
// the fallthrough check.
- if (I) return eraseInstFromFunction(*I);
+ if (I)
+ return eraseInstFromFunction(*I);
}
if (!Call.use_empty() && !Call.isMustTailCall())
@@ -4693,7 +4911,9 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
OperandBundleDef NewBundle("gc-live", NewLiveGc);
return CallBase::Create(&Call, NewBundle);
}
- default: { break; }
+ default: {
+ break;
+ }
}
return Changed ? &Call : nullptr;
@@ -4754,14 +4974,14 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
if (!Caller->use_empty())
- return false; // Cannot transform this return value.
+ return false; // Cannot transform this return value.
}
if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(
NewRetTy, CallerPAL.getRetAttrs())))
- return false; // Attribute not compatible with transformed value.
+ return false; // Attribute not compatible with transformed value.
}
// If the callbase is an invoke instruction, and the return value is
@@ -4801,14 +5021,14 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
Type *ActTy = (*AI)->getType();
if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
- return false; // Cannot transform this parameter value.
+ return false; // Cannot transform this parameter value.
// Check if there are any incompatible attributes we cannot drop safely.
if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(i))
.overlaps(AttributeFuncs::typeIncompatible(
ParamTy, CallerPAL.getParamAttrs(i),
AttributeFuncs::ASK_UNSAFE_TO_DROP)))
- return false; // Attribute not compatible with transformed value.
+ return false; // Attribute not compatible with transformed value.
if (Call.isInAllocaArgument(i) ||
CallerPAL.hasParamAttr(i, Attribute::Preallocated))
@@ -4884,7 +5104,7 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
if (PTy != (*AI)->getType()) {
// Must promote to pass through va_arg area!
Instruction::CastOps opcode =
- CastInst::getCastOpcode(*AI, false, PTy, false);
+ CastInst::getCastOpcode(*AI, false, PTy, false);
NewArg = Builder.CreateCast(opcode, *AI, PTy);
}
Args.push_back(NewArg);
@@ -4898,7 +5118,7 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
AttributeSet FnAttrs = CallerPAL.getFnAttrs();
if (NewRetTy->isVoidTy())
- Caller->setName(""); // Void type should not have a name.
+ Caller->setName(""); // Void type should not have a name.
assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
"missing argument attributes");
@@ -4989,7 +5209,7 @@ InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
}
if (NestTy) {
- std::vector<Value*> NewArgs;
+ std::vector<Value *> NewArgs;
std::vector<AttributeSet> NewArgAttrs;
NewArgs.reserve(Call.arg_size() + 1);
NewArgAttrs.reserve(Call.arg_size());
@@ -5026,15 +5246,15 @@ InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
// Handle this by synthesizing a new function type, equal to FTy
// with the chain parameter inserted.
- std::vector<Type*> NewTypes;
- NewTypes.reserve(FTy->getNumParams()+1);
+ std::vector<Type *> NewTypes;
+ NewTypes.reserve(FTy->getNumParams() + 1);
// Insert the chain's type into the list of parameter types, which may
// mean appending it.
{
unsigned ArgNo = 0;
FunctionType::param_iterator I = FTy->param_begin(),
- E = FTy->param_end();
+ E = FTy->param_end();
do {
if (ArgNo == NestArgNo)