[llvm] [IndVarSimplify] Introduce `simulateFPIVTripCount` to canonicalize fp loops (PR #169707)
Antonio Frighetto via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 26 10:19:12 PST 2025
https://github.com/antoniofrighetto created https://github.com/llvm/llvm-project/pull/169707
Add support for brute-forcing floating-point IVs by actually evaluating the loop (up to a cut-off) in order to compute its integer trip count. This is desirable when any of the values in the recurrence cannot be represented as an exact integer (e.g., with fractional increments). It is an attempt to further canonicalize loops using floating-point IVs and bring such loops into a form more amenable to SCEV users.
Proofs: https://alive2.llvm.org/ce/z/PeLqcb, https://alive2.llvm.org/ce/z/Pxpzm3.
Missed optimization that GCC catches via cunrolli, featuring a similar bruteforce evaluation: https://godbolt.org/z/E1Ea68W76.
Rebased over: https://github.com/llvm/llvm-project/pull/169706.
>From ed0bc40d4dd2093dac4f91569f3b28a897d88476 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Fri, 21 Nov 2025 18:13:32 +0100
Subject: [PATCH 1/3] [IndVarSimplify] Refactor `handleFloatingPointIV`,
modernize pass (NFC)
`handleFloatingPointIV` is now abstracted out into different routines,
particularly:
- `maybeFloatingPointRecurrence` which establishes whether we handle a
floating-point iv recurrence;
- `tryConvertToIntegerIV` which attempts to convert the fp start, step
and exit values into integer ones;
- `canonicalizeToIntegerIV` which rewrites the recurrence.
Minor opportunity to modernize the code where possible.
---
llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 290 ++++++++++++------
1 file changed, 191 insertions(+), 99 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 19d801acd928e..4b5896b5870b8 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -198,195 +198,265 @@ static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
return true;
}
-// Ensure we stay within the bounds of fp values that can be represented as
-// integers without gaps, which are 2^24 and 2^53 for IEEE-754 single and double
-// precision respectively (both on negative and positive side).
-static bool isRepresentableAsExactInteger(ConstantFP *FPVal, int64_t IntVal) {
- const auto &InitValueFltSema = FPVal->getValueAPF().getSemantics();
- if (!APFloat::isIEEELikeFP(InitValueFltSema))
+/// Ensure we stay within the bounds of fp values that can be represented as
+/// integers without gaps, which are 2^24 and 2^53 for IEEE-754 single and
+/// double precision respectively (both on negative and positive side).
+static bool isRepresentableAsExactInteger(const APFloat &FPVal,
+ int64_t IntVal) {
+ const auto &FltSema = FPVal.getSemantics();
+ if (!APFloat::isIEEELikeFP(FltSema))
return false;
+ return isUIntN(APFloat::semanticsPrecision(FltSema), AbsoluteValue(IntVal));
+}
+
+/// Represents a floating-point induction variable pattern that may be
+/// convertible to integer form.
+struct FloatingPointIV {
+ /// Constant start value: the PHI's incoming value on the non-backedge edge.
+ APFloat InitValue;
+ /// Constant second operand of the fadd that advances the PHI.
+ APFloat IncrValue;
+ /// Constant second operand of the fcmp that tests the incremented value.
+ APFloat ExitValue;
+ /// The fcmp user of the increment that feeds the loop-exiting branch.
+ FCmpInst *Compare;
+ /// The fadd producing the PHI's backedge value.
+ BinaryOperator *Add;
+
+ FloatingPointIV(APFloat Init, APFloat Incr, APFloat Exit, FCmpInst *Compare,
+ BinaryOperator *Add)
+ : InitValue(std::move(Init)), IncrValue(std::move(Incr)),
+ ExitValue(std::move(Exit)), Compare(Compare), Add(Add) {}
+};
+
+/// Represents the integer values for a converted IV.
+///
+/// The values are held as int64_t but are materialized as signed i32
+/// constants when the recurrence is rewritten.
+struct IntegerIV {
+ /// Start value of the replacement integer PHI.
+ int64_t InitValue;
+ /// Step added to the integer PHI on every iteration.
+ int64_t IncrValue;
+ /// Bound the incremented integer value is compared against.
+ int64_t ExitValue;
+ /// Integer predicate used by the replacement icmp.
+ CmpInst::Predicate NewPred;
+};
- return isUIntN(APFloat::semanticsPrecision(InitValueFltSema),
- AbsoluteValue(IntVal));
+/// Map a floating-point comparison predicate onto its signed integer
+/// counterpart. Ordered and unordered variants of the same relation map to the
+/// same integer predicate. Any predicate without a listed counterpart yields
+/// CmpInst::BAD_ICMP_PREDICATE, which callers treat as "cannot convert".
+static CmpInst::Predicate getIntegerPredicate(CmpInst::Predicate FPPred) {
+ switch (FPPred) {
+ case CmpInst::FCMP_OEQ:
+ case CmpInst::FCMP_UEQ:
+ return CmpInst::ICMP_EQ;
+ case CmpInst::FCMP_ONE:
+ case CmpInst::FCMP_UNE:
+ return CmpInst::ICMP_NE;
+ case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_UGT:
+ return CmpInst::ICMP_SGT;
+ case CmpInst::FCMP_OGE:
+ case CmpInst::FCMP_UGE:
+ return CmpInst::ICMP_SGE;
+ case CmpInst::FCMP_OLT:
+ case CmpInst::FCMP_ULT:
+ return CmpInst::ICMP_SLT;
+ case CmpInst::FCMP_OLE:
+ case CmpInst::FCMP_ULE:
+ return CmpInst::ICMP_SLE;
+ default:
+ return CmpInst::BAD_ICMP_PREDICATE;
+ }
}
-/// If the loop has floating induction variable then insert corresponding
-/// integer induction variable if possible.
-/// For example,
-/// for(double i = 0; i < 10000; ++i)
-/// bar(i)
-/// is converted into
-/// for(int i = 0; i < 10000; ++i)
-/// bar((double)i);
-bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
+/// Analyze a PN to determine whether it represents a simple floating-point
+/// induction variable, with constant fp init, increment, and exit values.
+///
+/// The pattern matched is a PHI whose backedge value is `fadd PN, C` with
+/// exactly two uses: the PHI itself and an fcmp against a constant whose only
+/// user is a conditional branch that leaves the loop on one of its successors.
+///
+/// \param L  The loop the PHI belongs to.
+/// \param PN The candidate floating-point PHI.
+///
+/// Returns a FloatingPointIV struct if matched, std::nullopt otherwise.
+static std::optional<FloatingPointIV>
+maybeFloatingPointRecurrence(Loop *L, PHINode *PN) {
+ // Identify incoming and backedge for the PN.
unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
- unsigned BackEdge = IncomingEdge^1;
+ unsigned BackEdge = IncomingEdge ^ 1;
// Check incoming value.
auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
-
- int64_t InitValue;
- if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue) ||
- !isRepresentableAsExactInteger(InitValueVal, InitValue))
- return false;
+ if (!InitValueVal)
+ return std::nullopt;
// Check IV increment. Reject this PN if increment operation is not
// an add or increment value can not be represented by an integer.
auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
- if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
+ if (!Incr || Incr->getOpcode() != Instruction::FAdd)
+ return std::nullopt;
// If this is not an add of the PHI with a constantfp, or if the constant fp
// is not an integer, bail out.
- ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
- int64_t IncValue;
- if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
- !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
- return false;
+ auto *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
+ if (!IncValueVal || Incr->getOperand(0) != PN)
+ return std::nullopt;
// Check Incr uses. One user is PN and the other user is an exit condition
// used by the conditional terminator.
- Value::user_iterator IncrUse = Incr->user_begin();
- Instruction *U1 = cast<Instruction>(*IncrUse++);
- if (IncrUse == Incr->user_end()) return false;
- Instruction *U2 = cast<Instruction>(*IncrUse++);
- if (IncrUse != Incr->user_end()) return false;
+ // TODO: Should relax this, so as to allow any `fpext` that may occur.
+ if (!Incr->hasNUses(2))
+ return std::nullopt;
// Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
// only used by a branch, we can't transform it.
- FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
- if (!Compare)
- Compare = dyn_cast<FCmpInst>(U2);
- if (!Compare || !Compare->hasOneUse() ||
- !isa<BranchInst>(Compare->user_back()))
- return false;
+ auto It = llvm::find_if(Incr->users(),
+ [](const User *U) { return isa<FCmpInst>(U); });
+ if (It == Incr->users().end())
+ return std::nullopt;
- BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
+ FCmpInst *Compare = cast<FCmpInst>(*It);
+ if (!Compare->hasOneUse())
+ return std::nullopt;
// We need to verify that the branch actually controls the iteration count
// of the loop. If not, the new IV can overflow and no one will notice.
// The branch block must be in the loop and one of the successors must be out
// of the loop.
- assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
- if (!L->contains(TheBr->getParent()) ||
- (L->contains(TheBr->getSuccessor(0)) &&
- L->contains(TheBr->getSuccessor(1))))
- return false;
+ // The sole user of the fcmp may be something other than a branch; dyn_cast
+ // yields null in that case, so reject it before touching BI.
+ auto *BI = dyn_cast<BranchInst>(Compare->user_back());
+ assert((!BI || BI->isConditional()) && "Can't use fcmp if not conditional");
+ if (!BI || !L->contains(BI->getParent()) ||
+ (L->contains(BI->getSuccessor(0)) && L->contains(BI->getSuccessor(1))))
+ return std::nullopt;
// If it isn't a comparison with an integer-as-fp (the exit value), we can't
// transform it.
- ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
- int64_t ExitValue;
- if (ExitValueVal == nullptr ||
- !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue) ||
- !isRepresentableAsExactInteger(ExitValueVal, ExitValue))
- return false;
+ auto *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
+ if (!ExitValueVal)
+ return std::nullopt;
- // Find new predicate for integer comparison.
- CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
- switch (Compare->getPredicate()) {
- default: return false; // Unknown comparison.
- case CmpInst::FCMP_OEQ:
- case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
- case CmpInst::FCMP_ONE:
- case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
- case CmpInst::FCMP_OGT:
- case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
- case CmpInst::FCMP_OGE:
- case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
- case CmpInst::FCMP_OLT:
- case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
- case CmpInst::FCMP_OLE:
- case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
- }
+ return FloatingPointIV(InitValueVal->getValueAPF(),
+ IncValueVal->getValueAPF(),
+ ExitValueVal->getValueAPF(), Compare, Incr);
+}
+
+/// Ensure that the floating-point IV can be converted to a semantics-preserving
+/// signed 32-bit integer IV.
+///
+/// Returns a IntegerIV struct if possible, std::nullopt otherwise.
+static std::optional<IntegerIV>
+tryConvertToIntegerIV(const FloatingPointIV &FPIV) {
+ // Convert floating-point predicate to integer.
+ auto NewPred = getIntegerPredicate(FPIV.Compare->getPredicate());
+ if (NewPred == CmpInst::BAD_ICMP_PREDICATE)
+ return std::nullopt;
+
+ // Convert APFloat values to signed integers.
+ int64_t InitValue, IncrValue, ExitValue;
+ if (!ConvertToSInt(FPIV.InitValue, InitValue) ||
+ !ConvertToSInt(FPIV.IncrValue, IncrValue) ||
+ !ConvertToSInt(FPIV.ExitValue, ExitValue))
+ return std::nullopt;
+
+ // Bail out if integers cannot be represented exactly.
+ if (!isRepresentableAsExactInteger(FPIV.InitValue, InitValue) ||
+ !isRepresentableAsExactInteger(FPIV.ExitValue, ExitValue))
+ return std::nullopt;
// We convert the floating point induction variable to a signed i32 value if
- // we can. This is only safe if the comparison will not overflow in a way
- // that won't be trapped by the integer equivalent operations. Check for this
- // now.
+ // we can. This is only safe if the comparison will not overflow in a way that
+ // won't be trapped by the integer equivalent operations. Check for this now.
// TODO: We could use i64 if it is native and the range requires it.
// The start/stride/exit values must all fit in signed i32.
- if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
- return false;
+ if (!isInt<32>(InitValue) || !isInt<32>(IncrValue) || !isInt<32>(ExitValue))
+ return std::nullopt;
// If not actually striding (add x, 0.0), avoid touching the code.
- if (IncValue == 0)
- return false;
+ if (IncrValue == 0)
+ return std::nullopt;
// Positive and negative strides have different safety conditions.
- if (IncValue > 0) {
+ if (IncrValue > 0) {
// If we have a positive stride, we require the init to be less than the
// exit value.
if (InitValue >= ExitValue)
- return false;
+ return std::nullopt;
- uint32_t Range = uint32_t(ExitValue-InitValue);
+ uint32_t Range = uint32_t(ExitValue - InitValue);
// Check for infinite loop, either:
// while (i <= Exit) or until (i > Exit)
if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
- if (++Range == 0) return false; // Range overflows.
+ if (++Range == 0)
+ return std::nullopt; // Range overflows.
}
- unsigned Leftover = Range % uint32_t(IncValue);
+ unsigned Leftover = Range % uint32_t(IncrValue);
// If this is an equality comparison, we require that the strided value
// exactly land on the exit value, otherwise the IV condition will wrap
// around and do things the fp IV wouldn't.
if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
Leftover != 0)
- return false;
+ return std::nullopt;
// If the stride would wrap around the i32 before exiting, we can't
// transform the IV.
- if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
- return false;
+ if (Leftover != 0 && int32_t(ExitValue + IncrValue) < ExitValue)
+ return std::nullopt;
} else {
// If we have a negative stride, we require the init to be greater than the
// exit value.
if (InitValue <= ExitValue)
- return false;
+ return std::nullopt;
- uint32_t Range = uint32_t(InitValue-ExitValue);
+ uint32_t Range = uint32_t(InitValue - ExitValue);
// Check for infinite loop, either:
// while (i >= Exit) or until (i < Exit)
if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
- if (++Range == 0) return false; // Range overflows.
+ if (++Range == 0)
+ return std::nullopt; // Range overflows.
}
- unsigned Leftover = Range % uint32_t(-IncValue);
+ unsigned Leftover = Range % uint32_t(-IncrValue);
// If this is an equality comparison, we require that the strided value
// exactly land on the exit value, otherwise the IV condition will wrap
// around and do things the fp IV wouldn't.
if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
Leftover != 0)
- return false;
+ return std::nullopt;
// If the stride would wrap around the i32 before exiting, we can't
// transform the IV.
- if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
- return false;
+ if (Leftover != 0 && int32_t(ExitValue + IncrValue) > ExitValue)
+ return std::nullopt;
}
+ return IntegerIV{InitValue, IncrValue, ExitValue, NewPred};
+}
+
+/// Rewrite the floating-point IV as an integer IV.
+static void canonicalizeToIntegerIV(Loop *L, PHINode *PN,
+ const FloatingPointIV &FPIV,
+ const IntegerIV &IIV,
+ const TargetLibraryInfo *TLI,
+ std::unique_ptr<MemorySSAUpdater> &MSSAU) {
+ unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
+ unsigned BackEdge = IncomingEdge ^ 1;
+
IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
+ auto *Incr = cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
+ auto *BI = cast<BranchInst>(FPIV.Compare->user_back());
+ assert(Incr && BI);
+
+ LLVM_DEBUG(dbgs() << "INDVARS: Rewriting floating-point IV to integer IV:\n"
+ << " Init: " << IIV.InitValue << "\n"
+ << " Incr: " << IIV.IncrValue << "\n"
+ << " Exit: " << IIV.ExitValue << "\n"
+ << " Pred: " << CmpInst::getPredicateName(IIV.NewPred)
+ << "\n"
+ << " Original PN: " << *PN << "\n");
// Insert new integer induction variable.
PHINode *NewPHI =
PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN->getIterator());
- NewPHI->addIncoming(ConstantInt::getSigned(Int32Ty, InitValue),
+ NewPHI->addIncoming(ConstantInt::getSigned(Int32Ty, IIV.InitValue),
PN->getIncomingBlock(IncomingEdge));
NewPHI->setDebugLoc(PN->getDebugLoc());
Instruction *NewAdd = BinaryOperator::CreateAdd(
- NewPHI, ConstantInt::getSigned(Int32Ty, IncValue),
+ NewPHI, ConstantInt::getSigned(Int32Ty, IIV.IncrValue),
Incr->getName() + ".int", Incr->getIterator());
NewAdd->setDebugLoc(Incr->getDebugLoc());
NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
ICmpInst *NewCompare = new ICmpInst(
- TheBr->getIterator(), NewPred, NewAdd,
- ConstantInt::getSigned(Int32Ty, ExitValue), Compare->getName());
- NewCompare->setDebugLoc(Compare->getDebugLoc());
+ BI->getIterator(), IIV.NewPred, NewAdd,
+ ConstantInt::getSigned(Int32Ty, IIV.ExitValue), FPIV.Compare->getName());
+ NewCompare->setDebugLoc(FPIV.Compare->getDebugLoc());
// In the following deletions, PN may become dead and may be deleted.
// Use a WeakTrackingVH to observe whether this happens.
@@ -394,9 +464,9 @@ bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
// Delete the old floating point exit comparison. The branch starts using the
// new comparison.
- NewCompare->takeName(Compare);
- Compare->replaceAllUsesWith(NewCompare);
- RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
+ NewCompare->takeName(FPIV.Compare);
+ FPIV.Compare->replaceAllUsesWith(NewCompare);
+ RecursivelyDeleteTriviallyDeadInstructions(FPIV.Compare, TLI, MSSAU.get());
// Delete the old floating point increment.
Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
@@ -416,6 +486,28 @@ bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
PN->replaceAllUsesWith(Conv);
RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
}
+}
+
+/// If the loop has a floating induction variable, then insert corresponding
+/// integer induction variable if possible. For example, the following:
+/// for(double i = 0; i < 10000; ++i)
+/// bar(i)
+/// is converted into
+/// for(int i = 0; i < 10000; ++i)
+/// bar((double)i);
+bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
+ // See if the PN matches a floating-point IV pattern.
+ auto FPIV = maybeFloatingPointRecurrence(L, PN);
+ if (!FPIV)
+ return false;
+
+ // Can we safely convert the floating-point values to integer ones?
+ auto IIV = tryConvertToIntegerIV(*FPIV);
+ if (!IIV)
+ return false;
+
+ // Perform the rewriting.
+ canonicalizeToIntegerIV(L, PN, *FPIV, *IIV, TLI, MSSAU);
return true;
}
>From c1b5f2b07b0039db04090bfa73f7716d4c289489 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Wed, 26 Nov 2025 19:15:58 +0100
Subject: [PATCH 2/3] [IndVarSimplify] Precommit tests (NFC)
---
.../IndVarSimplify/floating-point-iv.ll | 211 ++++++++++++++++++
1 file changed, 211 insertions(+)
diff --git a/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll b/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll
index c4933678d0391..4cf98a8c250ea 100644
--- a/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll
+++ b/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll
@@ -553,4 +553,215 @@ exit:
ret void
}
+; Evaluating loop `for (float i = 0; i < 499; i += .49999f) ++rv;`
+; Trip count: 999.
+define i32 @test_fp_simulate_tc_rounded_fadd() {
+; CHECK-LABEL: @test_fp_simulate_tc_rounded_fadd(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[I:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[RV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[RV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[RV_NEXT]] = add i32 [[RV]], 1
+; CHECK-NEXT: [[NEXT]] = fadd float [[I]], 0x3FDFFFD600000000
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[NEXT]], 4.990000e+02
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[RV_NEXT]], [[LOOP]] ]
+; CHECK-NEXT: ret i32 [[PHI]]
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi float [ 0.0, %entry ], [ %next, %loop ]
+ %rv = phi i32 [ 0, %entry ], [ %rv.next, %loop ]
+ %rv.next = add i32 %rv, 1
+ %next = fadd float %i, 0x3FDFFFD600000000
+ %cmp = fcmp olt float %next, 499.0
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ %phi = phi i32 [ %rv.next, %loop ]
+ ret i32 %phi
+}
+
+; Same as above, using double, addition will not get rounded.
+define i32 @test_fp_simulate_tc_exact_fadd_via_double() {
+; CHECK-LABEL: @test_fp_simulate_tc_exact_fadd_via_double(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[I:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[RV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[RV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[RV_NEXT]] = add i32 [[RV]], 1
+; CHECK-NEXT: [[NEXT]] = fadd double [[I]], 0x3FDFFFD600000000
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[NEXT]], 4.990000e+02
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[RV_NEXT]], [[LOOP]] ]
+; CHECK-NEXT: ret i32 [[PHI]]
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi double [ 0.0, %entry ], [ %next, %loop ]
+ %rv = phi i32 [ 0, %entry ], [ %rv.next, %loop ]
+ %rv.next = add i32 %rv, 1
+ %next = fadd double %i, 0x3FDFFFD600000000
+ %cmp = fcmp olt double %next, 499.0
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ %phi = phi i32 [ %rv.next, %loop ]
+ ret i32 %phi
+}
+
+; As above, but %exit branch taken on true.
+; Trip count: 0, exit count: 1, condition always satisfied.
+; While SCEV already infers this, make sure we continue handling this
+; properly when simulating the loop too.
+define i32 @test_fp_simulate_tc_rounded_fadd_inverted_exit() {
+; CHECK-LABEL: @test_fp_simulate_tc_rounded_fadd_inverted_exit(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: call void @opaque()
+; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[LOOP]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi float [ 0.0, %entry ], [ %next, %loop ]
+ call void @opaque()
+ %next = fadd float %i, 0x3FDFFFD600000000
+ %cmp = fcmp une float %next, 499.0
+ br i1 %cmp, label %exit, label %loop
+
+exit:
+ ret i32 0
+}
+
+; As above, inverted condition, %exit branch taken on true.
+; Trip count: 999.
+define i32 @test_fp_simulate_tc_rounded_fadd_inverted_pred_exit() {
+; CHECK-LABEL: @test_fp_simulate_tc_rounded_fadd_inverted_pred_exit(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[I:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[RV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[RV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[RV_NEXT]] = add i32 [[RV]], 1
+; CHECK-NEXT: [[NEXT]] = fadd float [[I]], 0x3FDFFFD600000000
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[NEXT]], 4.990000e+02
+; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK: exit:
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[RV_NEXT]], [[LOOP]] ]
+; CHECK-NEXT: ret i32 [[PHI]]
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi float [ 0.0, %entry ], [ %next, %loop ]
+ %rv = phi i32 [ 0, %entry ], [ %rv.next, %loop ]
+ %rv.next = add i32 %rv, 1
+ %next = fadd float %i, 0x3FDFFFD600000000
+ %cmp = fcmp oge float %next, 499.0
+ br i1 %cmp, label %exit, label %loop
+
+exit:
+ %phi = phi i32 [ %rv.next, %loop ]
+ ret i32 %phi
+}
+
+; Evaluating loop `for (float i = 0; i < 499; i += 125.5f) opaque();`
+; Trip count: 4.
+define i32 @test_fp_simulate_tc_exact_fadd_pred_ult() {
+; CHECK-LABEL: @test_fp_simulate_tc_exact_fadd_pred_ult(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[I:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[LOOP]] ]
+; CHECK-NEXT: call void @opaque()
+; CHECK-NEXT: [[ADD]] = fadd float [[I]], 1.255000e+02
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ult float [[ADD]], 4.990000e+02
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi float [ 0.000000e+00, %entry ], [ %add, %loop ]
+ call void @opaque()
+ %add = fadd float %i, 0x405F600000000000
+ %cmp = fcmp ult float %add, 4.990000e+02
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i32 0
+}
+
+; Evaluating loop `for (float i = 0; i == 499; i += 125.5f) opaque();`
+; Trip count: 0, exit count: 1, condition never satisfied.
+; While SCEV already infers this, make sure we continue handling this
+; properly when simulating the loop too.
+define i32 @test_fp_simulate_tc_exact_fadd_pred_ueq() {
+; CHECK-LABEL: @test_fp_simulate_tc_exact_fadd_pred_ueq(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: call void @opaque()
+; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi float [ 0.000000e+00, %entry ], [ %add, %loop ]
+ call void @opaque()
+ %add = fadd float %i, 0x405F600000000000
+ %cmp = fcmp ueq float %add, 4.990000e+02
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i32 0
+}
+
+; Negative test, fast-math flags are present.
+define i32 @test_fp_simulate_tc_exact_fadd_fcmp_flags() {
+; CHECK-LABEL: @test_fp_simulate_tc_exact_fadd_fcmp_flags(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[I:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[LOOP]] ]
+; CHECK-NEXT: call void @opaque()
+; CHECK-NEXT: [[ADD]] = fadd reassoc float [[I]], 1.255000e+02
+; CHECK-NEXT: [[CMP:%.*]] = fcmp fast olt float [[ADD]], 4.990000e+02
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi float [ 0.000000e+00, %entry ], [ %add, %loop ]
+ call void @opaque()
+ %add = fadd reassoc float %i, 0x405F600000000000
+ %cmp = fcmp fast olt float %add, 4.990000e+02
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i32 0
+}
+
declare void @opaque()
>From 406b975c2653abed8e0247ec9c10cc9fe46080ac Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Wed, 26 Nov 2025 19:16:45 +0100
Subject: [PATCH 3/3] [IndVarSimplify] Introduce `simulateFPIVTripCount` to
canonicalize fp loops
Add support for brute-forcing floating-point IVs by actually evaluating
the loop (up to a cut-off) in order to compute its integer trip count.
This is desirable when any of the values in the recurrence cannot be
represented as an exact integer (e.g., with fractional increments). It
is an attempt to further canonicalize loops using floating-point IVs
and bring such loops into a form more amenable to SCEV users.
Proofs: https://alive2.llvm.org/ce/z/PeLqcb, https://alive2.llvm.org/ce/z/Pxpzm3.
---
llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 97 ++++++++++++++++++-
.../IndVarSimplify/floating-point-iv.ll | 36 ++-----
2 files changed, 104 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 4b5896b5870b8..eac72d6ec5318 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -126,6 +126,10 @@ static cl::opt<bool>
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
cl::desc("Allow widening of indvars to eliminate s/zext"));
+static cl::opt<unsigned> MaxTripCountIterations(
+ "indvars-fp-tripcount-max-iters", cl::Hidden, cl::init(1000),
+ cl::desc("Max number of iterations to brute force integer trip count"));
+
namespace {
class IndVarSimplify {
@@ -418,6 +422,91 @@ tryConvertToIntegerIV(const FloatingPointIV &FPIV) {
return IntegerIV{InitValue, IncrValue, ExitValue, NewPred};
}
+/// Simulate floating-point loop execution to determine exact trip count.
+///
+/// The exit compare of the matched recurrence reads the incremented value
+/// (the fadd), not the PHI. Each simulated iteration therefore performs the
+/// addition first and only then evaluates the exit condition, so the body is
+/// always counted at least once and the start value itself is never tested.
+///
+/// \param L    The loop owning the recurrence; used to tell which successor
+///             of the exit branch leaves the loop.
+/// \param FPIV The floating-point recurrence to evaluate.
+///
+/// Returns an IntegerIV representation if successful, std::nullopt otherwise
+/// (fast-math flags present, non-finite or zero operands, unsupported
+/// predicate, a non-finite intermediate sum, or cut-off reached).
+static std::optional<IntegerIV>
+simulateFPIVTripCount(Loop *L, const FloatingPointIV &FPIV) {
+  auto EvaluateFCmpPred = [](FCmpInst::Predicate Pred,
+                             APFloat::cmpResult CmpRes) -> std::optional<bool> {
+    switch (Pred) {
+    case FCmpInst::FCMP_OLT:
+    case FCmpInst::FCMP_ULT:
+      return CmpRes == APFloat::cmpLessThan;
+    case FCmpInst::FCMP_OLE:
+    case FCmpInst::FCMP_ULE:
+      return CmpRes == APFloat::cmpLessThan || CmpRes == APFloat::cmpEqual;
+    case FCmpInst::FCMP_OGT:
+    case FCmpInst::FCMP_UGT:
+      return CmpRes == APFloat::cmpGreaterThan;
+    case FCmpInst::FCMP_OGE:
+    case FCmpInst::FCMP_UGE:
+      return CmpRes == APFloat::cmpGreaterThan || CmpRes == APFloat::cmpEqual;
+    case FCmpInst::FCMP_OEQ:
+    case FCmpInst::FCMP_UEQ:
+      return CmpRes == APFloat::cmpEqual;
+    case FCmpInst::FCMP_ONE:
+    case FCmpInst::FCMP_UNE:
+      return CmpRes != APFloat::cmpEqual;
+    default:
+      return std::nullopt;
+    }
+  };
+
+  // Conservatively bail out if fast-math flags are present.
+  // TODO: Should possibly reject only specific flags.
+  if (FPIV.Add->getFastMathFlags().any() ||
+      FPIV.Compare->getFastMathFlags().any())
+    return std::nullopt;
+
+  APFloat Current = FPIV.InitValue;
+  const APFloat &Increment = FPIV.IncrValue;
+  const APFloat &Limit = FPIV.ExitValue;
+  // Do not continue if handling non-finite values or zero increment.
+  if (!Current.isFinite() || !Increment.isFinite() || !Limit.isFinite() ||
+      Increment.isZero())
+    return std::nullopt;
+
+  // Normalize the predicate so that "true" always means "stay in the loop".
+  FCmpInst::Predicate FPred = FPIV.Compare->getPredicate();
+  auto *BI = cast<BranchInst>(FPIV.Compare->user_back());
+  bool ExitOnTrue = !L->contains(BI->getSuccessor(0));
+  if (ExitOnTrue)
+    FPred = FPIV.Compare->getInversePredicate();
+
+  int64_t TripCount = 0;
+  bool Exited = false;
+  while (!Exited && TripCount < MaxTripCountIterations) {
+    // The exit test reads the incremented value, so step before comparing.
+    APFloat::opStatus Status =
+        Current.add(Increment, APFloat::rmNearestTiesToEven);
+    if (!Current.isFinite() ||
+        (Status != APFloat::opOK && Status != APFloat::opInexact))
+      return std::nullopt;
+    ++TripCount;
+
+    auto Res = EvaluateFCmpPred(FPred, Current.compare(Limit));
+    if (!Res)
+      return std::nullopt;
+
+    // If comparison turns out to be false, we found an exact trip count.
+    Exited = !*Res;
+  }
+
+  // Cut-off reached without ever observing the loop exit.
+  if (!Exited)
+    return std::nullopt;
+
+  LLVM_DEBUG(dbgs() << "INDVARS: Simulated FP loop, found trip count: "
+                    << TripCount << "\n");
+
+  // Stride always fixed to 1, the trip count (always >= 1 here, since the
+  // body runs before the first exit test) is our exit value. Account for the
+  // inversion as well, the new integer predicate depends on the exit branch.
+  return IntegerIV{0, 1, TripCount,
+                   ExitOnTrue ? CmpInst::ICMP_SGE : CmpInst::ICMP_SLT};
+}
+
/// Rewrite the floating-point IV as an integer IV.
static void canonicalizeToIntegerIV(Loop *L, PHINode *PN,
const FloatingPointIV &FPIV,
@@ -503,8 +592,12 @@ bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
// Can we safely convert the floating-point values to integer ones?
auto IIV = tryConvertToIntegerIV(*FPIV);
- if (!IIV)
- return false;
+ if (!IIV) {
+ // As a last try, brute force the integer trip count by running the loop.
+ IIV = simulateFPIVTripCount(L, *FPIV);
+ if (!IIV)
+ return false;
+ }
// Perform the rewriting.
canonicalizeToIntegerIV(L, PN, *FPIV, *IIV, TLI, MSSAU);
diff --git a/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll b/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll
index 4cf98a8c250ea..e4e7e7756a62f 100644
--- a/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll
+++ b/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll
@@ -560,15 +560,9 @@ define i32 @test_fp_simulate_tc_rounded_fadd() {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[I:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[RV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[RV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[RV_NEXT]] = add i32 [[RV]], 1
-; CHECK-NEXT: [[NEXT]] = fadd float [[I]], 0x3FDFFFD600000000
-; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[NEXT]], 4.990000e+02
-; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
-; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[RV_NEXT]], [[LOOP]] ]
-; CHECK-NEXT: ret i32 [[PHI]]
+; CHECK-NEXT: ret i32 999
;
entry:
br label %loop
@@ -592,15 +586,9 @@ define i32 @test_fp_simulate_tc_exact_fadd_via_double() {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[I:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[RV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[RV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[RV_NEXT]] = add i32 [[RV]], 1
-; CHECK-NEXT: [[NEXT]] = fadd double [[I]], 0x3FDFFFD600000000
-; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[NEXT]], 4.990000e+02
-; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
-; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[RV_NEXT]], [[LOOP]] ]
-; CHECK-NEXT: ret i32 [[PHI]]
+; CHECK-NEXT: ret i32 999
;
entry:
br label %loop
@@ -653,15 +641,9 @@ define i32 @test_fp_simulate_tc_rounded_fadd_inverted_pred_exit() {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[I:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[RV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[RV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[RV_NEXT]] = add i32 [[RV]], 1
-; CHECK-NEXT: [[NEXT]] = fadd float [[I]], 0x3FDFFFD600000000
-; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[NEXT]], 4.990000e+02
-; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
-; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[RV_NEXT]], [[LOOP]] ]
-; CHECK-NEXT: ret i32 [[PHI]]
+; CHECK-NEXT: ret i32 999
;
entry:
br label %loop
@@ -686,10 +668,10 @@ define i32 @test_fp_simulate_tc_exact_fadd_pred_ult() {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[I:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[I_INT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_INT:%.*]], [[LOOP]] ]
; CHECK-NEXT: call void @opaque()
-; CHECK-NEXT: [[ADD]] = fadd float [[I]], 1.255000e+02
-; CHECK-NEXT: [[CMP:%.*]] = fcmp ult float [[ADD]], 4.990000e+02
+; CHECK-NEXT: [[ADD_INT]] = add nuw nsw i32 [[I_INT]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp samesign ult i32 [[ADD_INT]], 4
; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret i32 0
More information about the llvm-commits
mailing list