[llvm] [IndVarSimplify] Introduce `simulateFPIVTripCount` to canonicalize fp loops (PR #169707)

Wed Nov 26 10:19:12 PST 2025

https://github.com/antoniofrighetto created https://github.com/llvm/llvm-project/pull/169707

Add support to brute-force floating-point IVs, by actually evaluating the loop (up to a cut-off), in order to compute its integer trip count. This is desirable when any of the values in the recurrence cannot be represented as an exact integer (e.g., with fractional increments). The goal is to further canonicalize loops using floating-point IVs and bring such loops into a form more amenable to SCEV users.

Proofs: https://alive2.llvm.org/ce/z/PeLqcb, https://alive2.llvm.org/ce/z/Pxpzm3.

Missed optimization that GCC catches via cunrolli, featuring a similar bruteforce evaluation: https://godbolt.org/z/E1Ea68W76.

Rebased over: https://github.com/llvm/llvm-project/pull/169706.

>From ed0bc40d4dd2093dac4f91569f3b28a897d88476 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Fri, 21 Nov 2025 18:13:32 +0100
Subject: [PATCH 1/3] [IndVarSimplify] Refactor `handleFloatingPointIV`,
 modernize pass (NFC)

`handleFloatingPointIV` is now abstracted out into different routines,
particularly:
- `maybeFloatingPointRecurrence` which establishes whether we handle a
  floating-point iv recurrence;
- `tryConvertToIntegerIV` which attempts to convert the fp start, step
  and exit values into integer ones;
- `canonicalizeToIntegerIV` which rewrites the recurrence.

Minor opportunity to modernize the code where possible.
---
 llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 290 ++++++++++++------
 1 file changed, 191 insertions(+), 99 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 19d801acd928e..4b5896b5870b8 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -198,195 +198,265 @@ static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
   return true;
 }
 
-// Ensure we stay within the bounds of fp values that can be represented as
-// integers without gaps, which are 2^24 and 2^53 for IEEE-754 single and double
-// precision respectively (both on negative and positive side).
-static bool isRepresentableAsExactInteger(ConstantFP *FPVal, int64_t IntVal) {
-  const auto &InitValueFltSema = FPVal->getValueAPF().getSemantics();
-  if (!APFloat::isIEEELikeFP(InitValueFltSema))
+/// Ensure we stay within the bounds of fp values that can be represented as
+/// integers without gaps, which are 2^24 and 2^53 for IEEE-754 single and
+/// double precision respectively (both on negative and positive side).
+static bool isRepresentableAsExactInteger(const APFloat &FPVal,
+                                          int64_t IntVal) {
+  const auto &FltSema = FPVal.getSemantics();
+  if (!APFloat::isIEEELikeFP(FltSema))
     return false;
+  return isUIntN(APFloat::semanticsPrecision(FltSema), AbsoluteValue(IntVal));
+}
+
+/// Represents a floating-point induction variable pattern that may be
+/// convertible to integer form.
+struct FloatingPointIV {
+  APFloat InitValue; // Constant start value of the PHI.
+  APFloat IncrValue; // Constant added by the fadd on each iteration.
+  APFloat ExitValue; // Constant the exit fcmp compares against.
+  FCmpInst *Compare; // Exit fcmp, which reads the incremented value.
+  BinaryOperator *Add; // The fadd feeding the PHI's backedge.
+
+  FloatingPointIV(APFloat Init, APFloat Incr, APFloat Exit, FCmpInst *Compare,
+                  BinaryOperator *Add)
+      : InitValue(std::move(Init)), IncrValue(std::move(Incr)),
+        ExitValue(std::move(Exit)), Compare(Compare), Add(Add) {}
+};
+
+/// Represents the integer values for a converted IV.
+struct IntegerIV {
+  int64_t InitValue; // Start value of the new integer PHI.
+  int64_t IncrValue; // Step added to the integer IV each iteration.
+  int64_t ExitValue; // Bound the new icmp tests the stepped IV against.
+  CmpInst::Predicate NewPred; // Integer predicate used by the new icmp.
+};
 
-  return isUIntN(APFloat::semanticsPrecision(InitValueFltSema),
-                 AbsoluteValue(IntVal));
+static CmpInst::Predicate getIntegerPredicate(CmpInst::Predicate FPPred) {
+  switch (FPPred) {
+  case CmpInst::FCMP_OEQ: // Ordered/unordered pairs share one signed icmp.
+  case CmpInst::FCMP_UEQ:
+    return CmpInst::ICMP_EQ;
+  case CmpInst::FCMP_ONE:
+  case CmpInst::FCMP_UNE:
+    return CmpInst::ICMP_NE;
+  case CmpInst::FCMP_OGT:
+  case CmpInst::FCMP_UGT:
+    return CmpInst::ICMP_SGT;
+  case CmpInst::FCMP_OGE:
+  case CmpInst::FCMP_UGE:
+    return CmpInst::ICMP_SGE;
+  case CmpInst::FCMP_OLT:
+  case CmpInst::FCMP_ULT:
+    return CmpInst::ICMP_SLT;
+  case CmpInst::FCMP_OLE:
+  case CmpInst::FCMP_ULE:
+    return CmpInst::ICMP_SLE;
+  default: // Any other fcmp predicate has no mapping; callers must bail out.
+    return CmpInst::BAD_ICMP_PREDICATE;
+  }
 }
 
-/// If the loop has floating induction variable then insert corresponding
-/// integer induction variable if possible.
-/// For example,
-/// for(double i = 0; i < 10000; ++i)
-///   bar(i)
-/// is converted into
-/// for(int i = 0; i < 10000; ++i)
-///   bar((double)i);
-bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
+/// Match PN against the simple floating-point induction pattern handled here:
+/// a constant fp start, an `fadd PN, C` on the backedge, and an fcmp of that
+/// fadd against a constant whose only user is the loop-exiting branch.
+/// Returns a FloatingPointIV struct if matched, std::nullopt otherwise.
+static std::optional<FloatingPointIV>
+maybeFloatingPointRecurrence(Loop *L, PHINode *PN) {
+  // Identify incoming and backedge for the PN.
   unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
-  unsigned BackEdge     = IncomingEdge^1;
+  unsigned BackEdge = IncomingEdge ^ 1;
 
   // Check incoming value.
   auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
-
-  int64_t InitValue;
-  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue) ||
-      !isRepresentableAsExactInteger(InitValueVal, InitValue))
-    return false;
+  if (!InitValueVal)
+    return std::nullopt;
 
   // Check IV increment. Reject this PN if increment operation is not
   // an add or increment value can not be represented by an integer.
   auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
-  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
+  if (!Incr || Incr->getOpcode() != Instruction::FAdd)
+    return std::nullopt;
 
   // If this is not an add of the PHI with a constantfp, or if the constant fp
   // is not an integer, bail out.
-  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
-  int64_t IncValue;
-  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
-      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
-    return false;
+  auto *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
+  if (!IncValueVal || Incr->getOperand(0) != PN)
+    return std::nullopt;
 
   // Check Incr uses. One user is PN and the other user is an exit condition
   // used by the conditional terminator.
-  Value::user_iterator IncrUse = Incr->user_begin();
-  Instruction *U1 = cast<Instruction>(*IncrUse++);
-  if (IncrUse == Incr->user_end()) return false;
-  Instruction *U2 = cast<Instruction>(*IncrUse++);
-  if (IncrUse != Incr->user_end()) return false;
+  // TODO: Should relax this, so as to allow any `fpext` that may occur.
+  if (!Incr->hasNUses(2))
+    return std::nullopt;
 
   // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
   // only used by a branch, we can't transform it.
-  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
-  if (!Compare)
-    Compare = dyn_cast<FCmpInst>(U2);
-  if (!Compare || !Compare->hasOneUse() ||
-      !isa<BranchInst>(Compare->user_back()))
-    return false;
+  auto It = llvm::find_if(Incr->users(),
+                          [](const User *U) { return isa<FCmpInst>(U); });
+  if (It == Incr->users().end())
+    return std::nullopt;
 
-  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
+  FCmpInst *Compare = cast<FCmpInst>(*It);
+  if (!Compare->hasOneUse())
+    return std::nullopt;
 
   // We need to verify that the branch actually controls the iteration count
   // of the loop.  If not, the new IV can overflow and no one will notice.
   // The branch block must be in the loop and one of the successors must be out
   // of the loop.
-  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
-  if (!L->contains(TheBr->getParent()) ||
-      (L->contains(TheBr->getSuccessor(0)) &&
-       L->contains(TheBr->getSuccessor(1))))
-    return false;
+  // The fcmp's single user may be a non-branch (e.g. a select): bail out then.
+  auto *BI = dyn_cast<BranchInst>(Compare->user_back());
+  if (!BI || !BI->isConditional() || !L->contains(BI->getParent()) ||
+      (L->contains(BI->getSuccessor(0)) && L->contains(BI->getSuccessor(1))))
+    return std::nullopt;
 
   // If it isn't a comparison with an integer-as-fp (the exit value), we can't
   // transform it.
-  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
-  int64_t ExitValue;
-  if (ExitValueVal == nullptr ||
-      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue) ||
-      !isRepresentableAsExactInteger(ExitValueVal, ExitValue))
-    return false;
+  auto *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
+  if (!ExitValueVal)
+    return std::nullopt;
 
-  // Find new predicate for integer comparison.
-  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
-  switch (Compare->getPredicate()) {
-  default: return false;  // Unknown comparison.
-  case CmpInst::FCMP_OEQ:
-  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
-  case CmpInst::FCMP_ONE:
-  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
-  case CmpInst::FCMP_OGT:
-  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
-  case CmpInst::FCMP_OGE:
-  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
-  case CmpInst::FCMP_OLT:
-  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
-  case CmpInst::FCMP_OLE:
-  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
-  }
+  return FloatingPointIV(InitValueVal->getValueAPF(),
+                         IncValueVal->getValueAPF(),
+                         ExitValueVal->getValueAPF(), Compare, Incr);
+}
+
+/// Ensure that the floating-point IV can be converted to a semantics-preserving
+/// signed 32-bit integer IV.
+///
+/// Returns an IntegerIV struct if possible, std::nullopt otherwise.
+static std::optional<IntegerIV>
+tryConvertToIntegerIV(const FloatingPointIV &FPIV) {
+  // Convert floating-point predicate to integer.
+  auto NewPred = getIntegerPredicate(FPIV.Compare->getPredicate());
+  if (NewPred == CmpInst::BAD_ICMP_PREDICATE)
+    return std::nullopt;
+
+  // Convert APFloat values to signed integers.
+  int64_t InitValue, IncrValue, ExitValue;
+  if (!ConvertToSInt(FPIV.InitValue, InitValue) ||
+      !ConvertToSInt(FPIV.IncrValue, IncrValue) ||
+      !ConvertToSInt(FPIV.ExitValue, ExitValue))
+    return std::nullopt;
+
+  // Bail out if integers cannot be represented exactly.
+  if (!isRepresentableAsExactInteger(FPIV.InitValue, InitValue) ||
+      !isRepresentableAsExactInteger(FPIV.ExitValue, ExitValue))
+    return std::nullopt;
 
   // We convert the floating point induction variable to a signed i32 value if
-  // we can.  This is only safe if the comparison will not overflow in a way
-  // that won't be trapped by the integer equivalent operations.  Check for this
-  // now.
+  // we can. This is only safe if the comparison will not overflow in a way that
+  // won't be trapped by the integer equivalent operations. Check for this now.
   // TODO: We could use i64 if it is native and the range requires it.
 
   // The start/stride/exit values must all fit in signed i32.
-  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
-    return false;
+  if (!isInt<32>(InitValue) || !isInt<32>(IncrValue) || !isInt<32>(ExitValue))
+    return std::nullopt;
 
   // If not actually striding (add x, 0.0), avoid touching the code.
-  if (IncValue == 0)
-    return false;
+  if (IncrValue == 0)
+    return std::nullopt;
 
   // Positive and negative strides have different safety conditions.
-  if (IncValue > 0) {
+  if (IncrValue > 0) {
     // If we have a positive stride, we require the init to be less than the
     // exit value.
     if (InitValue >= ExitValue)
-      return false;
+      return std::nullopt;
 
-    uint32_t Range = uint32_t(ExitValue-InitValue);
+    uint32_t Range = uint32_t(ExitValue - InitValue);
     // Check for infinite loop, either:
     // while (i <= Exit) or until (i > Exit)
     if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
-      if (++Range == 0) return false;  // Range overflows.
+      if (++Range == 0)
+        return std::nullopt; // Range overflows.
     }
 
-    unsigned Leftover = Range % uint32_t(IncValue);
+    unsigned Leftover = Range % uint32_t(IncrValue);
 
     // If this is an equality comparison, we require that the strided value
     // exactly land on the exit value, otherwise the IV condition will wrap
     // around and do things the fp IV wouldn't.
     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
         Leftover != 0)
-      return false;
+      return std::nullopt;
 
     // If the stride would wrap around the i32 before exiting, we can't
     // transform the IV.
-    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
-      return false;
+    if (Leftover != 0 && int32_t(ExitValue + IncrValue) < ExitValue)
+      return std::nullopt;
   } else {
     // If we have a negative stride, we require the init to be greater than the
     // exit value.
     if (InitValue <= ExitValue)
-      return false;
+      return std::nullopt;
 
-    uint32_t Range = uint32_t(InitValue-ExitValue);
+    uint32_t Range = uint32_t(InitValue - ExitValue);
     // Check for infinite loop, either:
     // while (i >= Exit) or until (i < Exit)
     if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
-      if (++Range == 0) return false;  // Range overflows.
+      if (++Range == 0)
+        return std::nullopt; // Range overflows.
     }
 
-    unsigned Leftover = Range % uint32_t(-IncValue);
+    unsigned Leftover = Range % uint32_t(-IncrValue);
 
     // If this is an equality comparison, we require that the strided value
     // exactly land on the exit value, otherwise the IV condition will wrap
     // around and do things the fp IV wouldn't.
     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
         Leftover != 0)
-      return false;
+      return std::nullopt;
 
     // If the stride would wrap around the i32 before exiting, we can't
     // transform the IV.
-    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
-      return false;
+    if (Leftover != 0 && int32_t(ExitValue + IncrValue) > ExitValue)
+      return std::nullopt;
   }
 
+  return IntegerIV{InitValue, IncrValue, ExitValue, NewPred};
+}
+
+/// Rewrite the floating-point IV as an integer IV.
+static void canonicalizeToIntegerIV(Loop *L, PHINode *PN,
+                                    const FloatingPointIV &FPIV,
+                                    const IntegerIV &IIV,
+                                    const TargetLibraryInfo *TLI,
+                                    std::unique_ptr<MemorySSAUpdater> &MSSAU) {
+  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
+  unsigned BackEdge = IncomingEdge ^ 1;
+
   IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
+  auto *Incr = cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
+  auto *BI = cast<BranchInst>(FPIV.Compare->user_back());
+  assert(Incr && BI);
+
+  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting floating-point IV to integer IV:\n"
+                    << "   Init: " << IIV.InitValue << "\n"
+                    << "   Incr: " << IIV.IncrValue << "\n"
+                    << "   Exit: " << IIV.ExitValue << "\n"
+                    << "   Pred: " << CmpInst::getPredicateName(IIV.NewPred)
+                    << "\n"
+                    << "  Original PN: " << *PN << "\n");
 
   // Insert new integer induction variable.
   PHINode *NewPHI =
       PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN->getIterator());
-  NewPHI->addIncoming(ConstantInt::getSigned(Int32Ty, InitValue),
+  NewPHI->addIncoming(ConstantInt::getSigned(Int32Ty, IIV.InitValue),
                       PN->getIncomingBlock(IncomingEdge));
   NewPHI->setDebugLoc(PN->getDebugLoc());
 
   Instruction *NewAdd = BinaryOperator::CreateAdd(
-      NewPHI, ConstantInt::getSigned(Int32Ty, IncValue),
+      NewPHI, ConstantInt::getSigned(Int32Ty, IIV.IncrValue),
       Incr->getName() + ".int", Incr->getIterator());
   NewAdd->setDebugLoc(Incr->getDebugLoc());
   NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
 
   ICmpInst *NewCompare = new ICmpInst(
-      TheBr->getIterator(), NewPred, NewAdd,
-      ConstantInt::getSigned(Int32Ty, ExitValue), Compare->getName());
-  NewCompare->setDebugLoc(Compare->getDebugLoc());
+      BI->getIterator(), IIV.NewPred, NewAdd,
+      ConstantInt::getSigned(Int32Ty, IIV.ExitValue), FPIV.Compare->getName());
+  NewCompare->setDebugLoc(FPIV.Compare->getDebugLoc());
 
   // In the following deletions, PN may become dead and may be deleted.
   // Use a WeakTrackingVH to observe whether this happens.
@@ -394,9 +464,9 @@ bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
 
   // Delete the old floating point exit comparison.  The branch starts using the
   // new comparison.
-  NewCompare->takeName(Compare);
-  Compare->replaceAllUsesWith(NewCompare);
-  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
+  NewCompare->takeName(FPIV.Compare);
+  FPIV.Compare->replaceAllUsesWith(NewCompare);
+  RecursivelyDeleteTriviallyDeadInstructions(FPIV.Compare, TLI, MSSAU.get());
 
   // Delete the old floating point increment.
   Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
@@ -416,6 +486,28 @@ bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
     PN->replaceAllUsesWith(Conv);
     RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
   }
+}
+
+/// If the loop has a floating induction variable, then insert corresponding
+/// integer induction variable if possible. For example, the following:
+/// for(double i = 0; i < 10000; ++i)
+///   bar(i)
+/// is converted into
+/// for(int i = 0; i < 10000; ++i)
+///   bar((double)i);
+bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
+  // See if the PN matches a floating-point IV pattern.
+  auto FPIV = maybeFloatingPointRecurrence(L, PN);
+  if (!FPIV)
+    return false;
+
+  // Can we safely convert the floating-point values to integer ones?
+  auto IIV = tryConvertToIntegerIV(*FPIV);
+  if (!IIV)
+    return false;
+
+  // Perform the rewriting.
+  canonicalizeToIntegerIV(L, PN, *FPIV, *IIV, TLI, MSSAU);
   return true;
 }
 

>From c1b5f2b07b0039db04090bfa73f7716d4c289489 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Wed, 26 Nov 2025 19:15:58 +0100
Subject: [PATCH 2/3] [IndVarSimplify] Precommit tests (NFC)

---
 .../IndVarSimplify/floating-point-iv.ll       | 211 ++++++++++++++++++
 1 file changed, 211 insertions(+)

diff --git a/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll b/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll
index c4933678d0391..4cf98a8c250ea 100644
--- a/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll
+++ b/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll
@@ -553,4 +553,215 @@ exit:
   ret void
 }
 
+; Evaluating loop `for (float i = 0; i < 499; i += .49999f) ++rv;`
+; Trip count: 999.
+define i32 @test_fp_simulate_tc_rounded_fadd() {
+; CHECK-LABEL: @test_fp_simulate_tc_rounded_fadd(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[RV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[RV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[RV_NEXT]] = add i32 [[RV]], 1
+; CHECK-NEXT:    [[NEXT]] = fadd float [[I]], 0x3FDFFFD600000000
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt float [[NEXT]], 4.990000e+02
+; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ [[RV_NEXT]], [[LOOP]] ]
+; CHECK-NEXT:    ret i32 [[PHI]]
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi float [ 0.0, %entry ], [ %next, %loop ]
+  %rv = phi i32 [ 0, %entry ], [ %rv.next, %loop ]
+  %rv.next = add i32 %rv, 1
+  %next = fadd float %i, 0x3FDFFFD600000000
+  %cmp = fcmp olt float %next, 499.0
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  %phi = phi i32 [ %rv.next, %loop ]
+  ret i32 %phi
+}
+
+; Same as above, using double, addition will not get rounded.
+define i32 @test_fp_simulate_tc_exact_fadd_via_double() {
+; CHECK-LABEL: @test_fp_simulate_tc_exact_fadd_via_double(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[RV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[RV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[RV_NEXT]] = add i32 [[RV]], 1
+; CHECK-NEXT:    [[NEXT]] = fadd double [[I]], 0x3FDFFFD600000000
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt double [[NEXT]], 4.990000e+02
+; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ [[RV_NEXT]], [[LOOP]] ]
+; CHECK-NEXT:    ret i32 [[PHI]]
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi double [ 0.0, %entry ], [ %next, %loop ]
+  %rv = phi i32 [ 0, %entry ], [ %rv.next, %loop ]
+  %rv.next = add i32 %rv, 1
+  %next = fadd double %i, 0x3FDFFFD600000000
+  %cmp = fcmp olt double %next, 499.0
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  %phi = phi i32 [ %rv.next, %loop ]
+  ret i32 %phi
+}
+
+; As above, but %exit branch taken on true.
+; Trip count: 0, exit count: 1, condition always satisfied.
+; While SCEV already infers this, make sure we continue handling this
+; properly when simulating the loop too.
+define i32 @test_fp_simulate_tc_rounded_fadd_inverted_exit() {
+; CHECK-LABEL: @test_fp_simulate_tc_rounded_fadd_inverted_exit(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    call void @opaque()
+; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi float [ 0.0, %entry ], [ %next, %loop ]
+  call void @opaque()
+  %next = fadd float %i, 0x3FDFFFD600000000
+  %cmp = fcmp une float %next, 499.0
+  br i1 %cmp, label %exit, label %loop
+
+exit:
+  ret i32 0
+}
+
+; As above, inverted condition, %exit branch taken on true.
+; Trip count: 999.
+define i32 @test_fp_simulate_tc_rounded_fadd_inverted_pred_exit() {
+; CHECK-LABEL: @test_fp_simulate_tc_rounded_fadd_inverted_pred_exit(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[RV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[RV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[RV_NEXT]] = add i32 [[RV]], 1
+; CHECK-NEXT:    [[NEXT]] = fadd float [[I]], 0x3FDFFFD600000000
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp oge float [[NEXT]], 4.990000e+02
+; CHECK-NEXT:    br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ [[RV_NEXT]], [[LOOP]] ]
+; CHECK-NEXT:    ret i32 [[PHI]]
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi float [ 0.0, %entry ], [ %next, %loop ]
+  %rv = phi i32 [ 0, %entry ], [ %rv.next, %loop ]
+  %rv.next = add i32 %rv, 1
+  %next = fadd float %i, 0x3FDFFFD600000000
+  %cmp = fcmp oge float %next, 499.0
+  br i1 %cmp, label %exit, label %loop
+
+exit:
+  %phi = phi i32 [ %rv.next, %loop ]
+  ret i32 %phi
+}
+
+; Evaluating loop `for (float i = 0; i < 499; i += 125.5f) opaque();`
+; Trip count: 4.
+define i32 @test_fp_simulate_tc_exact_fadd_pred_ult() {
+; CHECK-LABEL: @test_fp_simulate_tc_exact_fadd_pred_ult(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    call void @opaque()
+; CHECK-NEXT:    [[ADD]] = fadd float [[I]], 1.255000e+02
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ult float [[ADD]], 4.990000e+02
+; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi float [ 0.000000e+00, %entry ], [ %add, %loop ]
+  call void @opaque()
+  %add = fadd float %i, 0x405F600000000000
+  %cmp = fcmp ult float %add, 4.990000e+02
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 0
+}
+
+; Evaluating loop `for (float i = 0; i == 499; i += 125.5f) opaque();`
+; Trip count: 0, exit count: 1, condition never satisfied.
+; While SCEV already infers this, make sure we continue handling this
+; properly when simulating the loop too.
+define i32 @test_fp_simulate_tc_exact_fadd_pred_ueq() {
+; CHECK-LABEL: @test_fp_simulate_tc_exact_fadd_pred_ueq(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    call void @opaque()
+; CHECK-NEXT:    br i1 false, label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi float [ 0.000000e+00, %entry ], [ %add, %loop ]
+  call void @opaque()
+  %add = fadd float %i, 0x405F600000000000
+  %cmp = fcmp ueq float %add, 4.990000e+02
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 0
+}
+
+; Negative test, fast-math flags are present.
+define i32 @test_fp_simulate_tc_exact_fadd_fcmp_flags() {
+; CHECK-LABEL: @test_fp_simulate_tc_exact_fadd_fcmp_flags(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    call void @opaque()
+; CHECK-NEXT:    [[ADD]] = fadd reassoc float [[I]], 1.255000e+02
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp fast olt float [[ADD]], 4.990000e+02
+; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi float [ 0.000000e+00, %entry ], [ %add, %loop ]
+  call void @opaque()
+  %add = fadd reassoc float %i, 0x405F600000000000
+  %cmp = fcmp fast olt float %add, 4.990000e+02
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 0
+}
+
 declare void @opaque()

>From 406b975c2653abed8e0247ec9c10cc9fe46080ac Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Wed, 26 Nov 2025 19:16:45 +0100
Subject: [PATCH 3/3] [IndVarSimplify] Introduce `simulateFPIVTripCount` to
 canonicalize fp loops

Add support to brute-force floating-point IVs, by actually evaluating
the loop (up to cut-off), in order to compute its integer trip count.
This should be desirable when any of the values in the recurrence cannot
be represented as an exact integer (e.g., with fractional increments);
in an attempt to further canonicalize loops using floating-point IVs,
and bring such loops in a more amenable form to SCEV users.

Proofs: https://alive2.llvm.org/ce/z/PeLqcb, https://alive2.llvm.org/ce/z/Pxpzm3.
---
 llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 97 ++++++++++++++++++-
 .../IndVarSimplify/floating-point-iv.ll       | 36 ++-----
 2 files changed, 104 insertions(+), 29 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 4b5896b5870b8..eac72d6ec5318 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -126,6 +126,10 @@ static cl::opt<bool>
 AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                 cl::desc("Allow widening of indvars to eliminate s/zext"));
 
+static cl::opt<unsigned> MaxTripCountIterations(
+    "indvars-fp-tripcount-max-iters", cl::Hidden, cl::init(1000),
+    cl::desc("Max number of iterations to brute force integer trip count"));
+
 namespace {
 
 class IndVarSimplify {
@@ -418,6 +422,91 @@ tryConvertToIntegerIV(const FloatingPointIV &FPIV) {
   return IntegerIV{InitValue, IncrValue, ExitValue, NewPred};
 }
 
+/// Simulate floating-point loop execution to determine exact trip count.
+/// The matched recurrence is bottom-tested: the exit fcmp reads the already
+/// incremented value, so the body runs at least once.
+///
+/// Returns an IntegerIV representation if successful, std::nullopt otherwise.
+static std::optional<IntegerIV>
+simulateFPIVTripCount(Loop *L, const FloatingPointIV &FPIV) {
+  auto EvaluateFCmpPred = [](FCmpInst::Predicate Pred,
+                             APFloat::cmpResult CmpRes) -> std::optional<bool> {
+    switch (Pred) {
+    case FCmpInst::FCMP_OLT:
+    case FCmpInst::FCMP_ULT:
+      return CmpRes == APFloat::cmpLessThan;
+    case FCmpInst::FCMP_OLE:
+    case FCmpInst::FCMP_ULE:
+      return CmpRes == APFloat::cmpLessThan || CmpRes == APFloat::cmpEqual;
+    case FCmpInst::FCMP_OGT:
+    case FCmpInst::FCMP_UGT:
+      return CmpRes == APFloat::cmpGreaterThan;
+    case FCmpInst::FCMP_OGE:
+    case FCmpInst::FCMP_UGE:
+      return CmpRes == APFloat::cmpGreaterThan || CmpRes == APFloat::cmpEqual;
+    case FCmpInst::FCMP_OEQ:
+    case FCmpInst::FCMP_UEQ:
+      return CmpRes == APFloat::cmpEqual;
+    case FCmpInst::FCMP_ONE:
+    case FCmpInst::FCMP_UNE:
+      return CmpRes != APFloat::cmpEqual;
+    default:
+      return std::nullopt;
+    }
+  };
+
+  // Conservatively bail out if fast-math flags are present.
+  // TODO: Should possibly reject only specific flags.
+  if (FPIV.Add->getFastMathFlags().any() ||
+      FPIV.Compare->getFastMathFlags().any())
+    return std::nullopt;
+
+  APFloat Current = FPIV.InitValue;
+  const APFloat &Increment = FPIV.IncrValue;
+  const APFloat &Limit = FPIV.ExitValue;
+  // Do not continue if handling non-finite values or zero increment.
+  if (!Current.isFinite() || !Increment.isFinite() || !Limit.isFinite() ||
+      Increment.isZero())
+    return std::nullopt;
+
+  FCmpInst::Predicate FPred = FPIV.Compare->getPredicate();
+  auto *BI = cast<BranchInst>(FPIV.Compare->user_back());
+  bool ExitOnTrue = !L->contains(BI->getSuccessor(0));
+  if (ExitOnTrue)
+    FPred = FPIV.Compare->getInversePredicate();
+
+  // Replay the loop: bump the IV first, then test the bumped value, exactly
+  // as the IR does. TripCount is the number of times the body executes.
+  int64_t TripCount = 0;
+  bool FoundExit = false;
+  while (!FoundExit && TripCount < MaxTripCountIterations) {
+    APFloat::opStatus Status =
+        Current.add(Increment, APFloat::rmNearestTiesToEven);
+    if (!Current.isFinite() ||
+        (Status != APFloat::opOK && Status != APFloat::opInexact))
+      return std::nullopt;
+    auto Res = EvaluateFCmpPred(FPred, Current.compare(Limit));
+    if (!Res)
+      return std::nullopt;
+
+    // A false (continue-)condition means this iteration leaves the loop.
+    ++TripCount;
+    FoundExit = !*Res;
+  }
+
+  // Give up past the cut-off, or if the count cannot fit the new i32 IV.
+  if (!FoundExit || !isInt<32>(TripCount))
+    return std::nullopt;
+
+  LLVM_DEBUG(dbgs() << "INDVARS: Simulated FP loop, found trip count: "
+                    << TripCount << "\n");
+
+  // Stride is fixed to 1 and the trip count (>= 1) is the exit value. The new
+  // integer predicate depends on which branch successor leaves the loop.
+  return IntegerIV{0, 1, TripCount,
+                   ExitOnTrue ? CmpInst::ICMP_SGE : CmpInst::ICMP_SLT};
+}
+
 /// Rewrite the floating-point IV as an integer IV.
 static void canonicalizeToIntegerIV(Loop *L, PHINode *PN,
                                     const FloatingPointIV &FPIV,
@@ -503,8 +592,12 @@ bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
 
   // Can we safely convert the floating-point values to integer ones?
   auto IIV = tryConvertToIntegerIV(*FPIV);
-  if (!IIV)
-    return false;
+  if (!IIV) {
+    // As a last try, brute force the integer trip count by running the loop.
+    IIV = simulateFPIVTripCount(L, *FPIV);
+    if (!IIV)
+      return false;
+  }
 
   // Perform the rewriting.
   canonicalizeToIntegerIV(L, PN, *FPIV, *IIV, TLI, MSSAU);
diff --git a/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll b/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll
index 4cf98a8c250ea..e4e7e7756a62f 100644
--- a/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll
+++ b/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll
@@ -560,15 +560,9 @@ define i32 @test_fp_simulate_tc_rounded_fadd() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[I:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[RV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[RV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[RV_NEXT]] = add i32 [[RV]], 1
-; CHECK-NEXT:    [[NEXT]] = fadd float [[I]], 0x3FDFFFD600000000
-; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt float [[NEXT]], 4.990000e+02
-; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK-NEXT:    br i1 false, label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ [[RV_NEXT]], [[LOOP]] ]
-; CHECK-NEXT:    ret i32 [[PHI]]
+; CHECK-NEXT:    ret i32 999
 ;
 entry:
   br label %loop
@@ -592,15 +586,9 @@ define i32 @test_fp_simulate_tc_exact_fadd_via_double() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[I:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[RV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[RV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[RV_NEXT]] = add i32 [[RV]], 1
-; CHECK-NEXT:    [[NEXT]] = fadd double [[I]], 0x3FDFFFD600000000
-; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt double [[NEXT]], 4.990000e+02
-; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK-NEXT:    br i1 false, label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ [[RV_NEXT]], [[LOOP]] ]
-; CHECK-NEXT:    ret i32 [[PHI]]
+; CHECK-NEXT:    ret i32 999
 ;
 entry:
   br label %loop
@@ -653,15 +641,9 @@ define i32 @test_fp_simulate_tc_rounded_fadd_inverted_pred_exit() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[I:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[RV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[RV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[RV_NEXT]] = add i32 [[RV]], 1
-; CHECK-NEXT:    [[NEXT]] = fadd float [[I]], 0x3FDFFFD600000000
-; CHECK-NEXT:    [[CMP:%.*]] = fcmp oge float [[NEXT]], 4.990000e+02
-; CHECK-NEXT:    br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[LOOP]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ [[RV_NEXT]], [[LOOP]] ]
-; CHECK-NEXT:    ret i32 [[PHI]]
+; CHECK-NEXT:    ret i32 999
 ;
 entry:
   br label %loop
@@ -686,10 +668,10 @@ define i32 @test_fp_simulate_tc_exact_fadd_pred_ult() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[I:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[I_INT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_INT:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    call void @opaque()
-; CHECK-NEXT:    [[ADD]] = fadd float [[I]], 1.255000e+02
-; CHECK-NEXT:    [[CMP:%.*]] = fcmp ult float [[ADD]], 4.990000e+02
+; CHECK-NEXT:    [[ADD_INT]] = add nuw nsw i32 [[I_INT]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp samesign ult i32 [[ADD_INT]], 4
 ; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret i32 0