[llvm] [SCEV] Rewrite more SCEVAddExpr when applying guards. (PR #159942)

Sat Oct 11 14:26:01 PDT 2025

https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/159942

>From 956297ac49519d154b277362a44c55011871c76d Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sat, 11 Oct 2025 21:29:36 +0100
Subject: [PATCH 1/4] [SCEV] Improve handling of divisibility information from
 loop guards.

At the moment, the effectivness of guards that contain divisibility
information (A % B == 0 ) depends on the order of the conditions.

This patch makes using divisibility information independent of the
order, by collecting and applying the divisibility information
separately.

We first collect all conditions in a vector, then collect the
divisibility information from all guards.

When processing other guards, we apply divisibility info collected
earlier.

After all guards have been processed, we add the divisibility info,
rewriting the existing rewrite. This ensures we apply the divisibility
info to the largest rewrite expression.

This helps to improve results in a few cases, one in
https://github.com/dtcxzyw/llvm-opt-benchmark/pull/2921 and another one
in a different large C/C++ based IR corpus.
---
 llvm/include/llvm/Analysis/ScalarEvolution.h  |   8 +-
 llvm/lib/Analysis/ScalarEvolution.cpp         | 290 ++++++++++--------
 .../IndVarSimplify/loop-guard-order.ll        |   5 +-
 3 files changed, 171 insertions(+), 132 deletions(-)

diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 8876e4ed6ae4f..76800d43828c2 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -763,6 +763,10 @@ class ScalarEvolution {
   getUMinFromMismatchedTypes(SmallVectorImpl<const SCEV *> &Ops,
                              bool Sequential = false);
 
+  /// Try to match the pattern generated by getURemExpr(A, B). If successful,
+  /// Assign A and B to LHS and RHS, respectively.
+  LLVM_ABI bool matchURem(const SCEV *Expr, const SCEV *&LHS, const SCEV *&RHS);
+
   /// Transitively follow the chain of pointer-type operands until reaching a
   /// SCEV that does not have a single pointer operand. This returns a
   /// SCEVUnknown pointer for well-formed pointer-type expressions, but corner
@@ -2316,10 +2320,6 @@ class ScalarEvolution {
   /// an add rec on said loop.
   void getUsedLoops(const SCEV *S, SmallPtrSetImpl<const Loop *> &LoopsUsed);
 
-  /// Try to match the pattern generated by getURemExpr(A, B). If successful,
-  /// Assign A and B to LHS and RHS, respectively.
-  LLVM_ABI bool matchURem(const SCEV *Expr, const SCEV *&LHS, const SCEV *&RHS);
-
   /// Look for a SCEV expression with type `SCEVType` and operands `Ops` in
   /// `UniqueSCEVs`.  Return if found, else nullptr.
   SCEV *findExistingSCEVInCache(SCEVTypes SCEVType, ArrayRef<const SCEV *> Ops);
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 30bcff7c14923..656ab8d0bdb1a 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -15557,6 +15557,123 @@ void ScalarEvolution::LoopGuards::collectFromPHI(
   }
 }
 
+// Checks whether Expr is a non-negative constant, and Divisor is a positive
+// constant, and returns their APInt in ExprVal and in DivisorVal.
+static bool getNonNegExprAndPosDivisor(const SCEV *Expr, const SCEV *Divisor,
+                                       APInt &ExprVal, APInt &DivisorVal) {
+  auto *ConstExpr = dyn_cast<SCEVConstant>(Expr);
+  auto *ConstDivisor = dyn_cast<SCEVConstant>(Divisor);
+  if (!ConstExpr || !ConstDivisor)
+    return false;
+  ExprVal = ConstExpr->getAPInt();
+  DivisorVal = ConstDivisor->getAPInt();
+  return ExprVal.isNonNegative() && !DivisorVal.isNonPositive();
+}
+
+// Return a new SCEV that modifies \p Expr to the closest number divisible by
+// \p Divisor and less than or equal to Expr.
+// For now, only handle constant Expr and Divisor.
+static const SCEV *getPreviousSCEVDivisibleByDivisor(const SCEV *Expr,
+                                                     const SCEV *Divisor,
+                                                     ScalarEvolution &SE) {
+  APInt ExprVal;
+  APInt DivisorVal;
+  if (!getNonNegExprAndPosDivisor(Expr, Divisor, ExprVal, DivisorVal))
+    return Expr;
+  APInt Rem = ExprVal.urem(DivisorVal);
+  // return the SCEV: Expr - Expr % Divisor
+  return SE.getConstant(ExprVal - Rem);
+}
+
+// Return a new SCEV that modifies \p Expr to the closest number divisible by
+// \p Divisor and greater than or equal to Expr.
+// For now, only handle constant Expr and Divisor.
+static const SCEV *getNextSCEVDivisibleByDivisor(const SCEV *Expr,
+                                                 const SCEV *Divisor,
+                                                 ScalarEvolution &SE) {
+  APInt ExprVal;
+  APInt DivisorVal;
+  if (!getNonNegExprAndPosDivisor(Expr, Divisor, ExprVal, DivisorVal))
+    return Expr;
+  APInt Rem = ExprVal.urem(DivisorVal);
+  if (!Rem.isZero())
+    // return the SCEV: Expr + Divisor - Expr % Divisor
+    return SE.getConstant(ExprVal + DivisorVal - Rem);
+  return Expr;
+}
+
+static bool collectDivisibilityInformation(
+    ICmpInst::Predicate Predicate, const SCEV *LHS, const SCEV *RHS,
+    DenseMap<const SCEV *, const SCEV *> &DivInfo,
+    DenseMap<const SCEV *, const SCEV *> &Multiples, ScalarEvolution &SE) {
+  // If we have LHS == 0, check if LHS is computing a property of some unknown
+  // SCEV %v which we can rewrite %v to express explicitly.
+  if (Predicate != CmpInst::ICMP_EQ || !match(RHS, m_scev_Zero()))
+    return false;
+  // If LHS is A % B, i.e. A % B == 0, rewrite A to (A /u B) * B to
+  // explicitly express that.
+  const SCEV *URemLHS = nullptr;
+  const SCEV *URemRHS = nullptr;
+  if (!SE.matchURem(LHS, URemLHS, URemRHS))
+    return false;
+  if (const SCEVUnknown *LHSUnknown = dyn_cast<SCEVUnknown>(URemLHS)) {
+    const auto *Multiple = SE.getMulExpr(SE.getUDivExpr(LHS, URemRHS), URemRHS);
+    DivInfo[LHSUnknown] = Multiple;
+    Multiples[LHSUnknown] = URemRHS;
+    return true;
+  }
+  return false;
+}
+
+// Check if the condition is a divisibility guard (A % B == 0).
+static bool isDivisibilityGuard(const SCEV *LHS, const SCEV *RHS,
+                                ScalarEvolution &SE) {
+  const SCEV *X, *Y;
+  return SE.matchURem(LHS, X, Y) && RHS->isZero();
+}
+
+// Apply divisibility by \p Divisor on MinMaxExpr with constant values,
+// recursively. This is done by aligning up/down the constant value to the
+// Divisor.
+static const SCEV *applyDivisibilityOnMinMaxExpr(const SCEV *MinMaxExpr,
+                                                 const SCEV *Divisor,
+                                                 ScalarEvolution &SE) {
+  // Return true if \p Expr is a MinMax SCEV expression with a non-negative
+  // constant operand. If so, return in \p SCTy the SCEV type and in \p RHS
+  // the non-constant operand and in \p LHS the constant operand.
+  auto IsMinMaxSCEVWithNonNegativeConstant =
+      [](const SCEV *Expr, SCEVTypes &SCTy, const SCEV *&LHS,
+         const SCEV *&RHS) {
+        if (auto *MinMax = dyn_cast<SCEVMinMaxExpr>(Expr)) {
+          if (MinMax->getNumOperands() != 2)
+            return false;
+          if (auto *C = dyn_cast<SCEVConstant>(MinMax->getOperand(0))) {
+            if (C->getAPInt().isNegative())
+              return false;
+            SCTy = MinMax->getSCEVType();
+            LHS = MinMax->getOperand(0);
+            RHS = MinMax->getOperand(1);
+            return true;
+          }
+        }
+        return false;
+      };
+
+  const SCEV *MinMaxLHS = nullptr, *MinMaxRHS = nullptr;
+  SCEVTypes SCTy;
+  if (!IsMinMaxSCEVWithNonNegativeConstant(MinMaxExpr, SCTy, MinMaxLHS,
+                                           MinMaxRHS))
+    return MinMaxExpr;
+  auto IsMin = isa<SCEVSMinExpr>(MinMaxExpr) || isa<SCEVUMinExpr>(MinMaxExpr);
+  assert(SE.isKnownNonNegative(MinMaxLHS) && "Expected non-negative operand!");
+  auto *DivisibleExpr =
+      IsMin ? getPreviousSCEVDivisibleByDivisor(MinMaxLHS, Divisor, SE)
+            : getNextSCEVDivisibleByDivisor(MinMaxLHS, Divisor, SE);
+  SmallVector<const SCEV *> Ops = {
+      applyDivisibilityOnMinMaxExpr(MinMaxRHS, Divisor, SE), DivisibleExpr};
+  return SE.getMinMaxExpr(SCTy, Ops);
+}
+
 void ScalarEvolution::LoopGuards::collectFromBlock(
     ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards,
     const BasicBlock *Block, const BasicBlock *Pred,
@@ -15567,19 +15684,14 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
   SmallVector<const SCEV *> ExprsToRewrite;
   auto CollectCondition = [&](ICmpInst::Predicate Predicate, const SCEV *LHS,
                               const SCEV *RHS,
-                              DenseMap<const SCEV *, const SCEV *>
-                                  &RewriteMap) {
+                              DenseMap<const SCEV *, const SCEV *> &RewriteMap,
+                              const DenseMap<const SCEV *, const SCEV *>
+                                  &DivInfo) {
     // WARNING: It is generally unsound to apply any wrap flags to the proposed
     // replacement SCEV which isn't directly implied by the structure of that
     // SCEV.  In particular, using contextual facts to imply flags is *NOT*
     // legal.  See the scoping rules for flags in the header to understand why.
 
-    // If LHS is a constant, apply information to the other expression.
-    if (isa<SCEVConstant>(LHS)) {
-      std::swap(LHS, RHS);
-      Predicate = CmpInst::getSwappedPredicate(Predicate);
-    }
-
     // Check for a condition of the form (-C1 + X < C2).  InstCombine will
     // create this form when combining two checks of the form (X u< C2 + C1) and
     // (X >=u C1).
@@ -15612,115 +15724,6 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
     if (MatchRangeCheckIdiom())
       return;
 
-    // Return true if \p Expr is a MinMax SCEV expression with a non-negative
-    // constant operand. If so, return in \p SCTy the SCEV type and in \p RHS
-    // the non-constant operand and in \p LHS the constant operand.
-    auto IsMinMaxSCEVWithNonNegativeConstant =
-        [&](const SCEV *Expr, SCEVTypes &SCTy, const SCEV *&LHS,
-            const SCEV *&RHS) {
-          if (auto *MinMax = dyn_cast<SCEVMinMaxExpr>(Expr)) {
-            if (MinMax->getNumOperands() != 2)
-              return false;
-            if (auto *C = dyn_cast<SCEVConstant>(MinMax->getOperand(0))) {
-              if (C->getAPInt().isNegative())
-                return false;
-              SCTy = MinMax->getSCEVType();
-              LHS = MinMax->getOperand(0);
-              RHS = MinMax->getOperand(1);
-              return true;
-            }
-          }
-          return false;
-        };
-
-    // Checks whether Expr is a non-negative constant, and Divisor is a positive
-    // constant, and returns their APInt in ExprVal and in DivisorVal.
-    auto GetNonNegExprAndPosDivisor = [&](const SCEV *Expr, const SCEV *Divisor,
-                                          APInt &ExprVal, APInt &DivisorVal) {
-      auto *ConstExpr = dyn_cast<SCEVConstant>(Expr);
-      auto *ConstDivisor = dyn_cast<SCEVConstant>(Divisor);
-      if (!ConstExpr || !ConstDivisor)
-        return false;
-      ExprVal = ConstExpr->getAPInt();
-      DivisorVal = ConstDivisor->getAPInt();
-      return ExprVal.isNonNegative() && !DivisorVal.isNonPositive();
-    };
-
-    // Return a new SCEV that modifies \p Expr to the closest number divides by
-    // \p Divisor and greater or equal than Expr.
-    // For now, only handle constant Expr and Divisor.
-    auto GetNextSCEVDividesByDivisor = [&](const SCEV *Expr,
-                                           const SCEV *Divisor) {
-      APInt ExprVal;
-      APInt DivisorVal;
-      if (!GetNonNegExprAndPosDivisor(Expr, Divisor, ExprVal, DivisorVal))
-        return Expr;
-      APInt Rem = ExprVal.urem(DivisorVal);
-      if (!Rem.isZero())
-        // return the SCEV: Expr + Divisor - Expr % Divisor
-        return SE.getConstant(ExprVal + DivisorVal - Rem);
-      return Expr;
-    };
-
-    // Return a new SCEV that modifies \p Expr to the closest number divides by
-    // \p Divisor and less or equal than Expr.
-    // For now, only handle constant Expr and Divisor.
-    auto GetPreviousSCEVDividesByDivisor = [&](const SCEV *Expr,
-                                               const SCEV *Divisor) {
-      APInt ExprVal;
-      APInt DivisorVal;
-      if (!GetNonNegExprAndPosDivisor(Expr, Divisor, ExprVal, DivisorVal))
-        return Expr;
-      APInt Rem = ExprVal.urem(DivisorVal);
-      // return the SCEV: Expr - Expr % Divisor
-      return SE.getConstant(ExprVal - Rem);
-    };
-
-    // Apply divisibilty by \p Divisor on MinMaxExpr with constant values,
-    // recursively. This is done by aligning up/down the constant value to the
-    // Divisor.
-    std::function<const SCEV *(const SCEV *, const SCEV *)>
-        ApplyDivisibiltyOnMinMaxExpr = [&](const SCEV *MinMaxExpr,
-                                           const SCEV *Divisor) {
-          const SCEV *MinMaxLHS = nullptr, *MinMaxRHS = nullptr;
-          SCEVTypes SCTy;
-          if (!IsMinMaxSCEVWithNonNegativeConstant(MinMaxExpr, SCTy, MinMaxLHS,
-                                                   MinMaxRHS))
-            return MinMaxExpr;
-          auto IsMin =
-              isa<SCEVSMinExpr>(MinMaxExpr) || isa<SCEVUMinExpr>(MinMaxExpr);
-          assert(SE.isKnownNonNegative(MinMaxLHS) &&
-                 "Expected non-negative operand!");
-          auto *DivisibleExpr =
-              IsMin ? GetPreviousSCEVDividesByDivisor(MinMaxLHS, Divisor)
-                    : GetNextSCEVDividesByDivisor(MinMaxLHS, Divisor);
-          SmallVector<const SCEV *> Ops = {
-              ApplyDivisibiltyOnMinMaxExpr(MinMaxRHS, Divisor), DivisibleExpr};
-          return SE.getMinMaxExpr(SCTy, Ops);
-        };
-
-    // If we have LHS == 0, check if LHS is computing a property of some unknown
-    // SCEV %v which we can rewrite %v to express explicitly.
-    if (Predicate == CmpInst::ICMP_EQ && match(RHS, m_scev_Zero())) {
-      // If LHS is A % B, i.e. A % B == 0, rewrite A to (A /u B) * B to
-      // explicitly express that.
-      const SCEV *URemLHS = nullptr;
-      const SCEV *URemRHS = nullptr;
-      if (SE.matchURem(LHS, URemLHS, URemRHS)) {
-        if (const SCEVUnknown *LHSUnknown = dyn_cast<SCEVUnknown>(URemLHS)) {
-          auto I = RewriteMap.find(LHSUnknown);
-          const SCEV *RewrittenLHS =
-              I != RewriteMap.end() ? I->second : LHSUnknown;
-          RewrittenLHS = ApplyDivisibiltyOnMinMaxExpr(RewrittenLHS, URemRHS);
-          const auto *Multiple =
-              SE.getMulExpr(SE.getUDivExpr(RewrittenLHS, URemRHS), URemRHS);
-          RewriteMap[LHSUnknown] = Multiple;
-          ExprsToRewrite.push_back(LHSUnknown);
-          return;
-        }
-      }
-    }
-
     // Do not apply information for constants or if RHS contains an AddRec.
     if (isa<SCEVConstant>(LHS) || SE.containsAddRecurrence(RHS))
       return;
@@ -15751,7 +15754,11 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
 
     const SCEV *RewrittenLHS = GetMaybeRewritten(LHS);
     const SCEV *DividesBy = nullptr;
-    const APInt &Multiple = SE.getConstantMultiple(RewrittenLHS);
+    // Apply divisibility information when computing the constant multiple.
+    LoopGuards DivGuards(SE);
+    DivGuards.RewriteMap = DivInfo;
+    const APInt &Multiple =
+        SE.getConstantMultiple(DivGuards.rewrite(RewrittenLHS));
     if (!Multiple.isOne())
       DividesBy = SE.getConstant(Multiple);
 
@@ -15775,21 +15782,23 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
         [[fallthrough]];
       case CmpInst::ICMP_SLT: {
         RHS = SE.getMinusSCEV(RHS, One);
-        RHS = DividesBy ? GetPreviousSCEVDividesByDivisor(RHS, DividesBy) : RHS;
+        RHS = DividesBy ? getPreviousSCEVDivisibleByDivisor(RHS, DividesBy, SE)
+                        : RHS;
         break;
       }
       case CmpInst::ICMP_UGT:
       case CmpInst::ICMP_SGT:
         RHS = SE.getAddExpr(RHS, One);
-        RHS = DividesBy ? GetNextSCEVDividesByDivisor(RHS, DividesBy) : RHS;
+        RHS = DividesBy ? getNextSCEVDivisibleByDivisor(RHS, DividesBy, SE) : RHS;
         break;
       case CmpInst::ICMP_ULE:
       case CmpInst::ICMP_SLE:
-        RHS = DividesBy ? GetPreviousSCEVDividesByDivisor(RHS, DividesBy) : RHS;
+        RHS = DividesBy ? getPreviousSCEVDivisibleByDivisor(RHS, DividesBy, SE)
+                        : RHS;
         break;
       case CmpInst::ICMP_UGE:
       case CmpInst::ICMP_SGE:
-        RHS = DividesBy ? GetNextSCEVDividesByDivisor(RHS, DividesBy) : RHS;
+        RHS = DividesBy ? getNextSCEVDivisibleByDivisor(RHS, DividesBy, SE) : RHS;
         break;
       default:
         break;
@@ -15843,7 +15852,8 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
       case CmpInst::ICMP_NE:
         if (match(RHS, m_scev_Zero())) {
           const SCEV *OneAlignedUp =
-              DividesBy ? GetNextSCEVDividesByDivisor(One, DividesBy) : One;
+              DividesBy ? getNextSCEVDivisibleByDivisor(One, DividesBy, SE)
+                        : One;
           To = SE.getUMaxExpr(FromRewritten, OneAlignedUp);
         }
         break;
@@ -15916,8 +15926,11 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
 
   // Now apply the information from the collected conditions to
   // Guards.RewriteMap. Conditions are processed in reverse order, so the
-  // earliest conditions is processed first. This ensures the SCEVs with the
+  // earliest conditions is processed first, except guards with divisibility
+  // information, which are moved to the back. This ensures the SCEVs with the
   // shortest dependency chains are constructed first.
+  SmallVector<std::tuple<CmpInst::Predicate, const SCEV *, const SCEV *>>
+      GuardsToProcess;
   for (auto [Term, EnterIfTrue] : reverse(Terms)) {
     SmallVector<Value *, 8> Worklist;
     SmallPtrSet<Value *, 8> Visited;
@@ -15932,7 +15945,12 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
             EnterIfTrue ? Cmp->getPredicate() : Cmp->getInversePredicate();
         const auto *LHS = SE.getSCEV(Cmp->getOperand(0));
         const auto *RHS = SE.getSCEV(Cmp->getOperand(1));
-        CollectCondition(Predicate, LHS, RHS, Guards.RewriteMap);
+        // If LHS is a constant, apply information to the other expression.
+        if (isa<SCEVConstant>(LHS)) {
+          std::swap(LHS, RHS);
+          Predicate = CmpInst::getSwappedPredicate(Predicate);
+        }
+        GuardsToProcess.emplace_back(Predicate, LHS, RHS);
         continue;
       }
 
@@ -15945,6 +15963,28 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
     }
   }
 
+  // Process divisibility guards in reverse order to populate DivInfo early.
+  DenseMap<const SCEV *, const SCEV *> Multiples;
+  DenseMap<const SCEV *, const SCEV *> DivInfo;
+  for (const auto &[Predicate, LHS, RHS] : GuardsToProcess) {
+    if (!isDivisibilityGuard(LHS, RHS, SE))
+      continue;
+    collectDivisibilityInformation(Predicate, LHS, RHS, DivInfo, Multiples, SE);
+  }
+
+  for (const auto &[Predicate, LHS, RHS] : GuardsToProcess)
+    CollectCondition(Predicate, LHS, RHS, Guards.RewriteMap, DivInfo);
+
+  // Apply divisibility information last. This ensures it is applied to the
+  // outermost expression after other rewrites for the given value.
+  for (const auto &[K, V] : Multiples) {
+    Guards.RewriteMap[K] = SE.getMulExpr(
+        SE.getUDivExpr(applyDivisibilityOnMinMaxExpr(Guards.rewrite(K), V, SE),
+                       V),
+        V);
+    ExprsToRewrite.push_back(K);
+  }
+
   // Let the rewriter preserve NUW/NSW flags if the unsigned/signed ranges of
   // the replacement expressions are contained in the ranges of the replaced
   // expressions.
diff --git a/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll b/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll
index 14ee00d77197c..2763860e79875 100644
--- a/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll
+++ b/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll
@@ -114,7 +114,7 @@ define i32 @urem_order1(i32 %n) {
 ; CHECK:       [[LOOP]]:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
 ; CHECK-NEXT:    call void @foo()
-; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 3
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw i32 [[IV]], 3
 ; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP]]
 ; CHECK:       [[EXIT_LOOPEXIT]]:
@@ -205,13 +205,12 @@ define i64 @test_loop_with_div_order_1(i64 %n) {
 ; CHECK-NEXT:    [[PARITY_CHECK:%.*]] = icmp eq i64 [[IS_ODD]], 0
 ; CHECK-NEXT:    br i1 [[PARITY_CHECK]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT]]
 ; CHECK:       [[LOOP_PREHEADER]]:
-; CHECK-NEXT:    [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[UPPER_BOUND]], i64 1)
 ; CHECK-NEXT:    br label %[[LOOP:.*]]
 ; CHECK:       [[LOOP]]:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
 ; CHECK-NEXT:    [[DUMMY:%.*]] = load volatile i64, ptr null, align 8
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[UMAX]]
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[UPPER_BOUND]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[LOOP]], label %[[EXIT_LOOPEXIT:.*]]
 ; CHECK:       [[EXIT_LOOPEXIT]]:
 ; CHECK-NEXT:    br label %[[EXIT]]

>From 040a73a201619e4f9374cac49159347054f702d1 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 19 Sep 2025 10:22:23 +0100
Subject: [PATCH 2/4] [SCEV] Rewrite more SCEVAddExpr when applying guards.

When re-writing SCEVAddExprs to apply information from guards, check if
we have information for the expression itself. If so, apply it.

When we have an expression of the form (Const + A),  check if we have
have guard info for (Const + 1 + A) and use it. This is needed to avoid
regressions in a few cases, where we have BTCs with a subtracted
constant.

Rewriting expressions could cause regressions, e.g. when comparing 2
SCEV expressions where we are only able to rewrite one side, but I could
not find any cases where this happens more with this patch in practice.

Depends on https://github.com/llvm/llvm-project/pull/160012 (included in
PR)

Proofs for some of the test changes: https://alive2.llvm.org/ce/z/RPX6t_
---
 llvm/lib/Analysis/ScalarEvolution.cpp         | 34 ++++++++++++++-----
 ...ge-taken-count-guard-info-apply-to-adds.ll |  6 ++--
 .../ScalarEvolution/trip-count-minmax.ll      | 12 +++----
 .../IndVarSimplify/canonicalize-cmp.ll        |  6 ++--
 .../Transforms/LoopIdiom/add-nsw-zext-fold.ll |  6 ++--
 .../runtime-unroll-assume-no-remainder.ll     | 30 ++++++++++++++--
 .../dont-fold-tail-for-divisible-TC.ll        |  2 +-
 .../runtime-checks-difference.ll              |  7 +---
 8 files changed, 68 insertions(+), 35 deletions(-)

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 656ab8d0bdb1a..4add7de9eb2f2 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -16078,16 +16078,32 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
     }
 
     const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
-      // Trip count expressions sometimes consist of adding 3 operands, i.e.
-      // (Const + A + B). There may be guard info for A + B, and if so, apply
-      // it.
-      // TODO: Could more generally apply guards to Add sub-expressions.
-      if (isa<SCEVConstant>(Expr->getOperand(0)) &&
-          Expr->getNumOperands() == 3) {
-        if (const SCEV *S = Map.lookup(
-                SE.getAddExpr(Expr->getOperand(1), Expr->getOperand(2))))
-          return SE.getAddExpr(Expr->getOperand(0), S);
+      if (const SCEV *S = Map.lookup(Expr))
+        return S;
+      if (isa<SCEVConstant>(Expr->getOperand(0))) {
+        // Trip count expressions sometimes consist of adding 3 operands, i.e.
+        // (Const + A + B). There may be guard info for A + B, and if so, apply
+        // it.
+        // TODO: Could more generally apply guards to Add sub-expressions.
+        if (Expr->getNumOperands() == 3) {
+          if (const SCEV *S = Map.lookup(
+                  SE.getAddExpr(Expr->getOperand(1), Expr->getOperand(2))))
+            return SE.getAddExpr(Expr->getOperand(0), S);
+        }
+
+        // For expressions of the form (Const + A), check if we have guard info
+        // for (Const + 1 + A), and rewrite to ((Const + 1 + A) - 1). This makes
+        // sure we don't loose information when rewriting expressions based on
+        // back-edge taken counts in some cases..
+        if (Expr->getNumOperands() == 2) {
+          auto *NewC =
+              SE.getAddExpr(Expr->getOperand(0), SE.getOne(Expr->getType()));
+          if (const SCEV *S =
+                  Map.lookup(SE.getAddExpr(NewC, Expr->getOperand(1))))
+            return SE.getMinusSCEV(S, SE.getOne(Expr->getType()));
+        }
       }
+
       SmallVector<const SCEV *, 2> Operands;
       bool Changed = false;
       for (const auto *Op : Expr->operands()) {
diff --git a/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll
index 6b2c78cebc44a..5ea836d3b8067 100644
--- a/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll
+++ b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll
@@ -33,9 +33,9 @@ declare void @clobber()
 define void @test_add_sub_1_guard(ptr %src, i32 %n) {
 ; CHECK-LABEL: 'test_add_sub_1_guard'
 ; CHECK-NEXT:  Determining loop execution counts for: @test_add_sub_1_guard
-; CHECK-NEXT:  Loop %loop: backedge-taken count is (zext i32 (-1 + (%n /u 2))<nsw> to i64)
-; CHECK-NEXT:  Loop %loop: constant max backedge-taken count is i64 4294967295
-; CHECK-NEXT:  Loop %loop: symbolic max backedge-taken count is (zext i32 (-1 + (%n /u 2))<nsw> to i64)
+; CHECK-NEXT:  Loop %loop: backedge-taken count is i64 0
+; CHECK-NEXT:  Loop %loop: constant max backedge-taken count is i64 0
+; CHECK-NEXT:  Loop %loop: symbolic max backedge-taken count is i64 0
 ; CHECK-NEXT:  Loop %loop: Trip multiple is 1
 ;
 entry:
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll
index d38010403dad7..2f0627b7d4476 100644
--- a/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll
+++ b/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll
@@ -102,12 +102,12 @@ define void @umax(i32 noundef %a, i32 noundef %b) {
 ; CHECK-NEXT:    %cond = select i1 %cmp, i32 %mul, i32 %mul1
 ; CHECK-NEXT:    --> ((2 * %a) umax (4 * %b)) U: [0,-1) S: [-2147483648,2147483647)
 ; CHECK-NEXT:    %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
-; CHECK-NEXT:    --> {0,+,1}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (-1 + ((2 * %a) umax (4 * %b))) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><nsw><%for.body> U: [0,2147483647) S: [0,2147483647) Exits: (-1 + ((2 * %a) umax (4 * %b))) LoopDispositions: { %for.body: Computable }
 ; CHECK-NEXT:    %inc = add nuw nsw i32 %i.011, 1
-; CHECK-NEXT:    --> {1,+,1}<nuw><%for.body> U: [1,-1) S: [1,-1) Exits: ((2 * %a) umax (4 * %b)) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nuw><nsw><%for.body> U: [1,-2147483648) S: [1,-2147483648) Exits: ((2 * %a) umax (4 * %b)) LoopDispositions: { %for.body: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @umax
 ; CHECK-NEXT:  Loop %for.body: backedge-taken count is (-1 + ((2 * %a) umax (4 * %b)))
-; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 -3
+; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 2147483646
 ; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a) umax (4 * %b)))
 ; CHECK-NEXT:  Loop %for.body: Trip multiple is 2
 ;
@@ -197,12 +197,12 @@ define void @smax(i32 noundef %a, i32 noundef %b) {
 ; CHECK-NEXT:    %cond = select i1 %cmp, i32 %mul, i32 %mul1
 ; CHECK-NEXT:    --> ((2 * %a)<nsw> smax (4 * %b)<nsw>) U: [0,-1) S: [-2147483648,2147483647)
 ; CHECK-NEXT:    %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
-; CHECK-NEXT:    --> {0,+,1}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (-1 + ((2 * %a)<nsw> smax (4 * %b)<nsw>)) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><nsw><%for.body> U: [0,2147483647) S: [0,2147483647) Exits: (-1 + ((2 * %a)<nsw> smax (4 * %b)<nsw>)) LoopDispositions: { %for.body: Computable }
 ; CHECK-NEXT:    %inc = add nuw nsw i32 %i.011, 1
-; CHECK-NEXT:    --> {1,+,1}<nuw><%for.body> U: [1,-1) S: [1,-1) Exits: ((2 * %a)<nsw> smax (4 * %b)<nsw>) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nuw><nsw><%for.body> U: [1,-2147483648) S: [1,-2147483648) Exits: ((2 * %a)<nsw> smax (4 * %b)<nsw>) LoopDispositions: { %for.body: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @smax
 ; CHECK-NEXT:  Loop %for.body: backedge-taken count is (-1 + ((2 * %a)<nsw> smax (4 * %b)<nsw>))
-; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 -3
+; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 2147483646
 ; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a)<nsw> smax (4 * %b)<nsw>))
 ; CHECK-NEXT:  Loop %for.body: Trip multiple is 2
 ;
diff --git a/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll b/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll
index 4b52479fc6c4d..40e3c63cbe04a 100644
--- a/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll
+++ b/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll
@@ -343,14 +343,13 @@ define void @slt_no_smax_needed(i64 %n, ptr %dst) {
 ; CHECK-NEXT:    [[PRE:%.*]] = icmp ult i32 [[ADD_1]], 8
 ; CHECK-NEXT:    br i1 [[PRE]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]]
 ; CHECK:       loop.preheader:
-; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[SHR]], i32 1)
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i32 [[IV]]
 ; CHECK-NEXT:    store i8 0, ptr [[GEP]], align 1
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[SMAX]]
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[SHR]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
 ; CHECK:       exit.loopexit:
 ; CHECK-NEXT:    br label [[EXIT]]
@@ -385,14 +384,13 @@ define void @ult_no_umax_needed(i64 %n, ptr %dst) {
 ; CHECK-NEXT:    [[PRE:%.*]] = icmp ult i32 [[ADD_1]], 8
 ; CHECK-NEXT:    br i1 [[PRE]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]]
 ; CHECK:       loop.preheader:
-; CHECK-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[SHR]], i32 1)
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i32 [[IV]]
 ; CHECK-NEXT:    store i8 0, ptr [[GEP]], align 1
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[UMAX]]
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[SHR]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
 ; CHECK:       exit.loopexit:
 ; CHECK-NEXT:    br label [[EXIT]]
diff --git a/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll b/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll
index bc1543d8361a7..09419c13aaeb0 100644
--- a/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll
+++ b/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll
@@ -61,9 +61,9 @@ define void @test_memset_size_can_use_info_from_guards(i32 %x, ptr %dst) {
 ; CHECK:       [[LOOP1_BACKEDGE]]:
 ; CHECK-NEXT:    br label %[[LOOP1]]
 ; CHECK:       [[LOOP2_PREHEADER]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[SUB]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
-; CHECK-NEXT:    [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 1)
+; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i32 [[SHR]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT:    [[UMAX:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 [[DST]], i8 0, i64 [[UMAX]], i1 false)
 ; CHECK-NEXT:    br label %[[LOOP2:.*]]
 ; CHECK:       [[LOOP2]]:
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-unroll-assume-no-remainder.ll b/llvm/test/Transforms/LoopUnroll/runtime-unroll-assume-no-remainder.ll
index 73f7fd37a0099..74d4bb0f8d1cb 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-unroll-assume-no-remainder.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-unroll-assume-no-remainder.ll
@@ -19,9 +19,16 @@ define dso_local void @assumeDivisibleTC(ptr noalias nocapture %a, ptr noalias n
 ; CHECK-NEXT:    [[CMP110:%.*]] = icmp sgt i32 [[N]], 0
 ; CHECK-NEXT:    br i1 [[CMP110]], label [[FOR_BODY_PREHEADER:%.*]], label [[EXIT]]
 ; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[N]], -1
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[N]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 1
+; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_BODY_EPIL_PREHEADER:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
+; CHECK:       for.body.preheader.new:
+; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i32 [[N]], [[XTRAITER]]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[I_011:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INC_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[I_011:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INC_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i32 [[I_011]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
 ; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[TMP0]], 3
@@ -34,8 +41,25 @@ define dso_local void @assumeDivisibleTC(ptr noalias nocapture %a, ptr noalias n
 ; CHECK-NEXT:    [[ARRAYIDX4_1:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[INC]]
 ; CHECK-NEXT:    store i8 [[ADD_1]], ptr [[ARRAYIDX4_1]], align 1
 ; CHECK-NEXT:    [[INC_1]] = add nuw nsw i32 [[I_011]], 2
-; CHECK-NEXT:    [[CMP1_1:%.*]] = icmp slt i32 [[INC_1]], [[N]]
-; CHECK-NEXT:    br i1 [[CMP1_1]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[NITER_NEXT_1]] = add i32 [[NITER]], 2
+; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp ne i32 [[NITER_NEXT_1]], [[UNROLL_ITER]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       exit.loopexit.unr-lcssa:
+; CHECK-NEXT:    [[I_011_UNR:%.*]] = phi i32 [ [[INC_1]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[FOR_BODY_EPIL_PREHEADER]], label [[EXIT_LOOPEXIT:%.*]]
+; CHECK:       for.body.epil.preheader:
+; CHECK-NEXT:    [[I_011_EPIL_INIT:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[I_011_UNR]], [[EXIT_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i32 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
+; CHECK-NEXT:    br label [[FOR_BODY_EPIL:%.*]]
+; CHECK:       for.body.epil:
+; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[I_011_EPIL_INIT]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX_EPIL]], align 1
+; CHECK-NEXT:    [[ADD_EPIL:%.*]] = add i8 [[TMP4]], 3
+; CHECK-NEXT:    [[ARRAYIDX4_EPIL:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[I_011_EPIL_INIT]]
+; CHECK-NEXT:    store i8 [[ADD_EPIL]], ptr [[ARRAYIDX4_EPIL]], align 1
+; CHECK-NEXT:    br label [[EXIT_LOOPEXIT]]
 ; CHECK:       exit.loopexit:
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
index 156c2bdca7b0e..812d9d928cc8b 100644
--- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
+++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
@@ -193,7 +193,7 @@ define dso_local void @cannotProveAlignedTC(ptr noalias nocapture %A, i32 %p, i3
 ; CHECK-NEXT:    store i32 13, ptr [[TMP12]], align 1
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; CHECK:       pred.store.continue6:
-; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll b/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll
index 648ebc7e6c3a5..a556b15adbefc 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll
@@ -465,12 +465,7 @@ define void @remove_diff_checks_via_guards(i32 %x, i32 %y, ptr %A) {
 ; CHECK-NEXT:    [[TMP13:%.*]] = icmp ugt i64 [[SMAX]], 4294967295
 ; CHECK-NEXT:    [[TMP14:%.*]] = or i1 [[TMP12]], [[TMP13]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = or i1 [[TMP9]], [[TMP14]]
-; CHECK-NEXT:    br i1 [[TMP15]], [[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
-; CHECK:       [[VECTOR_MEMCHECK]]:
-; CHECK-NEXT:    [[TMP16:%.*]] = sext i32 [[OFFSET]] to i64
-; CHECK-NEXT:    [[TMP17:%.*]] = shl nsw i64 [[TMP16]], 2
-; CHECK-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP17]], 16
-; CHECK-NEXT:    br i1 [[DIFF_CHECK]], [[SCALAR_PH]], [[VECTOR_PH1:label %.*]]
+; CHECK-NEXT:    br i1 [[TMP15]], [[SCALAR_PH]], [[VECTOR_PH:label %.*]]
 ;
 entry:
   %offset = sub i32 %x, %y

>From 0f40aa49edbdfa972b9d5c55a96a1d118cb0203c Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 9 Oct 2025 11:25:57 +0100
Subject: [PATCH 3/4] !fixup  getMinsSCEV -> getAddExpr + getMinusOne.

---
 llvm/lib/Analysis/ScalarEvolution.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 4add7de9eb2f2..a057a11513a76 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -16100,7 +16100,7 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
               SE.getAddExpr(Expr->getOperand(0), SE.getOne(Expr->getType()));
           if (const SCEV *S =
                   Map.lookup(SE.getAddExpr(NewC, Expr->getOperand(1))))
-            return SE.getMinusSCEV(S, SE.getOne(Expr->getType()));
+            return SE.getAddExpr(S, SE.getMinusOne(Expr->getType()));
         }
       }
 

>From 0710344361161dcefeacdf6c0f80c407d9f2d9ad Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sat, 11 Oct 2025 21:38:11 +0100
Subject: [PATCH 4/4] !fixup undo test changes, remove stray period.

---
 llvm/lib/Analysis/ScalarEvolution.cpp         |  2 +-
 .../runtime-unroll-assume-no-remainder.ll     | 30 ++-----------------
 2 files changed, 4 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index a057a11513a76..47f0650e3ac73 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -16094,7 +16094,7 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
         // For expressions of the form (Const + A), check if we have guard info
         // for (Const + 1 + A), and rewrite to ((Const + 1 + A) - 1). This makes
         // sure we don't loose information when rewriting expressions based on
-        // back-edge taken counts in some cases..
+        // back-edge taken counts in some cases.
         if (Expr->getNumOperands() == 2) {
           auto *NewC =
               SE.getAddExpr(Expr->getOperand(0), SE.getOne(Expr->getType()));
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-unroll-assume-no-remainder.ll b/llvm/test/Transforms/LoopUnroll/runtime-unroll-assume-no-remainder.ll
index 74d4bb0f8d1cb..73f7fd37a0099 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-unroll-assume-no-remainder.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-unroll-assume-no-remainder.ll
@@ -19,16 +19,9 @@ define dso_local void @assumeDivisibleTC(ptr noalias nocapture %a, ptr noalias n
 ; CHECK-NEXT:    [[CMP110:%.*]] = icmp sgt i32 [[N]], 0
 ; CHECK-NEXT:    br i1 [[CMP110]], label [[FOR_BODY_PREHEADER:%.*]], label [[EXIT]]
 ; CHECK:       for.body.preheader:
-; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[N]], -1
-; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[N]], 1
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 1
-; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_BODY_EPIL_PREHEADER:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
-; CHECK:       for.body.preheader.new:
-; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i32 [[N]], [[XTRAITER]]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[I_011:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INC_1:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[I_011:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INC_1:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i32 [[I_011]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
 ; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[TMP0]], 3
@@ -41,25 +34,8 @@ define dso_local void @assumeDivisibleTC(ptr noalias nocapture %a, ptr noalias n
 ; CHECK-NEXT:    [[ARRAYIDX4_1:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[INC]]
 ; CHECK-NEXT:    store i8 [[ADD_1]], ptr [[ARRAYIDX4_1]], align 1
 ; CHECK-NEXT:    [[INC_1]] = add nuw nsw i32 [[I_011]], 2
-; CHECK-NEXT:    [[NITER_NEXT_1]] = add i32 [[NITER]], 2
-; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp ne i32 [[NITER_NEXT_1]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK:       exit.loopexit.unr-lcssa:
-; CHECK-NEXT:    [[I_011_UNR:%.*]] = phi i32 [ [[INC_1]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[FOR_BODY_EPIL_PREHEADER]], label [[EXIT_LOOPEXIT:%.*]]
-; CHECK:       for.body.epil.preheader:
-; CHECK-NEXT:    [[I_011_EPIL_INIT:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[I_011_UNR]], [[EXIT_LOOPEXIT_UNR_LCSSA]] ]
-; CHECK-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
-; CHECK-NEXT:    br label [[FOR_BODY_EPIL:%.*]]
-; CHECK:       for.body.epil:
-; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[I_011_EPIL_INIT]]
-; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX_EPIL]], align 1
-; CHECK-NEXT:    [[ADD_EPIL:%.*]] = add i8 [[TMP4]], 3
-; CHECK-NEXT:    [[ARRAYIDX4_EPIL:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[I_011_EPIL_INIT]]
-; CHECK-NEXT:    store i8 [[ADD_EPIL]], ptr [[ARRAYIDX4_EPIL]], align 1
-; CHECK-NEXT:    br label [[EXIT_LOOPEXIT]]
+; CHECK-NEXT:    [[CMP1_1:%.*]] = icmp slt i32 [[INC_1]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP1_1]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       exit.loopexit:
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit: