[llvm] [SCEV] Rewrite more SCEVAddExpr when applying guards. (PR #159942)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sat Sep 20 13:27:42 PDT 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/159942
>From 237861fbce060c03445a1a8569325f43700ef273 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 19 Sep 2025 10:22:23 +0100
Subject: [PATCH] [SCEV] Rewrite more SCEVAddExpr when applying guards.
When re-writing SCEVAddExprs to apply information from guards, check if
we have information for the expression itself. If so, apply it.
When we have an expression of the form (Const + A), check if we
have guard info for (Const + 1 + A) and use it. This is needed to avoid
regressions in a few cases, where we have BTCs with a subtracted
constant.
Rewriting expressions could cause regressions, e.g. when comparing 2
SCEV expressions where we are only able to rewrite one side, but I could
not find any cases where this happens more with this patch in practice.
https://alive2.llvm.org/ce/z/cre6rr
---
llvm/lib/Analysis/ScalarEvolution.cpp | 44 +++++++++++++------
.../IndVarSimplify/canonicalize-cmp.ll | 6 +--
.../runtime-checks-difference.ll | 7 +--
3 files changed, 33 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index b08399b381f34..f839e24a6b973 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -16059,29 +16059,45 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
}
const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
+ SmallVector<const SCEV *, 2> Operands;
+ bool Changed = false;
+ for (const auto *Op : Expr->operands()) {
+ Operands.push_back(
+ SCEVRewriteVisitor<SCEVLoopGuardRewriter>::visit(Op));
+ Changed |= Op != Operands.back();
+ }
+ if (Changed) {
+ // We are only replacing operands with equivalent values, so transfer
+ // the flags from the original expression.
+ return SE.getAddExpr(Operands, ScalarEvolution::maskFlags(
+ Expr->getNoWrapFlags(), FlagMask));
+ }
+ if (const SCEV *S = Map.lookup(Expr))
+ return S;
+ if (!isa<SCEVConstant>(Expr->getOperand(0)))
+ return Expr;
// Trip count expressions sometimes consist of adding 3 operands, i.e.
// (Const + A + B). There may be guard info for A + B, and if so, apply
// it.
// TODO: Could more generally apply guards to Add sub-expressions.
- if (isa<SCEVConstant>(Expr->getOperand(0)) &&
- Expr->getNumOperands() == 3) {
+ if (Expr->getNumOperands() == 3) {
if (const SCEV *S = Map.lookup(
SE.getAddExpr(Expr->getOperand(1), Expr->getOperand(2))))
return SE.getAddExpr(Expr->getOperand(0), S);
}
- SmallVector<const SCEV *, 2> Operands;
- bool Changed = false;
- for (const auto *Op : Expr->operands()) {
- Operands.push_back(
- SCEVRewriteVisitor<SCEVLoopGuardRewriter>::visit(Op));
- Changed |= Op != Operands.back();
+
+ // For expressions of the form (Const + A), check if we have guard info
+ // for (Const + 1 + A), and rewrite to ((Const + 1 + A) - 1). This makes
+ // sure we don't lose information when rewriting expressions based on
+ // back-edge taken counts in some cases.
+ if (Expr->getNumOperands() == 2) {
+ auto *NewC =
+ SE.getAddExpr(Expr->getOperand(0), SE.getOne(Expr->getType()));
+ if (const SCEV *S =
+ Map.lookup(SE.getAddExpr(NewC, Expr->getOperand(1))))
+ return SE.getMinusSCEV(S, SE.getOne(Expr->getType()));
}
- // We are only replacing operands with equivalent values, so transfer the
- // flags from the original expression.
- return !Changed ? Expr
- : SE.getAddExpr(Operands,
- ScalarEvolution::maskFlags(
- Expr->getNoWrapFlags(), FlagMask));
+ return Expr;
}
const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
diff --git a/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll b/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll
index 4b52479fc6c4d..40e3c63cbe04a 100644
--- a/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll
+++ b/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll
@@ -343,14 +343,13 @@ define void @slt_no_smax_needed(i64 %n, ptr %dst) {
; CHECK-NEXT: [[PRE:%.*]] = icmp ult i32 [[ADD_1]], 8
; CHECK-NEXT: br i1 [[PRE]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]]
; CHECK: loop.preheader:
-; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[SHR]], i32 1)
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i32 [[IV]]
; CHECK-NEXT: store i8 0, ptr [[GEP]], align 1
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[SMAX]]
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[SHR]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
@@ -385,14 +384,13 @@ define void @ult_no_umax_needed(i64 %n, ptr %dst) {
; CHECK-NEXT: [[PRE:%.*]] = icmp ult i32 [[ADD_1]], 8
; CHECK-NEXT: br i1 [[PRE]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]]
; CHECK: loop.preheader:
-; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[SHR]], i32 1)
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i32 [[IV]]
; CHECK-NEXT: store i8 0, ptr [[GEP]], align 1
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[UMAX]]
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[SHR]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll b/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll
index 648ebc7e6c3a5..a556b15adbefc 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll
@@ -465,12 +465,7 @@ define void @remove_diff_checks_via_guards(i32 %x, i32 %y, ptr %A) {
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[SMAX]], 4294967295
; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP15:%.*]] = or i1 [[TMP9]], [[TMP14]]
-; CHECK-NEXT: br i1 [[TMP15]], [[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
-; CHECK: [[VECTOR_MEMCHECK]]:
-; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[OFFSET]] to i64
-; CHECK-NEXT: [[TMP17:%.*]] = shl nsw i64 [[TMP16]], 2
-; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP17]], 16
-; CHECK-NEXT: br i1 [[DIFF_CHECK]], [[SCALAR_PH]], [[VECTOR_PH1:label %.*]]
+; CHECK-NEXT: br i1 [[TMP15]], [[SCALAR_PH]], [[VECTOR_PH:label %.*]]
;
entry:
%offset = sub i32 %x, %y
More information about the llvm-commits
mailing list