[llvm] [SCEV] Rewrite A - B = UMax(1, A - B) lazily for A != B loop guards. (PR #163787)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 16 06:54:37 PDT 2025
https://github.com/fhahn created https://github.com/llvm/llvm-project/pull/163787
Follow-up to 2d027260b0f8 (https://github.com/llvm/llvm-project/pull/160500)
Creating the SCEV subtraction eagerly is very expensive. To soften the blow, just collect a map with inequalities and check if we can apply the subtract rewrite when rewriting SCEVAddExpr.
Recovers most of the compile-time regression:
http://llvm-compile-time-tracker.com/compare.php?from=0792478e4e133be96650444f3264e89d002fc058&to=7fca35db60fe6f423ea6051b45226046c067c252&stat=instructions:u
stage1-O3: -0.10%
stage1-ReleaseThinLTO: -0.09%
stage1-ReleaseLTO-g: -0.10%
stage1-O0-g: +0.02%
stage1-aarch64-O3: -0.09%
stage1-aarch64-O0-g: +0.00%
stage2-O3: -0.17%
stage2-O0-g: -0.05%
stage2-clang: -0.07%
There is still some negative impact compared to before 2d027260b0f8, but there's probably not much we could do to reduce this further.
Compile-time improvement with 2d027260b0f8 reverted on top of the current PR: http://llvm-compile-time-tracker.com/compare.php?from=7fca35db60fe6f423ea6051b45226046c067c252&to=98dd152bdfc76b30d00190d3850d89406ca3c21f&stat=instructions:u
stage1-O3: 60628M (-0.03%)
stage1-ReleaseThinLTO: 76388M (-0.04%)
stage1-ReleaseLTO-g: 89228M (-0.02%)
stage1-O0-g: 18523M (-0.03%)
stage1-aarch64-O3: 67623M (-0.03%)
stage1-aarch64-O0-g: 22595M (+0.01%)
stage2-O3: 52336M (+0.01%)
stage2-O0-g: 16174M (+0.00%)
stage2-clang: 34890032M (-0.03%)
From 7f22a8246a54a188ad892020f40b4bb1fc19878a Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 15 Oct 2025 22:01:29 +0100
Subject: [PATCH] [SCEV] Rewrite A - B = UMax(1, A - B) lazily for A != B loop
guards.
Follow-up to 2d027260b0f8 (https://github.com/llvm/llvm-project/pull/160500)
Creating the SCEV subtraction eagerly is very expensive. To soften the
blow, just collect a map with inequalities and check if we can apply the
subtract rewrite when rewriting SCEVAddExpr.
Recovers most of the compile-time regression:
http://llvm-compile-time-tracker.com/compare.php?from=0792478e4e133be96650444f3264e89d002fc058&to=7fca35db60fe6f423ea6051b45226046c067c252&stat=instructions:u
stage1-O3: -0.10%
stage1-ReleaseThinLTO: -0.09%
stage1-ReleaseLTO-g: -0.10%
stage1-O0-g: +0.02%
stage1-aarch64-O3: -0.09%
stage1-aarch64-O0-g: +0.00%
stage2-O3: -0.17%
stage2-O0-g: -0.05%
stage2-clang: -0.07%
There is still some negative impact compared to before 2d027260b0f8, but
there's probably not much we could do to reduce this further.
Compile-time improvement with 2d027260b0f8 reverted on top of the current PR:
http://llvm-compile-time-tracker.com/compare.php?from=7fca35db60fe6f423ea6051b45226046c067c252&to=98dd152bdfc76b30d00190d3850d89406ca3c21f&stat=instructions:u
stage1-O3: 60628M (-0.03%)
stage1-ReleaseThinLTO: 76388M (-0.04%)
stage1-ReleaseLTO-g: 89228M (-0.02%)
stage1-O0-g: 18523M (-0.03%)
stage1-aarch64-O3: 67623M (-0.03%)
stage1-aarch64-O0-g: 22595M (+0.01%)
stage2-O3: 52336M (+0.01%)
stage2-O0-g: 16174M (+0.00%)
stage2-clang: 34890032M (-0.03%)
---
llvm/include/llvm/Analysis/ScalarEvolution.h | 1 +
llvm/lib/Analysis/ScalarEvolution.cpp | 51 +++++++++++++++-----
2 files changed, 41 insertions(+), 11 deletions(-)
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index e5a6c8cc0a6aa..96d3ef6706843 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1345,6 +1345,7 @@ class ScalarEvolution {
class LoopGuards {
DenseMap<const SCEV *, const SCEV *> RewriteMap;
+ DenseMap<const SCEV *, SmallPtrSet<const SCEV *, 2>> NotEqualMap;
bool PreserveNUW = false;
bool PreserveNSW = false;
ScalarEvolution &SE;
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index a64b93d541943..ce6f5fd17e294 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -15772,19 +15772,25 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
GetNextSCEVDividesByDivisor(One, DividesBy);
To = SE.getUMaxExpr(FromRewritten, OneAlignedUp);
} else {
+ // LHS != RHS can be rewritten as (LHS - RHS) = UMax(1, LHS - RHS),
+ // but creating the subtraction eagerly is expensive. Track the
+ // inequalities in a separate map, and materialize the rewrite lazily
+ // when encountering a suitable subtraction while re-writing.
if (LHS->getType()->isPointerTy()) {
LHS = SE.getLosslessPtrToIntExpr(LHS);
RHS = SE.getLosslessPtrToIntExpr(RHS);
if (isa<SCEVCouldNotCompute>(LHS) || isa<SCEVCouldNotCompute>(RHS))
break;
}
- auto AddSubRewrite = [&](const SCEV *A, const SCEV *B) {
- const SCEV *Sub = SE.getMinusSCEV(A, B);
- AddRewrite(Sub, Sub,
- SE.getUMaxExpr(Sub, SE.getOne(From->getType())));
- };
- AddSubRewrite(LHS, RHS);
- AddSubRewrite(RHS, LHS);
+ const SCEVConstant *C;
+ const SCEV *A, *B;
+ if (match(RHS, m_scev_Add(m_SCEVConstant(C), m_SCEV(A))) &&
+ match(LHS, m_scev_Add(m_scev_Specific(C), m_SCEV(B)))) {
+ RHS = A;
+ LHS = B;
+ }
+ Guards.NotEqualMap[LHS].insert(RHS);
+ Guards.NotEqualMap[RHS].insert(LHS);
continue;
}
break;
@@ -15918,13 +15924,15 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
class SCEVLoopGuardRewriter
: public SCEVRewriteVisitor<SCEVLoopGuardRewriter> {
const DenseMap<const SCEV *, const SCEV *> &Map;
+ const DenseMap<const SCEV *, SmallPtrSet<const SCEV *, 2>> &NotEqualMap;
SCEV::NoWrapFlags FlagMask = SCEV::FlagAnyWrap;
public:
SCEVLoopGuardRewriter(ScalarEvolution &SE,
const ScalarEvolution::LoopGuards &Guards)
- : SCEVRewriteVisitor(SE), Map(Guards.RewriteMap) {
+ : SCEVRewriteVisitor(SE), Map(Guards.RewriteMap),
+ NotEqualMap(Guards.NotEqualMap) {
if (Guards.PreserveNUW)
FlagMask = ScalarEvolution::setFlags(FlagMask, SCEV::FlagNUW);
if (Guards.PreserveNSW)
@@ -15979,14 +15987,35 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
}
const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
+ // Helper to check if S is a subtraction (A - B) where A != B, and if so,
+ // return UMax(S, 1).
+ auto RewriteSubtraction = [&](const SCEV *S) -> const SCEV * {
+ const SCEV *LHS, *RHS;
+ if (MatchBinarySub(S, LHS, RHS)) {
+ auto It = NotEqualMap.find(LHS);
+ if (It != NotEqualMap.end() && It->second.contains(RHS))
+ return SE.getUMaxExpr(S, SE.getOne(S->getType()));
+ }
+ return nullptr;
+ };
+
+ // Check if Expr itself is a subtraction pattern with guard info.
+ if (const SCEV *Rewritten = RewriteSubtraction(Expr))
+ return Rewritten;
+
// Trip count expressions sometimes consist of adding 3 operands, i.e.
// (Const + A + B). There may be guard info for A + B, and if so, apply
// it.
// TODO: Could more generally apply guards to Add sub-expressions.
if (isa<SCEVConstant>(Expr->getOperand(0)) &&
Expr->getNumOperands() == 3) {
- if (const SCEV *S = Map.lookup(
- SE.getAddExpr(Expr->getOperand(1), Expr->getOperand(2))))
+ const SCEV *Add =
+ SE.getAddExpr(Expr->getOperand(1), Expr->getOperand(2));
+ if (const SCEV *Rewritten = RewriteSubtraction(Add))
+ return SE.getAddExpr(
+ Expr->getOperand(0), Rewritten,
+ ScalarEvolution::maskFlags(Expr->getNoWrapFlags(), FlagMask));
+ if (const SCEV *S = Map.lookup(Add))
return SE.getAddExpr(Expr->getOperand(0), S);
}
SmallVector<const SCEV *, 2> Operands;
@@ -16021,7 +16050,7 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
}
};
- if (RewriteMap.empty())
+ if (RewriteMap.empty() && NotEqualMap.empty())
return Expr;
SCEVLoopGuardRewriter Rewriter(SE, *this);
More information about the llvm-commits
mailing list