[llvm] [SCEV] Rewrite more SCEVAddExpr when applying guards. (PR #159942)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 21 13:33:37 PDT 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/159942
>From 26ed8aa0f0bfad31b263aabda37416a51a150611 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 21 Sep 2025 21:25:54 +0100
Subject: [PATCH 1/3] [SCEV] Add additional test with guards for 3-op AddRec.
---
...ge-taken-count-guard-info-apply-to-adds.ll | 34 +++++++++++++++++++
1 file changed, 34 insertions(+)
diff --git a/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll
index 8df4b52757753..6b2c78cebc44a 100644
--- a/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll
+++ b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll
@@ -56,3 +56,37 @@ loop:
exit:
ret void
}
+
+declare void @use(ptr)
+
+define i32 @test_3_op_add(i32 %x, i32 %y, ptr %A) {
+; CHECK-LABEL: 'test_3_op_add'
+; CHECK-NEXT: Determining loop execution counts for: @test_3_op_add
+; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + (zext i32 (1 + (-1 * %x) + %y) to i64))<nsw>
+; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 2147483647
+; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (-1 + (zext i32 (1 + (-1 * %x) + %y) to i64))<nsw>
+; CHECK-NEXT: Loop %loop: Trip multiple is 1
+;
+entry:
+ %pre.0 = icmp ugt i32 %x, 0
+ br i1 %pre.0, label %then, label %exit
+
+then:
+ %y.sub.x = sub i32 %y, %x
+ %pre.1 = icmp slt i32 %y.sub.x, 0
+ %add.1 = add i32 %y.sub.x, 1
+ %add.ext = zext i32 %add.1 to i64
+ br i1 %pre.1, label %exit, label %loop
+
+loop:
+ %iv = phi i64 [ %iv.next, %loop ], [ 0, %then ]
+ %and = and i64 %iv, 1
+ %gep = getelementptr i8, ptr %A, i64 %and
+ call void @use(ptr %gep)
+ %iv.next = add i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, %add.ext
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret i32 0
+}
>From 541f1438d70ce90c763423ad623deacb87f9dbdb Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 21 Sep 2025 20:33:41 +0100
Subject: [PATCH 2/3] [SCEV] Preserve divisibility info when creating UMax/SMax
expressions.
---
llvm/lib/Analysis/ScalarEvolution.cpp | 24 +++++++++++++++++--
.../ScalarEvolution/trip-count-minmax.ll | 4 ++--
2 files changed, 24 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index b08399b381f34..ee1f92a4197e8 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -15850,12 +15850,17 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
To = SE.getUMaxExpr(FromRewritten, RHS);
if (auto *UMin = dyn_cast<SCEVUMinExpr>(FromRewritten))
EnqueueOperands(UMin);
+ if (RHS->isOne())
+ ExprsToRewrite.push_back(From);
break;
case CmpInst::ICMP_SGT:
case CmpInst::ICMP_SGE:
To = SE.getSMaxExpr(FromRewritten, RHS);
- if (auto *SMin = dyn_cast<SCEVSMinExpr>(FromRewritten))
+ if (auto *SMin = dyn_cast<SCEVSMinExpr>(FromRewritten)) {
EnqueueOperands(SMin);
+ }
+ if (RHS->isOne())
+ ExprsToRewrite.push_back(From);
break;
case CmpInst::ICMP_EQ:
if (isa<SCEVConstant>(RHS))
@@ -15986,7 +15991,22 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
for (const SCEV *Expr : ExprsToRewrite) {
const SCEV *RewriteTo = Guards.RewriteMap[Expr];
Guards.RewriteMap.erase(Expr);
- Guards.RewriteMap.insert({Expr, Guards.rewrite(RewriteTo)});
+ const SCEV *Rewritten = Guards.rewrite(RewriteTo);
+
+ // Try to strengthen divisibility of SMax/UMax expressions coming from >=
+ // 1 conditions.
+ if (auto *SMax = dyn_cast<SCEVSMaxExpr>(Rewritten)) {
+ unsigned MinTrailingZeros = SE.getMinTrailingZeros(SMax->getOperand(1));
+ for (const SCEV *Op : drop_begin(SMax->operands(), 2))
+ MinTrailingZeros =
+ std::min(MinTrailingZeros, SE.getMinTrailingZeros(Op));
+ if (MinTrailingZeros != 0)
+ Rewritten = SE.getSMaxExpr(
+ SE.getConstant(APInt(SMax->getType()->getScalarSizeInBits(), 1)
+ .shl(MinTrailingZeros)),
+ SMax);
+ }
+ Guards.RewriteMap.insert({Expr, Rewritten});
}
}
}
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll
index 8d091a00ed4b9..d38010403dad7 100644
--- a/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll
+++ b/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll
@@ -61,7 +61,7 @@ define void @umin(i32 noundef %a, i32 noundef %b) {
; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((2 * %a) umin (4 * %b)))
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483646
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a) umin (4 * %b)))
-; CHECK-NEXT: Loop %for.body: Trip multiple is 1
+; CHECK-NEXT: Loop %for.body: Trip multiple is 2
;
; void umin(unsigned a, unsigned b) {
; a *= 2;
@@ -157,7 +157,7 @@ define void @smin(i32 noundef %a, i32 noundef %b) {
; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((2 * %a)<nsw> smin (4 * %b)<nsw>))
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483646
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a)<nsw> smin (4 * %b)<nsw>))
-; CHECK-NEXT: Loop %for.body: Trip multiple is 1
+; CHECK-NEXT: Loop %for.body: Trip multiple is 2
;
; void smin(signed a, signed b) {
; a *= 2;
>From fd7152b03fd918def165d012aefca44f48a4b899 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 19 Sep 2025 10:22:23 +0100
Subject: [PATCH 3/3] [SCEV] Rewrite more SCEVAddExpr when applying guards.
When re-writing SCEVAddExprs to apply information from guards, check if
we have information for the expression itself. If so, apply it.
When we have an expression of the form (Const + A), check if we have
guard info for (Const + 1 + A) and use it. This is needed to avoid
regressions in a few cases, where we have BTCs with a subtracted
constant.
Rewriting expressions could cause regressions, e.g. when comparing 2
SCEV expressions where we are only able to rewrite one side, but I could
not find any cases where this happens more with this patch in practice.
https://alive2.llvm.org/ce/z/cre6rr
---
llvm/lib/Analysis/ScalarEvolution.cpp | 34 ++++++++++++++-----
...ge-taken-count-guard-info-apply-to-adds.ll | 6 ++--
.../ScalarEvolution/trip-count-minmax.ll | 12 +++----
.../IndVarSimplify/canonicalize-cmp.ll | 6 ++--
.../dont-fold-tail-for-divisible-TC.ll | 2 +-
.../runtime-checks-difference.ll | 7 +---
6 files changed, 38 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index ee1f92a4197e8..09a31b105e128 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -16079,16 +16079,32 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
}
const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
- // Trip count expressions sometimes consist of adding 3 operands, i.e.
- // (Const + A + B). There may be guard info for A + B, and if so, apply
- // it.
- // TODO: Could more generally apply guards to Add sub-expressions.
- if (isa<SCEVConstant>(Expr->getOperand(0)) &&
- Expr->getNumOperands() == 3) {
- if (const SCEV *S = Map.lookup(
- SE.getAddExpr(Expr->getOperand(1), Expr->getOperand(2))))
- return SE.getAddExpr(Expr->getOperand(0), S);
+ if (const SCEV *S = Map.lookup(Expr))
+ return S;
+ if (isa<SCEVConstant>(Expr->getOperand(0))) {
+ // Trip count expressions sometimes consist of adding 3 operands, i.e.
+ // (Const + A + B). There may be guard info for A + B, and if so, apply
+ // it.
+ // TODO: Could more generally apply guards to Add sub-expressions.
+ if (Expr->getNumOperands() == 3) {
+ if (const SCEV *S = Map.lookup(
+ SE.getAddExpr(Expr->getOperand(1), Expr->getOperand(2))))
+ return SE.getAddExpr(Expr->getOperand(0), S);
+ }
+
+ // For expressions of the form (Const + A), check if we have guard info
+ // for (Const + 1 + A), and rewrite to ((Const + 1 + A) - 1). This makes
+ // sure we don't lose information when rewriting expressions based on
+ // back-edge taken counts in some cases.
+ if (Expr->getNumOperands() == 2) {
+ auto *NewC =
+ SE.getAddExpr(Expr->getOperand(0), SE.getOne(Expr->getType()));
+ if (const SCEV *S =
+ Map.lookup(SE.getAddExpr(NewC, Expr->getOperand(1))))
+ return SE.getMinusSCEV(S, SE.getOne(Expr->getType()));
+ }
}
+
SmallVector<const SCEV *, 2> Operands;
bool Changed = false;
for (const auto *Op : Expr->operands()) {
diff --git a/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll
index 6b2c78cebc44a..5ea836d3b8067 100644
--- a/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll
+++ b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll
@@ -33,9 +33,9 @@ declare void @clobber()
define void @test_add_sub_1_guard(ptr %src, i32 %n) {
; CHECK-LABEL: 'test_add_sub_1_guard'
; CHECK-NEXT: Determining loop execution counts for: @test_add_sub_1_guard
-; CHECK-NEXT: Loop %loop: backedge-taken count is (zext i32 (-1 + (%n /u 2))<nsw> to i64)
-; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4294967295
-; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (zext i32 (-1 + (%n /u 2))<nsw> to i64)
+; CHECK-NEXT: Loop %loop: backedge-taken count is i64 0
+; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 0
+; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is i64 0
; CHECK-NEXT: Loop %loop: Trip multiple is 1
;
entry:
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll
index d38010403dad7..2f0627b7d4476 100644
--- a/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll
+++ b/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll
@@ -102,12 +102,12 @@ define void @umax(i32 noundef %a, i32 noundef %b) {
; CHECK-NEXT: %cond = select i1 %cmp, i32 %mul, i32 %mul1
; CHECK-NEXT: --> ((2 * %a) umax (4 * %b)) U: [0,-1) S: [-2147483648,2147483647)
; CHECK-NEXT: %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
-; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (-1 + ((2 * %a) umax (4 * %b))) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,2147483647) S: [0,2147483647) Exits: (-1 + ((2 * %a) umax (4 * %b))) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %inc = add nuw nsw i32 %i.011, 1
-; CHECK-NEXT: --> {1,+,1}<nuw><%for.body> U: [1,-1) S: [1,-1) Exits: ((2 * %a) umax (4 * %b)) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%for.body> U: [1,-2147483648) S: [1,-2147483648) Exits: ((2 * %a) umax (4 * %b)) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: Determining loop execution counts for: @umax
; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((2 * %a) umax (4 * %b)))
-; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 -3
+; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483646
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a) umax (4 * %b)))
; CHECK-NEXT: Loop %for.body: Trip multiple is 2
;
@@ -197,12 +197,12 @@ define void @smax(i32 noundef %a, i32 noundef %b) {
; CHECK-NEXT: %cond = select i1 %cmp, i32 %mul, i32 %mul1
; CHECK-NEXT: --> ((2 * %a)<nsw> smax (4 * %b)<nsw>) U: [0,-1) S: [-2147483648,2147483647)
; CHECK-NEXT: %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
-; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (-1 + ((2 * %a)<nsw> smax (4 * %b)<nsw>)) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,2147483647) S: [0,2147483647) Exits: (-1 + ((2 * %a)<nsw> smax (4 * %b)<nsw>)) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %inc = add nuw nsw i32 %i.011, 1
-; CHECK-NEXT: --> {1,+,1}<nuw><%for.body> U: [1,-1) S: [1,-1) Exits: ((2 * %a)<nsw> smax (4 * %b)<nsw>) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%for.body> U: [1,-2147483648) S: [1,-2147483648) Exits: ((2 * %a)<nsw> smax (4 * %b)<nsw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: Determining loop execution counts for: @smax
; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((2 * %a)<nsw> smax (4 * %b)<nsw>))
-; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 -3
+; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483646
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a)<nsw> smax (4 * %b)<nsw>))
; CHECK-NEXT: Loop %for.body: Trip multiple is 2
;
diff --git a/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll b/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll
index 4b52479fc6c4d..40e3c63cbe04a 100644
--- a/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll
+++ b/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll
@@ -343,14 +343,13 @@ define void @slt_no_smax_needed(i64 %n, ptr %dst) {
; CHECK-NEXT: [[PRE:%.*]] = icmp ult i32 [[ADD_1]], 8
; CHECK-NEXT: br i1 [[PRE]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]]
; CHECK: loop.preheader:
-; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[SHR]], i32 1)
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i32 [[IV]]
; CHECK-NEXT: store i8 0, ptr [[GEP]], align 1
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[SMAX]]
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[SHR]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
@@ -385,14 +384,13 @@ define void @ult_no_umax_needed(i64 %n, ptr %dst) {
; CHECK-NEXT: [[PRE:%.*]] = icmp ult i32 [[ADD_1]], 8
; CHECK-NEXT: br i1 [[PRE]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]]
; CHECK: loop.preheader:
-; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[SHR]], i32 1)
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i32 [[IV]]
; CHECK-NEXT: store i8 0, ptr [[GEP]], align 1
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[UMAX]]
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[SHR]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
index 4f5a26e9c89cb..4a9b2bd7cc888 100644
--- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
+++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
@@ -193,7 +193,7 @@ define dso_local void @cannotProveAlignedTC(ptr noalias nocapture %A, i32 %p, i3
; CHECK-NEXT: store i32 13, ptr [[TMP12]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
; CHECK: pred.store.continue6:
-; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll b/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll
index 648ebc7e6c3a5..a556b15adbefc 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll
@@ -465,12 +465,7 @@ define void @remove_diff_checks_via_guards(i32 %x, i32 %y, ptr %A) {
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[SMAX]], 4294967295
; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP15:%.*]] = or i1 [[TMP9]], [[TMP14]]
-; CHECK-NEXT: br i1 [[TMP15]], [[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
-; CHECK: [[VECTOR_MEMCHECK]]:
-; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[OFFSET]] to i64
-; CHECK-NEXT: [[TMP17:%.*]] = shl nsw i64 [[TMP16]], 2
-; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP17]], 16
-; CHECK-NEXT: br i1 [[DIFF_CHECK]], [[SCALAR_PH]], [[VECTOR_PH1:label %.*]]
+; CHECK-NEXT: br i1 [[TMP15]], [[SCALAR_PH]], [[VECTOR_PH:label %.*]]
;
entry:
%offset = sub i32 %x, %y
More information about the llvm-commits
mailing list