[llvm] [SCEV] Check if AddRec doesn't wrap via BTC before adding predicate. (PR #131538)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 16 12:30:30 PDT 2025
https://github.com/fhahn created https://github.com/llvm/llvm-project/pull/131538
https://github.com/llvm/llvm-project/issues/131281 exposed a case where
SCEV is not able to infer NSW for an AddRec, but constant folding in
SCEVExpander is able to determine that the runtime check is always false
(i.e. the AddRec cannot signed-wrap).
This is caught by an assertion in LV, where we expand a runtime check
and the trip count expression, but the runtime check gets folded away.
For AddRecs with a step of 1, if Start + BTC >= Start, the AddRec is
treated as having NUW/NSW and no wrap predicate is added for it.
https://alive2.llvm.org/ce/z/VnWwEN
This check can help determine NSW/NUW in a few more cases, but doing so
for all AddRecs has a noticeable compile time impact:
https://llvm-compile-time-tracker.com/compare.php?from=215c0d2b651dc757378209a3edaff1a130338dd8&to=cdd1c1d32c598d77b73a57bcc05c1383786b3ac4&stat=instructions:u
Is there a good general place in SCEV where we could try to refine
wrap flags with logic like that in this patch?
Fixes https://github.com/llvm/llvm-project/issues/131281.
>From 0b47f6b5bcb82f08a738b89365cd2f1aec1871b4 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 16 Mar 2025 19:16:00 +0000
Subject: [PATCH 1/2] [LV] Add test case for #131281
Add test case for https://github.com/llvm/llvm-project/issues/131281.
---
.../LoopVectorize/scev-predicate-reasoning.ll | 29 +++++++++++++++++++
1 file changed, 29 insertions(+)
diff --git a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll
index 590cdd73e55f3..c34f462168360 100644
--- a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll
+++ b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll
@@ -241,3 +241,32 @@ loop:
exit:
ret void
}
+
+declare i1 @cond()
+
+; Test case for https://github.com/llvm/llvm-project/issues/131281.
+; %add2 is known to not wrap via BTC.
+define void @no_signed_wrap_iv_via_btc(ptr %dst, i32 %N) mustprogress {
+entry:
+ %sub = add i32 %N, -100
+ %sub4 = add i32 %N, -99
+ br label %outer
+
+outer:
+ %c = call i1 @cond()
+ br i1 %c, label %loop, label %exit
+
+loop:
+ %iv = phi i32 [ 0, %outer ], [ %inc, %loop ]
+ %add2 = add i32 %sub4, %iv
+ %add.ext = sext i32 %add2 to i64
+ %gep.dst = getelementptr i32, ptr %dst, i64 %add.ext
+ store i32 0, ptr %gep.dst, align 4
+ %inc = add i32 %iv, 1
+ %add = add i32 %sub, %inc
+ %ec = icmp sgt i32 %add, %N
+ br i1 %ec, label %outer, label %loop
+
+exit:
+ ret void
+}
>From a6f59eee48b8647680e80d6838c86279198385fa Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 16 Mar 2025 19:29:23 +0000
Subject: [PATCH 2/2] [SCEV] Check if AddRec doesn't wrap via BTC before adding
predicate.
https://github.com/llvm/llvm-project/issues/131281 exposed a case where
SCEV is not able to infer NSW for an AddRec, but constant folding in
SCEVExpander is able to determine that the runtime check is always false
(i.e. the AddRec cannot signed-wrap).
This is caught by an assertion in LV, where we expand a runtime check
and the trip count expression, but the runtime check gets folded away.
For AddRecs with a step of 1, if Start + BTC >= Start, the AddRec is
treated as having NUW/NSW and no wrap predicate is added for it.
https://alive2.llvm.org/ce/z/VnWwEN
This check can help determine NSW/NUW in a few more cases, but doing so
for all AddRecs has a noticeable compile time impact:
https://llvm-compile-time-tracker.com/compare.php?from=215c0d2b651dc757378209a3edaff1a130338dd8&to=cdd1c1d32c598d77b73a57bcc05c1383786b3ac4&stat=instructions:u
Is there a good general place in SCEV where we could try to refine
wrap flags with logic like that in this patch?
Fixes https://github.com/llvm/llvm-project/issues/131281.
---
llvm/lib/Analysis/ScalarEvolution.cpp | 25 +++++++++
.../LoopVectorize/scev-predicate-reasoning.ll | 52 +++++++++++++++++++
2 files changed, 77 insertions(+)
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 8f74c1c398ced..6dbbcd008f59d 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -14775,6 +14775,29 @@ const SCEVPredicate *ScalarEvolution::getWrapPredicate(
namespace {
+/// Return true if \p AR is known to not wrap, based on the backedge-taken
+/// count of its loop.
+static bool proveNoWrapViaBTC(const SCEVAddRecExpr *AR,
+ SCEVWrapPredicate::IncrementWrapFlags Pred,
+ ScalarEvolution &SE) {
+ const Loop *L = AR->getLoop();
+ const SCEV *BTC = SE.getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BTC))
+ return false;
+ if (!match(AR->getStepRecurrence(SE), m_scev_One()) ||
+ AR->getType() != BTC->getType())
+ return false;
+ // AR has a step of 1, it is NSSW/NUSW if Start + BTC >= Start.
+ auto *Add = SE.getAddExpr(AR->getStart(), BTC);
+ assert((Pred == SCEVWrapPredicate::IncrementNSSW ||
+ Pred == SCEVWrapPredicate::IncrementNUSW) &&
+ "Unexpected predicate");
+ return SE.isKnownPredicate(Pred == SCEVWrapPredicate::IncrementNSSW
+ ? CmpInst::ICMP_SGE
+ : CmpInst::ICMP_UGE,
+ Add, AR->getStart());
+}
+
class SCEVPredicateRewriter : public SCEVRewriteVisitor<SCEVPredicateRewriter> {
public:
@@ -14860,6 +14883,8 @@ class SCEVPredicateRewriter : public SCEVRewriteVisitor<SCEVPredicateRewriter> {
bool addOverflowAssumption(const SCEVAddRecExpr *AR,
SCEVWrapPredicate::IncrementWrapFlags AddedFlags) {
+ if (proveNoWrapViaBTC(AR, AddedFlags, SE))
+ return true;
auto *A = SE.getWrapPredicate(AR, AddedFlags);
return addOverflowAssumption(A);
}
diff --git a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll
index c34f462168360..40c752bbaf4c8 100644
--- a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll
+++ b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll
@@ -247,6 +247,58 @@ declare i1 @cond()
; Test case for https://github.com/llvm/llvm-project/issues/131281.
; %add2 is known to not wrap via BTC.
define void @no_signed_wrap_iv_via_btc(ptr %dst, i32 %N) mustprogress {
+; CHECK-LABEL: define void @no_signed_wrap_iv_via_btc
+; CHECK-SAME: (ptr [[DST:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SUB:%.*]] = add i32 [[N]], -100
+; CHECK-NEXT: [[SUB4:%.*]] = add i32 [[N]], -99
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], 1
+; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[SUB4]], i32 [[TMP0]])
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SMAX]], 100
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[N]]
+; CHECK-NEXT: br label [[OUTER:%.*]]
+; CHECK: outer.loopexit:
+; CHECK-NEXT: br label [[OUTER]]
+; CHECK: outer:
+; CHECK-NEXT: [[C:%.*]] = call i1 @cond()
+; CHECK-NEXT: br i1 [[C]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
+; CHECK: loop.preheader:
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 4
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]]
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SUB4]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP7]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[OUTER_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[SUB4]], [[IV]]
+; CHECK-NEXT: [[ADD_EXT:%.*]] = sext i32 [[ADD2]] to i64
+; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i32, ptr [[DST]], i64 [[ADD_EXT]]
+; CHECK-NEXT: store i32 0, ptr [[GEP_DST]], align 4
+; CHECK-NEXT: [[INC]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[INC]]
+; CHECK-NEXT: [[EC:%.*]] = icmp sgt i32 [[ADD]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label [[OUTER_LOOPEXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
entry:
%sub = add i32 %N, -100
%sub4 = add i32 %N, -99
More information about the llvm-commits
mailing list