[llvm] [SCEV] Add implicit guard conditions when applying loop guards. (PR #84309)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 7 03:43:40 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-llvm-analysis
Author: Mel Chen (Mel-Chen)
Changes:
When a guard condition has unknown operands, additional implicit
conditions are generated.
For example, if the guard condition is
  %cond0 = icmp slt i32 %start, %n
the following additional conditions are generated:
  %cond1 = icmp slt i32 %start, MAX_SIGNED_i32
  %cond2 = icmp slt i32 MIN_SIGNED_i32, %n
The constant operands help obtain a more precise range; one
application is enabling `getRangeForAffineNoSelfWrappingAR` to provide a
more accurate range.
For truncated induction variables, this patch still cannot sharpen the
range, because the no-wrap (nw) flag is lost.
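As a quick sanity check of the implication (this snippet is not part of the patch, and the concrete values chosen for %start and %n are hypothetical), a strict signed compare of two unknowns can be checked against the two derived bounds in plain C++:

```cpp
#include <cassert>
#include <cstdint>
#include <limits>

// If `start s< n` holds for 32-bit signed values, then `start` cannot be the
// signed maximum and `n` cannot be the signed minimum, which is exactly what
// the implicit conditions %cond1 and %cond2 encode.
int main() {
  int32_t start = -5, n = 7; // hypothetical values; any pair with start < n
  if (start < n) {
    assert(start < std::numeric_limits<int32_t>::max()); // %cond1
    assert(std::numeric_limits<int32_t>::min() < n);     // %cond2
  }
  return 0;
}
```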
---
Full diff: https://github.com/llvm/llvm-project/pull/84309.diff
2 Files Affected:
- (modified) llvm/lib/Analysis/ScalarEvolution.cpp (+38-3)
- (added) llvm/test/Analysis/ScalarEvolution/range-sharpening.ll (+76)
``````````diff
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index acc0aa23107bb5..5de33f37958e1b 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -7069,7 +7069,8 @@ ConstantRange ScalarEvolution::getRangeForAffineNoSelfWrappingAR(
IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
ICmpInst::Predicate GEPred =
IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
- const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this);
+ const SCEV *End = applyLoopGuards(
+ AddRec->evaluateAtIteration(MaxBECount, *this), AddRec->getLoop());
// We know that there is no self-wrap. Let's take Start and End values and
// look at all intermediate values V1, V2, ..., Vn that IndVar takes during
@@ -7098,10 +7099,10 @@ ConstantRange ScalarEvolution::getRangeForAffineNoSelfWrappingAR(
return ConstantRange::getFull(BitWidth);
if (isKnownPositive(Step) &&
- isKnownPredicateViaConstantRanges(LEPred, Start, End))
+ isKnownViaNonRecursiveReasoning(LEPred, Start, End))
return RangeBetween;
if (isKnownNegative(Step) &&
- isKnownPredicateViaConstantRanges(GEPred, Start, End))
+ isKnownViaNonRecursiveReasoning(GEPred, Start, End))
return RangeBetween;
return ConstantRange::getFull(BitWidth);
}
@@ -15434,6 +15435,40 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
const auto *LHS = getSCEV(Cmp->getOperand(0));
const auto *RHS = getSCEV(Cmp->getOperand(1));
CollectCondition(Predicate, LHS, RHS, RewriteMap);
+
+ // When the guard condition is of the following form,
+ // Unknown_LHS s> Unknown_RHS or Unknown_LHS s< Unknown_RHS
+ // Two additional conditions will be added:
+ // Unknown_LHS s> Min(Data_Type), Max(Data_Type) s> Unknown_RHS
+ // or
+ // Unknown_LHS s< Max(Data_Type), Min(Data_Type) s< Unknown_RHS
+ // Note that for pointer types, we will not add additional conditions,
+ // as this may not be a meaningful comparison.
+ // TODO: This may work for unsigned predicates too.
+ if (CmpInst::isStrictPredicate(Predicate) &&
+ CmpInst::isSigned(Predicate) && isa<SCEVUnknown>(LHS) &&
+ isa<SCEVUnknown>(RHS) && !LHS->getType()->isPointerTy()) {
+
+ unsigned BitWidth = getTypeSizeInBits(LHS->getType());
+ APInt MaxAP = APInt::getSignedMaxValue(BitWidth);
+ APInt MinAP = APInt::getSignedMinValue(BitWidth);
+ const SCEV *MaxScev = getConstant(MaxAP);
+ const SCEV *MinScev = getConstant(MinAP);
+
+ switch (Predicate) {
+ case CmpInst::ICMP_SLT:
+ CollectCondition(Predicate, LHS, MaxScev, RewriteMap);
+ CollectCondition(Predicate, MinScev, RHS, RewriteMap);
+ break;
+ case CmpInst::ICMP_SGT:
+ CollectCondition(Predicate, LHS, MinScev, RewriteMap);
+ CollectCondition(Predicate, MaxScev, RHS, RewriteMap);
+ break;
+ default:
+ break;
+ }
+ }
+
continue;
}
diff --git a/llvm/test/Analysis/ScalarEvolution/range-sharpening.ll b/llvm/test/Analysis/ScalarEvolution/range-sharpening.ll
new file mode 100644
index 00000000000000..f1ad4c9072346e
--- /dev/null
+++ b/llvm/test/Analysis/ScalarEvolution/range-sharpening.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -disable-output -scalar-evolution-use-expensive-range-sharpening -passes='print<scalar-evolution>' 2>&1 | FileCheck %s
+
+define void @test_guard_less_than_non_const(ptr nocapture %a, i32 %iv_start, i32 %n) {
+; CHECK-LABEL: 'test_guard_less_than_non_const'
+; CHECK-NEXT: Classifying expressions for: @test_guard_less_than_non_const
+; CHECK-NEXT: %i.05 = phi i32 [ %inc, %for.body ], [ %iv_start, %entry ]
+; CHECK-NEXT: --> {%iv_start,+,1}<nsw><%for.body> U: full-set S: [-2147483648,2147483647) Exits: (-1 + %n) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %a, i32 %i.05
+; CHECK-NEXT: --> {((4 * (sext i32 %iv_start to i64))<nsw> + %a),+,4}<nw><%for.body> U: full-set S: full-set Exits: ((4 * (zext i32 (-1 + (-1 * %iv_start) + %n) to i64))<nuw><nsw> + (4 * (sext i32 %iv_start to i64))<nsw> + %a) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: %inc = add nsw i32 %i.05, 1
+; CHECK-NEXT: --> {(1 + %iv_start),+,1}<nsw><%for.body> U: full-set S: full-set Exits: %n LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: Determining loop execution counts for: @test_guard_less_than_non_const
+; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + (-1 * %iv_start) + %n)
+; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 -2
+; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + (-1 * %iv_start) + %n)
+; CHECK-NEXT: Loop %for.body: Trip multiple is 1
+;
+entry:
+ %cmp4 = icmp slt i32 %iv_start, %n
+ br i1 %cmp4, label %for.body, label %exit
+
+for.body:
+ %i.05 = phi i32 [ %inc, %for.body ], [ %iv_start, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %a, i32 %i.05
+ store i32 %i.05, ptr %arrayidx, align 4
+ %inc = add nsw i32 %i.05, 1
+ %exitcond.not = icmp eq i32 %inc, %n
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+define void @test_guard_less_than_non_const_sext(ptr nocapture %a, i32 %iv_start, i32 %n) {
+; CHECK-LABEL: 'test_guard_less_than_non_const_sext'
+; CHECK-NEXT: Classifying expressions for: @test_guard_less_than_non_const_sext
+; CHECK-NEXT: %0 = sext i32 %iv_start to i64
+; CHECK-NEXT: --> (sext i32 %iv_start to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648)
+; CHECK-NEXT: %wide.trip.count = sext i32 %n to i64
+; CHECK-NEXT: --> (sext i32 %n to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648)
+; CHECK-NEXT: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT: --> {(sext i32 %iv_start to i64),+,1}<nsw><%for.body> U: [-2147483648,-9223372036854775808) S: [-2147483648,2147483647) Exits: (-1 + (sext i32 %n to i64))<nsw> LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+; CHECK-NEXT: --> {((4 * (sext i32 %iv_start to i64))<nsw> + %a),+,4}<nw><%for.body> U: full-set S: full-set Exits: (-4 + (4 * (sext i32 %n to i64))<nsw> + %a) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: %1 = trunc i64 %indvars.iv to i32
+; CHECK-NEXT: --> {%iv_start,+,1}<%for.body> U: full-set S: full-set Exits: (-1 + %n) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: %indvars.iv.next = add nsw i64 %indvars.iv, 1
+; CHECK-NEXT: --> {(1 + (sext i32 %iv_start to i64))<nsw>,+,1}<nsw><%for.body> U: [-2147483647,-9223372036854775808) S: [-2147483647,-9223372036854775808) Exits: (sext i32 %n to i64) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: Determining loop execution counts for: @test_guard_less_than_non_const_sext
+; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + (sext i32 %n to i64) + (-1 * (sext i32 %iv_start to i64))<nsw>)
+; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i64 -2
+; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + (sext i32 %n to i64) + (-1 * (sext i32 %iv_start to i64))<nsw>)
+; CHECK-NEXT: Loop %for.body: Trip multiple is 1
+;
+entry:
+ %cmp4 = icmp slt i32 %iv_start, %n
+ br i1 %cmp4, label %for.body.preheader, label %exit
+
+for.body.preheader:
+ %0 = sext i32 %iv_start to i64
+ %wide.trip.count = sext i32 %n to i64
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+ %1 = trunc i64 %indvars.iv to i32
+ store i32 %1, ptr %arrayidx, align 4
+ %indvars.iv.next = add nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
+}
``````````
https://github.com/llvm/llvm-project/pull/84309