[llvm] [SCEV] Add implicit guard conditions when applying loop guards. (PR #84309)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 7 03:43:40 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-llvm-analysis
Author: Mel Chen (Mel-Chen)
Changes:
When a guard condition has unknown operands, additional implicit
conditions are generated.
For example, if the guard condition is
  %cond0 = icmp slt i32 %start, %n
the following additional conditions are generated:
  %cond1 = icmp slt i32 %start, MAX_SIGNED_i32
  %cond2 = icmp slt i32 MIN_SIGNED_i32, %n
The constant operands help obtain a more precise range; one
application is enabling `getRangeForAffineNoSelfWrappingAR` to provide a
more accurate range.
For truncated induction variables, this patch still cannot sharpen the
range, because the no-wrap (nw) flag is lost.
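As a quick sanity check of the implication (this snippet is not part of the patch, and the concrete values chosen for %start and %n are hypothetical), a strict signed compare of two unknowns can be checked against the two derived bounds in plain C++:

```cpp
#include <cassert>
#include <cstdint>
#include <limits>

// If `start s< n` holds for 32-bit signed values, then `start` cannot be the
// signed maximum and `n` cannot be the signed minimum, which is exactly what
// the implicit conditions %cond1 and %cond2 encode.
int main() {
  int32_t start = -5, n = 7; // hypothetical values; any pair with start < n
  if (start < n) {
    assert(start < std::numeric_limits<int32_t>::max()); // %cond1
    assert(std::numeric_limits<int32_t>::min() < n);     // %cond2
  }
  return 0;
}
```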
---
Full diff: https://github.com/llvm/llvm-project/pull/84309.diff
2 Files Affected:
- (modified) llvm/lib/Analysis/ScalarEvolution.cpp (+38-3)
- (added) llvm/test/Analysis/ScalarEvolution/range-sharpening.ll (+76)
``````````diff
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index acc0aa23107bb5..5de33f37958e1b 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -7069,7 +7069,8 @@ ConstantRange ScalarEvolution::getRangeForAffineNoSelfWrappingAR(
IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
ICmpInst::Predicate GEPred =
IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
- const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this);
+ const SCEV *End = applyLoopGuards(
+ AddRec->evaluateAtIteration(MaxBECount, *this), AddRec->getLoop());
// We know that there is no self-wrap. Let's take Start and End values and
// look at all intermediate values V1, V2, ..., Vn that IndVar takes during
@@ -7098,10 +7099,10 @@ ConstantRange ScalarEvolution::getRangeForAffineNoSelfWrappingAR(
return ConstantRange::getFull(BitWidth);
if (isKnownPositive(Step) &&
- isKnownPredicateViaConstantRanges(LEPred, Start, End))
+ isKnownViaNonRecursiveReasoning(LEPred, Start, End))
return RangeBetween;
if (isKnownNegative(Step) &&
- isKnownPredicateViaConstantRanges(GEPred, Start, End))
+ isKnownViaNonRecursiveReasoning(GEPred, Start, End))
return RangeBetween;
return ConstantRange::getFull(BitWidth);
}
@@ -15434,6 +15435,40 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
const auto *LHS = getSCEV(Cmp->getOperand(0));
const auto *RHS = getSCEV(Cmp->getOperand(1));
CollectCondition(Predicate, LHS, RHS, RewriteMap);
+
+ // When the guard condition is of the following form,
+ // Unknown_LHS s> Unknown_RHS or Unknown_LHS s< Unknown_RHS
+ // Two additional conditions will be added:
+ // Unknown_LHS s> Min(Data_Type), Max(Data_Type) s> Unknown_RHS
+ // or
+ // Unknown_LHS s< Max(Data_Type), Min(Data_Type) s< Unknown_RHS
+ // Note that for pointer types, we will not add additional conditions,
+ // as this may not be a meaningful comparison.
+ // TODO: This may work for unsigned predicates too.
+ if (CmpInst::isStrictPredicate(Predicate) &&
+ CmpInst::isSigned(Predicate) && isa<SCEVUnknown>(LHS) &&
+ isa<SCEVUnknown>(RHS) && !LHS->getType()->isPointerTy()) {
+
+ unsigned BitWidth = getTypeSizeInBits(LHS->getType());
+ APInt MaxAP = APInt::getSignedMaxValue(BitWidth);
+ APInt MinAP = APInt::getSignedMinValue(BitWidth);
+ const SCEV *MaxScev = getConstant(MaxAP);
+ const SCEV *MinScev = getConstant(MinAP);
+
+ switch (Predicate) {
+ case CmpInst::ICMP_SLT:
+ CollectCondition(Predicate, LHS, MaxScev, RewriteMap);
+ CollectCondition(Predicate, MinScev, RHS, RewriteMap);
+ break;
+ case CmpInst::ICMP_SGT:
+ CollectCondition(Predicate, LHS, MinScev, RewriteMap);
+ CollectCondition(Predicate, MaxScev, RHS, RewriteMap);
+ break;
+ default:
+ break;
+ }
+ }
+
continue;
}
diff --git a/llvm/test/Analysis/ScalarEvolution/range-sharpening.ll b/llvm/test/Analysis/ScalarEvolution/range-sharpening.ll
new file mode 100644
index 00000000000000..f1ad4c9072346e
--- /dev/null
+++ b/llvm/test/Analysis/ScalarEvolution/range-sharpening.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -disable-output -scalar-evolution-use-expensive-range-sharpening -passes='print<scalar-evolution>' 2>&1 | FileCheck %s
+
+define void @test_guard_less_than_non_const(ptr nocapture %a, i32 %iv_start, i32 %n) {
+; CHECK-LABEL: 'test_guard_less_than_non_const'
+; CHECK-NEXT: Classifying expressions for: @test_guard_less_than_non_const
+; CHECK-NEXT: %i.05 = phi i32 [ %inc, %for.body ], [ %iv_start, %entry ]
+; CHECK-NEXT: --> {%iv_start,+,1}<nsw><%for.body> U: full-set S: [-2147483648,2147483647) Exits: (-1 + %n) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %a, i32 %i.05
+; CHECK-NEXT: --> {((4 * (sext i32 %iv_start to i64))<nsw> + %a),+,4}<nw><%for.body> U: full-set S: full-set Exits: ((4 * (zext i32 (-1 + (-1 * %iv_start) + %n) to i64))<nuw><nsw> + (4 * (sext i32 %iv_start to i64))<nsw> + %a) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: %inc = add nsw i32 %i.05, 1
+; CHECK-NEXT: --> {(1 + %iv_start),+,1}<nsw><%for.body> U: full-set S: full-set Exits: %n LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: Determining loop execution counts for: @test_guard_less_than_non_const
+; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + (-1 * %iv_start) + %n)
+; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 -2
+; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + (-1 * %iv_start) + %n)
+; CHECK-NEXT: Loop %for.body: Trip multiple is 1
+;
+entry:
+ %cmp4 = icmp slt i32 %iv_start, %n
+ br i1 %cmp4, label %for.body, label %exit
+
+for.body:
+ %i.05 = phi i32 [ %inc, %for.body ], [ %iv_start, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %a, i32 %i.05
+ store i32 %i.05, ptr %arrayidx, align 4
+ %inc = add nsw i32 %i.05, 1
+ %exitcond.not = icmp eq i32 %inc, %n
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+define void @test_guard_less_than_non_const_sext(ptr nocapture %a, i32 %iv_start, i32 %n) {
+; CHECK-LABEL: 'test_guard_less_than_non_const_sext'
+; CHECK-NEXT: Classifying expressions for: @test_guard_less_than_non_const_sext
+; CHECK-NEXT: %0 = sext i32 %iv_start to i64
+; CHECK-NEXT: --> (sext i32 %iv_start to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648)
+; CHECK-NEXT: %wide.trip.count = sext i32 %n to i64
+; CHECK-NEXT: --> (sext i32 %n to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648)
+; CHECK-NEXT: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT: --> {(sext i32 %iv_start to i64),+,1}<nsw><%for.body> U: [-2147483648,-9223372036854775808) S: [-2147483648,2147483647) Exits: (-1 + (sext i32 %n to i64))<nsw> LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+; CHECK-NEXT: --> {((4 * (sext i32 %iv_start to i64))<nsw> + %a),+,4}<nw><%for.body> U: full-set S: full-set Exits: (-4 + (4 * (sext i32 %n to i64))<nsw> + %a) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: %1 = trunc i64 %indvars.iv to i32
+; CHECK-NEXT: --> {%iv_start,+,1}<%for.body> U: full-set S: full-set Exits: (-1 + %n) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: %indvars.iv.next = add nsw i64 %indvars.iv, 1
+; CHECK-NEXT: --> {(1 + (sext i32 %iv_start to i64))<nsw>,+,1}<nsw><%for.body> U: [-2147483647,-9223372036854775808) S: [-2147483647,-9223372036854775808) Exits: (sext i32 %n to i64) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: Determining loop execution counts for: @test_guard_less_than_non_const_sext
+; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + (sext i32 %n to i64) + (-1 * (sext i32 %iv_start to i64))<nsw>)
+; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i64 -2
+; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + (sext i32 %n to i64) + (-1 * (sext i32 %iv_start to i64))<nsw>)
+; CHECK-NEXT: Loop %for.body: Trip multiple is 1
+;
+entry:
+ %cmp4 = icmp slt i32 %iv_start, %n
+ br i1 %cmp4, label %for.body.preheader, label %exit
+
+for.body.preheader:
+ %0 = sext i32 %iv_start to i64
+ %wide.trip.count = sext i32 %n to i64
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+ %1 = trunc i64 %indvars.iv to i32
+ store i32 %1, ptr %arrayidx, align 4
+ %indvars.iv.next = add nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
+}
``````````
https://github.com/llvm/llvm-project/pull/84309