[llvm] 6478f3f - [SCEV] Support single-cond range check idiom in applyLoopGuards.

Fri Jun 25 02:25:57 PDT 2021

Author: Florian Hahn
Date: 2021-06-25T10:24:40+01:00
New Revision: 6478f3fb78b3e7cddb642dce0f39d5d4a976af1e

URL: https://github.com/llvm/llvm-project/commit/6478f3fb78b3e7cddb642dce0f39d5d4a976af1e
DIFF: https://github.com/llvm/llvm-project/commit/6478f3fb78b3e7cddb642dce0f39d5d4a976af1e.diff

LOG: [SCEV] Support single-cond range check idiom in applyLoopGuards.

This patch extends applyLoopGuards to detect a single-cond range check
idiom that InstCombine generates.

It extends applyLoopGuards to detect conditions of the form
(-C1 + X < C2). InstCombine will create this form when combining two
checks of the form (X u< C2 + C1) and (X >=u C1).

In practice, this enables us to correctly compute a tight trip count
bounds for code as in the function below. InstCombine will fold the
minimum iteration check created by LoopRotate with the user check (< 8).

    void unsigned_check(short *pred, unsigned width) {
        if (width < 8) {
            for (int x = 0; x < width; x++)
                pred[x] = pred[x] * pred[x];
        }
    }

As a consequence, LLVM creates dead vector loops for the code above,
e.g. see https://godbolt.org/z/cb8eTcqET

https://alive2.llvm.org/ce/z/SHHW4d

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D104741

Added: 
    

Modified: 
    llvm/lib/Analysis/ScalarEvolution.cpp
    llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 8e13a2c9b2eed..34e671b7a72fc 100644

--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -13672,11 +13672,42 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
       }
     }
 
-    if (!isa<SCEVUnknown>(LHS)) {
+    if (!isa<SCEVUnknown>(LHS) && isa<SCEVUnknown>(RHS)) {
       std::swap(LHS, RHS);
       Predicate = CmpInst::getSwappedPredicate(Predicate);
     }
 
+    // Check for a condition of the form (-C1 + X < C2).  InstCombine will
+    // create this form when combining two checks of the form (X u< C2 + C1) and
+    // (X >=u C1).
+    auto MatchRangeCheckIdiom = [this, Predicate, LHS, RHS, &RewriteMap]() {
+      auto *AddExpr = dyn_cast<SCEVAddExpr>(LHS);
+      if (!AddExpr || AddExpr->getNumOperands() != 2)
+        return false;
+
+      auto *C1 = dyn_cast<SCEVConstant>(AddExpr->getOperand(0));
+      auto *LHSUnknown = dyn_cast<SCEVUnknown>(AddExpr->getOperand(1));
+      auto *C2 = dyn_cast<SCEVConstant>(RHS);
+      if (!C1 || !C2 || !LHSUnknown)
+        return false;
+
+      auto ExactRegion =
+          ConstantRange::makeExactICmpRegion(Predicate, C2->getAPInt())
+              .sub(C1->getAPInt());
+
+      // Bail out, unless we have a non-wrapping, monotonic range.
+      if (ExactRegion.isWrappedSet() || ExactRegion.isFullSet())
+        return false;
+      auto I = RewriteMap.find(LHSUnknown->getValue());
+      const SCEV *RewrittenLHS = I != RewriteMap.end() ? I->second : LHS;
+      RewriteMap[LHSUnknown->getValue()] = getUMaxExpr(
+          getConstant(ExactRegion.getUnsignedMin()),
+          getUMinExpr(RewrittenLHS, getConstant(ExactRegion.getUnsignedMax())));
+      return true;
+    };
+    if (MatchRangeCheckIdiom())
+      return;
+
     // For now, limit to conditions that provide information about unknown
     // expressions. RHS also cannot contain add recurrences.
     auto *LHSUnknown = dyn_cast<SCEVUnknown>(LHS);

diff  --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
index d22cb94a99a96..9e7957a3736e5 100644
--- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
+++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
@@ -1260,14 +1260,14 @@ define void @optimized_range_check_unsigned(i16* %pred, i32 %N) {
 ; CHECK-NEXT:    %N.off = add i32 %N, -1
 ; CHECK-NEXT:    --> (-1 + %N) U: full-set S: full-set
 ; CHECK-NEXT:    %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-NEXT:    --> {0,+,1}<nuw><nsw><%loop> U: [0,-2147483648) S: [0,-2147483648) Exits: (-1 + %N) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><nsw><%loop> U: [0,7) S: [0,7) Exits: (-1 + %N) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %gep = getelementptr inbounds i16, i16* %pred, i32 %iv
 ; CHECK-NEXT:    --> {%pred,+,2}<nuw><%loop> U: full-set S: full-set Exits: ((2 * (zext i32 (-1 + %N) to i64))<nuw><nsw> + %pred) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = add nuw nsw i32 %iv, 1
-; CHECK-NEXT:    --> {1,+,1}<nuw><nsw><%loop> U: [1,-2147483648) S: [1,-2147483648) Exits: %N LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nuw><nsw><%loop> U: [1,8) S: [1,8) Exits: %N LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @optimized_range_check_unsigned
 ; CHECK-NEXT:  Loop %loop: backedge-taken count is (-1 + %N)
-; CHECK-NEXT:  Loop %loop: max backedge-taken count is -1
+; CHECK-NEXT:  Loop %loop: max backedge-taken count is 6
 ; CHECK-NEXT:  Loop %loop: Predicated backedge-taken count is (-1 + %N)
 ; CHECK-NEXT:   Predicates:
 ; CHECK:       Loop %loop: Trip multiple is 1
@@ -1289,6 +1289,44 @@ exit:
   ret void
 }
 
+; The function below uses a single condition to ensure %N > 2 && %N < 22.
+; InstCombine transforms such checks with 2 conditions to a single check as in
+; the test function.
+define void @optimized_range_check_unsigned2(i16* %pred, i32 %N) {
+; CHECK-LABEL: 'optimized_range_check_unsigned2'
+; CHECK-NEXT:  Classifying expressions for: @optimized_range_check_unsigned2
+; CHECK-NEXT:    %N.off = add i32 %N, -2
+; CHECK-NEXT:    --> (-2 + %N) U: full-set S: full-set
+; CHECK-NEXT:    %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-NEXT:    --> {0,+,1}<nuw><nsw><%loop> U: [0,21) S: [0,21) Exits: (-1 + %N) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    %gep = getelementptr inbounds i16, i16* %pred, i32 %iv
+; CHECK-NEXT:    --> {%pred,+,2}<nuw><%loop> U: full-set S: full-set Exits: ((2 * (zext i32 (-1 + %N) to i64))<nuw><nsw> + %pred) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    %iv.next = add nuw nsw i32 %iv, 1
+; CHECK-NEXT:    --> {1,+,1}<nuw><nsw><%loop> U: [1,22) S: [1,22) Exits: %N LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:  Determining loop execution counts for: @optimized_range_check_unsigned2
+; CHECK-NEXT:  Loop %loop: backedge-taken count is (-1 + %N)
+; CHECK-NEXT:  Loop %loop: max backedge-taken count is 20
+; CHECK-NEXT:  Loop %loop: Predicated backedge-taken count is (-1 + %N)
+; CHECK-NEXT:   Predicates:
+; CHECK:       Loop %loop: Trip multiple is 1
+;
+entry:
+  %N.off = add i32 %N, -2
+  %cmp = icmp ult i32 %N.off, 20
+  br i1 %cmp, label %loop, label %exit
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %gep = getelementptr inbounds i16, i16* %pred, i32 %iv
+  store i16 0, i16* %gep, align 2
+  %iv.next = add nuw nsw i32 %iv, 1
+  %ec = icmp eq i32 %iv.next, %N
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
 ; Similar to @optimized_range_check_unsigned, but the initial compare checks
 ; against unsigned max (-1), which breaks the range check idiom.
 define void @not_optimized_range_check_unsigned1(i16* %pred, i32 %N) {