[llvm] 8b19cbf - Reland "[IRCE] Parse range checks in the form of 'LHS - RHS vs Limit'"

Aleksandr Popov via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 11 09:48:30 PDT 2023


Author: Aleksandr Popov
Date: 2023-07-11T18:48:14+02:00
New Revision: 8b19cbfd772f8c2df43736610d0d08058508f93a

URL: https://github.com/llvm/llvm-project/commit/8b19cbfd772f8c2df43736610d0d08058508f93a
DIFF: https://github.com/llvm/llvm-project/commit/8b19cbfd772f8c2df43736610d0d08058508f93a.diff

LOG: Reland "[IRCE] Parse range checks in the form of 'LHS - RHS vs Limit'"

This reverts commit 4c6f95be29c6ce0f89663a5103c58ee63d76cda3

and relands e16c5c092205f68825466c25a1dd30783c4820f3

https://reviews.llvm.org/D154069

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
    llvm/test/Transforms/IRCE/iv-plus-offset-range-check.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index 5f9bad37e7ab73..5ab688235c72a9 100644
--- a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -119,6 +119,16 @@ static cl::opt<bool> AllowNarrowLatchCondition(
     cl::desc("If set to true, IRCE may eliminate wide range checks in loops "
              "with narrow latch condition."));
 
+static cl::opt<unsigned> MaxTypeSizeForOverflowCheck(
+    "irce-max-type-size-for-overflow-check", cl::Hidden, cl::init(32),
+    cl::desc(
+        "Maximum size of range check type for which can be produced runtime "
+        "overflow check of its limit's computation"));
+
+static cl::opt<bool>
+    PrintScaledBoundaryRangeChecks("irce-print-scaled-boundary-range-checks",
+                                   cl::Hidden, cl::init(false));
+
 static const char *ClonedLoopTag = "irce.loop.clone";
 
 #define DEBUG_TYPE "irce"
@@ -156,6 +166,10 @@ class InductiveRangeCheck {
                                   const SCEVAddRecExpr *&Index,
                                   const SCEV *&End);
 
+  static bool reassociateSubLHS(Loop *L, Value *VariantLHS, Value *InvariantRHS,
+                                ICmpInst::Predicate Pred, ScalarEvolution &SE,
+                                const SCEVAddRecExpr *&Index, const SCEV *&End);
+
 public:
   const SCEV *getBegin() const { return Begin; }
   const SCEV *getStep() const { return Step; }
@@ -281,6 +295,10 @@ bool InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
   if (parseIvAgaisntLimit(L, LHS, RHS, Pred, SE, Index, End))
     return true;
 
+  if (reassociateSubLHS(L, LHS, RHS, Pred, SE, Index, End))
+    return true;
+
+  // TODO: support ReassociateAddLHS
   return false;
 }
 
@@ -346,6 +364,126 @@ bool InductiveRangeCheck::parseIvAgaisntLimit(Loop *L, Value *LHS, Value *RHS,
   llvm_unreachable("default clause returns!");
 }
 
+// Try to parse range check in the form of "IV - Offset vs Limit" or "Offset -
+// IV vs Limit"
+bool InductiveRangeCheck::reassociateSubLHS(
+    Loop *L, Value *VariantLHS, Value *InvariantRHS, ICmpInst::Predicate Pred,
+    ScalarEvolution &SE, const SCEVAddRecExpr *&Index, const SCEV *&End) {
+  Value *LHS, *RHS;
+  if (!match(VariantLHS, m_Sub(m_Value(LHS), m_Value(RHS))))
+    return false;
+
+  const SCEV *IV = SE.getSCEV(LHS);
+  const SCEV *Offset = SE.getSCEV(RHS);
+  const SCEV *Limit = SE.getSCEV(InvariantRHS);
+
+  bool OffsetSubtracted = false;
+  if (SE.isLoopInvariant(IV, L))
+    // "Offset - IV vs Limit"
+    std::swap(IV, Offset);
+  else if (SE.isLoopInvariant(Offset, L))
+    // "IV - Offset vs Limit"
+    OffsetSubtracted = true;
+  else
+    return false;
+
+  const auto *AddRec = dyn_cast<SCEVAddRecExpr>(IV);
+  if (!AddRec)
+    return false;
+
+  // In order to turn "IV - Offset < Limit" into "IV < Limit + Offset", we need
+  // to be able to freely move values from left side of inequality to right side
+  // (just as in normal linear arithmetics). Overflows make things much more
+  // complicated, so we want to avoid this.
+  //
+  // Let's prove that the initial subtraction doesn't overflow with all IV's
+  // values from the safe range constructed for that check.
+  //
+  // [Case 1] IV - Offset < Limit
+  // It doesn't overflow if:
+  //     SINT_MIN <= IV - Offset <= SINT_MAX
+  // In terms of scaled SINT we need to prove:
+  //     SINT_MIN + Offset <= IV <= SINT_MAX + Offset
+  // Safe range will be constructed:
+  //     0 <= IV < Limit + Offset
+  // It means that 'IV - Offset' doesn't underflow, because:
+  //     SINT_MIN + Offset < 0 <= IV
+  // and doesn't overflow:
+  //     IV < Limit + Offset <= SINT_MAX + Offset
+  //
+  // [Case 2] Offset - IV > Limit
+  // It doesn't overflow if:
+  //     SINT_MIN <= Offset - IV <= SINT_MAX
+  // In terms of scaled SINT we need to prove:
+  //     -SINT_MIN >= IV - Offset >= -SINT_MAX
+  //     Offset - SINT_MIN >= IV >= Offset - SINT_MAX
+  // Safe range will be constructed:
+  //     0 <= IV < Offset - Limit
+  // It means that 'Offset - IV' doesn't underflow, because
+  //     Offset - SINT_MAX < 0 <= IV
+  // and doesn't overflow:
+  //     IV < Offset - Limit <= Offset - SINT_MIN
+  //
+  // For the computed upper boundary of the IV's range (Offset +/- Limit) we
+  // don't know exactly whether it overflows or not. So if we can't prove this
+  // fact at compile time, we scale boundary computations to a wider type with
+  // the intention to add runtime overflow check.
+
+  auto getExprScaledIfOverflow = [&](Instruction::BinaryOps BinOp,
+                                     const SCEV *LHS,
+                                     const SCEV *RHS) -> const SCEV * {
+    const SCEV *(ScalarEvolution::*Operation)(const SCEV *, const SCEV *,
+                                              SCEV::NoWrapFlags, unsigned);
+    switch (BinOp) {
+    default:
+      llvm_unreachable("Unsupported binary op");
+    case Instruction::Add:
+      Operation = &ScalarEvolution::getAddExpr;
+      break;
+    case Instruction::Sub:
+      Operation = &ScalarEvolution::getMinusSCEV;
+      break;
+    }
+
+    if (SE.willNotOverflow(BinOp, ICmpInst::isSigned(Pred), LHS, RHS,
+                           cast<Instruction>(VariantLHS)))
+      return (SE.*Operation)(LHS, RHS, SCEV::FlagAnyWrap, 0);
+
+    // We couldn't prove that the expression does not overflow.
+    // Than scale it to a wider type to check overflow at runtime.
+    auto *Ty = cast<IntegerType>(LHS->getType());
+    if (Ty->getBitWidth() > MaxTypeSizeForOverflowCheck)
+      return nullptr;
+
+    auto WideTy = IntegerType::get(Ty->getContext(), Ty->getBitWidth() * 2);
+    return (SE.*Operation)(SE.getSignExtendExpr(LHS, WideTy),
+                           SE.getSignExtendExpr(RHS, WideTy), SCEV::FlagAnyWrap,
+                           0);
+  };
+
+  if (OffsetSubtracted)
+    // "IV - Offset < Limit" -> "IV" < Offset + Limit
+    Limit = getExprScaledIfOverflow(Instruction::BinaryOps::Add, Offset, Limit);
+  else {
+    // "Offset - IV > Limit" -> "IV" < Offset - Limit
+    Limit = getExprScaledIfOverflow(Instruction::BinaryOps::Sub, Offset, Limit);
+    Pred = ICmpInst::getSwappedPredicate(Pred);
+  }
+
+  if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) {
+    // "Expr <= Limit" -> "Expr < Limit + 1"
+    if (Pred == ICmpInst::ICMP_SLE && Limit)
+      Limit = getExprScaledIfOverflow(Instruction::BinaryOps::Add, Limit,
+                                      SE.getOne(Limit->getType()));
+    if (Limit) {
+      Index = AddRec;
+      End = Limit;
+      return true;
+    }
+  }
+  return false;
+}
+
 void InductiveRangeCheck::extractRangeChecksFromCond(
     Loop *L, ScalarEvolution &SE, Use &ConditionUse,
     SmallVectorImpl<InductiveRangeCheck> &Checks,
@@ -1593,11 +1731,34 @@ InductiveRangeCheck::computeSafeIterationSpace(ScalarEvolution &SE,
   // if latch check is more narrow.
   auto *IVType = dyn_cast<IntegerType>(IndVar->getType());
   auto *RCType = dyn_cast<IntegerType>(getBegin()->getType());
+  auto *EndType = dyn_cast<IntegerType>(getEnd()->getType());
   // Do not work with pointer types.
   if (!IVType || !RCType)
     return std::nullopt;
   if (IVType->getBitWidth() > RCType->getBitWidth())
     return std::nullopt;
+
+  auto PrintRangeCheck = [&](raw_ostream &OS) {
+    auto L = IndVar->getLoop();
+    OS << "irce: in function ";
+    OS << L->getHeader()->getParent()->getName();
+    OS << ", in ";
+    L->print(OS);
+    OS << "there is range check with scaled boundary:\n";
+    print(OS);
+  };
+
+  if (EndType->getBitWidth() > RCType->getBitWidth()) {
+    assert(EndType->getBitWidth() == RCType->getBitWidth() * 2);
+    if (PrintScaledBoundaryRangeChecks)
+      PrintRangeCheck(errs());
+    // End is computed with extended type but will be truncated to a narrow one
+    // type of range check. Therefore we need a check that the result will not
+    // overflow in terms of narrow type.
+    // TODO: Support runtime overflow check for End
+    return std::nullopt;
+  }
+
   // IndVar is of the form "A + B * I" (where "I" is the canonical induction
   // variable, that may or may not exist as a real llvm::Value in the loop) and
   // this inductive range check is a range check on the "C + D * I" ("C" is

diff  --git a/llvm/test/Transforms/IRCE/iv-plus-offset-range-check.ll b/llvm/test/Transforms/IRCE/iv-plus-offset-range-check.ll
index fbab857a2f0556..f1d645fa1138e5 100644
--- a/llvm/test/Transforms/IRCE/iv-plus-offset-range-check.ll
+++ b/llvm/test/Transforms/IRCE/iv-plus-offset-range-check.ll
@@ -1,5 +1,24 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
-; RUN: opt -verify-loop-info -passes=irce -S < %s 2>&1 | FileCheck %s
+; RUN: opt -verify-loop-info -passes=irce -irce-print-scaled-boundary-range-checks -S < %s 2>&1 | FileCheck %s
+
+
+; CHECK: irce: in function test1, in Loop at depth 1 containing: %loop<header><exiting>,%inbounds<latch><exiting>
+; CHECK-NEXT: there is range check with scaled boundary:
+; CHECK-NEXT: InductiveRangeCheck:
+; CHECK-NEXT:   Begin: 0  Step: 1  End: (-1 + (sext i8 %n to i16))<nsw>
+; CHECK-NEXT:   CheckUse:   br i1 %check, label %inbounds, label %out_of_bounds Operand: 0
+;
+; CHECK-NEXT: irce: in function test4, in Loop at depth 1 containing: %loop<header><exiting>,%inbounds<latch><exiting>
+; CHECK-NEXT: there is range check with scaled boundary:
+; CHECK-NEXT: InductiveRangeCheck:
+; CHECK-NEXT:   Begin: 0  Step: 1  End: (-2 + (sext i8 %n to i16))<nsw>
+; CHECK-NEXT:   CheckUse:   br i1 %check, label %inbounds, label %out_of_bounds Operand: 0
+;
+; CHECK-NEXT: irce: in function test_overflow_check_runtime, in Loop at depth 1 containing: %loop<header><exiting>,%inbounds<latch><exiting>
+; CHECK-NEXT: there is range check with scaled boundary:
+; CHECK-NEXT: InductiveRangeCheck:
+; CHECK-NEXT:   Begin: 0  Step: 1  End: (3 + (zext i8 %n to i16))<nuw><nsw>
+; CHECK-NEXT:   CheckUse:   br i1 %check, label %inbounds, label %out_of_bounds Operand: 0
 
 ; IV = 0; IV <s limit; IV += 1;
 ;   Check(N - IV >= 2)
@@ -66,25 +85,64 @@ define i8 @test1a(i8 %limit, ptr %p) {
 ; CHECK-NEXT:    [[PRECHECK:%.*]] = icmp sgt i8 [[LIMIT]], 0
 ; CHECK-NEXT:    br i1 [[PRECHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
 ; CHECK:       loop.preheader:
+; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i8 [[N]], -1
+; CHECK-NEXT:    [[SMIN:%.*]] = call i8 @llvm.smin.i8(i8 [[TMP0]], i8 0)
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i8 [[SMIN]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = mul i8 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[SMIN2:%.*]] = call i8 @llvm.smin.i8(i8 [[LIMIT]], i8 [[TMP2]])
+; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = call i8 @llvm.smax.i8(i8 [[SMIN2]], i8 0)
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt i8 0, [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[LOOP_PREHEADER4:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
+; CHECK:       loop.preheader4:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[IDX:%.*]] = phi i8 [ [[IDX_NEXT:%.*]], [[INBOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER]] ]
+; CHECK-NEXT:    [[IDX:%.*]] = phi i8 [ [[IDX_NEXT:%.*]], [[INBOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER4]] ]
 ; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[N]], [[IDX]]
 ; CHECK-NEXT:    [[CHECK:%.*]] = icmp sge i8 [[SUB]], 2
-; CHECK-NEXT:    br i1 [[CHECK]], label [[INBOUNDS]], label [[OUT_OF_BOUNDS:%.*]]
+; CHECK-NEXT:    br i1 true, label [[INBOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT5:%.*]]
 ; CHECK:       inbounds:
 ; CHECK-NEXT:    [[IDX_NEXT]] = add nuw i8 [[IDX]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[IDX_NEXT]], [[LIMIT]]
-; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp slt i8 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT:    br i1 [[TMP4]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
+; CHECK:       main.exit.selector:
+; CHECK-NEXT:    [[IDX_NEXT_LCSSA:%.*]] = phi i8 [ [[IDX_NEXT]], [[INBOUNDS]] ]
+; CHECK-NEXT:    [[IDX_LCSSA3:%.*]] = phi i8 [ [[IDX]], [[INBOUNDS]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp slt i8 [[IDX_NEXT_LCSSA]], [[LIMIT]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
+; CHECK:       main.pseudo.exit:
+; CHECK-NEXT:    [[IDX_COPY:%.*]] = phi i8 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    [[INDVAR_END:%.*]] = phi i8 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    br label [[POSTLOOP:%.*]]
+; CHECK:       exit.loopexit.loopexit:
+; CHECK-NEXT:    [[IDX_LCSSA1_PH:%.*]] = phi i8 [ [[IDX_POSTLOOP:%.*]], [[INBOUNDS_POSTLOOP:%.*]] ]
+; CHECK-NEXT:    br label [[EXIT_LOOPEXIT]]
 ; CHECK:       exit.loopexit:
-; CHECK-NEXT:    [[IDX_LCSSA1:%.*]] = phi i8 [ [[IDX]], [[INBOUNDS]] ]
+; CHECK-NEXT:    [[IDX_LCSSA1:%.*]] = phi i8 [ [[IDX_LCSSA3]], [[MAIN_EXIT_SELECTOR]] ], [ [[IDX_LCSSA1_PH]], [[EXIT_LOOPEXIT_LOOPEXIT:%.*]] ]
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[RES:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[IDX_LCSSA1]], [[EXIT_LOOPEXIT]] ]
 ; CHECK-NEXT:    ret i8 [[RES]]
+; CHECK:       out_of_bounds.loopexit:
+; CHECK-NEXT:    [[IDX_LCSSA_PH:%.*]] = phi i8 [ [[IDX_POSTLOOP]], [[LOOP_POSTLOOP:%.*]] ]
+; CHECK-NEXT:    br label [[OUT_OF_BOUNDS:%.*]]
+; CHECK:       out_of_bounds.loopexit5:
+; CHECK-NEXT:    [[IDX_LCSSA_PH6:%.*]] = phi i8 [ [[IDX]], [[LOOP]] ]
+; CHECK-NEXT:    br label [[OUT_OF_BOUNDS]]
 ; CHECK:       out_of_bounds:
-; CHECK-NEXT:    [[IDX_LCSSA:%.*]] = phi i8 [ [[IDX]], [[LOOP]] ]
+; CHECK-NEXT:    [[IDX_LCSSA:%.*]] = phi i8 [ [[IDX_LCSSA_PH]], [[OUT_OF_BOUNDS_LOOPEXIT:%.*]] ], [ [[IDX_LCSSA_PH6]], [[OUT_OF_BOUNDS_LOOPEXIT5]] ]
 ; CHECK-NEXT:    ret i8 [[IDX_LCSSA]]
+; CHECK:       postloop:
+; CHECK-NEXT:    br label [[LOOP_POSTLOOP]]
+; CHECK:       loop.postloop:
+; CHECK-NEXT:    [[IDX_POSTLOOP]] = phi i8 [ [[IDX_NEXT_POSTLOOP:%.*]], [[INBOUNDS_POSTLOOP]] ], [ [[IDX_COPY]], [[POSTLOOP]] ]
+; CHECK-NEXT:    [[SUB_POSTLOOP:%.*]] = sub i8 [[N]], [[IDX_POSTLOOP]]
+; CHECK-NEXT:    [[CHECK_POSTLOOP:%.*]] = icmp sge i8 [[SUB_POSTLOOP]], 2
+; CHECK-NEXT:    br i1 [[CHECK_POSTLOOP]], label [[INBOUNDS_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT]]
+; CHECK:       inbounds.postloop:
+; CHECK-NEXT:    [[IDX_NEXT_POSTLOOP]] = add nuw i8 [[IDX_POSTLOOP]], 1
+; CHECK-NEXT:    [[CMP_POSTLOOP:%.*]] = icmp slt i8 [[IDX_NEXT_POSTLOOP]], [[LIMIT]]
+; CHECK-NEXT:    br i1 [[CMP_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP1:![0-9]+]], !irce.loop.clone [[META6:![0-9]+]]
 ;
 entry:
   %n = load i8, ptr %p, !range !0
@@ -338,7 +396,7 @@ define i8 @test3a(i8 %limit, ptr %p) {
 ; CHECK:       inbounds.postloop:
 ; CHECK-NEXT:    [[IDX_NEXT_POSTLOOP]] = add nuw i8 [[IDX_POSTLOOP]], 1
 ; CHECK-NEXT:    [[CMP_POSTLOOP:%.*]] = icmp slt i8 [[IDX_NEXT_POSTLOOP]], [[LIMIT]]
-; CHECK-NEXT:    br i1 [[CMP_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP1:![0-9]+]], !irce.loop.clone !6
+; CHECK-NEXT:    br i1 [[CMP_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]], !irce.loop.clone [[META6]]
 ;
 entry:
   %n = load i8, ptr %p, !range !0
@@ -429,25 +487,69 @@ define i8 @test4a(i8 %limit, ptr %p) {
 ; CHECK-NEXT:    [[PRECHECK:%.*]] = icmp sgt i8 [[LIMIT]], 0
 ; CHECK-NEXT:    br i1 [[PRECHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
 ; CHECK:       loop.preheader:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i8 [[N]], -2
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw i8 [[N]], 127
+; CHECK-NEXT:    [[SMAX:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP1]], i8 0)
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i8 [[TMP0]], [[SMAX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw i8 [[N]], -2
+; CHECK-NEXT:    [[SMIN:%.*]] = call i8 @llvm.smin.i8(i8 [[TMP3]], i8 0)
+; CHECK-NEXT:    [[SMAX2:%.*]] = call i8 @llvm.smax.i8(i8 [[SMIN]], i8 -1)
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw i8 [[SMAX2]], 1
+; CHECK-NEXT:    [[TMP5:%.*]] = mul i8 [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    [[SMIN3:%.*]] = call i8 @llvm.smin.i8(i8 [[LIMIT]], i8 [[TMP5]])
+; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = call i8 @llvm.smax.i8(i8 [[SMIN3]], i8 0)
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp slt i8 0, [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT:    br i1 [[TMP6]], label [[LOOP_PREHEADER6:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
+; CHECK:       loop.preheader6:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[IDX:%.*]] = phi i8 [ [[IDX_NEXT:%.*]], [[INBOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER]] ]
+; CHECK-NEXT:    [[IDX:%.*]] = phi i8 [ [[IDX_NEXT:%.*]], [[INBOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER6]] ]
 ; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[N]], [[IDX]]
 ; CHECK-NEXT:    [[CHECK:%.*]] = icmp sgt i8 [[SUB]], 2
-; CHECK-NEXT:    br i1 [[CHECK]], label [[INBOUNDS]], label [[OUT_OF_BOUNDS:%.*]]
+; CHECK-NEXT:    br i1 true, label [[INBOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT7:%.*]]
 ; CHECK:       inbounds:
 ; CHECK-NEXT:    [[IDX_NEXT]] = add nuw i8 [[IDX]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[IDX_NEXT]], [[LIMIT]]
-; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp slt i8 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
+; CHECK:       main.exit.selector:
+; CHECK-NEXT:    [[IDX_NEXT_LCSSA:%.*]] = phi i8 [ [[IDX_NEXT]], [[INBOUNDS]] ]
+; CHECK-NEXT:    [[IDX_LCSSA5:%.*]] = phi i8 [ [[IDX]], [[INBOUNDS]] ]
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp slt i8 [[IDX_NEXT_LCSSA]], [[LIMIT]]
+; CHECK-NEXT:    br i1 [[TMP8]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
+; CHECK:       main.pseudo.exit:
+; CHECK-NEXT:    [[IDX_COPY:%.*]] = phi i8 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    [[INDVAR_END:%.*]] = phi i8 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    br label [[POSTLOOP:%.*]]
+; CHECK:       exit.loopexit.loopexit:
+; CHECK-NEXT:    [[IDX_LCSSA1_PH:%.*]] = phi i8 [ [[IDX_POSTLOOP:%.*]], [[INBOUNDS_POSTLOOP:%.*]] ]
+; CHECK-NEXT:    br label [[EXIT_LOOPEXIT]]
 ; CHECK:       exit.loopexit:
-; CHECK-NEXT:    [[IDX_LCSSA1:%.*]] = phi i8 [ [[IDX]], [[INBOUNDS]] ]
+; CHECK-NEXT:    [[IDX_LCSSA1:%.*]] = phi i8 [ [[IDX_LCSSA5]], [[MAIN_EXIT_SELECTOR]] ], [ [[IDX_LCSSA1_PH]], [[EXIT_LOOPEXIT_LOOPEXIT:%.*]] ]
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[RES:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[IDX_LCSSA1]], [[EXIT_LOOPEXIT]] ]
 ; CHECK-NEXT:    ret i8 [[RES]]
+; CHECK:       out_of_bounds.loopexit:
+; CHECK-NEXT:    [[IDX_LCSSA_PH:%.*]] = phi i8 [ [[IDX_POSTLOOP]], [[LOOP_POSTLOOP:%.*]] ]
+; CHECK-NEXT:    br label [[OUT_OF_BOUNDS:%.*]]
+; CHECK:       out_of_bounds.loopexit7:
+; CHECK-NEXT:    [[IDX_LCSSA_PH8:%.*]] = phi i8 [ [[IDX]], [[LOOP]] ]
+; CHECK-NEXT:    br label [[OUT_OF_BOUNDS]]
 ; CHECK:       out_of_bounds:
-; CHECK-NEXT:    [[IDX_LCSSA:%.*]] = phi i8 [ [[IDX]], [[LOOP]] ]
+; CHECK-NEXT:    [[IDX_LCSSA:%.*]] = phi i8 [ [[IDX_LCSSA_PH]], [[OUT_OF_BOUNDS_LOOPEXIT:%.*]] ], [ [[IDX_LCSSA_PH8]], [[OUT_OF_BOUNDS_LOOPEXIT7]] ]
 ; CHECK-NEXT:    ret i8 [[IDX_LCSSA]]
+; CHECK:       postloop:
+; CHECK-NEXT:    br label [[LOOP_POSTLOOP]]
+; CHECK:       loop.postloop:
+; CHECK-NEXT:    [[IDX_POSTLOOP]] = phi i8 [ [[IDX_NEXT_POSTLOOP:%.*]], [[INBOUNDS_POSTLOOP]] ], [ [[IDX_COPY]], [[POSTLOOP]] ]
+; CHECK-NEXT:    [[SUB_POSTLOOP:%.*]] = sub i8 [[N]], [[IDX_POSTLOOP]]
+; CHECK-NEXT:    [[CHECK_POSTLOOP:%.*]] = icmp sgt i8 [[SUB_POSTLOOP]], 2
+; CHECK-NEXT:    br i1 [[CHECK_POSTLOOP]], label [[INBOUNDS_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT]]
+; CHECK:       inbounds.postloop:
+; CHECK-NEXT:    [[IDX_NEXT_POSTLOOP]] = add nuw i8 [[IDX_POSTLOOP]], 1
+; CHECK-NEXT:    [[CMP_POSTLOOP:%.*]] = icmp slt i8 [[IDX_NEXT_POSTLOOP]], [[LIMIT]]
+; CHECK-NEXT:    br i1 [[CMP_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP8:![0-9]+]], !irce.loop.clone [[META6]]
 ;
 entry:
   %n = load i8, ptr %p, !range !0
@@ -652,7 +754,7 @@ define i8 @test6(i8 %limit, i8 %n) {
 ; CHECK:       inbounds.postloop:
 ; CHECK-NEXT:    [[IDX_NEXT_POSTLOOP]] = add nuw i8 [[IDX_POSTLOOP]], 1
 ; CHECK-NEXT:    [[CMP_POSTLOOP:%.*]] = icmp slt i8 [[IDX_NEXT_POSTLOOP]], [[LIMIT]]
-; CHECK-NEXT:    br i1 [[CMP_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]], !irce.loop.clone !6
+; CHECK-NEXT:    br i1 [[CMP_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP9:![0-9]+]], !irce.loop.clone [[META6]]
 ;
 entry:
   %precheck = icmp sgt i8 %limit, 0
@@ -743,7 +845,7 @@ define i8 @test6a(i8 %limit, ptr %p) {
 ; CHECK:       inbounds.postloop:
 ; CHECK-NEXT:    [[IDX_NEXT_POSTLOOP]] = add nuw i8 [[IDX_POSTLOOP]], 1
 ; CHECK-NEXT:    [[CMP_POSTLOOP:%.*]] = icmp slt i8 [[IDX_NEXT_POSTLOOP]], [[LIMIT]]
-; CHECK-NEXT:    br i1 [[CMP_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP8:![0-9]+]], !irce.loop.clone !6
+; CHECK-NEXT:    br i1 [[CMP_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP10:![0-9]+]], !irce.loop.clone [[META6]]
 ;
 entry:
   %n = load i8, ptr %p, !range !0
@@ -769,4 +871,158 @@ out_of_bounds:
   ret i8 %idx;
 }
 
+; IV = 0; IV <s limit; IV += 1;
+;   Check(N - IV > -2)
+;
+; IRCE is allowed.
+; IRCE will reassociate this range check to the 'IV < N + 2',
+; since N < 126 no-overflow fact is provable at compile time.
+define i8 @test_overflow_check_compile_time(i8 %limit, ptr %p) {
+; CHECK-LABEL: define i8 @test_overflow_check_compile_time
+; CHECK-SAME: (i8 [[LIMIT:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[N:%.*]] = load i8, ptr [[P]], align 1, !range [[RNG11:![0-9]+]]
+; CHECK-NEXT:    [[PRECHECK:%.*]] = icmp sgt i8 [[LIMIT]], 0
+; CHECK-NEXT:    br i1 [[PRECHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
+; CHECK:       loop.preheader:
+; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i8 [[N]], 2
+; CHECK-NEXT:    [[SMIN:%.*]] = call i8 @llvm.smin.i8(i8 [[LIMIT]], i8 [[TMP0]])
+; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = call i8 @llvm.smax.i8(i8 [[SMIN]], i8 0)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i8 0, [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT:    br i1 [[TMP1]], label [[LOOP_PREHEADER3:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
+; CHECK:       loop.preheader3:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IDX:%.*]] = phi i8 [ [[IDX_NEXT:%.*]], [[INBOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER3]] ]
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[N]], [[IDX]]
+; CHECK-NEXT:    [[CHECK:%.*]] = icmp sgt i8 [[SUB]], -2
+; CHECK-NEXT:    br i1 true, label [[INBOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT4:%.*]]
+; CHECK:       inbounds:
+; CHECK-NEXT:    [[IDX_NEXT]] = add nuw i8 [[IDX]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[IDX_NEXT]], [[LIMIT]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt i8 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
+; CHECK:       main.exit.selector:
+; CHECK-NEXT:    [[IDX_NEXT_LCSSA:%.*]] = phi i8 [ [[IDX_NEXT]], [[INBOUNDS]] ]
+; CHECK-NEXT:    [[IDX_LCSSA2:%.*]] = phi i8 [ [[IDX]], [[INBOUNDS]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt i8 [[IDX_NEXT_LCSSA]], [[LIMIT]]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
+; CHECK:       main.pseudo.exit:
+; CHECK-NEXT:    [[IDX_COPY:%.*]] = phi i8 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    [[INDVAR_END:%.*]] = phi i8 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    br label [[POSTLOOP:%.*]]
+; CHECK:       exit.loopexit.loopexit:
+; CHECK-NEXT:    [[IDX_LCSSA1_PH:%.*]] = phi i8 [ [[IDX_POSTLOOP:%.*]], [[INBOUNDS_POSTLOOP:%.*]] ]
+; CHECK-NEXT:    br label [[EXIT_LOOPEXIT]]
+; CHECK:       exit.loopexit:
+; CHECK-NEXT:    [[IDX_LCSSA1:%.*]] = phi i8 [ [[IDX_LCSSA2]], [[MAIN_EXIT_SELECTOR]] ], [ [[IDX_LCSSA1_PH]], [[EXIT_LOOPEXIT_LOOPEXIT:%.*]] ]
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[IDX_LCSSA1]], [[EXIT_LOOPEXIT]] ]
+; CHECK-NEXT:    ret i8 [[RES]]
+; CHECK:       out_of_bounds.loopexit:
+; CHECK-NEXT:    [[IDX_LCSSA_PH:%.*]] = phi i8 [ [[IDX_POSTLOOP]], [[LOOP_POSTLOOP:%.*]] ]
+; CHECK-NEXT:    br label [[OUT_OF_BOUNDS:%.*]]
+; CHECK:       out_of_bounds.loopexit4:
+; CHECK-NEXT:    [[IDX_LCSSA_PH5:%.*]] = phi i8 [ [[IDX]], [[LOOP]] ]
+; CHECK-NEXT:    br label [[OUT_OF_BOUNDS]]
+; CHECK:       out_of_bounds:
+; CHECK-NEXT:    [[IDX_LCSSA:%.*]] = phi i8 [ [[IDX_LCSSA_PH]], [[OUT_OF_BOUNDS_LOOPEXIT:%.*]] ], [ [[IDX_LCSSA_PH5]], [[OUT_OF_BOUNDS_LOOPEXIT4]] ]
+; CHECK-NEXT:    ret i8 [[IDX_LCSSA]]
+; CHECK:       postloop:
+; CHECK-NEXT:    br label [[LOOP_POSTLOOP]]
+; CHECK:       loop.postloop:
+; CHECK-NEXT:    [[IDX_POSTLOOP]] = phi i8 [ [[IDX_NEXT_POSTLOOP:%.*]], [[INBOUNDS_POSTLOOP]] ], [ [[IDX_COPY]], [[POSTLOOP]] ]
+; CHECK-NEXT:    [[SUB_POSTLOOP:%.*]] = sub i8 [[N]], [[IDX_POSTLOOP]]
+; CHECK-NEXT:    [[CHECK_POSTLOOP:%.*]] = icmp sgt i8 [[SUB_POSTLOOP]], -2
+; CHECK-NEXT:    br i1 [[CHECK_POSTLOOP]], label [[INBOUNDS_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT]]
+; CHECK:       inbounds.postloop:
+; CHECK-NEXT:    [[IDX_NEXT_POSTLOOP]] = add nuw i8 [[IDX_POSTLOOP]], 1
+; CHECK-NEXT:    [[CMP_POSTLOOP:%.*]] = icmp slt i8 [[IDX_NEXT_POSTLOOP]], [[LIMIT]]
+; CHECK-NEXT:    br i1 [[CMP_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP12:![0-9]+]], !irce.loop.clone [[META6]]
+;
+entry:
+  %n = load i8, ptr %p, !range !1
+  %precheck = icmp sgt i8 %limit, 0
+  br i1 %precheck, label %loop, label %exit
+
+loop:
+  %idx = phi i8 [ %idx.next, %inbounds ], [ 0, %entry ]
+  %sub = sub i8 %n, %idx
+  %check = icmp sgt i8 %sub, -2
+  br i1 %check, label %inbounds, label %out_of_bounds
+
+inbounds:
+  %idx.next = add nuw i8 %idx, 1
+  %cmp = icmp slt i8 %idx.next, %limit
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  %res = phi i8 [ 0, %entry ], [ %idx, %inbounds ]
+  ret i8 %res
+
+out_of_bounds:
+  ret i8 %idx;
+}
+
+; IV = 0; IV <s limit; IV += 1;
+;   Check(N - IV >= -2)
+;
+; TODO: IRCE is allowed.
+; IRCE will reassociate this range check to the 'IV < (N + 2) + 1',
+; since N < 126 no-overflow fact is NOT provable at compile time and
+; runtime overflow check is required.
+define i8 @test_overflow_check_runtime(i8 %limit, ptr %p) {
+; CHECK-LABEL: define i8 @test_overflow_check_runtime
+; CHECK-SAME: (i8 [[LIMIT:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[N:%.*]] = load i8, ptr [[P]], align 1, !range [[RNG11]]
+; CHECK-NEXT:    [[PRECHECK:%.*]] = icmp sgt i8 [[LIMIT]], 0
+; CHECK-NEXT:    br i1 [[PRECHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
+; CHECK:       loop.preheader:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IDX:%.*]] = phi i8 [ [[IDX_NEXT:%.*]], [[INBOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER]] ]
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[N]], [[IDX]]
+; CHECK-NEXT:    [[CHECK:%.*]] = icmp sge i8 [[SUB]], -2
+; CHECK-NEXT:    br i1 [[CHECK]], label [[INBOUNDS]], label [[OUT_OF_BOUNDS:%.*]]
+; CHECK:       inbounds:
+; CHECK-NEXT:    [[IDX_NEXT]] = add nuw i8 [[IDX]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[IDX_NEXT]], [[LIMIT]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
+; CHECK:       exit.loopexit:
+; CHECK-NEXT:    [[IDX_LCSSA1:%.*]] = phi i8 [ [[IDX]], [[INBOUNDS]] ]
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[IDX_LCSSA1]], [[EXIT_LOOPEXIT]] ]
+; CHECK-NEXT:    ret i8 [[RES]]
+; CHECK:       out_of_bounds:
+; CHECK-NEXT:    [[IDX_LCSSA:%.*]] = phi i8 [ [[IDX]], [[LOOP]] ]
+; CHECK-NEXT:    ret i8 [[IDX_LCSSA]]
+;
+entry:
+  %n = load i8, ptr %p, !range !1
+  %precheck = icmp sgt i8 %limit, 0
+  br i1 %precheck, label %loop, label %exit
+
+loop:
+  %idx = phi i8 [ %idx.next, %inbounds ], [ 0, %entry ]
+  %sub = sub i8 %n, %idx
+  %check = icmp sge i8 %sub, -2
+  br i1 %check, label %inbounds, label %out_of_bounds
+
+inbounds:
+  %idx.next = add nuw i8 %idx, 1
+  %cmp = icmp slt i8 %idx.next, %limit
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  %res = phi i8 [ 0, %entry ], [ %idx, %inbounds ]
+  ret i8 %res
+
+out_of_bounds:
+  ret i8 %idx;
+}
+
 !0 = !{i8 0, i8 127}
+!1 = !{i8 0, i8 126}


        


More information about the llvm-commits mailing list