[llvm] e16c5c0 - [IRCE] Parse range checks in the form of 'LHS - RHS vs Limit'

Aleksandr Popov via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 10 04:01:08 PDT 2023


Author: Aleksandr Popov
Date: 2023-07-10T13:00:38+02:00
New Revision: e16c5c092205f68825466c25a1dd30783c4820f3

URL: https://github.com/llvm/llvm-project/commit/e16c5c092205f68825466c25a1dd30783c4820f3
DIFF: https://github.com/llvm/llvm-project/commit/e16c5c092205f68825466c25a1dd30783c4820f3.diff

LOG: [IRCE] Parse range checks in the form of 'LHS - RHS vs Limit'

Introduced the following range checks forms parsing:
* IV - Offset vs Limit
* Offset - IV vs Limit

Range's end boundary is computed as (Offset +/- Limit ).

If it's not possible to prove at compile time that computed upper bound
will not overflow, then scale boundary computation to a wider type to
perform overflow check at runtime.

Runtime overflow will be implemented in the next patch. In the meantime
safe range for such kind of checks isn't computed.

Reviewed By: skatkov

Differential Revision: https://reviews.llvm.org/D154069

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
    llvm/test/Transforms/IRCE/iv-plus-offset-range-check.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index 5f9bad37e7ab73..5ab688235c72a9 100644
--- a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -119,6 +119,16 @@ static cl::opt<bool> AllowNarrowLatchCondition(
     cl::desc("If set to true, IRCE may eliminate wide range checks in loops "
              "with narrow latch condition."));
 
+static cl::opt<unsigned> MaxTypeSizeForOverflowCheck(
+    "irce-max-type-size-for-overflow-check", cl::Hidden, cl::init(32),
+    cl::desc(
+        "Maximum size of range check type for which can be produced runtime "
+        "overflow check of its limit's computation"));
+
+static cl::opt<bool>
+    PrintScaledBoundaryRangeChecks("irce-print-scaled-boundary-range-checks",
+                                   cl::Hidden, cl::init(false));
+
 static const char *ClonedLoopTag = "irce.loop.clone";
 
 #define DEBUG_TYPE "irce"
@@ -156,6 +166,10 @@ class InductiveRangeCheck {
                                   const SCEVAddRecExpr *&Index,
                                   const SCEV *&End);
 
+  static bool reassociateSubLHS(Loop *L, Value *VariantLHS, Value *InvariantRHS,
+                                ICmpInst::Predicate Pred, ScalarEvolution &SE,
+                                const SCEVAddRecExpr *&Index, const SCEV *&End);
+
 public:
   const SCEV *getBegin() const { return Begin; }
   const SCEV *getStep() const { return Step; }
@@ -281,6 +295,10 @@ bool InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
   if (parseIvAgaisntLimit(L, LHS, RHS, Pred, SE, Index, End))
     return true;
 
+  if (reassociateSubLHS(L, LHS, RHS, Pred, SE, Index, End))
+    return true;
+
+  // TODO: support ReassociateAddLHS
   return false;
 }
 
@@ -346,6 +364,126 @@ bool InductiveRangeCheck::parseIvAgaisntLimit(Loop *L, Value *LHS, Value *RHS,
   llvm_unreachable("default clause returns!");
 }
 
+// Try to parse range check in the form of "IV - Offset vs Limit" or "Offset -
+// IV vs Limit"
+bool InductiveRangeCheck::reassociateSubLHS(
+    Loop *L, Value *VariantLHS, Value *InvariantRHS, ICmpInst::Predicate Pred,
+    ScalarEvolution &SE, const SCEVAddRecExpr *&Index, const SCEV *&End) {
+  Value *LHS, *RHS;
+  if (!match(VariantLHS, m_Sub(m_Value(LHS), m_Value(RHS))))
+    return false;
+
+  const SCEV *IV = SE.getSCEV(LHS);
+  const SCEV *Offset = SE.getSCEV(RHS);
+  const SCEV *Limit = SE.getSCEV(InvariantRHS);
+
+  bool OffsetSubtracted = false;
+  if (SE.isLoopInvariant(IV, L))
+    // "Offset - IV vs Limit"
+    std::swap(IV, Offset);
+  else if (SE.isLoopInvariant(Offset, L))
+    // "IV - Offset vs Limit"
+    OffsetSubtracted = true;
+  else
+    return false;
+
+  const auto *AddRec = dyn_cast<SCEVAddRecExpr>(IV);
+  if (!AddRec)
+    return false;
+
+  // In order to turn "IV - Offset < Limit" into "IV < Limit + Offset", we need
+  // to be able to freely move values from left side of inequality to right side
+  // (just as in normal linear arithmetics). Overflows make things much more
+  // complicated, so we want to avoid this.
+  //
+  // Let's prove that the initial subtraction doesn't overflow with all IV's
+  // values from the safe range constructed for that check.
+  //
+  // [Case 1] IV - Offset < Limit
+  // It doesn't overflow if:
+  //     SINT_MIN <= IV - Offset <= SINT_MAX
+  // In terms of scaled SINT we need to prove:
+  //     SINT_MIN + Offset <= IV <= SINT_MAX + Offset
+  // Safe range will be constructed:
+  //     0 <= IV < Limit + Offset
+  // It means that 'IV - Offset' doesn't underflow, because:
+  //     SINT_MIN + Offset < 0 <= IV
+  // and doesn't overflow:
+  //     IV < Limit + Offset <= SINT_MAX + Offset
+  //
+  // [Case 2] Offset - IV > Limit
+  // It doesn't overflow if:
+  //     SINT_MIN <= Offset - IV <= SINT_MAX
+  // In terms of scaled SINT we need to prove:
+  //     -SINT_MIN >= IV - Offset >= -SINT_MAX
+  //     Offset - SINT_MIN >= IV >= Offset - SINT_MAX
+  // Safe range will be constructed:
+  //     0 <= IV < Offset - Limit
+  // It means that 'Offset - IV' doesn't underflow, because
+  //     Offset - SINT_MAX < 0 <= IV
+  // and doesn't overflow:
+  //     IV < Offset - Limit <= Offset - SINT_MIN
+  //
+  // For the computed upper boundary of the IV's range (Offset +/- Limit) we
+  // don't know exactly whether it overflows or not. So if we can't prove this
+  // fact at compile time, we scale boundary computations to a wider type with
+  // the intention to add runtime overflow check.
+
+  auto getExprScaledIfOverflow = [&](Instruction::BinaryOps BinOp,
+                                     const SCEV *LHS,
+                                     const SCEV *RHS) -> const SCEV * {
+    const SCEV *(ScalarEvolution::*Operation)(const SCEV *, const SCEV *,
+                                              SCEV::NoWrapFlags, unsigned);
+    switch (BinOp) {
+    default:
+      llvm_unreachable("Unsupported binary op");
+    case Instruction::Add:
+      Operation = &ScalarEvolution::getAddExpr;
+      break;
+    case Instruction::Sub:
+      Operation = &ScalarEvolution::getMinusSCEV;
+      break;
+    }
+
+    if (SE.willNotOverflow(BinOp, ICmpInst::isSigned(Pred), LHS, RHS,
+                           cast<Instruction>(VariantLHS)))
+      return (SE.*Operation)(LHS, RHS, SCEV::FlagAnyWrap, 0);
+
+    // We couldn't prove that the expression does not overflow.
+    // Than scale it to a wider type to check overflow at runtime.
+    auto *Ty = cast<IntegerType>(LHS->getType());
+    if (Ty->getBitWidth() > MaxTypeSizeForOverflowCheck)
+      return nullptr;
+
+    auto WideTy = IntegerType::get(Ty->getContext(), Ty->getBitWidth() * 2);
+    return (SE.*Operation)(SE.getSignExtendExpr(LHS, WideTy),
+                           SE.getSignExtendExpr(RHS, WideTy), SCEV::FlagAnyWrap,
+                           0);
+  };
+
+  if (OffsetSubtracted)
+    // "IV - Offset < Limit" -> "IV" < Offset + Limit
+    Limit = getExprScaledIfOverflow(Instruction::BinaryOps::Add, Offset, Limit);
+  else {
+    // "Offset - IV > Limit" -> "IV" < Offset - Limit
+    Limit = getExprScaledIfOverflow(Instruction::BinaryOps::Sub, Offset, Limit);
+    Pred = ICmpInst::getSwappedPredicate(Pred);
+  }
+
+  if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) {
+    // "Expr <= Limit" -> "Expr < Limit + 1"
+    if (Pred == ICmpInst::ICMP_SLE && Limit)
+      Limit = getExprScaledIfOverflow(Instruction::BinaryOps::Add, Limit,
+                                      SE.getOne(Limit->getType()));
+    if (Limit) {
+      Index = AddRec;
+      End = Limit;
+      return true;
+    }
+  }
+  return false;
+}
+
 void InductiveRangeCheck::extractRangeChecksFromCond(
     Loop *L, ScalarEvolution &SE, Use &ConditionUse,
     SmallVectorImpl<InductiveRangeCheck> &Checks,
@@ -1593,11 +1731,34 @@ InductiveRangeCheck::computeSafeIterationSpace(ScalarEvolution &SE,
   // if latch check is more narrow.
   auto *IVType = dyn_cast<IntegerType>(IndVar->getType());
   auto *RCType = dyn_cast<IntegerType>(getBegin()->getType());
+  auto *EndType = dyn_cast<IntegerType>(getEnd()->getType());
   // Do not work with pointer types.
   if (!IVType || !RCType)
     return std::nullopt;
   if (IVType->getBitWidth() > RCType->getBitWidth())
     return std::nullopt;
+
+  auto PrintRangeCheck = [&](raw_ostream &OS) {
+    auto L = IndVar->getLoop();
+    OS << "irce: in function ";
+    OS << L->getHeader()->getParent()->getName();
+    OS << ", in ";
+    L->print(OS);
+    OS << "there is range check with scaled boundary:\n";
+    print(OS);
+  };
+
+  if (EndType->getBitWidth() > RCType->getBitWidth()) {
+    assert(EndType->getBitWidth() == RCType->getBitWidth() * 2);
+    if (PrintScaledBoundaryRangeChecks)
+      PrintRangeCheck(errs());
+    // End is computed with extended type but will be truncated to a narrow one
+    // type of range check. Therefore we need a check that the result will not
+    // overflow in terms of narrow type.
+    // TODO: Support runtime overflow check for End
+    return std::nullopt;
+  }
+
   // IndVar is of the form "A + B * I" (where "I" is the canonical induction
   // variable, that may or may not exist as a real llvm::Value in the loop) and
   // this inductive range check is a range check on the "C + D * I" ("C" is

diff  --git a/llvm/test/Transforms/IRCE/iv-plus-offset-range-check.ll b/llvm/test/Transforms/IRCE/iv-plus-offset-range-check.ll
index fbab857a2f0556..f1d645fa1138e5 100644
--- a/llvm/test/Transforms/IRCE/iv-plus-offset-range-check.ll
+++ b/llvm/test/Transforms/IRCE/iv-plus-offset-range-check.ll
@@ -1,5 +1,24 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
-; RUN: opt -verify-loop-info -passes=irce -S < %s 2>&1 | FileCheck %s
+; RUN: opt -verify-loop-info -passes=irce -irce-print-scaled-boundary-range-checks -S < %s 2>&1 | FileCheck %s
+
+
+; CHECK: irce: in function test1, in Loop at depth 1 containing: %loop<header><exiting>,%inbounds<latch><exiting>
+; CHECK-NEXT: there is range check with scaled boundary:
+; CHECK-NEXT: InductiveRangeCheck:
+; CHECK-NEXT:   Begin: 0  Step: 1  End: (-1 + (sext i8 %n to i16))<nsw>
+; CHECK-NEXT:   CheckUse:   br i1 %check, label %inbounds, label %out_of_bounds Operand: 0
+;
+; CHECK-NEXT: irce: in function test4, in Loop at depth 1 containing: %loop<header><exiting>,%inbounds<latch><exiting>
+; CHECK-NEXT: there is range check with scaled boundary:
+; CHECK-NEXT: InductiveRangeCheck:
+; CHECK-NEXT:   Begin: 0  Step: 1  End: (-2 + (sext i8 %n to i16))<nsw>
+; CHECK-NEXT:   CheckUse:   br i1 %check, label %inbounds, label %out_of_bounds Operand: 0
+;
+; CHECK-NEXT: irce: in function test_overflow_check_runtime, in Loop at depth 1 containing: %loop<header><exiting>,%inbounds<latch><exiting>
+; CHECK-NEXT: there is range check with scaled boundary:
+; CHECK-NEXT: InductiveRangeCheck:
+; CHECK-NEXT:   Begin: 0  Step: 1  End: (3 + (zext i8 %n to i16))<nuw><nsw>
+; CHECK-NEXT:   CheckUse:   br i1 %check, label %inbounds, label %out_of_bounds Operand: 0
 
 ; IV = 0; IV <s limit; IV += 1;
 ;   Check(N - IV >= 2)
@@ -66,25 +85,64 @@ define i8 @test1a(i8 %limit, ptr %p) {
 ; CHECK-NEXT:    [[PRECHECK:%.*]] = icmp sgt i8 [[LIMIT]], 0
 ; CHECK-NEXT:    br i1 [[PRECHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
 ; CHECK:       loop.preheader:
+; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i8 [[N]], -1
+; CHECK-NEXT:    [[SMIN:%.*]] = call i8 @llvm.smin.i8(i8 [[TMP0]], i8 0)
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i8 [[SMIN]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = mul i8 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[SMIN2:%.*]] = call i8 @llvm.smin.i8(i8 [[LIMIT]], i8 [[TMP2]])
+; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = call i8 @llvm.smax.i8(i8 [[SMIN2]], i8 0)
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt i8 0, [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[LOOP_PREHEADER4:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
+; CHECK:       loop.preheader4:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[IDX:%.*]] = phi i8 [ [[IDX_NEXT:%.*]], [[INBOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER]] ]
+; CHECK-NEXT:    [[IDX:%.*]] = phi i8 [ [[IDX_NEXT:%.*]], [[INBOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER4]] ]
 ; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[N]], [[IDX]]
 ; CHECK-NEXT:    [[CHECK:%.*]] = icmp sge i8 [[SUB]], 2
-; CHECK-NEXT:    br i1 [[CHECK]], label [[INBOUNDS]], label [[OUT_OF_BOUNDS:%.*]]
+; CHECK-NEXT:    br i1 true, label [[INBOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT5:%.*]]
 ; CHECK:       inbounds:
 ; CHECK-NEXT:    [[IDX_NEXT]] = add nuw i8 [[IDX]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[IDX_NEXT]], [[LIMIT]]
-; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp slt i8 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT:    br i1 [[TMP4]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
+; CHECK:       main.exit.selector:
+; CHECK-NEXT:    [[IDX_NEXT_LCSSA:%.*]] = phi i8 [ [[IDX_NEXT]], [[INBOUNDS]] ]
+; CHECK-NEXT:    [[IDX_LCSSA3:%.*]] = phi i8 [ [[IDX]], [[INBOUNDS]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp slt i8 [[IDX_NEXT_LCSSA]], [[LIMIT]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
+; CHECK:       main.pseudo.exit:
+; CHECK-NEXT:    [[IDX_COPY:%.*]] = phi i8 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    [[INDVAR_END:%.*]] = phi i8 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    br label [[POSTLOOP:%.*]]
+; CHECK:       exit.loopexit.loopexit:
+; CHECK-NEXT:    [[IDX_LCSSA1_PH:%.*]] = phi i8 [ [[IDX_POSTLOOP:%.*]], [[INBOUNDS_POSTLOOP:%.*]] ]
+; CHECK-NEXT:    br label [[EXIT_LOOPEXIT]]
 ; CHECK:       exit.loopexit:
-; CHECK-NEXT:    [[IDX_LCSSA1:%.*]] = phi i8 [ [[IDX]], [[INBOUNDS]] ]
+; CHECK-NEXT:    [[IDX_LCSSA1:%.*]] = phi i8 [ [[IDX_LCSSA3]], [[MAIN_EXIT_SELECTOR]] ], [ [[IDX_LCSSA1_PH]], [[EXIT_LOOPEXIT_LOOPEXIT:%.*]] ]
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[RES:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[IDX_LCSSA1]], [[EXIT_LOOPEXIT]] ]
 ; CHECK-NEXT:    ret i8 [[RES]]
+; CHECK:       out_of_bounds.loopexit:
+; CHECK-NEXT:    [[IDX_LCSSA_PH:%.*]] = phi i8 [ [[IDX_POSTLOOP]], [[LOOP_POSTLOOP:%.*]] ]
+; CHECK-NEXT:    br label [[OUT_OF_BOUNDS:%.*]]
+; CHECK:       out_of_bounds.loopexit5:
+; CHECK-NEXT:    [[IDX_LCSSA_PH6:%.*]] = phi i8 [ [[IDX]], [[LOOP]] ]
+; CHECK-NEXT:    br label [[OUT_OF_BOUNDS]]
 ; CHECK:       out_of_bounds:
-; CHECK-NEXT:    [[IDX_LCSSA:%.*]] = phi i8 [ [[IDX]], [[LOOP]] ]
+; CHECK-NEXT:    [[IDX_LCSSA:%.*]] = phi i8 [ [[IDX_LCSSA_PH]], [[OUT_OF_BOUNDS_LOOPEXIT:%.*]] ], [ [[IDX_LCSSA_PH6]], [[OUT_OF_BOUNDS_LOOPEXIT5]] ]
 ; CHECK-NEXT:    ret i8 [[IDX_LCSSA]]
+; CHECK:       postloop:
+; CHECK-NEXT:    br label [[LOOP_POSTLOOP]]
+; CHECK:       loop.postloop:
+; CHECK-NEXT:    [[IDX_POSTLOOP]] = phi i8 [ [[IDX_NEXT_POSTLOOP:%.*]], [[INBOUNDS_POSTLOOP]] ], [ [[IDX_COPY]], [[POSTLOOP]] ]
+; CHECK-NEXT:    [[SUB_POSTLOOP:%.*]] = sub i8 [[N]], [[IDX_POSTLOOP]]
+; CHECK-NEXT:    [[CHECK_POSTLOOP:%.*]] = icmp sge i8 [[SUB_POSTLOOP]], 2
+; CHECK-NEXT:    br i1 [[CHECK_POSTLOOP]], label [[INBOUNDS_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT]]
+; CHECK:       inbounds.postloop:
+; CHECK-NEXT:    [[IDX_NEXT_POSTLOOP]] = add nuw i8 [[IDX_POSTLOOP]], 1
+; CHECK-NEXT:    [[CMP_POSTLOOP:%.*]] = icmp slt i8 [[IDX_NEXT_POSTLOOP]], [[LIMIT]]
+; CHECK-NEXT:    br i1 [[CMP_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP1:![0-9]+]], !irce.loop.clone [[META6:![0-9]+]]
 ;
 entry:
   %n = load i8, ptr %p, !range !0
@@ -338,7 +396,7 @@ define i8 @test3a(i8 %limit, ptr %p) {
 ; CHECK:       inbounds.postloop:
 ; CHECK-NEXT:    [[IDX_NEXT_POSTLOOP]] = add nuw i8 [[IDX_POSTLOOP]], 1
 ; CHECK-NEXT:    [[CMP_POSTLOOP:%.*]] = icmp slt i8 [[IDX_NEXT_POSTLOOP]], [[LIMIT]]
-; CHECK-NEXT:    br i1 [[CMP_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP1:![0-9]+]], !irce.loop.clone !6
+; CHECK-NEXT:    br i1 [[CMP_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]], !irce.loop.clone [[META6]]
 ;
 entry:
   %n = load i8, ptr %p, !range !0
@@ -429,25 +487,69 @@ define i8 @test4a(i8 %limit, ptr %p) {
 ; CHECK-NEXT:    [[PRECHECK:%.*]] = icmp sgt i8 [[LIMIT]], 0
 ; CHECK-NEXT:    br i1 [[PRECHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
 ; CHECK:       loop.preheader:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i8 [[N]], -2
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw i8 [[N]], 127
+; CHECK-NEXT:    [[SMAX:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP1]], i8 0)
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i8 [[TMP0]], [[SMAX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw i8 [[N]], -2
+; CHECK-NEXT:    [[SMIN:%.*]] = call i8 @llvm.smin.i8(i8 [[TMP3]], i8 0)
+; CHECK-NEXT:    [[SMAX2:%.*]] = call i8 @llvm.smax.i8(i8 [[SMIN]], i8 -1)
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw i8 [[SMAX2]], 1
+; CHECK-NEXT:    [[TMP5:%.*]] = mul i8 [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    [[SMIN3:%.*]] = call i8 @llvm.smin.i8(i8 [[LIMIT]], i8 [[TMP5]])
+; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = call i8 @llvm.smax.i8(i8 [[SMIN3]], i8 0)
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp slt i8 0, [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT:    br i1 [[TMP6]], label [[LOOP_PREHEADER6:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
+; CHECK:       loop.preheader6:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[IDX:%.*]] = phi i8 [ [[IDX_NEXT:%.*]], [[INBOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER]] ]
+; CHECK-NEXT:    [[IDX:%.*]] = phi i8 [ [[IDX_NEXT:%.*]], [[INBOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER6]] ]
 ; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[N]], [[IDX]]
 ; CHECK-NEXT:    [[CHECK:%.*]] = icmp sgt i8 [[SUB]], 2
-; CHECK-NEXT:    br i1 [[CHECK]], label [[INBOUNDS]], label [[OUT_OF_BOUNDS:%.*]]
+; CHECK-NEXT:    br i1 true, label [[INBOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT7:%.*]]
 ; CHECK:       inbounds:
 ; CHECK-NEXT:    [[IDX_NEXT]] = add nuw i8 [[IDX]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[IDX_NEXT]], [[LIMIT]]
-; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp slt i8 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
+; CHECK:       main.exit.selector:
+; CHECK-NEXT:    [[IDX_NEXT_LCSSA:%.*]] = phi i8 [ [[IDX_NEXT]], [[INBOUNDS]] ]
+; CHECK-NEXT:    [[IDX_LCSSA5:%.*]] = phi i8 [ [[IDX]], [[INBOUNDS]] ]
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp slt i8 [[IDX_NEXT_LCSSA]], [[LIMIT]]
+; CHECK-NEXT:    br i1 [[TMP8]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
+; CHECK:       main.pseudo.exit:
+; CHECK-NEXT:    [[IDX_COPY:%.*]] = phi i8 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    [[INDVAR_END:%.*]] = phi i8 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    br label [[POSTLOOP:%.*]]
+; CHECK:       exit.loopexit.loopexit:
+; CHECK-NEXT:    [[IDX_LCSSA1_PH:%.*]] = phi i8 [ [[IDX_POSTLOOP:%.*]], [[INBOUNDS_POSTLOOP:%.*]] ]
+; CHECK-NEXT:    br label [[EXIT_LOOPEXIT]]
 ; CHECK:       exit.loopexit:
-; CHECK-NEXT:    [[IDX_LCSSA1:%.*]] = phi i8 [ [[IDX]], [[INBOUNDS]] ]
+; CHECK-NEXT:    [[IDX_LCSSA1:%.*]] = phi i8 [ [[IDX_LCSSA5]], [[MAIN_EXIT_SELECTOR]] ], [ [[IDX_LCSSA1_PH]], [[EXIT_LOOPEXIT_LOOPEXIT:%.*]] ]
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[RES:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[IDX_LCSSA1]], [[EXIT_LOOPEXIT]] ]
 ; CHECK-NEXT:    ret i8 [[RES]]
+; CHECK:       out_of_bounds.loopexit:
+; CHECK-NEXT:    [[IDX_LCSSA_PH:%.*]] = phi i8 [ [[IDX_POSTLOOP]], [[LOOP_POSTLOOP:%.*]] ]
+; CHECK-NEXT:    br label [[OUT_OF_BOUNDS:%.*]]
+; CHECK:       out_of_bounds.loopexit7:
+; CHECK-NEXT:    [[IDX_LCSSA_PH8:%.*]] = phi i8 [ [[IDX]], [[LOOP]] ]
+; CHECK-NEXT:    br label [[OUT_OF_BOUNDS]]
 ; CHECK:       out_of_bounds:
-; CHECK-NEXT:    [[IDX_LCSSA:%.*]] = phi i8 [ [[IDX]], [[LOOP]] ]
+; CHECK-NEXT:    [[IDX_LCSSA:%.*]] = phi i8 [ [[IDX_LCSSA_PH]], [[OUT_OF_BOUNDS_LOOPEXIT:%.*]] ], [ [[IDX_LCSSA_PH8]], [[OUT_OF_BOUNDS_LOOPEXIT7]] ]
 ; CHECK-NEXT:    ret i8 [[IDX_LCSSA]]
+; CHECK:       postloop:
+; CHECK-NEXT:    br label [[LOOP_POSTLOOP]]
+; CHECK:       loop.postloop:
+; CHECK-NEXT:    [[IDX_POSTLOOP]] = phi i8 [ [[IDX_NEXT_POSTLOOP:%.*]], [[INBOUNDS_POSTLOOP]] ], [ [[IDX_COPY]], [[POSTLOOP]] ]
+; CHECK-NEXT:    [[SUB_POSTLOOP:%.*]] = sub i8 [[N]], [[IDX_POSTLOOP]]
+; CHECK-NEXT:    [[CHECK_POSTLOOP:%.*]] = icmp sgt i8 [[SUB_POSTLOOP]], 2
+; CHECK-NEXT:    br i1 [[CHECK_POSTLOOP]], label [[INBOUNDS_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT]]
+; CHECK:       inbounds.postloop:
+; CHECK-NEXT:    [[IDX_NEXT_POSTLOOP]] = add nuw i8 [[IDX_POSTLOOP]], 1
+; CHECK-NEXT:    [[CMP_POSTLOOP:%.*]] = icmp slt i8 [[IDX_NEXT_POSTLOOP]], [[LIMIT]]
+; CHECK-NEXT:    br i1 [[CMP_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP8:![0-9]+]], !irce.loop.clone [[META6]]
 ;
 entry:
   %n = load i8, ptr %p, !range !0
@@ -652,7 +754,7 @@ define i8 @test6(i8 %limit, i8 %n) {
 ; CHECK:       inbounds.postloop:
 ; CHECK-NEXT:    [[IDX_NEXT_POSTLOOP]] = add nuw i8 [[IDX_POSTLOOP]], 1
 ; CHECK-NEXT:    [[CMP_POSTLOOP:%.*]] = icmp slt i8 [[IDX_NEXT_POSTLOOP]], [[LIMIT]]
-; CHECK-NEXT:    br i1 [[CMP_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]], !irce.loop.clone !6
+; CHECK-NEXT:    br i1 [[CMP_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP9:![0-9]+]], !irce.loop.clone [[META6]]
 ;
 entry:
   %precheck = icmp sgt i8 %limit, 0
@@ -743,7 +845,7 @@ define i8 @test6a(i8 %limit, ptr %p) {
 ; CHECK:       inbounds.postloop:
 ; CHECK-NEXT:    [[IDX_NEXT_POSTLOOP]] = add nuw i8 [[IDX_POSTLOOP]], 1
 ; CHECK-NEXT:    [[CMP_POSTLOOP:%.*]] = icmp slt i8 [[IDX_NEXT_POSTLOOP]], [[LIMIT]]
-; CHECK-NEXT:    br i1 [[CMP_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP8:![0-9]+]], !irce.loop.clone !6
+; CHECK-NEXT:    br i1 [[CMP_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP10:![0-9]+]], !irce.loop.clone [[META6]]
 ;
 entry:
   %n = load i8, ptr %p, !range !0
@@ -769,4 +871,158 @@ out_of_bounds:
   ret i8 %idx;
 }
 
+; IV = 0; IV <s limit; IV += 1;
+;   Check(N - IV > -2)
+;
+; IRCE is allowed.
+; IRCE will reassociate this range check to the 'IV < N + 2',
+; since N < 126 no-overflow fact is provable at compile time.
+define i8 @test_overflow_check_compile_time(i8 %limit, ptr %p) {
+; CHECK-LABEL: define i8 @test_overflow_check_compile_time
+; CHECK-SAME: (i8 [[LIMIT:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[N:%.*]] = load i8, ptr [[P]], align 1, !range [[RNG11:![0-9]+]]
+; CHECK-NEXT:    [[PRECHECK:%.*]] = icmp sgt i8 [[LIMIT]], 0
+; CHECK-NEXT:    br i1 [[PRECHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
+; CHECK:       loop.preheader:
+; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i8 [[N]], 2
+; CHECK-NEXT:    [[SMIN:%.*]] = call i8 @llvm.smin.i8(i8 [[LIMIT]], i8 [[TMP0]])
+; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = call i8 @llvm.smax.i8(i8 [[SMIN]], i8 0)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i8 0, [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT:    br i1 [[TMP1]], label [[LOOP_PREHEADER3:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
+; CHECK:       loop.preheader3:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IDX:%.*]] = phi i8 [ [[IDX_NEXT:%.*]], [[INBOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER3]] ]
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[N]], [[IDX]]
+; CHECK-NEXT:    [[CHECK:%.*]] = icmp sgt i8 [[SUB]], -2
+; CHECK-NEXT:    br i1 true, label [[INBOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT4:%.*]]
+; CHECK:       inbounds:
+; CHECK-NEXT:    [[IDX_NEXT]] = add nuw i8 [[IDX]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[IDX_NEXT]], [[LIMIT]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt i8 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
+; CHECK:       main.exit.selector:
+; CHECK-NEXT:    [[IDX_NEXT_LCSSA:%.*]] = phi i8 [ [[IDX_NEXT]], [[INBOUNDS]] ]
+; CHECK-NEXT:    [[IDX_LCSSA2:%.*]] = phi i8 [ [[IDX]], [[INBOUNDS]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt i8 [[IDX_NEXT_LCSSA]], [[LIMIT]]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
+; CHECK:       main.pseudo.exit:
+; CHECK-NEXT:    [[IDX_COPY:%.*]] = phi i8 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    [[INDVAR_END:%.*]] = phi i8 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    br label [[POSTLOOP:%.*]]
+; CHECK:       exit.loopexit.loopexit:
+; CHECK-NEXT:    [[IDX_LCSSA1_PH:%.*]] = phi i8 [ [[IDX_POSTLOOP:%.*]], [[INBOUNDS_POSTLOOP:%.*]] ]
+; CHECK-NEXT:    br label [[EXIT_LOOPEXIT]]
+; CHECK:       exit.loopexit:
+; CHECK-NEXT:    [[IDX_LCSSA1:%.*]] = phi i8 [ [[IDX_LCSSA2]], [[MAIN_EXIT_SELECTOR]] ], [ [[IDX_LCSSA1_PH]], [[EXIT_LOOPEXIT_LOOPEXIT:%.*]] ]
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[IDX_LCSSA1]], [[EXIT_LOOPEXIT]] ]
+; CHECK-NEXT:    ret i8 [[RES]]
+; CHECK:       out_of_bounds.loopexit:
+; CHECK-NEXT:    [[IDX_LCSSA_PH:%.*]] = phi i8 [ [[IDX_POSTLOOP]], [[LOOP_POSTLOOP:%.*]] ]
+; CHECK-NEXT:    br label [[OUT_OF_BOUNDS:%.*]]
+; CHECK:       out_of_bounds.loopexit4:
+; CHECK-NEXT:    [[IDX_LCSSA_PH5:%.*]] = phi i8 [ [[IDX]], [[LOOP]] ]
+; CHECK-NEXT:    br label [[OUT_OF_BOUNDS]]
+; CHECK:       out_of_bounds:
+; CHECK-NEXT:    [[IDX_LCSSA:%.*]] = phi i8 [ [[IDX_LCSSA_PH]], [[OUT_OF_BOUNDS_LOOPEXIT:%.*]] ], [ [[IDX_LCSSA_PH5]], [[OUT_OF_BOUNDS_LOOPEXIT4]] ]
+; CHECK-NEXT:    ret i8 [[IDX_LCSSA]]
+; CHECK:       postloop:
+; CHECK-NEXT:    br label [[LOOP_POSTLOOP]]
+; CHECK:       loop.postloop:
+; CHECK-NEXT:    [[IDX_POSTLOOP]] = phi i8 [ [[IDX_NEXT_POSTLOOP:%.*]], [[INBOUNDS_POSTLOOP]] ], [ [[IDX_COPY]], [[POSTLOOP]] ]
+; CHECK-NEXT:    [[SUB_POSTLOOP:%.*]] = sub i8 [[N]], [[IDX_POSTLOOP]]
+; CHECK-NEXT:    [[CHECK_POSTLOOP:%.*]] = icmp sgt i8 [[SUB_POSTLOOP]], -2
+; CHECK-NEXT:    br i1 [[CHECK_POSTLOOP]], label [[INBOUNDS_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT]]
+; CHECK:       inbounds.postloop:
+; CHECK-NEXT:    [[IDX_NEXT_POSTLOOP]] = add nuw i8 [[IDX_POSTLOOP]], 1
+; CHECK-NEXT:    [[CMP_POSTLOOP:%.*]] = icmp slt i8 [[IDX_NEXT_POSTLOOP]], [[LIMIT]]
+; CHECK-NEXT:    br i1 [[CMP_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP12:![0-9]+]], !irce.loop.clone [[META6]]
+;
+entry:
+  %n = load i8, ptr %p, !range !1
+  %precheck = icmp sgt i8 %limit, 0
+  br i1 %precheck, label %loop, label %exit
+
+loop:
+  %idx = phi i8 [ %idx.next, %inbounds ], [ 0, %entry ]
+  %sub = sub i8 %n, %idx
+  %check = icmp sgt i8 %sub, -2
+  br i1 %check, label %inbounds, label %out_of_bounds
+
+inbounds:
+  %idx.next = add nuw i8 %idx, 1
+  %cmp = icmp slt i8 %idx.next, %limit
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  %res = phi i8 [ 0, %entry ], [ %idx, %inbounds ]
+  ret i8 %res
+
+out_of_bounds:
+  ret i8 %idx;
+}
+
+; IV = 0; IV <s limit; IV += 1;
+;   Check(N - IV >= -2)
+;
+; TODO: IRCE is allowed.
+; IRCE will reassociate this range check to the 'IV < (N + 2) + 1',
+; since N < 126 no-overflow fact is NOT provable at compile time and
+; runtime overflow check is required.
+define i8 @test_overflow_check_runtime(i8 %limit, ptr %p) {
+; CHECK-LABEL: define i8 @test_overflow_check_runtime
+; CHECK-SAME: (i8 [[LIMIT:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[N:%.*]] = load i8, ptr [[P]], align 1, !range [[RNG11]]
+; CHECK-NEXT:    [[PRECHECK:%.*]] = icmp sgt i8 [[LIMIT]], 0
+; CHECK-NEXT:    br i1 [[PRECHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
+; CHECK:       loop.preheader:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IDX:%.*]] = phi i8 [ [[IDX_NEXT:%.*]], [[INBOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER]] ]
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[N]], [[IDX]]
+; CHECK-NEXT:    [[CHECK:%.*]] = icmp sge i8 [[SUB]], -2
+; CHECK-NEXT:    br i1 [[CHECK]], label [[INBOUNDS]], label [[OUT_OF_BOUNDS:%.*]]
+; CHECK:       inbounds:
+; CHECK-NEXT:    [[IDX_NEXT]] = add nuw i8 [[IDX]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[IDX_NEXT]], [[LIMIT]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
+; CHECK:       exit.loopexit:
+; CHECK-NEXT:    [[IDX_LCSSA1:%.*]] = phi i8 [ [[IDX]], [[INBOUNDS]] ]
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[IDX_LCSSA1]], [[EXIT_LOOPEXIT]] ]
+; CHECK-NEXT:    ret i8 [[RES]]
+; CHECK:       out_of_bounds:
+; CHECK-NEXT:    [[IDX_LCSSA:%.*]] = phi i8 [ [[IDX]], [[LOOP]] ]
+; CHECK-NEXT:    ret i8 [[IDX_LCSSA]]
+;
+entry:
+  %n = load i8, ptr %p, !range !1
+  %precheck = icmp sgt i8 %limit, 0
+  br i1 %precheck, label %loop, label %exit
+
+loop:
+  %idx = phi i8 [ %idx.next, %inbounds ], [ 0, %entry ]
+  %sub = sub i8 %n, %idx
+  %check = icmp sge i8 %sub, -2
+  br i1 %check, label %inbounds, label %out_of_bounds
+
+inbounds:
+  %idx.next = add nuw i8 %idx, 1
+  %cmp = icmp slt i8 %idx.next, %limit
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  %res = phi i8 [ 0, %entry ], [ %idx, %inbounds ]
+  ret i8 %res
+
+out_of_bounds:
+  ret i8 %idx;
+}
+
 !0 = !{i8 0, i8 127}
+!1 = !{i8 0, i8 126}


        


More information about the llvm-commits mailing list