[llvm] r310032 - [IRCE] Handle loops with step different from 1/-1
Max Kazantsev via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 4 00:01:05 PDT 2017
Author: mkazantsev
Date: Fri Aug 4 00:01:04 2017
New Revision: 310032
URL: http://llvm.org/viewvc/llvm-project?rev=310032&view=rev
Log:
[IRCE] Handle loops with step different from 1/-1
This patch generalizes IRCE to handle IV steps that are not equal to 1 or -1.
Differential Revision: https://reviews.llvm.org/D35539
Added:
llvm/trunk/test/Transforms/IRCE/stride_more_than_1.ll
Modified:
llvm/trunk/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
Modified: llvm/trunk/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp?rev=310032&r1=310031&r2=310032&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp Fri Aug 4 00:01:04 2017
@@ -457,6 +457,7 @@ struct LoopStructure {
Value *IndVarNext;
Value *IndVarStart;
+ Value *IndVarStep;
Value *LoopExitAt;
bool IndVarIncreasing;
bool IsSignedPredicate;
@@ -464,8 +465,8 @@ struct LoopStructure {
LoopStructure()
: Tag(""), Header(nullptr), Latch(nullptr), LatchBr(nullptr),
LatchExit(nullptr), LatchBrExitIdx(-1), IndVarNext(nullptr),
- IndVarStart(nullptr), LoopExitAt(nullptr), IndVarIncreasing(false),
- IsSignedPredicate(true) {}
+ IndVarStart(nullptr), IndVarStep(nullptr), LoopExitAt(nullptr),
+ IndVarIncreasing(false), IsSignedPredicate(true) {}
template <typename M> LoopStructure map(M Map) const {
LoopStructure Result;
@@ -477,6 +478,7 @@ struct LoopStructure {
Result.LatchBrExitIdx = LatchBrExitIdx;
Result.IndVarNext = Map(IndVarNext);
Result.IndVarStart = Map(IndVarStart);
+ Result.IndVarStep = Map(IndVarStep);
Result.LoopExitAt = Map(LoopExitAt);
Result.IndVarIncreasing = IndVarIncreasing;
Result.IsSignedPredicate = IsSignedPredicate;
@@ -660,6 +662,21 @@ static bool CanBeMax(ScalarEvolution &SE
SE.getUnsignedRange(S).contains(Max);
}
+static bool SumCanReachMax(ScalarEvolution &SE, const SCEV *S1, const SCEV *S2,
+ bool Signed) {
+ // S1 < INT_MAX - S2 ===> S1 + S2 < INT_MAX.
+ assert(SE.isKnownNonNegative(S2) &&
+ "We expected the 2nd arg to be non-negative!");
+ const SCEV *Max = SE.getConstant(
+ Signed ? APInt::getSignedMaxValue(
+ cast<IntegerType>(S1->getType())->getBitWidth())
+ : APInt::getMaxValue(
+ cast<IntegerType>(S1->getType())->getBitWidth()));
+ const SCEV *CapForS1 = SE.getMinusSCEV(Max, S2);
+ return !SE.isKnownPredicate(Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+ S1, CapForS1);
+}
+
static bool CanBeMin(ScalarEvolution &SE, const SCEV *S, bool Signed) {
APInt Min = Signed ?
APInt::getSignedMinValue(cast<IntegerType>(S->getType())->getBitWidth()) :
@@ -668,6 +685,21 @@ static bool CanBeMin(ScalarEvolution &SE
SE.getUnsignedRange(S).contains(Min);
}
+static bool SumCanReachMin(ScalarEvolution &SE, const SCEV *S1, const SCEV *S2,
+ bool Signed) {
+ // S1 > INT_MIN - S2 ===> S1 + S2 > INT_MIN.
+ assert(SE.isKnownNonPositive(S2) &&
+ "We expected the 2nd arg to be non-positive!");
+ const SCEV *Max = SE.getConstant(
+ Signed ? APInt::getSignedMinValue(
+ cast<IntegerType>(S1->getType())->getBitWidth())
+ : APInt::getMinValue(
+ cast<IntegerType>(S1->getType())->getBitWidth()));
+ const SCEV *CapForS1 = SE.getMinusSCEV(Max, S2);
+ return !SE.isKnownPredicate(Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT,
+ S1, CapForS1);
+}
+
Optional<LoopStructure>
LoopStructure::parseLoopStructure(ScalarEvolution &SE,
BranchProbabilityInfo &BPI,
@@ -772,7 +804,11 @@ LoopStructure::parseLoopStructure(Scalar
return AR->getNoWrapFlags(SCEV::FlagNSW) != SCEV::FlagAnyWrap;
};
- auto IsInductionVar = [&](const SCEVAddRecExpr *AR, bool &IsIncreasing) {
+ // Here we check whether the suggested AddRec is an induction variable that
+ // can be handled (i.e. with known constant step), and if yes, calculate its
+ // step and identify whether it is increasing or decreasing.
+ auto IsInductionVar = [&](const SCEVAddRecExpr *AR, bool &IsIncreasing,
+ ConstantInt *&StepCI) {
if (!AR->isAffine())
return false;
@@ -784,12 +820,10 @@ LoopStructure::parseLoopStructure(Scalar
if (const SCEVConstant *StepExpr =
dyn_cast<SCEVConstant>(AR->getStepRecurrence(SE))) {
- ConstantInt *StepCI = StepExpr->getValue();
+ StepCI = StepExpr->getValue();
assert(!StepCI->isZero() && "Zero step?");
- if (StepCI->isOne() || StepCI->isMinusOne()) {
- IsIncreasing = StepCI->isOne();
- return true;
- }
+ IsIncreasing = !StepCI->isNegative();
+ return true;
}
return false;
@@ -801,7 +835,8 @@ LoopStructure::parseLoopStructure(Scalar
const SCEVAddRecExpr *IndVarNext = cast<SCEVAddRecExpr>(LeftSCEV);
bool IsIncreasing = false;
bool IsSignedPredicate = true;
- if (!IsInductionVar(IndVarNext, IsIncreasing)) {
+ ConstantInt *StepCI;
+ if (!IsInductionVar(IndVarNext, IsIncreasing, StepCI)) {
FailureReason = "LHS in icmp not induction variable";
return None;
}
@@ -809,34 +844,37 @@ LoopStructure::parseLoopStructure(Scalar
const SCEV *StartNext = IndVarNext->getStart();
const SCEV *Addend = SE.getNegativeSCEV(IndVarNext->getStepRecurrence(SE));
const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend);
+ const SCEV *Step = SE.getSCEV(StepCI);
ConstantInt *One = ConstantInt::get(IndVarTy, 1);
if (IsIncreasing) {
bool DecreasedRightValueByOne = false;
- // Try to turn eq/ne predicates to those we can work with.
- if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1)
- // while (++i != len) { while (++i < len) {
- // ... ---> ...
- // } }
- // If both parts are known non-negative, it is profitable to use unsigned
- // comparison in increasing loop. This allows us to make the comparison
- // check against "RightSCEV + 1" more optimistic.
- if (SE.isKnownNonNegative(IndVarStart) &&
- SE.isKnownNonNegative(RightSCEV))
- Pred = ICmpInst::ICMP_ULT;
- else
- Pred = ICmpInst::ICMP_SLT;
- else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 &&
- !CanBeMin(SE, RightSCEV, /* IsSignedPredicate */ true)) {
- // while (true) { while (true) {
- // if (++i == len) ---> if (++i > len - 1)
- // break; break;
- // ... ...
- // } }
- // TODO: Insert ICMP_UGT if both are non-negative?
- Pred = ICmpInst::ICMP_SGT;
- RightSCEV = SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType()));
- DecreasedRightValueByOne = true;
+ if (StepCI->isOne()) {
+ // Try to turn eq/ne predicates to those we can work with.
+ if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1)
+ // while (++i != len) { while (++i < len) {
+ // ... ---> ...
+ // } }
+ // If both parts are known non-negative, it is profitable to use
+ // unsigned comparison in increasing loop. This allows us to make the
+ // comparison check against "RightSCEV + 1" more optimistic.
+ if (SE.isKnownNonNegative(IndVarStart) &&
+ SE.isKnownNonNegative(RightSCEV))
+ Pred = ICmpInst::ICMP_ULT;
+ else
+ Pred = ICmpInst::ICMP_SLT;
+ else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 &&
+ !CanBeMin(SE, RightSCEV, /* IsSignedPredicate */ true)) {
+ // while (true) { while (true) {
+ // if (++i == len) ---> if (++i > len - 1)
+ // break; break;
+ // ... ...
+ // } }
+ // TODO: Insert ICMP_UGT if both are non-negative?
+ Pred = ICmpInst::ICMP_SGT;
+ RightSCEV = SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType()));
+ DecreasedRightValueByOne = true;
+ }
}
bool LTPred = (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT);
@@ -857,7 +895,9 @@ LoopStructure::parseLoopStructure(Scalar
IsSignedPredicate ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;
if (LatchBrExitIdx == 0) {
- if (CanBeMax(SE, RightSCEV, IsSignedPredicate)) {
+ const SCEV *StepMinusOne = SE.getMinusSCEV(Step,
+ SE.getOne(Step->getType()));
+ if (SumCanReachMax(SE, RightSCEV, StepMinusOne, IsSignedPredicate)) {
// TODO: this restriction is easily removable -- we just have to
// remember that the icmp was an slt and not an sle.
FailureReason = "limit may overflow when coercing le to lt";
@@ -866,7 +906,7 @@ LoopStructure::parseLoopStructure(Scalar
if (!SE.isLoopEntryGuardedByCond(
&L, BoundPred, IndVarStart,
- SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType())))) {
+ SE.getAddExpr(RightSCEV, Step))) {
FailureReason = "Induction variable start not bounded by upper limit";
return None;
}
@@ -887,26 +927,28 @@ LoopStructure::parseLoopStructure(Scalar
}
} else {
bool IncreasedRightValueByOne = false;
- // Try to turn eq/ne predicates to those we can work with.
- if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1)
- // while (--i != len) { while (--i > len) {
- // ... ---> ...
- // } }
- // We intentionally don't turn the predicate into UGT even if we know that
- // both operands are non-negative, because it will only pessimize our
- // check against "RightSCEV - 1".
- Pred = ICmpInst::ICMP_SGT;
- else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 &&
- !CanBeMax(SE, RightSCEV, /* IsSignedPredicate */ true)) {
- // while (true) { while (true) {
- // if (--i == len) ---> if (--i < len + 1)
- // break; break;
- // ... ...
- // } }
- // TODO: Insert ICMP_ULT if both are non-negative?
- Pred = ICmpInst::ICMP_SLT;
- RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType()));
- IncreasedRightValueByOne = true;
+ if (StepCI->isMinusOne()) {
+ // Try to turn eq/ne predicates to those we can work with.
+ if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1)
+ // while (--i != len) { while (--i > len) {
+ // ... ---> ...
+ // } }
+ // We intentionally don't turn the predicate into UGT even if we know
+ // that both operands are non-negative, because it will only pessimize
+ // our check against "RightSCEV - 1".
+ Pred = ICmpInst::ICMP_SGT;
+ else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 &&
+ !CanBeMax(SE, RightSCEV, /* IsSignedPredicate */ true)) {
+ // while (true) { while (true) {
+ // if (--i == len) ---> if (--i < len + 1)
+ // break; break;
+ // ... ...
+ // } }
+ // TODO: Insert ICMP_ULT if both are non-negative?
+ Pred = ICmpInst::ICMP_SLT;
+ RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType()));
+ IncreasedRightValueByOne = true;
+ }
}
bool LTPred = (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT);
@@ -928,7 +970,8 @@ LoopStructure::parseLoopStructure(Scalar
IsSignedPredicate ? CmpInst::ICMP_SGT : CmpInst::ICMP_UGT;
if (LatchBrExitIdx == 0) {
- if (CanBeMin(SE, RightSCEV, IsSignedPredicate)) {
+ const SCEV *StepPlusOne = SE.getAddExpr(Step, SE.getOne(Step->getType()));
+ if (SumCanReachMin(SE, RightSCEV, StepPlusOne, IsSignedPredicate)) {
// TODO: this restriction is easily removable -- we just have to
// remember that the icmp was an sgt and not an sge.
FailureReason = "limit may overflow when coercing ge to gt";
@@ -979,6 +1022,7 @@ LoopStructure::parseLoopStructure(Scalar
Result.LatchExit = LatchExit;
Result.LatchBrExitIdx = LatchBrExitIdx;
Result.IndVarStart = IndVarStartV;
+ Result.IndVarStep = StepCI;
Result.IndVarNext = LeftValue;
Result.IndVarIncreasing = IsIncreasing;
Result.LoopExitAt = RightValue;
@@ -1529,8 +1573,6 @@ InductiveRangeCheck::computeSafeIteratio
// this inductive range check is a range check on the "C + D * I" ("C" is
// getOffset() and "D" is getScale()). We rewrite the value being range
// checked to "M + N * IndVar" where "N" = "D * B^(-1)" and "M" = "C - NA".
- // Currently we support this only for "B" = "D" = { 1 or -1 }, but the code
- // can be generalized as needed.
//
// The actual inequalities we solve are of the form
//
@@ -1567,11 +1609,9 @@ InductiveRangeCheck::computeSafeIteratio
return None;
ConstantInt *ConstD = D->getValue();
- if (!(ConstD->isMinusOne() || ConstD->isOne()))
- return None;
+ assert(!ConstD->isZero() && "Recurrence with zero step?");
const SCEV *M = SE.getMinusSCEV(C, A);
-
const SCEV *Begin = SE.getNegativeSCEV(M);
const SCEV *UpperLimit = nullptr;
@@ -1659,11 +1699,8 @@ bool InductiveRangeCheckElimination::run
return false;
}
LoopStructure LS = MaybeLoopStructure.getValue();
- bool Increasing = LS.IndVarIncreasing;
- const SCEV *MinusOne =
- SE.getConstant(LS.IndVarNext->getType(), Increasing ? -1 : 1, true);
const SCEVAddRecExpr *IndVar =
- cast<SCEVAddRecExpr>(SE.getAddExpr(SE.getSCEV(LS.IndVarNext), MinusOne));
+ cast<SCEVAddRecExpr>(SE.getMinusSCEV(SE.getSCEV(LS.IndVarNext), SE.getSCEV(LS.IndVarStep)));
Optional<InductiveRangeCheck::Range> SafeIterRange;
Instruction *ExprInsertPt = Preheader->getTerminator();
Added: llvm/trunk/test/Transforms/IRCE/stride_more_than_1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/IRCE/stride_more_than_1.ll?rev=310032&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/IRCE/stride_more_than_1.ll (added)
+++ llvm/trunk/test/Transforms/IRCE/stride_more_than_1.ll Fri Aug 4 00:01:04 2017
@@ -0,0 +1,468 @@
+; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -S < %s 2>&1 | FileCheck %s
+
+; CHECK: irce: in function test_01: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
+; CHECK: irce: in function test_02: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
+; CHECK: irce: in function test_03: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
+; CHECK-NOT: irce: in function test_04: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
+; CHECK: irce: in function test_05: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
+; CHECK: irce: in function test_06: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
+; CHECK-NOT: irce: in function test_07: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
+; CHECK: irce: in function test_08: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
+
+; IV = 0; IV <s 100; IV += 7; 0 <= Len <= 50. IRCE is allowed.
+define void @test_01(i32* %arr, i32* %a_len_ptr) {
+
+; CHECK: @test_01(
+; CHECK: entry:
+; CHECK-NEXT: %exit.mainloop.at = load i32, i32* %a_len_ptr
+; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at
+; CHECK-NEXT: br i1 [[COND1]], label %loop.preheader, label %main.pseudo.exit
+; CHECK: loop.preheader:
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: %idx = phi i32 [ %idx.next, %in.bounds ], [ 0, %loop.preheader ]
+; CHECK-NEXT: %idx.next = add i32 %idx, 7
+; CHECK-NEXT: %abc = icmp slt i32 %idx, %exit.mainloop.at
+; CHECK-NEXT: br i1 true, label %in.bounds, label %out.of.bounds.loopexit1
+; CHECK: in.bounds:
+; CHECK-NEXT: %addr = getelementptr i32, i32* %arr, i32 %idx
+; CHECK-NEXT: store i32 0, i32* %addr
+; CHECK-NEXT: %next = icmp slt i32 %idx.next, 100
+; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp slt i32 %idx.next, %exit.mainloop.at
+; CHECK-NEXT: br i1 [[COND2]], label %loop, label %main.exit.selector
+; CHECK: main.exit.selector:
+; CHECK-NEXT: %idx.next.lcssa = phi i32 [ %idx.next, %in.bounds ]
+; CHECK-NEXT: [[COND3:%[^ ]+]] = icmp slt i32 %idx.next.lcssa, 100
+; CHECK-NEXT: br i1 [[COND3]], label %main.pseudo.exit, label %exit
+; CHECK: main.pseudo.exit:
+; CHECK-NEXT: %idx.copy = phi i32 [ 0, %entry ], [ %idx.next.lcssa, %main.exit.selector ]
+; CHECK-NEXT: %indvar.end = phi i32 [ 0, %entry ], [ %idx.next.lcssa, %main.exit.selector ]
+; CHECK-NEXT: br label %postloop
+; CHECK: postloop:
+; CHECK-NEXT: br label %loop.postloop
+; CHECK: loop.postloop:
+; CHECK-NEXT: %idx.postloop = phi i32 [ %idx.copy, %postloop ], [ %idx.next.postloop, %in.bounds.postloop ]
+; CHECK-NEXT: %idx.next.postloop = add i32 %idx.postloop, 7
+; CHECK-NEXT: %abc.postloop = icmp slt i32 %idx.postloop, %exit.mainloop.at
+; CHECK-NEXT: br i1 %abc.postloop, label %in.bounds.postloop, label %out.of.bounds.loopexit
+; CHECK: in.bounds.postloop:
+; CHECK-NEXT: %addr.postloop = getelementptr i32, i32* %arr, i32 %idx.postloop
+; CHECK-NEXT: store i32 0, i32* %addr.postloop
+; CHECK-NEXT: %next.postloop = icmp slt i32 %idx.next.postloop, 100
+; CHECK-NEXT: br i1 %next.postloop, label %loop.postloop, label %exit.loopexit
+
+entry:
+ %len = load i32, i32* %a_len_ptr, !range !0
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
+ %idx.next = add i32 %idx, 7
+ %abc = icmp slt i32 %idx, %len
+ br i1 %abc, label %in.bounds, label %out.of.bounds
+
+in.bounds:
+ %addr = getelementptr i32, i32* %arr, i32 %idx
+ store i32 0, i32* %addr
+ %next = icmp slt i32 %idx.next, 100
+ br i1 %next, label %loop, label %exit
+
+out.of.bounds:
+ ret void
+
+exit:
+ ret void
+}
+
+; IV = 0; IV <s MAX_INT - 7; IV += 7; 0 <= Len <= 50. IRCE is allowed.
+define void @test_02(i32* %arr, i32* %a_len_ptr) {
+
+; CHECK: @test_02(
+; CHECK: entry:
+; CHECK-NEXT: %exit.mainloop.at = load i32, i32* %a_len_ptr
+; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at
+; CHECK-NEXT: br i1 [[COND1]], label %loop.preheader, label %main.pseudo.exit
+; CHECK: loop.preheader:
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: %idx = phi i32 [ %idx.next, %in.bounds ], [ 0, %loop.preheader ]
+; CHECK-NEXT: %idx.next = add i32 %idx, 7
+; CHECK-NEXT: %abc = icmp slt i32 %idx, %exit.mainloop.at
+; CHECK-NEXT: br i1 true, label %in.bounds, label %out.of.bounds.loopexit1
+; CHECK: in.bounds:
+; CHECK-NEXT: %addr = getelementptr i32, i32* %arr, i32 %idx
+; CHECK-NEXT: store i32 0, i32* %addr
+; CHECK-NEXT: %next = icmp slt i32 %idx.next, 2147483640
+; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp slt i32 %idx.next, %exit.mainloop.at
+; CHECK-NEXT: br i1 [[COND2]], label %loop, label %main.exit.selector
+; CHECK: main.exit.selector:
+; CHECK-NEXT: %idx.next.lcssa = phi i32 [ %idx.next, %in.bounds ]
+; CHECK-NEXT: [[COND3:%[^ ]+]] = icmp slt i32 %idx.next.lcssa, 2147483640
+; CHECK-NEXT: br i1 [[COND3]], label %main.pseudo.exit, label %exit
+; CHECK: main.pseudo.exit:
+; CHECK-NEXT: %idx.copy = phi i32 [ 0, %entry ], [ %idx.next.lcssa, %main.exit.selector ]
+; CHECK-NEXT: %indvar.end = phi i32 [ 0, %entry ], [ %idx.next.lcssa, %main.exit.selector ]
+; CHECK-NEXT: br label %postloop
+; CHECK: postloop:
+; CHECK-NEXT: br label %loop.postloop
+; CHECK: loop.postloop:
+; CHECK-NEXT: %idx.postloop = phi i32 [ %idx.copy, %postloop ], [ %idx.next.postloop, %in.bounds.postloop ]
+; CHECK-NEXT: %idx.next.postloop = add i32 %idx.postloop, 7
+; CHECK-NEXT: %abc.postloop = icmp slt i32 %idx.postloop, %exit.mainloop.at
+; CHECK-NEXT: br i1 %abc.postloop, label %in.bounds.postloop, label %out.of.bounds.loopexit
+; CHECK: in.bounds.postloop:
+; CHECK-NEXT: %addr.postloop = getelementptr i32, i32* %arr, i32 %idx.postloop
+; CHECK-NEXT: store i32 0, i32* %addr.postloop
+; CHECK-NEXT: %next.postloop = icmp slt i32 %idx.next.postloop, 2147483640
+; CHECK-NEXT: br i1 %next.postloop, label %loop.postloop, label %exit.loopexit
+
+entry:
+ %len = load i32, i32* %a_len_ptr, !range !0
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
+ %idx.next = add i32 %idx, 7
+ %abc = icmp slt i32 %idx, %len
+ br i1 %abc, label %in.bounds, label %out.of.bounds
+
+in.bounds:
+ %addr = getelementptr i32, i32* %arr, i32 %idx
+ store i32 0, i32* %addr
+ %next = icmp slt i32 %idx.next, 2147483640
+ br i1 %next, label %loop, label %exit
+
+out.of.bounds:
+ ret void
+
+exit:
+ ret void
+}
+
+; IV = 0; IV <s MAX_INT; IV += 7; 0 <= Len <= MAX_INT - 7. This is the greatest
+; value of Len for which IRCE is allowed.
+define void @test_03(i32* %arr, i32* %a_len_ptr) {
+
+; CHECK: @test_03(
+; CHECK: entry:
+; CHECK-NEXT: %exit.mainloop.at = load i32, i32* %a_len_ptr
+; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at
+; CHECK-NEXT: br i1 [[COND1]], label %loop.preheader, label %main.pseudo.exit
+; CHECK: loop.preheader:
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: %idx = phi i32 [ %idx.next, %in.bounds ], [ 0, %loop.preheader ]
+; CHECK-NEXT: %idx.next = add i32 %idx, 7
+; CHECK-NEXT: %abc = icmp slt i32 %idx, %exit.mainloop.at
+; CHECK-NEXT: br i1 true, label %in.bounds, label %out.of.bounds.loopexit1
+; CHECK: in.bounds:
+; CHECK-NEXT: %addr = getelementptr i32, i32* %arr, i32 %idx
+; CHECK-NEXT: store i32 0, i32* %addr
+; CHECK-NEXT: %next = icmp slt i32 %idx.next, 2147483647
+; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp slt i32 %idx.next, %exit.mainloop.at
+; CHECK-NEXT: br i1 [[COND2]], label %loop, label %main.exit.selector
+; CHECK: main.exit.selector:
+; CHECK-NEXT: %idx.next.lcssa = phi i32 [ %idx.next, %in.bounds ]
+; CHECK-NEXT: [[COND3:%[^ ]+]] = icmp slt i32 %idx.next.lcssa, 2147483647
+; CHECK-NEXT: br i1 [[COND3]], label %main.pseudo.exit, label %exit
+; CHECK: main.pseudo.exit:
+; CHECK-NEXT: %idx.copy = phi i32 [ 0, %entry ], [ %idx.next.lcssa, %main.exit.selector ]
+; CHECK-NEXT: %indvar.end = phi i32 [ 0, %entry ], [ %idx.next.lcssa, %main.exit.selector ]
+; CHECK-NEXT: br label %postloop
+; CHECK: postloop:
+; CHECK-NEXT: br label %loop.postloop
+; CHECK: loop.postloop:
+; CHECK-NEXT: %idx.postloop = phi i32 [ %idx.copy, %postloop ], [ %idx.next.postloop, %in.bounds.postloop ]
+; CHECK-NEXT: %idx.next.postloop = add i32 %idx.postloop, 7
+; CHECK-NEXT: %abc.postloop = icmp slt i32 %idx.postloop, %exit.mainloop.at
+; CHECK-NEXT: br i1 %abc.postloop, label %in.bounds.postloop, label %out.of.bounds.loopexit
+; CHECK: in.bounds.postloop:
+; CHECK-NEXT: %addr.postloop = getelementptr i32, i32* %arr, i32 %idx.postloop
+; CHECK-NEXT: store i32 0, i32* %addr.postloop
+; CHECK-NEXT: %next.postloop = icmp slt i32 %idx.next.postloop, 2147483647
+; CHECK-NEXT: br i1 %next.postloop, label %loop.postloop, label %exit.loopexit
+
+entry:
+ %len = load i32, i32* %a_len_ptr, !range !1
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
+ %idx.next = add i32 %idx, 7
+ %abc = icmp slt i32 %idx, %len
+ br i1 %abc, label %in.bounds, label %out.of.bounds
+
+in.bounds:
+ %addr = getelementptr i32, i32* %arr, i32 %idx
+ store i32 0, i32* %addr
+ %next = icmp slt i32 %idx.next, 2147483647
+ br i1 %next, label %loop, label %exit
+
+out.of.bounds:
+ ret void
+
+exit:
+ ret void
+}
+
+; IV = 0; IV <s MAX_INT; IV += 7; 0 <= Len <= MAX_INT - 6. IRCE is not allowed,
+; because we cannot guarantee that IV + 7 will not exceed MAX_INT.
+; Negative test.
+define void @test_04(i32* %arr, i32* %a_len_ptr) {
+
+; CHECK: @test_04(
+
+entry:
+ %len = load i32, i32* %a_len_ptr, !range !2
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
+ %idx.next = add i32 %idx, 7
+ %abc = icmp slt i32 %idx, %len
+ br i1 %abc, label %in.bounds, label %out.of.bounds
+
+in.bounds:
+ %addr = getelementptr i32, i32* %arr, i32 %idx
+ store i32 0, i32* %addr
+ %next = icmp slt i32 %idx.next, 2147483647
+ br i1 %next, label %loop, label %exit
+
+out.of.bounds:
+ ret void
+
+exit:
+ ret void
+}
+
+; IV = 100; IV >s -1; IV -= 7; 0 <= Len <= 50. IRCE is allowed.
+define void @test_05(i32* %arr, i32* %a_len_ptr) {
+
+; CHECK: @test_05(
+; CHECK: entry:
+; CHECK-NEXT: %len = load i32, i32* %a_len_ptr
+; CHECK-NEXT: %exit.preloop.at = add i32 %len, -1
+; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp sgt i32 100, %exit.preloop.at
+; CHECK-NEXT: br i1 [[COND1]], label %loop.preloop.preheader, label %preloop.pseudo.exit
+; CHECK: loop.preloop.preheader:
+; CHECK-NEXT: br label %loop.preloop
+; CHECK: mainloop:
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: %idx = phi i32 [ %idx.preloop.copy, %mainloop ], [ %idx.next, %in.bounds ]
+; CHECK-NEXT: %idx.next = add i32 %idx, -7
+; CHECK-NEXT: %abc = icmp slt i32 %idx, %len
+; CHECK-NEXT: br i1 true, label %in.bounds, label %out.of.bounds.loopexit1
+; CHECK: in.bounds:
+; CHECK-NEXT: %addr = getelementptr i32, i32* %arr, i32 %idx
+; CHECK-NEXT: store i32 0, i32* %addr
+; CHECK-NEXT: %next = icmp sgt i32 %idx.next, -1
+; CHECK-NEXT: br i1 %next, label %loop, label %exit.loopexit
+; CHECK: loop.preloop:
+; CHECK-NEXT: %idx.preloop = phi i32 [ %idx.next.preloop, %in.bounds.preloop ], [ 100, %loop.preloop.preheader ]
+; CHECK-NEXT: %idx.next.preloop = add i32 %idx.preloop, -7
+; CHECK-NEXT: %abc.preloop = icmp slt i32 %idx.preloop, %len
+; CHECK-NEXT: br i1 %abc.preloop, label %in.bounds.preloop, label %out.of.bounds.loopexit
+; CHECK: in.bounds.preloop:
+; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop
+; CHECK-NEXT: store i32 0, i32* %addr.preloop
+; CHECK-NEXT: %next.preloop = icmp sgt i32 %idx.next.preloop, -1
+; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp sgt i32 %idx.next.preloop, %exit.preloop.at
+; CHECK-NEXT: br i1 [[COND2]], label %loop.preloop, label %preloop.exit.selector
+; CHECK: preloop.exit.selector:
+; CHECK-NEXT: %idx.next.preloop.lcssa = phi i32 [ %idx.next.preloop, %in.bounds.preloop ]
+; CHECK-NEXT: [[COND3:%[^ ]+]] = icmp sgt i32 %idx.next.preloop.lcssa, -1
+; CHECK-NEXT: br i1 [[COND3]], label %preloop.pseudo.exit, label %exit
+; CHECK: preloop.pseudo.exit:
+; CHECK-NEXT: %idx.preloop.copy = phi i32 [ 100, %entry ], [ %idx.next.preloop.lcssa, %preloop.exit.selector ]
+; CHECK-NEXT: %indvar.end = phi i32 [ 100, %entry ], [ %idx.next.preloop.lcssa, %preloop.exit.selector ]
+; CHECK-NEXT: br label %mainloop
+
+entry:
+ %len = load i32, i32* %a_len_ptr, !range !0
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 100, %entry ], [ %idx.next, %in.bounds ]
+ %idx.next = add i32 %idx, -7
+ %abc = icmp slt i32 %idx, %len
+ br i1 %abc, label %in.bounds, label %out.of.bounds
+
+in.bounds:
+ %addr = getelementptr i32, i32* %arr, i32 %idx
+ store i32 0, i32* %addr
+ %next = icmp sgt i32 %idx.next, -1
+ br i1 %next, label %loop, label %exit
+
+out.of.bounds:
+ ret void
+
+exit:
+ ret void
+}
+
+; IV = MAX_INT - 7; IV >u 6; IV -= 7; 10 <= Len <= 50. IRCE is allowed.
+define void @test_06(i32* %arr, i32* %a_len_ptr) {
+
+; CHECK: @test_06(
+; CHECK: entry:
+; CHECK-NEXT: %len = load i32, i32* %a_len_ptr
+; CHECK-NEXT: %exit.preloop.at = add i32 %len, -1
+; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp ugt i32 2147483640, %exit.preloop.at
+; CHECK-NEXT: br i1 [[COND1]], label %loop.preloop.preheader, label %preloop.pseudo.exit
+; CHECK: loop.preloop.preheader:
+; CHECK-NEXT: br label %loop.preloop
+; CHECK: mainloop:
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: %idx = phi i32 [ %idx.preloop.copy, %mainloop ], [ %idx.next, %in.bounds ]
+; CHECK-NEXT: %idx.next = add i32 %idx, -7
+; CHECK-NEXT: %abc = icmp slt i32 %idx, %len
+; CHECK-NEXT: br i1 true, label %in.bounds, label %out.of.bounds.loopexit1
+; CHECK: in.bounds:
+; CHECK-NEXT: %addr = getelementptr i32, i32* %arr, i32 %idx
+; CHECK-NEXT: store i32 0, i32* %addr
+; CHECK-NEXT: %next = icmp ugt i32 %idx.next, 6
+; CHECK-NEXT: br i1 %next, label %loop, label %exit.loopexit
+; CHECK: loop.preloop:
+; CHECK-NEXT: %idx.preloop = phi i32 [ %idx.next.preloop, %in.bounds.preloop ], [ 2147483640, %loop.preloop.preheader ]
+; CHECK-NEXT: %idx.next.preloop = add i32 %idx.preloop, -7
+; CHECK-NEXT: %abc.preloop = icmp slt i32 %idx.preloop, %len
+; CHECK-NEXT: br i1 %abc.preloop, label %in.bounds.preloop, label %out.of.bounds.loopexit
+; CHECK: in.bounds.preloop:
+; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop
+; CHECK-NEXT: store i32 0, i32* %addr.preloop
+; CHECK-NEXT: %next.preloop = icmp ugt i32 %idx.next.preloop, 6
+; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp ugt i32 %idx.next.preloop, %exit.preloop.at
+; CHECK-NEXT: br i1 [[COND2]], label %loop.preloop, label %preloop.exit.selector
+; CHECK: preloop.exit.selector:
+; CHECK-NEXT: %idx.next.preloop.lcssa = phi i32 [ %idx.next.preloop, %in.bounds.preloop ]
+; CHECK-NEXT: [[COND3:%[^ ]+]] = icmp ugt i32 %idx.next.preloop.lcssa, 6
+; CHECK-NEXT: br i1 [[COND3]], label %preloop.pseudo.exit, label %exit
+; CHECK: preloop.pseudo.exit:
+; CHECK-NEXT: %idx.preloop.copy = phi i32 [ 2147483640, %entry ], [ %idx.next.preloop.lcssa, %preloop.exit.selector ]
+; CHECK-NEXT: %indvar.end = phi i32 [ 2147483640, %entry ], [ %idx.next.preloop.lcssa, %preloop.exit.selector ]
+; CHECK-NEXT: br label %mainloop
+
+entry:
+ %len = load i32, i32* %a_len_ptr, !range !3
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 2147483640, %entry ], [ %idx.next, %in.bounds ]
+ %idx.next = add i32 %idx, -7
+ %abc = icmp slt i32 %idx, %len
+ br i1 %abc, label %in.bounds, label %out.of.bounds
+
+in.bounds:
+ %addr = getelementptr i32, i32* %arr, i32 %idx
+ store i32 0, i32* %addr
+ %next = icmp ugt i32 %idx.next, 6
+ br i1 %next, label %loop, label %exit
+
+out.of.bounds:
+ ret void
+
+exit:
+ ret void
+}
+
+; IV = MAX_INT - 7; IV >u 5; IV -= 7; 10 <= Len <= 50. IRCE is not allowed,
+; because we can cross the 0 border.
+define void @test_07(i32* %arr, i32* %a_len_ptr) {
+
+; CHECK: @test_07(
+
+entry:
+ %len = load i32, i32* %a_len_ptr, !range !3
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 2147483640, %entry ], [ %idx.next, %in.bounds ]
+ %idx.next = add i32 %idx, -7
+ %abc = icmp slt i32 %idx, %len
+ br i1 %abc, label %in.bounds, label %out.of.bounds
+
+in.bounds:
+ %addr = getelementptr i32, i32* %arr, i32 %idx
+ store i32 0, i32* %addr
+ %next = icmp ugt i32 %idx.next, 5
+ br i1 %next, label %loop, label %exit
+
+out.of.bounds:
+ ret void
+
+exit:
+ ret void
+}
+
+; IV = MAX_INT; IV >u 6; IV -= 7; 10 <= Len <= 50. IRCE is allowed.
+define void @test_08(i32* %arr, i32* %a_len_ptr) {
+
+; CHECK: @test_08(
+; CHECK: entry:
+; CHECK-NEXT: %len = load i32, i32* %a_len_ptr
+; CHECK-NEXT: %exit.preloop.at = add i32 %len, -1
+; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp ugt i32 2147483647, %exit.preloop.at
+; CHECK-NEXT: br i1 [[COND1]], label %loop.preloop.preheader, label %preloop.pseudo.exit
+; CHECK: loop.preloop.preheader:
+; CHECK-NEXT: br label %loop.preloop
+; CHECK: mainloop:
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: %idx = phi i32 [ %idx.preloop.copy, %mainloop ], [ %idx.next, %in.bounds ]
+; CHECK-NEXT: %idx.next = add i32 %idx, -7
+; CHECK-NEXT: %abc = icmp slt i32 %idx, %len
+; CHECK-NEXT: br i1 true, label %in.bounds, label %out.of.bounds.loopexit1
+; CHECK: in.bounds:
+; CHECK-NEXT: %addr = getelementptr i32, i32* %arr, i32 %idx
+; CHECK-NEXT: store i32 0, i32* %addr
+; CHECK-NEXT: %next = icmp ugt i32 %idx.next, 6
+; CHECK-NEXT: br i1 %next, label %loop, label %exit.loopexit
+; CHECK: loop.preloop:
+; CHECK-NEXT: %idx.preloop = phi i32 [ %idx.next.preloop, %in.bounds.preloop ], [ 2147483647, %loop.preloop.preheader ]
+; CHECK-NEXT: %idx.next.preloop = add i32 %idx.preloop, -7
+; CHECK-NEXT: %abc.preloop = icmp slt i32 %idx.preloop, %len
+; CHECK-NEXT: br i1 %abc.preloop, label %in.bounds.preloop, label %out.of.bounds.loopexit
+; CHECK: in.bounds.preloop:
+; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop
+; CHECK-NEXT: store i32 0, i32* %addr.preloop
+; CHECK-NEXT: %next.preloop = icmp ugt i32 %idx.next.preloop, 6
+; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp ugt i32 %idx.next.preloop, %exit.preloop.at
+; CHECK-NEXT: br i1 [[COND2]], label %loop.preloop, label %preloop.exit.selector
+; CHECK: preloop.exit.selector:
+; CHECK-NEXT: %idx.next.preloop.lcssa = phi i32 [ %idx.next.preloop, %in.bounds.preloop ]
+; CHECK-NEXT: [[COND3:%[^ ]+]] = icmp ugt i32 %idx.next.preloop.lcssa, 6
+; CHECK-NEXT: br i1 [[COND3]], label %preloop.pseudo.exit, label %exit
+; CHECK: preloop.pseudo.exit:
+; CHECK-NEXT: %idx.preloop.copy = phi i32 [ 2147483647, %entry ], [ %idx.next.preloop.lcssa, %preloop.exit.selector ]
+; CHECK-NEXT: %indvar.end = phi i32 [ 2147483647, %entry ], [ %idx.next.preloop.lcssa, %preloop.exit.selector ]
+; CHECK-NEXT: br label %mainloop
+
+entry:
+ %len = load i32, i32* %a_len_ptr, !range !3
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 2147483647, %entry ], [ %idx.next, %in.bounds ]
+ %idx.next = add i32 %idx, -7
+ %abc = icmp slt i32 %idx, %len
+ br i1 %abc, label %in.bounds, label %out.of.bounds
+
+in.bounds:
+ %addr = getelementptr i32, i32* %arr, i32 %idx
+ store i32 0, i32* %addr
+ %next = icmp ugt i32 %idx.next, 6
+ br i1 %next, label %loop, label %exit
+
+out.of.bounds:
+ ret void
+
+exit:
+ ret void
+}
+
+!0 = !{i32 0, i32 50}
+!1 = !{i32 0, i32 2147483640}
+!2 = !{i32 0, i32 2147483641}
+!3 = !{i32 10, i32 50}
More information about the llvm-commits
mailing list