[llvm] r260938 - [SCEVExpander] Make findExistingExpansion smarter
Junmo Park via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 15 22:46:58 PST 2016
Author: flyingforyou
Date: Tue Feb 16 00:46:58 2016
New Revision: 260938
URL: http://llvm.org/viewvc/llvm-project?rev=260938&view=rev
Log:
[SCEVExpander] Make findExistingExpansion smarter
Summary:
Extend findExistingExpansion so that it can reuse an existing value in ExprValueMap.
This patch gives a 0.3~0.5% performance improvement on
benchmarks (test-suite, spec2000, spec2006, commercial benchmarks).
Reviewers: mzolotukhin, sanjoy, zzheng
Differential Revision: http://reviews.llvm.org/D15559
Modified:
llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h
llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp
llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp
llvm/trunk/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll
Modified: llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h?rev=260938&r1=260937&r2=260938&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h (original)
+++ llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h Tue Feb 16 00:46:58 2016
@@ -263,6 +263,9 @@ namespace llvm {
const SCEV *const *op_end,
PointerType *PTy, Type *Ty, Value *V);
+ /// \brief Find a previous Value in ExprValueMap for expand.
+ Value *FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt);
+
Value *expand(const SCEV *S);
/// \brief Insert code to directly compute the specified SCEV expression
Modified: llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp?rev=260938&r1=260937&r2=260938&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp (original)
+++ llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp Tue Feb 16 00:46:58 2016
@@ -1598,6 +1598,34 @@ Value *SCEVExpander::expandCodeFor(const
return V;
}
+Value *SCEVExpander::FindValueInExprValueMap(const SCEV *S,
+ const Instruction *InsertPt) {
+ SetVector<Value *> *Set = SE.getSCEVValues(S);
+ // If the expansion is not in CanonicalMode, and the SCEV contains any
+ // sub scAddRecExpr type SCEV, it is required to expand the SCEV literally.
+ if (CanonicalMode || !SE.containsAddRecurrence(S)) {
+ // If S is scConstant, it may be worse to reuse an existing Value.
+ if (S->getSCEVType() != scConstant && Set) {
+ // Choose a Value from the set which dominates the insertPt.
+ // insertPt should be inside the Value's parent loop so as not to break
+ // the LCSSA form.
+ for (auto const &Ent : *Set) {
+ Instruction *EntInst = nullptr;
+ if (Ent && isa<Instruction>(Ent) &&
+ (EntInst = cast<Instruction>(Ent)) &&
+ S->getType() == Ent->getType() &&
+ EntInst->getFunction() == InsertPt->getFunction() &&
+ SE.DT.dominates(EntInst, InsertPt) &&
+ (SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
+ SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt))) {
+ return Ent;
+ }
+ }
+ }
+ }
+ return nullptr;
+}
+
// The expansion of SCEV will either reuse a previous Value in ExprValueMap,
// or expand the SCEV literally. Specifically, if the expansion is in LSRMode,
// and the SCEV contains any sub scAddRecExpr type SCEV, it will be expanded
@@ -1643,31 +1671,8 @@ Value *SCEVExpander::expand(const SCEV *
Builder.SetInsertPoint(InsertPt);
// Expand the expression into instructions.
- SetVector<Value *> *Set = SE.getSCEVValues(S);
- Value *V = nullptr;
- // If the expansion is not in CanonicalMode, and the SCEV contains any
- // sub scAddRecExpr type SCEV, it is required to expand the SCEV literally.
- if (CanonicalMode || !SE.containsAddRecurrence(S)) {
- // If S is scConstant, it may be worse to reuse an existing Value.
- if (S->getSCEVType() != scConstant && Set) {
- // Choose a Value from the set which dominates the insertPt.
- // insertPt should be inside the Value's parent loop so as not to break
- // the LCSSA form.
- for (auto const &Ent : *Set) {
- Instruction *EntInst = nullptr;
- if (Ent && isa<Instruction>(Ent) &&
- (EntInst = cast<Instruction>(Ent)) &&
- S->getType() == Ent->getType() &&
- EntInst->getFunction() == InsertPt->getFunction() &&
- SE.DT.dominates(EntInst, InsertPt) &&
- (SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
- SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt))) {
- V = Ent;
- break;
- }
- }
- }
- }
+ Value *V = FindValueInExprValueMap(S, InsertPt);
+
if (!V)
V = visit(S);
@@ -1877,6 +1882,11 @@ Value *SCEVExpander::findExistingExpansi
return RHS;
}
+ // Use expand's logic which is used for reusing a previous Value in
+ // ExprValueMap.
+ if (Value *Val = FindValueInExprValueMap(S, At))
+ return Val;
+
// There is potential to make this significantly smarter, but this simple
// heuristic already gets some interesting cases.
Modified: llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp?rev=260938&r1=260937&r2=260938&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp Tue Feb 16 00:46:58 2016
@@ -311,9 +311,12 @@ bool llvm::UnrollRuntimeLoopProlog(Loop
return false;
BasicBlock *Header = L->getHeader();
+ BasicBlock *PH = L->getLoopPreheader();
+ BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
const DataLayout &DL = Header->getModule()->getDataLayout();
SCEVExpander Expander(*SE, DL, "loop-unroll");
- if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L))
+ if (!AllowExpensiveTripCount &&
+ Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR))
return false;
// We only handle cases when the unroll factor is a power of 2.
@@ -331,13 +334,12 @@ bool llvm::UnrollRuntimeLoopProlog(Loop
if (Loop *ParentLoop = L->getParentLoop())
SE->forgetLoop(ParentLoop);
- BasicBlock *PH = L->getLoopPreheader();
BasicBlock *Latch = L->getLoopLatch();
// It helps to split the original preheader twice, one for the end of the
// prolog code and one for a new loop preheader.
BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI);
BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI);
- BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
+ PreHeaderBR = cast<BranchInst>(PH->getTerminator());
// Compute the number of extra iterations required, which is:
// extra iterations = run-time trip count % (loop unroll factor + 1)
Modified: llvm/trunk/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll?rev=260938&r1=260937&r2=260938&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll (original)
+++ llvm/trunk/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll Tue Feb 16 00:46:58 2016
@@ -24,4 +24,38 @@ loopexit: ;
ret i32 0
}
+;; Though SCEV for loop tripcount contains division,
+;; it shouldn't be considered expensive, since the division already
+;; exists in the code and we don't need to expand it once more.
+;; Thus, it shouldn't prevent us from unrolling the loop.
+
+define i32 @test2(i64* %loc, i64 %conv7) {
+; CHECK-LABEL: @test2(
+; CHECK: udiv
+; CHECK: udiv
+; CHECK-NOT: udiv
+; CHECK-LABEL: for.body.prol
+entry:
+ %rem0 = load i64, i64* %loc, align 8
+ %ExpensiveComputation = udiv i64 %rem0, 42 ; <<< Extra computations are added to the trip-count expression
+ br label %bb1
+bb1:
+ %div11 = udiv i64 %ExpensiveComputation, %conv7
+ %cmp.i38 = icmp ugt i64 %div11, 1
+ %div12 = select i1 %cmp.i38, i64 %div11, i64 1
+ br label %for.body
+for.body:
+ %rem1 = phi i64 [ %rem0, %bb1 ], [ %rem2, %for.body ]
+ %k1 = phi i64 [ %div12, %bb1 ], [ %dec, %for.body ]
+ %mul1 = mul i64 %rem1, 48271
+ %rem2 = urem i64 %mul1, 2147483647
+ %dec = add i64 %k1, -1
+ %cmp = icmp eq i64 %dec, 0
+ br i1 %cmp, label %exit, label %for.body
+exit:
+ %rem3 = phi i64 [ %rem2, %for.body ]
+ store i64 %rem3, i64* %loc, align 8
+ ret i32 0
+}
+
!0 = !{i64 1, i64 100}
More information about the llvm-commits
mailing list