[PATCH] D15537: limit the number of instructions per block examined by dead store elimination
Bob Haarman via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 16 09:26:07 PST 2015
Dead store elimination gets very expensive when large numbers of
instructions need to be analyzed. This patch limits the number of
instructions analyzed per store to the value of the
memdep-block-scan-limit parameter (which defaults to 100). This resulted
in no observed difference in performance of the generated code, and no
change in the statistics for the dead store elimination pass, but
improved compilation time on some files by more than an order of
magnitude.
http://reviews.llvm.org/D15537
(Originally submitted via Phabricator, but I don't see a message to the list, so posting to the list to make sure people see this.)
Index: include/llvm/Analysis/MemoryDependenceAnalysis.h
===================================================================
--- include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -348,6 +348,11 @@
///
void getAnalysisUsage(AnalysisUsage &AU) const override;
+ /// Some methods limit the number of instructions they will examine.
+ /// The return value of this method is the default limit that will be
+ /// used if no limit is explicitly passed in.
+ unsigned getDefaultBlockScanLimit() const;
+
/// getDependency - Return the instruction on which a memory operation
/// depends. See the class comment for more details. It is illegal to call
/// this on non-memory instructions.
@@ -405,18 +410,25 @@
/// annotated to the query instruction to refine the result.
///
/// Note that this is an uncached query, and thus may be inefficient.
+ /// Limit can be used to set the maximum number of instructions that
+ /// will be examined to find the pointer dependency. On return, it will
+ /// be set to the number of instructions left to examine. If a null pointer
+ /// is passed in, the limit will default to the value of the
+ /// -memdep-block-scan-limit parameter.
///
MemDepResult getPointerDependencyFrom(const MemoryLocation &Loc,
bool isLoad,
BasicBlock::iterator ScanIt,
BasicBlock *BB,
- Instruction *QueryInst = nullptr);
+ Instruction *QueryInst = nullptr,
+ unsigned *Limit = nullptr);
MemDepResult getSimplePointerDependencyFrom(const MemoryLocation &MemLoc,
bool isLoad,
BasicBlock::iterator ScanIt,
BasicBlock *BB,
- Instruction *QueryInst);
+ Instruction *QueryInst,
+ unsigned *Limit = nullptr);
/// This analysis looks for other loads and stores with invariant.group
/// metadata and the same pointer operand. Returns Unknown if it does not
Index: lib/Analysis/MemoryDependenceAnalysis.cpp
===================================================================
--- lib/Analysis/MemoryDependenceAnalysis.cpp
+++ lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -98,6 +98,10 @@
AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
}
+unsigned MemoryDependenceAnalysis::getDefaultBlockScanLimit() const {
+ return BlockScanLimit;
+}
+
bool MemoryDependenceAnalysis::runOnFunction(Function &F) {
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
@@ -378,7 +382,7 @@
/// annotated to the query instruction to refine the result.
MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom(
const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
- BasicBlock *BB, Instruction *QueryInst) {
+ BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
if (QueryInst != nullptr) {
if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
@@ -389,7 +393,8 @@
return invariantGroupDependency;
}
}
- return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst);
+ return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst,
+ Limit);
}
MemDepResult
@@ -447,11 +452,16 @@
MemDepResult MemoryDependenceAnalysis::getSimplePointerDependencyFrom(
const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
- BasicBlock *BB, Instruction *QueryInst) {
+ BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
+
+ if (!Limit) {
+ unsigned DefaultLimit = BlockScanLimit;
+ return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst,
+ &DefaultLimit);
+ }
const Value *MemLocBase = nullptr;
int64_t MemLocOffset = 0;
- unsigned Limit = BlockScanLimit;
bool isInvariantLoad = false;
// We must be careful with atomic accesses, as they may allow another thread
@@ -510,8 +520,8 @@
// Limit the amount of scanning we do so we don't end up with quadratic
// running time on extreme testcases.
- --Limit;
- if (!Limit)
+ --*Limit;
+ if (!*Limit)
return MemDepResult::getUnknown();
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
@@ -547,7 +557,7 @@
return MemDepResult::getClobber(LI);
// Otherwise, volatile doesn't imply any special ordering
}
-
+
// Atomic loads have complications involved.
// A Monotonic (or higher) load is OK if the query inst is itself not atomic.
// FIXME: This is overly conservative.
@@ -960,7 +970,7 @@
assert(Loc.Ptr->getType()->isPointerTy() &&
"Can't get pointer deps of a non-pointer!");
Result.clear();
-
+
// This routine does not expect to deal with volatile instructions.
// Doing so would require piping through the QueryInst all the way through.
// TODO: volatiles can't be elided, but they can be reordered with other
Index: lib/Transforms/Scalar/DeadStoreElimination.cpp
===================================================================
--- lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -566,6 +566,13 @@
if (!Loc.Ptr)
continue;
+ // Loop until we find a store we can eliminate or a load that
+ // invalidates the analysis. Without an upper bound on the number of
+ // instructions examined, this analysis can become very time-consuming.
+ // However, the potential gain diminishes as we process more instructions
+ // without eliminating any of them. Therefore, we limit the number of
+ // instructions we look at.
+ auto Limit = MD->getDefaultBlockScanLimit();
while (InstDep.isDef() || InstDep.isClobber()) {
// Get the memory clobbered by the instruction we depend on. MemDep will
// skip any instructions that 'Loc' clearly doesn't interact with. If we
@@ -645,8 +652,9 @@
if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref)
break;
- InstDep = MD->getPointerDependencyFrom(Loc, false,
- DepWrite->getIterator(), &BB);
+ InstDep = MD->getPointerDependencyFrom(Loc, /*isLoad=*/ false,
+ DepWrite->getIterator(), &BB,
+ /*QueryInst=*/ nullptr, &Limit);
}
}
@@ -873,7 +881,7 @@
}
if (auto CS = CallSite(&*BBI)) {
- // Remove allocation function calls from the list of dead stack objects;
+ // Remove allocation function calls from the list of dead stack objects;
// there can't be any references before the definition.
if (isAllocLikeFn(&*BBI, TLI))
DeadStackObjects.remove(&*BBI);
More information about the llvm-commits
mailing list