[PATCH] D15537: limit the number of instructions per block examined by dead store elimination

Thu Dec 17 14:11:41 PST 2015

Have you looked at other ways to address the compile time impact? I'm 
uncomfortable with relying on a simple cutoff unless that's the 
absolutely last resort.

Philip

On 12/16/2015 09:26 AM, Bob Haarman via llvm-commits wrote:
> Dead store elimination gets very expensive when large numbers of
> instructions need to be analyzed. This patch limits the number of
> instructions analyzed per store to the value of the
> memdep-block-scan-limit parameter (which defaults to 100). This resulted
>  in no observed difference in performance of the generated code, and no
> change in the statistics for the dead store elimination pass, but
> improved compilation time on some files by more than an order of
> magnitude.
>
> http://reviews.llvm.org/D15537
>
> (Originally submitted via Phabricator, but I don't see a message to 
> the list, so posting to the list to make sure people see this.)
>
> Index: include/llvm/Analysis/MemoryDependenceAnalysis.h
> ===================================================================
> --- include/llvm/Analysis/MemoryDependenceAnalysis.h
> +++ include/llvm/Analysis/MemoryDependenceAnalysis.h
> @@ -348,6 +348,11 @@
>      ///
>      void getAnalysisUsage(AnalysisUsage &AU) const override;
>
> +    /// Some methods limit the number of instructions they will examine.
> +    /// The return value of this method is the default limit that 
> will be
> +    /// used if no limit is explicitly passed in.
> +    unsigned getDefaultBlockScanLimit() const;
> +
>      /// getDependency - Return the instruction on which a memory 
> operation
>      /// depends.  See the class comment for more details.  It is 
> illegal to call
>      /// this on non-memory instructions.
> @@ -405,18 +410,25 @@
>      /// annotated to the query instruction to refine the result.
>      ///
>      /// Note that this is an uncached query, and thus may be 
> inefficient.
> +    /// Limit can be used to set the maximum number of instructions that
> +    /// will be examined to find the pointer dependency. On return, 
> it will
> +    /// be set to the number of instructions left to examine. If a 
> null pointer
> +    /// is passed in, the limit will default to the value of the
> +    /// -memdep-block-scan-limit parameter.
>      ///
>      MemDepResult getPointerDependencyFrom(const MemoryLocation &Loc,
>                                            bool isLoad,
>                                            BasicBlock::iterator ScanIt,
>                                            BasicBlock *BB,
> -                                          Instruction *QueryInst = 
> nullptr);
> +                                          Instruction *QueryInst = 
> nullptr,
> +                                          unsigned *Limit = nullptr);
>
>      MemDepResult getSimplePointerDependencyFrom(const MemoryLocation 
> &MemLoc,
>                                                  bool isLoad,
> BasicBlock::iterator ScanIt,
>                                                  BasicBlock *BB,
> -                                                Instruction *QueryInst);
> +                                                Instruction *QueryInst,
> +                                                unsigned *Limit = 
> nullptr);
>
>      /// This analysis looks for other loads and stores with 
> invariant.group
>      /// metadata and the same pointer operand. Returns Unknown if it 
> does not
> Index: lib/Analysis/MemoryDependenceAnalysis.cpp
> ===================================================================
> --- lib/Analysis/MemoryDependenceAnalysis.cpp
> +++ lib/Analysis/MemoryDependenceAnalysis.cpp
> @@ -98,6 +98,10 @@
>    AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
>  }
>
> +unsigned MemoryDependenceAnalysis::getDefaultBlockScanLimit() const {
> +  return BlockScanLimit;
> +}
> +
>  bool MemoryDependenceAnalysis::runOnFunction(Function &F) {
>    AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
>    AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
> @@ -378,7 +382,7 @@
>  /// annotated to the query instruction to refine the result.
>  MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom(
>      const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator 
> ScanIt,
> -    BasicBlock *BB, Instruction *QueryInst) {
> +    BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
>
>    if (QueryInst != nullptr) {
>      if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
> @@ -389,7 +393,8 @@
>          return invariantGroupDependency;
>      }
>    }
> -  return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, 
> QueryInst);
> +  return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, 
> QueryInst,
> +                                        Limit);
>  }
>
>  MemDepResult
> @@ -447,11 +452,16 @@
>
>  MemDepResult MemoryDependenceAnalysis::getSimplePointerDependencyFrom(
>      const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator 
> ScanIt,
> -    BasicBlock *BB, Instruction *QueryInst) {
> +    BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
> +
> +  if (!Limit) {
> +    unsigned DefaultLimit = BlockScanLimit;
> +    return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, 
> QueryInst,
> +                                          &DefaultLimit);
> +  }
>
>    const Value *MemLocBase = nullptr;
>    int64_t MemLocOffset = 0;
> -  unsigned Limit = BlockScanLimit;
>    bool isInvariantLoad = false;
>
>    // We must be careful with atomic accesses, as they may allow 
> another thread
> @@ -510,8 +520,8 @@
>
>      // Limit the amount of scanning we do so we don't end up with 
> quadratic
>      // running time on extreme testcases.
> -    --Limit;
> -    if (!Limit)
> +    --*Limit;
> +    if (!*Limit)
>        return MemDepResult::getUnknown();
>
>      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
> @@ -547,7 +557,7 @@
>            return MemDepResult::getClobber(LI);
>          // Otherwise, volatile doesn't imply any special ordering
>        }
> -
> +
>        // Atomic loads have complications involved.
>        // A Monotonic (or higher) load is OK if the query inst is 
> itself not atomic.
>        // FIXME: This is overly conservative.
> @@ -960,7 +970,7 @@
>    assert(Loc.Ptr->getType()->isPointerTy() &&
>           "Can't get pointer deps of a non-pointer!");
>    Result.clear();
> -
> +
>    // This routine does not expect to deal with volatile instructions.
>    // Doing so would require piping through the QueryInst all the way 
> through.
>    // TODO: volatiles can't be elided, but they can be reordered with 
> other
> Index: lib/Transforms/Scalar/DeadStoreElimination.cpp
> ===================================================================
> --- lib/Transforms/Scalar/DeadStoreElimination.cpp
> +++ lib/Transforms/Scalar/DeadStoreElimination.cpp
> @@ -566,6 +566,13 @@
>      if (!Loc.Ptr)
>        continue;
>
> +    // Loop until we find a store we can eliminate or a load that
> +    // invalidates the analysis. Without an upper bound on the number of
> +    // instructions examined, this analysis can become very 
> time-consuming.
> +    // However, the potential gain diminishes as we process more 
> instructions
> +    // without eliminating any of them. Therefore, we limit the 
> number of
> +    // instructions we look at.
> +    auto Limit = MD->getDefaultBlockScanLimit();
>      while (InstDep.isDef() || InstDep.isClobber()) {
>        // Get the memory clobbered by the instruction we depend on.  
> MemDep will
>        // skip any instructions that 'Loc' clearly doesn't interact 
> with.  If we
> @@ -645,8 +652,9 @@
>        if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref)
>          break;
>
> -      InstDep = MD->getPointerDependencyFrom(Loc, false,
> - DepWrite->getIterator(), &BB);
> +      InstDep = MD->getPointerDependencyFrom(Loc, /*isLoad=*/ false,
> + DepWrite->getIterator(), &BB,
> +                                             /*QueryInst=*/ nullptr, 
> &Limit);
>      }
>    }
>
> @@ -873,7 +881,7 @@
>      }
>
>      if (auto CS = CallSite(&*BBI)) {
> -      // Remove allocation function calls from the list of dead stack 
> objects;
> +      // Remove allocation function calls from the list of dead stack 
> objects;
>        // there can't be any references before the definition.
>        if (isAllocLikeFn(&*BBI, TLI))
>          DeadStackObjects.remove(&*BBI);
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits