[PATCH] D20146: [PM] Port DSE to the new pass manager.

Justin Bogner via llvm-commits llvm-commits at lists.llvm.org
Sat May 14 09:44:14 PDT 2016


Jake VanAdrighem <jvanadrighem at gmail.com> writes:
> JakeVanAdrighem updated this revision to Diff 57280.
> JakeVanAdrighem marked 3 inline comments as done.
> JakeVanAdrighem added a comment.
>
> Pull out all the member functions to be helpers. The diff here is less
> clean but this is just because the helpers needed to be moved around.

An easy way to make the diffs more readable is to split this into two
patches, "convert these methods to free functions" and "add the
new-style pass". Feel free to do that if you like.

This is pretty close. A couple of comments below.

>
> Repository:
>   rL LLVM
>
> http://reviews.llvm.org/D20146
>
> Files:
>   include/llvm/InitializePasses.h
>   include/llvm/Transforms/Scalar/DeadStoreElimination.h
>   lib/Passes/PassBuilder.cpp
>   lib/Passes/PassRegistry.def
>   lib/Transforms/Scalar/DeadStoreElimination.cpp
>   lib/Transforms/Scalar/Scalar.cpp
>   test/Transforms/DeadStoreElimination/simple.ll
>
> Index: test/Transforms/DeadStoreElimination/simple.ll
> ===================================================================
> --- test/Transforms/DeadStoreElimination/simple.ll
> +++ test/Transforms/DeadStoreElimination/simple.ll
> @@ -1,4 +1,5 @@
>  ; RUN: opt < %s -basicaa -dse -S | FileCheck %s
> +; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s
>  target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
>  
>  declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
> Index: lib/Transforms/Scalar/Scalar.cpp
> ===================================================================
> --- lib/Transforms/Scalar/Scalar.cpp
> +++ lib/Transforms/Scalar/Scalar.cpp
> @@ -40,7 +40,7 @@
>    initializeDCELegacyPassPass(Registry);
>    initializeDeadInstEliminationPass(Registry);
>    initializeScalarizerPass(Registry);
> -  initializeDSEPass(Registry);
> +  initializeDSELegacyPassPass(Registry);
>    initializeGVNLegacyPassPass(Registry);
>    initializeEarlyCSELegacyPassPass(Registry);
>    initializeFlattenCFGPassPass(Registry);
> Index: lib/Transforms/Scalar/DeadStoreElimination.cpp
> ===================================================================
> --- lib/Transforms/Scalar/DeadStoreElimination.cpp
> +++ lib/Transforms/Scalar/DeadStoreElimination.cpp
> @@ -15,7 +15,7 @@
>  //
>  //===----------------------------------------------------------------------===//
>  
> -#include "llvm/Transforms/Scalar.h"
> +#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
>  #include "llvm/ADT/STLExtras.h"
>  #include "llvm/ADT/SetVector.h"
>  #include "llvm/ADT/Statistic.h"
> @@ -36,6 +36,7 @@
>  #include "llvm/Pass.h"
>  #include "llvm/Support/Debug.h"
>  #include "llvm/Support/raw_ostream.h"
> +#include "llvm/Transforms/Scalar.h"
>  #include "llvm/Transforms/Utils/Local.h"
>  using namespace llvm;
>  
> @@ -45,69 +46,6 @@
>  STATISTIC(NumFastStores, "Number of stores deleted");
>  STATISTIC(NumFastOther , "Number of other instrs removed");
>  
> -namespace {
> -  struct DSE : public FunctionPass {
> -    AliasAnalysis *AA;
> -    MemoryDependenceResults *MD;
> -    DominatorTree *DT;
> -    const TargetLibraryInfo *TLI;
> -
> -    static char ID; // Pass identification, replacement for typeid
> -    DSE() : FunctionPass(ID), AA(nullptr), MD(nullptr), DT(nullptr) {
> -      initializeDSEPass(*PassRegistry::getPassRegistry());
> -    }
> -
> -    bool runOnFunction(Function &F) override {
> -      if (skipFunction(F))
> -        return false;
> -
> -      AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
> -      MD = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
> -      DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
> -      TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
> -
> -      bool Changed = false;
> -      for (BasicBlock &I : F)
> -        // Only check non-dead blocks.  Dead blocks may have strange pointer
> -        // cycles that will confuse alias analysis.
> -        if (DT->isReachableFromEntry(&I))
> -          Changed |= runOnBasicBlock(I);
> -
> -      AA = nullptr; MD = nullptr; DT = nullptr;
> -      return Changed;
> -    }
> -
> -    bool runOnBasicBlock(BasicBlock &BB);
> -    bool MemoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI);
> -    bool HandleFree(CallInst *F);
> -    bool handleEndBlock(BasicBlock &BB);
> -    void RemoveAccessedObjects(const MemoryLocation &LoadedLoc,
> -                               SmallSetVector<Value *, 16> &DeadStackObjects,
> -                               const DataLayout &DL);
> -
> -    void getAnalysisUsage(AnalysisUsage &AU) const override {
> -      AU.setPreservesCFG();
> -      AU.addRequired<DominatorTreeWrapperPass>();
> -      AU.addRequired<AAResultsWrapperPass>();
> -      AU.addRequired<MemoryDependenceWrapperPass>();
> -      AU.addRequired<TargetLibraryInfoWrapperPass>();
> -      AU.addPreserved<DominatorTreeWrapperPass>();
> -      AU.addPreserved<GlobalsAAWrapperPass>();
> -      AU.addPreserved<MemoryDependenceWrapperPass>();
> -    }
> -  };
> -}
> -
> -char DSE::ID = 0;
> -INITIALIZE_PASS_BEGIN(DSE, "dse", "Dead Store Elimination", false, false)
> -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
> -INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
> -INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
> -INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
> -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
> -INITIALIZE_PASS_END(DSE, "dse", "Dead Store Elimination", false, false)
> -
> -FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
>  
>  //===----------------------------------------------------------------------===//
>  // Helper functions
> @@ -503,212 +441,13 @@
>  }
>  
>  
> -//===----------------------------------------------------------------------===//
> -// DSE Pass
> -//===----------------------------------------------------------------------===//
> -
> -bool DSE::runOnBasicBlock(BasicBlock &BB) {
> -  const DataLayout &DL = BB.getModule()->getDataLayout();
> -  bool MadeChange = false;
> -
> -  // Do a top-down walk on the BB.
> -  for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
> -    Instruction *Inst = &*BBI++;
> -
> -    // Handle 'free' calls specially.
> -    if (CallInst *F = isFreeCall(Inst, TLI)) {
> -      MadeChange |= HandleFree(F);
> -      continue;
> -    }
> -
> -    // If we find something that writes memory, get its memory dependence.
> -    if (!hasMemoryWrite(Inst, *TLI))
> -      continue;
> -
> -    // If we're storing the same value back to a pointer that we just
> -    // loaded from, then the store can be removed.
> -    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
> -
> -      auto RemoveDeadInstAndUpdateBBI = [&](Instruction *DeadInst) {
> -        // DeleteDeadInstruction can delete the current instruction.  Save BBI
> -        // in case we need it.
> -        WeakVH NextInst(&*BBI);
> -
> -        DeleteDeadInstruction(DeadInst, *MD, *TLI);
> -
> -        if (!NextInst) // Next instruction deleted.
> -          BBI = BB.begin();
> -        else if (BBI != BB.begin()) // Revisit this instruction if possible.
> -          --BBI;
> -        ++NumRedundantStores;
> -        MadeChange = true;
> -      };
> -
> -      if (LoadInst *DepLoad = dyn_cast<LoadInst>(SI->getValueOperand())) {
> -        if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
> -            isRemovable(SI) &&
> -            MemoryIsNotModifiedBetween(DepLoad, SI)) {
> -
> -          DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n  "
> -                       << "LOAD: " << *DepLoad << "\n  STORE: " << *SI << '\n');
> -
> -          RemoveDeadInstAndUpdateBBI(SI);
> -          continue;
> -        }
> -      }
> -
> -      // Remove null stores into the calloc'ed objects
> -      Constant *StoredConstant = dyn_cast<Constant>(SI->getValueOperand());
> -
> -      if (StoredConstant && StoredConstant->isNullValue() &&
> -          isRemovable(SI)) {
> -        Instruction *UnderlyingPointer = dyn_cast<Instruction>(
> -            GetUnderlyingObject(SI->getPointerOperand(), DL));
> -
> -        if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) &&
> -            MemoryIsNotModifiedBetween(UnderlyingPointer, SI)) {
> -          DEBUG(dbgs()
> -                << "DSE: Remove null store to the calloc'ed object:\n  DEAD: "
> -                << *Inst << "\n  OBJECT: " << *UnderlyingPointer << '\n');
> -
> -          RemoveDeadInstAndUpdateBBI(SI);
> -          continue;
> -        }
> -      }
> -    }
> -
> -    MemDepResult InstDep = MD->getDependency(Inst);
> -
> -    // Ignore any store where we can't find a local dependence.
> -    // FIXME: cross-block DSE would be fun. :)
> -    if (!InstDep.isDef() && !InstDep.isClobber())
> -      continue;
> -
> -    // Figure out what location is being stored to.
> -    MemoryLocation Loc = getLocForWrite(Inst, *AA);
> -
> -    // If we didn't get a useful location, fail.
> -    if (!Loc.Ptr)
> -      continue;
> -
> -    while (InstDep.isDef() || InstDep.isClobber()) {
> -      // Get the memory clobbered by the instruction we depend on.  MemDep will
> -      // skip any instructions that 'Loc' clearly doesn't interact with.  If we
> -      // end up depending on a may- or must-aliased load, then we can't optimize
> -      // away the store and we bail out.  However, if we depend on on something
> -      // that overwrites the memory location we *can* potentially optimize it.
> -      //
> -      // Find out what memory location the dependent instruction stores.
> -      Instruction *DepWrite = InstDep.getInst();
> -      MemoryLocation DepLoc = getLocForWrite(DepWrite, *AA);
> -      // If we didn't get a useful location, or if it isn't a size, bail out.
> -      if (!DepLoc.Ptr)
> -        break;
> -
> -      // If we find a write that is a) removable (i.e., non-volatile), b) is
> -      // completely obliterated by the store to 'Loc', and c) which we know that
> -      // 'Inst' doesn't load from, then we can remove it.
> -      if (isRemovable(DepWrite) &&
> -          !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {
> -        int64_t InstWriteOffset, DepWriteOffset;
> -        OverwriteResult OR =
> -            isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset);
> -        if (OR == OverwriteComplete) {
> -          DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: "
> -                << *DepWrite << "\n  KILLER: " << *Inst << '\n');
> -
> -          // Delete the store and now-dead instructions that feed it.
> -          DeleteDeadInstruction(DepWrite, *MD, *TLI);
> -          ++NumFastStores;
> -          MadeChange = true;
> -
> -          // DeleteDeadInstruction can delete the current instruction in loop
> -          // cases, reset BBI.
> -          BBI = Inst->getIterator();
> -          if (BBI != BB.begin())
> -            --BBI;
> -          break;
> -        } else if ((OR == OverwriteEnd && isShortenableAtTheEnd(DepWrite)) ||
> -                   ((OR == OverwriteBegin &&
> -                     isShortenableAtTheBeginning(DepWrite)))) {
> -          // TODO: base this on the target vector size so that if the earlier
> -          // store was too small to get vector writes anyway then its likely
> -          // a good idea to shorten it
> -          // Power of 2 vector writes are probably always a bad idea to optimize
> -          // as any store/memset/memcpy is likely using vector instructions so
> -          // shortening it to not vector size is likely to be slower
> -          MemIntrinsic *DepIntrinsic = cast<MemIntrinsic>(DepWrite);
> -          unsigned DepWriteAlign = DepIntrinsic->getAlignment();
> -          bool IsOverwriteEnd = (OR == OverwriteEnd);
> -          if (!IsOverwriteEnd)
> -            InstWriteOffset = int64_t(InstWriteOffset + Loc.Size);
> -
> -          if ((llvm::isPowerOf2_64(InstWriteOffset) &&
> -               DepWriteAlign <= InstWriteOffset) ||
> -              ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
> -
> -            DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW "
> -                         << (IsOverwriteEnd ? "END" : "BEGIN") << ": "
> -                         << *DepWrite << "\n  KILLER (offset "
> -                         << InstWriteOffset << ", " << DepLoc.Size << ")"
> -                         << *Inst << '\n');
> -
> -            int64_t NewLength =
> -                IsOverwriteEnd
> -                    ? InstWriteOffset - DepWriteOffset
> -                    : DepLoc.Size - (InstWriteOffset - DepWriteOffset);
> -
> -            Value *DepWriteLength = DepIntrinsic->getLength();
> -            Value *TrimmedLength =
> -                ConstantInt::get(DepWriteLength->getType(), NewLength);
> -            DepIntrinsic->setLength(TrimmedLength);
> -
> -            if (!IsOverwriteEnd) {
> -              int64_t OffsetMoved = (InstWriteOffset - DepWriteOffset);
> -              Value *Indices[1] = {
> -                  ConstantInt::get(DepWriteLength->getType(), OffsetMoved)};
> -              GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
> -                  DepIntrinsic->getRawDest(), Indices, "", DepWrite);
> -              DepIntrinsic->setDest(NewDestGEP);
> -            }
> -            MadeChange = true;
> -          }
> -        }
> -      }
> -
> -      // If this is a may-aliased store that is clobbering the store value, we
> -      // can keep searching past it for another must-aliased pointer that stores
> -      // to the same location.  For example, in:
> -      //   store -> P
> -      //   store -> Q
> -      //   store -> P
> -      // we can remove the first store to P even though we don't know if P and Q
> -      // alias.
> -      if (DepWrite == &BB.front()) break;
> -
> -      // Can't look past this instruction if it might read 'Loc'.
> -      if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref)
> -        break;
> -
> -      InstDep = MD->getPointerDependencyFrom(Loc, false,
> -                                             DepWrite->getIterator(), &BB);
> -    }
> -  }
> -
> -  // If this block ends in a return, unwind, or unreachable, all allocas are
> -  // dead at its end, which means stores to them are also dead.
> -  if (BB.getTerminator()->getNumSuccessors() == 0)
> -    MadeChange |= handleEndBlock(BB);
> -
> -  return MadeChange;
> -}
> -
>  /// Returns true if the memory which is accessed by the second instruction is not
>  /// modified between the first and the second instruction.
>  /// Precondition: Second instruction must be dominated by the first
>  /// instruction.
> -bool DSE::MemoryIsNotModifiedBetween(Instruction *FirstI,
> -                                     Instruction *SecondI) {
> +bool MemoryIsNotModifiedBetween(Instruction *FirstI,
> +                                         Instruction *SecondI,
> +                                         AliasAnalysis *AA) {

Make these functions static now, since they're not exposed by any
headers. You'll also want to reflow these lines to fix the hanging
indentation (git-clang-format or clang-format-diff can do that for you)

>    SmallVector<BasicBlock *, 16> WorkList;
>    SmallPtrSet<BasicBlock *, 8> Visited;
>    BasicBlock::iterator FirstBBI(FirstI);
> @@ -779,7 +518,9 @@
>  
>  /// HandleFree - Handle frees of entire structures whose dependency is a store
>  /// to a field of that structure.
> -bool DSE::HandleFree(CallInst *F) {
> +bool HandleFree(CallInst *F, AliasAnalysis *AA,
> +                         MemoryDependenceResults *MD, DominatorTree *DT,
> +                         const TargetLibraryInfo *TLI) {
>    bool MadeChange = false;
>  
>    MemoryLocation Loc = MemoryLocation(F->getOperand(0));
> @@ -828,13 +569,43 @@
>    return MadeChange;
>  }
>  
> +/// RemoveAccessedObjects - Check to see if the specified location may alias any

Since the diff touches pretty well every line anyway, I'd probably
modernize these doc comments not to repeat the function name, and maybe
even lower case the first letter of the functions to match the modern
llvm style.

> +/// of the stack objects in the DeadStackObjects set.  If so, they become live
> +/// because the location is being loaded.
> +void RemoveAccessedObjects(
> +    const MemoryLocation &LoadedLoc,
> +    SmallSetVector<Value *, 16> &DeadStackObjects, const DataLayout &DL,
> +    AliasAnalysis *AA, const TargetLibraryInfo *TLI) {
> +  const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr, DL);
> +
> +  // A constant can't be in the dead pointer set.
> +  if (isa<Constant>(UnderlyingPointer))
> +    return;
> +
> +  // If the kill pointer can be easily reduced to an alloca, don't bother doing
> +  // extraneous AA queries.
> +  if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) {
> +    DeadStackObjects.remove(const_cast<Value*>(UnderlyingPointer));
> +    return;
> +  }
> +
> +  // Remove objects that could alias LoadedLoc.
> +  DeadStackObjects.remove_if([&](Value *I) {
> +    // See if the loaded location could alias the stack location.
> +    MemoryLocation StackLoc(I, getPointerSize(I, DL, *TLI));
> +    return !AA->isNoAlias(StackLoc, LoadedLoc);
> +  });
> +}
> +
>  /// handleEndBlock - Remove dead stores to stack-allocated locations in the
>  /// function end block.  Ex:
>  /// %A = alloca i32
>  /// ...
>  /// store i32 1, i32* %A
>  /// ret void
> -bool DSE::handleEndBlock(BasicBlock &BB) {
> +bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
> +                             MemoryDependenceResults *MD,
> +                             const TargetLibraryInfo *TLI) {
>    bool MadeChange = false;
>  
>    // Keep track of all of the stack objects that are dead at the end of the
> @@ -967,7 +738,7 @@
>  
>      // Remove any allocas from the DeadPointer set that are loaded, as this
>      // makes any stores above the access live.
> -    RemoveAccessedObjects(LoadedLoc, DeadStackObjects, DL);
> +    RemoveAccessedObjects(LoadedLoc, DeadStackObjects, DL, AA, TLI);
>  
>      // If all of the allocas were clobbered by the access then we're not going
>      // to find anything else to process.
> @@ -978,29 +749,276 @@
>    return MadeChange;
>  }
>  
> -/// RemoveAccessedObjects - Check to see if the specified location may alias any
> -/// of the stack objects in the DeadStackObjects set.  If so, they become live
> -/// because the location is being loaded.
> -void DSE::RemoveAccessedObjects(const MemoryLocation &LoadedLoc,
> -                                SmallSetVector<Value *, 16> &DeadStackObjects,
> -                                const DataLayout &DL) {
> -  const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr, DL);
> +bool runOnBasicBlock(BasicBlock &BB, AliasAnalysis *AA,
> +                              MemoryDependenceResults *MD, DominatorTree *DT,
> +                              const TargetLibraryInfo *TLI) {

`runOnBasicBlock` isn't a very clear name now that this is separate from
the patch. Maybe "eliminateDeadStores"? 

> +  const DataLayout &DL = BB.getModule()->getDataLayout();
> +  bool MadeChange = false;
>  
> -  // A constant can't be in the dead pointer set.
> -  if (isa<Constant>(UnderlyingPointer))
> -    return;
> +  // Do a top-down walk on the BB.
> +  for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
> +    Instruction *Inst = &*BBI++;
>  
> -  // If the kill pointer can be easily reduced to an alloca, don't bother doing
> -  // extraneous AA queries.
> -  if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) {
> -    DeadStackObjects.remove(const_cast<Value*>(UnderlyingPointer));
> -    return;
> +    // Handle 'free' calls specially.
> +    if (CallInst *F = isFreeCall(Inst, TLI)) {
> +      MadeChange |= HandleFree(F, AA, MD, DT, TLI);
> +      continue;
> +    }
> +
> +    // If we find something that writes memory, get its memory dependence.
> +    if (!hasMemoryWrite(Inst, *TLI))
> +      continue;
> +
> +    // If we're storing the same value back to a pointer that we just
> +    // loaded from, then the store can be removed.
> +    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
> +
> +      auto RemoveDeadInstAndUpdateBBI = [&](Instruction *DeadInst) {
> +        // DeleteDeadInstruction can delete the current instruction.  Save BBI
> +        // in case we need it.
> +        WeakVH NextInst(&*BBI);
> +
> +        DeleteDeadInstruction(DeadInst, *MD, *TLI);
> +
> +        if (!NextInst) // Next instruction deleted.
> +          BBI = BB.begin();
> +        else if (BBI != BB.begin()) // Revisit this instruction if possible.
> +          --BBI;
> +        ++NumRedundantStores;
> +        MadeChange = true;
> +      };
> +
> +      if (LoadInst *DepLoad = dyn_cast<LoadInst>(SI->getValueOperand())) {
> +        if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
> +            isRemovable(SI) &&
> +            MemoryIsNotModifiedBetween(DepLoad, SI, AA)) {
> +
> +          DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n  "
> +                       << "LOAD: " << *DepLoad << "\n  STORE: " << *SI << '\n');
> +
> +          RemoveDeadInstAndUpdateBBI(SI);
> +          continue;
> +        }
> +      }
> +
> +      // Remove null stores into the calloc'ed objects
> +      Constant *StoredConstant = dyn_cast<Constant>(SI->getValueOperand());
> +
> +      if (StoredConstant && StoredConstant->isNullValue() &&
> +          isRemovable(SI)) {
> +        Instruction *UnderlyingPointer = dyn_cast<Instruction>(
> +            GetUnderlyingObject(SI->getPointerOperand(), DL));
> +
> +        if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) &&
> +            MemoryIsNotModifiedBetween(UnderlyingPointer, SI, AA)) {
> +          DEBUG(dbgs()
> +                << "DSE: Remove null store to the calloc'ed object:\n  DEAD: "
> +                << *Inst << "\n  OBJECT: " << *UnderlyingPointer << '\n');
> +
> +          RemoveDeadInstAndUpdateBBI(SI);
> +          continue;
> +        }
> +      }
> +    }
> +
> +    MemDepResult InstDep = MD->getDependency(Inst);
> +
> +    // Ignore any store where we can't find a local dependence.
> +    // FIXME: cross-block DSE would be fun. :)
> +    if (!InstDep.isDef() && !InstDep.isClobber())
> +      continue;
> +
> +    // Figure out what location is being stored to.
> +    MemoryLocation Loc = getLocForWrite(Inst, *AA);
> +
> +    // If we didn't get a useful location, fail.
> +    if (!Loc.Ptr)
> +      continue;
> +
> +    while (InstDep.isDef() || InstDep.isClobber()) {
> +      // Get the memory clobbered by the instruction we depend on.  MemDep will
> +      // skip any instructions that 'Loc' clearly doesn't interact with.  If we
> +      // end up depending on a may- or must-aliased load, then we can't optimize
> +      // away the store and we bail out.  However, if we depend on on something
> +      // that overwrites the memory location we *can* potentially optimize it.
> +      //
> +      // Find out what memory location the dependent instruction stores.
> +      Instruction *DepWrite = InstDep.getInst();
> +      MemoryLocation DepLoc = getLocForWrite(DepWrite, *AA);
> +      // If we didn't get a useful location, or if it isn't a size, bail out.
> +      if (!DepLoc.Ptr)
> +        break;
> +
> +      // If we find a write that is a) removable (i.e., non-volatile), b) is
> +      // completely obliterated by the store to 'Loc', and c) which we know that
> +      // 'Inst' doesn't load from, then we can remove it.
> +      if (isRemovable(DepWrite) &&
> +          !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {
> +        int64_t InstWriteOffset, DepWriteOffset;
> +        OverwriteResult OR =
> +            isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset);
> +        if (OR == OverwriteComplete) {
> +          DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: "
> +                << *DepWrite << "\n  KILLER: " << *Inst << '\n');
> +
> +          // Delete the store and now-dead instructions that feed it.
> +          DeleteDeadInstruction(DepWrite, *MD, *TLI);
> +          ++NumFastStores;
> +          MadeChange = true;
> +
> +          // DeleteDeadInstruction can delete the current instruction in loop
> +          // cases, reset BBI.
> +          BBI = Inst->getIterator();
> +          if (BBI != BB.begin())
> +            --BBI;
> +          break;
> +        } else if ((OR == OverwriteEnd && isShortenableAtTheEnd(DepWrite)) ||
> +                   ((OR == OverwriteBegin &&
> +                     isShortenableAtTheBeginning(DepWrite)))) {
> +          // TODO: base this on the target vector size so that if the earlier
> +          // store was too small to get vector writes anyway then its likely
> +          // a good idea to shorten it
> +          // Power of 2 vector writes are probably always a bad idea to optimize
> +          // as any store/memset/memcpy is likely using vector instructions so
> +          // shortening it to not vector size is likely to be slower
> +          MemIntrinsic *DepIntrinsic = cast<MemIntrinsic>(DepWrite);
> +          unsigned DepWriteAlign = DepIntrinsic->getAlignment();
> +          bool IsOverwriteEnd = (OR == OverwriteEnd);
> +          if (!IsOverwriteEnd)
> +            InstWriteOffset = int64_t(InstWriteOffset + Loc.Size);
> +
> +          if ((llvm::isPowerOf2_64(InstWriteOffset) &&
> +               DepWriteAlign <= InstWriteOffset) ||
> +              ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
> +
> +            DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW "
> +                         << (IsOverwriteEnd ? "END" : "BEGIN") << ": "
> +                         << *DepWrite << "\n  KILLER (offset "
> +                         << InstWriteOffset << ", " << DepLoc.Size << ")"
> +                         << *Inst << '\n');
> +
> +            int64_t NewLength =
> +                IsOverwriteEnd
> +                    ? InstWriteOffset - DepWriteOffset
> +                    : DepLoc.Size - (InstWriteOffset - DepWriteOffset);
> +
> +            Value *DepWriteLength = DepIntrinsic->getLength();
> +            Value *TrimmedLength =
> +                ConstantInt::get(DepWriteLength->getType(), NewLength);
> +            DepIntrinsic->setLength(TrimmedLength);
> +
> +            if (!IsOverwriteEnd) {
> +              int64_t OffsetMoved = (InstWriteOffset - DepWriteOffset);
> +              Value *Indices[1] = {
> +                  ConstantInt::get(DepWriteLength->getType(), OffsetMoved)};
> +              GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
> +                  DepIntrinsic->getRawDest(), Indices, "", DepWrite);
> +              DepIntrinsic->setDest(NewDestGEP);
> +            }
> +            MadeChange = true;
> +          }
> +        }
> +      }
> +
> +      // If this is a may-aliased store that is clobbering the store value, we
> +      // can keep searching past it for another must-aliased pointer that stores
> +      // to the same location.  For example, in:
> +      //   store -> P
> +      //   store -> Q
> +      //   store -> P
> +      // we can remove the first store to P even though we don't know if P and Q
> +      // alias.
> +      if (DepWrite == &BB.front()) break;
> +
> +      // Can't look past this instruction if it might read 'Loc'.
> +      if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref)
> +        break;
> +
> +      InstDep = MD->getPointerDependencyFrom(Loc, false,
> +                                             DepWrite->getIterator(), &BB);
> +    }
>    }
>  
> -  // Remove objects that could alias LoadedLoc.
> -  DeadStackObjects.remove_if([&](Value *I) {
> -    // See if the loaded location could alias the stack location.
> -    MemoryLocation StackLoc(I, getPointerSize(I, DL, *TLI));
> -    return !AA->isNoAlias(StackLoc, LoadedLoc);
> -  });
> +  // If this block ends in a return, unwind, or unreachable, all allocas are
> +  // dead at its end, which means stores to them are also dead.
> +  if (BB.getTerminator()->getNumSuccessors() == 0)
> +    MadeChange |= handleEndBlock(BB, AA, MD, TLI);
> +
> +  return MadeChange;
> +}
> +
> +//===----------------------------------------------------------------------===//
> +// DSE Pass
> +//===----------------------------------------------------------------------===//
> +PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
> +  bool Changed = false;
> +
> +  AliasAnalysis *AA = &AM.getResult<AAManager>(F);
> +  DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
> +  MemoryDependenceResults *MD = &AM.getResult<MemoryDependenceAnalysis>(F);
> +  const TargetLibraryInfo *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
> +
> +  for (BasicBlock &BB : F)
> +    // Only check non-dead blocks.  Dead blocks may have strange pointer
> +    // cycles that will confuse alias analysis.
> +    if (DT->isReachableFromEntry(&BB))
> +      Changed |= runOnBasicBlock(BB, AA, MD, DT, TLI);

It's probably worth wrapping this loop in a function to avoid needing to
duplicate the logic. It can just be an overload of `eliminateDeadStores`
that takes a Function& instead of a BB&.

Especially since...

> +
> +  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
> +}
> +
> +/// A legacy pass for the legacy pass manager that wraps the \c DSE pass.
> +///
> +/// This is in the llvm namespace purely to allow it to be a friend of the \c
> +/// DSE pass.
> +class DSELegacyPass : public FunctionPass {
> +public:
> +  DSELegacyPass() : FunctionPass(ID) {
> +    initializeDSELegacyPassPass(*PassRegistry::getPassRegistry());
> +  }
> +
> +  bool runOnFunction(Function &F) override {
> +    if (skipFunction(F))
> +      return false;
> +
> +    DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
> +    AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
> +    MemoryDependenceResults *MD =
> +        &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
> +    const TargetLibraryInfo *TLI =
> +        &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
> +
> +    bool Changed = false;
> +    for (BasicBlock &BB : F)
> +      Changed |= runOnBasicBlock(BB, AA, MD, DT, TLI);

... this is missing the isReachable check here! I find it a bit
concerning that no tests fail because of this, but maybe it's still
correct and just inefficient.

> +    return Changed;
> +  }
> +
> +  void getAnalysisUsage(AnalysisUsage &AU) const override {
> +    AU.setPreservesCFG();
> +    AU.addRequired<DominatorTreeWrapperPass>();
> +    AU.addRequired<AAResultsWrapperPass>();
> +    AU.addRequired<MemoryDependenceWrapperPass>();
> +    AU.addRequired<TargetLibraryInfoWrapperPass>();
> +    AU.addPreserved<DominatorTreeWrapperPass>();
> +    AU.addPreserved<GlobalsAAWrapperPass>();
> +    AU.addPreserved<MemoryDependenceWrapperPass>();
> +  }
> +
> +  static char ID; // Pass identification, replacement for typeid
> +};
> +
> +char DSELegacyPass::ID = 0;
> +INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
> +                      false)
> +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
> +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
> +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
> +INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
> +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
> +INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
> +                    false)
> +
> +FunctionPass *llvm::createDeadStoreEliminationPass() {
> +  return new DSELegacyPass();
>  }
> Index: lib/Passes/PassRegistry.def
> ===================================================================
> --- lib/Passes/PassRegistry.def
> +++ lib/Passes/PassRegistry.def
> @@ -110,6 +110,7 @@
>  FUNCTION_PASS("aa-eval", AAEvaluator())
>  FUNCTION_PASS("adce", ADCEPass())
>  FUNCTION_PASS("dce", DCEPass())
> +FUNCTION_PASS("dse", DSEPass())
>  FUNCTION_PASS("early-cse", EarlyCSEPass())
>  FUNCTION_PASS("instcombine", InstCombinePass())
>  FUNCTION_PASS("invalidate<all>", InvalidateAllAnalysesPass())
> Index: lib/Passes/PassBuilder.cpp
> ===================================================================
> --- lib/Passes/PassBuilder.cpp
> +++ lib/Passes/PassBuilder.cpp
> @@ -64,6 +64,7 @@
>  #include "llvm/Transforms/PGOInstrumentation.h"
>  #include "llvm/Transforms/Scalar/ADCE.h"
>  #include "llvm/Transforms/Scalar/DCE.h"
> +#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
>  #include "llvm/Transforms/Scalar/EarlyCSE.h"
>  #include "llvm/Transforms/Scalar/GVN.h"
>  #include "llvm/Transforms/Scalar/LoopRotation.h"
> Index: include/llvm/Transforms/Scalar/DeadStoreElimination.h
> ===================================================================
> --- /dev/null
> +++ include/llvm/Transforms/Scalar/DeadStoreElimination.h
> @@ -0,0 +1,42 @@
> +//===- DeadStoreElimination.h - Fast Dead Store Elimination -------------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file implements a trivial dead store elimination that only considers
> +// basic-block local redundant stores.
> +//
> +// FIXME: This should eventually be extended to be a post-dominator tree
> +// traversal.  Doing so would be pretty trivial.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef LLVM_TRANSFORMS_SCALAR_DSE_H
> +#define LLVM_TRANSFORMS_SCALAR_DSE_H
> +
> +#include "llvm/ADT/SetVector.h"
> +#include "llvm/Analysis/AliasAnalysis.h"
> +#include "llvm/Analysis/MemoryDependenceAnalysis.h"
> +#include "llvm/Analysis/TargetLibraryInfo.h"
> +#include "llvm/IR/Dominators.h"
> +#include "llvm/IR/Function.h"
> +#include "llvm/IR/PassManager.h"

Pretty sure you only need Function.h and PassManager.h at this point.

> +
> +namespace llvm {
> +
> +/// This class implements a trivial dead store elimination. We consider
> +/// only the redundant stores that are local to a single Basic Block.
> +class DSEPass : public PassInfoMixin<DSEPass> {
> +public:
> +  DSEPass() {}

No need to write out an empty constructor.

> +
> +  /// \brief Run the pass over the function.

I wouldn't bother with this doc string - this is all obvious from the
signature.

> +  PreservedAnalyses run(Function &F, AnalysisManager<Function> &FAM);
> +};
> +}
> +
> +#endif // LLVM_TRANSFORMS_SCALAR_DSE_H
> Index: include/llvm/InitializePasses.h
> ===================================================================
> --- include/llvm/InitializePasses.h
> +++ include/llvm/InitializePasses.h
> @@ -105,7 +105,7 @@
>  void initializeDAEPass(PassRegistry&);
>  void initializeDAHPass(PassRegistry&);
>  void initializeDCELegacyPassPass(PassRegistry&);
> -void initializeDSEPass(PassRegistry&);
> +void initializeDSELegacyPassPass(PassRegistry&);
>  void initializeDeadInstEliminationPass(PassRegistry&);
>  void initializeDeadMachineInstructionElimPass(PassRegistry&);
>  void initializeDelinearizationPass(PassRegistry &);


More information about the llvm-commits mailing list