<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Tue, May 17, 2016 at 1:52 PM, Justin Bogner <span dir="ltr"><<a href="mailto:mail@justinbogner.com" target="_blank">mail@justinbogner.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-style:solid;border-left-color:rgb(204,204,204);padding-left:1ex"><span class="">Jake VanAdrighem <<a href="mailto:jvanadrighem@gmail.com">jvanadrighem@gmail.com</a>> writes:<br>
> JakeVanAdrighem updated this revision to Diff 57399.<br>
> JakeVanAdrighem marked an inline comment as done.<br>
> JakeVanAdrighem added a comment.<br>
><br>
> Rename runOnBasicBlock to eliminateDeadStores. Add an overload that<br>
> takes a Function instead of BasicBlock. Made all the helpers<br>
> static. Cleaned up function description comments. clang-format.<br>
><br>
> I considered discarding this patch and splitting it out but at this<br>
> point I'd rather leave it as a slight refactor + port to the new PM.<br>
<br>
</span>Makes sense. This LGTM.<br>
<br>
Do you have commit access, or do you need me to commit this for you?<br>
<span class=""><br></span></blockquote><div><br></div><div>I don't have commit access so if you could please commit the patch that would be much appreciated. Thank you for taking the time to actually review the patches.</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-style:solid;border-left-color:rgb(204,204,204);padding-left:1ex"><span class="">
><br>
> Repository:<br>
> rL LLVM<br>
><br>
> <a href="http://reviews.llvm.org/D20146" rel="noreferrer" target="_blank">http://reviews.llvm.org/D20146</a><br>
><br>
> Files:<br>
> include/llvm/InitializePasses.h<br>
> include/llvm/Transforms/Scalar/DeadStoreElimination.h<br>
> lib/Passes/PassBuilder.cpp<br>
> lib/Passes/PassRegistry.def<br>
> lib/Transforms/Scalar/DeadStoreElimination.cpp<br>
> lib/Transforms/Scalar/Scalar.cpp<br>
> test/Transforms/DeadStoreElimination/simple.ll<br>
><br>
</span><div><div class="h5">> Index: test/Transforms/DeadStoreElimination/simple.ll<br>
> ===================================================================<br>
> --- test/Transforms/DeadStoreElimination/simple.ll<br>
> +++ test/Transforms/DeadStoreElimination/simple.ll<br>
> @@ -1,4 +1,5 @@<br>
> ; RUN: opt < %s -basicaa -dse -S | FileCheck %s<br>
> +; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s<br>
> target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"<br>
><br>
> declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind<br>
> Index: lib/Transforms/Scalar/Scalar.cpp<br>
> ===================================================================<br>
> --- lib/Transforms/Scalar/Scalar.cpp<br>
> +++ lib/Transforms/Scalar/Scalar.cpp<br>
> @@ -40,7 +40,7 @@<br>
> initializeDCELegacyPassPass(Registry);<br>
> initializeDeadInstEliminationPass(Registry);<br>
> initializeScalarizerPass(Registry);<br>
> - initializeDSEPass(Registry);<br>
> + initializeDSELegacyPassPass(Registry);<br>
> initializeGVNLegacyPassPass(Registry);<br>
> initializeEarlyCSELegacyPassPass(Registry);<br>
> initializeFlattenCFGPassPass(Registry);<br>
> Index: lib/Transforms/Scalar/DeadStoreElimination.cpp<br>
> ===================================================================<br>
> --- lib/Transforms/Scalar/DeadStoreElimination.cpp<br>
> +++ lib/Transforms/Scalar/DeadStoreElimination.cpp<br>
> @@ -15,7 +15,7 @@<br>
> //<br>
> //===----------------------------------------------------------------------===//<br>
><br>
> -#include "llvm/Transforms/Scalar.h"<br>
> +#include "llvm/Transforms/Scalar/DeadStoreElimination.h"<br>
> #include "llvm/ADT/STLExtras.h"<br>
> #include "llvm/ADT/SetVector.h"<br>
> #include "llvm/ADT/Statistic.h"<br>
> @@ -36,6 +36,7 @@<br>
> #include "llvm/Pass.h"<br>
> #include "llvm/Support/Debug.h"<br>
> #include "llvm/Support/raw_ostream.h"<br>
> +#include "llvm/Transforms/Scalar.h"<br>
> #include "llvm/Transforms/Utils/Local.h"<br>
> using namespace llvm;<br>
><br>
</div></div>> @@ -45,80 +46,15 @@<br>
<div><div class="h5">> STATISTIC(NumFastStores, "Number of stores deleted");<br>
> STATISTIC(NumFastOther , "Number of other instrs removed");<br>
><br>
> -namespace {<br>
> - struct DSE : public FunctionPass {<br>
> - AliasAnalysis *AA;<br>
> - MemoryDependenceResults *MD;<br>
> - DominatorTree *DT;<br>
> - const TargetLibraryInfo *TLI;<br>
> -<br>
> - static char ID; // Pass identification, replacement for typeid<br>
> - DSE() : FunctionPass(ID), AA(nullptr), MD(nullptr), DT(nullptr) {<br>
> - initializeDSEPass(*PassRegistry::getPassRegistry());<br>
> - }<br>
> -<br>
> - bool runOnFunction(Function &F) override {<br>
> - if (skipFunction(F))<br>
> - return false;<br>
> -<br>
> - AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();<br>
> - MD = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();<br>
> - DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();<br>
> - TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();<br>
> -<br>
> - bool Changed = false;<br>
> - for (BasicBlock &I : F)<br>
> - // Only check non-dead blocks. Dead blocks may have strange pointer<br>
> - // cycles that will confuse alias analysis.<br>
> - if (DT->isReachableFromEntry(&I))<br>
> - Changed |= runOnBasicBlock(I);<br>
> -<br>
> - AA = nullptr; MD = nullptr; DT = nullptr;<br>
> - return Changed;<br>
> - }<br>
> -<br>
> - bool runOnBasicBlock(BasicBlock &BB);<br>
> - bool MemoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI);<br>
> - bool HandleFree(CallInst *F);<br>
> - bool handleEndBlock(BasicBlock &BB);<br>
> - void RemoveAccessedObjects(const MemoryLocation &LoadedLoc,<br>
> - SmallSetVector<Value *, 16> &DeadStackObjects,<br>
> - const DataLayout &DL);<br>
> -<br>
> - void getAnalysisUsage(AnalysisUsage &AU) const override {<br>
> - AU.setPreservesCFG();<br>
> - AU.addRequired<DominatorTreeWrapperPass>();<br>
> - AU.addRequired<AAResultsWrapperPass>();<br>
> - AU.addRequired<MemoryDependenceWrapperPass>();<br>
> - AU.addRequired<TargetLibraryInfoWrapperPass>();<br>
> - AU.addPreserved<DominatorTreeWrapperPass>();<br>
> - AU.addPreserved<GlobalsAAWrapperPass>();<br>
> - AU.addPreserved<MemoryDependenceWrapperPass>();<br>
> - }<br>
> - };<br>
> -}<br>
> -<br>
> -char DSE::ID = 0;<br>
> -INITIALIZE_PASS_BEGIN(DSE, "dse", "Dead Store Elimination", false, false)<br>
> -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)<br>
> -INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)<br>
> -INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)<br>
> -INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)<br>
> -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)<br>
> -INITIALIZE_PASS_END(DSE, "dse", "Dead Store Elimination", false, false)<br>
> -<br>
> -FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }<br>
><br>
> //===----------------------------------------------------------------------===//<br>
> // Helper functions<br>
</div></div>> //===----------------------------------------------------------------------===//<br>
><br>
> -/// DeleteDeadInstruction - Delete this instruction. Before we do, go through<br>
> -/// and zero out all the operands of this instruction. If any of them become<br>
> -/// dead, delete them and the computation tree that feeds them.<br>
> -///<br>
> +/// Delete this instruction. Before we do, go through and zero out all the<br>
> +/// operands of this instruction. If any of them become dead, delete them and<br>
> +/// the computation tree that feeds them.<br>
> /// If ValueSet is non-null, remove any deleted instructions from it as well.<br>
> -///<br>
> static void DeleteDeadInstruction(Instruction *I,<br>
> MemoryDependenceResults &MD,<br>
> const TargetLibraryInfo &TLI,<br>
> @@ -156,9 +92,8 @@<br>
> } while (!NowDeadInsts.empty());<br>
> }<br>
><br>
> -<br>
> -/// hasMemoryWrite - Does this instruction write some memory? This only returns<br>
> -/// true for things that we can analyze with other helpers below.<br>
> +/// Does this instruction write some memory? This only returns true for things<br>
> +/// that we can analyze with other helpers below.<br>
> static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo &TLI) {<br>
> if (isa<StoreInst>(I))<br>
> return true;<br>
> @@ -197,9 +132,9 @@<br>
> return false;<br>
> }<br>
><br>
> -/// getLocForWrite - Return a Location stored to by the specified instruction.<br>
> -/// If isRemovable returns true, this function and getLocForRead completely<br>
> -/// describe the memory operations for this instruction.<br>
> +/// Return a Location stored to by the specified instruction. If isRemovable<br>
> +/// returns true, this function and getLocForRead completely describe the memory<br>
> +/// operations for this instruction.<br>
> static MemoryLocation getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {<br>
<span class="">> if (StoreInst *SI = dyn_cast<StoreInst>(Inst))<br>
</span>> return MemoryLocation::get(SI);<br>
> @@ -228,8 +163,8 @@<br>
> }<br>
> }<br>
><br>
> -/// getLocForRead - Return the location read by the specified "hasMemoryWrite"<br>
> -/// instruction if any.<br>
> +/// Return the location read by the specified "hasMemoryWrite" instruction if<br>
> +/// any.<br>
> static MemoryLocation getLocForRead(Instruction *Inst,<br>
> const TargetLibraryInfo &TLI) {<br>
> assert(hasMemoryWrite(Inst, TLI) && "Unknown instruction case");<br>
> @@ -241,9 +176,8 @@<br>
> return MemoryLocation();<br>
> }<br>
><br>
> -<br>
> -/// isRemovable - If the value of this instruction and the memory it writes to<br>
> -/// is unused, may we delete this instruction?<br>
> +/// If the value of this instruction and the memory it writes to is unused, may<br>
> +/// we delete this instruction?<br>
> static bool isRemovable(Instruction *I) {<br>
> // Don't remove volatile/atomic stores.<br>
> if (StoreInst *SI = dyn_cast<StoreInst>(I))<br>
> @@ -307,7 +241,7 @@<br>
> return II && II->getIntrinsicID() == Intrinsic::memset;<br>
> }<br>
><br>
> -/// getStoredPointerOperand - Return the pointer that is being written to.<br>
> +/// Return the pointer that is being written to.<br>
> static Value *getStoredPointerOperand(Instruction *I) {<br>
> if (StoreInst *SI = dyn_cast<StoreInst>(I))<br>
> return SI->getPointerOperand();<br>
> @@ -458,7 +392,7 @@<br>
> return OverwriteUnknown;<br>
> }<br>
><br>
> -/// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a<br>
> +/// If 'Inst' might be a self read (i.e. a noop copy of a<br>
> /// memory region into an identical pointer) then it doesn't actually make its<br>
> /// input dead in the traditional sense. Consider this case:<br>
> ///<br>
> @@ -503,212 +437,13 @@<br>
<div><div class="h5">> }<br>
><br>
><br>
> -//===----------------------------------------------------------------------===//<br>
> -// DSE Pass<br>
> -//===----------------------------------------------------------------------===//<br>
> -<br>
> -bool DSE::runOnBasicBlock(BasicBlock &BB) {<br>
> - const DataLayout &DL = BB.getModule()->getDataLayout();<br>
> - bool MadeChange = false;<br>
> -<br>
> - // Do a top-down walk on the BB.<br>
> - for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {<br>
> - Instruction *Inst = &*BBI++;<br>
> -<br>
> - // Handle 'free' calls specially.<br>
> - if (CallInst *F = isFreeCall(Inst, TLI)) {<br>
> - MadeChange |= HandleFree(F);<br>
> - continue;<br>
> - }<br>
> -<br>
> - // If we find something that writes memory, get its memory dependence.<br>
> - if (!hasMemoryWrite(Inst, *TLI))<br>
> - continue;<br>
> -<br>
> - // If we're storing the same value back to a pointer that we just<br>
> - // loaded from, then the store can be removed.<br>
> - if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {<br>
> -<br>
> - auto RemoveDeadInstAndUpdateBBI = [&](Instruction *DeadInst) {<br>
> - // DeleteDeadInstruction can delete the current instruction. Save BBI<br>
> - // in case we need it.<br>
> - WeakVH NextInst(&*BBI);<br>
> -<br>
> - DeleteDeadInstruction(DeadInst, *MD, *TLI);<br>
> -<br>
> - if (!NextInst) // Next instruction deleted.<br>
> - BBI = BB.begin();<br>
> - else if (BBI != BB.begin()) // Revisit this instruction if possible.<br>
> - --BBI;<br>
> - ++NumRedundantStores;<br>
> - MadeChange = true;<br>
> - };<br>
> -<br>
> - if (LoadInst *DepLoad = dyn_cast<LoadInst>(SI->getValueOperand())) {<br>
> - if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&<br>
> - isRemovable(SI) &&<br>
> - MemoryIsNotModifiedBetween(DepLoad, SI)) {<br>
> -<br>
> - DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n "<br>
> - << "LOAD: " << *DepLoad << "\n STORE: " << *SI << '\n');<br>
> -<br>
> - RemoveDeadInstAndUpdateBBI(SI);<br>
> - continue;<br>
> - }<br>
> - }<br>
> -<br>
> - // Remove null stores into the calloc'ed objects<br>
> - Constant *StoredConstant = dyn_cast<Constant>(SI->getValueOperand());<br>
> -<br>
> - if (StoredConstant && StoredConstant->isNullValue() &&<br>
> - isRemovable(SI)) {<br>
> - Instruction *UnderlyingPointer = dyn_cast<Instruction>(<br>
> - GetUnderlyingObject(SI->getPointerOperand(), DL));<br>
> -<br>
> - if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) &&<br>
> - MemoryIsNotModifiedBetween(UnderlyingPointer, SI)) {<br>
> - DEBUG(dbgs()<br>
> - << "DSE: Remove null store to the calloc'ed object:\n DEAD: "<br>
> - << *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n');<br>
> -<br>
> - RemoveDeadInstAndUpdateBBI(SI);<br>
> - continue;<br>
> - }<br>
> - }<br>
> - }<br>
> -<br>
> - MemDepResult InstDep = MD->getDependency(Inst);<br>
> -<br>
> - // Ignore any store where we can't find a local dependence.<br>
> - // FIXME: cross-block DSE would be fun. :)<br>
> - if (!InstDep.isDef() && !InstDep.isClobber())<br>
> - continue;<br>
> -<br>
> - // Figure out what location is being stored to.<br>
> - MemoryLocation Loc = getLocForWrite(Inst, *AA);<br>
> -<br>
> - // If we didn't get a useful location, fail.<br>
> - if (!Loc.Ptr)<br>
> - continue;<br>
> -<br>
> - while (InstDep.isDef() || InstDep.isClobber()) {<br>
> - // Get the memory clobbered by the instruction we depend on. MemDep will<br>
> - // skip any instructions that 'Loc' clearly doesn't interact with. If we<br>
> - // end up depending on a may- or must-aliased load, then we can't optimize<br>
> - // away the store and we bail out. However, if we depend on on something<br>
> - // that overwrites the memory location we *can* potentially optimize it.<br>
> - //<br>
> - // Find out what memory location the dependent instruction stores.<br>
> - Instruction *DepWrite = InstDep.getInst();<br>
> - MemoryLocation DepLoc = getLocForWrite(DepWrite, *AA);<br>
> - // If we didn't get a useful location, or if it isn't a size, bail out.<br>
> - if (!DepLoc.Ptr)<br>
> - break;<br>
> -<br>
> - // If we find a write that is a) removable (i.e., non-volatile), b) is<br>
> - // completely obliterated by the store to 'Loc', and c) which we know that<br>
> - // 'Inst' doesn't load from, then we can remove it.<br>
> - if (isRemovable(DepWrite) &&<br>
> - !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {<br>
> - int64_t InstWriteOffset, DepWriteOffset;<br>
> - OverwriteResult OR =<br>
> - isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset);<br>
> - if (OR == OverwriteComplete) {<br>
> - DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "<br>
> - << *DepWrite << "\n KILLER: " << *Inst << '\n');<br>
> -<br>
> - // Delete the store and now-dead instructions that feed it.<br>
> - DeleteDeadInstruction(DepWrite, *MD, *TLI);<br>
> - ++NumFastStores;<br>
> - MadeChange = true;<br>
> -<br>
> - // DeleteDeadInstruction can delete the current instruction in loop<br>
> - // cases, reset BBI.<br>
> - BBI = Inst->getIterator();<br>
> - if (BBI != BB.begin())<br>
> - --BBI;<br>
> - break;<br>
> - } else if ((OR == OverwriteEnd && isShortenableAtTheEnd(DepWrite)) ||<br>
> - ((OR == OverwriteBegin &&<br>
> - isShortenableAtTheBeginning(DepWrite)))) {<br>
> - // TODO: base this on the target vector size so that if the earlier<br>
> - // store was too small to get vector writes anyway then its likely<br>
> - // a good idea to shorten it<br>
> - // Power of 2 vector writes are probably always a bad idea to optimize<br>
> - // as any store/memset/memcpy is likely using vector instructions so<br>
> - // shortening it to not vector size is likely to be slower<br>
> - MemIntrinsic *DepIntrinsic = cast<MemIntrinsic>(DepWrite);<br>
> - unsigned DepWriteAlign = DepIntrinsic->getAlignment();<br>
> - bool IsOverwriteEnd = (OR == OverwriteEnd);<br>
> - if (!IsOverwriteEnd)<br>
> - InstWriteOffset = int64_t(InstWriteOffset + Loc.Size);<br>
> -<br>
> - if ((llvm::isPowerOf2_64(InstWriteOffset) &&<br>
> - DepWriteAlign <= InstWriteOffset) ||<br>
> - ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {<br>
> -<br>
> - DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "<br>
> - << (IsOverwriteEnd ? "END" : "BEGIN") << ": "<br>
> - << *DepWrite << "\n KILLER (offset "<br>
> - << InstWriteOffset << ", " << DepLoc.Size << ")"<br>
> - << *Inst << '\n');<br>
> -<br>
> - int64_t NewLength =<br>
> - IsOverwriteEnd<br>
> - ? InstWriteOffset - DepWriteOffset<br>
> - : DepLoc.Size - (InstWriteOffset - DepWriteOffset);<br>
> -<br>
> - Value *DepWriteLength = DepIntrinsic->getLength();<br>
> - Value *TrimmedLength =<br>
> - ConstantInt::get(DepWriteLength->getType(), NewLength);<br>
> - DepIntrinsic->setLength(TrimmedLength);<br>
> -<br>
> - if (!IsOverwriteEnd) {<br>
> - int64_t OffsetMoved = (InstWriteOffset - DepWriteOffset);<br>
> - Value *Indices[1] = {<br>
> - ConstantInt::get(DepWriteLength->getType(), OffsetMoved)};<br>
> - GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(<br>
> - DepIntrinsic->getRawDest(), Indices, "", DepWrite);<br>
> - DepIntrinsic->setDest(NewDestGEP);<br>
> - }<br>
> - MadeChange = true;<br>
> - }<br>
> - }<br>
> - }<br>
> -<br>
> - // If this is a may-aliased store that is clobbering the store value, we<br>
> - // can keep searching past it for another must-aliased pointer that stores<br>
> - // to the same location. For example, in:<br>
> - // store -> P<br>
> - // store -> Q<br>
> - // store -> P<br>
> - // we can remove the first store to P even though we don't know if P and Q<br>
> - // alias.<br>
> - if (DepWrite == &BB.front()) break;<br>
> -<br>
> - // Can't look past this instruction if it might read 'Loc'.<br>
> - if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref)<br>
> - break;<br>
> -<br>
> - InstDep = MD->getPointerDependencyFrom(Loc, false,<br>
> - DepWrite->getIterator(), &BB);<br>
> - }<br>
> - }<br>
> -<br>
> - // If this block ends in a return, unwind, or unreachable, all allocas are<br>
> - // dead at its end, which means stores to them are also dead.<br>
> - if (BB.getTerminator()->getNumSuccessors() == 0)<br>
> - MadeChange |= handleEndBlock(BB);<br>
> -<br>
> - return MadeChange;<br>
> -}<br>
> -<br>
> /// Returns true if the memory which is accessed by the second instruction is not<br>
> /// modified between the first and the second instruction.<br>
> /// Precondition: Second instruction must be dominated by the first<br>
> /// instruction.<br>
> -bool DSE::MemoryIsNotModifiedBetween(Instruction *FirstI,<br>
> - Instruction *SecondI) {<br>
</div></div>> +static bool MemoryIsNotModifiedBetween(Instruction *FirstI,<br>
<span class="">> + Instruction *SecondI,<br>
> + AliasAnalysis *AA) {<br>
</span><span class="">> SmallVector<BasicBlock *, 16> WorkList;<br>
> SmallPtrSet<BasicBlock *, 8> Visited;<br>
> BasicBlock::iterator FirstBBI(FirstI);<br>
</span>> @@ -777,9 +512,11 @@<br>
> }<br>
> }<br>
><br>
> -/// HandleFree - Handle frees of entire structures whose dependency is a store<br>
> +/// Handle frees of entire structures whose dependency is a store<br>
<span class="">> /// to a field of that structure.<br>
> -bool DSE::HandleFree(CallInst *F) {<br>
</span>> +static bool HandleFree(CallInst *F, AliasAnalysis *AA,<br>
<span class="">> + MemoryDependenceResults *MD, DominatorTree *DT,<br>
> + const TargetLibraryInfo *TLI) {<br>
> bool MadeChange = false;<br>
><br>
> MemoryLocation Loc = MemoryLocation(F->getOperand(0));<br>
</span>> @@ -828,13 +565,43 @@<br>
> return MadeChange;<br>
> }<br>
><br>
> -/// handleEndBlock - Remove dead stores to stack-allocated locations in the<br>
> -/// function end block. Ex:<br>
> +/// Check to see if the specified location may alias any of the stack objects in<br>
> +/// the DeadStackObjects set. If so, they become live because the location is<br>
> +/// being loaded.<br>
> +static void RemoveAccessedObjects(const MemoryLocation &LoadedLoc,<br>
<span class="">> + SmallSetVector<Value *, 16> &DeadStackObjects,<br>
</span><span class="">> + const DataLayout &DL, AliasAnalysis *AA,<br>
</span>> + const TargetLibraryInfo *TLI) {<br>
<span class="">> + const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr, DL);<br>
> +<br>
> + // A constant can't be in the dead pointer set.<br>
> + if (isa<Constant>(UnderlyingPointer))<br>
> + return;<br>
> +<br>
> + // If the kill pointer can be easily reduced to an alloca, don't bother doing<br>
> + // extraneous AA queries.<br>
> + if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) {<br>
> + DeadStackObjects.remove(const_cast<Value*>(UnderlyingPointer));<br>
> + return;<br>
> + }<br>
> +<br>
> + // Remove objects that could alias LoadedLoc.<br>
> + DeadStackObjects.remove_if([&](Value *I) {<br>
> + // See if the loaded location could alias the stack location.<br>
> + MemoryLocation StackLoc(I, getPointerSize(I, DL, *TLI));<br>
> + return !AA->isNoAlias(StackLoc, LoadedLoc);<br>
> + });<br>
> +}<br>
> +<br>
</span>> +/// Remove dead stores to stack-allocated locations in the function end block.<br>
> +/// Ex:<br>
<span class="">> /// %A = alloca i32<br>
> /// ...<br>
> /// store i32 1, i32* %A<br>
> /// ret void<br>
> -bool DSE::handleEndBlock(BasicBlock &BB) {<br>
</span>> +static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,<br>
<span class="">> + MemoryDependenceResults *MD,<br>
> + const TargetLibraryInfo *TLI) {<br>
> bool MadeChange = false;<br>
><br>
> // Keep track of all of the stack objects that are dead at the end of the<br>
</span>> @@ -967,7 +734,7 @@<br>
<span class="">><br>
> // Remove any allocas from the DeadPointer set that are loaded, as this<br>
> // makes any stores above the access live.<br>
> - RemoveAccessedObjects(LoadedLoc, DeadStackObjects, DL);<br>
> + RemoveAccessedObjects(LoadedLoc, DeadStackObjects, DL, AA, TLI);<br>
><br>
> // If all of the allocas were clobbered by the access then we're not going<br>
> // to find anything else to process.<br>
</span>> @@ -978,29 +745,275 @@<br>
<span class="">> return MadeChange;<br>
> }<br>
><br>
> -/// RemoveAccessedObjects - Check to see if the specified location may alias any<br>
> -/// of the stack objects in the DeadStackObjects set. If so, they become live<br>
> -/// because the location is being loaded.<br>
> -void DSE::RemoveAccessedObjects(const MemoryLocation &LoadedLoc,<br>
> - SmallSetVector<Value *, 16> &DeadStackObjects,<br>
> - const DataLayout &DL) {<br>
> - const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr, DL);<br>
</span>> +static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,<br>
<span class="">> + MemoryDependenceResults *MD, DominatorTree *DT,<br>
> + const TargetLibraryInfo *TLI) {<br>
</span><div><div class="h5">> + const DataLayout &DL = BB.getModule()->getDataLayout();<br>
> + bool MadeChange = false;<br>
><br>
> - // A constant can't be in the dead pointer set.<br>
> - if (isa<Constant>(UnderlyingPointer))<br>
> - return;<br>
> + // Do a top-down walk on the BB.<br>
> + for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {<br>
> + Instruction *Inst = &*BBI++;<br>
><br>
> - // If the kill pointer can be easily reduced to an alloca, don't bother doing<br>
> - // extraneous AA queries.<br>
> - if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) {<br>
> - DeadStackObjects.remove(const_cast<Value*>(UnderlyingPointer));<br>
> - return;<br>
> + // Handle 'free' calls specially.<br>
> + if (CallInst *F = isFreeCall(Inst, TLI)) {<br>
> + MadeChange |= HandleFree(F, AA, MD, DT, TLI);<br>
> + continue;<br>
> + }<br>
> +<br>
> + // If we find something that writes memory, get its memory dependence.<br>
> + if (!hasMemoryWrite(Inst, *TLI))<br>
> + continue;<br>
> +<br>
> + // If we're storing the same value back to a pointer that we just<br>
> + // loaded from, then the store can be removed.<br>
> + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {<br>
> +<br>
> + auto RemoveDeadInstAndUpdateBBI = [&](Instruction *DeadInst) {<br>
> + // DeleteDeadInstruction can delete the current instruction. Save BBI<br>
> + // in case we need it.<br>
> + WeakVH NextInst(&*BBI);<br>
> +<br>
> + DeleteDeadInstruction(DeadInst, *MD, *TLI);<br>
> +<br>
> + if (!NextInst) // Next instruction deleted.<br>
> + BBI = BB.begin();<br>
> + else if (BBI != BB.begin()) // Revisit this instruction if possible.<br>
> + --BBI;<br>
> + ++NumRedundantStores;<br>
> + MadeChange = true;<br>
> + };<br>
> +<br>
> + if (LoadInst *DepLoad = dyn_cast<LoadInst>(SI->getValueOperand())) {<br>
> + if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&<br>
> + isRemovable(SI) &&<br>
> + MemoryIsNotModifiedBetween(DepLoad, SI, AA)) {<br>
> +<br>
> + DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n "<br>
> + << "LOAD: " << *DepLoad << "\n STORE: " << *SI << '\n');<br>
> +<br>
> + RemoveDeadInstAndUpdateBBI(SI);<br>
> + continue;<br>
> + }<br>
> + }<br>
> +<br>
> + // Remove null stores into the calloc'ed objects<br>
> + Constant *StoredConstant = dyn_cast<Constant>(SI->getValueOperand());<br>
> +<br>
> + if (StoredConstant && StoredConstant->isNullValue() &&<br>
> + isRemovable(SI)) {<br>
> + Instruction *UnderlyingPointer = dyn_cast<Instruction>(<br>
> + GetUnderlyingObject(SI->getPointerOperand(), DL));<br>
> +<br>
> + if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) &&<br>
> + MemoryIsNotModifiedBetween(UnderlyingPointer, SI, AA)) {<br>
> + DEBUG(dbgs()<br>
> + << "DSE: Remove null store to the calloc'ed object:\n DEAD: "<br>
> + << *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n');<br>
> +<br>
> + RemoveDeadInstAndUpdateBBI(SI);<br>
> + continue;<br>
> + }<br>
> + }<br>
> + }<br>
> +<br>
> + MemDepResult InstDep = MD->getDependency(Inst);<br>
> +<br>
> + // Ignore any store where we can't find a local dependence.<br>
> + // FIXME: cross-block DSE would be fun. :)<br>
> + if (!InstDep.isDef() && !InstDep.isClobber())<br>
> + continue;<br>
> +<br>
> + // Figure out what location is being stored to.<br>
> + MemoryLocation Loc = getLocForWrite(Inst, *AA);<br>
> +<br>
> + // If we didn't get a useful location, fail.<br>
> + if (!Loc.Ptr)<br>
> + continue;<br>
> +<br>
> + while (InstDep.isDef() || InstDep.isClobber()) {<br>
> + // Get the memory clobbered by the instruction we depend on. MemDep will<br>
> + // skip any instructions that 'Loc' clearly doesn't interact with. If we<br>
> + // end up depending on a may- or must-aliased load, then we can't optimize<br>
> + // away the store and we bail out. However, if we depend on something<br>
> + // that overwrites the memory location we *can* potentially optimize it.<br>
> + //<br>
> + // Find out what memory location the dependent instruction stores.<br>
> + Instruction *DepWrite = InstDep.getInst();<br>
> + MemoryLocation DepLoc = getLocForWrite(DepWrite, *AA);<br>
> + // If we didn't get a useful location, or if it isn't a size, bail out.<br>
> + if (!DepLoc.Ptr)<br>
> + break;<br>
> +<br>
> + // If we find a write that is a) removable (i.e., non-volatile), b) is<br>
> + // completely obliterated by the store to 'Loc', and c) which we know that<br>
> + // 'Inst' doesn't load from, then we can remove it.<br>
> + if (isRemovable(DepWrite) &&<br>
> + !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {<br>
> + int64_t InstWriteOffset, DepWriteOffset;<br>
> + OverwriteResult OR =<br>
> + isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset);<br>
> + if (OR == OverwriteComplete) {<br>
> + DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "<br>
> + << *DepWrite << "\n KILLER: " << *Inst << '\n');<br>
> +<br>
> + // Delete the store and now-dead instructions that feed it.<br>
> + DeleteDeadInstruction(DepWrite, *MD, *TLI);<br>
> + ++NumFastStores;<br>
> + MadeChange = true;<br>
> +<br>
> + // DeleteDeadInstruction can delete the current instruction in loop<br>
> + // cases, reset BBI.<br>
> + BBI = Inst->getIterator();<br>
> + if (BBI != BB.begin())<br>
> + --BBI;<br>
> + break;<br>
> + } else if ((OR == OverwriteEnd && isShortenableAtTheEnd(DepWrite)) ||<br>
> + ((OR == OverwriteBegin &&<br>
> + isShortenableAtTheBeginning(DepWrite)))) {<br>
> + // TODO: base this on the target vector size so that if the earlier<br>
> + // store was too small to get vector writes anyway then its likely<br>
> + // a good idea to shorten it<br>
> + // Power of 2 vector writes are probably always a bad idea to optimize<br>
> + // as any store/memset/memcpy is likely using vector instructions so<br>
> + // shortening it to not vector size is likely to be slower<br>
> + MemIntrinsic *DepIntrinsic = cast<MemIntrinsic>(DepWrite);<br>
> + unsigned DepWriteAlign = DepIntrinsic->getAlignment();<br>
> + bool IsOverwriteEnd = (OR == OverwriteEnd);<br>
> + if (!IsOverwriteEnd)<br>
> + InstWriteOffset = int64_t(InstWriteOffset + Loc.Size);<br>
> +<br>
> + if ((llvm::isPowerOf2_64(InstWriteOffset) &&<br>
> + DepWriteAlign <= InstWriteOffset) ||<br>
> + ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {<br>
> +<br>
> + DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "<br>
> + << (IsOverwriteEnd ? "END" : "BEGIN") << ": "<br>
> + << *DepWrite << "\n KILLER (offset "<br>
> + << InstWriteOffset << ", " << DepLoc.Size << ")"<br>
> + << *Inst << '\n');<br>
> +<br>
> + int64_t NewLength =<br>
> + IsOverwriteEnd<br>
> + ? InstWriteOffset - DepWriteOffset<br>
> + : DepLoc.Size - (InstWriteOffset - DepWriteOffset);<br>
> +<br>
> + Value *DepWriteLength = DepIntrinsic->getLength();<br>
> + Value *TrimmedLength =<br>
> + ConstantInt::get(DepWriteLength->getType(), NewLength);<br>
> + DepIntrinsic->setLength(TrimmedLength);<br>
> +<br>
> + if (!IsOverwriteEnd) {<br>
> + int64_t OffsetMoved = (InstWriteOffset - DepWriteOffset);<br>
> + Value *Indices[1] = {<br>
> + ConstantInt::get(DepWriteLength->getType(), OffsetMoved)};<br>
> + GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(<br>
> + DepIntrinsic->getRawDest(), Indices, "", DepWrite);<br>
> + DepIntrinsic->setDest(NewDestGEP);<br>
> + }<br>
> + MadeChange = true;<br>
> + }<br>
> + }<br>
> + }<br>
> +<br>
> + // If this is a may-aliased store that is clobbering the store value, we<br>
> + // can keep searching past it for another must-aliased pointer that stores<br>
> + // to the same location. For example, in:<br>
> + // store -> P<br>
> + // store -> Q<br>
> + // store -> P<br>
> + // we can remove the first store to P even though we don't know if P and Q<br>
> + // alias.<br>
> + if (DepWrite == &BB.front()) break;<br>
> +<br>
> + // Can't look past this instruction if it might read 'Loc'.<br>
> + if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref)<br>
> + break;<br>
> +<br>
> + InstDep = MD->getPointerDependencyFrom(Loc, false,<br>
> + DepWrite->getIterator(), &BB);<br>
> + }<br>
> }<br>
><br>
> - // Remove objects that could alias LoadedLoc.<br>
> - DeadStackObjects.remove_if([&](Value *I) {<br>
> - // See if the loaded location could alias the stack location.<br>
> - MemoryLocation StackLoc(I, getPointerSize(I, DL, *TLI));<br>
> - return !AA->isNoAlias(StackLoc, LoadedLoc);<br>
> - });<br>
> + // If this block ends in a return, unwind, or unreachable, all allocas are<br>
> + // dead at its end, which means stores to them are also dead.<br>
> + if (BB.getTerminator()->getNumSuccessors() == 0)<br>
> + MadeChange |= handleEndBlock(BB, AA, MD, TLI);<br>
> +<br>
> + return MadeChange;<br>
> +}<br>
> +<br>
</div></div>> +static bool eliminateDeadStores(Function &F, AliasAnalysis *AA,<br>
<span class="">> + MemoryDependenceResults *MD, DominatorTree *DT,<br>
> + const TargetLibraryInfo *TLI) {<br>
</span><span class="">> + bool MadeChange = false;<br>
</span><span class="">> + for (BasicBlock &BB : F)<br>
> + // Only check non-dead blocks. Dead blocks may have strange pointer<br>
> + // cycles that will confuse alias analysis.<br>
> + if (DT->isReachableFromEntry(&BB))<br>
</span>> + MadeChange |= eliminateDeadStores(BB, AA, MD, DT, TLI);<br>
<span class="">> + return MadeChange;<br>
> +}<br>
> +<br>
> +//===----------------------------------------------------------------------===//<br>
> +// DSE Pass<br>
> +//===----------------------------------------------------------------------===//<br>
> +PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {<br>
</span><span class="">> + AliasAnalysis *AA = &AM.getResult<AAManager>(F);<br>
> + DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);<br>
> + MemoryDependenceResults *MD = &AM.getResult<MemoryDependenceAnalysis>(F);<br>
> + const TargetLibraryInfo *TLI = &AM.getResult<TargetLibraryAnalysis>(F);<br>
> +<br>
</span>> + return eliminateDeadStores(F, AA, MD, DT, TLI) ? PreservedAnalyses::none()<br>
> + : PreservedAnalyses::all();<br>
> +}<br>
> +<br>
> +/// A legacy pass for the legacy pass manager that wraps \c DSEPass.<br>
<span class="">> +class DSELegacyPass : public FunctionPass {<br>
> +public:<br>
> + DSELegacyPass() : FunctionPass(ID) {<br>
> + initializeDSELegacyPassPass(*PassRegistry::getPassRegistry());<br>
> + }<br>
> +<br>
> + bool runOnFunction(Function &F) override {<br>
> + if (skipFunction(F))<br>
> + return false;<br>
> +<br>
> + DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();<br>
> + AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();<br>
> + MemoryDependenceResults *MD =<br>
> + &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();<br>
> + const TargetLibraryInfo *TLI =<br>
> + &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();<br>
> +<br>
</span>> + return eliminateDeadStores(F, AA, MD, DT, TLI);<br>
> + }<br>
<div><div class="h5">> +<br>
> + void getAnalysisUsage(AnalysisUsage &AU) const override {<br>
> + AU.setPreservesCFG();<br>
> + AU.addRequired<DominatorTreeWrapperPass>();<br>
> + AU.addRequired<AAResultsWrapperPass>();<br>
> + AU.addRequired<MemoryDependenceWrapperPass>();<br>
> + AU.addRequired<TargetLibraryInfoWrapperPass>();<br>
> + AU.addPreserved<DominatorTreeWrapperPass>();<br>
> + AU.addPreserved<GlobalsAAWrapperPass>();<br>
> + AU.addPreserved<MemoryDependenceWrapperPass>();<br>
> + }<br>
> +<br>
> + static char ID; // Pass identification, replacement for typeid<br>
> +};<br>
> +<br>
> +char DSELegacyPass::ID = 0;<br>
> +INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,<br>
> + false)<br>
> +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)<br>
> +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)<br>
> +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)<br>
> +INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)<br>
> +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)<br>
> +INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,<br>
> + false)<br>
> +<br>
> +FunctionPass *llvm::createDeadStoreEliminationPass() {<br>
> + return new DSELegacyPass();<br>
> }<br>
> Index: lib/Passes/PassRegistry.def<br>
> ===================================================================<br>
> --- lib/Passes/PassRegistry.def<br>
> +++ lib/Passes/PassRegistry.def<br>
</div></div>> @@ -111,6 +111,7 @@<br>
<span class="">> FUNCTION_PASS("aa-eval", AAEvaluator())<br>
> FUNCTION_PASS("adce", ADCEPass())<br>
> FUNCTION_PASS("dce", DCEPass())<br>
> +FUNCTION_PASS("dse", DSEPass())<br>
> FUNCTION_PASS("early-cse", EarlyCSEPass())<br>
> FUNCTION_PASS("instcombine", InstCombinePass())<br>
> FUNCTION_PASS("invalidate<all>", InvalidateAllAnalysesPass())<br>
> Index: lib/Passes/PassBuilder.cpp<br>
> ===================================================================<br>
> --- lib/Passes/PassBuilder.cpp<br>
> +++ lib/Passes/PassBuilder.cpp<br>
> @@ -64,6 +64,7 @@<br>
> #include "llvm/Transforms/PGOInstrumentation.h"<br>
> #include "llvm/Transforms/Scalar/ADCE.h"<br>
> #include "llvm/Transforms/Scalar/DCE.h"<br>
> +#include "llvm/Transforms/Scalar/DeadStoreElimination.h"<br>
> #include "llvm/Transforms/Scalar/EarlyCSE.h"<br>
> #include "llvm/Transforms/Scalar/GVN.h"<br>
> #include "llvm/Transforms/Scalar/LoopRotation.h"<br>
> Index: include/llvm/Transforms/Scalar/DeadStoreElimination.h<br>
> ===================================================================<br>
> --- /dev/null<br>
> +++ include/llvm/Transforms/Scalar/DeadStoreElimination.h<br>
</span>> @@ -0,0 +1,34 @@<br>
<span class="">> +//===- DeadStoreElimination.h - Fast Dead Store Elimination -------------===//<br>
> +//<br>
> +// The LLVM Compiler Infrastructure<br>
> +//<br>
> +// This file is distributed under the University of Illinois Open Source<br>
> +// License. See LICENSE.TXT for details.<br>
> +//<br>
> +//===----------------------------------------------------------------------===//<br>
> +//<br>
> +// This file implements a trivial dead store elimination that only considers<br>
> +// basic-block local redundant stores.<br>
> +//<br>
> +// FIXME: This should eventually be extended to be a post-dominator tree<br>
> +// traversal. Doing so would be pretty trivial.<br>
> +//<br>
> +//===----------------------------------------------------------------------===//<br>
> +<br>
> +#ifndef LLVM_TRANSFORMS_SCALAR_DSE_H<br>
> +#define LLVM_TRANSFORMS_SCALAR_DSE_H<br>
> +<br>
</span>> +#include "llvm/IR/Function.h"<br>
> +#include "llvm/IR/PassManager.h"<br>
<span class="im">> +<br>
> +namespace llvm {<br>
> +<br>
> +/// This class implements a trivial dead store elimination. We consider<br>
> +/// only the redundant stores that are local to a single Basic Block.<br>
> +class DSEPass : public PassInfoMixin<DSEPass> {<br>
> +public:<br>
</span><div class=""><div class="h5">> + PreservedAnalyses run(Function &F, AnalysisManager<Function> &FAM);<br>
> +};<br>
> +}<br>
> +<br>
> +#endif // LLVM_TRANSFORMS_SCALAR_DSE_H<br>
> Index: include/llvm/InitializePasses.h<br>
> ===================================================================<br>
> --- include/llvm/InitializePasses.h<br>
> +++ include/llvm/InitializePasses.h<br>
> @@ -105,7 +105,7 @@<br>
> void initializeDAEPass(PassRegistry&);<br>
> void initializeDAHPass(PassRegistry&);<br>
> void initializeDCELegacyPassPass(PassRegistry&);<br>
> -void initializeDSEPass(PassRegistry&);<br>
> +void initializeDSELegacyPassPass(PassRegistry&);<br>
> void initializeDeadInstEliminationPass(PassRegistry&);<br>
> void initializeDeadMachineInstructionElimPass(PassRegistry&);<br>
> void initializeDelinearizationPass(PassRegistry &);<br>
</div></div></blockquote></div><br></div></div>