[PATCH] D14148: [GlobalOpt] Demote globals to locals more aggressively
Duncan P. N. Exon Smith via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 12 09:39:22 PST 2015
> On 2015-Nov-12, at 08:22, James Molloy <james.molloy at arm.com> wrote:
>
> jmolloy updated this revision to Diff 40057.
> jmolloy added a comment.
>
> Hi Manman, Mehdi,
>
> Updated patch attached.
>
> - Now, Clang knows how to add norecurse to "main", so we can remove that logic (hooray!)
> - Removed all the norecurse logic, replaced with a query to F->doesNotRecurse()
>
> Please take a look.
>
> Cheers,
>
> James
>
>
> Repository:
> rL LLVM
>
> http://reviews.llvm.org/D14148
>
> Files:
> lib/Transforms/IPO/GlobalOpt.cpp
> test/Transforms/GlobalOpt/global-demotion.ll
> test/Transforms/GlobalOpt/metadata.ll
> test/Transforms/MergeFunc/crash2.ll
>
> <D14148.40057.patch>
> Index: test/Transforms/GlobalOpt/metadata.ll
> ===================================================================
> --- test/Transforms/GlobalOpt/metadata.ll
> +++ test/Transforms/GlobalOpt/metadata.ll
> @@ -5,10 +5,11 @@
> ; to that containing %G should likewise drop to null.
> @G = internal global i8** null
>
> -define i32 @main(i32 %argc, i8** %argv) {
> +define i32 @main(i32 %argc, i8** %argv) norecurse {
> ; CHECK-LABEL: @main(
> ; CHECK: %G = alloca
> store i8** %argv, i8*** @G
> + %1 = load i8**, i8*** @G
What's this new `load` needed for?
> ret i32 0
> }
>
> Index: lib/Transforms/IPO/GlobalOpt.cpp
> ===================================================================
> --- lib/Transforms/IPO/GlobalOpt.cpp
> +++ lib/Transforms/IPO/GlobalOpt.cpp
> @@ -28,6 +28,7 @@
> #include "llvm/IR/Constants.h"
> #include "llvm/IR/DataLayout.h"
> #include "llvm/IR/DerivedTypes.h"
> +#include "llvm/IR/Dominators.h"
> #include "llvm/IR/GetElementPtrTypeIterator.h"
> #include "llvm/IR/Instructions.h"
> #include "llvm/IR/IntrinsicInst.h"
> @@ -69,6 +70,7 @@
> struct GlobalOpt : public ModulePass {
> void getAnalysisUsage(AnalysisUsage &AU) const override {
> AU.addRequired<TargetLibraryInfoWrapperPass>();
> + AU.addRequired<DominatorTreeWrapperPass>();
> }
> static char ID; // Pass identification, replacement for typeid
> GlobalOpt() : ModulePass(ID) {
> @@ -85,7 +87,9 @@
> bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI,
> const GlobalStatus &GS);
> bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);
> -
> +
> + bool isPointerValueDeadOnEntryToFunction(const Function *F, GlobalValue *GV);
> +
> TargetLibraryInfo *TLI;
> SmallSet<const Comdat *, 8> NotDiscardableComdats;
> };
> @@ -95,6 +99,7 @@
> INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
> "Global Variable Optimizer", false, false)
> INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
> +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
> INITIALIZE_PASS_END(GlobalOpt, "globalopt",
> "Global Variable Optimizer", false, false)
>
> @@ -1727,46 +1732,91 @@
> return ProcessInternalGlobal(GV, GVI, GS);
> }
>
> +bool GlobalOpt::isPointerValueDeadOnEntryToFunction(const Function *F, GlobalValue *GV) {
> + // Find all uses of GV. We expect them all to be in F, and if we can't
I think you should word this more strongly, since it's a pre-requisite
that they're all in `F`.
> + // identify any of the uses we bail out.
> + //
> + // On each of these uses, identify if the memory that GV points to is
> + // used/required/live at the start of the function. If it is not, for example
> + // if the first thing the function does is store to the GV, the GV can
> + // possibly be demoted.
> + //
> + // We don't do an exhaustive search for memory operations - simply look
> + // through bitcasts as they're quite common and benign.
> + const DataLayout &DL = GV->getParent()->getDataLayout();
> + SmallVector<Instruction *, 4> Loads, Stores;
> + for (auto *U : GV->users()) {
> + if (Operator::getOpcode(U) == Instruction::BitCast) {
This is a fair-sized block of indented code. Might be nice to deal with
it last to reduce indentation?
> + auto *OrigType = cast<PointerType>(U->getOperand(0)->getType());
> + auto *NewType = cast<PointerType>(U->getType());
> +
> + for (auto *UU : U->users()) {
> + if (isa<LoadInst>(UU)) {
> + if (DL.getTypeStoreSize(NewType->getElementType()) >
> + DL.getTypeStoreSize(OrigType->getElementType()))
> + // This load is larger than the GV; this means that it may access
> + // memory past the end of any preceding stores which renders our
> + // analysis useless. We must bail out here.
> + return false;
Why do you need this logic here, but not below (where the direct user of
the GV is a `LoadInst`)?
Could you end up with multiple bitcasts all pointing at the same
`LoadInst`?
> +
> + Loads.push_back(cast<Instruction>(UU));
> + } else if (isa<StoreInst>(UU)) {
> + if (DL.getTypeStoreSize(NewType->getElementType()) <
> + DL.getTypeStoreSize(OrigType->getElementType()))
> + // This store is smaller than the GV; this means it only partially
> + // writes memory that subsequent loads may depend on. This may
> + // cause us to think that a load doesn't depend on memory at the
> + // start of the function when it actually does. We must bail out.
> + return false;
> +
> + Stores.push_back(cast<Instruction>(UU));
> + } else {
> + return false;
> + }
> + }
> + continue;
> + }
> +
> + Instruction *I = dyn_cast<Instruction>(U);
> + if (!I)
> + return false;
> + assert(I->getParent()->getParent() == F);
Comment inside the assertion would be nice.
> +
> + if (isa<LoadInst>(I)) {
> + Loads.push_back(I);
> + } else if (isa<StoreInst>(I)) {
> + Stores.push_back(I);
> + } else {
> + return false;
> + }
> + }
> +
> + // We have identified all uses of GV into loads and stores. Now check if all
> + // of them are known not to depend on the value of the global at the function
> + // entry point. We do this by ensuring that every load is dominated by at
> + // least one store.
> + auto &DT = getAnalysis<DominatorTreeWrapperPass>(*const_cast<Function *>(F))
> + .getDomTree();
> +
> + for (auto *L : Loads)
> + if (!std::any_of(Stores.begin(), Stores.end(),
> + [&](Instruction *S) { return DT.dominates(S, L); }))
> + return false;
> +
> + // All loads have known dependences inside F, so the global can be localized.
> + return true;
> +}
> +
> /// ProcessInternalGlobal - Analyze the specified global variable and optimize
> /// it if possible. If we make a change, return true.
> bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
> Module::global_iterator &GVI,
> const GlobalStatus &GS) {
> auto &DL = GV->getParent()->getDataLayout();
> - // If this is a first class global and has only one accessing function
> - // and this function is main (which we know is not recursive), we replace
> - // the global with a local alloca in this function.
> - //
> - // NOTE: It doesn't make sense to promote non-single-value types since we
> - // are just replacing static memory to stack memory.
> - //
> - // If the global is in different address space, don't bring it to stack.
> - if (!GS.HasMultipleAccessingFunctions &&
> - GS.AccessingFunction && !GS.HasNonInstructionUser &&
> - GV->getType()->getElementType()->isSingleValueType() &&
> - GS.AccessingFunction->getName() == "main" &&
> - GS.AccessingFunction->hasExternalLinkage() &&
> - GV->getType()->getAddressSpace() == 0) {
> - DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
> - Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
> - ->getEntryBlock().begin());
> - Type *ElemTy = GV->getType()->getElementType();
> - // FIXME: Pass Global's alignment when globals have alignment
> - AllocaInst *Alloca = new AllocaInst(ElemTy, nullptr,
> - GV->getName(), &FirstI);
> - if (!isa<UndefValue>(GV->getInitializer()))
> - new StoreInst(GV->getInitializer(), Alloca, &FirstI);
> -
> - GV->replaceAllUsesWith(Alloca);
> - GV->eraseFromParent();
> - ++NumLocalized;
> - return true;
> - }
> -
> // If the global is never loaded (but may be stored to), it is dead.
> // Delete it now.
> if (!GS.IsLoaded) {
> - DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV);
> + DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n");
This fixup should be committed separately.
>
> bool Changed;
> if (isLeakCheckerRoot(GV)) {
> @@ -1784,7 +1834,10 @@
> ++NumDeleted;
> Changed = true;
> }
> - return Changed;
> + // If we didn't change anything, fall through to maybe perform more
> + // optimizations.
> + if (Changed)
> + return true;
Should this be committed separately? It seems like an unrelated
optimization. Might be good to have a test for it, too.
>
> } else if (GS.StoredType <= GlobalStatus::InitializerStored) {
> DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
> @@ -1852,6 +1905,37 @@
> }
> }
>
> + // If this is a first class global and has only one accessing function and
> + // this function is non-recursive, we replace the global with a local alloca
> + // in this function.
> + //
> + // NOTE: It doesn't make sense to promote non-single-value types since we
> + // are just replacing static memory to stack memory.
> + //
> + // If the global is in different address space, don't bring it to stack.
> + if (!GS.HasMultipleAccessingFunctions &&
> + GS.AccessingFunction && !GS.HasNonInstructionUser &&
> + GV->getType()->getElementType()->isSingleValueType() &&
> + GV->getType()->getAddressSpace() == 0 &&
> + !GV->isExternallyInitialized() &&
> + GS.AccessingFunction->doesNotRecurse() &&
> + isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV) ) {
> + DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n");
> + Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
> + ->getEntryBlock().begin());
> + Type *ElemTy = GV->getType()->getElementType();
> + // FIXME: Pass Global's alignment when globals have alignment
> + AllocaInst *Alloca = new AllocaInst(ElemTy, nullptr,
> + GV->getName(), &FirstI);
> + if (!isa<UndefValue>(GV->getInitializer()))
> + new StoreInst(GV->getInitializer(), Alloca, &FirstI);
> +
> + GV->replaceAllUsesWith(Alloca);
> + GV->eraseFromParent();
> + ++NumLocalized;
> + return true;
> + }
> +
> return false;
> }
>
> @@ -3044,7 +3128,7 @@
>
> auto &DL = M.getDataLayout();
> TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
> -
> +
This whitespace only change should be committed separately (if at all...
is it adding or removing spaces?).
> bool LocalChange = true;
> while (LocalChange) {
>
More information about the llvm-commits
mailing list