[PATCH] D14148: [GlobalOpt] Demote globals to locals more aggressively

Duncan P. N. Exon Smith via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 12 09:39:22 PST 2015


> On 2015-Nov-12, at 08:22, James Molloy <james.molloy at arm.com> wrote:
> 
> jmolloy updated this revision to Diff 40057.
> jmolloy added a comment.
> 
> Hi Manman, Mehdi,
> 
> Updated patch attached.
> 
> - Now, Clang knows how to add norecurse to "main", so we can remove that logic (hooray!)
> - Removed all the norecurse logic, replaced with a query to F->doesNotRecurse()
> 
> Please take a look.
> 
> Cheers,
> 
> James
> 
> 
> Repository:
>  rL LLVM
> 
> http://reviews.llvm.org/D14148
> 
> Files:
>  lib/Transforms/IPO/GlobalOpt.cpp
>  test/Transforms/GlobalOpt/global-demotion.ll
>  test/Transforms/GlobalOpt/metadata.ll
>  test/Transforms/MergeFunc/crash2.ll
> 
> <D14148.40057.patch>

> Index: test/Transforms/GlobalOpt/metadata.ll
> ===================================================================
> --- test/Transforms/GlobalOpt/metadata.ll
> +++ test/Transforms/GlobalOpt/metadata.ll
> @@ -5,10 +5,11 @@
>  ; to that containing %G should likewise drop to null.
>  @G = internal global i8** null
>  
> -define i32 @main(i32 %argc, i8** %argv) {
> +define i32 @main(i32 %argc, i8** %argv) norecurse {
>  ; CHECK-LABEL: @main(
>  ; CHECK: %G = alloca
>    store i8** %argv, i8*** @G
> +  %1 = load i8**, i8*** @G

What's this new `load` needed for?

>    ret i32 0
>  }
>  
> Index: lib/Transforms/IPO/GlobalOpt.cpp
> ===================================================================
> --- lib/Transforms/IPO/GlobalOpt.cpp
> +++ lib/Transforms/IPO/GlobalOpt.cpp
> @@ -28,6 +28,7 @@
>  #include "llvm/IR/Constants.h"
>  #include "llvm/IR/DataLayout.h"
>  #include "llvm/IR/DerivedTypes.h"
> +#include "llvm/IR/Dominators.h"
>  #include "llvm/IR/GetElementPtrTypeIterator.h"
>  #include "llvm/IR/Instructions.h"
>  #include "llvm/IR/IntrinsicInst.h"
> @@ -69,6 +70,7 @@
>    struct GlobalOpt : public ModulePass {
>      void getAnalysisUsage(AnalysisUsage &AU) const override {
>        AU.addRequired<TargetLibraryInfoWrapperPass>();
> +      AU.addRequired<DominatorTreeWrapperPass>();
>      }
>      static char ID; // Pass identification, replacement for typeid
>      GlobalOpt() : ModulePass(ID) {
> @@ -85,7 +87,9 @@
>      bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI,
>                                 const GlobalStatus &GS);
>      bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);
> -
> +    
> +    bool isPointerValueDeadOnEntryToFunction(const Function *F, GlobalValue *GV);
> +    
>      TargetLibraryInfo *TLI;
>      SmallSet<const Comdat *, 8> NotDiscardableComdats;
>    };
> @@ -95,6 +99,7 @@
>  INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
>                  "Global Variable Optimizer", false, false)
>  INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
> +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
>  INITIALIZE_PASS_END(GlobalOpt, "globalopt",
>                  "Global Variable Optimizer", false, false)
>  
> @@ -1727,46 +1732,91 @@
>    return ProcessInternalGlobal(GV, GVI, GS);
>  }
>  
> +bool GlobalOpt::isPointerValueDeadOnEntryToFunction(const Function *F, GlobalValue *GV) {
> +  // Find all uses of GV. We expect them all to be in F, and if we can't

I think you should word this more strongly, since it's a pre-requisite
that they're all in `F`.

> +  // identify any of the uses we bail out.
> +  //
> +  // On each of these uses, identify if the memory that GV points to is
> +  // used/required/live at the start of the function. If it is not, for example
> +  // if the first thing the function does is store to the GV, the GV can
> +  // possibly be demoted.
> +  //
> +  // We don't do an exhaustive search for memory operations - simply look
> +  // through bitcasts as they're quite common and benign.
> +  const DataLayout &DL = GV->getParent()->getDataLayout();
> +  SmallVector<Instruction *, 4> Loads, Stores;
> +  for (auto *U : GV->users()) {
> +    if (Operator::getOpcode(U) == Instruction::BitCast) {

This is a fair-sized block of indented code.  Might be nice to deal with
it last to reduce indentation?

> +      auto *OrigType = cast<PointerType>(U->getOperand(0)->getType());
> +      auto *NewType = cast<PointerType>(U->getType());
> +
> +      for (auto *UU : U->users()) {
> +        if (isa<LoadInst>(UU)) {
> +          if (DL.getTypeStoreSize(NewType->getElementType()) >
> +              DL.getTypeStoreSize(OrigType->getElementType()))
> +            // This load is larger than the GV; this means that it may access
> +            // memory past the end of any preceding stores which renders our
> +            // analysis useless. We must bail out here.
> +            return false;

Why do you need this logic here, but not below (where the direct user of
the GV is a `LoadInst`)?

Could you end up with multiple bitcasts all pointing at the same
`LoadInst`?

> +
> +          Loads.push_back(cast<Instruction>(UU));
> +        } else if (isa<StoreInst>(UU)) {
> +          if (DL.getTypeStoreSize(NewType->getElementType()) <
> +              DL.getTypeStoreSize(OrigType->getElementType()))
> +            // This store is smaller than the GV; this means it only partially
> +            // writes memory that subsequent loads may depend on. This may
> +            // cause us to think that a load doesn't depend on memory at the
> +            // start of the function when it actually does. We must bail out.
> +            return false;
> +
> +          Stores.push_back(cast<Instruction>(UU));
> +        } else {
> +          return false;
> +        }
> +      }
> +      continue;
> +    }
> +
> +    Instruction *I = dyn_cast<Instruction>(U);
> +    if (!I)
> +      return false;
> +    assert(I->getParent()->getParent() == F);

Comment inside the assertion would be nice.

> +
> +    if (isa<LoadInst>(I)) {
> +      Loads.push_back(I);
> +    } else if (isa<StoreInst>(I)) {
> +      Stores.push_back(I);
> +    } else {
> +      return false;
> +    }
> +  }
> +
> +  // We have identified all uses of GV into loads and stores. Now check if all
> +  // of them are known not to depend on the value of the global at the function
> +  // entry point. We do this by ensuring that every load is dominated by at
> +  // least one store.
> +  auto &DT = getAnalysis<DominatorTreeWrapperPass>(*const_cast<Function *>(F))
> +                 .getDomTree();
> +
> +  for (auto *L : Loads)
> +    if (!std::any_of(Stores.begin(), Stores.end(),
> +                     [&](Instruction *S) { return DT.dominates(S, L); }))
> +      return false;
> +
> +  // All loads have known dependences inside F, so the global can be localized.
> +  return true;
> +}
> +
>  /// ProcessInternalGlobal - Analyze the specified global variable and optimize
>  /// it if possible.  If we make a change, return true.
>  bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
>                                        Module::global_iterator &GVI,
>                                        const GlobalStatus &GS) {
>    auto &DL = GV->getParent()->getDataLayout();
> -  // If this is a first class global and has only one accessing function
> -  // and this function is main (which we know is not recursive), we replace
> -  // the global with a local alloca in this function.
> -  //
> -  // NOTE: It doesn't make sense to promote non-single-value types since we
> -  // are just replacing static memory to stack memory.
> -  //
> -  // If the global is in different address space, don't bring it to stack.
> -  if (!GS.HasMultipleAccessingFunctions &&
> -      GS.AccessingFunction && !GS.HasNonInstructionUser &&
> -      GV->getType()->getElementType()->isSingleValueType() &&
> -      GS.AccessingFunction->getName() == "main" &&
> -      GS.AccessingFunction->hasExternalLinkage() &&
> -      GV->getType()->getAddressSpace() == 0) {
> -    DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
> -    Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
> -                                                   ->getEntryBlock().begin());
> -    Type *ElemTy = GV->getType()->getElementType();
> -    // FIXME: Pass Global's alignment when globals have alignment
> -    AllocaInst *Alloca = new AllocaInst(ElemTy, nullptr,
> -                                        GV->getName(), &FirstI);
> -    if (!isa<UndefValue>(GV->getInitializer()))
> -      new StoreInst(GV->getInitializer(), Alloca, &FirstI);
> -
> -    GV->replaceAllUsesWith(Alloca);
> -    GV->eraseFromParent();
> -    ++NumLocalized;
> -    return true;
> -  }
> -
>    // If the global is never loaded (but may be stored to), it is dead.
>    // Delete it now.
>    if (!GS.IsLoaded) {
> -    DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV);
> +    DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n");

This fixup should be committed separately.

>  
>      bool Changed;
>      if (isLeakCheckerRoot(GV)) {
> @@ -1784,7 +1834,10 @@
>        ++NumDeleted;
>        Changed = true;
>      }
> -    return Changed;
> +    // If we didn't change anything, fall through to maybe perform more
> +    // optimizations.
> +    if (Changed)
> +      return true;

Should this be committed separately?  It seems like an unrelated
optimization.  Might be good to have a test for it, too.

>  
>    } else if (GS.StoredType <= GlobalStatus::InitializerStored) {
>      DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
> @@ -1852,6 +1905,37 @@
>      }
>    }
>  
> +  // If this is a first class global and has only one accessing function and
> +  // this function is non-recursive, we replace the global with a local alloca
> +  // in this function.
> +  //
> +  // NOTE: It doesn't make sense to promote non-single-value types since we
> +  // are just replacing static memory to stack memory.
> +  //
> +  // If the global is in different address space, don't bring it to stack.
> +  if (!GS.HasMultipleAccessingFunctions &&
> +      GS.AccessingFunction && !GS.HasNonInstructionUser &&
> +      GV->getType()->getElementType()->isSingleValueType() &&
> +      GV->getType()->getAddressSpace() == 0 &&
> +      !GV->isExternallyInitialized() &&
> +      GS.AccessingFunction->doesNotRecurse() &&
> +      isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV) ) {
> +    DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n");
> +    Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
> +                                                   ->getEntryBlock().begin());
> +    Type *ElemTy = GV->getType()->getElementType();
> +    // FIXME: Pass Global's alignment when globals have alignment
> +    AllocaInst *Alloca = new AllocaInst(ElemTy, nullptr,
> +                                        GV->getName(), &FirstI);
> +    if (!isa<UndefValue>(GV->getInitializer()))
> +      new StoreInst(GV->getInitializer(), Alloca, &FirstI);
> +
> +    GV->replaceAllUsesWith(Alloca);
> +    GV->eraseFromParent();
> +    ++NumLocalized;
> +    return true;
> +  }
> +
>    return false;
>  }
>  
> @@ -3044,7 +3128,7 @@
>  
>    auto &DL = M.getDataLayout();
>    TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
> -
> +  

This whitespace only change should be committed separately (if at all...
is it adding or removing spaces?).

>    bool LocalChange = true;
>    while (LocalChange) {
> 



More information about the llvm-commits mailing list