[llvm] r305530 - [PartialInlining] Code Refactoring

Sean Silva via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 16 03:43:43 PDT 2017


On Thu, Jun 15, 2017 at 4:56 PM, Xinliang David Li via llvm-commits <
llvm-commits at lists.llvm.org> wrote:

> Author: davidxl
> Date: Thu Jun 15 18:56:59 2017
> New Revision: 305530
>
> URL: http://llvm.org/viewvc/llvm-project?rev=305530&view=rev
> Log:
> [PartialInlining] Code Refactoring
>
> This is a NFC code refactoring and interface cleanup. This paves the
> way to enable outlining-only mode for the partial inliner.
>
>
>
> Modified:
>     llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp
>
> Modified: llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/
> PartialInlining.cpp?rev=305530&r1=305529&r2=305530&view=diff
> ============================================================
> ==================
> --- llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp (original)
> +++ llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp Thu Jun 15 18:56:59
> 2017
> @@ -103,6 +103,35 @@ struct PartialInlinerImpl {
>    bool run(Module &M);
>    Function *unswitchFunction(Function *F);
>
> +  // This class speculatively clones the the function to be partial
> inlined.
> +  // At the end of partial inlining, the remaining callsites to the cloned
> +  // function that are not partially inlined will be fixed up to reference
> +  // the original function, and the cloned function will be erased.
> +  struct FunctionCloner {
> +    FunctionCloner(Function *F, FunctionOutliningInfo *OI);
> +    ~FunctionCloner();
> +
> +    // Prepare for function outlining: making sure there is only
> +    // one incoming edge from the extracted/outlined region to
> +    // the return block.
> +    void NormalizeReturnBlock();
> +
> +    // Do function outlining:
> +    Function *DoFunctionOutlining();
>

This should be named doFunctionOutlining to follow the LLVM naming convention (function names start with a lowercase letter).

-- Sean Silva


> +
> +    Function *OrigFunc = nullptr;
> +    Function *ClonedFunc = nullptr;
> +    Function *OutlinedFunc = nullptr;
> +    BasicBlock *OutliningCallBB = nullptr;
> +    // ClonedFunc is inlined in one of its callers after function
> +    // outlining.
> +    bool IsFunctionInlined = false;
> +    // The cost of the region to be outlined.
> +    int OutlinedRegionCost = 0;
> +    std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
> +    std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
> +  };
> +
>  private:
>    int NumPartialInlining = 0;
>    std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
> @@ -114,27 +143,18 @@ private:
>    // The result is no larger than 1 and is represented using BP.
>    // (Note that the outlined region's 'head' block can only have incoming
>    // edges from the guarding entry blocks).
> -  BranchProbability getOutliningCallBBRelativeFreq(Function *F,
> -                                                   FunctionOutliningInfo
> *OI,
> -                                                   Function
> *DuplicateFunction,
> -                                                   BlockFrequencyInfo
> *BFI,
> -                                                   BasicBlock
> *OutliningCallBB);
> +  BranchProbability getOutliningCallBBRelativeFreq(FunctionCloner
> &Cloner);
>
>    // Return true if the callee of CS should be partially inlined with
>    // profit.
> -  bool shouldPartialInline(CallSite CS, Function *F,
> FunctionOutliningInfo *OI,
> -                           BlockFrequencyInfo *CalleeBFI,
> -                           BasicBlock *OutliningCallBB,
> -                           int OutliningCallOverhead,
> +  bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner,
> +                           BlockFrequency WeightedOutliningRcost,
>                             OptimizationRemarkEmitter &ORE);
>
>    // Try to inline DuplicateFunction (cloned from F with call to
>    // the OutlinedFunction into its callers. Return true
>    // if there is any successful inlining.
> -  bool tryPartialInline(Function *DuplicateFunction,
> -                        Function *F, /*orignal function */
> -                        FunctionOutliningInfo *OI, Function
> *OutlinedFunction,
> -                        BlockFrequencyInfo *CalleeBFI);
> +  bool tryPartialInline(FunctionCloner &Cloner);
>
>    // Compute the mapping from use site of DuplicationFunction to the
> enclosing
>    // BB's profile count.
> @@ -146,7 +166,7 @@ private:
>              NumPartialInlining >= MaxNumPartialInlining);
>    }
>
> -  CallSite getCallSite(User *U) {
> +  static CallSite getCallSite(User *U) {
>      CallSite CS;
>      if (CallInst *CI = dyn_cast<CallInst>(U))
>        CS = CallSite(CI);
> @@ -157,7 +177,7 @@ private:
>      return CS;
>    }
>
> -  CallSite getOneCallSiteTo(Function *F) {
> +  static CallSite getOneCallSiteTo(Function *F) {
>      User *User = *F->user_begin();
>      return getCallSite(User);
>    }
> @@ -171,20 +191,15 @@ private:
>
>    // Returns the costs associated with function outlining:
>    // - The first value is the non-weighted runtime cost for making the
> call
> -  //   to the outlined function 'OutlinedFunction', including the
> addtional
> -  //   setup cost in the outlined function itself;
> +  //   to the outlined function, including the addtional  setup cost in
> the
> +  //    outlined function itself;
>    // - The second value is the estimated size of the new call sequence in
> -  //   basic block 'OutliningCallBB';
> -  // - The third value is the estimated size of the original code from
> -  //   function 'F' that is extracted into the outlined function.
> -  std::tuple<int, int, int>
> -  computeOutliningCosts(Function *F, const FunctionOutliningInfo
> *OutliningInfo,
> -                        Function *OutlinedFunction,
> -                        BasicBlock *OutliningCallBB);
> +  //   basic block Cloner.OutliningCallBB;
> +  std::tuple<int, int> computeOutliningCosts(FunctionCloner &Cloner);
>    // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
>    // approximate both the size and runtime cost (Note that in the current
>    // inline cost analysis, there is no clear distinction there either).
> -  int computeBBInlineCost(BasicBlock *BB);
> +  static int computeBBInlineCost(BasicBlock *BB);
>
>    std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function
> *F);
>
> @@ -396,19 +411,19 @@ static bool hasProfileData(Function *F,
>    return false;
>  }
>
> -BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
> -    Function *F, FunctionOutliningInfo *OI, Function *DuplicateFunction,
> -    BlockFrequencyInfo *BFI, BasicBlock *OutliningCallBB) {
> +BranchProbability
> +PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner
> &Cloner) {
>
>    auto EntryFreq =
> -      BFI->getBlockFreq(&DuplicateFunction->getEntryBlock());
> -  auto OutliningCallFreq = BFI->getBlockFreq(OutliningCallBB);
> +      Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.
> ClonedFunc->getEntryBlock());
> +  auto OutliningCallFreq =
> +      Cloner.ClonedFuncBFI->getBlockFreq(Cloner.OutliningCallBB);
>
>    auto OutlineRegionRelFreq =
>        BranchProbability::getBranchProbability(OutliningCallFreq.
> getFrequency(),
>                                                EntryFreq.getFrequency());
>
> -  if (hasProfileData(F, OI))
> +  if (hasProfileData(Cloner.OrigFunc, Cloner.ClonedOI.get()))
>      return OutlineRegionRelFreq;
>
>    // When profile data is not available, we need to be conservative in
> @@ -433,15 +448,17 @@ BranchProbability PartialInlinerImpl::ge
>  }
>
>  bool PartialInlinerImpl::shouldPartialInline(
> -    CallSite CS, Function *F /* Original Callee */, FunctionOutliningInfo
> *OI,
> -    BlockFrequencyInfo *CalleeBFI, BasicBlock *OutliningCallBB,
> -    int NonWeightedOutliningRcost, OptimizationRemarkEmitter &ORE) {
> +    CallSite CS, FunctionCloner &Cloner, BlockFrequency
> WeightedOutliningRcost,
> +    OptimizationRemarkEmitter &ORE) {
> +
>    using namespace ore;
>    if (SkipCostAnalysis)
>      return true;
>
>    Instruction *Call = CS.getInstruction();
>    Function *Callee = CS.getCalledFunction();
> +  assert(Callee == Cloner.ClonedFunc);
> +
>    Function *Caller = CS.getCaller();
>    auto &CalleeTTI = (*GetTTI)(*Callee);
>    InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
> @@ -449,14 +466,14 @@ bool PartialInlinerImpl::shouldPartialIn
>
>    if (IC.isAlways()) {
>      ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
> -             << NV("Callee", F)
> +             << NV("Callee", Cloner.OrigFunc)
>               << " should always be fully inlined, not partially");
>      return false;
>    }
>
>    if (IC.isNever()) {
>      ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
> -             << NV("Callee", F) << " not partially inlined into "
> +             << NV("Callee", Cloner.OrigFunc) << " not partially inlined
> into "
>               << NV("Caller", Caller)
>               << " because it should never be inlined (cost=never)");
>      return false;
> @@ -464,29 +481,25 @@ bool PartialInlinerImpl::shouldPartialIn
>
>    if (!IC) {
>      ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
> -             << NV("Callee", F) << " not partially inlined into "
> +             << NV("Callee", Cloner.OrigFunc) << " not partially inlined
> into "
>               << NV("Caller", Caller) << " because too costly to inline
> (cost="
>               << NV("Cost", IC.getCost()) << ", threshold="
>               << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
>      return false;
>    }
>    const DataLayout &DL = Caller->getParent()->getDataLayout();
> +
>    // The savings of eliminating the call:
>    int NonWeightedSavings = getCallsiteCost(CS, DL);
>    BlockFrequency NormWeightedSavings(NonWeightedSavings);
>
> -  auto RelativeFreq =
> -      getOutliningCallBBRelativeFreq(F, OI, Callee, CalleeBFI,
> OutliningCallBB);
> -  auto NormWeightedRcost =
> -      BlockFrequency(NonWeightedOutliningRcost) * RelativeFreq;
> -
>    // Weighted saving is smaller than weighted cost, return false
> -  if (NormWeightedSavings < NormWeightedRcost) {
> +  if (NormWeightedSavings < WeightedOutliningRcost) {
>      ORE.emit(
>          OptimizationRemarkAnalysis(DEBUG_TYPE,
> "OutliningCallcostTooHigh", Call)
> -        << NV("Callee", F) << " not partially inlined into "
> +        << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
>          << NV("Caller", Caller) << " runtime overhead (overhead="
> -        << NV("Overhead", (unsigned)NormWeightedRcost.getFrequency())
> +        << NV("Overhead", (unsigned)WeightedOutliningRcost.
> getFrequency())
>          << ", savings="
>          << NV("Savings", (unsigned)NormWeightedSavings.getFrequency())
> << ")"
>          << " of making the outlined call is too high");
> @@ -495,7 +508,7 @@ bool PartialInlinerImpl::shouldPartialIn
>    }
>
>    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE,
> "CanBePartiallyInlined", Call)
> -           << NV("Callee", F) << " can be partially inlined into "
> +           << NV("Callee", Cloner.OrigFunc) << " can be partially inlined
> into "
>             << NV("Caller", Caller) << " with cost=" << NV("Cost",
> IC.getCost())
>             << " (threshold="
>             << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
> @@ -551,50 +564,32 @@ int PartialInlinerImpl::computeBBInlineC
>    return InlineCost;
>  }
>
> -std::tuple<int, int, int> PartialInlinerImpl::computeOutliningCosts(
> -    Function *F, const FunctionOutliningInfo *OI, Function
> *OutlinedFunction,
> -    BasicBlock *OutliningCallBB) {
> -  // First compute the cost of the outlined region 'OI' in the original
> -  // function 'F'.
> -  // FIXME: The code extractor (outliner) can now do code sinking/hoisting
> -  // to reduce outlining cost. The hoisted/sunk code currently do not
> -  // incur any runtime cost so it is still OK to compare the outlined
> -  // function cost with the outlined region in the original function.
> -  // If this ever changes, we will need to introduce new extractor api
> -  // to pass the information.
> -  int OutlinedRegionCost = 0;
> -  for (BasicBlock &BB : *F) {
> -    if (&BB != OI->ReturnBlock &&
> -        // Assuming Entry set is small -- do a linear search here:
> -        std::find(OI->Entries.begin(), OI->Entries.end(), &BB) ==
> -            OI->Entries.end()) {
> -      OutlinedRegionCost += computeBBInlineCost(&BB);
> -    }
> -  }
> +std::tuple<int, int>
> +PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) {
>
>    // Now compute the cost of the call sequence to the outlined function
>    // 'OutlinedFunction' in BB 'OutliningCallBB':
> -  int OutliningFuncCallCost = computeBBInlineCost(OutliningCallBB);
> +  int OutliningFuncCallCost = computeBBInlineCost(Cloner.
> OutliningCallBB);
>
>    // Now compute the cost of the extracted/outlined function itself:
>    int OutlinedFunctionCost = 0;
> -  for (BasicBlock &BB : *OutlinedFunction) {
> +  for (BasicBlock &BB : *Cloner.OutlinedFunc) {
>      OutlinedFunctionCost += computeBBInlineCost(&BB);
>    }
>
> -  assert(OutlinedFunctionCost >= OutlinedRegionCost &&
> +  assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
>           "Outlined function cost should be no less than the outlined
> region");
>    // The code extractor introduces a new root and exit stub blocks with
>    // additional unconditional branches. Those branches will be eliminated
>    // later with bb layout. The cost should be adjusted accordingly:
>    OutlinedFunctionCost -= 2 * InlineConstants::InstrCost;
>
> -  int OutliningRuntimeOverhead = OutliningFuncCallCost +
> -                                 (OutlinedFunctionCost -
> OutlinedRegionCost) +
> -                                 ExtraOutliningPenalty;
> +  int OutliningRuntimeOverhead =
> +      OutliningFuncCallCost +
> +      (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
> +      ExtraOutliningPenalty;
>
> -  return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead,
> -                         OutlinedRegionCost);
> +  return std::make_tuple(OutliningFuncCallCost,
> OutliningRuntimeOverhead);
>  }
>
>  // Create the callsite to profile count map which is
> @@ -641,42 +636,30 @@ void PartialInlinerImpl::computeCallsite
>    }
>  }
>
> -Function *PartialInlinerImpl::unswitchFunction(Function *F) {
> -
> -  if (F->hasAddressTaken())
> -    return nullptr;
> -
> -  // Let inliner handle it
> -  if (F->hasFnAttribute(Attribute::AlwaysInline))
> -    return nullptr;
> -
> -  if (F->hasFnAttribute(Attribute::NoInline))
> -    return nullptr;
> -
> -  if (PSI->isFunctionEntryCold(F))
> -    return nullptr;
> -
> -  if (F->user_begin() == F->user_end())
> -    return nullptr;
> -
> -  std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
> -
> -  if (!OI)
> -    return nullptr;
> +PartialInlinerImpl::FunctionCloner::FunctionCloner(Function *F,
> +                                                   FunctionOutliningInfo
> *OI)
> +    : OrigFunc(F) {
> +  ClonedOI = llvm::make_unique<FunctionOutliningInfo>();
>
>    // Clone the function, so that we can hack away on it.
>    ValueToValueMapTy VMap;
> -  Function *DuplicateFunction = CloneFunction(F, VMap);
> -  BasicBlock *NewReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
> -  BasicBlock *NewNonReturnBlock = cast<BasicBlock>(VMap[OI->
> NonReturnBlock]);
> -  DenseSet<BasicBlock *> NewEntries;
> +  ClonedFunc = CloneFunction(F, VMap);
> +
> +  ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
> +  ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
>    for (BasicBlock *BB : OI->Entries) {
> -    NewEntries.insert(cast<BasicBlock>(VMap[BB]));
> +    ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
> +  }
> +  for (BasicBlock *E : OI->ReturnBlockPreds) {
> +    BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
> +    ClonedOI->ReturnBlockPreds.push_back(NewE);
>    }
> -
>    // Go ahead and update all uses to the duplicate, so that we can just
>    // use the inliner functionality when we're done hacking.
> -  F->replaceAllUsesWith(DuplicateFunction);
> +  F->replaceAllUsesWith(ClonedFunc);
> +}
> +
> +void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
>
>    auto getFirstPHI = [](BasicBlock *BB) {
>      BasicBlock::iterator I = BB->begin();
> @@ -692,14 +675,19 @@ Function *PartialInlinerImpl::unswitchFu
>      }
>      return FirstPhi;
>    };
> +
>    // Special hackery is needed with PHI nodes that have inputs from more
> than
>    // one extracted block.  For simplicity, just split the PHIs into a
> two-level
>    // sequence of PHIs, some of which will go in the extracted region, and
> some
>    // of which will go outside.
> -  BasicBlock *PreReturn = NewReturnBlock;
> +  BasicBlock *PreReturn = ClonedOI->ReturnBlock;
>    // only split block when necessary:
>    PHINode *FirstPhi = getFirstPHI(PreReturn);
> -  unsigned NumPredsFromEntries = OI->ReturnBlockPreds.size();
> +  unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
> +
> +  if (!FirstPhi || FirstPhi->getNumIncomingValues() <=
> NumPredsFromEntries + 1)
> +    return;
> +
>    auto IsTrivialPhi = [](PHINode *PN) -> Value * {
>      Value *CommonValue = PN->getIncomingValue(0);
>      if (all_of(PN->incoming_values(),
> @@ -708,143 +696,185 @@ Function *PartialInlinerImpl::unswitchFu
>      return nullptr;
>    };
>
> -  if (FirstPhi && FirstPhi->getNumIncomingValues() > NumPredsFromEntries
> + 1) {
> -
> -    NewReturnBlock = NewReturnBlock->splitBasicBlock(
> -        NewReturnBlock->getFirstNonPHI()->getIterator());
> -    BasicBlock::iterator I = PreReturn->begin();
> -    Instruction *Ins = &NewReturnBlock->front();
> -    SmallVector<Instruction *, 4> DeadPhis;
> -    while (I != PreReturn->end()) {
> -      PHINode *OldPhi = dyn_cast<PHINode>(I);
> -      if (!OldPhi)
> -        break;
> -
> -      PHINode *RetPhi =
> -          PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1,
> "", Ins);
> -      OldPhi->replaceAllUsesWith(RetPhi);
> -      Ins = NewReturnBlock->getFirstNonPHI();
> -
> -      RetPhi->addIncoming(&*I, PreReturn);
> -      for (BasicBlock *E : OI->ReturnBlockPreds) {
> -        BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
> -        RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewE),
> NewE);
> -        OldPhi->removeIncomingValue(NewE);
> -      }
> -
> -      // After incoming values splitting, the old phi may become trivial.
> -      // Keeping the trivial phi can introduce definition inside the
> outline
> -      // region which is live-out, causing necessary overhead (load, store
> -      // arg passing etc).
> -      if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
> -        OldPhi->replaceAllUsesWith(OldPhiVal);
> -        DeadPhis.push_back(OldPhi);
> -      }
> +  ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
> +      ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
> +  BasicBlock::iterator I = PreReturn->begin();
> +  Instruction *Ins = &ClonedOI->ReturnBlock->front();
> +  SmallVector<Instruction *, 4> DeadPhis;
> +  while (I != PreReturn->end()) {
> +    PHINode *OldPhi = dyn_cast<PHINode>(I);
> +    if (!OldPhi)
> +      break;
>
> -      ++I;
> +    PHINode *RetPhi =
> +        PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "",
> Ins);
> +    OldPhi->replaceAllUsesWith(RetPhi);
> +    Ins = ClonedOI->ReturnBlock->getFirstNonPHI();
> +
> +    RetPhi->addIncoming(&*I, PreReturn);
> +    for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
> +      RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
> +      OldPhi->removeIncomingValue(E);
> +    }
> +
> +    // After incoming values splitting, the old phi may become trivial.
> +    // Keeping the trivial phi can introduce definition inside the outline
> +    // region which is live-out, causing necessary overhead (load, store
> +    // arg passing etc).
> +    if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
> +      OldPhi->replaceAllUsesWith(OldPhiVal);
> +      DeadPhis.push_back(OldPhi);
> +    }
> +    ++I;
>      }
> -
>      for (auto *DP : DeadPhis)
>        DP->eraseFromParent();
>
> -    for (auto E : OI->ReturnBlockPreds) {
> -      BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
> -      NewE->getTerminator()->replaceUsesOfWith(PreReturn,
> NewReturnBlock);
> +    for (auto E : ClonedOI->ReturnBlockPreds) {
> +      E->getTerminator()->replaceUsesOfWith(PreReturn,
> ClonedOI->ReturnBlock);
>      }
> -  }
> +}
>
> +Function *PartialInlinerImpl::FunctionCloner::DoFunctionOutlining() {
>    // Returns true if the block is to be partial inlined into the caller
>    // (i.e. not to be extracted to the out of line function)
> -  auto ToBeInlined = [&](BasicBlock *BB) {
> -    return BB == NewReturnBlock || NewEntries.count(BB);
> +  auto ToBeInlined = [&, this](BasicBlock *BB) {
> +    return BB == ClonedOI->ReturnBlock ||
> +           (std::find(ClonedOI->Entries.begin(),
> ClonedOI->Entries.end(), BB) !=
> +            ClonedOI->Entries.end());
>    };
> +
>    // Gather up the blocks that we're going to extract.
>    std::vector<BasicBlock *> ToExtract;
> -  ToExtract.push_back(NewNonReturnBlock);
> -  for (BasicBlock &BB : *DuplicateFunction)
> -    if (!ToBeInlined(&BB) && &BB != NewNonReturnBlock)
> +  ToExtract.push_back(ClonedOI->NonReturnBlock);
> +  OutlinedRegionCost +=
> +      PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock);
> +  for (BasicBlock &BB : *ClonedFunc)
> +    if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
>        ToExtract.push_back(&BB);
> +      // FIXME: the code extractor may hoist/sink more code
> +      // into the outlined function which may make the outlining
> +      // overhead (the difference of the outlined function cost
> +      // and OutliningRegionCost) look larger.
> +      OutlinedRegionCost += computeBBInlineCost(&BB);
> +    }
>
>    // The CodeExtractor needs a dominator tree.
>    DominatorTree DT;
> -  DT.recalculate(*DuplicateFunction);
> +  DT.recalculate(*ClonedFunc);
>
>    // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
>    LoopInfo LI(DT);
> -  BranchProbabilityInfo BPI(*DuplicateFunction, LI);
> -  BlockFrequencyInfo BFI(*DuplicateFunction, BPI, LI);
> +  BranchProbabilityInfo BPI(*ClonedFunc, LI);
> +  ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
>
>    // Extract the body of the if.
> -  Function *OutlinedFunction =
> -      CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, &BFI, &BPI)
> -          .extractCodeRegion();
> +  OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
> +                               ClonedFuncBFI.get(), &BPI)
> +                     .extractCodeRegion();
> +
> +  if (OutlinedFunc) {
> +    OutliningCallBB = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc)
> +        .getInstruction()
> +        ->getParent();
> +    assert(OutliningCallBB->getParent() == ClonedFunc);
> +  }
>
> -  bool AnyInline =
> -      tryPartialInline(DuplicateFunction, F, OI.get(), OutlinedFunction,
> &BFI);
> +  return OutlinedFunc;
> +}
>
> +PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
>    // Ditch the duplicate, since we're done with it, and rewrite all
> remaining
>    // users (function pointers, etc.) back to the original function.
> -  DuplicateFunction->replaceAllUsesWith(F);
> -  DuplicateFunction->eraseFromParent();
> +  ClonedFunc->replaceAllUsesWith(OrigFunc);
> +  ClonedFunc->eraseFromParent();
> +  if (!IsFunctionInlined) {
> +    // Remove the function that is speculatively created if there is no
> +    // reference.
> +    if (OutlinedFunc)
> +      OutlinedFunc->eraseFromParent();
> +  }
> +}
> +
> +Function *PartialInlinerImpl::unswitchFunction(Function *F) {
> +
> +  if (F->hasAddressTaken())
> +    return nullptr;
> +
> +  // Let inliner handle it
> +  if (F->hasFnAttribute(Attribute::AlwaysInline))
> +    return nullptr;
> +
> +  if (F->hasFnAttribute(Attribute::NoInline))
> +    return nullptr;
> +
> +  if (PSI->isFunctionEntryCold(F))
> +    return nullptr;
> +
> +  if (F->user_begin() == F->user_end())
> +    return nullptr;
> +
> +  std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
> +
> +  if (!OI)
> +    return nullptr;
> +
> +  FunctionCloner Cloner(F, OI.get());
> +  Cloner.NormalizeReturnBlock();
> +  Function *OutlinedFunction = Cloner.DoFunctionOutlining();
> +
> +  bool AnyInline = tryPartialInline(Cloner);
>
>    if (AnyInline)
>      return OutlinedFunction;
>
> -  // Remove the function that is speculatively created:
> -  if (OutlinedFunction)
> -    OutlinedFunction->eraseFromParent();
> -
>    return nullptr;
>  }
>
> -bool PartialInlinerImpl::tryPartialInline(Function *DuplicateFunction,
> -                                          Function *F,
> -                                          FunctionOutliningInfo *OI,
> -                                          Function *OutlinedFunction,
> -                                          BlockFrequencyInfo *CalleeBFI) {
> -  if (OutlinedFunction == nullptr)
> -    return false;
> -
> +bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
>    int NonWeightedRcost;
>    int SizeCost;
> -  int OutlinedRegionSizeCost;
>
> -  auto OutliningCallBB =
> -      getOneCallSiteTo(OutlinedFunction).getInstruction()->getParent();
> +  if (Cloner.OutlinedFunc == nullptr)
> +    return false;
> +
> +  std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner);
>
> -  std::tie(SizeCost, NonWeightedRcost, OutlinedRegionSizeCost) =
> -      computeOutliningCosts(F, OI, OutlinedFunction, OutliningCallBB);
> +  auto RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
> +  auto WeightedRcost = BlockFrequency(NonWeightedRcost) *
> RelativeToEntryFreq;
>
>    // The call sequence to the outlined function is larger than the
> original
>    // outlined region size, it does not increase the chances of inlining
> -  // 'F' with outlining (The inliner usies the size increase to model the
> -  // the cost of inlining a callee).
> -  if (!SkipCostAnalysis && OutlinedRegionSizeCost < SizeCost) {
> -    OptimizationRemarkEmitter ORE(F);
> +  // the function with outlining (The inliner usies the size increase to
> +  // model the cost of inlining a callee).
> +  if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
> +    OptimizationRemarkEmitter ORE(Cloner.OrigFunc);
>      DebugLoc DLoc;
>      BasicBlock *Block;
> -    std::tie(DLoc, Block) = getOneDebugLoc(DuplicateFunction);
> +    std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc);
>      ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE,
> "OutlineRegionTooSmall",
>                                          DLoc, Block)
> -             << ore::NV("Function", F)
> +             << ore::NV("Function", Cloner.OrigFunc)
>               << " not partially inlined into callers (Original Size = "
> -             << ore::NV("OutlinedRegionOriginalSize",
> OutlinedRegionSizeCost)
> +             << ore::NV("OutlinedRegionOriginalSize",
> Cloner.OutlinedRegionCost)
>               << ", Size of call sequence to outlined function = "
>               << ore::NV("NewSize", SizeCost) << ")");
>      return false;
>    }
>
> -  assert(F->user_begin() == F->user_end() &&
> +  assert(Cloner.OrigFunc->user_begin() == Cloner.OrigFunc->user_end() &&
>           "F's users should all be replaced!");
> -  std::vector<User *> Users(DuplicateFunction->user_begin(),
> -                            DuplicateFunction->user_end());
> +
> +  std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
> +                            Cloner.ClonedFunc->user_end());
>
>    DenseMap<User *, uint64_t> CallSiteToProfCountMap;
> -  if (F->getEntryCount())
> -    computeCallsiteToProfCountMap(DuplicateFunction,
> CallSiteToProfCountMap);
> +  if (Cloner.OrigFunc->getEntryCount())
> +    computeCallsiteToProfCountMap(Cloner.ClonedFunc,
> CallSiteToProfCountMap);
>
> -  auto CalleeEntryCount = F->getEntryCount();
> +  auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
>    uint64_t CalleeEntryCountV = (CalleeEntryCount ? *CalleeEntryCount : 0);
> +
>    bool AnyInline = false;
>    for (User *User : Users) {
>      CallSite CS = getCallSite(User);
> @@ -854,13 +884,12 @@ bool PartialInlinerImpl::tryPartialInlin
>
>      OptimizationRemarkEmitter ORE(CS.getCaller());
>
> -    if (!shouldPartialInline(CS, F, OI, CalleeBFI, OutliningCallBB,
> -                             NonWeightedRcost, ORE))
> +    if (!shouldPartialInline(CS, Cloner, WeightedRcost, ORE))
>        continue;
>
>      ORE.emit(
>          OptimizationRemark(DEBUG_TYPE, "PartiallyInlined",
> CS.getInstruction())
> -        << ore::NV("Callee", F) << " partially inlined into "
> +        << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into
> "
>          << ore::NV("Caller", CS.getCaller()));
>
>      InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);
> @@ -878,8 +907,11 @@ bool PartialInlinerImpl::tryPartialInlin
>      NumPartialInlined++;
>    }
>
> -  if (AnyInline && CalleeEntryCount)
> -    F->setEntryCount(CalleeEntryCountV);
> +  if (AnyInline) {
> +    Cloner.IsFunctionInlined = true;
> +    if (CalleeEntryCount)
> +      Cloner.OrigFunc->setEntryCount(CalleeEntryCountV);
> +  }
>
>    return AnyInline;
>  }
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170616/ade1314e/attachment.html>


More information about the llvm-commits mailing list