[llvm] r305530 - [PartialInlining] Code Refactoring
Sean Silva via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 16 03:43:43 PDT 2017
On Thu, Jun 15, 2017 at 4:56 PM, Xinliang David Li via llvm-commits <
llvm-commits at lists.llvm.org> wrote:
> Author: davidxl
> Date: Thu Jun 15 18:56:59 2017
> New Revision: 305530
>
> URL: http://llvm.org/viewvc/llvm-project?rev=305530&view=rev
> Log:
> [PartialInlining] Code Refactoring
>
> This is a NFC code refactoring and interface cleanup. This paves the
> way to enable outlining-only mode for the partial inliner.
>
>
>
> Modified:
> llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp
>
> Modified: llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/
> PartialInlining.cpp?rev=305530&r1=305529&r2=305530&view=diff
> ============================================================
> ==================
> --- llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp (original)
> +++ llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp Thu Jun 15 18:56:59
> 2017
> @@ -103,6 +103,35 @@ struct PartialInlinerImpl {
> bool run(Module &M);
> Function *unswitchFunction(Function *F);
>
> + // This class speculatively clones the the function to be partial
> inlined.
> + // At the end of partial inlining, the remaining callsites to the cloned
> + // function that are not partially inlined will be fixed up to reference
> + // the original function, and the cloned function will be erased.
> + struct FunctionCloner {
> + FunctionCloner(Function *F, FunctionOutliningInfo *OI);
> + ~FunctionCloner();
> +
> + // Prepare for function outlining: making sure there is only
> + // one incoming edge from the extracted/outlined region to
> + // the return block.
> + void NormalizeReturnBlock();
> +
> + // Do function outlining:
> + Function *DoFunctionOutlining();
>
This should be named doFunctionOutlining to follow the LLVM naming convention (function names are camel case and start with a lowercase letter).
-- Sean Silva
> +
> + Function *OrigFunc = nullptr;
> + Function *ClonedFunc = nullptr;
> + Function *OutlinedFunc = nullptr;
> + BasicBlock *OutliningCallBB = nullptr;
> + // ClonedFunc is inlined in one of its callers after function
> + // outlining.
> + bool IsFunctionInlined = false;
> + // The cost of the region to be outlined.
> + int OutlinedRegionCost = 0;
> + std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
> + std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
> + };
> +
> private:
> int NumPartialInlining = 0;
> std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
> @@ -114,27 +143,18 @@ private:
> // The result is no larger than 1 and is represented using BP.
> // (Note that the outlined region's 'head' block can only have incoming
> // edges from the guarding entry blocks).
> - BranchProbability getOutliningCallBBRelativeFreq(Function *F,
> - FunctionOutliningInfo
> *OI,
> - Function
> *DuplicateFunction,
> - BlockFrequencyInfo
> *BFI,
> - BasicBlock
> *OutliningCallBB);
> + BranchProbability getOutliningCallBBRelativeFreq(FunctionCloner
> &Cloner);
>
> // Return true if the callee of CS should be partially inlined with
> // profit.
> - bool shouldPartialInline(CallSite CS, Function *F,
> FunctionOutliningInfo *OI,
> - BlockFrequencyInfo *CalleeBFI,
> - BasicBlock *OutliningCallBB,
> - int OutliningCallOverhead,
> + bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner,
> + BlockFrequency WeightedOutliningRcost,
> OptimizationRemarkEmitter &ORE);
>
> // Try to inline DuplicateFunction (cloned from F with call to
> // the OutlinedFunction into its callers. Return true
> // if there is any successful inlining.
> - bool tryPartialInline(Function *DuplicateFunction,
> - Function *F, /*orignal function */
> - FunctionOutliningInfo *OI, Function
> *OutlinedFunction,
> - BlockFrequencyInfo *CalleeBFI);
> + bool tryPartialInline(FunctionCloner &Cloner);
>
> // Compute the mapping from use site of DuplicationFunction to the
> enclosing
> // BB's profile count.
> @@ -146,7 +166,7 @@ private:
> NumPartialInlining >= MaxNumPartialInlining);
> }
>
> - CallSite getCallSite(User *U) {
> + static CallSite getCallSite(User *U) {
> CallSite CS;
> if (CallInst *CI = dyn_cast<CallInst>(U))
> CS = CallSite(CI);
> @@ -157,7 +177,7 @@ private:
> return CS;
> }
>
> - CallSite getOneCallSiteTo(Function *F) {
> + static CallSite getOneCallSiteTo(Function *F) {
> User *User = *F->user_begin();
> return getCallSite(User);
> }
> @@ -171,20 +191,15 @@ private:
>
> // Returns the costs associated with function outlining:
> // - The first value is the non-weighted runtime cost for making the
> call
> - // to the outlined function 'OutlinedFunction', including the
> addtional
> - // setup cost in the outlined function itself;
> + // to the outlined function, including the addtional setup cost in
> the
> + // outlined function itself;
> // - The second value is the estimated size of the new call sequence in
> - // basic block 'OutliningCallBB';
> - // - The third value is the estimated size of the original code from
> - // function 'F' that is extracted into the outlined function.
> - std::tuple<int, int, int>
> - computeOutliningCosts(Function *F, const FunctionOutliningInfo
> *OutliningInfo,
> - Function *OutlinedFunction,
> - BasicBlock *OutliningCallBB);
> + // basic block Cloner.OutliningCallBB;
> + std::tuple<int, int> computeOutliningCosts(FunctionCloner &Cloner);
> // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
> // approximate both the size and runtime cost (Note that in the current
> // inline cost analysis, there is no clear distinction there either).
> - int computeBBInlineCost(BasicBlock *BB);
> + static int computeBBInlineCost(BasicBlock *BB);
>
> std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function
> *F);
>
> @@ -396,19 +411,19 @@ static bool hasProfileData(Function *F,
> return false;
> }
>
> -BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
> - Function *F, FunctionOutliningInfo *OI, Function *DuplicateFunction,
> - BlockFrequencyInfo *BFI, BasicBlock *OutliningCallBB) {
> +BranchProbability
> +PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner
> &Cloner) {
>
> auto EntryFreq =
> - BFI->getBlockFreq(&DuplicateFunction->getEntryBlock());
> - auto OutliningCallFreq = BFI->getBlockFreq(OutliningCallBB);
> + Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.
> ClonedFunc->getEntryBlock());
> + auto OutliningCallFreq =
> + Cloner.ClonedFuncBFI->getBlockFreq(Cloner.OutliningCallBB);
>
> auto OutlineRegionRelFreq =
> BranchProbability::getBranchProbability(OutliningCallFreq.
> getFrequency(),
> EntryFreq.getFrequency());
>
> - if (hasProfileData(F, OI))
> + if (hasProfileData(Cloner.OrigFunc, Cloner.ClonedOI.get()))
> return OutlineRegionRelFreq;
>
> // When profile data is not available, we need to be conservative in
> @@ -433,15 +448,17 @@ BranchProbability PartialInlinerImpl::ge
> }
>
> bool PartialInlinerImpl::shouldPartialInline(
> - CallSite CS, Function *F /* Original Callee */, FunctionOutliningInfo
> *OI,
> - BlockFrequencyInfo *CalleeBFI, BasicBlock *OutliningCallBB,
> - int NonWeightedOutliningRcost, OptimizationRemarkEmitter &ORE) {
> + CallSite CS, FunctionCloner &Cloner, BlockFrequency
> WeightedOutliningRcost,
> + OptimizationRemarkEmitter &ORE) {
> +
> using namespace ore;
> if (SkipCostAnalysis)
> return true;
>
> Instruction *Call = CS.getInstruction();
> Function *Callee = CS.getCalledFunction();
> + assert(Callee == Cloner.ClonedFunc);
> +
> Function *Caller = CS.getCaller();
> auto &CalleeTTI = (*GetTTI)(*Callee);
> InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
> @@ -449,14 +466,14 @@ bool PartialInlinerImpl::shouldPartialIn
>
> if (IC.isAlways()) {
> ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
> - << NV("Callee", F)
> + << NV("Callee", Cloner.OrigFunc)
> << " should always be fully inlined, not partially");
> return false;
> }
>
> if (IC.isNever()) {
> ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
> - << NV("Callee", F) << " not partially inlined into "
> + << NV("Callee", Cloner.OrigFunc) << " not partially inlined
> into "
> << NV("Caller", Caller)
> << " because it should never be inlined (cost=never)");
> return false;
> @@ -464,29 +481,25 @@ bool PartialInlinerImpl::shouldPartialIn
>
> if (!IC) {
> ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
> - << NV("Callee", F) << " not partially inlined into "
> + << NV("Callee", Cloner.OrigFunc) << " not partially inlined
> into "
> << NV("Caller", Caller) << " because too costly to inline
> (cost="
> << NV("Cost", IC.getCost()) << ", threshold="
> << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
> return false;
> }
> const DataLayout &DL = Caller->getParent()->getDataLayout();
> +
> // The savings of eliminating the call:
> int NonWeightedSavings = getCallsiteCost(CS, DL);
> BlockFrequency NormWeightedSavings(NonWeightedSavings);
>
> - auto RelativeFreq =
> - getOutliningCallBBRelativeFreq(F, OI, Callee, CalleeBFI,
> OutliningCallBB);
> - auto NormWeightedRcost =
> - BlockFrequency(NonWeightedOutliningRcost) * RelativeFreq;
> -
> // Weighted saving is smaller than weighted cost, return false
> - if (NormWeightedSavings < NormWeightedRcost) {
> + if (NormWeightedSavings < WeightedOutliningRcost) {
> ORE.emit(
> OptimizationRemarkAnalysis(DEBUG_TYPE,
> "OutliningCallcostTooHigh", Call)
> - << NV("Callee", F) << " not partially inlined into "
> + << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
> << NV("Caller", Caller) << " runtime overhead (overhead="
> - << NV("Overhead", (unsigned)NormWeightedRcost.getFrequency())
> + << NV("Overhead", (unsigned)WeightedOutliningRcost.
> getFrequency())
> << ", savings="
> << NV("Savings", (unsigned)NormWeightedSavings.getFrequency())
> << ")"
> << " of making the outlined call is too high");
> @@ -495,7 +508,7 @@ bool PartialInlinerImpl::shouldPartialIn
> }
>
> ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE,
> "CanBePartiallyInlined", Call)
> - << NV("Callee", F) << " can be partially inlined into "
> + << NV("Callee", Cloner.OrigFunc) << " can be partially inlined
> into "
> << NV("Caller", Caller) << " with cost=" << NV("Cost",
> IC.getCost())
> << " (threshold="
> << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
> @@ -551,50 +564,32 @@ int PartialInlinerImpl::computeBBInlineC
> return InlineCost;
> }
>
> -std::tuple<int, int, int> PartialInlinerImpl::computeOutliningCosts(
> - Function *F, const FunctionOutliningInfo *OI, Function
> *OutlinedFunction,
> - BasicBlock *OutliningCallBB) {
> - // First compute the cost of the outlined region 'OI' in the original
> - // function 'F'.
> - // FIXME: The code extractor (outliner) can now do code sinking/hoisting
> - // to reduce outlining cost. The hoisted/sunk code currently do not
> - // incur any runtime cost so it is still OK to compare the outlined
> - // function cost with the outlined region in the original function.
> - // If this ever changes, we will need to introduce new extractor api
> - // to pass the information.
> - int OutlinedRegionCost = 0;
> - for (BasicBlock &BB : *F) {
> - if (&BB != OI->ReturnBlock &&
> - // Assuming Entry set is small -- do a linear search here:
> - std::find(OI->Entries.begin(), OI->Entries.end(), &BB) ==
> - OI->Entries.end()) {
> - OutlinedRegionCost += computeBBInlineCost(&BB);
> - }
> - }
> +std::tuple<int, int>
> +PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) {
>
> // Now compute the cost of the call sequence to the outlined function
> // 'OutlinedFunction' in BB 'OutliningCallBB':
> - int OutliningFuncCallCost = computeBBInlineCost(OutliningCallBB);
> + int OutliningFuncCallCost = computeBBInlineCost(Cloner.
> OutliningCallBB);
>
> // Now compute the cost of the extracted/outlined function itself:
> int OutlinedFunctionCost = 0;
> - for (BasicBlock &BB : *OutlinedFunction) {
> + for (BasicBlock &BB : *Cloner.OutlinedFunc) {
> OutlinedFunctionCost += computeBBInlineCost(&BB);
> }
>
> - assert(OutlinedFunctionCost >= OutlinedRegionCost &&
> + assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
> "Outlined function cost should be no less than the outlined
> region");
> // The code extractor introduces a new root and exit stub blocks with
> // additional unconditional branches. Those branches will be eliminated
> // later with bb layout. The cost should be adjusted accordingly:
> OutlinedFunctionCost -= 2 * InlineConstants::InstrCost;
>
> - int OutliningRuntimeOverhead = OutliningFuncCallCost +
> - (OutlinedFunctionCost -
> OutlinedRegionCost) +
> - ExtraOutliningPenalty;
> + int OutliningRuntimeOverhead =
> + OutliningFuncCallCost +
> + (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
> + ExtraOutliningPenalty;
>
> - return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead,
> - OutlinedRegionCost);
> + return std::make_tuple(OutliningFuncCallCost,
> OutliningRuntimeOverhead);
> }
>
> // Create the callsite to profile count map which is
> @@ -641,42 +636,30 @@ void PartialInlinerImpl::computeCallsite
> }
> }
>
> -Function *PartialInlinerImpl::unswitchFunction(Function *F) {
> -
> - if (F->hasAddressTaken())
> - return nullptr;
> -
> - // Let inliner handle it
> - if (F->hasFnAttribute(Attribute::AlwaysInline))
> - return nullptr;
> -
> - if (F->hasFnAttribute(Attribute::NoInline))
> - return nullptr;
> -
> - if (PSI->isFunctionEntryCold(F))
> - return nullptr;
> -
> - if (F->user_begin() == F->user_end())
> - return nullptr;
> -
> - std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
> -
> - if (!OI)
> - return nullptr;
> +PartialInlinerImpl::FunctionCloner::FunctionCloner(Function *F,
> + FunctionOutliningInfo
> *OI)
> + : OrigFunc(F) {
> + ClonedOI = llvm::make_unique<FunctionOutliningInfo>();
>
> // Clone the function, so that we can hack away on it.
> ValueToValueMapTy VMap;
> - Function *DuplicateFunction = CloneFunction(F, VMap);
> - BasicBlock *NewReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
> - BasicBlock *NewNonReturnBlock = cast<BasicBlock>(VMap[OI->
> NonReturnBlock]);
> - DenseSet<BasicBlock *> NewEntries;
> + ClonedFunc = CloneFunction(F, VMap);
> +
> + ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
> + ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
> for (BasicBlock *BB : OI->Entries) {
> - NewEntries.insert(cast<BasicBlock>(VMap[BB]));
> + ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
> + }
> + for (BasicBlock *E : OI->ReturnBlockPreds) {
> + BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
> + ClonedOI->ReturnBlockPreds.push_back(NewE);
> }
> -
> // Go ahead and update all uses to the duplicate, so that we can just
> // use the inliner functionality when we're done hacking.
> - F->replaceAllUsesWith(DuplicateFunction);
> + F->replaceAllUsesWith(ClonedFunc);
> +}
> +
> +void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
>
> auto getFirstPHI = [](BasicBlock *BB) {
> BasicBlock::iterator I = BB->begin();
> @@ -692,14 +675,19 @@ Function *PartialInlinerImpl::unswitchFu
> }
> return FirstPhi;
> };
> +
> // Special hackery is needed with PHI nodes that have inputs from more
> than
> // one extracted block. For simplicity, just split the PHIs into a
> two-level
> // sequence of PHIs, some of which will go in the extracted region, and
> some
> // of which will go outside.
> - BasicBlock *PreReturn = NewReturnBlock;
> + BasicBlock *PreReturn = ClonedOI->ReturnBlock;
> // only split block when necessary:
> PHINode *FirstPhi = getFirstPHI(PreReturn);
> - unsigned NumPredsFromEntries = OI->ReturnBlockPreds.size();
> + unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
> +
> + if (!FirstPhi || FirstPhi->getNumIncomingValues() <=
> NumPredsFromEntries + 1)
> + return;
> +
> auto IsTrivialPhi = [](PHINode *PN) -> Value * {
> Value *CommonValue = PN->getIncomingValue(0);
> if (all_of(PN->incoming_values(),
> @@ -708,143 +696,185 @@ Function *PartialInlinerImpl::unswitchFu
> return nullptr;
> };
>
> - if (FirstPhi && FirstPhi->getNumIncomingValues() > NumPredsFromEntries
> + 1) {
> -
> - NewReturnBlock = NewReturnBlock->splitBasicBlock(
> - NewReturnBlock->getFirstNonPHI()->getIterator());
> - BasicBlock::iterator I = PreReturn->begin();
> - Instruction *Ins = &NewReturnBlock->front();
> - SmallVector<Instruction *, 4> DeadPhis;
> - while (I != PreReturn->end()) {
> - PHINode *OldPhi = dyn_cast<PHINode>(I);
> - if (!OldPhi)
> - break;
> -
> - PHINode *RetPhi =
> - PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1,
> "", Ins);
> - OldPhi->replaceAllUsesWith(RetPhi);
> - Ins = NewReturnBlock->getFirstNonPHI();
> -
> - RetPhi->addIncoming(&*I, PreReturn);
> - for (BasicBlock *E : OI->ReturnBlockPreds) {
> - BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
> - RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewE),
> NewE);
> - OldPhi->removeIncomingValue(NewE);
> - }
> -
> - // After incoming values splitting, the old phi may become trivial.
> - // Keeping the trivial phi can introduce definition inside the
> outline
> - // region which is live-out, causing necessary overhead (load, store
> - // arg passing etc).
> - if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
> - OldPhi->replaceAllUsesWith(OldPhiVal);
> - DeadPhis.push_back(OldPhi);
> - }
> + ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
> + ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
> + BasicBlock::iterator I = PreReturn->begin();
> + Instruction *Ins = &ClonedOI->ReturnBlock->front();
> + SmallVector<Instruction *, 4> DeadPhis;
> + while (I != PreReturn->end()) {
> + PHINode *OldPhi = dyn_cast<PHINode>(I);
> + if (!OldPhi)
> + break;
>
> - ++I;
> + PHINode *RetPhi =
> + PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "",
> Ins);
> + OldPhi->replaceAllUsesWith(RetPhi);
> + Ins = ClonedOI->ReturnBlock->getFirstNonPHI();
> +
> + RetPhi->addIncoming(&*I, PreReturn);
> + for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
> + RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
> + OldPhi->removeIncomingValue(E);
> + }
> +
> + // After incoming values splitting, the old phi may become trivial.
> + // Keeping the trivial phi can introduce definition inside the outline
> + // region which is live-out, causing necessary overhead (load, store
> + // arg passing etc).
> + if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
> + OldPhi->replaceAllUsesWith(OldPhiVal);
> + DeadPhis.push_back(OldPhi);
> + }
> + ++I;
> }
> -
> for (auto *DP : DeadPhis)
> DP->eraseFromParent();
>
> - for (auto E : OI->ReturnBlockPreds) {
> - BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
> - NewE->getTerminator()->replaceUsesOfWith(PreReturn,
> NewReturnBlock);
> + for (auto E : ClonedOI->ReturnBlockPreds) {
> + E->getTerminator()->replaceUsesOfWith(PreReturn,
> ClonedOI->ReturnBlock);
> }
> - }
> +}
>
> +Function *PartialInlinerImpl::FunctionCloner::DoFunctionOutlining() {
> // Returns true if the block is to be partial inlined into the caller
> // (i.e. not to be extracted to the out of line function)
> - auto ToBeInlined = [&](BasicBlock *BB) {
> - return BB == NewReturnBlock || NewEntries.count(BB);
> + auto ToBeInlined = [&, this](BasicBlock *BB) {
> + return BB == ClonedOI->ReturnBlock ||
> + (std::find(ClonedOI->Entries.begin(),
> ClonedOI->Entries.end(), BB) !=
> + ClonedOI->Entries.end());
> };
> +
> // Gather up the blocks that we're going to extract.
> std::vector<BasicBlock *> ToExtract;
> - ToExtract.push_back(NewNonReturnBlock);
> - for (BasicBlock &BB : *DuplicateFunction)
> - if (!ToBeInlined(&BB) && &BB != NewNonReturnBlock)
> + ToExtract.push_back(ClonedOI->NonReturnBlock);
> + OutlinedRegionCost +=
> + PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock);
> + for (BasicBlock &BB : *ClonedFunc)
> + if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
> ToExtract.push_back(&BB);
> + // FIXME: the code extractor may hoist/sink more code
> + // into the outlined function which may make the outlining
> + // overhead (the difference of the outlined function cost
> + // and OutliningRegionCost) look larger.
> + OutlinedRegionCost += computeBBInlineCost(&BB);
> + }
>
> // The CodeExtractor needs a dominator tree.
> DominatorTree DT;
> - DT.recalculate(*DuplicateFunction);
> + DT.recalculate(*ClonedFunc);
>
> // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
> LoopInfo LI(DT);
> - BranchProbabilityInfo BPI(*DuplicateFunction, LI);
> - BlockFrequencyInfo BFI(*DuplicateFunction, BPI, LI);
> + BranchProbabilityInfo BPI(*ClonedFunc, LI);
> + ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
>
> // Extract the body of the if.
> - Function *OutlinedFunction =
> - CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, &BFI, &BPI)
> - .extractCodeRegion();
> + OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
> + ClonedFuncBFI.get(), &BPI)
> + .extractCodeRegion();
> +
> + if (OutlinedFunc) {
> + OutliningCallBB = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc)
> + .getInstruction()
> + ->getParent();
> + assert(OutliningCallBB->getParent() == ClonedFunc);
> + }
>
> - bool AnyInline =
> - tryPartialInline(DuplicateFunction, F, OI.get(), OutlinedFunction,
> &BFI);
> + return OutlinedFunc;
> +}
>
> +PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
> // Ditch the duplicate, since we're done with it, and rewrite all
> remaining
> // users (function pointers, etc.) back to the original function.
> - DuplicateFunction->replaceAllUsesWith(F);
> - DuplicateFunction->eraseFromParent();
> + ClonedFunc->replaceAllUsesWith(OrigFunc);
> + ClonedFunc->eraseFromParent();
> + if (!IsFunctionInlined) {
> + // Remove the function that is speculatively created if there is no
> + // reference.
> + if (OutlinedFunc)
> + OutlinedFunc->eraseFromParent();
> + }
> +}
> +
> +Function *PartialInlinerImpl::unswitchFunction(Function *F) {
> +
> + if (F->hasAddressTaken())
> + return nullptr;
> +
> + // Let inliner handle it
> + if (F->hasFnAttribute(Attribute::AlwaysInline))
> + return nullptr;
> +
> + if (F->hasFnAttribute(Attribute::NoInline))
> + return nullptr;
> +
> + if (PSI->isFunctionEntryCold(F))
> + return nullptr;
> +
> + if (F->user_begin() == F->user_end())
> + return nullptr;
> +
> + std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
> +
> + if (!OI)
> + return nullptr;
> +
> + FunctionCloner Cloner(F, OI.get());
> + Cloner.NormalizeReturnBlock();
> + Function *OutlinedFunction = Cloner.DoFunctionOutlining();
> +
> + bool AnyInline = tryPartialInline(Cloner);
>
> if (AnyInline)
> return OutlinedFunction;
>
> - // Remove the function that is speculatively created:
> - if (OutlinedFunction)
> - OutlinedFunction->eraseFromParent();
> -
> return nullptr;
> }
>
> -bool PartialInlinerImpl::tryPartialInline(Function *DuplicateFunction,
> - Function *F,
> - FunctionOutliningInfo *OI,
> - Function *OutlinedFunction,
> - BlockFrequencyInfo *CalleeBFI) {
> - if (OutlinedFunction == nullptr)
> - return false;
> -
> +bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
> int NonWeightedRcost;
> int SizeCost;
> - int OutlinedRegionSizeCost;
>
> - auto OutliningCallBB =
> - getOneCallSiteTo(OutlinedFunction).getInstruction()->getParent();
> + if (Cloner.OutlinedFunc == nullptr)
> + return false;
> +
> + std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner);
>
> - std::tie(SizeCost, NonWeightedRcost, OutlinedRegionSizeCost) =
> - computeOutliningCosts(F, OI, OutlinedFunction, OutliningCallBB);
> + auto RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
> + auto WeightedRcost = BlockFrequency(NonWeightedRcost) *
> RelativeToEntryFreq;
>
> // The call sequence to the outlined function is larger than the
> original
> // outlined region size, it does not increase the chances of inlining
> - // 'F' with outlining (The inliner usies the size increase to model the
> - // the cost of inlining a callee).
> - if (!SkipCostAnalysis && OutlinedRegionSizeCost < SizeCost) {
> - OptimizationRemarkEmitter ORE(F);
> + // the function with outlining (The inliner usies the size increase to
> + // model the cost of inlining a callee).
> + if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
> + OptimizationRemarkEmitter ORE(Cloner.OrigFunc);
> DebugLoc DLoc;
> BasicBlock *Block;
> - std::tie(DLoc, Block) = getOneDebugLoc(DuplicateFunction);
> + std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc);
> ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE,
> "OutlineRegionTooSmall",
> DLoc, Block)
> - << ore::NV("Function", F)
> + << ore::NV("Function", Cloner.OrigFunc)
> << " not partially inlined into callers (Original Size = "
> - << ore::NV("OutlinedRegionOriginalSize",
> OutlinedRegionSizeCost)
> + << ore::NV("OutlinedRegionOriginalSize",
> Cloner.OutlinedRegionCost)
> << ", Size of call sequence to outlined function = "
> << ore::NV("NewSize", SizeCost) << ")");
> return false;
> }
>
> - assert(F->user_begin() == F->user_end() &&
> + assert(Cloner.OrigFunc->user_begin() == Cloner.OrigFunc->user_end() &&
> "F's users should all be replaced!");
> - std::vector<User *> Users(DuplicateFunction->user_begin(),
> - DuplicateFunction->user_end());
> +
> + std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
> + Cloner.ClonedFunc->user_end());
>
> DenseMap<User *, uint64_t> CallSiteToProfCountMap;
> - if (F->getEntryCount())
> - computeCallsiteToProfCountMap(DuplicateFunction,
> CallSiteToProfCountMap);
> + if (Cloner.OrigFunc->getEntryCount())
> + computeCallsiteToProfCountMap(Cloner.ClonedFunc,
> CallSiteToProfCountMap);
>
> - auto CalleeEntryCount = F->getEntryCount();
> + auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
> uint64_t CalleeEntryCountV = (CalleeEntryCount ? *CalleeEntryCount : 0);
> +
> bool AnyInline = false;
> for (User *User : Users) {
> CallSite CS = getCallSite(User);
> @@ -854,13 +884,12 @@ bool PartialInlinerImpl::tryPartialInlin
>
> OptimizationRemarkEmitter ORE(CS.getCaller());
>
> - if (!shouldPartialInline(CS, F, OI, CalleeBFI, OutliningCallBB,
> - NonWeightedRcost, ORE))
> + if (!shouldPartialInline(CS, Cloner, WeightedRcost, ORE))
> continue;
>
> ORE.emit(
> OptimizationRemark(DEBUG_TYPE, "PartiallyInlined",
> CS.getInstruction())
> - << ore::NV("Callee", F) << " partially inlined into "
> + << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into
> "
> << ore::NV("Caller", CS.getCaller()));
>
> InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);
> @@ -878,8 +907,11 @@ bool PartialInlinerImpl::tryPartialInlin
> NumPartialInlined++;
> }
>
> - if (AnyInline && CalleeEntryCount)
> - F->setEntryCount(CalleeEntryCountV);
> + if (AnyInline) {
> + Cloner.IsFunctionInlined = true;
> + if (CalleeEntryCount)
> + Cloner.OrigFunc->setEntryCount(CalleeEntryCountV);
> + }
>
> return AnyInline;
> }
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170616/ade1314e/attachment.html>
More information about the llvm-commits
mailing list