<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Thu, Jun 15, 2017 at 4:56 PM, Xinliang David Li via llvm-commits <span dir="ltr"><<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: davidxl<br>
Date: Thu Jun 15 18:56:59 2017<br>
New Revision: 305530<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=305530&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project?rev=305530&view=rev</a><br>
Log:<br>
[PartialInlining] Code Refactoring<br>
<br>
This is an NFC (no functional change) code refactoring and interface cleanup. This paves the<br>
way to enable outlining-only mode for the partial inliner.<br>
<br>
<br>
<br>
Modified:<br>
llvm/trunk/lib/Transforms/IPO/<wbr>PartialInlining.cpp<br>
<br>
Modified: llvm/trunk/lib/Transforms/IPO/<wbr>PartialInlining.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp?rev=305530&r1=305529&r2=305530&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/lib/<wbr>Transforms/IPO/<wbr>PartialInlining.cpp?rev=<wbr>305530&r1=305529&r2=305530&<wbr>view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/lib/Transforms/IPO/<wbr>PartialInlining.cpp (original)<br>
+++ llvm/trunk/lib/Transforms/IPO/<wbr>PartialInlining.cpp Thu Jun 15 18:56:59 2017<br>
@@ -103,6 +103,35 @@ struct PartialInlinerImpl {<br>
bool run(Module &M);<br>
Function *unswitchFunction(Function *F);<br>
<br>
+ // This class speculatively clones the function to be partially inlined.<br>
+ // At the end of partial inlining, the remaining callsites to the cloned<br>
+ // function that are not partially inlined will be fixed up to reference<br>
+ // the original function, and the cloned function will be erased.<br>
+ struct FunctionCloner {<br>
+ FunctionCloner(Function *F, FunctionOutliningInfo *OI);<br>
+ ~FunctionCloner();<br>
+<br>
+ // Prepare for function outlining: making sure there is only<br>
+ // one incoming edge from the extracted/outlined region to<br>
+ // the return block.<br>
+ void NormalizeReturnBlock();<br>
+<br>
+ // Do function outlining:<br>
+ Function *DoFunctionOutlining();<br></blockquote><div><br></div><div>This should be named doFunctionOutlining to follow the LLVM naming convention (function names are camelCase and start with a lowercase letter).</div><div><br></div><div>-- Sean Silva</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br>
+ Function *OrigFunc = nullptr;<br>
+ Function *ClonedFunc = nullptr;<br>
+ Function *OutlinedFunc = nullptr;<br>
+ BasicBlock *OutliningCallBB = nullptr;<br>
+ // ClonedFunc is inlined in one of its callers after function<br>
+ // outlining.<br>
+ bool IsFunctionInlined = false;<br>
+ // The cost of the region to be outlined.<br>
+ int OutlinedRegionCost = 0;<br>
+ std::unique_ptr<<wbr>FunctionOutliningInfo> ClonedOI = nullptr;<br>
+ std::unique_ptr<<wbr>BlockFrequencyInfo> ClonedFuncBFI = nullptr;<br>
+ };<br>
+<br>
private:<br>
int NumPartialInlining = 0;<br>
std::function<AssumptionCache &(Function &)> *GetAssumptionCache;<br>
@@ -114,27 +143,18 @@ private:<br>
// The result is no larger than 1 and is represented using BP.<br>
// (Note that the outlined region's 'head' block can only have incoming<br>
// edges from the guarding entry blocks).<br>
- BranchProbability getOutliningCallBBRelativeFreq<wbr>(Function *F,<br>
- FunctionOutliningInfo *OI,<br>
- Function *DuplicateFunction,<br>
- BlockFrequencyInfo *BFI,<br>
- BasicBlock *OutliningCallBB);<br>
+ BranchProbability getOutliningCallBBRelativeFreq<wbr>(FunctionCloner &Cloner);<br>
<br>
// Return true if the callee of CS should be partially inlined with<br>
// profit.<br>
- bool shouldPartialInline(CallSite CS, Function *F, FunctionOutliningInfo *OI,<br>
- BlockFrequencyInfo *CalleeBFI,<br>
- BasicBlock *OutliningCallBB,<br>
- int OutliningCallOverhead,<br>
+ bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner,<br>
+ BlockFrequency WeightedOutliningRcost,<br>
OptimizationRemarkEmitter &ORE);<br>
<br>
// Try to inline DuplicateFunction (cloned from F with call to<br>
// the OutlinedFunction into its callers. Return true<br>
// if there is any successful inlining.<br>
- bool tryPartialInline(Function *DuplicateFunction,<br>
- Function *F, /*orignal function */<br>
- FunctionOutliningInfo *OI, Function *OutlinedFunction,<br>
- BlockFrequencyInfo *CalleeBFI);<br>
+ bool tryPartialInline(<wbr>FunctionCloner &Cloner);<br>
<br>
// Compute the mapping from use site of DuplicationFunction to the enclosing<br>
// BB's profile count.<br>
@@ -146,7 +166,7 @@ private:<br>
NumPartialInlining >= MaxNumPartialInlining);<br>
}<br>
<br>
- CallSite getCallSite(User *U) {<br>
+ static CallSite getCallSite(User *U) {<br>
CallSite CS;<br>
if (CallInst *CI = dyn_cast<CallInst>(U))<br>
CS = CallSite(CI);<br>
@@ -157,7 +177,7 @@ private:<br>
return CS;<br>
}<br>
<br>
- CallSite getOneCallSiteTo(Function *F) {<br>
+ static CallSite getOneCallSiteTo(Function *F) {<br>
User *User = *F->user_begin();<br>
return getCallSite(User);<br>
}<br>
@@ -171,20 +191,15 @@ private:<br>
<br>
// Returns the costs associated with function outlining:<br>
// - The first value is the non-weighted runtime cost for making the call<br>
- // to the outlined function 'OutlinedFunction', including the addtional<br>
- // setup cost in the outlined function itself;<br>
+ // to the outlined function, including the additional setup cost in the<br>
+ // outlined function itself;<br>
// - The second value is the estimated size of the new call sequence in<br>
- // basic block 'OutliningCallBB';<br>
- // - The third value is the estimated size of the original code from<br>
- // function 'F' that is extracted into the outlined function.<br>
- std::tuple<int, int, int><br>
- computeOutliningCosts(Function *F, const FunctionOutliningInfo *OutliningInfo,<br>
- Function *OutlinedFunction,<br>
- BasicBlock *OutliningCallBB);<br>
+ // basic block Cloner.OutliningCallBB;<br>
+ std::tuple<int, int> computeOutliningCosts(<wbr>FunctionCloner &Cloner);<br>
// Compute the 'InlineCost' of block BB. InlineCost is a proxy used to<br>
// approximate both the size and runtime cost (Note that in the current<br>
// inline cost analysis, there is no clear distinction there either).<br>
- int computeBBInlineCost(BasicBlock *BB);<br>
+ static int computeBBInlineCost(BasicBlock *BB);<br>
<br>
std::unique_ptr<<wbr>FunctionOutliningInfo> computeOutliningInfo(Function *F);<br>
<br>
@@ -396,19 +411,19 @@ static bool hasProfileData(Function *F,<br>
return false;<br>
}<br>
<br>
-BranchProbability PartialInlinerImpl::<wbr>getOutliningCallBBRelativeFreq<wbr>(<br>
- Function *F, FunctionOutliningInfo *OI, Function *DuplicateFunction,<br>
- BlockFrequencyInfo *BFI, BasicBlock *OutliningCallBB) {<br>
+BranchProbability<br>
+PartialInlinerImpl::<wbr>getOutliningCallBBRelativeFreq<wbr>(FunctionCloner &Cloner) {<br>
<br>
auto EntryFreq =<br>
- BFI->getBlockFreq(&<wbr>DuplicateFunction-><wbr>getEntryBlock());<br>
- auto OutliningCallFreq = BFI->getBlockFreq(<wbr>OutliningCallBB);<br>
+ Cloner.ClonedFuncBFI-><wbr>getBlockFreq(&Cloner.<wbr>ClonedFunc->getEntryBlock());<br>
+ auto OutliningCallFreq =<br>
+ Cloner.ClonedFuncBFI-><wbr>getBlockFreq(Cloner.<wbr>OutliningCallBB);<br>
<br>
auto OutlineRegionRelFreq =<br>
BranchProbability::<wbr>getBranchProbability(<wbr>OutliningCallFreq.<wbr>getFrequency(),<br>
EntryFreq.getFrequency());<br>
<br>
- if (hasProfileData(F, OI))<br>
+ if (hasProfileData(Cloner.<wbr>OrigFunc, Cloner.ClonedOI.get()))<br>
return OutlineRegionRelFreq;<br>
<br>
// When profile data is not available, we need to be conservative in<br>
@@ -433,15 +448,17 @@ BranchProbability PartialInlinerImpl::ge<br>
}<br>
<br>
bool PartialInlinerImpl::<wbr>shouldPartialInline(<br>
- CallSite CS, Function *F /* Original Callee */, FunctionOutliningInfo *OI,<br>
- BlockFrequencyInfo *CalleeBFI, BasicBlock *OutliningCallBB,<br>
- int NonWeightedOutliningRcost, OptimizationRemarkEmitter &ORE) {<br>
+ CallSite CS, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,<br>
+ OptimizationRemarkEmitter &ORE) {<br>
+<br>
using namespace ore;<br>
if (SkipCostAnalysis)<br>
return true;<br>
<br>
Instruction *Call = CS.getInstruction();<br>
Function *Callee = CS.getCalledFunction();<br>
+ assert(Callee == Cloner.ClonedFunc);<br>
+<br>
Function *Caller = CS.getCaller();<br>
auto &CalleeTTI = (*GetTTI)(*Callee);<br>
InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,<br>
@@ -449,14 +466,14 @@ bool PartialInlinerImpl::<wbr>shouldPartialIn<br>
<br>
if (IC.isAlways()) {<br>
ORE.emit(<wbr>OptimizationRemarkAnalysis(<wbr>DEBUG_TYPE, "AlwaysInline", Call)<br>
- << NV("Callee", F)<br>
+ << NV("Callee", Cloner.OrigFunc)<br>
<< " should always be fully inlined, not partially");<br>
return false;<br>
}<br>
<br>
if (IC.isNever()) {<br>
ORE.emit(<wbr>OptimizationRemarkMissed(<wbr>DEBUG_TYPE, "NeverInline", Call)<br>
- << NV("Callee", F) << " not partially inlined into "<br>
+ << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "<br>
<< NV("Caller", Caller)<br>
<< " because it should never be inlined (cost=never)");<br>
return false;<br>
@@ -464,29 +481,25 @@ bool PartialInlinerImpl::<wbr>shouldPartialIn<br>
<br>
if (!IC) {<br>
ORE.emit(<wbr>OptimizationRemarkAnalysis(<wbr>DEBUG_TYPE, "TooCostly", Call)<br>
- << NV("Callee", F) << " not partially inlined into "<br>
+ << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "<br>
<< NV("Caller", Caller) << " because too costly to inline (cost="<br>
<< NV("Cost", IC.getCost()) << ", threshold="<br>
<< NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");<br>
return false;<br>
}<br>
const DataLayout &DL = Caller->getParent()-><wbr>getDataLayout();<br>
+<br>
// The savings of eliminating the call:<br>
int NonWeightedSavings = getCallsiteCost(CS, DL);<br>
BlockFrequency NormWeightedSavings(<wbr>NonWeightedSavings);<br>
<br>
- auto RelativeFreq =<br>
- getOutliningCallBBRelativeFreq<wbr>(F, OI, Callee, CalleeBFI, OutliningCallBB);<br>
- auto NormWeightedRcost =<br>
- BlockFrequency(<wbr>NonWeightedOutliningRcost) * RelativeFreq;<br>
-<br>
// Weighted saving is smaller than weighted cost, return false<br>
- if (NormWeightedSavings < NormWeightedRcost) {<br>
+ if (NormWeightedSavings < WeightedOutliningRcost) {<br>
ORE.emit(<br>
OptimizationRemarkAnalysis(<wbr>DEBUG_TYPE, "OutliningCallcostTooHigh", Call)<br>
- << NV("Callee", F) << " not partially inlined into "<br>
+ << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "<br>
<< NV("Caller", Caller) << " runtime overhead (overhead="<br>
- << NV("Overhead", (unsigned)NormWeightedRcost.<wbr>getFrequency())<br>
+ << NV("Overhead", (unsigned)<wbr>WeightedOutliningRcost.<wbr>getFrequency())<br>
<< ", savings="<br>
<< NV("Savings", (unsigned)NormWeightedSavings.<wbr>getFrequency()) << ")"<br>
<< " of making the outlined call is too high");<br>
@@ -495,7 +508,7 @@ bool PartialInlinerImpl::<wbr>shouldPartialIn<br>
}<br>
<br>
ORE.emit(<wbr>OptimizationRemarkAnalysis(<wbr>DEBUG_TYPE, "CanBePartiallyInlined", Call)<br>
- << NV("Callee", F) << " can be partially inlined into "<br>
+ << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "<br>
<< NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())<br>
<< " (threshold="<br>
<< NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");<br>
@@ -551,50 +564,32 @@ int PartialInlinerImpl::<wbr>computeBBInlineC<br>
return InlineCost;<br>
}<br>
<br>
-std::tuple<int, int, int> PartialInlinerImpl::<wbr>computeOutliningCosts(<br>
- Function *F, const FunctionOutliningInfo *OI, Function *OutlinedFunction,<br>
- BasicBlock *OutliningCallBB) {<br>
- // First compute the cost of the outlined region 'OI' in the original<br>
- // function 'F'.<br>
- // FIXME: The code extractor (outliner) can now do code sinking/hoisting<br>
- // to reduce outlining cost. The hoisted/sunk code currently do not<br>
- // incur any runtime cost so it is still OK to compare the outlined<br>
- // function cost with the outlined region in the original function.<br>
- // If this ever changes, we will need to introduce new extractor api<br>
- // to pass the information.<br>
- int OutlinedRegionCost = 0;<br>
- for (BasicBlock &BB : *F) {<br>
- if (&BB != OI->ReturnBlock &&<br>
- // Assuming Entry set is small -- do a linear search here:<br>
- std::find(OI->Entries.begin(), OI->Entries.end(), &BB) ==<br>
- OI->Entries.end()) {<br>
- OutlinedRegionCost += computeBBInlineCost(&BB);<br>
- }<br>
- }<br>
+std::tuple<int, int><br>
+PartialInlinerImpl::<wbr>computeOutliningCosts(<wbr>FunctionCloner &Cloner) {<br>
<br>
// Now compute the cost of the call sequence to the outlined function<br>
// 'OutlinedFunction' in BB 'OutliningCallBB':<br>
- int OutliningFuncCallCost = computeBBInlineCost(<wbr>OutliningCallBB);<br>
+ int OutliningFuncCallCost = computeBBInlineCost(Cloner.<wbr>OutliningCallBB);<br>
<br>
// Now compute the cost of the extracted/outlined function itself:<br>
int OutlinedFunctionCost = 0;<br>
- for (BasicBlock &BB : *OutlinedFunction) {<br>
+ for (BasicBlock &BB : *Cloner.OutlinedFunc) {<br>
OutlinedFunctionCost += computeBBInlineCost(&BB);<br>
}<br>
<br>
- assert(OutlinedFunctionCost >= OutlinedRegionCost &&<br>
+ assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&<br>
"Outlined function cost should be no less than the outlined region");<br>
// The code extractor introduces a new root and exit stub blocks with<br>
// additional unconditional branches. Those branches will be eliminated<br>
// later with bb layout. The cost should be adjusted accordingly:<br>
OutlinedFunctionCost -= 2 * InlineConstants::InstrCost;<br>
<br>
- int OutliningRuntimeOverhead = OutliningFuncCallCost +<br>
- (OutlinedFunctionCost - OutlinedRegionCost) +<br>
- ExtraOutliningPenalty;<br>
+ int OutliningRuntimeOverhead =<br>
+ OutliningFuncCallCost +<br>
+ (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +<br>
+ ExtraOutliningPenalty;<br>
<br>
- return std::make_tuple(<wbr>OutliningFuncCallCost, OutliningRuntimeOverhead,<br>
- OutlinedRegionCost);<br>
+ return std::make_tuple(<wbr>OutliningFuncCallCost, OutliningRuntimeOverhead);<br>
}<br>
<br>
// Create the callsite to profile count map which is<br>
@@ -641,42 +636,30 @@ void PartialInlinerImpl::<wbr>computeCallsite<br>
}<br>
}<br>
<br>
-Function *PartialInlinerImpl::<wbr>unswitchFunction(Function *F) {<br>
-<br>
- if (F->hasAddressTaken())<br>
- return nullptr;<br>
-<br>
- // Let inliner handle it<br>
- if (F->hasFnAttribute(Attribute::<wbr>AlwaysInline))<br>
- return nullptr;<br>
-<br>
- if (F->hasFnAttribute(Attribute::<wbr>NoInline))<br>
- return nullptr;<br>
-<br>
- if (PSI->isFunctionEntryCold(F))<br>
- return nullptr;<br>
-<br>
- if (F->user_begin() == F->user_end())<br>
- return nullptr;<br>
-<br>
- std::unique_ptr<<wbr>FunctionOutliningInfo> OI = computeOutliningInfo(F);<br>
-<br>
- if (!OI)<br>
- return nullptr;<br>
+PartialInlinerImpl::<wbr>FunctionCloner::<wbr>FunctionCloner(Function *F,<br>
+ FunctionOutliningInfo *OI)<br>
+ : OrigFunc(F) {<br>
+ ClonedOI = llvm::make_unique<<wbr>FunctionOutliningInfo>();<br>
<br>
// Clone the function, so that we can hack away on it.<br>
ValueToValueMapTy VMap;<br>
- Function *DuplicateFunction = CloneFunction(F, VMap);<br>
- BasicBlock *NewReturnBlock = cast<BasicBlock>(VMap[OI-><wbr>ReturnBlock]);<br>
- BasicBlock *NewNonReturnBlock = cast<BasicBlock>(VMap[OI-><wbr>NonReturnBlock]);<br>
- DenseSet<BasicBlock *> NewEntries;<br>
+ ClonedFunc = CloneFunction(F, VMap);<br>
+<br>
+ ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI-><wbr>ReturnBlock]);<br>
+ ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI-><wbr>NonReturnBlock]);<br>
for (BasicBlock *BB : OI->Entries) {<br>
- NewEntries.insert(cast<<wbr>BasicBlock>(VMap[BB]));<br>
+ ClonedOI->Entries.push_back(<wbr>cast<BasicBlock>(VMap[BB]));<br>
+ }<br>
+ for (BasicBlock *E : OI->ReturnBlockPreds) {<br>
+ BasicBlock *NewE = cast<BasicBlock>(VMap[E]);<br>
+ ClonedOI->ReturnBlockPreds.<wbr>push_back(NewE);<br>
}<br>
-<br>
// Go ahead and update all uses to the duplicate, so that we can just<br>
// use the inliner functionality when we're done hacking.<br>
- F->replaceAllUsesWith(<wbr>DuplicateFunction);<br>
+ F->replaceAllUsesWith(<wbr>ClonedFunc);<br>
+}<br>
+<br>
+void PartialInlinerImpl::<wbr>FunctionCloner::<wbr>NormalizeReturnBlock() {<br>
<br>
auto getFirstPHI = [](BasicBlock *BB) {<br>
BasicBlock::iterator I = BB->begin();<br>
@@ -692,14 +675,19 @@ Function *PartialInlinerImpl::<wbr>unswitchFu<br>
}<br>
return FirstPhi;<br>
};<br>
+<br>
// Special hackery is needed with PHI nodes that have inputs from more than<br>
// one extracted block. For simplicity, just split the PHIs into a two-level<br>
// sequence of PHIs, some of which will go in the extracted region, and some<br>
// of which will go outside.<br>
- BasicBlock *PreReturn = NewReturnBlock;<br>
+ BasicBlock *PreReturn = ClonedOI->ReturnBlock;<br>
// only split block when necessary:<br>
PHINode *FirstPhi = getFirstPHI(PreReturn);<br>
- unsigned NumPredsFromEntries = OI->ReturnBlockPreds.size();<br>
+ unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.<wbr>size();<br>
+<br>
+ if (!FirstPhi || FirstPhi-><wbr>getNumIncomingValues() <= NumPredsFromEntries + 1)<br>
+ return;<br>
+<br>
auto IsTrivialPhi = [](PHINode *PN) -> Value * {<br>
Value *CommonValue = PN->getIncomingValue(0);<br>
if (all_of(PN->incoming_values(),<br>
@@ -708,143 +696,185 @@ Function *PartialInlinerImpl::<wbr>unswitchFu<br>
return nullptr;<br>
};<br>
<br>
- if (FirstPhi && FirstPhi-><wbr>getNumIncomingValues() > NumPredsFromEntries + 1) {<br>
-<br>
- NewReturnBlock = NewReturnBlock-><wbr>splitBasicBlock(<br>
- NewReturnBlock-><wbr>getFirstNonPHI()->getIterator(<wbr>));<br>
- BasicBlock::iterator I = PreReturn->begin();<br>
- Instruction *Ins = &NewReturnBlock->front();<br>
- SmallVector<Instruction *, 4> DeadPhis;<br>
- while (I != PreReturn->end()) {<br>
- PHINode *OldPhi = dyn_cast<PHINode>(I);<br>
- if (!OldPhi)<br>
- break;<br>
-<br>
- PHINode *RetPhi =<br>
- PHINode::Create(OldPhi-><wbr>getType(), NumPredsFromEntries + 1, "", Ins);<br>
- OldPhi->replaceAllUsesWith(<wbr>RetPhi);<br>
- Ins = NewReturnBlock-><wbr>getFirstNonPHI();<br>
-<br>
- RetPhi->addIncoming(&*I, PreReturn);<br>
- for (BasicBlock *E : OI->ReturnBlockPreds) {<br>
- BasicBlock *NewE = cast<BasicBlock>(VMap[E]);<br>
- RetPhi->addIncoming(OldPhi-><wbr>getIncomingValueForBlock(NewE)<wbr>, NewE);<br>
- OldPhi->removeIncomingValue(<wbr>NewE);<br>
- }<br>
-<br>
- // After incoming values splitting, the old phi may become trivial.<br>
- // Keeping the trivial phi can introduce definition inside the outline<br>
- // region which is live-out, causing necessary overhead (load, store<br>
- // arg passing etc).<br>
- if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {<br>
- OldPhi->replaceAllUsesWith(<wbr>OldPhiVal);<br>
- DeadPhis.push_back(OldPhi);<br>
- }<br>
+ ClonedOI->ReturnBlock = ClonedOI->ReturnBlock-><wbr>splitBasicBlock(<br>
+ ClonedOI->ReturnBlock-><wbr>getFirstNonPHI()->getIterator(<wbr>));<br>
+ BasicBlock::iterator I = PreReturn->begin();<br>
+ Instruction *Ins = &ClonedOI->ReturnBlock->front(<wbr>);<br>
+ SmallVector<Instruction *, 4> DeadPhis;<br>
+ while (I != PreReturn->end()) {<br>
+ PHINode *OldPhi = dyn_cast<PHINode>(I);<br>
+ if (!OldPhi)<br>
+ break;<br>
<br>
- ++I;<br>
+ PHINode *RetPhi =<br>
+ PHINode::Create(OldPhi-><wbr>getType(), NumPredsFromEntries + 1, "", Ins);<br>
+ OldPhi->replaceAllUsesWith(<wbr>RetPhi);<br>
+ Ins = ClonedOI->ReturnBlock-><wbr>getFirstNonPHI();<br>
+<br>
+ RetPhi->addIncoming(&*I, PreReturn);<br>
+ for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {<br>
+ RetPhi->addIncoming(OldPhi-><wbr>getIncomingValueForBlock(E), E);<br>
+ OldPhi->removeIncomingValue(E)<wbr>;<br>
+ }<br>
+<br>
+ // After incoming values splitting, the old phi may become trivial.<br>
+ // Keeping the trivial phi can introduce definition inside the outline<br>
+ // region which is live-out, causing necessary overhead (load, store<br>
+ // arg passing etc).<br>
+ if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {<br>
+ OldPhi->replaceAllUsesWith(<wbr>OldPhiVal);<br>
+ DeadPhis.push_back(OldPhi);<br>
+ }<br>
+ ++I;<br>
}<br>
-<br>
for (auto *DP : DeadPhis)<br>
DP->eraseFromParent();<br>
<br>
- for (auto E : OI->ReturnBlockPreds) {<br>
- BasicBlock *NewE = cast<BasicBlock>(VMap[E]);<br>
- NewE->getTerminator()-><wbr>replaceUsesOfWith(PreReturn, NewReturnBlock);<br>
+ for (auto E : ClonedOI->ReturnBlockPreds) {<br>
+ E->getTerminator()-><wbr>replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);<br>
}<br>
- }<br>
+}<br>
<br>
+Function *PartialInlinerImpl::<wbr>FunctionCloner::<wbr>DoFunctionOutlining() {<br>
// Returns true if the block is to be partial inlined into the caller<br>
// (i.e. not to be extracted to the out of line function)<br>
- auto ToBeInlined = [&](BasicBlock *BB) {<br>
- return BB == NewReturnBlock || NewEntries.count(BB);<br>
+ auto ToBeInlined = [&, this](BasicBlock *BB) {<br>
+ return BB == ClonedOI->ReturnBlock ||<br>
+ (std::find(ClonedOI->Entries.<wbr>begin(), ClonedOI->Entries.end(), BB) !=<br>
+ ClonedOI->Entries.end());<br>
};<br>
+<br>
// Gather up the blocks that we're going to extract.<br>
std::vector<BasicBlock *> ToExtract;<br>
- ToExtract.push_back(<wbr>NewNonReturnBlock);<br>
- for (BasicBlock &BB : *DuplicateFunction)<br>
- if (!ToBeInlined(&BB) && &BB != NewNonReturnBlock)<br>
+ ToExtract.push_back(ClonedOI-><wbr>NonReturnBlock);<br>
+ OutlinedRegionCost +=<br>
+ PartialInlinerImpl::<wbr>computeBBInlineCost(ClonedOI-><wbr>NonReturnBlock);<br>
+ for (BasicBlock &BB : *ClonedFunc)<br>
+ if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {<br>
ToExtract.push_back(&BB);<br>
+ // FIXME: the code extractor may hoist/sink more code<br>
+ // into the outlined function which may make the outlining<br>
+ // overhead (the difference of the outlined function cost<br>
+ // and OutliningRegionCost) look larger.<br>
+ OutlinedRegionCost += computeBBInlineCost(&BB);<br>
+ }<br>
<br>
// The CodeExtractor needs a dominator tree.<br>
DominatorTree DT;<br>
- DT.recalculate(*<wbr>DuplicateFunction);<br>
+ DT.recalculate(*ClonedFunc);<br>
<br>
// Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.<br>
LoopInfo LI(DT);<br>
- BranchProbabilityInfo BPI(*DuplicateFunction, LI);<br>
- BlockFrequencyInfo BFI(*DuplicateFunction, BPI, LI);<br>
+ BranchProbabilityInfo BPI(*ClonedFunc, LI);<br>
+ ClonedFuncBFI.reset(new BlockFrequencyInfo(*<wbr>ClonedFunc, BPI, LI));<br>
<br>
// Extract the body of the if.<br>
- Function *OutlinedFunction =<br>
- CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, &BFI, &BPI)<br>
- .extractCodeRegion();<br>
+ OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,<br>
+ ClonedFuncBFI.get(), &BPI)<br>
+ .extractCodeRegion();<br>
+<br>
+ if (OutlinedFunc) {<br>
+ OutliningCallBB = PartialInlinerImpl::<wbr>getOneCallSiteTo(OutlinedFunc)<br>
+ .getInstruction()<br>
+ ->getParent();<br>
+ assert(OutliningCallBB-><wbr>getParent() == ClonedFunc);<br>
+ }<br>
<br>
- bool AnyInline =<br>
- tryPartialInline(<wbr>DuplicateFunction, F, OI.get(), OutlinedFunction, &BFI);<br>
+ return OutlinedFunc;<br>
+}<br>
<br>
+PartialInlinerImpl::<wbr>FunctionCloner::~<wbr>FunctionCloner() {<br>
// Ditch the duplicate, since we're done with it, and rewrite all remaining<br>
// users (function pointers, etc.) back to the original function.<br>
- DuplicateFunction-><wbr>replaceAllUsesWith(F);<br>
- DuplicateFunction-><wbr>eraseFromParent();<br>
+ ClonedFunc-><wbr>replaceAllUsesWith(OrigFunc);<br>
+ ClonedFunc->eraseFromParent();<br>
+ if (!IsFunctionInlined) {<br>
+ // Remove the function that is speculatively created if there is no<br>
+ // reference.<br>
+ if (OutlinedFunc)<br>
+ OutlinedFunc->eraseFromParent(<wbr>);<br>
+ }<br>
+}<br>
+<br>
+Function *PartialInlinerImpl::<wbr>unswitchFunction(Function *F) {<br>
+<br>
+ if (F->hasAddressTaken())<br>
+ return nullptr;<br>
+<br>
+ // Let inliner handle it<br>
+ if (F->hasFnAttribute(Attribute::<wbr>AlwaysInline))<br>
+ return nullptr;<br>
+<br>
+ if (F->hasFnAttribute(Attribute::<wbr>NoInline))<br>
+ return nullptr;<br>
+<br>
+ if (PSI->isFunctionEntryCold(F))<br>
+ return nullptr;<br>
+<br>
+ if (F->user_begin() == F->user_end())<br>
+ return nullptr;<br>
+<br>
+ std::unique_ptr<<wbr>FunctionOutliningInfo> OI = computeOutliningInfo(F);<br>
+<br>
+ if (!OI)<br>
+ return nullptr;<br>
+<br>
+ FunctionCloner Cloner(F, OI.get());<br>
+ Cloner.NormalizeReturnBlock();<br>
+ Function *OutlinedFunction = Cloner.DoFunctionOutlining();<br>
+<br>
+ bool AnyInline = tryPartialInline(Cloner);<br>
<br>
if (AnyInline)<br>
return OutlinedFunction;<br>
<br>
- // Remove the function that is speculatively created:<br>
- if (OutlinedFunction)<br>
- OutlinedFunction-><wbr>eraseFromParent();<br>
-<br>
return nullptr;<br>
}<br>
<br>
-bool PartialInlinerImpl::<wbr>tryPartialInline(Function *DuplicateFunction,<br>
- Function *F,<br>
- FunctionOutliningInfo *OI,<br>
- Function *OutlinedFunction,<br>
- BlockFrequencyInfo *CalleeBFI) {<br>
- if (OutlinedFunction == nullptr)<br>
- return false;<br>
-<br>
+bool PartialInlinerImpl::<wbr>tryPartialInline(<wbr>FunctionCloner &Cloner) {<br>
int NonWeightedRcost;<br>
int SizeCost;<br>
- int OutlinedRegionSizeCost;<br>
<br>
- auto OutliningCallBB =<br>
- getOneCallSiteTo(<wbr>OutlinedFunction).<wbr>getInstruction()->getParent();<br>
+ if (Cloner.OutlinedFunc == nullptr)<br>
+ return false;<br>
+<br>
+ std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner);<br>
<br>
- std::tie(SizeCost, NonWeightedRcost, OutlinedRegionSizeCost) =<br>
- computeOutliningCosts(F, OI, OutlinedFunction, OutliningCallBB);<br>
+ auto RelativeToEntryFreq = getOutliningCallBBRelativeFreq<wbr>(Cloner);<br>
+ auto WeightedRcost = BlockFrequency(<wbr>NonWeightedRcost) * RelativeToEntryFreq;<br>
<br>
// The call sequence to the outlined function is larger than the original<br>
// outlined region size, it does not increase the chances of inlining<br>
- // 'F' with outlining (The inliner usies the size increase to model the<br>
- // the cost of inlining a callee).<br>
- if (!SkipCostAnalysis && OutlinedRegionSizeCost < SizeCost) {<br>
- OptimizationRemarkEmitter ORE(F);<br>
+ // the function with outlining (The inliner uses the size increase to<br>
+ // model the cost of inlining a callee).<br>
+ if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {<br>
+ OptimizationRemarkEmitter ORE(Cloner.OrigFunc);<br>
DebugLoc DLoc;<br>
BasicBlock *Block;<br>
- std::tie(DLoc, Block) = getOneDebugLoc(<wbr>DuplicateFunction);<br>
+ std::tie(DLoc, Block) = getOneDebugLoc(Cloner.<wbr>ClonedFunc);<br>
ORE.emit(<wbr>OptimizationRemarkAnalysis(<wbr>DEBUG_TYPE, "OutlineRegionTooSmall",<br>
DLoc, Block)<br>
- << ore::NV("Function", F)<br>
+ << ore::NV("Function", Cloner.OrigFunc)<br>
<< " not partially inlined into callers (Original Size = "<br>
- << ore::NV("<wbr>OutlinedRegionOriginalSize", OutlinedRegionSizeCost)<br>
+ << ore::NV("<wbr>OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)<br>
<< ", Size of call sequence to outlined function = "<br>
<< ore::NV("NewSize", SizeCost) << ")");<br>
return false;<br>
}<br>
<br>
- assert(F->user_begin() == F->user_end() &&<br>
+ assert(Cloner.OrigFunc->user_<wbr>begin() == Cloner.OrigFunc->user_end() &&<br>
"F's users should all be replaced!");<br>
- std::vector<User *> Users(DuplicateFunction->user_<wbr>begin(),<br>
- DuplicateFunction->user_end())<wbr>;<br>
+<br>
+ std::vector<User *> Users(Cloner.ClonedFunc->user_<wbr>begin(),<br>
+ Cloner.ClonedFunc->user_end())<wbr>;<br>
<br>
DenseMap<User *, uint64_t> CallSiteToProfCountMap;<br>
- if (F->getEntryCount())<br>
- computeCallsiteToProfCountMap(<wbr>DuplicateFunction, CallSiteToProfCountMap);<br>
+ if (Cloner.OrigFunc-><wbr>getEntryCount())<br>
+ computeCallsiteToProfCountMap(<wbr>Cloner.ClonedFunc, CallSiteToProfCountMap);<br>
<br>
- auto CalleeEntryCount = F->getEntryCount();<br>
+ auto CalleeEntryCount = Cloner.OrigFunc-><wbr>getEntryCount();<br>
uint64_t CalleeEntryCountV = (CalleeEntryCount ? *CalleeEntryCount : 0);<br>
+<br>
bool AnyInline = false;<br>
for (User *User : Users) {<br>
CallSite CS = getCallSite(User);<br>
@@ -854,13 +884,12 @@ bool PartialInlinerImpl::<wbr>tryPartialInlin<br>
<br>
OptimizationRemarkEmitter ORE(CS.getCaller());<br>
<br>
- if (!shouldPartialInline(CS, F, OI, CalleeBFI, OutliningCallBB,<br>
- NonWeightedRcost, ORE))<br>
+ if (!shouldPartialInline(CS, Cloner, WeightedRcost, ORE))<br>
continue;<br>
<br>
ORE.emit(<br>
OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction())<br>
- << ore::NV("Callee", F) << " partially inlined into "<br>
+ << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "<br>
<< ore::NV("Caller", CS.getCaller()));<br>
<br>
InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);<br>
@@ -878,8 +907,11 @@ bool PartialInlinerImpl::<wbr>tryPartialInlin<br>
NumPartialInlined++;<br>
}<br>
<br>
- if (AnyInline && CalleeEntryCount)<br>
- F->setEntryCount(<wbr>CalleeEntryCountV);<br>
+ if (AnyInline) {<br>
+ Cloner.IsFunctionInlined = true;<br>
+ if (CalleeEntryCount)<br>
+ Cloner.OrigFunc-><wbr>setEntryCount(<wbr>CalleeEntryCountV);<br>
+ }<br>
<br>
return AnyInline;<br>
}<br>
<br>
<br>
______________________________<wbr>_________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a><br>
<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/<wbr>mailman/listinfo/llvm-commits</a><br>
</blockquote></div><br></div></div>