<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Thu, Jun 15, 2017 at 4:56 PM, Xinliang David Li via llvm-commits <span dir="ltr"><<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: davidxl<br>
Date: Thu Jun 15 18:56:59 2017<br>
New Revision: 305530<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=305530&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project?rev=305530&view=rev</a><br>
Log:<br>
[PartialInlining] Code Refactoring<br>
<br>
This is an NFC (no functional change) code refactoring and interface cleanup. This paves the<br>
way to enable outlining-only mode for the partial inliner.<br>
<br>
<br>
<br>
Modified:<br>
  llvm/trunk/lib/Transforms/IPO/<wbr>PartialInlining.cpp<br>
<br>
Modified: llvm/trunk/lib/Transforms/IPO/<wbr>PartialInlining.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp?rev=305530&r1=305529&r2=305530&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/lib/<wbr>Transforms/IPO/<wbr>PartialInlining.cpp?rev=<wbr>305530&r1=305529&r2=305530&<wbr>view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/lib/Transforms/IPO/<wbr>PartialInlining.cpp (original)<br>
+++ llvm/trunk/lib/Transforms/IPO/<wbr>PartialInlining.cpp Thu Jun 15 18:56:59 2017<br>
@@ -103,6 +103,35 @@ struct PartialInlinerImpl {<br>
  bool run(Module &M);<br>
  Function *unswitchFunction(Function *F);<br>
<br>
+&nbsp; // This class speculatively clones the function to be partially inlined.<br>
+Â // At the end of partial inlining, the remaining callsites to the cloned<br>
+Â // function that are not partially inlined will be fixed up to reference<br>
+Â // the original function, and the cloned function will be erased.<br>
+Â struct FunctionCloner {<br>
+Â Â FunctionCloner(Function *F, FunctionOutliningInfo *OI);<br>
+Â Â ~FunctionCloner();<br>
+<br>
+Â Â // Prepare for function outlining: making sure there is only<br>
+Â Â // one incoming edge from the extracted/outlined region to<br>
+Â Â // the return block.<br>
+Â Â void NormalizeReturnBlock();<br>
+<br>
+Â Â // Do function outlining:<br>
+&nbsp; &nbsp; Function *DoFunctionOutlining();<br></blockquote><div><br></div><div>Nit: this should be named doFunctionOutlining to follow the LLVM naming convention (function names start with a lowercase letter).</div><div><br></div><div>-- Sean Silva</div><div>&nbsp;</div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br>
+Â Â Function *OrigFunc = nullptr;<br>
+Â Â Function *ClonedFunc = nullptr;<br>
+Â Â Function *OutlinedFunc = nullptr;<br>
+Â Â BasicBlock *OutliningCallBB = nullptr;<br>
+Â Â // ClonedFunc is inlined in one of its callers after function<br>
+Â Â // outlining.<br>
+Â Â bool IsFunctionInlined = false;<br>
+Â Â // The cost of the region to be outlined.<br>
+Â Â int OutlinedRegionCost = 0;<br>
+Â Â std::unique_ptr<<wbr>FunctionOutliningInfo> ClonedOI = nullptr;<br>
+Â Â std::unique_ptr<<wbr>BlockFrequencyInfo> ClonedFuncBFI = nullptr;<br>
+Â };<br>
+<br>
 private:<br>
  int NumPartialInlining = 0;<br>
  std::function<AssumptionCache &(Function &)> *GetAssumptionCache;<br>
@@ -114,27 +143,18 @@ private:<br>
  // The result is no larger than 1 and is represented using BP.<br>
  // (Note that the outlined region's 'head' block can only have incoming<br>
  // edges from the guarding entry blocks).<br>
-Â BranchProbability getOutliningCallBBRelativeFreq<wbr>(Function *F,<br>
-Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â FunctionOutliningInfo *OI,<br>
-Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Function *DuplicateFunction,<br>
-Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â BlockFrequencyInfo *BFI,<br>
-Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â BasicBlock *OutliningCallBB);<br>
+Â BranchProbability getOutliningCallBBRelativeFreq<wbr>(FunctionCloner &Cloner);<br>
<br>
  // Return true if the callee of CS should be partially inlined with<br>
  // profit.<br>
-Â bool shouldPartialInline(CallSite CS, Function *F, FunctionOutliningInfo *OI,<br>
-Â Â Â Â Â Â Â Â Â Â Â Â Â Â BlockFrequencyInfo *CalleeBFI,<br>
-Â Â Â Â Â Â Â Â Â Â Â Â Â Â BasicBlock *OutliningCallBB,<br>
-Â Â Â Â Â Â Â Â Â Â Â Â Â Â int OutliningCallOverhead,<br>
+Â bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner,<br>
+Â Â Â Â Â Â Â Â Â Â Â Â Â Â BlockFrequency WeightedOutliningRcost,<br>
              OptimizationRemarkEmitter &ORE);<br>
<br>
  // Try to inline DuplicateFunction (cloned from F with call to<br>
  // the OutlinedFunction into its callers. Return true<br>
  // if there is any successful inlining.<br>
-Â bool tryPartialInline(Function *DuplicateFunction,<br>
-Â Â Â Â Â Â Â Â Â Â Â Â Function *F, /*orignal function */<br>
-Â Â Â Â Â Â Â Â Â Â Â Â FunctionOutliningInfo *OI, Function *OutlinedFunction,<br>
-Â Â Â Â Â Â Â Â Â Â Â Â BlockFrequencyInfo *CalleeBFI);<br>
+Â bool tryPartialInline(<wbr>FunctionCloner &Cloner);<br>
<br>
  // Compute the mapping from use site of DuplicationFunction to the enclosing<br>
  // BB's profile count.<br>
@@ -146,7 +166,7 @@ private:<br>
       NumPartialInlining >= MaxNumPartialInlining);<br>
  }<br>
<br>
-Â CallSite getCallSite(User *U) {<br>
+Â static CallSite getCallSite(User *U) {<br>
   CallSite CS;<br>
   if (CallInst *CI = dyn_cast<CallInst>(U))<br>
    CS = CallSite(CI);<br>
@@ -157,7 +177,7 @@ private:<br>
   return CS;<br>
  }<br>
<br>
-Â CallSite getOneCallSiteTo(Function *F) {<br>
+Â static CallSite getOneCallSiteTo(Function *F) {<br>
   User *User = *F->user_begin();<br>
   return getCallSite(User);<br>
  }<br>
@@ -171,20 +191,15 @@ private:<br>
<br>
  // Returns the costs associated with function outlining:<br>
  // - The first value is the non-weighted runtime cost for making the call<br>
-Â //Â Â to the outlined function 'OutlinedFunction', including the addtional<br>
-Â //Â Â setup cost in the outlined function itself;<br>
+ //  to the outlined function, including the additional setup cost in the<br>
+Â //Â Â outlined function itself;<br>
  // - The second value is the estimated size of the new call sequence in<br>
-Â //Â Â basic block 'OutliningCallBB';<br>
-Â // - The third value is the estimated size of the original code from<br>
-Â //Â Â function 'F' that is extracted into the outlined function.<br>
-Â std::tuple<int, int, int><br>
-Â computeOutliningCosts(Function *F, const FunctionOutliningInfo *OutliningInfo,<br>
-Â Â Â Â Â Â Â Â Â Â Â Â Function *OutlinedFunction,<br>
-Â Â Â Â Â Â Â Â Â Â Â Â BasicBlock *OutliningCallBB);<br>
+Â //Â Â basic block Cloner.OutliningCallBB;<br>
+Â std::tuple<int, int> computeOutliningCosts(<wbr>FunctionCloner &Cloner);<br>
  // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to<br>
  // approximate both the size and runtime cost (Note that in the current<br>
  // inline cost analysis, there is no clear distinction there either).<br>
-Â int computeBBInlineCost(BasicBlock *BB);<br>
+Â static int computeBBInlineCost(BasicBlock *BB);<br>
<br>
  std::unique_ptr<<wbr>FunctionOutliningInfo> computeOutliningInfo(Function *F);<br>
<br>
@@ -396,19 +411,19 @@ static bool hasProfileData(Function *F,<br>
  return false;<br>
 }<br>
<br>
-BranchProbability PartialInlinerImpl::<wbr>getOutliningCallBBRelativeFreq<wbr>(<br>
-Â Â Function *F, FunctionOutliningInfo *OI, Function *DuplicateFunction,<br>
-Â Â BlockFrequencyInfo *BFI, BasicBlock *OutliningCallBB) {<br>
+BranchProbability<br>
+PartialInlinerImpl::<wbr>getOutliningCallBBRelativeFreq<wbr>(FunctionCloner &Cloner) {<br>
<br>
  auto EntryFreq =<br>
-Â Â Â BFI->getBlockFreq(&<wbr>DuplicateFunction-><wbr>getEntryBlock());<br>
-Â auto OutliningCallFreq = BFI->getBlockFreq(<wbr>OutliningCallBB);<br>
+Â Â Â Cloner.ClonedFuncBFI-><wbr>getBlockFreq(&Cloner.<wbr>ClonedFunc->getEntryBlock());<br>
+Â auto OutliningCallFreq =<br>
+Â Â Â Cloner.ClonedFuncBFI-><wbr>getBlockFreq(Cloner.<wbr>OutliningCallBB);<br>
<br>
  auto OutlineRegionRelFreq =<br>
    BranchProbability::<wbr>getBranchProbability(<wbr>OutliningCallFreq.<wbr>getFrequency(),<br>
                        EntryFreq.getFrequency());<br>
<br>
-Â if (hasProfileData(F, OI))<br>
+Â if (hasProfileData(Cloner.<wbr>OrigFunc, Cloner.ClonedOI.get()))<br>
   return OutlineRegionRelFreq;<br>
<br>
  // When profile data is not available, we need to be conservative in<br>
@@ -433,15 +448,17 @@ BranchProbability PartialInlinerImpl::ge<br>
 }<br>
<br>
 bool PartialInlinerImpl::<wbr>shouldPartialInline(<br>
-Â Â CallSite CS, Function *F /* Original Callee */, FunctionOutliningInfo *OI,<br>
-Â Â BlockFrequencyInfo *CalleeBFI, BasicBlock *OutliningCallBB,<br>
-Â Â int NonWeightedOutliningRcost, OptimizationRemarkEmitter &ORE) {<br>
+Â Â CallSite CS, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,<br>
+Â Â OptimizationRemarkEmitter &ORE) {<br>
+<br>
  using namespace ore;<br>
  if (SkipCostAnalysis)<br>
   return true;<br>
<br>
  Instruction *Call = CS.getInstruction();<br>
  Function *Callee = CS.getCalledFunction();<br>
+Â assert(Callee == Cloner.ClonedFunc);<br>
+<br>
  Function *Caller = CS.getCaller();<br>
  auto &CalleeTTI = (*GetTTI)(*Callee);<br>
  InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,<br>
@@ -449,14 +466,14 @@ bool PartialInlinerImpl::<wbr>shouldPartialIn<br>
<br>
  if (IC.isAlways()) {<br>
   ORE.emit(<wbr>OptimizationRemarkAnalysis(<wbr>DEBUG_TYPE, "AlwaysInline", Call)<br>
-Â Â Â Â Â Â Â << NV("Callee", F)<br>
+Â Â Â Â Â Â Â << NV("Callee", Cloner.OrigFunc)<br>
       << " should always be fully inlined, not partially");<br>
   return false;<br>
  }<br>
<br>
  if (IC.isNever()) {<br>
   ORE.emit(<wbr>OptimizationRemarkMissed(<wbr>DEBUG_TYPE, "NeverInline", Call)<br>
-Â Â Â Â Â Â Â << NV("Callee", F) << " not partially inlined into "<br>
+Â Â Â Â Â Â Â << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "<br>
       << NV("Caller", Caller)<br>
       << " because it should never be inlined (cost=never)");<br>
   return false;<br>
@@ -464,29 +481,25 @@ bool PartialInlinerImpl::<wbr>shouldPartialIn<br>
<br>
  if (!IC) {<br>
   ORE.emit(<wbr>OptimizationRemarkAnalysis(<wbr>DEBUG_TYPE, "TooCostly", Call)<br>
-Â Â Â Â Â Â Â << NV("Callee", F) << " not partially inlined into "<br>
+Â Â Â Â Â Â Â << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "<br>
       << NV("Caller", Caller) << " because too costly to inline (cost="<br>
       << NV("Cost", IC.getCost()) << ", threshold="<br>
       << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");<br>
   return false;<br>
  }<br>
  const DataLayout &DL = Caller->getParent()-><wbr>getDataLayout();<br>
+<br>
  // The savings of eliminating the call:<br>
  int NonWeightedSavings = getCallsiteCost(CS, DL);<br>
  BlockFrequency NormWeightedSavings(<wbr>NonWeightedSavings);<br>
<br>
-Â auto RelativeFreq =<br>
-Â Â Â getOutliningCallBBRelativeFreq<wbr>(F, OI, Callee, CalleeBFI, OutliningCallBB);<br>
-Â auto NormWeightedRcost =<br>
-Â Â Â BlockFrequency(<wbr>NonWeightedOutliningRcost) * RelativeFreq;<br>
-<br>
  // Weighted saving is smaller than weighted cost, return false<br>
-Â if (NormWeightedSavings < NormWeightedRcost) {<br>
+Â if (NormWeightedSavings < WeightedOutliningRcost) {<br>
   ORE.emit(<br>
     OptimizationRemarkAnalysis(<wbr>DEBUG_TYPE, "OutliningCallcostTooHigh", Call)<br>
-Â Â Â Â << NV("Callee", F) << " not partially inlined into "<br>
+Â Â Â Â << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "<br>
     << NV("Caller", Caller) << " runtime overhead (overhead="<br>
-Â Â Â Â << NV("Overhead", (unsigned)NormWeightedRcost.<wbr>getFrequency())<br>
+Â Â Â Â << NV("Overhead", (unsigned)<wbr>WeightedOutliningRcost.<wbr>getFrequency())<br>
     << ", savings="<br>
     << NV("Savings", (unsigned)NormWeightedSavings.<wbr>getFrequency()) << ")"<br>
     << " of making the outlined call is too high");<br>
@@ -495,7 +508,7 @@ bool PartialInlinerImpl::<wbr>shouldPartialIn<br>
  }<br>
<br>
  ORE.emit(<wbr>OptimizationRemarkAnalysis(<wbr>DEBUG_TYPE, "CanBePartiallyInlined", Call)<br>
-Â Â Â Â Â Â << NV("Callee", F) << " can be partially inlined into "<br>
+Â Â Â Â Â Â << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "<br>
      << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())<br>
      << " (threshold="<br>
      << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");<br>
@@ -551,50 +564,32 @@ int PartialInlinerImpl::<wbr>computeBBInlineC<br>
  return InlineCost;<br>
 }<br>
<br>
-std::tuple<int, int, int> PartialInlinerImpl::<wbr>computeOutliningCosts(<br>
-Â Â Function *F, const FunctionOutliningInfo *OI, Function *OutlinedFunction,<br>
-Â Â BasicBlock *OutliningCallBB) {<br>
-Â // First compute the cost of the outlined region 'OI' in the original<br>
-Â // function 'F'.<br>
-Â // FIXME: The code extractor (outliner) can now do code sinking/hoisting<br>
-Â // to reduce outlining cost. The hoisted/sunk code currently do not<br>
-Â // incur any runtime cost so it is still OK to compare the outlined<br>
-Â // function cost with the outlined region in the original function.<br>
-Â // If this ever changes, we will need to introduce new extractor api<br>
-Â // to pass the information.<br>
-Â int OutlinedRegionCost = 0;<br>
-Â for (BasicBlock &BB : *F) {<br>
-Â Â if (&BB != OI->ReturnBlock &&<br>
-Â Â Â Â // Assuming Entry set is small -- do a linear search here:<br>
-Â Â Â Â std::find(OI->Entries.begin(), OI->Entries.end(), &BB) ==<br>
-Â Â Â Â Â Â OI->Entries.end()) {<br>
-Â Â Â OutlinedRegionCost += computeBBInlineCost(&BB);<br>
-Â Â }<br>
-Â }<br>
+std::tuple<int, int><br>
+PartialInlinerImpl::<wbr>computeOutliningCosts(<wbr>FunctionCloner &Cloner) {<br>
<br>
  // Now compute the cost of the call sequence to the outlined function<br>
  // 'OutlinedFunction' in BB 'OutliningCallBB':<br>
-Â int OutliningFuncCallCost = computeBBInlineCost(<wbr>OutliningCallBB);<br>
+Â int OutliningFuncCallCost = computeBBInlineCost(Cloner.<wbr>OutliningCallBB);<br>
<br>
  // Now compute the cost of the extracted/outlined function itself:<br>
  int OutlinedFunctionCost = 0;<br>
-Â for (BasicBlock &BB : *OutlinedFunction) {<br>
+Â for (BasicBlock &BB : *Cloner.OutlinedFunc) {<br>
   OutlinedFunctionCost += computeBBInlineCost(&BB);<br>
  }<br>
<br>
-Â assert(OutlinedFunctionCost >= OutlinedRegionCost &&<br>
+Â assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&<br>
     "Outlined function cost should be no less than the outlined region");<br>
  // The code extractor introduces a new root and exit stub blocks with<br>
  // additional unconditional branches. Those branches will be eliminated<br>
  // later with bb layout. The cost should be adjusted accordingly:<br>
  OutlinedFunctionCost -= 2 * InlineConstants::InstrCost;<br>
<br>
-Â int OutliningRuntimeOverhead = OutliningFuncCallCost +<br>
-Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â (OutlinedFunctionCost - OutlinedRegionCost) +<br>
-Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â ExtraOutliningPenalty;<br>
+Â int OutliningRuntimeOverhead =<br>
+Â Â Â OutliningFuncCallCost +<br>
+Â Â Â (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +<br>
+Â Â Â ExtraOutliningPenalty;<br>
<br>
-Â return std::make_tuple(<wbr>OutliningFuncCallCost, OutliningRuntimeOverhead,<br>
-Â Â Â Â Â Â Â Â Â Â Â Â Â OutlinedRegionCost);<br>
+Â return std::make_tuple(<wbr>OutliningFuncCallCost, OutliningRuntimeOverhead);<br>
 }<br>
<br>
 // Create the callsite to profile count map which is<br>
@@ -641,42 +636,30 @@ void PartialInlinerImpl::<wbr>computeCallsite<br>
  }<br>
 }<br>
<br>
-Function *PartialInlinerImpl::<wbr>unswitchFunction(Function *F) {<br>
-<br>
-Â if (F->hasAddressTaken())<br>
-Â Â return nullptr;<br>
-<br>
-Â // Let inliner handle it<br>
-Â if (F->hasFnAttribute(Attribute::<wbr>AlwaysInline))<br>
-Â Â return nullptr;<br>
-<br>
-Â if (F->hasFnAttribute(Attribute::<wbr>NoInline))<br>
-Â Â return nullptr;<br>
-<br>
-Â if (PSI->isFunctionEntryCold(F))<br>
-Â Â return nullptr;<br>
-<br>
-Â if (F->user_begin() == F->user_end())<br>
-Â Â return nullptr;<br>
-<br>
-Â std::unique_ptr<<wbr>FunctionOutliningInfo> OI = computeOutliningInfo(F);<br>
-<br>
-Â if (!OI)<br>
-Â Â return nullptr;<br>
+PartialInlinerImpl::<wbr>FunctionCloner::<wbr>FunctionCloner(Function *F,<br>
+Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â FunctionOutliningInfo *OI)<br>
+Â Â : OrigFunc(F) {<br>
+Â ClonedOI = llvm::make_unique<<wbr>FunctionOutliningInfo>();<br>
<br>
  // Clone the function, so that we can hack away on it.<br>
  ValueToValueMapTy VMap;<br>
-Â Function *DuplicateFunction = CloneFunction(F, VMap);<br>
-Â BasicBlock *NewReturnBlock = cast<BasicBlock>(VMap[OI-><wbr>ReturnBlock]);<br>
-Â BasicBlock *NewNonReturnBlock = cast<BasicBlock>(VMap[OI-><wbr>NonReturnBlock]);<br>
-Â DenseSet<BasicBlock *> NewEntries;<br>
+Â ClonedFunc = CloneFunction(F, VMap);<br>
+<br>
+Â ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI-><wbr>ReturnBlock]);<br>
+Â ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI-><wbr>NonReturnBlock]);<br>
  for (BasicBlock *BB : OI->Entries) {<br>
-Â Â NewEntries.insert(cast<<wbr>BasicBlock>(VMap[BB]));<br>
+Â Â ClonedOI->Entries.push_back(<wbr>cast<BasicBlock>(VMap[BB]));<br>
+Â }<br>
+Â for (BasicBlock *E : OI->ReturnBlockPreds) {<br>
+Â Â BasicBlock *NewE = cast<BasicBlock>(VMap[E]);<br>
+Â Â ClonedOI->ReturnBlockPreds.<wbr>push_back(NewE);<br>
  }<br>
-<br>
  // Go ahead and update all uses to the duplicate, so that we can just<br>
  // use the inliner functionality when we're done hacking.<br>
-Â F->replaceAllUsesWith(<wbr>DuplicateFunction);<br>
+Â F->replaceAllUsesWith(<wbr>ClonedFunc);<br>
+}<br>
+<br>
+void PartialInlinerImpl::<wbr>FunctionCloner::<wbr>NormalizeReturnBlock() {<br>
<br>
  auto getFirstPHI = [](BasicBlock *BB) {<br>
   BasicBlock::iterator I = BB->begin();<br>
@@ -692,14 +675,19 @@ Function *PartialInlinerImpl::<wbr>unswitchFu<br>
   }<br>
   return FirstPhi;<br>
  };<br>
+<br>
  // Special hackery is needed with PHI nodes that have inputs from more than<br>
  // one extracted block. For simplicity, just split the PHIs into a two-level<br>
  // sequence of PHIs, some of which will go in the extracted region, and some<br>
  // of which will go outside.<br>
-Â BasicBlock *PreReturn = NewReturnBlock;<br>
+Â BasicBlock *PreReturn = ClonedOI->ReturnBlock;<br>
  // only split block when necessary:<br>
  PHINode *FirstPhi = getFirstPHI(PreReturn);<br>
-Â unsigned NumPredsFromEntries = OI->ReturnBlockPreds.size();<br>
+Â unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.<wbr>size();<br>
+<br>
+Â if (!FirstPhi || FirstPhi-><wbr>getNumIncomingValues() <= NumPredsFromEntries + 1)<br>
+Â Â return;<br>
+<br>
  auto IsTrivialPhi = [](PHINode *PN) -> Value * {<br>
   Value *CommonValue = PN->getIncomingValue(0);<br>
   if (all_of(PN->incoming_values(),<br>
@@ -708,143 +696,185 @@ Function *PartialInlinerImpl::<wbr>unswitchFu<br>
   return nullptr;<br>
  };<br>
<br>
-Â if (FirstPhi && FirstPhi-><wbr>getNumIncomingValues() > NumPredsFromEntries + 1) {<br>
-<br>
-Â Â NewReturnBlock = NewReturnBlock-><wbr>splitBasicBlock(<br>
-Â Â Â Â NewReturnBlock-><wbr>getFirstNonPHI()->getIterator(<wbr>));<br>
-Â Â BasicBlock::iterator I = PreReturn->begin();<br>
-Â Â Instruction *Ins = &NewReturnBlock->front();<br>
-Â Â SmallVector<Instruction *, 4> DeadPhis;<br>
-Â Â while (I != PreReturn->end()) {<br>
-Â Â Â PHINode *OldPhi = dyn_cast<PHINode>(I);<br>
-Â Â Â if (!OldPhi)<br>
-Â Â Â Â break;<br>
-<br>
-Â Â Â PHINode *RetPhi =<br>
-Â Â Â Â Â PHINode::Create(OldPhi-><wbr>getType(), NumPredsFromEntries + 1, "", Ins);<br>
-Â Â Â OldPhi->replaceAllUsesWith(<wbr>RetPhi);<br>
-Â Â Â Ins = NewReturnBlock-><wbr>getFirstNonPHI();<br>
-<br>
-Â Â Â RetPhi->addIncoming(&*I, PreReturn);<br>
-Â Â Â for (BasicBlock *E : OI->ReturnBlockPreds) {<br>
-Â Â Â Â BasicBlock *NewE = cast<BasicBlock>(VMap[E]);<br>
-Â Â Â Â RetPhi->addIncoming(OldPhi-><wbr>getIncomingValueForBlock(NewE)<wbr>, NewE);<br>
-Â Â Â Â OldPhi->removeIncomingValue(<wbr>NewE);<br>
-Â Â Â }<br>
-<br>
-Â Â Â // After incoming values splitting, the old phi may become trivial.<br>
-Â Â Â // Keeping the trivial phi can introduce definition inside the outline<br>
-Â Â Â // region which is live-out, causing necessary overhead (load, store<br>
-Â Â Â // arg passing etc).<br>
-Â Â Â if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {<br>
-Â Â Â Â OldPhi->replaceAllUsesWith(<wbr>OldPhiVal);<br>
-Â Â Â Â DeadPhis.push_back(OldPhi);<br>
-Â Â Â }<br>
+Â ClonedOI->ReturnBlock = ClonedOI->ReturnBlock-><wbr>splitBasicBlock(<br>
+Â Â Â ClonedOI->ReturnBlock-><wbr>getFirstNonPHI()->getIterator(<wbr>));<br>
+Â BasicBlock::iterator I = PreReturn->begin();<br>
+Â Instruction *Ins = &ClonedOI->ReturnBlock->front(<wbr>);<br>
+Â SmallVector<Instruction *, 4> DeadPhis;<br>
+Â while (I != PreReturn->end()) {<br>
+Â Â PHINode *OldPhi = dyn_cast<PHINode>(I);<br>
+Â Â if (!OldPhi)<br>
+Â Â Â break;<br>
<br>
-Â Â Â ++I;<br>
+Â Â PHINode *RetPhi =<br>
+Â Â Â Â PHINode::Create(OldPhi-><wbr>getType(), NumPredsFromEntries + 1, "", Ins);<br>
+Â Â OldPhi->replaceAllUsesWith(<wbr>RetPhi);<br>
+Â Â Ins = ClonedOI->ReturnBlock-><wbr>getFirstNonPHI();<br>
+<br>
+Â Â RetPhi->addIncoming(&*I, PreReturn);<br>
+Â Â for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {<br>
+Â Â Â RetPhi->addIncoming(OldPhi-><wbr>getIncomingValueForBlock(E), E);<br>
+Â Â Â OldPhi->removeIncomingValue(E)<wbr>;<br>
+Â Â }<br>
+<br>
+Â Â // After incoming values splitting, the old phi may become trivial.<br>
+Â Â // Keeping the trivial phi can introduce definition inside the outline<br>
+&nbsp; &nbsp; // region which is live-out, causing unnecessary overhead (load, store<br>
+Â Â // arg passing etc).<br>
+Â Â if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {<br>
+Â Â Â OldPhi->replaceAllUsesWith(<wbr>OldPhiVal);<br>
+Â Â Â DeadPhis.push_back(OldPhi);<br>
+Â Â }<br>
+Â Â ++I;<br>
   }<br>
-<br>
   for (auto *DP : DeadPhis)<br>
    DP->eraseFromParent();<br>
<br>
-Â Â for (auto E : OI->ReturnBlockPreds) {<br>
-Â Â Â BasicBlock *NewE = cast<BasicBlock>(VMap[E]);<br>
-Â Â Â NewE->getTerminator()-><wbr>replaceUsesOfWith(PreReturn, NewReturnBlock);<br>
+Â Â for (auto E : ClonedOI->ReturnBlockPreds) {<br>
+Â Â Â E->getTerminator()-><wbr>replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);<br>
   }<br>
-Â }<br>
+}<br>
<br>
+Function *PartialInlinerImpl::<wbr>FunctionCloner::<wbr>DoFunctionOutlining() {<br>
  // Returns true if the block is to be partial inlined into the caller<br>
  // (i.e. not to be extracted to the out of line function)<br>
-Â auto ToBeInlined = [&](BasicBlock *BB) {<br>
-Â Â return BB == NewReturnBlock || NewEntries.count(BB);<br>
+Â auto ToBeInlined = [&, this](BasicBlock *BB) {<br>
+Â Â return BB == ClonedOI->ReturnBlock ||<br>
+Â Â Â Â Â Â (std::find(ClonedOI->Entries.<wbr>begin(), ClonedOI->Entries.end(), BB) !=<br>
+Â Â Â Â Â Â ClonedOI->Entries.end());<br>
  };<br>
+<br>
  // Gather up the blocks that we're going to extract.<br>
  std::vector<BasicBlock *> ToExtract;<br>
-Â ToExtract.push_back(<wbr>NewNonReturnBlock);<br>
-Â for (BasicBlock &BB : *DuplicateFunction)<br>
-Â Â if (!ToBeInlined(&BB) && &BB != NewNonReturnBlock)<br>
+Â ToExtract.push_back(ClonedOI-><wbr>NonReturnBlock);<br>
+Â OutlinedRegionCost +=<br>
+Â Â Â PartialInlinerImpl::<wbr>computeBBInlineCost(ClonedOI-><wbr>NonReturnBlock);<br>
+Â for (BasicBlock &BB : *ClonedFunc)<br>
+Â Â if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {<br>
    ToExtract.push_back(&BB);<br>
+Â Â Â // FIXME: the code extractor may hoist/sink more code<br>
+Â Â Â // into the outlined function which may make the outlining<br>
+Â Â Â // overhead (the difference of the outlined function cost<br>
+&nbsp; &nbsp; &nbsp; // and OutlinedRegionCost) look larger.<br>
+Â Â Â OutlinedRegionCost += computeBBInlineCost(&BB);<br>
+Â Â }<br>
<br>
  // The CodeExtractor needs a dominator tree.<br>
  DominatorTree DT;<br>
-Â DT.recalculate(*<wbr>DuplicateFunction);<br>
+Â DT.recalculate(*ClonedFunc);<br>
<br>
  // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.<br>
  LoopInfo LI(DT);<br>
-Â BranchProbabilityInfo BPI(*DuplicateFunction, LI);<br>
-Â BlockFrequencyInfo BFI(*DuplicateFunction, BPI, LI);<br>
+Â BranchProbabilityInfo BPI(*ClonedFunc, LI);<br>
+Â ClonedFuncBFI.reset(new BlockFrequencyInfo(*<wbr>ClonedFunc, BPI, LI));<br>
<br>
  // Extract the body of the if.<br>
-Â Function *OutlinedFunction =<br>
-Â Â Â CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, &BFI, &BPI)<br>
-Â Â Â Â Â .extractCodeRegion();<br>
+Â OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,<br>
+Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â ClonedFuncBFI.get(), &BPI)<br>
+Â Â Â Â Â Â Â Â Â Â Â .extractCodeRegion();<br>
+<br>
+Â if (OutlinedFunc) {<br>
+Â Â OutliningCallBB = PartialInlinerImpl::<wbr>getOneCallSiteTo(OutlinedFunc)<br>
+Â Â Â Â .getInstruction()<br>
+Â Â Â Â ->getParent();<br>
+Â Â assert(OutliningCallBB-><wbr>getParent() == ClonedFunc);<br>
+Â }<br>
<br>
-Â bool AnyInline =<br>
-Â Â Â tryPartialInline(<wbr>DuplicateFunction, F, OI.get(), OutlinedFunction, &BFI);<br>
+Â return OutlinedFunc;<br>
+}<br>
<br>
+PartialInlinerImpl::<wbr>FunctionCloner::~<wbr>FunctionCloner() {<br>
  // Ditch the duplicate, since we're done with it, and rewrite all remaining<br>
  // users (function pointers, etc.) back to the original function.<br>
-Â DuplicateFunction-><wbr>replaceAllUsesWith(F);<br>
-Â DuplicateFunction-><wbr>eraseFromParent();<br>
+Â ClonedFunc-><wbr>replaceAllUsesWith(OrigFunc);<br>
+Â ClonedFunc->eraseFromParent();<br>
+Â if (!IsFunctionInlined) {<br>
+Â Â // Remove the function that is speculatively created if there is no<br>
+Â Â // reference.<br>
+Â Â if (OutlinedFunc)<br>
+Â Â Â OutlinedFunc->eraseFromParent(<wbr>);<br>
+Â }<br>
+}<br>
+<br>
+Function *PartialInlinerImpl::<wbr>unswitchFunction(Function *F) {<br>
+<br>
+Â if (F->hasAddressTaken())<br>
+Â Â return nullptr;<br>
+<br>
+Â // Let inliner handle it<br>
+Â if (F->hasFnAttribute(Attribute::<wbr>AlwaysInline))<br>
+Â Â return nullptr;<br>
+<br>
+Â if (F->hasFnAttribute(Attribute::<wbr>NoInline))<br>
+Â Â return nullptr;<br>
+<br>
+Â if (PSI->isFunctionEntryCold(F))<br>
+Â Â return nullptr;<br>
+<br>
+Â if (F->user_begin() == F->user_end())<br>
+Â Â return nullptr;<br>
+<br>
+Â std::unique_ptr<<wbr>FunctionOutliningInfo> OI = computeOutliningInfo(F);<br>
+<br>
+Â if (!OI)<br>
+Â Â return nullptr;<br>
+<br>
+Â FunctionCloner Cloner(F, OI.get());<br>
+Â Cloner.NormalizeReturnBlock();<br>
+Â Function *OutlinedFunction = Cloner.DoFunctionOutlining();<br>
+<br>
+Â bool AnyInline = tryPartialInline(Cloner);<br>
<br>
  if (AnyInline)<br>
   return OutlinedFunction;<br>
<br>
-Â // Remove the function that is speculatively created:<br>
-Â if (OutlinedFunction)<br>
-Â Â OutlinedFunction-><wbr>eraseFromParent();<br>
-<br>
  return nullptr;<br>
 }<br>
<br>
-bool PartialInlinerImpl::<wbr>tryPartialInline(Function *DuplicateFunction,<br>
-Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Function *F,<br>
-Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â FunctionOutliningInfo *OI,<br>
-Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Function *OutlinedFunction,<br>
-Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â BlockFrequencyInfo *CalleeBFI) {<br>
-Â if (OutlinedFunction == nullptr)<br>
-Â Â return false;<br>
-<br>
+bool PartialInlinerImpl::<wbr>tryPartialInline(<wbr>FunctionCloner &Cloner) {<br>
  int NonWeightedRcost;<br>
  int SizeCost;<br>
-Â int OutlinedRegionSizeCost;<br>
<br>
-Â auto OutliningCallBB =<br>
-Â Â Â getOneCallSiteTo(<wbr>OutlinedFunction).<wbr>getInstruction()->getParent();<br>
+Â if (Cloner.OutlinedFunc == nullptr)<br>
+Â Â return false;<br>
+<br>
+Â std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner);<br>
<br>
-Â std::tie(SizeCost, NonWeightedRcost, OutlinedRegionSizeCost) =<br>
-Â Â Â computeOutliningCosts(F, OI, OutlinedFunction, OutliningCallBB);<br>
+Â auto RelativeToEntryFreq = getOutliningCallBBRelativeFreq<wbr>(Cloner);<br>
+Â auto WeightedRcost = BlockFrequency(<wbr>NonWeightedRcost) * RelativeToEntryFreq;<br>
<br>
  // The call sequence to the outlined function is larger than the original<br>
  // outlined region size, it does not increase the chances of inlining<br>
-Â // 'F' with outlining (The inliner usies the size increase to model the<br>
-Â // the cost of inlining a callee).<br>
-Â if (!SkipCostAnalysis && OutlinedRegionSizeCost < SizeCost) {<br>
-Â Â OptimizationRemarkEmitter ORE(F);<br>
+&nbsp; // the function with outlining (The inliner uses the size increase to<br>
+Â // model the cost of inlining a callee).<br>
+Â if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {<br>
+Â Â OptimizationRemarkEmitter ORE(Cloner.OrigFunc);<br>
   DebugLoc DLoc;<br>
   BasicBlock *Block;<br>
-Â Â std::tie(DLoc, Block) = getOneDebugLoc(<wbr>DuplicateFunction);<br>
+Â Â std::tie(DLoc, Block) = getOneDebugLoc(Cloner.<wbr>ClonedFunc);<br>
   ORE.emit(<wbr>OptimizationRemarkAnalysis(<wbr>DEBUG_TYPE, "OutlineRegionTooSmall",<br>
                     DLoc, Block)<br>
-Â Â Â Â Â Â Â << ore::NV("Function", F)<br>
+Â Â Â Â Â Â Â << ore::NV("Function", Cloner.OrigFunc)<br>
       << " not partially inlined into callers (Original Size = "<br>
-Â Â Â Â Â Â Â << ore::NV("<wbr>OutlinedRegionOriginalSize", OutlinedRegionSizeCost)<br>
+Â Â Â Â Â Â Â << ore::NV("<wbr>OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)<br>
       << ", Size of call sequence to outlined function = "<br>
       << ore::NV("NewSize", SizeCost) << ")");<br>
   return false;<br>
  }<br>
<br>
-Â assert(F->user_begin() == F->user_end() &&<br>
+Â assert(Cloner.OrigFunc->user_<wbr>begin() == Cloner.OrigFunc->user_end() &&<br>
     "F's users should all be replaced!");<br>
-Â std::vector<User *> Users(DuplicateFunction->user_<wbr>begin(),<br>
-Â Â Â Â Â Â Â Â Â Â Â Â Â Â DuplicateFunction->user_end())<wbr>;<br>
+<br>
+Â std::vector<User *> Users(Cloner.ClonedFunc->user_<wbr>begin(),<br>
+Â Â Â Â Â Â Â Â Â Â Â Â Â Â Cloner.ClonedFunc->user_end())<wbr>;<br>
<br>
  DenseMap<User *, uint64_t> CallSiteToProfCountMap;<br>
-Â if (F->getEntryCount())<br>
-Â Â computeCallsiteToProfCountMap(<wbr>DuplicateFunction, CallSiteToProfCountMap);<br>
+Â if (Cloner.OrigFunc-><wbr>getEntryCount())<br>
+Â Â computeCallsiteToProfCountMap(<wbr>Cloner.ClonedFunc, CallSiteToProfCountMap);<br>
<br>
-Â auto CalleeEntryCount = F->getEntryCount();<br>
+Â auto CalleeEntryCount = Cloner.OrigFunc-><wbr>getEntryCount();<br>
  uint64_t CalleeEntryCountV = (CalleeEntryCount ? *CalleeEntryCount : 0);<br>
+<br>
  bool AnyInline = false;<br>
  for (User *User : Users) {<br>
   CallSite CS = getCallSite(User);<br>
@@ -854,13 +884,12 @@ bool PartialInlinerImpl::<wbr>tryPartialInlin<br>
<br>
   OptimizationRemarkEmitter ORE(CS.getCaller());<br>
<br>
-Â Â if (!shouldPartialInline(CS, F, OI, CalleeBFI, OutliningCallBB,<br>
-Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â NonWeightedRcost, ORE))<br>
+Â Â if (!shouldPartialInline(CS, Cloner, WeightedRcost, ORE))<br>
    continue;<br>
<br>
   ORE.emit(<br>
     OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction())<br>
-Â Â Â Â << ore::NV("Callee", F) << " partially inlined into "<br>
+Â Â Â Â << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "<br>
     << ore::NV("Caller", CS.getCaller()));<br>
<br>
   InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);<br>
@@ -878,8 +907,11 @@ bool PartialInlinerImpl::<wbr>tryPartialInlin<br>
   NumPartialInlined++;<br>
  }<br>
<br>
-Â if (AnyInline && CalleeEntryCount)<br>
-Â Â F->setEntryCount(<wbr>CalleeEntryCountV);<br>
+Â if (AnyInline) {<br>
+Â Â Cloner.IsFunctionInlined = true;<br>
+Â Â if (CalleeEntryCount)<br>
+Â Â Â Cloner.OrigFunc-><wbr>setEntryCount(<wbr>CalleeEntryCountV);<br>
+Â }<br>
<br>
  return AnyInline;<br>
 }<br>
<br>
<br>
______________________________<wbr>_________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a><br>
<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/<wbr>mailman/listinfo/llvm-commits</a><br>
</blockquote></div><br></div></div>