<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Thu, Jun 15, 2017 at 4:56 PM, Xinliang David Li via llvm-commits <span dir="ltr"><<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: davidxl<br>
Date: Thu Jun 15 18:56:59 2017<br>
New Revision: 305530<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=305530&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project?rev=305530&view=rev</a><br>
Log:<br>
[PartialInlining] Code Refactoring<br>
<br>
This is an NFC (no functional change) code refactoring and interface cleanup.<br>
It paves the way to enable an outlining-only mode for the partial inliner.<br>
<br>
<br>
<br>
Modified:<br>
    llvm/trunk/lib/Transforms/IPO/<wbr>PartialInlining.cpp<br>
<br>
Modified: llvm/trunk/lib/Transforms/IPO/<wbr>PartialInlining.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp?rev=305530&r1=305529&r2=305530&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/lib/<wbr>Transforms/IPO/<wbr>PartialInlining.cpp?rev=<wbr>305530&r1=305529&r2=305530&<wbr>view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/lib/Transforms/IPO/<wbr>PartialInlining.cpp (original)<br>
+++ llvm/trunk/lib/Transforms/IPO/<wbr>PartialInlining.cpp Thu Jun 15 18:56:59 2017<br>
@@ -103,6 +103,35 @@ struct PartialInlinerImpl {<br>
   bool run(Module &M);<br>
   Function *unswitchFunction(Function *F);<br>
<br>
+  // This class speculatively clones the function to be partially inlined.<br>
+  // At the end of partial inlining, the remaining callsites to the cloned<br>
+  // function that are not partially inlined will be fixed up to reference<br>
+  // the original function, and the cloned function will be erased.<br>
+  struct FunctionCloner {<br>
+    FunctionCloner(Function *F, FunctionOutliningInfo *OI);<br>
+    ~FunctionCloner();<br>
+<br>
+    // Prepare for function outlining: making sure there is only<br>
+    // one incoming edge from the extracted/outlined region to<br>
+    // the return block.<br>
+    void NormalizeReturnBlock();<br>
+<br>
+    // Do function outlining:<br>
+    Function *DoFunctionOutlining();<br></blockquote><div><br></div><div>This should be named doFunctionOutlining to follow the LLVM naming convention (function names start with a lowercase letter).</div><div><br></div><div>-- Sean Silva</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br>
+    Function *OrigFunc = nullptr;<br>
+    Function *ClonedFunc = nullptr;<br>
+    Function *OutlinedFunc = nullptr;<br>
+    BasicBlock *OutliningCallBB = nullptr;<br>
+    // ClonedFunc is inlined in one of its callers after function<br>
+    // outlining.<br>
+    bool IsFunctionInlined = false;<br>
+    // The cost of the region to be outlined.<br>
+    int OutlinedRegionCost = 0;<br>
+    std::unique_ptr<<wbr>FunctionOutliningInfo> ClonedOI = nullptr;<br>
+    std::unique_ptr<<wbr>BlockFrequencyInfo> ClonedFuncBFI = nullptr;<br>
+  };<br>
+<br>
 private:<br>
   int NumPartialInlining = 0;<br>
   std::function<AssumptionCache &(Function &)> *GetAssumptionCache;<br>
@@ -114,27 +143,18 @@ private:<br>
   // The result is no larger than 1 and is represented using BP.<br>
   // (Note that the outlined region's 'head' block can only have incoming<br>
   // edges from the guarding entry blocks).<br>
-  BranchProbability getOutliningCallBBRelativeFreq<wbr>(Function *F,<br>
-                                                   FunctionOutliningInfo *OI,<br>
-                                                   Function *DuplicateFunction,<br>
-                                                   BlockFrequencyInfo *BFI,<br>
-                                                   BasicBlock *OutliningCallBB);<br>
+  BranchProbability getOutliningCallBBRelativeFreq<wbr>(FunctionCloner &Cloner);<br>
<br>
   // Return true if the callee of CS should be partially inlined with<br>
   // profit.<br>
-  bool shouldPartialInline(CallSite CS, Function *F, FunctionOutliningInfo *OI,<br>
-                           BlockFrequencyInfo *CalleeBFI,<br>
-                           BasicBlock *OutliningCallBB,<br>
-                           int OutliningCallOverhead,<br>
+  bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner,<br>
+                           BlockFrequency WeightedOutliningRcost,<br>
                            OptimizationRemarkEmitter &ORE);<br>
<br>
   // Try to inline DuplicateFunction (cloned from F with call to<br>
   // the OutlinedFunction) into its callers. Return true<br>
   // if there is any successful inlining.<br>
-  bool tryPartialInline(Function *DuplicateFunction,<br>
-                        Function *F, /*orignal function */<br>
-                        FunctionOutliningInfo *OI, Function *OutlinedFunction,<br>
-                        BlockFrequencyInfo *CalleeBFI);<br>
+  bool tryPartialInline(<wbr>FunctionCloner &Cloner);<br>
<br>
   // Compute the mapping from use site of DuplicationFunction to the enclosing<br>
   // BB's profile count.<br>
@@ -146,7 +166,7 @@ private:<br>
             NumPartialInlining >= MaxNumPartialInlining);<br>
   }<br>
<br>
-  CallSite getCallSite(User *U) {<br>
+  static CallSite getCallSite(User *U) {<br>
     CallSite CS;<br>
     if (CallInst *CI = dyn_cast<CallInst>(U))<br>
       CS = CallSite(CI);<br>
@@ -157,7 +177,7 @@ private:<br>
     return CS;<br>
   }<br>
<br>
-  CallSite getOneCallSiteTo(Function *F) {<br>
+  static CallSite getOneCallSiteTo(Function *F) {<br>
     User *User = *F->user_begin();<br>
     return getCallSite(User);<br>
   }<br>
@@ -171,20 +191,15 @@ private:<br>
<br>
   // Returns the costs associated with function outlining:<br>
   // - The first value is the non-weighted runtime cost for making the call<br>
-  //   to the outlined function 'OutlinedFunction', including the addtional<br>
-  //   setup cost in the outlined function itself;<br>
+  //   to the outlined function, including the additional setup cost in the<br>
+  //   outlined function itself;<br>
   // - The second value is the estimated size of the new call sequence in<br>
-  //   basic block 'OutliningCallBB';<br>
-  // - The third value is the estimated size of the original code from<br>
-  //   function 'F' that is extracted into the outlined function.<br>
-  std::tuple<int, int, int><br>
-  computeOutliningCosts(Function *F, const FunctionOutliningInfo *OutliningInfo,<br>
-                        Function *OutlinedFunction,<br>
-                        BasicBlock *OutliningCallBB);<br>
+  //   basic block Cloner.OutliningCallBB;<br>
+  std::tuple<int, int> computeOutliningCosts(<wbr>FunctionCloner &Cloner);<br>
   // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to<br>
   // approximate both the size and runtime cost (Note that in the current<br>
   // inline cost analysis, there is no clear distinction there either).<br>
-  int computeBBInlineCost(BasicBlock *BB);<br>
+  static int computeBBInlineCost(BasicBlock *BB);<br>
<br>
   std::unique_ptr<<wbr>FunctionOutliningInfo> computeOutliningInfo(Function *F);<br>
<br>
@@ -396,19 +411,19 @@ static bool hasProfileData(Function *F,<br>
   return false;<br>
 }<br>
<br>
-BranchProbability PartialInlinerImpl::<wbr>getOutliningCallBBRelativeFreq<wbr>(<br>
-    Function *F, FunctionOutliningInfo *OI, Function *DuplicateFunction,<br>
-    BlockFrequencyInfo *BFI, BasicBlock *OutliningCallBB) {<br>
+BranchProbability<br>
+PartialInlinerImpl::<wbr>getOutliningCallBBRelativeFreq<wbr>(FunctionCloner &Cloner) {<br>
<br>
   auto EntryFreq =<br>
-      BFI->getBlockFreq(&<wbr>DuplicateFunction-><wbr>getEntryBlock());<br>
-  auto OutliningCallFreq = BFI->getBlockFreq(<wbr>OutliningCallBB);<br>
+      Cloner.ClonedFuncBFI-><wbr>getBlockFreq(&Cloner.<wbr>ClonedFunc->getEntryBlock());<br>
+  auto OutliningCallFreq =<br>
+      Cloner.ClonedFuncBFI-><wbr>getBlockFreq(Cloner.<wbr>OutliningCallBB);<br>
<br>
   auto OutlineRegionRelFreq =<br>
       BranchProbability::<wbr>getBranchProbability(<wbr>OutliningCallFreq.<wbr>getFrequency(),<br>
                                               EntryFreq.getFrequency());<br>
<br>
-  if (hasProfileData(F, OI))<br>
+  if (hasProfileData(Cloner.<wbr>OrigFunc, Cloner.ClonedOI.get()))<br>
     return OutlineRegionRelFreq;<br>
<br>
   // When profile data is not available, we need to be conservative in<br>
@@ -433,15 +448,17 @@ BranchProbability PartialInlinerImpl::ge<br>
 }<br>
<br>
 bool PartialInlinerImpl::<wbr>shouldPartialInline(<br>
-    CallSite CS, Function *F /* Original Callee */, FunctionOutliningInfo *OI,<br>
-    BlockFrequencyInfo *CalleeBFI, BasicBlock *OutliningCallBB,<br>
-    int NonWeightedOutliningRcost, OptimizationRemarkEmitter &ORE) {<br>
+    CallSite CS, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,<br>
+    OptimizationRemarkEmitter &ORE) {<br>
+<br>
   using namespace ore;<br>
   if (SkipCostAnalysis)<br>
     return true;<br>
<br>
   Instruction *Call = CS.getInstruction();<br>
   Function *Callee = CS.getCalledFunction();<br>
+  assert(Callee == Cloner.ClonedFunc);<br>
+<br>
   Function *Caller = CS.getCaller();<br>
   auto &CalleeTTI = (*GetTTI)(*Callee);<br>
   InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,<br>
@@ -449,14 +466,14 @@ bool PartialInlinerImpl::<wbr>shouldPartialIn<br>
<br>
   if (IC.isAlways()) {<br>
     ORE.emit(<wbr>OptimizationRemarkAnalysis(<wbr>DEBUG_TYPE, "AlwaysInline", Call)<br>
-             << NV("Callee", F)<br>
+             << NV("Callee", Cloner.OrigFunc)<br>
              << " should always be fully inlined, not partially");<br>
     return false;<br>
   }<br>
<br>
   if (IC.isNever()) {<br>
     ORE.emit(<wbr>OptimizationRemarkMissed(<wbr>DEBUG_TYPE, "NeverInline", Call)<br>
-             << NV("Callee", F) << " not partially inlined into "<br>
+             << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "<br>
              << NV("Caller", Caller)<br>
              << " because it should never be inlined (cost=never)");<br>
     return false;<br>
@@ -464,29 +481,25 @@ bool PartialInlinerImpl::<wbr>shouldPartialIn<br>
<br>
   if (!IC) {<br>
     ORE.emit(<wbr>OptimizationRemarkAnalysis(<wbr>DEBUG_TYPE, "TooCostly", Call)<br>
-             << NV("Callee", F) << " not partially inlined into "<br>
+             << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "<br>
              << NV("Caller", Caller) << " because too costly to inline (cost="<br>
              << NV("Cost", IC.getCost()) << ", threshold="<br>
              << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");<br>
     return false;<br>
   }<br>
   const DataLayout &DL = Caller->getParent()-><wbr>getDataLayout();<br>
+<br>
   // The savings of eliminating the call:<br>
   int NonWeightedSavings = getCallsiteCost(CS, DL);<br>
   BlockFrequency NormWeightedSavings(<wbr>NonWeightedSavings);<br>
<br>
-  auto RelativeFreq =<br>
-      getOutliningCallBBRelativeFreq<wbr>(F, OI, Callee, CalleeBFI, OutliningCallBB);<br>
-  auto NormWeightedRcost =<br>
-      BlockFrequency(<wbr>NonWeightedOutliningRcost) * RelativeFreq;<br>
-<br>
   // Weighted saving is smaller than weighted cost, return false<br>
-  if (NormWeightedSavings < NormWeightedRcost) {<br>
+  if (NormWeightedSavings < WeightedOutliningRcost) {<br>
     ORE.emit(<br>
         OptimizationRemarkAnalysis(<wbr>DEBUG_TYPE, "OutliningCallcostTooHigh", Call)<br>
-        << NV("Callee", F) << " not partially inlined into "<br>
+        << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "<br>
         << NV("Caller", Caller) << " runtime overhead (overhead="<br>
-        << NV("Overhead", (unsigned)NormWeightedRcost.<wbr>getFrequency())<br>
+        << NV("Overhead", (unsigned)<wbr>WeightedOutliningRcost.<wbr>getFrequency())<br>
         << ", savings="<br>
         << NV("Savings", (unsigned)NormWeightedSavings.<wbr>getFrequency()) << ")"<br>
         << " of making the outlined call is too high");<br>
@@ -495,7 +508,7 @@ bool PartialInlinerImpl::<wbr>shouldPartialIn<br>
   }<br>
<br>
   ORE.emit(<wbr>OptimizationRemarkAnalysis(<wbr>DEBUG_TYPE, "CanBePartiallyInlined", Call)<br>
-           << NV("Callee", F) << " can be partially inlined into "<br>
+           << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "<br>
            << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())<br>
            << " (threshold="<br>
            << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");<br>
@@ -551,50 +564,32 @@ int PartialInlinerImpl::<wbr>computeBBInlineC<br>
   return InlineCost;<br>
 }<br>
<br>
-std::tuple<int, int, int> PartialInlinerImpl::<wbr>computeOutliningCosts(<br>
-    Function *F, const FunctionOutliningInfo *OI, Function *OutlinedFunction,<br>
-    BasicBlock *OutliningCallBB) {<br>
-  // First compute the cost of the outlined region 'OI' in the original<br>
-  // function 'F'.<br>
-  // FIXME: The code extractor (outliner) can now do code sinking/hoisting<br>
-  // to reduce outlining cost. The hoisted/sunk code currently do not<br>
-  // incur any runtime cost so it is still OK to compare the outlined<br>
-  // function cost with the outlined region in the original function.<br>
-  // If this ever changes, we will need to introduce new extractor api<br>
-  // to pass the information.<br>
-  int OutlinedRegionCost = 0;<br>
-  for (BasicBlock &BB : *F) {<br>
-    if (&BB != OI->ReturnBlock &&<br>
-        // Assuming Entry set is small -- do a linear search here:<br>
-        std::find(OI->Entries.begin(), OI->Entries.end(), &BB) ==<br>
-            OI->Entries.end()) {<br>
-      OutlinedRegionCost += computeBBInlineCost(&BB);<br>
-    }<br>
-  }<br>
+std::tuple<int, int><br>
+PartialInlinerImpl::<wbr>computeOutliningCosts(<wbr>FunctionCloner &Cloner) {<br>
<br>
   // Now compute the cost of the call sequence to the outlined function<br>
   // 'OutlinedFunction' in BB 'OutliningCallBB':<br>
-  int OutliningFuncCallCost = computeBBInlineCost(<wbr>OutliningCallBB);<br>
+  int OutliningFuncCallCost = computeBBInlineCost(Cloner.<wbr>OutliningCallBB);<br>
<br>
   // Now compute the cost of the extracted/outlined function itself:<br>
   int OutlinedFunctionCost = 0;<br>
-  for (BasicBlock &BB : *OutlinedFunction) {<br>
+  for (BasicBlock &BB : *Cloner.OutlinedFunc) {<br>
     OutlinedFunctionCost += computeBBInlineCost(&BB);<br>
   }<br>
<br>
-  assert(OutlinedFunctionCost >= OutlinedRegionCost &&<br>
+  assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&<br>
          "Outlined function cost should be no less than the outlined region");<br>
   // The code extractor introduces a new root and exit stub blocks with<br>
   // additional unconditional branches. Those branches will be eliminated<br>
   // later with bb layout. The cost should be adjusted accordingly:<br>
   OutlinedFunctionCost -= 2 * InlineConstants::InstrCost;<br>
<br>
-  int OutliningRuntimeOverhead = OutliningFuncCallCost +<br>
-                                 (OutlinedFunctionCost - OutlinedRegionCost) +<br>
-                                 ExtraOutliningPenalty;<br>
+  int OutliningRuntimeOverhead =<br>
+      OutliningFuncCallCost +<br>
+      (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +<br>
+      ExtraOutliningPenalty;<br>
<br>
-  return std::make_tuple(<wbr>OutliningFuncCallCost, OutliningRuntimeOverhead,<br>
-                         OutlinedRegionCost);<br>
+  return std::make_tuple(<wbr>OutliningFuncCallCost, OutliningRuntimeOverhead);<br>
 }<br>
<br>
 // Create the callsite to profile count map which is<br>
@@ -641,42 +636,30 @@ void PartialInlinerImpl::<wbr>computeCallsite<br>
   }<br>
 }<br>
<br>
-Function *PartialInlinerImpl::<wbr>unswitchFunction(Function *F) {<br>
-<br>
-  if (F->hasAddressTaken())<br>
-    return nullptr;<br>
-<br>
-  // Let inliner handle it<br>
-  if (F->hasFnAttribute(Attribute::<wbr>AlwaysInline))<br>
-    return nullptr;<br>
-<br>
-  if (F->hasFnAttribute(Attribute::<wbr>NoInline))<br>
-    return nullptr;<br>
-<br>
-  if (PSI->isFunctionEntryCold(F))<br>
-    return nullptr;<br>
-<br>
-  if (F->user_begin() == F->user_end())<br>
-    return nullptr;<br>
-<br>
-  std::unique_ptr<<wbr>FunctionOutliningInfo> OI = computeOutliningInfo(F);<br>
-<br>
-  if (!OI)<br>
-    return nullptr;<br>
+PartialInlinerImpl::<wbr>FunctionCloner::<wbr>FunctionCloner(Function *F,<br>
+                                                   FunctionOutliningInfo *OI)<br>
+    : OrigFunc(F) {<br>
+  ClonedOI = llvm::make_unique<<wbr>FunctionOutliningInfo>();<br>
<br>
   // Clone the function, so that we can hack away on it.<br>
   ValueToValueMapTy VMap;<br>
-  Function *DuplicateFunction = CloneFunction(F, VMap);<br>
-  BasicBlock *NewReturnBlock = cast<BasicBlock>(VMap[OI-><wbr>ReturnBlock]);<br>
-  BasicBlock *NewNonReturnBlock = cast<BasicBlock>(VMap[OI-><wbr>NonReturnBlock]);<br>
-  DenseSet<BasicBlock *> NewEntries;<br>
+  ClonedFunc = CloneFunction(F, VMap);<br>
+<br>
+  ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI-><wbr>ReturnBlock]);<br>
+  ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI-><wbr>NonReturnBlock]);<br>
   for (BasicBlock *BB : OI->Entries) {<br>
-    NewEntries.insert(cast<<wbr>BasicBlock>(VMap[BB]));<br>
+    ClonedOI->Entries.push_back(<wbr>cast<BasicBlock>(VMap[BB]));<br>
+  }<br>
+  for (BasicBlock *E : OI->ReturnBlockPreds) {<br>
+    BasicBlock *NewE = cast<BasicBlock>(VMap[E]);<br>
+    ClonedOI->ReturnBlockPreds.<wbr>push_back(NewE);<br>
   }<br>
-<br>
   // Go ahead and update all uses to the duplicate, so that we can just<br>
   // use the inliner functionality when we're done hacking.<br>
-  F->replaceAllUsesWith(<wbr>DuplicateFunction);<br>
+  F->replaceAllUsesWith(<wbr>ClonedFunc);<br>
+}<br>
+<br>
+void PartialInlinerImpl::<wbr>FunctionCloner::<wbr>NormalizeReturnBlock() {<br>
<br>
   auto getFirstPHI = [](BasicBlock *BB) {<br>
     BasicBlock::iterator I = BB->begin();<br>
@@ -692,14 +675,19 @@ Function *PartialInlinerImpl::<wbr>unswitchFu<br>
     }<br>
     return FirstPhi;<br>
   };<br>
+<br>
   // Special hackery is needed with PHI nodes that have inputs from more than<br>
   // one extracted block.  For simplicity, just split the PHIs into a two-level<br>
   // sequence of PHIs, some of which will go in the extracted region, and some<br>
   // of which will go outside.<br>
-  BasicBlock *PreReturn = NewReturnBlock;<br>
+  BasicBlock *PreReturn = ClonedOI->ReturnBlock;<br>
   // only split block when necessary:<br>
   PHINode *FirstPhi = getFirstPHI(PreReturn);<br>
-  unsigned NumPredsFromEntries = OI->ReturnBlockPreds.size();<br>
+  unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.<wbr>size();<br>
+<br>
+  if (!FirstPhi || FirstPhi-><wbr>getNumIncomingValues() <= NumPredsFromEntries + 1)<br>
+    return;<br>
+<br>
   auto IsTrivialPhi = [](PHINode *PN) -> Value * {<br>
     Value *CommonValue = PN->getIncomingValue(0);<br>
     if (all_of(PN->incoming_values(),<br>
@@ -708,143 +696,185 @@ Function *PartialInlinerImpl::<wbr>unswitchFu<br>
     return nullptr;<br>
   };<br>
<br>
-  if (FirstPhi && FirstPhi-><wbr>getNumIncomingValues() > NumPredsFromEntries + 1) {<br>
-<br>
-    NewReturnBlock = NewReturnBlock-><wbr>splitBasicBlock(<br>
-        NewReturnBlock-><wbr>getFirstNonPHI()->getIterator(<wbr>));<br>
-    BasicBlock::iterator I = PreReturn->begin();<br>
-    Instruction *Ins = &NewReturnBlock->front();<br>
-    SmallVector<Instruction *, 4> DeadPhis;<br>
-    while (I != PreReturn->end()) {<br>
-      PHINode *OldPhi = dyn_cast<PHINode>(I);<br>
-      if (!OldPhi)<br>
-        break;<br>
-<br>
-      PHINode *RetPhi =<br>
-          PHINode::Create(OldPhi-><wbr>getType(), NumPredsFromEntries + 1, "", Ins);<br>
-      OldPhi->replaceAllUsesWith(<wbr>RetPhi);<br>
-      Ins = NewReturnBlock-><wbr>getFirstNonPHI();<br>
-<br>
-      RetPhi->addIncoming(&*I, PreReturn);<br>
-      for (BasicBlock *E : OI->ReturnBlockPreds) {<br>
-        BasicBlock *NewE = cast<BasicBlock>(VMap[E]);<br>
-        RetPhi->addIncoming(OldPhi-><wbr>getIncomingValueForBlock(NewE)<wbr>, NewE);<br>
-        OldPhi->removeIncomingValue(<wbr>NewE);<br>
-      }<br>
-<br>
-      // After incoming values splitting, the old phi may become trivial.<br>
-      // Keeping the trivial phi can introduce definition inside the outline<br>
-      // region which is live-out, causing necessary overhead (load, store<br>
-      // arg passing etc).<br>
-      if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {<br>
-        OldPhi->replaceAllUsesWith(<wbr>OldPhiVal);<br>
-        DeadPhis.push_back(OldPhi);<br>
-      }<br>
+  ClonedOI->ReturnBlock = ClonedOI->ReturnBlock-><wbr>splitBasicBlock(<br>
+      ClonedOI->ReturnBlock-><wbr>getFirstNonPHI()->getIterator(<wbr>));<br>
+  BasicBlock::iterator I = PreReturn->begin();<br>
+  Instruction *Ins = &ClonedOI->ReturnBlock->front(<wbr>);<br>
+  SmallVector<Instruction *, 4> DeadPhis;<br>
+  while (I != PreReturn->end()) {<br>
+    PHINode *OldPhi = dyn_cast<PHINode>(I);<br>
+    if (!OldPhi)<br>
+      break;<br>
<br>
-      ++I;<br>
+    PHINode *RetPhi =<br>
+        PHINode::Create(OldPhi-><wbr>getType(), NumPredsFromEntries + 1, "", Ins);<br>
+    OldPhi->replaceAllUsesWith(<wbr>RetPhi);<br>
+    Ins = ClonedOI->ReturnBlock-><wbr>getFirstNonPHI();<br>
+<br>
+    RetPhi->addIncoming(&*I, PreReturn);<br>
+    for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {<br>
+      RetPhi->addIncoming(OldPhi-><wbr>getIncomingValueForBlock(E), E);<br>
+      OldPhi->removeIncomingValue(E)<wbr>;<br>
+    }<br>
+<br>
+    // After incoming values splitting, the old phi may become trivial.<br>
+    // Keeping the trivial phi can introduce definition inside the outline<br>
+    // region which is live-out, causing unnecessary overhead (load, store,<br>
+    // arg passing etc).<br>
+    if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {<br>
+      OldPhi->replaceAllUsesWith(<wbr>OldPhiVal);<br>
+      DeadPhis.push_back(OldPhi);<br>
+    }<br>
+    ++I;<br>
     }<br>
-<br>
     for (auto *DP : DeadPhis)<br>
       DP->eraseFromParent();<br>
<br>
-    for (auto E : OI->ReturnBlockPreds) {<br>
-      BasicBlock *NewE = cast<BasicBlock>(VMap[E]);<br>
-      NewE->getTerminator()-><wbr>replaceUsesOfWith(PreReturn, NewReturnBlock);<br>
+    for (auto E : ClonedOI->ReturnBlockPreds) {<br>
+      E->getTerminator()-><wbr>replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);<br>
     }<br>
-  }<br>
+}<br>
<br>
+Function *PartialInlinerImpl::<wbr>FunctionCloner::<wbr>DoFunctionOutlining() {<br>
   // Returns true if the block is to be partial inlined into the caller<br>
   // (i.e. not to be extracted to the out of line function)<br>
-  auto ToBeInlined = [&](BasicBlock *BB) {<br>
-    return BB == NewReturnBlock || NewEntries.count(BB);<br>
+  auto ToBeInlined = [&, this](BasicBlock *BB) {<br>
+    return BB == ClonedOI->ReturnBlock ||<br>
+           (std::find(ClonedOI->Entries.<wbr>begin(), ClonedOI->Entries.end(), BB) !=<br>
+            ClonedOI->Entries.end());<br>
   };<br>
+<br>
   // Gather up the blocks that we're going to extract.<br>
   std::vector<BasicBlock *> ToExtract;<br>
-  ToExtract.push_back(<wbr>NewNonReturnBlock);<br>
-  for (BasicBlock &BB : *DuplicateFunction)<br>
-    if (!ToBeInlined(&BB) && &BB != NewNonReturnBlock)<br>
+  ToExtract.push_back(ClonedOI-><wbr>NonReturnBlock);<br>
+  OutlinedRegionCost +=<br>
+      PartialInlinerImpl::<wbr>computeBBInlineCost(ClonedOI-><wbr>NonReturnBlock);<br>
+  for (BasicBlock &BB : *ClonedFunc)<br>
+    if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {<br>
       ToExtract.push_back(&BB);<br>
+      // FIXME: the code extractor may hoist/sink more code<br>
+      // into the outlined function which may make the outlining<br>
+      // overhead (the difference of the outlined function cost<br>
+      // and OutlinedRegionCost) look larger.<br>
+      OutlinedRegionCost += computeBBInlineCost(&BB);<br>
+    }<br>
<br>
   // The CodeExtractor needs a dominator tree.<br>
   DominatorTree DT;<br>
-  DT.recalculate(*<wbr>DuplicateFunction);<br>
+  DT.recalculate(*ClonedFunc);<br>
<br>
   // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.<br>
   LoopInfo LI(DT);<br>
-  BranchProbabilityInfo BPI(*DuplicateFunction, LI);<br>
-  BlockFrequencyInfo BFI(*DuplicateFunction, BPI, LI);<br>
+  BranchProbabilityInfo BPI(*ClonedFunc, LI);<br>
+  ClonedFuncBFI.reset(new BlockFrequencyInfo(*<wbr>ClonedFunc, BPI, LI));<br>
<br>
   // Extract the body of the if.<br>
-  Function *OutlinedFunction =<br>
-      CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, &BFI, &BPI)<br>
-          .extractCodeRegion();<br>
+  OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,<br>
+                               ClonedFuncBFI.get(), &BPI)<br>
+                     .extractCodeRegion();<br>
+<br>
+  if (OutlinedFunc) {<br>
+    OutliningCallBB = PartialInlinerImpl::<wbr>getOneCallSiteTo(OutlinedFunc)<br>
+        .getInstruction()<br>
+        ->getParent();<br>
+    assert(OutliningCallBB-><wbr>getParent() == ClonedFunc);<br>
+  }<br>
<br>
-  bool AnyInline =<br>
-      tryPartialInline(<wbr>DuplicateFunction, F, OI.get(), OutlinedFunction, &BFI);<br>
+  return OutlinedFunc;<br>
+}<br>
<br>
+PartialInlinerImpl::<wbr>FunctionCloner::~<wbr>FunctionCloner() {<br>
   // Ditch the duplicate, since we're done with it, and rewrite all remaining<br>
   // users (function pointers, etc.) back to the original function.<br>
-  DuplicateFunction-><wbr>replaceAllUsesWith(F);<br>
-  DuplicateFunction-><wbr>eraseFromParent();<br>
+  ClonedFunc-><wbr>replaceAllUsesWith(OrigFunc);<br>
+  ClonedFunc->eraseFromParent();<br>
+  if (!IsFunctionInlined) {<br>
+    // Remove the function that is speculatively created if there is no<br>
+    // reference.<br>
+    if (OutlinedFunc)<br>
+      OutlinedFunc->eraseFromParent(<wbr>);<br>
+  }<br>
+}<br>
+<br>
+Function *PartialInlinerImpl::<wbr>unswitchFunction(Function *F) {<br>
+<br>
+  if (F->hasAddressTaken())<br>
+    return nullptr;<br>
+<br>
+  // Let inliner handle it<br>
+  if (F->hasFnAttribute(Attribute::<wbr>AlwaysInline))<br>
+    return nullptr;<br>
+<br>
+  if (F->hasFnAttribute(Attribute::<wbr>NoInline))<br>
+    return nullptr;<br>
+<br>
+  if (PSI->isFunctionEntryCold(F))<br>
+    return nullptr;<br>
+<br>
+  if (F->user_begin() == F->user_end())<br>
+    return nullptr;<br>
+<br>
+  std::unique_ptr<<wbr>FunctionOutliningInfo> OI = computeOutliningInfo(F);<br>
+<br>
+  if (!OI)<br>
+    return nullptr;<br>
+<br>
+  FunctionCloner Cloner(F, OI.get());<br>
+  Cloner.NormalizeReturnBlock();<br>
+  Function *OutlinedFunction = Cloner.DoFunctionOutlining();<br>
+<br>
+  bool AnyInline = tryPartialInline(Cloner);<br>
<br>
   if (AnyInline)<br>
     return OutlinedFunction;<br>
<br>
-  // Remove the function that is speculatively created:<br>
-  if (OutlinedFunction)<br>
-    OutlinedFunction-><wbr>eraseFromParent();<br>
-<br>
   return nullptr;<br>
 }<br>
<br>
-bool PartialInlinerImpl::<wbr>tryPartialInline(Function *DuplicateFunction,<br>
-                                          Function *F,<br>
-                                          FunctionOutliningInfo *OI,<br>
-                                          Function *OutlinedFunction,<br>
-                                          BlockFrequencyInfo *CalleeBFI) {<br>
-  if (OutlinedFunction == nullptr)<br>
-    return false;<br>
-<br>
+bool PartialInlinerImpl::<wbr>tryPartialInline(<wbr>FunctionCloner &Cloner) {<br>
   int NonWeightedRcost;<br>
   int SizeCost;<br>
-  int OutlinedRegionSizeCost;<br>
<br>
-  auto OutliningCallBB =<br>
-      getOneCallSiteTo(<wbr>OutlinedFunction).<wbr>getInstruction()->getParent();<br>
+  if (Cloner.OutlinedFunc == nullptr)<br>
+    return false;<br>
+<br>
+  std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner);<br>
<br>
-  std::tie(SizeCost, NonWeightedRcost, OutlinedRegionSizeCost) =<br>
-      computeOutliningCosts(F, OI, OutlinedFunction, OutliningCallBB);<br>
+  auto RelativeToEntryFreq = getOutliningCallBBRelativeFreq<wbr>(Cloner);<br>
+  auto WeightedRcost = BlockFrequency(<wbr>NonWeightedRcost) * RelativeToEntryFreq;<br>
<br>
   // The call sequence to the outlined function is larger than the original<br>
   // outlined region size, it does not increase the chances of inlining<br>
-  // 'F' with outlining (The inliner usies the size increase to model the<br>
-  // the cost of inlining a callee).<br>
-  if (!SkipCostAnalysis && OutlinedRegionSizeCost < SizeCost) {<br>
-    OptimizationRemarkEmitter ORE(F);<br>
+  // the function with outlining (The inliner uses the size increase to<br>
+  // model the cost of inlining a callee).<br>
+  if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {<br>
+    OptimizationRemarkEmitter ORE(Cloner.OrigFunc);<br>
     DebugLoc DLoc;<br>
     BasicBlock *Block;<br>
-    std::tie(DLoc, Block) = getOneDebugLoc(<wbr>DuplicateFunction);<br>
+    std::tie(DLoc, Block) = getOneDebugLoc(Cloner.<wbr>ClonedFunc);<br>
     ORE.emit(<wbr>OptimizationRemarkAnalysis(<wbr>DEBUG_TYPE, "OutlineRegionTooSmall",<br>
                                         DLoc, Block)<br>
-             << ore::NV("Function", F)<br>
+             << ore::NV("Function", Cloner.OrigFunc)<br>
              << " not partially inlined into callers (Original Size = "<br>
-             << ore::NV("<wbr>OutlinedRegionOriginalSize", OutlinedRegionSizeCost)<br>
+             << ore::NV("<wbr>OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)<br>
              << ", Size of call sequence to outlined function = "<br>
              << ore::NV("NewSize", SizeCost) << ")");<br>
     return false;<br>
   }<br>
<br>
-  assert(F->user_begin() == F->user_end() &&<br>
+  assert(Cloner.OrigFunc->user_<wbr>begin() == Cloner.OrigFunc->user_end() &&<br>
          "F's users should all be replaced!");<br>
-  std::vector<User *> Users(DuplicateFunction->user_<wbr>begin(),<br>
-                            DuplicateFunction->user_end())<wbr>;<br>
+<br>
+  std::vector<User *> Users(Cloner.ClonedFunc->user_<wbr>begin(),<br>
+                            Cloner.ClonedFunc->user_end())<wbr>;<br>
<br>
   DenseMap<User *, uint64_t> CallSiteToProfCountMap;<br>
-  if (F->getEntryCount())<br>
-    computeCallsiteToProfCountMap(<wbr>DuplicateFunction, CallSiteToProfCountMap);<br>
+  if (Cloner.OrigFunc-><wbr>getEntryCount())<br>
+    computeCallsiteToProfCountMap(<wbr>Cloner.ClonedFunc, CallSiteToProfCountMap);<br>
<br>
-  auto CalleeEntryCount = F->getEntryCount();<br>
+  auto CalleeEntryCount = Cloner.OrigFunc-><wbr>getEntryCount();<br>
   uint64_t CalleeEntryCountV = (CalleeEntryCount ? *CalleeEntryCount : 0);<br>
+<br>
   bool AnyInline = false;<br>
   for (User *User : Users) {<br>
     CallSite CS = getCallSite(User);<br>
@@ -854,13 +884,12 @@ bool PartialInlinerImpl::<wbr>tryPartialInlin<br>
<br>
     OptimizationRemarkEmitter ORE(CS.getCaller());<br>
<br>
-    if (!shouldPartialInline(CS, F, OI, CalleeBFI, OutliningCallBB,<br>
-                             NonWeightedRcost, ORE))<br>
+    if (!shouldPartialInline(CS, Cloner, WeightedRcost, ORE))<br>
       continue;<br>
<br>
     ORE.emit(<br>
         OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction())<br>
-        << ore::NV("Callee", F) << " partially inlined into "<br>
+        << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "<br>
         << ore::NV("Caller", CS.getCaller()));<br>
<br>
     InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);<br>
@@ -878,8 +907,11 @@ bool PartialInlinerImpl::<wbr>tryPartialInlin<br>
     NumPartialInlined++;<br>
   }<br>
<br>
-  if (AnyInline && CalleeEntryCount)<br>
-    F->setEntryCount(<wbr>CalleeEntryCountV);<br>
+  if (AnyInline) {<br>
+    Cloner.IsFunctionInlined = true;<br>
+    if (CalleeEntryCount)<br>
+      Cloner.OrigFunc-><wbr>setEntryCount(<wbr>CalleeEntryCountV);<br>
+  }<br>
<br>
   return AnyInline;<br>
 }<br>
<br>
<br>
______________________________<wbr>_________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a><br>
<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/<wbr>mailman/listinfo/llvm-commits</a><br>
</blockquote></div><br></div></div>