[llvm] r305530 - [PartialInlining] Code Refactoring

Xinliang David Li via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 15 16:56:59 PDT 2017


Author: davidxl
Date: Thu Jun 15 18:56:59 2017
New Revision: 305530

URL: http://llvm.org/viewvc/llvm-project?rev=305530&view=rev
Log:
[PartialInlining] Code Refactoring

This is a NFC code refactoring and interface cleanup. This paves the
way to enable outlining-only mode for the partial inliner.



Modified:
    llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp

Modified: llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp?rev=305530&r1=305529&r2=305530&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp (original)
+++ llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp Thu Jun 15 18:56:59 2017
@@ -103,6 +103,35 @@ struct PartialInlinerImpl {
   bool run(Module &M);
   Function *unswitchFunction(Function *F);
 
+  // This class speculatively clones the the function to be partial inlined.
+  // At the end of partial inlining, the remaining callsites to the cloned
+  // function that are not partially inlined will be fixed up to reference
+  // the original function, and the cloned function will be erased.
+  struct FunctionCloner {
+    FunctionCloner(Function *F, FunctionOutliningInfo *OI);
+    ~FunctionCloner();
+
+    // Prepare for function outlining: making sure there is only
+    // one incoming edge from the extracted/outlined region to
+    // the return block.
+    void NormalizeReturnBlock();
+
+    // Do function outlining:
+    Function *DoFunctionOutlining();
+
+    Function *OrigFunc = nullptr;
+    Function *ClonedFunc = nullptr;
+    Function *OutlinedFunc = nullptr;
+    BasicBlock *OutliningCallBB = nullptr;
+    // ClonedFunc is inlined in one of its callers after function
+    // outlining.
+    bool IsFunctionInlined = false;
+    // The cost of the region to be outlined.
+    int OutlinedRegionCost = 0;
+    std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
+    std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
+  };
+
 private:
   int NumPartialInlining = 0;
   std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
@@ -114,27 +143,18 @@ private:
   // The result is no larger than 1 and is represented using BP.
   // (Note that the outlined region's 'head' block can only have incoming
   // edges from the guarding entry blocks).
-  BranchProbability getOutliningCallBBRelativeFreq(Function *F,
-                                                   FunctionOutliningInfo *OI,
-                                                   Function *DuplicateFunction,
-                                                   BlockFrequencyInfo *BFI,
-                                                   BasicBlock *OutliningCallBB);
+  BranchProbability getOutliningCallBBRelativeFreq(FunctionCloner &Cloner);
 
   // Return true if the callee of CS should be partially inlined with
   // profit.
-  bool shouldPartialInline(CallSite CS, Function *F, FunctionOutliningInfo *OI,
-                           BlockFrequencyInfo *CalleeBFI,
-                           BasicBlock *OutliningCallBB,
-                           int OutliningCallOverhead,
+  bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner,
+                           BlockFrequency WeightedOutliningRcost,
                            OptimizationRemarkEmitter &ORE);
 
   // Try to inline DuplicateFunction (cloned from F with call to
   // the OutlinedFunction into its callers. Return true
   // if there is any successful inlining.
-  bool tryPartialInline(Function *DuplicateFunction,
-                        Function *F, /*orignal function */
-                        FunctionOutliningInfo *OI, Function *OutlinedFunction,
-                        BlockFrequencyInfo *CalleeBFI);
+  bool tryPartialInline(FunctionCloner &Cloner);
 
   // Compute the mapping from use site of DuplicationFunction to the enclosing
   // BB's profile count.
@@ -146,7 +166,7 @@ private:
             NumPartialInlining >= MaxNumPartialInlining);
   }
 
-  CallSite getCallSite(User *U) {
+  static CallSite getCallSite(User *U) {
     CallSite CS;
     if (CallInst *CI = dyn_cast<CallInst>(U))
       CS = CallSite(CI);
@@ -157,7 +177,7 @@ private:
     return CS;
   }
 
-  CallSite getOneCallSiteTo(Function *F) {
+  static CallSite getOneCallSiteTo(Function *F) {
     User *User = *F->user_begin();
     return getCallSite(User);
   }
@@ -171,20 +191,15 @@ private:
 
   // Returns the costs associated with function outlining:
   // - The first value is the non-weighted runtime cost for making the call
-  //   to the outlined function 'OutlinedFunction', including the addtional
-  //   setup cost in the outlined function itself;
+  //   to the outlined function, including the addtional  setup cost in the
+  //    outlined function itself;
   // - The second value is the estimated size of the new call sequence in
-  //   basic block 'OutliningCallBB';
-  // - The third value is the estimated size of the original code from
-  //   function 'F' that is extracted into the outlined function.
-  std::tuple<int, int, int>
-  computeOutliningCosts(Function *F, const FunctionOutliningInfo *OutliningInfo,
-                        Function *OutlinedFunction,
-                        BasicBlock *OutliningCallBB);
+  //   basic block Cloner.OutliningCallBB;
+  std::tuple<int, int> computeOutliningCosts(FunctionCloner &Cloner);
   // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
   // approximate both the size and runtime cost (Note that in the current
   // inline cost analysis, there is no clear distinction there either).
-  int computeBBInlineCost(BasicBlock *BB);
+  static int computeBBInlineCost(BasicBlock *BB);
 
   std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
 
@@ -396,19 +411,19 @@ static bool hasProfileData(Function *F,
   return false;
 }
 
-BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
-    Function *F, FunctionOutliningInfo *OI, Function *DuplicateFunction,
-    BlockFrequencyInfo *BFI, BasicBlock *OutliningCallBB) {
+BranchProbability
+PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) {
 
   auto EntryFreq =
-      BFI->getBlockFreq(&DuplicateFunction->getEntryBlock());
-  auto OutliningCallFreq = BFI->getBlockFreq(OutliningCallBB);
+      Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
+  auto OutliningCallFreq =
+      Cloner.ClonedFuncBFI->getBlockFreq(Cloner.OutliningCallBB);
 
   auto OutlineRegionRelFreq =
       BranchProbability::getBranchProbability(OutliningCallFreq.getFrequency(),
                                               EntryFreq.getFrequency());
 
-  if (hasProfileData(F, OI))
+  if (hasProfileData(Cloner.OrigFunc, Cloner.ClonedOI.get()))
     return OutlineRegionRelFreq;
 
   // When profile data is not available, we need to be conservative in
@@ -433,15 +448,17 @@ BranchProbability PartialInlinerImpl::ge
 }
 
 bool PartialInlinerImpl::shouldPartialInline(
-    CallSite CS, Function *F /* Original Callee */, FunctionOutliningInfo *OI,
-    BlockFrequencyInfo *CalleeBFI, BasicBlock *OutliningCallBB,
-    int NonWeightedOutliningRcost, OptimizationRemarkEmitter &ORE) {
+    CallSite CS, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
+    OptimizationRemarkEmitter &ORE) {
+
   using namespace ore;
   if (SkipCostAnalysis)
     return true;
 
   Instruction *Call = CS.getInstruction();
   Function *Callee = CS.getCalledFunction();
+  assert(Callee == Cloner.ClonedFunc);
+
   Function *Caller = CS.getCaller();
   auto &CalleeTTI = (*GetTTI)(*Callee);
   InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
@@ -449,14 +466,14 @@ bool PartialInlinerImpl::shouldPartialIn
 
   if (IC.isAlways()) {
     ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
-             << NV("Callee", F)
+             << NV("Callee", Cloner.OrigFunc)
              << " should always be fully inlined, not partially");
     return false;
   }
 
   if (IC.isNever()) {
     ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
-             << NV("Callee", F) << " not partially inlined into "
+             << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
              << NV("Caller", Caller)
              << " because it should never be inlined (cost=never)");
     return false;
@@ -464,29 +481,25 @@ bool PartialInlinerImpl::shouldPartialIn
 
   if (!IC) {
     ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
-             << NV("Callee", F) << " not partially inlined into "
+             << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
              << NV("Caller", Caller) << " because too costly to inline (cost="
              << NV("Cost", IC.getCost()) << ", threshold="
              << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
     return false;
   }
   const DataLayout &DL = Caller->getParent()->getDataLayout();
+
   // The savings of eliminating the call:
   int NonWeightedSavings = getCallsiteCost(CS, DL);
   BlockFrequency NormWeightedSavings(NonWeightedSavings);
 
-  auto RelativeFreq =
-      getOutliningCallBBRelativeFreq(F, OI, Callee, CalleeBFI, OutliningCallBB);
-  auto NormWeightedRcost =
-      BlockFrequency(NonWeightedOutliningRcost) * RelativeFreq;
-
   // Weighted saving is smaller than weighted cost, return false
-  if (NormWeightedSavings < NormWeightedRcost) {
+  if (NormWeightedSavings < WeightedOutliningRcost) {
     ORE.emit(
         OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh", Call)
-        << NV("Callee", F) << " not partially inlined into "
+        << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
         << NV("Caller", Caller) << " runtime overhead (overhead="
-        << NV("Overhead", (unsigned)NormWeightedRcost.getFrequency())
+        << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
         << ", savings="
         << NV("Savings", (unsigned)NormWeightedSavings.getFrequency()) << ")"
         << " of making the outlined call is too high");
@@ -495,7 +508,7 @@ bool PartialInlinerImpl::shouldPartialIn
   }
 
   ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
-           << NV("Callee", F) << " can be partially inlined into "
+           << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
            << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
            << " (threshold="
            << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
@@ -551,50 +564,32 @@ int PartialInlinerImpl::computeBBInlineC
   return InlineCost;
 }
 
-std::tuple<int, int, int> PartialInlinerImpl::computeOutliningCosts(
-    Function *F, const FunctionOutliningInfo *OI, Function *OutlinedFunction,
-    BasicBlock *OutliningCallBB) {
-  // First compute the cost of the outlined region 'OI' in the original
-  // function 'F'.
-  // FIXME: The code extractor (outliner) can now do code sinking/hoisting
-  // to reduce outlining cost. The hoisted/sunk code currently do not
-  // incur any runtime cost so it is still OK to compare the outlined
-  // function cost with the outlined region in the original function.
-  // If this ever changes, we will need to introduce new extractor api
-  // to pass the information.
-  int OutlinedRegionCost = 0;
-  for (BasicBlock &BB : *F) {
-    if (&BB != OI->ReturnBlock &&
-        // Assuming Entry set is small -- do a linear search here:
-        std::find(OI->Entries.begin(), OI->Entries.end(), &BB) ==
-            OI->Entries.end()) {
-      OutlinedRegionCost += computeBBInlineCost(&BB);
-    }
-  }
+std::tuple<int, int>
+PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) {
 
   // Now compute the cost of the call sequence to the outlined function
   // 'OutlinedFunction' in BB 'OutliningCallBB':
-  int OutliningFuncCallCost = computeBBInlineCost(OutliningCallBB);
+  int OutliningFuncCallCost = computeBBInlineCost(Cloner.OutliningCallBB);
 
   // Now compute the cost of the extracted/outlined function itself:
   int OutlinedFunctionCost = 0;
-  for (BasicBlock &BB : *OutlinedFunction) {
+  for (BasicBlock &BB : *Cloner.OutlinedFunc) {
     OutlinedFunctionCost += computeBBInlineCost(&BB);
   }
 
-  assert(OutlinedFunctionCost >= OutlinedRegionCost &&
+  assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
          "Outlined function cost should be no less than the outlined region");
   // The code extractor introduces a new root and exit stub blocks with
   // additional unconditional branches. Those branches will be eliminated
   // later with bb layout. The cost should be adjusted accordingly:
   OutlinedFunctionCost -= 2 * InlineConstants::InstrCost;
 
-  int OutliningRuntimeOverhead = OutliningFuncCallCost +
-                                 (OutlinedFunctionCost - OutlinedRegionCost) +
-                                 ExtraOutliningPenalty;
+  int OutliningRuntimeOverhead =
+      OutliningFuncCallCost +
+      (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
+      ExtraOutliningPenalty;
 
-  return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead,
-                         OutlinedRegionCost);
+  return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
 }
 
 // Create the callsite to profile count map which is
@@ -641,42 +636,30 @@ void PartialInlinerImpl::computeCallsite
   }
 }
 
-Function *PartialInlinerImpl::unswitchFunction(Function *F) {
-
-  if (F->hasAddressTaken())
-    return nullptr;
-
-  // Let inliner handle it
-  if (F->hasFnAttribute(Attribute::AlwaysInline))
-    return nullptr;
-
-  if (F->hasFnAttribute(Attribute::NoInline))
-    return nullptr;
-
-  if (PSI->isFunctionEntryCold(F))
-    return nullptr;
-
-  if (F->user_begin() == F->user_end())
-    return nullptr;
-
-  std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
-
-  if (!OI)
-    return nullptr;
+PartialInlinerImpl::FunctionCloner::FunctionCloner(Function *F,
+                                                   FunctionOutliningInfo *OI)
+    : OrigFunc(F) {
+  ClonedOI = llvm::make_unique<FunctionOutliningInfo>();
 
   // Clone the function, so that we can hack away on it.
   ValueToValueMapTy VMap;
-  Function *DuplicateFunction = CloneFunction(F, VMap);
-  BasicBlock *NewReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
-  BasicBlock *NewNonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
-  DenseSet<BasicBlock *> NewEntries;
+  ClonedFunc = CloneFunction(F, VMap);
+
+  ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
+  ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
   for (BasicBlock *BB : OI->Entries) {
-    NewEntries.insert(cast<BasicBlock>(VMap[BB]));
+    ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
+  }
+  for (BasicBlock *E : OI->ReturnBlockPreds) {
+    BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
+    ClonedOI->ReturnBlockPreds.push_back(NewE);
   }
-
   // Go ahead and update all uses to the duplicate, so that we can just
   // use the inliner functionality when we're done hacking.
-  F->replaceAllUsesWith(DuplicateFunction);
+  F->replaceAllUsesWith(ClonedFunc);
+}
+
+void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
 
   auto getFirstPHI = [](BasicBlock *BB) {
     BasicBlock::iterator I = BB->begin();
@@ -692,14 +675,19 @@ Function *PartialInlinerImpl::unswitchFu
     }
     return FirstPhi;
   };
+
   // Special hackery is needed with PHI nodes that have inputs from more than
   // one extracted block.  For simplicity, just split the PHIs into a two-level
   // sequence of PHIs, some of which will go in the extracted region, and some
   // of which will go outside.
-  BasicBlock *PreReturn = NewReturnBlock;
+  BasicBlock *PreReturn = ClonedOI->ReturnBlock;
   // only split block when necessary:
   PHINode *FirstPhi = getFirstPHI(PreReturn);
-  unsigned NumPredsFromEntries = OI->ReturnBlockPreds.size();
+  unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
+
+  if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
+    return;
+
   auto IsTrivialPhi = [](PHINode *PN) -> Value * {
     Value *CommonValue = PN->getIncomingValue(0);
     if (all_of(PN->incoming_values(),
@@ -708,143 +696,185 @@ Function *PartialInlinerImpl::unswitchFu
     return nullptr;
   };
 
-  if (FirstPhi && FirstPhi->getNumIncomingValues() > NumPredsFromEntries + 1) {
-
-    NewReturnBlock = NewReturnBlock->splitBasicBlock(
-        NewReturnBlock->getFirstNonPHI()->getIterator());
-    BasicBlock::iterator I = PreReturn->begin();
-    Instruction *Ins = &NewReturnBlock->front();
-    SmallVector<Instruction *, 4> DeadPhis;
-    while (I != PreReturn->end()) {
-      PHINode *OldPhi = dyn_cast<PHINode>(I);
-      if (!OldPhi)
-        break;
-
-      PHINode *RetPhi =
-          PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
-      OldPhi->replaceAllUsesWith(RetPhi);
-      Ins = NewReturnBlock->getFirstNonPHI();
-
-      RetPhi->addIncoming(&*I, PreReturn);
-      for (BasicBlock *E : OI->ReturnBlockPreds) {
-        BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
-        RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewE), NewE);
-        OldPhi->removeIncomingValue(NewE);
-      }
-
-      // After incoming values splitting, the old phi may become trivial.
-      // Keeping the trivial phi can introduce definition inside the outline
-      // region which is live-out, causing necessary overhead (load, store
-      // arg passing etc).
-      if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
-        OldPhi->replaceAllUsesWith(OldPhiVal);
-        DeadPhis.push_back(OldPhi);
-      }
+  ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
+      ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
+  BasicBlock::iterator I = PreReturn->begin();
+  Instruction *Ins = &ClonedOI->ReturnBlock->front();
+  SmallVector<Instruction *, 4> DeadPhis;
+  while (I != PreReturn->end()) {
+    PHINode *OldPhi = dyn_cast<PHINode>(I);
+    if (!OldPhi)
+      break;
 
-      ++I;
+    PHINode *RetPhi =
+        PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
+    OldPhi->replaceAllUsesWith(RetPhi);
+    Ins = ClonedOI->ReturnBlock->getFirstNonPHI();
+
+    RetPhi->addIncoming(&*I, PreReturn);
+    for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
+      RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
+      OldPhi->removeIncomingValue(E);
+    }
+
+    // After incoming values splitting, the old phi may become trivial.
+    // Keeping the trivial phi can introduce definition inside the outline
+    // region which is live-out, causing necessary overhead (load, store
+    // arg passing etc).
+    if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
+      OldPhi->replaceAllUsesWith(OldPhiVal);
+      DeadPhis.push_back(OldPhi);
+    }
+    ++I;
     }
-
     for (auto *DP : DeadPhis)
       DP->eraseFromParent();
 
-    for (auto E : OI->ReturnBlockPreds) {
-      BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
-      NewE->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock);
+    for (auto E : ClonedOI->ReturnBlockPreds) {
+      E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
     }
-  }
+}
 
+Function *PartialInlinerImpl::FunctionCloner::DoFunctionOutlining() {
   // Returns true if the block is to be partial inlined into the caller
   // (i.e. not to be extracted to the out of line function)
-  auto ToBeInlined = [&](BasicBlock *BB) {
-    return BB == NewReturnBlock || NewEntries.count(BB);
+  auto ToBeInlined = [&, this](BasicBlock *BB) {
+    return BB == ClonedOI->ReturnBlock ||
+           (std::find(ClonedOI->Entries.begin(), ClonedOI->Entries.end(), BB) !=
+            ClonedOI->Entries.end());
   };
+
   // Gather up the blocks that we're going to extract.
   std::vector<BasicBlock *> ToExtract;
-  ToExtract.push_back(NewNonReturnBlock);
-  for (BasicBlock &BB : *DuplicateFunction)
-    if (!ToBeInlined(&BB) && &BB != NewNonReturnBlock)
+  ToExtract.push_back(ClonedOI->NonReturnBlock);
+  OutlinedRegionCost +=
+      PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock);
+  for (BasicBlock &BB : *ClonedFunc)
+    if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
       ToExtract.push_back(&BB);
+      // FIXME: the code extractor may hoist/sink more code
+      // into the outlined function which may make the outlining
+      // overhead (the difference of the outlined function cost
+      // and OutliningRegionCost) look larger.
+      OutlinedRegionCost += computeBBInlineCost(&BB);
+    }
 
   // The CodeExtractor needs a dominator tree.
   DominatorTree DT;
-  DT.recalculate(*DuplicateFunction);
+  DT.recalculate(*ClonedFunc);
 
   // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
   LoopInfo LI(DT);
-  BranchProbabilityInfo BPI(*DuplicateFunction, LI);
-  BlockFrequencyInfo BFI(*DuplicateFunction, BPI, LI);
+  BranchProbabilityInfo BPI(*ClonedFunc, LI);
+  ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
 
   // Extract the body of the if.
-  Function *OutlinedFunction =
-      CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, &BFI, &BPI)
-          .extractCodeRegion();
+  OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
+                               ClonedFuncBFI.get(), &BPI)
+                     .extractCodeRegion();
+
+  if (OutlinedFunc) {
+    OutliningCallBB = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc)
+        .getInstruction()
+        ->getParent();
+    assert(OutliningCallBB->getParent() == ClonedFunc);
+  }
 
-  bool AnyInline =
-      tryPartialInline(DuplicateFunction, F, OI.get(), OutlinedFunction, &BFI);
+  return OutlinedFunc;
+}
 
+PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
   // Ditch the duplicate, since we're done with it, and rewrite all remaining
   // users (function pointers, etc.) back to the original function.
-  DuplicateFunction->replaceAllUsesWith(F);
-  DuplicateFunction->eraseFromParent();
+  ClonedFunc->replaceAllUsesWith(OrigFunc);
+  ClonedFunc->eraseFromParent();
+  if (!IsFunctionInlined) {
+    // Remove the function that is speculatively created if there is no
+    // reference.
+    if (OutlinedFunc)
+      OutlinedFunc->eraseFromParent();
+  }
+}
+
+Function *PartialInlinerImpl::unswitchFunction(Function *F) {
+
+  if (F->hasAddressTaken())
+    return nullptr;
+
+  // Let inliner handle it
+  if (F->hasFnAttribute(Attribute::AlwaysInline))
+    return nullptr;
+
+  if (F->hasFnAttribute(Attribute::NoInline))
+    return nullptr;
+
+  if (PSI->isFunctionEntryCold(F))
+    return nullptr;
+
+  if (F->user_begin() == F->user_end())
+    return nullptr;
+
+  std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
+
+  if (!OI)
+    return nullptr;
+
+  FunctionCloner Cloner(F, OI.get());
+  Cloner.NormalizeReturnBlock();
+  Function *OutlinedFunction = Cloner.DoFunctionOutlining();
+
+  bool AnyInline = tryPartialInline(Cloner);
 
   if (AnyInline)
     return OutlinedFunction;
 
-  // Remove the function that is speculatively created:
-  if (OutlinedFunction)
-    OutlinedFunction->eraseFromParent();
-
   return nullptr;
 }
 
-bool PartialInlinerImpl::tryPartialInline(Function *DuplicateFunction,
-                                          Function *F,
-                                          FunctionOutliningInfo *OI,
-                                          Function *OutlinedFunction,
-                                          BlockFrequencyInfo *CalleeBFI) {
-  if (OutlinedFunction == nullptr)
-    return false;
-
+bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
   int NonWeightedRcost;
   int SizeCost;
-  int OutlinedRegionSizeCost;
 
-  auto OutliningCallBB =
-      getOneCallSiteTo(OutlinedFunction).getInstruction()->getParent();
+  if (Cloner.OutlinedFunc == nullptr)
+    return false;
+
+  std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner);
 
-  std::tie(SizeCost, NonWeightedRcost, OutlinedRegionSizeCost) =
-      computeOutliningCosts(F, OI, OutlinedFunction, OutliningCallBB);
+  auto RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
+  auto WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;
 
   // The call sequence to the outlined function is larger than the original
   // outlined region size, it does not increase the chances of inlining
-  // 'F' with outlining (The inliner usies the size increase to model the
-  // the cost of inlining a callee).
-  if (!SkipCostAnalysis && OutlinedRegionSizeCost < SizeCost) {
-    OptimizationRemarkEmitter ORE(F);
+  // the function with outlining (The inliner usies the size increase to
+  // model the cost of inlining a callee).
+  if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
+    OptimizationRemarkEmitter ORE(Cloner.OrigFunc);
     DebugLoc DLoc;
     BasicBlock *Block;
-    std::tie(DLoc, Block) = getOneDebugLoc(DuplicateFunction);
+    std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc);
     ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
                                         DLoc, Block)
-             << ore::NV("Function", F)
+             << ore::NV("Function", Cloner.OrigFunc)
              << " not partially inlined into callers (Original Size = "
-             << ore::NV("OutlinedRegionOriginalSize", OutlinedRegionSizeCost)
+             << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
              << ", Size of call sequence to outlined function = "
              << ore::NV("NewSize", SizeCost) << ")");
     return false;
   }
 
-  assert(F->user_begin() == F->user_end() &&
+  assert(Cloner.OrigFunc->user_begin() == Cloner.OrigFunc->user_end() &&
          "F's users should all be replaced!");
-  std::vector<User *> Users(DuplicateFunction->user_begin(),
-                            DuplicateFunction->user_end());
+
+  std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
+                            Cloner.ClonedFunc->user_end());
 
   DenseMap<User *, uint64_t> CallSiteToProfCountMap;
-  if (F->getEntryCount())
-    computeCallsiteToProfCountMap(DuplicateFunction, CallSiteToProfCountMap);
+  if (Cloner.OrigFunc->getEntryCount())
+    computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
 
-  auto CalleeEntryCount = F->getEntryCount();
+  auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
   uint64_t CalleeEntryCountV = (CalleeEntryCount ? *CalleeEntryCount : 0);
+
   bool AnyInline = false;
   for (User *User : Users) {
     CallSite CS = getCallSite(User);
@@ -854,13 +884,12 @@ bool PartialInlinerImpl::tryPartialInlin
 
     OptimizationRemarkEmitter ORE(CS.getCaller());
 
-    if (!shouldPartialInline(CS, F, OI, CalleeBFI, OutliningCallBB,
-                             NonWeightedRcost, ORE))
+    if (!shouldPartialInline(CS, Cloner, WeightedRcost, ORE))
       continue;
 
     ORE.emit(
         OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction())
-        << ore::NV("Callee", F) << " partially inlined into "
+        << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
         << ore::NV("Caller", CS.getCaller()));
 
     InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);
@@ -878,8 +907,11 @@ bool PartialInlinerImpl::tryPartialInlin
     NumPartialInlined++;
   }
 
-  if (AnyInline && CalleeEntryCount)
-    F->setEntryCount(CalleeEntryCountV);
+  if (AnyInline) {
+    Cloner.IsFunctionInlined = true;
+    if (CalleeEntryCount)
+      Cloner.OrigFunc->setEntryCount(CalleeEntryCountV);
+  }
 
   return AnyInline;
 }




More information about the llvm-commits mailing list