<div dir="ltr">The instruction following the call has been deleted in InlineFunction.cpp:1998 and its parent is being accessed. Will think of a fix and send a patch.</div><div class="gmail_extra"><br><div class="gmail_quote">On Thu, Mar 3, 2016 at 3:03 PM, Daniel Jasper <span dir="ltr"><<a href="mailto:djasper@google.com" target="_blank">djasper@google.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote"><span class="">On Thu, Mar 3, 2016 at 10:26 AM, Easwaran Raman via llvm-commits <span dir="ltr"><<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a>></span> wrote:<br></span><div><div class="h5"><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">Author: eraman<br>
Date: Thu Mar  3 12:26:33 2016<br>
New Revision: 262636<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=262636&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project?rev=262636&view=rev</a><br>
Log:<br>
Infrastructure for PGO enhancements in inliner<br>
<br>
This patch provides the following infrastructure for PGO enhancements in inliner:<br>
<br>
Enable the use of block level profile information in inliner<br>
Incremental update of block frequency information during inlining<br>
Update the function entry counts of callees when they get inlined into callers.<br>
<br>
Differential Revision: <a href="http://reviews.llvm.org/D16381" rel="noreferrer" target="_blank">http://reviews.llvm.org/D16381</a><br>
<br>
<br>
Added:<br>
    llvm/trunk/test/Transforms/Inline/function-count-update-2.ll<br>
    llvm/trunk/test/Transforms/Inline/function-count-update-3.ll<br>
    llvm/trunk/test/Transforms/Inline/function-count-update.ll<br>
Modified:<br>
    llvm/trunk/include/llvm/Analysis/InlineCost.h<br>
    llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h<br>
    llvm/trunk/include/llvm/Transforms/Utils/Cloning.h<br>
    llvm/trunk/lib/Analysis/InlineCost.cpp<br>
    llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp<br>
    llvm/trunk/lib/Transforms/IPO/Inliner.cpp<br>
    llvm/trunk/lib/Transforms/Utils/CloneFunction.cpp<br>
    llvm/trunk/lib/Transforms/Utils/InlineFunction.cpp<br>
<br>
Modified: llvm/trunk/include/llvm/Analysis/InlineCost.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/InlineCost.h?rev=262636&r1=262635&r2=262636&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/InlineCost.h?rev=262636&r1=262635&r2=262636&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/include/llvm/Analysis/InlineCost.h (original)<br>
+++ llvm/trunk/include/llvm/Analysis/InlineCost.h Thu Mar  3 12:26:33 2016<br>
@@ -20,6 +20,7 @@<br>
<br>
 namespace llvm {<br>
 class AssumptionCacheTracker;<br>
+class BlockFrequencyInfo;<br>
 class CallSite;<br>
 class DataLayout;<br>
 class Function;<br>
@@ -38,6 +39,21 @@ namespace InlineConstants {<br>
   const unsigned TotalAllocaSizeRecursiveCaller = 1024;<br>
 }<br>
<br>
+/// \brief Block frequency analysis for multiple functions.<br>
+/// This class mimics block frequency analysis on CGSCC level. Block frequency<br>
+/// info is computed on demand and cached unless they are invalidated.<br>
+class BlockFrequencyAnalysis {<br>
+private:<br>
+  DenseMap<Function *, BlockFrequencyInfo *> BFM;<br>
+<br>
+public:<br>
+  ~BlockFrequencyAnalysis();<br>
+  /// \brief Returns BlockFrequencyInfo for a function.<br>
+  BlockFrequencyInfo *getBlockFrequencyInfo(Function *);<br>
+  /// \brief Invalidates block frequency info for a function.<br>
+  void invalidateBlockFrequencyInfo(Function *);<br>
+};<br>
+<br>
 /// \brief Represents the cost of inlining a function.<br>
 ///<br>
 /// This supports special values for functions which should "always" or<br>
@@ -111,7 +127,8 @@ public:<br>
 /// inlining the callsite. It is an expensive, heavyweight call.<br>
 InlineCost getInlineCost(CallSite CS, int DefaultThreshold,<br>
                          TargetTransformInfo &CalleeTTI,<br>
-                         AssumptionCacheTracker *ACT);<br>
+                         AssumptionCacheTracker *ACT,<br>
+                         BlockFrequencyAnalysis *BFA);<br>
<br>
 /// \brief Get an InlineCost with the callee explicitly specified.<br>
 /// This allows you to calculate the cost of inlining a function via a<br>
@@ -120,7 +137,8 @@ InlineCost getInlineCost(CallSite CS, in<br>
 //<br>
 InlineCost getInlineCost(CallSite CS, Function *Callee, int DefaultThreshold,<br>
                          TargetTransformInfo &CalleeTTI,<br>
-                         AssumptionCacheTracker *ACT);<br>
+                         AssumptionCacheTracker *ACT,<br>
+                         BlockFrequencyAnalysis *BFA);<br>
<br>
 int computeThresholdFromOptLevels(unsigned OptLevel, unsigned SizeOptLevel);<br>
<br>
@@ -129,6 +147,9 @@ int getDefaultInlineThreshold();<br>
<br>
 /// \brief Minimal filter to detect invalid constructs for inlining.<br>
 bool isInlineViable(Function &Callee);<br>
+<br>
+/// \brief Return estimated count of the block \p BB.<br>
+Optional<uint64_t> getBlockCount(BasicBlock *BB, BlockFrequencyAnalysis *BFA);<br>
 }<br>
<br>
 #endif<br>
<br>
Modified: llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h?rev=262636&r1=262635&r2=262636&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h?rev=262636&r1=262635&r2=262636&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h (original)<br>
+++ llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h Thu Mar  3 12:26:33 2016<br>
@@ -24,8 +24,18 @@ class AssumptionCacheTracker;<br>
 class CallSite;<br>
 class DataLayout;<br>
 class InlineCost;<br>
+class BlockFrequencyAnalysis;<br>
 template <class PtrType, unsigned SmallSize> class SmallPtrSet;<br>
<br>
+// Functor invoked when a block is cloned during inlining.<br>
+typedef std::function<void(const BasicBlock *, const BasicBlock *)><br>
+    BlockCloningFunctor;<br>
+// Functor invoked when a function is inlined inside the basic block<br>
+// containing the call.<br>
+typedef std::function<void(BasicBlock *, Function *)> FunctionCloningFunctor;<br>
+// Functor invoked when a function gets deleted during inlining.<br>
+typedef std::function<void(Function *)> FunctionDeletedFunctor;<br>
+<br>
 /// Inliner - This class contains all of the helper code which is used to<br>
 /// perform the inlining operations that do not depend on the policy.<br>
 ///<br>
@@ -69,9 +79,28 @@ private:<br>
   /// shouldInline - Return true if the inliner should attempt to<br>
   /// inline at the given CallSite.<br>
   bool shouldInline(CallSite CS);<br>
+  /// Set the BFI of \p Dst to be the same as \p Src.<br>
+  void copyBlockFrequency(BasicBlock *Src, BasicBlock *Dst);<br>
+  /// Invalidates BFI for function \p F.<br>
+  void invalidateBFI(Function *F);<br>
+  /// Invalidates BFI for all functions in  \p SCC.<br>
+  void invalidateBFI(CallGraphSCC &SCC);<br>
+  /// Update function entry count for \p Callee which has been inlined into<br>
+  /// \p CallBB.<br>
+  void updateEntryCount(BasicBlock *CallBB, Function *Callee);<br>
+  /// \brief Update block frequency of an inlined block.<br>
+  /// This method updates the block frequency of \p NewBB which is a clone of<br>
+  /// \p OrigBB when the callsite \p CS gets inlined. The frequency of \p NewBB<br>
+  /// is computed as follows:<br>
+  /// Freq(NewBB) = Freq(OrigBB) * CallSiteFreq / CalleeEntryFreq.<br>
+  void updateBlockFreq(CallSite &CS, const BasicBlock *OrigBB,<br>
+                       const BasicBlock *NewBB);<br>
<br>
 protected:<br>
   AssumptionCacheTracker *ACT;<br>
+  std::unique_ptr<BlockFrequencyAnalysis> BFA;<br>
+  /// Are we using profile guided optimization?<br>
+  bool HasProfileData;<br>
 };<br>
<br>
 } // End llvm namespace<br>
<br>
Modified: llvm/trunk/include/llvm/Transforms/Utils/Cloning.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils/Cloning.h?rev=262636&r1=262635&r2=262636&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils/Cloning.h?rev=262636&r1=262635&r2=262636&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/include/llvm/Transforms/Utils/Cloning.h (original)<br>
+++ llvm/trunk/include/llvm/Transforms/Utils/Cloning.h Thu Mar  3 12:26:33 2016<br>
@@ -48,6 +48,9 @@ class AllocaInst;<br>
 class AssumptionCacheTracker;<br>
 class DominatorTree;<br>
<br>
+typedef std::function<void(const BasicBlock *, const BasicBlock *)><br>
+    BlockCloningFunctor;<br>
+<br>
 /// Return an exact copy of the specified module<br>
 ///<br>
 std::unique_ptr<Module> CloneModule(const Module *M);<br>
@@ -157,7 +160,8 @@ void CloneAndPruneIntoFromInst(Function<br>
                                ValueToValueMapTy &VMap, bool ModuleLevelChanges,<br>
                                SmallVectorImpl<ReturnInst *> &Returns,<br>
                                const char *NameSuffix = "",<br>
-                               ClonedCodeInfo *CodeInfo = nullptr);<br>
+                               ClonedCodeInfo *CodeInfo = nullptr,<br>
+                               BlockCloningFunctor Ftor = nullptr);<br>
<br>
 /// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,<br>
 /// except that it does some simple constant prop and DCE on the fly.  The<br>
@@ -172,23 +176,27 @@ void CloneAndPruneIntoFromInst(Function<br>
 ///<br>
 void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,<br>
                                ValueToValueMapTy &VMap, bool ModuleLevelChanges,<br>
-                               SmallVectorImpl<ReturnInst*> &Returns,<br>
+                               SmallVectorImpl<ReturnInst *> &Returns,<br>
                                const char *NameSuffix = "",<br>
                                ClonedCodeInfo *CodeInfo = nullptr,<br>
-                               Instruction *TheCall = nullptr);<br>
+                               Instruction *TheCall = nullptr,<br>
+                               BlockCloningFunctor Ftor = nullptr);<br>
<br>
 /// InlineFunctionInfo - This class captures the data input to the<br>
 /// InlineFunction call, and records the auxiliary results produced by it.<br>
 class InlineFunctionInfo {<br>
 public:<br>
   explicit InlineFunctionInfo(CallGraph *cg = nullptr,<br>
-                              AssumptionCacheTracker *ACT = nullptr)<br>
-      : CG(cg), ACT(ACT) {}<br>
+                              AssumptionCacheTracker *ACT = nullptr,<br>
+                              BlockCloningFunctor Ftor = nullptr)<br>
+      : CG(cg), ACT(ACT), Ftor(Ftor) {}<br>
<br>
   /// CG - If non-null, InlineFunction will update the callgraph to reflect the<br>
   /// changes it makes.<br>
   CallGraph *CG;<br>
   AssumptionCacheTracker *ACT;<br>
+  // Functor that is invoked when a block is cloned into the new function.<br>
+  BlockCloningFunctor Ftor;<br>
<br>
   /// StaticAllocas - InlineFunction fills this in with all static allocas that<br>
   /// get copied into the caller.<br>
<br>
Modified: llvm/trunk/lib/Analysis/InlineCost.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/InlineCost.cpp?rev=262636&r1=262635&r2=262636&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/InlineCost.cpp?rev=262636&r1=262635&r2=262636&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Analysis/InlineCost.cpp (original)<br>
+++ llvm/trunk/lib/Analysis/InlineCost.cpp Thu Mar  3 12:26:33 2016<br>
@@ -18,13 +18,18 @@<br>
 #include "llvm/ADT/SmallVector.h"<br>
 #include "llvm/ADT/Statistic.h"<br>
 #include "llvm/Analysis/AssumptionCache.h"<br>
+#include "llvm/Analysis/BlockFrequencyInfo.h"<br>
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"<br>
+#include "llvm/Analysis/BranchProbabilityInfo.h"<br>
 #include "llvm/Analysis/CodeMetrics.h"<br>
 #include "llvm/Analysis/ConstantFolding.h"<br>
 #include "llvm/Analysis/InstructionSimplify.h"<br>
+#include "llvm/Analysis/LoopInfo.h"<br>
 #include "llvm/Analysis/TargetTransformInfo.h"<br>
 #include "llvm/IR/CallSite.h"<br>
 #include "llvm/IR/CallingConv.h"<br>
 #include "llvm/IR/DataLayout.h"<br>
+#include "llvm/IR/Dominators.h"<br>
 #include "llvm/IR/GetElementPtrTypeIterator.h"<br>
 #include "llvm/IR/GlobalAlias.h"<br>
 #include "llvm/IR/InstVisitor.h"<br>
@@ -85,6 +90,7 @@ class CallAnalyzer : public InstVisitor<<br>
   // easily cacheable. Instead, use the cover function paramHasAttr.<br>
   CallSite CandidateCS;<br>
<br>
+  BlockFrequencyAnalysis *BFA;<br>
   int Threshold;<br>
   int Cost;<br>
<br>
@@ -153,6 +159,8 @@ class CallAnalyzer : public InstVisitor<<br>
   /// passed to support analyzing indirect calls whose target is inferred by<br>
   /// analysis.<br>
   void updateThreshold(CallSite CS, Function &Callee);<br>
+  /// Adjust Threshold based on CallSiteCount and return the adjusted threshold.<br>
+  int getAdjustedThreshold(int Threshold, Optional<uint64_t> CallSiteCount);<br>
<br>
   // Custom analysis routines.<br>
   bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);<br>
@@ -194,17 +202,19 @@ class CallAnalyzer : public InstVisitor<<br>
<br>
 public:<br>
   CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT,<br>
-               Function &Callee, int Threshold, CallSite CSArg)<br>
-    : TTI(TTI), ACT(ACT), F(Callee), CandidateCS(CSArg), Threshold(Threshold),<br>
-        Cost(0), IsCallerRecursive(false), IsRecursiveCall(false),<br>
-        ExposesReturnsTwice(false), HasDynamicAlloca(false),<br>
-        ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),<br>
-        HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),<br>
-        NumVectorInstructions(0), FiftyPercentVectorBonus(0),<br>
-        TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),<br>
-        NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),<br>
-        NumConstantPtrDiffs(0), NumInstructionsSimplified(0),<br>
-        SROACostSavings(0), SROACostSavingsLost(0) {}<br>
+               Function &Callee, int Threshold, CallSite CSArg,<br>
+               BlockFrequencyAnalysis *BFA)<br>
+      : TTI(TTI), ACT(ACT), F(Callee), CandidateCS(CSArg), BFA(BFA),<br>
+        Threshold(Threshold), Cost(0), IsCallerRecursive(false),<br>
+        IsRecursiveCall(false), ExposesReturnsTwice(false),<br>
+        HasDynamicAlloca(false), ContainsNoDuplicateCall(false),<br>
+        HasReturn(false), HasIndirectBr(false), HasFrameEscape(false),<br>
+        AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),<br>
+        FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),<br>
+        NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),<br>
+        NumConstantPtrCmps(0), NumConstantPtrDiffs(0),<br>
+        NumInstructionsSimplified(0), SROACostSavings(0),<br>
+        SROACostSavingsLost(0) {}<br>
<br>
   bool analyzeCall(CallSite CS);<br>
<br>
@@ -572,6 +582,15 @@ bool CallAnalyzer::isKnownNonNullInCalle<br>
   return false;<br>
 }<br>
<br>
+// Adjust the threshold based on callsite hotness. Currently this is a nop.<br>
+int CallAnalyzer::getAdjustedThreshold(int Threshold,<br>
+                                       Optional<uint64_t> CallSiteCount<br>
+                                       __attribute__((unused))) {<br>
+  // FIXME: The new threshold should be computed from the given Threshold and<br>
+  // the callsite hotness.<br>
+  return Threshold;<br>
+}<br>
+<br>
 void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {<br>
   // If -inline-threshold is not given, listen to the optsize and minsize<br>
   // attributes when they would decrease the threshold.<br>
@@ -596,6 +615,9 @@ void CallAnalyzer::updateThreshold(CallS<br>
     FunctionCount = Callee.getEntryCount().getValue();<br>
     MaxFunctionCount = Callee.getParent()->getMaximumFunctionCount().getValue();<br>
   }<br>
+  Optional<uint64_t> CallSiteCount =<br>
+      llvm::getBlockCount(CS.getInstruction()->getParent(), BFA);<br>
+  Threshold = getAdjustedThreshold(Threshold, CallSiteCount);<br>
<br>
   // Listen to the inlinehint attribute or profile based hotness information<br>
   // when it would increase the threshold and the caller does not need to<br>
@@ -912,7 +934,8 @@ bool CallAnalyzer::visitCallSite(CallSit<br>
   // during devirtualization and so we want to give it a hefty bonus for<br>
   // inlining, but cap that bonus in the event that inlining wouldn't pan<br>
   // out. Pretend to inline the function, with a custom threshold.<br>
-  CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS);<br>
+  CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS,<br>
+                  BFA);<br>
   if (CA.analyzeCall(CS)) {<br>
     // We were able to inline the indirect call! Subtract the cost from the<br>
     // threshold to get the bonus we want to apply, but don't go below zero.<br>
@@ -1433,9 +1456,10 @@ static bool functionsHaveCompatibleAttri<br>
<br>
 InlineCost llvm::getInlineCost(CallSite CS, int DefaultThreshold,<br>
                                TargetTransformInfo &CalleeTTI,<br>
-                               AssumptionCacheTracker *ACT) {<br>
+                               AssumptionCacheTracker *ACT,<br>
+                               BlockFrequencyAnalysis *BFA) {<br>
   return getInlineCost(CS, CS.getCalledFunction(), DefaultThreshold, CalleeTTI,<br>
-                       ACT);<br>
+                       ACT, BFA);<br>
 }<br>
<br>
 int llvm::computeThresholdFromOptLevels(unsigned OptLevel,<br>
@@ -1454,7 +1478,8 @@ int llvm::getDefaultInlineThreshold() {<br>
 InlineCost llvm::getInlineCost(CallSite CS, Function *Callee,<br>
                                int DefaultThreshold,<br>
                                TargetTransformInfo &CalleeTTI,<br>
-                               AssumptionCacheTracker *ACT) {<br>
+                               AssumptionCacheTracker *ACT,<br>
+                               BlockFrequencyAnalysis *BFA) {<br>
<br>
   // Cannot inline indirect calls.<br>
   if (!Callee)<br>
@@ -1487,7 +1512,7 @@ InlineCost llvm::getInlineCost(CallSite<br>
   DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName()<br>
         << "...\n");<br>
<br>
-  CallAnalyzer CA(CalleeTTI, ACT, *Callee, DefaultThreshold, CS);<br>
+  CallAnalyzer CA(CalleeTTI, ACT, *Callee, DefaultThreshold, CS, BFA);<br>
   bool ShouldInline = CA.analyzeCall(CS);<br>
<br>
   DEBUG(CA.dump());<br>
@@ -1535,3 +1560,45 @@ bool llvm::isInlineViable(Function &F) {<br>
<br>
   return true;<br>
 }<br>
+<br>
+/// \brief Get estimated execution count for \p BB.<br>
+Optional<uint64_t> llvm::getBlockCount(BasicBlock *BB,<br>
+                                       BlockFrequencyAnalysis *BFA) {<br>
+  if (!BFA)<br>
+    return None;<br>
+  Function *F = BB->getParent();<br>
+  Optional<uint64_t> EntryCount = F->getEntryCount();<br>
+  if (!EntryCount)<br>
+    return None;<br>
+  BlockFrequencyInfo *BFI = BFA->getBlockFrequencyInfo(F);<br>
+  uint64_t BBFreq = BFI->getBlockFreq(BB).getFrequency();<br>
+  uint64_t FunctionEntryFreq = BFI->getEntryFreq();<br>
+  uint64_t BBCount = EntryCount.getValue() * BBFreq / FunctionEntryFreq;<br>
+  return BBCount;<br>
+}<br>
+<br>
+BlockFrequencyAnalysis::~BlockFrequencyAnalysis() {<br>
+  for (auto &Entry : BFM) {<br>
+    delete Entry.second;<br>
+  }<br>
+}<br>
+<br>
+/// \brief Get BlockFrequencyInfo for a function.<br>
+BlockFrequencyInfo *BlockFrequencyAnalysis::getBlockFrequencyInfo(Function *F) {<br>
+  auto Iter = BFM.find(F);<br>
+  if (Iter != BFM.end())<br>
+    return Iter->second;<br>
+  // We need to create a BlockFrequencyInfo object for F and store it.<br>
+  DominatorTree DT;<br>
+  DT.recalculate(*F);<br>
+  LoopInfo LI(DT);<br>
+  BranchProbabilityInfo BPI(*F, LI);<br>
+  BlockFrequencyInfo *BFI = new BlockFrequencyInfo(*F, BPI, LI);<br>
+  BFM[F] = BFI;<br>
+  return BFI;<br>
+}<br>
+<br>
+/// \brief Invalidate BlockFrequencyInfo for a function.<br>
+void BlockFrequencyAnalysis::invalidateBlockFrequencyInfo(Function *F) {<br>
+  BFM.erase(F);<br>
+}<br>
<br>
Modified: llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp?rev=262636&r1=262635&r2=262636&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp?rev=262636&r1=262635&r2=262636&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp (original)<br>
+++ llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp Thu Mar  3 12:26:33 2016<br>
@@ -59,7 +59,8 @@ public:<br>
   InlineCost getInlineCost(CallSite CS) override {<br>
     Function *Callee = CS.getCalledFunction();<br>
     TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);<br>
-    return llvm::getInlineCost(CS, DefaultThreshold, TTI, ACT);<br>
+    return llvm::getInlineCost(CS, DefaultThreshold, TTI, ACT,<br>
+                               HasProfileData ? BFA.get() : nullptr);<br>
   }<br>
<br>
   bool runOnSCC(CallGraphSCC &SCC) override;<br>
<br>
Modified: llvm/trunk/lib/Transforms/IPO/Inliner.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/Inliner.cpp?rev=262636&r1=262635&r2=262636&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/Inliner.cpp?rev=262636&r1=262635&r2=262636&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Transforms/IPO/Inliner.cpp (original)<br>
+++ llvm/trunk/lib/Transforms/IPO/Inliner.cpp Thu Mar  3 12:26:33 2016<br>
@@ -19,6 +19,7 @@<br>
 #include "llvm/Analysis/AliasAnalysis.h"<br>
 #include "llvm/Analysis/AssumptionCache.h"<br>
 #include "llvm/Analysis/BasicAliasAnalysis.h"<br>
+#include "llvm/Analysis/BlockFrequencyInfo.h"<br>
 #include "llvm/Analysis/CallGraph.h"<br>
 #include "llvm/Analysis/InlineCost.h"<br>
 #include "llvm/Analysis/TargetLibraryInfo.h"<br>
@@ -47,10 +48,13 @@ STATISTIC(NumMergedAllocas, "Number of a<br>
 // if those would be more profitable and blocked inline steps.<br>
 STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");<br>
<br>
-Inliner::Inliner(char &ID) : CallGraphSCCPass(ID), InsertLifetime(true) {}<br>
+Inliner::Inliner(char &ID)<br>
+    : CallGraphSCCPass(ID), InsertLifetime(true),<br>
+      BFA(new BlockFrequencyAnalysis()) {}<br>
<br>
 Inliner::Inliner(char &ID, bool InsertLifetime)<br>
-    : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {}<br>
+    : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime),<br>
+      BFA(new BlockFrequencyAnalysis()) {}<br>
<br>
 /// For this class, we declare that we require and preserve the call graph.<br>
 /// If the derived class implements this method, it should<br>
@@ -259,7 +263,7 @@ bool Inliner::shouldInline(CallSite CS)<br>
                          Twine(IC.getCostDelta() + IC.getCost()) + ")");<br>
     return false;<br>
   }<br>
-<br>
+<br>
   // Try to detect the case where the current inlining candidate caller (call<br>
   // it B) is a static or linkonce-ODR function and is an inlining candidate<br>
   // elsewhere, and the current candidate callee (call it C) is large enough<br>
@@ -356,8 +360,90 @@ static bool InlineHistoryIncludes(Functi<br>
   return false;<br>
 }<br>
<br>
+/// \brief Update the frequency of a block that is cloned into the caller.<br>
+/// This is invoked when \p OrigBB from the callee is cloned into \p NewBB in<br>
+/// the caller.<br>
+void Inliner::updateBlockFreq(CallSite &CS, const BasicBlock *OrigBB,<br>
+                              const BasicBlock *NewBB) {<br>
+  if (!HasProfileData)<br>
+    return;<br>
+  Instruction *Call = CS.getInstruction();<br>
+  BasicBlock *CallBB = Call->getParent();<br>
+  BlockFrequencyInfo *CalleeBFI =<br>
+      BFA->getBlockFrequencyInfo(CS.getCalledFunction());<br>
+  BlockFrequencyInfo *CallerBFI =<br>
+      BFA->getBlockFrequencyInfo(CallBB->getParent());<br>
+  // Find the number of times OrigBB is executed per invocation of the callee<br>
+  // and multiply by the number of times callee is executed in the caller.<br>
+  // Freq(NewBB) = Freq(OrigBB) * CallSiteFreq / CalleeEntryFreq.<br>
+  uint64_t CallSiteFreq = CallerBFI->getBlockFreq(CallBB).getFrequency();<br>
+  uint64_t CalleeEntryFreq = CalleeBFI->getEntryFreq();<br>
+  // Frequency of OrigBB in the callee.<br>
+  BlockFrequency OrigBBFreq = CalleeBFI->getBlockFreq(OrigBB);<br>
+  CallerBFI->setBlockFreq(NewBB, (double)(OrigBBFreq.getFrequency()) /<br>
+                                     CalleeEntryFreq * CallSiteFreq);<br>
+}<br>
+<br>
+/// \brief Update entry count of \p Callee after it got inlined at a callsite<br>
+/// in block \p CallBB.<br>
+void Inliner::updateEntryCount(BasicBlock *CallBB, Function *Callee) {<br>
+  if (!HasProfileData)<br>
+    return;<br>
+  // If the callee has a original count of N, and the estimated count of<br>
+  // callsite is M, the new callee count is set to N - M. M is estimated from<br>
+  // the caller's entry count, its entry block frequency and the block frequency<br>
+  // of the callsite.<br>
+  Optional<uint64_t> CalleeCount = Callee->getEntryCount();<br>
+  if (!CalleeCount)<br>
+    return;<br>
+  Optional<uint64_t> CallSiteCount = llvm::getBlockCount(CallBB, BFA.get());<br>
+  if (!CallSiteCount)<br>
+    return;<br>
+  // Since CallSiteCount is an estimate, it could exceed the original callee<br>
+  // count and has to be set to 0.<br>
+  if (CallSiteCount.getValue() > CalleeCount.getValue()) {<br>
+    Callee->setEntryCount(0);<br>
+    DEBUG(llvm::dbgs() << "Estimated count of block " << CallBB->getName()<br>
+                       << " is " << CallSiteCount.getValue()<br>
+                       << " which exceeds the entry count "<br>
+                       << CalleeCount.getValue() << " of the callee "<br>
+                       << Callee->getName() << "\n");<br>
+  } else<br>
+    Callee->setEntryCount(CalleeCount.getValue() - CallSiteCount.getValue());<br>
+}<br>
+<br>
+void Inliner::invalidateBFI(Function *F) {<br>
+  if (!HasProfileData)<br>
+    return;<br>
+  if (F)<br>
+    BFA->invalidateBlockFrequencyInfo(F);<br>
+}<br>
+void Inliner::invalidateBFI(CallGraphSCC &SCC) {<br>
+  if (!HasProfileData)<br>
+    return;<br>
+  for (CallGraphNode *Node : SCC) {<br>
+    Function *F = Node->getFunction();<br>
+    invalidateBFI(F);<br>
+  }<br>
+}<br>
+void Inliner::copyBlockFrequency(BasicBlock *Src, BasicBlock *Dst) {<br>
+  if (!HasProfileData)<br>
+    return;<br>
+  Function *F = Src->getParent();<br>
+  BlockFrequencyInfo *BFI = BFA->getBlockFrequencyInfo(F);<br>
+  BFI->setBlockFreq(Dst, BFI->getBlockFreq(Src).getFrequency());<br>
+}<br>
+<br>
+static bool hasProfileData(Module &M) {<br>
+  // We check for the presence of MaxFunctionCount in the module.<br>
+  // FIXME: This now only works for frontend based instrumentation.<br>
+  return M.getMaximumFunctionCount().hasValue();<br>
+}<br>
+<br>
 bool Inliner::runOnSCC(CallGraphSCC &SCC) {<br>
+  using namespace std::placeholders;<br>
   CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();<br>
+  HasProfileData = hasProfileData(CG.getModule());<br>
   ACT = &getAnalysis<AssumptionCacheTracker>();<br>
   auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();<br>
<br>
@@ -419,7 +505,6 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC<br>
<br>
<br>
   InlinedArrayAllocasTy InlinedArrayAllocas;<br>
-  InlineFunctionInfo InlineInfo(&CG, ACT);<br>
<br>
   // Now that we have all of the call sites, loop over them and inline them if<br>
   // it looks profitable to do so.<br>
@@ -448,6 +533,10 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC<br>
         CS.getInstruction()->eraseFromParent();<br>
         ++NumCallsDeleted;<br>
       } else {<br>
+        Instruction *TheCall = CS.getInstruction();<br></blockquote><br></div></div><div>In test/Transforms/Inline/inline-tail.ll, ASAN is telling me that there is a use after free here. Presumably, CS has already been inlined/deleted?</div><div><div class="h5"><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">
+        BasicBlock *CallSiteBlock = TheCall->getParent();<br>
+        Instruction *CallSuccessor = &*(++BasicBlock::iterator(TheCall));</blockquote><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">
+<br>
         // We can only inline direct calls to non-declarations.<br>
         if (!Callee || Callee->isDeclaration()) continue;<br>
<br>
@@ -476,6 +565,11 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC<br>
           continue;<br>
         }<br>
<br>
+        BlockCloningFunctor BCF = nullptr;<br>
+        if (HasProfileData)<br>
+          BCF = std::bind(&Inliner::updateBlockFreq, this, CS, _1, _2);<br>
+        InlineFunctionInfo InlineInfo(&CG, ACT, BCF);<br>
+<br>
         // Attempt to inline the function.<br>
         if (!InlineCallIfPossible(*this, CS, InlineInfo, InlinedArrayAllocas,<br>
                                   InlineHistoryID, InsertLifetime)) {<br>
@@ -485,6 +579,13 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC<br>
                                              Caller->getName()));<br>
           continue;<br>
         }<br>
+        updateEntryCount(CallSiteBlock, Callee);<br>
+        // The instruction following the call is part of a new basic block<br>
+        // created during the inlining process. This does not have an entry in<br>
+        // the BFI. We create an entry by copying the frequency of the original<br>
+        // block containing the call.<br>
+        copyBlockFrequency(CallSiteBlock, CallSuccessor->getParent());<br>
+<br>
         ++NumInlined;<br>
<br>
         // Report the inline decision.<br>
@@ -523,7 +624,9 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC<br>
         CalleeNode->removeAllCalledFunctions();<br>
<br>
         // Removing the node for callee from the call graph and delete it.<br>
-        delete CG.removeFunctionFromModule(CalleeNode);<br>
+        Function *F = CG.removeFunctionFromModule(CalleeNode);<br>
+        invalidateBFI(F);<br>
+        delete F;<br>
         ++NumDeleted;<br>
       }<br>
<br>
@@ -544,6 +647,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC<br>
     }<br>
   } while (LocalChange);<br>
<br>
+  invalidateBFI(SCC);<br>
   return Changed;<br>
 }<br>
<br>
@@ -651,7 +755,9 @@ bool Inliner::removeDeadFunctions(CallGr<br>
                                       FunctionsToRemove.end()),<br>
                           FunctionsToRemove.end());<br>
   for (CallGraphNode *CGN : FunctionsToRemove) {<br>
-    delete CG.removeFunctionFromModule(CGN);<br>
+    Function *F = CG.removeFunctionFromModule(CGN);<br>
+    invalidateBFI(F);<br>
+    delete F;<br>
     ++NumDeleted;<br>
   }<br>
   return true;<br>
<br>
Modified: llvm/trunk/lib/Transforms/Utils/CloneFunction.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/CloneFunction.cpp?rev=262636&r1=262635&r2=262636&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/CloneFunction.cpp?rev=262636&r1=262635&r2=262636&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Transforms/Utils/CloneFunction.cpp (original)<br>
+++ llvm/trunk/lib/Transforms/Utils/CloneFunction.cpp Thu Mar  3 12:26:33 2016<br>
@@ -277,9 +277,10 @@ namespace {<br>
<br>
     /// The specified block is found to be reachable, clone it and<br>
     /// anything that it can reach.<br>
-    void CloneBlock(const BasicBlock *BB,<br>
+    void CloneBlock(const BasicBlock *BB,<br>
                     BasicBlock::const_iterator StartingInst,<br>
-                    std::vector<const BasicBlock*> &ToClone);<br>
+                    std::vector<const BasicBlock *> &ToClone,<br>
+                    BlockCloningFunctor Ftor = nullptr);<br>
   };<br>
 }<br>
<br>
@@ -287,7 +288,8 @@ namespace {<br>
 /// anything that it can reach.<br>
 void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,<br>
                                        BasicBlock::const_iterator StartingInst,<br>
-                                       std::vector<const BasicBlock*> &ToClone){<br>
+                                       std::vector<const BasicBlock *> &ToClone,<br>
+                                       BlockCloningFunctor Ftor) {<br>
   WeakVH &BBEntry = VMap[BB];<br>
<br>
   // Have we already cloned this block?<br>
@@ -424,18 +426,19 @@ void PruningFunctionCloner::CloneBlock(c<br>
     CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&<br>
       BB != &BB->getParent()->front();<br>
   }<br>
+  // Call Ftor to notify the client that BB has been cloned to NewBB.<br>
+  if (Ftor)<br>
+    Ftor(BB, NewBB);<br>
 }<br>
<br>
 /// This works like CloneAndPruneFunctionInto, except that it does not clone the<br>
 /// entire function. Instead it starts at an instruction provided by the caller<br>
 /// and copies (and prunes) only the code reachable from that instruction.<br>
-void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,<br>
-                                     const Instruction *StartingInst,<br>
-                                     ValueToValueMapTy &VMap,<br>
-                                     bool ModuleLevelChanges,<br>
-                                     SmallVectorImpl<ReturnInst *> &Returns,<br>
-                                     const char *NameSuffix,<br>
-                                     ClonedCodeInfo *CodeInfo) {<br>
+void llvm::CloneAndPruneIntoFromInst(<br>
+    Function *NewFunc, const Function *OldFunc, const Instruction *StartingInst,<br>
+    ValueToValueMapTy &VMap, bool ModuleLevelChanges,<br>
+    SmallVectorImpl<ReturnInst *> &Returns, const char *NameSuffix,<br>
+    ClonedCodeInfo *CodeInfo, BlockCloningFunctor Ftor) {<br>
   assert(NameSuffix && "NameSuffix cannot be null!");<br>
<br>
   ValueMapTypeRemapper *TypeMapper = nullptr;<br>
@@ -461,11 +464,11 @@ void llvm::CloneAndPruneIntoFromInst(Fun<br>
<br>
   // Clone the entry block, and anything recursively reachable from it.<br>
   std::vector<const BasicBlock*> CloneWorklist;<br>
-  PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist);<br>
+  PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist, Ftor);<br>
   while (!CloneWorklist.empty()) {<br>
     const BasicBlock *BB = CloneWorklist.back();<br>
     CloneWorklist.pop_back();<br>
-    PFC.CloneBlock(BB, BB->begin(), CloneWorklist);<br>
+    PFC.CloneBlock(BB, BB->begin(), CloneWorklist, Ftor);<br>
   }<br>
<br>
   // Loop over all of the basic blocks in the old function.  If the block was<br>
@@ -667,15 +670,14 @@ void llvm::CloneAndPruneIntoFromInst(Fun<br>
 /// constant arguments cause a significant amount of code in the callee to be<br>
 /// dead.  Since this doesn't produce an exact copy of the input, it can't be<br>
 /// used for things like CloneFunction or CloneModule.<br>
-void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,<br>
-                                     ValueToValueMapTy &VMap,<br>
-                                     bool ModuleLevelChanges,<br>
-                                     SmallVectorImpl<ReturnInst*> &Returns,<br>
-                                     const char *NameSuffix,<br>
-                                     ClonedCodeInfo *CodeInfo,<br>
-                                     Instruction *TheCall) {<br>
+void llvm::CloneAndPruneFunctionInto(<br>
+    Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap,<br>
+    bool ModuleLevelChanges, SmallVectorImpl<ReturnInst *> &Returns,<br>
+    const char *NameSuffix, ClonedCodeInfo *CodeInfo, Instruction *TheCall,<br>
+    BlockCloningFunctor Ftor) {<br>
   CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap,<br>
-                            ModuleLevelChanges, Returns, NameSuffix, CodeInfo);<br>
+                            ModuleLevelChanges, Returns, NameSuffix, CodeInfo,<br>
+                            Ftor);<br>
 }<br>
<br>
 /// \brief Remaps instructions in \p Blocks using the mapping in \p VMap.<br>
<br>
Modified: llvm/trunk/lib/Transforms/Utils/InlineFunction.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/InlineFunction.cpp?rev=262636&r1=262635&r2=262636&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/InlineFunction.cpp?rev=262636&r1=262635&r2=262636&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Transforms/Utils/InlineFunction.cpp (original)<br>
+++ llvm/trunk/lib/Transforms/Utils/InlineFunction.cpp Thu Mar  3 12:26:33 2016<br>
@@ -1319,7 +1319,7 @@ bool llvm::InlineFunction(CallSite CS, I<br>
<br>
   // If IFI has any state in it, zap it before we fill it in.<br>
   IFI.reset();<br>
-<br>
+<br>
   const Function *CalledFunc = CS.getCalledFunction();<br>
   if (!CalledFunc ||              // Can't inline external function or indirect<br>
       CalledFunc->isDeclaration() || // call, or call to a vararg function!<br>
@@ -1486,7 +1486,7 @@ bool llvm::InlineFunction(CallSite CS, I<br>
     // happy with whatever the cloner can do.<br>
     CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,<br>
                               /*ModuleLevelChanges=*/false, Returns, ".i",<br>
-                              &InlinedFunctionInfo, TheCall);<br>
+                              &InlinedFunctionInfo, TheCall, IFI.Ftor);<br>
<br>
     // Remember the first block that is newly cloned over.<br>
     FirstNewBlock = LastBlock; ++FirstNewBlock;<br>
<br>
Added: llvm/trunk/test/Transforms/Inline/function-count-update-2.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/function-count-update-2.ll?rev=262636&view=auto" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/function-count-update-2.ll?rev=262636&view=auto</a><br>
==============================================================================<br>
--- llvm/trunk/test/Transforms/Inline/function-count-update-2.ll (added)<br>
+++ llvm/trunk/test/Transforms/Inline/function-count-update-2.ll Thu Mar  3 12:26:33 2016<br>
@@ -0,0 +1,27 @@<br>
+; RUN: opt < %s -inline -S | FileCheck %s<br>
+<br>
+; This tests that the function count of a callee gets correctly updated after it<br>
+; has been inlined into two callsites.<br>
+<br>
+; CHECK: @callee() !prof [[COUNT:![0-9]+]]<br>
+define i32 @callee() !prof !1 {<br>
+  ret i32 0<br>
+}<br>
+<br>
+define i32 @caller1() !prof !2 {<br>
+  %i = call i32 @callee()<br>
+  ret i32 %i<br>
+}<br>
+<br>
+define i32 @caller2() !prof !3 {<br>
+  %i = call i32 @callee()<br>
+  ret i32 %i<br>
+}<br>
+<br>
+!llvm.module.flags = !{!0}<br>
+; CHECK: [[COUNT]] = !{!"function_entry_count", i64 0}<br>
+!0 = !{i32 1, !"MaxFunctionCount", i32 1000}<br>
+!1 = !{!"function_entry_count", i64 1000}<br>
+!2 = !{!"function_entry_count", i64 600}<br>
+!3 = !{!"function_entry_count", i64 400}<br>
+<br>
<br>
Added: llvm/trunk/test/Transforms/Inline/function-count-update-3.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/function-count-update-3.ll?rev=262636&view=auto" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/function-count-update-3.ll?rev=262636&view=auto</a><br>
==============================================================================<br>
--- llvm/trunk/test/Transforms/Inline/function-count-update-3.ll (added)<br>
+++ llvm/trunk/test/Transforms/Inline/function-count-update-3.ll Thu Mar  3 12:26:33 2016<br>
@@ -0,0 +1,69 @@<br>
+; RUN: opt < %s -inline -S -inline-threshold=50 | FileCheck %s<br>
+<br>
+; This tests that the function count of a function gets properly scaled after<br>
+; inlining a call chain leading to the function.<br>
+; Function a calls c with count 200 (C1)<br>
+; Function b calls c with count 300<br>
+; Function c calls e with count 250 (C2)<br>
+; Entry count of e is 500 (C3)<br>
+; c->e inlining does not happen since the cost exceeds threshold.<br>
+; c is then inlined into a.<br>
+; e now gets inlined into a (through c) since the branch condition in e is now<br>
+; known and hence the cost gets reduced.<br>
+; Estimated count of a->e callsite = C2 * (C1 / C3)<br>
+; Estimated count of a->e callsite = 250 * (200 / 500) = 100<br>
+; Remaining count of e = C3 - 100 = 500 - 100 = 400<br>
+<br>
+@data = external global i32<br>
+<br>
+define i32 @a(i32 %a1) !prof !1 {<br>
+  %a2 = call i32 @c(i32 %a1, i32 1)<br>
+  ret i32 %a2<br>
+}<br>
+<br>
+define i32 @b(i32 %b1) !prof !2 {<br>
+  %b2 = call i32 @c(i32 %b1, i32 %b1)<br>
+  ret i32 %b2<br>
+}<br>
+<br>
+define i32 @c(i32 %c1, i32 %c100) !prof !3 {<br>
+  %cond = icmp sle i32 %c1, 1<br>
+  br i1 %cond, label %cond_true, label %cond_false<br>
+<br>
+cond_false:<br>
+  ret i32 0<br>
+<br>
+cond_true:<br>
+  %c11 = call i32 @e(i32 %c100)<br>
+  ret i32 %c11<br>
+}<br>
+<br>
+; CHECK: @e(i32 %c1) !prof [[COUNT:![0-9]+]]<br>
+define i32 @e(i32 %c1) !prof !4 {<br>
+  %cond = icmp sle i32 %c1, 1<br>
+  br i1 %cond, label %cond_true, label %cond_false<br>
+<br>
+cond_false:<br>
+  %c2 = load i32, i32* @data, align 4<br>
+  %c3 = add i32 %c1, %c2<br>
+  %c4 = mul i32 %c3, %c2<br>
+  %c5 = add i32 %c4, %c2<br>
+  %c6 = mul i32 %c5, %c2<br>
+  %c7 = add i32 %c6, %c2<br>
+  %c8 = mul i32 %c7, %c2<br>
+  %c9 = add i32 %c8, %c2<br>
+  %c10 = mul i32 %c9, %c2<br>
+  ret i32 %c10<br>
+<br>
+cond_true:<br>
+  ret i32 0<br>
+}<br>
+<br>
+!llvm.module.flags = !{!0}<br>
+; CHECK: [[COUNT]] = !{!"function_entry_count", i64 400}<br>
+!0 = !{i32 1, !"MaxFunctionCount", i32 5000}<br>
+!1 = !{!"function_entry_count", i64 200}<br>
+!2 = !{!"function_entry_count", i64 300}<br>
+!3 = !{!"function_entry_count", i64 500}<br>
+!4 = !{!"function_entry_count", i64 500}<br>
+<br>
<br>
Added: llvm/trunk/test/Transforms/Inline/function-count-update.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/function-count-update.ll?rev=262636&view=auto" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/function-count-update.ll?rev=262636&view=auto</a><br>
==============================================================================<br>
--- llvm/trunk/test/Transforms/Inline/function-count-update.ll (added)<br>
+++ llvm/trunk/test/Transforms/Inline/function-count-update.ll Thu Mar  3 12:26:33 2016<br>
@@ -0,0 +1,51 @@<br>
+; RUN: opt < %s -inline -S | FileCheck %s<br>
+; RUN: opt < %s -always-inline -S | FileCheck %s<br>
+<br>
+; This tests that the function counts of two callees get correctly updated after<br>
+; they have been inlined into two back-to-back callsites in a single basic block<br>
+; in the caller. The callees have the alwaysinline attribute and so they get<br>
+; inlined both with the regular inliner pass and the always inline pass. In<br>
+; both cases, the new count of each callee is the original count minus the<br>
+; callsite count, which is 200 (since the caller's entry count is 400 and the<br>
+; block containing the calls has a relative block frequency of 0.5).<br>
+<br>
+; CHECK: @callee1(i32 %n) #0 !prof [[COUNT1:![0-9]+]]<br>
+define i32 @callee1(i32 %n) #0 !prof !1 {<br>
+  %cond = icmp sle i32 %n, 10<br>
+  br i1 %cond, label %cond_true, label %cond_false<br>
+<br>
+cond_true:<br>
+  %r1 = add i32 %n, 1<br>
+  ret i32 %r1<br>
+cond_false:<br>
+  %r2 = add i32 %n, 2<br>
+  ret i32 %r2<br>
+}<br>
+<br>
+; CHECK: @callee2(i32 %n) #0 !prof [[COUNT2:![0-9]+]]<br>
+define i32 @callee2(i32 %n) #0 !prof !2 {<br>
+  %r1 = add i32 %n, 1<br>
+  ret i32 %r1<br>
+}<br>
+<br>
+define i32 @caller(i32 %n) !prof !3 {<br>
+  %cond = icmp sle i32 %n, 100<br>
+  br i1 %cond, label %cond_true, label %cond_false<br>
+<br>
+cond_true:<br>
+  %i = call i32 @callee1(i32 %n)<br>
+  %j = call i32 @callee2(i32 %i)<br>
+  ret i32 %j<br>
+cond_false:<br>
+  ret i32 0<br>
+}<br>
+<br>
+!llvm.module.flags = !{!0}<br>
+; CHECK: [[COUNT1]] = !{!"function_entry_count", i64 800}<br>
+; CHECK: [[COUNT2]] = !{!"function_entry_count", i64 1800}<br>
+!0 = !{i32 1, !"MaxFunctionCount", i32 1000}<br>
+!1 = !{!"function_entry_count", i64 1000}<br>
+!2 = !{!"function_entry_count", i64 2000}<br>
+!3 = !{!"function_entry_count", i64 400}<br>
+attributes #0 = { alwaysinline }<br>
+<br>
<br>
<br>
_______________________________________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a><br>
<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits</a><br>
</blockquote></div></div></div><br></div></div>
</blockquote></div><br></div>