<div dir="ltr">The instruction following the call has been deleted in InlineFunction.cpp:1998 and its parent is being accessed. Will think of a fix and send a patch.</div><div class="gmail_extra"><br><div class="gmail_quote">On Thu, Mar 3, 2016 at 3:03 PM, Daniel Jasper <span dir="ltr"><<a href="mailto:djasper@google.com" target="_blank">djasper@google.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote"><span class="">On Thu, Mar 3, 2016 at 10:26 AM, Easwaran Raman via llvm-commits <span dir="ltr"><<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a>></span> wrote:<br></span><div><div class="h5"><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">Author: eraman<br>
Date: Thu Mar 3 12:26:33 2016<br>
New Revision: 262636<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=262636&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project?rev=262636&view=rev</a><br>
Log:<br>
Infrastructure for PGO enhancements in inliner<br>
<br>
This patch provides the following infrastructure for PGO enhancements in inliner:<br>
<br>
Enable the use of block level profile information in inliner<br>
Incremental update of block frequency information during inlining<br>
Update the function entry counts of callees when they get inlined into callers.<br>
<br>
Differential Revision: <a href="http://reviews.llvm.org/D16381" rel="noreferrer" target="_blank">http://reviews.llvm.org/D16381</a><br>
<br>
<br>
Added:<br>
llvm/trunk/test/Transforms/Inline/function-count-update-2.ll<br>
llvm/trunk/test/Transforms/Inline/function-count-update-3.ll<br>
llvm/trunk/test/Transforms/Inline/function-count-update.ll<br>
Modified:<br>
llvm/trunk/include/llvm/Analysis/InlineCost.h<br>
llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h<br>
llvm/trunk/include/llvm/Transforms/Utils/Cloning.h<br>
llvm/trunk/lib/Analysis/InlineCost.cpp<br>
llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp<br>
llvm/trunk/lib/Transforms/IPO/Inliner.cpp<br>
llvm/trunk/lib/Transforms/Utils/CloneFunction.cpp<br>
llvm/trunk/lib/Transforms/Utils/InlineFunction.cpp<br>
<br>
Modified: llvm/trunk/include/llvm/Analysis/InlineCost.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/InlineCost.h?rev=262636&r1=262635&r2=262636&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/InlineCost.h?rev=262636&r1=262635&r2=262636&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/include/llvm/Analysis/InlineCost.h (original)<br>
+++ llvm/trunk/include/llvm/Analysis/InlineCost.h Thu Mar 3 12:26:33 2016<br>
@@ -20,6 +20,7 @@<br>
<br>
namespace llvm {<br>
class AssumptionCacheTracker;<br>
+class BlockFrequencyInfo;<br>
class CallSite;<br>
class DataLayout;<br>
class Function;<br>
@@ -38,6 +39,21 @@ namespace InlineConstants {<br>
const unsigned TotalAllocaSizeRecursiveCaller = 1024;<br>
}<br>
<br>
+/// \brief Block frequency analysis for multiple functions.<br>
+/// This class mimics block frequency analysis on CGSCC level. Block frequency<br>
+/// info is computed on demand and cached unless they are invalidated.<br>
+class BlockFrequencyAnalysis {<br>
+private:<br>
+ DenseMap<Function *, BlockFrequencyInfo *> BFM;<br>
+<br>
+public:<br>
+ ~BlockFrequencyAnalysis();<br>
+ /// \brief Returns BlockFrequencyInfo for a function.<br>
+ BlockFrequencyInfo *getBlockFrequencyInfo(Function *);<br>
+ /// \brief Invalidates block frequency info for a function.<br>
+ void invalidateBlockFrequencyInfo(Function *);<br>
+};<br>
+<br>
/// \brief Represents the cost of inlining a function.<br>
///<br>
/// This supports special values for functions which should "always" or<br>
@@ -111,7 +127,8 @@ public:<br>
/// inlining the callsite. It is an expensive, heavyweight call.<br>
InlineCost getInlineCost(CallSite CS, int DefaultThreshold,<br>
TargetTransformInfo &CalleeTTI,<br>
- AssumptionCacheTracker *ACT);<br>
+ AssumptionCacheTracker *ACT,<br>
+ BlockFrequencyAnalysis *BFA);<br>
<br>
/// \brief Get an InlineCost with the callee explicitly specified.<br>
/// This allows you to calculate the cost of inlining a function via a<br>
@@ -120,7 +137,8 @@ InlineCost getInlineCost(CallSite CS, in<br>
//<br>
InlineCost getInlineCost(CallSite CS, Function *Callee, int DefaultThreshold,<br>
TargetTransformInfo &CalleeTTI,<br>
- AssumptionCacheTracker *ACT);<br>
+ AssumptionCacheTracker *ACT,<br>
+ BlockFrequencyAnalysis *BFA);<br>
<br>
int computeThresholdFromOptLevels(unsigned OptLevel, unsigned SizeOptLevel);<br>
<br>
@@ -129,6 +147,9 @@ int getDefaultInlineThreshold();<br>
<br>
/// \brief Minimal filter to detect invalid constructs for inlining.<br>
bool isInlineViable(Function &Callee);<br>
+<br>
+/// \brief Return estimated count of the block \p BB.<br>
+Optional<uint64_t> getBlockCount(BasicBlock *BB, BlockFrequencyAnalysis *BFA);<br>
}<br>
<br>
#endif<br>
<br>
Modified: llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h?rev=262636&r1=262635&r2=262636&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h?rev=262636&r1=262635&r2=262636&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h (original)<br>
+++ llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h Thu Mar 3 12:26:33 2016<br>
@@ -24,8 +24,18 @@ class AssumptionCacheTracker;<br>
class CallSite;<br>
class DataLayout;<br>
class InlineCost;<br>
+class BlockFrequencyAnalysis;<br>
template <class PtrType, unsigned SmallSize> class SmallPtrSet;<br>
<br>
+// Functor invoked when a block is cloned during inlining.<br>
+typedef std::function<void(const BasicBlock *, const BasicBlock *)><br>
+ BlockCloningFunctor;<br>
+// Functor invoked when a function is inlined inside the basic block<br>
+// containing the call.<br>
+typedef std::function<void(BasicBlock *, Function *)> FunctionCloningFunctor;<br>
+// Functor invoked when a function gets deleted during inlining.<br>
+typedef std::function<void(Function *)> FunctionDeletedFunctor;<br>
+<br>
/// Inliner - This class contains all of the helper code which is used to<br>
/// perform the inlining operations that do not depend on the policy.<br>
///<br>
@@ -69,9 +79,28 @@ private:<br>
/// shouldInline - Return true if the inliner should attempt to<br>
/// inline at the given CallSite.<br>
bool shouldInline(CallSite CS);<br>
+ /// Set the BFI of \p Dst to be the same as \p Src.<br>
+ void copyBlockFrequency(BasicBlock *Src, BasicBlock *Dst);<br>
+ /// Invalidates BFI for function \p F.<br>
+ void invalidateBFI(Function *F);<br>
+ /// Invalidates BFI for all functions in \p SCC.<br>
+ void invalidateBFI(CallGraphSCC &SCC);<br>
+ /// Update function entry count for \p Callee which has been inlined into<br>
+ /// \p CallBB.<br>
+ void updateEntryCount(BasicBlock *CallBB, Function *Callee);<br>
+ /// \brief Update block frequency of an inlined block.<br>
+ /// This method updates the block frequency of \p NewBB which is a clone of<br>
+ /// \p OrigBB when the callsite \p CS gets inlined. The frequency of \p NewBB<br>
+ /// is computed as follows:<br>
+ /// Freq(NewBB) = Freq(OrigBB) * CallSiteFreq / CalleeEntryFreq.<br>
+ void updateBlockFreq(CallSite &CS, const BasicBlock *OrigBB,<br>
+ const BasicBlock *NewBB);<br>
<br>
protected:<br>
AssumptionCacheTracker *ACT;<br>
+ std::unique_ptr<BlockFrequencyAnalysis> BFA;<br>
+ /// Are we using profile guided optimization?<br>
+ bool HasProfileData;<br>
};<br>
<br>
} // End llvm namespace<br>
<br>
Modified: llvm/trunk/include/llvm/Transforms/Utils/Cloning.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils/Cloning.h?rev=262636&r1=262635&r2=262636&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils/Cloning.h?rev=262636&r1=262635&r2=262636&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/include/llvm/Transforms/Utils/Cloning.h (original)<br>
+++ llvm/trunk/include/llvm/Transforms/Utils/Cloning.h Thu Mar 3 12:26:33 2016<br>
@@ -48,6 +48,9 @@ class AllocaInst;<br>
class AssumptionCacheTracker;<br>
class DominatorTree;<br>
<br>
+typedef std::function<void(const BasicBlock *, const BasicBlock *)><br>
+ BlockCloningFunctor;<br>
+<br>
/// Return an exact copy of the specified module<br>
///<br>
std::unique_ptr<Module> CloneModule(const Module *M);<br>
@@ -157,7 +160,8 @@ void CloneAndPruneIntoFromInst(Function<br>
ValueToValueMapTy &VMap, bool ModuleLevelChanges,<br>
SmallVectorImpl<ReturnInst *> &Returns,<br>
const char *NameSuffix = "",<br>
- ClonedCodeInfo *CodeInfo = nullptr);<br>
+ ClonedCodeInfo *CodeInfo = nullptr,<br>
+ BlockCloningFunctor Ftor = nullptr);<br>
<br>
/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,<br>
/// except that it does some simple constant prop and DCE on the fly. The<br>
@@ -172,23 +176,27 @@ void CloneAndPruneIntoFromInst(Function<br>
///<br>
void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,<br>
ValueToValueMapTy &VMap, bool ModuleLevelChanges,<br>
- SmallVectorImpl<ReturnInst*> &Returns,<br>
+ SmallVectorImpl<ReturnInst *> &Returns,<br>
const char *NameSuffix = "",<br>
ClonedCodeInfo *CodeInfo = nullptr,<br>
- Instruction *TheCall = nullptr);<br>
+ Instruction *TheCall = nullptr,<br>
+ BlockCloningFunctor Ftor = nullptr);<br>
<br>
/// InlineFunctionInfo - This class captures the data input to the<br>
/// InlineFunction call, and records the auxiliary results produced by it.<br>
class InlineFunctionInfo {<br>
public:<br>
explicit InlineFunctionInfo(CallGraph *cg = nullptr,<br>
- AssumptionCacheTracker *ACT = nullptr)<br>
- : CG(cg), ACT(ACT) {}<br>
+ AssumptionCacheTracker *ACT = nullptr,<br>
+ BlockCloningFunctor Ftor = nullptr)<br>
+ : CG(cg), ACT(ACT), Ftor(Ftor) {}<br>
<br>
/// CG - If non-null, InlineFunction will update the callgraph to reflect the<br>
/// changes it makes.<br>
CallGraph *CG;<br>
AssumptionCacheTracker *ACT;<br>
+ // Functor that is invoked when a block is cloned into the new function.<br>
+ BlockCloningFunctor Ftor;<br>
<br>
/// StaticAllocas - InlineFunction fills this in with all static allocas that<br>
/// get copied into the caller.<br>
<br>
Modified: llvm/trunk/lib/Analysis/InlineCost.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/InlineCost.cpp?rev=262636&r1=262635&r2=262636&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/InlineCost.cpp?rev=262636&r1=262635&r2=262636&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Analysis/InlineCost.cpp (original)<br>
+++ llvm/trunk/lib/Analysis/InlineCost.cpp Thu Mar 3 12:26:33 2016<br>
@@ -18,13 +18,18 @@<br>
#include "llvm/ADT/SmallVector.h"<br>
#include "llvm/ADT/Statistic.h"<br>
#include "llvm/Analysis/AssumptionCache.h"<br>
+#include "llvm/Analysis/BlockFrequencyInfo.h"<br>
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"<br>
+#include "llvm/Analysis/BranchProbabilityInfo.h"<br>
#include "llvm/Analysis/CodeMetrics.h"<br>
#include "llvm/Analysis/ConstantFolding.h"<br>
#include "llvm/Analysis/InstructionSimplify.h"<br>
+#include "llvm/Analysis/LoopInfo.h"<br>
#include "llvm/Analysis/TargetTransformInfo.h"<br>
#include "llvm/IR/CallSite.h"<br>
#include "llvm/IR/CallingConv.h"<br>
#include "llvm/IR/DataLayout.h"<br>
+#include "llvm/IR/Dominators.h"<br>
#include "llvm/IR/GetElementPtrTypeIterator.h"<br>
#include "llvm/IR/GlobalAlias.h"<br>
#include "llvm/IR/InstVisitor.h"<br>
@@ -85,6 +90,7 @@ class CallAnalyzer : public InstVisitor<<br>
// easily cacheable. Instead, use the cover function paramHasAttr.<br>
CallSite CandidateCS;<br>
<br>
+ BlockFrequencyAnalysis *BFA;<br>
int Threshold;<br>
int Cost;<br>
<br>
@@ -153,6 +159,8 @@ class CallAnalyzer : public InstVisitor<<br>
/// passed to support analyzing indirect calls whose target is inferred by<br>
/// analysis.<br>
void updateThreshold(CallSite CS, Function &Callee);<br>
+ /// Adjust Threshold based on CallSiteCount and return the adjusted threshold.<br>
+ int getAdjustedThreshold(int Threshold, Optional<uint64_t> CallSiteCount);<br>
<br>
// Custom analysis routines.<br>
bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);<br>
@@ -194,17 +202,19 @@ class CallAnalyzer : public InstVisitor<<br>
<br>
public:<br>
CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT,<br>
- Function &Callee, int Threshold, CallSite CSArg)<br>
- : TTI(TTI), ACT(ACT), F(Callee), CandidateCS(CSArg), Threshold(Threshold),<br>
- Cost(0), IsCallerRecursive(false), IsRecursiveCall(false),<br>
- ExposesReturnsTwice(false), HasDynamicAlloca(false),<br>
- ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),<br>
- HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),<br>
- NumVectorInstructions(0), FiftyPercentVectorBonus(0),<br>
- TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),<br>
- NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),<br>
- NumConstantPtrDiffs(0), NumInstructionsSimplified(0),<br>
- SROACostSavings(0), SROACostSavingsLost(0) {}<br>
+ Function &Callee, int Threshold, CallSite CSArg,<br>
+ BlockFrequencyAnalysis *BFA)<br>
+ : TTI(TTI), ACT(ACT), F(Callee), CandidateCS(CSArg), BFA(BFA),<br>
+ Threshold(Threshold), Cost(0), IsCallerRecursive(false),<br>
+ IsRecursiveCall(false), ExposesReturnsTwice(false),<br>
+ HasDynamicAlloca(false), ContainsNoDuplicateCall(false),<br>
+ HasReturn(false), HasIndirectBr(false), HasFrameEscape(false),<br>
+ AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),<br>
+ FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),<br>
+ NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),<br>
+ NumConstantPtrCmps(0), NumConstantPtrDiffs(0),<br>
+ NumInstructionsSimplified(0), SROACostSavings(0),<br>
+ SROACostSavingsLost(0) {}<br>
<br>
bool analyzeCall(CallSite CS);<br>
<br>
@@ -572,6 +582,15 @@ bool CallAnalyzer::isKnownNonNullInCalle<br>
return false;<br>
}<br>
<br>
+// Adjust the threshold based on callsite hotness. Currently this is a nop.<br>
+int CallAnalyzer::getAdjustedThreshold(int Threshold,<br>
+ Optional<uint64_t> CallSiteCount<br>
+ __attribute__((unused))) {<br>
+ // FIXME: The new threshold should be computed from the given Threshold and<br>
+ // the callsite hotness.<br>
+ return Threshold;<br>
+}<br>
+<br>
void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {<br>
// If -inline-threshold is not given, listen to the optsize and minsize<br>
// attributes when they would decrease the threshold.<br>
@@ -596,6 +615,9 @@ void CallAnalyzer::updateThreshold(CallS<br>
FunctionCount = Callee.getEntryCount().getValue();<br>
MaxFunctionCount = Callee.getParent()->getMaximumFunctionCount().getValue();<br>
}<br>
+ Optional<uint64_t> CallSiteCount =<br>
+ llvm::getBlockCount(CS.getInstruction()->getParent(), BFA);<br>
+ Threshold = getAdjustedThreshold(Threshold, CallSiteCount);<br>
<br>
// Listen to the inlinehint attribute or profile based hotness information<br>
// when it would increase the threshold and the caller does not need to<br>
@@ -912,7 +934,8 @@ bool CallAnalyzer::visitCallSite(CallSit<br>
// during devirtualization and so we want to give it a hefty bonus for<br>
// inlining, but cap that bonus in the event that inlining wouldn't pan<br>
// out. Pretend to inline the function, with a custom threshold.<br>
- CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS);<br>
+ CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS,<br>
+ BFA);<br>
if (CA.analyzeCall(CS)) {<br>
// We were able to inline the indirect call! Subtract the cost from the<br>
// threshold to get the bonus we want to apply, but don't go below zero.<br>
@@ -1433,9 +1456,10 @@ static bool functionsHaveCompatibleAttri<br>
<br>
InlineCost llvm::getInlineCost(CallSite CS, int DefaultThreshold,<br>
TargetTransformInfo &CalleeTTI,<br>
- AssumptionCacheTracker *ACT) {<br>
+ AssumptionCacheTracker *ACT,<br>
+ BlockFrequencyAnalysis *BFA) {<br>
return getInlineCost(CS, CS.getCalledFunction(), DefaultThreshold, CalleeTTI,<br>
- ACT);<br>
+ ACT, BFA);<br>
}<br>
<br>
int llvm::computeThresholdFromOptLevels(unsigned OptLevel,<br>
@@ -1454,7 +1478,8 @@ int llvm::getDefaultInlineThreshold() {<br>
InlineCost llvm::getInlineCost(CallSite CS, Function *Callee,<br>
int DefaultThreshold,<br>
TargetTransformInfo &CalleeTTI,<br>
- AssumptionCacheTracker *ACT) {<br>
+ AssumptionCacheTracker *ACT,<br>
+ BlockFrequencyAnalysis *BFA) {<br>
<br>
// Cannot inline indirect calls.<br>
if (!Callee)<br>
@@ -1487,7 +1512,7 @@ InlineCost llvm::getInlineCost(CallSite<br>
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()<br>
<< "...\n");<br>
<br>
- CallAnalyzer CA(CalleeTTI, ACT, *Callee, DefaultThreshold, CS);<br>
+ CallAnalyzer CA(CalleeTTI, ACT, *Callee, DefaultThreshold, CS, BFA);<br>
bool ShouldInline = CA.analyzeCall(CS);<br>
<br>
DEBUG(CA.dump());<br>
@@ -1535,3 +1560,45 @@ bool llvm::isInlineViable(Function &F) {<br>
<br>
return true;<br>
}<br>
+<br>
+/// \brief Get estimated execution count for \p BB.<br>
+Optional<uint64_t> llvm::getBlockCount(BasicBlock *BB,<br>
+ BlockFrequencyAnalysis *BFA) {<br>
+ if (!BFA)<br>
+ return None;<br>
+ Function *F = BB->getParent();<br>
+ Optional<uint64_t> EntryCount = F->getEntryCount();<br>
+ if (!EntryCount)<br>
+ return None;<br>
+ BlockFrequencyInfo *BFI = BFA->getBlockFrequencyInfo(F);<br>
+ uint64_t BBFreq = BFI->getBlockFreq(BB).getFrequency();<br>
+ uint64_t FunctionEntryFreq = BFI->getEntryFreq();<br>
+ uint64_t BBCount = EntryCount.getValue() * BBFreq / FunctionEntryFreq;<br>
+ return BBCount;<br>
+}<br>
+<br>
+BlockFrequencyAnalysis::~BlockFrequencyAnalysis() {<br>
+ for (auto &Entry : BFM) {<br>
+ delete Entry.second;<br>
+ }<br>
+}<br>
+<br>
+/// \brief Get BlockFrequencyInfo for a function.<br>
+BlockFrequencyInfo *BlockFrequencyAnalysis::getBlockFrequencyInfo(Function *F) {<br>
+ auto Iter = BFM.find(F);<br>
+ if (Iter != BFM.end())<br>
+ return Iter->second;<br>
+ // We need to create a BlockFrequencyInfo object for F and store it.<br>
+ DominatorTree DT;<br>
+ DT.recalculate(*F);<br>
+ LoopInfo LI(DT);<br>
+ BranchProbabilityInfo BPI(*F, LI);<br>
+ BlockFrequencyInfo *BFI = new BlockFrequencyInfo(*F, BPI, LI);<br>
+ BFM[F] = BFI;<br>
+ return BFI;<br>
+}<br>
+<br>
+/// \brief Invalidate BlockFrequencyInfo for a function.<br>
+void BlockFrequencyAnalysis::invalidateBlockFrequencyInfo(Function *F) {<br>
+ BFM.erase(F);<br>
+}<br>
<br>
Modified: llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp?rev=262636&r1=262635&r2=262636&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp?rev=262636&r1=262635&r2=262636&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp (original)<br>
+++ llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp Thu Mar 3 12:26:33 2016<br>
@@ -59,7 +59,8 @@ public:<br>
InlineCost getInlineCost(CallSite CS) override {<br>
Function *Callee = CS.getCalledFunction();<br>
TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);<br>
- return llvm::getInlineCost(CS, DefaultThreshold, TTI, ACT);<br>
+ return llvm::getInlineCost(CS, DefaultThreshold, TTI, ACT,<br>
+ HasProfileData ? BFA.get() : nullptr);<br>
}<br>
<br>
bool runOnSCC(CallGraphSCC &SCC) override;<br>
<br>
Modified: llvm/trunk/lib/Transforms/IPO/Inliner.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/Inliner.cpp?rev=262636&r1=262635&r2=262636&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/Inliner.cpp?rev=262636&r1=262635&r2=262636&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Transforms/IPO/Inliner.cpp (original)<br>
+++ llvm/trunk/lib/Transforms/IPO/Inliner.cpp Thu Mar 3 12:26:33 2016<br>
@@ -19,6 +19,7 @@<br>
#include "llvm/Analysis/AliasAnalysis.h"<br>
#include "llvm/Analysis/AssumptionCache.h"<br>
#include "llvm/Analysis/BasicAliasAnalysis.h"<br>
+#include "llvm/Analysis/BlockFrequencyInfo.h"<br>
#include "llvm/Analysis/CallGraph.h"<br>
#include "llvm/Analysis/InlineCost.h"<br>
#include "llvm/Analysis/TargetLibraryInfo.h"<br>
@@ -47,10 +48,13 @@ STATISTIC(NumMergedAllocas, "Number of a<br>
// if those would be more profitable and blocked inline steps.<br>
STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");<br>
<br>
-Inliner::Inliner(char &ID) : CallGraphSCCPass(ID), InsertLifetime(true) {}<br>
+Inliner::Inliner(char &ID)<br>
+ : CallGraphSCCPass(ID), InsertLifetime(true),<br>
+ BFA(new BlockFrequencyAnalysis()) {}<br>
<br>
Inliner::Inliner(char &ID, bool InsertLifetime)<br>
- : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {}<br>
+ : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime),<br>
+ BFA(new BlockFrequencyAnalysis()) {}<br>
<br>
/// For this class, we declare that we require and preserve the call graph.<br>
/// If the derived class implements this method, it should<br>
@@ -259,7 +263,7 @@ bool Inliner::shouldInline(CallSite CS)<br>
Twine(IC.getCostDelta() + IC.getCost()) + ")");<br>
return false;<br>
}<br>
-<br>
+<br>
// Try to detect the case where the current inlining candidate caller (call<br>
// it B) is a static or linkonce-ODR function and is an inlining candidate<br>
// elsewhere, and the current candidate callee (call it C) is large enough<br>
@@ -356,8 +360,90 @@ static bool InlineHistoryIncludes(Functi<br>
return false;<br>
}<br>
<br>
+/// \brief Update the frequency of a block that is cloned into the caller.<br>
+/// This is invoked when \p OrigBB from the callee is cloned into \p NewBB in<br>
+/// the caller.<br>
+void Inliner::updateBlockFreq(CallSite &CS, const BasicBlock *OrigBB,<br>
+ const BasicBlock *NewBB) {<br>
+ if (!HasProfileData)<br>
+ return;<br>
+ Instruction *Call = CS.getInstruction();<br>
+ BasicBlock *CallBB = Call->getParent();<br>
+ BlockFrequencyInfo *CalleeBFI =<br>
+ BFA->getBlockFrequencyInfo(CS.getCalledFunction());<br>
+ BlockFrequencyInfo *CallerBFI =<br>
+ BFA->getBlockFrequencyInfo(CallBB->getParent());<br>
+ // Find the number of times OrigBB is executed per invocation of the callee<br>
+ // and multiply by the number of times callee is executed in the caller.<br>
+ // Freq(NewBB) = Freq(OrigBB) * CallSiteFreq / CalleeEntryFreq.<br>
+ uint64_t CallSiteFreq = CallerBFI->getBlockFreq(CallBB).getFrequency();<br>
+ uint64_t CalleeEntryFreq = CalleeBFI->getEntryFreq();<br>
+ // Frequency of OrigBB in the callee.<br>
+ BlockFrequency OrigBBFreq = CalleeBFI->getBlockFreq(OrigBB);<br>
+ CallerBFI->setBlockFreq(NewBB, (double)(OrigBBFreq.getFrequency()) /<br>
+ CalleeEntryFreq * CallSiteFreq);<br>
+}<br>
+<br>
+/// \brief Update entry count of \p Callee after it got inlined at a callsite<br>
+/// in block \p CallBB.<br>
+void Inliner::updateEntryCount(BasicBlock *CallBB, Function *Callee) {<br>
+ if (!HasProfileData)<br>
+ return;<br>
+ // If the callee has a original count of N, and the estimated count of<br>
+ // callsite is M, the new callee count is set to N - M. M is estimated from<br>
+ // the caller's entry count, its entry block frequency and the block frequency<br>
+ // of the callsite.<br>
+ Optional<uint64_t> CalleeCount = Callee->getEntryCount();<br>
+ if (!CalleeCount)<br>
+ return;<br>
+ Optional<uint64_t> CallSiteCount = llvm::getBlockCount(CallBB, BFA.get());<br>
+ if (!CallSiteCount)<br>
+ return;<br>
+ // Since CallSiteCount is an estimate, it could exceed the original callee<br>
+ // count and has to be set to 0.<br>
+ if (CallSiteCount.getValue() > CalleeCount.getValue()) {<br>
+ Callee->setEntryCount(0);<br>
+ DEBUG(llvm::dbgs() << "Estimated count of block " << CallBB->getName()<br>
+ << " is " << CallSiteCount.getValue()<br>
+ << " which exceeds the entry count "<br>
+ << CalleeCount.getValue() << " of the callee "<br>
+ << Callee->getName() << "\n");<br>
+ } else<br>
+ Callee->setEntryCount(CalleeCount.getValue() - CallSiteCount.getValue());<br>
+}<br>
+<br>
+void Inliner::invalidateBFI(Function *F) {<br>
+ if (!HasProfileData)<br>
+ return;<br>
+ if (F)<br>
+ BFA->invalidateBlockFrequencyInfo(F);<br>
+}<br>
+void Inliner::invalidateBFI(CallGraphSCC &SCC) {<br>
+ if (!HasProfileData)<br>
+ return;<br>
+ for (CallGraphNode *Node : SCC) {<br>
+ Function *F = Node->getFunction();<br>
+ invalidateBFI(F);<br>
+ }<br>
+}<br>
+void Inliner::copyBlockFrequency(BasicBlock *Src, BasicBlock *Dst) {<br>
+ if (!HasProfileData)<br>
+ return;<br>
+ Function *F = Src->getParent();<br>
+ BlockFrequencyInfo *BFI = BFA->getBlockFrequencyInfo(F);<br>
+ BFI->setBlockFreq(Dst, BFI->getBlockFreq(Src).getFrequency());<br>
+}<br>
+<br>
+static bool hasProfileData(Module &M) {<br>
+ // We check for the presence of MaxFunctionCount in the module.<br>
+ // FIXME: This now only works for frontend based instrumentation.<br>
+ return M.getMaximumFunctionCount().hasValue();<br>
+}<br>
+<br>
bool Inliner::runOnSCC(CallGraphSCC &SCC) {<br>
+ using namespace std::placeholders;<br>
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();<br>
+ HasProfileData = hasProfileData(CG.getModule());<br>
ACT = &getAnalysis<AssumptionCacheTracker>();<br>
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();<br>
<br>
@@ -419,7 +505,6 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC<br>
<br>
<br>
InlinedArrayAllocasTy InlinedArrayAllocas;<br>
- InlineFunctionInfo InlineInfo(&CG, ACT);<br>
<br>
// Now that we have all of the call sites, loop over them and inline them if<br>
// it looks profitable to do so.<br>
@@ -448,6 +533,10 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC<br>
CS.getInstruction()->eraseFromParent();<br>
++NumCallsDeleted;<br>
} else {<br>
+ Instruction *TheCall = CS.getInstruction();<br></blockquote><br></div></div><div>In test/Transforms/Inline/inline-tail.ll, ASAN is telling me that there is a use after free here. Presumably, CS has already been inlined/deleted?</div><div><div class="h5"><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">
+ BasicBlock *CallSiteBlock = TheCall->getParent();<br>
+ Instruction *CallSuccessor = &*(++BasicBlock::iterator(TheCall));</blockquote><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">
+<br>
// We can only inline direct calls to non-declarations.<br>
if (!Callee || Callee->isDeclaration()) continue;<br>
<br>
@@ -476,6 +565,11 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC<br>
continue;<br>
}<br>
<br>
+ BlockCloningFunctor BCF = nullptr;<br>
+ if (HasProfileData)<br>
+ BCF = std::bind(&Inliner::updateBlockFreq, this, CS, _1, _2);<br>
+ InlineFunctionInfo InlineInfo(&CG, ACT, BCF);<br>
+<br>
// Attempt to inline the function.<br>
if (!InlineCallIfPossible(*this, CS, InlineInfo, InlinedArrayAllocas,<br>
InlineHistoryID, InsertLifetime)) {<br>
@@ -485,6 +579,13 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC<br>
Caller->getName()));<br>
continue;<br>
}<br>
+ updateEntryCount(CallSiteBlock, Callee);<br>
+ // The instruction following the call is part of a new basic block<br>
+ // created during the inlining process. This does not have an entry in<br>
+ // the BFI. We create an entry by copying the frequency of the original<br>
+ // block containing the call.<br>
+ copyBlockFrequency(CallSiteBlock, CallSuccessor->getParent());<br>
+<br>
++NumInlined;<br>
<br>
// Report the inline decision.<br>
@@ -523,7 +624,9 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC<br>
CalleeNode->removeAllCalledFunctions();<br>
<br>
// Removing the node for callee from the call graph and delete it.<br>
- delete CG.removeFunctionFromModule(CalleeNode);<br>
+ Function *F = CG.removeFunctionFromModule(CalleeNode);<br>
+ invalidateBFI(F);<br>
+ delete F;<br>
++NumDeleted;<br>
}<br>
<br>
@@ -544,6 +647,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC<br>
}<br>
} while (LocalChange);<br>
<br>
+ invalidateBFI(SCC);<br>
return Changed;<br>
}<br>
<br>
@@ -651,7 +755,9 @@ bool Inliner::removeDeadFunctions(CallGr<br>
FunctionsToRemove.end()),<br>
FunctionsToRemove.end());<br>
for (CallGraphNode *CGN : FunctionsToRemove) {<br>
- delete CG.removeFunctionFromModule(CGN);<br>
+ Function *F = CG.removeFunctionFromModule(CGN);<br>
+ invalidateBFI(F);<br>
+ delete F;<br>
++NumDeleted;<br>
}<br>
return true;<br>
<br>
Modified: llvm/trunk/lib/Transforms/Utils/CloneFunction.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/CloneFunction.cpp?rev=262636&r1=262635&r2=262636&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/CloneFunction.cpp?rev=262636&r1=262635&r2=262636&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Transforms/Utils/CloneFunction.cpp (original)<br>
+++ llvm/trunk/lib/Transforms/Utils/CloneFunction.cpp Thu Mar 3 12:26:33 2016<br>
@@ -277,9 +277,10 @@ namespace {<br>
<br>
/// The specified block is found to be reachable, clone it and<br>
/// anything that it can reach.<br>
- void CloneBlock(const BasicBlock *BB,<br>
+ void CloneBlock(const BasicBlock *BB,<br>
BasicBlock::const_iterator StartingInst,<br>
- std::vector<const BasicBlock*> &ToClone);<br>
+ std::vector<const BasicBlock *> &ToClone,<br>
+ BlockCloningFunctor Ftor = nullptr);<br>
};<br>
}<br>
<br>
@@ -287,7 +288,8 @@ namespace {<br>
/// anything that it can reach.<br>
void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,<br>
BasicBlock::const_iterator StartingInst,<br>
- std::vector<const BasicBlock*> &ToClone){<br>
+ std::vector<const BasicBlock *> &ToClone,<br>
+ BlockCloningFunctor Ftor) {<br>
WeakVH &BBEntry = VMap[BB];<br>
<br>
// Have we already cloned this block?<br>
@@ -424,18 +426,19 @@ void PruningFunctionCloner::CloneBlock(c<br>
CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&<br>
BB != &BB->getParent()->front();<br>
}<br>
+ // Call Ftor to tell BB has been cloned to NewBB<br>
+ if (Ftor)<br>
+ Ftor(BB, NewBB);<br>
}<br>
<br>
/// This works like CloneAndPruneFunctionInto, except that it does not clone the<br>
/// entire function. Instead it starts at an instruction provided by the caller<br>
/// and copies (and prunes) only the code reachable from that instruction.<br>
-void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,<br>
- const Instruction *StartingInst,<br>
- ValueToValueMapTy &VMap,<br>
- bool ModuleLevelChanges,<br>
- SmallVectorImpl<ReturnInst *> &Returns,<br>
- const char *NameSuffix,<br>
- ClonedCodeInfo *CodeInfo) {<br>
+void llvm::CloneAndPruneIntoFromInst(<br>
+ Function *NewFunc, const Function *OldFunc, const Instruction *StartingInst,<br>
+ ValueToValueMapTy &VMap, bool ModuleLevelChanges,<br>
+ SmallVectorImpl<ReturnInst *> &Returns, const char *NameSuffix,<br>
+ ClonedCodeInfo *CodeInfo, BlockCloningFunctor Ftor) {<br>
assert(NameSuffix && "NameSuffix cannot be null!");<br>
<br>
ValueMapTypeRemapper *TypeMapper = nullptr;<br>
@@ -461,11 +464,11 @@ void llvm::CloneAndPruneIntoFromInst(Fun<br>
<br>
// Clone the entry block, and anything recursively reachable from it.<br>
std::vector<const BasicBlock*> CloneWorklist;<br>
- PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist);<br>
+ PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist, Ftor);<br>
while (!CloneWorklist.empty()) {<br>
const BasicBlock *BB = CloneWorklist.back();<br>
CloneWorklist.pop_back();<br>
- PFC.CloneBlock(BB, BB->begin(), CloneWorklist);<br>
+ PFC.CloneBlock(BB, BB->begin(), CloneWorklist, Ftor);<br>
}<br>
<br>
// Loop over all of the basic blocks in the old function. If the block was<br>
@@ -667,15 +670,14 @@ void llvm::CloneAndPruneIntoFromInst(Fun<br>
/// constant arguments cause a significant amount of code in the callee to be<br>
/// dead. Since this doesn't produce an exact copy of the input, it can't be<br>
/// used for things like CloneFunction or CloneModule.<br>
-void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,<br>
- ValueToValueMapTy &VMap,<br>
- bool ModuleLevelChanges,<br>
- SmallVectorImpl<ReturnInst*> &Returns,<br>
- const char *NameSuffix,<br>
- ClonedCodeInfo *CodeInfo,<br>
- Instruction *TheCall) {<br>
+void llvm::CloneAndPruneFunctionInto(<br>
+ Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap,<br>
+ bool ModuleLevelChanges, SmallVectorImpl<ReturnInst *> &Returns,<br>
+ const char *NameSuffix, ClonedCodeInfo *CodeInfo, Instruction *TheCall,<br>
+ BlockCloningFunctor Ftor) {<br>
CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap,<br>
- ModuleLevelChanges, Returns, NameSuffix, CodeInfo);<br>
+ ModuleLevelChanges, Returns, NameSuffix, CodeInfo,<br>
+ Ftor);<br>
}<br>
<br>
/// \brief Remaps instructions in \p Blocks using the mapping in \p VMap.<br>
<br>
Modified: llvm/trunk/lib/Transforms/Utils/InlineFunction.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/InlineFunction.cpp?rev=262636&r1=262635&r2=262636&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/InlineFunction.cpp?rev=262636&r1=262635&r2=262636&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Transforms/Utils/InlineFunction.cpp (original)<br>
+++ llvm/trunk/lib/Transforms/Utils/InlineFunction.cpp Thu Mar 3 12:26:33 2016<br>
@@ -1319,7 +1319,7 @@ bool llvm::InlineFunction(CallSite CS, I<br>
<br>
// If IFI has any state in it, zap it before we fill it in.<br>
IFI.reset();<br>
-<br>
+<br>
const Function *CalledFunc = CS.getCalledFunction();<br>
if (!CalledFunc || // Can't inline external function or indirect<br>
CalledFunc->isDeclaration() || // call, or call to a vararg function!<br>
@@ -1486,7 +1486,7 @@ bool llvm::InlineFunction(CallSite CS, I<br>
// happy with whatever the cloner can do.<br>
CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,<br>
/*ModuleLevelChanges=*/false, Returns, ".i",<br>
- &InlinedFunctionInfo, TheCall);<br>
+ &InlinedFunctionInfo, TheCall, IFI.Ftor);<br>
<br>
// Remember the first block that is newly cloned over.<br>
FirstNewBlock = LastBlock; ++FirstNewBlock;<br>
<br>
Added: llvm/trunk/test/Transforms/Inline/function-count-update-2.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/function-count-update-2.ll?rev=262636&view=auto" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/function-count-update-2.ll?rev=262636&view=auto</a><br>
==============================================================================<br>
--- llvm/trunk/test/Transforms/Inline/function-count-update-2.ll (added)<br>
+++ llvm/trunk/test/Transforms/Inline/function-count-update-2.ll Thu Mar 3 12:26:33 2016<br>
@@ -0,0 +1,27 @@<br>
+; RUN: opt < %s -inline -S | FileCheck %s<br>
+<br>
+; This tests that the function count of a callee gets correctly updated after it<br>
+; has been inlined into two callsites.<br>
+<br>
+; CHECK: @callee() !prof [[COUNT:![0-9]+]]<br>
+define i32 @callee() !prof !1 {<br>
+ ret i32 0<br>
+}<br>
+<br>
+define i32 @caller1() !prof !2 {<br>
+ %i = call i32 @callee()<br>
+ ret i32 %i<br>
+}<br>
+<br>
+define i32 @caller2() !prof !3 {<br>
+ %i = call i32 @callee()<br>
+ ret i32 %i<br>
+}<br>
+<br>
+!llvm.module.flags = !{!0}<br>
+; CHECK: [[COUNT]] = !{!"function_entry_count", i64 0}<br>
+!0 = !{i32 1, !"MaxFunctionCount", i32 1000}<br>
+!1 = !{!"function_entry_count", i64 1000}<br>
+!2 = !{!"function_entry_count", i64 600}<br>
+!3 = !{!"function_entry_count", i64 400}<br>
+<br>
<br>
Added: llvm/trunk/test/Transforms/Inline/function-count-update-3.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/function-count-update-3.ll?rev=262636&view=auto" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/function-count-update-3.ll?rev=262636&view=auto</a><br>
==============================================================================<br>
--- llvm/trunk/test/Transforms/Inline/function-count-update-3.ll (added)<br>
+++ llvm/trunk/test/Transforms/Inline/function-count-update-3.ll Thu Mar 3 12:26:33 2016<br>
@@ -0,0 +1,69 @@<br>
+; RUN: opt < %s -inline -S -inline-threshold=50 | FileCheck %s<br>
+<br>
+; This tests that the function count of a function gets properly scaled after<br>
+; inlining a call chain leading to the function.<br>
+; Function a calls c with count 200 (C1)<br>
+; Function b calls c with count 300<br>
+; Function c calls e with count 250 (C2)<br>
+; Entry count of e is 500 (C3)<br>
+; c->e inlining does not happen since the cost exceeds threshold.<br>
+; c is then inlined into a.<br>
+; e now gets inlined into a (through c) since the branch condition in e is now<br>
+; known and hence the cost gets reduced.<br>
+; Estimated count of a->e callsite = C2 * (C1 / C3)<br>
+; Estimated count of a->e callsite = 250 * (200 / 500) = 100<br>
+; Remaining count of e = C3 - 100 = 500 - 100 = 400<br>
+<br>
+@data = external global i32<br>
+<br>
+define i32 @a(i32 %a1) !prof !1 {<br>
+ %a2 = call i32 @c(i32 %a1, i32 1)<br>
+ ret i32 %a2<br>
+}<br>
+<br>
+define i32 @b(i32 %b1) !prof !2 {<br>
+ %b2 = call i32 @c(i32 %b1, i32 %b1)<br>
+ ret i32 %b2<br>
+}<br>
+<br>
+define i32 @c(i32 %c1, i32 %c100) !prof !3 {<br>
+ %cond = icmp sle i32 %c1, 1<br>
+ br i1 %cond, label %cond_true, label %cond_false<br>
+<br>
+cond_false:<br>
+ ret i32 0<br>
+<br>
+cond_true:<br>
+ %c11 = call i32 @e(i32 %c100)<br>
+ ret i32 %c11<br>
+}<br>
+<br>
+; CHECK: @e(i32 %c1) !prof [[COUNT:![0-9]+]]<br>
+define i32 @e(i32 %c1) !prof !4 {<br>
+ %cond = icmp sle i32 %c1, 1<br>
+ br i1 %cond, label %cond_true, label %cond_false<br>
+<br>
+cond_false:<br>
+ %c2 = load i32, i32* @data, align 4<br>
+ %c3 = add i32 %c1, %c2<br>
+ %c4 = mul i32 %c3, %c2<br>
+ %c5 = add i32 %c4, %c2<br>
+ %c6 = mul i32 %c5, %c2<br>
+ %c7 = add i32 %c6, %c2<br>
+ %c8 = mul i32 %c7, %c2<br>
+ %c9 = add i32 %c8, %c2<br>
+ %c10 = mul i32 %c9, %c2<br>
+ ret i32 %c10<br>
+<br>
+cond_true:<br>
+ ret i32 0<br>
+}<br>
+<br>
+!llvm.module.flags = !{!0}<br>
+; CHECK: [[COUNT]] = !{!"function_entry_count", i64 400}<br>
+!0 = !{i32 1, !"MaxFunctionCount", i32 5000}<br>
+!1 = !{!"function_entry_count", i64 200}<br>
+!2 = !{!"function_entry_count", i64 300}<br>
+!3 = !{!"function_entry_count", i64 500}<br>
+!4 = !{!"function_entry_count", i64 500}<br>
+<br>
<br>
Added: llvm/trunk/test/Transforms/Inline/function-count-update.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/function-count-update.ll?rev=262636&view=auto" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/function-count-update.ll?rev=262636&view=auto</a><br>
==============================================================================<br>
--- llvm/trunk/test/Transforms/Inline/function-count-update.ll (added)<br>
+++ llvm/trunk/test/Transforms/Inline/function-count-update.ll Thu Mar 3 12:26:33 2016<br>
@@ -0,0 +1,51 @@<br>
+; RUN: opt < %s -inline -S | FileCheck %s<br>
+; RUN: opt < %s -always-inline -S | FileCheck %s<br>
+<br>
+; This tests that the function counts of two callees get correctly updated after<br>
+; they have been inlined into two back-to-back callsites in a single basic block<br>
+; in the caller. The callees have the alwaysinline attribute and so they get<br>
+; inlined both with the regular inliner pass and the always inline pass. In<br>
+; both cases, the new count of each callee is the original count minus callsite<br>
+; count which is 200 (since the caller's entry count is 400 and the block<br>
+; containing the calls has a relative block frequency of 0.5).<br>
+<br>
+; CHECK: @callee1(i32 %n) #0 !prof [[COUNT1:![0-9]+]]<br>
+define i32 @callee1(i32 %n) #0 !prof !1 {<br>
+ %cond = icmp sle i32 %n, 10<br>
+ br i1 %cond, label %cond_true, label %cond_false<br>
+<br>
+cond_true:<br>
+ %r1 = add i32 %n, 1<br>
+ ret i32 %r1<br>
+cond_false:<br>
+ %r2 = add i32 %n, 2<br>
+ ret i32 %r2<br>
+}<br>
+<br>
+; CHECK: @callee2(i32 %n) #0 !prof [[COUNT2:![0-9]+]]<br>
+define i32 @callee2(i32 %n) #0 !prof !2 {<br>
+ %r1 = add i32 %n, 1<br>
+ ret i32 %r1<br>
+}<br>
+<br>
+define i32 @caller(i32 %n) !prof !3 {<br>
+ %cond = icmp sle i32 %n, 100<br>
+ br i1 %cond, label %cond_true, label %cond_false<br>
+<br>
+cond_true:<br>
+ %i = call i32 @callee1(i32 %n)<br>
+ %j = call i32 @callee2(i32 %i)<br>
+ ret i32 %j<br>
+cond_false:<br>
+ ret i32 0<br>
+}<br>
+<br>
+!llvm.module.flags = !{!0}<br>
+; CHECK: [[COUNT1]] = !{!"function_entry_count", i64 800}<br>
+; CHECK: [[COUNT2]] = !{!"function_entry_count", i64 1800}<br>
+!0 = !{i32 1, !"MaxFunctionCount", i32 1000}<br>
+!1 = !{!"function_entry_count", i64 1000}<br>
+!2 = !{!"function_entry_count", i64 2000}<br>
+!3 = !{!"function_entry_count", i64 400}<br>
+attributes #0 = { alwaysinline }<br>
+<br>
<br>
<br>
_______________________________________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a><br>
<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits</a><br>
</blockquote></div></div></div><br></div></div>
</blockquote></div><br></div>