[llvm] r311371 - [InlineCost] Add cl::opt to allow full inline cost to be computed for debugging purposes.

Mon Aug 21 13:00:09 PDT 2017

Author: haicheng
Date: Mon Aug 21 13:00:09 2017
New Revision: 311371

URL: http://llvm.org/viewvc/llvm-project?rev=311371&view=rev
Log:
[InlineCost] Add cl::opt to allow full inline cost to be computed for debugging purposes.

Currently, the inline cost model will bail once the inline cost exceeds the
inline threshold in order to avoid unnecessary compile-time. However, when
debugging it is useful to compute the full cost, so this command line option
is added to override the default behavior.

I took over this work from Chad Rosier (mcrosier at codeaurora.org).

Differential Revision: https://reviews.llvm.org/D35850

Modified:
    llvm/trunk/include/llvm/Analysis/InlineCost.h
    llvm/trunk/lib/Analysis/InlineCost.cpp
    llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp
    llvm/trunk/lib/Transforms/IPO/Inliner.cpp
    llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp

Modified: llvm/trunk/include/llvm/Analysis/InlineCost.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/InlineCost.h?rev=311371&r1=311370&r2=311371&view=diff
==============================================================================

--- llvm/trunk/include/llvm/Analysis/InlineCost.h (original)
+++ llvm/trunk/include/llvm/Analysis/InlineCost.h Mon Aug 21 13:00:09 2017
@@ -16,6 +16,7 @@
 
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
 #include <cassert>
 #include <climits>
 
@@ -185,12 +186,11 @@ int getCallsiteCost(CallSite CS, const D
 ///
 /// Also note that calling this function *dynamically* computes the cost of
 /// inlining the callsite. It is an expensive, heavyweight call.
-InlineCost
-getInlineCost(CallSite CS, const InlineParams &Params,
-              TargetTransformInfo &CalleeTTI,
-              std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
-              Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
-              ProfileSummaryInfo *PSI);
+InlineCost getInlineCost(
+    CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
+    std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
+    Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
+    ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE = nullptr);
 
 /// \brief Get an InlineCost with the callee explicitly specified.
 /// This allows you to calculate the cost of inlining a function via a
@@ -202,7 +202,7 @@ getInlineCost(CallSite CS, Function *Cal
               TargetTransformInfo &CalleeTTI,
               std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
               Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
-              ProfileSummaryInfo *PSI);
+              ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE);
 
 /// \brief Minimal filter to detect invalid constructs for inlining.
 bool isInlineViable(Function &Callee);

Modified: llvm/trunk/lib/Analysis/InlineCost.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/InlineCost.cpp?rev=311371&r1=311370&r2=311371&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/InlineCost.cpp (original)
+++ llvm/trunk/lib/Analysis/InlineCost.cpp Mon Aug 21 13:00:09 2017
@@ -82,6 +82,11 @@ static cl::opt<int> HotCallSiteRelFreq(
              "entry frequency, for a callsite to be hot in the absence of "
              "profile information."));
 
+static cl::opt<bool> ComputeFullInlineCost(
+    "inline-cost-full", cl::Hidden, cl::init(false),
+    cl::desc("Compute the full inline cost of a call site even when the cost "
+             "exceeds the threshold."));
+
 namespace {
 
 class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
@@ -106,6 +111,9 @@ class CallAnalyzer : public InstVisitor<
   // Cache the DataLayout since we use it a lot.
   const DataLayout &DL;
 
+  /// The OptimizationRemarkEmitter available for this compilation.
+  OptimizationRemarkEmitter *ORE;
+
   /// The candidate callsite being analyzed. Please do not use this to do
   /// analysis in the caller function; we want the inline cost query to be
   /// easily cacheable. Instead, use the cover function paramHasAttr.
@@ -243,10 +251,10 @@ public:
   CallAnalyzer(const TargetTransformInfo &TTI,
                std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
                Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
-               ProfileSummaryInfo *PSI, Function &Callee, CallSite CSArg,
-               const InlineParams &Params)
+               ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
+               Function &Callee, CallSite CSArg, const InlineParams &Params)
       : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
-        PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()),
+        PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
         CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold),
         Cost(0), IsCallerRecursive(false), IsRecursiveCall(false),
         ExposesReturnsTwice(false), HasDynamicAlloca(false),
@@ -1138,7 +1146,7 @@ bool CallAnalyzer::visitCallSite(CallSit
   // out. Pretend to inline the function, with a custom threshold.
   auto IndirectCallParams = Params;
   IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
-  CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, *F, CS,
+  CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, CS,
                   IndirectCallParams);
   if (CA.analyzeCall(CS)) {
     // We were able to inline the indirect call! Subtract the cost from the
@@ -1198,7 +1206,7 @@ bool CallAnalyzer::visitSwitchInst(Switc
       std::min((int64_t)CostUpperBound,
                (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
 
-  if (CostLowerBound > Threshold) {
+  if (CostLowerBound > Threshold && !ComputeFullInlineCost) {
     Cost = CostLowerBound;
     return false;
   }
@@ -1347,21 +1355,36 @@ bool CallAnalyzer::analyzeBlock(BasicBlo
     else
       Cost += InlineConstants::InstrCost;
 
+    using namespace ore;
     // If the visit this instruction detected an uninlinable pattern, abort.
     if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
-        HasIndirectBr || HasFrameEscape)
+        HasIndirectBr || HasFrameEscape) {
+      if (ORE)
+        ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
+                                           CandidateCS.getInstruction())
+                  << NV("Callee", &F)
+                  << " has uninlinable pattern and cost is not fully computed");
       return false;
+    }
 
     // If the caller is a recursive function then we don't want to inline
     // functions which allocate a lot of stack space because it would increase
     // the caller stack usage dramatically.
     if (IsCallerRecursive &&
-        AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
+        AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) {
+      if (ORE)
+        ORE->emit(
+            OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
+                                     CandidateCS.getInstruction())
+            << NV("Callee", &F)
+            << " is recursive and allocates too much stack space. Cost is "
+               "not fully computed");
       return false;
+    }
 
     // Check if we've past the maximum possible threshold so we don't spin in
     // huge basic blocks that will never inline.
-    if (Cost > Threshold)
+    if (Cost > Threshold && !ComputeFullInlineCost)
       return false;
   }
 
@@ -1447,7 +1470,7 @@ bool CallAnalyzer::analyzeCall(CallSite
     Cost += InlineConstants::ColdccPenalty;
 
   // Check if we're done. This can happen due to bonuses and penalties.
-  if (Cost > Threshold)
+  if (Cost > Threshold && !ComputeFullInlineCost)
     return false;
 
   if (F.empty())
@@ -1513,7 +1536,7 @@ bool CallAnalyzer::analyzeCall(CallSite
   for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
     // Bail out the moment we cross the threshold. This means we'll under-count
     // the cost, but only when undercounting doesn't matter.
-    if (Cost > Threshold)
+    if (Cost > Threshold && !ComputeFullInlineCost)
       break;
 
     BasicBlock *BB = BBWorklist[Idx];
@@ -1657,9 +1680,9 @@ InlineCost llvm::getInlineCost(
     CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
     std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
     Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
-    ProfileSummaryInfo *PSI) {
+    ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
   return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI,
-                       GetAssumptionCache, GetBFI, PSI);
+                       GetAssumptionCache, GetBFI, PSI, ORE);
 }
 
 InlineCost llvm::getInlineCost(
@@ -1667,7 +1690,7 @@ InlineCost llvm::getInlineCost(
     TargetTransformInfo &CalleeTTI,
     std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
     Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
-    ProfileSummaryInfo *PSI) {
+    ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
 
   // Cannot inline indirect calls.
   if (!Callee)
@@ -1699,10 +1722,13 @@ InlineCost llvm::getInlineCost(
       CS.isNoInline())
     return llvm::InlineCost::getNever();
 
+  if (ORE)
+    ComputeFullInlineCost = true;
+
   DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName()
                      << "... (caller:" << Caller->getName() << ")\n");
 
-  CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, *Callee, CS,
+  CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, CS,
                   Params);
   bool ShouldInline = CA.analyzeCall(CS);
 

Modified: llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp?rev=311371&r1=311370&r2=311371&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp (original)
+++ llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp Mon Aug 21 13:00:09 2017
@@ -57,12 +57,13 @@ public:
   InlineCost getInlineCost(CallSite CS) override {
     Function *Callee = CS.getCalledFunction();
     TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
+    OptimizationRemarkEmitter ORE(CS.getCaller());
     std::function<AssumptionCache &(Function &)> GetAssumptionCache =
         [&](Function &F) -> AssumptionCache & {
       return ACT->getAssumptionCache(F);
     };
     return llvm::getInlineCost(CS, Params, TTI, GetAssumptionCache,
-                               /*GetBFI=*/None, PSI);
+                               /*GetBFI=*/None, PSI, &ORE);
   }
 
   bool runOnSCC(CallGraphSCC &SCC) override;

Modified: llvm/trunk/lib/Transforms/IPO/Inliner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/Inliner.cpp?rev=311371&r1=311370&r2=311371&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/IPO/Inliner.cpp (original)
+++ llvm/trunk/lib/Transforms/IPO/Inliner.cpp Mon Aug 21 13:00:09 2017
@@ -845,6 +845,10 @@ PreservedAnalyses InlinerPass::run(LazyC
     FunctionAnalysisManager &FAM =
         AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG)
             .getManager();
+
+    // Get the remarks emission analysis for the caller.
+    auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+
     std::function<AssumptionCache &(Function &)> GetAssumptionCache =
         [&](Function &F) -> AssumptionCache & {
       return FAM.getResult<AssumptionAnalysis>(F);
@@ -857,12 +861,9 @@ PreservedAnalyses InlinerPass::run(LazyC
       Function &Callee = *CS.getCalledFunction();
       auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
       return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, {GetBFI},
-                           PSI);
+                           PSI, &ORE);
     };
 
-    // Get the remarks emission analysis for the caller.
-    auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-
     // Now process as many calls as we have within this caller in the sequnece.
     // We bail out as soon as the caller has to change so we can update the
     // call graph and prepare the context of that new caller.

Modified: llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp?rev=311371&r1=311370&r2=311371&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp (original)
+++ llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp Mon Aug 21 13:00:09 2017
@@ -462,7 +462,7 @@ bool PartialInlinerImpl::shouldPartialIn
   Function *Caller = CS.getCaller();
   auto &CalleeTTI = (*GetTTI)(*Callee);
   InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
-                                *GetAssumptionCache, GetBFI, PSI);
+                                *GetAssumptionCache, GetBFI, PSI, &ORE);
 
   if (IC.isAlways()) {
     ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)