[llvm] 9a03058 - [CSSPGO] Factor out common part for CSSPGO inline and AFDO inline

Wenlei He via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 2 00:34:45 PST 2021


Author: Wenlei He
Date: 2021-02-02T00:34:06-08:00
New Revision: 9a03058d6322edb8abc803ba3e436cc62647d979

URL: https://github.com/llvm/llvm-project/commit/9a03058d6322edb8abc803ba3e436cc62647d979
DIFF: https://github.com/llvm/llvm-project/commit/9a03058d6322edb8abc803ba3e436cc62647d979.diff

LOG: [CSSPGO] Factor out common part for CSSPGO inline and AFDO inline

Refactoring SampleProfileLoader::inlineHotFunctions to use helpers from CSSPGO inlining and reduce similar code in the inlining loop, plus minor cleanup for AFDO path.

Test Plan:

Differential Revision: https://reviews.llvm.org/D95024

Added: 
    

Modified: 
    llvm/lib/Transforms/IPO/SampleProfile.cpp
    llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
    llvm/test/Transforms/SampleProfile/remarks.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 665c4078f3ee..7865426dff16 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -416,20 +416,18 @@ class SampleProfileLoader {
   findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
   mutable DenseMap<const DILocation *, const FunctionSamples *> DILocation2SampleMap;
   const FunctionSamples *findFunctionSamples(const Instruction &I) const;
-  CallBase *tryPromoteIndirectCall(Function &F, StringRef CalleeName,
-                                   uint64_t &Sum, uint64_t Count, CallBase *I,
-                                   const char *&Reason);
-  bool inlineCallInstruction(CallBase &CB,
-                             const FunctionSamples *CalleeSamples);
+  // Attempt to promote indirect call and also inline the promoted call
+  bool tryPromoteAndInlineCandidate(
+      Function &F, InlineCandidate &Candidate, uint64_t &Sum,
+      DenseSet<Instruction *> &PromotedInsns,
+      SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
   bool inlineHotFunctions(Function &F,
                           DenseSet<GlobalValue::GUID> &InlinedGUIDs);
-  // Helper functions call-site prioritized BFS inliner
-  // Will change the main FDO inliner to be work list based directly in
-  // upstream, then merge this change with that and remove the duplication.
   InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
   bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
-  bool tryInlineCandidate(InlineCandidate &Candidate,
-                          SmallVector<CallBase *, 8> &InlinedCallSites);
+  bool
+  tryInlineCandidate(InlineCandidate &Candidate,
+                     SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
   bool
   inlineHotFunctionsWithPriority(Function &F,
                                  DenseSet<GlobalValue::GUID> &InlinedGUIDs);
@@ -1077,70 +1075,45 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
   return it.first->second;
 }
 
-CallBase *
-SampleProfileLoader::tryPromoteIndirectCall(Function &F, StringRef CalleeName,
-                                            uint64_t &Sum, uint64_t Count,
-                                            CallBase *I, const char *&Reason) {
-  Reason = "Callee function not available";
+/// Attempt to promote indirect call and also inline the promoted call.
+///
+/// \param F  Caller function.
+/// \param Candidate  ICP and inline candidate.
+/// \param Sum  Sum of target counts for indirect call.
+/// \param PromotedInsns  Map to keep track of indirect call already processed.
+/// \param Candidate  ICP and inline candidate.
+/// \param InlinedCallSite  Output vector for new call sites exposed after
+/// inlining.
+bool SampleProfileLoader::tryPromoteAndInlineCandidate(
+    Function &F, InlineCandidate &Candidate, uint64_t &Sum,
+    DenseSet<Instruction *> &PromotedInsns,
+    SmallVector<CallBase *, 8> *InlinedCallSite) {
+  const char *Reason = "Callee function not available";
   // R->getValue() != &F is to prevent promoting a recursive call.
   // If it is a recursive call, we do not inline it as it could bloat
   // the code exponentially. There is way to better handle this, e.g.
   // clone the caller first, and inline the cloned caller if it is
   // recursive. As llvm does not inline recursive calls, we will
   // simply ignore it instead of handling it explicitly.
-  auto R = SymbolMap.find(CalleeName);
+  auto R = SymbolMap.find(Candidate.CalleeSamples->getFuncName());
   if (R != SymbolMap.end() && R->getValue() &&
       !R->getValue()->isDeclaration() && R->getValue()->getSubprogram() &&
       R->getValue()->hasFnAttribute("use-sample-profile") &&
-      R->getValue() != &F && isLegalToPromote(*I, R->getValue(), &Reason)) {
+      R->getValue() != &F &&
+      isLegalToPromote(*Candidate.CallInstr, R->getValue(), &Reason)) {
     auto *DI =
-        &pgo::promoteIndirectCall(*I, R->getValue(), Count, Sum, false, ORE);
-    Sum -= Count;
-    return DI;
-  }
-  return nullptr;
-}
-
-bool SampleProfileLoader::inlineCallInstruction(
-    CallBase &CB, const FunctionSamples *CalleeSamples) {
-  if (ExternalInlineAdvisor) {
-    auto Advice = ExternalInlineAdvisor->getAdvice(CB);
-    if (!Advice->isInliningRecommended()) {
-      Advice->recordUnattemptedInlining();
-      return false;
+        &pgo::promoteIndirectCall(*Candidate.CallInstr, R->getValue(),
+                                  Candidate.CallsiteCount, Sum, false, ORE);
+    if (DI) {
+      Sum -= Candidate.CallsiteCount;
+      PromotedInsns.insert(Candidate.CallInstr);
+      Candidate.CallInstr = DI;
+      if (isa<CallInst>(DI) || isa<InvokeInst>(DI))
+        return tryInlineCandidate(Candidate, InlinedCallSite);
     }
-    // Dummy record, we don't use it for replay.
-    Advice->recordInlining();
-  }
-
-  Function *CalledFunction = CB.getCalledFunction();
-  assert(CalledFunction);
-  DebugLoc DLoc = CB.getDebugLoc();
-  BasicBlock *BB = CB.getParent();
-  InlineParams Params = getInlineParams();
-  Params.ComputeFullInlineCost = true;
-  // Checks if there is anything in the reachable portion of the callee at
-  // this callsite that makes this inlining potentially illegal. Need to
-  // set ComputeFullInlineCost, otherwise getInlineCost may return early
-  // when cost exceeds threshold without checking all IRs in the callee.
-  // The acutal cost does not matter because we only checks isNever() to
-  // see if it is legal to inline the callsite.
-  InlineCost Cost =
-      getInlineCost(CB, Params, GetTTI(*CalledFunction), GetAC, GetTLI);
-  if (Cost.isNever()) {
-    ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB)
-              << "incompatible inlining");
-    return false;
-  }
-  InlineFunctionInfo IFI(nullptr, GetAC);
-  if (InlineFunction(CB, IFI).isSuccess()) {
-    // The call to InlineFunction erases I, so we can't pass it here.
-    emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost,
-                    true, CSINLINE_DEBUG);
-    if (ProfileIsCS)
-      ContextTracker->markContextSamplesInlined(CalleeSamples);
-    ++NumCSInlined;
-    return true;
+  } else {
+    LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "
+                      << CalleeFunctionName << " because " << Reason << "\n");
   }
   return false;
 }
@@ -1206,10 +1179,11 @@ bool SampleProfileLoader::inlineHotFunctions(
          "ProfAccForSymsInList should be false when profile-sample-accurate "
          "is enabled");
 
-  DenseMap<CallBase *, const FunctionSamples *> localNotInlinedCallSites;
+  DenseMap<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
   bool Changed = false;
-  while (true) {
-    bool LocalChanged = false;
+  bool LocalChanged = true;
+  while (LocalChanged) {
+    LocalChanged = false;
     SmallVector<CallBase *, 10> CIS;
     for (auto &BB : F) {
       bool Hot = false;
@@ -1223,7 +1197,7 @@ bool SampleProfileLoader::inlineHotFunctions(
                    "GUIDToFuncNameMap has to be populated");
             AllCandidates.push_back(CB);
             if (FS->getEntrySamples() > 0 || ProfileIsCS)
-              localNotInlinedCallSites.try_emplace(CB, FS);
+              LocalNotInlinedCallSites.try_emplace(CB, FS);
             if (callsiteIsHot(FS, PSI))
               Hot = true;
             else if (shouldInlineColdCallee(*CB))
@@ -1241,6 +1215,11 @@ bool SampleProfileLoader::inlineHotFunctions(
     }
     for (CallBase *I : CIS) {
       Function *CalledFunction = I->getCalledFunction();
+      InlineCandidate Candidate = {I,
+                                   LocalNotInlinedCallSites.count(I)
+                                       ? LocalNotInlinedCallSites[I]
+                                       : nullptr,
+                                   0 /* dummy count */};
       // Do not inline recursive calls.
       if (CalledFunction == &F)
         continue;
@@ -1257,30 +1236,16 @@ bool SampleProfileLoader::inlineHotFunctions(
           if (!callsiteIsHot(FS, PSI))
             continue;
 
-          const char *Reason = nullptr;
-          auto CalleeFunctionName = FS->getFuncName();
-          if (CallBase *DI =
-                  tryPromoteIndirectCall(F, CalleeFunctionName, Sum,
-                                         FS->getEntrySamples(), I, Reason)) {
-            PromotedInsns.insert(I);
-            // If profile mismatches, we should not attempt to inline DI.
-            if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&
-                inlineCallInstruction(cast<CallBase>(*DI), FS)) {
-              localNotInlinedCallSites.erase(I);
-              LocalChanged = true;
-            }
-          } else {
-            LLVM_DEBUG(dbgs()
-                       << "\nFailed to promote indirect call to "
-                       << CalleeFunctionName << " because " << Reason << "\n");
+          Candidate = {I, FS, FS->getEntrySamples()};
+          if (tryPromoteAndInlineCandidate(F, Candidate, Sum, PromotedInsns)) {
+            LocalNotInlinedCallSites.erase(I);
+            LocalChanged = true;
           }
         }
       } else if (CalledFunction && CalledFunction->getSubprogram() &&
                  !CalledFunction->isDeclaration()) {
-        if (inlineCallInstruction(*I, localNotInlinedCallSites.count(I)
-                                          ? localNotInlinedCallSites[I]
-                                          : nullptr)) {
-          localNotInlinedCallSites.erase(I);
+        if (tryInlineCandidate(Candidate)) {
+          LocalNotInlinedCallSites.erase(I);
           LocalChanged = true;
         }
       } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
@@ -1288,11 +1253,7 @@ bool SampleProfileLoader::inlineHotFunctions(
             InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold());
       }
     }
-    if (LocalChanged) {
-      Changed = true;
-    } else {
-      break;
-    }
+    Changed |= LocalChanged;
   }
 
   // For CS profile, profile for not inlined context will be merged when
@@ -1301,7 +1262,7 @@ bool SampleProfileLoader::inlineHotFunctions(
     return Changed;
 
   // Accumulate not inlined callsite information into notInlinedSamples
-  for (const auto &Pair : localNotInlinedCallSites) {
+  for (const auto &Pair : LocalNotInlinedCallSites) {
     CallBase *I = Pair.getFirst();
     Function *Callee = I->getCalledFunction();
     if (!Callee || Callee->isDeclaration())
@@ -1347,7 +1308,7 @@ bool SampleProfileLoader::inlineHotFunctions(
 }
 
 bool SampleProfileLoader::tryInlineCandidate(
-    InlineCandidate &Candidate, SmallVector<CallBase *, 8> &InlinedCallSites) {
+    InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) {
 
   CallBase &CB = *Candidate.CallInstr;
   Function *CalledFunction = CB.getCalledFunction();
@@ -1372,9 +1333,11 @@ bool SampleProfileLoader::tryInlineCandidate(
                     true, CSINLINE_DEBUG);
 
     // Now populate the list of newly exposed call sites.
-    InlinedCallSites.clear();
-    for (auto &I : IFI.InlinedCallSites)
-      InlinedCallSites.push_back(I);
+    if (InlinedCallSites) {
+      InlinedCallSites->clear();
+      for (auto &I : IFI.InlinedCallSites)
+        InlinedCallSites->push_back(I);
+    }
 
     if (ProfileIsCS)
       ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
@@ -1446,18 +1409,16 @@ SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
   InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params,
                                   GetTTI(*Callee), GetAC, GetTLI);
 
+  // Honor always inline and never inline from call analyzer
+  if (Cost.isNever() || Cost.isAlways())
+    return Cost;
+
   // For old FDO inliner, we inline the call site as long as cost is not
   // "Never". The cost-benefit check is done earlier.
   if (!CallsitePrioritizedInline) {
-    if (Cost.isNever())
-      return Cost;
-    return InlineCost::getAlways("hot callsite previously inlined");
+    return InlineCost::get(Cost.getCost(), INT_MAX);
   }
 
-  // Honor always inline and never inline from call analyzer
-  if (Cost.isNever() || Cost.isAlways())
-    return Cost;
-
   // Otherwise only use the cost from call analyzer, but overwite threshold with
   // Sample PGO threshold.
   return InlineCost::get(Cost.getCost(), SampleThreshold);
@@ -1542,34 +1503,23 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
         // fixed, but we generate 
diff erent types).
         if (!PSI->isHotCount(EntryCountDistributed))
           break;
-        const char *Reason = nullptr;
-        auto CalleeFunctionName = FS->getFuncName();
-        if (CallBase *DI = tryPromoteIndirectCall(
-                F, CalleeFunctionName, Sum, EntryCountDistributed, I, Reason)) {
-          // Attach function profile for promoted indirect callee, and update
-          // call site count for the promoted inline candidate too.
-          Candidate = {DI, FS, EntryCountDistributed};
-          PromotedInsns.insert(I);
-          SmallVector<CallBase *, 8> InlinedCallSites;
-          // If profile mismatches, we should not attempt to inline DI.
-          if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&
-              tryInlineCandidate(Candidate, InlinedCallSites)) {
-            for (auto *CB : InlinedCallSites) {
-              if (getInlineCandidate(&NewCandidate, CB))
-                CQueue.emplace(NewCandidate);
-            }
-            Changed = true;
+        SmallVector<CallBase *, 8> InlinedCallSites;
+        // Attach function profile for promoted indirect callee, and update
+        // call site count for the promoted inline candidate too.
+        Candidate = {I, FS, EntryCountDistributed};
+        if (tryPromoteAndInlineCandidate(F, Candidate, Sum, PromotedInsns,
+                                         &InlinedCallSites)) {
+          for (auto *CB : InlinedCallSites) {
+            if (getInlineCandidate(&NewCandidate, CB))
+              CQueue.emplace(NewCandidate);
           }
-        } else {
-          LLVM_DEBUG(dbgs()
-                     << "\nFailed to promote indirect call to "
-                     << CalleeFunctionName << " because " << Reason << "\n");
+          Changed = true;
         }
       }
     } else if (CalledFunction && CalledFunction->getSubprogram() &&
                !CalledFunction->isDeclaration()) {
       SmallVector<CallBase *, 8> InlinedCallSites;
-      if (tryInlineCandidate(Candidate, InlinedCallSites)) {
+      if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
         for (auto *CB : InlinedCallSites) {
           if (getInlineCandidate(&NewCandidate, CB))
             CQueue.emplace(NewCandidate);

diff  --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
index d47359fa0b5f..5359fd4da067 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
@@ -89,7 +89,7 @@ if.end:
 ;YAML-NEXT:    - String:          '(cost='
 ;YAML-NEXT:    - Cost:            '15'
 ;YAML-NEXT:    - String:          ', threshold='
-;YAML-NEXT:    - Threshold:       '225'
+;YAML-NEXT:    - Threshold:       '2147483647'
 ;YAML-NEXT:    - String:          ')'
 ;YAML-NEXT:    - String:          ' at callsite '
 ;YAML-NEXT:    - String:          foo

diff  --git a/llvm/test/Transforms/SampleProfile/remarks.ll b/llvm/test/Transforms/SampleProfile/remarks.ll
index 3add1e74abaa..46f016433b20 100644
--- a/llvm/test/Transforms/SampleProfile/remarks.ll
+++ b/llvm/test/Transforms/SampleProfile/remarks.ll
@@ -21,7 +21,7 @@
 
 ; We are expecting foo() to be inlined in main() (almost all the cycles are
 ; spent inside foo).
-; CHECK: remark: remarks.cc:13:21: _Z3foov inlined into main to match profiling context with (cost=130, threshold=225) at callsite main:0:21;
+; CHECK: remark: remarks.cc:13:21: _Z3foov inlined into main to match profiling context with (cost=130, threshold=2147483647) at callsite main:0:21;
 ; CHECK: remark: remarks.cc:9:19: rand inlined into main to match profiling context with (cost=always): always inline attribute at callsite _Z3foov:6:19 @ main:0:21;
 
 ; The back edge for the loop is the hottest edge in the loop subgraph.
@@ -47,7 +47,7 @@
 ;YAML-NEXT:    - String:          '(cost='
 ;YAML-NEXT:    - Cost:            '130'
 ;YAML-NEXT:    - String:          ', threshold='
-;YAML-NEXT:    - Threshold:       '225'
+;YAML-NEXT:    - Threshold:       '2147483647'
 ;YAML-NEXT:    - String:          ')'
 ;YAML-NEXT:    - String:          ' at callsite '
 ;YAML-NEXT:    - String:          main


        


More information about the llvm-commits mailing list