[llvm] 1645f46 - [CSSPGO] Factor out common part for CSSPGO inline and AFDO inline

Wenlei He via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 2 07:55:25 PST 2021


Author: Wenlei He
Date: 2021-02-02T07:55:08-08:00
New Revision: 1645f465be85223e9f5b6303a3e5e0e491fd819f

URL: https://github.com/llvm/llvm-project/commit/1645f465be85223e9f5b6303a3e5e0e491fd819f
DIFF: https://github.com/llvm/llvm-project/commit/1645f465be85223e9f5b6303a3e5e0e491fd819f.diff

LOG: [CSSPGO] Factor out common part for CSSPGO inline and AFDO inline

Refactoring SampleProfileLoader::inlineHotFunctions to use helpers from CSSPGO inlining and reduce similar code in the inlining loop, plus minor cleanup for AFDO path.

This is resubmit of D95024, with build break and overtighten assertion fixed.

Test Plan:

Added: 
    

Modified: 
    llvm/lib/Transforms/IPO/SampleProfile.cpp
    llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
    llvm/test/Transforms/SampleProfile/remarks.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 665c4078f3ee..2cfefd3a18ea 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -416,20 +416,18 @@ class SampleProfileLoader {
   findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
   mutable DenseMap<const DILocation *, const FunctionSamples *> DILocation2SampleMap;
   const FunctionSamples *findFunctionSamples(const Instruction &I) const;
-  CallBase *tryPromoteIndirectCall(Function &F, StringRef CalleeName,
-                                   uint64_t &Sum, uint64_t Count, CallBase *I,
-                                   const char *&Reason);
-  bool inlineCallInstruction(CallBase &CB,
-                             const FunctionSamples *CalleeSamples);
+  // Attempt to promote indirect call and also inline the promoted call
+  bool tryPromoteAndInlineCandidate(
+      Function &F, InlineCandidate &Candidate, uint64_t &Sum,
+      DenseSet<Instruction *> &PromotedInsns,
+      SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
   bool inlineHotFunctions(Function &F,
                           DenseSet<GlobalValue::GUID> &InlinedGUIDs);
-  // Helper functions call-site prioritized BFS inliner
-  // Will change the main FDO inliner to be work list based directly in
-  // upstream, then merge this change with that and remove the duplication.
   InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
   bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
-  bool tryInlineCandidate(InlineCandidate &Candidate,
-                          SmallVector<CallBase *, 8> &InlinedCallSites);
+  bool
+  tryInlineCandidate(InlineCandidate &Candidate,
+                     SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
   bool
   inlineHotFunctionsWithPriority(Function &F,
                                  DenseSet<GlobalValue::GUID> &InlinedGUIDs);
@@ -1077,70 +1075,46 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
   return it.first->second;
 }
 
-CallBase *
-SampleProfileLoader::tryPromoteIndirectCall(Function &F, StringRef CalleeName,
-                                            uint64_t &Sum, uint64_t Count,
-                                            CallBase *I, const char *&Reason) {
-  Reason = "Callee function not available";
+/// Attempt to promote indirect call and also inline the promoted call.
+///
+/// \param F  Caller function.
+/// \param Candidate  ICP and inline candidate.
+/// \param Sum  Sum of target counts for indirect call.
+/// \param PromotedInsns  Map to keep track of indirect call already processed.
+/// \param Candidate  ICP and inline candidate.
+/// \param InlinedCallSite  Output vector for new call sites exposed after
+/// inlining.
+bool SampleProfileLoader::tryPromoteAndInlineCandidate(
+    Function &F, InlineCandidate &Candidate, uint64_t &Sum,
+    DenseSet<Instruction *> &PromotedInsns,
+    SmallVector<CallBase *, 8> *InlinedCallSite) {
+  const char *Reason = "Callee function not available";
   // R->getValue() != &F is to prevent promoting a recursive call.
   // If it is a recursive call, we do not inline it as it could bloat
   // the code exponentially. There is way to better handle this, e.g.
   // clone the caller first, and inline the cloned caller if it is
   // recursive. As llvm does not inline recursive calls, we will
   // simply ignore it instead of handling it explicitly.
-  auto R = SymbolMap.find(CalleeName);
+  auto R = SymbolMap.find(Candidate.CalleeSamples->getFuncName());
   if (R != SymbolMap.end() && R->getValue() &&
       !R->getValue()->isDeclaration() && R->getValue()->getSubprogram() &&
       R->getValue()->hasFnAttribute("use-sample-profile") &&
-      R->getValue() != &F && isLegalToPromote(*I, R->getValue(), &Reason)) {
+      R->getValue() != &F &&
+      isLegalToPromote(*Candidate.CallInstr, R->getValue(), &Reason)) {
     auto *DI =
-        &pgo::promoteIndirectCall(*I, R->getValue(), Count, Sum, false, ORE);
-    Sum -= Count;
-    return DI;
-  }
-  return nullptr;
-}
-
-bool SampleProfileLoader::inlineCallInstruction(
-    CallBase &CB, const FunctionSamples *CalleeSamples) {
-  if (ExternalInlineAdvisor) {
-    auto Advice = ExternalInlineAdvisor->getAdvice(CB);
-    if (!Advice->isInliningRecommended()) {
-      Advice->recordUnattemptedInlining();
-      return false;
+        &pgo::promoteIndirectCall(*Candidate.CallInstr, R->getValue(),
+                                  Candidate.CallsiteCount, Sum, false, ORE);
+    if (DI) {
+      Sum -= Candidate.CallsiteCount;
+      PromotedInsns.insert(Candidate.CallInstr);
+      Candidate.CallInstr = DI;
+      if (isa<CallInst>(DI) || isa<InvokeInst>(DI))
+        return tryInlineCandidate(Candidate, InlinedCallSite);
     }
-    // Dummy record, we don't use it for replay.
-    Advice->recordInlining();
-  }
-
-  Function *CalledFunction = CB.getCalledFunction();
-  assert(CalledFunction);
-  DebugLoc DLoc = CB.getDebugLoc();
-  BasicBlock *BB = CB.getParent();
-  InlineParams Params = getInlineParams();
-  Params.ComputeFullInlineCost = true;
-  // Checks if there is anything in the reachable portion of the callee at
-  // this callsite that makes this inlining potentially illegal. Need to
-  // set ComputeFullInlineCost, otherwise getInlineCost may return early
-  // when cost exceeds threshold without checking all IRs in the callee.
-  // The acutal cost does not matter because we only checks isNever() to
-  // see if it is legal to inline the callsite.
-  InlineCost Cost =
-      getInlineCost(CB, Params, GetTTI(*CalledFunction), GetAC, GetTLI);
-  if (Cost.isNever()) {
-    ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB)
-              << "incompatible inlining");
-    return false;
-  }
-  InlineFunctionInfo IFI(nullptr, GetAC);
-  if (InlineFunction(CB, IFI).isSuccess()) {
-    // The call to InlineFunction erases I, so we can't pass it here.
-    emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost,
-                    true, CSINLINE_DEBUG);
-    if (ProfileIsCS)
-      ContextTracker->markContextSamplesInlined(CalleeSamples);
-    ++NumCSInlined;
-    return true;
+  } else {
+    LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "
+                      << Candidate.CalleeSamples->getFuncName() << " because "
+                      << Reason << "\n");
   }
   return false;
 }
@@ -1206,10 +1180,11 @@ bool SampleProfileLoader::inlineHotFunctions(
          "ProfAccForSymsInList should be false when profile-sample-accurate "
          "is enabled");
 
-  DenseMap<CallBase *, const FunctionSamples *> localNotInlinedCallSites;
+  DenseMap<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
   bool Changed = false;
-  while (true) {
-    bool LocalChanged = false;
+  bool LocalChanged = true;
+  while (LocalChanged) {
+    LocalChanged = false;
     SmallVector<CallBase *, 10> CIS;
     for (auto &BB : F) {
       bool Hot = false;
@@ -1223,7 +1198,7 @@ bool SampleProfileLoader::inlineHotFunctions(
                    "GUIDToFuncNameMap has to be populated");
             AllCandidates.push_back(CB);
             if (FS->getEntrySamples() > 0 || ProfileIsCS)
-              localNotInlinedCallSites.try_emplace(CB, FS);
+              LocalNotInlinedCallSites.try_emplace(CB, FS);
             if (callsiteIsHot(FS, PSI))
               Hot = true;
             else if (shouldInlineColdCallee(*CB))
@@ -1241,6 +1216,11 @@ bool SampleProfileLoader::inlineHotFunctions(
     }
     for (CallBase *I : CIS) {
       Function *CalledFunction = I->getCalledFunction();
+      InlineCandidate Candidate = {I,
+                                   LocalNotInlinedCallSites.count(I)
+                                       ? LocalNotInlinedCallSites[I]
+                                       : nullptr,
+                                   0 /* dummy count */};
       // Do not inline recursive calls.
       if (CalledFunction == &F)
         continue;
@@ -1257,30 +1237,16 @@ bool SampleProfileLoader::inlineHotFunctions(
           if (!callsiteIsHot(FS, PSI))
             continue;
 
-          const char *Reason = nullptr;
-          auto CalleeFunctionName = FS->getFuncName();
-          if (CallBase *DI =
-                  tryPromoteIndirectCall(F, CalleeFunctionName, Sum,
-                                         FS->getEntrySamples(), I, Reason)) {
-            PromotedInsns.insert(I);
-            // If profile mismatches, we should not attempt to inline DI.
-            if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&
-                inlineCallInstruction(cast<CallBase>(*DI), FS)) {
-              localNotInlinedCallSites.erase(I);
-              LocalChanged = true;
-            }
-          } else {
-            LLVM_DEBUG(dbgs()
-                       << "\nFailed to promote indirect call to "
-                       << CalleeFunctionName << " because " << Reason << "\n");
+          Candidate = {I, FS, FS->getEntrySamples()};
+          if (tryPromoteAndInlineCandidate(F, Candidate, Sum, PromotedInsns)) {
+            LocalNotInlinedCallSites.erase(I);
+            LocalChanged = true;
           }
         }
       } else if (CalledFunction && CalledFunction->getSubprogram() &&
                  !CalledFunction->isDeclaration()) {
-        if (inlineCallInstruction(*I, localNotInlinedCallSites.count(I)
-                                          ? localNotInlinedCallSites[I]
-                                          : nullptr)) {
-          localNotInlinedCallSites.erase(I);
+        if (tryInlineCandidate(Candidate)) {
+          LocalNotInlinedCallSites.erase(I);
           LocalChanged = true;
         }
       } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
@@ -1288,11 +1254,7 @@ bool SampleProfileLoader::inlineHotFunctions(
             InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold());
       }
     }
-    if (LocalChanged) {
-      Changed = true;
-    } else {
-      break;
-    }
+    Changed |= LocalChanged;
   }
 
   // For CS profile, profile for not inlined context will be merged when
@@ -1301,7 +1263,7 @@ bool SampleProfileLoader::inlineHotFunctions(
     return Changed;
 
   // Accumulate not inlined callsite information into notInlinedSamples
-  for (const auto &Pair : localNotInlinedCallSites) {
+  for (const auto &Pair : LocalNotInlinedCallSites) {
     CallBase *I = Pair.getFirst();
     Function *Callee = I->getCalledFunction();
     if (!Callee || Callee->isDeclaration())
@@ -1347,7 +1309,7 @@ bool SampleProfileLoader::inlineHotFunctions(
 }
 
 bool SampleProfileLoader::tryInlineCandidate(
-    InlineCandidate &Candidate, SmallVector<CallBase *, 8> &InlinedCallSites) {
+    InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) {
 
   CallBase &CB = *Candidate.CallInstr;
   Function *CalledFunction = CB.getCalledFunction();
@@ -1372,9 +1334,11 @@ bool SampleProfileLoader::tryInlineCandidate(
                     true, CSINLINE_DEBUG);
 
     // Now populate the list of newly exposed call sites.
-    InlinedCallSites.clear();
-    for (auto &I : IFI.InlinedCallSites)
-      InlinedCallSites.push_back(I);
+    if (InlinedCallSites) {
+      InlinedCallSites->clear();
+      for (auto &I : IFI.InlinedCallSites)
+        InlinedCallSites->push_back(I);
+    }
 
     if (ProfileIsCS)
       ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
@@ -1409,8 +1373,6 @@ bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
 
 InlineCost
 SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
-  assert(ProfileIsCS && "Prioritiy based inliner only works with CSSPGO now");
-
   std::unique_ptr<InlineAdvice> Advice = nullptr;
   if (ExternalInlineAdvisor) {
     Advice = ExternalInlineAdvisor->getAdvice(*Candidate.CallInstr);
@@ -1446,18 +1408,16 @@ SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
   InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params,
                                   GetTTI(*Callee), GetAC, GetTLI);
 
+  // Honor always inline and never inline from call analyzer
+  if (Cost.isNever() || Cost.isAlways())
+    return Cost;
+
   // For old FDO inliner, we inline the call site as long as cost is not
   // "Never". The cost-benefit check is done earlier.
   if (!CallsitePrioritizedInline) {
-    if (Cost.isNever())
-      return Cost;
-    return InlineCost::getAlways("hot callsite previously inlined");
+    return InlineCost::get(Cost.getCost(), INT_MAX);
   }
 
-  // Honor always inline and never inline from call analyzer
-  if (Cost.isNever() || Cost.isAlways())
-    return Cost;
-
   // Otherwise only use the cost from call analyzer, but overwite threshold with
   // Sample PGO threshold.
   return InlineCost::get(Cost.getCost(), SampleThreshold);
@@ -1542,34 +1502,23 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
         // fixed, but we generate 
diff erent types).
         if (!PSI->isHotCount(EntryCountDistributed))
           break;
-        const char *Reason = nullptr;
-        auto CalleeFunctionName = FS->getFuncName();
-        if (CallBase *DI = tryPromoteIndirectCall(
-                F, CalleeFunctionName, Sum, EntryCountDistributed, I, Reason)) {
-          // Attach function profile for promoted indirect callee, and update
-          // call site count for the promoted inline candidate too.
-          Candidate = {DI, FS, EntryCountDistributed};
-          PromotedInsns.insert(I);
-          SmallVector<CallBase *, 8> InlinedCallSites;
-          // If profile mismatches, we should not attempt to inline DI.
-          if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&
-              tryInlineCandidate(Candidate, InlinedCallSites)) {
-            for (auto *CB : InlinedCallSites) {
-              if (getInlineCandidate(&NewCandidate, CB))
-                CQueue.emplace(NewCandidate);
-            }
-            Changed = true;
+        SmallVector<CallBase *, 8> InlinedCallSites;
+        // Attach function profile for promoted indirect callee, and update
+        // call site count for the promoted inline candidate too.
+        Candidate = {I, FS, EntryCountDistributed};
+        if (tryPromoteAndInlineCandidate(F, Candidate, Sum, PromotedInsns,
+                                         &InlinedCallSites)) {
+          for (auto *CB : InlinedCallSites) {
+            if (getInlineCandidate(&NewCandidate, CB))
+              CQueue.emplace(NewCandidate);
           }
-        } else {
-          LLVM_DEBUG(dbgs()
-                     << "\nFailed to promote indirect call to "
-                     << CalleeFunctionName << " because " << Reason << "\n");
+          Changed = true;
         }
       }
     } else if (CalledFunction && CalledFunction->getSubprogram() &&
                !CalledFunction->isDeclaration()) {
       SmallVector<CallBase *, 8> InlinedCallSites;
-      if (tryInlineCandidate(Candidate, InlinedCallSites)) {
+      if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
         for (auto *CB : InlinedCallSites) {
           if (getInlineCandidate(&NewCandidate, CB))
             CQueue.emplace(NewCandidate);

diff  --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
index d47359fa0b5f..5359fd4da067 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
@@ -89,7 +89,7 @@ if.end:
 ;YAML-NEXT:    - String:          '(cost='
 ;YAML-NEXT:    - Cost:            '15'
 ;YAML-NEXT:    - String:          ', threshold='
-;YAML-NEXT:    - Threshold:       '225'
+;YAML-NEXT:    - Threshold:       '2147483647'
 ;YAML-NEXT:    - String:          ')'
 ;YAML-NEXT:    - String:          ' at callsite '
 ;YAML-NEXT:    - String:          foo

diff  --git a/llvm/test/Transforms/SampleProfile/remarks.ll b/llvm/test/Transforms/SampleProfile/remarks.ll
index 3add1e74abaa..46f016433b20 100644
--- a/llvm/test/Transforms/SampleProfile/remarks.ll
+++ b/llvm/test/Transforms/SampleProfile/remarks.ll
@@ -21,7 +21,7 @@
 
 ; We are expecting foo() to be inlined in main() (almost all the cycles are
 ; spent inside foo).
-; CHECK: remark: remarks.cc:13:21: _Z3foov inlined into main to match profiling context with (cost=130, threshold=225) at callsite main:0:21;
+; CHECK: remark: remarks.cc:13:21: _Z3foov inlined into main to match profiling context with (cost=130, threshold=2147483647) at callsite main:0:21;
 ; CHECK: remark: remarks.cc:9:19: rand inlined into main to match profiling context with (cost=always): always inline attribute at callsite _Z3foov:6:19 @ main:0:21;
 
 ; The back edge for the loop is the hottest edge in the loop subgraph.
@@ -47,7 +47,7 @@
 ;YAML-NEXT:    - String:          '(cost='
 ;YAML-NEXT:    - Cost:            '130'
 ;YAML-NEXT:    - String:          ', threshold='
-;YAML-NEXT:    - Threshold:       '225'
+;YAML-NEXT:    - Threshold:       '2147483647'
 ;YAML-NEXT:    - String:          ')'
 ;YAML-NEXT:    - String:          ' at callsite '
 ;YAML-NEXT:    - String:          main


        


More information about the llvm-commits mailing list