[llvm] 9a03058 - [CSSPGO] Factor out common part for CSSPGO inline and AFDO inline
Wenlei He via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 2 00:34:45 PST 2021
Author: Wenlei He
Date: 2021-02-02T00:34:06-08:00
New Revision: 9a03058d6322edb8abc803ba3e436cc62647d979
URL: https://github.com/llvm/llvm-project/commit/9a03058d6322edb8abc803ba3e436cc62647d979
DIFF: https://github.com/llvm/llvm-project/commit/9a03058d6322edb8abc803ba3e436cc62647d979.diff
LOG: [CSSPGO] Factor out common part for CSSPGO inline and AFDO inline
Refactoring SampleProfileLoader::inlineHotFunctions to use helpers from CSSPGO inlining and reduce similar code in the inlining loop, plus minor cleanup for AFDO path.
Test Plan:
Differential Revision: https://reviews.llvm.org/D95024
Added:
Modified:
llvm/lib/Transforms/IPO/SampleProfile.cpp
llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
llvm/test/Transforms/SampleProfile/remarks.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 665c4078f3ee..7865426dff16 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -416,20 +416,18 @@ class SampleProfileLoader {
findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
mutable DenseMap<const DILocation *, const FunctionSamples *> DILocation2SampleMap;
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
- CallBase *tryPromoteIndirectCall(Function &F, StringRef CalleeName,
- uint64_t &Sum, uint64_t Count, CallBase *I,
- const char *&Reason);
- bool inlineCallInstruction(CallBase &CB,
- const FunctionSamples *CalleeSamples);
+ // Attempt to promote indirect call and also inline the promoted call
+ bool tryPromoteAndInlineCandidate(
+ Function &F, InlineCandidate &Candidate, uint64_t &Sum,
+ DenseSet<Instruction *> &PromotedInsns,
+ SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
bool inlineHotFunctions(Function &F,
DenseSet<GlobalValue::GUID> &InlinedGUIDs);
- // Helper functions call-site prioritized BFS inliner
- // Will change the main FDO inliner to be work list based directly in
- // upstream, then merge this change with that and remove the duplication.
InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
- bool tryInlineCandidate(InlineCandidate &Candidate,
- SmallVector<CallBase *, 8> &InlinedCallSites);
+ bool
+ tryInlineCandidate(InlineCandidate &Candidate,
+ SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
bool
inlineHotFunctionsWithPriority(Function &F,
DenseSet<GlobalValue::GUID> &InlinedGUIDs);
@@ -1077,70 +1075,45 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
return it.first->second;
}
-CallBase *
-SampleProfileLoader::tryPromoteIndirectCall(Function &F, StringRef CalleeName,
- uint64_t &Sum, uint64_t Count,
- CallBase *I, const char *&Reason) {
- Reason = "Callee function not available";
+/// Attempt to promote indirect call and also inline the promoted call.
+///
+/// \param F Caller function.
+/// \param Candidate ICP and inline candidate.
+/// \param Sum Sum of target counts for indirect call.
+/// \param PromotedInsns Map to keep track of indirect call already processed.
+/// \param Candidate ICP and inline candidate.
+/// \param InlinedCallSite Output vector for new call sites exposed after
+/// inlining.
+bool SampleProfileLoader::tryPromoteAndInlineCandidate(
+ Function &F, InlineCandidate &Candidate, uint64_t &Sum,
+ DenseSet<Instruction *> &PromotedInsns,
+ SmallVector<CallBase *, 8> *InlinedCallSite) {
+ const char *Reason = "Callee function not available";
// R->getValue() != &F is to prevent promoting a recursive call.
// If it is a recursive call, we do not inline it as it could bloat
// the code exponentially. There is way to better handle this, e.g.
// clone the caller first, and inline the cloned caller if it is
// recursive. As llvm does not inline recursive calls, we will
// simply ignore it instead of handling it explicitly.
- auto R = SymbolMap.find(CalleeName);
+ auto R = SymbolMap.find(Candidate.CalleeSamples->getFuncName());
if (R != SymbolMap.end() && R->getValue() &&
!R->getValue()->isDeclaration() && R->getValue()->getSubprogram() &&
R->getValue()->hasFnAttribute("use-sample-profile") &&
- R->getValue() != &F && isLegalToPromote(*I, R->getValue(), &Reason)) {
+ R->getValue() != &F &&
+ isLegalToPromote(*Candidate.CallInstr, R->getValue(), &Reason)) {
auto *DI =
- &pgo::promoteIndirectCall(*I, R->getValue(), Count, Sum, false, ORE);
- Sum -= Count;
- return DI;
- }
- return nullptr;
-}
-
-bool SampleProfileLoader::inlineCallInstruction(
- CallBase &CB, const FunctionSamples *CalleeSamples) {
- if (ExternalInlineAdvisor) {
- auto Advice = ExternalInlineAdvisor->getAdvice(CB);
- if (!Advice->isInliningRecommended()) {
- Advice->recordUnattemptedInlining();
- return false;
+ &pgo::promoteIndirectCall(*Candidate.CallInstr, R->getValue(),
+ Candidate.CallsiteCount, Sum, false, ORE);
+ if (DI) {
+ Sum -= Candidate.CallsiteCount;
+ PromotedInsns.insert(Candidate.CallInstr);
+ Candidate.CallInstr = DI;
+ if (isa<CallInst>(DI) || isa<InvokeInst>(DI))
+ return tryInlineCandidate(Candidate, InlinedCallSite);
}
- // Dummy record, we don't use it for replay.
- Advice->recordInlining();
- }
-
- Function *CalledFunction = CB.getCalledFunction();
- assert(CalledFunction);
- DebugLoc DLoc = CB.getDebugLoc();
- BasicBlock *BB = CB.getParent();
- InlineParams Params = getInlineParams();
- Params.ComputeFullInlineCost = true;
- // Checks if there is anything in the reachable portion of the callee at
- // this callsite that makes this inlining potentially illegal. Need to
- // set ComputeFullInlineCost, otherwise getInlineCost may return early
- // when cost exceeds threshold without checking all IRs in the callee.
- // The acutal cost does not matter because we only checks isNever() to
- // see if it is legal to inline the callsite.
- InlineCost Cost =
- getInlineCost(CB, Params, GetTTI(*CalledFunction), GetAC, GetTLI);
- if (Cost.isNever()) {
- ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB)
- << "incompatible inlining");
- return false;
- }
- InlineFunctionInfo IFI(nullptr, GetAC);
- if (InlineFunction(CB, IFI).isSuccess()) {
- // The call to InlineFunction erases I, so we can't pass it here.
- emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost,
- true, CSINLINE_DEBUG);
- if (ProfileIsCS)
- ContextTracker->markContextSamplesInlined(CalleeSamples);
- ++NumCSInlined;
- return true;
+ } else {
+ LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "
+ << CalleeFunctionName << " because " << Reason << "\n");
}
return false;
}
@@ -1206,10 +1179,11 @@ bool SampleProfileLoader::inlineHotFunctions(
"ProfAccForSymsInList should be false when profile-sample-accurate "
"is enabled");
- DenseMap<CallBase *, const FunctionSamples *> localNotInlinedCallSites;
+ DenseMap<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
bool Changed = false;
- while (true) {
- bool LocalChanged = false;
+ bool LocalChanged = true;
+ while (LocalChanged) {
+ LocalChanged = false;
SmallVector<CallBase *, 10> CIS;
for (auto &BB : F) {
bool Hot = false;
@@ -1223,7 +1197,7 @@ bool SampleProfileLoader::inlineHotFunctions(
"GUIDToFuncNameMap has to be populated");
AllCandidates.push_back(CB);
if (FS->getEntrySamples() > 0 || ProfileIsCS)
- localNotInlinedCallSites.try_emplace(CB, FS);
+ LocalNotInlinedCallSites.try_emplace(CB, FS);
if (callsiteIsHot(FS, PSI))
Hot = true;
else if (shouldInlineColdCallee(*CB))
@@ -1241,6 +1215,11 @@ bool SampleProfileLoader::inlineHotFunctions(
}
for (CallBase *I : CIS) {
Function *CalledFunction = I->getCalledFunction();
+ InlineCandidate Candidate = {I,
+ LocalNotInlinedCallSites.count(I)
+ ? LocalNotInlinedCallSites[I]
+ : nullptr,
+ 0 /* dummy count */};
// Do not inline recursive calls.
if (CalledFunction == &F)
continue;
@@ -1257,30 +1236,16 @@ bool SampleProfileLoader::inlineHotFunctions(
if (!callsiteIsHot(FS, PSI))
continue;
- const char *Reason = nullptr;
- auto CalleeFunctionName = FS->getFuncName();
- if (CallBase *DI =
- tryPromoteIndirectCall(F, CalleeFunctionName, Sum,
- FS->getEntrySamples(), I, Reason)) {
- PromotedInsns.insert(I);
- // If profile mismatches, we should not attempt to inline DI.
- if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&
- inlineCallInstruction(cast<CallBase>(*DI), FS)) {
- localNotInlinedCallSites.erase(I);
- LocalChanged = true;
- }
- } else {
- LLVM_DEBUG(dbgs()
- << "\nFailed to promote indirect call to "
- << CalleeFunctionName << " because " << Reason << "\n");
+ Candidate = {I, FS, FS->getEntrySamples()};
+ if (tryPromoteAndInlineCandidate(F, Candidate, Sum, PromotedInsns)) {
+ LocalNotInlinedCallSites.erase(I);
+ LocalChanged = true;
}
}
} else if (CalledFunction && CalledFunction->getSubprogram() &&
!CalledFunction->isDeclaration()) {
- if (inlineCallInstruction(*I, localNotInlinedCallSites.count(I)
- ? localNotInlinedCallSites[I]
- : nullptr)) {
- localNotInlinedCallSites.erase(I);
+ if (tryInlineCandidate(Candidate)) {
+ LocalNotInlinedCallSites.erase(I);
LocalChanged = true;
}
} else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
@@ -1288,11 +1253,7 @@ bool SampleProfileLoader::inlineHotFunctions(
InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold());
}
}
- if (LocalChanged) {
- Changed = true;
- } else {
- break;
- }
+ Changed |= LocalChanged;
}
// For CS profile, profile for not inlined context will be merged when
@@ -1301,7 +1262,7 @@ bool SampleProfileLoader::inlineHotFunctions(
return Changed;
// Accumulate not inlined callsite information into notInlinedSamples
- for (const auto &Pair : localNotInlinedCallSites) {
+ for (const auto &Pair : LocalNotInlinedCallSites) {
CallBase *I = Pair.getFirst();
Function *Callee = I->getCalledFunction();
if (!Callee || Callee->isDeclaration())
@@ -1347,7 +1308,7 @@ bool SampleProfileLoader::inlineHotFunctions(
}
bool SampleProfileLoader::tryInlineCandidate(
- InlineCandidate &Candidate, SmallVector<CallBase *, 8> &InlinedCallSites) {
+ InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) {
CallBase &CB = *Candidate.CallInstr;
Function *CalledFunction = CB.getCalledFunction();
@@ -1372,9 +1333,11 @@ bool SampleProfileLoader::tryInlineCandidate(
true, CSINLINE_DEBUG);
// Now populate the list of newly exposed call sites.
- InlinedCallSites.clear();
- for (auto &I : IFI.InlinedCallSites)
- InlinedCallSites.push_back(I);
+ if (InlinedCallSites) {
+ InlinedCallSites->clear();
+ for (auto &I : IFI.InlinedCallSites)
+ InlinedCallSites->push_back(I);
+ }
if (ProfileIsCS)
ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
@@ -1446,18 +1409,16 @@ SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params,
GetTTI(*Callee), GetAC, GetTLI);
+ // Honor always inline and never inline from call analyzer
+ if (Cost.isNever() || Cost.isAlways())
+ return Cost;
+
// For old FDO inliner, we inline the call site as long as cost is not
// "Never". The cost-benefit check is done earlier.
if (!CallsitePrioritizedInline) {
- if (Cost.isNever())
- return Cost;
- return InlineCost::getAlways("hot callsite previously inlined");
+ return InlineCost::get(Cost.getCost(), INT_MAX);
}
- // Honor always inline and never inline from call analyzer
- if (Cost.isNever() || Cost.isAlways())
- return Cost;
-
// Otherwise only use the cost from call analyzer, but overwite threshold with
// Sample PGO threshold.
return InlineCost::get(Cost.getCost(), SampleThreshold);
@@ -1542,34 +1503,23 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
// fixed, but we generate
diff erent types).
if (!PSI->isHotCount(EntryCountDistributed))
break;
- const char *Reason = nullptr;
- auto CalleeFunctionName = FS->getFuncName();
- if (CallBase *DI = tryPromoteIndirectCall(
- F, CalleeFunctionName, Sum, EntryCountDistributed, I, Reason)) {
- // Attach function profile for promoted indirect callee, and update
- // call site count for the promoted inline candidate too.
- Candidate = {DI, FS, EntryCountDistributed};
- PromotedInsns.insert(I);
- SmallVector<CallBase *, 8> InlinedCallSites;
- // If profile mismatches, we should not attempt to inline DI.
- if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&
- tryInlineCandidate(Candidate, InlinedCallSites)) {
- for (auto *CB : InlinedCallSites) {
- if (getInlineCandidate(&NewCandidate, CB))
- CQueue.emplace(NewCandidate);
- }
- Changed = true;
+ SmallVector<CallBase *, 8> InlinedCallSites;
+ // Attach function profile for promoted indirect callee, and update
+ // call site count for the promoted inline candidate too.
+ Candidate = {I, FS, EntryCountDistributed};
+ if (tryPromoteAndInlineCandidate(F, Candidate, Sum, PromotedInsns,
+ &InlinedCallSites)) {
+ for (auto *CB : InlinedCallSites) {
+ if (getInlineCandidate(&NewCandidate, CB))
+ CQueue.emplace(NewCandidate);
}
- } else {
- LLVM_DEBUG(dbgs()
- << "\nFailed to promote indirect call to "
- << CalleeFunctionName << " because " << Reason << "\n");
+ Changed = true;
}
}
} else if (CalledFunction && CalledFunction->getSubprogram() &&
!CalledFunction->isDeclaration()) {
SmallVector<CallBase *, 8> InlinedCallSites;
- if (tryInlineCandidate(Candidate, InlinedCallSites)) {
+ if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
for (auto *CB : InlinedCallSites) {
if (getInlineCandidate(&NewCandidate, CB))
CQueue.emplace(NewCandidate);
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
index d47359fa0b5f..5359fd4da067 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
@@ -89,7 +89,7 @@ if.end:
;YAML-NEXT: - String: '(cost='
;YAML-NEXT: - Cost: '15'
;YAML-NEXT: - String: ', threshold='
-;YAML-NEXT: - Threshold: '225'
+;YAML-NEXT: - Threshold: '2147483647'
;YAML-NEXT: - String: ')'
;YAML-NEXT: - String: ' at callsite '
;YAML-NEXT: - String: foo
diff --git a/llvm/test/Transforms/SampleProfile/remarks.ll b/llvm/test/Transforms/SampleProfile/remarks.ll
index 3add1e74abaa..46f016433b20 100644
--- a/llvm/test/Transforms/SampleProfile/remarks.ll
+++ b/llvm/test/Transforms/SampleProfile/remarks.ll
@@ -21,7 +21,7 @@
; We are expecting foo() to be inlined in main() (almost all the cycles are
; spent inside foo).
-; CHECK: remark: remarks.cc:13:21: _Z3foov inlined into main to match profiling context with (cost=130, threshold=225) at callsite main:0:21;
+; CHECK: remark: remarks.cc:13:21: _Z3foov inlined into main to match profiling context with (cost=130, threshold=2147483647) at callsite main:0:21;
; CHECK: remark: remarks.cc:9:19: rand inlined into main to match profiling context with (cost=always): always inline attribute at callsite _Z3foov:6:19 @ main:0:21;
; The back edge for the loop is the hottest edge in the loop subgraph.
@@ -47,7 +47,7 @@
;YAML-NEXT: - String: '(cost='
;YAML-NEXT: - Cost: '130'
;YAML-NEXT: - String: ', threshold='
-;YAML-NEXT: - Threshold: '225'
+;YAML-NEXT: - Threshold: '2147483647'
;YAML-NEXT: - String: ')'
;YAML-NEXT: - String: ' at callsite '
;YAML-NEXT: - String: main
More information about the llvm-commits
mailing list