[llvm] [MemProf] Support cloning for indirect calls with ThinLTO (PR #110625)
Snehasish Kumar via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 4 10:47:54 PDT 2024
================
@@ -4075,24 +4367,80 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
<< ore::NV("Attribute", AllocTypeString));
}
} else if (!CallsiteContext.empty()) {
- // Consult the next callsite node.
- assert(SI != FS->callsites().end());
- auto &StackNode = *(SI++);
-
+ if (!CalledFunction) {
+ // This is an indirect call, see if we have profile information and
+ // whether any clones were recorded for the profiled targets (that
+ // we synthesized CallsiteInfo summary records for when building the
+ // index).
#ifndef NDEBUG
- // Sanity check that the stack ids match between the summary and
- // instruction metadata.
- auto StackIdIndexIter = StackNode.StackIdIndices.begin();
- for (auto StackId : CallsiteContext) {
- assert(StackIdIndexIter != StackNode.StackIdIndices.end());
- assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
- StackId);
- StackIdIndexIter++;
+ // We should have skipped inline assembly calls.
+ auto *CI = dyn_cast<CallInst>(CB);
+ assert(!CI || !CI->isInlineAsm());
+#endif
+ // We should have skipped direct calls via a Constant.
+ assert(CalledValue && !isa<Constant>(CalledValue));
+
+ uint32_t NumCandidates;
+ uint64_t TotalCount;
+ auto CandidateProfileData =
+ ICallAnalysis.getPromotionCandidatesForInstruction(
+ CB, TotalCount, NumCandidates);
+ if (!CandidateProfileData.empty()) {
+ unsigned CallsiteInfoStartStartIndex =
+ static_cast<unsigned int>(SI - FS->callsites().begin());
+ // Iterate past all of the associated callsites nodes and check
+ // them.
+ for (const auto &Candidate : CandidateProfileData) {
+#ifndef NDEBUG
+ auto CalleeValueInfo =
+#endif
+ ImportSummary->getValueInfo(Candidate.Value);
+ // We might not have a ValueInfo if this is a distributed
+ // ThinLTO backend and decided not to import that function.
+ assert(!CalleeValueInfo || SI->Callee == CalleeValueInfo);
+ assert(SI != FS->callsites().end());
+ auto &StackNode = *(SI++);
+ // See if any of the clones of the indirect callsite for this
+ // profiled target should call a cloned version of the profiled
+ // target. We only need to do the ICP here if so.
+ for (auto CloneNo : StackNode.Clones) {
+ if (!CloneNo)
+ continue;
+ // Save information for ICP, which is performed later to avoid
+ // messing up the current function traversal.
+ ICallAnalysisMap[CB] = {CandidateProfileData.vec(),
+ NumCandidates, TotalCount,
+ CallsiteInfoStartStartIndex};
+ break;
+ }
+ // Perform cloning if not yet done. This is done here in case
+ // we don't need to do ICP, but might need to clone this
+ // function as it is the target of other cloned calls.
+ CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size());
+ }
+ }
}
+
+ else {
+ // Consult the next callsite node.
+ assert(SI != FS->callsites().end());
+ auto &StackNode = *(SI++);
+
+#ifndef NDEBUG
+ // Sanity check that the stack ids match between the summary and
+ // instruction metadata.
+ auto StackIdIndexIter = StackNode.StackIdIndices.begin();
+ for (auto StackId : CallsiteContext) {
+ assert(StackIdIndexIter != StackNode.StackIdIndices.end());
+ assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
+ StackId);
+ StackIdIndexIter++;
+ }
#endif
- CloneCallsite(StackNode, CB, CalledFunction);
- } else if (CB->isTailCall()) {
+ CloneCallsite(StackNode, CB, CalledFunction);
+ }
+ } else if (CB->isTailCall() && CalledFunction) {
----------------
snehasish wrote:
Is it necessary to check `CalledFunction` here? I believe a tail call should always be direct, in which case `CalledFunction` would always be non-null.
If you agree, an assert may be more appropriate than the extra condition.
https://github.com/llvm/llvm-project/pull/110625
More information about the llvm-commits mailing list