[llvm] [MemProf] Support cloning for indirect calls with ThinLTO (PR #110625)
Snehasish Kumar via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 11 13:01:17 PDT 2024
================
@@ -4457,121 +4425,172 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
}
}
- // Now do any promotion required for cloning. Specifically, for each
- // recorded ICP candidate (which was only recorded because one clone of that
- // candidate should call a cloned target), we perform ICP (speculative
- // devirtualization) for each clone of the callsite, and update its callee
- // to the appropriate clone. Note that the ICP compares against the original
- // version of the target, which is what is in the vtable.
- for (auto &ICallInfo : ICallAnalysisMap) {
- auto *CB = ICallInfo.first;
- auto &Info = ICallInfo.second;
- auto CallsiteIndex = Info.CallsiteInfoStartIndex;
- auto TotalCount = Info.TotalCount;
- unsigned NumPromoted = 0;
- unsigned NumClones = 0;
-
- for (auto &Candidate : Info.CandidateProfileData) {
- auto &StackNode = FS->callsites()[CallsiteIndex++];
-
- // All calls in the same function must have the same number of clones.
- assert(!NumClones || NumClones == StackNode.Clones.size());
- NumClones = StackNode.Clones.size();
-
- // See if the target is in the module. If it wasn't imported, it is
- // possible that this profile could have been collected on a different
- // target (or version of the code), and we need to be conservative
- // (similar to what is done in the ICP pass).
- Function *TargetFunction = Symtab.getFunction(Candidate.Value);
- if (TargetFunction == nullptr || TargetFunction->isDeclaration()) {
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget",
- CB)
- << "Cannot promote indirect call: target with md5sum "
- << ore::NV("target md5sum", Candidate.Value) << " not found";
- });
- // FIXME: See if we can use the new declaration importing support to
- // at least get the declarations imported for this case. Hot indirect
- // targets should have been imported normally, however.
- continue;
- }
+ // Now do any promotion required for cloning.
+ performICP(M, FS->callsites(), VMaps, ICallAnalysisInfo, ORE);
+ }
- // Check if legal to promote
- const char *Reason = nullptr;
- if (!isLegalToPromote(*CB, TargetFunction, &Reason)) {
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", CB)
- << "Cannot promote indirect call to "
- << ore::NV("TargetFunction", TargetFunction)
- << " with count of " << ore::NV("TotalCount", TotalCount)
- << ": " << Reason;
- });
- continue;
- }
+ return Changed;
+}
- assert(!IsMemProfClone(*TargetFunction));
-
- // Handle each call clone, applying ICP so that each clone directly
- // calls the specified callee clone, guarded by the appropriate ICP
- // check.
- auto CalleeOrigName = TargetFunction->getName();
- for (unsigned J = 0; J < NumClones; J++) {
- CallBase *CBClone;
- // Copy 0 is the original function.
- if (!J)
- CBClone = CB;
- else
- CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
- // We do the promotion using the original name, so that the comparison
- // is against the name in the vtable. Then just below, change the new
- // direct call to call the cloned function.
- auto &DirectCall = pgo::promoteIndirectCall(
- *CBClone, TargetFunction, Candidate.Count, TotalCount, SamplePGO,
- &ORE);
- auto *TargetToUse = TargetFunction;
- // Call original if this version calls the original version of its
- // callee.
- if (StackNode.Clones[J])
- TargetToUse = cast<Function>(
- M.getOrInsertFunction(
- getMemProfFuncName(CalleeOrigName, StackNode.Clones[J]),
- TargetFunction->getFunctionType())
- .getCallee());
- DirectCall.setCalledFunction(TargetToUse);
- ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone)
- << ore::NV("Call", CBClone) << " in clone "
- << ore::NV("Caller", CBClone->getFunction())
- << " promoted and assigned to call function clone "
- << ore::NV("Callee", TargetToUse));
- }
+unsigned MemProfContextDisambiguation::recordICPInfo(
+ CallBase *CB, ArrayRef<CallsiteInfo> AllCallsites,
+ ArrayRef<CallsiteInfo>::iterator &SI,
+ SmallVector<ICallAnalysisData> &ICallAnalysisInfo) {
+ // First see if we have profile information for this indirect call.
+ uint32_t NumCandidates;
+ uint64_t TotalCount;
+ auto CandidateProfileData =
+ ICallAnalysis->getPromotionCandidatesForInstruction(CB, TotalCount,
+ NumCandidates);
+ if (CandidateProfileData.empty())
+ return 0;
+
+ // Iterate through all of the candidate profiled targets along with the
+ // CallsiteInfo summary records synthesized for them when building the index,
+ // and see if any are cloned and/or refer to clones.
+ bool ICPNeeded = false;
+ unsigned NumClones = 0;
+ size_t CallsiteInfoStartIndex = std::distance(AllCallsites.begin(), SI);
+ for (const auto &Candidate : CandidateProfileData) {
+#ifndef NDEBUG
+ auto CalleeValueInfo =
+#endif
+ ImportSummary->getValueInfo(Candidate.Value);
+ // We might not have a ValueInfo if this is a distributed
+ // ThinLTO backend and decided not to import that function.
+ assert(!CalleeValueInfo || SI->Callee == CalleeValueInfo);
+ assert(SI != AllCallsites.end());
+ auto &StackNode = *(SI++);
+ // See if any of the clones of the indirect callsite for this
+ // profiled target should call a cloned version of the profiled
+ // target. We only need to do the ICP here if so.
+ ICPNeeded |= llvm::any_of(StackNode.Clones,
+ [](unsigned CloneNo) { return CloneNo != 0; });
+ // Every callsite in the same function should have been cloned the same
+ // number of times.
+ assert(!NumClones || NumClones == StackNode.Clones.size());
+ NumClones = StackNode.Clones.size();
+ }
+ if (!ICPNeeded)
+ return NumClones;
+ // Save information for ICP, which is performed later to avoid messing up the
+ // current function traversal.
+ ICallAnalysisInfo.push_back({CB, CandidateProfileData.vec(), NumCandidates,
+ TotalCount, CallsiteInfoStartIndex});
+ return NumClones;
+}
+
+void MemProfContextDisambiguation::performICP(
+ Module &M, ArrayRef<CallsiteInfo> AllCallsites,
+ SmallVectorImpl<std::unique_ptr<ValueToValueMapTy>> &VMaps,
+ SmallVector<ICallAnalysisData> &ICallAnalysisInfo,
----------------
snehasish wrote:
Can these be ArrayRefs?
https://github.com/llvm/llvm-project/pull/110625
More information about the llvm-commits mailing list