[llvm] [MemProf] Support cloning for indirect calls with ThinLTO (PR #110625)

Snehasish Kumar via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 11 13:01:17 PDT 2024


================
@@ -4457,121 +4425,172 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
       }
     }
 
-    // Now do any promotion required for cloning. Specifically, for each
-    // recorded ICP candidate (which was only recorded because one clone of that
-    // candidate should call a cloned target), we perform ICP (speculative
-    // devirtualization) for each clone of the callsite, and update its callee
-    // to the appropriate clone. Note that the ICP compares against the original
-    // version of the target, which is what is in the vtable.
-    for (auto &ICallInfo : ICallAnalysisMap) {
-      auto *CB = ICallInfo.first;
-      auto &Info = ICallInfo.second;
-      auto CallsiteIndex = Info.CallsiteInfoStartIndex;
-      auto TotalCount = Info.TotalCount;
-      unsigned NumPromoted = 0;
-      unsigned NumClones = 0;
-
-      for (auto &Candidate : Info.CandidateProfileData) {
-        auto &StackNode = FS->callsites()[CallsiteIndex++];
-
-        // All calls in the same function must have the same number of clones.
-        assert(!NumClones || NumClones == StackNode.Clones.size());
-        NumClones = StackNode.Clones.size();
-
-        // See if the target is in the module. If it wasn't imported, it is
-        // possible that this profile could have been collected on a different
-        // target (or version of the code), and we need to be conservative
-        // (similar to what is done in the ICP pass).
-        Function *TargetFunction = Symtab.getFunction(Candidate.Value);
-        if (TargetFunction == nullptr || TargetFunction->isDeclaration()) {
-          ORE.emit([&]() {
-            return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget",
-                                            CB)
-                   << "Cannot promote indirect call: target with md5sum "
-                   << ore::NV("target md5sum", Candidate.Value) << " not found";
-          });
-          // FIXME: See if we can use the new declaration importing support to
-          // at least get the declarations imported for this case. Hot indirect
-          // targets should have been imported normally, however.
-          continue;
-        }
+    // Now do any promotion required for cloning.
+    performICP(M, FS->callsites(), VMaps, ICallAnalysisInfo, ORE);
+  }
 
-        // Check if legal to promote
-        const char *Reason = nullptr;
-        if (!isLegalToPromote(*CB, TargetFunction, &Reason)) {
-          ORE.emit([&]() {
-            return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", CB)
-                   << "Cannot promote indirect call to "
-                   << ore::NV("TargetFunction", TargetFunction)
-                   << " with count of " << ore::NV("TotalCount", TotalCount)
-                   << ": " << Reason;
-          });
-          continue;
-        }
+  return Changed;
+}
 
-        assert(!IsMemProfClone(*TargetFunction));
-
-        // Handle each call clone, applying ICP so that each clone directly
-        // calls the specified callee clone, guarded by the appropriate ICP
-        // check.
-        auto CalleeOrigName = TargetFunction->getName();
-        for (unsigned J = 0; J < NumClones; J++) {
-          CallBase *CBClone;
-          // Copy 0 is the original function.
-          if (!J)
-            CBClone = CB;
-          else
-            CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
-          // We do the promotion using the original name, so that the comparison
-          // is against the name in the vtable. Then just below, change the new
-          // direct call to call the cloned function.
-          auto &DirectCall = pgo::promoteIndirectCall(
-              *CBClone, TargetFunction, Candidate.Count, TotalCount, SamplePGO,
-              &ORE);
-          auto *TargetToUse = TargetFunction;
-          // Call original if this version calls the original version of its
-          // callee.
-          if (StackNode.Clones[J])
-            TargetToUse = cast<Function>(
-                M.getOrInsertFunction(
-                     getMemProfFuncName(CalleeOrigName, StackNode.Clones[J]),
-                     TargetFunction->getFunctionType())
-                    .getCallee());
-          DirectCall.setCalledFunction(TargetToUse);
-          ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone)
-                   << ore::NV("Call", CBClone) << " in clone "
-                   << ore::NV("Caller", CBClone->getFunction())
-                   << " promoted and assigned to call function clone "
-                   << ore::NV("Callee", TargetToUse));
-        }
+/// Gather the indirect call promotion (ICP) information for \p CB.
+///
+/// Queries the value profile candidates for \p CB and, for each candidate,
+/// consumes one CallsiteInfo record from \p AllCallsites by advancing \p SI
+/// (which is taken by reference, so the caller observes the new position).
+/// If any consumed record has a non-zero entry in its Clones list, an entry
+/// describing \p CB is appended to \p ICallAnalysisInfo so that the promotion
+/// can be performed later.
+///
+/// \returns 0 if there are no profiled candidates for \p CB (in which case
+/// \p SI is left untouched), otherwise the size of the Clones list on the
+/// CallsiteInfo records consumed for \p CB.
+unsigned MemProfContextDisambiguation::recordICPInfo(
+    CallBase *CB, ArrayRef<CallsiteInfo> AllCallsites,
+    ArrayRef<CallsiteInfo>::iterator &SI,
+    SmallVector<ICallAnalysisData> &ICallAnalysisInfo) {
+  // First see if we have profile information for this indirect call.
+  uint32_t NumCandidates;
+  uint64_t TotalCount;
+  auto CandidateProfileData =
+      ICallAnalysis->getPromotionCandidatesForInstruction(CB, TotalCount,
+                                                          NumCandidates);
+  if (CandidateProfileData.empty())
+    return 0;
+
+  // Iterate through all of the candidate profiled targets along with the
+  // CallsiteInfo summary records synthesized for them when building the index,
+  // and see if any are cloned and/or refer to clones.
+  bool ICPNeeded = false;
+  unsigned NumClones = 0;
+  // Remember where this call's records start so that the later ICP step can
+  // index back into AllCallsites.
+  size_t CallsiteInfoStartIndex = std::distance(AllCallsites.begin(), SI);
+  for (const auto &Candidate : CandidateProfileData) {
+#ifndef NDEBUG
+    auto CalleeValueInfo =
+#endif
+        ImportSummary->getValueInfo(Candidate.Value);
+    // Make sure a summary record is available before SI is dereferenced by
+    // the checks and accesses below.
+    assert(SI != AllCallsites.end());
+    // We might not have a ValueInfo if this is a distributed
+    // ThinLTO backend and decided not to import that function.
+    assert(!CalleeValueInfo || SI->Callee == CalleeValueInfo);
+    auto &StackNode = *(SI++);
+    // See if any of the clones of the indirect callsite for this
+    // profiled target should call a cloned version of the profiled
+    // target. We only need to do the ICP here if so.
+    ICPNeeded |= llvm::any_of(StackNode.Clones,
+                              [](unsigned CloneNo) { return CloneNo != 0; });
+    // Every callsite in the same function should have been cloned the same
+    // number of times.
+    assert(!NumClones || NumClones == StackNode.Clones.size());
+    NumClones = StackNode.Clones.size();
+  }
+  if (!ICPNeeded)
+    return NumClones;
+  // Save information for ICP, which is performed later to avoid messing up the
+  // current function traversal.
+  ICallAnalysisInfo.push_back({CB, CandidateProfileData.vec(), NumCandidates,
+                               TotalCount, CallsiteInfoStartIndex});
+  return NumClones;
+}
+
+void MemProfContextDisambiguation::performICP(
+    Module &M, ArrayRef<CallsiteInfo> AllCallsites,
+    SmallVectorImpl<std::unique_ptr<ValueToValueMapTy>> &VMaps,
+    SmallVector<ICallAnalysisData> &ICallAnalysisInfo,
----------------
snehasish wrote:

Can these be ArrayRefs?

https://github.com/llvm/llvm-project/pull/110625


More information about the llvm-commits mailing list