[clang] [compiler-rt] [llvm] [TypeProf][InstrFDO]Implement more efficient comparison sequence for indirect-call-promotion with vtable profiles. (PR #81442)

Teresa Johnson via cfe-commits cfe-commits at lists.llvm.org
Mon Jun 24 20:46:09 PDT 2024


================
@@ -322,14 +796,133 @@ bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
     if (!NumCandidates ||
         (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount)))
       continue;
+
     auto PromotionCandidates = getPromotionCandidatesForCallSite(
         *CB, ICallProfDataRef, TotalCount, NumCandidates);
-    Changed |= tryToPromoteWithFuncCmp(*CB, PromotionCandidates, TotalCount,
-                                       ICallProfDataRef, NumCandidates);
+
+    VTableGUIDCountsMap VTableGUIDCounts;
+    Instruction *VPtr =
+        computeVTableInfos(CB, VTableGUIDCounts, PromotionCandidates);
+
+    if (isProfitableToCompareVTables(*CB, PromotionCandidates, TotalCount))
+      Changed |= tryToPromoteWithVTableCmp(*CB, VPtr, PromotionCandidates,
+                                           TotalCount, NumCandidates,
+                                           ICallProfDataRef, VTableGUIDCounts);
+    else
+      Changed |= tryToPromoteWithFuncCmp(*CB, VPtr, PromotionCandidates,
+                                         TotalCount, ICallProfDataRef,
+                                         NumCandidates, VTableGUIDCounts);
   }
   return Changed;
 }
 
+// TODO: Returns false if the function addressing and vtable load instructions
+// cannot sink to indirect fallback.
+bool IndirectCallPromoter::isProfitableToCompareVTables(
+    const CallBase &CB, const std::vector<PromotionCandidate> &Candidates,
+    uint64_t TotalCount) {
+  if (!EnableVTableProfileUse || Candidates.empty())
+    return false;
+  uint64_t RemainingVTableCount = TotalCount;
+  const size_t CandidateSize = Candidates.size();
+  for (size_t I = 0; I < CandidateSize; I++) {
+    auto &Candidate = Candidates[I];
+    uint64_t CandidateVTableCount = 0;
+    for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts)
+      CandidateVTableCount += Count;
+
+    if (CandidateVTableCount < Candidate.Count * ICPVTablePercentageThreshold) {
+      LLVM_DEBUG(dbgs() << "For callsite #" << NumOfPGOICallsites << CB << I
+                        << "-th candidate, function count " << Candidate.Count
+                        << " and its vtable count " << CandidateVTableCount
+                        << " have discrepancies\n");
+      return false;
+    }
+
+    RemainingVTableCount -= Candidate.Count;
+
+    // 'MaxNumVTable' limits the number of vtables to make vtable comparison
+    // profitable. Comparing multiple vtables for one function candidate will
+    // insert additional instructions on the hot path, and allowing more than
+    // one vtable for non-last candidates may or may not elongate the
+    // dependency chain for the subsequent candidates. Set its value to 1 for
+    // non-last candidates and allow an option to override it for the last
+    // candidate.
+    int MaxNumVTable = 1;
+    if (I == CandidateSize - 1)
+      MaxNumVTable = ICPMaxNumVTableLastCandidate;
+
+    if ((int)Candidate.AddressPoints.size() > MaxNumVTable) {
----------------
teresajohnson wrote:

It might be useful to emit a debug or missed-optimization message for this case?

https://github.com/llvm/llvm-project/pull/81442


More information about the cfe-commits mailing list