[clang] [compiler-rt] [llvm] [TypeProf][InstrFDO]Implement more efficient comparison sequence for indirect-call-promotion with vtable profiles. (PR #81442)

David Li via cfe-commits cfe-commits at lists.llvm.org
Mon Jun 10 13:24:57 PDT 2024


================
@@ -276,35 +626,154 @@ CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee,
 
 // Promote indirect-call to conditional direct-call for one callsite.
 bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
-    CallBase &CB, const std::vector<PromotionCandidate> &Candidates,
-    uint64_t TotalCount, ArrayRef<InstrProfValueData> ICallProfDataRef,
-    uint32_t NumCandidates) {
+    CallBase &CB, Instruction *VPtr,
+    const std::vector<PromotionCandidate> &Candidates, uint64_t TotalCount,
+    ArrayRef<InstrProfValueData> ICallProfDataRef, uint32_t NumCandidates,
+    VTableGUIDCountsMap &VTableGUIDCounts) {
   uint32_t NumPromoted = 0;
 
   for (const auto &C : Candidates) {
-    uint64_t Count = C.Count;
-    pgo::promoteIndirectCall(CB, C.TargetFunction, Count, TotalCount, SamplePGO,
-                             &ORE);
-    assert(TotalCount >= Count);
-    TotalCount -= Count;
+    uint64_t FuncCount = C.Count;
+    pgo::promoteIndirectCall(CB, C.TargetFunction, FuncCount, TotalCount,
+                             SamplePGO, &ORE);
+    assert(TotalCount >= FuncCount);
+    TotalCount -= FuncCount;
     NumOfPGOICallPromotion++;
     NumPromoted++;
-  }
 
+    if (!EnableVTableProfileUse || C.VTableGUIDAndCounts.empty())
+      continue;
+
+    // After a virtual call candidate gets promoted, update the vtable's counts
+    // proportionally. Each vtable-guid in `C.VTableGUIDAndCounts` represents
+    // a vtable from which the virtual call is loaded. Compute the sum and use
+    // 128-bit APInt to improve accuracy.
+    uint64_t SumVTableCount = 0;
+    for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts)
+      SumVTableCount += VTableCount;
+
+    for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts) {
+      APInt APFuncCount((unsigned)128, FuncCount, false /*signed*/);
+      APFuncCount *= VTableCount;
+      VTableGUIDCounts[GUID] -= APFuncCount.udiv(SumVTableCount).getZExtValue();
+    }
+  }
   if (NumPromoted == 0)
     return false;
 
-  // Adjust the MD.prof metadata. First delete the old one.
-  CB.setMetadata(LLVMContext::MD_prof, nullptr);
-
   assert(NumPromoted <= ICallProfDataRef.size() &&
          "Number of promoted functions should not be greater than the number "
          "of values in profile metadata");
+
+  // Update value profiles on the indirect call.
+  // TODO: Handle profile update properly when Clang `-fstrict-vtable-pointers`
+  // is enabled and a vtable is used to load multiple virtual functions.
+  updateFuncValueProfiles(CB, ICallProfDataRef.slice(NumPromoted), TotalCount,
+                          NumCandidates);
+  // Update value profiles on the vtable pointer if it exists.
+  if (VPtr)
+    updateVPtrValueProfiles(VPtr, VTableGUIDCounts);
+  return true;
+}
+
+void IndirectCallPromoter::updateFuncValueProfiles(
+    CallBase &CB, ArrayRef<InstrProfValueData> CallVDs, uint64_t TotalCount,
+    uint32_t MaxMDCount) {
+  // First clear the existing !prof.
+  CB.setMetadata(LLVMContext::MD_prof, nullptr);
   // Annotate the remaining value profiles if counter is not zero.
   if (TotalCount != 0)
-    annotateValueSite(*F.getParent(), CB, ICallProfDataRef.slice(NumPromoted),
-                      TotalCount, IPVK_IndirectCallTarget, NumCandidates);
+    annotateValueSite(M, CB, CallVDs, TotalCount, IPVK_IndirectCallTarget,
+                      MaxMDCount);
+}
+
+void IndirectCallPromoter::updateVPtrValueProfiles(
+    Instruction *VPtr, VTableGUIDCountsMap &VTableGUIDCounts) {
+  VPtr->setMetadata(LLVMContext::MD_prof, nullptr);
+  std::vector<InstrProfValueData> VTableValueProfiles;
+  uint64_t TotalVTableCount = 0;
+  for (auto [GUID, Count] : VTableGUIDCounts) {
+    if (Count == 0)
+      continue;
+
+    VTableValueProfiles.push_back({GUID, Count});
+    TotalVTableCount += Count;
+  }
+  llvm::sort(VTableValueProfiles,
+             [](const InstrProfValueData &LHS, const InstrProfValueData &RHS) {
+               return LHS.Count > RHS.Count;
+             });
+
+  annotateValueSite(M, *VPtr, VTableValueProfiles, TotalVTableCount,
+                    IPVK_VTableTarget, VTableValueProfiles.size());
+}
+
+bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
+    CallBase &CB, Instruction *VPtr,
+    const std::vector<PromotionCandidate> &Candidates, uint64_t TotalFuncCount,
+    uint32_t NumCandidates,
+    MutableArrayRef<InstrProfValueData> ICallProfDataRef,
+    VTableGUIDCountsMap &VTableGUIDCounts) {
+  SmallVector<uint64_t, 4> PromotedFuncCount;
+
+  for (const auto &Candidate : Candidates) {
+    for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts)
+      VTableGUIDCounts[GUID] -= Count;
+
+    // 'OriginalBB' is the basic block of indirect call before indirect call
+    // promotion.
+    BasicBlock *OriginalBB = CB.getParent();
+    promoteCallWithVTableCmp(
+        CB, VPtr, Candidate.TargetFunction, Candidate.AddressPoints,
+        createBranchWeights(CB.getContext(), Candidate.Count,
+                            TotalFuncCount - Candidate.Count));
+
+    int SinkCount = tryToSinkInstructions(OriginalBB, CB.getParent());
+
+    ORE.emit([&]() {
+      return OptimizationRemark(DEBUG_TYPE, "Promoted", &CB)
+             << "Promote indirect call to "
+             << ore::NV("DirectCallee", Candidate.TargetFunction)
+             << " with count " << ore::NV("Count", Candidate.Count)
+             << " out of " << ore::NV("TotalCount", TotalFuncCount)
+             << ", compare "
+             << ore::NV("VTable", Candidate.VTableGUIDAndCounts.size())
+             << " vtables and sink " << ore::NV("SinkCount", SinkCount)
+             << " instructions";
+    });
+
+    PromotedFuncCount.push_back(Candidate.Count);
+
+    TotalFuncCount -= Candidate.Count;
----------------
david-xl wrote:

Add a check to make sure TotalFuncCount does not become negative (the difference and profile precisions in icall and vtable profile may lead to it).

https://github.com/llvm/llvm-project/pull/81442


More information about the cfe-commits mailing list