[clang] [compiler-rt] [llvm] [TypeProf][InstrFDO]Implement more efficient comparison sequence for indirect-call-promotion with vtable profiles. (PR #81442)

Teresa Johnson via cfe-commits cfe-commits at lists.llvm.org
Mon Jun 24 20:46:09 PDT 2024


================
@@ -277,35 +626,160 @@ CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee,
 
 // Promote indirect-call to conditional direct-call for one callsite.
 bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
-    CallBase &CB, const std::vector<PromotionCandidate> &Candidates,
-    uint64_t TotalCount, ArrayRef<InstrProfValueData> ICallProfDataRef,
-    uint32_t NumCandidates) {
+    CallBase &CB, Instruction *VPtr,
+    const std::vector<PromotionCandidate> &Candidates, uint64_t TotalCount,
+    ArrayRef<InstrProfValueData> ICallProfDataRef, uint32_t NumCandidates,
+    VTableGUIDCountsMap &VTableGUIDCounts) {
   uint32_t NumPromoted = 0;
 
   for (const auto &C : Candidates) {
-    uint64_t Count = C.Count;
-    pgo::promoteIndirectCall(CB, C.TargetFunction, Count, TotalCount, SamplePGO,
-                             &ORE);
-    assert(TotalCount >= Count);
-    TotalCount -= Count;
+    uint64_t FuncCount = C.Count;
+    pgo::promoteIndirectCall(CB, C.TargetFunction, FuncCount, TotalCount,
+                             SamplePGO, &ORE);
+    assert(TotalCount >= FuncCount);
+    TotalCount -= FuncCount;
     NumOfPGOICallPromotion++;
     NumPromoted++;
-  }
 
+    if (!EnableVTableProfileUse || C.VTableGUIDAndCounts.empty())
+      continue;
+
+    // After a virtual call candidate gets promoted, update the vtable's counts
+    // proportionally. Each vtable-guid in `C.VTableGUIDAndCounts` represents
+    // a vtable from which the virtual call is loaded. Compute the sum and use
+    // 128-bit APInt to improve accuracy.
+    uint64_t SumVTableCount = 0;
+    for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts)
+      SumVTableCount += VTableCount;
+
+    for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts) {
+      APInt APFuncCount((unsigned)128, FuncCount, false /*signed*/);
+      APFuncCount *= VTableCount;
+      VTableGUIDCounts[GUID] -= APFuncCount.udiv(SumVTableCount).getZExtValue();
+    }
+  }
   if (NumPromoted == 0)
     return false;
 
-  // Adjust the MD.prof metadata. First delete the old one.
-  CB.setMetadata(LLVMContext::MD_prof, nullptr);
-
   assert(NumPromoted <= ICallProfDataRef.size() &&
          "Number of promoted functions should not be greater than the number "
          "of values in profile metadata");
+
+  // Update value profiles on the indirect call.
+  updateFuncValueProfiles(CB, ICallProfDataRef.slice(NumPromoted), TotalCount,
+                          NumCandidates);
+  updateVPtrValueProfiles(VPtr, VTableGUIDCounts);
+  return true;
+}
+
+void IndirectCallPromoter::updateFuncValueProfiles(
+    CallBase &CB, ArrayRef<InstrProfValueData> CallVDs, uint64_t TotalCount,
+    uint32_t MaxMDCount) {
+  // First clear the existing !prof.
+  CB.setMetadata(LLVMContext::MD_prof, nullptr);
   // Annotate the remaining value profiles if counter is not zero.
   if (TotalCount != 0)
-    annotateValueSite(*F.getParent(), CB, ICallProfDataRef.slice(NumPromoted),
-                      TotalCount, IPVK_IndirectCallTarget, NumCandidates);
+    annotateValueSite(M, CB, CallVDs, TotalCount, IPVK_IndirectCallTarget,
+                      MaxMDCount);
+}
+
+void IndirectCallPromoter::updateVPtrValueProfiles(
+    Instruction *VPtr, VTableGUIDCountsMap &VTableGUIDCounts) {
+  if (!EnableVTableProfileUse || VPtr == nullptr ||
+      !VPtr->getMetadata(LLVMContext::MD_prof))
+    return;
+  VPtr->setMetadata(LLVMContext::MD_prof, nullptr);
+  std::vector<InstrProfValueData> VTableValueProfiles;
+  uint64_t TotalVTableCount = 0;
+  for (auto [GUID, Count] : VTableGUIDCounts) {
+    if (Count == 0)
+      continue;
+
+    VTableValueProfiles.push_back({GUID, Count});
+    TotalVTableCount += Count;
+  }
+  llvm::sort(VTableValueProfiles,
+             [](const InstrProfValueData &LHS, const InstrProfValueData &RHS) {
+               return LHS.Count > RHS.Count;
+             });
+
+  annotateValueSite(M, *VPtr, VTableValueProfiles, TotalVTableCount,
+                    IPVK_VTableTarget, VTableValueProfiles.size());
+}
+
+bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
+    CallBase &CB, Instruction *VPtr,
+    const std::vector<PromotionCandidate> &Candidates, uint64_t TotalFuncCount,
+    uint32_t NumCandidates,
+    MutableArrayRef<InstrProfValueData> ICallProfDataRef,
+    VTableGUIDCountsMap &VTableGUIDCounts) {
+  SmallVector<uint64_t, 4> PromotedFuncCount;
+
+  for (const auto &Candidate : Candidates) {
+    for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts)
+      VTableGUIDCounts[GUID] -= Count;
+
+    // 'OriginalBB' is the basic block of indirect call. After each candidate
+    // is promoted, a new basic block is created for the indirect fallback basic
+    // block and indirect call `CB` is moved into this new BB.
+    BasicBlock *OriginalBB = CB.getParent();
+    promoteCallWithVTableCmp(
+        CB, VPtr, Candidate.TargetFunction, Candidate.AddressPoints,
+        createBranchWeights(CB.getContext(), Candidate.Count,
+                            TotalFuncCount - Candidate.Count));
+
+    int SinkCount = tryToSinkInstructions(OriginalBB, CB.getParent());
+
+    ORE.emit([&]() {
+      return OptimizationRemark(DEBUG_TYPE, "Promoted", &CB)
+             << "Promote indirect call to "
+             << ore::NV("DirectCallee", Candidate.TargetFunction)
+             << " with count " << ore::NV("Count", Candidate.Count)
+             << " out of " << ore::NV("TotalCount", TotalFuncCount)
+             << ", compare "
+             << ore::NV("VTable", Candidate.VTableGUIDAndCounts.size())
+             << " vtables and sink " << ore::NV("SinkCount", SinkCount)
+             << " instructions";
+    });
+
+    PromotedFuncCount.push_back(Candidate.Count);
+
+    assert(TotalFuncCount >= Candidate.Count &&
+           "Within one prof metadata, total count is the sum of counts from "
+           "individual <target, count> pairs");
+    // Use std::min since 'TotalFuncCount' is the saturating sum of individual
+    // counts, see
+    // https://github.com/llvm/llvm-project/blob/abedb3b8356d5d56f1c575c4f7682fba2cb19787/llvm/lib/ProfileData/InstrProf.cpp#L1281-L1288
+    TotalFuncCount -= std::min(TotalFuncCount, Candidate.Count);
+    NumOfPGOICallPromotion++;
+  }
 
+  if (PromotedFuncCount.empty())
+    return false;
+
+  // Update value profiles for 'CB' and 'VPtr', assuming that each 'CB' has a
+  // distinct 'VPtr'.
+  // FIXME: When Clang `-fstrict-vtable-pointers` is enabled, a vtable might be
+  // used to load multiple virtual functions. The vtable profiles need to be
+  // updated properly in that case (e.g., for each indirect call annotate both
+  // type profiles and function profiles in one !prof).
----------------
teresajohnson wrote:

What will happen currently? I assume it means incorrectly updated profile info and not a correctness issue or crash? I'm not sure what "both type profiles and function profiles in one !prof" means - aren't they carried by different `!prof`?

https://github.com/llvm/llvm-project/pull/81442


More information about the cfe-commits mailing list