[clang] [compiler-rt] [llvm] [TypeProf][InstrFDO]Implement more efficient comparison sequence for indirect-call-promotion with vtable profiles. (PR #81442)
Mingming Liu via cfe-commits
cfe-commits at lists.llvm.org
Wed Jun 26 15:11:09 PDT 2024
================
@@ -277,35 +626,160 @@ CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee,
// Promote indirect-call to conditional direct-call for one callsite.
bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
- CallBase &CB, const std::vector<PromotionCandidate> &Candidates,
- uint64_t TotalCount, ArrayRef<InstrProfValueData> ICallProfDataRef,
- uint32_t NumCandidates) {
+ CallBase &CB, Instruction *VPtr,
+ const std::vector<PromotionCandidate> &Candidates, uint64_t TotalCount,
+ ArrayRef<InstrProfValueData> ICallProfDataRef, uint32_t NumCandidates,
+ VTableGUIDCountsMap &VTableGUIDCounts) {
uint32_t NumPromoted = 0;
for (const auto &C : Candidates) {
- uint64_t Count = C.Count;
- pgo::promoteIndirectCall(CB, C.TargetFunction, Count, TotalCount, SamplePGO,
- &ORE);
- assert(TotalCount >= Count);
- TotalCount -= Count;
+ uint64_t FuncCount = C.Count;
+ pgo::promoteIndirectCall(CB, C.TargetFunction, FuncCount, TotalCount,
+ SamplePGO, &ORE);
+ assert(TotalCount >= FuncCount);
+ TotalCount -= FuncCount;
NumOfPGOICallPromotion++;
NumPromoted++;
- }
+ if (!EnableVTableProfileUse || C.VTableGUIDAndCounts.empty())
+ continue;
+
+ // After a virtual call candidate gets promoted, update the vtable's counts
+ // proportionally. Each vtable-guid in `C.VTableGUIDAndCounts` represents
+ // a vtable from which the virtual call is loaded. Compute the sum and use
+ // 128-bit APInt to improve accuracy.
+ uint64_t SumVTableCount = 0;
+ for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts)
+ SumVTableCount += VTableCount;
+
+ for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts) {
+ APInt APFuncCount((unsigned)128, FuncCount, false /*signed*/);
+ APFuncCount *= VTableCount;
+ VTableGUIDCounts[GUID] -= APFuncCount.udiv(SumVTableCount).getZExtValue();
+ }
+ }
if (NumPromoted == 0)
return false;
- // Adjust the MD.prof metadata. First delete the old one.
- CB.setMetadata(LLVMContext::MD_prof, nullptr);
-
assert(NumPromoted <= ICallProfDataRef.size() &&
"Number of promoted functions should not be greater than the number "
"of values in profile metadata");
+
+ // Update value profiles on the indirect call.
+ updateFuncValueProfiles(CB, ICallProfDataRef.slice(NumPromoted), TotalCount,
+ NumCandidates);
+ updateVPtrValueProfiles(VPtr, VTableGUIDCounts);
+ return true;
+}
+
+void IndirectCallPromoter::updateFuncValueProfiles(
+ CallBase &CB, ArrayRef<InstrProfValueData> CallVDs, uint64_t TotalCount,
+ uint32_t MaxMDCount) {
+ // First clear the existing !prof.
+ CB.setMetadata(LLVMContext::MD_prof, nullptr);
// Annotate the remaining value profiles if counter is not zero.
if (TotalCount != 0)
- annotateValueSite(*F.getParent(), CB, ICallProfDataRef.slice(NumPromoted),
- TotalCount, IPVK_IndirectCallTarget, NumCandidates);
+ annotateValueSite(M, CB, CallVDs, TotalCount, IPVK_IndirectCallTarget,
+ MaxMDCount);
+}
+
+void IndirectCallPromoter::updateVPtrValueProfiles(
+ Instruction *VPtr, VTableGUIDCountsMap &VTableGUIDCounts) {
+ if (!EnableVTableProfileUse || VPtr == nullptr ||
+ !VPtr->getMetadata(LLVMContext::MD_prof))
+ return;
+ VPtr->setMetadata(LLVMContext::MD_prof, nullptr);
+ std::vector<InstrProfValueData> VTableValueProfiles;
+ uint64_t TotalVTableCount = 0;
+ for (auto [GUID, Count] : VTableGUIDCounts) {
+ if (Count == 0)
+ continue;
+
+ VTableValueProfiles.push_back({GUID, Count});
+ TotalVTableCount += Count;
+ }
+ llvm::sort(VTableValueProfiles,
+ [](const InstrProfValueData &LHS, const InstrProfValueData &RHS) {
+ return LHS.Count > RHS.Count;
+ });
+
+ annotateValueSite(M, *VPtr, VTableValueProfiles, TotalVTableCount,
+ IPVK_VTableTarget, VTableValueProfiles.size());
+}
+
+bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
+ CallBase &CB, Instruction *VPtr,
+ const std::vector<PromotionCandidate> &Candidates, uint64_t TotalFuncCount,
+ uint32_t NumCandidates,
+ MutableArrayRef<InstrProfValueData> ICallProfDataRef,
+ VTableGUIDCountsMap &VTableGUIDCounts) {
+ SmallVector<uint64_t, 4> PromotedFuncCount;
+
+ for (const auto &Candidate : Candidates) {
+ for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts)
+ VTableGUIDCounts[GUID] -= Count;
+
+ // 'OriginalBB' is the basic block of indirect call. After each candidate
+ // is promoted, a new basic block is created for the indirect fallback basic
+ // block and indirect call `CB` is moved into this new BB.
+ BasicBlock *OriginalBB = CB.getParent();
+ promoteCallWithVTableCmp(
+ CB, VPtr, Candidate.TargetFunction, Candidate.AddressPoints,
+ createBranchWeights(CB.getContext(), Candidate.Count,
+ TotalFuncCount - Candidate.Count));
+
+ int SinkCount = tryToSinkInstructions(OriginalBB, CB.getParent());
+
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "Promoted", &CB)
+ << "Promote indirect call to "
+ << ore::NV("DirectCallee", Candidate.TargetFunction)
+ << " with count " << ore::NV("Count", Candidate.Count)
+ << " out of " << ore::NV("TotalCount", TotalFuncCount)
+ << ", compare "
+ << ore::NV("VTable", Candidate.VTableGUIDAndCounts.size())
+ << " vtables and sink " << ore::NV("SinkCount", SinkCount)
+ << " instructions";
+ });
+
+ PromotedFuncCount.push_back(Candidate.Count);
+
+ assert(TotalFuncCount >= Candidate.Count &&
+ "Within one prof metadata, total count is the sum of counts from "
+ "individual <target, count> pairs");
+ // Use std::min since 'TotalFuncCount' is the saturating sum of individual
+ // counts, see
+ // https://github.com/llvm/llvm-project/blob/abedb3b8356d5d56f1c575c4f7682fba2cb19787/llvm/lib/ProfileData/InstrProf.cpp#L1281-L1288
+ TotalFuncCount -= std::min(TotalFuncCount, Candidate.Count);
+ NumOfPGOICallPromotion++;
+ }
+ if (PromotedFuncCount.empty())
+ return false;
+
+ // Update value profiles for 'CB' and 'VPtr', assuming that each 'CB' has a
+ // a distinct 'VPtr'.
+ // FIXME: When Clang `-fstrict-vtable-pointers` is enabled, a vtable might be
+ // used to load multiple virtual functions. The vtable profiles needs to be
+ // updated properly in that case (e.g, for each indirect call annotate both
+ // type profiles and function profiles in one !prof).
----------------
minglotus-6 wrote:
> I assume it means incorrect updated profile info and not a correctness issue or crash?
Correct. There is no crash or miscompile, but the profile update is incorrect.
> what "both type profiles and function profiles in one !prof" means - aren't they carried by different !prof
As of now, one `!prof` metadata carries only one type of information (e.g., `branch_weights` or value profile or `function_entry_count`).
The comment wants something like the IR below. Note that there is no `!prof` on `%vptr`, and `!0` references both `!1` and `!2`.
```
# note: there is no `!prof` attached to the %vptr load
%vptr = load ptr, ptr %d
%vfn = getelementptr inbounds ptr, ptr %vptr, i64 1
%1 = load ptr, ptr %vfn
call void %1(ptr %d), !prof !0
!0 = !{!1, !2}
!1 = !{!"VP", i32 2, i64 100, i64 123, i64 50, i64 456, i64 50}
!2 = !{!"VP", i32 0, i64 100, i64 789, i64 50, i64 579, i64 50}
```
https://github.com/llvm/llvm-project/pull/81442
More information about the cfe-commits
mailing list