[clang] [compiler-rt] [llvm] [TypeProf][InstrFDO]Implement more efficient comparison sequence for indirect-call-promotion with vtable profiles. (PR #81442)
Mingming Liu via cfe-commits
cfe-commits at lists.llvm.org
Wed Jun 12 12:34:25 PDT 2024
================
@@ -321,14 +746,127 @@ bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
if (!NumCandidates ||
(PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount)))
continue;
+
auto PromotionCandidates = getPromotionCandidatesForCallSite(
*CB, ICallProfDataRef, TotalCount, NumCandidates);
- Changed |= tryToPromoteWithFuncCmp(*CB, PromotionCandidates, TotalCount,
- ICallProfDataRef, NumCandidates);
+
+ VTableGUIDCountsMap VTableGUIDCounts;
+ Instruction *VPtr =
+ computeVTableInfos(CB, VTableGUIDCounts, PromotionCandidates);
+
+ if (isProfitableToCompareVTables(PromotionCandidates, TotalCount))
+ Changed |= tryToPromoteWithVTableCmp(*CB, VPtr, PromotionCandidates,
+ TotalCount, NumCandidates,
+ ICallProfDataRef, VTableGUIDCounts);
+ else
+ Changed |= tryToPromoteWithFuncCmp(*CB, VPtr, PromotionCandidates,
+ TotalCount, ICallProfDataRef,
+ NumCandidates, VTableGUIDCounts);
}
return Changed;
}
+// TODO: Returns false if the function addressing and vtable load instructions
+// cannot sink to indirect fallback.
+bool IndirectCallPromoter::isProfitableToCompareVTables(
+ const std::vector<PromotionCandidate> &Candidates, uint64_t TotalCount) {
+ if (!ICPEnableVTableCmp || Candidates.empty())
+ return false;
+ uint64_t RemainingVTableCount = TotalCount;
+ for (size_t I = 0; I < Candidates.size(); I++) {
+ auto &Candidate = Candidates[I];
+ uint64_t VTableSumCount = 0;
+ for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts)
+ VTableSumCount += Count;
+
+ if (VTableSumCount < Candidate.Count * ICPVTableCountPercentage)
+ return false;
+
+ RemainingVTableCount -= Candidate.Count;
+
+ int NumAdditionalVTable = 0;
+ if (I == Candidates.size() - 1)
+ NumAdditionalVTable = ICPNumAdditionalVTableLast;
+
+ int ActualNumAdditionalInst = Candidate.AddressPoints.size() - 1;
+ if (ActualNumAdditionalInst > NumAdditionalVTable) {
+ return false;
+ }
+ }
+
+ // If the indirect fallback is not cold, don't compare vtables.
+ if (PSI && PSI->hasProfileSummary() &&
+ !PSI->isColdCount(RemainingVTableCount))
+ return false;
+
+ return true;
+}
+
+static void
+computeVirtualCallSiteTypeInfoMap(Module &M, ModuleAnalysisManager &MAM,
+ VirtualCallSiteTypeInfoMap &VirtualCSInfo) {
+ auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
+ return FAM.getResult<DominatorTreeAnalysis>(F);
+ };
+
+ auto compute = [&](Function *Func) {
+ if (!Func || Func->use_empty())
+ return;
+ // Iterate all type.test calls and find all indirect calls.
+ // TODO: Add llvm.public.type.test
+ for (Use &U : llvm::make_early_inc_range(Func->uses())) {
+ auto *CI = dyn_cast<CallInst>(U.getUser());
+ if (!CI)
+ continue;
+ auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1));
+ if (!TypeMDVal)
+ continue;
+ auto *CompatibleTypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
+ if (!CompatibleTypeId)
+ continue;
+
+ // Find out all devirtualizable call sites given a llvm.type.test
+ // intrinsic call.
+ SmallVector<DevirtCallSite, 1> DevirtCalls;
+ SmallVector<CallInst *, 1> Assumes;
+ auto &DT = LookupDomTree(*CI->getFunction());
+ findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
+
+ // type-id, offset from the address point
+ // combined with type metadata to compute function offset
+ for (auto &DevirtCall : DevirtCalls) {
+ CallBase &CB = DevirtCall.CB;
+ // Given an indirect call, try find the instruction which loads a
+ // pointer to virtual table.
+ Instruction *VTablePtr =
+ PGOIndirectCallVisitor::tryGetVTableInstruction(&CB);
+ if (!VTablePtr)
+ continue;
+ VirtualCSInfo[&CB] = {DevirtCall.Offset, VTablePtr,
+ CompatibleTypeId->getString()};
+ }
+ }
+ };
+
+ // Right now only llvm.type.test is used to find out virtual call sites.
+ // With ThinLTO and whole-program-devirtualization, llvm.type.test and
+ // llvm.public.type.test are emitted, and llvm.public.type.test is either
+ // refined to llvm.type.test or dropped before indirect-call-promotion pass.
----------------
minglotus-6 wrote:
> Briefly explain in the comment why type test is needed for vtable based indirect call promotion?
Iterating over the users of `llvm.type.test` finds virtual calls among all indirect calls in a more compile-time-efficient manner. Moreover, the second parameter of `llvm.type.test` is the compatible type string. Since one vtable definition can be compatible with multiple vtables [1], the compatible type string together with the `getelementptr` is important for calculating the function's byte offset relative to the start of the vtable.
I added a comment for the static function `computeVirtualCallSiteTypeInfoMap`.
[1] This [regression test](https://github.com/llvm/llvm-project/pull/81442/files#diff-634373b218f3f2db3e9aa8dc57b5fb3229d9594bf645d88300a7cbf193c287e6R13-R15) reflects this.
https://github.com/llvm/llvm-project/pull/81442
More information about the cfe-commits
mailing list