[libcxx-commits] [flang] [llvm] [compiler-rt] [libcxx] [libcxxabi] [libc] [clang-tools-extra] [lld] [clang] [CSSPGO] Compute and report post-match profile staleness (PR #79090)
Lei Wang via libcxx-commits
libcxx-commits at lists.llvm.org
Thu Jan 25 09:59:25 PST 2024
================
@@ -2422,59 +2342,226 @@ void SampleProfileMatcher::runStaleProfileMatching(
}
}
-void SampleProfileMatcher::runOnFunction(const Function &F) {
- // We need to use flattened function samples for matching.
- // Unlike IR, which includes all callsites from the source code, the callsites
- // in profile only show up when they are hit by samples, i,e. the profile
- // callsites in one context may differ from those in another context. To get
- // the maximum number of callsites, we merge the function profiles from all
- // contexts, aka, the flattened profile to find profile anchors.
- const auto *FSFlattened = getFlattenedSamplesFor(F);
- if (!FSFlattened)
- return;
+void SampleProfileMatcher::runStaleProfileMatching() {
+ for (const auto &F : M) {
+ if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
+ continue;
+ const auto *FSFlattened = getFlattenedSamplesFor(F);
+ if (!FSFlattened)
+ continue;
+ auto IR = FuncIRAnchors.find(&F);
+ auto P = FuncProfileAnchors.find(&F);
+ if (IR == FuncIRAnchors.end() || P == FuncProfileAnchors.end())
+ continue;
- // Anchors for IR. It's a map from IR location to callee name, callee name is
- // empty for non-call instruction and use a dummy name(UnknownIndirectCallee)
- // for unknown indrect callee name.
- std::map<LineLocation, StringRef> IRAnchors;
- findIRAnchors(F, IRAnchors);
- // Anchors for profile. It's a map from callsite location to a set of callee
- // name.
- std::map<LineLocation, std::unordered_set<FunctionId>> ProfileAnchors;
- findProfileAnchors(*FSFlattened, ProfileAnchors);
-
- // Detect profile mismatch for profile staleness metrics report.
- // Skip reporting the metrics for imported functions.
- if (!GlobalValue::isAvailableExternallyLinkage(F.getLinkage()) &&
- (ReportProfileStaleness || PersistProfileStaleness)) {
- // Use top-level nested FS for counting profile mismatch metrics since
- // currently once a callsite is mismatched, all its children profiles are
- // dropped.
- if (const auto *FS = Reader.getSamplesFor(F))
- countProfileMismatches(F, *FS, IRAnchors, ProfileAnchors);
+ // Run profile matching for checksum mismatched profile, currently only
+ // support for pseudo-probe.
+ if (FunctionSamples::ProfileIsProbeBased &&
+ !ProbeManager->profileIsValid(F, *FSFlattened)) {
+ runStaleProfileMatching(F, IR->second, P->second,
+ getIRToProfileLocationMap(F));
+ }
}
- // Run profile matching for checksum mismatched profile, currently only
- // support for pseudo-probe.
- if (SalvageStaleProfile && FunctionSamples::ProfileIsProbeBased &&
- !ProbeManager->profileIsValid(F, *FSFlattened)) {
- // The matching result will be saved to IRToProfileLocationMap, create a new
- // map for each function.
- runStaleProfileMatching(F, IRAnchors, ProfileAnchors,
- getIRToProfileLocationMap(F));
- }
+ distributeIRToProfileLocationMap();
}
-void SampleProfileMatcher::runOnModule() {
+void SampleProfileMatcher::findFuncAnchors() {
ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
FunctionSamples::ProfileIsCS);
- for (auto &F : M) {
+ for (const auto &F : M) {
if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
continue;
- runOnFunction(F);
+ // We need to use flattened function samples for matching.
+ // Unlike IR, which includes all callsites from the source code, the
+ // callsites in profile only show up when they are hit by samples, i,e. the
+ // profile callsites in one context may differ from those in another
+ // context. To get the maximum number of callsites, we merge the function
+ // profiles from all contexts, aka, the flattened profile to find profile
+ // anchors.
+ const auto *FSFlattened = getFlattenedSamplesFor(F);
+ if (!FSFlattened)
+ continue;
+
+ // Anchors for IR. It's a map from IR location to callee name, callee name
+ // is empty for non-call instruction and use a dummy
+ // name(UnknownIndirectCallee) for unknown indrect callee name.
+ auto IR = FuncIRAnchors.emplace(&F, IRAnchorMap());
+ findIRAnchors(F, IR.first->second);
+
+ // Anchors for profile. It's a map from callsite location to a set of callee
+ // name.
+ auto P = FuncProfileAnchors.emplace(&F, ProfileAnchorMap());
+ findProfileAnchors(*FSFlattened, P.first->second);
+ }
+}
+
+void SampleProfileMatcher::countMismatchedCallsiteSamples(
+ const FunctionSamples &FS,
+ StringMap<std::set<LineLocation>> &FuncToMismatchCallsites,
+ uint64_t &FuncMismatchedCallsiteSamples) const {
+ auto It = FuncToMismatchCallsites.find(FS.getFuncName());
+ // Skip it if no mismatched callsite or this is an external function.
+ if (It == FuncToMismatchCallsites.end() || It->second.empty())
+ return;
+ const auto &MismatchCallsites = It->second;
+ for (const auto &I : FS.getBodySamples()) {
+ if (MismatchCallsites.count(I.first))
+ FuncMismatchedCallsiteSamples += I.second.getSamples();
+ }
+
+ for (const auto &I : FS.getCallsiteSamples()) {
+ const auto &Loc = I.first;
+ if (MismatchCallsites.count(Loc)) {
+ for (const auto &CS : I.second)
+ FuncMismatchedCallsiteSamples += CS.second.getTotalSamples();
+ continue;
+ }
+
+ // count mismatched samples for inlined samples.
+ for (const auto &CS : I.second)
+ countMismatchedCallsiteSamples(CS.second, FuncToMismatchCallsites,
+ FuncMismatchedCallsiteSamples);
+ }
+}
+
+void SampleProfileMatcher::countMismatchedCallsites(
+ const Function &F,
+ StringMap<std::set<LineLocation>> &FuncToMismatchCallsites,
+ uint64_t &FuncProfiledCallsites, uint64_t &FuncMismatchedCallsites) const {
+ auto IR = FuncIRAnchors.find(&F);
+ auto P = FuncProfileAnchors.find(&F);
+ if (IR == FuncIRAnchors.end() || P == FuncProfileAnchors.end())
+ return;
+ const auto &IRAnchors = IR->second;
+ const auto &ProfileAnchors = P->second;
+
+ auto &MismatchCallsites =
+ FuncToMismatchCallsites[FunctionSamples::getCanonicalFnName(F.getName())];
+
+ // Check if there are any callsites in the profile that does not match to any
+ // IR callsites, those callsite samples will be discarded.
+ for (const auto &I : ProfileAnchors) {
+ const auto &Loc = I.first;
+ const auto &Callees = I.second;
+ assert(!Callees.empty() && "Callees should not be empty");
+
+ StringRef IRCalleeName;
+ const auto &IR = IRAnchors.find(Loc);
+ if (IR != IRAnchors.end())
+ IRCalleeName = IR->second;
+ bool CallsiteIsMatched = false;
+ // Since indirect call does not have CalleeName, check conservatively if
+ // callsite in the profile is a callsite location. This is to reduce num of
+ // false positive since otherwise all the indirect call samples will be
+ // reported as mismatching.
+ if (IRCalleeName == UnknownIndirectCallee)
+ CallsiteIsMatched = true;
+ else if (Callees.count(FunctionId(IRCalleeName)))
+ CallsiteIsMatched = true;
+
+ FuncProfiledCallsites++;
+ if (!CallsiteIsMatched) {
+ FuncMismatchedCallsites++;
+ MismatchCallsites.insert(Loc);
+ }
+ }
+}
+
+void SampleProfileMatcher::countMismatchedHashes(const Function &F,
+ const FunctionSamples &FS) {
+ if (!FunctionSamples::ProfileIsProbeBased)
+ return;
+ const auto *FuncDesc = ProbeManager->getDesc(F);
+ if (FuncDesc) {
+ if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
+ NumMismatchedFuncHash++;
+ }
+ countMismatchedHashSamples(FS);
+ }
+}
+
+void SampleProfileMatcher::UpdateIRAnchors() {
+ for (auto &I : FuncIRAnchors) {
+ const auto *F = I.first;
+ auto &IRAnchors = I.second;
+ const auto Mapping =
+ FuncMappings.find(FunctionSamples::getCanonicalFnName(F->getName()));
+ if (Mapping == FuncMappings.end())
+ continue;
+ IRAnchorMap UpdatedIRAnchors;
+ const auto &LocToLocMapping = Mapping->second;
+ for (const auto L : LocToLocMapping) {
+ UpdatedIRAnchors[L.second] = IRAnchors[L.first];
+ IRAnchors.erase(L.first);
+ }
+
+ for (const auto &IR : UpdatedIRAnchors) {
+ IRAnchors[IR.first] = IR.second;
+ }
+ }
+}
+
+void SampleProfileMatcher::countProfileMismatches(bool IsPreMatch) {
+ if (!ReportProfileStaleness && !PersistProfileStaleness)
+ return;
+
+ if (!IsPreMatch) {
+ // Use the profile matching results to update to the IR anchors.
+ UpdateIRAnchors();
+ }
+
+ uint64_t UnusedCounter = 0;
+ uint64_t *TotalProfiledCallsitesPtr =
+ IsPreMatch ? &TotalProfiledCallsites : &UnusedCounter;
+ uint64_t *NumMismatchedCallsitesPtr =
+ IsPreMatch ? &NumMismatchedCallsites : &PostMatchNumMismatchedCallsites;
+ uint64_t *MismatchedCallsiteSamplesPtr =
+ IsPreMatch ? &MismatchedCallsiteSamples
+ : &PostMatchMismatchedCallsiteSamples;
+
+ auto SkipFunctionForReport = [](const Function &F) {
+ if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
+ return true;
+ // Skip reporting the metrics for imported functions.
+ if (GlobalValue::isAvailableExternallyLinkage(F.getLinkage()))
+ return true;
+ return false;
+ };
+
+ StringMap<std::set<LineLocation>> FuncToMismatchCallsites;
+ for (const auto &F : M) {
+ if (SkipFunctionForReport(F))
+ continue;
+ const auto *FS = Reader.getSamplesFor(F);
+ if (FS && IsPreMatch) {
+ // Only count the total function metrics once in pre-match time.
+ TotalFuncHashSamples += FS->getTotalSamples();
+ TotalProfiledFunc++;
+ countMismatchedHashes(F, *FS);
+ }
+ countMismatchedCallsites(F, FuncToMismatchCallsites,
+ *TotalProfiledCallsitesPtr,
+ *NumMismatchedCallsitesPtr);
+ }
+
+ for (const auto &F : M) {
+ if (SkipFunctionForReport(F))
+ continue;
+ if (const auto *FS = Reader.getSamplesFor(F))
+ countMismatchedCallsiteSamples(*FS, FuncToMismatchCallsites,
+ *MismatchedCallsiteSamplesPtr);
+ }
+}
+
+void SampleProfileMatcher::runOnModule() {
+ findFuncAnchors();
+ countProfileMismatches(true);
+
+ if (SalvageStaleProfile) {
+ runStaleProfileMatching();
+ countProfileMismatches(false);
----------------
wlei-llvm wrote:
> Yes I noticed that. Was thinking these can be passed in as parameters.
Maybe we can put them into a class ProfileMatchStats, then pass PreMatchStats and PostMatchStats instances respectively.
https://github.com/llvm/llvm-project/pull/79090
More information about the libcxx-commits
mailing list