[flang-commits] [libc] [llvm] [compiler-rt] [flang] [lld] [clang] [libcxx] [clang-tools-extra] [libcxxabi] [CSSPGO] Compute and report post-match profile staleness (PR #79090)
via flang-commits
flang-commits at lists.llvm.org
Wed Jan 24 14:28:21 PST 2024
================
@@ -2422,59 +2342,226 @@ void SampleProfileMatcher::runStaleProfileMatching(
}
}
-void SampleProfileMatcher::runOnFunction(const Function &F) {
- // We need to use flattened function samples for matching.
- // Unlike IR, which includes all callsites from the source code, the callsites
- // in profile only show up when they are hit by samples, i,e. the profile
- // callsites in one context may differ from those in another context. To get
- // the maximum number of callsites, we merge the function profiles from all
- // contexts, aka, the flattened profile to find profile anchors.
- const auto *FSFlattened = getFlattenedSamplesFor(F);
- if (!FSFlattened)
- return;
+void SampleProfileMatcher::runStaleProfileMatching() {
+ for (const auto &F : M) {
+ if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
+ continue;
+ const auto *FSFlattened = getFlattenedSamplesFor(F);
+ if (!FSFlattened)
+ continue;
+ auto IR = FuncIRAnchors.find(&F);
+ auto P = FuncProfileAnchors.find(&F);
+ if (IR == FuncIRAnchors.end() || P == FuncProfileAnchors.end())
+ continue;
- // Anchors for IR. It's a map from IR location to callee name, callee name is
- // empty for non-call instruction and use a dummy name(UnknownIndirectCallee)
- // for unknown indrect callee name.
- std::map<LineLocation, StringRef> IRAnchors;
- findIRAnchors(F, IRAnchors);
- // Anchors for profile. It's a map from callsite location to a set of callee
- // name.
- std::map<LineLocation, std::unordered_set<FunctionId>> ProfileAnchors;
- findProfileAnchors(*FSFlattened, ProfileAnchors);
-
- // Detect profile mismatch for profile staleness metrics report.
- // Skip reporting the metrics for imported functions.
- if (!GlobalValue::isAvailableExternallyLinkage(F.getLinkage()) &&
- (ReportProfileStaleness || PersistProfileStaleness)) {
- // Use top-level nested FS for counting profile mismatch metrics since
- // currently once a callsite is mismatched, all its children profiles are
- // dropped.
- if (const auto *FS = Reader.getSamplesFor(F))
- countProfileMismatches(F, *FS, IRAnchors, ProfileAnchors);
+ // Run profile matching for checksum mismatched profile, currently only
+ // support for pseudo-probe.
+ if (FunctionSamples::ProfileIsProbeBased &&
+ !ProbeManager->profileIsValid(F, *FSFlattened)) {
+ runStaleProfileMatching(F, IR->second, P->second,
+ getIRToProfileLocationMap(F));
+ }
}
- // Run profile matching for checksum mismatched profile, currently only
- // support for pseudo-probe.
- if (SalvageStaleProfile && FunctionSamples::ProfileIsProbeBased &&
- !ProbeManager->profileIsValid(F, *FSFlattened)) {
- // The matching result will be saved to IRToProfileLocationMap, create a new
- // map for each function.
- runStaleProfileMatching(F, IRAnchors, ProfileAnchors,
- getIRToProfileLocationMap(F));
- }
+ distributeIRToProfileLocationMap();
}
-void SampleProfileMatcher::runOnModule() {
+void SampleProfileMatcher::findFuncAnchors() {
ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
FunctionSamples::ProfileIsCS);
- for (auto &F : M) {
+ for (const auto &F : M) {
if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
continue;
- runOnFunction(F);
+ // We need to use flattened function samples for matching.
+ // Unlike IR, which includes all callsites from the source code, the
+ // callsites in profile only show up when they are hit by samples, i,e. the
+ // profile callsites in one context may differ from those in another
+ // context. To get the maximum number of callsites, we merge the function
+ // profiles from all contexts, aka, the flattened profile to find profile
+ // anchors.
+ const auto *FSFlattened = getFlattenedSamplesFor(F);
+ if (!FSFlattened)
+ continue;
+
+ // Anchors for IR. It's a map from IR location to callee name, callee name
+ // is empty for non-call instruction and use a dummy
+ // name(UnknownIndirectCallee) for unknown indrect callee name.
+ auto IR = FuncIRAnchors.emplace(&F, IRAnchorMap());
+ findIRAnchors(F, IR.first->second);
+
+ // Anchors for profile. It's a map from callsite location to a set of callee
+ // name.
+ auto P = FuncProfileAnchors.emplace(&F, ProfileAnchorMap());
+ findProfileAnchors(*FSFlattened, P.first->second);
+ }
+}
+
+void SampleProfileMatcher::countMismatchedCallsiteSamples(
+ const FunctionSamples &FS,
+ StringMap<std::set<LineLocation>> &FuncToMismatchCallsites,
+ uint64_t &FuncMismatchedCallsiteSamples) const {
+ auto It = FuncToMismatchCallsites.find(FS.getFuncName());
+ // Skip it if no mismatched callsite or this is an external function.
+ if (It == FuncToMismatchCallsites.end() || It->second.empty())
+ return;
+ const auto &MismatchCallsites = It->second;
+ for (const auto &I : FS.getBodySamples()) {
+ if (MismatchCallsites.count(I.first))
+ FuncMismatchedCallsiteSamples += I.second.getSamples();
+ }
+
+ for (const auto &I : FS.getCallsiteSamples()) {
+ const auto &Loc = I.first;
+ if (MismatchCallsites.count(Loc)) {
+ for (const auto &CS : I.second)
+ FuncMismatchedCallsiteSamples += CS.second.getTotalSamples();
+ continue;
+ }
+
+ // count mismatched samples for inlined samples.
+ for (const auto &CS : I.second)
+ countMismatchedCallsiteSamples(CS.second, FuncToMismatchCallsites,
+ FuncMismatchedCallsiteSamples);
+ }
+}
+
+void SampleProfileMatcher::countMismatchedCallsites(
+ const Function &F,
+ StringMap<std::set<LineLocation>> &FuncToMismatchCallsites,
+ uint64_t &FuncProfiledCallsites, uint64_t &FuncMismatchedCallsites) const {
+ auto IR = FuncIRAnchors.find(&F);
+ auto P = FuncProfileAnchors.find(&F);
+ if (IR == FuncIRAnchors.end() || P == FuncProfileAnchors.end())
+ return;
+ const auto &IRAnchors = IR->second;
+ const auto &ProfileAnchors = P->second;
+
+ auto &MismatchCallsites =
+ FuncToMismatchCallsites[FunctionSamples::getCanonicalFnName(F.getName())];
+
+ // Check if there are any callsites in the profile that does not match to any
+ // IR callsites, those callsite samples will be discarded.
+ for (const auto &I : ProfileAnchors) {
+ const auto &Loc = I.first;
+ const auto &Callees = I.second;
+ assert(!Callees.empty() && "Callees should not be empty");
+
+ StringRef IRCalleeName;
+ const auto &IR = IRAnchors.find(Loc);
+ if (IR != IRAnchors.end())
+ IRCalleeName = IR->second;
+ bool CallsiteIsMatched = false;
+ // Since indirect call does not have CalleeName, check conservatively if
+ // callsite in the profile is a callsite location. This is to reduce num of
+ // false positive since otherwise all the indirect call samples will be
+ // reported as mismatching.
+ if (IRCalleeName == UnknownIndirectCallee)
+ CallsiteIsMatched = true;
+ else if (Callees.count(FunctionId(IRCalleeName)))
+ CallsiteIsMatched = true;
+
+ FuncProfiledCallsites++;
+ if (!CallsiteIsMatched) {
+ FuncMismatchedCallsites++;
+ MismatchCallsites.insert(Loc);
+ }
+ }
+}
+
+void SampleProfileMatcher::countMismatchedHashes(const Function &F,
+ const FunctionSamples &FS) {
+ if (!FunctionSamples::ProfileIsProbeBased)
+ return;
+ const auto *FuncDesc = ProbeManager->getDesc(F);
+ if (FuncDesc) {
+ if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
+ NumMismatchedFuncHash++;
+ }
+ countMismatchedHashSamples(FS);
+ }
+}
+
+void SampleProfileMatcher::UpdateIRAnchors() {
+ for (auto &I : FuncIRAnchors) {
+ const auto *F = I.first;
+ auto &IRAnchors = I.second;
+ const auto Mapping =
+ FuncMappings.find(FunctionSamples::getCanonicalFnName(F->getName()));
+ if (Mapping == FuncMappings.end())
+ continue;
+ IRAnchorMap UpdatedIRAnchors;
+ const auto &LocToLocMapping = Mapping->second;
+ for (const auto L : LocToLocMapping) {
+ UpdatedIRAnchors[L.second] = IRAnchors[L.first];
+ IRAnchors.erase(L.first);
+ }
+
+ for (const auto &IR : UpdatedIRAnchors) {
+ IRAnchors[IR.first] = IR.second;
+ }
+ }
+}
+
+void SampleProfileMatcher::countProfileMismatches(bool IsPreMatch) {
+ if (!ReportProfileStaleness && !PersistProfileStaleness)
+ return;
+
+ if (!IsPreMatch) {
+ // Use the profile matching results to update to the IR anchors.
+ UpdateIRAnchors();
+ }
+
+ uint64_t UnusedCounter = 0;
+ uint64_t *TotalProfiledCallsitesPtr =
+ IsPreMatch ? &TotalProfiledCallsites : &UnusedCounter;
+ uint64_t *NumMismatchedCallsitesPtr =
+ IsPreMatch ? &NumMismatchedCallsites : &PostMatchNumMismatchedCallsites;
+ uint64_t *MismatchedCallsiteSamplesPtr =
+ IsPreMatch ? &MismatchedCallsiteSamples
+ : &PostMatchMismatchedCallsiteSamples;
+
+ auto SkipFunctionForReport = [](const Function &F) {
+ if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
+ return true;
+ // Skip reporting the metrics for imported functions.
+ if (GlobalValue::isAvailableExternallyLinkage(F.getLinkage()))
+ return true;
+ return false;
+ };
+
+ StringMap<std::set<LineLocation>> FuncToMismatchCallsites;
+ for (const auto &F : M) {
+ if (SkipFunctionForReport(F))
+ continue;
+ const auto *FS = Reader.getSamplesFor(F);
+ if (FS && IsPreMatch) {
+ // Only count the total function metrics once in pre-match time.
+ TotalFuncHashSamples += FS->getTotalSamples();
+ TotalProfiledFunc++;
+ countMismatchedHashes(F, *FS);
+ }
+ countMismatchedCallsites(F, FuncToMismatchCallsites,
+ *TotalProfiledCallsitesPtr,
+ *NumMismatchedCallsitesPtr);
+ }
+
+ for (const auto &F : M) {
+ if (SkipFunctionForReport(F))
+ continue;
+ if (const auto *FS = Reader.getSamplesFor(F))
+ countMismatchedCallsiteSamples(*FS, FuncToMismatchCallsites,
+ *MismatchedCallsiteSamplesPtr);
+ }
+}
+
+void SampleProfileMatcher::runOnModule() {
----------------
WenleiHe wrote:
Could you explain why we need turn `runOnFunctions` into `runOnModule`? I understand that we now need to update anchor for reporting after matching, but what prevents us from doing that functions by function? Keep all anchors for all functions around also has a memory cost.
(Without that change and the renaming, or doing those separately, it'd be easier to review)
https://github.com/llvm/llvm-project/pull/79090
More information about the flang-commits
mailing list