[libcxx-commits] [libc] [llvm] [clang-tools-extra] [libcxxabi] [flang] [lld] [compiler-rt] [clang] [libcxx] [CSSPGO] Compute and report post-match profile staleness (PR #79090)
Lei Wang via libcxx-commits
libcxx-commits at lists.llvm.org
Wed Jan 24 15:25:09 PST 2024
================
@@ -2422,59 +2342,226 @@ void SampleProfileMatcher::runStaleProfileMatching(
}
}
-void SampleProfileMatcher::runOnFunction(const Function &F) {
- // We need to use flattened function samples for matching.
- // Unlike IR, which includes all callsites from the source code, the callsites
- // in profile only show up when they are hit by samples, i,e. the profile
- // callsites in one context may differ from those in another context. To get
- // the maximum number of callsites, we merge the function profiles from all
- // contexts, aka, the flattened profile to find profile anchors.
- const auto *FSFlattened = getFlattenedSamplesFor(F);
- if (!FSFlattened)
- return;
+void SampleProfileMatcher::runStaleProfileMatching() {
+ for (const auto &F : M) {
+ if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
+ continue;
+ const auto *FSFlattened = getFlattenedSamplesFor(F);
+ if (!FSFlattened)
+ continue;
+ auto IR = FuncIRAnchors.find(&F);
+ auto P = FuncProfileAnchors.find(&F);
+ if (IR == FuncIRAnchors.end() || P == FuncProfileAnchors.end())
+ continue;
- // Anchors for IR. It's a map from IR location to callee name, callee name is
- // empty for non-call instruction and use a dummy name(UnknownIndirectCallee)
- // for unknown indrect callee name.
- std::map<LineLocation, StringRef> IRAnchors;
- findIRAnchors(F, IRAnchors);
- // Anchors for profile. It's a map from callsite location to a set of callee
- // name.
- std::map<LineLocation, std::unordered_set<FunctionId>> ProfileAnchors;
- findProfileAnchors(*FSFlattened, ProfileAnchors);
-
- // Detect profile mismatch for profile staleness metrics report.
- // Skip reporting the metrics for imported functions.
- if (!GlobalValue::isAvailableExternallyLinkage(F.getLinkage()) &&
- (ReportProfileStaleness || PersistProfileStaleness)) {
- // Use top-level nested FS for counting profile mismatch metrics since
- // currently once a callsite is mismatched, all its children profiles are
- // dropped.
- if (const auto *FS = Reader.getSamplesFor(F))
- countProfileMismatches(F, *FS, IRAnchors, ProfileAnchors);
+ // Run profile matching for checksum mismatched profile, currently only
+ // support for pseudo-probe.
+ if (FunctionSamples::ProfileIsProbeBased &&
+ !ProbeManager->profileIsValid(F, *FSFlattened)) {
+ runStaleProfileMatching(F, IR->second, P->second,
+ getIRToProfileLocationMap(F));
+ }
}
- // Run profile matching for checksum mismatched profile, currently only
- // support for pseudo-probe.
- if (SalvageStaleProfile && FunctionSamples::ProfileIsProbeBased &&
- !ProbeManager->profileIsValid(F, *FSFlattened)) {
- // The matching result will be saved to IRToProfileLocationMap, create a new
- // map for each function.
- runStaleProfileMatching(F, IRAnchors, ProfileAnchors,
- getIRToProfileLocationMap(F));
- }
+ distributeIRToProfileLocationMap();
}
-void SampleProfileMatcher::runOnModule() {
+void SampleProfileMatcher::findFuncAnchors() {
ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
FunctionSamples::ProfileIsCS);
- for (auto &F : M) {
+ for (const auto &F : M) {
if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
continue;
- runOnFunction(F);
+ // We need to use flattened function samples for matching.
+ // Unlike IR, which includes all callsites from the source code, the
+ // callsites in profile only show up when they are hit by samples, i,e. the
+ // profile callsites in one context may differ from those in another
+ // context. To get the maximum number of callsites, we merge the function
+ // profiles from all contexts, aka, the flattened profile to find profile
+ // anchors.
+ const auto *FSFlattened = getFlattenedSamplesFor(F);
+ if (!FSFlattened)
+ continue;
+
+ // Anchors for IR. It's a map from IR location to callee name, callee name
+ // is empty for non-call instruction and use a dummy
+ // name(UnknownIndirectCallee) for unknown indrect callee name.
+ auto IR = FuncIRAnchors.emplace(&F, IRAnchorMap());
+ findIRAnchors(F, IR.first->second);
+
+ // Anchors for profile. It's a map from callsite location to a set of callee
+ // name.
+ auto P = FuncProfileAnchors.emplace(&F, ProfileAnchorMap());
+ findProfileAnchors(*FSFlattened, P.first->second);
+ }
+}
+
+void SampleProfileMatcher::countMismatchedCallsiteSamples(
+ const FunctionSamples &FS,
+ StringMap<std::set<LineLocation>> &FuncToMismatchCallsites,
+ uint64_t &FuncMismatchedCallsiteSamples) const {
+ auto It = FuncToMismatchCallsites.find(FS.getFuncName());
+ // Skip it if no mismatched callsite or this is an external function.
+ if (It == FuncToMismatchCallsites.end() || It->second.empty())
+ return;
+ const auto &MismatchCallsites = It->second;
+ for (const auto &I : FS.getBodySamples()) {
+ if (MismatchCallsites.count(I.first))
+ FuncMismatchedCallsiteSamples += I.second.getSamples();
+ }
+
+ for (const auto &I : FS.getCallsiteSamples()) {
+ const auto &Loc = I.first;
+ if (MismatchCallsites.count(Loc)) {
+ for (const auto &CS : I.second)
+ FuncMismatchedCallsiteSamples += CS.second.getTotalSamples();
+ continue;
+ }
+
+ // count mismatched samples for inlined samples.
+ for (const auto &CS : I.second)
+ countMismatchedCallsiteSamples(CS.second, FuncToMismatchCallsites,
+ FuncMismatchedCallsiteSamples);
+ }
+}
+
+void SampleProfileMatcher::countMismatchedCallsites(
+ const Function &F,
+ StringMap<std::set<LineLocation>> &FuncToMismatchCallsites,
+ uint64_t &FuncProfiledCallsites, uint64_t &FuncMismatchedCallsites) const {
+ auto IR = FuncIRAnchors.find(&F);
+ auto P = FuncProfileAnchors.find(&F);
+ if (IR == FuncIRAnchors.end() || P == FuncProfileAnchors.end())
+ return;
+ const auto &IRAnchors = IR->second;
+ const auto &ProfileAnchors = P->second;
+
+ auto &MismatchCallsites =
+ FuncToMismatchCallsites[FunctionSamples::getCanonicalFnName(F.getName())];
+
+ // Check if there are any callsites in the profile that does not match to any
+ // IR callsites, those callsite samples will be discarded.
+ for (const auto &I : ProfileAnchors) {
+ const auto &Loc = I.first;
+ const auto &Callees = I.second;
+ assert(!Callees.empty() && "Callees should not be empty");
+
+ StringRef IRCalleeName;
+ const auto &IR = IRAnchors.find(Loc);
+ if (IR != IRAnchors.end())
+ IRCalleeName = IR->second;
+ bool CallsiteIsMatched = false;
+ // Since indirect call does not have CalleeName, check conservatively if
+ // callsite in the profile is a callsite location. This is to reduce num of
+ // false positive since otherwise all the indirect call samples will be
+ // reported as mismatching.
+ if (IRCalleeName == UnknownIndirectCallee)
+ CallsiteIsMatched = true;
+ else if (Callees.count(FunctionId(IRCalleeName)))
+ CallsiteIsMatched = true;
+
+ FuncProfiledCallsites++;
+ if (!CallsiteIsMatched) {
+ FuncMismatchedCallsites++;
+ MismatchCallsites.insert(Loc);
+ }
+ }
+}
+
+void SampleProfileMatcher::countMismatchedHashes(const Function &F,
+ const FunctionSamples &FS) {
+ if (!FunctionSamples::ProfileIsProbeBased)
+ return;
+ const auto *FuncDesc = ProbeManager->getDesc(F);
+ if (FuncDesc) {
+ if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
+ NumMismatchedFuncHash++;
+ }
+ countMismatchedHashSamples(FS);
+ }
+}
+
+void SampleProfileMatcher::UpdateIRAnchors() {
+ for (auto &I : FuncIRAnchors) {
+ const auto *F = I.first;
+ auto &IRAnchors = I.second;
+ const auto Mapping =
+ FuncMappings.find(FunctionSamples::getCanonicalFnName(F->getName()));
+ if (Mapping == FuncMappings.end())
+ continue;
+ IRAnchorMap UpdatedIRAnchors;
+ const auto &LocToLocMapping = Mapping->second;
+ for (const auto L : LocToLocMapping) {
+ UpdatedIRAnchors[L.second] = IRAnchors[L.first];
+ IRAnchors.erase(L.first);
+ }
+
+ for (const auto &IR : UpdatedIRAnchors) {
+ IRAnchors[IR.first] = IR.second;
+ }
+ }
+}
+
+void SampleProfileMatcher::countProfileMismatches(bool IsPreMatch) {
+ if (!ReportProfileStaleness && !PersistProfileStaleness)
+ return;
+
+ if (!IsPreMatch) {
+ // Use the profile matching results to update to the IR anchors.
+ UpdateIRAnchors();
+ }
+
+ uint64_t UnusedCounter = 0;
+ uint64_t *TotalProfiledCallsitesPtr =
+ IsPreMatch ? &TotalProfiledCallsites : &UnusedCounter;
+ uint64_t *NumMismatchedCallsitesPtr =
+ IsPreMatch ? &NumMismatchedCallsites : &PostMatchNumMismatchedCallsites;
+ uint64_t *MismatchedCallsiteSamplesPtr =
+ IsPreMatch ? &MismatchedCallsiteSamples
+ : &PostMatchMismatchedCallsiteSamples;
+
+ auto SkipFunctionForReport = [](const Function &F) {
+ if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
+ return true;
+ // Skip reporting the metrics for imported functions.
+ if (GlobalValue::isAvailableExternallyLinkage(F.getLinkage()))
+ return true;
+ return false;
+ };
+
+ StringMap<std::set<LineLocation>> FuncToMismatchCallsites;
+ for (const auto &F : M) {
+ if (SkipFunctionForReport(F))
+ continue;
+ const auto *FS = Reader.getSamplesFor(F);
+ if (FS && IsPreMatch) {
+ // Only count the total function metrics once in pre-match time.
+ TotalFuncHashSamples += FS->getTotalSamples();
+ TotalProfiledFunc++;
+ countMismatchedHashes(F, *FS);
+ }
+ countMismatchedCallsites(F, FuncToMismatchCallsites,
+ *TotalProfiledCallsitesPtr,
+ *NumMismatchedCallsitesPtr);
+ }
+
+ for (const auto &F : M) {
+ if (SkipFunctionForReport(F))
+ continue;
+ if (const auto *FS = Reader.getSamplesFor(F))
+ countMismatchedCallsiteSamples(*FS, FuncToMismatchCallsites,
+ *MismatchedCallsiteSamplesPtr);
+ }
+}
+
+void SampleProfileMatcher::runOnModule() {
----------------
wlei-llvm wrote:
This is because we need to count the inlined/nested callsite samples (previously we only counted the top-level callsite samples).
e.g.
```
main : 1001:1
1:1
2: foo:1000:1
1: 1
bar: 999
```
Supposing the processing order is main --> foo --> bar
Before, it only counted samples for function main and would miss bar's samples, because at that time function bar had not been processed yet, so there were no anchors or any mismatch info for it.
Now I changed it to first find all the anchors and do the mismatch computation; the results are saved into a map<function to mismatch-locations>, and then the samples are counted recursively (see `countMismatchedCallsiteSamples`).
That's why each step needs to "runOnModule"; this indeed needs a lot of new function names, which is confusing.
https://github.com/llvm/llvm-project/pull/79090
More information about the libcxx-commits
mailing list