[flang-commits] [libc] [lld] [libcxx] [libcxxabi] [compiler-rt] [llvm] [clang] [flang] [clang-tools-extra] [CSSPGO] Compute and report post-match profile staleness (PR #79090)
Lei Wang via flang-commits
flang-commits at lists.llvm.org
Fri Jan 26 17:52:48 PST 2024
https://github.com/wlei-llvm updated https://github.com/llvm/llvm-project/pull/79090
>From 54784e26f33178efd21b0289a1f673d66ea26cc3 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Mon, 22 Jan 2024 19:16:26 -0800
Subject: [PATCH 1/3] [CSSPGO] Support post-match profile staleness metrics
---
llvm/lib/Transforms/IPO/SampleProfile.cpp | 440 +++++++++++-------
.../Inputs/profile-mismatch.prof | 7 +-
.../SampleProfile/profile-mismatch.ll | 12 +-
.../pseudo-probe-profile-mismatch-thinlto.ll | 6 +-
.../pseudo-probe-profile-mismatch.ll | 76 +--
5 files changed, 324 insertions(+), 217 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 2fd8668d15e200f..a7170faa65dc07c 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -433,12 +433,19 @@ using CandidateQueue =
PriorityQueue<InlineCandidate, std::vector<InlineCandidate>,
CandidateComparer>;
+using IRAnchorMap = std::map<LineLocation, StringRef>;
+using ProfileAnchorMap = std::map<LineLocation, std::unordered_set<FunctionId>>;
+
// Sample profile matching - fuzzy match.
class SampleProfileMatcher {
Module &M;
SampleProfileReader &Reader;
const PseudoProbeManager *ProbeManager;
SampleProfileMap FlattenedProfiles;
+
+ std::unordered_map<const Function *, IRAnchorMap> FuncIRAnchors;
+ std::unordered_map<const Function *, ProfileAnchorMap> FuncProfileAnchors;
+
// For each function, the matcher generates a map, of which each entry is a
// mapping from the source location of current build to the source location in
// the profile.
@@ -448,6 +455,8 @@ class SampleProfileMatcher {
uint64_t TotalProfiledCallsites = 0;
uint64_t NumMismatchedCallsites = 0;
uint64_t MismatchedCallsiteSamples = 0;
+ uint64_t PostMatchNumMismatchedCallsites = 0;
+ uint64_t PostMatchMismatchedCallsiteSamples = 0;
uint64_t TotalCallsiteSamples = 0;
uint64_t TotalProfiledFunc = 0;
uint64_t NumMismatchedFuncHash = 0;
@@ -474,24 +483,22 @@ class SampleProfileMatcher {
return nullptr;
}
void runOnFunction(const Function &F);
- void findIRAnchors(const Function &F,
- std::map<LineLocation, StringRef> &IRAnchors);
- void findProfileAnchors(
+ void findFuncAnchors();
+ void UpdateIRAnchors();
+ void findIRAnchors(const Function &F, IRAnchorMap &IRAnchors);
+ void findProfileAnchors(const FunctionSamples &FS,
+ ProfileAnchorMap &ProfileAnchors);
+ void countMismatchedHashSamples(const FunctionSamples &FS);
+ void countProfileMismatches(bool IsPreMatch);
+ void countMismatchedHashes(const Function &F, const FunctionSamples &FS);
+ void countMismatchedCallsites(
+ const Function &F,
+ StringMap<std::set<LineLocation>> &FuncToMismatchCallsites,
+ uint64_t &FuncProfiledCallsites, uint64_t &FuncMismatchedCallsites) const;
+ void countMismatchedCallsiteSamples(
const FunctionSamples &FS,
- std::map<LineLocation, std::unordered_set<FunctionId>>
- &ProfileAnchors);
- void countMismatchedSamples(const FunctionSamples &FS);
- void countProfileMismatches(
- const Function &F, const FunctionSamples &FS,
- const std::map<LineLocation, StringRef> &IRAnchors,
- const std::map<LineLocation, std::unordered_set<FunctionId>>
- &ProfileAnchors);
- void countProfileCallsiteMismatches(
- const FunctionSamples &FS,
- const std::map<LineLocation, StringRef> &IRAnchors,
- const std::map<LineLocation, std::unordered_set<FunctionId>>
- &ProfileAnchors,
- uint64_t &FuncMismatchedCallsites, uint64_t &FuncProfiledCallsites);
+ StringMap<std::set<LineLocation>> &FuncToMismatchCallsites,
+ uint64_t &FuncMismatchedCallsiteSamples) const;
LocToLocMap &getIRToProfileLocationMap(const Function &F) {
auto Ret = FuncMappings.try_emplace(
FunctionSamples::getCanonicalFnName(F.getName()), LocToLocMap());
@@ -499,11 +506,10 @@ class SampleProfileMatcher {
}
void distributeIRToProfileLocationMap();
void distributeIRToProfileLocationMap(FunctionSamples &FS);
- void runStaleProfileMatching(
- const Function &F, const std::map<LineLocation, StringRef> &IRAnchors,
- const std::map<LineLocation, std::unordered_set<FunctionId>>
- &ProfileAnchors,
- LocToLocMap &IRToProfileLocationMap);
+ void runStaleProfileMatching();
+ void runStaleProfileMatching(const Function &F, const IRAnchorMap &IRAnchors,
+ const ProfileAnchorMap &ProfileAnchors,
+ LocToLocMap &IRToProfileLocationMap);
};
/// Sample profile pass.
@@ -1129,7 +1135,7 @@ void SampleProfileLoader::findExternalInlineCandidate(
CalleeSample->getContext().hasAttribute(ContextShouldBeInlined);
if (!PreInline && CalleeSample->getHeadSamplesEstimate() < Threshold)
continue;
-
+
Function *Func = SymbolMap.lookup(CalleeSample->getFunction());
// Add to the import list only when it's defined out of module.
if (!Func || Func->isDeclaration())
@@ -2123,8 +2129,8 @@ bool SampleProfileLoader::doInitialization(Module &M,
return true;
}
-void SampleProfileMatcher::findIRAnchors(
- const Function &F, std::map<LineLocation, StringRef> &IRAnchors) {
+void SampleProfileMatcher::findIRAnchors(const Function &F,
+ IRAnchorMap &IRAnchors) {
// For inlined code, recover the original callsite and callee by finding the
// top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the
// top-level frame is "main:1", the callsite is "1" and the callee is "foo".
@@ -2190,7 +2196,8 @@ void SampleProfileMatcher::findIRAnchors(
}
}
-void SampleProfileMatcher::countMismatchedSamples(const FunctionSamples &FS) {
+void SampleProfileMatcher::countMismatchedHashSamples(
+ const FunctionSamples &FS) {
const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
// Skip the function that is external or renamed.
if (!FuncDesc)
@@ -2202,96 +2209,11 @@ void SampleProfileMatcher::countMismatchedSamples(const FunctionSamples &FS) {
}
for (const auto &I : FS.getCallsiteSamples())
for (const auto &CS : I.second)
- countMismatchedSamples(CS.second);
-}
-
-void SampleProfileMatcher::countProfileMismatches(
- const Function &F, const FunctionSamples &FS,
- const std::map<LineLocation, StringRef> &IRAnchors,
- const std::map<LineLocation, std::unordered_set<FunctionId>>
- &ProfileAnchors) {
- [[maybe_unused]] bool IsFuncHashMismatch = false;
- if (FunctionSamples::ProfileIsProbeBased) {
- TotalFuncHashSamples += FS.getTotalSamples();
- TotalProfiledFunc++;
- const auto *FuncDesc = ProbeManager->getDesc(F);
- if (FuncDesc) {
- if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
- NumMismatchedFuncHash++;
- IsFuncHashMismatch = true;
- }
- countMismatchedSamples(FS);
- }
- }
-
- uint64_t FuncMismatchedCallsites = 0;
- uint64_t FuncProfiledCallsites = 0;
- countProfileCallsiteMismatches(FS, IRAnchors, ProfileAnchors,
- FuncMismatchedCallsites,
- FuncProfiledCallsites);
- TotalProfiledCallsites += FuncProfiledCallsites;
- NumMismatchedCallsites += FuncMismatchedCallsites;
- LLVM_DEBUG({
- if (FunctionSamples::ProfileIsProbeBased && !IsFuncHashMismatch &&
- FuncMismatchedCallsites)
- dbgs() << "Function checksum is matched but there are "
- << FuncMismatchedCallsites << "/" << FuncProfiledCallsites
- << " mismatched callsites.\n";
- });
+ countMismatchedHashSamples(CS.second);
}
-void SampleProfileMatcher::countProfileCallsiteMismatches(
- const FunctionSamples &FS,
- const std::map<LineLocation, StringRef> &IRAnchors,
- const std::map<LineLocation, std::unordered_set<FunctionId>>
- &ProfileAnchors,
- uint64_t &FuncMismatchedCallsites, uint64_t &FuncProfiledCallsites) {
-
- // Check if there are any callsites in the profile that does not match to any
- // IR callsites, those callsite samples will be discarded.
- for (const auto &I : ProfileAnchors) {
- const auto &Loc = I.first;
- const auto &Callees = I.second;
- assert(!Callees.empty() && "Callees should not be empty");
-
- StringRef IRCalleeName;
- const auto &IR = IRAnchors.find(Loc);
- if (IR != IRAnchors.end())
- IRCalleeName = IR->second;
-
- // Compute number of samples in the original profile.
- uint64_t CallsiteSamples = 0;
- if (auto CTM = FS.findCallTargetMapAt(Loc)) {
- for (const auto &I : *CTM)
- CallsiteSamples += I.second;
- }
- const auto *FSMap = FS.findFunctionSamplesMapAt(Loc);
- if (FSMap) {
- for (const auto &I : *FSMap)
- CallsiteSamples += I.second.getTotalSamples();
- }
-
- bool CallsiteIsMatched = false;
- // Since indirect call does not have CalleeName, check conservatively if
- // callsite in the profile is a callsite location. This is to reduce num of
- // false positive since otherwise all the indirect call samples will be
- // reported as mismatching.
- if (IRCalleeName == UnknownIndirectCallee)
- CallsiteIsMatched = true;
- else if (Callees.size() == 1 && Callees.count(getRepInFormat(IRCalleeName)))
- CallsiteIsMatched = true;
-
- FuncProfiledCallsites++;
- TotalCallsiteSamples += CallsiteSamples;
- if (!CallsiteIsMatched) {
- FuncMismatchedCallsites++;
- MismatchedCallsiteSamples += CallsiteSamples;
- }
- }
-}
-
-void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
- std::map<LineLocation, std::unordered_set<FunctionId>> &ProfileAnchors) {
+void SampleProfileMatcher::findProfileAnchors(
+ const FunctionSamples &FS, ProfileAnchorMap &ProfileAnchors) {
auto isInvalidLineOffset = [](uint32_t LineOffset) {
return LineOffset & 0x8000;
};
@@ -2338,10 +2260,8 @@ void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
// [1, 2, 3(foo), 4, 7, 8(bar), 9]
// The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
void SampleProfileMatcher::runStaleProfileMatching(
- const Function &F,
- const std::map<LineLocation, StringRef> &IRAnchors,
- const std::map<LineLocation, std::unordered_set<FunctionId>>
- &ProfileAnchors,
+ const Function &F, const IRAnchorMap &IRAnchors,
+ const ProfileAnchorMap &ProfileAnchors,
LocToLocMap &IRToProfileLocationMap) {
LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
<< "\n");
@@ -2422,59 +2342,226 @@ void SampleProfileMatcher::runStaleProfileMatching(
}
}
-void SampleProfileMatcher::runOnFunction(const Function &F) {
- // We need to use flattened function samples for matching.
- // Unlike IR, which includes all callsites from the source code, the callsites
- // in profile only show up when they are hit by samples, i,e. the profile
- // callsites in one context may differ from those in another context. To get
- // the maximum number of callsites, we merge the function profiles from all
- // contexts, aka, the flattened profile to find profile anchors.
- const auto *FSFlattened = getFlattenedSamplesFor(F);
- if (!FSFlattened)
- return;
+void SampleProfileMatcher::runStaleProfileMatching() {
+ for (const auto &F : M) {
+ if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
+ continue;
+ const auto *FSFlattened = getFlattenedSamplesFor(F);
+ if (!FSFlattened)
+ continue;
+ auto IR = FuncIRAnchors.find(&F);
+ auto P = FuncProfileAnchors.find(&F);
+ if (IR == FuncIRAnchors.end() || P == FuncProfileAnchors.end())
+ continue;
- // Anchors for IR. It's a map from IR location to callee name, callee name is
- // empty for non-call instruction and use a dummy name(UnknownIndirectCallee)
- // for unknown indrect callee name.
- std::map<LineLocation, StringRef> IRAnchors;
- findIRAnchors(F, IRAnchors);
- // Anchors for profile. It's a map from callsite location to a set of callee
- // name.
- std::map<LineLocation, std::unordered_set<FunctionId>> ProfileAnchors;
- findProfileAnchors(*FSFlattened, ProfileAnchors);
-
- // Detect profile mismatch for profile staleness metrics report.
- // Skip reporting the metrics for imported functions.
- if (!GlobalValue::isAvailableExternallyLinkage(F.getLinkage()) &&
- (ReportProfileStaleness || PersistProfileStaleness)) {
- // Use top-level nested FS for counting profile mismatch metrics since
- // currently once a callsite is mismatched, all its children profiles are
- // dropped.
- if (const auto *FS = Reader.getSamplesFor(F))
- countProfileMismatches(F, *FS, IRAnchors, ProfileAnchors);
+ // Run profile matching for checksum mismatched profile, currently only
+ // support for pseudo-probe.
+ if (FunctionSamples::ProfileIsProbeBased &&
+ !ProbeManager->profileIsValid(F, *FSFlattened)) {
+ runStaleProfileMatching(F, IR->second, P->second,
+ getIRToProfileLocationMap(F));
+ }
}
- // Run profile matching for checksum mismatched profile, currently only
- // support for pseudo-probe.
- if (SalvageStaleProfile && FunctionSamples::ProfileIsProbeBased &&
- !ProbeManager->profileIsValid(F, *FSFlattened)) {
- // The matching result will be saved to IRToProfileLocationMap, create a new
- // map for each function.
- runStaleProfileMatching(F, IRAnchors, ProfileAnchors,
- getIRToProfileLocationMap(F));
- }
+ distributeIRToProfileLocationMap();
}
-void SampleProfileMatcher::runOnModule() {
+void SampleProfileMatcher::findFuncAnchors() {
ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
FunctionSamples::ProfileIsCS);
- for (auto &F : M) {
+ for (const auto &F : M) {
if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
continue;
- runOnFunction(F);
+ // We need to use flattened function samples for matching.
+ // Unlike IR, which includes all callsites from the source code, the
+ // callsites in profile only show up when they are hit by samples, i.e. the
+ // profile callsites in one context may differ from those in another
+ // context. To get the maximum number of callsites, we merge the function
+ // profiles from all contexts, aka, the flattened profile to find profile
+ // anchors.
+ const auto *FSFlattened = getFlattenedSamplesFor(F);
+ if (!FSFlattened)
+ continue;
+
+ // Anchors for IR. It's a map from IR location to callee name, callee name
+ // is empty for non-call instruction and use a dummy
+ // name(UnknownIndirectCallee) for unknown indirect callee name.
+ auto IR = FuncIRAnchors.emplace(&F, IRAnchorMap());
+ findIRAnchors(F, IR.first->second);
+
+ // Anchors for profile. It's a map from callsite location to a set of callee
+ // name.
+ auto P = FuncProfileAnchors.emplace(&F, ProfileAnchorMap());
+ findProfileAnchors(*FSFlattened, P.first->second);
+ }
+}
+
+void SampleProfileMatcher::countMismatchedCallsiteSamples(
+ const FunctionSamples &FS,
+ StringMap<std::set<LineLocation>> &FuncToMismatchCallsites,
+ uint64_t &FuncMismatchedCallsiteSamples) const {
+ auto It = FuncToMismatchCallsites.find(FS.getFuncName());
+ // Skip it if no mismatched callsite or this is an external function.
+ if (It == FuncToMismatchCallsites.end() || It->second.empty())
+ return;
+ const auto &MismatchCallsites = It->second;
+ for (const auto &I : FS.getBodySamples()) {
+ if (MismatchCallsites.count(I.first))
+ FuncMismatchedCallsiteSamples += I.second.getSamples();
+ }
+
+ for (const auto &I : FS.getCallsiteSamples()) {
+ const auto &Loc = I.first;
+ if (MismatchCallsites.count(Loc)) {
+ for (const auto &CS : I.second)
+ FuncMismatchedCallsiteSamples += CS.second.getTotalSamples();
+ continue;
+ }
+
+ // count mismatched samples for inlined samples.
+ for (const auto &CS : I.second)
+ countMismatchedCallsiteSamples(CS.second, FuncToMismatchCallsites,
+ FuncMismatchedCallsiteSamples);
+ }
+}
+
+void SampleProfileMatcher::countMismatchedCallsites(
+ const Function &F,
+ StringMap<std::set<LineLocation>> &FuncToMismatchCallsites,
+ uint64_t &FuncProfiledCallsites, uint64_t &FuncMismatchedCallsites) const {
+ auto IR = FuncIRAnchors.find(&F);
+ auto P = FuncProfileAnchors.find(&F);
+ if (IR == FuncIRAnchors.end() || P == FuncProfileAnchors.end())
+ return;
+ const auto &IRAnchors = IR->second;
+ const auto &ProfileAnchors = P->second;
+
+ auto &MismatchCallsites =
+ FuncToMismatchCallsites[FunctionSamples::getCanonicalFnName(F.getName())];
+
+ // Check if there are any callsites in the profile that do not match any
+ // IR callsites, those callsite samples will be discarded.
+ for (const auto &I : ProfileAnchors) {
+ const auto &Loc = I.first;
+ const auto &Callees = I.second;
+ assert(!Callees.empty() && "Callees should not be empty");
+
+ StringRef IRCalleeName;
+ const auto &IR = IRAnchors.find(Loc);
+ if (IR != IRAnchors.end())
+ IRCalleeName = IR->second;
+ bool CallsiteIsMatched = false;
+ // Since indirect call does not have CalleeName, check conservatively if
+ // callsite in the profile is a callsite location. This is to reduce num of
+ // false positive since otherwise all the indirect call samples will be
+ // reported as mismatching.
+ if (IRCalleeName == UnknownIndirectCallee)
+ CallsiteIsMatched = true;
+ else if (Callees.count(FunctionId(IRCalleeName)))
+ CallsiteIsMatched = true;
+
+ FuncProfiledCallsites++;
+ if (!CallsiteIsMatched) {
+ FuncMismatchedCallsites++;
+ MismatchCallsites.insert(Loc);
+ }
+ }
+}
+
+void SampleProfileMatcher::countMismatchedHashes(const Function &F,
+ const FunctionSamples &FS) {
+ if (!FunctionSamples::ProfileIsProbeBased)
+ return;
+ const auto *FuncDesc = ProbeManager->getDesc(F);
+ if (FuncDesc) {
+ if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
+ NumMismatchedFuncHash++;
+ }
+ countMismatchedHashSamples(FS);
+ }
+}
+
+void SampleProfileMatcher::UpdateIRAnchors() {
+ for (auto &I : FuncIRAnchors) {
+ const auto *F = I.first;
+ auto &IRAnchors = I.second;
+ const auto Mapping =
+ FuncMappings.find(FunctionSamples::getCanonicalFnName(F->getName()));
+ if (Mapping == FuncMappings.end())
+ continue;
+ IRAnchorMap UpdatedIRAnchors;
+ const auto &LocToLocMapping = Mapping->second;
+ for (const auto L : LocToLocMapping) {
+ UpdatedIRAnchors[L.second] = IRAnchors[L.first];
+ IRAnchors.erase(L.first);
+ }
+
+ for (const auto &IR : UpdatedIRAnchors) {
+ IRAnchors[IR.first] = IR.second;
+ }
+ }
+}
+
+void SampleProfileMatcher::countProfileMismatches(bool IsPreMatch) {
+ if (!ReportProfileStaleness && !PersistProfileStaleness)
+ return;
+
+ if (!IsPreMatch) {
+ // Use the profile matching results to update the IR anchors.
+ UpdateIRAnchors();
+ }
+
+ uint64_t UnusedCounter = 0;
+ uint64_t *TotalProfiledCallsitesPtr =
+ IsPreMatch ? &TotalProfiledCallsites : &UnusedCounter;
+ uint64_t *NumMismatchedCallsitesPtr =
+ IsPreMatch ? &NumMismatchedCallsites : &PostMatchNumMismatchedCallsites;
+ uint64_t *MismatchedCallsiteSamplesPtr =
+ IsPreMatch ? &MismatchedCallsiteSamples
+ : &PostMatchMismatchedCallsiteSamples;
+
+ auto SkipFunctionForReport = [](const Function &F) {
+ if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
+ return true;
+ // Skip reporting the metrics for imported functions.
+ if (GlobalValue::isAvailableExternallyLinkage(F.getLinkage()))
+ return true;
+ return false;
+ };
+
+ StringMap<std::set<LineLocation>> FuncToMismatchCallsites;
+ for (const auto &F : M) {
+ if (SkipFunctionForReport(F))
+ continue;
+ const auto *FS = Reader.getSamplesFor(F);
+ if (FS && IsPreMatch) {
+ // Only count the total function metrics once in pre-match time.
+ TotalFuncHashSamples += FS->getTotalSamples();
+ TotalProfiledFunc++;
+ countMismatchedHashes(F, *FS);
+ }
+ countMismatchedCallsites(F, FuncToMismatchCallsites,
+ *TotalProfiledCallsitesPtr,
+ *NumMismatchedCallsitesPtr);
+ }
+
+ for (const auto &F : M) {
+ if (SkipFunctionForReport(F))
+ continue;
+ if (const auto *FS = Reader.getSamplesFor(F))
+ countMismatchedCallsiteSamples(*FS, FuncToMismatchCallsites,
+ *MismatchedCallsiteSamplesPtr);
+ }
+}
+
+void SampleProfileMatcher::runOnModule() {
+ findFuncAnchors();
+ countProfileMismatches(true);
+
+ if (SalvageStaleProfile) {
+ runStaleProfileMatching();
+ countProfileMismatches(false);
}
- if (SalvageStaleProfile)
- distributeIRToProfileLocationMap();
if (ReportProfileStaleness) {
if (FunctionSamples::ProfileIsProbeBased) {
@@ -2487,9 +2574,18 @@ void SampleProfileMatcher::runOnModule() {
errs() << "(" << NumMismatchedCallsites << "/" << TotalProfiledCallsites
<< ")"
<< " of callsites' profile are invalid and "
- << "(" << MismatchedCallsiteSamples << "/" << TotalCallsiteSamples
+ << "(" << MismatchedCallsiteSamples << "/" << TotalFuncHashSamples
<< ")"
<< " of samples are discarded due to callsite location mismatch.\n";
+ if (SalvageStaleProfile) {
+ errs() << "(" << PostMatchNumMismatchedCallsites << "/"
+ << TotalProfiledCallsites << ")"
+ << " of callsites' profile are invalid and "
+ << "(" << PostMatchMismatchedCallsiteSamples << "/"
+ << TotalFuncHashSamples << ")"
+ << " of samples are discarded due to callsite location mismatch "
+ "after stale profile matching.\n";
+ }
}
if (PersistProfileStaleness) {
@@ -2497,19 +2593,23 @@ void SampleProfileMatcher::runOnModule() {
MDBuilder MDB(Ctx);
SmallVector<std::pair<StringRef, uint64_t>> ProfStatsVec;
+ ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites);
+ ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites);
+ ProfStatsVec.emplace_back("MismatchedCallsiteSamples",
+ MismatchedCallsiteSamples);
+ ProfStatsVec.emplace_back("TotalFuncHashSamples", TotalFuncHashSamples);
if (FunctionSamples::ProfileIsProbeBased) {
- ProfStatsVec.emplace_back("NumMismatchedFuncHash", NumMismatchedFuncHash);
ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc);
+ ProfStatsVec.emplace_back("NumMismatchedFuncHash", NumMismatchedFuncHash);
ProfStatsVec.emplace_back("MismatchedFuncHashSamples",
MismatchedFuncHashSamples);
- ProfStatsVec.emplace_back("TotalFuncHashSamples", TotalFuncHashSamples);
}
-
- ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites);
- ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites);
- ProfStatsVec.emplace_back("MismatchedCallsiteSamples",
- MismatchedCallsiteSamples);
- ProfStatsVec.emplace_back("TotalCallsiteSamples", TotalCallsiteSamples);
+ if (SalvageStaleProfile) {
+ ProfStatsVec.emplace_back("PostMatchNumMismatchedCallsites",
+ PostMatchNumMismatchedCallsites);
+ ProfStatsVec.emplace_back("PostMatchMismatchedCallsiteSamples",
+ PostMatchMismatchedCallsiteSamples);
+ }
auto *MD = MDB.createLLVMStats(ProfStatsVec);
auto *NMD = M.getOrInsertNamedMetadata("llvm.stats");
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof b/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof
index 818a048b8cabb84..f2a00e789b8b669 100644
--- a/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof
+++ b/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof
@@ -2,14 +2,15 @@ main:30:0
0: 0
1.1: 0
3: 10 matched:10
- 4: 10
- 5: 10 bar_mismatch:10
+ 7: 10
8: 0
- 7: foo:15
+ 4: foo:15
1: 5
2: 5
3: inlinee_mismatch:5
1: 5
+ 5: bar_mismatch:10
+ 1: 10
bar:10:10
1: 10
matched:10:10
diff --git a/llvm/test/Transforms/SampleProfile/profile-mismatch.ll b/llvm/test/Transforms/SampleProfile/profile-mismatch.ll
index d86175c02dbb423..e7c5dece1235b57 100644
--- a/llvm/test/Transforms/SampleProfile/profile-mismatch.ll
+++ b/llvm/test/Transforms/SampleProfile/profile-mismatch.ll
@@ -6,9 +6,9 @@
; RUN: llvm-objdump --section-headers %t.obj | FileCheck %s --check-prefix=CHECK-OBJ
; RUN: llc < %t.ll -filetype=asm -o - | FileCheck %s --check-prefix=CHECK-ASM
-; CHECK: (2/3) of callsites' profile are invalid and (25/35) of samples are discarded due to callsite location mismatch.
+; CHECK: (2/4) of callsites' profile are invalid and (15/50) of samples are discarded due to callsite location mismatch.
-; CHECK-MD: ![[#]] = !{!"NumMismatchedCallsites", i64 2, !"TotalProfiledCallsites", i64 3, !"MismatchedCallsiteSamples", i64 25, !"TotalCallsiteSamples", i64 35}
+; CHECK-MD: ![[#]] = !{!"NumMismatchedCallsites", i64 2, !"TotalProfiledCallsites", i64 4, !"MismatchedCallsiteSamples", i64 15, !"TotalFuncHashSamples", i64 50}
; CHECK-OBJ: .llvm_stats
@@ -20,15 +20,15 @@
; CHECK-ASM: .byte 22
; CHECK-ASM: .ascii "TotalProfiledCallsites"
; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii "Mw=="
+; CHECK-ASM: .ascii "NA=="
; CHECK-ASM: .byte 25
; CHECK-ASM: .ascii "MismatchedCallsiteSamples"
; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii "MjU="
+; CHECK-ASM: .ascii "MTU="
; CHECK-ASM: .byte 20
-; CHECK-ASM: .ascii "TotalCallsiteSamples"
+; CHECK-ASM: .ascii "TotalFuncHashSamples"
; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii "MzU="
+; CHECK-ASM: .ascii "NTA="
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-thinlto.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-thinlto.ll
index 29c3a142cc68f8f..7f848da74a53cee 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-thinlto.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-thinlto.ll
@@ -4,7 +4,7 @@
; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD
; CHECK: (1/1) of functions' profile are invalid and (6822/6822) of samples are discarded due to function hash mismatch.
-; CHECK: (4/4) of callsites' profile are invalid and (5026/5026) of samples are discarded due to callsite location mismatch.
+; CHECK: (4/4) of callsites' profile are invalid and (5026/6822) of samples are discarded due to callsite location mismatch.
+; CHECK: (0/4) of callsites' profile are invalid and (0/6822) of samples are discarded due to callsite location mismatch after stale profile matching.
-
-; CHECK-MD: ![[#]] = !{!"NumMismatchedFuncHash", i64 1, !"TotalProfiledFunc", i64 1, !"MismatchedFuncHashSamples", i64 6822, !"TotalFuncHashSamples", i64 6822, !"NumMismatchedCallsites", i64 4, !"TotalProfiledCallsites", i64 4, !"MismatchedCallsiteSamples", i64 5026, !"TotalCallsiteSamples", i64 5026}
+; CHECK-MD: !{!"NumMismatchedCallsites", i64 4, !"TotalProfiledCallsites", i64 4, !"MismatchedCallsiteSamples", i64 5026, !"TotalFuncHashSamples", i64 6822, !"TotalProfiledFunc", i64 1, !"NumMismatchedFuncHash", i64 1, !"MismatchedFuncHashSamples", i64 6822, !"PostMatchNumMismatchedCallsites", i64 0, !"PostMatchMismatchedCallsiteSamples", i64 0}
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll
index 4b6edf821376c0b..5c5bb1f0fae647f 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll
@@ -10,45 +10,51 @@
; CHECK: (1/3) of functions' profile are invalid and (10/50) of samples are discarded due to function hash mismatch.
-; CHECK: (2/3) of callsites' profile are invalid and (20/30) of samples are discarded due to callsite location mismatch.
+; CHECK: (2/3) of callsites' profile are invalid and (20/50) of samples are discarded due to callsite location mismatch.
+; CHECK: (2/3) of callsites' profile are invalid and (20/50) of samples are discarded due to callsite location mismatch after stale profile matching.
-; CHECK-MD: ![[#]] = !{!"NumMismatchedFuncHash", i64 1, !"TotalProfiledFunc", i64 3, !"MismatchedFuncHashSamples", i64 10, !"TotalFuncHashSamples", i64 50, !"NumMismatchedCallsites", i64 2, !"TotalProfiledCallsites", i64 3, !"MismatchedCallsiteSamples", i64 20, !"TotalCallsiteSamples", i64 30}
+; CHECK-MD: ![[#]] = !{!"NumMismatchedCallsites", i64 2, !"TotalProfiledCallsites", i64 3, !"MismatchedCallsiteSamples", i64 20, !"TotalFuncHashSamples", i64 50, !"TotalProfiledFunc", i64 3, !"NumMismatchedFuncHash", i64 1, !"MismatchedFuncHashSamples", i64 10, !"PostMatchNumMismatchedCallsites", i64 2, !"PostMatchMismatchedCallsiteSamples", i64 20}
; CHECK-OBJ: .llvm_stats
-; CHECK-ASM: .section .llvm_stats,"",@progbits
-; CHECK-ASM: .byte 21
-; CHECK-ASM: .ascii "NumMismatchedFuncHash"
-; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii "MQ=="
-; CHECK-ASM: .byte 17
-; CHECK-ASM: .ascii "TotalProfiledFunc"
-; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii "Mw=="
-; CHECK-ASM: .byte 25
-; CHECK-ASM: .ascii "MismatchedFuncHashSamples"
-; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii "MTA="
-; CHECK-ASM: .byte 20
-; CHECK-ASM: .ascii "TotalFuncHashSamples"
-; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii "NTA="
-; CHECK-ASM: .byte 22
-; CHECK-ASM: .ascii "NumMismatchedCallsites"
-; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii "Mg=="
-; CHECK-ASM: .byte 22
-; CHECK-ASM: .ascii "TotalProfiledCallsites"
-; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii "Mw=="
-; CHECK-ASM: .byte 25
-; CHECK-ASM: .ascii "MismatchedCallsiteSamples"
-; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii "MjA="
-; CHECK-ASM: .byte 20
-; CHECK-ASM: .ascii "TotalCallsiteSamples"
-; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii "MzA="
+
+; CHECK-ASM: .section .llvm_stats,"",@progbits
+; CHECK-ASM: .byte 22
+; CHECK-ASM: .ascii "NumMismatchedCallsites"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii "Mg=="
+; CHECK-ASM: .byte 22
+; CHECK-ASM: .ascii "TotalProfiledCallsites"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii "Mw=="
+; CHECK-ASM: .byte 25
+; CHECK-ASM: .ascii "MismatchedCallsiteSamples"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii "MjA="
+; CHECK-ASM: .byte 20
+; CHECK-ASM: .ascii "TotalFuncHashSamples"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii "NTA="
+; CHECK-ASM: .byte 17
+; CHECK-ASM: .ascii "TotalProfiledFunc"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii "Mw=="
+; CHECK-ASM: .byte 21
+; CHECK-ASM: .ascii "NumMismatchedFuncHash"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii "MQ=="
+; CHECK-ASM: .byte 25
+; CHECK-ASM: .ascii "MismatchedFuncHashSamples"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii "MTA="
+; CHECK-ASM: .byte 31
+; CHECK-ASM: .ascii "PostMatchNumMismatchedCallsites"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii "Mg=="
+; CHECK-ASM: .byte 34
+; CHECK-ASM: .ascii "PostMatchMismatchedCallsiteSamples"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii "MjA="
; CHECK-NESTED: (1/2) of functions' profile are invalid and (211/311) of samples are discarded due to function hash mismatch.
>From c63688b3b837b1ce21ecb66bba846f09d9f2ae74 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Fri, 26 Jan 2024 17:52:12 -0800
Subject: [PATCH 2/3] [CSSPGO] Support post-match profile staleness metrics
---
llvm/lib/Transforms/IPO/SampleProfile.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index a7170faa65dc07c..c232b9339146a89 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -457,7 +457,6 @@ class SampleProfileMatcher {
uint64_t MismatchedCallsiteSamples = 0;
uint64_t PostMatchNumMismatchedCallsites = 0;
uint64_t PostMatchMismatchedCallsiteSamples = 0;
- uint64_t TotalCallsiteSamples = 0;
uint64_t TotalProfiledFunc = 0;
uint64_t NumMismatchedFuncHash = 0;
uint64_t MismatchedFuncHashSamples = 0;
>From 380b6628cd6c89d91ea27402696c9604cacf761f Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Fri, 26 Jan 2024 10:14:35 -0800
Subject: [PATCH 3/3] Encapsulate mismatch counting into a new class
ProfileMatchStats
---
llvm/lib/Transforms/IPO/SampleProfile.cpp | 590 +++++++++---------
.../Inputs/profile-mismatch.prof | 1 -
.../SampleProfile/profile-mismatch.ll | 4 +-
.../pseudo-probe-profile-mismatch-thinlto.ll | 4 +-
.../pseudo-probe-profile-mismatch.ll | 19 +-
5 files changed, 317 insertions(+), 301 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index c232b9339146a89..0743cb8f78204c7 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -433,8 +433,43 @@ using CandidateQueue =
PriorityQueue<InlineCandidate, std::vector<InlineCandidate>,
CandidateComparer>;
-using IRAnchorMap = std::map<LineLocation, StringRef>;
-using ProfileAnchorMap = std::map<LineLocation, std::unordered_set<FunctionId>>;
+// Profile matching statistics.
+class ProfileMatchStats {
+ const Module &M;
+ SampleProfileReader &Reader;
+ const PseudoProbeManager *ProbeManager;
+
+public:
+ ProfileMatchStats(const Module &M, SampleProfileReader &Reader,
+ const PseudoProbeManager *ProbeManager)
+ : M(M), Reader(Reader), ProbeManager(ProbeManager) {}
+
+ uint64_t NumMismatchedCallsites = 0;
+ uint64_t TotalProfiledCallsites = 0;
+ uint64_t MismatchedCallsiteSamples = 0;
+ uint64_t NumMismatchedFuncHash = 0;
+ uint64_t TotalProfiledFunc = 0;
+ uint64_t MismatchedFuncHashSamples = 0;
+ uint64_t TotalFunctionSamples = 0;
+
+ // A map from function name to a set of mismatched callsite locations.
+ StringMap<std::set<LineLocation>> FuncMismatchedCallsites;
+
+ void countMismatchedSamples(const FunctionSamples &FS);
+ void countProfileMismatches(
+ const Function &F, const std::map<LineLocation, StringRef> &IRAnchors,
+ const std::map<LineLocation, std::unordered_set<FunctionId>>
+ &ProfileAnchors);
+ void countMismatchedCallsites(
+ const Function &F, const std::map<LineLocation, StringRef> &IRAnchors,
+ const std::map<LineLocation, std::unordered_set<FunctionId>>
+ &ProfileAnchors,
+ const LocToLocMap &IRToProfileLocationMap);
+ void countMismatchedCallsiteSamples(const FunctionSamples &FS);
+ void countMismatchedCallsiteSamples();
+ void copyUnchangedCallsiteMismatches(
+ const StringMap<std::set<LineLocation>> &InputMismatchedCallsites);
+};
// Sample profile matching - fuzzy match.
class SampleProfileMatcher {
@@ -442,37 +477,27 @@ class SampleProfileMatcher {
SampleProfileReader &Reader;
const PseudoProbeManager *ProbeManager;
SampleProfileMap FlattenedProfiles;
-
- std::unordered_map<const Function *, IRAnchorMap> FuncIRAnchors;
- std::unordered_map<const Function *, ProfileAnchorMap> FuncProfileAnchors;
-
// For each function, the matcher generates a map, of which each entry is a
// mapping from the source location of current build to the source location in
// the profile.
StringMap<LocToLocMap> FuncMappings;
- // Profile mismatching statstics.
- uint64_t TotalProfiledCallsites = 0;
- uint64_t NumMismatchedCallsites = 0;
- uint64_t MismatchedCallsiteSamples = 0;
- uint64_t PostMatchNumMismatchedCallsites = 0;
- uint64_t PostMatchMismatchedCallsiteSamples = 0;
- uint64_t TotalProfiledFunc = 0;
- uint64_t NumMismatchedFuncHash = 0;
- uint64_t MismatchedFuncHashSamples = 0;
- uint64_t TotalFuncHashSamples = 0;
-
- // A dummy name for unknown indirect callee, used to differentiate from a
- // non-call instruction that also has an empty callee name.
- static constexpr const char *UnknownIndirectCallee =
- "unknown.indirect.callee";
+ ProfileMatchStats PreMatchStats;
+ ProfileMatchStats PostMatchStats;
public:
SampleProfileMatcher(Module &M, SampleProfileReader &Reader,
const PseudoProbeManager *ProbeManager)
- : M(M), Reader(Reader), ProbeManager(ProbeManager){};
+ : M(M), Reader(Reader), ProbeManager(ProbeManager),
+ PreMatchStats(M, Reader, ProbeManager),
+ PostMatchStats(M, Reader, ProbeManager){};
void runOnModule();
+ // A dummy name for unknown indirect callee, used to differentiate from a
+ // non-call instruction that also has an empty callee name.
+ static constexpr const char *UnknownIndirectCallee =
+ "unknown.indirect.callee";
+
private:
FunctionSamples *getFlattenedSamplesFor(const Function &F) {
StringRef CanonFName = FunctionSamples::getCanonicalFnName(F);
@@ -482,22 +507,11 @@ class SampleProfileMatcher {
return nullptr;
}
void runOnFunction(const Function &F);
- void findFuncAnchors();
- void UpdateIRAnchors();
- void findIRAnchors(const Function &F, IRAnchorMap &IRAnchors);
- void findProfileAnchors(const FunctionSamples &FS,
- ProfileAnchorMap &ProfileAnchors);
- void countMismatchedHashSamples(const FunctionSamples &FS);
- void countProfileMismatches(bool IsPreMatch);
- void countMismatchedHashes(const Function &F, const FunctionSamples &FS);
- void countMismatchedCallsites(
- const Function &F,
- StringMap<std::set<LineLocation>> &FuncToMismatchCallsites,
- uint64_t &FuncProfiledCallsites, uint64_t &FuncMismatchedCallsites) const;
- void countMismatchedCallsiteSamples(
+ void findIRAnchors(const Function &F,
+ std::map<LineLocation, StringRef> &IRAnchors);
+ void findProfileAnchors(
const FunctionSamples &FS,
- StringMap<std::set<LineLocation>> &FuncToMismatchCallsites,
- uint64_t &FuncMismatchedCallsiteSamples) const;
+ std::map<LineLocation, std::unordered_set<FunctionId>> &ProfileAnchors);
LocToLocMap &getIRToProfileLocationMap(const Function &F) {
auto Ret = FuncMappings.try_emplace(
FunctionSamples::getCanonicalFnName(F.getName()), LocToLocMap());
@@ -505,10 +519,12 @@ class SampleProfileMatcher {
}
void distributeIRToProfileLocationMap();
void distributeIRToProfileLocationMap(FunctionSamples &FS);
- void runStaleProfileMatching();
- void runStaleProfileMatching(const Function &F, const IRAnchorMap &IRAnchors,
- const ProfileAnchorMap &ProfileAnchors,
- LocToLocMap &IRToProfileLocationMap);
+ void runStaleProfileMatching(
+ const Function &F, const std::map<LineLocation, StringRef> &IRAnchors,
+ const std::map<LineLocation, std::unordered_set<FunctionId>>
+ &ProfileAnchors,
+ LocToLocMap &IRToProfileLocationMap);
+ void reportOrPersistProfileStats();
};
/// Sample profile pass.
@@ -695,6 +711,10 @@ void SampleProfileLoaderBaseImpl<Function>::computeDominanceAndLoopInfo(
}
} // namespace llvm
+bool ShouldSkipProfileLoading(const Function &F) {
+ return F.isDeclaration() || !F.hasFnAttribute("use-sample-profile");
+}
+
ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
if (FunctionSamples::ProfileIsProbeBased)
return getProbeWeight(Inst);
@@ -2128,8 +2148,8 @@ bool SampleProfileLoader::doInitialization(Module &M,
return true;
}
-void SampleProfileMatcher::findIRAnchors(const Function &F,
- IRAnchorMap &IRAnchors) {
+void SampleProfileMatcher::findIRAnchors(
+ const Function &F, std::map<LineLocation, StringRef> &IRAnchors) {
// For inlined code, recover the original callsite and callee by finding the
// top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the
// top-level frame is "main:1", the callsite is "1" and the callee is "foo".
@@ -2195,8 +2215,7 @@ void SampleProfileMatcher::findIRAnchors(const Function &F,
}
}
-void SampleProfileMatcher::countMismatchedHashSamples(
- const FunctionSamples &FS) {
+void ProfileMatchStats::countMismatchedSamples(const FunctionSamples &FS) {
const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
// Skip the function that is external or renamed.
if (!FuncDesc)
@@ -2208,11 +2227,144 @@ void SampleProfileMatcher::countMismatchedHashSamples(
}
for (const auto &I : FS.getCallsiteSamples())
for (const auto &CS : I.second)
- countMismatchedHashSamples(CS.second);
+ countMismatchedSamples(CS.second);
+}
+
+void ProfileMatchStats::countMismatchedCallsites(
+ const Function &F, const std::map<LineLocation, StringRef> &IRAnchors,
+ const std::map<LineLocation, std::unordered_set<FunctionId>>
+ &ProfileAnchors,
+ const LocToLocMap &IRToProfileLocationMap) {
+ auto &MismatchedCallsites =
+ FuncMismatchedCallsites[FunctionSamples::getCanonicalFnName(F.getName())];
+
+ auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) {
+ const auto &ProfileLoc = IRToProfileLocationMap.find(IRLoc);
+ if (ProfileLoc != IRToProfileLocationMap.end())
+ return ProfileLoc->second;
+ else
+ return IRLoc;
+ };
+
+ std::set<LineLocation> MatchedCallsites;
+ for (const auto &I : IRAnchors) {
+ // In post-match, use the matching result to remap the current IR callsite.
+ const auto &Loc = MapIRLocToProfileLoc(I.first);
+ const auto &IRCalleeName = I.second;
+ const auto &It = ProfileAnchors.find(Loc);
+ if (It == ProfileAnchors.end())
+ continue;
+ const auto &Callees = It->second;
+
+ // Since an indirect call does not have a CalleeName, check conservatively
+ // if the callsite in the profile is a callsite location. This reduces the
+ // number of false positives, since otherwise all the indirect call samples
+ // will be reported as mismatching.
+ if (IRCalleeName == SampleProfileMatcher::UnknownIndirectCallee)
+ MatchedCallsites.insert(Loc);
+ else if (Callees.count(getRepInFormat(IRCalleeName)))
+ MatchedCallsites.insert(Loc);
+ }
+
+ // Check if there are any callsites in the profile that do not match any IR
+ // callsites; those callsite samples will be discarded.
+ for (const auto &I : ProfileAnchors) {
+ const auto &Loc = I.first;
+ [[maybe_unused]] const auto &Callees = I.second;
+ assert(!Callees.empty() && "Callees should not be empty");
+ TotalProfiledCallsites++;
+ if (!MatchedCallsites.count(Loc)) {
+ NumMismatchedCallsites++;
+ MismatchedCallsites.insert(Loc);
+ }
+ }
+}
+
+void ProfileMatchStats::countProfileMismatches(
+ const Function &F, const std::map<LineLocation, StringRef> &IRAnchors,
+ const std::map<LineLocation, std::unordered_set<FunctionId>>
+ &ProfileAnchors) {
+ [[maybe_unused]] bool IsFuncHashMismatch = false;
+ // Use top-level nested FS for counting profile mismatch metrics since
+ // currently once a callsite is mismatched, all its children profiles are
+ // dropped.
+ if (const auto *FS = Reader.getSamplesFor(F)) {
+ TotalProfiledFunc++;
+ TotalFunctionSamples += FS->getTotalSamples();
+ if (FunctionSamples::ProfileIsProbeBased) {
+ const auto *FuncDesc = ProbeManager->getDesc(F);
+ if (FuncDesc) {
+ if (ProbeManager->profileIsHashMismatched(*FuncDesc, *FS)) {
+ NumMismatchedFuncHash++;
+ IsFuncHashMismatch = true;
+ }
+ countMismatchedSamples(*FS);
+ }
+ }
+ }
+
+ countMismatchedCallsites(F, IRAnchors, ProfileAnchors, LocToLocMap());
+ LLVM_DEBUG({
+ auto It = FuncMismatchedCallsites.find(
+ FunctionSamples::getCanonicalFnName(F.getName()));
+ if (FunctionSamples::ProfileIsProbeBased && !IsFuncHashMismatch &&
+ It != FuncMismatchedCallsites.end() && !It->second.empty())
+ dbgs() << "Function checksum is matched but there are "
+ << It->second.size() << " mismatched callsites.\n";
+ });
+}
+
+void ProfileMatchStats::countMismatchedCallsiteSamples(
+ const FunctionSamples &FS) {
+ auto It = FuncMismatchedCallsites.find(FS.getFuncName());
+ // Skip it if no mismatched callsite or this is an external function.
+ if (It == FuncMismatchedCallsites.end() || It->second.empty())
+ return;
+ const auto &MismatchCallsites = It->second;
+
+ for (const auto &I : FS.getBodySamples()) {
+ if (MismatchCallsites.count(I.first))
+ MismatchedCallsiteSamples += I.second.getSamples();
+ }
+
+ for (const auto &I : FS.getCallsiteSamples()) {
+ const auto &Loc = I.first;
+ if (MismatchCallsites.count(Loc)) {
+ for (const auto &CS : I.second)
+ MismatchedCallsiteSamples += CS.second.getTotalSamples();
+ continue;
+ }
+
+ // Count mismatched samples for inlined functions.
+ for (const auto &CS : I.second)
+ countMismatchedCallsiteSamples(CS.second);
+ }
+}
+
+void ProfileMatchStats::countMismatchedCallsiteSamples() {
+ if (FuncMismatchedCallsites.empty())
+ return;
+ for (const auto &F : M) {
+ if (ShouldSkipProfileLoading(F))
+ continue;
+ if (const auto *FS = Reader.getSamplesFor(F))
+ countMismatchedCallsiteSamples(*FS);
+ }
+}
+
+void ProfileMatchStats::copyUnchangedCallsiteMismatches(
+ const StringMap<std::set<LineLocation>> &InputMismatchedCallsites) {
+ for (const auto &I : InputMismatchedCallsites) {
+ auto It = FuncMismatchedCallsites.find(I.first());
+ if (It != FuncMismatchedCallsites.end())
+ continue;
+ FuncMismatchedCallsites.try_emplace(I.first(), I.second);
+ }
}
void SampleProfileMatcher::findProfileAnchors(
- const FunctionSamples &FS, ProfileAnchorMap &ProfileAnchors) {
+ const FunctionSamples &FS,
+ std::map<LineLocation, std::unordered_set<FunctionId>> &ProfileAnchors) {
auto isInvalidLineOffset = [](uint32_t LineOffset) {
return LineOffset & 0x8000;
};
@@ -2259,8 +2411,9 @@ void SampleProfileMatcher::findProfileAnchors(
// [1, 2, 3(foo), 4, 7, 8(bar), 9]
// The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
void SampleProfileMatcher::runStaleProfileMatching(
- const Function &F, const IRAnchorMap &IRAnchors,
- const ProfileAnchorMap &ProfileAnchors,
+ const Function &F, const std::map<LineLocation, StringRef> &IRAnchors,
+ const std::map<LineLocation, std::unordered_set<FunctionId>>
+ &ProfileAnchors,
LocToLocMap &IRToProfileLocationMap) {
LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
<< "\n");
@@ -2341,249 +2494,79 @@ void SampleProfileMatcher::runStaleProfileMatching(
}
}
-void SampleProfileMatcher::runStaleProfileMatching() {
- for (const auto &F : M) {
- if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
- continue;
- const auto *FSFlattened = getFlattenedSamplesFor(F);
- if (!FSFlattened)
- continue;
- auto IR = FuncIRAnchors.find(&F);
- auto P = FuncProfileAnchors.find(&F);
- if (IR == FuncIRAnchors.end() || P == FuncProfileAnchors.end())
- continue;
-
- // Run profile matching for checksum mismatched profile, currently only
- // support for pseudo-probe.
- if (FunctionSamples::ProfileIsProbeBased &&
- !ProbeManager->profileIsValid(F, *FSFlattened)) {
- runStaleProfileMatching(F, IR->second, P->second,
- getIRToProfileLocationMap(F));
- }
- }
-
- distributeIRToProfileLocationMap();
-}
-
-void SampleProfileMatcher::findFuncAnchors() {
- ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
- FunctionSamples::ProfileIsCS);
- for (const auto &F : M) {
- if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
- continue;
- // We need to use flattened function samples for matching.
- // Unlike IR, which includes all callsites from the source code, the
- // callsites in profile only show up when they are hit by samples, i,e. the
- // profile callsites in one context may differ from those in another
- // context. To get the maximum number of callsites, we merge the function
- // profiles from all contexts, aka, the flattened profile to find profile
- // anchors.
- const auto *FSFlattened = getFlattenedSamplesFor(F);
- if (!FSFlattened)
- continue;
-
- // Anchors for IR. It's a map from IR location to callee name, callee name
- // is empty for non-call instruction and use a dummy
- // name(UnknownIndirectCallee) for unknown indrect callee name.
- auto IR = FuncIRAnchors.emplace(&F, IRAnchorMap());
- findIRAnchors(F, IR.first->second);
-
- // Anchors for profile. It's a map from callsite location to a set of callee
- // name.
- auto P = FuncProfileAnchors.emplace(&F, ProfileAnchorMap());
- findProfileAnchors(*FSFlattened, P.first->second);
- }
-}
-
-void SampleProfileMatcher::countMismatchedCallsiteSamples(
- const FunctionSamples &FS,
- StringMap<std::set<LineLocation>> &FuncToMismatchCallsites,
- uint64_t &FuncMismatchedCallsiteSamples) const {
- auto It = FuncToMismatchCallsites.find(FS.getFuncName());
- // Skip it if no mismatched callsite or this is an external function.
- if (It == FuncToMismatchCallsites.end() || It->second.empty())
- return;
- const auto &MismatchCallsites = It->second;
- for (const auto &I : FS.getBodySamples()) {
- if (MismatchCallsites.count(I.first))
- FuncMismatchedCallsiteSamples += I.second.getSamples();
- }
-
- for (const auto &I : FS.getCallsiteSamples()) {
- const auto &Loc = I.first;
- if (MismatchCallsites.count(Loc)) {
- for (const auto &CS : I.second)
- FuncMismatchedCallsiteSamples += CS.second.getTotalSamples();
- continue;
- }
-
- // count mismatched samples for inlined samples.
- for (const auto &CS : I.second)
- countMismatchedCallsiteSamples(CS.second, FuncToMismatchCallsites,
- FuncMismatchedCallsiteSamples);
- }
-}
-
-void SampleProfileMatcher::countMismatchedCallsites(
- const Function &F,
- StringMap<std::set<LineLocation>> &FuncToMismatchCallsites,
- uint64_t &FuncProfiledCallsites, uint64_t &FuncMismatchedCallsites) const {
- auto IR = FuncIRAnchors.find(&F);
- auto P = FuncProfileAnchors.find(&F);
- if (IR == FuncIRAnchors.end() || P == FuncProfileAnchors.end())
- return;
- const auto &IRAnchors = IR->second;
- const auto &ProfileAnchors = P->second;
-
- auto &MismatchCallsites =
- FuncToMismatchCallsites[FunctionSamples::getCanonicalFnName(F.getName())];
-
- // Check if there are any callsites in the profile that does not match to any
- // IR callsites, those callsite samples will be discarded.
- for (const auto &I : ProfileAnchors) {
- const auto &Loc = I.first;
- const auto &Callees = I.second;
- assert(!Callees.empty() && "Callees should not be empty");
-
- StringRef IRCalleeName;
- const auto &IR = IRAnchors.find(Loc);
- if (IR != IRAnchors.end())
- IRCalleeName = IR->second;
- bool CallsiteIsMatched = false;
- // Since indirect call does not have CalleeName, check conservatively if
- // callsite in the profile is a callsite location. This is to reduce num of
- // false positive since otherwise all the indirect call samples will be
- // reported as mismatching.
- if (IRCalleeName == UnknownIndirectCallee)
- CallsiteIsMatched = true;
- else if (Callees.count(FunctionId(IRCalleeName)))
- CallsiteIsMatched = true;
-
- FuncProfiledCallsites++;
- if (!CallsiteIsMatched) {
- FuncMismatchedCallsites++;
- MismatchCallsites.insert(Loc);
- }
- }
-}
-
-void SampleProfileMatcher::countMismatchedHashes(const Function &F,
- const FunctionSamples &FS) {
- if (!FunctionSamples::ProfileIsProbeBased)
+void SampleProfileMatcher::runOnFunction(const Function &F) {
+ // We need to use flattened function samples for matching.
+ // Unlike IR, which includes all callsites from the source code, the callsites
+ // in profile only show up when they are hit by samples, i.e. the profile
+ // callsites in one context may differ from those in another context. To get
+ // the maximum number of callsites, we merge the function profiles from all
+ // contexts, aka, the flattened profile to find profile anchors.
+ const auto *FSFlattened = getFlattenedSamplesFor(F);
+ if (!FSFlattened)
return;
- const auto *FuncDesc = ProbeManager->getDesc(F);
- if (FuncDesc) {
- if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
- NumMismatchedFuncHash++;
- }
- countMismatchedHashSamples(FS);
- }
-}
-
-void SampleProfileMatcher::UpdateIRAnchors() {
- for (auto &I : FuncIRAnchors) {
- const auto *F = I.first;
- auto &IRAnchors = I.second;
- const auto Mapping =
- FuncMappings.find(FunctionSamples::getCanonicalFnName(F->getName()));
- if (Mapping == FuncMappings.end())
- continue;
- IRAnchorMap UpdatedIRAnchors;
- const auto &LocToLocMapping = Mapping->second;
- for (const auto L : LocToLocMapping) {
- UpdatedIRAnchors[L.second] = IRAnchors[L.first];
- IRAnchors.erase(L.first);
- }
-
- for (const auto &IR : UpdatedIRAnchors) {
- IRAnchors[IR.first] = IR.second;
- }
- }
-}
-
-void SampleProfileMatcher::countProfileMismatches(bool IsPreMatch) {
- if (!ReportProfileStaleness && !PersistProfileStaleness)
- return;
-
- if (!IsPreMatch) {
- // Use the profile matching results to update to the IR anchors.
- UpdateIRAnchors();
- }
-
- uint64_t UnusedCounter = 0;
- uint64_t *TotalProfiledCallsitesPtr =
- IsPreMatch ? &TotalProfiledCallsites : &UnusedCounter;
- uint64_t *NumMismatchedCallsitesPtr =
- IsPreMatch ? &NumMismatchedCallsites : &PostMatchNumMismatchedCallsites;
- uint64_t *MismatchedCallsiteSamplesPtr =
- IsPreMatch ? &MismatchedCallsiteSamples
- : &PostMatchMismatchedCallsiteSamples;
-
- auto SkipFunctionForReport = [](const Function &F) {
- if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
- return true;
- // Skip reporting the metrics for imported functions.
- if (GlobalValue::isAvailableExternallyLinkage(F.getLinkage()))
- return true;
- return false;
- };
- StringMap<std::set<LineLocation>> FuncToMismatchCallsites;
- for (const auto &F : M) {
- if (SkipFunctionForReport(F))
- continue;
- const auto *FS = Reader.getSamplesFor(F);
- if (FS && IsPreMatch) {
- // Only count the total function metrics once in pre-match time.
- TotalFuncHashSamples += FS->getTotalSamples();
- TotalProfiledFunc++;
- countMismatchedHashes(F, *FS);
- }
- countMismatchedCallsites(F, FuncToMismatchCallsites,
- *TotalProfiledCallsitesPtr,
- *NumMismatchedCallsitesPtr);
- }
-
- for (const auto &F : M) {
- if (SkipFunctionForReport(F))
- continue;
- if (const auto *FS = Reader.getSamplesFor(F))
- countMismatchedCallsiteSamples(*FS, FuncToMismatchCallsites,
- *MismatchedCallsiteSamplesPtr);
+ // Anchors for IR. It's a map from IR location to callee name, callee name is
+ // empty for non-call instruction and use a dummy name(UnknownIndirectCallee)
+ // for unknown indirect callee name.
+ std::map<LineLocation, StringRef> IRAnchors;
+ findIRAnchors(F, IRAnchors);
+ // Anchors for profile. It's a map from callsite location to a set of callee
+ // name.
+ std::map<LineLocation, std::unordered_set<FunctionId>> ProfileAnchors;
+ findProfileAnchors(*FSFlattened, ProfileAnchors);
+
+ // Detect profile mismatch for profile staleness metrics report.
+ // Skip reporting the metrics for imported functions.
+ if (!GlobalValue::isAvailableExternallyLinkage(F.getLinkage()) &&
+ (ReportProfileStaleness || PersistProfileStaleness)) {
+ PreMatchStats.countProfileMismatches(F, IRAnchors, ProfileAnchors);
+ }
+
+ // Run profile matching for checksum mismatched profile, currently only
+ // support for pseudo-probe.
+ if (SalvageStaleProfile && FunctionSamples::ProfileIsProbeBased &&
+ !ProbeManager->profileIsValid(F, *FSFlattened)) {
+ // The matching result will be saved to IRToProfileLocationMap, create a new
+ // map for each function.
+ auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
+ runStaleProfileMatching(F, IRAnchors, ProfileAnchors,
+ IRToProfileLocationMap);
+ PostMatchStats.countMismatchedCallsites(F, IRAnchors, ProfileAnchors,
+ IRToProfileLocationMap);
}
}
-void SampleProfileMatcher::runOnModule() {
- findFuncAnchors();
- countProfileMismatches(true);
-
- if (SalvageStaleProfile) {
- runStaleProfileMatching();
- countProfileMismatches(false);
- }
-
+void SampleProfileMatcher::reportOrPersistProfileStats() {
if (ReportProfileStaleness) {
if (FunctionSamples::ProfileIsProbeBased) {
- errs() << "(" << NumMismatchedFuncHash << "/" << TotalProfiledFunc << ")"
+ errs() << "(" << PreMatchStats.NumMismatchedFuncHash << "/"
+ << PreMatchStats.TotalProfiledFunc << ")"
<< " of functions' profile are invalid and "
- << " (" << MismatchedFuncHashSamples << "/" << TotalFuncHashSamples
- << ")"
+ << " (" << PreMatchStats.MismatchedFuncHashSamples << "/"
+ << PreMatchStats.TotalFunctionSamples << ")"
<< " of samples are discarded due to function hash mismatch.\n";
}
- errs() << "(" << NumMismatchedCallsites << "/" << TotalProfiledCallsites
- << ")"
+ errs() << "(" << PreMatchStats.NumMismatchedCallsites << "/"
+ << PreMatchStats.TotalProfiledCallsites << ")"
<< " of callsites' profile are invalid and "
- << "(" << MismatchedCallsiteSamples << "/" << TotalFuncHashSamples
- << ")"
+ << "(" << PreMatchStats.MismatchedCallsiteSamples << "/"
+ << PreMatchStats.TotalFunctionSamples << ")"
<< " of samples are discarded due to callsite location mismatch.\n";
if (SalvageStaleProfile) {
- errs() << "(" << PostMatchNumMismatchedCallsites << "/"
- << TotalProfiledCallsites << ")"
- << " of callsites' profile are invalid and "
- << "(" << PostMatchMismatchedCallsiteSamples << "/"
- << TotalFuncHashSamples << ")"
- << " of samples are discarded due to callsite location mismatch "
- "after stale profile matching.\n";
+ uint64_t NumRecoveredCallsites = PostMatchStats.TotalProfiledCallsites -
+ PostMatchStats.NumMismatchedCallsites;
+ uint64_t NumMismatchedCallsites =
+ PreMatchStats.NumMismatchedCallsites - NumRecoveredCallsites;
+ errs() << "Out of " << PostMatchStats.TotalProfiledCallsites
+ << " callsites used for profile matching, "
+ << NumRecoveredCallsites
+ << " callsites have been recovered. After the matching, ("
+ << NumMismatchedCallsites << "/"
+ << PreMatchStats.TotalProfiledCallsites
+ << ") of callsites are still invalid ("
+ << PostMatchStats.MismatchedCallsiteSamples << "/"
+ << PreMatchStats.TotalFunctionSamples << ")"
+ << " of samples are still discarded.\n";
}
}
@@ -2592,22 +2575,29 @@ void SampleProfileMatcher::runOnModule() {
MDBuilder MDB(Ctx);
SmallVector<std::pair<StringRef, uint64_t>> ProfStatsVec;
- ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites);
- ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites);
+ ProfStatsVec.emplace_back("NumMismatchedCallsites",
+ PreMatchStats.NumMismatchedCallsites);
+ ProfStatsVec.emplace_back("TotalProfiledCallsites",
+ PreMatchStats.TotalProfiledCallsites);
ProfStatsVec.emplace_back("MismatchedCallsiteSamples",
- MismatchedCallsiteSamples);
- ProfStatsVec.emplace_back("TotalFuncHashSamples", TotalFuncHashSamples);
+ PreMatchStats.MismatchedCallsiteSamples);
+ ProfStatsVec.emplace_back("TotalProfiledFunc",
+ PreMatchStats.TotalProfiledFunc);
+ ProfStatsVec.emplace_back("TotalFunctionSamples",
+ PreMatchStats.TotalFunctionSamples);
if (FunctionSamples::ProfileIsProbeBased) {
- ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc);
- ProfStatsVec.emplace_back("NumMismatchedFuncHash", NumMismatchedFuncHash);
+ ProfStatsVec.emplace_back("NumMismatchedFuncHash",
+ PreMatchStats.NumMismatchedFuncHash);
ProfStatsVec.emplace_back("MismatchedFuncHashSamples",
- MismatchedFuncHashSamples);
+ PreMatchStats.MismatchedFuncHashSamples);
}
if (SalvageStaleProfile) {
ProfStatsVec.emplace_back("PostMatchNumMismatchedCallsites",
- PostMatchNumMismatchedCallsites);
+ PostMatchStats.NumMismatchedCallsites);
+ ProfStatsVec.emplace_back("NumCallsitesForMatching",
+ PostMatchStats.TotalProfiledCallsites);
ProfStatsVec.emplace_back("PostMatchMismatchedCallsiteSamples",
- PostMatchMismatchedCallsiteSamples);
+ PostMatchStats.MismatchedCallsiteSamples);
}
auto *MD = MDB.createLLVMStats(ProfStatsVec);
@@ -2616,6 +2606,30 @@ void SampleProfileMatcher::runOnModule() {
}
}
+void SampleProfileMatcher::runOnModule() {
+ ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
+ FunctionSamples::ProfileIsCS);
+ for (auto &F : M) {
+ if (ShouldSkipProfileLoading(F))
+ continue;
+ runOnFunction(F);
+ }
+
+ if (SalvageStaleProfile)
+ distributeIRToProfileLocationMap();
+
+ PreMatchStats.countMismatchedCallsiteSamples();
+ if (SalvageStaleProfile) {
+ // If a function doesn't run the matching but has mismatched callsites,
+ // there won't be any data for that function in post-match stats, so just
+ // reuse the pre-match stats.
+ PostMatchStats.copyUnchangedCallsiteMismatches(
+ PreMatchStats.FuncMismatchedCallsites);
+ PostMatchStats.countMismatchedCallsiteSamples();
+ }
+ reportOrPersistProfileStats();
+}
+
void SampleProfileMatcher::distributeIRToProfileLocationMap(
FunctionSamples &FS) {
const auto ProfileMappings = FuncMappings.find(FS.getFuncName());
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof b/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof
index f2a00e789b8b669..241d0914a376414 100644
--- a/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof
+++ b/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof
@@ -14,4 +14,3 @@ main:30:0
bar:10:10
1: 10
matched:10:10
- 1: 10
diff --git a/llvm/test/Transforms/SampleProfile/profile-mismatch.ll b/llvm/test/Transforms/SampleProfile/profile-mismatch.ll
index e7c5dece1235b57..14e384d7964ab00 100644
--- a/llvm/test/Transforms/SampleProfile/profile-mismatch.ll
+++ b/llvm/test/Transforms/SampleProfile/profile-mismatch.ll
@@ -8,7 +8,7 @@
; CHECK: (2/4) of callsites' profile are invalid and (15/50) of samples are discarded due to callsite location mismatch.
-; CHECK-MD: ![[#]] = !{!"NumMismatchedCallsites", i64 2, !"TotalProfiledCallsites", i64 4, !"MismatchedCallsiteSamples", i64 15, !"TotalFuncHashSamples", i64 50}
+; CHECK-MD: ![[#]] = !{!"NumMismatchedCallsites", i64 2, !"TotalProfiledCallsites", i64 4, !"MismatchedCallsiteSamples", i64 15, !"TotalProfiledFunc", i64 3, !"TotalFunctionSamples", i64 50}
; CHECK-OBJ: .llvm_stats
@@ -26,7 +26,7 @@
; CHECK-ASM: .byte 4
; CHECK-ASM: .ascii "MTU="
; CHECK-ASM: .byte 20
-; CHECK-ASM: .ascii "TotalFuncHashSamples"
+; CHECK-ASM: .ascii "TotalFunctionSamples"
; CHECK-ASM: .byte 4
; CHECK-ASM: .ascii "NTA="
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-thinlto.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-thinlto.ll
index 7f848da74a53cee..768fe5509f33a9a 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-thinlto.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-thinlto.ll
@@ -5,6 +5,6 @@
; CHECK: (1/1) of functions' profile are invalid and (6822/6822) of samples are discarded due to function hash mismatch.
; CHECK: (4/4) of callsites' profile are invalid and (5026/6822) of samples are discarded due to callsite location mismatch.
-; CHECK: (0/4) of callsites' profile are invalid and (0/6822) of samples are discarded due to callsite location mismatch after stale profile matching.
+; CHECK: Out of 4 callsites used for profile matching, 4 callsites have been recovered. After the matching, (0/4) of callsites are still invalid (0/6822) of samples are still discarded.
-; CHECK-MD: !{!"NumMismatchedCallsites", i64 4, !"TotalProfiledCallsites", i64 4, !"MismatchedCallsiteSamples", i64 5026, !"TotalFuncHashSamples", i64 6822, !"TotalProfiledFunc", i64 1, !"NumMismatchedFuncHash", i64 1, !"MismatchedFuncHashSamples", i64 6822, !"PostMatchNumMismatchedCallsites", i64 0, !"PostMatchMismatchedCallsiteSamples", i64 0}
+; CHECK-MD: !{!"NumMismatchedCallsites", i64 4, !"TotalProfiledCallsites", i64 4, !"MismatchedCallsiteSamples", i64 5026, !"TotalProfiledFunc", i64 1, !"TotalFunctionSamples", i64 6822, !"NumMismatchedFuncHash", i64 1, !"MismatchedFuncHashSamples", i64 6822, !"PostMatchNumMismatchedCallsites", i64 0, !"NumCallsitesForMatching", i64 4, !"PostMatchMismatchedCallsiteSamples", i64 0}
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll
index 5c5bb1f0fae647f..9949b5fd41f407c 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll
@@ -11,13 +11,12 @@
; CHECK: (1/3) of functions' profile are invalid and (10/50) of samples are discarded due to function hash mismatch.
; CHECK: (2/3) of callsites' profile are invalid and (20/50) of samples are discarded due to callsite location mismatch.
-; CHECK: (2/3) of callsites' profile are invalid and (20/50) of samples are discarded due to callsite location mismatch after stale profile matching.
+; CHECK: Out of 0 callsites used for profile matching, 0 callsites have been recovered. After the matching, (2/3) of callsites are still invalid (20/50) of samples are still discarded.
-; CHECK-MD: ![[#]] = !{!"NumMismatchedCallsites", i64 2, !"TotalProfiledCallsites", i64 3, !"MismatchedCallsiteSamples", i64 20, !"TotalFuncHashSamples", i64 50, !"TotalProfiledFunc", i64 3, !"NumMismatchedFuncHash", i64 1, !"MismatchedFuncHashSamples", i64 10, !"PostMatchNumMismatchedCallsites", i64 2, !"PostMatchMismatchedCallsiteSamples", i64 20}
+; CHECK-MD: ![[#]] = !{!"NumMismatchedCallsites", i64 2, !"TotalProfiledCallsites", i64 3, !"MismatchedCallsiteSamples", i64 20, !"TotalProfiledFunc", i64 3, !"TotalFunctionSamples", i64 50, !"NumMismatchedFuncHash", i64 1, !"MismatchedFuncHashSamples", i64 10, !"PostMatchNumMismatchedCallsites", i64 0, !"NumCallsitesForMatching", i64 0, !"PostMatchMismatchedCallsiteSamples", i64 20}
; CHECK-OBJ: .llvm_stats
-
; CHECK-ASM: .section .llvm_stats,"",@progbits
; CHECK-ASM: .byte 22
; CHECK-ASM: .ascii "NumMismatchedCallsites"
@@ -31,14 +30,14 @@
; CHECK-ASM: .ascii "MismatchedCallsiteSamples"
; CHECK-ASM: .byte 4
; CHECK-ASM: .ascii "MjA="
-; CHECK-ASM: .byte 20
-; CHECK-ASM: .ascii "TotalFuncHashSamples"
-; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii "NTA="
; CHECK-ASM: .byte 17
; CHECK-ASM: .ascii "TotalProfiledFunc"
; CHECK-ASM: .byte 4
; CHECK-ASM: .ascii "Mw=="
+; CHECK-ASM: .byte 20
+; CHECK-ASM: .ascii "TotalFunctionSamples"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii "NTA="
; CHECK-ASM: .byte 21
; CHECK-ASM: .ascii "NumMismatchedFuncHash"
; CHECK-ASM: .byte 4
@@ -50,7 +49,11 @@
; CHECK-ASM: .byte 31
; CHECK-ASM: .ascii "PostMatchNumMismatchedCallsites"
; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii "Mg=="
+; CHECK-ASM: .ascii "MA=="
+; CHECK-ASM: .byte 23
+; CHECK-ASM: .ascii "NumCallsitesForMatching"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii "MA=="
; CHECK-ASM: .byte 34
; CHECK-ASM: .ascii "PostMatchMismatchedCallsiteSamples"
; CHECK-ASM: .byte 4
More information about the flang-commits
mailing list