[flang-commits] [flang] [libcxx] [libcxxabi] [libc] [llvm] [clang] [compiler-rt] [clang-tools-extra] [lld] [CSSPGO] Compute and report post-match profile staleness (PR #79090)
Lei Wang via flang-commits
flang-commits at lists.llvm.org
Tue Jan 23 10:07:13 PST 2024
https://github.com/wlei-llvm updated https://github.com/llvm/llvm-project/pull/79090
>From 54784e26f33178efd21b0289a1f673d66ea26cc3 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Mon, 22 Jan 2024 19:16:26 -0800
Subject: [PATCH] [CSSPGO] Support post-match profile staleness metrics
---
llvm/lib/Transforms/IPO/SampleProfile.cpp | 440 +++++++++++-------
.../Inputs/profile-mismatch.prof | 7 +-
.../SampleProfile/profile-mismatch.ll | 12 +-
.../pseudo-probe-profile-mismatch-thinlto.ll | 6 +-
.../pseudo-probe-profile-mismatch.ll | 76 +--
5 files changed, 324 insertions(+), 217 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 2fd8668d15e200f..a7170faa65dc07c 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -433,12 +433,19 @@ using CandidateQueue =
PriorityQueue<InlineCandidate, std::vector<InlineCandidate>,
CandidateComparer>;
+using IRAnchorMap = std::map<LineLocation, StringRef>;
+using ProfileAnchorMap = std::map<LineLocation, std::unordered_set<FunctionId>>;
+
// Sample profile matching - fuzzy match.
class SampleProfileMatcher {
Module &M;
SampleProfileReader &Reader;
const PseudoProbeManager *ProbeManager;
SampleProfileMap FlattenedProfiles;
+
+ std::unordered_map<const Function *, IRAnchorMap> FuncIRAnchors;
+ std::unordered_map<const Function *, ProfileAnchorMap> FuncProfileAnchors;
+
// For each function, the matcher generates a map, of which each entry is a
// mapping from the source location of current build to the source location in
// the profile.
@@ -448,6 +455,8 @@ class SampleProfileMatcher {
uint64_t TotalProfiledCallsites = 0;
uint64_t NumMismatchedCallsites = 0;
uint64_t MismatchedCallsiteSamples = 0;
+ uint64_t PostMatchNumMismatchedCallsites = 0;
+ uint64_t PostMatchMismatchedCallsiteSamples = 0;
uint64_t TotalCallsiteSamples = 0;
uint64_t TotalProfiledFunc = 0;
uint64_t NumMismatchedFuncHash = 0;
@@ -474,24 +483,22 @@ class SampleProfileMatcher {
return nullptr;
}
void runOnFunction(const Function &F);
- void findIRAnchors(const Function &F,
- std::map<LineLocation, StringRef> &IRAnchors);
- void findProfileAnchors(
+ void findFuncAnchors();
+ void UpdateIRAnchors();
+ void findIRAnchors(const Function &F, IRAnchorMap &IRAnchors);
+ void findProfileAnchors(const FunctionSamples &FS,
+ ProfileAnchorMap &ProfileAnchors);
+ void countMismatchedHashSamples(const FunctionSamples &FS);
+ void countProfileMismatches(bool IsPreMatch);
+ void countMismatchedHashes(const Function &F, const FunctionSamples &FS);
+ void countMismatchedCallsites(
+ const Function &F,
+ StringMap<std::set<LineLocation>> &FuncToMismatchCallsites,
+ uint64_t &FuncProfiledCallsites, uint64_t &FuncMismatchedCallsites) const;
+ void countMismatchedCallsiteSamples(
const FunctionSamples &FS,
- std::map<LineLocation, std::unordered_set<FunctionId>>
- &ProfileAnchors);
- void countMismatchedSamples(const FunctionSamples &FS);
- void countProfileMismatches(
- const Function &F, const FunctionSamples &FS,
- const std::map<LineLocation, StringRef> &IRAnchors,
- const std::map<LineLocation, std::unordered_set<FunctionId>>
- &ProfileAnchors);
- void countProfileCallsiteMismatches(
- const FunctionSamples &FS,
- const std::map<LineLocation, StringRef> &IRAnchors,
- const std::map<LineLocation, std::unordered_set<FunctionId>>
- &ProfileAnchors,
- uint64_t &FuncMismatchedCallsites, uint64_t &FuncProfiledCallsites);
+ StringMap<std::set<LineLocation>> &FuncToMismatchCallsites,
+ uint64_t &FuncMismatchedCallsiteSamples) const;
LocToLocMap &getIRToProfileLocationMap(const Function &F) {
auto Ret = FuncMappings.try_emplace(
FunctionSamples::getCanonicalFnName(F.getName()), LocToLocMap());
@@ -499,11 +506,10 @@ class SampleProfileMatcher {
}
void distributeIRToProfileLocationMap();
void distributeIRToProfileLocationMap(FunctionSamples &FS);
- void runStaleProfileMatching(
- const Function &F, const std::map<LineLocation, StringRef> &IRAnchors,
- const std::map<LineLocation, std::unordered_set<FunctionId>>
- &ProfileAnchors,
- LocToLocMap &IRToProfileLocationMap);
+ void runStaleProfileMatching();
+ void runStaleProfileMatching(const Function &F, const IRAnchorMap &IRAnchors,
+ const ProfileAnchorMap &ProfileAnchors,
+ LocToLocMap &IRToProfileLocationMap);
};
/// Sample profile pass.
@@ -1129,7 +1135,7 @@ void SampleProfileLoader::findExternalInlineCandidate(
CalleeSample->getContext().hasAttribute(ContextShouldBeInlined);
if (!PreInline && CalleeSample->getHeadSamplesEstimate() < Threshold)
continue;
-
+
Function *Func = SymbolMap.lookup(CalleeSample->getFunction());
// Add to the import list only when it's defined out of module.
if (!Func || Func->isDeclaration())
@@ -2123,8 +2129,8 @@ bool SampleProfileLoader::doInitialization(Module &M,
return true;
}
-void SampleProfileMatcher::findIRAnchors(
- const Function &F, std::map<LineLocation, StringRef> &IRAnchors) {
+void SampleProfileMatcher::findIRAnchors(const Function &F,
+ IRAnchorMap &IRAnchors) {
// For inlined code, recover the original callsite and callee by finding the
// top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the
// top-level frame is "main:1", the callsite is "1" and the callee is "foo".
@@ -2190,7 +2196,8 @@ void SampleProfileMatcher::findIRAnchors(
}
}
-void SampleProfileMatcher::countMismatchedSamples(const FunctionSamples &FS) {
+void SampleProfileMatcher::countMismatchedHashSamples(
+ const FunctionSamples &FS) {
const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
// Skip the function that is external or renamed.
if (!FuncDesc)
@@ -2202,96 +2209,11 @@ void SampleProfileMatcher::countMismatchedSamples(const FunctionSamples &FS) {
}
for (const auto &I : FS.getCallsiteSamples())
for (const auto &CS : I.second)
- countMismatchedSamples(CS.second);
-}
-
-void SampleProfileMatcher::countProfileMismatches(
- const Function &F, const FunctionSamples &FS,
- const std::map<LineLocation, StringRef> &IRAnchors,
- const std::map<LineLocation, std::unordered_set<FunctionId>>
- &ProfileAnchors) {
- [[maybe_unused]] bool IsFuncHashMismatch = false;
- if (FunctionSamples::ProfileIsProbeBased) {
- TotalFuncHashSamples += FS.getTotalSamples();
- TotalProfiledFunc++;
- const auto *FuncDesc = ProbeManager->getDesc(F);
- if (FuncDesc) {
- if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
- NumMismatchedFuncHash++;
- IsFuncHashMismatch = true;
- }
- countMismatchedSamples(FS);
- }
- }
-
- uint64_t FuncMismatchedCallsites = 0;
- uint64_t FuncProfiledCallsites = 0;
- countProfileCallsiteMismatches(FS, IRAnchors, ProfileAnchors,
- FuncMismatchedCallsites,
- FuncProfiledCallsites);
- TotalProfiledCallsites += FuncProfiledCallsites;
- NumMismatchedCallsites += FuncMismatchedCallsites;
- LLVM_DEBUG({
- if (FunctionSamples::ProfileIsProbeBased && !IsFuncHashMismatch &&
- FuncMismatchedCallsites)
- dbgs() << "Function checksum is matched but there are "
- << FuncMismatchedCallsites << "/" << FuncProfiledCallsites
- << " mismatched callsites.\n";
- });
+ countMismatchedHashSamples(CS.second);
}
-void SampleProfileMatcher::countProfileCallsiteMismatches(
- const FunctionSamples &FS,
- const std::map<LineLocation, StringRef> &IRAnchors,
- const std::map<LineLocation, std::unordered_set<FunctionId>>
- &ProfileAnchors,
- uint64_t &FuncMismatchedCallsites, uint64_t &FuncProfiledCallsites) {
-
- // Check if there are any callsites in the profile that does not match to any
- // IR callsites, those callsite samples will be discarded.
- for (const auto &I : ProfileAnchors) {
- const auto &Loc = I.first;
- const auto &Callees = I.second;
- assert(!Callees.empty() && "Callees should not be empty");
-
- StringRef IRCalleeName;
- const auto &IR = IRAnchors.find(Loc);
- if (IR != IRAnchors.end())
- IRCalleeName = IR->second;
-
- // Compute number of samples in the original profile.
- uint64_t CallsiteSamples = 0;
- if (auto CTM = FS.findCallTargetMapAt(Loc)) {
- for (const auto &I : *CTM)
- CallsiteSamples += I.second;
- }
- const auto *FSMap = FS.findFunctionSamplesMapAt(Loc);
- if (FSMap) {
- for (const auto &I : *FSMap)
- CallsiteSamples += I.second.getTotalSamples();
- }
-
- bool CallsiteIsMatched = false;
- // Since indirect call does not have CalleeName, check conservatively if
- // callsite in the profile is a callsite location. This is to reduce num of
- // false positive since otherwise all the indirect call samples will be
- // reported as mismatching.
- if (IRCalleeName == UnknownIndirectCallee)
- CallsiteIsMatched = true;
- else if (Callees.size() == 1 && Callees.count(getRepInFormat(IRCalleeName)))
- CallsiteIsMatched = true;
-
- FuncProfiledCallsites++;
- TotalCallsiteSamples += CallsiteSamples;
- if (!CallsiteIsMatched) {
- FuncMismatchedCallsites++;
- MismatchedCallsiteSamples += CallsiteSamples;
- }
- }
-}
-
-void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
- std::map<LineLocation, std::unordered_set<FunctionId>> &ProfileAnchors) {
+void SampleProfileMatcher::findProfileAnchors(
+ const FunctionSamples &FS, ProfileAnchorMap &ProfileAnchors) {
auto isInvalidLineOffset = [](uint32_t LineOffset) {
return LineOffset & 0x8000;
};
@@ -2338,10 +2260,8 @@ void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
// [1, 2, 3(foo), 4, 7, 8(bar), 9]
// The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
void SampleProfileMatcher::runStaleProfileMatching(
- const Function &F,
- const std::map<LineLocation, StringRef> &IRAnchors,
- const std::map<LineLocation, std::unordered_set<FunctionId>>
- &ProfileAnchors,
+ const Function &F, const IRAnchorMap &IRAnchors,
+ const ProfileAnchorMap &ProfileAnchors,
LocToLocMap &IRToProfileLocationMap) {
LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
<< "\n");
@@ -2422,59 +2342,226 @@ void SampleProfileMatcher::runStaleProfileMatching(
}
}
-void SampleProfileMatcher::runOnFunction(const Function &F) {
- // We need to use flattened function samples for matching.
- // Unlike IR, which includes all callsites from the source code, the callsites
- // in profile only show up when they are hit by samples, i,e. the profile
- // callsites in one context may differ from those in another context. To get
- // the maximum number of callsites, we merge the function profiles from all
- // contexts, aka, the flattened profile to find profile anchors.
- const auto *FSFlattened = getFlattenedSamplesFor(F);
- if (!FSFlattened)
- return;
+void SampleProfileMatcher::runStaleProfileMatching() {
+ for (const auto &F : M) {
+ if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
+ continue;
+ const auto *FSFlattened = getFlattenedSamplesFor(F);
+ if (!FSFlattened)
+ continue;
+ auto IR = FuncIRAnchors.find(&F);
+ auto P = FuncProfileAnchors.find(&F);
+ if (IR == FuncIRAnchors.end() || P == FuncProfileAnchors.end())
+ continue;
- // Anchors for IR. It's a map from IR location to callee name, callee name is
- // empty for non-call instruction and use a dummy name(UnknownIndirectCallee)
- // for unknown indrect callee name.
- std::map<LineLocation, StringRef> IRAnchors;
- findIRAnchors(F, IRAnchors);
- // Anchors for profile. It's a map from callsite location to a set of callee
- // name.
- std::map<LineLocation, std::unordered_set<FunctionId>> ProfileAnchors;
- findProfileAnchors(*FSFlattened, ProfileAnchors);
-
- // Detect profile mismatch for profile staleness metrics report.
- // Skip reporting the metrics for imported functions.
- if (!GlobalValue::isAvailableExternallyLinkage(F.getLinkage()) &&
- (ReportProfileStaleness || PersistProfileStaleness)) {
- // Use top-level nested FS for counting profile mismatch metrics since
- // currently once a callsite is mismatched, all its children profiles are
- // dropped.
- if (const auto *FS = Reader.getSamplesFor(F))
- countProfileMismatches(F, *FS, IRAnchors, ProfileAnchors);
+ // Run profile matching for checksum mismatched profile, currently only
+ // support for pseudo-probe.
+ if (FunctionSamples::ProfileIsProbeBased &&
+ !ProbeManager->profileIsValid(F, *FSFlattened)) {
+ runStaleProfileMatching(F, IR->second, P->second,
+ getIRToProfileLocationMap(F));
+ }
}
- // Run profile matching for checksum mismatched profile, currently only
- // support for pseudo-probe.
- if (SalvageStaleProfile && FunctionSamples::ProfileIsProbeBased &&
- !ProbeManager->profileIsValid(F, *FSFlattened)) {
- // The matching result will be saved to IRToProfileLocationMap, create a new
- // map for each function.
- runStaleProfileMatching(F, IRAnchors, ProfileAnchors,
- getIRToProfileLocationMap(F));
- }
+ distributeIRToProfileLocationMap();
}
-void SampleProfileMatcher::runOnModule() {
+void SampleProfileMatcher::findFuncAnchors() {
ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
FunctionSamples::ProfileIsCS);
- for (auto &F : M) {
+ for (const auto &F : M) {
if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
continue;
- runOnFunction(F);
+ // We need to use flattened function samples for matching.
+ // Unlike IR, which includes all callsites from the source code, the
+ // callsites in profile only show up when they are hit by samples, i.e. the
+ // profile callsites in one context may differ from those in another
+ // context. To get the maximum number of callsites, we merge the function
+ // profiles from all contexts, aka, the flattened profile to find profile
+ // anchors.
+ const auto *FSFlattened = getFlattenedSamplesFor(F);
+ if (!FSFlattened)
+ continue;
+
+ // Anchors for IR. It's a map from IR location to callee name, callee name
+ // is empty for non-call instruction and use a dummy
+ // name(UnknownIndirectCallee) for unknown indirect callee name.
+ auto IR = FuncIRAnchors.emplace(&F, IRAnchorMap());
+ findIRAnchors(F, IR.first->second);
+
+ // Anchors for profile. It's a map from callsite location to a set of callee
+ // name.
+ auto P = FuncProfileAnchors.emplace(&F, ProfileAnchorMap());
+ findProfileAnchors(*FSFlattened, P.first->second);
+ }
+}
+
+void SampleProfileMatcher::countMismatchedCallsiteSamples(
+ const FunctionSamples &FS,
+ StringMap<std::set<LineLocation>> &FuncToMismatchCallsites,
+ uint64_t &FuncMismatchedCallsiteSamples) const {
+ auto It = FuncToMismatchCallsites.find(FS.getFuncName());
+ // Skip it if no mismatched callsite or this is an external function.
+ if (It == FuncToMismatchCallsites.end() || It->second.empty())
+ return;
+ const auto &MismatchCallsites = It->second;
+ for (const auto &I : FS.getBodySamples()) {
+ if (MismatchCallsites.count(I.first))
+ FuncMismatchedCallsiteSamples += I.second.getSamples();
+ }
+
+ for (const auto &I : FS.getCallsiteSamples()) {
+ const auto &Loc = I.first;
+ if (MismatchCallsites.count(Loc)) {
+ for (const auto &CS : I.second)
+ FuncMismatchedCallsiteSamples += CS.second.getTotalSamples();
+ continue;
+ }
+
+ // Count mismatched samples for inlined samples.
+ for (const auto &CS : I.second)
+ countMismatchedCallsiteSamples(CS.second, FuncToMismatchCallsites,
+ FuncMismatchedCallsiteSamples);
+ }
+}
+
+void SampleProfileMatcher::countMismatchedCallsites(
+ const Function &F,
+ StringMap<std::set<LineLocation>> &FuncToMismatchCallsites,
+ uint64_t &FuncProfiledCallsites, uint64_t &FuncMismatchedCallsites) const {
+ auto IR = FuncIRAnchors.find(&F);
+ auto P = FuncProfileAnchors.find(&F);
+ if (IR == FuncIRAnchors.end() || P == FuncProfileAnchors.end())
+ return;
+ const auto &IRAnchors = IR->second;
+ const auto &ProfileAnchors = P->second;
+
+ auto &MismatchCallsites =
+ FuncToMismatchCallsites[FunctionSamples::getCanonicalFnName(F.getName())];
+
+ // Check if there are any callsites in the profile that do not match any
+ // IR callsites; those callsite samples will be discarded.
+ for (const auto &I : ProfileAnchors) {
+ const auto &Loc = I.first;
+ const auto &Callees = I.second;
+ assert(!Callees.empty() && "Callees should not be empty");
+
+ StringRef IRCalleeName;
+ const auto &IR = IRAnchors.find(Loc);
+ if (IR != IRAnchors.end())
+ IRCalleeName = IR->second;
+ bool CallsiteIsMatched = false;
+ // Since indirect call does not have CalleeName, check conservatively if
+ // callsite in the profile is a callsite location. This is to reduce num of
+ // false positive since otherwise all the indirect call samples will be
+ // reported as mismatching.
+ if (IRCalleeName == UnknownIndirectCallee)
+ CallsiteIsMatched = true;
+ else if (Callees.count(FunctionId(IRCalleeName)))
+ CallsiteIsMatched = true;
+
+ FuncProfiledCallsites++;
+ if (!CallsiteIsMatched) {
+ FuncMismatchedCallsites++;
+ MismatchCallsites.insert(Loc);
+ }
+ }
+}
+
+void SampleProfileMatcher::countMismatchedHashes(const Function &F,
+ const FunctionSamples &FS) {
+ if (!FunctionSamples::ProfileIsProbeBased)
+ return;
+ const auto *FuncDesc = ProbeManager->getDesc(F);
+ if (FuncDesc) {
+ if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
+ NumMismatchedFuncHash++;
+ }
+ countMismatchedHashSamples(FS);
+ }
+}
+
+void SampleProfileMatcher::UpdateIRAnchors() {
+ for (auto &I : FuncIRAnchors) {
+ const auto *F = I.first;
+ auto &IRAnchors = I.second;
+ const auto Mapping =
+ FuncMappings.find(FunctionSamples::getCanonicalFnName(F->getName()));
+ if (Mapping == FuncMappings.end())
+ continue;
+ IRAnchorMap UpdatedIRAnchors;
+ const auto &LocToLocMapping = Mapping->second;
+ for (const auto L : LocToLocMapping) {
+ UpdatedIRAnchors[L.second] = IRAnchors[L.first];
+ IRAnchors.erase(L.first);
+ }
+
+ for (const auto &IR : UpdatedIRAnchors) {
+ IRAnchors[IR.first] = IR.second;
+ }
+ }
+}
+
+void SampleProfileMatcher::countProfileMismatches(bool IsPreMatch) {
+ if (!ReportProfileStaleness && !PersistProfileStaleness)
+ return;
+
+ if (!IsPreMatch) {
+ // Use the profile matching results to update the IR anchors.
+ UpdateIRAnchors();
+ }
+
+ uint64_t UnusedCounter = 0;
+ uint64_t *TotalProfiledCallsitesPtr =
+ IsPreMatch ? &TotalProfiledCallsites : &UnusedCounter;
+ uint64_t *NumMismatchedCallsitesPtr =
+ IsPreMatch ? &NumMismatchedCallsites : &PostMatchNumMismatchedCallsites;
+ uint64_t *MismatchedCallsiteSamplesPtr =
+ IsPreMatch ? &MismatchedCallsiteSamples
+ : &PostMatchMismatchedCallsiteSamples;
+
+ auto SkipFunctionForReport = [](const Function &F) {
+ if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
+ return true;
+ // Skip reporting the metrics for imported functions.
+ if (GlobalValue::isAvailableExternallyLinkage(F.getLinkage()))
+ return true;
+ return false;
+ };
+
+ StringMap<std::set<LineLocation>> FuncToMismatchCallsites;
+ for (const auto &F : M) {
+ if (SkipFunctionForReport(F))
+ continue;
+ const auto *FS = Reader.getSamplesFor(F);
+ if (FS && IsPreMatch) {
+ // Only count the total function metrics once in pre-match time.
+ TotalFuncHashSamples += FS->getTotalSamples();
+ TotalProfiledFunc++;
+ countMismatchedHashes(F, *FS);
+ }
+ countMismatchedCallsites(F, FuncToMismatchCallsites,
+ *TotalProfiledCallsitesPtr,
+ *NumMismatchedCallsitesPtr);
+ }
+
+ for (const auto &F : M) {
+ if (SkipFunctionForReport(F))
+ continue;
+ if (const auto *FS = Reader.getSamplesFor(F))
+ countMismatchedCallsiteSamples(*FS, FuncToMismatchCallsites,
+ *MismatchedCallsiteSamplesPtr);
+ }
+}
+
+void SampleProfileMatcher::runOnModule() {
+ findFuncAnchors();
+ countProfileMismatches(true);
+
+ if (SalvageStaleProfile) {
+ runStaleProfileMatching();
+ countProfileMismatches(false);
}
- if (SalvageStaleProfile)
- distributeIRToProfileLocationMap();
if (ReportProfileStaleness) {
if (FunctionSamples::ProfileIsProbeBased) {
@@ -2487,9 +2574,18 @@ void SampleProfileMatcher::runOnModule() {
errs() << "(" << NumMismatchedCallsites << "/" << TotalProfiledCallsites
<< ")"
<< " of callsites' profile are invalid and "
- << "(" << MismatchedCallsiteSamples << "/" << TotalCallsiteSamples
+ << "(" << MismatchedCallsiteSamples << "/" << TotalFuncHashSamples
<< ")"
<< " of samples are discarded due to callsite location mismatch.\n";
+ if (SalvageStaleProfile) {
+ errs() << "(" << PostMatchNumMismatchedCallsites << "/"
+ << TotalProfiledCallsites << ")"
+ << " of callsites' profile are invalid and "
+ << "(" << PostMatchMismatchedCallsiteSamples << "/"
+ << TotalFuncHashSamples << ")"
+ << " of samples are discarded due to callsite location mismatch "
+ "after stale profile matching.\n";
+ }
}
if (PersistProfileStaleness) {
@@ -2497,19 +2593,23 @@ void SampleProfileMatcher::runOnModule() {
MDBuilder MDB(Ctx);
SmallVector<std::pair<StringRef, uint64_t>> ProfStatsVec;
+ ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites);
+ ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites);
+ ProfStatsVec.emplace_back("MismatchedCallsiteSamples",
+ MismatchedCallsiteSamples);
+ ProfStatsVec.emplace_back("TotalFuncHashSamples", TotalFuncHashSamples);
if (FunctionSamples::ProfileIsProbeBased) {
- ProfStatsVec.emplace_back("NumMismatchedFuncHash", NumMismatchedFuncHash);
ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc);
+ ProfStatsVec.emplace_back("NumMismatchedFuncHash", NumMismatchedFuncHash);
ProfStatsVec.emplace_back("MismatchedFuncHashSamples",
MismatchedFuncHashSamples);
- ProfStatsVec.emplace_back("TotalFuncHashSamples", TotalFuncHashSamples);
}
-
- ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites);
- ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites);
- ProfStatsVec.emplace_back("MismatchedCallsiteSamples",
- MismatchedCallsiteSamples);
- ProfStatsVec.emplace_back("TotalCallsiteSamples", TotalCallsiteSamples);
+ if (SalvageStaleProfile) {
+ ProfStatsVec.emplace_back("PostMatchNumMismatchedCallsites",
+ PostMatchNumMismatchedCallsites);
+ ProfStatsVec.emplace_back("PostMatchMismatchedCallsiteSamples",
+ PostMatchMismatchedCallsiteSamples);
+ }
auto *MD = MDB.createLLVMStats(ProfStatsVec);
auto *NMD = M.getOrInsertNamedMetadata("llvm.stats");
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof b/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof
index 818a048b8cabb84..f2a00e789b8b669 100644
--- a/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof
+++ b/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof
@@ -2,14 +2,15 @@ main:30:0
0: 0
1.1: 0
3: 10 matched:10
- 4: 10
- 5: 10 bar_mismatch:10
+ 7: 10
8: 0
- 7: foo:15
+ 4: foo:15
1: 5
2: 5
3: inlinee_mismatch:5
1: 5
+ 5: bar_mismatch:10
+ 1: 10
bar:10:10
1: 10
matched:10:10
diff --git a/llvm/test/Transforms/SampleProfile/profile-mismatch.ll b/llvm/test/Transforms/SampleProfile/profile-mismatch.ll
index d86175c02dbb423..e7c5dece1235b57 100644
--- a/llvm/test/Transforms/SampleProfile/profile-mismatch.ll
+++ b/llvm/test/Transforms/SampleProfile/profile-mismatch.ll
@@ -6,9 +6,9 @@
; RUN: llvm-objdump --section-headers %t.obj | FileCheck %s --check-prefix=CHECK-OBJ
; RUN: llc < %t.ll -filetype=asm -o - | FileCheck %s --check-prefix=CHECK-ASM
-; CHECK: (2/3) of callsites' profile are invalid and (25/35) of samples are discarded due to callsite location mismatch.
+; CHECK: (2/4) of callsites' profile are invalid and (15/50) of samples are discarded due to callsite location mismatch.
-; CHECK-MD: ![[#]] = !{!"NumMismatchedCallsites", i64 2, !"TotalProfiledCallsites", i64 3, !"MismatchedCallsiteSamples", i64 25, !"TotalCallsiteSamples", i64 35}
+; CHECK-MD: ![[#]] = !{!"NumMismatchedCallsites", i64 2, !"TotalProfiledCallsites", i64 4, !"MismatchedCallsiteSamples", i64 15, !"TotalFuncHashSamples", i64 50}
; CHECK-OBJ: .llvm_stats
@@ -20,15 +20,15 @@
; CHECK-ASM: .byte 22
; CHECK-ASM: .ascii "TotalProfiledCallsites"
; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii "Mw=="
+; CHECK-ASM: .ascii "NA=="
; CHECK-ASM: .byte 25
; CHECK-ASM: .ascii "MismatchedCallsiteSamples"
; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii "MjU="
+; CHECK-ASM: .ascii "MTU="
; CHECK-ASM: .byte 20
-; CHECK-ASM: .ascii "TotalCallsiteSamples"
+; CHECK-ASM: .ascii "TotalFuncHashSamples"
; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii "MzU="
+; CHECK-ASM: .ascii "NTA="
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-thinlto.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-thinlto.ll
index 29c3a142cc68f8f..7f848da74a53cee 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-thinlto.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-thinlto.ll
@@ -4,7 +4,7 @@
; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD
; CHECK: (1/1) of functions' profile are invalid and (6822/6822) of samples are discarded due to function hash mismatch.
-; CHECK: (4/4) of callsites' profile are invalid and (5026/5026) of samples are discarded due to callsite location mismatch.
+; CHECK: (4/4) of callsites' profile are invalid and (5026/6822) of samples are discarded due to callsite location mismatch.
+; CHECK: (0/4) of callsites' profile are invalid and (0/6822) of samples are discarded due to callsite location mismatch after stale profile matching.
-
-; CHECK-MD: ![[#]] = !{!"NumMismatchedFuncHash", i64 1, !"TotalProfiledFunc", i64 1, !"MismatchedFuncHashSamples", i64 6822, !"TotalFuncHashSamples", i64 6822, !"NumMismatchedCallsites", i64 4, !"TotalProfiledCallsites", i64 4, !"MismatchedCallsiteSamples", i64 5026, !"TotalCallsiteSamples", i64 5026}
+; CHECK-MD: !{!"NumMismatchedCallsites", i64 4, !"TotalProfiledCallsites", i64 4, !"MismatchedCallsiteSamples", i64 5026, !"TotalFuncHashSamples", i64 6822, !"TotalProfiledFunc", i64 1, !"NumMismatchedFuncHash", i64 1, !"MismatchedFuncHashSamples", i64 6822, !"PostMatchNumMismatchedCallsites", i64 0, !"PostMatchMismatchedCallsiteSamples", i64 0}
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll
index 4b6edf821376c0b..5c5bb1f0fae647f 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll
@@ -10,45 +10,51 @@
; CHECK: (1/3) of functions' profile are invalid and (10/50) of samples are discarded due to function hash mismatch.
-; CHECK: (2/3) of callsites' profile are invalid and (20/30) of samples are discarded due to callsite location mismatch.
+; CHECK: (2/3) of callsites' profile are invalid and (20/50) of samples are discarded due to callsite location mismatch.
+; CHECK: (2/3) of callsites' profile are invalid and (20/50) of samples are discarded due to callsite location mismatch after stale profile matching.
-; CHECK-MD: ![[#]] = !{!"NumMismatchedFuncHash", i64 1, !"TotalProfiledFunc", i64 3, !"MismatchedFuncHashSamples", i64 10, !"TotalFuncHashSamples", i64 50, !"NumMismatchedCallsites", i64 2, !"TotalProfiledCallsites", i64 3, !"MismatchedCallsiteSamples", i64 20, !"TotalCallsiteSamples", i64 30}
+; CHECK-MD: ![[#]] = !{!"NumMismatchedCallsites", i64 2, !"TotalProfiledCallsites", i64 3, !"MismatchedCallsiteSamples", i64 20, !"TotalFuncHashSamples", i64 50, !"TotalProfiledFunc", i64 3, !"NumMismatchedFuncHash", i64 1, !"MismatchedFuncHashSamples", i64 10, !"PostMatchNumMismatchedCallsites", i64 2, !"PostMatchMismatchedCallsiteSamples", i64 20}
; CHECK-OBJ: .llvm_stats
-; CHECK-ASM: .section .llvm_stats,"",@progbits
-; CHECK-ASM: .byte 21
-; CHECK-ASM: .ascii "NumMismatchedFuncHash"
-; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii "MQ=="
-; CHECK-ASM: .byte 17
-; CHECK-ASM: .ascii "TotalProfiledFunc"
-; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii "Mw=="
-; CHECK-ASM: .byte 25
-; CHECK-ASM: .ascii "MismatchedFuncHashSamples"
-; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii "MTA="
-; CHECK-ASM: .byte 20
-; CHECK-ASM: .ascii "TotalFuncHashSamples"
-; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii "NTA="
-; CHECK-ASM: .byte 22
-; CHECK-ASM: .ascii "NumMismatchedCallsites"
-; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii "Mg=="
-; CHECK-ASM: .byte 22
-; CHECK-ASM: .ascii "TotalProfiledCallsites"
-; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii "Mw=="
-; CHECK-ASM: .byte 25
-; CHECK-ASM: .ascii "MismatchedCallsiteSamples"
-; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii "MjA="
-; CHECK-ASM: .byte 20
-; CHECK-ASM: .ascii "TotalCallsiteSamples"
-; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii "MzA="
+
+; CHECK-ASM: .section .llvm_stats,"",@progbits
+; CHECK-ASM: .byte 22
+; CHECK-ASM: .ascii "NumMismatchedCallsites"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii "Mg=="
+; CHECK-ASM: .byte 22
+; CHECK-ASM: .ascii "TotalProfiledCallsites"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii "Mw=="
+; CHECK-ASM: .byte 25
+; CHECK-ASM: .ascii "MismatchedCallsiteSamples"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii "MjA="
+; CHECK-ASM: .byte 20
+; CHECK-ASM: .ascii "TotalFuncHashSamples"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii "NTA="
+; CHECK-ASM: .byte 17
+; CHECK-ASM: .ascii "TotalProfiledFunc"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii "Mw=="
+; CHECK-ASM: .byte 21
+; CHECK-ASM: .ascii "NumMismatchedFuncHash"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii "MQ=="
+; CHECK-ASM: .byte 25
+; CHECK-ASM: .ascii "MismatchedFuncHashSamples"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii "MTA="
+; CHECK-ASM: .byte 31
+; CHECK-ASM: .ascii "PostMatchNumMismatchedCallsites"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii "Mg=="
+; CHECK-ASM: .byte 34
+; CHECK-ASM: .ascii "PostMatchMismatchedCallsiteSamples"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii "MjA="
; CHECK-NESTED: (1/2) of functions' profile are invalid and (211/311) of samples are discarded due to function hash mismatch.
More information about the flang-commits
mailing list