[llvm] [SampleFDO] Stale profile call-graph matching (PR #95135)
Lei Wang via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 17 16:28:26 PDT 2024
================
@@ -590,14 +688,237 @@ void SampleProfileMatcher::computeAndReportProfileStaleness() {
}
}
+void SampleProfileMatcher::findNewIRFunctions() {
+ // Record in NewIRFunctions every defined IR function that is found neither in the profile nor in the symbol list. TODO: Support MD5 profile; bail out for now.
+ if (FunctionSamples::UseMD5)
+ return;
+ StringSet<> NamesInProfile;
+ if (auto NameTable = Reader.getNameTable()) {
+ for (auto Name : *NameTable)
+ NamesInProfile.insert(Name.stringRef());
+ }
+
+ for (auto &F : M) {
+ // Skip declarations: even if a declaration could be matched, there is
+ // nothing to do with it.
+ if (F.isDeclaration())
+ continue;
+
+ StringRef CanonFName = FunctionSamples::getCanonicalFnName(F.getName());
+ const auto *FS = getFlattenedSamplesFor(F);
+ if (FS)
+ continue;
+
+ // For extended binary, functions that are fully inlined may not be loaded
+ // in the top-level profile, so check the NameTable, which has all the
+ // symbol names in the profile.
+ if (NamesInProfile.count(CanonFName))
+ continue;
+
+ // For extended binary, non-profiled function symbols are in the profile
+ // symbol list table.
+ if (PSL && PSL->contains(CanonFName))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Function " << CanonFName
+ << " is not in profile or symbol list table.\n");
+ NewIRFunctions[FunctionId(CanonFName)] = &F;
+ }
+}
+
+bool SampleProfileMatcher::functionMatchesProfileHelper(
+ const Function &IRFunc, const FunctionId &ProfFunc) {
+ // The value is in the range [0, 1]. The bigger the value is, the more similar
+ // two sequences are.
+ float Similarity = 0.0;
+
+ const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc);
+ assert(FSFlattened && "Flattened profile sample is null");
+ // Similarity check may not be reliable if the function is tiny, we use the
+ // number of basic block as a proxy for the function complexity and skip the
+ // matching if it's too small.
+ if (IRFunc.size() < MinFuncCountForCGMatching ||
+ FSFlattened->getBodySamples().size() < MinFuncCountForCGMatching)
+ return false;
+
+ // For probe-based function, we first trust the checksum info. If the checksum
+ // doesn't match, we continue checking for similarity.
+ if (FunctionSamples::ProfileIsProbeBased) {
+ const auto *FuncDesc = ProbeManager->getDesc(IRFunc);
+ if (FuncDesc &&
+ !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSFlattened)) {
+ LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName()
+ << "(IR) and " << ProfFunc << "(Profile) match.\n");
+
+ return true;
+ }
+ }
----------------
wlei-llvm wrote:
This is intentional: the checksum may not be reliable for tiny functions either (I saw some cases where the block count is only one but the functions are different).
https://github.com/llvm/llvm-project/pull/95135
More information about the llvm-commits
mailing list