[llvm] [SampleFDO] Stale profile renaming matching (PR #92151)
via llvm-commits
llvm-commits at lists.llvm.org
Mon May 20 20:50:13 PDT 2024
================
@@ -590,13 +609,260 @@ void SampleProfileMatcher::computeAndReportProfileStaleness() {
}
}
-void SampleProfileMatcher::runOnModule() {
+// Collect defined IR functions that appear neither in the profile nor in the
+// profile symbol list; these are presumed to be new (possibly renamed)
+// functions and are recorded in IRNewFunctions as targets for renaming matching.
+void SampleProfileMatcher::findIRNewFunctions(
+ StringMap<Function *> &IRNewFunctions) {
+ // TODO: Support MD5 profile. Until then, IRNewFunctions is left untouched.
+ if (FunctionSamples::UseMD5)
+ return;
+ StringSet<> NamesInProfile;
+ if (auto NameTable = Reader.getNameTable()) { // The name table may be absent.
+ for (auto Name : *NameTable)
+ NamesInProfile.insert(Name.stringRef());
+ }
+
+ for (auto &F : M) {
+ // Skip declarations: even if such a function could be recognized as renamed,
+ // there is nothing to do with it.
+ if (F.isDeclaration())
+ continue;
+
+ StringRef CanonFName = FunctionSamples::getCanonicalFnName(F.getName());
+ const auto *FS = getFlattenedSamplesFor(F);
+ if (FS) // A flattened profile entry was found for F, so it is not new.
+ continue;
+
+ // For extended binary, the full function name symbols exist in the profile
+ // symbol list table.
+ if (NamesInProfile.count(CanonFName))
+ continue;
+
+ if (PSL && PSL->contains(CanonFName)) // PSL may be null; check before use.
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Function " << CanonFName
+ << " is not in profile or symbol list table.\n");
+ IRNewFunctions[CanonFName] = &F; // Keyed by canonical function name.
+ }
+}
+
+void SampleProfileMatcher::findIRNewCallees(
+ Function &Caller, const StringMap<Function *> &IRNewFunctions,
+ std::vector<Function *> &IRNewCallees) { // Appends Caller's callees whose canonical names are keys of IRNewFunctions.
+ for (auto &BB : Caller) {
+ for (auto &I : BB) {
+ const auto *CB = dyn_cast<CallBase>(&I);
+ if (!CB || isa<IntrinsicInst>(&I)) // Only non-intrinsic call instructions are considered.
+ continue;
+ Function *Callee = CB->getCalledFunction();
+ if (!Callee || Callee->isDeclaration()) // Skip calls without a known callee and callees that are only declared.
+ continue;
+ StringRef CalleeName =
+ FunctionSamples::getCanonicalFnName(Callee->getName());
+ if (IRNewFunctions.count(CalleeName))
+ IRNewCallees.push_back(Callee); // No de-duplication: a callee called several times is appended each time.
+ }
+ }
+}
+
+// Use function similarity to determine if the function is renamed. Compute a
+// similarity ratio between two sequences, which are the functions' callsite
+// anchors. The returned value is in the range [0, 1]. The bigger the value is,
+// the more similar the two sequences are.
+float SampleProfileMatcher::checkFunctionSimilarity(
+ const Function &IRFunc, const FunctionId &ProfFName) {
+ AnchorMap IRAnchors;
+ findIRAnchors(IRFunc, IRAnchors);
+
+ AnchorMap ProfileAnchors;
+ const auto *FSFlattened = getFlattenedSamplesFor(ProfFName);
+ assert(FSFlattened && "Flattened profile sample is null");
+ findProfileAnchors(*FSFlattened, ProfileAnchors);
+
+ AnchorList FilteredProfileAnchorList;
+ AnchorList FilteredIRAnchorsList;
+ getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
+ FilteredProfileAnchorList);
+
+ // If the function is probe based, we trust the checksum info to check the
+ // similarity. Otherwise, if the checksum is mismatched, continue computing
+ // the similarity.
+ if (FunctionSamples::ProfileIsProbeBased) {
+ const auto *FuncDesc = ProbeManager->getDesc(IRFunc);
+ // Make sure function is complex enough.
+ if (IRAnchors.size() - FilteredIRAnchorsList.size() > 5 && FuncDesc &&
----------------
WenleiHe wrote:
Also, since this is for filtering out small functions, can we simply use the IR function's size to filter, rather than using anchor counts to infer the function size and then filtering?
https://github.com/llvm/llvm-project/pull/92151
More information about the llvm-commits
mailing list