[llvm] [CSSPGO] Compute and report profile matching recovered callsites and samples (PR #79090)

via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 1 13:29:50 PST 2024


================
@@ -2443,53 +2372,222 @@ void SampleProfileMatcher::runOnFunction(const Function &F) {
   std::map<LineLocation, std::unordered_set<FunctionId>> ProfileAnchors;
   findProfileAnchors(*FSFlattened, ProfileAnchors);
 
-  // Detect profile mismatch for profile staleness metrics report.
-  // Skip reporting the metrics for imported functions.
-  if (!GlobalValue::isAvailableExternallyLinkage(F.getLinkage()) &&
-      (ReportProfileStaleness || PersistProfileStaleness)) {
-    // Use top-level nested FS for counting profile mismatch metrics since
-    // currently once a callsite is mismatched, all its children profiles are
-    // dropped.
-    if (const auto *FS = Reader.getSamplesFor(F))
-      countProfileMismatches(F, *FS, IRAnchors, ProfileAnchors);
-  }
+  // Compute the callsite match states for profile staleness report.
+  if (ReportProfileStaleness || PersistProfileStaleness)
+    computeCallsiteMatchStates(F, IRAnchors, ProfileAnchors, LocToLocMap());
 
   // Run profile matching for checksum mismatched profile, currently only
   // support for pseudo-probe.
   if (SalvageStaleProfile && FunctionSamples::ProfileIsProbeBased &&
       !ProbeManager->profileIsValid(F, *FSFlattened)) {
     // The matching result will be saved to IRToProfileLocationMap, create a new
     // map for each function.
+    auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
     runStaleProfileMatching(F, IRAnchors, ProfileAnchors,
-                            getIRToProfileLocationMap(F));
+                            IRToProfileLocationMap);
+    // Find and update callsite match states after matching.
+    if ((ReportProfileStaleness || PersistProfileStaleness) &&
+        !IRToProfileLocationMap.empty())
+      computeCallsiteMatchStates(F, IRAnchors, ProfileAnchors,
+                                 IRToProfileLocationMap);
   }
 }
 
-void SampleProfileMatcher::runOnModule() {
-  ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
-                                   FunctionSamples::ProfileIsCS);
-  for (auto &F : M) {
-    if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
+void SampleProfileMatcher::computeCallsiteMatchStates(
+    const Function &F, const std::map<LineLocation, StringRef> &IRAnchors,
+    const std::map<LineLocation, std::unordered_set<FunctionId>>
+        &ProfileAnchors,
+    const LocToLocMap &IRToProfileLocationMap) {
+  // Use the matching result to determine if it's in post-match phrase.
+  bool IsPostMatch = !IRToProfileLocationMap.empty();
+  auto &MismatchedCallsites =
+      FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(F.getName())];
+
+  auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) {
+    const auto &ProfileLoc = IRToProfileLocationMap.find(IRLoc);
+    if (ProfileLoc != IRToProfileLocationMap.end())
+      return ProfileLoc->second;
+    else
+      return IRLoc;
+  };
+
+  std::set<LineLocation> MatchedCallsites;
+  for (const auto &I : IRAnchors) {
+    // In post-match, use the matching result to remap the current IR callsite.
+    const auto &Loc = MapIRLocToProfileLoc(I.first);
+    const auto &IRCalleeName = I.second;
+    const auto &It = ProfileAnchors.find(Loc);
+    if (It == ProfileAnchors.end())
       continue;
-    runOnFunction(F);
+    const auto &Callees = It->second;
+
+    // Since indirect call does not have CalleeName, check conservatively if
+    // callsite in the profile is a callsite location. This is to reduce num of
+    // false positive since otherwise all the indirect call samples will be
+    // reported as mismatching.
+    if (IRCalleeName == SampleProfileMatcher::UnknownIndirectCallee)
+      MatchedCallsites.insert(Loc);
+    // TODO : Ideally, we should ensure it's a direct callsite location(Callees
+    // size is 1). However, there may be a bug for profile merge(like ODR
+    // violation) that causes the callees size to be more than 1. After we fix
+    // the bug, we can remove this check.
+    else if (Callees.count(getRepInFormat(IRCalleeName)))
+      MatchedCallsites.insert(Loc);
+  }
+
+  // Check if there are any callsites in the profile that does not match to any
+  // IR callsites, those callsite samples will be discarded.
+  for (const auto &I : ProfileAnchors) {
+    const auto &Loc = I.first;
+    [[maybe_unused]] const auto &Callees = I.second;
+    assert(!Callees.empty() && "Callees should not be empty");
+    if (IsPostMatch) {
+      if (MatchedCallsites.count(Loc)) {
+        auto It = MismatchedCallsites.find(Loc);
+        if (It != MismatchedCallsites.end() &&
+            It->second == MatchState::Mismatched)
+          MismatchedCallsites.emplace(Loc, MatchState::Recovered);
+      } else
+        MismatchedCallsites.emplace(Loc, MatchState::Mismatched);
+    } else {
+      if (MatchedCallsites.count(Loc))
+        MismatchedCallsites.emplace(Loc, MatchState::Matched);
+      else
+        MismatchedCallsites.emplace(Loc, MatchState::Mismatched);
+    }
+  }
+}
+
+void SampleProfileMatcher::countMismatchedFuncSamples(
+    const FunctionSamples &FS) {
+  const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
+  // Skip the function that is external or renamed.
+  if (!FuncDesc)
+    return;
+
+  if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
+    MismatchedFunctionSamples += FS.getTotalSamples();
+    return;
+  }
+  for (const auto &I : FS.getCallsiteSamples())
+    for (const auto &CS : I.second)
+      countMismatchedFuncSamples(CS.second);
+}
+
+void SampleProfileMatcher::countMismatchedCallsiteSamples(
+    const FunctionSamples &FS) {
+  auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
+  // Skip it if no mismatched callsite or this is an external function.
+  if (It == FuncCallsiteMatchStates.end() || It->second.empty())
+    return;
+  const auto &MismatchCallsites = It->second;
+
+  auto IsCallsiteMismatched = [&](const LineLocation &Loc) {
+    auto It = MismatchCallsites.find(Loc);
+    if (It == MismatchCallsites.end())
+      return false;
+    return It->second == MatchState::Mismatched;
+  };
+
+  auto CountSamples = [&](const LineLocation &Loc, uint64_t Samples) {
+    auto It = MismatchCallsites.find(Loc);
+    if (It == MismatchCallsites.end())
+      return;
+    if (It->second == MatchState::Mismatched)
+      MismatchedCallsiteSamples += Samples;
+    else if (It->second == MatchState::Recovered)
+      RecoveredCallsiteSamples += Samples;
+  };
+
+  for (const auto &I : FS.getBodySamples())
+    CountSamples(I.first, I.second.getSamples());
+
+  for (const auto &I : FS.getCallsiteSamples()) {
+    uint64_t Samples = 0;
+    for (const auto &CS : I.second)
+      Samples += CS.second.getTotalSamples();
+
+    CountSamples(I.first, Samples);
+
+    if (IsCallsiteMismatched(I.first))
+      continue;
+
+    // Count mismatched samples for matched inlines.
+    for (const auto &CS : I.second)
+      countMismatchedCallsiteSamples(CS.second);
+  }
----------------
WenleiHe wrote:

Ok, I think we need a clearer flow. Currently `CountSamples` checks match state and do something, later `IsCallsiteMismatched` checks match state again and do something else. It's all tangled together. 

It'd be clearer if we can hoist the flow on checking match state, and then group together what needs to be happen to each match state. I think it's still doable even with what you mentioned above. Make sense? 

https://github.com/llvm/llvm-project/pull/79090


More information about the llvm-commits mailing list