[llvm] [SampleFDO] Stale profile renaming matching (PR #92151)

via llvm-commits llvm-commits at lists.llvm.org
Fri May 17 22:39:40 PDT 2024


================
@@ -590,13 +609,260 @@ void SampleProfileMatcher::computeAndReportProfileStaleness() {
   }
 }
 
-void SampleProfileMatcher::runOnModule() {
+// Find functions that don't show in the profile or profile symbol list, which
+// are supposed to be new functions. We use them as the targets for renaming
+// matching.
+void SampleProfileMatcher::findIRNewFunctions(
+    StringMap<Function *> &IRNewFunctions) {
+  // TODO: Support MD5 profile.
+  if (FunctionSamples::UseMD5)
+    return;
+  StringSet<> NamesInProfile;
+  if (auto NameTable = Reader.getNameTable()) {
+    for (auto Name : *NameTable)
+      NamesInProfile.insert(Name.stringRef());
+  }
+
+  for (auto &F : M) {
+    // Skip declarations, as even if the function can be recognized renamed, we
+    // have nothing to do with it.
+    if (F.isDeclaration())
+      continue;
+
+    StringRef CanonFName = FunctionSamples::getCanonicalFnName(F.getName());
+    const auto *FS = getFlattenedSamplesFor(F);
+    if (FS)
+      continue;
+
+    // For extended binary, the full function name symbols exits in the profile
+    // symbol list table.
+    if (NamesInProfile.count(CanonFName))
+      continue;
+
+    if (PSL && PSL->contains(CanonFName))
+      continue;
+
+    LLVM_DEBUG(dbgs() << "Function " << CanonFName
+                      << " is not in profile or symbol list table.\n");
+    IRNewFunctions[CanonFName] = &F;
+  }
+}
+
+void SampleProfileMatcher::findIRNewCallees(
+    Function &Caller, const StringMap<Function *> &IRNewFunctions,
+    std::vector<Function *> &IRNewCallees) {
+  for (auto &BB : Caller) {
+    for (auto &I : BB) {
+      const auto *CB = dyn_cast<CallBase>(&I);
+      if (!CB || isa<IntrinsicInst>(&I))
+        continue;
+      Function *Callee = CB->getCalledFunction();
+      if (!Callee || Callee->isDeclaration())
+        continue;
+      StringRef CalleeName =
+          FunctionSamples::getCanonicalFnName(Callee->getName());
+      if (IRNewFunctions.count(CalleeName))
+        IRNewCallees.push_back(Callee);
+    }
+  }
+}
+
+// Use function similarity to determine if the function is renamed. Compute a
+// similarity ratio between two sequences which are  the function callsite
+// anchors. The returned value is in the range [0, 1]. The bigger the value is,
+// the more similar two sequences are.
+float SampleProfileMatcher::checkFunctionSimilarity(
+    const Function &IRFunc, const FunctionId &ProfFName) {
+  AnchorMap IRAnchors;
+  findIRAnchors(IRFunc, IRAnchors);
+
+  AnchorMap ProfileAnchors;
+  const auto *FSFlattened = getFlattenedSamplesFor(ProfFName);
+  assert(FSFlattened && "Flattened profile sample is null");
+  findProfileAnchors(*FSFlattened, ProfileAnchors);
+
+  AnchorList FilteredProfileAnchorList;
+  AnchorList FilteredIRAnchorsList;
+  getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
+                        FilteredProfileAnchorList);
+
+  // If the function is probe based, we trust the checksum info to check the
+  // similarity. Otherwise, if the checksum is mismatched, continue computing
+  // the similarity.
+  if (FunctionSamples::ProfileIsProbeBased) {
+    const auto *FuncDesc = ProbeManager->getDesc(IRFunc);
+    // Make sure function is complex enough.
+    if (IRAnchors.size() - FilteredIRAnchorsList.size() > 5 && FuncDesc &&
+        !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSFlattened)) {
+      return 1.0;
+    }
+  }
+
+  if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty())
+    return 0.0;
+
+  // Use the diff algorithm to find the LCS between IR and profile.
+  LocToLocMap MatchedAnchors =
+      longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList);
+
+  return static_cast<float>(MatchedAnchors.size()) * 2 /
+         (FilteredIRAnchorsList.size() + FilteredProfileAnchorList.size());
+}
+
+bool SampleProfileMatcher::functionIsRenamedImpl(const Function &IRFunc,
+                                                 const FunctionId &ProfFunc) {
+  float Similarity = checkFunctionSimilarity(IRFunc, ProfFunc);
+  LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName()
+                    << "(IR) and " << ProfFunc << "(profile) is "
+                    << format("%.2f", Similarity) << "\n");
+  return Similarity * 100 > FuncRenamingSimilarityThreshold;
+}
+
+bool SampleProfileMatcher::functionIsRenamed(const Function &IRFunc,
+                                             const FunctionId &ProfFunc) {
+  auto R = RenameDecisionCache.find({&IRFunc, ProfFunc});
+  if (R != RenameDecisionCache.end())
+    return R->second;
+
+  bool V = functionIsRenamedImpl(IRFunc, ProfFunc);
+  RenameDecisionCache[{&IRFunc, ProfFunc}] = V;
+  return V;
+}
+
+// Run function renaming matching on the profiled CFG edge to limit the matching
+// scope.
+void SampleProfileMatcher::runFuncRenamingMatchingOnProfile(
+    const StringMap<Function *> &IRNewFunctions, FunctionSamples &CallerFS,
+    FunctionMap &OldProfToNewSymbolMap) {
+  auto FindIRFunction = [&](const FunctionId &FName) {
+    // Function can be null if name has conflict, use optional to store the
+    // function pointer.
+    std::optional<Function *> F;
+
+    auto R = SymbolMap->find(FName);
+    if (R != SymbolMap->end())
+      F = R->second;
+
+    auto NewR = OldProfToNewSymbolMap.find(FName);
+    if (NewR != OldProfToNewSymbolMap.end())
+      F = NewR->second;
+
+    return F;
+  };
+
+  // Find the new callees from IR in the current caller scope.
+  std::vector<Function *> IRNewCallees;
+  auto Caller = FindIRFunction(CallerFS.getFunction());
+  if (Caller.has_value() && *Caller) {
+    // No callees for external function, skip the rename matching.
+    if ((*Caller)->isDeclaration())
+      return;
+    findIRNewCallees(**Caller, IRNewFunctions, IRNewCallees);
+  }
+
+  // Run renaming matching on CFG edge(caller-callee).
+  for (auto &CM :
+       const_cast<CallsiteSampleMap &>(CallerFS.getCallsiteSamples())) {
+    auto &CalleeMap = CM.second;
+    // Local container used to update the CallsiteSampleMap.
+    std::vector<std::pair<FunctionId, FunctionSamples *>> FSamplesToUpdate;
+    for (auto &CS : CalleeMap) {
+      auto &CalleeFS = CS.second;
+      auto ProfCallee = CalleeFS.getFunction();
+      auto ExistingIRCallee = FindIRFunction(ProfCallee);
+      // The profile callee is new, run renaming matching.
+      if (!ExistingIRCallee.has_value()) {
+        for (auto *IRCallee : IRNewCallees) {
+          if (functionIsRenamed(*IRCallee, ProfCallee)) {
+            FSamplesToUpdate.emplace_back(ProfCallee, &CalleeFS);
+            OldProfToNewSymbolMap[ProfCallee] = IRCallee;
+            // Update the profile in place so that the deeper level matching
+            // will find the IR function.
+            CalleeFS.setFunction(FunctionId(IRCallee->getName()));
+            LLVM_DEBUG(dbgs() << "Callee renaming is found in function "
+                              << CallerFS.getFunction()
+                              << ", changing profile name from " << ProfCallee
+                              << " to " << IRCallee->getName() << "\n");
+            break;
+          }
+        }
+      } else {
+        // Apply the existing renaming result.
+        auto R = OldProfToNewSymbolMap.find(CalleeFS.getFunction());
+        if (R != OldProfToNewSymbolMap.end()) {
+          FunctionId IRNewCallee(R->second->getName());
+          assert(IRNewCallee != ProfCallee &&
+                 "New callee symbol is not a new function");
+          FSamplesToUpdate.emplace_back(ProfCallee, &CalleeFS);
+          CalleeFS.setFunction(IRNewCallee);
+          LLVM_DEBUG(dbgs() << "Existing callee renaming is found in function "
+                            << CallerFS.getFunction()
+                            << ", changing profile name from " << ProfCallee
+                            << " to " << IRNewCallee << "\n");
+        }
+      }
+      // Note that even there is no renaming in the current scope, there could
+      // be renaming in deeper callee scope, we need to traverse all the callee
+      // profiles.
+      runFuncRenamingMatchingOnProfile(IRNewFunctions, CalleeFS,
----------------
WenleiHe wrote:

When we do CFG profile matching, we use flattened profile so profile can have more anchors and it's easier to match IR. In this case, I see that `checkFunctionSimilarity` also use flattened profile. Should we operate this call graph level matching completely on flattened profile so we avoid the recursion here for inlinees? There is no need to repeatedly match multiple instance of an inlinee (even though there is cache).  

https://github.com/llvm/llvm-project/pull/92151


More information about the llvm-commits mailing list