[llvm] [SampleFDO] Stale profile renaming matching (PR #92151)

via llvm-commits llvm-commits at lists.llvm.org
Fri May 17 21:17:01 PDT 2024


================
@@ -590,13 +609,260 @@ void SampleProfileMatcher::computeAndReportProfileStaleness() {
   }
 }
 
-void SampleProfileMatcher::runOnModule() {
+// Find functions that don't show in the profile or profile symbol list, which
+// are supposed to be new functions. We use them as the targets for renaming
+// matching.
+void SampleProfileMatcher::findIRNewFunctions(
+    StringMap<Function *> &IRNewFunctions) {
+  // TODO: Support MD5 profile.
+  if (FunctionSamples::UseMD5)
+    return;
+  StringSet<> NamesInProfile;
+  if (auto NameTable = Reader.getNameTable()) {
+    for (auto Name : *NameTable)
+      NamesInProfile.insert(Name.stringRef());
+  }
+
+  for (auto &F : M) {
+    // Skip declarations, as even if the function can be recognized renamed, we
+    // have nothing to do with it.
+    if (F.isDeclaration())
+      continue;
+
+    StringRef CanonFName = FunctionSamples::getCanonicalFnName(F.getName());
+    const auto *FS = getFlattenedSamplesFor(F);
+    if (FS)
+      continue;
+
+    // For extended binary, the full function name symbols exits in the profile
+    // symbol list table.
+    if (NamesInProfile.count(CanonFName))
+      continue;
+
+    if (PSL && PSL->contains(CanonFName))
+      continue;
+
+    LLVM_DEBUG(dbgs() << "Function " << CanonFName
+                      << " is not in profile or symbol list table.\n");
+    IRNewFunctions[CanonFName] = &F;
+  }
+}
+
+void SampleProfileMatcher::findIRNewCallees(
+    Function &Caller, const StringMap<Function *> &IRNewFunctions,
+    std::vector<Function *> &IRNewCallees) {
+  for (auto &BB : Caller) {
+    for (auto &I : BB) {
+      const auto *CB = dyn_cast<CallBase>(&I);
+      if (!CB || isa<IntrinsicInst>(&I))
+        continue;
+      Function *Callee = CB->getCalledFunction();
+      if (!Callee || Callee->isDeclaration())
+        continue;
+      StringRef CalleeName =
+          FunctionSamples::getCanonicalFnName(Callee->getName());
+      if (IRNewFunctions.count(CalleeName))
+        IRNewCallees.push_back(Callee);
+    }
+  }
+}
+
+// Use function similarity to determine if the function is renamed. Compute a
+// similarity ratio between two sequences which are  the function callsite
+// anchors. The returned value is in the range [0, 1]. The bigger the value is,
+// the more similar two sequences are.
+float SampleProfileMatcher::checkFunctionSimilarity(
+    const Function &IRFunc, const FunctionId &ProfFName) {
+  AnchorMap IRAnchors;
+  findIRAnchors(IRFunc, IRAnchors);
+
+  AnchorMap ProfileAnchors;
+  const auto *FSFlattened = getFlattenedSamplesFor(ProfFName);
+  assert(FSFlattened && "Flattened profile sample is null");
+  findProfileAnchors(*FSFlattened, ProfileAnchors);
+
+  AnchorList FilteredProfileAnchorList;
+  AnchorList FilteredIRAnchorsList;
+  getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
+                        FilteredProfileAnchorList);
+
+  // If the function is probe based, we trust the checksum info to check the
+  // similarity. Otherwise, if the checksum is mismatched, continue computing
+  // the similarity.
+  if (FunctionSamples::ProfileIsProbeBased) {
+    const auto *FuncDesc = ProbeManager->getDesc(IRFunc);
+    // Make sure function is complex enough.
+    if (IRAnchors.size() - FilteredIRAnchorsList.size() > 5 && FuncDesc &&
+        !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSFlattened)) {
+      return 1.0;
+    }
+  }
+
+  if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty())
+    return 0.0;
+
+  // Use the diff algorithm to find the LCS between IR and profile.
+  LocToLocMap MatchedAnchors =
+      longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList);
+
+  return static_cast<float>(MatchedAnchors.size()) * 2 /
+         (FilteredIRAnchorsList.size() + FilteredProfileAnchorList.size());
+}
+
+bool SampleProfileMatcher::functionIsRenamedImpl(const Function &IRFunc,
+                                                 const FunctionId &ProfFunc) {
+  float Similarity = checkFunctionSimilarity(IRFunc, ProfFunc);
+  LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName()
+                    << "(IR) and " << ProfFunc << "(profile) is "
+                    << format("%.2f", Similarity) << "\n");
+  return Similarity * 100 > FuncRenamingSimilarityThreshold;
+}
+
+bool SampleProfileMatcher::functionIsRenamed(const Function &IRFunc,
+                                             const FunctionId &ProfFunc) {
+  auto R = RenameDecisionCache.find({&IRFunc, ProfFunc});
+  if (R != RenameDecisionCache.end())
+    return R->second;
+
+  bool V = functionIsRenamedImpl(IRFunc, ProfFunc);
+  RenameDecisionCache[{&IRFunc, ProfFunc}] = V;
+  return V;
+}
+
+// Run function renaming matching on the profiled CFG edge to limit the matching
+// scope.
+void SampleProfileMatcher::runFuncRenamingMatchingOnProfile(
+    const StringMap<Function *> &IRNewFunctions, FunctionSamples &CallerFS,
+    FunctionMap &OldProfToNewSymbolMap) {
+  auto FindIRFunction = [&](const FunctionId &FName) {
+    // Function can be null if name has conflict, use optional to store the
+    // function pointer.
+    std::optional<Function *> F;
+
+    auto R = SymbolMap->find(FName);
+    if (R != SymbolMap->end())
+      F = R->second;
+
+    auto NewR = OldProfToNewSymbolMap.find(FName);
+    if (NewR != OldProfToNewSymbolMap.end())
+      F = NewR->second;
+
+    return F;
+  };
+
+  // Find the new callees from IR in the current caller scope.
+  std::vector<Function *> IRNewCallees;
+  auto Caller = FindIRFunction(CallerFS.getFunction());
+  if (Caller.has_value() && *Caller) {
+    // No callees for external function, skip the rename matching.
+    if ((*Caller)->isDeclaration())
+      return;
+    findIRNewCallees(**Caller, IRNewFunctions, IRNewCallees);
+  }
+
+  // Run renaming matching on CFG edge(caller-callee).
+  for (auto &CM :
+       const_cast<CallsiteSampleMap &>(CallerFS.getCallsiteSamples())) {
+    auto &CalleeMap = CM.second;
+    // Local container used to update the CallsiteSampleMap.
+    std::vector<std::pair<FunctionId, FunctionSamples *>> FSamplesToUpdate;
+    for (auto &CS : CalleeMap) {
+      auto &CalleeFS = CS.second;
+      auto ProfCallee = CalleeFS.getFunction();
+      auto ExistingIRCallee = FindIRFunction(ProfCallee);
+      // The profile callee is new, run renaming matching.
+      if (!ExistingIRCallee.has_value()) {
+        for (auto *IRCallee : IRNewCallees) {
+          if (functionIsRenamed(*IRCallee, ProfCallee)) {
+            FSamplesToUpdate.emplace_back(ProfCallee, &CalleeFS);
+            OldProfToNewSymbolMap[ProfCallee] = IRCallee;
+            // Update the profile in place so that the deeper level matching
+            // will find the IR function.
+            CalleeFS.setFunction(FunctionId(IRCallee->getName()));
+            LLVM_DEBUG(dbgs() << "Callee renaming is found in function "
+                              << CallerFS.getFunction()
+                              << ", changing profile name from " << ProfCallee
+                              << " to " << IRCallee->getName() << "\n");
+            break;
+          }
+        }
+      } else {
+        // Apply the existing renaming result.
+        auto R = OldProfToNewSymbolMap.find(CalleeFS.getFunction());
+        if (R != OldProfToNewSymbolMap.end()) {
+          FunctionId IRNewCallee(R->second->getName());
+          assert(IRNewCallee != ProfCallee &&
+                 "New callee symbol is not a new function");
+          FSamplesToUpdate.emplace_back(ProfCallee, &CalleeFS);
+          CalleeFS.setFunction(IRNewCallee);
+          LLVM_DEBUG(dbgs() << "Existing callee renaming is found in function "
+                            << CallerFS.getFunction()
+                            << ", changing profile name from " << ProfCallee
+                            << " to " << IRNewCallee << "\n");
+        }
+      }
+      // Note that even there is no renaming in the current scope, there could
+      // be renaming in deeper callee scope, we need to traverse all the callee
+      // profiles.
+      runFuncRenamingMatchingOnProfile(IRNewFunctions, CalleeFS,
+                                       OldProfToNewSymbolMap);
+    }
+
+    // Update the CalleeMap using the new name and remove the old entry.
+    for (auto &P : FSamplesToUpdate) {
+      assert((P.first != P.second->getFunction()) &&
+             "Renamed function name should be different from the old map key");
+      CalleeMap[P.second->getFunction()] = *P.second;
+      CalleeMap.erase(P.first);
+    }
+  }
+}
+
+void SampleProfileMatcher::runFuncLevelMatching() {
+  if (!SalvageFunctionRenaming)
+    return;
+  assert(SymbolMap && "SymbolMap points to null");
----------------
WenleiHe wrote:

nit: SymbolMap points to null -> SymbolMap is null

https://github.com/llvm/llvm-project/pull/92151


More information about the llvm-commits mailing list