[llvm] [SampleFDO] Stale profile call-graph matching (PR #95135)

Lei Wang via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 8 16:21:47 PDT 2024


https://github.com/wlei-llvm updated https://github.com/llvm/llvm-project/pull/95135

>From 622d78f05fcb7e583a149f4c528696fc1b5f6ab7 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Fri, 3 May 2024 21:03:30 -0700
Subject: [PATCH 01/22] [SampleFDO] Stale profile renaming matching

---
 .../Transforms/IPO/SampleProfileMatcher.h     |  54 +++-
 llvm/lib/Transforms/IPO/SampleProfile.cpp     |   6 +-
 .../Transforms/IPO/SampleProfileMatcher.cpp   | 294 ++++++++++++++++-
 .../pseudo-probe-stale-profile-renaming.prof  |  62 ++++
 .../pseudo-probe-stale-profile-renaming.ll    | 297 ++++++++++++++++++
 5 files changed, 687 insertions(+), 26 deletions(-)
 create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-renaming.prof
 create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll

diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index b6feca5d47035..f2feb9ba8832d 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -15,12 +15,14 @@
 #define LLVM_TRANSFORMS_IPO_SAMPLEPROFILEMATCHER_H
 
 #include "llvm/ADT/StringSet.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h"
 
 namespace llvm {
 
 using AnchorList = std::vector<std::pair<LineLocation, FunctionId>>;
 using AnchorMap = std::map<LineLocation, FunctionId>;
+using FunctionMap = HashKeyMap<std::unordered_map, FunctionId, Function *>;
 
 // Sample profile matching - fuzzy match.
 class SampleProfileMatcher {
@@ -58,6 +60,20 @@ class SampleProfileMatcher {
   StringMap<std::unordered_map<LineLocation, MatchState, LineLocationHash>>
       FuncCallsiteMatchStates;
 
+  struct RenameDecisionCacheHash {
+    uint64_t
+    operator()(const std::pair<const Function *, FunctionId> &P) const {
+      return hash_combine(P.first, P.second);
+    }
+  };
+  std::unordered_map<std::pair<const Function *, FunctionId>, bool,
+                     RenameDecisionCacheHash>
+      RenameDecisionCache;
+
+  FunctionMap *SymbolMap;
+
+  std::shared_ptr<ProfileSymbolList> PSL;
+
   // Profile mismatch statstics:
   uint64_t TotalProfiledFunc = 0;
   // Num of checksum-mismatched function.
@@ -80,9 +96,11 @@ class SampleProfileMatcher {
 public:
   SampleProfileMatcher(Module &M, SampleProfileReader &Reader,
                        const PseudoProbeManager *ProbeManager,
-                       ThinOrFullLTOPhase LTOPhase)
-      : M(M), Reader(Reader), ProbeManager(ProbeManager), LTOPhase(LTOPhase){};
-  void runOnModule();
+                       ThinOrFullLTOPhase LTOPhase,
+                       std::shared_ptr<ProfileSymbolList> PSL)
+      : M(M), Reader(Reader), ProbeManager(ProbeManager), LTOPhase(LTOPhase),
+        PSL(PSL) {};
+  void runOnModule(FunctionMap &SymbolMap);
   void clearMatchingData() {
     // Do not clear FuncMappings, it stores IRLoc to ProfLoc remappings which
     // will be used for sample loader.
@@ -90,16 +108,20 @@ class SampleProfileMatcher {
   }
 
 private:
-  FunctionSamples *getFlattenedSamplesFor(const Function &F) {
-    StringRef CanonFName = FunctionSamples::getCanonicalFnName(F);
-    auto It = FlattenedProfiles.find(FunctionId(CanonFName));
+  FunctionSamples *getFlattenedSamplesFor(const FunctionId &Fname) {
+    auto It = FlattenedProfiles.find(Fname);
     if (It != FlattenedProfiles.end())
       return &It->second;
     return nullptr;
   }
-  void runOnFunction(Function &F);
-  void findIRAnchors(const Function &F, AnchorMap &IRAnchors);
-  void findProfileAnchors(const FunctionSamples &FS, AnchorMap &ProfileAnchors);
+  FunctionSamples *getFlattenedSamplesFor(const Function &F) {
+    StringRef CanonFName = FunctionSamples::getCanonicalFnName(F);
+    return getFlattenedSamplesFor(FunctionId(CanonFName));
+  }
+  void runBlockLevelMatching(Function &F);
+  void findIRAnchors(const Function &F, AnchorMap &IRAnchors) const;
+  void findProfileAnchors(const FunctionSamples &FS,
+                          AnchorMap &ProfileAnchors) const;
   // Record the callsite match states for profile staleness report, the result
   // is saved in FuncCallsiteMatchStates.
   void recordCallsiteMatchStates(const Function &F, const AnchorMap &IRAnchors,
@@ -160,6 +182,20 @@ class SampleProfileMatcher {
   void runStaleProfileMatching(const Function &F, const AnchorMap &IRAnchors,
                                const AnchorMap &ProfileAnchors,
                                LocToLocMap &IRToProfileLocationMap);
+  void findIRNewCallees(Function &Caller,
+                        const StringMap<Function *> &IRNewFunctions,
+                        std::vector<Function *> &IRNewCallees);
+  float checkFunctionSimilarity(const Function &IRFunc,
+                                const FunctionId &ProfFunc);
+  bool functionIsRenamedImpl(const Function &IRFunc,
+                             const FunctionId &ProfFunc);
+  bool functionIsRenamed(const Function &IRFunc, const FunctionId &ProfFunc);
+  void
+  runFuncRenamingMatchingOnProfile(const StringMap<Function *> &IRNewFunctions,
+                                   FunctionSamples &FS,
+                                   FunctionMap &OldProfToNewSymbolMap);
+  void findIRNewFunctions(StringMap<Function *> &IRNewFunctions);
+  void runFuncLevelMatching();
   void reportOrPersistProfileStats();
 };
 } // end namespace llvm
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 92ad4c34da6e7..6195ae049c75e 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -544,7 +544,7 @@ class SampleProfileLoader final : public SampleProfileLoaderBaseImpl<Function> {
 
   /// Profle Symbol list tells whether a function name appears in the binary
   /// used to generate the current profile.
-  std::unique_ptr<ProfileSymbolList> PSL;
+  std::shared_ptr<ProfileSymbolList> PSL;
 
   /// Total number of samples collected in this profile.
   ///
@@ -2077,7 +2077,7 @@ bool SampleProfileLoader::doInitialization(Module &M,
   if (ReportProfileStaleness || PersistProfileStaleness ||
       SalvageStaleProfile) {
     MatchingManager = std::make_unique<SampleProfileMatcher>(
-        M, *Reader, ProbeManager.get(), LTOPhase);
+        M, *Reader, ProbeManager.get(), LTOPhase, PSL);
   }
 
   return true;
@@ -2198,7 +2198,7 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
 
   if (ReportProfileStaleness || PersistProfileStaleness ||
       SalvageStaleProfile) {
-    MatchingManager->runOnModule();
+    MatchingManager->runOnModule(SymbolMap);
     MatchingManager->clearMatchingData();
   }
 
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index d7613bce4c52e..2b07856252ecd 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -20,12 +20,22 @@ using namespace sampleprof;
 
 #define DEBUG_TYPE "sample-profile-matcher"
 
+static cl::opt<bool> SalvageFunctionRenaming(
+    "salvage-function-renaming", cl::Hidden, cl::init(false),
+    cl::desc("Salvage stale profile by function renaming matching."));
+
+static cl::opt<unsigned> FuncRenamingSimilarityThreshold(
+    "func-renaming-similarity-threshold", cl::Hidden, cl::init(80),
+    cl::desc(
+        "The profile function is considered being renamed if the similarity "
+        "against IR is above the given number(percentage value)."));
+
 extern cl::opt<bool> SalvageStaleProfile;
 extern cl::opt<bool> PersistProfileStaleness;
 extern cl::opt<bool> ReportProfileStaleness;
 
 void SampleProfileMatcher::findIRAnchors(const Function &F,
-                                         AnchorMap &IRAnchors) {
+                                         AnchorMap &IRAnchors) const {
   // For inlined code, recover the original callsite and callee by finding the
   // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the
   // top-level frame is "main:1", the callsite is "1" and the callee is "foo".
@@ -95,7 +105,7 @@ void SampleProfileMatcher::findIRAnchors(const Function &F,
 }
 
 void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
-                                              AnchorMap &ProfileAnchors) {
+                                              AnchorMap &ProfileAnchors) const {
   auto isInvalidLineOffset = [](uint32_t LineOffset) {
     return LineOffset & 0x8000;
   };
@@ -260,6 +270,22 @@ void SampleProfileMatcher::matchNonCallsiteLocs(
   }
 }
 
+// Filter the non-call locations from IRAnchors and ProfileAnchors and write
+// them into a list for random access later.
+static void getFilteredAnchorList(const AnchorMap &IRAnchors,
+                                  const AnchorMap &ProfileAnchors,
+                                  AnchorList &FilteredIRAnchorsList,
+                                  AnchorList &FilteredProfileAnchorList) {
+  for (const auto &I : IRAnchors) {
+    if (I.second.stringRef().empty())
+      continue;
+    FilteredIRAnchorsList.emplace_back(I);
+  }
+
+  for (const auto &I : ProfileAnchors)
+    FilteredProfileAnchorList.emplace_back(I);
+}
+
 // Call target name anchor based profile fuzzy matching.
 // Input:
 // For IR locations, the anchor is the callee name of direct callsite; For
@@ -286,16 +312,9 @@ void SampleProfileMatcher::runStaleProfileMatching(
          "Run stale profile matching only once per function");
 
   AnchorList FilteredProfileAnchorList;
-  for (const auto &I : ProfileAnchors)
-    FilteredProfileAnchorList.emplace_back(I);
-
   AnchorList FilteredIRAnchorsList;
-  // Filter the non-callsite from IRAnchors.
-  for (const auto &I : IRAnchors) {
-    if (I.second.stringRef().empty())
-      continue;
-    FilteredIRAnchorsList.emplace_back(I);
-  }
+  getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
+                        FilteredProfileAnchorList);
 
   if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty())
     return;
@@ -311,7 +330,7 @@ void SampleProfileMatcher::runStaleProfileMatching(
   matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap);
 }
 
-void SampleProfileMatcher::runOnFunction(Function &F) {
+void SampleProfileMatcher::runBlockLevelMatching(Function &F) {
   // We need to use flattened function samples for matching.
   // Unlike IR, which includes all callsites from the source code, the callsites
   // in profile only show up when they are hit by samples, i,e. the profile
@@ -590,13 +609,260 @@ void SampleProfileMatcher::computeAndReportProfileStaleness() {
   }
 }
 
-void SampleProfileMatcher::runOnModule() {
+// Find functions that don't show in the profile or profile symbol list, which
+// are supposed to be new functions. We use them as the targets for renaming
+// matching.
+void SampleProfileMatcher::findIRNewFunctions(
+    StringMap<Function *> &IRNewFunctions) {
+  // TODO: Support MD5 profile.
+  if (FunctionSamples::UseMD5)
+    return;
+  StringSet<> NamesInProfile;
+  if (auto NameTable = Reader.getNameTable()) {
+    for (auto Name : *NameTable)
+      NamesInProfile.insert(Name.stringRef());
+  }
+
+  for (auto &F : M) {
+    // Skip declarations: even if the function is recognized as renamed, there
+    // is nothing to do with it.
+    if (F.isDeclaration())
+      continue;
+
+    StringRef CanonFName = FunctionSamples::getCanonicalFnName(F.getName());
+    const auto *FS = getFlattenedSamplesFor(F);
+    if (FS)
+      continue;
+
+    // For extended binary, the full function name symbols exist in the profile
+    // symbol list table.
+    if (NamesInProfile.count(CanonFName))
+      continue;
+
+    if (PSL && PSL->contains(CanonFName))
+      continue;
+
+    LLVM_DEBUG(dbgs() << "Function " << CanonFName
+                      << " is not in profile or symbol list table.\n");
+    IRNewFunctions[CanonFName] = &F;
+  }
+}
+
+void SampleProfileMatcher::findIRNewCallees(
+    Function &Caller, const StringMap<Function *> &IRNewFunctions,
+    std::vector<Function *> &IRNewCallees) {
+  for (auto &BB : Caller) {
+    for (auto &I : BB) {
+      const auto *CB = dyn_cast<CallBase>(&I);
+      if (!CB || isa<IntrinsicInst>(&I))
+        continue;
+      Function *Callee = CB->getCalledFunction();
+      if (!Callee || Callee->isDeclaration())
+        continue;
+      StringRef CalleeName =
+          FunctionSamples::getCanonicalFnName(Callee->getName());
+      if (IRNewFunctions.count(CalleeName))
+        IRNewCallees.push_back(Callee);
+    }
+  }
+}
+
+// Use function similarity to determine if the function is renamed. Compute a
+// similarity ratio between two sequences which are the function callsite
+// anchors. The returned value is in the range [0, 1]. The bigger the value is,
+// the more similar two sequences are.
+float SampleProfileMatcher::checkFunctionSimilarity(
+    const Function &IRFunc, const FunctionId &ProfFName) {
+  AnchorMap IRAnchors;
+  findIRAnchors(IRFunc, IRAnchors);
+
+  AnchorMap ProfileAnchors;
+  const auto *FSFlattened = getFlattenedSamplesFor(ProfFName);
+  assert(FSFlattened && "Flattened profile sample is null");
+  findProfileAnchors(*FSFlattened, ProfileAnchors);
+
+  AnchorList FilteredProfileAnchorList;
+  AnchorList FilteredIRAnchorsList;
+  getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
+                        FilteredProfileAnchorList);
+
+  // If the function is probe based, we trust the checksum info to check the
+  // similarity. Otherwise, if the checksum is mismatched, continue computing
+  // the similarity.
+  if (FunctionSamples::ProfileIsProbeBased) {
+    const auto *FuncDesc = ProbeManager->getDesc(IRFunc);
+    // Make sure function is complex enough.
+    if (IRAnchors.size() - FilteredIRAnchorsList.size() > 5 && FuncDesc &&
+        !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSFlattened)) {
+      return 1.0;
+    }
+  }
+
+  if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty())
+    return 0.0;
+
+  // Use the diff algorithm to find the LCS between IR and profile.
+  LocToLocMap MatchedAnchors =
+      longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList);
+
+  return static_cast<float>(MatchedAnchors.size()) * 2 /
+         (FilteredIRAnchorsList.size() + FilteredProfileAnchorList.size());
+}
+
+bool SampleProfileMatcher::functionIsRenamedImpl(const Function &IRFunc,
+                                                 const FunctionId &ProfFunc) {
+  float Similarity = checkFunctionSimilarity(IRFunc, ProfFunc);
+  LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName()
+                    << "(IR) and " << ProfFunc << "(profile) is "
+                    << format("%.2f", Similarity) << "\n");
+  return Similarity * 100 > FuncRenamingSimilarityThreshold;
+}
+
+bool SampleProfileMatcher::functionIsRenamed(const Function &IRFunc,
+                                             const FunctionId &ProfFunc) {
+  auto R = RenameDecisionCache.find({&IRFunc, ProfFunc});
+  if (R != RenameDecisionCache.end())
+    return R->second;
+
+  bool V = functionIsRenamedImpl(IRFunc, ProfFunc);
+  RenameDecisionCache[{&IRFunc, ProfFunc}] = V;
+  return V;
+}
+
+// Run function renaming matching on the profiled CFG edge to limit the matching
+// scope.
+void SampleProfileMatcher::runFuncRenamingMatchingOnProfile(
+    const StringMap<Function *> &IRNewFunctions, FunctionSamples &CallerFS,
+    FunctionMap &OldProfToNewSymbolMap) {
+  auto FindIRFunction = [&](const FunctionId &FName) {
+    // Function can be null if name has conflict, use optional to store the
+    // function pointer.
+    std::optional<Function *> F;
+
+    auto R = SymbolMap->find(FName);
+    if (R != SymbolMap->end())
+      F = R->second;
+
+    auto NewR = OldProfToNewSymbolMap.find(FName);
+    if (NewR != OldProfToNewSymbolMap.end())
+      F = NewR->second;
+
+    return F;
+  };
+
+  // Find the new callees from IR in the current caller scope.
+  std::vector<Function *> IRNewCallees;
+  auto Caller = FindIRFunction(CallerFS.getFunction());
+  if (Caller.has_value() && *Caller) {
+    // No callees for external function, skip the rename matching.
+    if ((*Caller)->isDeclaration())
+      return;
+    findIRNewCallees(**Caller, IRNewFunctions, IRNewCallees);
+  }
+
+  // Run renaming matching on CFG edge(caller-callee).
+  for (auto &CM :
+       const_cast<CallsiteSampleMap &>(CallerFS.getCallsiteSamples())) {
+    auto &CalleeMap = CM.second;
+    // Local container used to update the CallsiteSampleMap.
+    std::vector<std::pair<FunctionId, FunctionSamples *>> FSamplesToUpdate;
+    for (auto &CS : CalleeMap) {
+      auto &CalleeFS = CS.second;
+      auto ProfCallee = CalleeFS.getFunction();
+      auto ExistingIRCallee = FindIRFunction(ProfCallee);
+      // The profile callee is new, run renaming matching.
+      if (!ExistingIRCallee.has_value()) {
+        for (auto *IRCallee : IRNewCallees) {
+          if (functionIsRenamed(*IRCallee, ProfCallee)) {
+            FSamplesToUpdate.emplace_back(ProfCallee, &CalleeFS);
+            OldProfToNewSymbolMap[ProfCallee] = IRCallee;
+            // Update the profile in place so that the deeper level matching
+            // will find the IR function.
+            CalleeFS.setFunction(FunctionId(IRCallee->getName()));
+            LLVM_DEBUG(dbgs() << "Callee renaming is found in function "
+                              << CallerFS.getFunction()
+                              << ", changing profile name from " << ProfCallee
+                              << " to " << IRCallee->getName() << "\n");
+            break;
+          }
+        }
+      } else {
+        // Apply the existing renaming result.
+        auto R = OldProfToNewSymbolMap.find(CalleeFS.getFunction());
+        if (R != OldProfToNewSymbolMap.end()) {
+          FunctionId IRNewCallee(R->second->getName());
+          assert(IRNewCallee != ProfCallee &&
+                 "New callee symbol is not a new function");
+          FSamplesToUpdate.emplace_back(ProfCallee, &CalleeFS);
+          CalleeFS.setFunction(IRNewCallee);
+          LLVM_DEBUG(dbgs() << "Existing callee renaming is found in function "
+                            << CallerFS.getFunction()
+                            << ", changing profile name from " << ProfCallee
+                            << " to " << IRNewCallee << "\n");
+        }
+      }
+      // Note that even if there is no renaming in the current scope, there
+      // could be renaming in a deeper callee scope, so we need to traverse all
+      // the callee profiles.
+      runFuncRenamingMatchingOnProfile(IRNewFunctions, CalleeFS,
+                                       OldProfToNewSymbolMap);
+    }
+
+    // Update the CalleeMap using the new name and remove the old entry.
+    for (auto &P : FSamplesToUpdate) {
+      assert((P.first != P.second->getFunction()) &&
+             "Renamed function name should be different from the old map key");
+      CalleeMap[P.second->getFunction()] = *P.second;
+      CalleeMap.erase(P.first);
+    }
+  }
+}
+
+void SampleProfileMatcher::runFuncLevelMatching() {
+  if (!SalvageFunctionRenaming)
+    return;
+  assert(SymbolMap && "SymbolMap points to null");
+
+  StringMap<Function *> IRNewFunctions;
+  findIRNewFunctions(IRNewFunctions);
+  if (IRNewFunctions.empty())
+    return;
+
+  // The new functions found by the renaming matching. Save them into a map
+  // whose key is the old(profile) function name and value is the new(renamed)
+  // function.
+  FunctionMap OldProfToNewSymbolMap;
+  for (auto &I : Reader.getProfiles())
+    runFuncRenamingMatchingOnProfile(IRNewFunctions, I.second,
+                                     OldProfToNewSymbolMap);
+
+  // Update all the data generated by the old profile.
+  if (!OldProfToNewSymbolMap.empty()) {
+    // Add the new function to the SymbolMap, which will be used in
+    // SampleLoader.
+    for (auto &I : OldProfToNewSymbolMap) {
+      assert(I.second && "New function is null");
+      SymbolMap->emplace(FunctionId(I.second->getName()), I.second);
+    }
+
+    // Re-flatten the profiles after the renaming.
+    FlattenedProfiles.clear();
+    ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
+                                     FunctionSamples::ProfileIsCS);
+  }
+  RenameDecisionCache.clear();
+}
+
+void SampleProfileMatcher::runOnModule(FunctionMap &SymMap) {
   ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
                                    FunctionSamples::ProfileIsCS);
+  SymbolMap = &SymMap;
+  runFuncLevelMatching();
+
   for (auto &F : M) {
     if (skipProfileForFunction(F))
       continue;
-    runOnFunction(F);
+    runBlockLevelMatching(F);
   }
   if (SalvageStaleProfile)
     distributeIRToProfileLocationMap();
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-renaming.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-renaming.prof
new file mode 100644
index 0000000000000..1e23ef26d1b15
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-renaming.prof
@@ -0,0 +1,62 @@
+main:47:0
+ 1: 0
+ 2: 2
+ 3: 0
+ 4: 3
+ 7: 2 test_noninline:2
+ 8: 2
+ 9: 0
+ 5: foo:24
+  1: 3
+  2: 3 bar:3
+  4: 3 bar:3
+  5: 1 mismatch:1
+  3: baz:15
+   1: 3
+   2: block_only:12
+    1: 3
+    3: 3
+    5: 3
+    10: 3
+    !CFGChecksum: 206551239323
+   !CFGChecksum: 281479271677951
+  !CFGChecksum: 123456
+ 6: baz:14
+  1: 3
+  2: block_only:11
+   1: 3
+   3: 3
+   5: 3
+   10: 2
+   !CFGChecksum: 206551239323
+  !CFGChecksum: 281479271677951
+ 10: cold_func:0
+  1: 0
+  2: block_only:0
+   1: 0
+   3: 0
+   5: 0
+   10: 0
+   !CFGChecksum: 206551239323
+  !CFGChecksum: 281479271677951
+ !CFGChecksum: 1126003093360596
+test_noninline:22:2
+ 1: 2
+ 2: foo:20
+  1: 2
+  2: 2 bar:3
+  4: 3 bar:3
+  3: baz:13
+   1: 2
+   2: block_only:11
+    1: 2
+    3: 3
+    5: 3
+    10: 3
+    !CFGChecksum: 206551239323
+   !CFGChecksum: 281479271677951
+  !CFGChecksum: 123456
+ !CFGChecksum: 281479271677951
+bar:12:12
+ 1: 12
+ !CFGChecksum: 4294967295
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll
new file mode 100644
index 0000000000000..36312c3c49451
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll
@@ -0,0 +1,297 @@
+; REQUIRES: x86_64-linux
+; REQUIRES: asserts
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-renaming.prof --salvage-stale-profile --salvage-function-renaming -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl 2>&1 | FileCheck %s
+
+
+; CHECK: Function new_block_only is not in profile or symbol list table.
+; CHECK: Function new_foo is not in profile or symbol list table.
+
+; CHECK: The similarity between new_foo(IR) and foo(profile) is 0.86
+; CHECK: Callee renaming is found in function main, changing profile name from foo to new_foo
+; CHECK: The similarity between new_block_only(IR) and block_only(profile) is 1.00
+; CHECK: Callee renaming is found in function baz, changing profile name from block_only to new_block_only
+; CHECK: Existing callee renaming is found in function baz, changing profile name from block_only to new_block_only
+; CHECK: Existing callee renaming is found in function cold_func, changing profile name from block_only to new_block_only
+; CHECK: Existing callee renaming is found in function test_noninline, changing profile name from foo to new_foo
+; CHECK: Existing callee renaming is found in function baz, changing profile name from block_only to new_block_only
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@x = dso_local global i32 0, align 4, !dbg !0
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local i32 @bar(i32 noundef %x) #0 !dbg !22 {
+entry:
+    #dbg_value(i32 %x, !26, !DIExpression(), !27)
+  call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 1, i32 0, i64 -1), !dbg !28
+  %add = add nsw i32 %x, 1, !dbg !29
+  ret i32 %add, !dbg !30
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nounwind uwtable
+define dso_local void @new_block_only() #2 !dbg !31 {
+entry:
+  call void @llvm.pseudoprobe(i64 2964250471062803127, i64 1, i32 0, i64 -1), !dbg !34
+  %0 = load volatile i32, ptr @x, align 4, !dbg !34, !tbaa !36
+  %cmp = icmp eq i32 %0, 9999, !dbg !40
+  br i1 %cmp, label %if.then, label %if.else, !dbg !41
+
+if.then:                                          ; preds = %entry
+  call void @llvm.pseudoprobe(i64 2964250471062803127, i64 2, i32 0, i64 -1), !dbg !42
+  %1 = load volatile i32, ptr @x, align 4, !dbg !42, !tbaa !36
+  %add = add nsw i32 %1, 1000, !dbg !42
+  store volatile i32 %add, ptr @x, align 4, !dbg !42, !tbaa !36
+  br label %if.end10, !dbg !43
+
+if.else:                                          ; preds = %entry
+  call void @llvm.pseudoprobe(i64 2964250471062803127, i64 3, i32 0, i64 -1), !dbg !44
+  %2 = load volatile i32, ptr @x, align 4, !dbg !44, !tbaa !36
+  %cmp1 = icmp eq i32 %2, 999, !dbg !46
+  br i1 %cmp1, label %if.then2, label %if.else4, !dbg !47
+
+if.then2:                                         ; preds = %if.else
+  call void @llvm.pseudoprobe(i64 2964250471062803127, i64 4, i32 0, i64 -1), !dbg !48
+  %3 = load volatile i32, ptr @x, align 4, !dbg !48, !tbaa !36
+  %add3 = add nsw i32 %3, 100, !dbg !48
+  store volatile i32 %add3, ptr @x, align 4, !dbg !48, !tbaa !36
+  br label %if.end10, !dbg !49
+
+if.else4:                                         ; preds = %if.else
+  call void @llvm.pseudoprobe(i64 2964250471062803127, i64 5, i32 0, i64 -1), !dbg !50
+  %4 = load volatile i32, ptr @x, align 4, !dbg !50, !tbaa !36
+  %cmp5 = icmp eq i32 %4, 99, !dbg !52
+  br i1 %cmp5, label %if.then6, label %if.else8, !dbg !53
+
+if.then6:                                         ; preds = %if.else4
+  call void @llvm.pseudoprobe(i64 2964250471062803127, i64 6, i32 0, i64 -1), !dbg !54
+  %5 = load volatile i32, ptr @x, align 4, !dbg !54, !tbaa !36
+  %add7 = add nsw i32 %5, 10, !dbg !54
+  store volatile i32 %add7, ptr @x, align 4, !dbg !54, !tbaa !36
+  br label %if.end10, !dbg !55
+
+if.else8:                                         ; preds = %if.else4
+  call void @llvm.pseudoprobe(i64 2964250471062803127, i64 7, i32 0, i64 -1), !dbg !56
+  %6 = load volatile i32, ptr @x, align 4, !dbg !56, !tbaa !36
+  %inc = add nsw i32 %6, 1, !dbg !56
+  store volatile i32 %inc, ptr @x, align 4, !dbg !56, !tbaa !36
+  br label %if.end10
+
+if.end10:                                         ; preds = %if.then2, %if.else8, %if.then6, %if.then
+  call void @llvm.pseudoprobe(i64 2964250471062803127, i64 10, i32 0, i64 -1), !dbg !57
+  ret void, !dbg !57
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local void @baz() #2 !dbg !58 {
+entry:
+  call void @llvm.pseudoprobe(i64 7546896869197086323, i64 1, i32 0, i64 -1), !dbg !59
+  call void @new_block_only(), !dbg !60
+  ret void, !dbg !62
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local void @new_foo() #2 !dbg !63 {
+entry:
+  call void @llvm.pseudoprobe(i64 5381804724291869009, i64 1, i32 0, i64 -1), !dbg !64
+  %0 = load volatile i32, ptr @x, align 4, !dbg !64, !tbaa !36
+  %call = call i32 @bar(i32 noundef %0), !dbg !65
+  %1 = load volatile i32, ptr @x, align 4, !dbg !67, !tbaa !36
+  %add = add nsw i32 %1, %call, !dbg !67
+  store volatile i32 %add, ptr @x, align 4, !dbg !67, !tbaa !36
+  call void @baz(), !dbg !68
+  %2 = load volatile i32, ptr @x, align 4, !dbg !70, !tbaa !36
+  %call1 = call i32 @bar(i32 noundef %2), !dbg !71
+  %3 = load volatile i32, ptr @x, align 4, !dbg !73, !tbaa !36
+  %add2 = add nsw i32 %3, %call1, !dbg !73
+  store volatile i32 %add2, ptr @x, align 4, !dbg !73, !tbaa !36
+  ret void, !dbg !74
+}
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @test_noninline() #0 !dbg !75 {
+entry:
+  call void @llvm.pseudoprobe(i64 -5610330892148506720, i64 1, i32 0, i64 -1), !dbg !76
+  call void @new_foo(), !dbg !77
+  ret void, !dbg !79
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local void @cold_func() #2 !dbg !80 {
+entry:
+  call void @llvm.pseudoprobe(i64 2711072140522378707, i64 1, i32 0, i64 -1), !dbg !81
+  call void @new_block_only(), !dbg !82
+  ret void, !dbg !84
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local i32 @main() #2 !dbg !85 {
+entry:
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg !91
+    #dbg_value(i32 0, !89, !DIExpression(), !92)
+  br label %for.cond, !dbg !93
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ], !dbg !94
+    #dbg_value(i32 %i.0, !89, !DIExpression(), !92)
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !95
+  %cmp = icmp slt i32 %i.0, 1000000, !dbg !97
+  br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !98
+
+for.cond.cleanup:                                 ; preds = %for.cond
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg !99
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 9, i32 0, i64 -1), !dbg !100
+  call void @cold_func(), !dbg !101
+  ret i32 0, !dbg !103
+
+for.body:                                         ; preds = %for.cond
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg !104
+  call void @new_foo(), !dbg !106
+  call void @baz(), !dbg !108
+  call void @test_noninline(), !dbg !110
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 8, i32 0, i64 -1), !dbg !112
+  %inc = add nsw i32 %i.0, 1, !dbg !112
+    #dbg_value(i32 %inc, !89, !DIExpression(), !92)
+  br label %for.cond, !dbg !113, !llvm.loop !114
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #3
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #3
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare void @llvm.pseudoprobe(i64, i64, i32, i64) #4
+
+attributes #0 = { noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
+attributes #1 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #2 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
+attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #4 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!7, !8, !9, !10, !11, !12, !13}
+!llvm.ident = !{!14}
+!llvm.pseudo_probe_desc = !{!15, !16, !17, !18, !19, !20, !21}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 19.0.0git (https://github.com/llvm/llvm-project.git 2e1509152224d8ffbeac84c489920dcbaeefc2b2)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None)
+!3 = !DIFile(filename: "test_rename.c", directory: "/home/wlei/local/toytest/rename", checksumkind: CSK_MD5, checksum: "b07f600b3cdefd40bd44932bc13c33f5")
+!4 = !{!0}
+!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6)
+!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!7 = !{i32 7, !"Dwarf Version", i32 5}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{i32 1, !"wchar_size", i32 4}
+!10 = !{i32 8, !"PIC Level", i32 2}
+!11 = !{i32 7, !"PIE Level", i32 2}
+!12 = !{i32 7, !"uwtable", i32 2}
+!13 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
+!14 = !{!"clang version 19.0.0git (https://github.com/llvm/llvm-project.git 2e1509152224d8ffbeac84c489920dcbaeefc2b2)"}
+!15 = !{i64 -2012135647395072713, i64 4294967295, !"bar"}
+!16 = !{i64 2964250471062803127, i64 206551239323, !"new_block_only"}
+!17 = !{i64 7546896869197086323, i64 281479271677951, !"baz"}
+!18 = !{i64 5381804724291869009, i64 844429225099263, !"new_foo"}
+!19 = !{i64 -5610330892148506720, i64 281479271677951, !"test_noninline"}
+!20 = !{i64 2711072140522378707, i64 281479271677951, !"cold_func"}
+!21 = !{i64 -2624081020897602054, i64 1126003093360596, !"main"}
+!22 = distinct !DISubprogram(name: "bar", scope: !3, file: !3, line: 3, type: !23, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !25)
+!23 = !DISubroutineType(types: !24)
+!24 = !{!6, !6}
+!25 = !{!26}
+!26 = !DILocalVariable(name: "x", arg: 1, scope: !22, file: !3, line: 3, type: !6)
+!27 = !DILocation(line: 0, scope: !22)
+!28 = !DILocation(line: 4, column: 10, scope: !22)
+!29 = !DILocation(line: 4, column: 12, scope: !22)
+!30 = !DILocation(line: 4, column: 3, scope: !22)
+!31 = distinct !DISubprogram(name: "new_block_only", scope: !3, file: !3, line: 7, type: !32, scopeLine: 7, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!32 = !DISubroutineType(types: !33)
+!33 = !{null}
+!34 = !DILocation(line: 8, column: 6, scope: !35)
+!35 = distinct !DILexicalBlock(scope: !31, file: !3, line: 8, column: 6)
+!36 = !{!37, !37, i64 0}
+!37 = !{!"int", !38, i64 0}
+!38 = !{!"omnipotent char", !39, i64 0}
+!39 = !{!"Simple C/C++ TBAA"}
+!40 = !DILocation(line: 8, column: 8, scope: !35)
+!41 = !DILocation(line: 8, column: 6, scope: !31)
+!42 = !DILocation(line: 9, column: 7, scope: !35)
+!43 = !DILocation(line: 9, column: 5, scope: !35)
+!44 = !DILocation(line: 10, column: 12, scope: !45)
+!45 = distinct !DILexicalBlock(scope: !35, file: !3, line: 10, column: 12)
+!46 = !DILocation(line: 10, column: 14, scope: !45)
+!47 = !DILocation(line: 10, column: 12, scope: !35)
+!48 = !DILocation(line: 11, column: 7, scope: !45)
+!49 = !DILocation(line: 11, column: 5, scope: !45)
+!50 = !DILocation(line: 12, column: 12, scope: !51)
+!51 = distinct !DILexicalBlock(scope: !45, file: !3, line: 12, column: 12)
+!52 = !DILocation(line: 12, column: 14, scope: !51)
+!53 = !DILocation(line: 12, column: 12, scope: !45)
+!54 = !DILocation(line: 13, column: 7, scope: !51)
+!55 = !DILocation(line: 13, column: 5, scope: !51)
+!56 = !DILocation(line: 15, column: 6, scope: !51)
+!57 = !DILocation(line: 16, column: 1, scope: !31)
+!58 = distinct !DISubprogram(name: "baz", scope: !3, file: !3, line: 18, type: !32, scopeLine: 18, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!59 = !DILocation(line: 19, column: 3, scope: !58)
+!60 = !DILocation(line: 19, column: 3, scope: !61)
+!61 = !DILexicalBlockFile(scope: !58, file: !3, discriminator: 186646551)
+!62 = !DILocation(line: 20, column: 1, scope: !58)
+!63 = distinct !DISubprogram(name: "new_foo", scope: !3, file: !3, line: 22, type: !32, scopeLine: 22, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!64 = !DILocation(line: 23, column: 12, scope: !63)
+!65 = !DILocation(line: 23, column: 8, scope: !66)
+!66 = !DILexicalBlockFile(scope: !63, file: !3, discriminator: 186646551)
+!67 = !DILocation(line: 23, column: 5, scope: !63)
+!68 = !DILocation(line: 24, column: 3, scope: !69)
+!69 = !DILexicalBlockFile(scope: !63, file: !3, discriminator: 186646559)
+!70 = !DILocation(line: 25, column: 12, scope: !63)
+!71 = !DILocation(line: 25, column: 8, scope: !72)
+!72 = !DILexicalBlockFile(scope: !63, file: !3, discriminator: 186646567)
+!73 = !DILocation(line: 25, column: 5, scope: !63)
+!74 = !DILocation(line: 26, column: 1, scope: !63)
+!75 = distinct !DISubprogram(name: "test_noninline", scope: !3, file: !3, line: 28, type: !32, scopeLine: 28, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!76 = !DILocation(line: 29, column: 3, scope: !75)
+!77 = !DILocation(line: 29, column: 3, scope: !78)
+!78 = !DILexicalBlockFile(scope: !75, file: !3, discriminator: 186646551)
+!79 = !DILocation(line: 30, column: 1, scope: !75)
+!80 = distinct !DISubprogram(name: "cold_func", scope: !3, file: !3, line: 32, type: !32, scopeLine: 32, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!81 = !DILocation(line: 32, column: 20, scope: !80)
+!82 = !DILocation(line: 32, column: 20, scope: !83)
+!83 = !DILexicalBlockFile(scope: !80, file: !3, discriminator: 186646551)
+!84 = !DILocation(line: 32, column: 37, scope: !80)
+!85 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 34, type: !86, scopeLine: 34, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !88)
+!86 = !DISubroutineType(types: !87)
+!87 = !{!6}
+!88 = !{!89}
+!89 = !DILocalVariable(name: "i", scope: !90, file: !3, line: 35, type: !6)
+!90 = distinct !DILexicalBlock(scope: !85, file: !3, line: 35, column: 3)
+!91 = !DILocation(line: 35, column: 12, scope: !90)
+!92 = !DILocation(line: 0, scope: !90)
+!93 = !DILocation(line: 35, column: 8, scope: !90)
+!94 = !DILocation(line: 35, scope: !90)
+!95 = !DILocation(line: 35, column: 19, scope: !96)
+!96 = distinct !DILexicalBlock(scope: !90, file: !3, line: 35, column: 3)
+!97 = !DILocation(line: 35, column: 21, scope: !96)
+!98 = !DILocation(line: 35, column: 3, scope: !90)
+!99 = !DILocation(line: 0, scope: !85)
+!100 = !DILocation(line: 40, column: 3, scope: !85)
+!101 = !DILocation(line: 40, column: 3, scope: !102)
+!102 = !DILexicalBlockFile(scope: !85, file: !3, discriminator: 186646615)
+!103 = !DILocation(line: 41, column: 1, scope: !85)
+!104 = !DILocation(line: 36, column: 7, scope: !105)
+!105 = distinct !DILexicalBlock(scope: !96, file: !3, line: 35, column: 41)
+!106 = !DILocation(line: 36, column: 7, scope: !107)
+!107 = !DILexicalBlockFile(scope: !105, file: !3, discriminator: 186646575)
+!108 = !DILocation(line: 37, column: 7, scope: !109)
+!109 = !DILexicalBlockFile(scope: !105, file: !3, discriminator: 186646583)
+!110 = !DILocation(line: 38, column: 7, scope: !111)
+!111 = !DILexicalBlockFile(scope: !105, file: !3, discriminator: 186646591)
+!112 = !DILocation(line: 35, column: 37, scope: !96)
+!113 = !DILocation(line: 35, column: 3, scope: !96)
+!114 = distinct !{!114, !98, !115, !116}
+!115 = !DILocation(line: 39, column: 3, scope: !90)
+!116 = !{!"llvm.loop.mustprogress"}

>From c7ada8b627b3d4230384b827f9b7f6898a7918ba Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Mon, 20 May 2024 10:49:36 -0700
Subject: [PATCH 02/22] addressing comments

---
 .../Transforms/IPO/SampleProfileMatcher.h     |  33 ++--
 .../Transforms/IPO/SampleProfileMatcher.cpp   | 187 ++++++++++--------
 .../pseudo-probe-stale-profile-renaming.ll    |   2 +-
 3 files changed, 118 insertions(+), 104 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index f2feb9ba8832d..aee1aaa3c4817 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -60,15 +60,15 @@ class SampleProfileMatcher {
   StringMap<std::unordered_map<LineLocation, MatchState, LineLocationHash>>
       FuncCallsiteMatchStates;
 
-  struct RenameDecisionCacheHash {
+  struct FuncProfNameMapHash {
     uint64_t
     operator()(const std::pair<const Function *, FunctionId> &P) const {
       return hash_combine(P.first, P.second);
     }
   };
   std::unordered_map<std::pair<const Function *, FunctionId>, bool,
-                     RenameDecisionCacheHash>
-      RenameDecisionCache;
+                     FuncProfNameMapHash>
+      FunctionProfileNameMap;
 
   FunctionMap *SymbolMap;
 
@@ -118,7 +118,12 @@ class SampleProfileMatcher {
     StringRef CanonFName = FunctionSamples::getCanonicalFnName(F);
     return getFlattenedSamplesFor(FunctionId(CanonFName));
   }
-  void runBlockLevelMatching(Function &F);
+  void getFilteredAnchorList(const AnchorMap &IRAnchors,
+                             const AnchorMap &ProfileAnchors,
+                             AnchorList &FilteredIRAnchorsList,
+                             AnchorList &FilteredProfileAnchorList);
+  void runCFGMatching(Function &F);
+  void runOnFunction(Function &F);
   void findIRAnchors(const Function &F, AnchorMap &IRAnchors) const;
   void findProfileAnchors(const FunctionSamples &FS,
                           AnchorMap &ProfileAnchors) const;
@@ -182,20 +187,16 @@ class SampleProfileMatcher {
   void runStaleProfileMatching(const Function &F, const AnchorMap &IRAnchors,
                                const AnchorMap &ProfileAnchors,
                                LocToLocMap &IRToProfileLocationMap);
-  void findIRNewCallees(Function &Caller,
-                        const StringMap<Function *> &IRNewFunctions,
-                        std::vector<Function *> &IRNewCallees);
-  float checkFunctionSimilarity(const Function &IRFunc,
-                                const FunctionId &ProfFunc);
-  bool functionIsRenamedImpl(const Function &IRFunc,
-                             const FunctionId &ProfFunc);
-  bool functionIsRenamed(const Function &IRFunc, const FunctionId &ProfFunc);
-  void
-  runFuncRenamingMatchingOnProfile(const StringMap<Function *> &IRNewFunctions,
+  void findNewIRCallees(Function &Caller,
+                        const StringMap<Function *> &newIRFunctions,
+                        std::vector<Function *> &NewIRCallees);
+  bool functionMatchesProfile(const Function &IRFunc,
+                              const FunctionId &ProfFunc);
+  void matchProfileForNewFunctions(const StringMap<Function *> &newIRFunctions,
                                    FunctionSamples &FS,
                                    FunctionMap &OldProfToNewSymbolMap);
-  void findIRNewFunctions(StringMap<Function *> &IRNewFunctions);
-  void runFuncLevelMatching();
+  void findnewIRFunctions(StringMap<Function *> &newIRFunctions);
+  void runCallGraphMatching();
   void reportOrPersistProfileStats();
 };
 } // end namespace llvm
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 2b07856252ecd..8da8f69af00db 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -20,15 +20,14 @@ using namespace sampleprof;
 
 #define DEBUG_TYPE "sample-profile-matcher"
 
-static cl::opt<bool> SalvageFunctionRenaming(
-    "salvage-function-renaming", cl::Hidden, cl::init(false),
-    cl::desc("Salvage stale profile by function renaming matching."));
+static cl::opt<bool> SalvageRenamedProfile(
+    "salvage-renamed-profile", cl::Hidden, cl::init(false),
+    cl::desc("Salvage renamed profile by function renaming matching."));
 
-static cl::opt<unsigned> FuncRenamingSimilarityThreshold(
-    "func-renaming-similarity-threshold", cl::Hidden, cl::init(80),
-    cl::desc(
-        "The profile function is considered being renamed if the similarity "
-        "against IR is above the given number(percentage value)."));
+static cl::opt<unsigned> RenamedFuncSimilarityThreshold(
+    "renamed-func-similarity-threshold", cl::Hidden, cl::init(80),
+    cl::desc("The profile matches the function if their similarity is above "
+             "the given number(percentage)."));
 
 extern cl::opt<bool> SalvageStaleProfile;
 extern cl::opt<bool> PersistProfileStaleness;
@@ -272,10 +271,9 @@ void SampleProfileMatcher::matchNonCallsiteLocs(
 
 // Filter the non-call locations from IRAnchors and ProfileAnchors and write
 // them into a list for random access later.
-static void getFilteredAnchorList(const AnchorMap &IRAnchors,
-                                  const AnchorMap &ProfileAnchors,
-                                  AnchorList &FilteredIRAnchorsList,
-                                  AnchorList &FilteredProfileAnchorList) {
+void SampleProfileMatcher::getFilteredAnchorList(
+    const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors,
+    AnchorList &FilteredIRAnchorsList, AnchorList &FilteredProfileAnchorList) {
   for (const auto &I : IRAnchors) {
     if (I.second.stringRef().empty())
       continue;
@@ -330,7 +328,7 @@ void SampleProfileMatcher::runStaleProfileMatching(
   matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap);
 }
 
-void SampleProfileMatcher::runBlockLevelMatching(Function &F) {
+void SampleProfileMatcher::runCFGMatching(Function &F) {
   // We need to use flattened function samples for matching.
   // Unlike IR, which includes all callsites from the source code, the callsites
   // in profile only show up when they are hit by samples, i,e. the profile
@@ -612,8 +610,8 @@ void SampleProfileMatcher::computeAndReportProfileStaleness() {
 // Find functions that don't show in the profile or profile symbol list, which
 // are supposed to be new functions. We use them as the targets for renaming
 // matching.
-void SampleProfileMatcher::findIRNewFunctions(
-    StringMap<Function *> &IRNewFunctions) {
+void SampleProfileMatcher::findnewIRFunctions(
+    StringMap<Function *> &newIRFunctions) {
   // TODO: Support MD5 profile.
   if (FunctionSamples::UseMD5)
     return;
@@ -634,23 +632,26 @@ void SampleProfileMatcher::findIRNewFunctions(
     if (FS)
       continue;
 
-    // For extended binary, the full function name symbols exits in the profile
-    // symbol list table.
+    // For extended binary, functions are fully inlined may not be loaded in the
+    // top-level profile, so check the NameTable which has the all symbol names
+    // in profile.
     if (NamesInProfile.count(CanonFName))
       continue;
 
+    // For extended binary, non-profiled function symbols are in the profile
+    // symbol list table.
     if (PSL && PSL->contains(CanonFName))
       continue;
 
     LLVM_DEBUG(dbgs() << "Function " << CanonFName
                       << " is not in profile or symbol list table.\n");
-    IRNewFunctions[CanonFName] = &F;
+    newIRFunctions[CanonFName] = &F;
   }
 }
 
-void SampleProfileMatcher::findIRNewCallees(
-    Function &Caller, const StringMap<Function *> &IRNewFunctions,
-    std::vector<Function *> &IRNewCallees) {
+void SampleProfileMatcher::findNewIRCallees(
+    Function &Caller, const StringMap<Function *> &newIRFunctions,
+    std::vector<Function *> &NewIRCallees) {
   for (auto &BB : Caller) {
     for (auto &I : BB) {
       const auto *CB = dyn_cast<CallBase>(&I);
@@ -661,28 +662,37 @@ void SampleProfileMatcher::findIRNewCallees(
         continue;
       StringRef CalleeName =
           FunctionSamples::getCanonicalFnName(Callee->getName());
-      if (IRNewFunctions.count(CalleeName))
-        IRNewCallees.push_back(Callee);
+      if (newIRFunctions.count(CalleeName))
+        NewIRCallees.push_back(Callee);
     }
   }
 }
 
-// Use function similarity to determine if the function is renamed. Compute a
-// similarity ratio between two sequences which are  the function callsite
-// anchors. The returned value is in the range [0, 1]. The bigger the value is,
-// the more similar two sequences are.
-float SampleProfileMatcher::checkFunctionSimilarity(
-    const Function &IRFunc, const FunctionId &ProfFName) {
+// Determine if the function matches profile by computing a similarity ratio
+// between two callsite anchors sequences extracted from function and profile.
+// The returned value is in the range [0, 1]. The bigger the value is, the more
+// similar two sequences are.
+bool SampleProfileMatcher::functionMatchesProfile(const Function &IRFunc,
+                                                  const FunctionId &ProfFunc) {
+  // Check the cache.
+  auto R = FunctionProfileNameMap.find({&IRFunc, ProfFunc});
+  if (R != FunctionProfileNameMap.end())
+    return R->second;
+  // The value is in the range [0, 1]. The bigger the value is, the more similar
+  // two sequences are. -1.0 means the similarity is not set, and 0.0 means no
+  // match.
+  float Similarity = -1.0;
+
   AnchorMap IRAnchors;
   findIRAnchors(IRFunc, IRAnchors);
 
   AnchorMap ProfileAnchors;
-  const auto *FSFlattened = getFlattenedSamplesFor(ProfFName);
+  const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc);
   assert(FSFlattened && "Flattened profile sample is null");
   findProfileAnchors(*FSFlattened, ProfileAnchors);
 
-  AnchorList FilteredProfileAnchorList;
   AnchorList FilteredIRAnchorsList;
+  AnchorList FilteredProfileAnchorList;
   getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
                         FilteredProfileAnchorList);
 
@@ -691,48 +701,45 @@ float SampleProfileMatcher::checkFunctionSimilarity(
   // the similarity.
   if (FunctionSamples::ProfileIsProbeBased) {
     const auto *FuncDesc = ProbeManager->getDesc(IRFunc);
-    // Make sure function is complex enough.
+    // Probe-based profile checksum is based on the blocks, if the num of
+    // function block is small, it's more likely to get checksum conflict and
+    // generate wrong matching.
     if (IRAnchors.size() - FilteredIRAnchorsList.size() > 5 && FuncDesc &&
         !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSFlattened)) {
-      return 1.0;
+      Similarity = 1.0;
     }
   }
 
-  if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty())
-    return 0.0;
+  // Skip the matching if the function is tiny. Similarity check may not be
+  // reiable if the num of anchors is small.
+  if (Similarity == -1.0 && (FilteredIRAnchorsList.size() <= 2 ||
+                             FilteredProfileAnchorList.size() <= 2))
+    Similarity = 0.0;
 
-  // Use the diff algorithm to find the LCS between IR and profile.
-  LocToLocMap MatchedAnchors =
-      longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList);
+  if (Similarity == -1.0) {
+    // Use the diff algorithm to find the LCS between IR and profile.
+    LocToLocMap MatchedAnchors =
+        longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList);
 
-  return static_cast<float>(MatchedAnchors.size()) * 2 /
-         (FilteredIRAnchorsList.size() + FilteredProfileAnchorList.size());
-}
+    Similarity =
+        static_cast<float>(MatchedAnchors.size()) * 2 /
+        (FilteredIRAnchorsList.size() + FilteredProfileAnchorList.size());
+  }
 
-bool SampleProfileMatcher::functionIsRenamedImpl(const Function &IRFunc,
-                                                 const FunctionId &ProfFunc) {
-  float Similarity = checkFunctionSimilarity(IRFunc, ProfFunc);
   LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName()
                     << "(IR) and " << ProfFunc << "(profile) is "
                     << format("%.2f", Similarity) << "\n");
-  return Similarity * 100 > FuncRenamingSimilarityThreshold;
+  assert((Similarity >= 0 && Similarity <= 1.0) &&
+         "Similarity value should be in [0, 1]");
+  bool Matched = Similarity * 100 > RenamedFuncSimilarityThreshold;
+  FunctionProfileNameMap[{&IRFunc, ProfFunc}] = Matched;
+  return Matched;
 }
 
-bool SampleProfileMatcher::functionIsRenamed(const Function &IRFunc,
-                                             const FunctionId &ProfFunc) {
-  auto R = RenameDecisionCache.find({&IRFunc, ProfFunc});
-  if (R != RenameDecisionCache.end())
-    return R->second;
-
-  bool V = functionIsRenamedImpl(IRFunc, ProfFunc);
-  RenameDecisionCache[{&IRFunc, ProfFunc}] = V;
-  return V;
-}
-
-// Run function renaming matching on the profiled CFG edge to limit the matching
-// scope.
-void SampleProfileMatcher::runFuncRenamingMatchingOnProfile(
-    const StringMap<Function *> &IRNewFunctions, FunctionSamples &CallerFS,
+// Match profile for new function on the profiled call-graph edge to limit the
+// matching scope.
+void SampleProfileMatcher::matchProfileForNewFunctions(
+    const StringMap<Function *> &newIRFunctions, FunctionSamples &CallerFS,
     FunctionMap &OldProfToNewSymbolMap) {
   auto FindIRFunction = [&](const FunctionId &FName) {
     // Function can be null if name has conflict, use optional to store the
@@ -741,7 +748,7 @@ void SampleProfileMatcher::runFuncRenamingMatchingOnProfile(
 
     auto R = SymbolMap->find(FName);
     if (R != SymbolMap->end())
-      F = R->second;
+      return std::optional<Function *>(R->second);
 
     auto NewR = OldProfToNewSymbolMap.find(FName);
     if (NewR != OldProfToNewSymbolMap.end())
@@ -751,29 +758,29 @@ void SampleProfileMatcher::runFuncRenamingMatchingOnProfile(
   };
 
   // Find the new callees from IR in the current caller scope.
-  std::vector<Function *> IRNewCallees;
+  std::vector<Function *> NewIRCallees;
   auto Caller = FindIRFunction(CallerFS.getFunction());
   if (Caller.has_value() && *Caller) {
     // No callees for external function, skip the rename matching.
     if ((*Caller)->isDeclaration())
       return;
-    findIRNewCallees(**Caller, IRNewFunctions, IRNewCallees);
+    findNewIRCallees(**Caller, newIRFunctions, NewIRCallees);
   }
 
-  // Run renaming matching on CFG edge(caller-callee).
+  // Run function to profile matching on call-graph edge(caller-callee).
   for (auto &CM :
        const_cast<CallsiteSampleMap &>(CallerFS.getCallsiteSamples())) {
     auto &CalleeMap = CM.second;
     // Local container used to update the CallsiteSampleMap.
     std::vector<std::pair<FunctionId, FunctionSamples *>> FSamplesToUpdate;
     for (auto &CS : CalleeMap) {
-      auto &CalleeFS = CS.second;
-      auto ProfCallee = CalleeFS.getFunction();
-      auto ExistingIRCallee = FindIRFunction(ProfCallee);
-      // The profile callee is new, run renaming matching.
+      FunctionSamples &CalleeFS = CS.second;
+      FunctionId ProfCallee = CalleeFS.getFunction();
+      std::optional<Function *> ExistingIRCallee = FindIRFunction(ProfCallee);
+      // The profile callee is new, run function to profile matching.
       if (!ExistingIRCallee.has_value()) {
-        for (auto *IRCallee : IRNewCallees) {
-          if (functionIsRenamed(*IRCallee, ProfCallee)) {
+        for (auto *IRCallee : NewIRCallees) {
+          if (functionMatchesProfile(*IRCallee, ProfCallee)) {
             FSamplesToUpdate.emplace_back(ProfCallee, &CalleeFS);
             OldProfToNewSymbolMap[ProfCallee] = IRCallee;
             // Update the profile in place so that the deeper level matching
@@ -804,8 +811,8 @@ void SampleProfileMatcher::runFuncRenamingMatchingOnProfile(
       // Note that even there is no renaming in the current scope, there could
       // be renaming in deeper callee scope, we need to traverse all the callee
       // profiles.
-      runFuncRenamingMatchingOnProfile(IRNewFunctions, CalleeFS,
-                                       OldProfToNewSymbolMap);
+      matchProfileForNewFunctions(newIRFunctions, CalleeFS,
+                                  OldProfToNewSymbolMap);
     }
 
     // Update the CalleeMap using the new name and remove the old entry.
@@ -818,14 +825,16 @@ void SampleProfileMatcher::runFuncRenamingMatchingOnProfile(
   }
 }
 
-void SampleProfileMatcher::runFuncLevelMatching() {
-  if (!SalvageFunctionRenaming)
+void SampleProfileMatcher::runCallGraphMatching() {
+  if (!SalvageRenamedProfile)
     return;
-  assert(SymbolMap && "SymbolMap points to null");
+  assert(SymbolMap && "SymbolMap is null");
+  assert(FunctionProfileNameMap.empty() &&
+         "FunctionProfileNameMap is not empty before the call graph matching");
 
-  StringMap<Function *> IRNewFunctions;
-  findIRNewFunctions(IRNewFunctions);
-  if (IRNewFunctions.empty())
+  StringMap<Function *> newIRFunctions;
+  findnewIRFunctions(newIRFunctions);
+  if (newIRFunctions.empty())
     return;
 
   // The new functions found by the renaming matching. Save them into a map
@@ -833,8 +842,8 @@ void SampleProfileMatcher::runFuncLevelMatching() {
   // function.
   FunctionMap OldProfToNewSymbolMap;
   for (auto &I : Reader.getProfiles())
-    runFuncRenamingMatchingOnProfile(IRNewFunctions, I.second,
-                                     OldProfToNewSymbolMap);
+    matchProfileForNewFunctions(newIRFunctions, I.second,
+                                OldProfToNewSymbolMap);
 
   // Update all the data generated by the old profile.
   if (!OldProfToNewSymbolMap.empty()) {
@@ -850,20 +859,24 @@ void SampleProfileMatcher::runFuncLevelMatching() {
     ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
                                      FunctionSamples::ProfileIsCS);
   }
-  RenameDecisionCache.clear();
+  FunctionProfileNameMap.clear();
+}
+
+void SampleProfileMatcher::runOnFunction(Function &F) {
+  if (skipProfileForFunction(F))
+    return;
+  runCFGMatching(F);
 }
 
 void SampleProfileMatcher::runOnModule(FunctionMap &SymMap) {
   ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
                                    FunctionSamples::ProfileIsCS);
   SymbolMap = &SymMap;
-  runFuncLevelMatching();
+  runCallGraphMatching();
+
+  for (auto &F : M)
+    runOnFunction(F);
 
-  for (auto &F : M) {
-    if (skipProfileForFunction(F))
-      continue;
-    runBlockLevelMatching(F);
-  }
   if (SalvageStaleProfile)
     distributeIRToProfileLocationMap();
 
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll
index 36312c3c49451..37fbb2babf6d7 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll
@@ -1,6 +1,6 @@
 ; REQUIRES: x86_64-linux
 ; REQUIRES: asserts
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-renaming.prof --salvage-stale-profile --salvage-function-renaming -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-renaming.prof --salvage-stale-profile --salvage-renamed-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl 2>&1 | FileCheck %s
 
 
 ; CHECK: Function new_block_only is not in profile or symbol list table.

>From 183f6aecb87d17cebb078d037f20fe63d5634493 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Thu, 23 May 2024 10:05:01 -0700
Subject: [PATCH 03/22] fix test & update non-inline call targets & addressing
 comments

---
 .../Transforms/IPO/SampleProfileMatcher.h     |  10 +-
 .../Transforms/IPO/SampleProfileMatcher.cpp   | 256 +++++++++++-------
 .../pseudo-probe-stale-profile-renaming.prof  |   9 +-
 .../pseudo-probe-stale-profile-renaming.ll    |  27 +-
 4 files changed, 181 insertions(+), 121 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index aee1aaa3c4817..ff7813a6c6ad0 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -15,7 +15,6 @@
 #define LLVM_TRANSFORMS_IPO_SAMPLEPROFILEMATCHER_H
 
 #include "llvm/ADT/StringSet.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h"
 
 namespace llvm {
@@ -187,15 +186,22 @@ class SampleProfileMatcher {
   void runStaleProfileMatching(const Function &F, const AnchorMap &IRAnchors,
                                const AnchorMap &ProfileAnchors,
                                LocToLocMap &IRToProfileLocationMap);
+  std::pair<Function *, bool>
+  findOrMatchFunction(const FunctionId &ProfCallee,
+                      FunctionMap &OldProfToNewSymbolMap,
+                      const std::vector<Function *> &NewIRCallees);
+  std::vector<FunctionSamples *> sortFuncProfiles(SampleProfileMap &ProfileMap);
   void findNewIRCallees(Function &Caller,
                         const StringMap<Function *> &newIRFunctions,
                         std::vector<Function *> &NewIRCallees);
+  bool functionMatchesProfileHelper(const Function &IRFunc,
+                                    const FunctionId &ProfFunc);
   bool functionMatchesProfile(const Function &IRFunc,
                               const FunctionId &ProfFunc);
   void matchProfileForNewFunctions(const StringMap<Function *> &newIRFunctions,
                                    FunctionSamples &FS,
                                    FunctionMap &OldProfToNewSymbolMap);
-  void findnewIRFunctions(StringMap<Function *> &newIRFunctions);
+  void findNewIRFunctions(StringMap<Function *> &newIRFunctions);
   void runCallGraphMatching();
   void reportOrPersistProfileStats();
 };
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 8da8f69af00db..3a8c6962d4a08 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -29,6 +29,16 @@ static cl::opt<unsigned> RenamedFuncSimilarityThreshold(
     cl::desc("The profile matches the function if their similarity is above "
              "the given number(percentage)."));
 
+static cl::opt<unsigned> MinBBForCGMatching(
+    "min-bb-for-cg-matching", cl::Hidden, cl::init(5),
+    cl::desc("The minimum number of basic blocks required for a function to "
+             "run stale profile call graph matching."));
+
+static cl::opt<unsigned> MinCallAnchorForCGMatching(
+    "min-call-for-cg-matching", cl::Hidden, cl::init(3),
+    cl::desc("The minimum number of call anchors required for a function to "
+             "run stale profile call graph matching."));
+
 extern cl::opt<bool> SalvageStaleProfile;
 extern cl::opt<bool> PersistProfileStaleness;
 extern cl::opt<bool> ReportProfileStaleness;
@@ -610,7 +620,7 @@ void SampleProfileMatcher::computeAndReportProfileStaleness() {
 // Find functions that don't show in the profile or profile symbol list, which
 // are supposed to be new functions. We use them as the targets for renaming
 // matching.
-void SampleProfileMatcher::findnewIRFunctions(
+void SampleProfileMatcher::findNewIRFunctions(
     StringMap<Function *> &newIRFunctions) {
   // TODO: Support MD5 profile.
   if (FunctionSamples::UseMD5)
@@ -668,27 +678,64 @@ void SampleProfileMatcher::findNewIRCallees(
   }
 }
 
+// Find the function using the profile name. If the function is not found but
+// the NewIRCallees is provided, try to match the function profile with all
+// functions in NewIRCallees and return the matched function.
+// The return pair includes the function pointer and a bool value indicating
+// whether the function is new(matched).
+std::pair<Function *, bool> SampleProfileMatcher::findOrMatchFunction(
+    const FunctionId &ProfCallee, FunctionMap &OldProfToNewSymbolMap,
+    const std::vector<Function *> &NewIRCallees = std::vector<Function *>()) {
+  auto F = SymbolMap->find(ProfCallee);
+  if (F != SymbolMap->end())
+    return {F->second, false};
+
+  // Existing matched function is found.
+  auto NewF = OldProfToNewSymbolMap.find(ProfCallee);
+  if (NewF != OldProfToNewSymbolMap.end())
+    return {NewF->second, true};
+
+  for (auto *IRCallee : NewIRCallees)
+    if (functionMatchesProfile(*IRCallee, ProfCallee)) {
+      OldProfToNewSymbolMap[ProfCallee] = IRCallee;
+      return {IRCallee, true};
+    }
+  return {nullptr, false};
+}
+
 // Determine if the function matches profile by computing a similarity ratio
 // between two callsite anchors sequences extracted from function and profile.
-// The returned value is in the range [0, 1]. The bigger the value is, the more
-// similar two sequences are.
-bool SampleProfileMatcher::functionMatchesProfile(const Function &IRFunc,
-                                                  const FunctionId &ProfFunc) {
-  // Check the cache.
-  auto R = FunctionProfileNameMap.find({&IRFunc, ProfFunc});
-  if (R != FunctionProfileNameMap.end())
-    return R->second;
+bool SampleProfileMatcher::functionMatchesProfileHelper(
+    const Function &IRFunc, const FunctionId &ProfFunc) {
   // The value is in the range [0, 1]. The bigger the value is, the more similar
-  // two sequences are. -1.0 means the similarity is not set, and 0.0 means no
-  // match.
-  float Similarity = -1.0;
+  // two sequences are.
+  float Similarity = 0.0;
+
+  const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc);
+  assert(FSFlattened && "Flattened profile sample is null");
+  // Similarity check may not be reiable if the function is tiny, we use the
+  // number of basic block as a proxy for the function complexity and skip the
+  // matching if it's too small.
+  if (IRFunc.size() < MinBBForCGMatching ||
+      FSFlattened->getBodySamples().size() < MinBBForCGMatching)
+    return false;
+
+  // For probe-based function, we first trust the checksum info. If the checksum
+  // doesn't match, we continue checking for similarity.
+  if (FunctionSamples::ProfileIsProbeBased) {
+    const auto *FuncDesc = ProbeManager->getDesc(IRFunc);
+    if (FuncDesc &&
+        !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSFlattened)) {
+      LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName()
+                        << "(IR) and " << ProfFunc << "(Profile) match.\n");
+
+      return true;
+    }
+  }
 
   AnchorMap IRAnchors;
   findIRAnchors(IRFunc, IRAnchors);
-
   AnchorMap ProfileAnchors;
-  const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc);
-  assert(FSFlattened && "Flattened profile sample is null");
   findProfileAnchors(*FSFlattened, ProfileAnchors);
 
   AnchorList FilteredIRAnchorsList;
@@ -696,42 +743,34 @@ bool SampleProfileMatcher::functionMatchesProfile(const Function &IRFunc,
   getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
                         FilteredProfileAnchorList);
 
-  // If the function is probe based, we trust the checksum info to check the
-  // similarity. Otherwise, if the checksum is mismatched, continue computing
-  // the similarity.
-  if (FunctionSamples::ProfileIsProbeBased) {
-    const auto *FuncDesc = ProbeManager->getDesc(IRFunc);
-    // Probe-based profile checksum is based on the blocks, if the num of
-    // function block is small, it's more likely to get checksum conflict and
-    // generate wrong matching.
-    if (IRAnchors.size() - FilteredIRAnchorsList.size() > 5 && FuncDesc &&
-        !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSFlattened)) {
-      Similarity = 1.0;
-    }
-  }
-
-  // Skip the matching if the function is tiny. Similarity check may not be
-  // reiable if the num of anchors is small.
-  if (Similarity == -1.0 && (FilteredIRAnchorsList.size() <= 2 ||
-                             FilteredProfileAnchorList.size() <= 2))
-    Similarity = 0.0;
+  // Similarly skip the matching if the num of anchors is not enough.
+  if (FilteredIRAnchorsList.size() < MinCallAnchorForCGMatching ||
+      FilteredProfileAnchorList.size() < MinCallAnchorForCGMatching)
+    return false;
 
-  if (Similarity == -1.0) {
-    // Use the diff algorithm to find the LCS between IR and profile.
-    LocToLocMap MatchedAnchors =
-        longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList);
+  // Use the diff algorithm to find the LCS between IR and profile.
+  LocToLocMap MatchedAnchors =
+      longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList);
 
-    Similarity =
-        static_cast<float>(MatchedAnchors.size()) * 2 /
-        (FilteredIRAnchorsList.size() + FilteredProfileAnchorList.size());
-  }
+  Similarity =
+      static_cast<float>(MatchedAnchors.size()) * 2 /
+      (FilteredIRAnchorsList.size() + FilteredProfileAnchorList.size());
 
   LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName()
                     << "(IR) and " << ProfFunc << "(profile) is "
                     << format("%.2f", Similarity) << "\n");
   assert((Similarity >= 0 && Similarity <= 1.0) &&
          "Similarity value should be in [0, 1]");
-  bool Matched = Similarity * 100 > RenamedFuncSimilarityThreshold;
+  return Similarity * 100 > RenamedFuncSimilarityThreshold;
+}
+
+bool SampleProfileMatcher::functionMatchesProfile(const Function &IRFunc,
+                                                  const FunctionId &ProfFunc) {
+  auto R = FunctionProfileNameMap.find({&IRFunc, ProfFunc});
+  if (R != FunctionProfileNameMap.end())
+    return R->second;
+
+  bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc);
   FunctionProfileNameMap[{&IRFunc, ProfFunc}] = Matched;
   return Matched;
 }
@@ -741,72 +780,64 @@ bool SampleProfileMatcher::functionMatchesProfile(const Function &IRFunc,
 void SampleProfileMatcher::matchProfileForNewFunctions(
     const StringMap<Function *> &newIRFunctions, FunctionSamples &CallerFS,
     FunctionMap &OldProfToNewSymbolMap) {
-  auto FindIRFunction = [&](const FunctionId &FName) {
-    // Function can be null if name has conflict, use optional to store the
-    // function pointer.
-    std::optional<Function *> F;
-
-    auto R = SymbolMap->find(FName);
-    if (R != SymbolMap->end())
-      return std::optional<Function *>(R->second);
-
-    auto NewR = OldProfToNewSymbolMap.find(FName);
-    if (NewR != OldProfToNewSymbolMap.end())
-      F = NewR->second;
-
-    return F;
-  };
-
-  // Find the new callees from IR in the current caller scope.
+  // Find the new candidate callees from IR in the current caller scope.
   std::vector<Function *> NewIRCallees;
-  auto Caller = FindIRFunction(CallerFS.getFunction());
-  if (Caller.has_value() && *Caller) {
+  if (auto *IRCaller =
+          findOrMatchFunction(CallerFS.getFunction(), OldProfToNewSymbolMap)
+              .first) {
     // No callees for external function, skip the rename matching.
-    if ((*Caller)->isDeclaration())
+    if (IRCaller->isDeclaration())
       return;
-    findNewIRCallees(**Caller, newIRFunctions, NewIRCallees);
+    findNewIRCallees(*IRCaller, newIRFunctions, NewIRCallees);
+  }
+
+  // Match non-inline callees.
+  for (auto &BS : const_cast<BodySampleMap &>(CallerFS.getBodySamples())) {
+    // New function to old function pairs used to update the CallTargetMap.
+    std::vector<std::pair<FunctionId, FunctionId>> CallTargetsToUpdate;
+    SampleRecord::CallTargetMap &CTM =
+        const_cast<SampleRecord::CallTargetMap &>(BS.second.getCallTargets());
+    for (const auto &TS : CTM) {
+      const FunctionId &ProfCallee = TS.first;
+      auto MatchRes =
+          findOrMatchFunction(ProfCallee, OldProfToNewSymbolMap, NewIRCallees);
+      if (!MatchRes.second)
+        continue;
+      FunctionId NewIRCalleeName(MatchRes.first->getName());
+      assert(NewIRCalleeName != ProfCallee &&
+             "New callee symbol is not a new function");
+      LLVM_DEBUG(dbgs() << "In function " << CallerFS.getFunction()
+                        << ", changing profile name from " << ProfCallee
+                        << " to " << NewIRCalleeName << "\n");
+      CallTargetsToUpdate.emplace_back(NewIRCalleeName, ProfCallee);
+    }
+
+    for (const auto &P : CallTargetsToUpdate) {
+      CTM[P.first] = CTM[P.second];
+      CTM.erase(P.second);
+    }
   }
 
-  // Run function to profile matching on call-graph edge(caller-callee).
+  // Match inline callees.
   for (auto &CM :
        const_cast<CallsiteSampleMap &>(CallerFS.getCallsiteSamples())) {
     auto &CalleeMap = CM.second;
-    // Local container used to update the CallsiteSampleMap.
+    // New function to old FunctionSamples pairs used to update the
+    // CallsiteSampleMap.
     std::vector<std::pair<FunctionId, FunctionSamples *>> FSamplesToUpdate;
     for (auto &CS : CalleeMap) {
       FunctionSamples &CalleeFS = CS.second;
       FunctionId ProfCallee = CalleeFS.getFunction();
-      std::optional<Function *> ExistingIRCallee = FindIRFunction(ProfCallee);
-      // The profile callee is new, run function to profile matching.
-      if (!ExistingIRCallee.has_value()) {
-        for (auto *IRCallee : NewIRCallees) {
-          if (functionMatchesProfile(*IRCallee, ProfCallee)) {
-            FSamplesToUpdate.emplace_back(ProfCallee, &CalleeFS);
-            OldProfToNewSymbolMap[ProfCallee] = IRCallee;
-            // Update the profile in place so that the deeper level matching
-            // will find the IR function.
-            CalleeFS.setFunction(FunctionId(IRCallee->getName()));
-            LLVM_DEBUG(dbgs() << "Callee renaming is found in function "
-                              << CallerFS.getFunction()
-                              << ", changing profile name from " << ProfCallee
-                              << " to " << IRCallee->getName() << "\n");
-            break;
-          }
-        }
-      } else {
-        // Apply the existing renaming result.
-        auto R = OldProfToNewSymbolMap.find(CalleeFS.getFunction());
-        if (R != OldProfToNewSymbolMap.end()) {
-          FunctionId IRNewCallee(R->second->getName());
-          assert(IRNewCallee != ProfCallee &&
-                 "New callee symbol is not a new function");
-          FSamplesToUpdate.emplace_back(ProfCallee, &CalleeFS);
-          CalleeFS.setFunction(IRNewCallee);
-          LLVM_DEBUG(dbgs() << "Existing callee renaming is found in function "
-                            << CallerFS.getFunction()
-                            << ", changing profile name from " << ProfCallee
-                            << " to " << IRNewCallee << "\n");
-        }
+      auto MatchRes =
+          findOrMatchFunction(ProfCallee, OldProfToNewSymbolMap, NewIRCallees);
+      if (MatchRes.second) {
+        FunctionId NewIRCalleeName(MatchRes.first->getName());
+        assert(NewIRCalleeName != ProfCallee &&
+               "New callee symbol is not a new function");
+        LLVM_DEBUG(dbgs() << "In function " << CallerFS.getFunction()
+                          << ", changing profile name from " << ProfCallee
+                          << " to " << NewIRCalleeName << "\n");
+        FSamplesToUpdate.emplace_back(NewIRCalleeName, &CalleeFS);
       }
       // Note that even there is no renaming in the current scope, there could
       // be renaming in deeper callee scope, we need to traverse all the callee
@@ -817,14 +848,31 @@ void SampleProfileMatcher::matchProfileForNewFunctions(
 
     // Update the CalleeMap using the new name and remove the old entry.
     for (auto &P : FSamplesToUpdate) {
-      assert((P.first != P.second->getFunction()) &&
+      const FunctionId &OldFunction = P.second->getFunction();
+      assert(P.first != OldFunction &&
              "Renamed function name should be different from the old map key");
-      CalleeMap[P.second->getFunction()] = *P.second;
-      CalleeMap.erase(P.first);
+      P.second->setFunction(P.first);
+      CalleeMap[P.first] = *P.second;
+      CalleeMap.erase(OldFunction);
     }
   }
 }
 
+std::vector<FunctionSamples *>
+SampleProfileMatcher::sortFuncProfiles(SampleProfileMap &ProfileMap) {
+  std::vector<FunctionSamples *> SortedProfiles;
+  for (auto &I : ProfileMap)
+    SortedProfiles.push_back(&I.second);
+
+  llvm::stable_sort(SortedProfiles,
+                    [](const FunctionSamples *A, const FunctionSamples *B) {
+                      if (A->getTotalSamples() == B->getTotalSamples())
+                        return A->getContext() < B->getContext();
+                      return A->getTotalSamples() > B->getTotalSamples();
+                    });
+  return SortedProfiles;
+}
+
 void SampleProfileMatcher::runCallGraphMatching() {
   if (!SalvageRenamedProfile)
     return;
@@ -833,7 +881,7 @@ void SampleProfileMatcher::runCallGraphMatching() {
          "FunctionProfileNameMap is not empty before the call graph matching");
 
   StringMap<Function *> newIRFunctions;
-  findnewIRFunctions(newIRFunctions);
+  findNewIRFunctions(newIRFunctions);
   if (newIRFunctions.empty())
     return;
 
@@ -841,9 +889,9 @@ void SampleProfileMatcher::runCallGraphMatching() {
   // whose key is the old(profile) function name and value is the new(renamed)
   // function.
   FunctionMap OldProfToNewSymbolMap;
-  for (auto &I : Reader.getProfiles())
-    matchProfileForNewFunctions(newIRFunctions, I.second,
-                                OldProfToNewSymbolMap);
+  // Sort the profiles to make the matching order deterministic.
+  for (auto *P : sortFuncProfiles(Reader.getProfiles()))
+    matchProfileForNewFunctions(newIRFunctions, *P, OldProfToNewSymbolMap);
 
   // Update all the data generated by the old profile.
   if (!OldProfToNewSymbolMap.empty()) {
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-renaming.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-renaming.prof
index 1e23ef26d1b15..6ff9cd050dd82 100644
--- a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-renaming.prof
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-renaming.prof
@@ -32,18 +32,13 @@ main:47:0
   !CFGChecksum: 281479271677951
  10: cold_func:0
   1: 0
-  2: block_only:0
-   1: 0
-   3: 0
-   5: 0
-   10: 0
-   !CFGChecksum: 206551239323
+  2: 0 block_only:0
   !CFGChecksum: 281479271677951
  !CFGChecksum: 1126003093360596
 test_noninline:22:2
  1: 2
  2: foo:20
-  1: 2
+  1: 3
   2: 2 bar:3
   4: 3 bar:3
   3: baz:13
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll
index 37fbb2babf6d7..ef50ba024af3d 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll
@@ -1,19 +1,30 @@
 ; REQUIRES: x86_64-linux
 ; REQUIRES: asserts
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-renaming.prof --salvage-stale-profile --salvage-renamed-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-renaming.prof --salvage-stale-profile --salvage-renamed-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-for-cg-matching=0 --min-bb-for-cg-matching=0 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-renaming.prof --salvage-stale-profile --salvage-renamed-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl --min-call-for-cg-matching=10 --min-bb-for-cg-matching=10 2>&1 | FileCheck %s  --check-prefix=TINY-FUNC
 
 
 ; CHECK: Function new_block_only is not in profile or symbol list table.
 ; CHECK: Function new_foo is not in profile or symbol list table.
 
 ; CHECK: The similarity between new_foo(IR) and foo(profile) is 0.86
-; CHECK: Callee renaming is found in function main, changing profile name from foo to new_foo
-; CHECK: The similarity between new_block_only(IR) and block_only(profile) is 1.00
-; CHECK: Callee renaming is found in function baz, changing profile name from block_only to new_block_only
-; CHECK: Existing callee renaming is found in function baz, changing profile name from block_only to new_block_only
-; CHECK: Existing callee renaming is found in function cold_func, changing profile name from block_only to new_block_only
-; CHECK: Existing callee renaming is found in function test_noninline, changing profile name from foo to new_foo
-; CHECK: Existing callee renaming is found in function baz, changing profile name from block_only to new_block_only
+; CHECK: In function main, changing profile name from foo to new_foo
+; CHECK: The checksums for new_block_only(IR) and block_only(Profile) match
+; CHECK: In function baz, changing profile name from block_only to new_block_only
+; CHECK: In function baz, changing profile name from block_only to new_block_only
+; CHECK: In function cold_func, changing profile name from block_only to new_block_only
+; CHECK: In function test_noninline, changing profile name from foo to new_foo
+; CHECK: In function baz, changing profile name from block_only to new_block_only
+
+; Verify the matched function is updated correctly by checking the inlining.
+; CHECK: 'new_foo' inlined into 'main' to match profiling context with (cost=110, threshold=3000) at callsite main:2:7.5;
+; CHECK: 'new_block_only' inlined into 'main' to match profiling context with (cost=75, threshold=3000) at callsite baz:1:3.2 @ main:3:7.6
+; CHECK: 'new_block_only' inlined into 'main' to match profiling context with (cost=75, threshold=3000) at callsite baz:1:3.2 @ new_foo:2:3.3 @ main:2:7.5;
+; CHECK: 'new_foo' inlined into 'test_noninline' to match profiling context with (cost=110, threshold=3000) at callsite test_noninline:1:3.2;
+
+; TINY-FUNC-NOT: block_only to new_block_only
+; TINY-FUNC-NOT: from foo to new_foo
+
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"

>From 6beddf23441855034d5e1f6584d9e59011034bff Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Tue, 28 May 2024 22:43:24 -0700
Subject: [PATCH 04/22] addressing comments

---
 .../Transforms/IPO/SampleProfileMatcher.h     | 26 +++++++-
 .../Transforms/IPO/SampleProfileMatcher.cpp   | 60 ++++++++-----------
 2 files changed, 48 insertions(+), 38 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index ff7813a6c6ad0..ab52ce7b85f95 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -186,22 +186,42 @@ class SampleProfileMatcher {
   void runStaleProfileMatching(const Function &F, const AnchorMap &IRAnchors,
                                const AnchorMap &ProfileAnchors,
                                LocToLocMap &IRToProfileLocationMap);
+  /// Find the function using the profile name. If the function is not found but
+  /// the \p NewIRCallees is provided, try to match the function profile with
+  /// all functions in \p NewIRCallees and return the matched function.
+  ///
+  /// \param ProfCallee The profile name of the callee.
+  /// \param OldProfToNewSymbolMap The map from old profile name to new symbol.
+  /// \param NewIRCallees The new candidate callees in the same scope to match.
+  ///
+  /// \returns The matched function and a bool value indicating whether the
+  /// function is new(matched).
   std::pair<Function *, bool>
   findOrMatchFunction(const FunctionId &ProfCallee,
                       FunctionMap &OldProfToNewSymbolMap,
                       const std::vector<Function *> &NewIRCallees);
   std::vector<FunctionSamples *> sortFuncProfiles(SampleProfileMap &ProfileMap);
   void findNewIRCallees(Function &Caller,
-                        const StringMap<Function *> &newIRFunctions,
+                        const StringMap<Function *> &NewIRFunctions,
                         std::vector<Function *> &NewIRCallees);
   bool functionMatchesProfileHelper(const Function &IRFunc,
                                     const FunctionId &ProfFunc);
+  /// Determine if the function matches profile by computing a similarity ratio
+  /// between two callsite anchors extracted from function and profile. If it's
+  /// above the threshold, the function matches the profile.
+  ///
+  /// \returns True if the function matches profile.
   bool functionMatchesProfile(const Function &IRFunc,
                               const FunctionId &ProfFunc);
-  void matchProfileForNewFunctions(const StringMap<Function *> &newIRFunctions,
+  void matchProfileForNewFunctions(const StringMap<Function *> &NewIRFunctions,
                                    FunctionSamples &FS,
                                    FunctionMap &OldProfToNewSymbolMap);
-  void findNewIRFunctions(StringMap<Function *> &newIRFunctions);
+  /// Find functions that don't show in the profile or profile symbol list,
+  /// which are supposed to be new functions. We use them as the targets for
+  /// renaming matching.
+  ///
+  /// \param NewIRFunctions The map from function name to the IR function.
+  void findNewIRFunctions(StringMap<Function *> &NewIRFunctions);
   void runCallGraphMatching();
   void reportOrPersistProfileStats();
 };
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 3a8c6962d4a08..488ec85f6079f 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -617,11 +617,8 @@ void SampleProfileMatcher::computeAndReportProfileStaleness() {
   }
 }
 
-// Find functions that don't show in the profile or profile symbol list, which
-// are supposed to be new functions. We use them as the targets for renaming
-// matching.
 void SampleProfileMatcher::findNewIRFunctions(
-    StringMap<Function *> &newIRFunctions) {
+    StringMap<Function *> &NewIRFunctions) {
   // TODO: Support MD5 profile.
   if (FunctionSamples::UseMD5)
     return;
@@ -655,12 +652,12 @@ void SampleProfileMatcher::findNewIRFunctions(
 
     LLVM_DEBUG(dbgs() << "Function " << CanonFName
                       << " is not in profile or symbol list table.\n");
-    newIRFunctions[CanonFName] = &F;
+    NewIRFunctions[CanonFName] = &F;
   }
 }
 
 void SampleProfileMatcher::findNewIRCallees(
-    Function &Caller, const StringMap<Function *> &newIRFunctions,
+    Function &Caller, const StringMap<Function *> &NewIRFunctions,
     std::vector<Function *> &NewIRCallees) {
   for (auto &BB : Caller) {
     for (auto &I : BB) {
@@ -672,17 +669,12 @@ void SampleProfileMatcher::findNewIRCallees(
         continue;
       StringRef CalleeName =
           FunctionSamples::getCanonicalFnName(Callee->getName());
-      if (newIRFunctions.count(CalleeName))
+      if (NewIRFunctions.count(CalleeName))
         NewIRCallees.push_back(Callee);
     }
   }
 }
 
-// Find the function using the profile name. If the function is not found but
-// the NewIRCallees is provided, try to match the function profile with all
-// functions in NewIRCallees and return the matched function.
-// The return pair includes the function pointer and a bool value indicating
-// whether the function is new(matched).
 std::pair<Function *, bool> SampleProfileMatcher::findOrMatchFunction(
     const FunctionId &ProfCallee, FunctionMap &OldProfToNewSymbolMap,
     const std::vector<Function *> &NewIRCallees = std::vector<Function *>()) {
@@ -703,8 +695,6 @@ std::pair<Function *, bool> SampleProfileMatcher::findOrMatchFunction(
   return {nullptr, false};
 }
 
-// Determine if the function matches profile by computing a similarity ratio
-// between two callsite anchors sequences extracted from function and profile.
 bool SampleProfileMatcher::functionMatchesProfileHelper(
     const Function &IRFunc, const FunctionId &ProfFunc) {
   // The value is in the range [0, 1]. The bigger the value is, the more similar
@@ -713,7 +703,7 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
 
   const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc);
   assert(FSFlattened && "Flattened profile sample is null");
-  // Similarity check may not be reiable if the function is tiny, we use the
+  // Similarity check may not be reliable if the function is tiny, we use the
   // number of basic block as a proxy for the function complexity and skip the
   // matching if it's too small.
   if (IRFunc.size() < MinBBForCGMatching ||
@@ -778,7 +768,7 @@ bool SampleProfileMatcher::functionMatchesProfile(const Function &IRFunc,
 // Match profile for new function on the profiled call-graph edge to limit the
 // matching scope.
 void SampleProfileMatcher::matchProfileForNewFunctions(
-    const StringMap<Function *> &newIRFunctions, FunctionSamples &CallerFS,
+    const StringMap<Function *> &NewIRFunctions, FunctionSamples &CallerFS,
     FunctionMap &OldProfToNewSymbolMap) {
   // Find the new candidate callees from IR in the current caller scope.
   std::vector<Function *> NewIRCallees;
@@ -788,7 +778,7 @@ void SampleProfileMatcher::matchProfileForNewFunctions(
     // No callees for external function, skip the rename matching.
     if (IRCaller->isDeclaration())
       return;
-    findNewIRCallees(*IRCaller, newIRFunctions, NewIRCallees);
+    findNewIRCallees(*IRCaller, NewIRFunctions, NewIRCallees);
   }
 
   // Match non-inline callees.
@@ -842,7 +832,7 @@ void SampleProfileMatcher::matchProfileForNewFunctions(
       // Note that even there is no renaming in the current scope, there could
       // be renaming in deeper callee scope, we need to traverse all the callee
       // profiles.
-      matchProfileForNewFunctions(newIRFunctions, CalleeFS,
+      matchProfileForNewFunctions(NewIRFunctions, CalleeFS,
                                   OldProfToNewSymbolMap);
     }
 
@@ -880,9 +870,9 @@ void SampleProfileMatcher::runCallGraphMatching() {
   assert(FunctionProfileNameMap.empty() &&
          "FunctionProfileNameMap is not empty before the call graph matching");
 
-  StringMap<Function *> newIRFunctions;
-  findNewIRFunctions(newIRFunctions);
-  if (newIRFunctions.empty())
+  StringMap<Function *> NewIRFunctions;
+  findNewIRFunctions(NewIRFunctions);
+  if (NewIRFunctions.empty())
     return;
 
   // The new functions found by the renaming matching. Save them into a map
@@ -891,23 +881,23 @@ void SampleProfileMatcher::runCallGraphMatching() {
   FunctionMap OldProfToNewSymbolMap;
   // Sort the profiles to make the matching order deterministic.
   for (auto *P : sortFuncProfiles(Reader.getProfiles()))
-    matchProfileForNewFunctions(newIRFunctions, *P, OldProfToNewSymbolMap);
+    matchProfileForNewFunctions(NewIRFunctions, *P, OldProfToNewSymbolMap);
 
+  FunctionProfileNameMap.clear();
+  if (OldProfToNewSymbolMap.empty())
+    return;
   // Update all the data generated by the old profile.
-  if (!OldProfToNewSymbolMap.empty()) {
-    // Add the new function to the SymbolMap, which will be used in
-    // SampleLoader.
-    for (auto &I : OldProfToNewSymbolMap) {
-      assert(I.second && "New function is null");
-      SymbolMap->emplace(FunctionId(I.second->getName()), I.second);
-    }
-
-    // Re-flatten the profiles after the renaming.
-    FlattenedProfiles.clear();
-    ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
-                                     FunctionSamples::ProfileIsCS);
+  // Add the new function to the SymbolMap, which will be used in
+  // SampleLoader.
+  for (auto &I : OldProfToNewSymbolMap) {
+    assert(I.second && "New function is null");
+    SymbolMap->emplace(FunctionId(I.second->getName()), I.second);
   }
-  FunctionProfileNameMap.clear();
+
+  // Re-flatten the profiles after the renaming.
+  FlattenedProfiles.clear();
+  ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
+                                   FunctionSamples::ProfileIsCS);
 }
 
 void SampleProfileMatcher::runOnFunction(Function &F) {

>From 7f38b7edb14f275d618eff2ddbf93c2495657ecc Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Wed, 29 May 2024 22:37:23 -0700
Subject: [PATCH 05/22] change ProfCallee to ProfFunc

---
 .../include/llvm/Transforms/IPO/SampleProfileMatcher.h |  4 ++--
 llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp       | 10 +++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index ab52ce7b85f95..70378bd66d9e4 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -190,14 +190,14 @@ class SampleProfileMatcher {
   /// the \p NewIRCallees is provided, try to match the function profile with
   /// all functions in \p NewIRCallees and return the matched function.
   ///
-  /// \param ProfCallee The profile name of the callee.
+  /// \param ProfFunc The function profile name.
   /// \param OldProfToNewSymbolMap The map from old profile name to new symbol.
   /// \param NewIRCallees The new candidate callees in the same scope to match.
   ///
   /// \returns The matched function and a bool value indicating whether the
   /// function is new(matched).
   std::pair<Function *, bool>
-  findOrMatchFunction(const FunctionId &ProfCallee,
+  findOrMatchFunction(const FunctionId &ProfFunc,
                       FunctionMap &OldProfToNewSymbolMap,
                       const std::vector<Function *> &NewIRCallees);
   std::vector<FunctionSamples *> sortFuncProfiles(SampleProfileMap &ProfileMap);
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 488ec85f6079f..5693545ee25c3 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -676,20 +676,20 @@ void SampleProfileMatcher::findNewIRCallees(
 }
 
 std::pair<Function *, bool> SampleProfileMatcher::findOrMatchFunction(
-    const FunctionId &ProfCallee, FunctionMap &OldProfToNewSymbolMap,
+    const FunctionId &ProfFunc, FunctionMap &OldProfToNewSymbolMap,
     const std::vector<Function *> &NewIRCallees = std::vector<Function *>()) {
-  auto F = SymbolMap->find(ProfCallee);
+  auto F = SymbolMap->find(ProfFunc);
   if (F != SymbolMap->end())
     return {F->second, false};
 
   // Existing matched function is found.
-  auto NewF = OldProfToNewSymbolMap.find(ProfCallee);
+  auto NewF = OldProfToNewSymbolMap.find(ProfFunc);
   if (NewF != OldProfToNewSymbolMap.end())
     return {NewF->second, true};
 
   for (auto *IRCallee : NewIRCallees)
-    if (functionMatchesProfile(*IRCallee, ProfCallee)) {
-      OldProfToNewSymbolMap[ProfCallee] = IRCallee;
+    if (functionMatchesProfile(*IRCallee, ProfFunc)) {
+      OldProfToNewSymbolMap[ProfFunc] = IRCallee;
       return {IRCallee, true};
     }
   return {nullptr, false};

>From ae436ddc190ca005e335dcc763273b6d292063dd Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Wed, 29 May 2024 22:42:53 -0700
Subject: [PATCH 06/22] early break for non-inline callees match when
 NewIRCallees is empty

---
 llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 5693545ee25c3..83e9e80b91635 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -783,6 +783,8 @@ void SampleProfileMatcher::matchProfileForNewFunctions(
 
   // Match non-inline callees.
   for (auto &BS : const_cast<BodySampleMap &>(CallerFS.getBodySamples())) {
+    if (NewIRCallees.empty())
+      break;
     // New function to old function pairs used to update the CallTargetMap.
     std::vector<std::pair<FunctionId, FunctionId>> CallTargetsToUpdate;
     SampleRecord::CallTargetMap &CTM =

>From 7cb0cd79f9cb9a353065755af2fb5fd2ca0538de Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Thu, 30 May 2024 09:10:16 -0700
Subject: [PATCH 07/22] refactoring findOrMatchFunction

---
 .../Transforms/IPO/SampleProfileMatcher.h     | 13 ++++---
 .../Transforms/IPO/SampleProfileMatcher.cpp   | 34 +++++++++++++------
 2 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index 70378bd66d9e4..e500aed6681d1 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -35,7 +35,7 @@ class SampleProfileMatcher {
   // in the profile.
   StringMap<LocToLocMap> FuncMappings;
 
-  // Match state for an anchor/callsite.
+  // Match state for an anchor/callsite or function.
   enum class MatchState {
     Unknown = 0,
     // Initial match between input profile and current IR.
@@ -186,6 +186,12 @@ class SampleProfileMatcher {
   void runStaleProfileMatching(const Function &F, const AnchorMap &IRAnchors,
                                const AnchorMap &ProfileAnchors,
                                LocToLocMap &IRToProfileLocationMap);
+  /// Find the existing or new matched function using the profile name.
+  ///
+  /// \returns The function and a match state.
+  std::pair<Function *, MatchState>
+  findFunction(const FunctionId &ProfFunc,
+               const FunctionMap &OldProfToNewSymbolMap) const;
   /// Find the function using the profile name. If the function is not found but
   /// the \p NewIRCallees is provided, try to match the function profile with
   /// all functions in \p NewIRCallees and return the matched function.
@@ -194,9 +200,8 @@ class SampleProfileMatcher {
   /// \param OldProfToNewSymbolMap The map from old profile name to new symbol.
   /// \param NewIRCallees The new candidate callees in the same scope to match.
   ///
-  /// \returns The matched function and a bool value indicating whether the
-  /// function is new(matched).
-  std::pair<Function *, bool>
+  /// \returns The matched function and a match state.
+  std::pair<Function *, MatchState>
   findOrMatchFunction(const FunctionId &ProfFunc,
                       FunctionMap &OldProfToNewSymbolMap,
                       const std::vector<Function *> &NewIRCallees);
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 83e9e80b91635..9875b7f6a2295 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -675,24 +675,37 @@ void SampleProfileMatcher::findNewIRCallees(
   }
 }
 
-std::pair<Function *, bool> SampleProfileMatcher::findOrMatchFunction(
-    const FunctionId &ProfFunc, FunctionMap &OldProfToNewSymbolMap,
-    const std::vector<Function *> &NewIRCallees = std::vector<Function *>()) {
+std::pair<Function *, SampleProfileMatcher::MatchState>
+SampleProfileMatcher::findFunction(
+    const FunctionId &ProfFunc,
+    const FunctionMap &OldProfToNewSymbolMap) const {
   auto F = SymbolMap->find(ProfFunc);
   if (F != SymbolMap->end())
-    return {F->second, false};
+    return {F->second, MatchState::InitialMatch};
 
   // Existing matched function is found.
   auto NewF = OldProfToNewSymbolMap.find(ProfFunc);
   if (NewF != OldProfToNewSymbolMap.end())
-    return {NewF->second, true};
+    return {NewF->second, MatchState::RecoveredMismatch};
+  return {nullptr, MatchState::Unknown};
+}
+
+std::pair<Function *, SampleProfileMatcher::MatchState>
+SampleProfileMatcher::findOrMatchFunction(
+    const FunctionId &ProfFunc, FunctionMap &OldProfToNewSymbolMap,
+    const std::vector<Function *> &NewIRCallees) {
+  auto R = findFunction(ProfFunc, OldProfToNewSymbolMap);
+  // We need to check the match state instead of nullptr function because the
+  // returned function can be nullptr even if it's found in the symbol map.
+  if (R.second != MatchState::Unknown)
+    return R;
 
   for (auto *IRCallee : NewIRCallees)
     if (functionMatchesProfile(*IRCallee, ProfFunc)) {
       OldProfToNewSymbolMap[ProfFunc] = IRCallee;
-      return {IRCallee, true};
+      return {IRCallee, MatchState::RecoveredMismatch};
     }
-  return {nullptr, false};
+  return {nullptr, MatchState::Unknown};
 }
 
 bool SampleProfileMatcher::functionMatchesProfileHelper(
@@ -773,8 +786,7 @@ void SampleProfileMatcher::matchProfileForNewFunctions(
   // Find the new candidate callees from IR in the current caller scope.
   std::vector<Function *> NewIRCallees;
   if (auto *IRCaller =
-          findOrMatchFunction(CallerFS.getFunction(), OldProfToNewSymbolMap)
-              .first) {
+          findFunction(CallerFS.getFunction(), OldProfToNewSymbolMap).first) {
     // No callees for external function, skip the rename matching.
     if (IRCaller->isDeclaration())
       return;
@@ -793,7 +805,7 @@ void SampleProfileMatcher::matchProfileForNewFunctions(
       const FunctionId &ProfCallee = TS.first;
       auto MatchRes =
           findOrMatchFunction(ProfCallee, OldProfToNewSymbolMap, NewIRCallees);
-      if (!MatchRes.second)
+      if (MatchRes.second != MatchState::RecoveredMismatch)
         continue;
       FunctionId NewIRCalleeName(MatchRes.first->getName());
       assert(NewIRCalleeName != ProfCallee &&
@@ -822,7 +834,7 @@ void SampleProfileMatcher::matchProfileForNewFunctions(
       FunctionId ProfCallee = CalleeFS.getFunction();
       auto MatchRes =
           findOrMatchFunction(ProfCallee, OldProfToNewSymbolMap, NewIRCallees);
-      if (MatchRes.second) {
+      if (MatchRes.second == MatchState::RecoveredMismatch) {
         FunctionId NewIRCalleeName(MatchRes.first->getName());
         assert(NewIRCalleeName != ProfCallee &&
                "New callee symbol is not a new function");

>From beaa60172e7c47790a33239fd37ad8764cd0f1c5 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Mon, 3 Jun 2024 15:32:43 -0700
Subject: [PATCH 08/22] addressing comments

---
 .../Transforms/IPO/SampleProfileMatcher.h     |  69 +++--
 .../Transforms/IPO/SampleProfileMatcher.cpp   | 257 +++++++++---------
 .../pseudo-probe-stale-profile-renaming.ll    |   4 +-
 3 files changed, 168 insertions(+), 162 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index e500aed6681d1..fa66890294945 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -21,7 +21,6 @@ namespace llvm {
 
 using AnchorList = std::vector<std::pair<LineLocation, FunctionId>>;
 using AnchorMap = std::map<LineLocation, FunctionId>;
-using FunctionMap = HashKeyMap<std::unordered_map, FunctionId, Function *>;
 
 // Sample profile matching - fuzzy match.
 class SampleProfileMatcher {
@@ -65,12 +64,26 @@ class SampleProfileMatcher {
       return hash_combine(P.first, P.second);
     }
   };
+  // A map from a pair of function and profile name to a boolean value
+  // indicating whether they are matched. This is used as a cache for the
+  // matching result.
   std::unordered_map<std::pair<const Function *, FunctionId>, bool,
                      FuncProfNameMapHash>
       FunctionProfileNameMap;
+  // The new functions found by the call graph matching. The map's key is the
+  // old profile name and value is the new(renamed) function.
+  HashKeyMap<std::unordered_map, FunctionId, Function *> ProfileNameToFuncMap;
 
-  FunctionMap *SymbolMap;
+  // A pointer to the SymbolMap in the SampleProfileLoader, which stores all
+  // the originally matched symbols before the matching. This is used to
+  // determine whether the profile is unused (to be matched) or not.
+  HashKeyMap<std::unordered_map, FunctionId, Function *> *SymbolMap;
 
+  // A map from the caller to its new callees, this is used as a cache for the
+  // candidate callees.
+  std::unordered_map<Function *, std::vector<Function *>> FuncToNewCalleesMap;
+
+  // Pointer to the Profile Symbol List in the reader.
   std::shared_ptr<ProfileSymbolList> PSL;
 
   // Profile mismatch statstics:
@@ -99,11 +112,13 @@ class SampleProfileMatcher {
                        std::shared_ptr<ProfileSymbolList> PSL)
       : M(M), Reader(Reader), ProbeManager(ProbeManager), LTOPhase(LTOPhase),
         PSL(PSL) {};
-  void runOnModule(FunctionMap &SymbolMap);
+  void runOnModule(
+      HashKeyMap<std::unordered_map, FunctionId, Function *> &SymbolMap);
   void clearMatchingData() {
     // Do not clear FuncMappings, it stores IRLoc to ProfLoc remappings which
     // will be used for sample loader.
     FuncCallsiteMatchStates.clear();
+    FlattenedProfiles.clear();
   }
 
 private:
@@ -188,27 +203,25 @@ class SampleProfileMatcher {
                                LocToLocMap &IRToProfileLocationMap);
   /// Find the existing or new matched function using the profile name.
   ///
-  /// \returns The function and a match state.
-  std::pair<Function *, MatchState>
-  findFunction(const FunctionId &ProfFunc,
-               const FunctionMap &OldProfToNewSymbolMap) const;
-  /// Find the function using the profile name. If the function is not found but
-  /// the \p NewIRCallees is provided, try to match the function profile with
-  /// all functions in \p NewIRCallees and return the matched function.
-  ///
-  /// \param ProfFunc The function profile name.
-  /// \param OldProfToNewSymbolMap The map from old profile name to new symbol.
-  /// \param NewIRCallees The new candidate callees in the same scope to match.
+  /// \returns The function pointer.
+  Function *findFuncByProfileName(const FunctionId &ProfileName) const;
+  /// Match the callee profile with the IR function. If the profile callee is
+  /// found in the SymbolMap, which means it's an original matched symbol, skip
+  /// the matching. Otherwise match the callee profile with all functions in
+  /// \p NewIRFuncsToMatch and save the match result into \p MatchResult.
   ///
-  /// \returns The matched function and a match state.
-  std::pair<Function *, MatchState>
-  findOrMatchFunction(const FunctionId &ProfFunc,
-                      FunctionMap &OldProfToNewSymbolMap,
-                      const std::vector<Function *> &NewIRCallees);
-  std::vector<FunctionSamples *> sortFuncProfiles(SampleProfileMap &ProfileMap);
-  void findNewIRCallees(Function &Caller,
-                        const StringMap<Function *> &NewIRFunctions,
-                        std::vector<Function *> &NewIRCallees);
+  /// \param Caller The caller function.
+  /// \param ProfileCalleeName The profile callee name.
+  /// \param IRCalleesToMatch The new candidate IR callees in the same scope to
+  /// match.
+  /// \param MatchResult The matched result.
+  void matchCalleeProfile(
+      const FunctionId &Caller, const FunctionId &ProfileCalleeName,
+      const std::vector<Function *> *IRCalleesToMatch,
+      std::vector<std::pair<FunctionId, FunctionId>> &MatchResult);
+  std::vector<Function *> *
+  findNewIRCallees(Function &Caller,
+                   const StringMap<Function *> &NewIRFunctions);
   bool functionMatchesProfileHelper(const Function &IRFunc,
                                     const FunctionId &ProfFunc);
   /// Determine if the function matches profile by computing a similarity ratio
@@ -216,17 +229,19 @@ class SampleProfileMatcher {
   /// above the threshold, the function matches the profile.
   ///
   /// \returns True if the function matches profile.
-  bool functionMatchesProfile(const Function &IRFunc,
-                              const FunctionId &ProfFunc);
+  bool functionMatchesProfile(Function &IRFunc, const FunctionId &ProfFunc);
   void matchProfileForNewFunctions(const StringMap<Function *> &NewIRFunctions,
-                                   FunctionSamples &FS,
-                                   FunctionMap &OldProfToNewSymbolMap);
+                                   FunctionSamples &FS);
   /// Find functions that don't show in the profile or profile symbol list,
   /// which are supposed to be new functions. We use them as the targets for
   /// renaming matching.
   ///
   /// \param NewIRFunctions The map from function name to the IR function.
   void findNewIRFunctions(StringMap<Function *> &NewIRFunctions);
+  void clearCacheData() {
+    FunctionProfileNameMap.clear();
+    FuncToNewCalleesMap.clear();
+  }
   void runCallGraphMatching();
   void reportOrPersistProfileStats();
 };
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 9875b7f6a2295..3b349cc461bce 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -20,22 +20,23 @@ using namespace sampleprof;
 
 #define DEBUG_TYPE "sample-profile-matcher"
 
-static cl::opt<bool> SalvageRenamedProfile(
-    "salvage-renamed-profile", cl::Hidden, cl::init(false),
-    cl::desc("Salvage renamed profile by function renaming matching."));
+static cl::opt<bool> SalvageUnusedProfile(
+    "salvage-unused-profile", cl::Hidden, cl::init(false),
+    cl::desc(
+        "Salvage unused profile by matching new functions on call graph."));
 
-static cl::opt<unsigned> RenamedFuncSimilarityThreshold(
-    "renamed-func-similarity-threshold", cl::Hidden, cl::init(80),
+static cl::opt<unsigned> FuncProfileSimilarityThreshold(
+    "func-profile-similarity-threshold", cl::Hidden, cl::init(80),
     cl::desc("The profile matches the function if their similarity is above "
              "the given number(percentage)."));
 
-static cl::opt<unsigned> MinBBForCGMatching(
-    "min-bb-for-cg-matching", cl::Hidden, cl::init(5),
+static cl::opt<unsigned> MinFuncCountForCGMatching(
+    "min-func-count-for-cg-matching", cl::Hidden, cl::init(5),
     cl::desc("The minimum number of basic blocks required for a function to "
              "run stale profile call graph matching."));
 
-static cl::opt<unsigned> MinCallAnchorForCGMatching(
-    "min-call-for-cg-matching", cl::Hidden, cl::init(3),
+static cl::opt<unsigned> MinCallCountForCGMatching(
+    "min-call-count-for-cg-matching", cl::Hidden, cl::init(3),
     cl::desc("The minimum number of call anchors required for a function to "
              "run stale profile call graph matching."));
 
@@ -656,10 +657,15 @@ void SampleProfileMatcher::findNewIRFunctions(
   }
 }
 
-void SampleProfileMatcher::findNewIRCallees(
-    Function &Caller, const StringMap<Function *> &NewIRFunctions,
-    std::vector<Function *> &NewIRCallees) {
-  for (auto &BB : Caller) {
+std::vector<Function *> *SampleProfileMatcher::findNewIRCallees(
+    Function &Func, const StringMap<Function *> &NewIRFunctions) {
+  auto R = FuncToNewCalleesMap.try_emplace(&Func, std::vector<Function *>());
+  std::vector<Function *> &IRCalleesToMatch = R.first->second;
+  // Skip the lookup if it's in the cache.
+  if (!R.second)
+    return &IRCalleesToMatch;
+
+  for (auto &BB : Func) {
     for (auto &I : BB) {
       const auto *CB = dyn_cast<CallBase>(&I);
       if (!CB || isa<IntrinsicInst>(&I))
@@ -670,42 +676,23 @@ void SampleProfileMatcher::findNewIRCallees(
       StringRef CalleeName =
           FunctionSamples::getCanonicalFnName(Callee->getName());
       if (NewIRFunctions.count(CalleeName))
-        NewIRCallees.push_back(Callee);
+        IRCalleesToMatch.push_back(Callee);
     }
   }
+  return &IRCalleesToMatch;
 }
 
-std::pair<Function *, SampleProfileMatcher::MatchState>
-SampleProfileMatcher::findFunction(
-    const FunctionId &ProfFunc,
-    const FunctionMap &OldProfToNewSymbolMap) const {
-  auto F = SymbolMap->find(ProfFunc);
+Function *SampleProfileMatcher::findFuncByProfileName(
+    const FunctionId &ProfileName) const {
+  auto F = SymbolMap->find(ProfileName);
   if (F != SymbolMap->end())
-    return {F->second, MatchState::InitialMatch};
-
-  // Existing matched function is found.
-  auto NewF = OldProfToNewSymbolMap.find(ProfFunc);
-  if (NewF != OldProfToNewSymbolMap.end())
-    return {NewF->second, MatchState::RecoveredMismatch};
-  return {nullptr, MatchState::Unknown};
-}
+    return F->second;
 
-std::pair<Function *, SampleProfileMatcher::MatchState>
-SampleProfileMatcher::findOrMatchFunction(
-    const FunctionId &ProfFunc, FunctionMap &OldProfToNewSymbolMap,
-    const std::vector<Function *> &NewIRCallees) {
-  auto R = findFunction(ProfFunc, OldProfToNewSymbolMap);
-  // We need to check the match state instead of nullptr function because the
-  // returned function can be nullptr even if it's found in the symbol map.
-  if (R.second != MatchState::Unknown)
-    return R;
-
-  for (auto *IRCallee : NewIRCallees)
-    if (functionMatchesProfile(*IRCallee, ProfFunc)) {
-      OldProfToNewSymbolMap[ProfFunc] = IRCallee;
-      return {IRCallee, MatchState::RecoveredMismatch};
-    }
-  return {nullptr, MatchState::Unknown};
+  // Find in new matched function map.
+  auto NewF = ProfileNameToFuncMap.find(ProfileName);
+  if (NewF != ProfileNameToFuncMap.end())
+    return NewF->second;
+  return nullptr;
 }
 
 bool SampleProfileMatcher::functionMatchesProfileHelper(
@@ -719,8 +706,8 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
   // Similarity check may not be reliable if the function is tiny, we use the
   // number of basic block as a proxy for the function complexity and skip the
   // matching if it's too small.
-  if (IRFunc.size() < MinBBForCGMatching ||
-      FSFlattened->getBodySamples().size() < MinBBForCGMatching)
+  if (IRFunc.size() < MinFuncCountForCGMatching ||
+      FSFlattened->getBodySamples().size() < MinFuncCountForCGMatching)
     return false;
 
   // For probe-based function, we first trust the checksum info. If the checksum
@@ -747,8 +734,8 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
                         FilteredProfileAnchorList);
 
   // Similarly skip the matching if the num of anchors is not enough.
-  if (FilteredIRAnchorsList.size() < MinCallAnchorForCGMatching ||
-      FilteredProfileAnchorList.size() < MinCallAnchorForCGMatching)
+  if (FilteredIRAnchorsList.size() < MinCallCountForCGMatching ||
+      FilteredProfileAnchorList.size() < MinCallCountForCGMatching)
     return false;
 
   // Use the diff algorithm to find the LCS between IR and profile.
@@ -764,10 +751,10 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
                     << format("%.2f", Similarity) << "\n");
   assert((Similarity >= 0 && Similarity <= 1.0) &&
          "Similarity value should be in [0, 1]");
-  return Similarity * 100 > RenamedFuncSimilarityThreshold;
+  return Similarity * 100 > FuncProfileSimilarityThreshold;
 }
 
-bool SampleProfileMatcher::functionMatchesProfile(const Function &IRFunc,
+bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc,
                                                   const FunctionId &ProfFunc) {
   auto R = FunctionProfileNameMap.find({&IRFunc, ProfFunc});
   if (R != FunctionProfileNameMap.end())
@@ -775,48 +762,64 @@ bool SampleProfileMatcher::functionMatchesProfile(const Function &IRFunc,
 
   bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc);
   FunctionProfileNameMap[{&IRFunc, ProfFunc}] = Matched;
+  ProfileNameToFuncMap[ProfFunc] = &IRFunc;
   return Matched;
 }
 
-// Match profile for new function on the profiled call-graph edge to limit the
-// matching scope.
+void SampleProfileMatcher::matchCalleeProfile(
+    const FunctionId &Caller, const FunctionId &ProfileCalleeName,
+    const std::vector<Function *> *IRCalleesToMatch,
+    std::vector<std::pair<FunctionId, FunctionId>> &MatchResult) {
+  if (!IRCalleesToMatch)
+    return;
+  // Check whether this is an existing function matching the profile, we only
+  // run the matching when the callee profile is unused.
+  auto F = SymbolMap->find(ProfileCalleeName);
+  if (F != SymbolMap->end())
+    return;
+
+  for (auto *IRFuncCandidate : *IRCalleesToMatch)
+    if (functionMatchesProfile(*IRFuncCandidate, ProfileCalleeName)) {
+      FunctionId IRCalleeName(IRFuncCandidate->getName());
+      assert(IRCalleeName != ProfileCalleeName &&
+             "New callee symbol is not a new function");
+      LLVM_DEBUG(dbgs() << "In function " << Caller
+                        << ", changing profile name from " << ProfileCalleeName
+                        << " to " << IRCalleeName << "\n");
+      MatchResult.emplace_back(IRCalleeName, ProfileCalleeName);
+      return;
+    }
+}
+
+// Traverse the profiled call-graph recursively to run the matching.
 void SampleProfileMatcher::matchProfileForNewFunctions(
-    const StringMap<Function *> &NewIRFunctions, FunctionSamples &CallerFS,
-    FunctionMap &OldProfToNewSymbolMap) {
-  // Find the new candidate callees from IR in the current caller scope.
-  std::vector<Function *> NewIRCallees;
-  if (auto *IRCaller =
-          findFunction(CallerFS.getFunction(), OldProfToNewSymbolMap).first) {
+    const StringMap<Function *> &NewIRFunctions, FunctionSamples &FuncProfile) {
+  // Find the new candidate IR callees in the current caller scope.
+  std::vector<Function *> *IRCalleesToMatch = nullptr;
+  if (auto *IRCaller = findFuncByProfileName(FuncProfile.getFunction())) {
     // No callees for external function, skip the rename matching.
     if (IRCaller->isDeclaration())
       return;
-    findNewIRCallees(*IRCaller, NewIRFunctions, NewIRCallees);
+    IRCalleesToMatch = findNewIRCallees(*IRCaller, NewIRFunctions);
   }
+  // Don't return here when IRCalleesToMatch is nullptr or empty: even if there
+  // is nothing to match in the current scope, there could be matches in a
+  // deeper callee scope/edge, so we need to keep traversing the call-graph.
+  // When IRCalleesToMatch is nullptr or empty, the matching function
+  // (matchCalleeProfile) turns into a no-op.
 
   // Match non-inline callees.
-  for (auto &BS : const_cast<BodySampleMap &>(CallerFS.getBodySamples())) {
-    if (NewIRCallees.empty())
-      break;
+  for (auto &BS : const_cast<BodySampleMap &>(FuncProfile.getBodySamples())) {
     // New function to old function pairs used to update the CallTargetMap.
-    std::vector<std::pair<FunctionId, FunctionId>> CallTargetsToUpdate;
+    std::vector<std::pair<FunctionId, FunctionId>> MatchResult;
     SampleRecord::CallTargetMap &CTM =
         const_cast<SampleRecord::CallTargetMap &>(BS.second.getCallTargets());
-    for (const auto &TS : CTM) {
-      const FunctionId &ProfCallee = TS.first;
-      auto MatchRes =
-          findOrMatchFunction(ProfCallee, OldProfToNewSymbolMap, NewIRCallees);
-      if (MatchRes.second != MatchState::RecoveredMismatch)
-        continue;
-      FunctionId NewIRCalleeName(MatchRes.first->getName());
-      assert(NewIRCalleeName != ProfCallee &&
-             "New callee symbol is not a new function");
-      LLVM_DEBUG(dbgs() << "In function " << CallerFS.getFunction()
-                        << ", changing profile name from " << ProfCallee
-                        << " to " << NewIRCalleeName << "\n");
-      CallTargetsToUpdate.emplace_back(NewIRCalleeName, ProfCallee);
-    }
+    for (const auto &TS : CTM)
+      matchCalleeProfile(FuncProfile.getFunction(), TS.first, IRCalleesToMatch,
+                         MatchResult);
 
-    for (const auto &P : CallTargetsToUpdate) {
+    // Update the CallTargetMap.
+    for (const auto &P : MatchResult) {
       CTM[P.first] = CTM[P.second];
       CTM.erase(P.second);
     }
@@ -824,61 +827,48 @@ void SampleProfileMatcher::matchProfileForNewFunctions(
 
   // Match inline callees.
   for (auto &CM :
-       const_cast<CallsiteSampleMap &>(CallerFS.getCallsiteSamples())) {
+       const_cast<CallsiteSampleMap &>(FuncProfile.getCallsiteSamples())) {
     auto &CalleeMap = CM.second;
-    // New function to old FunctionSamples pairs used to update the
-    // CallsiteSampleMap.
-    std::vector<std::pair<FunctionId, FunctionSamples *>> FSamplesToUpdate;
+    // New function to old function pairs used to update the CallsiteSampleMap.
+    std::vector<std::pair<FunctionId, FunctionId>> MatchResult;
     for (auto &CS : CalleeMap) {
-      FunctionSamples &CalleeFS = CS.second;
-      FunctionId ProfCallee = CalleeFS.getFunction();
-      auto MatchRes =
-          findOrMatchFunction(ProfCallee, OldProfToNewSymbolMap, NewIRCallees);
-      if (MatchRes.second == MatchState::RecoveredMismatch) {
-        FunctionId NewIRCalleeName(MatchRes.first->getName());
-        assert(NewIRCalleeName != ProfCallee &&
-               "New callee symbol is not a new function");
-        LLVM_DEBUG(dbgs() << "In function " << CallerFS.getFunction()
-                          << ", changing profile name from " << ProfCallee
-                          << " to " << NewIRCalleeName << "\n");
-        FSamplesToUpdate.emplace_back(NewIRCalleeName, &CalleeFS);
-      }
-      // Note that even there is no renaming in the current scope, there could
-      // be renaming in deeper callee scope, we need to traverse all the callee
-      // profiles.
-      matchProfileForNewFunctions(NewIRFunctions, CalleeFS,
-                                  OldProfToNewSymbolMap);
+      FunctionSamples &CalleeProfile = CS.second;
+      matchCalleeProfile(FuncProfile.getFunction(), CalleeProfile.getFunction(),
+                         IRCalleesToMatch, MatchResult);
+
+      // Traverse all the inlined callee profiles.
+      matchProfileForNewFunctions(NewIRFunctions, CalleeProfile);
     }
 
     // Update the CalleeMap using the new name and remove the old entry.
-    for (auto &P : FSamplesToUpdate) {
-      const FunctionId &OldFunction = P.second->getFunction();
-      assert(P.first != OldFunction &&
+    for (auto &P : MatchResult) {
+      assert(P.first != P.second &&
              "Renamed function name should be different from the old map key");
-      P.second->setFunction(P.first);
-      CalleeMap[P.first] = *P.second;
-      CalleeMap.erase(OldFunction);
+      FunctionSamples &FS = CalleeMap[P.second];
+      FS.setFunction(P.first);
+      CalleeMap[P.first] = FS;
+      CalleeMap.erase(P.second);
     }
   }
 }
 
-std::vector<FunctionSamples *>
-SampleProfileMatcher::sortFuncProfiles(SampleProfileMap &ProfileMap) {
-  std::vector<FunctionSamples *> SortedProfiles;
-  for (auto &I : ProfileMap)
-    SortedProfiles.push_back(&I.second);
-
-  llvm::stable_sort(SortedProfiles,
-                    [](const FunctionSamples *A, const FunctionSamples *B) {
-                      if (A->getTotalSamples() == B->getTotalSamples())
-                        return A->getContext() < B->getContext();
-                      return A->getTotalSamples() > B->getTotalSamples();
-                    });
-  return SortedProfiles;
-}
-
+// Match the unused profile with new IR functions on the profiled call-graph.
+// The high-level steps for the algorithm:
+// 1) Find all the new functions that show only in the IR, use them as the
+// matching candidates to compute new callees.
+//
+// 2) Traverse all the nodes in the profiled call-graph.
+// For each function caller scope:
+//  a) Find a set of callees in the IR that doesn't exist in the profile. See
+//  findNewIRCallees.
+//  b) Find a set of callees in the profile that doesn't exist
+//  in the IR. See matchCalleeProfile.
+//  c) Match the callee pairs between a and b. Compute a similarity ratio
+//  between the pair; it's considered a match if the similarity is above a
+//  given threshold. See matchProfileForNewFunctions.
+//  d) Update the profile with the matched name in-place.
 void SampleProfileMatcher::runCallGraphMatching() {
-  if (!SalvageRenamedProfile)
+  if (!SalvageUnusedProfile)
     return;
   assert(SymbolMap && "SymbolMap is null");
   assert(FunctionProfileNameMap.empty() &&
@@ -889,29 +879,29 @@ void SampleProfileMatcher::runCallGraphMatching() {
   if (NewIRFunctions.empty())
     return;
 
-  // The new functions found by the renaming matching. Save them into a map
-  // whose key is the old(profile) function name and value is the new(renamed)
-  // function.
-  FunctionMap OldProfToNewSymbolMap;
   // Sort the profiles to make the matching order deterministic.
-  for (auto *P : sortFuncProfiles(Reader.getProfiles()))
-    matchProfileForNewFunctions(NewIRFunctions, *P, OldProfToNewSymbolMap);
-
-  FunctionProfileNameMap.clear();
-  if (OldProfToNewSymbolMap.empty())
+  std::vector<NameFunctionSamples> SortedProfiles;
+  ::llvm::sortFuncProfiles(Reader.getProfiles(), SortedProfiles);
+  for (auto &P : SortedProfiles)
+    matchProfileForNewFunctions(NewIRFunctions,
+                                *const_cast<FunctionSamples *>(P.second));
+
+  clearCacheData();
+  if (ProfileNameToFuncMap.empty())
     return;
   // Update all the data generated by the old profile.
   // Add the new function to the SymbolMap, which will be used in
   // SampleLoader.
-  for (auto &I : OldProfToNewSymbolMap) {
+  for (auto &I : ProfileNameToFuncMap) {
     assert(I.second && "New function is null");
     SymbolMap->emplace(FunctionId(I.second->getName()), I.second);
   }
 
-  // Re-flatten the profiles after the renaming.
+  // Re-flatten the profiles after the matching.
   FlattenedProfiles.clear();
   ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
                                    FunctionSamples::ProfileIsCS);
+  ProfileNameToFuncMap.clear();
 }
 
 void SampleProfileMatcher::runOnFunction(Function &F) {
@@ -920,7 +910,8 @@ void SampleProfileMatcher::runOnFunction(Function &F) {
   runCFGMatching(F);
 }
 
-void SampleProfileMatcher::runOnModule(FunctionMap &SymMap) {
+void SampleProfileMatcher::runOnModule(
+    HashKeyMap<std::unordered_map, FunctionId, Function *> &SymMap) {
   ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
                                    FunctionSamples::ProfileIsCS);
   SymbolMap = &SymMap;
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll
index ef50ba024af3d..1f3cc588991d6 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll
@@ -1,7 +1,7 @@
 ; REQUIRES: x86_64-linux
 ; REQUIRES: asserts
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-renaming.prof --salvage-stale-profile --salvage-renamed-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-for-cg-matching=0 --min-bb-for-cg-matching=0 2>&1 | FileCheck %s
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-renaming.prof --salvage-stale-profile --salvage-renamed-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl --min-call-for-cg-matching=10 --min-bb-for-cg-matching=10 2>&1 | FileCheck %s  --check-prefix=TINY-FUNC
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-renaming.prof --salvage-stale-profile --salvage-unused-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-renaming.prof --salvage-stale-profile --salvage-unused-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl --min-call-count-for-cg-matching=10 --min-func-count-for-cg-matching=10 2>&1 | FileCheck %s  --check-prefix=TINY-FUNC
 
 
 ; CHECK: Function new_block_only is not in profile or symbol list table.

>From c61ee03b516986a4bdb9ffb7202f4bfeb86f04cc Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Mon, 3 Jun 2024 18:37:39 -0700
Subject: [PATCH 09/22] fix comment

---
 llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h | 2 +-
 llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp        | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index fa66890294945..a2cf62666ba40 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -34,7 +34,7 @@ class SampleProfileMatcher {
   // in the profile.
   StringMap<LocToLocMap> FuncMappings;
 
-  // Match state for an anchor/callsite or function.
+  // Match state for an anchor/callsite.
   enum class MatchState {
     Unknown = 0,
     // Initial match between input profile and current IR.
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 3b349cc461bce..8b4cdef996c76 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -630,8 +630,8 @@ void SampleProfileMatcher::findNewIRFunctions(
   }
 
   for (auto &F : M) {
-    // Skip declarations, as even if the function can be recognized renamed, we
-    // have nothing to do with it.
+    // Skip declarations, as even if the function can be matched, we have
+    // nothing to do with it.
     if (F.isDeclaration())
       continue;
 
@@ -797,7 +797,7 @@ void SampleProfileMatcher::matchProfileForNewFunctions(
   // Find the new candidate IR callees in the current caller scope.
   std::vector<Function *> *IRCalleesToMatch = nullptr;
   if (auto *IRCaller = findFuncByProfileName(FuncProfile.getFunction())) {
-    // No callees for external function, skip the rename matching.
+    // No callees for external function, skip the call graph matching.
     if (IRCaller->isDeclaration())
       return;
     IRCalleesToMatch = findNewIRCallees(*IRCaller, NewIRFunctions);

>From 81811c35436ff527f35b212bf5f504cb101cf344 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Sun, 9 Jun 2024 14:35:31 -0700
Subject: [PATCH 10/22] run matching in top-down order and along with CFG
 matching

---
 .../Transforms/IPO/SampleProfileMatcher.h     |  77 ++---
 llvm/lib/Transforms/IPO/SampleProfile.cpp     |   8 +-
 .../Transforms/IPO/SampleProfileMatcher.cpp   | 295 ++++++++----------
 .../non-probe-stale-profile-matching.ll       |  12 +-
 ...pseudo-probe-stale-profile-matching-LCS.ll |  22 +-
 .../pseudo-probe-stale-profile-renaming.ll    |  30 +-
 6 files changed, 199 insertions(+), 245 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index a2cf62666ba40..6016f3f9d7ac4 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -83,6 +83,8 @@ class SampleProfileMatcher {
   // candidate callees.
   std::unordered_map<Function *, std::vector<Function *>> FuncToNewCalleesMap;
 
+  HashKeyMap<std::unordered_map, FunctionId, Function *> NewIRFunctions;
+
   // Pointer to the Profile Symbol List in the reader.
   std::shared_ptr<ProfileSymbolList> PSL;
 
@@ -106,14 +108,14 @@ class SampleProfileMatcher {
       "unknown.indirect.callee";
 
 public:
-  SampleProfileMatcher(Module &M, SampleProfileReader &Reader,
-                       const PseudoProbeManager *ProbeManager,
-                       ThinOrFullLTOPhase LTOPhase,
-                       std::shared_ptr<ProfileSymbolList> PSL)
+  SampleProfileMatcher(
+      Module &M, SampleProfileReader &Reader,
+      const PseudoProbeManager *ProbeManager, ThinOrFullLTOPhase LTOPhase,
+      HashKeyMap<std::unordered_map, FunctionId, Function *> &SymMap,
+      std::shared_ptr<ProfileSymbolList> PSL)
       : M(M), Reader(Reader), ProbeManager(ProbeManager), LTOPhase(LTOPhase),
-        PSL(PSL) {};
-  void runOnModule(
-      HashKeyMap<std::unordered_map, FunctionId, Function *> &SymbolMap);
+        SymbolMap(&SymMap), PSL(PSL){};
+  void runOnModule(std::vector<Function *> &OrderedFuncList);
   void clearMatchingData() {
     // Do not clear FuncMappings, it stores IRLoc to ProfLoc remappings which
     // will be used for sample loader.
@@ -182,6 +184,11 @@ class SampleProfileMatcher {
   }
   void distributeIRToProfileLocationMap();
   void distributeIRToProfileLocationMap(FunctionSamples &FS);
+  // Check if the two functions are equal. If both functions are new, reuse the
+  // cached match result; a new match is run only if the last flag is false.
+  bool isFunctionEqual(const FunctionId &IRFuncName,
+                       const FunctionId &ProfileFuncName,
+                       bool MatchUnusedFunction);
   // This function implements the Myers diff algorithm used for stale profile
   // matching. The algorithm provides a simple and efficient way to find the
   // Longest Common Subsequence(LCS) or the Shortest Edit Script(SES) of two
@@ -192,55 +199,35 @@ class SampleProfileMatcher {
   // parts from the resulting SES are used to remap the IR locations to the
   // profile locations. As the number of function callsite is usually not big,
   // we currently just implements the basic greedy version(page 6 of the paper).
-  LocToLocMap
-  longestCommonSequence(const AnchorList &IRCallsiteAnchors,
-                        const AnchorList &ProfileCallsiteAnchors) const;
+  LocToLocMap longestCommonSequence(const AnchorList &IRCallsiteAnchors,
+                                    const AnchorList &ProfileCallsiteAnchors,
+                                    bool MatchUnusedFunction);
   void matchNonCallsiteLocs(const LocToLocMap &AnchorMatchings,
                             const AnchorMap &IRAnchors,
                             LocToLocMap &IRToProfileLocationMap);
   void runStaleProfileMatching(const Function &F, const AnchorMap &IRAnchors,
                                const AnchorMap &ProfileAnchors,
-                               LocToLocMap &IRToProfileLocationMap);
-  /// Find the existing or new matched function using the profile name.
-  ///
-  /// \returns The function pointer.
-  Function *findFuncByProfileName(const FunctionId &ProfileName) const;
-  /// Match the callee profile with the IR function. If the profile callee is
-  /// found in the SymbolMap, which means it's an original matched symbol, skip
-  /// the matching. Otherwise match the callee profile with all functions in
-  /// \p NewIRFuncsToMatch and save the match result into \p MatchResult.
-  ///
-  /// \param Caller The caller function.
-  /// \param ProfileCalleeName The profile callee name.
-  /// \param IRCalleesToMatch The new candidate IR callees in the same scope to
-  /// match.
-  /// \param MatchResult The matched result.
-  void matchCalleeProfile(
-      const FunctionId &Caller, const FunctionId &ProfileCalleeName,
-      const std::vector<Function *> *IRCalleesToMatch,
-      std::vector<std::pair<FunctionId, FunctionId>> &MatchResult);
-  std::vector<Function *> *
-  findNewIRCallees(Function &Caller,
-                   const StringMap<Function *> &NewIRFunctions);
+                               LocToLocMap &IRToProfileLocationMap,
+                               bool RunCFGMatching, bool RunCGMatching);
   bool functionMatchesProfileHelper(const Function &IRFunc,
                                     const FunctionId &ProfFunc);
-  /// Determine if the function matches profile by computing a similarity ratio
-  /// between two callsite anchors extracted from function and profile. If it's
-  /// above the threshold, the function matches the profile.
-  ///
-  /// \returns True if the function matches profile.
-  bool functionMatchesProfile(Function &IRFunc, const FunctionId &ProfFunc);
+  // Determine if the function matches profile by computing a similarity ratio
+  // between two callsite anchors extracted from function and profile. If it's
+  // above the threshold, the function matches the profile.
+  bool functionMatchesProfile(Function &IRFunc, const FunctionId &ProfFunc,
+                              bool FindOnly);
   void matchProfileForNewFunctions(const StringMap<Function *> &NewIRFunctions,
                                    FunctionSamples &FS);
-  /// Find functions that don't show in the profile or profile symbol list,
-  /// which are supposed to be new functions. We use them as the targets for
-  /// renaming matching.
-  ///
-  /// \param NewIRFunctions The map from function name to the IR function.
-  void findNewIRFunctions(StringMap<Function *> &NewIRFunctions);
+  // Find functions that don't show in the profile or profile symbol list,
+  // which are supposed to be new functions. We use them as the targets for
+  // renaming matching.
+  void findNewIRFunctions();
+  void updateProfillesAndSymbolMap();
+  void updateProfileWithNewName(FunctionSamples &FuncProfile);
+
   void clearCacheData() {
     FunctionProfileNameMap.clear();
-    FuncToNewCalleesMap.clear();
+    ProfileNameToFuncMap.clear();
   }
   void runCallGraphMatching();
   void reportOrPersistProfileStats();
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 6195ae049c75e..9be215591c1a5 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -2077,7 +2077,7 @@ bool SampleProfileLoader::doInitialization(Module &M,
   if (ReportProfileStaleness || PersistProfileStaleness ||
       SalvageStaleProfile) {
     MatchingManager = std::make_unique<SampleProfileMatcher>(
-        M, *Reader, ProbeManager.get(), LTOPhase, PSL);
+        M, *Reader, ProbeManager.get(), LTOPhase, SymbolMap, PSL);
   }
 
   return true;
@@ -2196,14 +2196,16 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
   assert(SymbolMap.count(FunctionId()) == 0 &&
          "No empty StringRef should be added in SymbolMap");
 
+  std::vector<Function *> OrderedFuncList = buildFunctionOrder(M, CG);
+
   if (ReportProfileStaleness || PersistProfileStaleness ||
       SalvageStaleProfile) {
-    MatchingManager->runOnModule(SymbolMap);
+    MatchingManager->runOnModule(OrderedFuncList);
     MatchingManager->clearMatchingData();
   }
 
   bool retval = false;
-  for (auto *F : buildFunctionOrder(M, CG)) {
+  for (auto *F : OrderedFuncList) {
     assert(!F->isDeclaration());
     clearFunctionData();
     retval |= runOnFunction(*F, AM);
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 8b4cdef996c76..13f22bcecb0e8 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -147,8 +147,37 @@ void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
   }
 }
 
-LocToLocMap SampleProfileMatcher::longestCommonSequence(
-    const AnchorList &AnchorList1, const AnchorList &AnchorList2) const {
+bool SampleProfileMatcher::isFunctionEqual(const FunctionId &IRFuncName,
+                                           const FunctionId &ProfileFuncName,
+                                           bool FindMatchedProfileOnly) {
+  if (IRFuncName == ProfileFuncName)
+    return true;
+  if (!SalvageUnusedProfile)
+    return false;
+  // If both IR function and profile function are new, try to match the profile
+  // function.
+
+  // Check whether IR function is new.
+  auto R = NewIRFunctions.find(IRFuncName);
+  if (R == NewIRFunctions.end() || !R->second)
+    return false;
+  Function &IRFunc = *R->second;
+  assert(FunctionId(IRFunc.getName()) != ProfileFuncName &&
+         "IR function should be different from profile function to match");
+
+  // Check whether profile function is new.
+  auto F = SymbolMap->find(ProfileFuncName);
+  if (F != SymbolMap->end())
+    return false;
+
+  return functionMatchesProfile(IRFunc, ProfileFuncName,
+                                FindMatchedProfileOnly);
+}
+
+LocToLocMap
+SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1,
+                                            const AnchorList &AnchorList2,
+                                            bool MatchUnusedFunction) {
   int32_t Size1 = AnchorList1.size(), Size2 = AnchorList2.size(),
           MaxDepth = Size1 + Size2;
   auto Index = [&](int32_t I) { return I + MaxDepth; };
@@ -209,7 +238,8 @@ LocToLocMap SampleProfileMatcher::longestCommonSequence(
         X = V[Index(K - 1)] + 1;
       Y = X - K;
       while (X < Size1 && Y < Size2 &&
-             AnchorList1[X].second == AnchorList2[Y].second)
+             isFunctionEqual(AnchorList1[X].second, AnchorList2[Y].second,
+                             !MatchUnusedFunction))
         X++, Y++;
 
       V[Index(K)] = X;
@@ -314,7 +344,10 @@ void SampleProfileMatcher::getFilteredAnchorList(
 // The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
 void SampleProfileMatcher::runStaleProfileMatching(
     const Function &F, const AnchorMap &IRAnchors,
-    const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap) {
+    const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap,
+    bool RunCFGMatching, bool RunCGMatching) {
+  if (!RunCFGMatching && !RunCGMatching)
+    return;
   LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
                     << "\n");
   assert(IRToProfileLocationMap.empty() &&
@@ -331,15 +364,22 @@ void SampleProfileMatcher::runStaleProfileMatching(
   // Match the callsite anchors by finding the longest common subsequence
   // between IR and profile. Note that we need to use IR anchor as base(A side)
   // to align with the order of IRToProfileLocationMap.
-  LocToLocMap MatchedAnchors =
-      longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList);
-
-  // Match the non-callsite locations and write the result to
-  // IRToProfileLocationMap.
-  matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap);
+  // This is also used for call graph matching. During the searching, if both
+  // the anchor from IR and profile are new functions but they are not same
+  // function, it could be due to the function is renamed, we run the matching
+  // for them and consider the anchor is equal if it's matched. The matching
+  // result is also used later to update the profile with new name.
+  LocToLocMap MatchedAnchors = longestCommonSequence(
+      FilteredIRAnchorsList, FilteredProfileAnchorList, RunCGMatching);
+
+  // Apply the CFG matching results: match the non-callsite locations and write
+  // the result to IRToProfileLocationMap. Note that CFG matching won't take
+  // effect if it's not written into IRToProfileLocationMap.
+  if (RunCFGMatching)
+    matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap);
 }
 
-void SampleProfileMatcher::runCFGMatching(Function &F) {
+void SampleProfileMatcher::runOnFunction(Function &F) {
   // We need to use flattened function samples for matching.
   // Unlike IR, which includes all callsites from the source code, the callsites
   // in profile only show up when they are hit by samples, i,e. the profile
@@ -364,25 +404,29 @@ void SampleProfileMatcher::runCFGMatching(Function &F) {
   if (ReportProfileStaleness || PersistProfileStaleness)
     recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, nullptr);
 
-  // For probe-based profiles, run matching only when the current profile is not
-  // valid.
-  if (SalvageStaleProfile && (!FunctionSamples::ProfileIsProbeBased ||
-                              !ProbeManager->profileIsValid(F, *FSFlattened))) {
+  if (SalvageStaleProfile) {
+    // For probe-based profiles, run matching only when profile checksum is
+    // mismatched.
+    bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased &&
+                            !ProbeManager->profileIsValid(F, *FSFlattened);
+    bool RunCFGMatching =
+        !FunctionSamples::ProfileIsProbeBased || ChecksumMismatch;
+    bool RunCGMatching = SalvageUnusedProfile;
     // For imported functions, the checksum metadata(pseudo_probe_desc) are
     // dropped, so we leverage function attribute(profile-checksum-mismatch) to
     // transfer the info: add the attribute during pre-link phase and check it
     // during post-link phase(see "profileIsValid").
-    if (FunctionSamples::ProfileIsProbeBased &&
-        LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink)
+    if (ChecksumMismatch && LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink)
       F.addFnAttr("profile-checksum-mismatch");
 
     // The matching result will be saved to IRToProfileLocationMap, create a
     // new map for each function.
     auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
     runStaleProfileMatching(F, IRAnchors, ProfileAnchors,
-                            IRToProfileLocationMap);
+                            IRToProfileLocationMap, RunCFGMatching,
+                            RunCGMatching);
     // Find and update callsite match states after matching.
-    if (ReportProfileStaleness || PersistProfileStaleness)
+    if (RunCFGMatching && (ReportProfileStaleness || PersistProfileStaleness))
       recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors,
                                 &IRToProfileLocationMap);
   }
@@ -618,8 +662,7 @@ void SampleProfileMatcher::computeAndReportProfileStaleness() {
   }
 }
 
-void SampleProfileMatcher::findNewIRFunctions(
-    StringMap<Function *> &NewIRFunctions) {
+void SampleProfileMatcher::findNewIRFunctions() {
   // TODO: Support MD5 profile.
   if (FunctionSamples::UseMD5)
     return;
@@ -653,46 +696,8 @@ void SampleProfileMatcher::findNewIRFunctions(
 
     LLVM_DEBUG(dbgs() << "Function " << CanonFName
                       << " is not in profile or symbol list table.\n");
-    NewIRFunctions[CanonFName] = &F;
-  }
-}
-
-std::vector<Function *> *SampleProfileMatcher::findNewIRCallees(
-    Function &Func, const StringMap<Function *> &NewIRFunctions) {
-  auto R = FuncToNewCalleesMap.try_emplace(&Func, std::vector<Function *>());
-  std::vector<Function *> &IRCalleesToMatch = R.first->second;
-  // Skip the lookup if it's in the cache.
-  if (!R.second)
-    return &IRCalleesToMatch;
-
-  for (auto &BB : Func) {
-    for (auto &I : BB) {
-      const auto *CB = dyn_cast<CallBase>(&I);
-      if (!CB || isa<IntrinsicInst>(&I))
-        continue;
-      Function *Callee = CB->getCalledFunction();
-      if (!Callee || Callee->isDeclaration())
-        continue;
-      StringRef CalleeName =
-          FunctionSamples::getCanonicalFnName(Callee->getName());
-      if (NewIRFunctions.count(CalleeName))
-        IRCalleesToMatch.push_back(Callee);
-    }
+    NewIRFunctions[FunctionId(CanonFName)] = &F;
   }
-  return &IRCalleesToMatch;
-}
-
-Function *SampleProfileMatcher::findFuncByProfileName(
-    const FunctionId &ProfileName) const {
-  auto F = SymbolMap->find(ProfileName);
-  if (F != SymbolMap->end())
-    return F->second;
-
-  // Find in new matched function map.
-  auto NewF = ProfileNameToFuncMap.find(ProfileName);
-  if (NewF != ProfileNameToFuncMap.end())
-    return NewF->second;
-  return nullptr;
 }
 
 bool SampleProfileMatcher::functionMatchesProfileHelper(
@@ -739,8 +744,12 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
     return false;
 
   // Use the diff algorithm to find the LCS between IR and profile.
-  LocToLocMap MatchedAnchors =
-      longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList);
+
+  // Don't recursively match the callee function to avoid infinite matching,
+  // callee functions should be handled later since it's processed in top-down
+  // order.
+  LocToLocMap MatchedAnchors = longestCommonSequence(
+      FilteredIRAnchorsList, FilteredProfileAnchorList, false);
 
   Similarity =
       static_cast<float>(MatchedAnchors.size()) * 2 /
@@ -754,70 +763,55 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
   return Similarity * 100 > FuncProfileSimilarityThreshold;
 }
 
+// If FindMatchedProfileOnly is set to true, only use the processed function
+// results. This is used for skipping the repeated recursive matching.
 bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc,
-                                                  const FunctionId &ProfFunc) {
+                                                  const FunctionId &ProfFunc,
+                                                  bool FindMatchedProfileOnly) {
   auto R = FunctionProfileNameMap.find({&IRFunc, ProfFunc});
   if (R != FunctionProfileNameMap.end())
     return R->second;
 
+  if (FindMatchedProfileOnly)
+    return false;
+
   bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc);
   FunctionProfileNameMap[{&IRFunc, ProfFunc}] = Matched;
-  ProfileNameToFuncMap[ProfFunc] = &IRFunc;
-  return Matched;
-}
-
-void SampleProfileMatcher::matchCalleeProfile(
-    const FunctionId &Caller, const FunctionId &ProfileCalleeName,
-    const std::vector<Function *> *IRCalleesToMatch,
-    std::vector<std::pair<FunctionId, FunctionId>> &MatchResult) {
-  if (!IRCalleesToMatch)
-    return;
-  // Check whether this is an existing function matching the profile, we only
-  // run the matching when the callee profile is unused.
-  auto F = SymbolMap->find(ProfileCalleeName);
-  if (F != SymbolMap->end())
-    return;
+  if (Matched) {
+    ProfileNameToFuncMap[ProfFunc] = &IRFunc;
+    LLVM_DEBUG(dbgs() << "Function:" << IRFunc.getName()
+                      << " matches profile:" << ProfFunc << "\n");
+  }
 
-  for (auto *IRFuncCandidate : *IRCalleesToMatch)
-    if (functionMatchesProfile(*IRFuncCandidate, ProfileCalleeName)) {
-      FunctionId IRCalleeName(IRFuncCandidate->getName());
-      assert(IRCalleeName != ProfileCalleeName &&
-             "New callee symbol is not a new function");
-      LLVM_DEBUG(dbgs() << "In function " << Caller
-                        << ", changing profile name from " << ProfileCalleeName
-                        << " to " << IRCalleeName << "\n");
-      MatchResult.emplace_back(IRCalleeName, ProfileCalleeName);
-      return;
-    }
+  return Matched;
 }
 
-// Traverse the profiled call-graph recursively to run the matching.
-void SampleProfileMatcher::matchProfileForNewFunctions(
-    const StringMap<Function *> &NewIRFunctions, FunctionSamples &FuncProfile) {
-  // Find the new candidate IR callees in the current caller scope.
-  std::vector<Function *> *IRCalleesToMatch = nullptr;
-  if (auto *IRCaller = findFuncByProfileName(FuncProfile.getFunction())) {
-    // No callees for external function, skip the call graph matching.
-    if (IRCaller->isDeclaration())
-      return;
-    IRCalleesToMatch = findNewIRCallees(*IRCaller, NewIRFunctions);
-  }
-  // Don't return here when IRCalleesToMatch is nullptr or empty, this is
-  // because even if there is no matching in the current scope, there could be
-  // matching in deeper callee scope/edge, so we need to keep traversing the
-  // call-graph. For IRCalleesToMatch is nullptr or empty case, later the
-  // matching function(matchCalleeProfile) will handle this to make it non-op.
+void SampleProfileMatcher::updateProfileWithNewName(
+    FunctionSamples &FuncProfile) {
+  auto FindNewMatch =
+      [&](const FunctionId &ProfileName,
+          std::vector<std::pair<FunctionId, FunctionId>> &MatchResult,
+          [[maybe_unused]] const FunctionId &CallerName) {
+        auto P = ProfileNameToFuncMap.find(ProfileName);
+        if (P != ProfileNameToFuncMap.end()) {
+          FunctionId IRCallee(P->second->getName());
+          assert(IRCallee != ProfileName &&
+                 "New callee symbol is not a new function");
+          LLVM_DEBUG(dbgs()
+                     << "Profile name is updated from " << ProfileName << " to "
+                     << IRCallee << " under caller: " << CallerName << "\n");
+          MatchResult.emplace_back(IRCallee, ProfileName);
+        }
+      };
 
-  // Match non-inline callees.
+  // Update non-inline callees.
   for (auto &BS : const_cast<BodySampleMap &>(FuncProfile.getBodySamples())) {
     // New function to old function pairs used to update the CallTargetMap.
     std::vector<std::pair<FunctionId, FunctionId>> MatchResult;
     SampleRecord::CallTargetMap &CTM =
         const_cast<SampleRecord::CallTargetMap &>(BS.second.getCallTargets());
     for (const auto &TS : CTM)
-      matchCalleeProfile(FuncProfile.getFunction(), TS.first, IRCalleesToMatch,
-                         MatchResult);
-
+      FindNewMatch(TS.first, MatchResult, FuncProfile.getFunction());
     // Update the CallTargetMap.
     for (const auto &P : MatchResult) {
       CTM[P.first] = CTM[P.second];
@@ -825,21 +819,17 @@ void SampleProfileMatcher::matchProfileForNewFunctions(
     }
   }
 
-  // Match inline callees.
+  // Update inline callees recursively.
   for (auto &CM :
        const_cast<CallsiteSampleMap &>(FuncProfile.getCallsiteSamples())) {
     auto &CalleeMap = CM.second;
     // New function to old function pairs used to update the CallsiteSampleMap.
     std::vector<std::pair<FunctionId, FunctionId>> MatchResult;
     for (auto &CS : CalleeMap) {
-      FunctionSamples &CalleeProfile = CS.second;
-      matchCalleeProfile(FuncProfile.getFunction(), CalleeProfile.getFunction(),
-                         IRCalleesToMatch, MatchResult);
-
-      // Traverse all the inlined callee profiles.
-      matchProfileForNewFunctions(NewIRFunctions, CalleeProfile);
+      FindNewMatch(CS.second.getFunction(), MatchResult,
+                   FuncProfile.getFunction());
+      updateProfileWithNewName(CS.second);
     }
-
     // Update the CalleeMap using the new name and remove the old entry.
     for (auto &P : MatchResult) {
       assert(P.first != P.second &&
@@ -852,73 +842,38 @@ void SampleProfileMatcher::matchProfileForNewFunctions(
   }
 }
 
-// Match the unused profile with new IR functions on the profiled call-graph.
-// The high-level steps for the algorithm:
-// 1) Find all the new functions that show only in the IR, use them as the
-// matching candidates to compute new callees.
-//
-// 2) Traverse all the nodes in the profiled call-graph.
-// For each function caller scope:
-//  a) Find a set of callees in the IR that doesn't exist in the profile. See
-//  findNewIRCallees.
-//  b) Find a set of callees in the profile that doesn't exist
-//  in the IR. See matchCalleeProfile.
-//  c) Match the callee pairs between a and b. Compute a similarity ratio
-//  between the pair, it's considered match if the similarity is above a given
-//  threshold. See MatchProfileForNewFunctions.
-//  d) Update the profile with the matched name in-place.
-void SampleProfileMatcher::runCallGraphMatching() {
-  if (!SalvageUnusedProfile)
-    return;
-  assert(SymbolMap && "SymbolMap is null");
-  assert(FunctionProfileNameMap.empty() &&
-         "FunctionProfileNameMap is not empty before the call graph matching");
-
-  StringMap<Function *> NewIRFunctions;
-  findNewIRFunctions(NewIRFunctions);
-  if (NewIRFunctions.empty())
-    return;
-
-  // Sort the profiles to make the matching order deterministic.
-  std::vector<NameFunctionSamples> SortedProfiles;
-  ::llvm::sortFuncProfiles(Reader.getProfiles(), SortedProfiles);
-  for (auto &P : SortedProfiles)
-    matchProfileForNewFunctions(NewIRFunctions,
-                                *const_cast<FunctionSamples *>(P.second));
-
-  clearCacheData();
+void SampleProfileMatcher::updateProfillesAndSymbolMap() {
   if (ProfileNameToFuncMap.empty())
     return;
-  // Update all the data generated by the old profile.
+  for (auto &P : Reader.getProfiles())
+    updateProfileWithNewName(P.second);
+
   // Add the new function to the SymbolMap, which will be used in
   // SampleLoader.
   for (auto &I : ProfileNameToFuncMap) {
     assert(I.second && "New function is null");
     SymbolMap->emplace(FunctionId(I.second->getName()), I.second);
   }
-
-  // Re-flatten the profiles after the matching.
-  FlattenedProfiles.clear();
-  ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
-                                   FunctionSamples::ProfileIsCS);
-  ProfileNameToFuncMap.clear();
-}
-
-void SampleProfileMatcher::runOnFunction(Function &F) {
-  if (skipProfileForFunction(F))
-    return;
-  runCFGMatching(F);
 }
 
 void SampleProfileMatcher::runOnModule(
-    HashKeyMap<std::unordered_map, FunctionId, Function *> &SymMap) {
+    std::vector<Function *> &OrderedFuncList) {
   ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
                                    FunctionSamples::ProfileIsCS);
-  SymbolMap = &SymMap;
-  runCallGraphMatching();
+  if (SalvageUnusedProfile)
+    findNewIRFunctions();
+
+  // Process the matching in top-down order so that the caller matching result
+  // can be used for the callee matching.
+  for (auto *F : OrderedFuncList) {
+    if (skipProfileForFunction(*F))
+      continue;
+    runOnFunction(*F);
+  }
 
-  for (auto &F : M)
-    runOnFunction(F);
+  // Update the profile map and symbol map with the new function name.
+  if (SalvageUnusedProfile)
+    updateProfillesAndSymbolMap();
 
   if (SalvageStaleProfile)
     distributeIRToProfileLocationMap();
diff --git a/llvm/test/Transforms/SampleProfile/non-probe-stale-profile-matching.ll b/llvm/test/Transforms/SampleProfile/non-probe-stale-profile-matching.ll
index 5394a00ced86a..3ca94a4563675 100644
--- a/llvm/test/Transforms/SampleProfile/non-probe-stale-profile-matching.ll
+++ b/llvm/test/Transforms/SampleProfile/non-probe-stale-profile-matching.ll
@@ -48,18 +48,18 @@
 ;    }
 ;  }
 
-; CHECK: Run stale profile matching for bar
-
-; CHECK: Run stale profile matching for foo
-; CHECK: Callsite with callee:bar is matched from 1.15 to 1.15
-; CHECK: Callsite with callee:bar is matched from 2 to 2
-
 ; CHECK: Run stale profile matching for main
 ; CHECK: Callsite with callee:foo is matched from 4 to 2
 ; CHECK: Callsite with callee:bar is matched from 5 to 3
 ; CHECK: Callsite with callee:foo is matched from 8 to 4
 ; CHECK: Callsite with callee:bar is matched from 9 to 5
 
+; CHECK: Run stale profile matching for foo
+; CHECK: Callsite with callee:bar is matched from 1.15 to 1.15
+; CHECK: Callsite with callee:bar is matched from 2 to 2
+
+; CHECK: Run stale profile matching for bar
+
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching-LCS.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching-LCS.ll
index ecf8484d98e59..610bc58161f56 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching-LCS.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching-LCS.ll
@@ -2,17 +2,6 @@
 ; REQUIRES: asserts
 ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-matching-LCS.prof --salvage-stale-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl 2>&1 | FileCheck %s
 
-; CHECK: Run stale profile matching for test_direct_call
-; CHECK: Location is matched from 1 to 1
-; CHECK: Location is matched from 2 to 2
-; CHECK: Location is matched from 3 to 3
-; CHECK: Callsite with callee:C is matched from 4 to 2
-; CHECK: Location is rematched backwards from 3 to 1
-; CHECK: Callsite with callee:A is matched from 5 to 4
-; CHECK: Callsite with callee:B is matched from 6 to 5
-; CHECK: Location is matched from 7 to 6
-; CHECK: Callsite with callee:A is matched from 8 to 6
-
 ; CHECK: Run stale profile matching for test_indirect_call
 ; CHECK: Location is matched from 1 to 1
 ; CHECK: Location is matched from 2 to 2
@@ -27,6 +16,17 @@
 ; CHECK: Callsite with callee:unknown.indirect.callee is matched from 9 to 6
 ; CHECK: Callsite with callee:C is matched from 10 to 7
 
+; CHECK: Run stale profile matching for test_direct_call
+; CHECK: Location is matched from 1 to 1
+; CHECK: Location is matched from 2 to 2
+; CHECK: Location is matched from 3 to 3
+; CHECK: Callsite with callee:C is matched from 4 to 2
+; CHECK: Location is rematched backwards from 3 to 1
+; CHECK: Callsite with callee:A is matched from 5 to 4
+; CHECK: Callsite with callee:B is matched from 6 to 5
+; CHECK: Location is matched from 7 to 6
+; CHECK: Callsite with callee:A is matched from 8 to 6
+
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll
index 1f3cc588991d6..00f9d0397f16f 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll
@@ -3,18 +3,28 @@
 ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-renaming.prof --salvage-stale-profile --salvage-unused-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 2>&1 | FileCheck %s
 ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-renaming.prof --salvage-stale-profile --salvage-unused-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl --min-call-count-for-cg-matching=10 --min-func-count-for-cg-matching=10 2>&1 | FileCheck %s  --check-prefix=TINY-FUNC
 
-
+; Verify find new IR functions.
 ; CHECK: Function new_block_only is not in profile or symbol list table.
 ; CHECK: Function new_foo is not in profile or symbol list table.
 
+; CHECK: Run stale profile matching for main
 ; CHECK: The similarity between new_foo(IR) and foo(profile) is 0.86
-; CHECK: In function main, changing profile name from foo to new_foo
-; CHECK: The checksums for new_block_only(IR) and block_only(Profile) match
-; CHECK: In function baz, changing profile name from block_only to new_block_only
-; CHECK: In function baz, changing profile name from block_only to new_block_only
-; CHECK: In function cold_func, changing profile name from block_only to new_block_only
-; CHECK: In function test_noninline, changing profile name from foo to new_foo
-; CHECK: In function baz, changing profile name from block_only to new_block_only
+; CHECK: Function:new_foo matches profile:foo
+; CHECK: Run stale profile matching for test_noninline
+; CHECK: Run stale profile matching for cold_func
+; CHECK: The checksums for new_block_only(IR) and block_only(Profile) match.
+; CHECK: Function:new_block_only matches profile:block_only
+; CHECK: Run stale profile matching for baz
+; CHECK: Run stale profile matching for bar
+
+; Verify profile new name update.
+; CHECK-DAG: Profile name is updated from foo to new_foo under caller: test_noninline
+; CHECK-DAG: Profile name is updated from block_only to new_block_only under caller: baz
+; CHECK-DAG: Profile name is updated from foo to new_foo under caller: main
+; CHECK-DAG: Profile name is updated from block_only to new_block_only under caller: baz
+; CHECK-DAG: Profile name is updated from block_only to new_block_only under caller: baz
+; CHECK-DAG: Profile name is updated from block_only to new_block_only under caller: cold_func
+
 
 ; Verify the matched function is updated correctly by checking the inlining.
 ; CHECK: 'new_foo' inlined into 'main' to match profiling context with (cost=110, threshold=3000) at callsite main:2:7.5;
@@ -22,8 +32,8 @@
 ; CHECK: 'new_block_only' inlined into 'main' to match profiling context with (cost=75, threshold=3000) at callsite baz:1:3.2 @ new_foo:2:3.3 @ main:2:7.5;
 ; CHECK: 'new_foo' inlined into 'test_noninline' to match profiling context with (cost=110, threshold=3000) at callsite test_noninline:1:3.2;
 
-; TINY-FUNC-NOT: block_only to new_block_only
-; TINY-FUNC-NOT: from foo to new_foo
+; TINY-FUNC-NOT: Function:new_foo matches profile:foo
+; TINY-FUNC-NOT: Function:new_block_only matches profile:block_only
 
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"

>From 4bdda81fcdcd475cc21a8f320f7b0379f941c174 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Sun, 9 Jun 2024 18:37:39 -0700
Subject: [PATCH 11/22] add stats

---
 .../Transforms/IPO/SampleProfileMatcher.h     | 18 +++++------
 .../Transforms/IPO/SampleProfileMatcher.cpp   | 31 +++++++++++++++++--
 .../pseudo-probe-stale-profile-renaming.ll    |  5 ++-
 3 files changed, 42 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index 6016f3f9d7ac4..3981034b2b0a3 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -79,10 +79,7 @@ class SampleProfileMatcher {
   // the profile is unused(to be matched) or not.
   HashKeyMap<std::unordered_map, FunctionId, Function *> *SymbolMap;
 
-  // A map from the caller to its new callees, this is used as a cache for the
-  // candidate callees.
-  std::unordered_map<Function *, std::vector<Function *>> FuncToNewCalleesMap;
-
+  // The new functions from IR.
   HashKeyMap<std::unordered_map, FunctionId, Function *> NewIRFunctions;
 
   // Pointer to the Profile Symbol List in the reader.
@@ -102,6 +99,10 @@ class SampleProfileMatcher {
   uint64_t MismatchedCallsiteSamples = 0;
   uint64_t RecoveredCallsiteSamples = 0;
 
+  // Profile call-graph matching statistics:
+  uint64_t NumRecoveredUnusedSamples = 0;
+  uint64_t NumRecoveredUnusedFunc = 0;
+
   // A dummy name for unknown indirect callee, used to differentiate from a
   // non-call instruction that also has an empty callee name.
   static constexpr const char *UnknownIndirectCallee =
@@ -121,6 +122,10 @@ class SampleProfileMatcher {
     // will be used for sample loader.
     FuncCallsiteMatchStates.clear();
     FlattenedProfiles.clear();
+
+    NewIRFunctions.clear();
+    FunctionProfileNameMap.clear();
+    ProfileNameToFuncMap.clear();
   }
 
 private:
@@ -224,11 +229,6 @@ class SampleProfileMatcher {
   void findNewIRFunctions();
   void updateProfillesAndSymbolMap();
   void updateProfileWithNewName(FunctionSamples &FuncProfile);
-
-  void clearCacheData() {
-    FunctionProfileNameMap.clear();
-    ProfileNameToFuncMap.clear();
-  }
   void runCallGraphMatching();
   void reportOrPersistProfileStats();
 };
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 13f22bcecb0e8..a6a29724a575a 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -592,6 +592,14 @@ void SampleProfileMatcher::computeAndReportProfileStaleness() {
   if (!ReportProfileStaleness && !PersistProfileStaleness)
     return;
 
+  if (SalvageUnusedProfile) {
+    for (const auto &I : ProfileNameToFuncMap) {
+      if (GlobalValue::isAvailableExternallyLinkage(I.second->getLinkage()))
+        continue;
+      NumRecoveredUnusedFunc++;
+    }
+  }
+
   // Count profile mismatches for profile staleness report.
   for (const auto &F : M) {
     if (skipProfileForFunction(F))
@@ -622,6 +630,13 @@ void SampleProfileMatcher::computeAndReportProfileStaleness() {
              << MismatchedFunctionSamples << "/" << TotalFunctionSamples
              << ") of samples are discarded due to function hash mismatch.\n";
     }
+    if (SalvageUnusedProfile) {
+      errs() << "(" << NumRecoveredUnusedFunc << "/" << TotalProfiledFunc
+             << ") of functions' profile are matched and ("
+             << NumRecoveredUnusedSamples << "/" << TotalFunctionSamples
+             << ") of samples are reused by call graph matching.\n";
+    }
+
     errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/"
            << TotalProfiledCallsites
            << ") of callsites' profile are invalid and ("
@@ -648,6 +663,13 @@ void SampleProfileMatcher::computeAndReportProfileStaleness() {
       ProfStatsVec.emplace_back("TotalFunctionSamples", TotalFunctionSamples);
     }
 
+    if (SalvageUnusedProfile) {
+      ProfStatsVec.emplace_back("NumRecoveredUnusedFunc",
+                                NumRecoveredUnusedFunc);
+      ProfStatsVec.emplace_back("NumRecoveredUnusedSamples",
+                                NumRecoveredUnusedSamples);
+    }
+
     ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites);
     ProfStatsVec.emplace_back("NumRecoveredCallsites", NumRecoveredCallsites);
     ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites);
@@ -746,7 +768,7 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
   // Use the diff algorithm to find the LCS between IR and profile.
 
   // Don't recursively match the callee function to avoid infinite matching,
-  // callee functions should be handled later since it's processed in top-down
+  // callee functions will be handled later since it's processed in top-down
   // order .
   LocToLocMap MatchedAnchors = longestCommonSequence(
       FilteredIRAnchorsList, FilteredProfileAnchorList, false);
@@ -814,7 +836,10 @@ void SampleProfileMatcher::updateProfileWithNewName(
       FindNewMatch(TS.first, MatchResult, FuncProfile.getFunction());
     // Update the CallTargetMap.
     for (const auto &P : MatchResult) {
-      CTM[P.first] = CTM[P.second];
+      uint64_t Samples = CTM[P.second];
+      if (ReportProfileStaleness || PersistProfileStaleness)
+        NumRecoveredUnusedSamples += Samples;
+      CTM[P.first] = Samples;
       CTM.erase(P.second);
     }
   }
@@ -835,6 +860,8 @@ void SampleProfileMatcher::updateProfileWithNewName(
       assert(P.first != P.second &&
              "Renamed function name should be different from the old map key");
       FunctionSamples &FS = CalleeMap[P.second];
+      if (ReportProfileStaleness || PersistProfileStaleness)
+        NumRecoveredUnusedSamples += FS.getTotalSamples();
       FS.setFunction(P.first);
       CalleeMap[P.first] = FS;
       CalleeMap.erase(P.second);
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll
index 00f9d0397f16f..7ac2c0f669f13 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll
@@ -1,6 +1,6 @@
 ; REQUIRES: x86_64-linux
 ; REQUIRES: asserts
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-renaming.prof --salvage-stale-profile --salvage-unused-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-renaming.prof --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -persist-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 2>&1 | FileCheck %s
 ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-renaming.prof --salvage-stale-profile --salvage-unused-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl --min-call-count-for-cg-matching=10 --min-func-count-for-cg-matching=10 2>&1 | FileCheck %s  --check-prefix=TINY-FUNC
 
 ; Verify find new IR functions.
@@ -25,6 +25,7 @@
 ; CHECK-DAG: Profile name is updated from block_only to new_block_only under caller: baz
 ; CHECK-DAG: Profile name is updated from block_only to new_block_only under caller: cold_func
 
+; CHECK: (2/3) of functions' profile are matched and (78/81) of samples are reused by call graph matching.
 
 ; Verify the matched function is updated correctly by checking the inlining.
 ; CHECK: 'new_foo' inlined into 'main' to match profiling context with (cost=110, threshold=3000) at callsite main:2:7.5;
@@ -32,6 +33,8 @@
 ; CHECK: 'new_block_only' inlined into 'main' to match profiling context with (cost=75, threshold=3000) at callsite baz:1:3.2 @ new_foo:2:3.3 @ main:2:7.5;
 ; CHECK: 'new_foo' inlined into 'test_noninline' to match profiling context with (cost=110, threshold=3000) at callsite test_noninline:1:3.2;
 
+; CHECK: !"NumRecoveredUnusedFunc", i64 2, !"NumRecoveredUnusedSamples", i64 78
+
 ; TINY-FUNC-NOT: Function:new_foo matches profile:foo
 ; TINY-FUNC-NOT: Function:new_block_only matches profile:block_only
 

>From df773949d9eb48e868b8da341b574d76c6aafc02 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Sun, 9 Jun 2024 19:22:53 -0700
Subject: [PATCH 12/22] FunctionProfileNameMap  to FuncToProfileNameMap and fix
 lint

---
 llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h | 6 +++---
 llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp        | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index 3981034b2b0a3..ad8e6f27c2a54 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -69,7 +69,7 @@ class SampleProfileMatcher {
   // matching result.
   std::unordered_map<std::pair<const Function *, FunctionId>, bool,
                      FuncProfNameMapHash>
-      FunctionProfileNameMap;
+      FuncToProfileNameMap;
   // The new functions found by the call graph matching. The map's key is the
   // old profile name and value is the new(renamed) function.
   HashKeyMap<std::unordered_map, FunctionId, Function *> ProfileNameToFuncMap;
@@ -115,7 +115,7 @@ class SampleProfileMatcher {
       HashKeyMap<std::unordered_map, FunctionId, Function *> &SymMap,
       std::shared_ptr<ProfileSymbolList> PSL)
       : M(M), Reader(Reader), ProbeManager(ProbeManager), LTOPhase(LTOPhase),
-        SymbolMap(&SymMap), PSL(PSL){};
+        SymbolMap(&SymMap), PSL(PSL) {};
   void runOnModule(std::vector<Function *> &OrderedFuncList);
   void clearMatchingData() {
     // Do not clear FuncMappings, it stores IRLoc to ProfLoc remappings which
@@ -124,7 +124,7 @@ class SampleProfileMatcher {
     FlattenedProfiles.clear();
 
     NewIRFunctions.clear();
-    FunctionProfileNameMap.clear();
+    FuncToProfileNameMap.clear();
     ProfileNameToFuncMap.clear();
   }
 
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index a6a29724a575a..65d6b13128099 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -790,15 +790,15 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
 bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc,
                                                   const FunctionId &ProfFunc,
                                                   bool FindMatchedProfileOnly) {
-  auto R = FunctionProfileNameMap.find({&IRFunc, ProfFunc});
-  if (R != FunctionProfileNameMap.end())
+  auto R = FuncToProfileNameMap.find({&IRFunc, ProfFunc});
+  if (R != FuncToProfileNameMap.end())
     return R->second;
 
   if (FindMatchedProfileOnly)
     return false;
 
   bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc);
-  FunctionProfileNameMap[{&IRFunc, ProfFunc}] = Matched;
+  FuncToProfileNameMap[{&IRFunc, ProfFunc}] = Matched;
   if (Matched) {
     ProfileNameToFuncMap[ProfFunc] = &IRFunc;
     LLVM_DEBUG(dbgs() << "Function:" << IRFunc.getName()

>From d00140636552bf834b866df93503f726077480da Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Tue, 11 Jun 2024 09:01:12 -0700
Subject: [PATCH 13/22] fix varibale name

---
 .../llvm/Transforms/IPO/SampleProfileMatcher.h     | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index ad8e6f27c2a54..7729b6fb1890d 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -58,7 +58,7 @@ class SampleProfileMatcher {
   StringMap<std::unordered_map<LineLocation, MatchState, LineLocationHash>>
       FuncCallsiteMatchStates;
 
-  struct FuncProfNameMapHash {
+  struct FuncToProfileNameMapHash {
     uint64_t
     operator()(const std::pair<const Function *, FunctionId> &P) const {
       return hash_combine(P.first, P.second);
@@ -68,7 +68,7 @@ class SampleProfileMatcher {
   // indicating whether they are matched. This is used as a cache for the
   // matching result.
   std::unordered_map<std::pair<const Function *, FunctionId>, bool,
-                     FuncProfNameMapHash>
+                     FuncToProfileNameMapHash>
       FuncToProfileNameMap;
   // The new functions found by the call graph matching. The map's key is the
   // old profile name and value is the new(renamed) function.
@@ -143,7 +143,6 @@ class SampleProfileMatcher {
                              const AnchorMap &ProfileAnchors,
                              AnchorList &FilteredIRAnchorsList,
                              AnchorList &FilteredProfileAnchorList);
-  void runCFGMatching(Function &F);
   void runOnFunction(Function &F);
   void findIRAnchors(const Function &F, AnchorMap &IRAnchors) const;
   void findProfileAnchors(const FunctionSamples &FS,
@@ -189,11 +188,12 @@ class SampleProfileMatcher {
   }
   void distributeIRToProfileLocationMap();
   void distributeIRToProfileLocationMap(FunctionSamples &FS);
-  // Check if the two functions are equal. If MatchUnusedFunction is set and the
-  // two functions are both new, try to match the two functions.
+  // Check if the two functions are equal. If FindMatchedProfileOnly is set,
+  // only search the existing matched function. Otherwise, if the two functions
+  // are both new, try to match the two functions.
   bool isFunctionEqual(const FunctionId &IRFuncName,
                        const FunctionId &ProfileFuncName,
-                       bool MatchUnusedFunction);
+                       bool FindMatchedProfileOnly);
   // This function implements the Myers diff algorithm used for stale profile
   // matching. The algorithm provides a simple and efficient way to find the
   // Longest Common Subsequence(LCS) or the Shortest Edit Script(SES) of two
@@ -220,7 +220,7 @@ class SampleProfileMatcher {
   // between two callsite anchors extracted from function and profile. If it's
   // above the threshold, the function matches the profile.
   bool functionMatchesProfile(Function &IRFunc, const FunctionId &ProfFunc,
-                              bool FindOnly);
+                              bool FindMatchedProfileOnly);
   void matchProfileForNewFunctions(const StringMap<Function *> &NewIRFunctions,
                                    FunctionSamples &FS);
   // Find functions that don't show in the profile or profile symbol list,

>From 8fc8f5451bcae8865e38a6c0e861bca40b9c36cc Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Wed, 12 Jun 2024 16:04:30 -0700
Subject: [PATCH 14/22] fix typo and incorrect comments

---
 .../Transforms/IPO/SampleProfileMatcher.h     |  6 ++--
 .../Transforms/IPO/SampleProfileMatcher.cpp   | 35 ++++++++++---------
 2 files changed, 22 insertions(+), 19 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index 7729b6fb1890d..b53b2d518a563 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -189,8 +189,8 @@ class SampleProfileMatcher {
   void distributeIRToProfileLocationMap();
   void distributeIRToProfileLocationMap(FunctionSamples &FS);
   // Check if the two functions are equal. If FindMatchedProfileOnly is set,
-  // only search the existing matched function. Otherwise, if the two functions
-  // are both new, try to match the two functions.
+  // only search the existing matched function. Otherwise, try matching the two
+  // functions.
   bool isFunctionEqual(const FunctionId &IRFuncName,
                        const FunctionId &ProfileFuncName,
                        bool FindMatchedProfileOnly);
@@ -227,7 +227,7 @@ class SampleProfileMatcher {
   // which are supposed to be new functions. We use them as the targets for
   // renaming matching.
   void findNewIRFunctions();
-  void updateProfillesAndSymbolMap();
+  void updateProfilesAndSymbolMap();
   void updateProfileWithNewName(FunctionSamples &FuncProfile);
   void runCallGraphMatching();
   void reportOrPersistProfileStats();
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 65d6b13128099..73989e44e6f70 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -154,10 +154,10 @@ bool SampleProfileMatcher::isFunctionEqual(const FunctionId &IRFuncName,
     return true;
   if (!SalvageUnusedProfile)
     return false;
-  // If both IR function and profile function are new, try to match the profile
-  // function.
+  // If IR function and profile function don't appear on either side, try
+  // matching the profile function.
 
-  // Check whether IR function is new.
+  // Check whether IR function appears in profile.
   auto R = NewIRFunctions.find(IRFuncName);
   if (R == NewIRFunctions.end() || !R->second)
     return false;
@@ -165,7 +165,7 @@ bool SampleProfileMatcher::isFunctionEqual(const FunctionId &IRFuncName,
   assert(FunctionId(IRFunc.getName()) != ProfileFuncName &&
          "IR function should be different from profile function to match");
 
-  // Check whether profile function is new.
+  // Check whether profile function appears in IR.
   auto F = SymbolMap->find(ProfileFuncName);
   if (F != SymbolMap->end())
     return false;
@@ -362,19 +362,22 @@ void SampleProfileMatcher::runStaleProfileMatching(
     return;
 
   // Match the callsite anchors by finding the longest common subsequence
-  // between IR and profile. Note that we need to use IR anchor as base(A side)
-  // to align with the order of IRToProfileLocationMap.
-  // This is also used for call graph matching. During the searching, if both
-  // the anchor from IR and prfile are new functions but they are not same
-  // function, it could be due to the function is renamed, we run the matching
-  // for them and consider the anchor is equal if it's matched. The matching
-  // result is also used later to update the profile with new name.
+  // between IR and profile.
+  // Define a match between two anchors as follows:
+  // 1) The function names of anchors are the same.
+  // 2) The similarity between the anchor functions is above a threshold if
+  // RunCGMatching is set.
+  // For 2), to reduce the matching scope, we only consider anchor functions
+  // from IR and profile that don't appear on the other side. Note that we
+  // need to use IR anchor as base(A side) to align with the order of
+  // IRToProfileLocationMap.
   LocToLocMap MatchedAnchors = longestCommonSequence(
       FilteredIRAnchorsList, FilteredProfileAnchorList, RunCGMatching);
 
-  // Apply the CFG matching results: match the non-callsite locations and write
-  // the result to IRToProfileLocationMap. Note that CFG matching won't take
-  // effect if it's not written into IRToProfileLocationMap.
+  // CFG level matching:
+  // Apply the callsite matchings to infer matching for the basic
+  // block(non-callsite) locations and write the result to
+  // IRToProfileLocationMap.
   if (RunCFGMatching)
     matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap);
 }
@@ -869,7 +872,7 @@ void SampleProfileMatcher::updateProfileWithNewName(
   }
 }
 
-void SampleProfileMatcher::updateProfillesAndSymbolMap() {
+void SampleProfileMatcher::updateProfilesAndSymbolMap() {
   if (ProfileNameToFuncMap.empty())
     return;
   for (auto &P : Reader.getProfiles())
@@ -900,7 +903,7 @@ void SampleProfileMatcher::runOnModule(
 
   // Update the profile map and symbol map with the new function name.
   if (SalvageUnusedProfile)
-    updateProfillesAndSymbolMap();
+    updateProfilesAndSymbolMap();
 
   if (SalvageStaleProfile)
     distributeIRToProfileLocationMap();

>From ecc40001e564b302f11365ad75bd65f78c55690e Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Fri, 14 Jun 2024 16:28:51 -0700
Subject: [PATCH 15/22] addressing comment: build its own top-down function for
 matcher

---
 .../Transforms/IPO/SampleProfileMatcher.h     | 27 ++++++-------
 .../Utils/SampleProfileLoaderBaseImpl.h       | 16 ++++++++
 llvm/lib/Transforms/IPO/SampleProfile.cpp     | 40 +++++++------------
 .../Transforms/IPO/SampleProfileMatcher.cpp   | 30 ++++++++++----
 ...-pm-thinlto-postlink-samplepgo-defaults.ll |  2 +-
 ...w-pm-thinlto-prelink-samplepgo-defaults.ll |  2 +-
 .../pseudo-probe-stale-profile-renaming.ll    |  2 +-
 7 files changed, 68 insertions(+), 51 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index b53b2d518a563..09b8211c4ffaa 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -26,6 +26,7 @@ using AnchorMap = std::map<LineLocation, FunctionId>;
 class SampleProfileMatcher {
   Module &M;
   SampleProfileReader &Reader;
+  LazyCallGraph &CG;
   const PseudoProbeManager *ProbeManager;
   const ThinOrFullLTOPhase LTOPhase;
   SampleProfileMap FlattenedProfiles;
@@ -110,13 +111,13 @@ class SampleProfileMatcher {
 
 public:
   SampleProfileMatcher(
-      Module &M, SampleProfileReader &Reader,
+      Module &M, SampleProfileReader &Reader, LazyCallGraph &CG,
       const PseudoProbeManager *ProbeManager, ThinOrFullLTOPhase LTOPhase,
       HashKeyMap<std::unordered_map, FunctionId, Function *> &SymMap,
       std::shared_ptr<ProfileSymbolList> PSL)
-      : M(M), Reader(Reader), ProbeManager(ProbeManager), LTOPhase(LTOPhase),
-        SymbolMap(&SymMap), PSL(PSL) {};
-  void runOnModule(std::vector<Function *> &OrderedFuncList);
+      : M(M), Reader(Reader), CG(CG), ProbeManager(ProbeManager),
+        LTOPhase(LTOPhase), SymbolMap(&SymMap), PSL(PSL){};
+  void runOnModule();
   void clearMatchingData() {
     // Do not clear FuncMappings, it stores IRLoc to ProfLoc remappings which
     // will be used for sample loader.
@@ -143,6 +144,7 @@ class SampleProfileMatcher {
                              const AnchorMap &ProfileAnchors,
                              AnchorList &FilteredIRAnchorsList,
                              AnchorList &FilteredProfileAnchorList);
+  std::vector<Function *> buildTopDownFuncOrder();
   void runOnFunction(Function &F);
   void findIRAnchors(const Function &F, AnchorMap &IRAnchors) const;
   void findProfileAnchors(const FunctionSamples &FS,
@@ -188,12 +190,6 @@ class SampleProfileMatcher {
   }
   void distributeIRToProfileLocationMap();
   void distributeIRToProfileLocationMap(FunctionSamples &FS);
-  // Check if the two functions are equal. If FindMatchedProfileOnly is set,
-  // only search the existing matched function. Otherwise, try matching the two
-  // functions.
-  bool isFunctionEqual(const FunctionId &IRFuncName,
-                       const FunctionId &ProfileFuncName,
-                       bool FindMatchedProfileOnly);
   // This function implements the Myers diff algorithm used for stale profile
   // matching. The algorithm provides a simple and efficient way to find the
   // Longest Common Subsequence(LCS) or the Shortest Edit Script(SES) of two
@@ -216,20 +212,23 @@ class SampleProfileMatcher {
                                bool RunCFGMatching, bool RunCGMatching);
   bool functionMatchesProfileHelper(const Function &IRFunc,
                                     const FunctionId &ProfFunc);
+  // Determine if the function matches profile. If FindMatchedProfileOnly is
+  // set, only search the existing matched function. Otherwise, try matching the
+  // two functions.
+  bool functionMatchesProfile(const FunctionId &IRFuncName,
+                              const FunctionId &ProfileFuncName,
+                              bool FindMatchedProfileOnly);
   // Determine if the function matches profile by computing a similarity ratio
   // between two callsite anchors extracted from function and profile. If it's
   // above the threshold, the function matches the profile.
   bool functionMatchesProfile(Function &IRFunc, const FunctionId &ProfFunc,
                               bool FindMatchedProfileOnly);
-  void matchProfileForNewFunctions(const StringMap<Function *> &NewIRFunctions,
-                                   FunctionSamples &FS);
   // Find functions that don't show in the profile or profile symbol list,
   // which are supposed to be new functions. We use them as the targets for
-  // renaming matching.
+  // call graph matching.
   void findNewIRFunctions();
   void updateProfilesAndSymbolMap();
   void updateProfileWithNewName(FunctionSamples &FuncProfile);
-  void runCallGraphMatching();
   void reportOrPersistProfileStats();
 };
 } // end namespace llvm
diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
index 7c725a3c1216c..1898d126ddce9 100644
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
@@ -22,6 +22,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/PostDominators.h"
@@ -155,6 +156,21 @@ static inline bool skipProfileForFunction(const Function &F) {
   return F.isDeclaration() || !F.hasFnAttribute("use-sample-profile");
 }
 
+static inline void
+buildTopDownFuncOrder(LazyCallGraph &CG,
+                      std::vector<Function *> &FunctionOrderList) {
+  CG.buildRefSCCs();
+  for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) {
+    for (LazyCallGraph::SCC &C : RC) {
+      for (LazyCallGraph::Node &N : C) {
+        Function &F = N.getFunction();
+        if (!skipProfileForFunction(F))
+          FunctionOrderList.push_back(&F);
+      }
+    }
+  }
+}
+
 template <typename FT> class SampleProfileLoaderBaseImpl {
 public:
   SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName,
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 9be215591c1a5..2f3747b7bf0bc 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -462,12 +462,13 @@ class SampleProfileLoader final : public SampleProfileLoaderBaseImpl<Function> {
       IntrusiveRefCntPtr<vfs::FileSystem> FS,
       std::function<AssumptionCache &(Function &)> GetAssumptionCache,
       std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
-      std::function<const TargetLibraryInfo &(Function &)> GetTLI)
+      std::function<const TargetLibraryInfo &(Function &)> GetTLI,
+      LazyCallGraph &CG)
       : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName),
                                     std::move(FS)),
         GetAC(std::move(GetAssumptionCache)),
         GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),
-        LTOPhase(LTOPhase),
+        CG(CG), LTOPhase(LTOPhase),
         AnnotatedPassName(AnnotateSampleProfileInlinePhase
                               ? llvm::AnnotateInlinePassName(InlineContext{
                                     LTOPhase, InlinePass::SampleProfileInliner})
@@ -475,7 +476,7 @@ class SampleProfileLoader final : public SampleProfileLoaderBaseImpl<Function> {
 
   bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
   bool runOnModule(Module &M, ModuleAnalysisManager *AM,
-                   ProfileSummaryInfo *_PSI, LazyCallGraph &CG);
+                   ProfileSummaryInfo *_PSI);
 
 protected:
   bool runOnFunction(Function &F, ModuleAnalysisManager *AM);
@@ -530,6 +531,7 @@ class SampleProfileLoader final : public SampleProfileLoaderBaseImpl<Function> {
   std::function<AssumptionCache &(Function &)> GetAC;
   std::function<TargetTransformInfo &(Function &)> GetTTI;
   std::function<const TargetLibraryInfo &(Function &)> GetTLI;
+  LazyCallGraph &CG;
 
   /// Profile tracker for different context.
   std::unique_ptr<SampleContextTracker> ContextTracker;
@@ -1934,18 +1936,8 @@ SampleProfileLoader::buildFunctionOrder(Module &M, LazyCallGraph &CG) {
       }
       ++CGI;
     }
-  } else {
-    CG.buildRefSCCs();
-    for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) {
-      for (LazyCallGraph::SCC &C : RC) {
-        for (LazyCallGraph::Node &N : C) {
-          Function &F = N.getFunction();
-          if (!skipProfileForFunction(F))
-            FunctionOrderList.push_back(&F);
-        }
-      }
-    }
-  }
+  } else
+    buildTopDownFuncOrder(CG, FunctionOrderList);
 
   std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
 
@@ -2077,7 +2069,7 @@ bool SampleProfileLoader::doInitialization(Module &M,
   if (ReportProfileStaleness || PersistProfileStaleness ||
       SalvageStaleProfile) {
     MatchingManager = std::make_unique<SampleProfileMatcher>(
-        M, *Reader, ProbeManager.get(), LTOPhase, SymbolMap, PSL);
+        M, *Reader, CG, ProbeManager.get(), LTOPhase, SymbolMap, PSL);
   }
 
   return true;
@@ -2147,8 +2139,7 @@ void SampleProfileLoader::removePseudoProbeInsts(Module &M) {
 }
 
 bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
-                                      ProfileSummaryInfo *_PSI,
-                                      LazyCallGraph &CG) {
+                                      ProfileSummaryInfo *_PSI) {
   GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
 
   PSI = _PSI;
@@ -2196,16 +2187,14 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
   assert(SymbolMap.count(FunctionId()) == 0 &&
          "No empty StringRef should be added in SymbolMap");
 
-  std::vector<Function *> OrderedFuncList = buildFunctionOrder(M, CG);
-
   if (ReportProfileStaleness || PersistProfileStaleness ||
       SalvageStaleProfile) {
-    MatchingManager->runOnModule(OrderedFuncList);
+    MatchingManager->runOnModule();
     MatchingManager->clearMatchingData();
   }
 
   bool retval = false;
-  for (auto *F : OrderedFuncList) {
+  for (auto *F : buildFunctionOrder(M, CG)) {
     assert(!F->isDeclaration());
     clearFunctionData();
     retval |= runOnFunction(*F, AM);
@@ -2332,19 +2321,18 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
 
   if (!FS)
     FS = vfs::getRealFileSystem();
+  LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M);
 
   SampleProfileLoader SampleLoader(
       ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
       ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
                                        : ProfileRemappingFileName,
-      LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI);
-
+      LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI, CG);
   if (!SampleLoader.doInitialization(M, &FAM))
     return PreservedAnalyses::all();
 
   ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
-  LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M);
-  if (!SampleLoader.runOnModule(M, &AM, PSI, CG))
+  if (!SampleLoader.runOnModule(M, &AM, PSI))
     return PreservedAnalyses::all();
 
   return PreservedAnalyses::none();
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 73989e44e6f70..c59aad1393612 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -147,9 +147,9 @@ void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
   }
 }
 
-bool SampleProfileMatcher::isFunctionEqual(const FunctionId &IRFuncName,
-                                           const FunctionId &ProfileFuncName,
-                                           bool FindMatchedProfileOnly) {
+bool SampleProfileMatcher::functionMatchesProfile(
+    const FunctionId &IRFuncName, const FunctionId &ProfileFuncName,
+    bool FindMatchedProfileOnly) {
   if (IRFuncName == ProfileFuncName)
     return true;
   if (!SalvageUnusedProfile)
@@ -238,8 +238,9 @@ SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1,
         X = V[Index(K - 1)] + 1;
       Y = X - K;
       while (X < Size1 && Y < Size2 &&
-             isFunctionEqual(AnchorList1[X].second, AnchorList2[Y].second,
-                             !MatchUnusedFunction))
+             functionMatchesProfile(AnchorList1[X].second,
+                                    AnchorList2[Y].second,
+                                    !MatchUnusedFunction))
         X++, Y++;
 
       V[Index(K)] = X;
@@ -886,8 +887,21 @@ void SampleProfileMatcher::updateProfilesAndSymbolMap() {
   }
 }
 
-void SampleProfileMatcher::runOnModule(
-    std::vector<Function *> &OrderedFuncList) {
+std::vector<Function *> SampleProfileMatcher::buildTopDownFuncOrder() {
+  std::vector<Function *> FunctionOrderList;
+  FunctionOrderList.reserve(M.size());
+  ::buildTopDownFuncOrder(CG, FunctionOrderList);
+  std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
+  LLVM_DEBUG({
+    dbgs() << "Function processing order:\n";
+    for (auto F : FunctionOrderList) {
+      dbgs() << F->getName() << "\n";
+    }
+  });
+  return FunctionOrderList;
+}
+
+void SampleProfileMatcher::runOnModule() {
   ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
                                    FunctionSamples::ProfileIsCS);
   if (SalvageUnusedProfile)
@@ -895,7 +909,7 @@ void SampleProfileMatcher::runOnModule(
 
   // Process the matching in top-down order so that the caller matching result
   // can be used to the callee matching.
-  for (auto *F : OrderedFuncList) {
+  for (auto *F : buildTopDownFuncOrder()) {
     if (skipProfileForFunction(*F))
       continue;
     runOnFunction(*F);
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
index ac80a31d8fd4b..e5aebc4850e6d 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
@@ -31,9 +31,9 @@
 ; CHECK-EP-PIPELINE-START: Running pass: NoOpModulePass
 ; CHECK-O: Running pass: SampleProfileLoaderPass
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
-; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis
 ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
 ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis
+; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
 ; CHECK-O-NEXT: Running pass: PGOIndirectCallPromotion
 ; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
index 210a4ef1f7664..0bb26330d000a 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
@@ -44,8 +44,8 @@
 ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis
 ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass
 ; CHECK-O-NEXT: Running pass: SampleProfileLoaderPass
-; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis
 ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
+; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
 ; CHECK-O-NEXT: Running pass: OpenMPOptPass
 ; CHECK-O-NEXT: Running pass: IPSCCPPass
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll
index 7ac2c0f669f13..9b1a56de0328c 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll
@@ -10,10 +10,10 @@
 ; CHECK: Run stale profile matching for main
 ; CHECK: The similarity between new_foo(IR) and foo(profile) is 0.86
 ; CHECK: Function:new_foo matches profile:foo
-; CHECK: Run stale profile matching for test_noninline
 ; CHECK: Run stale profile matching for cold_func
 ; CHECK: The checksums for new_block_only(IR) and block_only(Profile) match.
 ; CHECK: Function:new_block_only matches profile:block_only
+; CHECK: Run stale profile matching for test_noninline
 ; CHECK: Run stale profile matching for baz
 ; CHECK: Run stale profile matching for bar
 

>From da8b752f2a427ac5abbdef09ef319836e2865bad Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Mon, 17 Jun 2024 11:40:05 -0700
Subject: [PATCH 16/22] addressing comment

---
 .../Transforms/IPO/SampleProfileMatcher.h     | 22 +++--
 .../Utils/SampleProfileLoaderBaseImpl.h       |  4 +-
 llvm/lib/Transforms/IPO/SampleProfile.cpp     |  2 +-
 .../Transforms/IPO/SampleProfileMatcher.cpp   | 94 ++++++++++---------
 .../pseudo-probe-stale-profile-renaming.ll    |  6 +-
 5 files changed, 70 insertions(+), 58 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index 09b8211c4ffaa..c8d40d0ca6992 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -101,8 +101,8 @@ class SampleProfileMatcher {
   uint64_t RecoveredCallsiteSamples = 0;
 
   // Profile call-graph matching statstics:
-  uint64_t NumRecoveredUnusedSamples = 0;
-  uint64_t NumRecoveredUnusedFunc = 0;
+  uint64_t NumCallGraphRecoveredProfiledFunc = 0;
+  uint64_t NumCallGraphRecoveredFuncSamples = 0;
 
   // A dummy name for unknown indirect callee, used to differentiate from a
   // non-call instruction that also has an empty callee name.
@@ -116,17 +116,17 @@ class SampleProfileMatcher {
       HashKeyMap<std::unordered_map, FunctionId, Function *> &SymMap,
       std::shared_ptr<ProfileSymbolList> PSL)
       : M(M), Reader(Reader), CG(CG), ProbeManager(ProbeManager),
-        LTOPhase(LTOPhase), SymbolMap(&SymMap), PSL(PSL){};
+        LTOPhase(LTOPhase), SymbolMap(&SymMap), PSL(PSL) {};
   void runOnModule();
   void clearMatchingData() {
     // Do not clear FuncMappings, it stores IRLoc to ProfLoc remappings which
     // will be used for sample loader.
-    FuncCallsiteMatchStates.clear();
-    FlattenedProfiles.clear();
+    freeContainer(FuncCallsiteMatchStates);
+    freeContainer(FlattenedProfiles);
 
-    NewIRFunctions.clear();
-    FuncToProfileNameMap.clear();
-    ProfileNameToFuncMap.clear();
+    freeContainer(NewIRFunctions);
+    freeContainer(ProfileNameToFuncMap);
+    freeContainer(FuncToProfileNameMap);
   }
 
 private:
@@ -140,6 +140,10 @@ class SampleProfileMatcher {
     StringRef CanonFName = FunctionSamples::getCanonicalFnName(F);
     return getFlattenedSamplesFor(FunctionId(CanonFName));
   }
+  template <typename T> inline void freeContainer(T &C) {
+    T Empty;
+    std::swap(C, Empty);
+  }
   void getFilteredAnchorList(const AnchorMap &IRAnchors,
                              const AnchorMap &ProfileAnchors,
                              AnchorList &FilteredIRAnchorsList,
@@ -210,6 +214,8 @@ class SampleProfileMatcher {
                                const AnchorMap &ProfileAnchors,
                                LocToLocMap &IRToProfileLocationMap,
                                bool RunCFGMatching, bool RunCGMatching);
+  Function *findIfFunctionIsNew(const FunctionId &IRFuncName);
+  bool isProfileUnused(const FunctionId &ProfileFuncName);
   bool functionMatchesProfileHelper(const Function &IRFunc,
                                     const FunctionId &ProfFunc);
   // Determine if the function matches profile. If FindMatchedProfileOnly is
diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
index 1898d126ddce9..7ca64df32c3aa 100644
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
@@ -157,8 +157,8 @@ static inline bool skipProfileForFunction(const Function &F) {
 }
 
 static inline void
-buildTopDownFuncOrder(LazyCallGraph &CG,
-                      std::vector<Function *> &FunctionOrderList) {
+buildBottomUpFuncOrder(LazyCallGraph &CG,
+                       std::vector<Function *> &FunctionOrderList) {
   CG.buildRefSCCs();
   for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) {
     for (LazyCallGraph::SCC &C : RC) {
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 2f3747b7bf0bc..78fa2e7f3635e 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -1937,7 +1937,7 @@ SampleProfileLoader::buildFunctionOrder(Module &M, LazyCallGraph &CG) {
       ++CGI;
     }
   } else
-    buildTopDownFuncOrder(CG, FunctionOrderList);
+    buildBottomUpFuncOrder(CG, FunctionOrderList);
 
   std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
 
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index c59aad1393612..a328ebc9c9992 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -27,8 +27,8 @@ static cl::opt<bool> SalvageUnusedProfile(
 
 static cl::opt<unsigned> FuncProfileSimilarityThreshold(
     "func-profile-similarity-threshold", cl::Hidden, cl::init(80),
-    cl::desc("The profile matches the function if their similarity is above "
-             "the given number(percentage)."));
+    cl::desc("Consider a profile matches a function if the similarity of their "
+             "callee sequences is above the specified percentile."));
 
 static cl::opt<unsigned> MinFuncCountForCGMatching(
     "min-func-count-for-cg-matching", cl::Hidden, cl::init(5),
@@ -147,6 +147,19 @@ void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
   }
 }
 
+Function *
+SampleProfileMatcher::findIfFunctionIsNew(const FunctionId &IRFuncName) {
+  auto R = NewIRFunctions.find(IRFuncName);
+  if (R == NewIRFunctions.end())
+    return nullptr;
+  return R->second;
+}
+
+bool SampleProfileMatcher::isProfileUnused(const FunctionId &ProfileFuncName) {
+  auto F = SymbolMap->find(ProfileFuncName);
+  return F == SymbolMap->end();
+}
+
 bool SampleProfileMatcher::functionMatchesProfile(
     const FunctionId &IRFuncName, const FunctionId &ProfileFuncName,
     bool FindMatchedProfileOnly) {
@@ -154,23 +167,16 @@ bool SampleProfileMatcher::functionMatchesProfile(
     return true;
   if (!SalvageUnusedProfile)
     return false;
-  // If IR function and profile function don't appear on either side, try
-  // matching the profile function.
-
-  // Check whether IR function appears in profile.
-  auto R = NewIRFunctions.find(IRFuncName);
-  if (R == NewIRFunctions.end() || !R->second)
-    return false;
-  Function &IRFunc = *R->second;
-  assert(FunctionId(IRFunc.getName()) != ProfileFuncName &&
-         "IR function should be different from profile function to match");
 
-  // Check whether profile function appears in IR.
-  auto F = SymbolMap->find(ProfileFuncName);
-  if (F != SymbolMap->end())
+  // If IR function doesn't have profile and the profile is unused, try
+  // matching them.
+  Function *IRFunc = findIfFunctionIsNew(IRFuncName);
+  if (!IRFunc || !isProfileUnused(ProfileFuncName))
     return false;
 
-  return functionMatchesProfile(IRFunc, ProfileFuncName,
+  assert(FunctionId(IRFunc->getName()) != ProfileFuncName &&
+         "IR function should be different from profile function to match");
+  return functionMatchesProfile(*IRFunc, ProfileFuncName,
                                 FindMatchedProfileOnly);
 }
 
@@ -238,9 +244,9 @@ SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1,
         X = V[Index(K - 1)] + 1;
       Y = X - K;
       while (X < Size1 && Y < Size2 &&
-             functionMatchesProfile(AnchorList1[X].second,
-                                    AnchorList2[Y].second,
-                                    !MatchUnusedFunction))
+             functionMatchesProfile(
+                 AnchorList1[X].second, AnchorList2[Y].second,
+                 !MatchUnusedFunction /* Find matched function only */))
         X++, Y++;
 
       V[Index(K)] = X;
@@ -372,8 +378,9 @@ void SampleProfileMatcher::runStaleProfileMatching(
   // appear on either side to reduce the matching scope. Note that we need to
   // use IR anchor as base(A side) to align with the order of
   // IRToProfileLocationMap.
-  LocToLocMap MatchedAnchors = longestCommonSequence(
-      FilteredIRAnchorsList, FilteredProfileAnchorList, RunCGMatching);
+  LocToLocMap MatchedAnchors =
+      longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,
+                            RunCGMatching /* Match unused functions */);
 
   // CFG level matching:
   // Apply the callsite matchings to infer matching for the basic
@@ -600,7 +607,7 @@ void SampleProfileMatcher::computeAndReportProfileStaleness() {
     for (const auto &I : ProfileNameToFuncMap) {
       if (GlobalValue::isAvailableExternallyLinkage(I.second->getLinkage()))
         continue;
-      NumRecoveredUnusedFunc++;
+      NumCallGraphRecoveredProfiledFunc++;
     }
   }
 
@@ -635,9 +642,9 @@ void SampleProfileMatcher::computeAndReportProfileStaleness() {
              << ") of samples are discarded due to function hash mismatch.\n";
     }
     if (SalvageUnusedProfile) {
-      errs() << "(" << NumRecoveredUnusedFunc << "/" << TotalProfiledFunc
-             << ") of functions' profile are matched and ("
-             << NumRecoveredUnusedSamples << "/" << TotalFunctionSamples
+      errs() << "(" << NumCallGraphRecoveredProfiledFunc << "/"
+             << TotalProfiledFunc << ") of functions' profile are matched and ("
+             << NumCallGraphRecoveredFuncSamples << "/" << TotalFunctionSamples
              << ") of samples are reused by call graph matching.\n";
     }
 
@@ -668,10 +675,10 @@ void SampleProfileMatcher::computeAndReportProfileStaleness() {
     }
 
     if (SalvageUnusedProfile) {
-      ProfStatsVec.emplace_back("NumRecoveredUnusedFunc",
-                                NumRecoveredUnusedFunc);
-      ProfStatsVec.emplace_back("NumRecoveredUnusedSamples",
-                                NumRecoveredUnusedSamples);
+      ProfStatsVec.emplace_back("NumCallGraphRecoveredProfiledFunc",
+                                NumCallGraphRecoveredProfiledFunc);
+      ProfStatsVec.emplace_back("NumCallGraphRecoveredFuncSamples",
+                                NumCallGraphRecoveredFuncSamples);
     }
 
     ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites);
@@ -709,7 +716,7 @@ void SampleProfileMatcher::findNewIRFunctions() {
     if (FS)
       continue;
 
-    // For extended binary, functions are fully inlined may not be loaded in the
+    // For extended binary, functions fully inlined may not be loaded in the
     // top-level profile, so check the NameTable which has the all symbol names
     // in profile.
     if (NamesInProfile.count(CanonFName))
@@ -721,7 +728,7 @@ void SampleProfileMatcher::findNewIRFunctions() {
       continue;
 
     LLVM_DEBUG(dbgs() << "Function " << CanonFName
-                      << " is not in profile or symbol list table.\n");
+                      << " is not in profile or profile symbol list.\n");
     NewIRFunctions[FunctionId(CanonFName)] = &F;
   }
 }
@@ -830,21 +837,21 @@ void SampleProfileMatcher::updateProfileWithNewName(
         }
       };
 
+  // A list of (new function, old function) pairs.
+  std::vector<std::pair<FunctionId, FunctionId>> MatchResult;
+
   // Update non-inline callees.
   for (auto &BS : const_cast<BodySampleMap &>(FuncProfile.getBodySamples())) {
-    // New function to old function pairs used to update the CallTargetMap.
-    std::vector<std::pair<FunctionId, FunctionId>> MatchResult;
-    SampleRecord::CallTargetMap &CTM =
-        const_cast<SampleRecord::CallTargetMap &>(BS.second.getCallTargets());
-    for (const auto &TS : CTM)
+    SampleRecord &SR = BS.second;
+    MatchResult.clear();
+    for (const auto &TS : SR.getCallTargets())
       FindNewMatch(TS.first, MatchResult, FuncProfile.getFunction());
     // Update the CallTargetMap.
     for (const auto &P : MatchResult) {
-      uint64_t Samples = CTM[P.second];
+      uint64_t Samples = SR.removeCalledTarget(P.second);
+      SR.addCalledTarget(P.first, Samples);
       if (ReportProfileStaleness || PersistProfileStaleness)
-        NumRecoveredUnusedSamples += Samples;
-      CTM[P.first] = Samples;
-      CTM.erase(P.second);
+        NumCallGraphRecoveredFuncSamples += Samples;
     }
   }
 
@@ -852,8 +859,7 @@ void SampleProfileMatcher::updateProfileWithNewName(
   for (auto &CM :
        const_cast<CallsiteSampleMap &>(FuncProfile.getCallsiteSamples())) {
     auto &CalleeMap = CM.second;
-    // New function to old function pairs used to update the CallsiteSampleMap.
-    std::vector<std::pair<FunctionId, FunctionId>> MatchResult;
+    MatchResult.clear();
     for (auto &CS : CalleeMap) {
       FindNewMatch(CS.second.getFunction(), MatchResult,
                    FuncProfile.getFunction());
@@ -865,7 +871,7 @@ void SampleProfileMatcher::updateProfileWithNewName(
              "Renamed function name should be different from the old map key");
       FunctionSamples &FS = CalleeMap[P.second];
       if (ReportProfileStaleness || PersistProfileStaleness)
-        NumRecoveredUnusedSamples += FS.getTotalSamples();
+        NumCallGraphRecoveredFuncSamples += FS.getTotalSamples();
       FS.setFunction(P.first);
       CalleeMap[P.first] = FS;
       CalleeMap.erase(P.second);
@@ -890,7 +896,7 @@ void SampleProfileMatcher::updateProfilesAndSymbolMap() {
 std::vector<Function *> SampleProfileMatcher::buildTopDownFuncOrder() {
   std::vector<Function *> FunctionOrderList;
   FunctionOrderList.reserve(M.size());
-  ::buildTopDownFuncOrder(CG, FunctionOrderList);
+  ::buildBottomUpFuncOrder(CG, FunctionOrderList);
   std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
   LLVM_DEBUG({
     dbgs() << "Function processing order:\n";
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll
index 9b1a56de0328c..41ef897c09f29 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll
@@ -4,8 +4,8 @@
 ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-renaming.prof --salvage-stale-profile --salvage-unused-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl --min-call-count-for-cg-matching=10 --min-func-count-for-cg-matching=10 2>&1 | FileCheck %s  --check-prefix=TINY-FUNC
 
 ; Verify find new IR functions.
-; CHECK: Function new_block_only is not in profile or symbol list table.
-; CHECK: Function new_foo is not in profile or symbol list table.
+; CHECK: Function new_block_only is not in profile or profile symbol list.
+; CHECK: Function new_foo is not in profile or profile symbol list.
 
 ; CHECK: Run stale profile matching for main
 ; CHECK: The similarity between new_foo(IR) and foo(profile) is 0.86
@@ -33,7 +33,7 @@
 ; CHECK: 'new_block_only' inlined into 'main' to match profiling context with (cost=75, threshold=3000) at callsite baz:1:3.2 @ new_foo:2:3.3 @ main:2:7.5;
 ; CHECK: 'new_foo' inlined into 'test_noninline' to match profiling context with (cost=110, threshold=3000) at callsite test_noninline:1:3.2;
 
-; CHECK: !"NumRecoveredUnusedFunc", i64 2, !"NumRecoveredUnusedSamples", i64 78
+; CHECK: !"NumCallGraphRecoveredProfiledFunc", i64 2, !"NumCallGraphRecoveredFuncSamples", i64 78
 
 ; TINY-FUNC-NOT: Function:new_foo matches profile:foo
 ; TINY-FUNC-NOT: Function:new_block_only matches profile:block_only

>From 93d70fa225b914b250b52f760c26ca42ef9c5696 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Fri, 21 Jun 2024 15:21:05 -0700
Subject: [PATCH 17/22] addressing feedback

---
 llvm/include/llvm/ProfileData/SampleProf.h    |  23 +-
 .../Transforms/IPO/SampleProfileMatcher.h     |  28 ++-
 .../Utils/SampleProfileLoaderBaseImpl.h       |   5 +-
 llvm/lib/ProfileData/SampleProf.cpp           |  36 ++-
 llvm/lib/Transforms/IPO/SampleProfile.cpp     |  31 ++-
 .../Transforms/IPO/SampleProfileMatcher.cpp   | 211 +++++++-----------
 .../pseudo-probe-stale-profile-renaming.prof  |   2 +-
 .../pseudo-probe-stale-profile-renaming.ll    |  12 +-
 8 files changed, 164 insertions(+), 184 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 51d590be124f1..5d67be1a215f0 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -919,12 +919,14 @@ class FunctionSamples {
   /// Returns a pointer to FunctionSamples at the given callsite location
   /// \p Loc with callee \p CalleeName. If no callsite can be found, relax
   /// the restriction to return the FunctionSamples at callsite location
-  /// \p Loc with the maximum total sample count. If \p Remapper is not
-  /// nullptr, use \p Remapper to find FunctionSamples with equivalent name
-  /// as \p CalleeName.
-  const FunctionSamples *
-  findFunctionSamplesAt(const LineLocation &Loc, StringRef CalleeName,
-                        SampleProfileReaderItaniumRemapper *Remapper) const;
+  /// \p Loc with the maximum total sample count. If \p Remapper or \p
+  /// FuncNameToProfNameMap is not nullptr, use them to find FunctionSamples
+  /// with equivalent name as \p CalleeName.
+  const FunctionSamples *findFunctionSamplesAt(
+      const LineLocation &Loc, StringRef CalleeName,
+      SampleProfileReaderItaniumRemapper *Remapper,
+      const HashKeyMap<std::unordered_map, FunctionId, FunctionId>
+          *FuncNameToProfNameMap = nullptr) const;
 
   bool empty() const { return TotalSamples == 0; }
 
@@ -1170,11 +1172,14 @@ class FunctionSamples {
   /// tree nodes in the profile.
   ///
   /// \returns the FunctionSamples pointer to the inlined instance.
-  /// If \p Remapper is not nullptr, it will be used to find matching
-  /// FunctionSamples with not exactly the same but equivalent name.
+  /// If \p Remapper or \p FuncNameToProfNameMap is not nullptr, it will be used
+  /// to find matching FunctionSamples with not exactly the same but equivalent
+  /// name.
   const FunctionSamples *findFunctionSamples(
       const DILocation *DIL,
-      SampleProfileReaderItaniumRemapper *Remapper = nullptr) const;
+      SampleProfileReaderItaniumRemapper *Remapper = nullptr,
+      const HashKeyMap<std::unordered_map, FunctionId, FunctionId>
+          *FuncNameToProfNameMap = nullptr) const;
 
   static bool ProfileIsProbeBased;
 
diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index c8d40d0ca6992..2b987b356974f 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -70,12 +70,18 @@ class SampleProfileMatcher {
   // matching result.
   std::unordered_map<std::pair<const Function *, FunctionId>, bool,
                      FuncToProfileNameMapHash>
-      FuncToProfileNameMap;
+      FuncProfileMatchCache;
   // The new functions found by the call graph matching. The map's key is the
-  // old profile name and value is the new(renamed) function.
-  HashKeyMap<std::unordered_map, FunctionId, Function *> ProfileNameToFuncMap;
+  // new(renamed) function pointer and the value is old(unused) profile
+  // name.
+  std::unordered_map<Function *, FunctionId> FuncToProfileNameMap;
 
-  // A map pointer to the SymbolMap in the SampleProfileLoader, which stores all
+  // A map pointer to the FuncNameToProfNameMap in SampleProfileLoader,
+  // which maps the function name to the matched profile name. This is used
+  // for sample loader to look up profile using the new name.
+  HashKeyMap<std::unordered_map, FunctionId, FunctionId> *FuncNameToProfNameMap;
+
+  // A map pointer to the SymbolMap in SampleProfileLoader, which stores all
   // the original matched symbols before the matching. this is to determine if
   // the profile is unused(to be matched) or not.
   HashKeyMap<std::unordered_map, FunctionId, Function *> *SymbolMap;
@@ -114,9 +120,12 @@ class SampleProfileMatcher {
       Module &M, SampleProfileReader &Reader, LazyCallGraph &CG,
       const PseudoProbeManager *ProbeManager, ThinOrFullLTOPhase LTOPhase,
       HashKeyMap<std::unordered_map, FunctionId, Function *> &SymMap,
-      std::shared_ptr<ProfileSymbolList> PSL)
+      std::shared_ptr<ProfileSymbolList> PSL,
+      HashKeyMap<std::unordered_map, FunctionId, FunctionId>
+          &FuncNameToProfNameMap)
       : M(M), Reader(Reader), CG(CG), ProbeManager(ProbeManager),
-        LTOPhase(LTOPhase), SymbolMap(&SymMap), PSL(PSL) {};
+        LTOPhase(LTOPhase), FuncNameToProfNameMap(&FuncNameToProfNameMap),
+        SymbolMap(&SymMap), PSL(PSL) {};
   void runOnModule();
   void clearMatchingData() {
     // Do not clear FuncMappings, it stores IRLoc to ProfLoc remappings which
@@ -125,7 +134,6 @@ class SampleProfileMatcher {
     freeContainer(FlattenedProfiles);
 
     freeContainer(NewIRFunctions);
-    freeContainer(ProfileNameToFuncMap);
     freeContainer(FuncToProfileNameMap);
   }
 
@@ -148,7 +156,6 @@ class SampleProfileMatcher {
                              const AnchorMap &ProfileAnchors,
                              AnchorList &FilteredIRAnchorsList,
                              AnchorList &FilteredProfileAnchorList);
-  std::vector<Function *> buildTopDownFuncOrder();
   void runOnFunction(Function &F);
   void findIRAnchors(const Function &F, AnchorMap &IRAnchors) const;
   void findProfileAnchors(const FunctionSamples &FS,
@@ -177,6 +184,9 @@ class SampleProfileMatcher {
            State == MatchState::RemovedMatch;
   };
 
+  void countCallGraphRecoveredSamples(
+      const FunctionSamples &FS,
+      std::unordered_set<FunctionId> &MatchedUnusedProfile);
   // Count the samples of checksum mismatched function for the top-level
   // function and all inlinees.
   void countMismatchedFuncSamples(const FunctionSamples &FS, bool IsTopLevel);
@@ -233,8 +243,6 @@ class SampleProfileMatcher {
   // which are supposed to be new functions. We use them as the targets for
   // call graph matching.
   void findNewIRFunctions();
-  void updateProfilesAndSymbolMap();
-  void updateProfileWithNewName(FunctionSamples &FuncProfile);
   void reportOrPersistProfileStats();
 };
 } // end namespace llvm
diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
index 7ca64df32c3aa..32bf7b8c96be3 100644
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
@@ -157,8 +157,8 @@ static inline bool skipProfileForFunction(const Function &F) {
 }
 
 static inline void
-buildBottomUpFuncOrder(LazyCallGraph &CG,
-                       std::vector<Function *> &FunctionOrderList) {
+buildTopDownFuncOrder(LazyCallGraph &CG,
+                      std::vector<Function *> &FunctionOrderList) {
   CG.buildRefSCCs();
   for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) {
     for (LazyCallGraph::SCC &C : RC) {
@@ -169,6 +169,7 @@ buildBottomUpFuncOrder(LazyCallGraph &CG,
       }
     }
   }
+  std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
 }
 
 template <typename FT> class SampleProfileLoaderBaseImpl {
diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
index 59fa71899ed47..472882418c769 100644
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -236,7 +236,9 @@ LineLocation FunctionSamples::getCallSiteIdentifier(const DILocation *DIL,
 }
 
 const FunctionSamples *FunctionSamples::findFunctionSamples(
-    const DILocation *DIL, SampleProfileReaderItaniumRemapper *Remapper) const {
+    const DILocation *DIL, SampleProfileReaderItaniumRemapper *Remapper,
+    const HashKeyMap<std::unordered_map, FunctionId, FunctionId>
+        *FuncNameToProfNameMap) const {
   assert(DIL);
   SmallVector<std::pair<LineLocation, StringRef>, 10> S;
 
@@ -256,7 +258,8 @@ const FunctionSamples *FunctionSamples::findFunctionSamples(
     return this;
   const FunctionSamples *FS = this;
   for (int i = S.size() - 1; i >= 0 && FS != nullptr; i--) {
-    FS = FS->findFunctionSamplesAt(S[i].first, S[i].second, Remapper);
+    FS = FS->findFunctionSamplesAt(S[i].first, S[i].second, Remapper,
+                                   FuncNameToProfNameMap);
   }
   return FS;
 }
@@ -277,19 +280,32 @@ void FunctionSamples::findAllNames(DenseSet<FunctionId> &NameSet) const {
 
 const FunctionSamples *FunctionSamples::findFunctionSamplesAt(
     const LineLocation &Loc, StringRef CalleeName,
-    SampleProfileReaderItaniumRemapper *Remapper) const {
+    SampleProfileReaderItaniumRemapper *Remapper,
+    const HashKeyMap<std::unordered_map, FunctionId, FunctionId>
+        *FuncNameToProfNameMap) const {
   CalleeName = getCanonicalFnName(CalleeName);
 
-  auto iter = CallsiteSamples.find(mapIRLocToProfileLoc(Loc));
-  if (iter == CallsiteSamples.end())
+  auto I = CallsiteSamples.find(mapIRLocToProfileLoc(Loc));
+  if (I == CallsiteSamples.end())
     return nullptr;
-  auto FS = iter->second.find(getRepInFormat(CalleeName));
-  if (FS != iter->second.end())
+  auto FS = I->second.find(getRepInFormat(CalleeName));
+  if (FS != I->second.end())
     return &FS->second;
+
+  if (FuncNameToProfNameMap && !FuncNameToProfNameMap->empty()) {
+    auto R = FuncNameToProfNameMap->find(FunctionId(CalleeName));
+    if (R != FuncNameToProfNameMap->end()) {
+      CalleeName = R->second.stringRef();
+      auto FS = I->second.find(getRepInFormat(CalleeName));
+      if (FS != I->second.end())
+        return &FS->second;
+    }
+  }
+
   if (Remapper) {
     if (auto NameInProfile = Remapper->lookUpNameInProfile(CalleeName)) {
-      auto FS = iter->second.find(getRepInFormat(*NameInProfile));
-      if (FS != iter->second.end())
+      auto FS = I->second.find(getRepInFormat(*NameInProfile));
+      if (FS != I->second.end())
         return &FS->second;
     }
   }
@@ -300,7 +316,7 @@ const FunctionSamples *FunctionSamples::findFunctionSamplesAt(
     return nullptr;
   uint64_t MaxTotalSamples = 0;
   const FunctionSamples *R = nullptr;
-  for (const auto &NameFS : iter->second)
+  for (const auto &NameFS : I->second)
     if (NameFS.second.getTotalSamples() >= MaxTotalSamples) {
       MaxTotalSamples = NameFS.second.getTotalSamples();
       R = &NameFS.second;
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 78fa2e7f3635e..b270ee3a66adc 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -134,6 +134,10 @@ cl::opt<bool> SalvageStaleProfile(
     "salvage-stale-profile", cl::Hidden, cl::init(false),
     cl::desc("Salvage stale profile by fuzzy matching and use the remapped "
              "location for sample profile query."));
+cl::opt<bool>
+    SalvageUnusedProfile("salvage-unused-profile", cl::Hidden, cl::init(false),
+                         cl::desc("Salvage unused profile by matching with new "
+                                  "functions on call graph."));
 
 cl::opt<bool> ReportProfileStaleness(
     "report-profile-staleness", cl::Hidden, cl::init(false),
@@ -528,6 +532,10 @@ class SampleProfileLoader final : public SampleProfileLoaderBaseImpl<Function> {
   /// is one-to-one mapping.
   HashKeyMap<std::unordered_map, FunctionId, Function *> SymbolMap;
 
+  /// Map from function name to profile name generated by call-graph based
+  /// profile fuzzy matching(--salvage-unused-profile).
+  HashKeyMap<std::unordered_map, FunctionId, FunctionId> FuncNameToProfNameMap;
+
   std::function<AssumptionCache &(Function &)> GetAC;
   std::function<TargetTransformInfo &(Function &)> GetTTI;
   std::function<const TargetLibraryInfo &(Function &)> GetTLI;
@@ -698,7 +706,8 @@ SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const {
     return nullptr;
 
   return FS->findFunctionSamplesAt(FunctionSamples::getCallSiteIdentifier(DIL),
-                                   CalleeName, Reader->getRemapper());
+                                   CalleeName, Reader->getRemapper(),
+                                   &FuncNameToProfNameMap);
 }
 
 /// Returns a vector of FunctionSamples that are the indirect call targets
@@ -776,8 +785,8 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
     if (FunctionSamples::ProfileIsCS)
       it.first->second = ContextTracker->getContextSamplesFor(DIL);
     else
-      it.first->second =
-          Samples->findFunctionSamples(DIL, Reader->getRemapper());
+      it.first->second = Samples->findFunctionSamples(
+          DIL, Reader->getRemapper(), &FuncNameToProfNameMap);
   }
   return it.first->second;
 }
@@ -1936,10 +1945,9 @@ SampleProfileLoader::buildFunctionOrder(Module &M, LazyCallGraph &CG) {
       }
       ++CGI;
     }
+    std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
   } else
-    buildBottomUpFuncOrder(CG, FunctionOrderList);
-
-  std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
+    buildTopDownFuncOrder(CG, FunctionOrderList);
 
   LLVM_DEBUG({
     dbgs() << "Function processing order:\n";
@@ -2069,7 +2077,8 @@ bool SampleProfileLoader::doInitialization(Module &M,
   if (ReportProfileStaleness || PersistProfileStaleness ||
       SalvageStaleProfile) {
     MatchingManager = std::make_unique<SampleProfileMatcher>(
-        M, *Reader, CG, ProbeManager.get(), LTOPhase, SymbolMap, PSL);
+        M, *Reader, CG, ProbeManager.get(), LTOPhase, SymbolMap, PSL,
+        FuncNameToProfNameMap);
   }
 
   return true;
@@ -2184,14 +2193,18 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
       }
     }
   }
-  assert(SymbolMap.count(FunctionId()) == 0 &&
-         "No empty StringRef should be added in SymbolMap");
 
+  // Stale profile matching.
   if (ReportProfileStaleness || PersistProfileStaleness ||
       SalvageStaleProfile) {
     MatchingManager->runOnModule();
     MatchingManager->clearMatchingData();
   }
+  assert(SymbolMap.count(FunctionId()) == 0 &&
+         "No empty StringRef should be added in SymbolMap");
+  assert((SalvageUnusedProfile || FuncNameToProfNameMap.empty()) &&
+         "FuncNameToProfNameMap is not empty when --salvage-unused-profile is "
+         "not enabled");
 
   bool retval = false;
   for (auto *F : buildFunctionOrder(M, CG)) {
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index a328ebc9c9992..220f37ef7e963 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -20,11 +20,6 @@ using namespace sampleprof;
 
 #define DEBUG_TYPE "sample-profile-matcher"
 
-static cl::opt<bool> SalvageUnusedProfile(
-    "salvage-unused-profile", cl::Hidden, cl::init(false),
-    cl::desc(
-        "Salvage unused profile by matching new functions on call graph."));
-
 static cl::opt<unsigned> FuncProfileSimilarityThreshold(
     "func-profile-similarity-threshold", cl::Hidden, cl::init(80),
     cl::desc("Consider a profile matches a function if the similarity of their "
@@ -41,6 +36,7 @@ static cl::opt<unsigned> MinCallCountForCGMatching(
              "run stale profile call graph matching."));
 
 extern cl::opt<bool> SalvageStaleProfile;
+extern cl::opt<bool> SalvageUnusedProfile;
 extern cl::opt<bool> PersistProfileStaleness;
 extern cl::opt<bool> ReportProfileStaleness;
 
@@ -398,6 +394,16 @@ void SampleProfileMatcher::runOnFunction(Function &F) {
   // the maximum number of callsites, we merge the function profiles from all
   // contexts, aka, the flattened profile to find profile anchors.
   const auto *FSFlattened = getFlattenedSamplesFor(F);
+  if (SalvageUnusedProfile && !FSFlattened) {
+    // Apply the matching in place to find the new function's matched profile.
+    // TODO: For extended profile format, if a function profile is unused and
+    // it's top-level, even if the profile is matched, it's not found in the
+    // profile. This is because the sample reader only reads the used profiles
+    // at the beginning; we need to support on-demand profile loading in future.
+    auto R = FuncToProfileNameMap.find(&F);
+    if (R != FuncToProfileNameMap.end())
+      FSFlattened = getFlattenedSamplesFor(R->second);
+  }
   if (!FSFlattened)
     return;
 
@@ -415,32 +421,31 @@ void SampleProfileMatcher::runOnFunction(Function &F) {
   if (ReportProfileStaleness || PersistProfileStaleness)
     recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, nullptr);
 
-  if (SalvageStaleProfile) {
-    // For probe-based profiles, run matching only when profile checksum is
-    // mismatched.
-    bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased &&
-                            !ProbeManager->profileIsValid(F, *FSFlattened);
-    bool RunCFGMatching =
-        !FunctionSamples::ProfileIsProbeBased || ChecksumMismatch;
-    bool RunCGMatching = SalvageUnusedProfile;
-    // For imported functions, the checksum metadata(pseudo_probe_desc) are
-    // dropped, so we leverage function attribute(profile-checksum-mismatch) to
-    // transfer the info: add the attribute during pre-link phase and check it
-    // during post-link phase(see "profileIsValid").
-    if (ChecksumMismatch && LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink)
-      F.addFnAttr("profile-checksum-mismatch");
-
-    // The matching result will be saved to IRToProfileLocationMap, create a
-    // new map for each function.
-    auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
-    runStaleProfileMatching(F, IRAnchors, ProfileAnchors,
-                            IRToProfileLocationMap, RunCFGMatching,
-                            RunCGMatching);
-    // Find and update callsite match states after matching.
-    if (RunCFGMatching && (ReportProfileStaleness || PersistProfileStaleness))
-      recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors,
-                                &IRToProfileLocationMap);
-  }
+  if (!SalvageStaleProfile)
+    return;
+  // For probe-based profiles, run matching only when profile checksum is
+  // mismatched.
+  bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased &&
+                          !ProbeManager->profileIsValid(F, *FSFlattened);
+  bool RunCFGMatching =
+      !FunctionSamples::ProfileIsProbeBased || ChecksumMismatch;
+  bool RunCGMatching = SalvageUnusedProfile;
+  // For imported functions, the checksum metadata(pseudo_probe_desc) are
+  // dropped, so we leverage function attribute(profile-checksum-mismatch) to
+  // transfer the info: add the attribute during pre-link phase and check it
+  // during post-link phase(see "profileIsValid").
+  if (ChecksumMismatch && LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink)
+    F.addFnAttr("profile-checksum-mismatch");
+
+  // The matching result will be saved to IRToProfileLocationMap, create a
+  // new map for each function.
+  auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
+  runStaleProfileMatching(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap,
+                          RunCFGMatching, RunCGMatching);
+  // Find and update callsite match states after matching.
+  if (RunCFGMatching && (ReportProfileStaleness || PersistProfileStaleness))
+    recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors,
+                              &IRToProfileLocationMap);
 }
 
 void SampleProfileMatcher::recordCallsiteMatchStates(
@@ -599,13 +604,30 @@ void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) {
   }
 }
 
+void SampleProfileMatcher::countCallGraphRecoveredSamples(
+    const FunctionSamples &FS,
+    std::unordered_set<FunctionId> &CallGraphRecoveredProfiles) {
+  if (CallGraphRecoveredProfiles.count(FS.getFunction())) {
+    NumCallGraphRecoveredFuncSamples += FS.getTotalSamples();
+    return;
+  }
+
+  for (const auto &CM : FS.getCallsiteSamples()) {
+    for (const auto &CS : CM.second) {
+      countCallGraphRecoveredSamples(CS.second, CallGraphRecoveredProfiles);
+    }
+  }
+}
+
 void SampleProfileMatcher::computeAndReportProfileStaleness() {
   if (!ReportProfileStaleness && !PersistProfileStaleness)
     return;
 
+  std::unordered_set<FunctionId> CallGraphRecoveredProfiles;
   if (SalvageUnusedProfile) {
-    for (const auto &I : ProfileNameToFuncMap) {
-      if (GlobalValue::isAvailableExternallyLinkage(I.second->getLinkage()))
+    for (const auto &I : FuncToProfileNameMap) {
+      CallGraphRecoveredProfiles.insert(I.second);
+      if (GlobalValue::isAvailableExternallyLinkage(I.first->getLinkage()))
         continue;
       NumCallGraphRecoveredProfiledFunc++;
     }
@@ -625,6 +647,9 @@ void SampleProfileMatcher::computeAndReportProfileStaleness() {
     TotalProfiledFunc++;
     TotalFunctionSamples += FS->getTotalSamples();
 
+    if (SalvageUnusedProfile && !CallGraphRecoveredProfiles.empty())
+      countCallGraphRecoveredSamples(*FS, CallGraphRecoveredProfiles);
+
     // Checksum mismatch is only used in pseudo-probe mode.
     if (FunctionSamples::ProfileIsProbeBased)
       countMismatchedFuncSamples(*FS, true);
@@ -741,9 +766,9 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
 
   const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc);
   assert(FSFlattened && "Flattened profile sample is null");
-  // Similarity check may not be reliable if the function is tiny, we use the
-  // number of basic block as a proxy for the function complexity and skip the
-  // matching if it's too small.
+  // The check for similarity or checksum may not be reliable if the function is
+  // tiny, we use the number of basic blocks as a proxy for the function
+  // complexity and skip the matching if it's too small.
   if (IRFunc.size() < MinFuncCountForCGMatching ||
       FSFlattened->getBodySamples().size() < MinFuncCountForCGMatching)
     return false;
@@ -781,8 +806,9 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
   // Don't recursively match the callee function to avoid infinite matching,
   // callee functions will be handled later since it's processed in top-down
   // order .
-  LocToLocMap MatchedAnchors = longestCommonSequence(
-      FilteredIRAnchorsList, FilteredProfileAnchorList, false);
+  LocToLocMap MatchedAnchors =
+      longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,
+                            false /* Match unused functions */);
 
   Similarity =
       static_cast<float>(MatchedAnchors.size()) * 2 /
@@ -801,17 +827,17 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
 bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc,
                                                   const FunctionId &ProfFunc,
                                                   bool FindMatchedProfileOnly) {
-  auto R = FuncToProfileNameMap.find({&IRFunc, ProfFunc});
-  if (R != FuncToProfileNameMap.end())
+  auto R = FuncProfileMatchCache.find({&IRFunc, ProfFunc});
+  if (R != FuncProfileMatchCache.end())
     return R->second;
 
   if (FindMatchedProfileOnly)
     return false;
 
   bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc);
-  FuncToProfileNameMap[{&IRFunc, ProfFunc}] = Matched;
+  FuncProfileMatchCache[{&IRFunc, ProfFunc}] = Matched;
   if (Matched) {
-    ProfileNameToFuncMap[ProfFunc] = &IRFunc;
+    FuncToProfileNameMap[&IRFunc] = ProfFunc;
     LLVM_DEBUG(dbgs() << "Function:" << IRFunc.getName()
                       << " matches profile:" << ProfFunc << "\n");
   }
@@ -819,94 +845,6 @@ bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc,
   return Matched;
 }
 
-void SampleProfileMatcher::updateProfileWithNewName(
-    FunctionSamples &FuncProfile) {
-  auto FindNewMatch =
-      [&](const FunctionId &ProfileName,
-          std::vector<std::pair<FunctionId, FunctionId>> &MatchResult,
-          [[maybe_unused]] const FunctionId &CallerName) {
-        auto P = ProfileNameToFuncMap.find(ProfileName);
-        if (P != ProfileNameToFuncMap.end()) {
-          FunctionId IRCallee(P->second->getName());
-          assert(IRCallee != ProfileName &&
-                 "New callee symbol is not a new function");
-          LLVM_DEBUG(dbgs()
-                     << "Profile name is updated from " << ProfileName << " to "
-                     << IRCallee << " under caller: " << CallerName << "\n");
-          MatchResult.emplace_back(IRCallee, ProfileName);
-        }
-      };
-
-  // A list of new function to old function pair.
-  std::vector<std::pair<FunctionId, FunctionId>> MatchResult;
-
-  // Update non-inline callees.
-  for (auto &BS : const_cast<BodySampleMap &>(FuncProfile.getBodySamples())) {
-    SampleRecord &SR = BS.second;
-    MatchResult.clear();
-    for (const auto &TS : SR.getCallTargets())
-      FindNewMatch(TS.first, MatchResult, FuncProfile.getFunction());
-    // Update the CallTargetMap.
-    for (const auto &P : MatchResult) {
-      uint64_t Samples = SR.removeCalledTarget(P.second);
-      SR.addCalledTarget(P.first, Samples);
-      if (ReportProfileStaleness || PersistProfileStaleness)
-        NumCallGraphRecoveredFuncSamples += Samples;
-    }
-  }
-
-  // Update inline callees recursively.
-  for (auto &CM :
-       const_cast<CallsiteSampleMap &>(FuncProfile.getCallsiteSamples())) {
-    auto &CalleeMap = CM.second;
-    MatchResult.clear();
-    for (auto &CS : CalleeMap) {
-      FindNewMatch(CS.second.getFunction(), MatchResult,
-                   FuncProfile.getFunction());
-      updateProfileWithNewName(CS.second);
-    }
-    // Update the CalleeMap using the new name and remove the old entry.
-    for (auto &P : MatchResult) {
-      assert(P.first != P.second &&
-             "Renamed function name should be different from the old map key");
-      FunctionSamples &FS = CalleeMap[P.second];
-      if (ReportProfileStaleness || PersistProfileStaleness)
-        NumCallGraphRecoveredFuncSamples += FS.getTotalSamples();
-      FS.setFunction(P.first);
-      CalleeMap[P.first] = FS;
-      CalleeMap.erase(P.second);
-    }
-  }
-}
-
-void SampleProfileMatcher::updateProfilesAndSymbolMap() {
-  if (ProfileNameToFuncMap.empty())
-    return;
-  for (auto &P : Reader.getProfiles())
-    updateProfileWithNewName(P.second);
-
-  // Add the new function to the SymbolMap, which will be used in
-  // SampleLoader.
-  for (auto &I : ProfileNameToFuncMap) {
-    assert(I.second && "New function is null");
-    SymbolMap->emplace(FunctionId(I.second->getName()), I.second);
-  }
-}
-
-std::vector<Function *> SampleProfileMatcher::buildTopDownFuncOrder() {
-  std::vector<Function *> FunctionOrderList;
-  FunctionOrderList.reserve(M.size());
-  ::buildBottomUpFuncOrder(CG, FunctionOrderList);
-  std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
-  LLVM_DEBUG({
-    dbgs() << "Function processing order:\n";
-    for (auto F : FunctionOrderList) {
-      dbgs() << F->getName() << "\n";
-    }
-  });
-  return FunctionOrderList;
-}
-
 void SampleProfileMatcher::runOnModule() {
   ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
                                    FunctionSamples::ProfileIsCS);
@@ -915,15 +853,22 @@ void SampleProfileMatcher::runOnModule() {
 
   // Process the matching in top-down order so that the caller matching result
   // can be used to the callee matching.
-  for (auto *F : buildTopDownFuncOrder()) {
+  std::vector<Function *> TopDownFunctionList;
+  TopDownFunctionList.reserve(M.size());
+  buildTopDownFuncOrder(CG, TopDownFunctionList);
+  for (auto *F : TopDownFunctionList) {
     if (skipProfileForFunction(*F))
       continue;
     runOnFunction(*F);
   }
 
-  // Update the profile map and symbol map with the new function name.
+  // Update the data in SampleLoader.
   if (SalvageUnusedProfile)
-    updateProfilesAndSymbolMap();
+    for (auto &I : FuncToProfileNameMap) {
+      assert(I.first && "New function is null");
+      FuncNameToProfNameMap->emplace(FunctionId(I.first->getName()), I.second);
+      SymbolMap->emplace(I.second, I.first);
+    }
 
   if (SalvageStaleProfile)
     distributeIRToProfileLocationMap();
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-renaming.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-renaming.prof
index 6ff9cd050dd82..78ff0f322dd0f 100644
--- a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-renaming.prof
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-renaming.prof
@@ -7,7 +7,7 @@ main:47:0
  8: 2
  9: 0
  5: foo:24
-  1: 3
+  1: 4
   2: 3 bar:3
   4: 3 bar:3
   5: 1 mismatch:1
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll
index 41ef897c09f29..a549812f46ef6 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll
@@ -17,15 +17,7 @@
 ; CHECK: Run stale profile matching for baz
 ; CHECK: Run stale profile matching for bar
 
-; Verify profile new name update.
-; CHECK-DAG: Profile name is updated from foo to new_foo under caller: test_noninline
-; CHECK-DAG: Profile name is updated from block_only to new_block_only under caller: baz
-; CHECK-DAG: Profile name is updated from foo to new_foo under caller: main
-; CHECK-DAG: Profile name is updated from block_only to new_block_only under caller: baz
-; CHECK-DAG: Profile name is updated from block_only to new_block_only under caller: baz
-; CHECK-DAG: Profile name is updated from block_only to new_block_only under caller: cold_func
-
-; CHECK: (2/3) of functions' profile are matched and (78/81) of samples are reused by call graph matching.
+; CHECK: (2/3) of functions' profile are matched and (55/81) of samples are reused by call graph matching.
 
 ; Verify the matched function is updated correctly by checking the inlining.
 ; CHECK: 'new_foo' inlined into 'main' to match profiling context with (cost=110, threshold=3000) at callsite main:2:7.5;
@@ -33,7 +25,7 @@
 ; CHECK: 'new_block_only' inlined into 'main' to match profiling context with (cost=75, threshold=3000) at callsite baz:1:3.2 @ new_foo:2:3.3 @ main:2:7.5;
 ; CHECK: 'new_foo' inlined into 'test_noninline' to match profiling context with (cost=110, threshold=3000) at callsite test_noninline:1:3.2;
 
-; CHECK: !"NumCallGraphRecoveredProfiledFunc", i64 2, !"NumCallGraphRecoveredFuncSamples", i64 78
+; CHECK: !"NumCallGraphRecoveredProfiledFunc", i64 2, !"NumCallGraphRecoveredFuncSamples", i64 55
 
 ; TINY-FUNC-NOT: Function:new_foo matches profile:foo
 ; TINY-FUNC-NOT: Function:new_block_only matches profile:block_only

>From 8c36fcc4c8d6cac5611116ccfeef808b4de98b2e Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Fri, 21 Jun 2024 17:27:47 -0700
Subject: [PATCH 18/22] add test for recursive case

---
 .../Transforms/IPO/SampleProfileMatcher.cpp   |   6 +-
 ...robe-stale-profile-renaming-recursive.prof |  11 ++
 ...-probe-stale-profile-renaming-recursive.ll | 150 ++++++++++++++++++
 3 files changed, 166 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-renaming-recursive.prof
 create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming-recursive.ll

diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 220f37ef7e963..62c08aa8d737f 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -866,7 +866,11 @@ void SampleProfileMatcher::runOnModule() {
   if (SalvageUnusedProfile)
     for (auto &I : FuncToProfileNameMap) {
       assert(I.first && "New function is null");
-      FuncNameToProfNameMap->emplace(FunctionId(I.first->getName()), I.second);
+      FunctionId FuncName(I.first->getName());
+      FuncNameToProfNameMap->emplace(FuncName, I.second);
+      // We need to remove the old entry to avoid duplicating the function
+      // processing.
+      SymbolMap->erase(FuncName);
       SymbolMap->emplace(I.second, I.first);
     }
 
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-renaming-recursive.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-renaming-recursive.prof
new file mode 100644
index 0000000000000..edb1404c1d517
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-renaming-recursive.prof
@@ -0,0 +1,11 @@
+main:42:0
+ 1: 0
+ 6: 2
+ 7: 0
+ 5: foo:40
+  1: 20
+  2: bar:20
+   1: 20
+   !CFGChecksum: 4294967295
+  !CFGChecksum: 281479271677951
+ !CFGChecksum: 281582264815352
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming-recursive.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming-recursive.ll
new file mode 100644
index 0000000000000..d9db804b56364
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming-recursive.ll
@@ -0,0 +1,150 @@
+; REQUIRES: x86_64-linux
+; REQUIRES: asserts
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-renaming-recursive.prof --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -persist-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 2>&1 | FileCheck %s
+
+; CHECK: Run stale profile matching for main
+; CHECK: Function:foo_new matches profile:foo
+; CHECK: Run stale profile matching for foo_new
+; CHECK: Function:bar_new matches profile:bar
+; CHECK: Run stale profile matching for bar_new
+
+; CHECK: Function processing order:
+; CHECK: main
+; CHECK: foo_new
+; CHECK: bar_new
+
+; CHECK: 'foo_new' inlined into 'main' to match profiling context with (cost=0, threshold=3000) at callsite main:2:7;
+; CHECK: 'bar_new' inlined into 'main' to match profiling context with (cost=-15, threshold=3000) at callsite foo_new:1:3 @ main:2:7;
+
+
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@x = dso_local global i32 0, align 4, !dbg !0
+
+; Function Attrs: nounwind uwtable
+define dso_local void @bar_new() #0 !dbg !18 {
+entry:
+  call void @llvm.pseudoprobe(i64 8236371237083957767, i64 1, i32 0, i64 -1), !dbg !21
+  %0 = load volatile i32, ptr @x, align 4, !dbg !21, !tbaa !22
+  %inc = add nsw i32 %0, 1, !dbg !21
+  store volatile i32 %inc, ptr @x, align 4, !dbg !21, !tbaa !22
+  ret void, !dbg !26
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local void @foo_new() #0 !dbg !27 {
+entry:
+  call void @llvm.pseudoprobe(i64 -837213161392124280, i64 1, i32 0, i64 -1), !dbg !28
+  call void @bar_new(), !dbg !29
+  ret void, !dbg !31
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local i32 @main() #0 !dbg !32 {
+entry:
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg !38
+    #dbg_value(i32 0, !36, !DIExpression(), !39)
+  br label %for.cond, !dbg !40
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ], !dbg !41
+    #dbg_value(i32 %i.0, !36, !DIExpression(), !39)
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !42
+  %cmp = icmp slt i32 %i.0, 1000000, !dbg !44
+  br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !45
+
+for.cond.cleanup:                                 ; preds = %for.cond
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg !46
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 7, i32 0, i64 -1), !dbg !47
+  ret i32 0, !dbg !47
+
+for.body:                                         ; preds = %for.cond
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg !48
+  call void @foo_new(), !dbg !50
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 6, i32 0, i64 -1), !dbg !52
+  %inc = add nsw i32 %i.0, 1, !dbg !52
+    #dbg_value(i32 %inc, !36, !DIExpression(), !39)
+  br label %for.cond, !dbg !53, !llvm.loop !54
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #2
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare void @llvm.pseudoprobe(i64, i64, i32, i64) #3
+
+attributes #0 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
+attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #2 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!7, !8, !9, !10, !11, !12, !13}
+!llvm.ident = !{!14}
+!llvm.pseudo_probe_desc = !{!15, !16, !17}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None)
+!3 = !DIFile(filename: "test.c", directory: "/home/", checksumkind: CSK_MD5, checksum: "48867dcc5b42e2991317c585b7545860")
+!4 = !{!0}
+!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6)
+!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!7 = !{i32 7, !"Dwarf Version", i32 5}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{i32 1, !"wchar_size", i32 4}
+!10 = !{i32 8, !"PIC Level", i32 2}
+!11 = !{i32 7, !"PIE Level", i32 2}
+!12 = !{i32 7, !"uwtable", i32 2}
+!13 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
+!14 = !{!"clang version 19.0.0"}
+!15 = !{i64 8236371237083957767, i64 4294967295, !"bar_new"}
+!16 = !{i64 -837213161392124280, i64 281479271677951, !"foo_new"}
+!17 = !{i64 -2624081020897602054, i64 281582264815352, !"main"}
+!18 = distinct !DISubprogram(name: "bar_new", scope: !3, file: !3, line: 3, type: !19, scopeLine: 3, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!19 = !DISubroutineType(types: !20)
+!20 = !{null}
+!21 = !DILocation(line: 4, column: 4, scope: !18)
+!22 = !{!23, !23, i64 0}
+!23 = !{!"int", !24, i64 0}
+!24 = !{!"omnipotent char", !25, i64 0}
+!25 = !{!"Simple C/C++ TBAA"}
+!26 = !DILocation(line: 5, column: 1, scope: !18)
+!27 = distinct !DISubprogram(name: "foo_new", scope: !3, file: !3, line: 7, type: !19, scopeLine: 7, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!28 = !DILocation(line: 8, column: 3, scope: !27)
+!29 = !DILocation(line: 8, column: 3, scope: !30)
+!30 = !DILexicalBlockFile(scope: !27, file: !3, discriminator: 455082007)
+!31 = !DILocation(line: 9, column: 1, scope: !27)
+!32 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !33, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !35)
+!33 = !DISubroutineType(types: !34)
+!34 = !{!6}
+!35 = !{!36}
+!36 = !DILocalVariable(name: "i", scope: !37, file: !3, line: 12, type: !6)
+!37 = distinct !DILexicalBlock(scope: !32, file: !3, line: 12, column: 3)
+!38 = !DILocation(line: 12, column: 12, scope: !37)
+!39 = !DILocation(line: 0, scope: !37)
+!40 = !DILocation(line: 12, column: 8, scope: !37)
+!41 = !DILocation(line: 12, scope: !37)
+!42 = !DILocation(line: 12, column: 19, scope: !43)
+!43 = distinct !DILexicalBlock(scope: !37, file: !3, line: 12, column: 3)
+!44 = !DILocation(line: 12, column: 21, scope: !43)
+!45 = !DILocation(line: 12, column: 3, scope: !37)
+!46 = !DILocation(line: 0, scope: !32)
+!47 = !DILocation(line: 15, column: 1, scope: !32)
+!48 = !DILocation(line: 13, column: 7, scope: !49)
+!49 = distinct !DILexicalBlock(scope: !43, file: !3, line: 12, column: 41)
+!50 = !DILocation(line: 13, column: 7, scope: !51)
+!51 = !DILexicalBlockFile(scope: !49, file: !3, discriminator: 455082031)
+!52 = !DILocation(line: 12, column: 37, scope: !43)
+!53 = !DILocation(line: 12, column: 3, scope: !43)
+!54 = distinct !{!54, !45, !55, !56}
+!55 = !DILocation(line: 14, column: 3, scope: !37)
+!56 = !{!"llvm.loop.mustprogress"}

>From dc4d4f984e7f2867a79be0d700aef83c86464610 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Mon, 24 Jun 2024 15:59:55 -0700
Subject: [PATCH 19/22] renaming and fix comments

---
 llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h | 9 +++++----
 llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp        | 8 ++++----
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index 2b987b356974f..ac1b29268d9b5 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -224,7 +224,8 @@ class SampleProfileMatcher {
                                const AnchorMap &ProfileAnchors,
                                LocToLocMap &IRToProfileLocationMap,
                                bool RunCFGMatching, bool RunCGMatching);
-  Function *findIfFunctionIsNew(const FunctionId &IRFuncName);
+  // Return the function if it doesn't have profile, otherwise return nullptr.
+  Function *functionHasProfile(const FunctionId &IRFuncName);
   bool isProfileUnused(const FunctionId &ProfileFuncName);
   bool functionMatchesProfileHelper(const Function &IRFunc,
                                     const FunctionId &ProfFunc);
@@ -235,14 +236,14 @@ class SampleProfileMatcher {
                               const FunctionId &ProfileFuncName,
                               bool FindMatchedProfileOnly);
   // Determine if the function matches profile by computing a similarity ratio
-  // between two callsite anchors extracted from function and profile. If it's
-  // above the threshold, the function matches the profile.
+  // between two sequences of callsite anchors extracted from function and
+  // profile. If it's above the threshold, the function matches the profile.
   bool functionMatchesProfile(Function &IRFunc, const FunctionId &ProfFunc,
                               bool FindMatchedProfileOnly);
   // Find functions that don't show in the profile or profile symbol list,
   // which are supposed to be new functions. We use them as the targets for
   // call graph matching.
-  void findNewIRFunctions();
+  void findFunctionsWithoutProfile();
   void reportOrPersistProfileStats();
 };
 } // end namespace llvm
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 62c08aa8d737f..653e303c792c7 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -144,7 +144,7 @@ void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
 }
 
 Function *
-SampleProfileMatcher::findIfFunctionIsNew(const FunctionId &IRFuncName) {
+SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName) {
   auto R = NewIRFunctions.find(IRFuncName);
   if (R == NewIRFunctions.end())
     return nullptr;
@@ -166,7 +166,7 @@ bool SampleProfileMatcher::functionMatchesProfile(
 
   // If IR function doesn't have profile and the profile is unused, try
   // matching them.
-  Function *IRFunc = findIfFunctionIsNew(IRFuncName);
+  Function *IRFunc = functionHasProfile(IRFuncName);
   if (!IRFunc || !isProfileUnused(ProfileFuncName))
     return false;
 
@@ -720,7 +720,7 @@ void SampleProfileMatcher::computeAndReportProfileStaleness() {
   }
 }
 
-void SampleProfileMatcher::findNewIRFunctions() {
+void SampleProfileMatcher::findFunctionsWithoutProfile() {
   // TODO: Support MD5 profile.
   if (FunctionSamples::UseMD5)
     return;
@@ -849,7 +849,7 @@ void SampleProfileMatcher::runOnModule() {
   ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
                                    FunctionSamples::ProfileIsCS);
   if (SalvageUnusedProfile)
-    findNewIRFunctions();
+    findFunctionsWithoutProfile();
 
   // Process the matching in top-down order so that the caller matching result
   // can be used to the callee matching.

>From 7b458b4333ee078f7b66398a80ba61231700c74a Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Mon, 24 Jun 2024 16:16:32 -0700
Subject: [PATCH 20/22] rename to FunctionsWithoutProfile

---
 llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h | 5 +++--
 llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp        | 6 +++---
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index ac1b29268d9b5..9a32e146a4a86 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -87,7 +87,8 @@ class SampleProfileMatcher {
   HashKeyMap<std::unordered_map, FunctionId, Function *> *SymbolMap;
 
   // The new functions from IR.
-  HashKeyMap<std::unordered_map, FunctionId, Function *> NewIRFunctions;
+  HashKeyMap<std::unordered_map, FunctionId, Function *>
+      FunctionsWithoutProfile;
 
   // Pointer to the Profile Symbol List in the reader.
   std::shared_ptr<ProfileSymbolList> PSL;
@@ -133,7 +134,7 @@ class SampleProfileMatcher {
     freeContainer(FuncCallsiteMatchStates);
     freeContainer(FlattenedProfiles);
 
-    freeContainer(NewIRFunctions);
+    freeContainer(FunctionsWithoutProfile);
     freeContainer(FuncToProfileNameMap);
   }
 
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index f8579a1de9202..2bf98c949d9be 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -151,8 +151,8 @@ void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
 
 Function *
 SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName) {
-  auto R = NewIRFunctions.find(IRFuncName);
-  if (R == NewIRFunctions.end())
+  auto R = FunctionsWithoutProfile.find(IRFuncName);
+  if (R == FunctionsWithoutProfile.end())
     return nullptr;
   return R->second;
 }
@@ -770,7 +770,7 @@ void SampleProfileMatcher::findFunctionsWithoutProfile() {
 
     LLVM_DEBUG(dbgs() << "Function " << CanonFName
                       << " is not in profile or profile symbol list.\n");
-    NewIRFunctions[FunctionId(CanonFName)] = &F;
+    FunctionsWithoutProfile[FunctionId(CanonFName)] = &F;
   }
 }
 

>From 15e8d0c5df2594c2f1056c9d6d5836c1246f954d Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Tue, 25 Jun 2024 10:25:14 -0700
Subject: [PATCH 21/22] refactor functionHasProfile

---
 .../Transforms/IPO/SampleProfileMatcher.h     |  5 +++--
 .../Transforms/IPO/SampleProfileMatcher.cpp   | 19 ++++++++++---------
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index 9a32e146a4a86..f2ec7c08384b4 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -225,8 +225,9 @@ class SampleProfileMatcher {
                                const AnchorMap &ProfileAnchors,
                                LocToLocMap &IRToProfileLocationMap,
                                bool RunCFGMatching, bool RunCGMatching);
-  // Return the function if it doesn't have profile, otherwise return nullptr.
-  Function *functionHasProfile(const FunctionId &IRFuncName);
+  // If the function doesn't have profile, return the pointer to the function.
+  bool functionHasProfile(const FunctionId &IRFuncName,
+                          Function *&FuncWithoutProfile);
   bool isProfileUnused(const FunctionId &ProfileFuncName);
   bool functionMatchesProfileHelper(const Function &IRFunc,
                                     const FunctionId &ProfFunc);
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 2bf98c949d9be..f0cc38f4c7b25 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -149,17 +149,17 @@ void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
   }
 }
 
-Function *
-SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName) {
+bool SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName,
+                                              Function *&FuncWithoutProfile) {
+  FuncWithoutProfile = nullptr;
   auto R = FunctionsWithoutProfile.find(IRFuncName);
-  if (R == FunctionsWithoutProfile.end())
-    return nullptr;
-  return R->second;
+  if (R != FunctionsWithoutProfile.end())
+    FuncWithoutProfile = R->second;
+  return !FuncWithoutProfile;
 }
 
 bool SampleProfileMatcher::isProfileUnused(const FunctionId &ProfileFuncName) {
-  auto F = SymbolMap->find(ProfileFuncName);
-  return F == SymbolMap->end();
+  return SymbolMap->find(ProfileFuncName) == SymbolMap->end();
 }
 
 bool SampleProfileMatcher::functionMatchesProfile(
@@ -172,8 +172,9 @@ bool SampleProfileMatcher::functionMatchesProfile(
 
   // If IR function doesn't have profile and the profile is unused, try
   // matching them.
-  Function *IRFunc = functionHasProfile(IRFuncName);
-  if (!IRFunc || !isProfileUnused(ProfileFuncName))
+  Function *IRFunc = nullptr;
+  if (functionHasProfile(IRFuncName, IRFunc) ||
+      !isProfileUnused(ProfileFuncName))
     return false;
 
   assert(FunctionId(IRFunc->getName()) != ProfileFuncName &&

>From 47c18165a67c4219f11cbca784ee0a981bae8151 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Mon, 8 Jul 2024 16:15:29 -0700
Subject: [PATCH 22/22] fix use-after-free

---
 llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h | 5 +++--
 llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp        | 3 ++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index f2ec7c08384b4..a67f158433391 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -131,9 +131,10 @@ class SampleProfileMatcher {
   void clearMatchingData() {
     // Do not clear FuncMappings, it stores IRLoc to ProfLoc remappings which
     // will be used for sample loader.
+    // Do not clear FlattenedProfiles as it contains function names referenced
+    // by FuncNameToProfNameMap. Clearing this memory could lead to a
+    // use-after-free error.
     freeContainer(FuncCallsiteMatchStates);
-    freeContainer(FlattenedProfiles);
-
     freeContainer(FunctionsWithoutProfile);
     freeContainer(FuncToProfileNameMap);
   }
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index f0cc38f4c7b25..312672e56b017 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -782,7 +782,8 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
   float Similarity = 0.0;
 
   const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc);
-  assert(FSFlattened && "Flattened profile sample is null");
+  if (!FSFlattened)
+    return false;
   // The check for similarity or checksum may not be reliable if the function is
   // tiny, we use the number of basic block as a proxy for the function
   // complexity and skip the matching if it's too small.



More information about the llvm-commits mailing list