[llvm] [SampleFDO] Read call-graph matching recovered top-level function profile (PR #101053)

Lei Wang via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 27 12:02:36 PDT 2024


https://github.com/wlei-llvm updated https://github.com/llvm/llvm-project/pull/101053

>From 070702c9be2fb437b0765532c03e98c642951906 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Mon, 29 Jul 2024 10:17:46 -0700
Subject: [PATCH 1/8] [SampleFDO] Read top-level functions recovered by
 call-graph matching

---
 .../llvm/ProfileData/SampleProfReader.h       |  47 ++++
 .../Transforms/IPO/SampleProfileMatcher.h     |   1 +
 llvm/lib/ProfileData/SampleProfReader.cpp     | 224 +++++++++------
 .../Transforms/IPO/SampleProfileMatcher.cpp   |  64 ++++-
 ...seudo-probe-stale-profile-toplev-func.prof |  23 ++
 .../pseudo-probe-stale-profile-toplev-func.ll | 258 ++++++++++++++++++
 6 files changed, 521 insertions(+), 96 deletions(-)
 create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-toplev-func.prof
 create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll

diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index f4bdc6525308d2..b124233a02d11c 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -392,6 +392,15 @@ class SampleProfileReader {
   /// which doesn't support loading function profiles on demand.
   virtual bool collectFuncsFromModule() { return false; }
 
+  /// Read the profiles of the functions in \p FuncsToUse on demand and store
+  /// them into \p Profiles. This default implementation reads nothing and
+  /// returns sampleprof_error::not_implemented;
+  /// SampleProfileReaderExtBinaryBase overrides it.
+  virtual std::error_code readOnDemand(const DenseSet<StringRef> &FuncsToUse,
+                                       SampleProfileMap &Profiles) {
+    return sampleprof_error::not_implemented;
+  }
+
   /// Print all the profiles on stream \p OS.
   void dump(raw_ostream &OS = dbgs());
 
@@ -413,6 +418,16 @@ class SampleProfileReader {
     if (It != Profiles.end())
       return &It->second;
 
+    if (FuncNameToProfNameMap && !FuncNameToProfNameMap->empty()) {
+      auto R = FuncNameToProfNameMap->find(FunctionId(Fname));
+      if (R != FuncNameToProfNameMap->end()) {
+        Fname = R->second.stringRef();
+        auto It = Profiles.find(FunctionId(Fname));
+        if (It != Profiles.end())
+          return &It->second;
+      }
+    }
+
     if (Remapper) {
       if (auto NameInProfile = Remapper->lookUpNameInProfile(Fname)) {
         auto It = Profiles.find(FunctionId(*NameInProfile));
@@ -494,6 +509,11 @@ class SampleProfileReader {
 
   void setModule(const Module *Mod) { M = Mod; }
 
+  void setFuncNameToProfNameMap(
+      HashKeyMap<std::unordered_map, FunctionId, FunctionId> *FPMap) {
+    FuncNameToProfNameMap = FPMap;
+  }
+
 protected:
   /// Map every function to its associated profile.
   ///
@@ -522,6 +542,21 @@ class SampleProfileReader {
 
   std::unique_ptr<SampleProfileReaderItaniumRemapper> Remapper;
 
+  // A map pointer to the FuncNameToProfNameMap in SampleProfileLoader,
+  // which maps the function name to the matched profile name. This is used
+  // for sample loader to look up profile using the new name.
+  HashKeyMap<std::unordered_map, FunctionId, FunctionId>
+      *FuncNameToProfNameMap = nullptr;
+
+  // A map from a function's context hash to its meta data section range, used
+  // for on-demand read function profile metadata.
+  std::unordered_map<uint64_t, std::pair<const uint8_t *, const uint8_t *>>
+      FContextToMetaDataSecRange;
+
+  std::pair<const uint8_t *, const uint8_t *> LBRProfileSecRange;
+
+  bool ProfileHasAttribute = false;
+
   /// \brief Whether samples are collected based on pseudo probes.
   bool ProfileIsProbeBased = false;
 
@@ -621,6 +656,8 @@ class SampleProfileReaderBinary : public SampleProfileReader {
 
   /// Read the next function profile instance.
   std::error_code readFuncProfile(const uint8_t *Start);
+  std::error_code readFuncProfile(const uint8_t *Start,
+                                  SampleProfileMap &Profiles);
 
   /// Read the contents of the given profile instance.
   std::error_code readProfile(FunctionSamples &FProfile);
@@ -720,11 +757,15 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
   std::error_code readSecHdrTableEntry(uint64_t Idx);
   std::error_code readSecHdrTable();
 
+  std::error_code readFuncMetadataOnDemand(bool ProfileHasAttribute,
+                                           SampleProfileMap &Profiles);
   std::error_code readFuncMetadata(bool ProfileHasAttribute);
   std::error_code readFuncMetadata(bool ProfileHasAttribute,
                                    FunctionSamples *FProfile);
   std::error_code readFuncOffsetTable();
   std::error_code readFuncProfiles();
+  std::error_code readFuncProfiles(const DenseSet<StringRef> &FuncsToUse,
+                                   SampleProfileMap &Profiles);
   std::error_code readNameTableSec(bool IsMD5, bool FixedLengthMD5);
   std::error_code readCSNameTableSec();
   std::error_code readProfileSymbolList();
@@ -776,6 +817,12 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
   /// the reader has been given a module.
   bool collectFuncsFromModule() override;
 
+  /// Read the profiles on-demand for the given functions. This is used after
+  /// stale call graph matching finds new functions whose profiles aren't read
+  /// at the beginning and we need to re-read the profiles.
+  std::error_code readOnDemand(const DenseSet<StringRef> &FuncsToUse,
+                               SampleProfileMap &Profiles) override;
+
   std::unique_ptr<ProfileSymbolList> getProfileSymbolList() override {
     return std::move(ProfSymList);
   };
diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index a67f158433391c..67edea42e2fe14 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -198,6 +198,7 @@ class SampleProfileMatcher {
   // function and all inlinees.
   void countMismatchedCallsiteSamples(const FunctionSamples &FS);
   void computeAndReportProfileStaleness();
+  void UpdateSampleLoaderWithRecoveredProfiles();
 
   LocToLocMap &getIRToProfileLocationMap(const Function &F) {
     auto Ret = FuncMappings.try_emplace(
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 4752465fc072e0..f555da866f36eb 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -653,7 +653,8 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
 }
 
 std::error_code
-SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
+SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start,
+                                           SampleProfileMap &Profiles) {
   Data = Start;
   auto NumHeadSamples = readNumber<uint64_t>();
   if (std::error_code EC = NumHeadSamples.getError())
@@ -678,6 +679,11 @@ SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
   return sampleprof_error::success;
 }
 
+std::error_code
+SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
+  return readFuncProfile(Start, Profiles);
+}
+
 std::error_code SampleProfileReaderBinary::readImpl() {
   ProfileIsFS = ProfileIsFSDisciminator;
   FunctionSamples::ProfileIsFS = ProfileIsFS;
@@ -725,6 +731,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
     break;
   }
   case SecLBRProfile:
+    LBRProfileSecRange = std::make_pair(Data, End);
     if (std::error_code EC = readFuncProfiles())
       return EC;
     break;
@@ -745,9 +752,9 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
     ProfileIsProbeBased =
         hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased);
     FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
-    bool HasAttribute =
+    ProfileHasAttribute =
         hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute);
-    if (std::error_code EC = readFuncMetadata(HasAttribute))
+    if (std::error_code EC = readFuncMetadata(ProfileHasAttribute))
       return EC;
     break;
   }
@@ -791,6 +798,25 @@ bool SampleProfileReaderExtBinaryBase::useFuncOffsetList() const {
   return false;
 }
 
+/// Read the profiles of the functions in \p FuncsToUse from the recorded
+/// LBR profile section into \p Profiles, then read the recorded function
+/// metadata for every profile in \p Profiles. Moves the Data/End cursor.
+std::error_code SampleProfileReaderExtBinaryBase::readOnDemand(
+    const DenseSet<StringRef> &FuncsToUse, SampleProfileMap &Profiles) {
+  // With nothing requested there is nothing to load. Bail out before
+  // readFuncProfiles runs: its Remapper-based paths also accept any name for
+  // which Remapper->exist() holds, regardless of FuncsToUse.
+  if (FuncsToUse.empty())
+    return sampleprof_error::success;
+
+  Data = LBRProfileSecRange.first;
+  End = LBRProfileSecRange.second;
+  if (std::error_code EC = readFuncProfiles(FuncsToUse, Profiles))
+    return EC;
+  End = Data;
+
+  return readFuncMetadataOnDemand(ProfileHasAttribute, Profiles);
+}
 
 bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() {
   if (!M)
@@ -838,6 +858,100 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
 return sampleprof_error::success;
 }
 
+/// Load into \p Profiles the profiles of the functions named in \p FuncsToUse.
+/// Data must point at the start of the function profile section: the offsets
+/// recorded in FuncOffsetList / FuncOffsetTable are applied relative to it.
+/// \returns the first error reported by readFuncProfile, or
+/// sampleprof_error::success otherwise.
+std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles(
+    const DenseSet<StringRef> &FuncsToUse, SampleProfileMap &Profiles) {
+  const uint8_t *Start = Data;
+
+  if (Remapper) {
+    for (auto Name : FuncsToUse) {
+      Remapper->insert(Name);
+    }
+  }
+
+  if (ProfileIsCS) {
+    assert(useFuncOffsetList());
+    DenseSet<uint64_t> FuncGuidsToUse;
+    if (useMD5()) {
+      for (auto Name : FuncsToUse)
+        FuncGuidsToUse.insert(Function::getGUID(Name));
+    }
+
+    // For each function in current module, load all context profiles for
+    // the function as well as their callee contexts which can help profile
+    // guided importing for ThinLTO. This can be achieved by walking
+    // through an ordered context container, where contexts are laid out
+    // as if they were walked in preorder of a context trie. While
+    // traversing the trie, a link to the highest common ancestor node is
+    // kept so that all of its descendants will be loaded.
+    const SampleContext *CommonContext = nullptr;
+    for (const auto &NameOffset : FuncOffsetList) {
+      const auto &FContext = NameOffset.first;
+      FunctionId FName = FContext.getFunction();
+      StringRef FNameString;
+      if (!useMD5())
+        FNameString = FName.stringRef();
+
+      // For function in the current module, keep its farthest ancestor
+      // context. This can be used to load itself and its child and
+      // sibling contexts.
+      if ((useMD5() && FuncGuidsToUse.count(FName.getHashCode())) ||
+          (!useMD5() && (FuncsToUse.count(FNameString) ||
+                         (Remapper && Remapper->exist(FNameString))))) {
+        if (!CommonContext || !CommonContext->isPrefixOf(FContext))
+          CommonContext = &FContext;
+      }
+
+      if (CommonContext == &FContext ||
+          (CommonContext && CommonContext->isPrefixOf(FContext))) {
+        // Load profile for the current context which originated from
+        // the common ancestor.
+        const uint8_t *FuncProfileAddr = Start + NameOffset.second;
+        if (std::error_code EC = readFuncProfile(FuncProfileAddr, Profiles))
+          return EC;
+      }
+    }
+  } else if (useMD5()) {
+    assert(!useFuncOffsetList());
+    for (auto Name : FuncsToUse) {
+      auto GUID = MD5Hash(Name);
+      auto iter = FuncOffsetTable.find(GUID);
+      if (iter == FuncOffsetTable.end())
+        continue;
+      const uint8_t *FuncProfileAddr = Start + iter->second;
+      if (std::error_code EC = readFuncProfile(FuncProfileAddr, Profiles))
+        return EC;
+    }
+  } else if (Remapper) {
+    assert(useFuncOffsetList());
+    for (auto NameOffset : FuncOffsetList) {
+      SampleContext FContext(NameOffset.first);
+      auto FuncName = FContext.getFunction();
+      StringRef FuncNameStr = FuncName.stringRef();
+      if (!FuncsToUse.count(FuncNameStr) && !Remapper->exist(FuncNameStr))
+        continue;
+      const uint8_t *FuncProfileAddr = Start + NameOffset.second;
+      if (std::error_code EC = readFuncProfile(FuncProfileAddr, Profiles))
+        return EC;
+    }
+  } else {
+    assert(!useFuncOffsetList());
+    for (auto Name : FuncsToUse) {
+      auto iter = FuncOffsetTable.find(MD5Hash(Name));
+      if (iter == FuncOffsetTable.end())
+        continue;
+      const uint8_t *FuncProfileAddr = Start + iter->second;
+      if (std::error_code EC = readFuncProfile(FuncProfileAddr, Profiles))
+        return EC;
+    }
+  }
+  return sampleprof_error::success;
+}
+
 std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
   // Collect functions used by current module if the Reader has been
   // given a module.
@@ -849,7 +958,6 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
 
   // When LoadFuncsToBeUsed is false, we are using LLVM tool, need to read all
   // profiles.
-  const uint8_t *Start = Data;
   if (!LoadFuncsToBeUsed) {
     while (Data < End) {
       if (std::error_code EC = readFuncProfile(Data))
@@ -858,88 +966,8 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
     assert(Data == End && "More data is read than expected");
   } else {
     // Load function profiles on demand.
-    if (Remapper) {
-      for (auto Name : FuncsToUse) {
-        Remapper->insert(Name);
-      }
-    }
-
-    if (ProfileIsCS) {
-      assert(useFuncOffsetList());
-      DenseSet<uint64_t> FuncGuidsToUse;
-      if (useMD5()) {
-        for (auto Name : FuncsToUse)
-          FuncGuidsToUse.insert(Function::getGUID(Name));
-      }
-
-      // For each function in current module, load all context profiles for
-      // the function as well as their callee contexts which can help profile
-      // guided importing for ThinLTO. This can be achieved by walking
-      // through an ordered context container, where contexts are laid out
-      // as if they were walked in preorder of a context trie. While
-      // traversing the trie, a link to the highest common ancestor node is
-      // kept so that all of its decendants will be loaded.
-      const SampleContext *CommonContext = nullptr;
-      for (const auto &NameOffset : FuncOffsetList) {
-        const auto &FContext = NameOffset.first;
-        FunctionId FName = FContext.getFunction();
-        StringRef FNameString;
-        if (!useMD5())
-          FNameString = FName.stringRef();
-
-        // For function in the current module, keep its farthest ancestor
-        // context. This can be used to load itself and its child and
-        // sibling contexts.
-        if ((useMD5() && FuncGuidsToUse.count(FName.getHashCode())) ||
-            (!useMD5() && (FuncsToUse.count(FNameString) ||
-                           (Remapper && Remapper->exist(FNameString))))) {
-          if (!CommonContext || !CommonContext->isPrefixOf(FContext))
-            CommonContext = &FContext;
-        }
-
-        if (CommonContext == &FContext ||
-            (CommonContext && CommonContext->isPrefixOf(FContext))) {
-          // Load profile for the current context which originated from
-          // the common ancestor.
-          const uint8_t *FuncProfileAddr = Start + NameOffset.second;
-          if (std::error_code EC = readFuncProfile(FuncProfileAddr))
-            return EC;
-        }
-      }
-    } else if (useMD5()) {
-      assert(!useFuncOffsetList());
-      for (auto Name : FuncsToUse) {
-        auto GUID = MD5Hash(Name);
-        auto iter = FuncOffsetTable.find(GUID);
-        if (iter == FuncOffsetTable.end())
-          continue;
-        const uint8_t *FuncProfileAddr = Start + iter->second;
-        if (std::error_code EC = readFuncProfile(FuncProfileAddr))
-          return EC;
-      }
-    } else if (Remapper) {
-      assert(useFuncOffsetList());
-      for (auto NameOffset : FuncOffsetList) {
-        SampleContext FContext(NameOffset.first);
-        auto FuncName = FContext.getFunction();
-        StringRef FuncNameStr = FuncName.stringRef();
-        if (!FuncsToUse.count(FuncNameStr) && !Remapper->exist(FuncNameStr))
-          continue;
-        const uint8_t *FuncProfileAddr = Start + NameOffset.second;
-        if (std::error_code EC = readFuncProfile(FuncProfileAddr))
-          return EC;
-      }
-    } else {
-      assert(!useFuncOffsetList());
-      for (auto Name : FuncsToUse) {
-        auto iter = FuncOffsetTable.find(MD5Hash(Name));
-        if (iter == FuncOffsetTable.end())
-          continue;
-        const uint8_t *FuncProfileAddr = Start + iter->second;
-        if (std::error_code EC = readFuncProfile(FuncProfileAddr))
-          return EC;
-      }
-    }
+    if (std::error_code EC = readFuncProfiles(FuncsToUse, Profiles))
+      return EC;
     Data = End;
   }
   assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
@@ -1245,6 +1273,34 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
   return sampleprof_error::success;
 }
 
+/// Read function metadata into each profile of \p Profiles whose context
+/// hash has a byte range recorded in FContextToMetaDataSecRange. Data and
+/// End are moved onto that range before the metadata is parsed; profiles
+/// without a recorded range are skipped.
+std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadataOnDemand(
+    bool ProfileHasAttribute, SampleProfileMap &Profiles) {
+  // No range was recorded, so no profile can be matched.
+  if (FContextToMetaDataSecRange.empty())
+    return sampleprof_error::success;
+
+  for (auto &ContextAndProfile : Profiles) {
+    FunctionSamples &FS = ContextAndProfile.second;
+    const auto RangeIt =
+        FContextToMetaDataSecRange.find(FS.getContext().getHashCode());
+    if (RangeIt == FContextToMetaDataSecRange.end())
+      continue;
+
+    // Point the cursor at the bytes recorded for this context.
+    const auto &[RangeBegin, RangeEnd] = RangeIt->second;
+    Data = RangeBegin;
+    End = RangeEnd;
+    if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, &FS))
+      return EC;
+    assert(Data == End && "More data is read than expected");
+  }
+  return sampleprof_error::success;
+}
+
 std::error_code
 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
   while (Data < End) {
@@ -1257,8 +1306,11 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
     if (It != Profiles.end())
       FProfile = &It->second;
 
+    const uint8_t *Start = Data;
     if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile))
       return EC;
+
+    FContextToMetaDataSecRange[FContext.getHashCode()] = {Start, Data};
   }
 
   assert(Data == End && "More data is read than expected");
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 312672e56b0170..b9adc6a0631b80 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -782,6 +782,26 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
   float Similarity = 0.0;
 
   const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc);
+  // Check if the function is top-level function. For extended profile format,
+  // if a function profile is unused and it's top-level, even if the profile is
+  // matched, it's not found in the profile. This is because sample reader only
+  // read the used profile at the beginning, we need to read the profile
+  // on-demand. Also save it into the FlattenedProfiles for future look-up.
+  if (!FSFlattened) {
+    DenseSet<StringRef> TopLevelFunc;
+    TopLevelFunc.insert(ProfFunc.stringRef());
+    SampleProfileMap TopLevelProfile;
+    Reader.readOnDemand(TopLevelFunc, TopLevelProfile);
+    assert(TopLevelProfile.size() <= 1 &&
+           "More than one profile is found for top-level function");
+    if (!TopLevelProfile.empty()) {
+      LLVM_DEBUG(dbgs() << "Read top-level function " << ProfFunc
+                        << " for call-graph matching\n");
+      auto &FS = TopLevelProfile.begin()->second;
+      FSFlattened =
+          &(FlattenedProfiles.create(FS.getContext()) = std::move(FS));
+    }
+  }
   if (!FSFlattened)
     return false;
   // The check for similarity or checksum may not be reliable if the function is
@@ -863,6 +883,47 @@ bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc,
   return Matched;
 }
 
+// Hand the results of call-graph matching over to the sample loader: record
+// each matched IR function under its profile name, ask the reader to load the
+// matched profiles on demand, merge what was loaded into the reader's profile
+// map, and give the reader the function-name-to-profile-name map.
+void SampleProfileMatcher::UpdateSampleLoaderWithRecoveredProfiles() {
+  // Profile names that were matched to an IR function.
+  DenseSet<StringRef> RecoveredFuncs;
+  for (auto &Match : FuncToProfileNameMap) {
+    auto &IRFunc = Match.first;
+    auto &ProfName = Match.second;
+    assert(IRFunc && "New function is null");
+    RecoveredFuncs.insert(ProfName.stringRef());
+
+    FunctionId IRFuncName(IRFunc->getName());
+    FuncNameToProfNameMap->emplace(IRFuncName, ProfName);
+
+    // Drop the entry keyed by the IR name before adding the one keyed by the
+    // profile name, so that the function is not processed twice.
+    SymbolMap->erase(IRFuncName);
+    SymbolMap->emplace(ProfName, IRFunc);
+  }
+
+  // The extended binary reader initially loads top-level profiles only for
+  // functions of the new build, so the recovered profiles, which come from
+  // the old build, have to be read now. The returned error code is not
+  // inspected here.
+  SampleProfileMap ReReadProfiles;
+  Reader.readOnDemand(RecoveredFuncs, ReReadProfiles);
+  auto &LoadedProfiles = Reader.getProfiles();
+  for (auto &Entry : ReReadProfiles) {
+    auto &FS = Entry.second;
+    LLVM_DEBUG(dbgs() << "Top-level function " << FS.getFunction()
+                      << " is recovered and re-read by the sample reader.\n");
+    auto &Ctx = FS.getContext();
+    assert(LoadedProfiles.find(Ctx) == LoadedProfiles.end() &&
+           "Top level profile is found for the unused profile");
+    LoadedProfiles.create(Ctx) = std::move(FS);
+  }
+  Reader.setFuncNameToProfNameMap(FuncNameToProfNameMap);
+}
+
 void SampleProfileMatcher::runOnModule() {
   ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
                                    FunctionSamples::ProfileIsCS);
@@ -880,17 +933,8 @@ void SampleProfileMatcher::runOnModule() {
     runOnFunction(*F);
   }
 
-  // Update the data in SampleLoader.
   if (SalvageUnusedProfile)
-    for (auto &I : FuncToProfileNameMap) {
-      assert(I.first && "New function is null");
-      FunctionId FuncName(I.first->getName());
-      FuncNameToProfNameMap->emplace(FuncName, I.second);
-      // We need to remove the old entry to avoid duplicating the function
-      // processing.
-      SymbolMap->erase(FuncName);
-      SymbolMap->emplace(I.second, I.first);
-    }
+    UpdateSampleLoaderWithRecoveredProfiles();
 
   if (SalvageStaleProfile)
     distributeIRToProfileLocationMap();
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-toplev-func.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-toplev-func.prof
new file mode 100644
index 00000000000000..a1bba5fc88de0e
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-toplev-func.prof
@@ -0,0 +1,23 @@
+foo:2724522:51
+ 1: 51
+ 2: 452674
+ 3: 47
+ 4: 497875
+ 6: 415959
+ 10: 452623
+ 11: 452687 bar:452687
+ 12: 452623
+ 13: 47
+ !CFGChecksum: 281718392333557
+bar:452687:452687
+ 1: 452687
+ !CFGChecksum: 4294967295
+main:204:0
+ 1: 0
+ 2: 51
+ 3: 0
+ 4: 51
+ 5: 51 foo:51
+ 6: 51
+ 7: 0
+ !CFGChecksum: 281582264815352
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll
new file mode 100644
index 00000000000000..f1f2506e08d2a5
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll
@@ -0,0 +1,258 @@
+; REQUIRES: x86_64-linux
+; REQUIRES: asserts
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-toplev-func.prof --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 2>&1 | FileCheck %s -check-prefix=CHECK-TEXT
+; RUN: llvm-profdata merge --sample %S/Inputs/pseudo-probe-stale-profile-toplev-func.prof -extbinary -o %t.extbinary
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.extbinary --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 2>&1 | FileCheck %s -check-prefix=CHECK-EXTBIN
+
+; CHECK-TEXT: Run stale profile matching for main
+; CHECK-TEXT-NOT: Read top-level function foo for call-graph matching
+; CHECK-TEXT: The checksums for foo_rename(IR) and foo(Profile) match.
+; CHECK-TEXT: Function:foo_rename matches profile:foo
+; CHECK-TEXT: Run stale profile matching for foo_rename
+; CHECK-TEXT-NOT: Top-level function foo is recovered and re-read by the sample reader.
+; CHECK-TEXT: (1/3) of functions' profile are matched and (2724522/3177413) of samples are reused by call graph matching.
+
+; CHECK-TEXT: Processing Function main
+; CHECK-TEXT:     5:  call void @foo_rename(), !dbg ![[#]] - weight: 51
+; CHECK-TEXT: Processing Function foo_rename
+; CHECK-TEXT:     11:  %call = call i32 @bar(i32 noundef %5), !dbg ![[#]] - weight: 452687
+
+
+; CHECK-EXTBIN: Run stale profile matching for main
+; CHECK-EXTBIN: Read top-level function foo for call-graph matching
+; CHECK-EXTBIN: The checksums for foo_rename(IR) and foo(Profile) match.
+; CHECK-EXTBIN: Function:foo_rename matches profile:foo
+; CHECK-EXTBIN: Run stale profile matching for foo_rename
+; CHECK-EXTBIN: Top-level function foo is recovered and re-read by the sample reader.
+; CHECK-EXTBIN: (1/3) of functions' profile are matched and (2724522/3177413) of samples are reused by call graph matching.
+
+; CHECK-EXTBIN: Processing Function main
+; CHECK-EXTBIN:     5:  call void @foo_rename(), !dbg ![[#]] - weight: 51
+; CHECK-EXTBIN: Processing Function foo_rename
+; CHECK-EXTBIN:     11:  %call = call i32 @bar(i32 noundef %5), !dbg ![[#]] - weight: 452687
+
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@x = dso_local global i32 0, align 4, !dbg !0
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local i32 @bar(i32 noundef %x) #0 !dbg !18 {
+entry:
+    #dbg_value(i32 %x, !22, !DIExpression(), !23)
+  call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 1, i32 0, i64 -1), !dbg !24
+  %add = add nsw i32 %x, 1, !dbg !25
+  ret i32 %add, !dbg !26
+}
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @foo_rename() #0 !dbg !27 {
+entry:
+  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 1, i32 0, i64 -1), !dbg !33
+    #dbg_value(i32 0, !31, !DIExpression(), !34)
+  br label %for.cond, !dbg !35
+
+for.cond:                                         ; preds = %if.end7, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc9, %if.end7 ], !dbg !36
+    #dbg_value(i32 %i.0, !31, !DIExpression(), !34)
+  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 2, i32 0, i64 -1), !dbg !37
+  %cmp = icmp slt i32 %i.0, 10000, !dbg !39
+  br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !40
+
+for.cond.cleanup:                                 ; preds = %for.cond
+  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 3, i32 0, i64 -1), !dbg !41
+  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 13, i32 0, i64 -1), !dbg !42
+  ret void, !dbg !42
+
+for.body:                                         ; preds = %for.cond
+  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 4, i32 0, i64 -1), !dbg !43
+  %0 = load volatile i32, ptr @x, align 4, !dbg !43, !tbaa !46
+  %rem = srem i32 %0, 3, !dbg !50
+  %cmp1 = icmp eq i32 %rem, 1, !dbg !51
+  br i1 %cmp1, label %if.then, label %if.else, !dbg !52
+
+if.then:                                          ; preds = %for.body
+  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 5, i32 0, i64 -1), !dbg !53
+  %1 = load volatile i32, ptr @x, align 4, !dbg !53, !tbaa !46
+  %add = add nsw i32 %1, 100, !dbg !53
+  store volatile i32 %add, ptr @x, align 4, !dbg !53, !tbaa !46
+  br label %if.end7, !dbg !54
+
+if.else:                                          ; preds = %for.body
+  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 6, i32 0, i64 -1), !dbg !55
+  %2 = load volatile i32, ptr @x, align 4, !dbg !55, !tbaa !46
+  %rem2 = srem i32 %2, 2, !dbg !57
+  %cmp3 = icmp eq i32 %rem2, 1, !dbg !58
+  br i1 %cmp3, label %if.then4, label %if.else6, !dbg !59
+
+if.then4:                                         ; preds = %if.else
+  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 7, i32 0, i64 -1), !dbg !60
+  %3 = load volatile i32, ptr @x, align 4, !dbg !60, !tbaa !46
+  %add5 = add nsw i32 %3, 10, !dbg !60
+  store volatile i32 %add5, ptr @x, align 4, !dbg !60, !tbaa !46
+  br label %if.end7, !dbg !61
+
+if.else6:                                         ; preds = %if.else
+  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 8, i32 0, i64 -1), !dbg !62
+  %4 = load volatile i32, ptr @x, align 4, !dbg !62, !tbaa !46
+  %inc = add nsw i32 %4, 1, !dbg !62
+  store volatile i32 %inc, ptr @x, align 4, !dbg !62, !tbaa !46
+  br label %if.end7
+
+if.end7:                                          ; preds = %if.then4, %if.else6, %if.then
+  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 10, i32 0, i64 -1), !dbg !63
+  %5 = load volatile i32, ptr @x, align 4, !dbg !63, !tbaa !46
+  %call = call i32 @bar(i32 noundef %5), !dbg !64
+  %6 = load volatile i32, ptr @x, align 4, !dbg !66, !tbaa !46
+  %add8 = add nsw i32 %6, %call, !dbg !66
+  store volatile i32 %add8, ptr @x, align 4, !dbg !66, !tbaa !46
+  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 12, i32 0, i64 -1), !dbg !67
+  %inc9 = add nsw i32 %i.0, 1, !dbg !67
+    #dbg_value(i32 %inc9, !31, !DIExpression(), !34)
+  br label %for.cond, !dbg !68, !llvm.loop !69
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
+
+; Function Attrs: nounwind uwtable
+define dso_local i32 @main() #2 !dbg !72 {
+entry:
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg !78
+    #dbg_value(i32 0, !76, !DIExpression(), !79)
+  br label %for.cond, !dbg !80
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ], !dbg !81
+    #dbg_value(i32 %i.0, !76, !DIExpression(), !79)
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !82
+  %cmp = icmp slt i32 %i.0, 100000, !dbg !84
+  br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !85
+
+for.cond.cleanup:                                 ; preds = %for.cond
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg !86
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 7, i32 0, i64 -1), !dbg !87
+  ret i32 0, !dbg !87
+
+for.body:                                         ; preds = %for.cond
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg !88
+  call void @foo_rename(), !dbg !90
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 6, i32 0, i64 -1), !dbg !92
+  %inc = add nsw i32 %i.0, 1, !dbg !92
+    #dbg_value(i32 %inc, !76, !DIExpression(), !79)
+  br label %for.cond, !dbg !93, !llvm.loop !94
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare void @llvm.pseudoprobe(i64, i64, i32, i64) #3
+
+attributes #0 = { noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
+attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #2 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
+attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!7, !8, !9, !10, !11, !12, !13}
+!llvm.ident = !{!14}
+!llvm.pseudo_probe_desc = !{!15, !16, !17}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 20.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None)
+!3 = !DIFile(filename: "test_rename.c", directory: "/home", checksumkind: CSK_MD5, checksum: "5c9304100fda7763e5a474c768d3b005")
+!4 = !{!0}
+!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6)
+!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!7 = !{i32 7, !"Dwarf Version", i32 5}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{i32 1, !"wchar_size", i32 4}
+!10 = !{i32 8, !"PIC Level", i32 2}
+!11 = !{i32 7, !"PIE Level", i32 2}
+!12 = !{i32 7, !"uwtable", i32 2}
+!13 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
+!14 = !{!"clang version 20.0.0"}
+!15 = !{i64 -2012135647395072713, i64 4294967295, !"bar"}
+!16 = !{i64 -2115950948644264162, i64 281718392333557, !"foo_rename"}
+!17 = !{i64 -2624081020897602054, i64 281582264815352, !"main"}
+!18 = distinct !DISubprogram(name: "bar", scope: !3, file: !3, line: 3, type: !19, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21)
+!19 = !DISubroutineType(types: !20)
+!20 = !{!6, !6}
+!21 = !{!22}
+!22 = !DILocalVariable(name: "x", arg: 1, scope: !18, file: !3, line: 3, type: !6)
+!23 = !DILocation(line: 0, scope: !18)
+!24 = !DILocation(line: 4, column: 10, scope: !18)
+!25 = !DILocation(line: 4, column: 12, scope: !18)
+!26 = !DILocation(line: 4, column: 3, scope: !18)
+!27 = distinct !DISubprogram(name: "foo_rename", scope: !3, file: !3, line: 7, type: !28, scopeLine: 7, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !30)
+!28 = !DISubroutineType(types: !29)
+!29 = !{null}
+!30 = !{!31}
+!31 = !DILocalVariable(name: "i", scope: !32, file: !3, line: 8, type: !6)
+!32 = distinct !DILexicalBlock(scope: !27, file: !3, line: 8, column: 3)
+!33 = !DILocation(line: 8, column: 12, scope: !32)
+!34 = !DILocation(line: 0, scope: !32)
+!35 = !DILocation(line: 8, column: 8, scope: !32)
+!36 = !DILocation(line: 8, scope: !32)
+!37 = !DILocation(line: 8, column: 19, scope: !38)
+!38 = distinct !DILexicalBlock(scope: !32, file: !3, line: 8, column: 3)
+!39 = !DILocation(line: 8, column: 21, scope: !38)
+!40 = !DILocation(line: 8, column: 3, scope: !32)
+!41 = !DILocation(line: 0, scope: !27)
+!42 = !DILocation(line: 17, column: 1, scope: !27)
+!43 = !DILocation(line: 9, column: 10, scope: !44)
+!44 = distinct !DILexicalBlock(scope: !45, file: !3, line: 9, column: 10)
+!45 = distinct !DILexicalBlock(scope: !38, file: !3, line: 8, column: 39)
+!46 = !{!47, !47, i64 0}
+!47 = !{!"int", !48, i64 0}
+!48 = !{!"omnipotent char", !49, i64 0}
+!49 = !{!"Simple C/C++ TBAA"}
+!50 = !DILocation(line: 9, column: 12, scope: !44)
+!51 = !DILocation(line: 9, column: 16, scope: !44)
+!52 = !DILocation(line: 9, column: 10, scope: !45)
+!53 = !DILocation(line: 10, column: 10, scope: !44)
+!54 = !DILocation(line: 10, column: 8, scope: !44)
+!55 = !DILocation(line: 11, column: 16, scope: !56)
+!56 = distinct !DILexicalBlock(scope: !44, file: !3, line: 11, column: 16)
+!57 = !DILocation(line: 11, column: 18, scope: !56)
+!58 = !DILocation(line: 11, column: 22, scope: !56)
+!59 = !DILocation(line: 11, column: 16, scope: !44)
+!60 = !DILocation(line: 12, column: 10, scope: !56)
+!61 = !DILocation(line: 12, column: 8, scope: !56)
+!62 = !DILocation(line: 14, column: 9, scope: !56)
+!63 = !DILocation(line: 15, column: 15, scope: !45)
+!64 = !DILocation(line: 15, column: 11, scope: !65)
+!65 = !DILexicalBlockFile(scope: !45, file: !3, discriminator: 455082079)
+!66 = !DILocation(line: 15, column: 8, scope: !45)
+!67 = !DILocation(line: 8, column: 35, scope: !38)
+!68 = !DILocation(line: 8, column: 3, scope: !38)
+!69 = distinct !{!69, !40, !70, !71}
+!70 = !DILocation(line: 16, column: 3, scope: !32)
+!71 = !{!"llvm.loop.mustprogress"}
+!72 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 19, type: !73, scopeLine: 19, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !75)
+!73 = !DISubroutineType(types: !74)
+!74 = !{!6}
+!75 = !{!76}
+!76 = !DILocalVariable(name: "i", scope: !77, file: !3, line: 20, type: !6)
+!77 = distinct !DILexicalBlock(scope: !72, file: !3, line: 20, column: 3)
+!78 = !DILocation(line: 20, column: 12, scope: !77)
+!79 = !DILocation(line: 0, scope: !77)
+!80 = !DILocation(line: 20, column: 8, scope: !77)
+!81 = !DILocation(line: 20, scope: !77)
+!82 = !DILocation(line: 20, column: 19, scope: !83)
+!83 = distinct !DILexicalBlock(scope: !77, file: !3, line: 20, column: 3)
+!84 = !DILocation(line: 20, column: 21, scope: !83)
+!85 = !DILocation(line: 20, column: 3, scope: !77)
+!86 = !DILocation(line: 0, scope: !72)
+!87 = !DILocation(line: 23, column: 1, scope: !72)
+!88 = !DILocation(line: 21, column: 7, scope: !89)
+!89 = distinct !DILexicalBlock(scope: !83, file: !3, line: 20, column: 40)
+!90 = !DILocation(line: 21, column: 7, scope: !91)
+!91 = !DILexicalBlockFile(scope: !89, file: !3, discriminator: 455082031)
+!92 = !DILocation(line: 20, column: 36, scope: !83)
+!93 = !DILocation(line: 20, column: 3, scope: !83)
+!94 = distinct !{!94, !85, !95, !71}
+!95 = !DILocation(line: 22, column: 3, scope: !77)

>From 9a420e317601ab13d823c7b4be8fe93a5aaeee42 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Tue, 13 Aug 2024 00:17:42 -0700
Subject: [PATCH 2/8] addressing comments

---
 .../llvm/ProfileData/SampleProfReader.h       |  27 +-
 .../Transforms/IPO/SampleProfileMatcher.h     |   2 +-
 llvm/lib/ProfileData/SampleProfReader.cpp     |  24 +-
 .../Transforms/IPO/SampleProfileMatcher.cpp   |  43 ++--
 ...seudo-probe-stale-profile-toplev-func.prof |   2 +-
 ...eudo-probe-stale-profile-toplev-func-cp.ll | 147 +++++++++++
 .../pseudo-probe-stale-profile-toplev-func.ll | 233 ++++++------------
 7 files changed, 265 insertions(+), 213 deletions(-)
 create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func-cp.ll

diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index b124233a02d11c..00e4e7096ab7b4 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -380,6 +380,13 @@ class SampleProfileReader {
     return sampleprof_error::success;
   }
 
+  /// Read sample profiles for the given functions. Currently it's only used
+  /// for the extended binary format to load the profiles on demand.
+  virtual std::error_code read(const DenseSet<StringRef> &FuncsToUse,
+                               SampleProfileMap &Profiles) {
+    return sampleprof_error::not_implemented;
+  };
+
   /// The implementaion to read sample profiles from the associated file.
   virtual std::error_code readImpl() = 0;
 
@@ -392,11 +399,6 @@ class SampleProfileReader {
   /// which doesn't support loading function profiles on demand.
   virtual bool collectFuncsFromModule() { return false; }
 
-  virtual std::error_code readOnDemand(const DenseSet<StringRef> &FuncsToUse,
-                                       SampleProfileMap &Profiles) {
-    return sampleprof_error::not_implemented;
-  };
-
   /// Print all the profiles on stream \p OS.
   void dump(raw_ostream &OS = dbgs());
 
@@ -551,7 +553,7 @@ class SampleProfileReader {
   // A map from a function's context hash to its meta data section range, used
   // for on-demand read function profile metadata.
   std::unordered_map<uint64_t, std::pair<const uint8_t *, const uint8_t *>>
-      FContextToMetaDataSecRange;
+      FuncMetadataIndex;
 
   std::pair<const uint8_t *, const uint8_t *> LBRProfileSecRange;
 
@@ -757,8 +759,8 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
   std::error_code readSecHdrTableEntry(uint64_t Idx);
   std::error_code readSecHdrTable();
 
-  std::error_code readFuncMetadataOnDemand(bool ProfileHasAttribute,
-                                           SampleProfileMap &Profiles);
+  std::error_code readFuncMetadata(bool ProfileHasAttribute,
+                                   SampleProfileMap &Profiles);
   std::error_code readFuncMetadata(bool ProfileHasAttribute);
   std::error_code readFuncMetadata(bool ProfileHasAttribute,
                                    FunctionSamples *FProfile);
@@ -818,10 +820,11 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
   bool collectFuncsFromModule() override;
 
   /// Read the profiles on-demand for the given functions. This is used after
-  /// stale call graph matching finds new functions whose profiles aren't read
-  /// at the beginning and we need to re-read the profiles.
-  std::error_code readOnDemand(const DenseSet<StringRef> &FuncsToUse,
-                               SampleProfileMap &Profiles) override;
+  /// stale call graph matching finds new functions whose profiles aren't loaded
+  /// at the beginning and we need to load the profiles explicitly for
+  /// potential matching.
+  std::error_code read(const DenseSet<StringRef> &FuncsToUse,
+                       SampleProfileMap &Profiles) override;
 
   std::unique_ptr<ProfileSymbolList> getProfileSymbolList() override {
     return std::move(ProfSymList);
diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index 67edea42e2fe14..076d91adfd1dea 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -198,7 +198,7 @@ class SampleProfileMatcher {
   // function and all inlinees.
   void countMismatchedCallsiteSamples(const FunctionSamples &FS);
   void computeAndReportProfileStaleness();
-  void UpdateSampleLoaderWithRecoveredProfiles();
+  void UpdateWithSalvagedProfiles();
 
   LocToLocMap &getIRToProfileLocationMap(const Function &F) {
     auto Ret = FuncMappings.try_emplace(
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index f555da866f36eb..4c0a45bfb47cf8 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -798,16 +798,16 @@ bool SampleProfileReaderExtBinaryBase::useFuncOffsetList() const {
   return false;
 }
 
-std::error_code SampleProfileReaderExtBinaryBase::readOnDemand(
-    const DenseSet<StringRef> &FuncsToUse, SampleProfileMap &Profiles) {
+std::error_code
+SampleProfileReaderExtBinaryBase::read(const DenseSet<StringRef> &FuncsToUse,
+                                       SampleProfileMap &Profiles) {
   Data = LBRProfileSecRange.first;
   End = LBRProfileSecRange.second;
   if (std::error_code EC = readFuncProfiles(FuncsToUse, Profiles))
     return EC;
   End = Data;
 
-  if (std::error_code EC =
-          readFuncMetadataOnDemand(ProfileHasAttribute, Profiles))
+  if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, Profiles))
     return EC;
   return sampleprof_error::success;
 }
@@ -945,6 +945,8 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles(
         return EC;
     }
   }
+
+  return sampleprof_error::success;
 }
 
 std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
@@ -1273,16 +1275,16 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
   return sampleprof_error::success;
 }
 
-std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadataOnDemand(
-    bool ProfileHasAttribute, SampleProfileMap &Profiles) {
-  if (FContextToMetaDataSecRange.empty())
+std::error_code
+SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
+                                                   SampleProfileMap &Profiles) {
+  if (FuncMetadataIndex.empty())
     return sampleprof_error::success;
 
   for (auto &I : Profiles) {
     FunctionSamples *FProfile = &I.second;
-    auto R =
-        FContextToMetaDataSecRange.find(FProfile->getContext().getHashCode());
-    if (R == FContextToMetaDataSecRange.end())
+    auto R = FuncMetadataIndex.find(FProfile->getContext().getHashCode());
+    if (R == FuncMetadataIndex.end())
       continue;
 
     Data = R->second.first;
@@ -1310,7 +1312,7 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
     if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile))
       return EC;
 
-    FContextToMetaDataSecRange[FContext.getHashCode()] = {Start, Data};
+    FuncMetadataIndex[FContext.getHashCode()] = {Start, Data};
   }
 
   assert(Data == End && "More data is read than expected");
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index b9adc6a0631b80..574a157c636835 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -782,16 +782,15 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
   float Similarity = 0.0;
 
   const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc);
-  // Check if the function is top-level function. For extended profile format,
-  // if a function profile is unused and it's top-level, even if the profile is
-  // matched, it's not found in the profile. This is because sample reader only
-  // read the used profile at the beginning, we need to read the profile
-  // on-demand. Also save it into the FlattenedProfiles for future look-up.
+  // With extbinary profile format, initial profile loading only reads profile
+  // based on current function names in the module.
+  // However, if a function is renamed, sample loader fails to load its original
+  // profile (which has a different name), so we miss this case. To address
+  // this, we load the top-level profile candidate explicitly for the matching.
   if (!FSFlattened) {
-    DenseSet<StringRef> TopLevelFunc;
-    TopLevelFunc.insert(ProfFunc.stringRef());
+    DenseSet<StringRef> TopLevelFunc({ProfFunc.stringRef()});
     SampleProfileMap TopLevelProfile;
-    Reader.readOnDemand(TopLevelFunc, TopLevelProfile);
+    Reader.read(TopLevelFunc, TopLevelProfile);
     assert(TopLevelProfile.size() <= 1 &&
            "More than one profile is found for top-level function");
     if (!TopLevelProfile.empty()) {
@@ -883,13 +882,13 @@ bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc,
   return Matched;
 }
 
-void SampleProfileMatcher::UpdateSampleLoaderWithRecoveredProfiles() {
-  DenseSet<StringRef> RecoveredFuncs;
+void SampleProfileMatcher::UpdateWithSalvagedProfiles() {
+  DenseSet<StringRef> ProfileSalvagedFuncs;
   // Update FuncNameToProfNameMap and SymbolMap.
   for (auto &I : FuncToProfileNameMap) {
     assert(I.first && "New function is null");
     FunctionId FuncName(I.first->getName());
-    RecoveredFuncs.insert(I.second.stringRef());
+    ProfileSalvagedFuncs.insert(I.second.stringRef());
     FuncNameToProfNameMap->emplace(FuncName, I.second);
 
     // We need to remove the old entry to avoid duplicating the function
@@ -898,21 +897,11 @@ void SampleProfileMatcher::UpdateSampleLoaderWithRecoveredProfiles() {
     SymbolMap->emplace(I.second, I.first);
   }
 
-  // Read the top-level profiles for the recovered function profiles. This is
-  // because in extended binary format it only loads the top-level profile for
-  // the functions in the new build but not the recovered functions which is
-  // from the old build.
-  SampleProfileMap TopLevelRecoveredProfiles;
-  Reader.readOnDemand(RecoveredFuncs, TopLevelRecoveredProfiles);
-  auto &Profiles = Reader.getProfiles();
-  for (auto &I : TopLevelRecoveredProfiles) {
-    LLVM_DEBUG(dbgs() << "Top-level function " << I.second.getFunction()
-                      << " is recovered and re-read by the sample reader.\n");
-    auto &Ctx = I.second.getContext();
-    assert(Profiles.find(Ctx) == Profiles.end() &&
-           "Top level profile is found for the unused profile");
-    Profiles.create(Ctx) = std::move(I.second);
-  }
+  // With extbinary profile format, initial profile loading only reads profile
+  // based on current function names in the module, so we need to load top-level
+  // profiles for functions with a different profile name explicitly after the
+  // function-profile name map is established by stale profile matching.
+  Reader.read(ProfileSalvagedFuncs, Reader.getProfiles());
   Reader.setFuncNameToProfNameMap(FuncNameToProfNameMap);
 }
 
@@ -934,7 +923,7 @@ void SampleProfileMatcher::runOnModule() {
   }
 
   if (SalvageUnusedProfile)
-    UpdateSampleLoaderWithRecoveredProfiles();
+    UpdateWithSalvagedProfiles();
 
   if (SalvageStaleProfile)
     distributeIRToProfileLocationMap();
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-toplev-func.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-toplev-func.prof
index a1bba5fc88de0e..86c8cb3285afe2 100644
--- a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-toplev-func.prof
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-toplev-func.prof
@@ -8,7 +8,7 @@ foo:2724522:51
  11: 452687 bar:452687
  12: 452623
  13: 47
- !CFGChecksum: 281718392333557
+ !CFGChecksum: 281479271677951
 bar:452687:452687
  1: 452687
  !CFGChecksum: 4294967295
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func-cp.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func-cp.ll
new file mode 100644
index 00000000000000..750bf03fa2d939
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func-cp.ll
@@ -0,0 +1,147 @@
+; *** IR Dump Before SampleProfileLoaderPass on [module] ***
+; ModuleID = 'test_rename.c'
+source_filename = "test_rename.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at x = dso_local global i32 0, align 4, !dbg !0
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local i32 @bar(i32 noundef %x) #0 !dbg !18 {
+entry:
+    #dbg_value(i32 %x, !22, !DIExpression(), !23)
+  call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 1, i32 0, i64 -1), !dbg !24
+  %add = add nsw i32 %x, 1, !dbg !25
+  ret i32 %add, !dbg !26
+}
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @foo_rename() #0 !dbg !27 {
+entry:
+  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 1, i32 0, i64 -1), !dbg !30
+  %0 = load volatile i32, ptr @x, align 4, !dbg !30, !tbaa !31
+  %call = call i32 @bar(i32 noundef %0), !dbg !35
+  %1 = load volatile i32, ptr @x, align 4, !dbg !37, !tbaa !31
+  %add = add nsw i32 %1, %call, !dbg !37
+  store volatile i32 %add, ptr @x, align 4, !dbg !37, !tbaa !31
+  ret void, !dbg !38
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local i32 @main() #1 !dbg !39 {
+entry:
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg !45
+    #dbg_value(i32 0, !43, !DIExpression(), !46)
+  br label %for.cond, !dbg !47
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ], !dbg !48
+    #dbg_value(i32 %i.0, !43, !DIExpression(), !46)
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !49
+  %cmp = icmp slt i32 %i.0, 100000, !dbg !51
+  br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !52
+
+for.cond.cleanup:                                 ; preds = %for.cond
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg !53
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 7, i32 0, i64 -1), !dbg !54
+  ret i32 0, !dbg !54
+
+for.body:                                         ; preds = %for.cond
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg !55
+  call void @foo_rename(), !dbg !57
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 6, i32 0, i64 -1), !dbg !59
+  %inc = add nsw i32 %i.0, 1, !dbg !59
+    #dbg_value(i32 %inc, !43, !DIExpression(), !46)
+  br label %for.cond, !dbg !60, !llvm.loop !61
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare void @llvm.pseudoprobe(i64, i64, i32, i64) #3
+
+attributes #0 = { noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
+attributes #1 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
+attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!7, !8, !9, !10, !11, !12, !13}
+!llvm.ident = !{!14}
+!llvm.pseudo_probe_desc = !{!15, !16, !17}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 20.0.0git (https://github.com/llvm/llvm-project.git 070702c9be2fb437b0765532c03e98c642951906)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None)
+!3 = !DIFile(filename: "test_rename.c", directory: "/home/wlei/local/llvm_test/rename/extbinary", checksumkind: CSK_MD5, checksum: "11a33a83e4d190ebda0792d0610f0c67")
+!4 = !{!0}
+!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6)
+!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!7 = !{i32 7, !"Dwarf Version", i32 5}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{i32 1, !"wchar_size", i32 4}
+!10 = !{i32 8, !"PIC Level", i32 2}
+!11 = !{i32 7, !"PIE Level", i32 2}
+!12 = !{i32 7, !"uwtable", i32 2}
+!13 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
+!14 = !{!"clang version 20.0.0git (https://github.com/llvm/llvm-project.git 070702c9be2fb437b0765532c03e98c642951906)"}
+!15 = !{i64 -2012135647395072713, i64 4294967295, !"bar"}
+!16 = !{i64 -2115950948644264162, i64 281479271677951, !"foo_rename"}
+!17 = !{i64 -2624081020897602054, i64 281582264815352, !"main"}
+!18 = distinct !DISubprogram(name: "bar", scope: !3, file: !3, line: 3, type: !19, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21)
+!19 = !DISubroutineType(types: !20)
+!20 = !{!6, !6}
+!21 = !{!22}
+!22 = !DILocalVariable(name: "x", arg: 1, scope: !18, file: !3, line: 3, type: !6)
+!23 = !DILocation(line: 0, scope: !18)
+!24 = !DILocation(line: 4, column: 10, scope: !18)
+!25 = !DILocation(line: 4, column: 12, scope: !18)
+!26 = !DILocation(line: 4, column: 3, scope: !18)
+!27 = distinct !DISubprogram(name: "foo_rename", scope: !3, file: !3, line: 7, type: !28, scopeLine: 7, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!28 = !DISubroutineType(types: !29)
+!29 = !{null}
+!30 = !DILocation(line: 8, column: 15, scope: !27)
+!31 = !{!32, !32, i64 0}
+!32 = !{!"int", !33, i64 0}
+!33 = !{!"omnipotent char", !34, i64 0}
+!34 = !{!"Simple C/C++ TBAA"}
+!35 = !DILocation(line: 8, column: 11, scope: !36)
+!36 = !DILexicalBlockFile(scope: !27, file: !3, discriminator: 455082007)
+!37 = !DILocation(line: 8, column: 8, scope: !27)
+!38 = !DILocation(line: 9, column: 1, scope: !27)
+!39 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !40, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !42)
+!40 = !DISubroutineType(types: !41)
+!41 = !{!6}
+!42 = !{!43}
+!43 = !DILocalVariable(name: "i", scope: !44, file: !3, line: 12, type: !6)
+!44 = distinct !DILexicalBlock(scope: !39, file: !3, line: 12, column: 3)
+!45 = !DILocation(line: 12, column: 12, scope: !44)
+!46 = !DILocation(line: 0, scope: !44)
+!47 = !DILocation(line: 12, column: 8, scope: !44)
+!48 = !DILocation(line: 12, scope: !44)
+!49 = !DILocation(line: 12, column: 19, scope: !50)
+!50 = distinct !DILexicalBlock(scope: !44, file: !3, line: 12, column: 3)
+!51 = !DILocation(line: 12, column: 21, scope: !50)
+!52 = !DILocation(line: 12, column: 3, scope: !44)
+!53 = !DILocation(line: 0, scope: !39)
+!54 = !DILocation(line: 15, column: 1, scope: !39)
+!55 = !DILocation(line: 13, column: 7, scope: !56)
+!56 = distinct !DILexicalBlock(scope: !50, file: !3, line: 12, column: 40)
+!57 = !DILocation(line: 13, column: 7, scope: !58)
+!58 = !DILexicalBlockFile(scope: !56, file: !3, discriminator: 455082031)
+!59 = !DILocation(line: 12, column: 36, scope: !50)
+!60 = !DILocation(line: 12, column: 3, scope: !50)
+!61 = distinct !{!61, !52, !62, !63}
+!62 = !DILocation(line: 14, column: 3, scope: !44)
+!63 = !{!"llvm.loop.mustprogress"}
+Function foo_rename is not in profile or profile symbol list.
+Run stale profile matching for main
+Run stale profile matching for bar
+(0/2) of functions' profile are invalid and (0/452891) of samples are discarded due to function hash mismatch.
+(0/2) of functions' profile are matched and (0/452891) of samples are reused by call graph matching.
+(1/1) of callsites' profile are invalid and (51/452891) of samples are discarded due to callsite location mismatch.
+(0/1) of callsites and (0/51) of samples are recovered by stale profile matching.
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll
index f1f2506e08d2a5..356b16ca6ad059 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll
@@ -9,13 +9,12 @@
 ; CHECK-TEXT: The checksums for foo_rename(IR) and foo(Profile) match.
 ; CHECK-TEXT: Function:foo_rename matches profile:foo
 ; CHECK-TEXT: Run stale profile matching for foo_rename
-; CHECK-TEXT-NOT: Top-level function foo is recovered and re-read by the sample reader.
 ; CHECK-TEXT: (1/3) of functions' profile are matched and (2724522/3177413) of samples are reused by call graph matching.
 
 ; CHECK-TEXT: Processing Function main
 ; CHECK-TEXT:     5:  call void @foo_rename(), !dbg ![[#]] - weight: 51
 ; CHECK-TEXT: Processing Function foo_rename
-; CHECK-TEXT:     11:  %call = call i32 @bar(i32 noundef %5), !dbg ![[#]] - weight: 452687
+; CHECK-TEXT:     2:  %call = call i32 @bar(i32 noundef %0), !dbg ![[#]] - weight: 452674
 
 
 ; CHECK-EXTBIN: Run stale profile matching for main
@@ -23,13 +22,12 @@
 ; CHECK-EXTBIN: The checksums for foo_rename(IR) and foo(Profile) match.
 ; CHECK-EXTBIN: Function:foo_rename matches profile:foo
 ; CHECK-EXTBIN: Run stale profile matching for foo_rename
-; CHECK-EXTBIN: Top-level function foo is recovered and re-read by the sample reader.
 ; CHECK-EXTBIN: (1/3) of functions' profile are matched and (2724522/3177413) of samples are reused by call graph matching.
 
 ; CHECK-EXTBIN: Processing Function main
 ; CHECK-EXTBIN:     5:  call void @foo_rename(), !dbg ![[#]] - weight: 51
 ; CHECK-EXTBIN: Processing Function foo_rename
-; CHECK-EXTBIN:     11:  %call = call i32 @bar(i32 noundef %5), !dbg ![[#]] - weight: 452687
+; CHECK-EXTBIN:     2:  %call = call i32 @bar(i32 noundef %0), !dbg ![[#]] - weight: 452674
 
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
@@ -49,110 +47,55 @@ entry:
 ; Function Attrs: noinline nounwind uwtable
 define dso_local void @foo_rename() #0 !dbg !27 {
 entry:
-  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 1, i32 0, i64 -1), !dbg !33
-    #dbg_value(i32 0, !31, !DIExpression(), !34)
-  br label %for.cond, !dbg !35
-
-for.cond:                                         ; preds = %if.end7, %entry
-  %i.0 = phi i32 [ 0, %entry ], [ %inc9, %if.end7 ], !dbg !36
-    #dbg_value(i32 %i.0, !31, !DIExpression(), !34)
-  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 2, i32 0, i64 -1), !dbg !37
-  %cmp = icmp slt i32 %i.0, 10000, !dbg !39
-  br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !40
-
-for.cond.cleanup:                                 ; preds = %for.cond
-  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 3, i32 0, i64 -1), !dbg !41
-  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 13, i32 0, i64 -1), !dbg !42
-  ret void, !dbg !42
-
-for.body:                                         ; preds = %for.cond
-  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 4, i32 0, i64 -1), !dbg !43
-  %0 = load volatile i32, ptr @x, align 4, !dbg !43, !tbaa !46
-  %rem = srem i32 %0, 3, !dbg !50
-  %cmp1 = icmp eq i32 %rem, 1, !dbg !51
-  br i1 %cmp1, label %if.then, label %if.else, !dbg !52
-
-if.then:                                          ; preds = %for.body
-  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 5, i32 0, i64 -1), !dbg !53
-  %1 = load volatile i32, ptr @x, align 4, !dbg !53, !tbaa !46
-  %add = add nsw i32 %1, 100, !dbg !53
-  store volatile i32 %add, ptr @x, align 4, !dbg !53, !tbaa !46
-  br label %if.end7, !dbg !54
-
-if.else:                                          ; preds = %for.body
-  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 6, i32 0, i64 -1), !dbg !55
-  %2 = load volatile i32, ptr @x, align 4, !dbg !55, !tbaa !46
-  %rem2 = srem i32 %2, 2, !dbg !57
-  %cmp3 = icmp eq i32 %rem2, 1, !dbg !58
-  br i1 %cmp3, label %if.then4, label %if.else6, !dbg !59
-
-if.then4:                                         ; preds = %if.else
-  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 7, i32 0, i64 -1), !dbg !60
-  %3 = load volatile i32, ptr @x, align 4, !dbg !60, !tbaa !46
-  %add5 = add nsw i32 %3, 10, !dbg !60
-  store volatile i32 %add5, ptr @x, align 4, !dbg !60, !tbaa !46
-  br label %if.end7, !dbg !61
-
-if.else6:                                         ; preds = %if.else
-  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 8, i32 0, i64 -1), !dbg !62
-  %4 = load volatile i32, ptr @x, align 4, !dbg !62, !tbaa !46
-  %inc = add nsw i32 %4, 1, !dbg !62
-  store volatile i32 %inc, ptr @x, align 4, !dbg !62, !tbaa !46
-  br label %if.end7
-
-if.end7:                                          ; preds = %if.then4, %if.else6, %if.then
-  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 10, i32 0, i64 -1), !dbg !63
-  %5 = load volatile i32, ptr @x, align 4, !dbg !63, !tbaa !46
-  %call = call i32 @bar(i32 noundef %5), !dbg !64
-  %6 = load volatile i32, ptr @x, align 4, !dbg !66, !tbaa !46
-  %add8 = add nsw i32 %6, %call, !dbg !66
-  store volatile i32 %add8, ptr @x, align 4, !dbg !66, !tbaa !46
-  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 12, i32 0, i64 -1), !dbg !67
-  %inc9 = add nsw i32 %i.0, 1, !dbg !67
-    #dbg_value(i32 %inc9, !31, !DIExpression(), !34)
-  br label %for.cond, !dbg !68, !llvm.loop !69
+  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 1, i32 0, i64 -1), !dbg !30
+  %0 = load volatile i32, ptr @x, align 4, !dbg !30, !tbaa !31
+  %call = call i32 @bar(i32 noundef %0), !dbg !35
+  %1 = load volatile i32, ptr @x, align 4, !dbg !37, !tbaa !31
+  %add = add nsw i32 %1, %call, !dbg !37
+  store volatile i32 %add, ptr @x, align 4, !dbg !37, !tbaa !31
+  ret void, !dbg !38
 }
 
-; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
-declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
-
-; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
-declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
-
 ; Function Attrs: nounwind uwtable
-define dso_local i32 @main() #2 !dbg !72 {
+define dso_local i32 @main() #1 !dbg !39 {
 entry:
-  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg !78
-    #dbg_value(i32 0, !76, !DIExpression(), !79)
-  br label %for.cond, !dbg !80
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg !45
+    #dbg_value(i32 0, !43, !DIExpression(), !46)
+  br label %for.cond, !dbg !47
 
 for.cond:                                         ; preds = %for.body, %entry
-  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ], !dbg !81
-    #dbg_value(i32 %i.0, !76, !DIExpression(), !79)
-  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !82
-  %cmp = icmp slt i32 %i.0, 100000, !dbg !84
-  br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !85
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ], !dbg !48
+    #dbg_value(i32 %i.0, !43, !DIExpression(), !46)
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !49
+  %cmp = icmp slt i32 %i.0, 100000, !dbg !51
+  br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !52
 
 for.cond.cleanup:                                 ; preds = %for.cond
-  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg !86
-  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 7, i32 0, i64 -1), !dbg !87
-  ret i32 0, !dbg !87
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg !53
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 7, i32 0, i64 -1), !dbg !54
+  ret i32 0, !dbg !54
 
 for.body:                                         ; preds = %for.cond
-  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg !88
-  call void @foo_rename(), !dbg !90
-  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 6, i32 0, i64 -1), !dbg !92
-  %inc = add nsw i32 %i.0, 1, !dbg !92
-    #dbg_value(i32 %inc, !76, !DIExpression(), !79)
-  br label %for.cond, !dbg !93, !llvm.loop !94
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg !55
+  call void @foo_rename(), !dbg !57
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 6, i32 0, i64 -1), !dbg !59
+  %inc = add nsw i32 %i.0, 1, !dbg !59
+    #dbg_value(i32 %inc, !43, !DIExpression(), !46)
+  br label %for.cond, !dbg !60, !llvm.loop !61
 }
 
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2
+
 ; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
 declare void @llvm.pseudoprobe(i64, i64, i32, i64) #3
 
 attributes #0 = { noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
-attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
-attributes #2 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
+attributes #1 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
+attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
 attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
 
 !llvm.dbg.cu = !{!2}
@@ -163,7 +106,7 @@ attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memo
 !0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
 !1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true)
 !2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 20.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None)
-!3 = !DIFile(filename: "test_rename.c", directory: "/home", checksumkind: CSK_MD5, checksum: "5c9304100fda7763e5a474c768d3b005")
+!3 = !DIFile(filename: "test_rename.c", directory: "/home", checksumkind: CSK_MD5, checksum: "11a33a83e4d190ebda0792d0610f0c67")
 !4 = !{!0}
 !5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6)
 !6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
@@ -176,7 +119,7 @@ attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memo
 !13 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
 !14 = !{!"clang version 20.0.0"}
 !15 = !{i64 -2012135647395072713, i64 4294967295, !"bar"}
-!16 = !{i64 -2115950948644264162, i64 281718392333557, !"foo_rename"}
+!16 = !{i64 -2115950948644264162, i64 281479271677951, !"foo_rename"}
 !17 = !{i64 -2624081020897602054, i64 281582264815352, !"main"}
 !18 = distinct !DISubprogram(name: "bar", scope: !3, file: !3, line: 3, type: !19, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21)
 !19 = !DISubroutineType(types: !20)
@@ -187,72 +130,40 @@ attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memo
 !24 = !DILocation(line: 4, column: 10, scope: !18)
 !25 = !DILocation(line: 4, column: 12, scope: !18)
 !26 = !DILocation(line: 4, column: 3, scope: !18)
-!27 = distinct !DISubprogram(name: "foo_rename", scope: !3, file: !3, line: 7, type: !28, scopeLine: 7, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !30)
+!27 = distinct !DISubprogram(name: "foo_rename", scope: !3, file: !3, line: 7, type: !28, scopeLine: 7, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
 !28 = !DISubroutineType(types: !29)
 !29 = !{null}
-!30 = !{!31}
-!31 = !DILocalVariable(name: "i", scope: !32, file: !3, line: 8, type: !6)
-!32 = distinct !DILexicalBlock(scope: !27, file: !3, line: 8, column: 3)
-!33 = !DILocation(line: 8, column: 12, scope: !32)
-!34 = !DILocation(line: 0, scope: !32)
-!35 = !DILocation(line: 8, column: 8, scope: !32)
-!36 = !DILocation(line: 8, scope: !32)
-!37 = !DILocation(line: 8, column: 19, scope: !38)
-!38 = distinct !DILexicalBlock(scope: !32, file: !3, line: 8, column: 3)
-!39 = !DILocation(line: 8, column: 21, scope: !38)
-!40 = !DILocation(line: 8, column: 3, scope: !32)
-!41 = !DILocation(line: 0, scope: !27)
-!42 = !DILocation(line: 17, column: 1, scope: !27)
-!43 = !DILocation(line: 9, column: 10, scope: !44)
-!44 = distinct !DILexicalBlock(scope: !45, file: !3, line: 9, column: 10)
-!45 = distinct !DILexicalBlock(scope: !38, file: !3, line: 8, column: 39)
-!46 = !{!47, !47, i64 0}
-!47 = !{!"int", !48, i64 0}
-!48 = !{!"omnipotent char", !49, i64 0}
-!49 = !{!"Simple C/C++ TBAA"}
-!50 = !DILocation(line: 9, column: 12, scope: !44)
-!51 = !DILocation(line: 9, column: 16, scope: !44)
-!52 = !DILocation(line: 9, column: 10, scope: !45)
-!53 = !DILocation(line: 10, column: 10, scope: !44)
-!54 = !DILocation(line: 10, column: 8, scope: !44)
-!55 = !DILocation(line: 11, column: 16, scope: !56)
-!56 = distinct !DILexicalBlock(scope: !44, file: !3, line: 11, column: 16)
-!57 = !DILocation(line: 11, column: 18, scope: !56)
-!58 = !DILocation(line: 11, column: 22, scope: !56)
-!59 = !DILocation(line: 11, column: 16, scope: !44)
-!60 = !DILocation(line: 12, column: 10, scope: !56)
-!61 = !DILocation(line: 12, column: 8, scope: !56)
-!62 = !DILocation(line: 14, column: 9, scope: !56)
-!63 = !DILocation(line: 15, column: 15, scope: !45)
-!64 = !DILocation(line: 15, column: 11, scope: !65)
-!65 = !DILexicalBlockFile(scope: !45, file: !3, discriminator: 455082079)
-!66 = !DILocation(line: 15, column: 8, scope: !45)
-!67 = !DILocation(line: 8, column: 35, scope: !38)
-!68 = !DILocation(line: 8, column: 3, scope: !38)
-!69 = distinct !{!69, !40, !70, !71}
-!70 = !DILocation(line: 16, column: 3, scope: !32)
-!71 = !{!"llvm.loop.mustprogress"}
-!72 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 19, type: !73, scopeLine: 19, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !75)
-!73 = !DISubroutineType(types: !74)
-!74 = !{!6}
-!75 = !{!76}
-!76 = !DILocalVariable(name: "i", scope: !77, file: !3, line: 20, type: !6)
-!77 = distinct !DILexicalBlock(scope: !72, file: !3, line: 20, column: 3)
-!78 = !DILocation(line: 20, column: 12, scope: !77)
-!79 = !DILocation(line: 0, scope: !77)
-!80 = !DILocation(line: 20, column: 8, scope: !77)
-!81 = !DILocation(line: 20, scope: !77)
-!82 = !DILocation(line: 20, column: 19, scope: !83)
-!83 = distinct !DILexicalBlock(scope: !77, file: !3, line: 20, column: 3)
-!84 = !DILocation(line: 20, column: 21, scope: !83)
-!85 = !DILocation(line: 20, column: 3, scope: !77)
-!86 = !DILocation(line: 0, scope: !72)
-!87 = !DILocation(line: 23, column: 1, scope: !72)
-!88 = !DILocation(line: 21, column: 7, scope: !89)
-!89 = distinct !DILexicalBlock(scope: !83, file: !3, line: 20, column: 40)
-!90 = !DILocation(line: 21, column: 7, scope: !91)
-!91 = !DILexicalBlockFile(scope: !89, file: !3, discriminator: 455082031)
-!92 = !DILocation(line: 20, column: 36, scope: !83)
-!93 = !DILocation(line: 20, column: 3, scope: !83)
-!94 = distinct !{!94, !85, !95, !71}
-!95 = !DILocation(line: 22, column: 3, scope: !77)
+!30 = !DILocation(line: 8, column: 15, scope: !27)
+!31 = !{!32, !32, i64 0}
+!32 = !{!"int", !33, i64 0}
+!33 = !{!"omnipotent char", !34, i64 0}
+!34 = !{!"Simple C/C++ TBAA"}
+!35 = !DILocation(line: 8, column: 11, scope: !36)
+!36 = !DILexicalBlockFile(scope: !27, file: !3, discriminator: 455082007)
+!37 = !DILocation(line: 8, column: 8, scope: !27)
+!38 = !DILocation(line: 9, column: 1, scope: !27)
+!39 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !40, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !42)
+!40 = !DISubroutineType(types: !41)
+!41 = !{!6}
+!42 = !{!43}
+!43 = !DILocalVariable(name: "i", scope: !44, file: !3, line: 12, type: !6)
+!44 = distinct !DILexicalBlock(scope: !39, file: !3, line: 12, column: 3)
+!45 = !DILocation(line: 12, column: 12, scope: !44)
+!46 = !DILocation(line: 0, scope: !44)
+!47 = !DILocation(line: 12, column: 8, scope: !44)
+!48 = !DILocation(line: 12, scope: !44)
+!49 = !DILocation(line: 12, column: 19, scope: !50)
+!50 = distinct !DILexicalBlock(scope: !44, file: !3, line: 12, column: 3)
+!51 = !DILocation(line: 12, column: 21, scope: !50)
+!52 = !DILocation(line: 12, column: 3, scope: !44)
+!53 = !DILocation(line: 0, scope: !39)
+!54 = !DILocation(line: 15, column: 1, scope: !39)
+!55 = !DILocation(line: 13, column: 7, scope: !56)
+!56 = distinct !DILexicalBlock(scope: !50, file: !3, line: 12, column: 40)
+!57 = !DILocation(line: 13, column: 7, scope: !58)
+!58 = !DILexicalBlockFile(scope: !56, file: !3, discriminator: 455082031)
+!59 = !DILocation(line: 12, column: 36, scope: !50)
+!60 = !DILocation(line: 12, column: 3, scope: !50)
+!61 = distinct !{!61, !52, !62, !63}
+!62 = !DILocation(line: 14, column: 3, scope: !44)
+!63 = !{!"llvm.loop.mustprogress"}

>From 91ce2b23236bde42930a3dbb05fb2531c10d90a7 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Tue, 13 Aug 2024 09:20:31 -0700
Subject: [PATCH 3/8] fix lint

---
 llvm/include/llvm/ProfileData/SampleProfReader.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index 00e4e7096ab7b4..907663fd50094d 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -380,7 +380,7 @@ class SampleProfileReader {
     return sampleprof_error::success;
   }
 
-  /// Read sample profiles for the given functions. Currently it's only used 
+  /// Read sample profiles for the given functions. Currently it's only used
   /// for extended binary format to load the profiles on-demand.
   virtual std::error_code read(const DenseSet<StringRef> &FuncsToUse,
                                SampleProfileMap &Profiles) {
@@ -821,7 +821,7 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
 
   /// Read the profiles on-demand for the given functions. This is used after
   /// stale call graph matching finds new functions whose profiles aren't loaded
-  /// at the beginning and we need to loaded the profiles explicitly for 
+  /// at the beginning and we need to load the profiles explicitly for
   /// potential matching.
   std::error_code read(const DenseSet<StringRef> &FuncsToUse,
                        SampleProfileMap &Profiles) override;

>From ab2f83da198013aa55e95c7312a65288dee4df18 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Fri, 16 Aug 2024 16:54:15 -0700
Subject: [PATCH 4/8] addressing comments

---
 .../llvm/ProfileData/SampleProfReader.h       |  20 ++-
 .../Transforms/IPO/SampleProfileMatcher.cpp   |  33 ++--
 ...eudo-probe-stale-profile-toplev-func-cp.ll | 147 ------------------
 .../pseudo-probe-stale-profile-toplev-func.ll |   4 +-
 4 files changed, 35 insertions(+), 169 deletions(-)
 delete mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func-cp.ll

diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index 907663fd50094d..c86b97740e4f79 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -380,8 +380,17 @@ class SampleProfileReader {
     return sampleprof_error::success;
   }
 
-  /// Read sample profiles for the given functions. Currently it's only used
-  /// for extended binary format to load the profiles on-demand.
+  /// Read sample profiles for the given functions. Currently it's only used for
+  /// extended binary format to load the profiles on-demand.
+  std::error_code read(const DenseSet<StringRef> &FuncsToUse) {
+    if (std::error_code EC = read(FuncsToUse, Profiles))
+      return EC;
+    return sampleprof_error::success;
+  };
+
+  /// Read sample profiles for the given functions and write them to the given
+  /// profile map. Currently it's only used for extended binary format to load
+  /// the profiles on-demand.
   virtual std::error_code read(const DenseSet<StringRef> &FuncsToUse,
                                SampleProfileMap &Profiles) {
     return sampleprof_error::not_implemented;
@@ -512,8 +521,8 @@ class SampleProfileReader {
   void setModule(const Module *Mod) { M = Mod; }
 
   void setFuncNameToProfNameMap(
-      HashKeyMap<std::unordered_map, FunctionId, FunctionId> *FPMap) {
-    FuncNameToProfNameMap = FPMap;
+      const HashKeyMap<std::unordered_map, FunctionId, FunctionId> &FPMap) {
+    FuncNameToProfNameMap = &FPMap;
   }
 
 protected:
@@ -547,7 +556,7 @@ class SampleProfileReader {
   // A map pointer to the FuncNameToProfNameMap in SampleProfileLoader,
   // which maps the function name to the matched profile name. This is used
   // for sample loader to look up profile using the new name.
-  HashKeyMap<std::unordered_map, FunctionId, FunctionId>
+  const HashKeyMap<std::unordered_map, FunctionId, FunctionId>
       *FuncNameToProfNameMap = nullptr;
 
   // A map from a function's context hash to its meta data section range, used
@@ -557,6 +566,7 @@ class SampleProfileReader {
 
   std::pair<const uint8_t *, const uint8_t *> LBRProfileSecRange;
 
+  /// Whether the profile has attribute metadata.
   bool ProfileHasAttribute = false;
 
   /// \brief Whether samples are collected based on pseudo probes.
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 574a157c636835..77cede8744707b 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -36,6 +36,12 @@ static cl::opt<unsigned> MinCallCountForCGMatching(
     cl::desc("The minimum number of call anchors required for a function to "
              "run stale profile call graph matching."));
 
+static cl::opt<bool> ReadToplevProfileforCGMatching(
+    "read-toplev-profile-for-cg-matching", cl::Hidden, cl::init(false),
+    cl::desc(
+        "Read top-level profiles that the sample reader initially skips for "
+        "the call-graph matching(only meaningful for extended binary format)"));
+
 extern cl::opt<bool> SalvageStaleProfile;
 extern cl::opt<bool> SalvageUnusedProfile;
 extern cl::opt<bool> PersistProfileStaleness;
@@ -784,22 +790,19 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
   const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc);
   // With extbinary profile format, initial profile loading only reads profile
   // based on current function names in the module.
-  // However, if a function is renamed, sample loader fails to load its original
+  // However, if a function is renamed, sample loader skips loading its original
   // profile(which has a different name), we will miss this case. To address
   // this, we load the top-level profile candidate explicitly for the matching.
-  if (!FSFlattened) {
+  if (!FSFlattened && ReadToplevProfileforCGMatching) {
     DenseSet<StringRef> TopLevelFunc({ProfFunc.stringRef()});
-    SampleProfileMap TopLevelProfile;
-    Reader.read(TopLevelFunc, TopLevelProfile);
-    assert(TopLevelProfile.size() <= 1 &&
-           "More than one profile is found for top-level function");
-    if (!TopLevelProfile.empty()) {
-      LLVM_DEBUG(dbgs() << "Read top-level function " << ProfFunc
-                        << " for call-graph matching\n");
-      auto &FS = TopLevelProfile.begin()->second;
-      FSFlattened =
-          &(FlattenedProfiles.create(FS.getContext()) = std::move(FS));
-    }
+    if (std::error_code EC = Reader.read(TopLevelFunc, FlattenedProfiles))
+      return false;
+    FSFlattened = getFlattenedSamplesFor(ProfFunc);
+    LLVM_DEBUG({
+      if (FSFlattened)
+        dbgs() << "Read top-level function " << ProfFunc
+               << " for call-graph matching\n";
+    });
   }
   if (!FSFlattened)
     return false;
@@ -901,8 +904,8 @@ void SampleProfileMatcher::UpdateWithSalvagedProfiles() {
   // based on current function names in the module, so we need to load top-level
   // profiles for functions with different profile name explicitly after
   // function-profile name map is established with stale profile matching.
-  Reader.read(ProfileSalvagedFuncs, Reader.getProfiles());
-  Reader.setFuncNameToProfNameMap(FuncNameToProfNameMap);
+  Reader.read(ProfileSalvagedFuncs);
+  Reader.setFuncNameToProfNameMap(*FuncNameToProfNameMap);
 }
 
 void SampleProfileMatcher::runOnModule() {
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func-cp.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func-cp.ll
deleted file mode 100644
index 750bf03fa2d939..00000000000000
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func-cp.ll
+++ /dev/null
@@ -1,147 +0,0 @@
-; *** IR Dump Before SampleProfileLoaderPass on [module] ***
-; ModuleID = 'test_rename.c'
-source_filename = "test_rename.c"
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
- at x = dso_local global i32 0, align 4, !dbg !0
-
-; Function Attrs: noinline nounwind uwtable
-define dso_local i32 @bar(i32 noundef %x) #0 !dbg !18 {
-entry:
-    #dbg_value(i32 %x, !22, !DIExpression(), !23)
-  call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 1, i32 0, i64 -1), !dbg !24
-  %add = add nsw i32 %x, 1, !dbg !25
-  ret i32 %add, !dbg !26
-}
-
-; Function Attrs: noinline nounwind uwtable
-define dso_local void @foo_rename() #0 !dbg !27 {
-entry:
-  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 1, i32 0, i64 -1), !dbg !30
-  %0 = load volatile i32, ptr @x, align 4, !dbg !30, !tbaa !31
-  %call = call i32 @bar(i32 noundef %0), !dbg !35
-  %1 = load volatile i32, ptr @x, align 4, !dbg !37, !tbaa !31
-  %add = add nsw i32 %1, %call, !dbg !37
-  store volatile i32 %add, ptr @x, align 4, !dbg !37, !tbaa !31
-  ret void, !dbg !38
-}
-
-; Function Attrs: nounwind uwtable
-define dso_local i32 @main() #1 !dbg !39 {
-entry:
-  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg !45
-    #dbg_value(i32 0, !43, !DIExpression(), !46)
-  br label %for.cond, !dbg !47
-
-for.cond:                                         ; preds = %for.body, %entry
-  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ], !dbg !48
-    #dbg_value(i32 %i.0, !43, !DIExpression(), !46)
-  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !49
-  %cmp = icmp slt i32 %i.0, 100000, !dbg !51
-  br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !52
-
-for.cond.cleanup:                                 ; preds = %for.cond
-  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg !53
-  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 7, i32 0, i64 -1), !dbg !54
-  ret i32 0, !dbg !54
-
-for.body:                                         ; preds = %for.cond
-  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg !55
-  call void @foo_rename(), !dbg !57
-  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 6, i32 0, i64 -1), !dbg !59
-  %inc = add nsw i32 %i.0, 1, !dbg !59
-    #dbg_value(i32 %inc, !43, !DIExpression(), !46)
-  br label %for.cond, !dbg !60, !llvm.loop !61
-}
-
-; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
-declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2
-
-; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
-declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2
-
-; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
-declare void @llvm.pseudoprobe(i64, i64, i32, i64) #3
-
-attributes #0 = { noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
-attributes #1 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
-attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
-attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
-
-!llvm.dbg.cu = !{!2}
-!llvm.module.flags = !{!7, !8, !9, !10, !11, !12, !13}
-!llvm.ident = !{!14}
-!llvm.pseudo_probe_desc = !{!15, !16, !17}
-
-!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
-!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true)
-!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 20.0.0git (https://github.com/llvm/llvm-project.git 070702c9be2fb437b0765532c03e98c642951906)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None)
-!3 = !DIFile(filename: "test_rename.c", directory: "/home/wlei/local/llvm_test/rename/extbinary", checksumkind: CSK_MD5, checksum: "11a33a83e4d190ebda0792d0610f0c67")
-!4 = !{!0}
-!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6)
-!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
-!7 = !{i32 7, !"Dwarf Version", i32 5}
-!8 = !{i32 2, !"Debug Info Version", i32 3}
-!9 = !{i32 1, !"wchar_size", i32 4}
-!10 = !{i32 8, !"PIC Level", i32 2}
-!11 = !{i32 7, !"PIE Level", i32 2}
-!12 = !{i32 7, !"uwtable", i32 2}
-!13 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
-!14 = !{!"clang version 20.0.0git (https://github.com/llvm/llvm-project.git 070702c9be2fb437b0765532c03e98c642951906)"}
-!15 = !{i64 -2012135647395072713, i64 4294967295, !"bar"}
-!16 = !{i64 -2115950948644264162, i64 281479271677951, !"foo_rename"}
-!17 = !{i64 -2624081020897602054, i64 281582264815352, !"main"}
-!18 = distinct !DISubprogram(name: "bar", scope: !3, file: !3, line: 3, type: !19, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21)
-!19 = !DISubroutineType(types: !20)
-!20 = !{!6, !6}
-!21 = !{!22}
-!22 = !DILocalVariable(name: "x", arg: 1, scope: !18, file: !3, line: 3, type: !6)
-!23 = !DILocation(line: 0, scope: !18)
-!24 = !DILocation(line: 4, column: 10, scope: !18)
-!25 = !DILocation(line: 4, column: 12, scope: !18)
-!26 = !DILocation(line: 4, column: 3, scope: !18)
-!27 = distinct !DISubprogram(name: "foo_rename", scope: !3, file: !3, line: 7, type: !28, scopeLine: 7, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
-!28 = !DISubroutineType(types: !29)
-!29 = !{null}
-!30 = !DILocation(line: 8, column: 15, scope: !27)
-!31 = !{!32, !32, i64 0}
-!32 = !{!"int", !33, i64 0}
-!33 = !{!"omnipotent char", !34, i64 0}
-!34 = !{!"Simple C/C++ TBAA"}
-!35 = !DILocation(line: 8, column: 11, scope: !36)
-!36 = !DILexicalBlockFile(scope: !27, file: !3, discriminator: 455082007)
-!37 = !DILocation(line: 8, column: 8, scope: !27)
-!38 = !DILocation(line: 9, column: 1, scope: !27)
-!39 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !40, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !42)
-!40 = !DISubroutineType(types: !41)
-!41 = !{!6}
-!42 = !{!43}
-!43 = !DILocalVariable(name: "i", scope: !44, file: !3, line: 12, type: !6)
-!44 = distinct !DILexicalBlock(scope: !39, file: !3, line: 12, column: 3)
-!45 = !DILocation(line: 12, column: 12, scope: !44)
-!46 = !DILocation(line: 0, scope: !44)
-!47 = !DILocation(line: 12, column: 8, scope: !44)
-!48 = !DILocation(line: 12, scope: !44)
-!49 = !DILocation(line: 12, column: 19, scope: !50)
-!50 = distinct !DILexicalBlock(scope: !44, file: !3, line: 12, column: 3)
-!51 = !DILocation(line: 12, column: 21, scope: !50)
-!52 = !DILocation(line: 12, column: 3, scope: !44)
-!53 = !DILocation(line: 0, scope: !39)
-!54 = !DILocation(line: 15, column: 1, scope: !39)
-!55 = !DILocation(line: 13, column: 7, scope: !56)
-!56 = distinct !DILexicalBlock(scope: !50, file: !3, line: 12, column: 40)
-!57 = !DILocation(line: 13, column: 7, scope: !58)
-!58 = !DILexicalBlockFile(scope: !56, file: !3, discriminator: 455082031)
-!59 = !DILocation(line: 12, column: 36, scope: !50)
-!60 = !DILocation(line: 12, column: 3, scope: !50)
-!61 = distinct !{!61, !52, !62, !63}
-!62 = !DILocation(line: 14, column: 3, scope: !44)
-!63 = !{!"llvm.loop.mustprogress"}
-Function foo_rename is not in profile or profile symbol list.
-Run stale profile matching for main
-Run stale profile matching for bar
-(0/2) of functions' profile are invalid and (0/452891) of samples are discarded due to function hash mismatch.
-(0/2) of functions' profile are matched and (0/452891) of samples are reused by call graph matching.
-(1/1) of callsites' profile are invalid and (51/452891) of samples are discarded due to callsite location mismatch.
-(0/1) of callsites and (0/51) of samples are recovered by stale profile matching.
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll
index 356b16ca6ad059..7b3fe9e047bd20 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll
@@ -1,8 +1,8 @@
 ; REQUIRES: x86_64-linux
 ; REQUIRES: asserts
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-toplev-func.prof --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 2>&1 | FileCheck %s -check-prefix=CHECK-TEXT
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-toplev-func.prof --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 --read-toplev-profile-for-cg-matching 2>&1 | FileCheck %s -check-prefix=CHECK-TEXT
 ; RUN: llvm-profdata merge --sample %S/Inputs/pseudo-probe-stale-profile-toplev-func.prof -extbinary -o %t.extbinary
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.extbinary --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 2>&1 | FileCheck %s -check-prefix=CHECK-EXTBIN
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.extbinary --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 --read-toplev-profile-for-cg-matching 2>&1 | FileCheck %s -check-prefix=CHECK-EXTBIN
 
 ; CHECK-TEXT: Run stale profile matching for main
 ; CHECK-TEXT-NOT: Read top-level function foo for call-graph matching

>From 259ab87f5a708cf1ee50e4b83dceb771fee791af Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Mon, 19 Aug 2024 10:45:22 -0700
Subject: [PATCH 5/8] check whether a profile is already loaded

---
 llvm/include/llvm/ProfileData/SampleProfReader.h | 15 +++++++++------
 llvm/lib/ProfileData/SampleProfReader.cpp        |  6 +++---
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index c86b97740e4f79..a93cf25e3f7f7a 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -380,13 +380,16 @@ class SampleProfileReader {
     return sampleprof_error::success;
   }
 
-  /// Read sample profiles for the given functions. Currently it's only used for
-  /// extended binary format to load the profiles on-demand.
+  /// Read sample profiles for the given functions.
   std::error_code read(const DenseSet<StringRef> &FuncsToUse) {
-    if (std::error_code EC = read(FuncsToUse, Profiles))
+    DenseSet<StringRef> S;
+    for (StringRef F : FuncsToUse)
+      if (Profiles.find(FunctionId(F)) == Profiles.end())
+        S.insert(F);
+    if (std::error_code EC = read(S, Profiles))
       return EC;
     return sampleprof_error::success;
-  };
+  }
 
   /// Read sample profiles for the given functions and write them to the given
   /// profile map. Currently it's only used for extended binary format to load
@@ -394,7 +397,7 @@ class SampleProfileReader {
   virtual std::error_code read(const DenseSet<StringRef> &FuncsToUse,
                                SampleProfileMap &Profiles) {
     return sampleprof_error::not_implemented;
-  };
+  }
 
   /// The implementaion to read sample profiles from the associated file.
   virtual std::error_code readImpl() = 0;
@@ -564,7 +567,7 @@ class SampleProfileReader {
   std::unordered_map<uint64_t, std::pair<const uint8_t *, const uint8_t *>>
       FuncMetadataIndex;
 
-  std::pair<const uint8_t *, const uint8_t *> LBRProfileSecRange;
+  std::pair<const uint8_t *, const uint8_t *> ProfileSecRange;
 
   /// Whether the profile has attribute metadata.
   bool ProfileHasAttribute = false;
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 4c0a45bfb47cf8..71464e8dae65ce 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -731,7 +731,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
     break;
   }
   case SecLBRProfile:
-    LBRProfileSecRange = std::make_pair(Data, End);
+    ProfileSecRange = std::make_pair(Data, End);
     if (std::error_code EC = readFuncProfiles())
       return EC;
     break;
@@ -801,8 +801,8 @@ bool SampleProfileReaderExtBinaryBase::useFuncOffsetList() const {
 std::error_code
 SampleProfileReaderExtBinaryBase::read(const DenseSet<StringRef> &FuncsToUse,
                                        SampleProfileMap &Profiles) {
-  Data = LBRProfileSecRange.first;
-  End = LBRProfileSecRange.second;
+  Data = ProfileSecRange.first;
+  End = ProfileSecRange.second;
   if (std::error_code EC = readFuncProfiles(FuncsToUse, Profiles))
     return EC;
   End = Data;

>From 11b5a6602f81942d59f4357b05a6a755a6212f33 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Mon, 19 Aug 2024 11:29:54 -0700
Subject: [PATCH 6/8] load profiles into the sample reader's profile map

---
 .../Transforms/IPO/SampleProfileMatcher.cpp   | 41 ++++++++++---------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 77cede8744707b..afd5933e39eb42 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -416,18 +416,19 @@ void SampleProfileMatcher::runOnFunction(Function &F) {
   // callsites in one context may differ from those in another context. To get
   // the maximum number of callsites, we merge the function profiles from all
   // contexts, aka, the flattened profile to find profile anchors.
-  const auto *FSFlattened = getFlattenedSamplesFor(F);
-  if (SalvageUnusedProfile && !FSFlattened) {
+  const auto *FSForMatching = getFlattenedSamplesFor(F);
+  if (SalvageUnusedProfile && !FSForMatching) {
     // Apply the matching in place to find the new function's matched profile.
-    // TODO: For extended profile format, if a function profile is unused and
-    // it's top-level, even if the profile is matched, it's not found in the
-    // profile. This is because sample reader only read the used profile at the
-    // beginning, we need to support loading the profile on-demand in future.
     auto R = FuncToProfileNameMap.find(&F);
-    if (R != FuncToProfileNameMap.end())
-      FSFlattened = getFlattenedSamplesFor(R->second);
+    if (R != FuncToProfileNameMap.end()) {
+      FSForMatching = getFlattenedSamplesFor(R->second);
+      // Try to find the salvaged top-level profiles that are explicitly loaded
+      // for the matching, see "functionMatchesProfileHelper" for the details.
+      if (!FSForMatching)
+        FSForMatching = Reader.getSamplesFor(R->second.stringRef());
+    }
   }
-  if (!FSFlattened)
+  if (!FSForMatching)
     return;
 
   // Anchors for IR. It's a map from IR location to callee name, callee name is
@@ -438,7 +439,7 @@ void SampleProfileMatcher::runOnFunction(Function &F) {
   // Anchors for profile. It's a map from callsite location to a set of callee
   // name.
   AnchorMap ProfileAnchors;
-  findProfileAnchors(*FSFlattened, ProfileAnchors);
+  findProfileAnchors(*FSForMatching, ProfileAnchors);
 
   // Compute the callsite match states for profile staleness report.
   if (ReportProfileStaleness || PersistProfileStaleness)
@@ -449,7 +450,7 @@ void SampleProfileMatcher::runOnFunction(Function &F) {
   // For probe-based profiles, run matching only when profile checksum is
   // mismatched.
   bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased &&
-                          !ProbeManager->profileIsValid(F, *FSFlattened);
+                          !ProbeManager->profileIsValid(F, *FSForMatching);
   bool RunCFGMatching =
       !FunctionSamples::ProfileIsProbeBased || ChecksumMismatch;
   bool RunCGMatching = SalvageUnusedProfile;
@@ -787,30 +788,30 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
   // two sequences are.
   float Similarity = 0.0;
 
-  const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc);
+  const auto *FSForMatching = getFlattenedSamplesFor(ProfFunc);
   // With extbinary profile format, initial profile loading only reads profile
   // based on current function names in the module.
   // However, if a function is renamed, sample loader skips to load its original
   // profile(which has a different name), we will miss this case. To address
   // this, we load the top-level profile candidate explicitly for the matching.
-  if (!FSFlattened && ReadToplevProfileforCGMatching) {
+  if (!FSForMatching && ReadToplevProfileforCGMatching) {
     DenseSet<StringRef> TopLevelFunc({ProfFunc.stringRef()});
-    if (std::error_code EC = Reader.read(TopLevelFunc, FlattenedProfiles))
+    if (std::error_code EC = Reader.read(TopLevelFunc))
       return false;
-    FSFlattened = getFlattenedSamplesFor(ProfFunc);
+    FSForMatching = Reader.getSamplesFor(ProfFunc.stringRef());
     LLVM_DEBUG({
-      if (FSFlattened)
+      if (FSForMatching)
         dbgs() << "Read top-level function " << ProfFunc
                << " for call-graph matching\n";
     });
   }
-  if (!FSFlattened)
+  if (!FSForMatching)
     return false;
   // The check for similarity or checksum may not be reliable if the function is
   // tiny, we use the number of basic block as a proxy for the function
   // complexity and skip the matching if it's too small.
   if (IRFunc.size() < MinFuncCountForCGMatching ||
-      FSFlattened->getBodySamples().size() < MinFuncCountForCGMatching)
+      FSForMatching->getBodySamples().size() < MinFuncCountForCGMatching)
     return false;
 
   // For probe-based function, we first trust the checksum info. If the checksum
@@ -818,7 +819,7 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
   if (FunctionSamples::ProfileIsProbeBased) {
     const auto *FuncDesc = ProbeManager->getDesc(IRFunc);
     if (FuncDesc &&
-        !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSFlattened)) {
+        !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSForMatching)) {
       LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName()
                         << "(IR) and " << ProfFunc << "(Profile) match.\n");
 
@@ -829,7 +830,7 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
   AnchorMap IRAnchors;
   findIRAnchors(IRFunc, IRAnchors);
   AnchorMap ProfileAnchors;
-  findProfileAnchors(*FSFlattened, ProfileAnchors);
+  findProfileAnchors(*FSForMatching, ProfileAnchors);
 
   AnchorList FilteredIRAnchorsList;
   AnchorList FilteredProfileAnchorList;

>From 60440849a6e1cdb5ff7ceae1aa5a32c068fae60d Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Mon, 19 Aug 2024 17:59:29 -0700
Subject: [PATCH 7/8] make read function private

---
 .../llvm/ProfileData/SampleProfReader.h       | 29 ++++++++++---------
 .../Transforms/IPO/SampleProfileMatcher.cpp   | 10 +++----
 .../pseudo-probe-stale-profile-toplev-func.ll |  4 +--
 3 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index a93cf25e3f7f7a..6cab1195938888 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -391,14 +391,6 @@ class SampleProfileReader {
     return sampleprof_error::success;
   }
 
-  /// Read sample profiles for the given functions and write them to the given
-  /// profile map. Currently it's only used for extended binary format to load
-  /// the profiles on-demand.
-  virtual std::error_code read(const DenseSet<StringRef> &FuncsToUse,
-                               SampleProfileMap &Profiles) {
-    return sampleprof_error::not_implemented;
-  }
-
   /// The implementaion to read sample profiles from the associated file.
   virtual std::error_code readImpl() = 0;
 
@@ -554,6 +546,14 @@ class SampleProfileReader {
   /// Compute summary for this profile.
   void computeSummary();
 
+  /// Read sample profiles for the given functions and write them to the given
+  /// profile map. Currently it's only used for extended binary format to load
+  /// the profiles on-demand.
+  virtual std::error_code read(const DenseSet<StringRef> &FuncsToUse,
+                               SampleProfileMap &Profiles) {
+    return sampleprof_error::not_implemented;
+  }
+
   std::unique_ptr<SampleProfileReaderItaniumRemapper> Remapper;
 
   // A map pointer to the FuncNameToProfNameMap in SampleProfileLoader,
@@ -832,18 +832,19 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
   /// the reader has been given a module.
   bool collectFuncsFromModule() override;
 
+  std::unique_ptr<ProfileSymbolList> getProfileSymbolList() override {
+    return std::move(ProfSymList);
+  };
+
+  void setSkipFlatProf(bool Skip) override { SkipFlatProf = Skip; }
+
+private:
   /// Read the profiles on-demand for the given functions. This is used after
   /// stale call graph matching finds new functions whose profiles aren't loaded
   /// at the beginning and we need to loaded the profiles explicitly for
   /// potential matching.
   std::error_code read(const DenseSet<StringRef> &FuncsToUse,
                        SampleProfileMap &Profiles) override;
-
-  std::unique_ptr<ProfileSymbolList> getProfileSymbolList() override {
-    return std::move(ProfSymList);
-  };
-
-  void setSkipFlatProf(bool Skip) override { SkipFlatProf = Skip; }
 };
 
 class SampleProfileReaderExtBinary : public SampleProfileReaderExtBinaryBase {
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index afd5933e39eb42..1c3d89bfc3b123 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -36,10 +36,10 @@ static cl::opt<unsigned> MinCallCountForCGMatching(
     cl::desc("The minimum number of call anchors required for a function to "
              "run stale profile call graph matching."));
 
-static cl::opt<bool> ReadToplevProfileforCGMatching(
-    "read-toplev-profile-for-cg-matching", cl::Hidden, cl::init(false),
+static cl::opt<bool> LoadFuncProfileforCGMatching(
+    "load-func-profile-for-cg-matching", cl::Hidden, cl::init(false),
     cl::desc(
-        "Read top-level profiles that the sample reader initially skips for "
+        "Load top-level profiles that the sample reader initially skipped for "
         "the call-graph matching(only meaningful for extended binary format)"));
 
 extern cl::opt<bool> SalvageStaleProfile;
@@ -424,7 +424,7 @@ void SampleProfileMatcher::runOnFunction(Function &F) {
       FSForMatching = getFlattenedSamplesFor(R->second);
       // Try to find the salvaged top-level profiles that are explicitly loaded
       // for the matching, see "functionMatchesProfileHelper" for the details.
-      if (!FSForMatching)
+      if (!FSForMatching && LoadFuncProfileforCGMatching)
         FSForMatching = Reader.getSamplesFor(R->second.stringRef());
     }
   }
@@ -794,7 +794,7 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
   // However, if a function is renamed, sample loader skips to load its original
   // profile(which has a different name), we will miss this case. To address
   // this, we load the top-level profile candidate explicitly for the matching.
-  if (!FSForMatching && ReadToplevProfileforCGMatching) {
+  if (!FSForMatching && LoadFuncProfileforCGMatching) {
     DenseSet<StringRef> TopLevelFunc({ProfFunc.stringRef()});
     if (std::error_code EC = Reader.read(TopLevelFunc))
       return false;
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll
index 7b3fe9e047bd20..c839364f235536 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll
@@ -1,8 +1,8 @@
 ; REQUIRES: x86_64-linux
 ; REQUIRES: asserts
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-toplev-func.prof --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 --read-toplev-profile-for-cg-matching 2>&1 | FileCheck %s -check-prefix=CHECK-TEXT
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-toplev-func.prof --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 --load-func-profile-for-cg-matching 2>&1 | FileCheck %s -check-prefix=CHECK-TEXT
 ; RUN: llvm-profdata merge --sample %S/Inputs/pseudo-probe-stale-profile-toplev-func.prof -extbinary -o %t.extbinary
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.extbinary --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 --read-toplev-profile-for-cg-matching 2>&1 | FileCheck %s -check-prefix=CHECK-EXTBIN
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.extbinary --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 --load-func-profile-for-cg-matching 2>&1 | FileCheck %s -check-prefix=CHECK-EXTBIN
 
 ; CHECK-TEXT: Run stale profile matching for main
 ; CHECK-TEXT-NOT: Read top-level function foo for call-graph matching

>From 6fbb401bb1546374eb2a3ffa6bca5df182f5fdbe Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Tue, 27 Aug 2024 11:58:44 -0700
Subject: [PATCH 8/8] add space before (

---
 llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 1c3d89bfc3b123..0c676e8fb95fdb 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -40,7 +40,8 @@ static cl::opt<bool> LoadFuncProfileforCGMatching(
     "load-func-profile-for-cg-matching", cl::Hidden, cl::init(false),
     cl::desc(
         "Load top-level profiles that the sample reader initially skipped for "
-        "the call-graph matching(only meaningful for extended binary format)"));
+        "the call-graph matching (only meaningful for extended binary "
+        "format)"));
 
 extern cl::opt<bool> SalvageStaleProfile;
 extern cl::opt<bool> SalvageUnusedProfile;



More information about the llvm-commits mailing list