[llvm] [SampleFDO] Read call-graph matching recovered top-level function profiles (PR #101053)

Lei Wang via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 29 11:17:02 PDT 2024


https://github.com/wlei-llvm updated https://github.com/llvm/llvm-project/pull/101053

>From 070702c9be2fb437b0765532c03e98c642951906 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Mon, 29 Jul 2024 10:17:46 -0700
Subject: [PATCH] [SampleFDO] Read top-level functions recovered by call-graph
 matching

---
 .../llvm/ProfileData/SampleProfReader.h       |  47 ++++
 .../Transforms/IPO/SampleProfileMatcher.h     |   1 +
 llvm/lib/ProfileData/SampleProfReader.cpp     | 224 +++++++++------
 .../Transforms/IPO/SampleProfileMatcher.cpp   |  64 ++++-
 ...seudo-probe-stale-profile-toplev-func.prof |  23 ++
 .../pseudo-probe-stale-profile-toplev-func.ll | 258 ++++++++++++++++++
 6 files changed, 521 insertions(+), 96 deletions(-)
 create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-toplev-func.prof
 create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll

diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index f4bdc6525308d..b124233a02d11 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -392,6 +392,11 @@ class SampleProfileReader {
   /// which doesn't support loading function profiles on demand.
   virtual bool collectFuncsFromModule() { return false; }
 
+  virtual std::error_code readOnDemand(const DenseSet<StringRef> &FuncsToUse,
+                                       SampleProfileMap &Profiles) {
+    return sampleprof_error::not_implemented;
+  };
+
   /// Print all the profiles on stream \p OS.
   void dump(raw_ostream &OS = dbgs());
 
@@ -413,6 +418,16 @@ class SampleProfileReader {
     if (It != Profiles.end())
       return &It->second;
 
+    if (FuncNameToProfNameMap && !FuncNameToProfNameMap->empty()) {
+      auto R = FuncNameToProfNameMap->find(FunctionId(Fname));
+      if (R != FuncNameToProfNameMap->end()) {
+        Fname = R->second.stringRef();
+        auto It = Profiles.find(FunctionId(Fname));
+        if (It != Profiles.end())
+          return &It->second;
+      }
+    }
+
     if (Remapper) {
       if (auto NameInProfile = Remapper->lookUpNameInProfile(Fname)) {
         auto It = Profiles.find(FunctionId(*NameInProfile));
@@ -494,6 +509,11 @@ class SampleProfileReader {
 
   void setModule(const Module *Mod) { M = Mod; }
 
+  void setFuncNameToProfNameMap(
+      HashKeyMap<std::unordered_map, FunctionId, FunctionId> *FPMap) {
+    FuncNameToProfNameMap = FPMap;
+  }
+
 protected:
   /// Map every function to its associated profile.
   ///
@@ -522,6 +542,21 @@ class SampleProfileReader {
 
   std::unique_ptr<SampleProfileReaderItaniumRemapper> Remapper;
 
+  // A map pointer to the FuncNameToProfNameMap in SampleProfileLoader,
+  // which maps the function name to the matched profile name. This is used
+  // for sample loader to look up profile using the new name.
+  HashKeyMap<std::unordered_map, FunctionId, FunctionId>
+      *FuncNameToProfNameMap = nullptr;
+
+  // A map from a function's context hash to its meta data section range, used
+  // for on-demand read function profile metadata.
+  std::unordered_map<uint64_t, std::pair<const uint8_t *, const uint8_t *>>
+      FContextToMetaDataSecRange;
+
+  std::pair<const uint8_t *, const uint8_t *> LBRProfileSecRange;
+
+  bool ProfileHasAttribute = false;
+
   /// \brief Whether samples are collected based on pseudo probes.
   bool ProfileIsProbeBased = false;
 
@@ -621,6 +656,8 @@ class SampleProfileReaderBinary : public SampleProfileReader {
 
   /// Read the next function profile instance.
   std::error_code readFuncProfile(const uint8_t *Start);
+  std::error_code readFuncProfile(const uint8_t *Start,
+                                  SampleProfileMap &Profiles);
 
   /// Read the contents of the given profile instance.
   std::error_code readProfile(FunctionSamples &FProfile);
@@ -720,11 +757,15 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
   std::error_code readSecHdrTableEntry(uint64_t Idx);
   std::error_code readSecHdrTable();
 
+  std::error_code readFuncMetadataOnDemand(bool ProfileHasAttribute,
+                                           SampleProfileMap &Profiles);
   std::error_code readFuncMetadata(bool ProfileHasAttribute);
   std::error_code readFuncMetadata(bool ProfileHasAttribute,
                                    FunctionSamples *FProfile);
   std::error_code readFuncOffsetTable();
   std::error_code readFuncProfiles();
+  std::error_code readFuncProfiles(const DenseSet<StringRef> &FuncsToUse,
+                                   SampleProfileMap &Profiles);
   std::error_code readNameTableSec(bool IsMD5, bool FixedLengthMD5);
   std::error_code readCSNameTableSec();
   std::error_code readProfileSymbolList();
@@ -776,6 +817,12 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
   /// the reader has been given a module.
   bool collectFuncsFromModule() override;
 
+  /// Read the profiles on-demand for the given functions. This is used after
+  /// stale call graph matching finds new functions whose profiles aren't read
+  /// at the beginning and we need to re-read the profiles.
+  std::error_code readOnDemand(const DenseSet<StringRef> &FuncsToUse,
+                               SampleProfileMap &Profiles) override;
+
   std::unique_ptr<ProfileSymbolList> getProfileSymbolList() override {
     return std::move(ProfSymList);
   };
diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index a67f158433391..67edea42e2fe1 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -198,6 +198,7 @@ class SampleProfileMatcher {
   // function and all inlinees.
   void countMismatchedCallsiteSamples(const FunctionSamples &FS);
   void computeAndReportProfileStaleness();
+  void UpdateSampleLoaderWithRecoveredProfiles();
 
   LocToLocMap &getIRToProfileLocationMap(const Function &F) {
     auto Ret = FuncMappings.try_emplace(
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 4752465fc072e..f555da866f36e 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -653,7 +653,8 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
 }
 
 std::error_code
-SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
+SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start,
+                                           SampleProfileMap &Profiles) {
   Data = Start;
   auto NumHeadSamples = readNumber<uint64_t>();
   if (std::error_code EC = NumHeadSamples.getError())
@@ -678,6 +679,11 @@ SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
   return sampleprof_error::success;
 }
 
+std::error_code
+SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
+  return readFuncProfile(Start, Profiles);
+}
+
 std::error_code SampleProfileReaderBinary::readImpl() {
   ProfileIsFS = ProfileIsFSDisciminator;
   FunctionSamples::ProfileIsFS = ProfileIsFS;
@@ -725,6 +731,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
     break;
   }
   case SecLBRProfile:
+    LBRProfileSecRange = std::make_pair(Data, End);
     if (std::error_code EC = readFuncProfiles())
       return EC;
     break;
@@ -745,9 +752,9 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
     ProfileIsProbeBased =
         hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased);
     FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
-    bool HasAttribute =
+    ProfileHasAttribute =
         hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute);
-    if (std::error_code EC = readFuncMetadata(HasAttribute))
+    if (std::error_code EC = readFuncMetadata(ProfileHasAttribute))
       return EC;
     break;
   }
@@ -791,6 +798,19 @@ bool SampleProfileReaderExtBinaryBase::useFuncOffsetList() const {
   return false;
 }
 
+std::error_code SampleProfileReaderExtBinaryBase::readOnDemand(
+    const DenseSet<StringRef> &FuncsToUse, SampleProfileMap &Profiles) {
+  Data = LBRProfileSecRange.first;
+  End = LBRProfileSecRange.second;
+  if (std::error_code EC = readFuncProfiles(FuncsToUse, Profiles))
+    return EC;
+  End = Data;
+
+  if (std::error_code EC =
+          readFuncMetadataOnDemand(ProfileHasAttribute, Profiles))
+    return EC;
+  return sampleprof_error::success;
+}
 
 bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() {
   if (!M)
@@ -838,6 +858,95 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
  return sampleprof_error::success;
 }
 
+/// Read the profiles selected by \p FuncsToUse into \p Profiles. Profiles are
+/// located via the function offset table/list, relative to the current Data.
+std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles(
+    const DenseSet<StringRef> &FuncsToUse, SampleProfileMap &Profiles) {
+  const uint8_t *Start = Data;
+
+  if (Remapper)
+    for (auto Name : FuncsToUse)
+      Remapper->insert(Name);
+
+  if (ProfileIsCS) {
+    assert(useFuncOffsetList());
+    DenseSet<uint64_t> FuncGuidsToUse;
+    if (useMD5()) {
+      for (auto Name : FuncsToUse)
+        FuncGuidsToUse.insert(Function::getGUID(Name));
+    }
+
+    // For each function in current module, load all context profiles for
+    // the function as well as their callee contexts which can help profile
+    // guided importing for ThinLTO. This can be achieved by walking
+    // through an ordered context container, where contexts are laid out
+    // as if they were walked in preorder of a context trie. While
+    // traversing the trie, a link to the highest common ancestor node is
+    // kept so that all of its descendants will be loaded.
+    const SampleContext *CommonContext = nullptr;
+    for (const auto &NameOffset : FuncOffsetList) {
+      const auto &FContext = NameOffset.first;
+      FunctionId FName = FContext.getFunction();
+      StringRef FNameString;
+      if (!useMD5())
+        FNameString = FName.stringRef();
+
+      // For function in the current module, keep its farthest ancestor
+      // context. This can be used to load itself and its child and
+      // sibling contexts.
+      if ((useMD5() && FuncGuidsToUse.count(FName.getHashCode())) ||
+          (!useMD5() && (FuncsToUse.count(FNameString) ||
+                         (Remapper && Remapper->exist(FNameString))))) {
+        if (!CommonContext || !CommonContext->isPrefixOf(FContext))
+          CommonContext = &FContext;
+      }
+
+      if (CommonContext == &FContext ||
+          (CommonContext && CommonContext->isPrefixOf(FContext))) {
+        // Load profile for the current context which originated from
+        // the common ancestor.
+        const uint8_t *FuncProfileAddr = Start + NameOffset.second;
+        if (std::error_code EC = readFuncProfile(FuncProfileAddr, Profiles))
+          return EC;
+      }
+    }
+  } else if (useMD5()) {
+    assert(!useFuncOffsetList());
+    for (auto Name : FuncsToUse) {
+      auto GUID = MD5Hash(Name);
+      auto iter = FuncOffsetTable.find(GUID);
+      if (iter == FuncOffsetTable.end())
+        continue;
+      const uint8_t *FuncProfileAddr = Start + iter->second;
+      if (std::error_code EC = readFuncProfile(FuncProfileAddr, Profiles))
+        return EC;
+    }
+  } else if (Remapper) {
+    assert(useFuncOffsetList());
+    for (auto NameOffset : FuncOffsetList) {
+      SampleContext FContext(NameOffset.first);
+      auto FuncName = FContext.getFunction();
+      StringRef FuncNameStr = FuncName.stringRef();
+      if (!FuncsToUse.count(FuncNameStr) && !Remapper->exist(FuncNameStr))
+        continue;
+      const uint8_t *FuncProfileAddr = Start + NameOffset.second;
+      if (std::error_code EC = readFuncProfile(FuncProfileAddr, Profiles))
+        return EC;
+    }
+  } else {
+    assert(!useFuncOffsetList());
+    for (auto Name : FuncsToUse) {
+      auto iter = FuncOffsetTable.find(MD5Hash(Name));
+      if (iter == FuncOffsetTable.end())
+        continue;
+      const uint8_t *FuncProfileAddr = Start + iter->second;
+      if (std::error_code EC = readFuncProfile(FuncProfileAddr, Profiles))
+        return EC;
+    }
+  }
+  return sampleprof_error::success;
+}
+
 std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
   // Collect functions used by current module if the Reader has been
   // given a module.
@@ -849,7 +958,6 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
 
   // When LoadFuncsToBeUsed is false, we are using LLVM tool, need to read all
   // profiles.
-  const uint8_t *Start = Data;
   if (!LoadFuncsToBeUsed) {
     while (Data < End) {
       if (std::error_code EC = readFuncProfile(Data))
@@ -858,88 +966,8 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
     assert(Data == End && "More data is read than expected");
   } else {
     // Load function profiles on demand.
-    if (Remapper) {
-      for (auto Name : FuncsToUse) {
-        Remapper->insert(Name);
-      }
-    }
-
-    if (ProfileIsCS) {
-      assert(useFuncOffsetList());
-      DenseSet<uint64_t> FuncGuidsToUse;
-      if (useMD5()) {
-        for (auto Name : FuncsToUse)
-          FuncGuidsToUse.insert(Function::getGUID(Name));
-      }
-
-      // For each function in current module, load all context profiles for
-      // the function as well as their callee contexts which can help profile
-      // guided importing for ThinLTO. This can be achieved by walking
-      // through an ordered context container, where contexts are laid out
-      // as if they were walked in preorder of a context trie. While
-      // traversing the trie, a link to the highest common ancestor node is
-      // kept so that all of its decendants will be loaded.
-      const SampleContext *CommonContext = nullptr;
-      for (const auto &NameOffset : FuncOffsetList) {
-        const auto &FContext = NameOffset.first;
-        FunctionId FName = FContext.getFunction();
-        StringRef FNameString;
-        if (!useMD5())
-          FNameString = FName.stringRef();
-
-        // For function in the current module, keep its farthest ancestor
-        // context. This can be used to load itself and its child and
-        // sibling contexts.
-        if ((useMD5() && FuncGuidsToUse.count(FName.getHashCode())) ||
-            (!useMD5() && (FuncsToUse.count(FNameString) ||
-                           (Remapper && Remapper->exist(FNameString))))) {
-          if (!CommonContext || !CommonContext->isPrefixOf(FContext))
-            CommonContext = &FContext;
-        }
-
-        if (CommonContext == &FContext ||
-            (CommonContext && CommonContext->isPrefixOf(FContext))) {
-          // Load profile for the current context which originated from
-          // the common ancestor.
-          const uint8_t *FuncProfileAddr = Start + NameOffset.second;
-          if (std::error_code EC = readFuncProfile(FuncProfileAddr))
-            return EC;
-        }
-      }
-    } else if (useMD5()) {
-      assert(!useFuncOffsetList());
-      for (auto Name : FuncsToUse) {
-        auto GUID = MD5Hash(Name);
-        auto iter = FuncOffsetTable.find(GUID);
-        if (iter == FuncOffsetTable.end())
-          continue;
-        const uint8_t *FuncProfileAddr = Start + iter->second;
-        if (std::error_code EC = readFuncProfile(FuncProfileAddr))
-          return EC;
-      }
-    } else if (Remapper) {
-      assert(useFuncOffsetList());
-      for (auto NameOffset : FuncOffsetList) {
-        SampleContext FContext(NameOffset.first);
-        auto FuncName = FContext.getFunction();
-        StringRef FuncNameStr = FuncName.stringRef();
-        if (!FuncsToUse.count(FuncNameStr) && !Remapper->exist(FuncNameStr))
-          continue;
-        const uint8_t *FuncProfileAddr = Start + NameOffset.second;
-        if (std::error_code EC = readFuncProfile(FuncProfileAddr))
-          return EC;
-      }
-    } else {
-      assert(!useFuncOffsetList());
-      for (auto Name : FuncsToUse) {
-        auto iter = FuncOffsetTable.find(MD5Hash(Name));
-        if (iter == FuncOffsetTable.end())
-          continue;
-        const uint8_t *FuncProfileAddr = Start + iter->second;
-        if (std::error_code EC = readFuncProfile(FuncProfileAddr))
-          return EC;
-      }
-    }
+    if (std::error_code EC = readFuncProfiles(FuncsToUse, Profiles))
+      return EC;
     Data = End;
   }
   assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
@@ -1245,6 +1273,27 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
   return sampleprof_error::success;
 }
 
+std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadataOnDemand(
+    bool ProfileHasAttribute, SampleProfileMap &Profiles) {
+  if (FContextToMetaDataSecRange.empty())
+    return sampleprof_error::success;
+
+  for (auto &I : Profiles) {
+    FunctionSamples *FProfile = &I.second;
+    auto R =
+        FContextToMetaDataSecRange.find(FProfile->getContext().getHashCode());
+    if (R == FContextToMetaDataSecRange.end())
+      continue;
+
+    Data = R->second.first;
+    End = R->second.second;
+    if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile))
+      return EC;
+    assert(Data == End && "More data is read than expected");
+  }
+  return sampleprof_error::success;
+}
+
 std::error_code
 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
   while (Data < End) {
@@ -1257,8 +1306,11 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
     if (It != Profiles.end())
       FProfile = &It->second;
 
+    const uint8_t *Start = Data;
     if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile))
       return EC;
+
+    FContextToMetaDataSecRange[FContext.getHashCode()] = {Start, Data};
   }
 
   assert(Data == End && "More data is read than expected");
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 312672e56b017..b9adc6a0631b8 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -782,6 +782,26 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
   float Similarity = 0.0;
 
   const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc);
+  // Check if the function is top-level function. For extended profile format,
+  // if a function profile is unused and it's top-level, even if the profile is
+  // matched, it's not found in the profile. This is because sample reader only
+  // read the used profile at the beginning, we need to read the profile
+  // on-demand. Also save it into the FlattenedProfiles for future look-up.
+  if (!FSFlattened) {
+    DenseSet<StringRef> TopLevelFunc;
+    TopLevelFunc.insert(ProfFunc.stringRef());
+    SampleProfileMap TopLevelProfile;
+    Reader.readOnDemand(TopLevelFunc, TopLevelProfile);
+    assert(TopLevelProfile.size() <= 1 &&
+           "More than one profile is found for top-level function");
+    if (!TopLevelProfile.empty()) {
+      LLVM_DEBUG(dbgs() << "Read top-level function " << ProfFunc
+                        << " for call-graph matching\n");
+      auto &FS = TopLevelProfile.begin()->second;
+      FSFlattened =
+          &(FlattenedProfiles.create(FS.getContext()) = std::move(FS));
+    }
+  }
   if (!FSFlattened)
     return false;
   // The check for similarity or checksum may not be reliable if the function is
@@ -863,6 +883,39 @@ bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc,
   return Matched;
 }
 
+void SampleProfileMatcher::UpdateSampleLoaderWithRecoveredProfiles() {
+  DenseSet<StringRef> RecoveredFuncs;
+  // Update FuncNameToProfNameMap and SymbolMap.
+  for (auto &I : FuncToProfileNameMap) {
+    assert(I.first && "New function is null");
+    FunctionId FuncName(I.first->getName());
+    RecoveredFuncs.insert(I.second.stringRef());
+    FuncNameToProfNameMap->emplace(FuncName, I.second);
+
+    // We need to remove the old entry to avoid duplicating the function
+    // processing.
+    SymbolMap->erase(FuncName);
+    SymbolMap->emplace(I.second, I.first);
+  }
+
+  // Read the top-level profiles for the recovered function profiles. This is
+  // because in extended binary format it only loads the top-level profile for
+  // the functions in the new build but not the recovered functions which is
+  // from the old build.
+  SampleProfileMap TopLevelRecoveredProfiles;
+  Reader.readOnDemand(RecoveredFuncs, TopLevelRecoveredProfiles);
+  auto &Profiles = Reader.getProfiles();
+  for (auto &I : TopLevelRecoveredProfiles) {
+    LLVM_DEBUG(dbgs() << "Top-level function " << I.second.getFunction()
+                      << " is recovered and re-read by the sample reader.\n");
+    auto &Ctx = I.second.getContext();
+    assert(Profiles.find(Ctx) == Profiles.end() &&
+           "Top level profile is found for the unused profile");
+    Profiles.create(Ctx) = std::move(I.second);
+  }
+  Reader.setFuncNameToProfNameMap(FuncNameToProfNameMap);
+}
+
 void SampleProfileMatcher::runOnModule() {
   ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
                                    FunctionSamples::ProfileIsCS);
@@ -880,17 +933,8 @@ void SampleProfileMatcher::runOnModule() {
     runOnFunction(*F);
   }
 
-  // Update the data in SampleLoader.
   if (SalvageUnusedProfile)
-    for (auto &I : FuncToProfileNameMap) {
-      assert(I.first && "New function is null");
-      FunctionId FuncName(I.first->getName());
-      FuncNameToProfNameMap->emplace(FuncName, I.second);
-      // We need to remove the old entry to avoid duplicating the function
-      // processing.
-      SymbolMap->erase(FuncName);
-      SymbolMap->emplace(I.second, I.first);
-    }
+    UpdateSampleLoaderWithRecoveredProfiles();
 
   if (SalvageStaleProfile)
     distributeIRToProfileLocationMap();
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-toplev-func.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-toplev-func.prof
new file mode 100644
index 0000000000000..a1bba5fc88de0
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-toplev-func.prof
@@ -0,0 +1,23 @@
+foo:2724522:51
+ 1: 51
+ 2: 452674
+ 3: 47
+ 4: 497875
+ 6: 415959
+ 10: 452623
+ 11: 452687 bar:452687
+ 12: 452623
+ 13: 47
+ !CFGChecksum: 281718392333557
+bar:452687:452687
+ 1: 452687
+ !CFGChecksum: 4294967295
+main:204:0
+ 1: 0
+ 2: 51
+ 3: 0
+ 4: 51
+ 5: 51 foo:51
+ 6: 51
+ 7: 0
+ !CFGChecksum: 281582264815352
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll
new file mode 100644
index 0000000000000..f1f2506e08d2a
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll
@@ -0,0 +1,258 @@
+; REQUIRES: x86_64-linux
+; REQUIRES: asserts
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-toplev-func.prof --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 2>&1 | FileCheck %s -check-prefix=CHECK-TEXT
+; RUN: llvm-profdata merge --sample %S/Inputs/pseudo-probe-stale-profile-toplev-func.prof -extbinary -o %t.extbinary
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.extbinary --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 2>&1 | FileCheck %s -check-prefix=CHECK-EXTBIN
+
+; CHECK-TEXT: Run stale profile matching for main
+; CHECK-TEXT-NOT: Read top-level function foo for call-graph matching
+; CHECK-TEXT: The checksums for foo_rename(IR) and foo(Profile) match.
+; CHECK-TEXT: Function:foo_rename matches profile:foo
+; CHECK-TEXT: Run stale profile matching for foo_rename
+; CHECK-TEXT-NOT: Top-level function foo is recovered and re-read by the sample reader.
+; CHECK-TEXT: (1/3) of functions' profile are matched and (2724522/3177413) of samples are reused by call graph matching.
+
+; CHECK-TEXT: Processing Function main
+; CHECK-TEXT:     5:  call void @foo_rename(), !dbg ![[#]] - weight: 51
+; CHECK-TEXT: Processing Function foo_rename
+; CHECK-TEXT:     11:  %call = call i32 @bar(i32 noundef %5), !dbg ![[#]] - weight: 452687
+
+
+; CHECK-EXTBIN: Run stale profile matching for main
+; CHECK-EXTBIN: Read top-level function foo for call-graph matching
+; CHECK-EXTBIN: The checksums for foo_rename(IR) and foo(Profile) match.
+; CHECK-EXTBIN: Function:foo_rename matches profile:foo
+; CHECK-EXTBIN: Run stale profile matching for foo_rename
+; CHECK-EXTBIN: Top-level function foo is recovered and re-read by the sample reader.
+; CHECK-EXTBIN: (1/3) of functions' profile are matched and (2724522/3177413) of samples are reused by call graph matching.
+
+; CHECK-EXTBIN: Processing Function main
+; CHECK-EXTBIN:     5:  call void @foo_rename(), !dbg ![[#]] - weight: 51
+; CHECK-EXTBIN: Processing Function foo_rename
+; CHECK-EXTBIN:     11:  %call = call i32 @bar(i32 noundef %5), !dbg ![[#]] - weight: 452687
+
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@x = dso_local global i32 0, align 4, !dbg !0
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local i32 @bar(i32 noundef %x) #0 !dbg !18 {
+entry:
+    #dbg_value(i32 %x, !22, !DIExpression(), !23)
+  call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 1, i32 0, i64 -1), !dbg !24
+  %add = add nsw i32 %x, 1, !dbg !25
+  ret i32 %add, !dbg !26
+}
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @foo_rename() #0 !dbg !27 {
+entry:
+  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 1, i32 0, i64 -1), !dbg !33
+    #dbg_value(i32 0, !31, !DIExpression(), !34)
+  br label %for.cond, !dbg !35
+
+for.cond:                                         ; preds = %if.end7, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc9, %if.end7 ], !dbg !36
+    #dbg_value(i32 %i.0, !31, !DIExpression(), !34)
+  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 2, i32 0, i64 -1), !dbg !37
+  %cmp = icmp slt i32 %i.0, 10000, !dbg !39
+  br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !40
+
+for.cond.cleanup:                                 ; preds = %for.cond
+  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 3, i32 0, i64 -1), !dbg !41
+  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 13, i32 0, i64 -1), !dbg !42
+  ret void, !dbg !42
+
+for.body:                                         ; preds = %for.cond
+  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 4, i32 0, i64 -1), !dbg !43
+  %0 = load volatile i32, ptr @x, align 4, !dbg !43, !tbaa !46
+  %rem = srem i32 %0, 3, !dbg !50
+  %cmp1 = icmp eq i32 %rem, 1, !dbg !51
+  br i1 %cmp1, label %if.then, label %if.else, !dbg !52
+
+if.then:                                          ; preds = %for.body
+  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 5, i32 0, i64 -1), !dbg !53
+  %1 = load volatile i32, ptr @x, align 4, !dbg !53, !tbaa !46
+  %add = add nsw i32 %1, 100, !dbg !53
+  store volatile i32 %add, ptr @x, align 4, !dbg !53, !tbaa !46
+  br label %if.end7, !dbg !54
+
+if.else:                                          ; preds = %for.body
+  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 6, i32 0, i64 -1), !dbg !55
+  %2 = load volatile i32, ptr @x, align 4, !dbg !55, !tbaa !46
+  %rem2 = srem i32 %2, 2, !dbg !57
+  %cmp3 = icmp eq i32 %rem2, 1, !dbg !58
+  br i1 %cmp3, label %if.then4, label %if.else6, !dbg !59
+
+if.then4:                                         ; preds = %if.else
+  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 7, i32 0, i64 -1), !dbg !60
+  %3 = load volatile i32, ptr @x, align 4, !dbg !60, !tbaa !46
+  %add5 = add nsw i32 %3, 10, !dbg !60
+  store volatile i32 %add5, ptr @x, align 4, !dbg !60, !tbaa !46
+  br label %if.end7, !dbg !61
+
+if.else6:                                         ; preds = %if.else
+  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 8, i32 0, i64 -1), !dbg !62
+  %4 = load volatile i32, ptr @x, align 4, !dbg !62, !tbaa !46
+  %inc = add nsw i32 %4, 1, !dbg !62
+  store volatile i32 %inc, ptr @x, align 4, !dbg !62, !tbaa !46
+  br label %if.end7
+
+if.end7:                                          ; preds = %if.then4, %if.else6, %if.then
+  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 10, i32 0, i64 -1), !dbg !63
+  %5 = load volatile i32, ptr @x, align 4, !dbg !63, !tbaa !46
+  %call = call i32 @bar(i32 noundef %5), !dbg !64
+  %6 = load volatile i32, ptr @x, align 4, !dbg !66, !tbaa !46
+  %add8 = add nsw i32 %6, %call, !dbg !66
+  store volatile i32 %add8, ptr @x, align 4, !dbg !66, !tbaa !46
+  call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 12, i32 0, i64 -1), !dbg !67
+  %inc9 = add nsw i32 %i.0, 1, !dbg !67
+    #dbg_value(i32 %inc9, !31, !DIExpression(), !34)
+  br label %for.cond, !dbg !68, !llvm.loop !69
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
+
+; Function Attrs: nounwind uwtable
+define dso_local i32 @main() #2 !dbg !72 {
+entry:
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg !78
+    #dbg_value(i32 0, !76, !DIExpression(), !79)
+  br label %for.cond, !dbg !80
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ], !dbg !81
+    #dbg_value(i32 %i.0, !76, !DIExpression(), !79)
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !82
+  %cmp = icmp slt i32 %i.0, 100000, !dbg !84
+  br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !85
+
+for.cond.cleanup:                                 ; preds = %for.cond
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg !86
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 7, i32 0, i64 -1), !dbg !87
+  ret i32 0, !dbg !87
+
+for.body:                                         ; preds = %for.cond
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg !88
+  call void @foo_rename(), !dbg !90
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 6, i32 0, i64 -1), !dbg !92
+  %inc = add nsw i32 %i.0, 1, !dbg !92
+    #dbg_value(i32 %inc, !76, !DIExpression(), !79)
+  br label %for.cond, !dbg !93, !llvm.loop !94
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare void @llvm.pseudoprobe(i64, i64, i32, i64) #3 ; probe intrinsic; at the call sites in @main the first i64 equals the id in the !"main" entry of !llvm.pseudo_probe_desc
+
+attributes #0 = { noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" } ; identical to #2 except for the extra noinline
+attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } ; differs from #3 only in the memory(...) clause (argmem vs inaccessiblemem)
+attributes #2 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
+attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } ; attribute group referenced by the @llvm.pseudoprobe declaration
+
+!llvm.dbg.cu = !{!2} ; !2 is the DICompileUnit whose file (!3) is test_rename.c
+!llvm.module.flags = !{!7, !8, !9, !10, !11, !12, !13}
+!llvm.ident = !{!14}
+!llvm.pseudo_probe_desc = !{!15, !16, !17} ; one descriptor each for "bar", "foo_rename" and "main"
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true) ; global "x"; its type !5 is a volatile-qualified !6 ("int")
+!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 20.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None)
+!3 = !DIFile(filename: "test_rename.c", directory: "/home", checksumkind: CSK_MD5, checksum: "5c9304100fda7763e5a474c768d3b005")
+!4 = !{!0}
+!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6)
+!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!7 = !{i32 7, !"Dwarf Version", i32 5} ; !7-!13 are the entries of !llvm.module.flags
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{i32 1, !"wchar_size", i32 4}
+!10 = !{i32 8, !"PIC Level", i32 2}
+!11 = !{i32 7, !"PIE Level", i32 2}
+!12 = !{i32 7, !"uwtable", i32 2}
+!13 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
+!14 = !{!"clang version 20.0.0"}
+!15 = !{i64 -2012135647395072713, i64 4294967295, !"bar"} ; !15-!17 are the entries of !llvm.pseudo_probe_desc: {i64, i64, function name}; presumably {GUID, CFG hash, name} - TODO confirm against the LLVM pseudo-probe docs
+!16 = !{i64 -2115950948644264162, i64 281718392333557, !"foo_rename"}
+!17 = !{i64 -2624081020897602054, i64 281582264815352, !"main"} ; the first field is the id passed to every @llvm.pseudoprobe call in the visible part of @main
+!18 = distinct !DISubprogram(name: "bar", scope: !3, file: !3, line: 3, type: !19, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21) ; debug-info entry for function "bar" (test_rename.c, line 3)
+!19 = !DISubroutineType(types: !20)
+!20 = !{!6, !6}
+!21 = !{!22}
+!22 = !DILocalVariable(name: "x", arg: 1, scope: !18, file: !3, line: 3, type: !6) ; first argument "x" of !18
+!23 = !DILocation(line: 0, scope: !18) ; !23-!26 are source locations scoped directly to !18
+!24 = !DILocation(line: 4, column: 10, scope: !18)
+!25 = !DILocation(line: 4, column: 12, scope: !18)
+!26 = !DILocation(line: 4, column: 3, scope: !18)
+!27 = distinct !DISubprogram(name: "foo_rename", scope: !3, file: !3, line: 7, type: !28, scopeLine: 7, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !30) ; debug-info entry for function "foo_rename" (test_rename.c, line 7)
+!28 = !DISubroutineType(types: !29)
+!29 = !{null}
+!30 = !{!31}
+!31 = !DILocalVariable(name: "i", scope: !32, file: !3, line: 8, type: !6)
+!32 = distinct !DILexicalBlock(scope: !27, file: !3, line: 8, column: 3)
+!33 = !DILocation(line: 8, column: 12, scope: !32)
+!34 = !DILocation(line: 0, scope: !32)
+!35 = !DILocation(line: 8, column: 8, scope: !32)
+!36 = !DILocation(line: 8, scope: !32)
+!37 = !DILocation(line: 8, column: 19, scope: !38)
+!38 = distinct !DILexicalBlock(scope: !32, file: !3, line: 8, column: 3)
+!39 = !DILocation(line: 8, column: 21, scope: !38)
+!40 = !DILocation(line: 8, column: 3, scope: !32)
+!41 = !DILocation(line: 0, scope: !27)
+!42 = !DILocation(line: 17, column: 1, scope: !27)
+!43 = !DILocation(line: 9, column: 10, scope: !44)
+!44 = distinct !DILexicalBlock(scope: !45, file: !3, line: 9, column: 10)
+!45 = distinct !DILexicalBlock(scope: !38, file: !3, line: 8, column: 39)
+!46 = !{!47, !47, i64 0} ; !46-!49 form a TBAA chain: "int" -> "omnipotent char" -> "Simple C/C++ TBAA" root
+!47 = !{!"int", !48, i64 0}
+!48 = !{!"omnipotent char", !49, i64 0}
+!49 = !{!"Simple C/C++ TBAA"}
+!50 = !DILocation(line: 9, column: 12, scope: !44)
+!51 = !DILocation(line: 9, column: 16, scope: !44)
+!52 = !DILocation(line: 9, column: 10, scope: !45)
+!53 = !DILocation(line: 10, column: 10, scope: !44)
+!54 = !DILocation(line: 10, column: 8, scope: !44)
+!55 = !DILocation(line: 11, column: 16, scope: !56)
+!56 = distinct !DILexicalBlock(scope: !44, file: !3, line: 11, column: 16)
+!57 = !DILocation(line: 11, column: 18, scope: !56)
+!58 = !DILocation(line: 11, column: 22, scope: !56)
+!59 = !DILocation(line: 11, column: 16, scope: !44)
+!60 = !DILocation(line: 12, column: 10, scope: !56)
+!61 = !DILocation(line: 12, column: 8, scope: !56)
+!62 = !DILocation(line: 14, column: 9, scope: !56)
+!63 = !DILocation(line: 15, column: 15, scope: !45)
+!64 = !DILocation(line: 15, column: 11, scope: !65)
+!65 = !DILexicalBlockFile(scope: !45, file: !3, discriminator: 455082079) ; wraps scope !45 and adds a discriminator; only !64 uses it
+!66 = !DILocation(line: 15, column: 8, scope: !45)
+!67 = !DILocation(line: 8, column: 35, scope: !38)
+!68 = !DILocation(line: 8, column: 3, scope: !38)
+!69 = distinct !{!69, !40, !70, !71} ; self-referential node with two DILocations and !71; presumably the !llvm.loop node for foo_rename's loop - its use site is outside this view
+!70 = !DILocation(line: 16, column: 3, scope: !32)
+!71 = !{!"llvm.loop.mustprogress"} ; shared by both loop nodes, !69 and !94
+!72 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 19, type: !73, scopeLine: 19, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !75) ; debug-info entry for function "main" (test_rename.c, line 19)
+!73 = !DISubroutineType(types: !74)
+!74 = !{!6}
+!75 = !{!76}
+!76 = !DILocalVariable(name: "i", scope: !77, file: !3, line: 20, type: !6) ; the variable described by the #dbg_value records in @main
+!77 = distinct !DILexicalBlock(scope: !72, file: !3, line: 20, column: 3)
+!78 = !DILocation(line: 20, column: 12, scope: !77)
+!79 = !DILocation(line: 0, scope: !77)
+!80 = !DILocation(line: 20, column: 8, scope: !77)
+!81 = !DILocation(line: 20, scope: !77)
+!82 = !DILocation(line: 20, column: 19, scope: !83)
+!83 = distinct !DILexicalBlock(scope: !77, file: !3, line: 20, column: 3)
+!84 = !DILocation(line: 20, column: 21, scope: !83)
+!85 = !DILocation(line: 20, column: 3, scope: !77)
+!86 = !DILocation(line: 0, scope: !72)
+!87 = !DILocation(line: 23, column: 1, scope: !72)
+!88 = !DILocation(line: 21, column: 7, scope: !89)
+!89 = distinct !DILexicalBlock(scope: !83, file: !3, line: 20, column: 40)
+!90 = !DILocation(line: 21, column: 7, scope: !91) ; location attached to the call to @foo_rename in @main's for.body
+!91 = !DILexicalBlockFile(scope: !89, file: !3, discriminator: 455082031) ; wraps scope !89 and adds a discriminator; only !90 uses it
+!92 = !DILocation(line: 20, column: 36, scope: !83)
+!93 = !DILocation(line: 20, column: 3, scope: !83)
+!94 = distinct !{!94, !85, !95, !71} ; attached as !llvm.loop to the branch that ends @main's for.body
+!95 = !DILocation(line: 22, column: 3, scope: !77)



More information about the llvm-commits mailing list