[llvm] [SampleFDO] Read call-graph matching recovered top-level function profiless (PR #101053)

via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 29 11:09:39 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-pgo

Author: Lei Wang (wlei-llvm)

<details>
<summary>Changes</summary>

In the extended binary format, the top-level function profile is only read when the functions shows in the current module. However, if a top-level function is renamed, it's never read in the profile, call graph matching will miss this case. Therefore, this patch adds the support to read top-level functions if they are potential candidates for call-graph matching. If a match is found later, the function will be further preserved for use by the sample loader. 

To do this, we need to tweak the current sample profile reader to support read the profile on-demand by given functions(instead of module), so I added a new function `readOndemand` for this, and because the metadata is read separately from the function profile, this also includes to read the metadata on-demand(`readFuncMetadataOnDemand`).

---

Patch is 36.94 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101053.diff


6 Files Affected:

- (modified) llvm/include/llvm/ProfileData/SampleProfReader.h (+47) 
- (modified) llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h (+1) 
- (modified) llvm/lib/ProfileData/SampleProfReader.cpp (+148-96) 
- (modified) llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp (+54-10) 
- (added) llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-toplev-func.prof (+23) 
- (added) llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll (+258) 


``````````diff
diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index f4bdc6525308d..b124233a02d11 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -392,6 +392,11 @@ class SampleProfileReader {
   /// which doesn't support loading function profiles on demand.
   virtual bool collectFuncsFromModule() { return false; }
 
+  virtual std::error_code readOnDemand(const DenseSet<StringRef> &FuncsToUse,
+                                       SampleProfileMap &Profiles) {
+    return sampleprof_error::not_implemented;
+  };
+
   /// Print all the profiles on stream \p OS.
   void dump(raw_ostream &OS = dbgs());
 
@@ -413,6 +418,16 @@ class SampleProfileReader {
     if (It != Profiles.end())
       return &It->second;
 
+    if (FuncNameToProfNameMap && !FuncNameToProfNameMap->empty()) {
+      auto R = FuncNameToProfNameMap->find(FunctionId(Fname));
+      if (R != FuncNameToProfNameMap->end()) {
+        Fname = R->second.stringRef();
+        auto It = Profiles.find(FunctionId(Fname));
+        if (It != Profiles.end())
+          return &It->second;
+      }
+    }
+
     if (Remapper) {
       if (auto NameInProfile = Remapper->lookUpNameInProfile(Fname)) {
         auto It = Profiles.find(FunctionId(*NameInProfile));
@@ -494,6 +509,11 @@ class SampleProfileReader {
 
   void setModule(const Module *Mod) { M = Mod; }
 
+  void setFuncNameToProfNameMap(
+      HashKeyMap<std::unordered_map, FunctionId, FunctionId> *FPMap) {
+    FuncNameToProfNameMap = FPMap;
+  }
+
 protected:
   /// Map every function to its associated profile.
   ///
@@ -522,6 +542,21 @@ class SampleProfileReader {
 
   std::unique_ptr<SampleProfileReaderItaniumRemapper> Remapper;
 
+  // A map pointer to the FuncNameToProfNameMap in SampleProfileLoader,
+  // which maps the function name to the matched profile name. This is used
+  // for sample loader to look up profile using the new name.
+  HashKeyMap<std::unordered_map, FunctionId, FunctionId>
+      *FuncNameToProfNameMap = nullptr;
+
+  // A map from a function's context hash to its meta data section range, used
+  // for on-demand read function profile metadata.
+  std::unordered_map<uint64_t, std::pair<const uint8_t *, const uint8_t *>>
+      FContextToMetaDataSecRange;
+
+  std::pair<const uint8_t *, const uint8_t *> LBRProfileSecRange;
+
+  bool ProfileHasAttribute = false;
+
   /// \brief Whether samples are collected based on pseudo probes.
   bool ProfileIsProbeBased = false;
 
@@ -621,6 +656,8 @@ class SampleProfileReaderBinary : public SampleProfileReader {
 
   /// Read the next function profile instance.
   std::error_code readFuncProfile(const uint8_t *Start);
+  std::error_code readFuncProfile(const uint8_t *Start,
+                                  SampleProfileMap &Profiles);
 
   /// Read the contents of the given profile instance.
   std::error_code readProfile(FunctionSamples &FProfile);
@@ -720,11 +757,15 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
   std::error_code readSecHdrTableEntry(uint64_t Idx);
   std::error_code readSecHdrTable();
 
+  std::error_code readFuncMetadataOnDemand(bool ProfileHasAttribute,
+                                           SampleProfileMap &Profiles);
   std::error_code readFuncMetadata(bool ProfileHasAttribute);
   std::error_code readFuncMetadata(bool ProfileHasAttribute,
                                    FunctionSamples *FProfile);
   std::error_code readFuncOffsetTable();
   std::error_code readFuncProfiles();
+  std::error_code readFuncProfiles(const DenseSet<StringRef> &FuncsToUse,
+                                   SampleProfileMap &Profiles);
   std::error_code readNameTableSec(bool IsMD5, bool FixedLengthMD5);
   std::error_code readCSNameTableSec();
   std::error_code readProfileSymbolList();
@@ -776,6 +817,12 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
   /// the reader has been given a module.
   bool collectFuncsFromModule() override;
 
+  /// Read the profiles on-demand for the given functions. This is used after
+  /// stale call graph matching finds new functions whose profiles aren't read
+  /// at the beginning and we need to re-read the profiles.
+  std::error_code readOnDemand(const DenseSet<StringRef> &FuncsToUse,
+                               SampleProfileMap &Profiles) override;
+
   std::unique_ptr<ProfileSymbolList> getProfileSymbolList() override {
     return std::move(ProfSymList);
   };
diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index a67f158433391..67edea42e2fe1 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -198,6 +198,7 @@ class SampleProfileMatcher {
   // function and all inlinees.
   void countMismatchedCallsiteSamples(const FunctionSamples &FS);
   void computeAndReportProfileStaleness();
+  void UpdateSampleLoaderWithRecoveredProfiles();
 
   LocToLocMap &getIRToProfileLocationMap(const Function &F) {
     auto Ret = FuncMappings.try_emplace(
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 4752465fc072e..1fe89b9a48832 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -653,7 +653,8 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
 }
 
 std::error_code
-SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
+SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start,
+                                           SampleProfileMap &Profiles) {
   Data = Start;
   auto NumHeadSamples = readNumber<uint64_t>();
   if (std::error_code EC = NumHeadSamples.getError())
@@ -678,6 +679,11 @@ SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
   return sampleprof_error::success;
 }
 
+std::error_code
+SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
+  return readFuncProfile(Start, Profiles);
+}
+
 std::error_code SampleProfileReaderBinary::readImpl() {
   ProfileIsFS = ProfileIsFSDisciminator;
   FunctionSamples::ProfileIsFS = ProfileIsFS;
@@ -725,6 +731,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
     break;
   }
   case SecLBRProfile:
+    LBRProfileSecRange = std::make_pair(Data, End);
     if (std::error_code EC = readFuncProfiles())
       return EC;
     break;
@@ -745,9 +752,9 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
     ProfileIsProbeBased =
         hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased);
     FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
-    bool HasAttribute =
+    ProfileHasAttribute =
         hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute);
-    if (std::error_code EC = readFuncMetadata(HasAttribute))
+    if (std::error_code EC = readFuncMetadata(ProfileHasAttribute))
       return EC;
     break;
   }
@@ -791,6 +798,19 @@ bool SampleProfileReaderExtBinaryBase::useFuncOffsetList() const {
   return false;
 }
 
+std::error_code SampleProfileReaderExtBinaryBase::readOnDemand(
+    const DenseSet<StringRef> &FuncsToUse, SampleProfileMap &Profiles) {
+  Data = LBRProfileSecRange.first;
+  End = LBRProfileSecRange.second;
+  if (std::error_code EC = readFuncProfiles(FuncsToUse, Profiles))
+    return EC;
+  End = Data;
+
+  if (std::error_code EC =
+          readFuncMetadataOnDemand(ProfileHasAttribute, Profiles))
+    return EC;
+  return sampleprof_error::success;
+}
 
 bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() {
   if (!M)
@@ -838,110 +858,118 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
  return sampleprof_error::success;
 }
 
-std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
-  // Collect functions used by current module if the Reader has been
-  // given a module.
-  // collectFuncsFromModule uses FunctionSamples::getCanonicalFnName
-  // which will query FunctionSamples::HasUniqSuffix, so it has to be
-  // called after FunctionSamples::HasUniqSuffix is set, i.e. after
-  // NameTable section is read.
-  bool LoadFuncsToBeUsed = collectFuncsFromModule();
-
-  // When LoadFuncsToBeUsed is false, we are using LLVM tool, need to read all
-  // profiles.
-  const uint8_t *Start = Data;
-  if (!LoadFuncsToBeUsed) {
-    while (Data < End) {
-      if (std::error_code EC = readFuncProfile(Data))
-        return EC;
+std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles(
+    const DenseSet<StringRef> &FuncsToUse, SampleProfileMap &Profiles) {
+ const uint8_t *Start = Data;
+
+ if (Remapper) {
+    for (auto Name : FuncsToUse) {
+      Remapper->insert(Name);
     }
-    assert(Data == End && "More data is read than expected");
-  } else {
-    // Load function profiles on demand.
-    if (Remapper) {
-      for (auto Name : FuncsToUse) {
-        Remapper->insert(Name);
-      }
+ }
+
+ if (ProfileIsCS) {
+    assert(useFuncOffsetList());
+    DenseSet<uint64_t> FuncGuidsToUse;
+    if (useMD5()) {
+      for (auto Name : FuncsToUse)
+        FuncGuidsToUse.insert(Function::getGUID(Name));
     }
 
-    if (ProfileIsCS) {
-      assert(useFuncOffsetList());
-      DenseSet<uint64_t> FuncGuidsToUse;
-      if (useMD5()) {
-        for (auto Name : FuncsToUse)
-          FuncGuidsToUse.insert(Function::getGUID(Name));
+    // For each function in current module, load all context profiles for
+    // the function as well as their callee contexts which can help profile
+    // guided importing for ThinLTO. This can be achieved by walking
+    // through an ordered context container, where contexts are laid out
+    // as if they were walked in preorder of a context trie. While
+    // traversing the trie, a link to the highest common ancestor node is
+    // kept so that all of its decendants will be loaded.
+    const SampleContext *CommonContext = nullptr;
+    for (const auto &NameOffset : FuncOffsetList) {
+      const auto &FContext = NameOffset.first;
+      FunctionId FName = FContext.getFunction();
+      StringRef FNameString;
+      if (!useMD5())
+        FNameString = FName.stringRef();
+
+      // For function in the current module, keep its farthest ancestor
+      // context. This can be used to load itself and its child and
+      // sibling contexts.
+      if ((useMD5() && FuncGuidsToUse.count(FName.getHashCode())) ||
+          (!useMD5() && (FuncsToUse.count(FNameString) ||
+                         (Remapper && Remapper->exist(FNameString))))) {
+        if (!CommonContext || !CommonContext->isPrefixOf(FContext))
+          CommonContext = &FContext;
       }
 
-      // For each function in current module, load all context profiles for
-      // the function as well as their callee contexts which can help profile
-      // guided importing for ThinLTO. This can be achieved by walking
-      // through an ordered context container, where contexts are laid out
-      // as if they were walked in preorder of a context trie. While
-      // traversing the trie, a link to the highest common ancestor node is
-      // kept so that all of its decendants will be loaded.
-      const SampleContext *CommonContext = nullptr;
-      for (const auto &NameOffset : FuncOffsetList) {
-        const auto &FContext = NameOffset.first;
-        FunctionId FName = FContext.getFunction();
-        StringRef FNameString;
-        if (!useMD5())
-          FNameString = FName.stringRef();
-
-        // For function in the current module, keep its farthest ancestor
-        // context. This can be used to load itself and its child and
-        // sibling contexts.
-        if ((useMD5() && FuncGuidsToUse.count(FName.getHashCode())) ||
-            (!useMD5() && (FuncsToUse.count(FNameString) ||
-                           (Remapper && Remapper->exist(FNameString))))) {
-          if (!CommonContext || !CommonContext->isPrefixOf(FContext))
-            CommonContext = &FContext;
-        }
-
-        if (CommonContext == &FContext ||
-            (CommonContext && CommonContext->isPrefixOf(FContext))) {
-          // Load profile for the current context which originated from
-          // the common ancestor.
-          const uint8_t *FuncProfileAddr = Start + NameOffset.second;
-          if (std::error_code EC = readFuncProfile(FuncProfileAddr))
-            return EC;
-        }
-      }
-    } else if (useMD5()) {
-      assert(!useFuncOffsetList());
-      for (auto Name : FuncsToUse) {
-        auto GUID = MD5Hash(Name);
-        auto iter = FuncOffsetTable.find(GUID);
-        if (iter == FuncOffsetTable.end())
-          continue;
-        const uint8_t *FuncProfileAddr = Start + iter->second;
-        if (std::error_code EC = readFuncProfile(FuncProfileAddr))
-          return EC;
-      }
-    } else if (Remapper) {
-      assert(useFuncOffsetList());
-      for (auto NameOffset : FuncOffsetList) {
-        SampleContext FContext(NameOffset.first);
-        auto FuncName = FContext.getFunction();
-        StringRef FuncNameStr = FuncName.stringRef();
-        if (!FuncsToUse.count(FuncNameStr) && !Remapper->exist(FuncNameStr))
-          continue;
+      if (CommonContext == &FContext ||
+          (CommonContext && CommonContext->isPrefixOf(FContext))) {
+        // Load profile for the current context which originated from
+        // the common ancestor.
         const uint8_t *FuncProfileAddr = Start + NameOffset.second;
         if (std::error_code EC = readFuncProfile(FuncProfileAddr))
           return EC;
       }
-    } else {
-      assert(!useFuncOffsetList());
-      for (auto Name : FuncsToUse) {
-        auto iter = FuncOffsetTable.find(MD5Hash(Name));
-        if (iter == FuncOffsetTable.end())
-          continue;
-        const uint8_t *FuncProfileAddr = Start + iter->second;
-        if (std::error_code EC = readFuncProfile(FuncProfileAddr))
-          return EC;
-      }
     }
+ } else if (useMD5()) {
+    assert(!useFuncOffsetList());
+    for (auto Name : FuncsToUse) {
+      auto GUID = MD5Hash(Name);
+      auto iter = FuncOffsetTable.find(GUID);
+      if (iter == FuncOffsetTable.end())
+        continue;
+      const uint8_t *FuncProfileAddr = Start + iter->second;
+      if (std::error_code EC = readFuncProfile(FuncProfileAddr, Profiles))
+        return EC;
+    }
+ } else if (Remapper) {
+    assert(useFuncOffsetList());
+    for (auto NameOffset : FuncOffsetList) {
+      SampleContext FContext(NameOffset.first);
+      auto FuncName = FContext.getFunction();
+      StringRef FuncNameStr = FuncName.stringRef();
+      if (!FuncsToUse.count(FuncNameStr) && !Remapper->exist(FuncNameStr))
+        continue;
+      const uint8_t *FuncProfileAddr = Start + NameOffset.second;
+      if (std::error_code EC = readFuncProfile(FuncProfileAddr, Profiles))
+        return EC;
+    }
+ } else {
+    assert(!useFuncOffsetList());
+    for (auto Name : FuncsToUse) {
+
+      auto iter = FuncOffsetTable.find(MD5Hash(Name));
+      if (iter == FuncOffsetTable.end())
+        continue;
+      const uint8_t *FuncProfileAddr = Start + iter->second;
+      if (std::error_code EC = readFuncProfile(FuncProfileAddr, Profiles))
+        return EC;
+    }
+ }
+}
+
+std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
+ // Collect functions used by current module if the Reader has been
+ // given a module.
+ // collectFuncsFromModule uses FunctionSamples::getCanonicalFnName
+ // which will query FunctionSamples::HasUniqSuffix, so it has to be
+ // called after FunctionSamples::HasUniqSuffix is set, i.e. after
+ // NameTable section is read.
+ bool LoadFuncsToBeUsed = collectFuncsFromModule();
+
+ // When LoadFuncsToBeUsed is false, we are using LLVM tool, need to read all
+ // profiles.
+ if (!LoadFuncsToBeUsed) {
+    while (Data < End) {
+      if (std::error_code EC = readFuncProfile(Data))
+        return EC;
+    }
+    assert(Data == End && "More data is read than expected");
+ } else {
+    // Load function profiles on demand.
+    if (std::error_code EC = readFuncProfiles(FuncsToUse, Profiles))
+      return EC;
     Data = End;
-  }
+ }
   assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
          "Cannot have both context-sensitive and regular profile");
   assert((!CSProfileCount || ProfileIsCS) &&
@@ -1245,6 +1273,27 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
   return sampleprof_error::success;
 }
 
+std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadataOnDemand(
+    bool ProfileHasAttribute, SampleProfileMap &Profiles) {
+  if (FContextToMetaDataSecRange.empty())
+    return sampleprof_error::success;
+
+  for (auto &I : Profiles) {
+    FunctionSamples *FProfile = &I.second;
+    auto R =
+        FContextToMetaDataSecRange.find(FProfile->getContext().getHashCode());
+    if (R == FContextToMetaDataSecRange.end())
+      continue;
+
+    Data = R->second.first;
+    End = R->second.second;
+    if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile))
+      return EC;
+    assert(Data == End && "More data is read than expected");
+  }
+  return sampleprof_error::success;
+}
+
 std::error_code
 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
   while (Data < End) {
@@ -1257,8 +1306,11 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
     if (It != Profiles.end())
       FProfile = &It->second;
 
+    const uint8_t *Start = Data;
     if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile))
       return EC;
+
+    FContextToMetaDataSecRange[FContext.getHashCode()] = {Start, Data};
   }
 
   assert(Data == End && "More data is read than expected");
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 312672e56b017..b9adc6a0631b8 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -782,6 +782,26 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
   float Similarity = 0.0;
 
   const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc);
+  // Check if the function is top-level function. For extended profile format,
+  // if a function profile is unused and it's top-level, even if the profile is
+  // matched, it's not found in the profile. This is because sample reader only
+  // read the used profile at the beginning, we need to read the profile
+  // on-demand. Also save it into the FlattenedProfiles for future look-up.
+  if (!FSFlattened) {
+    DenseSet<StringRef> TopLevelFunc;
+    TopLevelFunc.insert(ProfFunc.stringRef());
+    SampleProfileMap TopLevelProfile;
+    Reader.readOnDemand(TopLevelFunc, TopLevelProfile);
+    assert(TopLevelProfile.size() <= 1 &&
+           "More than one profile is found for top-level function");
+    if (!TopLevelProfile.empty()) {
+      LLVM_DEBUG(dbgs() << "Read top-level function " << ProfFunc
+                        << " for call-graph matching\n");
+      auto &FS = TopLevelProfile.begin()->second;
+      FSFlattened =
+          &(FlattenedProfiles.create(FS.getContext()) = std::move(FS));
+    }
+  }
   if (!FSFlattened)
     return false;
   // The check for similarity or checksum may not be reliable if the function is
@@ -863,6 +883,39 @@ bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc,
   return Matched;
 }
 
+void SampleProfileMatcher::UpdateSampleLoaderWithRecoveredProfiles() {
+  DenseSet<StringRef> RecoveredFuncs;
+  // Update FuncNameToProfNameMap and SymbolMap.
+  for (auto &I : FuncToProfileNameMap) {
+    assert(I.first && "New function is null");
+    FunctionId FuncName(I.first->getName());
+    RecoveredFuncs.insert(I.second.stringRef());
+    FuncNameToProfNameMap->emplace(FuncName, I.second);
+
+    // We need to remove the old entry to avoid duplicating the function
+    // processing.
+    SymbolMap->erase(FuncName);
+    SymbolMap->emplace(I.second, I.first);
+  }
+
+  // Read the top-level profiles for the recovered function profiles. This is
+  // because in extended binary format it only loads the top-level profile for
+  // the functions in the new build but not the recovered functions which is
+  // from the old bu...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/101053


More information about the llvm-commits mailing list