[llvm] c9a8a0e - Revert "[llvm-profdata] Refactoring Sample Profile Reader to increase FDO build speed using MD5 as key to Sample Profile map"

Douglas Yung via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 23 17:59:11 PDT 2023


Author: Douglas Yung
Date: 2023-06-23T17:58:22-07:00
New Revision: c9a8a0e8a9b267fae0352af352e9c735fef019f7

URL: https://github.com/llvm/llvm-project/commit/c9a8a0e8a9b267fae0352af352e9c735fef019f7
DIFF: https://github.com/llvm/llvm-project/commit/c9a8a0e8a9b267fae0352af352e9c735fef019f7.diff

LOG: Revert "[llvm-profdata] Refactoring Sample Profile Reader to increase FDO build speed using MD5 as key to Sample Profile map"

This reverts commit 31af18bccea95fe1ae8aa2c51cf7c8e92a1c208e.

This change is causing build failures on many Windows build bots:

https://lab.llvm.org/buildbot/#/builders/216/builds/22833
https://lab.llvm.org/buildbot/#/builders/123/builds/19602
https://lab.llvm.org/buildbot/#/builders/172/builds/28315
https://lab.llvm.org/buildbot/#/builders/119/builds/13870
https://lab.llvm.org/buildbot/#/builders/233/builds/794
https://lab.llvm.org/buildbot/#/builders/235/builds/387
https://lab.llvm.org/buildbot/#/builders/13/builds/36921
https://lab.llvm.org/buildbot/#/builders/127/builds/50510

Added: 
    

Modified: 
    llvm/include/llvm/ProfileData/SampleProf.h
    llvm/include/llvm/ProfileData/SampleProfReader.h
    llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
    llvm/lib/ProfileData/SampleProf.cpp
    llvm/lib/ProfileData/SampleProfReader.cpp
    llvm/lib/ProfileData/SampleProfWriter.cpp
    llvm/lib/Transforms/IPO/SampleContextTracker.cpp
    llvm/test/tools/llvm-profdata/Inputs/sample-nametable-after-samples.profdata
    llvm/test/tools/llvm-profdata/sample-nametable.test
    llvm/tools/llvm-profdata/llvm-profdata.cpp
    llvm/tools/llvm-profgen/ProfileGenerator.cpp
    llvm/unittests/tools/llvm-profdata/CMakeLists.txt
    llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp

Removed: 
    llvm/unittests/tools/llvm-profdata/MD5CollisionTest.cpp


################################################################################
diff  --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 26fd38660a135..f92b993bded59 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -318,14 +318,6 @@ struct LineLocationHash {
 
 raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc);
 
-static inline hash_code hashFuncName(StringRef F) {
-  // If function name is already MD5 string, do not hash again.
-  uint64_t Hash;
-  if (F.getAsInteger(10, Hash))
-    Hash = MD5Hash(F);
-  return Hash;
-}
-
 /// Representation of a single sample record.
 ///
 /// A sample record is represented by a positive integer value, which
@@ -638,13 +630,9 @@ class SampleContext {
     return getContextString(FullContext, false);
   }
 
-  hash_code getHashCode() const {
-    if (hasContext())
-      return hash_value(getContextFrames());
-
-    // For non-context function name, use its MD5 as hash value, so that it is
-    // consistent with the profile map's key.
-    return hashFuncName(getName());
+  uint64_t getHashCode() const {
+    return hasContext() ? hash_value(getContextFrames())
+                        : hash_value(getName());
   }
 
   /// Set the name of the function and clear the current context.
@@ -722,12 +710,9 @@ class SampleContext {
   uint32_t Attributes;
 };
 
-static inline hash_code hash_value(const SampleContext &Context) {
-  return Context.getHashCode();
-}
-
-inline raw_ostream &operator<<(raw_ostream &OS, const SampleContext &Context) {
-  return OS << Context.toString();
+static inline hash_code hash_value(const SampleContext &arg) {
+  return arg.hasContext() ? hash_value(arg.getContextFrames())
+                          : hash_value(arg.getName());
 }
 
 class FunctionSamples;
@@ -1217,9 +1202,6 @@ class FunctionSamples {
     return !(*this == Other);
   }
 
-  template <typename T>
-  const T &getKey() const;
-
 private:
   /// CFG hash value for the function.
   uint64_t FunctionHash = 0;
@@ -1283,173 +1265,12 @@ class FunctionSamples {
   const LocToLocMap *IRToProfileLocationMap = nullptr;
 };
 
-template <>
-inline const SampleContext &FunctionSamples::getKey<SampleContext>() const {
-  return getContext();
-}
-
 raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS);
 
-/// This class is a wrapper to associative container MapT<KeyT, ValueT> using
-/// the hash value of the original key as the new key. This greatly improves the
-/// performance of insert and query operations especially when hash values of
-/// keys are available a priori, and reduces memory usage if KeyT has a large
-/// size.
-/// When performing any action, if an existing entry with a given key is found,
-/// and the interface "KeyT ValueT::getKey<KeyT>() const" to retrieve a value's
-/// original key exists, this class checks if the given key actually matches
-/// the existing entry's original key. If they do not match, this class behaves
-/// as if the entry did not exist (for insertion, this means the new value will
-/// replace the existing entry's value, as if it is newly inserted). If
-/// ValueT::getKey<KeyT>() is not available, all keys with the same hash value
-/// are considered equivalent (i.e. hash collision is silently ignored). Given
-/// such feature this class should only be used where it does not affect
-/// compilation correctness, for example, when loading a sample profile.
-/// Assuming the hashing algorithm is uniform, the probability of hash collision
-/// with 1,000,000 entries is
-/// (2^64)!/((2^64-1000000)!*(2^64)^1000000) ~= 3*10^-8.
-template <template <typename, typename, typename...> typename MapT,
-          typename KeyT, typename ValueT, typename... MapTArgs>
-class HashKeyMap : public MapT<hash_code, ValueT, MapTArgs...> {
-public:
-  using base_type = MapT<hash_code, ValueT, MapTArgs...>;
-  using key_type = hash_code;
-  using original_key_type = KeyT;
-  using mapped_type = ValueT;
-  using value_type = typename base_type::value_type;
+using SampleProfileMap =
+    std::unordered_map<SampleContext, FunctionSamples, SampleContext::Hash>;
 
-  using iterator = typename base_type::iterator;
-  using const_iterator = typename base_type::const_iterator;
-
-private:
-  // If the value type has getKey(), retrieve its original key for comparison.
-  template <typename U = mapped_type,
-            typename = decltype(U().template getKey<original_key_type>())>
-  static bool
-  CheckKeyMatch(const original_key_type &Key, const mapped_type &ExistingValue,
-                original_key_type *ExistingKeyIfDifferent = nullptr) {
-    const original_key_type &ExistingKey =
-        ExistingValue.template getKey<original_key_type>();
-    bool Result = (Key == ExistingKey);
-    if (!Result && ExistingKeyIfDifferent)
-      *ExistingKeyIfDifferent = ExistingKey;
-    return Result;
-  }
-
-  // If getKey() does not exist, this overload is selected, which assumes all
-  // keys with the same hash are equivalent.
-  static bool CheckKeyMatch(...) { return true; }
-
-public:
-  template <typename... Ts>
-  std::pair<iterator, bool> try_emplace(const key_type &Hash,
-                                        const original_key_type &Key,
-                                        Ts &&...Args) {
-    assert(Hash == hash_value(Key));
-    auto Ret = base_type::try_emplace(Hash, std::forward<Ts>(Args)...);
-    if (!Ret.second) {
-      original_key_type ExistingKey;
-      if (LLVM_UNLIKELY(!CheckKeyMatch(Key, Ret.first->second, &ExistingKey))) {
-        dbgs() << "MD5 collision detected: " << Key << " and " << ExistingKey
-               << " has same hash value " << Hash << "\n";
-        Ret.second = true;
-        Ret.first->second = mapped_type(std::forward<Ts>(Args)...);
-      }
-    }
-    return Ret;
-  }
-
-  template <typename... Ts>
-  std::pair<iterator, bool> try_emplace(const original_key_type &Key,
-                                        Ts &&...Args) {
-    key_type Hash = hash_value(Key);
-    return try_emplace(Hash, Key, std::forward<Ts>(Args)...);
-  }
-
-  template <typename... Ts> std::pair<iterator, bool> emplace(Ts &&...Args) {
-    return try_emplace(std::forward<Ts>(Args)...);
-  }
-
-  mapped_type &operator[](const original_key_type &Key) {
-    return try_emplace(Key, mapped_type()).first->second;
-  }
-
-  iterator find(const original_key_type &Key) {
-    key_type Hash = hash_value(Key);
-    auto It = base_type::find(Hash);
-    if (It != base_type::end())
-      if (LLVM_LIKELY(CheckKeyMatch(Key, It->second)))
-        return It;
-    return base_type::end();
-  }
-
-  const_iterator find(const original_key_type &Key) const {
-    key_type Hash = hash_value(Key);
-    auto It = base_type::find(Hash);
-    if (It != base_type::end())
-      if (LLVM_LIKELY(CheckKeyMatch(Key, It->second)))
-        return It;
-    return base_type::end();
-  }
-
-  size_t erase(const original_key_type &Ctx) {
-    auto It = find(Ctx);
-    if (It != base_type::end()) {
-      base_type::erase(It);
-      return 1;
-    }
-    return 0;
-  }
-};
-
-/// This class provides operator overloads to the map container using MD5 as the
-/// key type, so that existing code can still work in most cases using
-/// SampleContext as key.
-/// Note: when populating container, make sure to assign the SampleContext to
-/// the mapped value immediately because the key no longer holds it.
-class SampleProfileMap
-    : public HashKeyMap<DenseMap, SampleContext, FunctionSamples> {
-public:
-  // Convenience method because this is being used in many places. Set the
-  // FunctionSamples' context if its newly inserted.
-  mapped_type &Create(const SampleContext &Ctx) {
-    auto Ret = try_emplace(Ctx, FunctionSamples());
-    if (Ret.second)
-      Ret.first->second.setContext(Ctx);
-    return Ret.first->second;
-  }
-
-  iterator find(const SampleContext &Ctx) {
-    return HashKeyMap<DenseMap, SampleContext, FunctionSamples>::find(Ctx);
-  }
-
-  const_iterator find(const SampleContext &Ctx) const {
-    return HashKeyMap<DenseMap, SampleContext, FunctionSamples>::find(Ctx);
-  }
-
-  // Overloaded find() to lookup a function by name. This is called by IPO
-  // passes with an actual function name, and it is possible that the profile
-  // reader converted function names in the profile to MD5 strings, so we need
-  // to check if either representation matches.
-  iterator find(StringRef Fname) {
-    hash_code Hash = hashFuncName(Fname);
-    auto It = base_type::find(Hash);
-    if (It != end()) {
-      StringRef CtxName = It->second.getContext().getName();
-      if (LLVM_LIKELY(CtxName == Fname || CtxName == std::to_string(Hash)))
-        return It;
-    }
-    return end();
-  }
-
-  size_t erase(const SampleContext &Ctx) {
-    return HashKeyMap<DenseMap, SampleContext, FunctionSamples>::erase(Ctx);
-  }
-
-  size_t erase(const key_type &Key) { return base_type::erase(Key); }
-};
-
-using NameFunctionSamples = std::pair<hash_code, const FunctionSamples *>;
+using NameFunctionSamples = std::pair<SampleContext, const FunctionSamples *>;
 
 void sortFuncProfiles(const SampleProfileMap &ProfileMap,
                       std::vector<NameFunctionSamples> &SortedProfiles);
@@ -1495,6 +1316,8 @@ class SampleContextTrimmer {
                                        bool MergeColdContext,
                                        uint32_t ColdContextFrameLength,
                                        bool TrimBaseProfileOnly);
+  // Canonicalize context profile name and attributes.
+  void canonicalizeContextProfiles();
 
 private:
   SampleProfileMap &ProfileMap;
@@ -1540,12 +1363,12 @@ class ProfileConverter {
                              SampleProfileMap &OutputProfiles,
                              bool ProfileIsCS = false) {
     if (ProfileIsCS) {
-      for (const auto &I : InputProfiles) {
-        // Retain the profile name and clear the full context for each function
-        // profile.
-        FunctionSamples &FS = OutputProfiles.Create(I.second.getName());
-        FS.merge(I.second);
-      }
+      for (const auto &I : InputProfiles)
+        OutputProfiles[I.second.getName()].merge(I.second);
+      // Retain the profile name and clear the full context for each function
+      // profile.
+      for (auto &I : OutputProfiles)
+        I.second.setContext(SampleContext(I.first));
     } else {
       for (const auto &I : InputProfiles)
         flattenNestedProfile(OutputProfiles, I.second);

diff  --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index d32516252eddb..e14b0bfc7912a 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -347,7 +347,7 @@ class SampleProfileReader {
 public:
   SampleProfileReader(std::unique_ptr<MemoryBuffer> B, LLVMContext &C,
                       SampleProfileFormat Format = SPF_None)
-      : Profiles(), Ctx(C), Buffer(std::move(B)), Format(Format) {}
+      : Profiles(0), Ctx(C), Buffer(std::move(B)), Format(Format) {}
 
   virtual ~SampleProfileReader() = default;
 
@@ -383,8 +383,8 @@ class SampleProfileReader {
   /// The implementaion to read sample profiles from the associated file.
   virtual std::error_code readImpl() = 0;
 
-  /// Print the profile for \p FunctionSamples on stream \p OS.
-  void dumpFunctionProfile(const FunctionSamples &FS, raw_ostream &OS = dbgs());
+  /// Print the profile for \p FContext on stream \p OS.
+  void dumpFunctionProfile(SampleContext FContext, raw_ostream &OS = dbgs());
 
   /// Collect functions with definitions in Module M. For reader which
   /// support loading function profiles on demand, return true when the
@@ -410,15 +410,23 @@ class SampleProfileReader {
   /// Return the samples collected for function \p F, create empty
   /// FunctionSamples if it doesn't exist.
   FunctionSamples *getOrCreateSamplesFor(const Function &F) {
+    std::string FGUID;
     StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
-    auto it = Profiles.find(CanonName);
-    if (it != Profiles.end())
-      return &it->second;
+    CanonName = getRepInFormat(CanonName, useMD5(), FGUID);
+    auto It = Profiles.find(CanonName);
+    if (It != Profiles.end())
+      return &It->second;
+    if (!FGUID.empty()) {
+      assert(useMD5() && "New name should only be generated for md5 profile");
+      CanonName = *MD5NameBuffer.insert(FGUID).first;
+    }
     return &Profiles[CanonName];
   }
 
   /// Return the samples collected for function \p F.
-  FunctionSamples *getSamplesFor(StringRef Fname) {
+  virtual FunctionSamples *getSamplesFor(StringRef Fname) {
+    std::string FGUID;
+    Fname = getRepInFormat(Fname, useMD5(), FGUID);
     auto It = Profiles.find(Fname);
     if (It != Profiles.end())
       return &It->second;
@@ -646,16 +654,15 @@ class SampleProfileReaderBinary : public SampleProfileReader {
   /// Read the whole name table.
   std::error_code readNameTable();
 
-  /// Read a string indirectly via the name table. Optionally return the index.
-  ErrorOr<StringRef> readStringFromTable(size_t *RetIdx = nullptr);
+  /// Read a string indirectly via the name table.
+  ErrorOr<StringRef> readStringFromTable();
 
-  /// Read a context indirectly via the CSNameTable. Optionally return the
-  /// index.
-  ErrorOr<SampleContextFrames> readContextFromTable(size_t *RetIdx = nullptr);
+  /// Read a context indirectly via the CSNameTable.
+  ErrorOr<SampleContextFrames> readContextFromTable();
 
   /// Read a context indirectly via the CSNameTable if the profile has context,
-  /// otherwise same as readStringFromTable, also return its hash value.
-  ErrorOr<std::pair<SampleContext, hash_code>> readSampleContextFromTable();
+  /// otherwise same as readStringFromTable.
+  ErrorOr<SampleContext> readSampleContextFromTable();
 
   /// Points to the current location in the buffer.
   const uint8_t *Data = nullptr;
@@ -675,24 +682,13 @@ class SampleProfileReaderBinary : public SampleProfileReader {
   /// the lifetime of MD5StringBuf is not shorter than that of NameTable.
   std::vector<std::string> MD5StringBuf;
 
-  /// The starting address of fixed length MD5 name table section.
+  /// The starting address of NameTable containing fixed length MD5.
   const uint8_t *MD5NameMemStart = nullptr;
 
   /// CSNameTable is used to save full context vectors. It is the backing buffer
   /// for SampleContextFrames.
   std::vector<SampleContextFrameVector> CSNameTable;
 
-  /// Table to cache MD5 values of sample contexts corresponding to
-  /// readSampleContextFromTable(), used to index into Profiles or
-  /// FuncOffsetTable.
-  std::vector<hash_code> MD5SampleContextTable;
-
-  /// The starting address of the table of MD5 values of sample contexts. For
-  /// fixed length MD5 non-CS profile it is same as MD5NameMemStart because
-  /// hashes of non-CS contexts are already in the profile. Otherwise it points
-  /// to the start of MD5SampleContextTable.
-  const hash_code *MD5SampleContextStart = nullptr;
-
 private:
   std::error_code readSummaryEntry(std::vector<ProfileSummaryEntry> &Entries);
   virtual std::error_code verifySPMagic(uint64_t Magic) = 0;
@@ -766,10 +762,10 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
 
   std::unique_ptr<ProfileSymbolList> ProfSymList;
 
-  /// The table mapping from a function context's MD5 to the offset of its
+  /// The table mapping from function context to the offset of its
   /// FunctionSample towards file start.
   /// At most one of FuncOffsetTable and FuncOffsetList is populated.
-  DenseMap<hash_code, uint64_t> FuncOffsetTable;
+  DenseMap<SampleContext, uint64_t> FuncOffsetTable;
 
   /// The list version of FuncOffsetTable. This is used if every entry is
   /// being accessed.

diff  --git a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
index b43230606312b..8e07478fb083b 100644
--- a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
+++ b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
@@ -204,8 +204,9 @@ SampleProfileSummaryBuilder::computeSummaryForProfiles(
   // profiles before computing profile summary.
   if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS &&
                                 !UseContextLessSummary.getNumOccurrences())) {
-    ProfileConverter::flattenProfile(Profiles, ContextLessProfiles,
-                                     /*ProfileIsCS=*/true);
+    for (const auto &I : Profiles) {
+      ContextLessProfiles[I.second.getName()].merge(I.second);
+    }
     ProfilesToUse = &ContextLessProfiles;
   }
 

diff  --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
index 3e22301bee43d..fdae8a011e712 100644
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -202,12 +202,13 @@ void sampleprof::sortFuncProfiles(
     const SampleProfileMap &ProfileMap,
     std::vector<NameFunctionSamples> &SortedProfiles) {
   for (const auto &I : ProfileMap) {
-    SortedProfiles.emplace_back(I.first, &I.second);
+    assert(I.first == I.second.getContext() && "Inconsistent profile map");
+    SortedProfiles.push_back(std::make_pair(I.second.getContext(), &I.second));
   }
   llvm::stable_sort(SortedProfiles, [](const NameFunctionSamples &A,
                                        const NameFunctionSamples &B) {
     if (A.second->getTotalSamples() == B.second->getTotalSamples())
-      return A.second->getContext() < B.second->getContext();
+      return A.first < B.first;
     return A.second->getTotalSamples() > B.second->getTotalSamples();
   });
 }
@@ -356,13 +357,13 @@ void SampleContextTrimmer::trimAndMergeColdContextProfiles(
 
   // Filter the cold profiles from ProfileMap and move them into a tmp
   // container
-  std::vector<std::pair<hash_code, const FunctionSamples *>> ColdProfiles;
+  std::vector<std::pair<SampleContext, const FunctionSamples *>> ColdProfiles;
   for (const auto &I : ProfileMap) {
-    const SampleContext &Context = I.second.getContext();
+    const SampleContext &Context = I.first;
     const FunctionSamples &FunctionProfile = I.second;
     if (FunctionProfile.getTotalSamples() < ColdCountThreshold &&
         (!TrimBaseProfileOnly || Context.isBaseContext()))
-      ColdProfiles.emplace_back(I.first, &I.second);
+      ColdProfiles.emplace_back(Context, &I.second);
   }
 
   // Remove the cold profile from ProfileMap and merge them into
@@ -373,8 +374,8 @@ void SampleContextTrimmer::trimAndMergeColdContextProfiles(
       auto MergedContext = I.second->getContext().getContextFrames();
       if (ColdContextFrameLength < MergedContext.size())
         MergedContext = MergedContext.take_back(ColdContextFrameLength);
-      // Need to set MergedProfile's context here otherwise it will be lost.
-      FunctionSamples &MergedProfile = MergedProfileMap.Create(MergedContext);
+      auto Ret = MergedProfileMap.emplace(MergedContext, FunctionSamples());
+      FunctionSamples &MergedProfile = Ret.first->second;
       MergedProfile.merge(*I.second);
     }
     ProfileMap.erase(I.first);
@@ -384,17 +385,57 @@ void SampleContextTrimmer::trimAndMergeColdContextProfiles(
   for (const auto &I : MergedProfileMap) {
     // Filter the cold merged profile
     if (TrimColdContext && I.second.getTotalSamples() < ColdCountThreshold &&
-        ProfileMap.find(I.second.getContext()) == ProfileMap.end())
+        ProfileMap.find(I.first) == ProfileMap.end())
       continue;
     // Merge the profile if the original profile exists, otherwise just insert
-    // as a new profile. If inserted as a new profile from MergedProfileMap, it
-    // already has the right context.
-    auto Ret = ProfileMap.emplace(I.second.getContext(), FunctionSamples());
+    // as a new profile
+    auto Ret = ProfileMap.emplace(I.first, FunctionSamples());
+    if (Ret.second) {
+      SampleContext FContext(Ret.first->first, RawContext);
+      FunctionSamples &FProfile = Ret.first->second;
+      FProfile.setContext(FContext);
+    }
     FunctionSamples &OrigProfile = Ret.first->second;
     OrigProfile.merge(I.second);
   }
 }
 
+void SampleContextTrimmer::canonicalizeContextProfiles() {
+  std::vector<SampleContext> ProfilesToBeRemoved;
+  SampleProfileMap ProfilesToBeAdded;
+  for (auto &I : ProfileMap) {
+    FunctionSamples &FProfile = I.second;
+    SampleContext &Context = FProfile.getContext();
+    if (I.first == Context)
+      continue;
+
+    // Use the context string from FunctionSamples to update the keys of
+    // ProfileMap. They can get out of sync after context profile promotion
+    // through pre-inliner.
+    // Duplicate the function profile for later insertion to avoid a conflict
+    // caused by a context both to be add and to be removed. This could happen
+    // when a context is promoted to another context which is also promoted to
+    // the third context. For example, given an original context A @ B @ C that
+    // is promoted to B @ C and the original context B @ C which is promoted to
+    // just C, adding B @ C to the profile map while removing same context (but
+    // with 
diff erent profiles) from the map can cause a conflict if they are
+    // not handled in a right order. This can be solved by just caching the
+    // profiles to be added.
+    auto Ret = ProfilesToBeAdded.emplace(Context, FProfile);
+    (void)Ret;
+    assert(Ret.second && "Context conflict during canonicalization");
+    ProfilesToBeRemoved.push_back(I.first);
+  }
+
+  for (auto &I : ProfilesToBeRemoved) {
+    ProfileMap.erase(I);
+  }
+
+  for (auto &I : ProfilesToBeAdded) {
+    ProfileMap.emplace(I.first, I.second);
+  }
+}
+
 std::error_code ProfileSymbolList::write(raw_ostream &OS) {
   // Sort the symbols before output. If doing compression.
   // It will make the compression much more effective.
@@ -468,7 +509,6 @@ void ProfileConverter::convertCSProfiles(ProfileConverter::FrameNode &Node) {
     if (!ChildProfile)
       continue;
     SampleContext OrigChildContext = ChildProfile->getContext();
-    hash_code OrigChildContextHash = OrigChildContext.getHashCode();
     // Reset the child context to be contextless.
     ChildProfile->getContext().setName(OrigChildContext.getName());
     if (NodeProfile) {
@@ -484,7 +524,6 @@ void ProfileConverter::convertCSProfiles(ProfileConverter::FrameNode &Node) {
       NodeProfile->removeTotalSamples(Count);
     }
 
-    hash_code NewChildProfileHash(0);
     // Separate child profile to be a standalone profile, if the current parent
     // profile doesn't exist. This is a duplicating operation when the child
     // profile is already incorporated into the parent which is still useful and
@@ -493,20 +532,15 @@ void ProfileConverter::convertCSProfiles(ProfileConverter::FrameNode &Node) {
     // profile in the prelink phase for to-be-fully-inlined functions.
     if (!NodeProfile) {
       ProfileMap[ChildProfile->getContext()].merge(*ChildProfile);
-      NewChildProfileHash = ChildProfile->getContext().getHashCode();
     } else if (GenerateMergedBaseProfiles) {
       ProfileMap[ChildProfile->getContext()].merge(*ChildProfile);
-      NewChildProfileHash = ChildProfile->getContext().getHashCode();
       auto &SamplesMap = NodeProfile->functionSamplesAt(ChildNode.CallSiteLoc);
       SamplesMap[ChildProfile->getName().str()].getContext().setAttribute(
           ContextDuplicatedIntoBase);
     }
 
-    // Remove the original child profile. Check if MD5 of new child profile
-    // collides with old profile, in this case the [] operator already
-    // overwritten it without the need of erase.
-    if (NewChildProfileHash != OrigChildContextHash)
-      ProfileMap.erase(OrigChildContextHash);
+    // Remove the original child profile.
+    ProfileMap.erase(OrigChildContext);
   }
 }
 

diff  --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index d71fe11b007d4..fbdd9a307321b 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -61,9 +61,9 @@ static cl::opt<bool> ProfileIsFSDisciminator(
 ///
 /// \param FContext Name + context of the function to print.
 /// \param OS Stream to emit the output to.
-void SampleProfileReader::dumpFunctionProfile(const FunctionSamples &FS,
+void SampleProfileReader::dumpFunctionProfile(SampleContext FContext,
                                               raw_ostream &OS) {
-  OS << "Function: " << FS.getContext().toString() << ": " << FS;
+  OS << "Function: " << FContext.toString() << ": " << Profiles[FContext];
 }
 
 /// Dump all the function profiles found on stream \p OS.
@@ -71,7 +71,7 @@ void SampleProfileReader::dump(raw_ostream &OS) {
   std::vector<NameFunctionSamples> V;
   sortFuncProfiles(Profiles, V);
   for (const auto &I : V)
-    dumpFunctionProfile(*I.second, OS);
+    dumpFunctionProfile(I.first, OS);
 }
 
 static void dumpFunctionProfileJson(const FunctionSamples &S,
@@ -355,7 +355,9 @@ std::error_code SampleProfileReaderText::readImpl() {
       SampleContext FContext(FName, CSNameTable);
       if (FContext.hasContext())
         ++CSProfileCount;
-      FunctionSamples &FProfile = Profiles.Create(FContext);
+      Profiles[FContext] = FunctionSamples();
+      FunctionSamples &FProfile = Profiles[FContext];
+      FProfile.setContext(FContext);
       MergeResult(Result, FProfile.addTotalSamples(NumSamples));
       MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
       InlineStack.clear();
@@ -523,8 +525,7 @@ inline ErrorOr<size_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
   return *Idx;
 }
 
-ErrorOr<StringRef>
-SampleProfileReaderBinary::readStringFromTable(size_t *RetIdx) {
+ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
   auto Idx = readStringIndex(NameTable);
   if (std::error_code EC = Idx.getError())
     return EC;
@@ -542,48 +543,30 @@ SampleProfileReaderBinary::readStringFromTable(size_t *RetIdx) {
        MD5NameMemStart + (*Idx) * sizeof(uint64_t));
     SR = MD5StringBuf.emplace_back(std::to_string(FID));
   }
-  if (RetIdx)
-    *RetIdx = *Idx;
   return SR;
 }
 
-ErrorOr<SampleContextFrames>
-SampleProfileReaderBinary::readContextFromTable(size_t *RetIdx) {
+ErrorOr<SampleContextFrames> SampleProfileReaderBinary::readContextFromTable() {
   auto ContextIdx = readNumber<size_t>();
   if (std::error_code EC = ContextIdx.getError())
     return EC;
   if (*ContextIdx >= CSNameTable.size())
     return sampleprof_error::truncated_name_table;
-  if (RetIdx)
-    *RetIdx = *ContextIdx;
   return CSNameTable[*ContextIdx];
 }
 
-ErrorOr<std::pair<SampleContext, hash_code>>
-SampleProfileReaderBinary::readSampleContextFromTable() {
-  SampleContext Context;
-  size_t Idx;
+ErrorOr<SampleContext> SampleProfileReaderBinary::readSampleContextFromTable() {
   if (ProfileIsCS) {
-    auto FContext(readContextFromTable(&Idx));
+    auto FContext(readContextFromTable());
     if (std::error_code EC = FContext.getError())
       return EC;
-    Context = SampleContext(*FContext);
+    return SampleContext(*FContext);
   } else {
-    auto FName(readStringFromTable(&Idx));
+    auto FName(readStringFromTable());
     if (std::error_code EC = FName.getError())
       return EC;
-    Context = SampleContext(*FName);
-  }
-  hash_code Hash = MD5SampleContextStart[Idx];
-  // Lazy computing of hash value, write back to the table to cache it. Only
-  // compute the context's hash value if it is being referenced for the first
-  // time.
-  if (Hash == hash_code(0)) {
-    assert(MD5SampleContextStart == MD5SampleContextTable.data());
-    Hash = Context.getHashCode();
-    MD5SampleContextTable[Idx] = Hash;
+    return SampleContext(*FName);
   }
-  return std::make_pair(Context, Hash);
 }
 
 std::error_code
@@ -676,18 +659,16 @@ SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
   if (std::error_code EC = NumHeadSamples.getError())
     return EC;
 
-  auto FContextHash(readSampleContextFromTable());
-  if (std::error_code EC = FContextHash.getError())
+  ErrorOr<SampleContext> FContext(readSampleContextFromTable());
+  if (std::error_code EC = FContext.getError())
     return EC;
 
-  auto &[FContext, Hash] = *FContextHash;
-  // Use the cached hash value for insertion instead of recalculating it.
-  auto Res = Profiles.try_emplace(Hash, FContext, FunctionSamples());
-  FunctionSamples &FProfile = Res.first->second;
-  FProfile.setContext(FContext);
+  Profiles[*FContext] = FunctionSamples();
+  FunctionSamples &FProfile = Profiles[*FContext];
+  FProfile.setContext(*FContext);
   FProfile.addHeadSamples(*NumHeadSamples);
 
-  if (FContext.hasContext())
+  if (FContext->hasContext())
     CSProfileCount++;
 
   if (std::error_code EC = readProfile(FProfile))
@@ -835,21 +816,18 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
     FuncOffsetTable.reserve(*Size);
 
   for (uint64_t I = 0; I < *Size; ++I) {
-    auto FContextHash(readSampleContextFromTable());
-    if (std::error_code EC = FContextHash.getError())
+    auto FContext(readSampleContextFromTable());
+    if (std::error_code EC = FContext.getError())
       return EC;
 
-    auto &[FContext, Hash] = *FContextHash;
     auto Offset = readNumber<uint64_t>();
     if (std::error_code EC = Offset.getError())
       return EC;
 
     if (UseFuncOffsetList)
-      FuncOffsetList.emplace_back(FContext, *Offset);
+      FuncOffsetList.emplace_back(*FContext, *Offset);
     else
-      // Because Porfiles replace existing value with new value if collision
-      // happens, we also use the latest offset so that they are consistent.
-      FuncOffsetTable[Hash] = *Offset;
+      FuncOffsetTable[*FContext] = *Offset;
  }
 
  return sampleprof_error::success;
@@ -922,8 +900,8 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
     } else if (useMD5()) {
       assert(!useFuncOffsetList());
       for (auto Name : FuncsToUse) {
-        auto GUID = MD5Hash(Name);
-        auto iter = FuncOffsetTable.find(GUID);
+        auto GUID = std::to_string(MD5Hash(Name));
+        auto iter = FuncOffsetTable.find(StringRef(GUID));
         if (iter == FuncOffsetTable.end())
           continue;
         const uint8_t *FuncProfileAddr = Start + iter->second;
@@ -944,7 +922,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
     } else {
       assert(!useFuncOffsetList());
       for (auto Name : FuncsToUse) {
-        auto iter = FuncOffsetTable.find(MD5Hash(Name));
+        auto iter = FuncOffsetTable.find(Name);
         if (iter == FuncOffsetTable.end())
           continue;
         const uint8_t *FuncProfileAddr = Start + iter->second;
@@ -1072,30 +1050,17 @@ std::error_code SampleProfileReaderBinary::readNameTable() {
 
   NameTable.clear();
   NameTable.reserve(*Size);
-  if (!ProfileIsCS) {
-    MD5SampleContextTable.clear();
-    if (UseMD5)
-      MD5SampleContextTable.reserve(*Size);
-    else
-      // If we are using strings, delay MD5 computation since only a portion of
-      // names are used by top level functions. Use 0 to indicate MD5 value is
-      // to be calculated as no known string has a MD5 value of 0.
-      MD5SampleContextTable.resize(*Size);
-  }
   for (size_t I = 0; I < *Size; ++I) {
     auto Name(readString());
     if (std::error_code EC = Name.getError())
       return EC;
     if (UseMD5) {
       uint64_t FID = MD5Hash(*Name);
-      if (!ProfileIsCS)
-        MD5SampleContextTable.emplace_back(FID);
       NameTable.emplace_back(MD5StringBuf.emplace_back(std::to_string(FID)));
     } else
       NameTable.push_back(*Name);
   }
-  if (!ProfileIsCS)
-    MD5SampleContextStart = MD5SampleContextTable.data();
+
   return sampleprof_error::success;
 }
 
@@ -1123,8 +1088,6 @@ SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5,
     NameTable.clear();
     NameTable.resize(*Size);
     MD5NameMemStart = Data;
-    if (!ProfileIsCS)
-      MD5SampleContextStart = reinterpret_cast<const hash_code *>(Data);
     Data = Data + (*Size) * sizeof(uint64_t);
     return sampleprof_error::success;
   }
@@ -1138,20 +1101,12 @@ SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5,
     MD5StringBuf.reserve(MD5StringBuf.size() + *Size);
     NameTable.clear();
     NameTable.reserve(*Size);
-    if (!ProfileIsCS) {
-      MD5SampleContextTable.clear();
-      MD5SampleContextTable.reserve(*Size);
-    }
     for (size_t I = 0; I < *Size; ++I) {
       auto FID = readNumber<uint64_t>();
       if (std::error_code EC = FID.getError())
         return EC;
-      if (!ProfileIsCS)
-        MD5SampleContextTable.emplace_back(*FID);
       NameTable.emplace_back(MD5StringBuf.emplace_back(std::to_string(*FID)));
     }
-    if (!ProfileIsCS)
-      MD5SampleContextStart = MD5SampleContextTable.data();
     return sampleprof_error::success;
   }
 
@@ -1169,14 +1124,6 @@ std::error_code SampleProfileReaderExtBinaryBase::readCSNameTableSec() {
 
   CSNameTable.clear();
   CSNameTable.reserve(*Size);
-  if (ProfileIsCS) {
-    // Delay MD5 computation of CS context until they are needed. Use 0 to
-    // indicate MD5 value is to be calculated as no known string has a MD5
-    // value of 0.
-    MD5SampleContextTable.clear();
-    MD5SampleContextTable.resize(*Size);
-    MD5SampleContextStart = MD5SampleContextTable.data();
-  }
   for (size_t I = 0; I < *Size; ++I) {
     CSNameTable.emplace_back(SampleContextFrameVector());
     auto ContextSize = readNumber<uint32_t>();
@@ -1240,17 +1187,16 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
         if (std::error_code EC = Discriminator.getError())
           return EC;
 
-        auto FContextHash(readSampleContextFromTable());
-        if (std::error_code EC = FContextHash.getError())
+        auto FContext(readSampleContextFromTable());
+        if (std::error_code EC = FContext.getError())
           return EC;
 
-        auto &[FContext, Hash] = *FContextHash;
         FunctionSamples *CalleeProfile = nullptr;
         if (FProfile) {
           CalleeProfile = const_cast<FunctionSamples *>(
               &FProfile->functionSamplesAt(LineLocation(
                   *LineOffset,
-                  *Discriminator))[std::string(FContext.getName())]);
+                  *Discriminator))[std::string(FContext.get().getName())]);
         }
         if (std::error_code EC =
                 readFuncMetadata(ProfileHasAttribute, CalleeProfile))
@@ -1265,12 +1211,11 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
 std::error_code
 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
   while (Data < End) {
-    auto FContextHash(readSampleContextFromTable());
-    if (std::error_code EC = FContextHash.getError())
+    auto FContext(readSampleContextFromTable());
+    if (std::error_code EC = FContext.getError())
       return EC;
-    auto &[FContext, Hash] = *FContextHash;
     FunctionSamples *FProfile = nullptr;
-    auto It = Profiles.find(FContext);
+    auto It = Profiles.find(*FContext);
     if (It != Profiles.end())
       FProfile = &It->second;
 

diff  --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index b3490c6bd0f99..0873093ad426c 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -364,6 +364,7 @@ std::error_code SampleProfileWriterExtBinaryBase::writeNameTable() {
 std::error_code SampleProfileWriterExtBinaryBase::writeNameTableSection(
     const SampleProfileMap &ProfileMap) {
   for (const auto &I : ProfileMap) {
+    assert(I.first == I.second.getContext() && "Inconsistent profile map");
     addContext(I.second.getContext());
     addNames(I.second);
   }
@@ -725,7 +726,8 @@ SampleProfileWriterBinary::writeHeader(const SampleProfileMap &ProfileMap) {
 
   // Generate the name table for all the functions referenced in the profile.
   for (const auto &I : ProfileMap) {
-    addContext(I.second.getContext());
+    assert(I.first == I.second.getContext() && "Inconsistent profile map");
+    addContext(I.first);
     addNames(I.second);
   }
 

diff  --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
index 79bee8362ef8a..3ddf5fe20edb2 100644
--- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
+++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
@@ -201,7 +201,7 @@ SampleContextTracker::SampleContextTracker(
     : GUIDToFuncNameMap(GUIDToFuncNameMap) {
   for (auto &FuncSample : Profiles) {
     FunctionSamples *FSamples = &FuncSample.second;
-    SampleContext Context = FuncSample.second.getContext();
+    SampleContext Context = FuncSample.first;
     LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context.toString()
                       << "\n");
     ContextTrieNode *NewNode = getOrCreateContextPath(Context, true);
@@ -638,7 +638,7 @@ void SampleContextTracker::createContextLessProfileMap(
     FunctionSamples *FProfile = Node->getFunctionSamples();
     // Profile's context can be empty, use ContextNode's func name.
     if (FProfile)
-      ContextLessProfiles.Create(Node->getFuncName()).merge(*FProfile);
+      ContextLessProfiles[Node->getFuncName()].merge(*FProfile);
   }
 }
 } // namespace llvm

diff  --git a/llvm/test/tools/llvm-profdata/Inputs/sample-nametable-after-samples.profdata b/llvm/test/tools/llvm-profdata/Inputs/sample-nametable-after-samples.profdata
index 76f19bbc7e513..699972870dae4 100644
Binary files a/llvm/test/tools/llvm-profdata/Inputs/sample-nametable-after-samples.profdata and b/llvm/test/tools/llvm-profdata/Inputs/sample-nametable-after-samples.profdata 
diff er

diff  --git a/llvm/test/tools/llvm-profdata/sample-nametable.test b/llvm/test/tools/llvm-profdata/sample-nametable.test
index d65b9737a77e3..1d59b9fa43725 100644
--- a/llvm/test/tools/llvm-profdata/sample-nametable.test
+++ b/llvm/test/tools/llvm-profdata/sample-nametable.test
@@ -8,5 +8,5 @@ RUN: not llvm-profdata show --sample %p/Inputs/sample-nametable-empty-string.pro
 
 3- The data of the name table is placed after the data of the profiles. The reader should handle it correctly.
 RUN: llvm-profdata merge --sample --text %p/Inputs/sample-nametable-after-samples.profdata | FileCheck %s
-CHECK: 18446744073709551613:2:9
+CHECK: 18446744073709551615:2:9
 

diff  --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index a11fc5753497f..da10ddcc58c64 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -594,7 +594,7 @@ adjustInstrProfile(std::unique_ptr<WriterContext> &WC,
 
   auto checkSampleProfileHasFUnique = [&Reader]() {
     for (const auto &PD : Reader->getProfiles()) {
-      auto &FContext = PD.second.getContext();
+      auto &FContext = PD.first;
       if (FContext.toString().find(FunctionSamples::UniqSuffix) !=
           std::string::npos) {
         return true;
@@ -2833,8 +2833,7 @@ static int showSampleProfile(const std::string &Filename, bool ShowCounts,
           "be printed");
 
     // TODO: parse context string to support filtering by contexts.
-    FunctionSamples *FS = Reader->getSamplesFor(StringRef(ShowFunction));
-    Reader->dumpFunctionProfile(FS ? *FS : FunctionSamples(), OS);
+    Reader->dumpFunctionProfile(StringRef(ShowFunction), OS);
   }
 
   if (ShowProfileSymbolList) {

diff  --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index f043dbcd63a9a..6632b003a236d 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -449,7 +449,7 @@ bool ProfileGeneratorBase::collectFunctionsFromRawProfile(
 bool ProfileGenerator::collectFunctionsFromLLVMProfile(
     std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
   for (const auto &FS : ProfileMap) {
-    if (auto *Func = Binary->getBinaryFunction(FS.second.getName()))
+    if (auto *Func = Binary->getBinaryFunction(FS.first.getName()))
       ProfiledFunctions.insert(Func);
   }
   return true;
@@ -468,7 +468,12 @@ bool CSProfileGenerator::collectFunctionsFromLLVMProfile(
 FunctionSamples &
 ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
   SampleContext Context(FuncName);
-  return ProfileMap.Create(Context);
+  auto Ret = ProfileMap.emplace(Context, FunctionSamples());
+  if (Ret.second) {
+    FunctionSamples &FProfile = Ret.first->second;
+    FProfile.setContext(Context);
+  }
+  return Ret.first->second;
 }
 
 void ProfileGenerator::generateProfile() {
@@ -500,14 +505,14 @@ void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
     return;
 
   // Move cold profiles into a tmp container.
-  std::vector<hash_code> ColdProfileHashes;
+  std::vector<SampleContext> ColdProfiles;
   for (const auto &I : ProfileMap) {
     if (I.second.getTotalSamples() < ColdCntThreshold)
-      ColdProfileHashes.emplace_back(I.first);
+      ColdProfiles.emplace_back(I.first);
   }
 
   // Remove the cold profile from ProfileMap.
-  for (const auto &I : ColdProfileHashes)
+  for (const auto &I : ColdProfiles)
     ProfileMap.erase(I);
 }
 
@@ -1013,7 +1018,9 @@ void CSProfileGenerator::postProcessProfiles() {
 
   // Merge function samples of CS profile to calculate profile density.
   sampleprof::SampleProfileMap ContextLessProfiles;
-  ProfileConverter::flattenProfile(ProfileMap, ContextLessProfiles, true);
+  for (const auto &I : ProfileMap) {
+    ContextLessProfiles[I.second.getName()].merge(I.second);
+  }
 
   calculateAndShowDensity(ContextLessProfiles);
   if (GenCSNestedProfile) {

diff  --git a/llvm/unittests/tools/llvm-profdata/CMakeLists.txt b/llvm/unittests/tools/llvm-profdata/CMakeLists.txt
index c53baf69bbc0e..ad91ce36bcb5b 100644
--- a/llvm/unittests/tools/llvm-profdata/CMakeLists.txt
+++ b/llvm/unittests/tools/llvm-profdata/CMakeLists.txt
@@ -6,7 +6,6 @@ set(LLVM_LINK_COMPONENTS
 
 add_llvm_unittest(LLVMProfdataTests
     OutputSizeLimitTest.cpp
-    MD5CollisionTest.cpp
   )
 
 target_link_libraries(LLVMProfdataTests PRIVATE LLVMTestingSupport)

diff  --git a/llvm/unittests/tools/llvm-profdata/MD5CollisionTest.cpp b/llvm/unittests/tools/llvm-profdata/MD5CollisionTest.cpp
deleted file mode 100644
index d7fe15a33702f..0000000000000
--- a/llvm/unittests/tools/llvm-profdata/MD5CollisionTest.cpp
+++ /dev/null
@@ -1,166 +0,0 @@
-//===- llvm/unittests/tools/llvm-profdata/MD5CollisionTest.cpp ------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-/// Test whether the MD5-key SampleProfileMap can handle collision correctly.
-/// Probability of collision is rare but not negligible since we only use the
-/// lower 64 bits of the MD5 value. A unit test is required because the function
-/// names are not printable ASCII characters.
-
-#include "llvm/ProfileData/SampleProfReader.h"
-#include "llvm/Support/VirtualFileSystem.h"
-#include "llvm/Testing/Support/Error.h"
-#include "gtest/gtest.h"
-
-/// According to https://en.wikipedia.org/wiki/MD5#Preimage_vulnerability, the
-/// MD5 of the two strings are 79054025255fb1a26e4bc422aef54eb4.
-
-// First 8 bytes of the MD5.
-const llvm::hash_code ExpectedHash = 0xa2b15f2525400579;
-
-// clang-format off
-const uint8_t ProfileData[] = {
-    0x84, 0xe4, 0xd0, 0xb1, 0xf4, 0xc9, 0x94, 0xa8,
-    0x53, 0x67, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x7D, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x80, 0x01, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x90, 0x01, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00,
-
-    /// Name Table
-    0x02,
-    /// String1
-    0xd1, 0x31, 0xdd, 0x02, 0xc5, 0xe6, 0xee, 0xc4,
-    0x69, 0x3d, 0x9a, 0x06, 0x98, 0xaf, 0xf9, 0x5c,
-    0x2f, 0xca, 0xb5, 0x87, 0x12, 0x46, 0x7e, 0xab,
-    0x40, 0x04, 0x58, 0x3e, 0xb8, 0xfb, 0x7f, 0x89,
-    0x55, 0xad, 0x34, 0x06, 0x09, 0xf4, 0xb3, 0x02,
-    0x83, 0xe4, 0x88, 0x83, 0x25, 0x71, 0x41, 0x5a,
-    0x08, 0x51, 0x25, 0xe8, 0xf7, 0xcd, 0xc9, 0x9f,
-    0xd9, 0x1d, 0xbd, 0xf2, 0x80, 0x37, 0x3c, 0x5b,
-    0xd8, 0x82, 0x3e, 0x31, 0x56, 0x34, 0x8f, 0x5b,
-    0xae, 0x6d, 0xac, 0xd4, 0x36, 0xc9, 0x19, 0xc6,
-    0xdd, 0x53, 0xe2, 0xb4, 0x87, 0xda, 0x03, 0xfd,
-    0x02, 0x39, 0x63, 0x06, 0xd2, 0x48, 0xcd, 0xa0,
-    0xe9, 0x9f, 0x33, 0x42, 0x0f, 0x57, 0x7e, 0xe8,
-    0xce, 0x54, 0xb6, 0x70, 0x80, 0xa8, 0x0d, 0x1e,
-    0xc6, 0x98, 0x21, 0xbc, 0xb6, 0xa8, 0x83, 0x93,
-    0x96, 0xf9, 0x65, 0x2b, 0x6f, 0xf7, 0x2a, 0x70, 0x00,
-    /// String2
-    0xd1, 0x31, 0xdd, 0x02, 0xc5, 0xe6, 0xee, 0xc4,
-    0x69, 0x3d, 0x9a, 0x06, 0x98, 0xaf, 0xf9, 0x5c,
-    0x2f, 0xca, 0xb5, 0x07, 0x12, 0x46, 0x7e, 0xab,
-    0x40, 0x04, 0x58, 0x3e, 0xb8, 0xfb, 0x7f, 0x89,
-    0x55, 0xad, 0x34, 0x06, 0x09, 0xf4, 0xb3, 0x02,
-    0x83, 0xe4, 0x88, 0x83, 0x25, 0xf1, 0x41, 0x5a,
-    0x08, 0x51, 0x25, 0xe8, 0xf7, 0xcd, 0xc9, 0x9f,
-    0xd9, 0x1d, 0xbd, 0x72, 0x80, 0x37, 0x3c, 0x5b,
-    0xd8, 0x82, 0x3e, 0x31, 0x56, 0x34, 0x8f, 0x5b,
-    0xae, 0x6d, 0xac, 0xd4, 0x36, 0xc9, 0x19, 0xc6,
-    0xdd, 0x53, 0xe2, 0x34, 0x87, 0xda, 0x03, 0xfd,
-    0x02, 0x39, 0x63, 0x06, 0xd2, 0x48, 0xcd, 0xa0,
-    0xe9, 0x9f, 0x33, 0x42, 0x0f, 0x57, 0x7e, 0xe8,
-    0xce, 0x54, 0xb6, 0x70, 0x80, 0x28, 0x0d, 0x1e,
-    0xc6, 0x98, 0x21, 0xbc, 0xb6, 0xa8, 0x83, 0x93,
-    0x96, 0xf9, 0x65, 0xab, 0x6f, 0xf7, 0x2a, 0x70, 0x00,
-
-    /// FuncOffsetTable
-    0x02, 0x00, 0x00, 0x01, 0x17, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-
-    /// Samples
-    /// String1:10:1
-    ///  1: 5
-    ///  2.3: 6
-    ///  4: String2:100
-    ///   1: 100
-    /// String2:7:3
-    ///  9: 0
-    0x01, 0x00, 0x0a, 0x02, 0x01, 0x00, 0x05, 0x00,
-    0x02, 0x03, 0x06, 0x00, 0x01, 0x04, 0x00, 0x01,
-    0x64, 0x01, 0x01, 0x00, 0x64, 0x00, 0x00,
-
-    0x03, 0x01, 0x07, 0x01, 0x09, 0x00, 0x00, 0x00,
-    0x00};
-// clang-format on
-
-using namespace llvm;
-using namespace llvm::sampleprof;
-
-TEST(MD5CollisionTest, TestCollision) {
-  auto InputBuffer = MemoryBuffer::getMemBuffer(
-      StringRef(reinterpret_cast<const char *>(ProfileData),
-                sizeof(ProfileData)),
-      "", false);
-  LLVMContext Context;
-  auto FileSystem = vfs::getRealFileSystem();
-  auto Result = SampleProfileReader::create(InputBuffer, Context, *FileSystem);
-  ASSERT_TRUE(Result);
-  SampleProfileReader *Reader = Result->get();
-  ASSERT_FALSE(Reader->read());
-
-  std::vector<StringRef> &NameTable = *Reader->getNameTable();
-  ASSERT_EQ(NameTable.size(), 2);
-  StringRef S1 = NameTable[0];
-  StringRef S2 = NameTable[1];
-  ASSERT_NE(S1, S2);
-  ASSERT_EQ(hash_code(MD5Hash(S1)), ExpectedHash);
-  ASSERT_EQ(hash_code(MD5Hash(S2)), ExpectedHash);
-
-  // S2's MD5 value collides with S1, S1 is expected to be dropped when S2 is
-  // inserted, as if S1 never existed.
-
-  FunctionSamples ExpectedFS;
-  ExpectedFS.setName(S2);
-  ExpectedFS.setHeadSamples(3);
-  ExpectedFS.setTotalSamples(7);
-  ExpectedFS.addBodySamples(9, 0, 0);
-
-  SampleProfileMap &Profiles = Reader->getProfiles();
-  EXPECT_EQ(Profiles.size(), 1);
-  if (Profiles.size()) {
-    auto &[Hash, FS] = *Profiles.begin();
-    EXPECT_EQ(Hash, ExpectedHash);
-    EXPECT_EQ(FS, ExpectedFS);
-  }
-
-  // Inserting S2 again should fail, returning the existing sample unchanged.
-  auto [It1, Inserted1] = Profiles.try_emplace(S2, FunctionSamples());
-  EXPECT_FALSE(Inserted1);
-  EXPECT_EQ(Profiles.size(), 1);
-  if (Profiles.size()) {
-    auto &[Hash, FS] = *It1;
-    EXPECT_EQ(Hash, ExpectedHash);
-    EXPECT_EQ(FS, ExpectedFS);
-  }
-
-  // Inserting S1 should success as if S2 never existed, and S2 is erased.
-  FunctionSamples FS1;
-  FS1.setName(S1);
-  FS1.setHeadSamples(5);
-  FS1.setTotalSamples(10);
-  FS1.addBodySamples(1, 2, 5);
-
-  auto [It2, Inserted2] = Profiles.try_emplace(S1, FS1);
-  EXPECT_TRUE(Inserted2);
-  EXPECT_EQ(Profiles.size(), 1);
-  if (Profiles.size()) {
-    auto &[Hash, FS] = *It2;
-    EXPECT_EQ(Hash, ExpectedHash);
-    EXPECT_EQ(FS, FS1);
-  }
-}

diff  --git a/llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp b/llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp
index d21c378e8ce3b..522a2c2dd3367 100644
--- a/llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp
+++ b/llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp
@@ -126,7 +126,7 @@ static ExpectedErrorOr<void *> RunTest(StringRef Input, size_t SizeLimit,
   // For every sample in the new profile, confirm it is in the old profile and
   // unchanged.
   for (auto Sample : NewProfiles) {
-    auto FindResult = OldProfiles.find(Sample.second.getContext());
+    auto FindResult = OldProfiles.find(Sample.first);
     EXPECT_NE(FindResult, OldProfiles.end());
     if (FindResult != OldProfiles.end()) {
       EXPECT_EQ(Sample.second.getHeadSamples(),


        


More information about the llvm-commits mailing list