[llvm] 1a53b5c - Revert "[llvm-profdata] Refactoring Sample Profile Reader to increase FDO build speed using MD5 as key to Sample Profile map"

Aaron Ballman via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 28 06:41:48 PDT 2023


Author: Aaron Ballman
Date: 2023-07-28T09:41:38-04:00
New Revision: 1a53b5c367b5ebf7d7f34afaa653ea337982f1d6

URL: https://github.com/llvm/llvm-project/commit/1a53b5c367b5ebf7d7f34afaa653ea337982f1d6
DIFF: https://github.com/llvm/llvm-project/commit/1a53b5c367b5ebf7d7f34afaa653ea337982f1d6.diff

LOG: Revert "[llvm-profdata] Refactoring Sample Profile Reader to increase FDO build speed using MD5 as key to Sample Profile map"

This reverts commit 66ba71d913df7f7cd75e92c0c4265932b7c93292.

Addressing issues found by:
https://lab.llvm.org/buildbot/#/builders/245/builds/11732
https://lab.llvm.org/buildbot/#/builders/187/builds/12251
https://lab.llvm.org/buildbot/#/builders/186/builds/11099
https://lab.llvm.org/buildbot/#/builders/182/builds/6976

Added: 
    

Modified: 
    llvm/include/llvm/ProfileData/SampleProf.h
    llvm/include/llvm/ProfileData/SampleProfReader.h
    llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
    llvm/lib/ProfileData/SampleProf.cpp
    llvm/lib/ProfileData/SampleProfReader.cpp
    llvm/lib/ProfileData/SampleProfWriter.cpp
    llvm/lib/Transforms/IPO/SampleContextTracker.cpp
    llvm/test/tools/llvm-profdata/Inputs/sample-nametable-after-samples.profdata
    llvm/test/tools/llvm-profdata/sample-nametable.test
    llvm/tools/llvm-profdata/llvm-profdata.cpp
    llvm/tools/llvm-profgen/ProfileGenerator.cpp
    llvm/unittests/tools/llvm-profdata/CMakeLists.txt
    llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp

Removed: 
    llvm/unittests/tools/llvm-profdata/MD5CollisionTest.cpp


################################################################################
diff  --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 76d6b6c599d077..12cc1f2fd002b9 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -318,14 +318,6 @@ struct LineLocationHash {
 
 raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc);
 
-static inline hash_code hashFuncName(StringRef F) {
-  // If function name is already MD5 string, do not hash again.
-  uint64_t Hash;
-  if (F.getAsInteger(10, Hash))
-    Hash = MD5Hash(F);
-  return Hash;
-}
-
 /// Representation of a single sample record.
 ///
 /// A sample record is represented by a positive integer value, which
@@ -638,13 +630,9 @@ class SampleContext {
     return getContextString(FullContext, false);
   }
 
-  hash_code getHashCode() const {
-    if (hasContext())
-      return hash_value(getContextFrames());
-
-    // For non-context function name, use its MD5 as hash value, so that it is
-    // consistent with the profile map's key.
-    return hashFuncName(getName());
+  uint64_t getHashCode() const {
+    return hasContext() ? hash_value(getContextFrames())
+                        : hash_value(getName());
   }
 
   /// Set the name of the function and clear the current context.
@@ -722,12 +710,9 @@ class SampleContext {
   uint32_t Attributes;
 };
 
-static inline hash_code hash_value(const SampleContext &Context) {
-  return Context.getHashCode();
-}
-
-inline raw_ostream &operator<<(raw_ostream &OS, const SampleContext &Context) {
-  return OS << Context.toString();
+static inline hash_code hash_value(const SampleContext &arg) {
+  return arg.hasContext() ? hash_value(arg.getContextFrames())
+                          : hash_value(arg.getName());
 }
 
 class FunctionSamples;
@@ -1221,9 +1206,6 @@ class FunctionSamples {
     return !(*this == Other);
   }
 
-  template <typename T>
-  const T &getKey() const;
-
 private:
   /// CFG hash value for the function.
   uint64_t FunctionHash = 0;
@@ -1287,176 +1269,12 @@ class FunctionSamples {
   const LocToLocMap *IRToProfileLocationMap = nullptr;
 };
 
-template <>
-inline const SampleContext &FunctionSamples::getKey<SampleContext>() const {
-  return getContext();
-}
-
 raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS);
 
-/// This class is a wrapper to associative container MapT<KeyT, ValueT> using
-/// the hash value of the original key as the new key. This greatly improves the
-/// performance of insert and query operations especially when hash values of
-/// keys are available a priori, and reduces memory usage if KeyT has a large
-/// size.
-/// When performing any action, if an existing entry with a given key is found,
-/// and the interface "KeyT ValueT::getKey<KeyT>() const" to retrieve a value's
-/// original key exists, this class checks if the given key actually matches
-/// the existing entry's original key. If they do not match, this class behaves
-/// as if the entry did not exist (for insertion, this means the new value will
-/// replace the existing entry's value, as if it is newly inserted). If
-/// ValueT::getKey<KeyT>() is not available, all keys with the same hash value
-/// are considered equivalent (i.e. hash collision is silently ignored). Given
-/// such feature this class should only be used where it does not affect
-/// compilation correctness, for example, when loading a sample profile.
-/// Assuming the hashing algorithm is uniform, the probability of hash collision
-/// with 1,000,000 entries is
-/// (2^64)!/((2^64-1000000)!*(2^64)^1000000) ~= 3*10^-8.
-template <template <typename, typename, typename...> typename MapT,
-          typename KeyT, typename ValueT, typename... MapTArgs>
-class HashKeyMap : public MapT<hash_code, ValueT, MapTArgs...> {
-public:
-  using base_type = MapT<hash_code, ValueT, MapTArgs...>;
-  using key_type = hash_code;
-  using original_key_type = KeyT;
-  using mapped_type = ValueT;
-  using value_type = typename base_type::value_type;
+using SampleProfileMap =
+    std::unordered_map<SampleContext, FunctionSamples, SampleContext::Hash>;
 
-  using iterator = typename base_type::iterator;
-  using const_iterator = typename base_type::const_iterator;
-
-private:
-  // If the value type has getKey(), retrieve its original key for comparison.
-  template <typename U = mapped_type,
-            typename = decltype(U().template getKey<original_key_type>())>
-  static bool
-  CheckKeyMatch(const original_key_type &Key, const mapped_type &ExistingValue,
-                original_key_type *ExistingKeyIfDifferent = nullptr) {
-    const original_key_type &ExistingKey =
-        ExistingValue.template getKey<original_key_type>();
-    bool Result = (Key == ExistingKey);
-    if (!Result && ExistingKeyIfDifferent)
-      *ExistingKeyIfDifferent = ExistingKey;
-    return Result;
-  }
-
-  // If getKey() does not exist, this overload is selected, which assumes all
-  // keys with the same hash are equivalent.
-  static bool CheckKeyMatch(...) { return true; }
-
-public:
-  template <typename... Ts>
-  std::pair<iterator, bool> try_emplace(const key_type &Hash,
-                                        const original_key_type &Key,
-                                        Ts &&...Args) {
-    assert(Hash == hash_value(Key));
-    auto Ret = base_type::try_emplace(Hash, std::forward<Ts>(Args)...);
-    if (!Ret.second) {
-      original_key_type ExistingKey;
-      if (LLVM_UNLIKELY(!CheckKeyMatch(Key, Ret.first->second, &ExistingKey))) {
-        dbgs() << "MD5 collision detected: " << Key << " and " << ExistingKey
-               << " has same hash value " << Hash << "\n";
-        Ret.second = true;
-        Ret.first->second = mapped_type(std::forward<Ts>(Args)...);
-      }
-    }
-    return Ret;
-  }
-
-  template <typename... Ts>
-  std::pair<iterator, bool> try_emplace(const original_key_type &Key,
-                                        Ts &&...Args) {
-    key_type Hash = hash_value(Key);
-    return try_emplace(Hash, Key, std::forward<Ts>(Args)...);
-  }
-
-  template <typename... Ts> std::pair<iterator, bool> emplace(Ts &&...Args) {
-    return try_emplace(std::forward<Ts>(Args)...);
-  }
-
-  mapped_type &operator[](const original_key_type &Key) {
-    return try_emplace(Key, mapped_type()).first->second;
-  }
-
-  iterator find(const original_key_type &Key) {
-    key_type Hash = hash_value(Key);
-    auto It = base_type::find(Hash);
-    if (It != base_type::end())
-      if (LLVM_LIKELY(CheckKeyMatch(Key, It->second)))
-        return It;
-    return base_type::end();
-  }
-
-  const_iterator find(const original_key_type &Key) const {
-    key_type Hash = hash_value(Key);
-    auto It = base_type::find(Hash);
-    if (It != base_type::end())
-      if (LLVM_LIKELY(CheckKeyMatch(Key, It->second)))
-        return It;
-    return base_type::end();
-  }
-
-  size_t erase(const original_key_type &Ctx) {
-    auto It = find(Ctx);
-    if (It != base_type::end()) {
-      base_type::erase(It);
-      return 1;
-    }
-    return 0;
-  }
-};
-
-/// This class provides operator overloads to the map container using MD5 as the
-/// key type, so that existing code can still work in most cases using
-/// SampleContext as key.
-/// Note: when populating container, make sure to assign the SampleContext to
-/// the mapped value immediately because the key no longer holds it.
-class SampleProfileMap
-    : public HashKeyMap<DenseMap, SampleContext, FunctionSamples> {
-public:
-  // Convenience method because this is being used in many places. Set the
-  // FunctionSamples' context if its newly inserted.
-  mapped_type &Create(const SampleContext &Ctx) {
-    auto Ret = try_emplace(Ctx, FunctionSamples());
-    if (Ret.second)
-      Ret.first->second.setContext(Ctx);
-    return Ret.first->second;
-  }
-
-  iterator find(const SampleContext &Ctx) {
-    return HashKeyMap<llvm::DenseMap, SampleContext, FunctionSamples>::find(
-        Ctx);
-  }
-
-  const_iterator find(const SampleContext &Ctx) const {
-    return HashKeyMap<llvm::DenseMap, SampleContext, FunctionSamples>::find(
-        Ctx);
-  }
-
-  // Overloaded find() to lookup a function by name. This is called by IPO
-  // passes with an actual function name, and it is possible that the profile
-  // reader converted function names in the profile to MD5 strings, so we need
-  // to check if either representation matches.
-  iterator find(StringRef Fname) {
-    hash_code Hash = hashFuncName(Fname);
-    auto It = base_type::find(Hash);
-    if (It != end()) {
-      StringRef CtxName = It->second.getContext().getName();
-      if (LLVM_LIKELY(CtxName == Fname || CtxName == std::to_string(Hash)))
-        return It;
-    }
-    return end();
-  }
-
-  size_t erase(const SampleContext &Ctx) {
-    return HashKeyMap<llvm::DenseMap, SampleContext, FunctionSamples>::erase(
-        Ctx);
-  }
-
-  size_t erase(const key_type &Key) { return base_type::erase(Key); }
-};
-
-using NameFunctionSamples = std::pair<hash_code, const FunctionSamples *>;
+using NameFunctionSamples = std::pair<SampleContext, const FunctionSamples *>;
 
 void sortFuncProfiles(const SampleProfileMap &ProfileMap,
                       std::vector<NameFunctionSamples> &SortedProfiles);
@@ -1502,6 +1320,8 @@ class SampleContextTrimmer {
                                        bool MergeColdContext,
                                        uint32_t ColdContextFrameLength,
                                        bool TrimBaseProfileOnly);
+  // Canonicalize context profile name and attributes.
+  void canonicalizeContextProfiles();
 
 private:
   SampleProfileMap &ProfileMap;
@@ -1547,12 +1367,12 @@ class ProfileConverter {
                              SampleProfileMap &OutputProfiles,
                              bool ProfileIsCS = false) {
     if (ProfileIsCS) {
-      for (const auto &I : InputProfiles) {
-        // Retain the profile name and clear the full context for each function
-        // profile.
-        FunctionSamples &FS = OutputProfiles.Create(I.second.getName());
-        FS.merge(I.second);
-      }
+      for (const auto &I : InputProfiles)
+        OutputProfiles[I.second.getName()].merge(I.second);
+      // Retain the profile name and clear the full context for each function
+      // profile.
+      for (auto &I : OutputProfiles)
+        I.second.setContext(SampleContext(I.first));
     } else {
       for (const auto &I : InputProfiles)
         flattenNestedProfile(OutputProfiles, I.second);

diff  --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index d32516252eddbc..e14b0bfc7912aa 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -347,7 +347,7 @@ class SampleProfileReader {
 public:
   SampleProfileReader(std::unique_ptr<MemoryBuffer> B, LLVMContext &C,
                       SampleProfileFormat Format = SPF_None)
-      : Profiles(), Ctx(C), Buffer(std::move(B)), Format(Format) {}
+      : Profiles(0), Ctx(C), Buffer(std::move(B)), Format(Format) {}
 
   virtual ~SampleProfileReader() = default;
 
@@ -383,8 +383,8 @@ class SampleProfileReader {
   /// The implementaion to read sample profiles from the associated file.
   virtual std::error_code readImpl() = 0;
 
-  /// Print the profile for \p FunctionSamples on stream \p OS.
-  void dumpFunctionProfile(const FunctionSamples &FS, raw_ostream &OS = dbgs());
+  /// Print the profile for \p FContext on stream \p OS.
+  void dumpFunctionProfile(SampleContext FContext, raw_ostream &OS = dbgs());
 
   /// Collect functions with definitions in Module M. For reader which
   /// support loading function profiles on demand, return true when the
@@ -410,15 +410,23 @@ class SampleProfileReader {
   /// Return the samples collected for function \p F, create empty
   /// FunctionSamples if it doesn't exist.
   FunctionSamples *getOrCreateSamplesFor(const Function &F) {
+    std::string FGUID;
     StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
-    auto it = Profiles.find(CanonName);
-    if (it != Profiles.end())
-      return &it->second;
+    CanonName = getRepInFormat(CanonName, useMD5(), FGUID);
+    auto It = Profiles.find(CanonName);
+    if (It != Profiles.end())
+      return &It->second;
+    if (!FGUID.empty()) {
+      assert(useMD5() && "New name should only be generated for md5 profile");
+      CanonName = *MD5NameBuffer.insert(FGUID).first;
+    }
     return &Profiles[CanonName];
   }
 
   /// Return the samples collected for function \p F.
-  FunctionSamples *getSamplesFor(StringRef Fname) {
+  virtual FunctionSamples *getSamplesFor(StringRef Fname) {
+    std::string FGUID;
+    Fname = getRepInFormat(Fname, useMD5(), FGUID);
     auto It = Profiles.find(Fname);
     if (It != Profiles.end())
       return &It->second;
@@ -646,16 +654,15 @@ class SampleProfileReaderBinary : public SampleProfileReader {
   /// Read the whole name table.
   std::error_code readNameTable();
 
-  /// Read a string indirectly via the name table. Optionally return the index.
-  ErrorOr<StringRef> readStringFromTable(size_t *RetIdx = nullptr);
+  /// Read a string indirectly via the name table.
+  ErrorOr<StringRef> readStringFromTable();
 
-  /// Read a context indirectly via the CSNameTable. Optionally return the
-  /// index.
-  ErrorOr<SampleContextFrames> readContextFromTable(size_t *RetIdx = nullptr);
+  /// Read a context indirectly via the CSNameTable.
+  ErrorOr<SampleContextFrames> readContextFromTable();
 
   /// Read a context indirectly via the CSNameTable if the profile has context,
-  /// otherwise same as readStringFromTable, also return its hash value.
-  ErrorOr<std::pair<SampleContext, hash_code>> readSampleContextFromTable();
+  /// otherwise same as readStringFromTable.
+  ErrorOr<SampleContext> readSampleContextFromTable();
 
   /// Points to the current location in the buffer.
   const uint8_t *Data = nullptr;
@@ -675,24 +682,13 @@ class SampleProfileReaderBinary : public SampleProfileReader {
   /// the lifetime of MD5StringBuf is not shorter than that of NameTable.
   std::vector<std::string> MD5StringBuf;
 
-  /// The starting address of fixed length MD5 name table section.
+  /// The starting address of NameTable containing fixed length MD5.
   const uint8_t *MD5NameMemStart = nullptr;
 
   /// CSNameTable is used to save full context vectors. It is the backing buffer
   /// for SampleContextFrames.
   std::vector<SampleContextFrameVector> CSNameTable;
 
-  /// Table to cache MD5 values of sample contexts corresponding to
-  /// readSampleContextFromTable(), used to index into Profiles or
-  /// FuncOffsetTable.
-  std::vector<hash_code> MD5SampleContextTable;
-
-  /// The starting address of the table of MD5 values of sample contexts. For
-  /// fixed length MD5 non-CS profile it is same as MD5NameMemStart because
-  /// hashes of non-CS contexts are already in the profile. Otherwise it points
-  /// to the start of MD5SampleContextTable.
-  const hash_code *MD5SampleContextStart = nullptr;
-
 private:
   std::error_code readSummaryEntry(std::vector<ProfileSummaryEntry> &Entries);
   virtual std::error_code verifySPMagic(uint64_t Magic) = 0;
@@ -766,10 +762,10 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
 
   std::unique_ptr<ProfileSymbolList> ProfSymList;
 
-  /// The table mapping from a function context's MD5 to the offset of its
+  /// The table mapping from function context to the offset of its
   /// FunctionSample towards file start.
   /// At most one of FuncOffsetTable and FuncOffsetList is populated.
-  DenseMap<hash_code, uint64_t> FuncOffsetTable;
+  DenseMap<SampleContext, uint64_t> FuncOffsetTable;
 
   /// The list version of FuncOffsetTable. This is used if every entry is
   /// being accessed.

diff  --git a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
index 3a45113b0a2eae..8e07478fb083b7 100644
--- a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
+++ b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
@@ -204,7 +204,9 @@ SampleProfileSummaryBuilder::computeSummaryForProfiles(
   // profiles before computing profile summary.
   if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS &&
                                 !UseContextLessSummary.getNumOccurrences())) {
-    ProfileConverter::flattenProfile(Profiles, ContextLessProfiles, true);
+    for (const auto &I : Profiles) {
+      ContextLessProfiles[I.second.getName()].merge(I.second);
+    }
     ProfilesToUse = &ContextLessProfiles;
   }
 

diff  --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
index b14dc01be23624..fdae8a011e7120 100644
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -202,12 +202,13 @@ void sampleprof::sortFuncProfiles(
     const SampleProfileMap &ProfileMap,
     std::vector<NameFunctionSamples> &SortedProfiles) {
   for (const auto &I : ProfileMap) {
-    SortedProfiles.push_back(std::make_pair(I.first, &I.second));
+    assert(I.first == I.second.getContext() && "Inconsistent profile map");
+    SortedProfiles.push_back(std::make_pair(I.second.getContext(), &I.second));
   }
   llvm::stable_sort(SortedProfiles, [](const NameFunctionSamples &A,
                                        const NameFunctionSamples &B) {
     if (A.second->getTotalSamples() == B.second->getTotalSamples())
-      return A.second->getContext() < B.second->getContext();
+      return A.first < B.first;
     return A.second->getTotalSamples() > B.second->getTotalSamples();
   });
 }
@@ -356,13 +357,13 @@ void SampleContextTrimmer::trimAndMergeColdContextProfiles(
 
   // Filter the cold profiles from ProfileMap and move them into a tmp
   // container
-  std::vector<std::pair<hash_code, const FunctionSamples *>> ColdProfiles;
+  std::vector<std::pair<SampleContext, const FunctionSamples *>> ColdProfiles;
   for (const auto &I : ProfileMap) {
-    const SampleContext &Context = I.second.getContext();
+    const SampleContext &Context = I.first;
     const FunctionSamples &FunctionProfile = I.second;
     if (FunctionProfile.getTotalSamples() < ColdCountThreshold &&
         (!TrimBaseProfileOnly || Context.isBaseContext()))
-      ColdProfiles.emplace_back(I.first, &I.second);
+      ColdProfiles.emplace_back(Context, &I.second);
   }
 
   // Remove the cold profile from ProfileMap and merge them into
@@ -373,8 +374,8 @@ void SampleContextTrimmer::trimAndMergeColdContextProfiles(
       auto MergedContext = I.second->getContext().getContextFrames();
       if (ColdContextFrameLength < MergedContext.size())
         MergedContext = MergedContext.take_back(ColdContextFrameLength);
-      // Need to set MergedProfile's context here otherwise it will be lost.
-      FunctionSamples &MergedProfile = MergedProfileMap.Create(MergedContext);
+      auto Ret = MergedProfileMap.emplace(MergedContext, FunctionSamples());
+      FunctionSamples &MergedProfile = Ret.first->second;
       MergedProfile.merge(*I.second);
     }
     ProfileMap.erase(I.first);
@@ -384,17 +385,57 @@ void SampleContextTrimmer::trimAndMergeColdContextProfiles(
   for (const auto &I : MergedProfileMap) {
     // Filter the cold merged profile
     if (TrimColdContext && I.second.getTotalSamples() < ColdCountThreshold &&
-        ProfileMap.find(I.second.getContext()) == ProfileMap.end())
+        ProfileMap.find(I.first) == ProfileMap.end())
       continue;
     // Merge the profile if the original profile exists, otherwise just insert
-    // as a new profile. If inserted as a new profile from MergedProfileMap, it
-    // already has the right context.
-    auto Ret = ProfileMap.emplace(I.second.getContext(), FunctionSamples());
+    // as a new profile
+    auto Ret = ProfileMap.emplace(I.first, FunctionSamples());
+    if (Ret.second) {
+      SampleContext FContext(Ret.first->first, RawContext);
+      FunctionSamples &FProfile = Ret.first->second;
+      FProfile.setContext(FContext);
+    }
     FunctionSamples &OrigProfile = Ret.first->second;
     OrigProfile.merge(I.second);
   }
 }
 
+void SampleContextTrimmer::canonicalizeContextProfiles() {
+  std::vector<SampleContext> ProfilesToBeRemoved;
+  SampleProfileMap ProfilesToBeAdded;
+  for (auto &I : ProfileMap) {
+    FunctionSamples &FProfile = I.second;
+    SampleContext &Context = FProfile.getContext();
+    if (I.first == Context)
+      continue;
+
+    // Use the context string from FunctionSamples to update the keys of
+    // ProfileMap. They can get out of sync after context profile promotion
+    // through pre-inliner.
+    // Duplicate the function profile for later insertion to avoid a conflict
+    // caused by a context both to be add and to be removed. This could happen
+    // when a context is promoted to another context which is also promoted to
+    // the third context. For example, given an original context A @ B @ C that
+    // is promoted to B @ C and the original context B @ C which is promoted to
+    // just C, adding B @ C to the profile map while removing same context (but
+    // with 
diff erent profiles) from the map can cause a conflict if they are
+    // not handled in a right order. This can be solved by just caching the
+    // profiles to be added.
+    auto Ret = ProfilesToBeAdded.emplace(Context, FProfile);
+    (void)Ret;
+    assert(Ret.second && "Context conflict during canonicalization");
+    ProfilesToBeRemoved.push_back(I.first);
+  }
+
+  for (auto &I : ProfilesToBeRemoved) {
+    ProfileMap.erase(I);
+  }
+
+  for (auto &I : ProfilesToBeAdded) {
+    ProfileMap.emplace(I.first, I.second);
+  }
+}
+
 std::error_code ProfileSymbolList::write(raw_ostream &OS) {
   // Sort the symbols before output. If doing compression.
   // It will make the compression much more effective.
@@ -468,7 +509,6 @@ void ProfileConverter::convertCSProfiles(ProfileConverter::FrameNode &Node) {
     if (!ChildProfile)
       continue;
     SampleContext OrigChildContext = ChildProfile->getContext();
-    hash_code OrigChildContextHash = OrigChildContext.getHashCode();
     // Reset the child context to be contextless.
     ChildProfile->getContext().setName(OrigChildContext.getName());
     if (NodeProfile) {
@@ -484,7 +524,6 @@ void ProfileConverter::convertCSProfiles(ProfileConverter::FrameNode &Node) {
       NodeProfile->removeTotalSamples(Count);
     }
 
-    hash_code NewChildProfileHash(0);
     // Separate child profile to be a standalone profile, if the current parent
     // profile doesn't exist. This is a duplicating operation when the child
     // profile is already incorporated into the parent which is still useful and
@@ -493,20 +532,15 @@ void ProfileConverter::convertCSProfiles(ProfileConverter::FrameNode &Node) {
     // profile in the prelink phase for to-be-fully-inlined functions.
     if (!NodeProfile) {
       ProfileMap[ChildProfile->getContext()].merge(*ChildProfile);
-      NewChildProfileHash = ChildProfile->getContext().getHashCode();
     } else if (GenerateMergedBaseProfiles) {
       ProfileMap[ChildProfile->getContext()].merge(*ChildProfile);
-      NewChildProfileHash = ChildProfile->getContext().getHashCode();
       auto &SamplesMap = NodeProfile->functionSamplesAt(ChildNode.CallSiteLoc);
       SamplesMap[ChildProfile->getName().str()].getContext().setAttribute(
           ContextDuplicatedIntoBase);
     }
 
-    // Remove the original child profile. Check if MD5 of new child profile
-    // collides with old profile, in this case the [] operator already
-    // overwritten it without the need of erase.
-    if (NewChildProfileHash != OrigChildContextHash)
-      ProfileMap.erase(OrigChildContextHash);
+    // Remove the original child profile.
+    ProfileMap.erase(OrigChildContext);
   }
 }
 

diff  --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 0b470824062b66..fbdd9a307321bf 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -61,9 +61,9 @@ static cl::opt<bool> ProfileIsFSDisciminator(
 ///
 /// \param FContext Name + context of the function to print.
 /// \param OS Stream to emit the output to.
-void SampleProfileReader::dumpFunctionProfile(const FunctionSamples &FS,
+void SampleProfileReader::dumpFunctionProfile(SampleContext FContext,
                                               raw_ostream &OS) {
-  OS << "Function: " << FS.getContext().toString() << ": " << FS;
+  OS << "Function: " << FContext.toString() << ": " << Profiles[FContext];
 }
 
 /// Dump all the function profiles found on stream \p OS.
@@ -71,7 +71,7 @@ void SampleProfileReader::dump(raw_ostream &OS) {
   std::vector<NameFunctionSamples> V;
   sortFuncProfiles(Profiles, V);
   for (const auto &I : V)
-    dumpFunctionProfile(*I.second, OS);
+    dumpFunctionProfile(I.first, OS);
 }
 
 static void dumpFunctionProfileJson(const FunctionSamples &S,
@@ -355,7 +355,9 @@ std::error_code SampleProfileReaderText::readImpl() {
       SampleContext FContext(FName, CSNameTable);
       if (FContext.hasContext())
         ++CSProfileCount;
-      FunctionSamples &FProfile = Profiles.Create(FContext);
+      Profiles[FContext] = FunctionSamples();
+      FunctionSamples &FProfile = Profiles[FContext];
+      FProfile.setContext(FContext);
       MergeResult(Result, FProfile.addTotalSamples(NumSamples));
       MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
       InlineStack.clear();
@@ -523,8 +525,7 @@ inline ErrorOr<size_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
   return *Idx;
 }
 
-ErrorOr<StringRef>
-SampleProfileReaderBinary::readStringFromTable(size_t *RetIdx) {
+ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
   auto Idx = readStringIndex(NameTable);
   if (std::error_code EC = Idx.getError())
     return EC;
@@ -542,52 +543,30 @@ SampleProfileReaderBinary::readStringFromTable(size_t *RetIdx) {
        MD5NameMemStart + (*Idx) * sizeof(uint64_t));
     SR = MD5StringBuf.emplace_back(std::to_string(FID));
   }
-  if (RetIdx)
-    *RetIdx = *Idx;
   return SR;
 }
 
-ErrorOr<SampleContextFrames>
-SampleProfileReaderBinary::readContextFromTable(size_t *RetIdx) {
+ErrorOr<SampleContextFrames> SampleProfileReaderBinary::readContextFromTable() {
   auto ContextIdx = readNumber<size_t>();
   if (std::error_code EC = ContextIdx.getError())
     return EC;
   if (*ContextIdx >= CSNameTable.size())
     return sampleprof_error::truncated_name_table;
-  if (RetIdx)
-    *RetIdx = *ContextIdx;
   return CSNameTable[*ContextIdx];
 }
 
-ErrorOr<std::pair<SampleContext, hash_code>>
-SampleProfileReaderBinary::readSampleContextFromTable() {
-  SampleContext Context;
-  size_t Idx;
+ErrorOr<SampleContext> SampleProfileReaderBinary::readSampleContextFromTable() {
   if (ProfileIsCS) {
-    auto FContext(readContextFromTable(&Idx));
+    auto FContext(readContextFromTable());
     if (std::error_code EC = FContext.getError())
       return EC;
-    Context = SampleContext(*FContext);
+    return SampleContext(*FContext);
   } else {
-    auto FName(readStringFromTable(&Idx));
+    auto FName(readStringFromTable());
     if (std::error_code EC = FName.getError())
       return EC;
-    Context = SampleContext(*FName);
-  }
-  // Since MD5SampleContextStart may point to the profile's file data, need to
-  // make sure it is reading the same value on big endian CPU.
-  hash_code Hash =
-      support::endian::read<hash_code, support::little, support::unaligned>(
-          MD5SampleContextStart + Idx);
-  // Lazy computing of hash value, write back to the table to cache it. Only
-  // compute the context's hash value if it is being referenced for the first
-  // time.
-  if (Hash == hash_code(0)) {
-    assert(MD5SampleContextStart == MD5SampleContextTable.data());
-    Hash = Context.getHashCode();
-    support::endian::write(&MD5SampleContextTable[Idx], Hash, support::little);
+    return SampleContext(*FName);
   }
-  return std::make_pair(Context, Hash);
 }
 
 std::error_code
@@ -680,18 +659,16 @@ SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
   if (std::error_code EC = NumHeadSamples.getError())
     return EC;
 
-  auto FContextHash(readSampleContextFromTable());
-  if (std::error_code EC = FContextHash.getError())
+  ErrorOr<SampleContext> FContext(readSampleContextFromTable());
+  if (std::error_code EC = FContext.getError())
     return EC;
 
-  auto &[FContext, Hash] = *FContextHash;
-  // Use the cached hash value for insertion instead of recalculating it.
-  auto Res = Profiles.try_emplace(Hash, FContext, FunctionSamples());
-  FunctionSamples &FProfile = Res.first->second;
-  FProfile.setContext(FContext);
+  Profiles[*FContext] = FunctionSamples();
+  FunctionSamples &FProfile = Profiles[*FContext];
+  FProfile.setContext(*FContext);
   FProfile.addHeadSamples(*NumHeadSamples);
 
-  if (FContext.hasContext())
+  if (FContext->hasContext())
     CSProfileCount++;
 
   if (std::error_code EC = readProfile(FProfile))
@@ -839,21 +816,18 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
     FuncOffsetTable.reserve(*Size);
 
   for (uint64_t I = 0; I < *Size; ++I) {
-    auto FContextHash(readSampleContextFromTable());
-    if (std::error_code EC = FContextHash.getError())
+    auto FContext(readSampleContextFromTable());
+    if (std::error_code EC = FContext.getError())
       return EC;
 
-    auto &[FContext, Hash] = *FContextHash;
     auto Offset = readNumber<uint64_t>();
     if (std::error_code EC = Offset.getError())
       return EC;
 
     if (UseFuncOffsetList)
-      FuncOffsetList.emplace_back(FContext, *Offset);
+      FuncOffsetList.emplace_back(*FContext, *Offset);
     else
-      // Because Porfiles replace existing value with new value if collision
-      // happens, we also use the latest offset so that they are consistent.
-      FuncOffsetTable[Hash] = *Offset;
+      FuncOffsetTable[*FContext] = *Offset;
  }
 
  return sampleprof_error::success;
@@ -926,8 +900,8 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
     } else if (useMD5()) {
       assert(!useFuncOffsetList());
       for (auto Name : FuncsToUse) {
-        auto GUID = MD5Hash(Name);
-        auto iter = FuncOffsetTable.find(GUID);
+        auto GUID = std::to_string(MD5Hash(Name));
+        auto iter = FuncOffsetTable.find(StringRef(GUID));
         if (iter == FuncOffsetTable.end())
           continue;
         const uint8_t *FuncProfileAddr = Start + iter->second;
@@ -948,7 +922,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
     } else {
       assert(!useFuncOffsetList());
       for (auto Name : FuncsToUse) {
-        auto iter = FuncOffsetTable.find(MD5Hash(Name));
+        auto iter = FuncOffsetTable.find(Name);
         if (iter == FuncOffsetTable.end())
           continue;
         const uint8_t *FuncProfileAddr = Start + iter->second;
@@ -1076,30 +1050,17 @@ std::error_code SampleProfileReaderBinary::readNameTable() {
 
   NameTable.clear();
   NameTable.reserve(*Size);
-  if (!ProfileIsCS) {
-    MD5SampleContextTable.clear();
-    if (UseMD5)
-      MD5SampleContextTable.reserve(*Size);
-    else
-      // If we are using strings, delay MD5 computation since only a portion of
-      // names are used by top level functions. Use 0 to indicate MD5 value is
-      // to be calculated as no known string has a MD5 value of 0.
-      MD5SampleContextTable.resize(*Size);
-  }
   for (size_t I = 0; I < *Size; ++I) {
     auto Name(readString());
     if (std::error_code EC = Name.getError())
       return EC;
     if (UseMD5) {
-      uint64_t FID = hashFuncName(*Name);
-      if (!ProfileIsCS)
-        MD5SampleContextTable.emplace_back(FID);
+      uint64_t FID = MD5Hash(*Name);
       NameTable.emplace_back(MD5StringBuf.emplace_back(std::to_string(FID)));
     } else
       NameTable.push_back(*Name);
   }
-  if (!ProfileIsCS)
-    MD5SampleContextStart = MD5SampleContextTable.data();
+
   return sampleprof_error::success;
 }
 
@@ -1127,8 +1088,6 @@ SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5,
     NameTable.clear();
     NameTable.resize(*Size);
     MD5NameMemStart = Data;
-    if (!ProfileIsCS)
-      MD5SampleContextStart = reinterpret_cast<const hash_code *>(Data);
     Data = Data + (*Size) * sizeof(uint64_t);
     return sampleprof_error::success;
   }
@@ -1142,19 +1101,12 @@ SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5,
     MD5StringBuf.reserve(MD5StringBuf.size() + *Size);
     NameTable.clear();
     NameTable.reserve(*Size);
-    if (!ProfileIsCS)
-      MD5SampleContextTable.resize(*Size);
     for (size_t I = 0; I < *Size; ++I) {
       auto FID = readNumber<uint64_t>();
       if (std::error_code EC = FID.getError())
         return EC;
-      if (!ProfileIsCS)
-        support::endian::write(&MD5SampleContextTable[I], *FID,
-                               support::little);
       NameTable.emplace_back(MD5StringBuf.emplace_back(std::to_string(*FID)));
     }
-    if (!ProfileIsCS)
-      MD5SampleContextStart = MD5SampleContextTable.data();
     return sampleprof_error::success;
   }
 
@@ -1172,14 +1124,6 @@ std::error_code SampleProfileReaderExtBinaryBase::readCSNameTableSec() {
 
   CSNameTable.clear();
   CSNameTable.reserve(*Size);
-  if (ProfileIsCS) {
-    // Delay MD5 computation of CS context until they are needed. Use 0 to
-    // indicate MD5 value is to be calculated as no known string has a MD5
-    // value of 0.
-    MD5SampleContextTable.clear();
-    MD5SampleContextTable.resize(*Size);
-    MD5SampleContextStart = MD5SampleContextTable.data();
-  }
   for (size_t I = 0; I < *Size; ++I) {
     CSNameTable.emplace_back(SampleContextFrameVector());
     auto ContextSize = readNumber<uint32_t>();
@@ -1243,17 +1187,16 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
         if (std::error_code EC = Discriminator.getError())
           return EC;
 
-        auto FContextHash(readSampleContextFromTable());
-        if (std::error_code EC = FContextHash.getError())
+        auto FContext(readSampleContextFromTable());
+        if (std::error_code EC = FContext.getError())
           return EC;
 
-        auto &[FContext, Hash] = *FContextHash;
         FunctionSamples *CalleeProfile = nullptr;
         if (FProfile) {
           CalleeProfile = const_cast<FunctionSamples *>(
               &FProfile->functionSamplesAt(LineLocation(
                   *LineOffset,
-                  *Discriminator))[std::string(FContext.getName())]);
+                  *Discriminator))[std::string(FContext.get().getName())]);
         }
         if (std::error_code EC =
                 readFuncMetadata(ProfileHasAttribute, CalleeProfile))
@@ -1268,12 +1211,11 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
 std::error_code
 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
   while (Data < End) {
-    auto FContextHash(readSampleContextFromTable());
-    if (std::error_code EC = FContextHash.getError())
+    auto FContext(readSampleContextFromTable());
+    if (std::error_code EC = FContext.getError())
       return EC;
-    auto &[FContext, Hash] = *FContextHash;
     FunctionSamples *FProfile = nullptr;
-    auto It = Profiles.find(FContext);
+    auto It = Profiles.find(*FContext);
     if (It != Profiles.end())
       FProfile = &It->second;
 

diff  --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index f418dbe7105709..0873093ad426c5 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -357,13 +357,14 @@ std::error_code SampleProfileWriterExtBinaryBase::writeNameTable() {
   encodeULEB128(NameTable.size(), OS);
   support::endian::Writer Writer(OS, support::little);
   for (auto N : V)
-    Writer.write(hashFuncName(N));
+    Writer.write(MD5Hash(N));
   return sampleprof_error::success;
 }
 
 std::error_code SampleProfileWriterExtBinaryBase::writeNameTableSection(
     const SampleProfileMap &ProfileMap) {
   for (const auto &I : ProfileMap) {
+    assert(I.first == I.second.getContext() && "Inconsistent profile map");
     addContext(I.second.getContext());
     addNames(I.second);
   }
@@ -725,7 +726,8 @@ SampleProfileWriterBinary::writeHeader(const SampleProfileMap &ProfileMap) {
 
   // Generate the name table for all the functions referenced in the profile.
   for (const auto &I : ProfileMap) {
-    addContext(I.second.getContext());
+    assert(I.first == I.second.getContext() && "Inconsistent profile map");
+    addContext(I.first);
     addNames(I.second);
   }
 

diff  --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
index 79bee8362ef8a9..3ddf5fe20edb27 100644
--- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
+++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
@@ -201,7 +201,7 @@ SampleContextTracker::SampleContextTracker(
     : GUIDToFuncNameMap(GUIDToFuncNameMap) {
   for (auto &FuncSample : Profiles) {
     FunctionSamples *FSamples = &FuncSample.second;
-    SampleContext Context = FuncSample.second.getContext();
+    SampleContext Context = FuncSample.first;
     LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context.toString()
                       << "\n");
     ContextTrieNode *NewNode = getOrCreateContextPath(Context, true);
@@ -638,7 +638,7 @@ void SampleContextTracker::createContextLessProfileMap(
     FunctionSamples *FProfile = Node->getFunctionSamples();
     // Profile's context can be empty, use ContextNode's func name.
     if (FProfile)
-      ContextLessProfiles.Create(Node->getFuncName()).merge(*FProfile);
+      ContextLessProfiles[Node->getFuncName()].merge(*FProfile);
   }
 }
 } // namespace llvm

diff  --git a/llvm/test/tools/llvm-profdata/Inputs/sample-nametable-after-samples.profdata b/llvm/test/tools/llvm-profdata/Inputs/sample-nametable-after-samples.profdata
index 76f19bbc7e513a..699972870dae43 100644
Binary files a/llvm/test/tools/llvm-profdata/Inputs/sample-nametable-after-samples.profdata and b/llvm/test/tools/llvm-profdata/Inputs/sample-nametable-after-samples.profdata 
diff er

diff  --git a/llvm/test/tools/llvm-profdata/sample-nametable.test b/llvm/test/tools/llvm-profdata/sample-nametable.test
index d65b9737a77e31..1d59b9fa43725d 100644
--- a/llvm/test/tools/llvm-profdata/sample-nametable.test
+++ b/llvm/test/tools/llvm-profdata/sample-nametable.test
@@ -8,5 +8,5 @@ RUN: not llvm-profdata show --sample %p/Inputs/sample-nametable-empty-string.pro
 
 3- The data of the name table is placed after the data of the profiles. The reader should handle it correctly.
 RUN: llvm-profdata merge --sample --text %p/Inputs/sample-nametable-after-samples.profdata | FileCheck %s
-CHECK: 18446744073709551613:2:9
+CHECK: 18446744073709551615:2:9
 

diff  --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index a11fc5753497f7..da10ddcc58c642 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -594,7 +594,7 @@ adjustInstrProfile(std::unique_ptr<WriterContext> &WC,
 
   auto checkSampleProfileHasFUnique = [&Reader]() {
     for (const auto &PD : Reader->getProfiles()) {
-      auto &FContext = PD.second.getContext();
+      auto &FContext = PD.first;
       if (FContext.toString().find(FunctionSamples::UniqSuffix) !=
           std::string::npos) {
         return true;
@@ -2833,8 +2833,7 @@ static int showSampleProfile(const std::string &Filename, bool ShowCounts,
           "be printed");
 
     // TODO: parse context string to support filtering by contexts.
-    FunctionSamples *FS = Reader->getSamplesFor(StringRef(ShowFunction));
-    Reader->dumpFunctionProfile(FS ? *FS : FunctionSamples(), OS);
+    Reader->dumpFunctionProfile(StringRef(ShowFunction), OS);
   }
 
   if (ShowProfileSymbolList) {

diff  --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index d307ab46567629..97bc8d59b6cd7b 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -449,7 +449,7 @@ bool ProfileGeneratorBase::collectFunctionsFromRawProfile(
 bool ProfileGenerator::collectFunctionsFromLLVMProfile(
     std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
   for (const auto &FS : ProfileMap) {
-    if (auto *Func = Binary->getBinaryFunction(FS.second.getName()))
+    if (auto *Func = Binary->getBinaryFunction(FS.first.getName()))
       ProfiledFunctions.insert(Func);
   }
   return true;
@@ -468,7 +468,12 @@ bool CSProfileGenerator::collectFunctionsFromLLVMProfile(
 FunctionSamples &
 ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
   SampleContext Context(FuncName);
-  return ProfileMap.Create(Context);
+  auto Ret = ProfileMap.emplace(Context, FunctionSamples());
+  if (Ret.second) {
+    FunctionSamples &FProfile = Ret.first->second;
+    FProfile.setContext(Context);
+  }
+  return Ret.first->second;
 }
 
 void ProfileGenerator::generateProfile() {
@@ -500,14 +505,14 @@ void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
     return;
 
   // Move cold profiles into a tmp container.
-  std::vector<hash_code> ColdProfileHashes;
+  std::vector<SampleContext> ColdProfiles;
   for (const auto &I : ProfileMap) {
     if (I.second.getTotalSamples() < ColdCntThreshold)
-      ColdProfileHashes.emplace_back(I.first);
+      ColdProfiles.emplace_back(I.first);
   }
 
   // Remove the cold profile from ProfileMap.
-  for (const auto &I : ColdProfileHashes)
+  for (const auto &I : ColdProfiles)
     ProfileMap.erase(I);
 }
 
@@ -1013,7 +1018,9 @@ void CSProfileGenerator::postProcessProfiles() {
 
   // Merge function samples of CS profile to calculate profile density.
   sampleprof::SampleProfileMap ContextLessProfiles;
-  ProfileConverter::flattenProfile(ProfileMap, ContextLessProfiles, true);
+  for (const auto &I : ProfileMap) {
+    ContextLessProfiles[I.second.getName()].merge(I.second);
+  }
 
   calculateAndShowDensity(ContextLessProfiles);
   if (GenCSNestedProfile) {

diff  --git a/llvm/unittests/tools/llvm-profdata/CMakeLists.txt b/llvm/unittests/tools/llvm-profdata/CMakeLists.txt
index c53baf69bbc0ef..ad91ce36bcb5be 100644
--- a/llvm/unittests/tools/llvm-profdata/CMakeLists.txt
+++ b/llvm/unittests/tools/llvm-profdata/CMakeLists.txt
@@ -6,7 +6,6 @@ set(LLVM_LINK_COMPONENTS
 
 add_llvm_unittest(LLVMProfdataTests
     OutputSizeLimitTest.cpp
-    MD5CollisionTest.cpp
   )
 
 target_link_libraries(LLVMProfdataTests PRIVATE LLVMTestingSupport)

diff  --git a/llvm/unittests/tools/llvm-profdata/MD5CollisionTest.cpp b/llvm/unittests/tools/llvm-profdata/MD5CollisionTest.cpp
deleted file mode 100644
index 4ae57cd711c179..00000000000000
--- a/llvm/unittests/tools/llvm-profdata/MD5CollisionTest.cpp
+++ /dev/null
@@ -1,166 +0,0 @@
-//===- llvm/unittests/tools/llvm-profdata/MD5CollisionTest.cpp ------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-/// Test whether the MD5-key SampleProfileMap can handle collision correctly.
-/// Probability of collision is rare but not negligible since we only use the
-/// lower 64 bits of the MD5 value. A unit test is required because the function
-/// names are not printable ASCII characters.
-
-#include "llvm/ProfileData/SampleProfReader.h"
-#include "llvm/Support/VirtualFileSystem.h"
-#include "llvm/Testing/Support/Error.h"
-#include "gtest/gtest.h"
-
-/// According to https://en.wikipedia.org/wiki/MD5#Preimage_vulnerability, the
-/// MD5 of the two strings are 79054025255fb1a26e4bc422aef54eb4.
-
-// First 8 bytes of the MD5.
-const llvm::hash_code ExpectedHash = 0xa2b15f2525400579;
-
-// clang-format off
-const uint8_t ProfileData[] = {
-    0x84, 0xe4, 0xd0, 0xb1, 0xf4, 0xc9, 0x94, 0xa8,
-    0x53, 0x67, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x7D, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x80, 0x01, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x90, 0x01, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00,
-
-    /// Name Table
-    0x02,
-    /// String1
-    0xd1, 0x31, 0xdd, 0x02, 0xc5, 0xe6, 0xee, 0xc4,
-    0x69, 0x3d, 0x9a, 0x06, 0x98, 0xaf, 0xf9, 0x5c,
-    0x2f, 0xca, 0xb5, 0x87, 0x12, 0x46, 0x7e, 0xab,
-    0x40, 0x04, 0x58, 0x3e, 0xb8, 0xfb, 0x7f, 0x89,
-    0x55, 0xad, 0x34, 0x06, 0x09, 0xf4, 0xb3, 0x02,
-    0x83, 0xe4, 0x88, 0x83, 0x25, 0x71, 0x41, 0x5a,
-    0x08, 0x51, 0x25, 0xe8, 0xf7, 0xcd, 0xc9, 0x9f,
-    0xd9, 0x1d, 0xbd, 0xf2, 0x80, 0x37, 0x3c, 0x5b,
-    0xd8, 0x82, 0x3e, 0x31, 0x56, 0x34, 0x8f, 0x5b,
-    0xae, 0x6d, 0xac, 0xd4, 0x36, 0xc9, 0x19, 0xc6,
-    0xdd, 0x53, 0xe2, 0xb4, 0x87, 0xda, 0x03, 0xfd,
-    0x02, 0x39, 0x63, 0x06, 0xd2, 0x48, 0xcd, 0xa0,
-    0xe9, 0x9f, 0x33, 0x42, 0x0f, 0x57, 0x7e, 0xe8,
-    0xce, 0x54, 0xb6, 0x70, 0x80, 0xa8, 0x0d, 0x1e,
-    0xc6, 0x98, 0x21, 0xbc, 0xb6, 0xa8, 0x83, 0x93,
-    0x96, 0xf9, 0x65, 0x2b, 0x6f, 0xf7, 0x2a, 0x70, 0x00,
-    /// String2
-    0xd1, 0x31, 0xdd, 0x02, 0xc5, 0xe6, 0xee, 0xc4,
-    0x69, 0x3d, 0x9a, 0x06, 0x98, 0xaf, 0xf9, 0x5c,
-    0x2f, 0xca, 0xb5, 0x07, 0x12, 0x46, 0x7e, 0xab,
-    0x40, 0x04, 0x58, 0x3e, 0xb8, 0xfb, 0x7f, 0x89,
-    0x55, 0xad, 0x34, 0x06, 0x09, 0xf4, 0xb3, 0x02,
-    0x83, 0xe4, 0x88, 0x83, 0x25, 0xf1, 0x41, 0x5a,
-    0x08, 0x51, 0x25, 0xe8, 0xf7, 0xcd, 0xc9, 0x9f,
-    0xd9, 0x1d, 0xbd, 0x72, 0x80, 0x37, 0x3c, 0x5b,
-    0xd8, 0x82, 0x3e, 0x31, 0x56, 0x34, 0x8f, 0x5b,
-    0xae, 0x6d, 0xac, 0xd4, 0x36, 0xc9, 0x19, 0xc6,
-    0xdd, 0x53, 0xe2, 0x34, 0x87, 0xda, 0x03, 0xfd,
-    0x02, 0x39, 0x63, 0x06, 0xd2, 0x48, 0xcd, 0xa0,
-    0xe9, 0x9f, 0x33, 0x42, 0x0f, 0x57, 0x7e, 0xe8,
-    0xce, 0x54, 0xb6, 0x70, 0x80, 0x28, 0x0d, 0x1e,
-    0xc6, 0x98, 0x21, 0xbc, 0xb6, 0xa8, 0x83, 0x93,
-    0x96, 0xf9, 0x65, 0xab, 0x6f, 0xf7, 0x2a, 0x70, 0x00,
-
-    /// FuncOffsetTable
-    0x02, 0x00, 0x00, 0x01, 0x17, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-
-    /// Samples
-    /// String1:10:1
-    ///  1: 5
-    ///  2.3: 6
-    ///  4: String2:100
-    ///   1: 100
-    /// String2:7:3
-    ///  9: 0
-    0x01, 0x00, 0x0a, 0x02, 0x01, 0x00, 0x05, 0x00,
-    0x02, 0x03, 0x06, 0x00, 0x01, 0x04, 0x00, 0x01,
-    0x64, 0x01, 0x01, 0x00, 0x64, 0x00, 0x00,
-
-    0x03, 0x01, 0x07, 0x01, 0x09, 0x00, 0x00, 0x00,
-    0x00};
-// clang-format on
-
-using namespace llvm;
-using namespace llvm::sampleprof;
-
-TEST(MD5CollisionTest, TestCollision) {
-  auto InputBuffer = MemoryBuffer::getMemBuffer(
-      StringRef(reinterpret_cast<const char *>(ProfileData),
-                sizeof(ProfileData)),
-      "", false);
-  LLVMContext Context;
-  auto FileSystem = vfs::getRealFileSystem();
-  auto Result = SampleProfileReader::create(InputBuffer, Context, *FileSystem);
-  ASSERT_TRUE(Result);
-  SampleProfileReader *Reader = Result->get();
-  ASSERT_FALSE(Reader->read());
-
-  std::vector<StringRef> &NameTable = *Reader->getNameTable();
-  ASSERT_EQ(NameTable.size(), 2U);
-  StringRef S1 = NameTable[0];
-  StringRef S2 = NameTable[1];
-  ASSERT_NE(S1, S2);
-  ASSERT_EQ(hash_code(MD5Hash(S1)), ExpectedHash);
-  ASSERT_EQ(hash_code(MD5Hash(S2)), ExpectedHash);
-
-  // S2's MD5 value collides with S1, S1 is expected to be dropped when S2 is
-  // inserted, as if S1 never existed.
-
-  FunctionSamples ExpectedFS;
-  ExpectedFS.setName(S2);
-  ExpectedFS.setHeadSamples(3);
-  ExpectedFS.setTotalSamples(7);
-  ExpectedFS.addBodySamples(9, 0, 0);
-
-  SampleProfileMap &Profiles = Reader->getProfiles();
-  EXPECT_EQ(Profiles.size(), 1U);
-  if (Profiles.size()) {
-    auto &[Hash, FS] = *Profiles.begin();
-    EXPECT_EQ(Hash, ExpectedHash);
-    EXPECT_EQ(FS, ExpectedFS);
-  }
-
-  // Inserting S2 again should fail, returning the existing sample unchanged.
-  auto [It1, Inserted1] = Profiles.try_emplace(S2, FunctionSamples());
-  EXPECT_FALSE(Inserted1);
-  EXPECT_EQ(Profiles.size(), 1U);
-  if (Profiles.size()) {
-    auto &[Hash, FS] = *It1;
-    EXPECT_EQ(Hash, ExpectedHash);
-    EXPECT_EQ(FS, ExpectedFS);
-  }
-
-  // Inserting S1 should success as if S2 never existed, and S2 is erased.
-  FunctionSamples FS1;
-  FS1.setName(S1);
-  FS1.setHeadSamples(5);
-  FS1.setTotalSamples(10);
-  FS1.addBodySamples(1, 2, 5);
-
-  auto [It2, Inserted2] = Profiles.try_emplace(S1, FS1);
-  EXPECT_TRUE(Inserted2);
-  EXPECT_EQ(Profiles.size(), 1U);
-  if (Profiles.size()) {
-    auto &[Hash, FS] = *It2;
-    EXPECT_EQ(Hash, ExpectedHash);
-    EXPECT_EQ(FS, FS1);
-  }
-}

diff  --git a/llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp b/llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp
index d21c378e8ce3b2..522a2c2dd33677 100644
--- a/llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp
+++ b/llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp
@@ -126,7 +126,7 @@ static ExpectedErrorOr<void *> RunTest(StringRef Input, size_t SizeLimit,
   // For every sample in the new profile, confirm it is in the old profile and
   // unchanged.
   for (auto Sample : NewProfiles) {
-    auto FindResult = OldProfiles.find(Sample.second.getContext());
+    auto FindResult = OldProfiles.find(Sample.first);
     EXPECT_NE(FindResult, OldProfiles.end());
     if (FindResult != OldProfiles.end()) {
       EXPECT_EQ(Sample.second.getHeadSamples(),


        


More information about the llvm-commits mailing list